class SFTPSensor(BaseSensorOperator):
    """Sensor that succeeds once matching files in an SFTP directory stop changing.

    Each poke lists ``filepath`` twice, 30 seconds apart, and treats a file as
    ready when its name matches ``filepattern`` and its ``size`` and ``modify``
    attributes are identical in both listings.

    :param filepath: remote directory to list.
    :param filepattern: regular expression applied to each file name with
        ``re.match`` semantics (anchored at the start of the name).
    :param sftp_conn_id: connection id handed to ``SFTPHook``.
    """

    @apply_defaults
    def __init__(self, filepath, filepattern, sftp_conn_id='sftp_default',
                 *args, **kwargs):
        super(SFTPSensor, self).__init__(*args, **kwargs)
        self.filepath = filepath
        self.filepattern = filepattern
        self.sftp_conn_id = sftp_conn_id
        self.hook = SFTPHook(ftp_conn_id=sftp_conn_id, keepalive_interval=10)

    def poke(self, context):
        """Return True when at least one matching file is stable.

        The list of stable file names is pushed to XCom under the key
        ``file_name`` whenever the second listing is reached (the list may be
        empty). Returns False when nothing matches the pattern, or when the
        listing raises an ``IOError`` whose errno is ``SFTP_NO_SUCH_FILE``;
        any other ``IOError`` is re-raised.
        """
        full_path = self.filepath
        file_pattern = re.compile(self.filepattern)
        fileList = []
        try:
            isFound = False
            directory = self.hook.describe_directory(full_path)
            logging.info('Polling Interval 1')

            # Collect matches into a new dict instead of deleting from
            # ``directory`` while iterating over it: on Python 3 that raises
            # "RuntimeError: dictionary changed size during iteration".
            candidates = {}
            for file, attributes in directory.items():
                if file_pattern.match(file):
                    candidates[file] = attributes
                else:
                    self.log.info(file)
                    self.log.info(file_pattern)

            if not candidates:
                # If directory has no files that match the mask, exit
                return isFound

            # Wait before comparing sizes and timestamps again, to verify the
            # file is done transferring to the remote location.
            time.sleep(30)
            logging.info('Post-Wait Polling')
            newDirectoryResults = self.hook.describe_directory(full_path)

            for file, attributes in newDirectoryResults.items():
                if file not in candidates:
                    continue
                earlier = candidates[file]
                if attributes['size'] == earlier['size'] and \
                        attributes['modify'] == earlier['modify']:
                    fileList.append(file)
                    print(
                        'filename: {} with size {} and modified time of {} met all criteria to be moved.'
                        .format(file, attributes['size'], attributes['modify']))
                    isFound = True

            context["task_instance"].xcom_push("file_name", fileList)
            return isFound
        except IOError as e:
            if e.errno != SFTP_NO_SUCH_FILE:
                # Bare raise keeps the original traceback.
                raise
            return False
def pollForFiles(**kwargs):
    """Tag files that have finished uploading by renaming them to ``*.ready``.

    Expected keyword arguments:

    * ``SFTP_Connection_Name`` -- connection id passed to ``SFTPHook``.
    * ``Feed_Groups`` -- iterable of dicts, each with the keys
      ``Feed_Group_Location`` (remote directory), ``Feed_Group_Regex``
      (pattern applied with ``re.match``) and ``Feed_Group_Name`` (used in
      log messages).

    Every feed group directory is listed, then listed again after
    ``SLEEP_TIME`` seconds. A file whose ``size`` and ``modify`` attributes
    are unchanged between the two listings is renamed with a ``.ready``
    suffix.

    :return: ``0`` when no file matches any feed group; otherwise ``None``.
    :raises Exception: any error raised while listing or renaming is logged
        and re-raised.
    """
    # Create some local scope variables for use later in proc
    sftpConnName = kwargs['SFTP_Connection_Name']
    feedGroups = kwargs['Feed_Groups']

    # Connect to SFTP site using provided credentials - should be saved in Connections
    sourceHook = SFTPHook(ftp_conn_id=sftpConnName)

    try:
        # Files that match a feed group's mask, keyed by full remote path.
        fileMatches = {}

        # Loop through feed locations and their regex for this SFTP site.
        for group in feedGroups:
            fullPath = group['Feed_Group_Location']
            filePattern = group['Feed_Group_Regex']
            feedGroupName = group['Feed_Group_Name']
            logging.info('Evaluating Feed Group {}'.format(feedGroupName))
            try:
                directory = sourceHook.describe_directory(path=fullPath)
                for file in directory.keys():
                    if re.match(filePattern, file):
                        fileMatches[os.path.join(fullPath, file)] = directory[file]
            except Exception:
                logging.error('Error attempting to poll feed group {} in directory {}'.format(feedGroupName, fullPath))
                raise

        # If we do not find a file that matches a file mask in any of the directories, exit.
        if not fileMatches:
            return 0

        # If no trigger files or renaming is utilized by the client when placing
        # files on SFTP, we have to resort to polling for files, waiting for a
        # time period and then comparing the size/modified time to see if they
        # are ready to pull down.
        time.sleep(SLEEP_TIME)

        for group in feedGroups:
            fullPath = group['Feed_Group_Location']
            feedGroupName = group['Feed_Group_Name']
            logging.info('Evaluating Feed Group {} after sleeping'.format(feedGroupName))
            try:
                newDirResults = sourceHook.describe_directory(fullPath)
                for file in newDirResults:
                    fullFilePath = os.path.join(fullPath, file)
                    if fullFilePath not in fileMatches:
                        continue
                    earlier = fileMatches[fullFilePath]
                    if newDirResults[file]['size'] == earlier['size'] and \
                            newDirResults[file]['modify'] == earlier['modify']:
                        # File hasn't changed size or modified time since the
                        # first look: mark it ready for another process to
                        # pick up and transfer.
                        sourceHook.conn.rename(fullFilePath, fullFilePath + '.ready')
                        logging.info('Tagged the {} file as ready.'.format(fullFilePath))
            except Exception:
                logging.error('Error attempting to rename files in feed group {} in directory {}'.format(feedGroupName, fullPath))
                raise
    finally:
        # Release the SFTP connection on every exit path; previously it was
        # never closed. The guard covers the case where no connection was
        # ever opened.
        if sourceHook.conn:
            sourceHook.close_conn()
def pollForFiles(**kwargs):
    """
    Poll the flatfiles directory for files to process.

    Every file in ``autoPath`` whose name matches ``fileRegex`` is moved to
    ``processPath`` and a ``ProcessStoreFeed`` DAG run is triggered for it,
    with the file name and processing path passed as JSON ``conf``.

    :raises Exception: an error while listing ``autoPath`` is logged and
        re-raised; errors from the rename or trigger calls propagate as-is.
    """
    fileRegex = r'^BP_STORE.*\.txt$'
    autoPath = '/airflow_test/postgresdb_etl_poc_flatfiles/auto'
    processPath = '/airflow_test/postgresdb_etl_poc_flatfiles/processing'

    sourceHook = SFTPHook(ftp_conn_id='kub2VM')

    try:
        fileMatches = {}
        try:
            directory = sourceHook.describe_directory(path=autoPath)
            for file in directory.keys():
                if re.match(fileRegex, file):
                    fileMatches[file] = directory[file]
        except Exception:
            logging.error('Error attempting to poll directory {}'.format(autoPath))
            raise

        print('FileMatches: {}'.format(fileMatches))

        for file in fileMatches:
            sourceHook.conn.rename(os.path.join(autoPath, file),
                                   os.path.join(processPath, file))

            triggerConfig = {
                'fileName': file,
                'processPath': processPath,
            }
            trigger_dag(dag_id='ProcessStoreFeed',
                        run_id='trig_{}'.format(timezone.utcnow().isoformat()),
                        conf=json.dumps(triggerConfig),
                        execution_date=None,
                        replace_microseconds=False)
            logging.info('Triggered DAG Job for File: {}'.format(file))

            # Introduce a delay between scheduling dags so there is an order
            # to execution; submitting several sub-second apart might make
            # them all run at once.
            time.sleep(10)
    finally:
        # Close on every exit path so a failed rename or trigger does not
        # leak the SFTP connection.
        if sourceHook.conn:
            sourceHook.close_conn()
def sftp_to_pg(**kwargs):
    """Download today's PNC file over SFTP and reload ``escrow.escrow`` from it.

    Lists ``/Home/IET/PNC`` on the ``sftp_cityftp`` connection, takes the
    first entry whose name starts with ``tls.cityofdetroit.out.<yymmdd>`` for
    today's date, downloads it to ``/tmp`` and then truncates and re-populates
    ``escrow.escrow`` through the ``etl_postgres`` connection with ``COPY``.

    NOTE(review): ``COPY ... FROM '<path>'`` is read by the database server,
    so this presumably relies on the server seeing the same ``/tmp`` --
    confirm.

    :raises IndexError: when no file for today's date is present. The type is
        the one the original list indexing raised; a message is now attached.
    """
    remote_dir = '/Home/IET/PNC'
    today = datetime.date.today().strftime('%y%m%d')
    prefix = f"tls.cityofdetroit.out.{today}"

    conn = SFTPHook('sftp_cityftp')
    files = conn.describe_directory(remote_dir)

    # First matching name in listing order, as the original ``[...][0]`` did.
    file_name = next((fn for fn in files if fn.startswith(prefix)), None)
    if file_name is None:
        raise IndexError(
            f"no file starting with {prefix!r} found in {remote_dir}"
        )

    local_path = f"/tmp/{file_name}"
    conn.retrieve_file(f"{remote_dir}/{file_name}", local_path)

    pg_conn = PostgresHook('etl_postgres')
    pg_conn.run("truncate table escrow.escrow")
    # The file name comes from the remote server, so bind it as a parameter
    # rather than interpolating it into the SQL text.
    pg_conn.run(
        "copy escrow.escrow from %s (FORMAT CSV, HEADER FALSE) ",
        parameters=(local_path,),
    )
class SFTPHookTest(unittest.TestCase):
    """Tests for ``SFTPHook`` using fixtures created under ``TMP_PATH``.

    NOTE(review): the non-mocked tests go through a real ``SFTPHook()``
    connection and expect locally created fixtures to be visible through it,
    so the default connection presumably points at this machine -- confirm.
    """

    def setUp(self):
        """Load the test configuration and create the local dir and file."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        output = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, output)

    def test_list_directory(self):
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        new_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name)

        self.hook.create_directory(new_dir)
        output = self.hook.describe_directory(test_dir)
        self.assertIn(new_dir_name, output)

        self.hook.delete_directory(new_dir)
        output = self.hook.describe_directory(test_dir)
        self.assertNotIn(new_dir_name, output)

    def test_store_retrieve_and_delete_file(self):
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        remote_file = os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS)

        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(path=test_dir)
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])

        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name))
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))

        self.hook.delete_file(path=remote_file)
        output = self.hook.list_directory(path=test_dir)
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        remote_file = os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS)
        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.get_mod_time(path=remote_file)
        # Only the length is checked; presumably a YYYYMMDDHHMMSS stamp.
        self.assertEqual(len(output), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        connection = Connection(login='******', host='host')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        connection = Connection(login='******', host='host',
                                extra='{"no_host_key_check": true}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        connection = Connection(login='******', host='host',
                                extra='{"no_host_key_check": false}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        connection = Connection(login='******', host='host',
                                extra='{"no_host_key_check": "foo"}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        connection = Connection(login='******', host='host',
                                extra='{"ignore_hostkey_verification": true}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        connection = Connection(login='******', host='host',
                                extra='{"ignore_hostkey_verification": false}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    def tearDown(self):
        """Remove the directory and file created in ``setUp``."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
def lookAtDir(**context):
    """Log the contents of ``SOURCE_FILEPATH`` on the source SFTP connection."""
    hook = SFTPHook(ftp_conn_id=SOURCE_SFTP_CONN_ID)
    listing = hook.describe_directory(SOURCE_FILEPATH)
    message = 'Directory Contents: {}'.format(listing)
    logging.info(message)
class TestSFTPHook(unittest.TestCase):
    """Tests for ``SFTPHook`` using fixtures created under ``TMP_PATH``.

    ``setUp`` temporarily switches the login of the ``sftp_default``
    connection to ``SFTP_CONNECTION_USER``; ``tearDown`` restores it.

    NOTE(review): the non-mocked tests go through a real ``SFTPHook()``
    connection and expect locally created fixtures to be visible through it,
    so the default connection presumably points at this machine -- confirm.
    """

    @provide_session
    def update_connection(self, login, session=None):
        """Set the login of ``sftp_default`` and return the previous login."""
        connection = (session.query(Connection).filter(
            Connection.conn_id == "sftp_default").first())
        old_login = connection.login
        connection.login = login
        session.commit()
        return old_login

    def setUp(self):
        """Swap the connection login and create the local fixture tree."""
        self.old_login = self.update_connection(SFTP_CONNECTION_USER)
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR))

        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as file:
            file.write('Test file')
        with open(
                os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR,
                             TMP_FILE_FOR_TESTS), 'a') as file:
            file.write('Test file')

    def test_get_conn(self):
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        output = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, output)

    def test_list_directory(self):
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [SUB_DIR])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        new_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name)

        self.hook.create_directory(new_dir)
        output = self.hook.describe_directory(test_dir)
        self.assertIn(new_dir_name, output)

        self.hook.delete_directory(new_dir)
        output = self.hook.describe_directory(test_dir)
        self.assertNotIn(new_dir_name, output)

    def test_create_and_delete_directories(self):
        base_dir = "base_dir"
        sub_dir = "sub_dir"
        new_dir_path = os.path.join(base_dir, sub_dir)
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)

        # Creating the nested path is expected to create base_dir as well.
        self.hook.create_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_path))
        output = self.hook.describe_directory(test_dir)
        self.assertIn(base_dir, output)
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, base_dir))
        self.assertIn(sub_dir, output)

        # Delete innermost first, then its parent.
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_path))
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, base_dir))
        output = self.hook.describe_directory(test_dir)
        self.assertNotIn(new_dir_path, output)
        self.assertNotIn(base_dir, output)

    def test_store_retrieve_and_delete_file(self):
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        remote_file = os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS)

        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(path=test_dir)
        self.assertEqual(output, [SUB_DIR, TMP_FILE_FOR_TESTS])

        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name))
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))

        self.hook.delete_file(path=remote_file)
        output = self.hook.list_directory(path=test_dir)
        self.assertEqual(output, [SUB_DIR])

    def test_get_mod_time(self):
        remote_file = os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS)
        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.get_mod_time(path=remote_file)
        # Only the length is checked; presumably a YYYYMMDDHHMMSS stamp.
        self.assertEqual(len(output), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        connection = Connection(login='******', host='host')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        connection = Connection(login='******', host='host',
                                extra='{"no_host_key_check": true}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        connection = Connection(login='******', host='host',
                                extra='{"no_host_key_check": false}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        connection = Connection(login='******', host='host',
                                extra='{"no_host_key_check": "foo"}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        connection = Connection(login='******', host='host',
                                extra='{"ignore_hostkey_verification": true}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        connection = Connection(login='******', host='host',
                                extra='{"ignore_hostkey_verification": false}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @parameterized.expand([
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS), True),
        (os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), True),
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS + "abc"), False),
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, "abc"), False),
    ])
    def test_path_exists(self, path, exists):
        result = self.hook.path_exists(path)
        self.assertEqual(result, exists)

    @parameterized.expand([
        ("test/path/file.bin", None, None, True),
        ("test/path/file.bin", "test", None, True),
        ("test/path/file.bin", "test/", None, True),
        ("test/path/file.bin", None, "bin", True),
        ("test/path/file.bin", "test", "bin", True),
        ("test/path/file.bin", "test/", "file.bin", True),
        ("test/path/file.bin", None, "file.bin", True),
        ("test/path/file.bin", "diff", None, False),
        ("test/path/file.bin", "test//", None, False),
        ("test/path/file.bin", None, ".txt", False),
        ("test/path/file.bin", "diff", ".txt", False),
    ])
    def test_path_match(self, path, prefix, delimiter, match):
        result = self.hook._is_path_match(path=path, prefix=prefix,
                                          delimiter=delimiter)
        self.assertEqual(result, match)

    def test_get_tree_map(self):
        tree_map = self.hook.get_tree_map(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        files, dirs, unknowns = tree_map

        self.assertEqual(files, [
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR,
                         TMP_FILE_FOR_TESTS)
        ])
        self.assertEqual(
            dirs, [os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR)])
        self.assertEqual(unknowns, [])

    def tearDown(self):
        """Remove the fixtures and always restore the original login."""
        try:
            shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
            os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        finally:
            # Restore the login even if filesystem cleanup fails, so the
            # modified connection does not leak into other tests.
            self.update_connection(self.old_login)
def pollForFiles(**kwargs):
    """Tag stable files as ``*.ready`` and trigger a transfer DAG run for each.

    Expected keyword arguments:

    * ``SFTP_Name`` -- name used in log messages and forwarded to the
      triggered DAG.
    * ``SFTP_Connection_Name`` -- connection id passed to ``SFTPHook``.
    * ``Feed_Groups`` -- iterable of dicts, each with the keys
      ``Feed_Group_Location`` (remote directory), ``Feed_Group_Regex``
      (pattern applied with ``re.match``) and ``Feed_Group_Name``.

    Every feed group directory is listed, then listed again after
    ``SLEEP_TIME`` seconds. A matching file whose ``size`` and ``modify``
    attributes are unchanged between the two listings is renamed with a
    ``.ready`` suffix, and a ``SingleFileTransferJob`` DAG run is triggered
    with a JSON ``conf`` built from the SFTP details, the renamed path and
    the feed group dict.

    :return: ``0`` when no file matches any feed group; otherwise ``None``.
    :raises Exception: any error raised while listing, renaming or triggering
        is logged and re-raised.
    """
    # Create some local scope variables for use later in proc
    sftpName = kwargs['SFTP_Name']
    sftpConnName = kwargs['SFTP_Connection_Name']
    feedGroups = kwargs['Feed_Groups']

    # Connect to SFTP site using provided credentials - should be saved in Connections
    sourceHook = SFTPHook(ftp_conn_id=sftpConnName)

    try:
        # Files that match a feed group's mask, keyed by full remote path.
        fileMatches = {}

        # Loop through feed locations and their regex for this SFTP site.
        for group in feedGroups:
            fullPath = group['Feed_Group_Location']
            filePattern = group['Feed_Group_Regex']
            feedGroupName = group['Feed_Group_Name']
            try:
                directory = sourceHook.describe_directory(path=fullPath)
                for file in directory.keys():
                    if re.match(filePattern, file):
                        fileMatches[os.path.join(fullPath, file)] = directory[file]
            except Exception:
                logging.error('Error attempting to poll feed group {} in directory {}'.format(feedGroupName, fullPath))
                raise

        # If we do not find a file that matches a file mask in any of the directories, exit.
        if not fileMatches:
            return 0

        # If no trigger files or renaming is utilized by the client when placing
        # files on SFTP, we have to resort to polling for files, waiting for a
        # time period and then comparing the size/modified time to see if they
        # are ready to pull down.
        time.sleep(SLEEP_TIME)

        for group in feedGroups:
            fullPath = group['Feed_Group_Location']
            filePattern = group['Feed_Group_Regex']
            feedGroupName = group['Feed_Group_Name']
            newFileMatches = {}
            try:
                newDirResults = sourceHook.describe_directory(fullPath)

                # Add only the files that match regular expression for this feed group
                for file in newDirResults.keys():
                    if re.match(filePattern, file):
                        newFileMatches[os.path.join(fullPath, file)] = newDirResults[file]

                for file in newFileMatches:
                    if file not in fileMatches:
                        continue
                    if newFileMatches[file]['size'] == fileMatches[file]['size'] and \
                            newFileMatches[file]['modify'] == fileMatches[file]['modify']:
                        readyFile = file + '.ready'

                        # File hasn't changed size or modified time since the
                        # first look: mark it ready for another process to
                        # pick up and transfer.
                        sourceHook.conn.rename(file, readyFile)
                        logging.info('SFTP: {} FeedGroup: {} File: {} is ready.'.format(sftpName, feedGroupName, os.path.basename(file)))

                        triggerConfig = {
                            'SFTP_Name': sftpName,
                            'SFTP_Connection_Name': sftpConnName,
                            'File_Name': readyFile,
                        }
                        # Feed group keys are merged last, so they win on a
                        # key clash.
                        triggerConfig.update(group)

                        trigger_dag(
                            dag_id='SingleFileTransferJob',
                            run_id='trig_{}'.format(timezone.utcnow().isoformat()),
                            conf=json.dumps(triggerConfig),
                            execution_date=None,
                            replace_microseconds=False
                        )
            except Exception:
                logging.error('Error attempting to rename files in feed group {} in directory {}'.format(feedGroupName, fullPath))
                raise
    finally:
        # Release the SFTP connection on every exit path; previously it was
        # never closed. The guard covers the case where no connection was
        # ever opened.
        if sourceHook.conn:
            sourceHook.close_conn()
class SFTPHookTest(unittest.TestCase):
    """Tests for ``SFTPHook`` using fixtures created under ``TMP_PATH``.

    NOTE(review): the tests go through a real ``SFTPHook()`` connection and
    expect locally created fixtures to be visible through it, so the default
    connection presumably points at this machine -- confirm.
    """

    def setUp(self):
        """Load the test configuration and create the local dir and file."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        output = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, output)

    def test_list_directory(self):
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        new_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name)

        self.hook.create_directory(new_dir)
        output = self.hook.describe_directory(test_dir)
        self.assertIn(new_dir_name, output)

        self.hook.delete_directory(new_dir)
        output = self.hook.describe_directory(test_dir)
        self.assertNotIn(new_dir_name, output)

    def test_store_retrieve_and_delete_file(self):
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        remote_file = os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS)

        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(path=test_dir)
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])

        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name))
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))

        self.hook.delete_file(path=remote_file)
        output = self.hook.list_directory(path=test_dir)
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        remote_file = os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS)
        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.get_mod_time(path=remote_file)
        # Only the length is checked; presumably a YYYYMMDDHHMMSS stamp.
        self.assertEqual(len(output), 14)

    def tearDown(self):
        """Remove the directory and file created in ``setUp``."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
class SFTPHookTest(unittest.TestCase):
    """Tests for ``SFTPHook`` using fixtures created under ``TMP_PATH``.

    NOTE(review): the tests go through a real ``SFTPHook()`` connection and
    expect locally created fixtures to be visible through it, so the default
    connection presumably points at this machine -- confirm.
    """

    def setUp(self):
        """Load the test configuration and create the local dir and file."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        output = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, output)

    def test_list_directory(self):
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        new_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name)

        self.hook.create_directory(new_dir)
        output = self.hook.describe_directory(test_dir)
        self.assertIn(new_dir_name, output)

        self.hook.delete_directory(new_dir)
        output = self.hook.describe_directory(test_dir)
        self.assertNotIn(new_dir_name, output)

    def test_store_retrieve_and_delete_file(self):
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        remote_file = os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS)

        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        )
        output = self.hook.list_directory(path=test_dir)
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])

        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name)
        )
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))

        self.hook.delete_file(path=remote_file)
        output = self.hook.list_directory(path=test_dir)
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        remote_file = os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS)
        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        )
        output = self.hook.get_mod_time(path=remote_file)
        # Only the length is checked; presumably a YYYYMMDDHHMMSS stamp.
        self.assertEqual(len(output), 14)

    def tearDown(self):
        """Remove the directory and file created in ``setUp``."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
class SFTPHookTest(unittest.TestCase):
    """Tests for ``SFTPHook`` using fixtures created under ``TMP_PATH``.

    NOTE(review): the non-mocked tests go through a real ``SFTPHook()``
    connection and expect locally created fixtures to be visible through it,
    so the default connection presumably points at this machine -- confirm.
    """

    def setUp(self):
        """Load the test configuration and create the local dir and file."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        output = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, output)

    def test_list_directory(self):
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        new_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name)

        self.hook.create_directory(new_dir)
        output = self.hook.describe_directory(test_dir)
        self.assertIn(new_dir_name, output)

        self.hook.delete_directory(new_dir)
        output = self.hook.describe_directory(test_dir)
        self.assertNotIn(new_dir_name, output)

    def test_store_retrieve_and_delete_file(self):
        test_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        remote_file = os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS)

        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        )
        output = self.hook.list_directory(path=test_dir)
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])

        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name)
        )
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))

        self.hook.delete_file(path=remote_file)
        output = self.hook.list_directory(path=test_dir)
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        remote_file = os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS)
        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        )
        output = self.hook.get_mod_time(path=remote_file)
        # Only the length is checked; presumably a YYYYMMDDHHMMSS stamp.
        self.assertEqual(len(output), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        connection = Connection(login='******', host='host')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        connection = Connection(
            login='******', host='host',
            extra='{"no_host_key_check": true}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        connection = Connection(
            login='******', host='host',
            extra='{"no_host_key_check": false}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        connection = Connection(
            login='******', host='host',
            extra='{"no_host_key_check": "foo"}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        connection = Connection(
            login='******', host='host',
            extra='{"ignore_hostkey_verification": true}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        connection = Connection(
            login='******', host='host',
            extra='{"ignore_hostkey_verification": false}')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    def tearDown(self):
        """Remove the directory and file created in ``setUp``."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
def get_file(**kwargs):
    """Fetch a remote file over the ``sftp_novatus`` connection into ``/tmp``.

    NOTE(review): ``path`` and ``file_name`` are not defined in this
    function; they must be supplied by an enclosing or module scope --
    verify against the rest of the file.
    """
    hook = SFTPHook('sftp_novatus')
    # The '/outgoing/' listing is requested but its result is not used here.
    hook.describe_directory('/outgoing/')
    hook.retrieve_file(path, f"/tmp/{file_name}")