def archive_files_in_sftp(**context):
    """Move the transferred Alma files into a dated archive folder on the SFTP site.

    The file list and the ``most_recent_date`` value are pulled from the XCom
    of the ``get_list_of_alma_sftp_files_to_transer`` task. The folder
    ``./archive/<most_recent_date>`` is created when missing and every file
    is renamed into it.

    :param context: Airflow task context; ``context['task_instance']`` is
        used to pull the XCom values.
    :return: the number of files that were moved.
    """
    sftp_conn = SFTPHook(ftp_conn_id=ALMA_SFTP_CONNECTION_ID)
    # Paramiko is the underlying package used for SSH/SFTP conns
    # the paramiko client exposes a lot more core SFTP functionality
    paramiko_conn = sftp_conn.get_conn()

    task_instance = context['task_instance']
    most_recent_date = task_instance.xcom_pull(
        task_ids='get_list_of_alma_sftp_files_to_transer',
        key='most_recent_date')
    list_of_files = task_instance.xcom_pull(
        task_ids='get_list_of_alma_sftp_files_to_transer')

    archive_path = "archive"
    if archive_path not in sftp_conn.list_directory("./"):
        sftp_conn.create_directory(path=f"./{archive_path}")
    # Checked independently of the branch above: a freshly created archive
    # folder still needs its dated sub-folder, otherwise the renames below
    # would target a directory that does not exist.
    if str(most_recent_date) not in sftp_conn.list_directory(
            f"./{archive_path}"):
        sftp_conn.create_directory(f"./{archive_path}/{most_recent_date}")

    count = 0
    # xcom_pull yields None when the upstream task pushed nothing.
    for filename in list_of_files or []:
        logging.info(
            f"Moving {filename} to {archive_path}/{most_recent_date}/{filename}"
        )
        paramiko_conn.rename(f"{filename}",
                             f"{archive_path}/{most_recent_date}/{filename}")
        count += 1
    return count
def execute(self, context):
    """Copy files from the source SFTP directory to each configured target.

    Each entry of ``self.target_full_path`` is indexed as
    ``(connection id, remote directory)``. A source file is transferred when
    it passes ``self.filter_function`` (if one is set) and is either missing
    on the target or ``self.overwrite_target`` is ``True``. Files are staged
    in ``self.work_path`` and the staged copy is removed after the upload
    attempt, whether or not it succeeded.
    """
    src_hook = SFTPHook(ftp_conn_id=self.source_conn_id)
    src_listing = src_hook.list_directory(self.source_path)

    for target in self.target_full_path:
        target_connection, target_path = target[0], target[1]
        self.log.info(
            f"Beginning transfer to SFTP site {target_connection} and directory {target_path}"
        )
        dst_hook = SFTPHook(ftp_conn_id=target_connection)
        dst_listing = dst_hook.list_directory(target_path)

        for filename in src_listing:
            # Skip names rejected by the optional filter.
            if self.filter_function is not None and not self.filter_function(filename):
                continue
            # Skip files already on the target unless overwriting is enabled.
            if self.overwrite_target is not True and filename in dst_listing:
                continue

            staged_copy = op.join(self.work_path, filename)
            src_hook.retrieve_file(op.join(self.source_path, filename),
                                   staged_copy)
            self.log.info(
                "Downloaded the file %s from the source SFTP", filename)
            try:
                dst_hook.store_file(op.join(target_path, filename),
                                    staged_copy)
                self.log.info(
                    "Uploaded the file %s to the destination SFTP", filename)
            finally:
                # Always drop the staged copy, even when the upload failed.
                os.remove(os.path.join(self.work_path, filename))
def execute(self, context):
    """Copy the files of an SFTP folder to S3 through a local tmp directory.

    Lists ``self.ftp_folder``, keeps only the names accepted by
    ``self.filter`` when one is set, downloads each file into
    ``self.tmp_directory`` and uploads it to ``self.s3_bucket`` under
    ``self.s3_key + "/" + file_name``. Keys that already exist are skipped
    unless ``self.replace`` is truthy.
    """
    sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
    s3_hook = S3Hook(s3_conn_id=self.s3_conn_id)
    sftp_hook.get_conn()

    file_list = sftp_hook.list_directory(self.ftp_folder)
    if self.filter:
        # The builtin returns a new iterable instead of filtering in place,
        # so the result has to be kept for the filter to have any effect.
        file_list = list(filter(self.filter, file_list))

    # create tmp directory
    if not os.path.exists(self.tmp_directory):
        os.makedirs(self.tmp_directory)

    for file_name in file_list:
        s3_key_file = self.s3_key + "/" + str(file_name)
        exists = s3_hook.check_for_key(s3_key_file, self.s3_bucket)
        if exists and not self.replace:
            continue

        ftp_file_fullpath = self.ftp_folder + "/" + str(file_name)
        local_file_fullpath = self.tmp_directory + "/" + str(file_name)

        logging.info("Dowloading file [" + str(ftp_file_fullpath) +
                     "] from sftp to local [" + str(local_file_fullpath) + "]")
        sftp_hook.get_file(ftp_file_fullpath, local_file_fullpath)
        logging.info("Done.")

        logging.info("Uploading file [" + str(local_file_fullpath) +
                     "] to S3 on bucket [" + str(self.s3_bucket) +
                     "] and key [" + str(s3_key_file) + "]")
        s3_hook.load_file(local_file_fullpath, s3_key_file, self.s3_bucket,
                          self.replace)
        logging.info("Done.")
class SFTPSensor(BaseSensorOperator):
    """Sensor that pokes an SFTP directory for names matching a regular expression."""

    @apply_defaults
    def __init__(self, filepath, filepattern, sftp_conn_id='sftp_default',
                 *args, **kwargs):
        """Store the directory, the regex source and build the SFTP hook."""
        super(SFTPSensor, self).__init__(*args, **kwargs)
        self.filepath = filepath
        self.filepattern = filepattern
        self.hook = SFTPHook(sftp_conn_id)

    def poke(self, context):
        """Return True when at least one directory entry matches the pattern.

        The list of matching names (possibly empty) is pushed to XCom under
        ``file_name``. An ``IOError`` whose errno is ``SFTP_NO_SUCH_FILE``
        is reported as "not found yet"; any other ``IOError`` is re-raised.
        """
        compiled = re.compile(self.filepattern)
        matched = []
        try:
            for entry in self.hook.list_directory(self.filepath):
                if compiled.match(entry):
                    matched.append(entry)
                    print('I found the file! {}'.format(entry))
                else:
                    # Log the rejected name together with the pattern.
                    self.log.info(entry)
                    self.log.info(compiled)
            context["task_instance"].xcom_push("file_name", matched)
            return bool(matched)
        except IOError as e:
            if e.errno != SFTP_NO_SUCH_FILE:
                raise e
            return False
class SFTPSensor(BaseSensorOperator):
    """Airflow SFTP sensor that waits for a file matching a glob pattern.

    The sensor lists ``filepath`` on the SFTP connection and matches every
    entry against ``filepattern`` with ``fnmatch``.
    """

    @apply_defaults
    def __init__(self, filepath, filepattern, sftp_conn_id='sftp_default',
                 *args, **kwargs):
        """Store the directory, the glob pattern and build the SFTP hook.

        :param filepath: remote directory to list.
        :param filepattern: ``fnmatch``-style pattern applied to each name.
        :param sftp_conn_id: connection id handed to ``SFTPHook``.
        """
        super(SFTPSensor, self).__init__(*args, **kwargs)
        self.filepath = filepath
        self.filepattern = filepattern
        self.hook = SFTPHook(sftp_conn_id)

    def poke(self, context):
        """Push the oldest matching file name to XCom.

        Matching files are keyed by their integer modification time, so two
        files with the same modification time collapse to the one listed
        last. The name with the smallest modification time is pushed under
        ``file_name``.

        :return: True when at least one file matched, False otherwise.
        """
        full_path = self.filepath
        pattern = self.filepattern
        dict_files = {}

        for file_name in self.hook.list_directory(full_path):
            if not fnmatch.fnmatch(file_name, pattern):
                self.log.info(file_name)
                self.log.info(pattern)
                continue
            self.log.info("File found {}".format(file_name))
            mod_time = int(self.hook.get_mod_time(full_path + "/" + file_name))
            dict_files[mod_time] = file_name

        print("files found with modified time : {0}".format(dict_files))

        if not dict_files:
            # Explicit result instead of falling off the end with None.
            return False

        # min() over the keys picks the oldest entry without sorting them all.
        oldest_file = dict_files[min(dict_files)]
        context["task_instance"].xcom_push("file_name", oldest_file)
        self.log.info("xcom_pushed : {}".format(oldest_file))
        return True
def execute(self, context):
    """Upload the SFTP files with the configured extensions to S3.

    Entries of ``self.sftp_path`` whose lower-cased name ends with
    ``self.file_extensions`` are downloaded into a temporary file and
    uploaded (replacing any existing object) under the key returned by
    ``self.get_s3_key``. An empty ``_SUCCESS`` object is written last.
    """
    sftp = SFTPHook(ftp_conn_id=self.sftp_conn_id)
    s3 = S3Hook(self.s3_conn_id)

    listing = sftp.list_directory(self.sftp_path)
    wanted = list(
        filter(lambda name: name.lower().endswith(self.file_extensions),
               listing))

    for remote_name in wanted:
        with NamedTemporaryFile("w") as tmp:
            sftp.retrieve_file(f'{self.sftp_path}/{remote_name}', tmp.name)
            target_key = self.get_s3_key(f'{self.s3_prefix}/{remote_name}')
            s3.load_file(filename=tmp.name,
                         key=target_key,
                         bucket_name=self.s3_bucket,
                         replace=True)

    # Add the empty _SUCCESS file to indicate the task is done successfully
    success_key = self.get_s3_key(f'{self.s3_prefix}/_SUCCESS')
    s3.load_string('',
                   key=success_key,
                   bucket_name=self.s3_bucket,
                   replace=True)
def execute(self, context):
    """Download every entry of ``self.sftp_path`` and upload them to S3 with a pool.

    Each remote file is fetched into its own ``NamedTemporaryFile`` with up
    to five attempts, then all downloaded files are uploaded through
    ``Pool(self.workers)``.

    :return: ``False`` when the remote directory listing is empty, otherwise
        the list of remote file paths that were processed.
    :raises Exception: re-raises the last download error once the fifth
        attempt for a file has failed.
    """
    self.log.info("Going to start Bulk sftp to s3 operator")
    sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
    sftp_hook.no_host_key_check = True
    list_dir = sftp_hook.list_directory(self.sftp_path)
    if len(list_dir) < 1:
        self.log.info("Got no files to process. Skipping")
        return False
    self.log.info(f"Got {len(list_dir)} files to move")
    # temp_files collects {"ftp": <temp file object>, "s3_key": <key>} dicts
    # for every successful download; file_path_list collects the remote paths.
    temp_files = []
    file_path_list = []
    # The listing above goes through SFTPHook; downloads use an SFTP client
    # opened from a separate SSHHook on the same connection id.
    ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id)
    sftp_client = ssh_hook.get_conn().open_sftp()
    s3_hook = S3Hook(self.aws_conn_id)
    for file_name in list_dir:
        file_path = os.path.join(self.sftp_path, file_name)
        # Recorded before the download is attempted.
        file_path_list.append(file_path)
        s3_key = str(os.path.join(self.dest_path, file_name))
        # The temp file object stays referenced from temp_files until the
        # upload below, which keeps the file open in the meantime.
        file_metadata = {"ftp": NamedTemporaryFile("w"), "s3_key": s3_key}
        # Up to five attempts (i = 0..4) per file.
        for i in range(0, 5):
            try:
                self.log.info(f"Downloading {file_path}")
                sftp_client.get(file_path, file_metadata["ftp"].name)
                file_metadata["ftp"].flush()
                temp_files.append(file_metadata)
                break
            except Exception:
                self.log.info(
                    f"Got no response from server, waiting for next try number {(i + 1)}"
                )
                if i < 4:
                    # Exponential back-off with jitter, then reconnect with a
                    # brand new SSH hook before the next attempt.
                    time.sleep(2 ** i + random.random())
                    sftp_client = (
                        SSHHook(ssh_conn_id=self.sftp_conn_id)
                        .get_conn()
                        .open_sftp()
                    )
                else:
                    # Fifth failure: give up and propagate the error.
                    raise
    self.log.info(f"Uploading to S3 with {self.workers} workers")
    with Pool(self.workers) as pool:
        # Positional arguments for s3_hook.load_file: local file name, key,
        # bucket, then two flags — presumably replace=True and encrypt=False;
        # TODO confirm against the S3Hook.load_file signature in use.
        pool.starmap(
            s3_hook.load_file,
            [
                (x["ftp"].name, x["s3_key"], self.dest_bucket, True, False)
                for x in temp_files
            ],
        )
    self.log.info("Finished executing Bulk sftp to s3 operator")
    return file_path_list
def poke(self, context):
    """Report whether any source file is still missing on a target.

    For every ``(connection id, directory)`` entry of
    ``self.target_full_path`` the source listing is compared with the target
    listing; files rejected by ``self.filter_function`` (when set) are
    ignored.

    :return: True when at least one accepted source file is absent from at
        least one target, False otherwise.
    """
    self.log.info(f'Checking for new files between {self.source_conn_id}{self.source_path} and {self.target_full_path}')
    src_hook = SFTPHook(ftp_conn_id=self.source_conn_id)
    src_listing = src_hook.list_directory(self.source_path)

    pending = 0
    for target in self.target_full_path:
        conn_id, remote_dir = target[0], target[1]
        existing = SFTPHook(ftp_conn_id=conn_id).list_directory(remote_dir)
        # Count accepted source names that the target does not have yet.
        pending += sum(
            1 for name in src_listing
            if (self.filter_function is None or self.filter_function(name))
            and name not in existing
        )

    if pending != 0:
        return True
    self.log.info(f"No new files detected. Waiting {self.poke_interval} seconds and checking again.")
    return False
def execute(self, context):
    """Move files on the SFTP server into ``self.dest_path``.

    The files to move come from ``self.source_files`` (a string holding a
    Python literal, or an iterable of paths) unless ``self.source_path`` is
    set, in which case the listing of that directory is used instead. When
    ``self.file_limit`` is set only that many files are processed. Any file
    already present at the destination is removed before the rename.

    :return: list of destination paths that were renamed successfully.
    :raises Exception: re-raises a non-``IOError`` failure after the fifth
        attempt for a file.
    """
    self.log.info("Going to start Bulk Rename sftp operator")
    sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
    sftp_hook.no_host_key_check = True

    # Start from an empty list so the name is always bound, even when
    # neither source_files nor source_path yields anything.
    source_files_list = []
    if self.source_files:
        if isinstance(self.source_files, str):
            source_files_list = ast.literal_eval(self.source_files)
        else:
            # Already a collection of paths: use it as is.
            source_files_list = list(self.source_files)
    if self.source_path:
        # A source directory takes precedence over explicit source_files.
        source_files_list = [
            os.path.join(self.source_path, x)
            for x in sftp_hook.list_directory(self.source_path)
        ]
    if self.file_limit:
        source_files_list = source_files_list[: self.file_limit]

    file_path_list = []
    for key in source_files_list:
        file_path = os.path.join(self.dest_path, key.split("/")[-1])
        self.log.info(f"Renaming {key} to {file_path}")
        conn = sftp_hook.get_conn()
        # Up to five attempts (i = 0..4) per file.
        for i in range(0, 5):
            try:
                try:
                    conn.remove(file_path)
                    print("Deleted duplicated file")
                except IOError:
                    # Best effort: failure to remove the destination (for
                    # example because it does not exist) is ignored.
                    pass
                conn.rename(key, file_path)
                file_path_list.append(file_path)
                break
            except IOError:
                # NOTE(review): on Python 3 IOError is OSError, so socket
                # level errors may land here too and skip the retry logic
                # below; confirm this is intended.
                self.log.info("File not found, skipping")
                break
            except Exception:
                self.log.info(
                    f"Got no response from server, waiting for next try number {(i + 1)}"
                )
                if i < 4:
                    # Exponential back-off with jitter, then reconnect.
                    time.sleep(2 ** i + random.random())
                    sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
                    sftp_hook.no_host_key_check = True
                    conn = sftp_hook.get_conn()
                else:
                    raise
    self.log.info("Finished executing Bulk Rename sftp operator")
    return file_path_list
def check_for_file_py(**kwargs):
    """Check whether a file name is present in a directory on the SFTP server.

    :param kwargs: expects ``sftp_conn_id``, optionally ``path`` (directory
        to list) and ``templates_dict`` with a ``filename`` entry. When
        ``path`` or ``filename`` is not supplied the module-level
        ``FILEPATH`` / ``FILENAME`` values are used instead.
    :return: True when the file name is in the directory listing.
    """
    path = kwargs.get('path', None)
    logging.info('path type: {} || path value: {}'.format(type(path), path))
    sftp_conn_id = kwargs.get('sftp_conn_id', None)
    # Tolerate a missing templates_dict instead of failing on None.get(...).
    filename = (kwargs.get('templates_dict') or {}).get('filename', None)

    sftp_hook = SFTPHook(ftp_conn_id=sftp_conn_id)
    logging.info('sftp_hook type: {} || sftp_hook value: {}'.format(
        type(sftp_hook), sftp_hook))
    # Connect eagerly, as before; the returned client itself is not needed.
    sftp_hook.get_conn()

    # Honour the values passed by the caller; the globals are only defaults.
    directory = path if path is not None else FILEPATH
    wanted = filename if filename is not None else FILENAME

    file_list = sftp_hook.list_directory(directory)
    logging.info('FileList: {}'.format(file_list))
    return wanted in file_list
def execute(self, context):
    """Delete every entry of ``self.source_path`` using a pool of workers.

    The directory is listed through ``SFTPHook`` and each resulting path is
    handed to ``self.delete_file`` via ``Pool(self.workers).map``.

    :return: always True.
    """
    self.log.info("Going to start bulk delete file sftp operator")
    hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
    hook.no_host_key_check = True

    targets = []
    for entry in hook.list_directory(self.source_path):
        targets.append(os.path.join(self.source_path, entry))

    self.log.info(
        f"Going to delete {len(targets)} with {self.workers} workers"
    )
    with Pool(self.workers) as workers:
        workers.map(self.delete_file, targets)

    self.log.info("Finished executing bulk delete file sftp operator")
    return True
def calculate_list_of_files_to_move(**context):
    """Select the Alma SFTP files that belong to the most recent date.

    Lists the root of the Alma SFTP site, derives the most recent date from
    the ``alma_bibs__20*`` file names with ``determine_most_recent_date``,
    pushes it to XCom under ``most_recent_date`` and returns the files whose
    name starts with ``alma_bibs__<most_recent_date>``.

    :raises ValueError: when no ``alma_bibs__20*`` file is present.
    """
    hook = SFTPHook(ftp_conn_id=ALMA_SFTP_CONNECTION_ID)
    remote_names = hook.list_directory("./")

    # Ignore any file that does not start with this prefix
    candidates = list(
        filter(lambda name: name.startswith("alma_bibs__20"), remote_names))
    if not candidates:
        raise ValueError(
            'No matching files were found on the alma sftp server')

    most_recent_date = determine_most_recent_date(candidates)
    context['task_instance'].xcom_push(key="most_recent_date",
                                       value=most_recent_date)
    wanted_prefix = f"alma_bibs__{most_recent_date}"
    return [name for name in remote_names if name.startswith(wanted_prefix)]
class SFTPHookTest(unittest.TestCase):
    """Exercise ``SFTPHook`` against a scratch directory and file under ``TMP_PATH``."""

    def setUp(self):
        """Load the test config, build the hook and create the fixtures."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        local_file = os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        with open(local_file, 'a') as handle:
            handle.write('Test file')

    def tearDown(self):
        """Remove the scratch directory and the local test file."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))

    def _assert_no_host_key_check(self, get_connection, expected,
                                  **conn_kwargs):
        """Build a hook on a patched connection and compare its flag."""
        get_connection.return_value = Connection(login='******', host='host',
                                                 **conn_kwargs)
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, expected)

    def test_get_conn(self):
        conn = self.hook.get_conn()
        self.assertEqual(type(conn), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        described = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, described)

    def test_list_directory(self):
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        self.assertEqual(self.hook.list_directory(path=scratch_dir), [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        new_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name)

        self.hook.create_directory(new_dir)
        self.assertIn(new_dir_name, self.hook.describe_directory(scratch_dir))

        self.hook.delete_directory(new_dir)
        self.assertNotIn(new_dir_name,
                         self.hook.describe_directory(scratch_dir))

    def test_store_retrieve_and_delete_file(self):
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        remote_file = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                   TMP_FILE_FOR_TESTS)
        retrieved_file_name = 'retrieved.txt'
        retrieved_file = os.path.join(TMP_PATH, retrieved_file_name)

        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        self.assertEqual(self.hook.list_directory(path=scratch_dir),
                         [TMP_FILE_FOR_TESTS])

        self.hook.retrieve_file(remote_full_path=remote_file,
                                local_full_path=retrieved_file)
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(retrieved_file)

        self.hook.delete_file(path=remote_file)
        self.assertEqual(self.hook.list_directory(path=scratch_dir), [])

    def test_get_mod_time(self):
        remote_file = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                   TMP_FILE_FOR_TESTS)
        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        mod_time = self.hook.get_mod_time(path=remote_file)
        self.assertEqual(len(mod_time), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        self._assert_no_host_key_check(get_connection, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        self._assert_no_host_key_check(
            get_connection, True, extra='{"no_host_key_check": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        self._assert_no_host_key_check(
            get_connection, False, extra='{"no_host_key_check": false}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        self._assert_no_host_key_check(
            get_connection, False, extra='{"no_host_key_check": "foo"}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        self._assert_no_host_key_check(
            get_connection, True,
            extra='{"ignore_hostkey_verification": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        self._assert_no_host_key_check(
            get_connection, False,
            extra='{"ignore_hostkey_verification": false}')
class TestSFTPHook(unittest.TestCase):
    """Exercise ``SFTPHook`` against a scratch tree under ``TMP_PATH``.

    ``setUp`` switches the login of the ``sftp_default`` connection to
    ``SFTP_CONNECTION_USER`` and ``tearDown`` restores the previous login.
    """

    @provide_session
    def update_connection(self, login, session=None):
        """Set the ``sftp_default`` login and return the login it replaced."""
        query = session.query(Connection).filter(
            Connection.conn_id == "sftp_default")
        connection = query.first()
        previous_login = connection.login
        connection.login = login
        session.commit()
        return previous_login

    def setUp(self):
        """Swap the connection login and create the scratch directory tree."""
        self.old_login = self.update_connection(SFTP_CONNECTION_USER)
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR))
        fixture_paths = (
            os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS),
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR,
                         TMP_FILE_FOR_TESTS),
        )
        for fixture_path in fixture_paths:
            with open(fixture_path, 'a') as handle:
                handle.write('Test file')

    def tearDown(self):
        """Remove the fixtures and restore the original connection login."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        self.update_connection(self.old_login)

    def _assert_no_host_key_check(self, get_connection, expected,
                                  **conn_kwargs):
        """Build a hook on a patched connection and compare its flag."""
        get_connection.return_value = Connection(login='******', host='host',
                                                 **conn_kwargs)
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, expected)

    def test_get_conn(self):
        conn = self.hook.get_conn()
        self.assertEqual(type(conn), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        described = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, described)

    def test_list_directory(self):
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        self.assertEqual(self.hook.list_directory(path=scratch_dir),
                         [SUB_DIR])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        new_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name)

        self.hook.create_directory(new_dir)
        self.assertIn(new_dir_name, self.hook.describe_directory(scratch_dir))

        self.hook.delete_directory(new_dir)
        self.assertNotIn(new_dir_name,
                         self.hook.describe_directory(scratch_dir))

    def test_create_and_delete_directories(self):
        base_dir = "base_dir"
        sub_dir = "sub_dir"
        new_dir_path = os.path.join(base_dir, sub_dir)
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        base_full = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, base_dir)
        nested_full = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_path)

        self.hook.create_directory(nested_full)
        self.assertIn(base_dir, self.hook.describe_directory(scratch_dir))
        self.assertIn(sub_dir, self.hook.describe_directory(base_full))

        self.hook.delete_directory(nested_full)
        self.hook.delete_directory(base_full)
        remaining = self.hook.describe_directory(scratch_dir)
        self.assertNotIn(new_dir_path, remaining)
        self.assertNotIn(base_dir, remaining)

    def test_store_retrieve_and_delete_file(self):
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        remote_file = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                   TMP_FILE_FOR_TESTS)
        retrieved_file_name = 'retrieved.txt'
        retrieved_file = os.path.join(TMP_PATH, retrieved_file_name)

        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        self.assertEqual(self.hook.list_directory(path=scratch_dir),
                         [SUB_DIR, TMP_FILE_FOR_TESTS])

        self.hook.retrieve_file(remote_full_path=remote_file,
                                local_full_path=retrieved_file)
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(retrieved_file)

        self.hook.delete_file(path=remote_file)
        self.assertEqual(self.hook.list_directory(path=scratch_dir),
                         [SUB_DIR])

    def test_get_mod_time(self):
        remote_file = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                   TMP_FILE_FOR_TESTS)
        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        mod_time = self.hook.get_mod_time(path=remote_file)
        self.assertEqual(len(mod_time), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        self._assert_no_host_key_check(get_connection, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        self._assert_no_host_key_check(
            get_connection, True, extra='{"no_host_key_check": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        self._assert_no_host_key_check(
            get_connection, False, extra='{"no_host_key_check": false}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        self._assert_no_host_key_check(
            get_connection, False, extra='{"no_host_key_check": "foo"}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        self._assert_no_host_key_check(
            get_connection, True,
            extra='{"ignore_hostkey_verification": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        self._assert_no_host_key_check(
            get_connection, False,
            extra='{"ignore_hostkey_verification": false}')

    @parameterized.expand([
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS), True),
        (os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), True),
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS + "abc"), False),
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, "abc"), False),
    ])
    def test_path_exists(self, path, exists):
        self.assertEqual(self.hook.path_exists(path), exists)

    @parameterized.expand([
        ("test/path/file.bin", None, None, True),
        ("test/path/file.bin", "test", None, True),
        ("test/path/file.bin", "test/", None, True),
        ("test/path/file.bin", None, "bin", True),
        ("test/path/file.bin", "test", "bin", True),
        ("test/path/file.bin", "test/", "file.bin", True),
        ("test/path/file.bin", None, "file.bin", True),
        ("test/path/file.bin", "diff", None, False),
        ("test/path/file.bin", "test//", None, False),
        ("test/path/file.bin", None, ".txt", False),
        ("test/path/file.bin", "diff", ".txt", False),
    ])
    def test_path_match(self, path, prefix, delimiter, match):
        outcome = self.hook._is_path_match(path=path,
                                           prefix=prefix,
                                           delimiter=delimiter)
        self.assertEqual(outcome, match)

    def test_get_tree_map(self):
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        files, dirs, unknowns = self.hook.get_tree_map(path=scratch_dir)

        expected_files = [
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR,
                         TMP_FILE_FOR_TESTS)
        ]
        expected_dirs = [os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR)]
        self.assertEqual(files, expected_files)
        self.assertEqual(dirs, expected_dirs)
        self.assertEqual(unknowns, [])
class SFTPHookTest(unittest.TestCase):
    """Exercise ``SFTPHook`` against a scratch directory and file under ``TMP_PATH``."""

    def setUp(self):
        """Load the test config, build the hook and create the fixtures."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        fixture_file = os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        with open(fixture_file, 'a') as out:
            out.write('Test file')

    def tearDown(self):
        """Remove the scratch directory and the local test file."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))

    def _expect_host_key_flag(self, get_connection, expected, **conn_kwargs):
        """Build a hook on a patched connection and compare its flag."""
        get_connection.return_value = Connection(login='******', host='host',
                                                 **conn_kwargs)
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, expected)

    def test_get_conn(self):
        connection = self.hook.get_conn()
        self.assertEqual(type(connection), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        listing = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, listing)

    def test_list_directory(self):
        work_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        self.assertEqual(self.hook.list_directory(path=work_dir), [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        work_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        target_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name)

        self.hook.create_directory(target_dir)
        self.assertIn(new_dir_name, self.hook.describe_directory(work_dir))

        self.hook.delete_directory(target_dir)
        self.assertNotIn(new_dir_name, self.hook.describe_directory(work_dir))

    def test_store_retrieve_and_delete_file(self):
        work_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        remote_path = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                   TMP_FILE_FOR_TESTS)
        retrieved_file_name = 'retrieved.txt'
        retrieved_path = os.path.join(TMP_PATH, retrieved_file_name)

        self.hook.store_file(
            remote_full_path=remote_path,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        self.assertEqual(self.hook.list_directory(path=work_dir),
                         [TMP_FILE_FOR_TESTS])

        self.hook.retrieve_file(remote_full_path=remote_path,
                                local_full_path=retrieved_path)
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(retrieved_path)

        self.hook.delete_file(path=remote_path)
        self.assertEqual(self.hook.list_directory(path=work_dir), [])

    def test_get_mod_time(self):
        remote_path = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                   TMP_FILE_FOR_TESTS)
        self.hook.store_file(
            remote_full_path=remote_path,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        stamp = self.hook.get_mod_time(path=remote_path)
        self.assertEqual(len(stamp), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        self._expect_host_key_flag(get_connection, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        self._expect_host_key_flag(
            get_connection, True, extra='{"no_host_key_check": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        self._expect_host_key_flag(
            get_connection, False, extra='{"no_host_key_check": false}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        self._expect_host_key_flag(
            get_connection, False, extra='{"no_host_key_check": "foo"}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        self._expect_host_key_flag(
            get_connection, True,
            extra='{"ignore_hostkey_verification": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        self._expect_host_key_flag(
            get_connection, False,
            extra='{"ignore_hostkey_verification": false}')
class SFTPHookTest(unittest.TestCase):
    """Exercise ``SFTPHook`` against a scratch directory and file under ``TMP_PATH``."""

    def setUp(self):
        """Load the test config, build the hook and create the fixtures."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        local_file = os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        with open(local_file, 'a') as handle:
            handle.write('Test file')

    def tearDown(self):
        """Remove the scratch directory and the local test file."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))

    def test_get_conn(self):
        conn = self.hook.get_conn()
        self.assertEqual(type(conn), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        described = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, described)

    def test_list_directory(self):
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        self.assertEqual(self.hook.list_directory(path=scratch_dir), [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        new_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name)

        self.hook.create_directory(new_dir)
        self.assertIn(new_dir_name, self.hook.describe_directory(scratch_dir))

        self.hook.delete_directory(new_dir)
        self.assertNotIn(new_dir_name,
                         self.hook.describe_directory(scratch_dir))

    def test_store_retrieve_and_delete_file(self):
        scratch_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        remote_file = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                   TMP_FILE_FOR_TESTS)
        retrieved_file_name = 'retrieved.txt'
        retrieved_file = os.path.join(TMP_PATH, retrieved_file_name)

        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        self.assertEqual(self.hook.list_directory(path=scratch_dir),
                         [TMP_FILE_FOR_TESTS])

        self.hook.retrieve_file(remote_full_path=remote_file,
                                local_full_path=retrieved_file)
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(retrieved_file)

        self.hook.delete_file(path=remote_file)
        self.assertEqual(self.hook.list_directory(path=scratch_dir), [])

    def test_get_mod_time(self):
        remote_file = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                   TMP_FILE_FOR_TESTS)
        self.hook.store_file(
            remote_full_path=remote_file,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        mod_time = self.hook.get_mod_time(path=remote_file)
        self.assertEqual(len(mod_time), 14)
class SFTPHookTest(unittest.TestCase):
    """Exercise ``SFTPHook`` against a scratch directory and file under ``TMP_PATH``."""

    def setUp(self):
        """Load the test config, build the hook and create the fixtures."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        fixture_file = os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        with open(fixture_file, 'a') as out:
            out.write('Test file')

    def tearDown(self):
        """Remove the scratch directory and the local test file."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))

    def test_get_conn(self):
        connection = self.hook.get_conn()
        self.assertEqual(type(connection), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        listing = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, listing)

    def test_list_directory(self):
        work_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        self.assertEqual(self.hook.list_directory(path=work_dir), [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        work_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        target_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name)

        self.hook.create_directory(target_dir)
        self.assertIn(new_dir_name, self.hook.describe_directory(work_dir))

        self.hook.delete_directory(target_dir)
        self.assertNotIn(new_dir_name, self.hook.describe_directory(work_dir))

    def test_store_retrieve_and_delete_file(self):
        work_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        remote_path = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                   TMP_FILE_FOR_TESTS)
        retrieved_file_name = 'retrieved.txt'
        retrieved_path = os.path.join(TMP_PATH, retrieved_file_name)

        self.hook.store_file(
            remote_full_path=remote_path,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        self.assertEqual(self.hook.list_directory(path=work_dir),
                         [TMP_FILE_FOR_TESTS])

        self.hook.retrieve_file(remote_full_path=remote_path,
                                local_full_path=retrieved_path)
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(retrieved_path)

        self.hook.delete_file(path=remote_path)
        self.assertEqual(self.hook.list_directory(path=work_dir), [])

    def test_get_mod_time(self):
        remote_path = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                   TMP_FILE_FOR_TESTS)
        self.hook.store_file(
            remote_full_path=remote_path,
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        stamp = self.hook.get_mod_time(path=remote_path)
        self.assertEqual(len(stamp), 14)