def execute(self, context):
    """Copy the files found in ``self.ftp_folder`` on the SFTP server to S3.

    Each remote file is downloaded into ``self.tmp_directory`` and then
    uploaded to ``self.s3_bucket`` under ``self.s3_key``.  Keys that already
    exist in the bucket are skipped unless ``self.replace`` is truthy.

    :param context: not used by this method.
    """
    sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
    s3_hook = S3Hook(s3_conn_id=self.s3_conn_id)
    sftp_hook.get_conn()

    file_list = sftp_hook.list_directory(self.ftp_folder)
    if self.filter:
        # filter() never mutates its input; the original call threw the
        # result away, so the listing was always processed unfiltered.
        # NOTE(review): assumes self.filter is a predicate callable - confirm.
        file_list = list(filter(self.filter, file_list))

    # create tmp directory
    if not os.path.exists(self.tmp_directory):
        os.makedirs(self.tmp_directory)

    for file_name in file_list:
        s3_key_file = self.s3_key + "/" + str(file_name)
        exists = s3_hook.check_for_key(s3_key_file, self.s3_bucket)
        if exists and not self.replace:
            # Already uploaded and overwriting was not requested.
            continue

        ftp_file_fullpath = self.ftp_folder + "/" + str(file_name)
        local_file_fullpath = self.tmp_directory + "/" + str(file_name)

        logging.info("Dowloading file [" + str(ftp_file_fullpath) +
                     "] from sftp to local [" + str(local_file_fullpath) + "]")
        sftp_hook.get_file(ftp_file_fullpath, local_file_fullpath)
        logging.info("Done.")

        logging.info("Uploading file [" + str(local_file_fullpath) +
                     "] to S3 on bucket [" + str(self.s3_bucket) +
                     "] and key [" + str(s3_key_file) + "]")
        s3_hook.load_file(local_file_fullpath, s3_key_file, self.s3_bucket,
                          self.replace)
        logging.info("Done.")
def execute(self, context):
    """Copy one chunk of a file from one SFTP server to another.

    Reads ``self.chunk_size`` bytes starting at offset
    ``self.chunk_number * self.chunk_size`` from ``self.file_source_path``
    and writes them at the same offset of ``self.file_destination_path``.

    :param context: not used by this method.
    """
    conn_source = SFTPHook(ftp_conn_id=self.conn_id_source)
    my_conn_source = conn_source.get_conn()
    conn_destination = SFTPHook(ftp_conn_id=self.conn_id_destination)
    my_conn_destination = conn_destination.get_conn()

    offset = self.chunk_number * self.chunk_size

    # Both remote handles are closed deterministically; the original left
    # them open, which leaks the handles and can leave the write unflushed.
    with my_conn_source.sftp_client.file(self.file_source_path,
                                         'r') as source_file:
        source_file.seek(offset)
        payload = source_file.read(self.chunk_size)

    with my_conn_destination.sftp_client.file(self.file_destination_path,
                                              'r+') as destination_file:
        destination_file.seek(offset)
        destination_file.write(payload)
def execute(self, context):
    """Move a batch of remote files into ``self.dest_path`` over SFTP.

    The batch comes from ``self.source_files`` (a list, or a string holding
    a Python literal list) and is replaced by the listing of
    ``self.source_path`` when that attribute is set.  ``self.file_limit``
    caps the number of files handled.  An existing file at the destination
    is removed before the rename.

    A rename that raises ``IOError`` is logged and skipped.  Any other
    exception is retried, for at most five attempts per file, with an
    exponential back-off and a fresh connection; the last failure is
    re-raised.

    :param context: not used by this method.
    :return: list of destination paths that were renamed successfully.
    """
    self.log.info("Going to start Bulk Rename sftp operator")
    sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
    sftp_hook.no_host_key_check = True

    # Bind the name up front: the original raised NameError when
    # source_files was not a str or when neither source was configured.
    source_files_list = []
    if self.source_files:
        if isinstance(self.source_files, str):
            source_files_list = ast.literal_eval(self.source_files)
        else:
            source_files_list = list(self.source_files)
    if self.source_path:
        # A configured directory takes precedence over the explicit list.
        source_files_list = sftp_hook.list_directory(self.source_path)
        source_files_list = [
            os.path.join(self.source_path, x) for x in source_files_list
        ]

    file_path_list = []
    if self.file_limit:
        source_files_list = source_files_list[: self.file_limit]

    for key in source_files_list:
        file_path = key.split("/")[-1]
        file_path = os.path.join(self.dest_path, file_path)
        self.log.info(f"Renaming {key} to {file_path}")
        conn = sftp_hook.get_conn()
        for i in range(0, 5):
            try:
                try:
                    # Clear a stale copy so the rename cannot collide.
                    conn.remove(file_path)
                    print("Deleted duplicated file")
                except IOError:
                    # Nothing to delete at the destination.
                    pass
                conn.rename(key, file_path)
                file_path_list.append(file_path)
                break
            except IOError:
                self.log.info("File not found, skipping")
                break
            except Exception:
                self.log.info(
                    f"Got no response from server, waiting for next try number {(i + 1)}"
                )
                if i < 4:
                    # Back off, then retry on a brand-new connection.
                    time.sleep(2 ** i + random.random())
                    sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
                    sftp_hook.no_host_key_check = True
                    conn = sftp_hook.get_conn()
                else:
                    raise

    self.log.info("Finished executing Bulk Rename sftp operator")
    return file_path_list
def execute(self, context):
    """Copy S3 objects with a matching extension to an SFTP directory.

    Keys under ``self.s3_prefix`` in ``self.s3_bucket`` whose lower-cased
    name ends with ``self.file_extensions`` are downloaded to a temporary
    file and uploaded to ``self.sftp_path`` as
    ``<sftp_filename_prefix>-part-<n><extension>``, where ``n`` counts the
    matching keys from zero.

    :param context: not used by this method.
    """
    sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
    s3_hook = S3Hook(self.s3_conn_id)
    s3_client = s3_hook.get_conn()
    sftp_client = sftp_hook.get_conn()

    # NOTE(review): some S3Hook versions return None instead of an empty
    # list when nothing matches the prefix; treat that as "no keys".
    s3_keys = s3_hook.list_keys(self.s3_bucket, prefix=self.s3_prefix) or []
    # str.endswith needs a str or a tuple of str for file_extensions.
    s3_keys_filtered_by_extensions = [
        s3_key for s3_key in s3_keys
        if s3_key.lower().endswith(self.file_extensions)
    ]

    for part_count, s3_key in enumerate(s3_keys_filtered_by_extensions):
        with NamedTemporaryFile("w") as f:
            s3_client.download_file(self.s3_bucket, s3_key, f.name)

            _, file_extension = os.path.splitext(s3_key)
            remote_filename = f'{self.sftp_filename_prefix}-part-{part_count}{file_extension}'
            remote_path = os.path.join(self.sftp_path, remote_filename)
            sftp_client.put(f.name, remote_path)
def moveFromSourceToLocal(**kwargs):
    """
    Download the file named in the dag_run configuration to the local
    landing path and verify the transfer with an MD5 comparison.

    The dag_run conf supplies ``SFTP_Connection_Name`` and ``File_Name``.
    The local name is the remote basename with ``.ready`` removed.
    Raises ``Exception`` when the remote and local checksums differ.
    """
    run_conf = kwargs['dag_run'].conf
    connection_name = run_conf['SFTP_Connection_Name']
    remote_path = run_conf['File_Name']

    # Drop the ".ready" marker from the basename of the remote file.
    local_name = os.path.basename(run_conf['File_Name']).replace('.ready', '')
    local_path = os.path.join(LOCAL_LANDING_PATH, local_name)

    hook = SFTPHook(ftp_conn_id=connection_name)
    client = hook.get_conn()

    # Checksum on the server first, then again on the downloaded copy.
    remote_md5 = getMD5sumRemote(client, remote_path)
    logging.info('Initial MD5Sum: {}'.format(remote_md5))

    hook.retrieve_file(remote_path, local_path)

    local_md5 = getMD5sumLocal(local_path)
    logging.info('currentMD5Sum: {}'.format(local_md5))

    if remote_md5 == local_md5:
        return

    logging.error('MD5Sum mismatch. Initial: {} Post-Transfer: {}'.format(
        remote_md5, local_md5))
    raise Exception(
        'MD5Sum values before and after transfer do not match. Possible transfer issue. Initial: {} Post-Transfer: {}'
        .format(remote_md5, local_md5))
def archive_files_in_sftp(**context):
    """Move the transferred files into a dated archive folder on the server.

    The list of files and the archive date are pulled from the XCom values
    of the ``get_list_of_alma_sftp_files_to_transer`` task.  The
    ``archive`` directory and its ``<most_recent_date>`` sub-directory are
    created when they do not exist yet.

    :return: number of files that were moved.
    """
    sftp_conn = SFTPHook(ftp_conn_id=ALMA_SFTP_CONNECTION_ID)
    # Paramiko is the underlying package used for SSH/SFTP conns
    # the paramiko client exposes a lot more core SFTP functionality
    paramiko_conn = sftp_conn.get_conn()

    most_recent_date = context['task_instance'].xcom_pull(
        task_ids='get_list_of_alma_sftp_files_to_transer',
        key='most_recent_date')
    list_of_files = context['task_instance'].xcom_pull(
        task_ids='get_list_of_alma_sftp_files_to_transer')

    archive_path = "archive"
    # Two independent checks: the original if/elif skipped creating the
    # dated folder on the run that created "archive", so the rename below
    # failed on a fresh server.
    if archive_path not in sftp_conn.list_directory("./"):
        sftp_conn.create_directory(path=f"./{archive_path}")
    if str(most_recent_date) not in sftp_conn.list_directory(
            f"./{archive_path}"):
        sftp_conn.create_directory(f"./{archive_path}/{most_recent_date}")

    count = 0
    for filename in list_of_files:
        logging.info(
            f"Moving {filename} to {archive_path}/{most_recent_date}/{filename}"
        )
        paramiko_conn.rename(f"{filename}",
                             f"{archive_path}/{most_recent_date}/{filename}")
        count += 1
    return count
def createTestFile(**kwargs):
    """
    Create a randomly named test file on the first configured SFTP polling
    site and encrypt it with gpg, printing the output of both commands.
    """
    first_site = dag_config['SFTP_Polling_Sites'][0]
    site_name = first_site['SFTP_Name']
    connection_name = first_site['SFTP_Connection_Name']
    landing_dir = first_site['Feed_Groups'][0]['Feed_Group_Location']

    target = os.path.join(landing_dir,
                          'testfile_{}.txt'.format(randint(0, 9999999)))
    create_cmd = "echo 'Hello World!' > {}".format(target)
    encrypt_cmd = "gpg --output {}.gpg -e -r [email protected] {}".format(
        target, target)

    hook = SFTPHook(ftp_conn_id=connection_name)

    print('SFTP_Name: {}'.format(site_name))
    print('SFTP_Connection: {}'.format(connection_name))
    print('SFTP_Destination_Path: {}'.format(landing_dir))
    print('Random Filename: {}'.format(target))
    print('GPG Command: {}'.format(encrypt_cmd))

    client = hook.get_conn()

    # Run each command remotely and print its decoded output lines.
    for report, command in (('Create File Results: {}', create_cmd),
                            ('GPG Results: {}', encrypt_cmd)):
        raw_lines = client.execute(command)
        decoded = [chunk.decode('utf-8') for chunk in raw_lines]
        print(report.format(decoded))
def execute(self, context):
    """Store the number of chunks needed to transfer ``self.file_path``.

    The remote file size is divided by ``self.chunk_size`` (rounded up) and
    the result is written under ``self.chunks_variable_name`` inside the
    dictionary kept in the ``self.master_variable`` Variable.

    :param context: not used by this method.
    """
    conn = SFTPHook(ftp_conn_id=self.conn_id)
    my_conn = conn.get_conn()
    total_size = my_conn.lstat(self.file_path).st_size

    # NOTE(review): assumes `Variable` is airflow.models.Variable - confirm.
    # Without deserialize_json the getter returns a str, which does not
    # support item assignment; the dict must also be written back as JSON
    # so the next read can decode it.
    master_variable_dict = Variable.get(self.master_variable,
                                        deserialize_json=True)
    master_variable_dict[self.chunks_variable_name] = math.ceil(
        total_size / self.chunk_size)
    Variable.set(self.master_variable, master_variable_dict,
                 serialize_json=True)
    time.sleep(5)
def execute(self, context):
    """Rename ``self.source_file`` to ``self.dest_file`` on the SFTP server.

    Host key checking is switched off on the hook before connecting.  An
    ``IOError`` from the rename is logged and swallowed.

    :param context: not used by this method.
    """
    log = self.log
    log.info("Going to start Rename SFTP Operator")

    hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
    hook.no_host_key_check = True
    client = hook.get_conn()

    try:
        client.rename(self.source_file, self.dest_file)
    except IOError:
        log.info("File not found, skipping")

    log.info("Finished executing RenameSFTPOperator")
def check_for_file_py(**kwargs):
    """Return True when FILENAME is listed in FILEPATH on the SFTP server.

    Reads ``path``, ``sftp_conn_id`` and ``templates_dict['filename']`` from
    the keyword arguments.

    NOTE(review): ``path`` and ``filename`` are read but the lookup uses the
    module-level FILEPATH and FILENAME instead - confirm this is intended.
    """
    path = kwargs.get('path')
    logging.info('path type: {} || path value: {}'.format(type(path), path))

    conn_id = kwargs.get('sftp_conn_id')
    # Value is unused, but the lookup still fails when templates_dict is absent.
    _filename = kwargs.get('templates_dict').get('filename')

    hook = SFTPHook(ftp_conn_id=conn_id)
    logging.info('sftp_hook type: {} || sftp_hook value: {}'.format(
        type(hook), hook))
    hook.get_conn()

    listing = hook.list_directory(FILEPATH)
    logging.info('FileList: {}'.format(listing))
    return FILENAME in listing
def execute(self, context):
    """Upload one chunk of an SFTP file as a part of an S3 multipart upload.

    Reads ``self.chunk_size`` bytes at offset
    ``self.chunk_number * self.chunk_size`` from ``self.file_source_path``
    and sends them with ``upload_part`` for ``self.upload_id``.

    :param context: not used by this method.
    """
    conn_source = SFTPHook(ftp_conn_id=self.conn_id_source)
    my_conn_source = conn_source.get_conn()

    # Close the remote handle as soon as the chunk is read; the original
    # never closed it.
    with my_conn_source.sftp_client.file(self.file_source_path,
                                         'r') as source_file:
        source_file.seek(self.chunk_number * self.chunk_size)
        payload = source_file.read(self.chunk_size)

    # No explicit key arguments are passed to the client here.
    client = boto3.client('s3')
    # NOTE(review): S3 part numbers start at 1 while the seek above treats
    # chunk_number as zero-based - confirm how callers number the chunks.
    client.upload_part(Body=payload,
                       Bucket=self.bucket,
                       Key=self.key,
                       PartNumber=self.chunk_number,
                       UploadId=self.upload_id)
def moveFileFromLocalToDest(**kwargs):
    """
    Upload the local copy of the file named in the dag_run configuration to
    the destination SFTP site and verify it with an MD5 comparison.

    The dag_run conf supplies ``Feed_Dest_Connection_Name``, ``File_Name``
    and ``Feed_Dest_Location``.  The file name is the basename of
    ``File_Name`` with ``.gpg.ready`` removed.  Raises ``Exception`` when
    the local and remote checksums differ.
    """
    run_conf = kwargs['dag_run'].conf
    dest_connection = run_conf['Feed_Dest_Connection_Name']
    base_name = os.path.basename(run_conf['File_Name']).replace(
        '.gpg.ready', '')
    dest_dir = run_conf['Feed_Dest_Location']

    local_path = os.path.join(LOCAL_LANDING_PATH, base_name)
    remote_path = os.path.join(dest_dir, base_name)

    logging.info(
        'Attempting to transfer {} on airflow host to {}@{} site'.format(
            local_path, remote_path, dest_connection))

    hook = SFTPHook(ftp_conn_id=dest_connection)
    client = hook.get_conn()

    # Checksum locally before the upload, remotely after it.
    local_md5 = getMD5sumLocal(local_path)
    logging.info('Local MD5Sum: {}'.format(local_md5))

    hook.store_file(remote_path, local_path)

    remote_md5 = getMD5sumRemote(client, remote_path)
    logging.info('Remote MD5Sum: {}'.format(remote_md5))

    if local_md5 != remote_md5:
        logging.error('MD5Sum mismatch. Initial: {} Post-Transfer: {}'.format(
            local_md5, remote_md5))
        raise Exception(
            'MD5Sum values before and after transfer do not match. Possible transfer issue. Initial: {} Post-Transfer: {}'
            .format(local_md5, remote_md5))

    logging.info('Trasfer Succeeded.')
def execute(self, context):
    """Copy one byte range of an S3 object into a file on an SFTP server.

    Fetches bytes ``chunk_number * chunk_size`` through
    ``(chunk_number + 1) * chunk_size - 1`` of ``self.key`` with a ranged
    ``get_object`` and writes them at the same offset of
    ``self.file_destination_path``.

    :param context: not used by this method.
    """
    conn_destination = SFTPHook(ftp_conn_id=self.conn_id_destination)
    my_conn_destination = conn_destination.get_conn()

    start_byte = self.chunk_number * self.chunk_size
    # The end of an HTTP byte range is inclusive, hence the "- 1".
    stop_byte = (self.chunk_number + 1) * self.chunk_size - 1

    client = boto3.client('s3',
                          aws_access_key_id=self.ACCESS_KEY,
                          aws_secret_access_key=self.SECRET_KEY,
                          aws_session_token=self.SESSION_TOKEN)
    chunk = client.get_object(Bucket=self.bucket,
                              Key=self.key,
                              Range='bytes={}-{}'.format(
                                  start_byte, stop_byte))
    payload = chunk['Body'].read()

    # Close the remote handle so the chunk is flushed; the original left
    # it open.
    with my_conn_destination.sftp_client.file(self.file_destination_path,
                                              'r+') as destination_file:
        destination_file.seek(start_byte)
        destination_file.write(payload)
def execute(self, context):
    """Open ``self.file_path`` on the SFTP server in ``'a+'`` mode.

    NOTE(review): presumably used to make sure the remote file exists
    before chunks are written into it - confirm against the caller.

    :param context: not used by this method.
    """
    conn = SFTPHook(ftp_conn_id=self.conn_id)
    my_conn = conn.get_conn()
    # Close the handle right away; the original leaked it.
    my_conn.sftp_client.file(self.file_path, 'a+').close()
class SFTPHookTest(unittest.TestCase):
    """Tests for SFTPHook that work on files and folders under TMP_PATH."""

    @staticmethod
    def _scratch(*parts):
        """Return a path below the scratch directory used by the tests."""
        return os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, *parts)

    @staticmethod
    def _in_tmp(name):
        """Return the path of *name* directly under TMP_PATH."""
        return os.path.join(TMP_PATH, name)

    def _check_host_key_flag(self, get_connection, expected, **conn_kwargs):
        """Build a hook from a patched connection and check its flag."""
        get_connection.return_value = Connection(login='******',
                                                 host='host',
                                                 **conn_kwargs)
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, expected)

    def setUp(self):
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(self._scratch())
        with open(self._in_tmp(TMP_FILE_FOR_TESTS), 'a') as handle:
            handle.write('Test file')

    def test_get_conn(self):
        connection = self.hook.get_conn()
        self.assertEqual(type(connection), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertTrue(self.hook.conn is not None)
        self.hook.close_conn()
        self.assertTrue(self.hook.conn is None)

    def test_describe_directory(self):
        described = self.hook.describe_directory(TMP_PATH)
        self.assertTrue(TMP_DIR_FOR_TESTS in described)

    def test_list_directory(self):
        listing = self.hook.list_directory(path=self._scratch())
        self.assertEqual(listing, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        self.hook.create_directory(self._scratch(new_dir_name))
        described = self.hook.describe_directory(self._scratch())
        self.assertTrue(new_dir_name in described)
        self.hook.delete_directory(self._scratch(new_dir_name))
        described = self.hook.describe_directory(self._scratch())
        self.assertTrue(new_dir_name not in described)

    def test_store_retrieve_and_delete_file(self):
        # Upload the fixture file into the scratch directory.
        self.hook.store_file(
            remote_full_path=self._scratch(TMP_FILE_FOR_TESTS),
            local_full_path=self._in_tmp(TMP_FILE_FOR_TESTS))
        listing = self.hook.list_directory(path=self._scratch())
        self.assertEqual(listing, [TMP_FILE_FOR_TESTS])
        # Download it again under a different local name.
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=self._scratch(TMP_FILE_FOR_TESTS),
            local_full_path=self._in_tmp(retrieved_file_name))
        self.assertTrue(retrieved_file_name in os.listdir(TMP_PATH))
        os.remove(self._in_tmp(retrieved_file_name))
        # Delete the uploaded copy and confirm the directory is empty.
        self.hook.delete_file(path=self._scratch(TMP_FILE_FOR_TESTS))
        listing = self.hook.list_directory(path=self._scratch())
        self.assertEqual(listing, [])

    def test_get_mod_time(self):
        self.hook.store_file(
            remote_full_path=self._scratch(TMP_FILE_FOR_TESTS),
            local_full_path=self._in_tmp(TMP_FILE_FOR_TESTS))
        mod_time = self.hook.get_mod_time(
            path=self._scratch(TMP_FILE_FOR_TESTS))
        self.assertEqual(len(mod_time), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        self._check_host_key_flag(get_connection, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        self._check_host_key_flag(get_connection, True,
                                  extra='{"no_host_key_check": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        self._check_host_key_flag(get_connection, False,
                                  extra='{"no_host_key_check": false}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        self._check_host_key_flag(get_connection, False,
                                  extra='{"no_host_key_check": "foo"}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        self._check_host_key_flag(
            get_connection, True,
            extra='{"ignore_hostkey_verification": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        self._check_host_key_flag(
            get_connection, False,
            extra='{"ignore_hostkey_verification": false}')

    def tearDown(self):
        shutil.rmtree(self._scratch())
        os.remove(self._in_tmp(TMP_FILE_FOR_TESTS))
class TestSFTPHook(unittest.TestCase):
    """Tests for SFTPHook that work on files and folders under TMP_PATH.

    The login of the ``sftp_default`` connection is replaced with
    SFTP_CONNECTION_USER for each test and restored in tearDown.
    """

    @staticmethod
    def _scratch(*parts):
        """Return a path below the scratch directory used by the tests."""
        return os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, *parts)

    @staticmethod
    def _in_tmp(name):
        """Return the path of *name* directly under TMP_PATH."""
        return os.path.join(TMP_PATH, name)

    def _check_host_key_flag(self, get_connection, expected, **conn_kwargs):
        """Build a hook from a patched connection and check its flag."""
        get_connection.return_value = Connection(login='******',
                                                 host='host',
                                                 **conn_kwargs)
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, expected)

    @provide_session
    def update_connection(self, login, session=None):
        """Set the login of ``sftp_default`` and return the previous login."""
        record = (session.query(Connection).filter(
            Connection.conn_id == "sftp_default").first())
        previous_login = record.login
        record.login = login
        session.commit()
        return previous_login

    def setUp(self):
        self.old_login = self.update_connection(SFTP_CONNECTION_USER)
        self.hook = SFTPHook()
        os.makedirs(self._scratch(SUB_DIR))
        # One fixture file at the top level, one inside the sub-directory.
        for target in (self._in_tmp(TMP_FILE_FOR_TESTS),
                       self._scratch(SUB_DIR, TMP_FILE_FOR_TESTS)):
            with open(target, 'a') as handle:
                handle.write('Test file')

    def test_get_conn(self):
        connection = self.hook.get_conn()
        self.assertEqual(type(connection), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertTrue(self.hook.conn is not None)
        self.hook.close_conn()
        self.assertTrue(self.hook.conn is None)

    def test_describe_directory(self):
        described = self.hook.describe_directory(TMP_PATH)
        self.assertTrue(TMP_DIR_FOR_TESTS in described)

    def test_list_directory(self):
        listing = self.hook.list_directory(path=self._scratch())
        self.assertEqual(listing, [SUB_DIR])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        self.hook.create_directory(self._scratch(new_dir_name))
        described = self.hook.describe_directory(self._scratch())
        self.assertTrue(new_dir_name in described)
        self.hook.delete_directory(self._scratch(new_dir_name))
        described = self.hook.describe_directory(self._scratch())
        self.assertTrue(new_dir_name not in described)

    def test_create_and_delete_directories(self):
        base_dir = "base_dir"
        sub_dir = "sub_dir"
        new_dir_path = os.path.join(base_dir, sub_dir)
        self.hook.create_directory(self._scratch(new_dir_path))
        described = self.hook.describe_directory(self._scratch())
        self.assertTrue(base_dir in described)
        described = self.hook.describe_directory(self._scratch(base_dir))
        self.assertTrue(sub_dir in described)
        # Remove the nested directory first, then its parent.
        self.hook.delete_directory(self._scratch(new_dir_path))
        self.hook.delete_directory(self._scratch(base_dir))
        described = self.hook.describe_directory(self._scratch())
        self.assertTrue(new_dir_path not in described)
        self.assertTrue(base_dir not in described)

    def test_store_retrieve_and_delete_file(self):
        # Upload the fixture file into the scratch directory.
        self.hook.store_file(
            remote_full_path=self._scratch(TMP_FILE_FOR_TESTS),
            local_full_path=self._in_tmp(TMP_FILE_FOR_TESTS))
        listing = self.hook.list_directory(path=self._scratch())
        self.assertEqual(listing, [SUB_DIR, TMP_FILE_FOR_TESTS])
        # Download it again under a different local name.
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=self._scratch(TMP_FILE_FOR_TESTS),
            local_full_path=self._in_tmp(retrieved_file_name))
        self.assertTrue(retrieved_file_name in os.listdir(TMP_PATH))
        os.remove(self._in_tmp(retrieved_file_name))
        # Delete the uploaded copy; only the sub-directory should remain.
        self.hook.delete_file(path=self._scratch(TMP_FILE_FOR_TESTS))
        listing = self.hook.list_directory(path=self._scratch())
        self.assertEqual(listing, [SUB_DIR])

    def test_get_mod_time(self):
        self.hook.store_file(
            remote_full_path=self._scratch(TMP_FILE_FOR_TESTS),
            local_full_path=self._in_tmp(TMP_FILE_FOR_TESTS))
        mod_time = self.hook.get_mod_time(
            path=self._scratch(TMP_FILE_FOR_TESTS))
        self.assertEqual(len(mod_time), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        self._check_host_key_flag(get_connection, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        self._check_host_key_flag(get_connection, True,
                                  extra='{"no_host_key_check": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        self._check_host_key_flag(get_connection, False,
                                  extra='{"no_host_key_check": false}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        self._check_host_key_flag(get_connection, False,
                                  extra='{"no_host_key_check": "foo"}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        self._check_host_key_flag(
            get_connection, True,
            extra='{"ignore_hostkey_verification": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        self._check_host_key_flag(
            get_connection, False,
            extra='{"ignore_hostkey_verification": false}')

    @parameterized.expand([
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS), True),
        (os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), True),
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS + "abc"), False),
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, "abc"), False),
    ])
    def test_path_exists(self, path, exists):
        self.assertEqual(self.hook.path_exists(path), exists)

    @parameterized.expand([
        ("test/path/file.bin", None, None, True),
        ("test/path/file.bin", "test", None, True),
        ("test/path/file.bin", "test/", None, True),
        ("test/path/file.bin", None, "bin", True),
        ("test/path/file.bin", "test", "bin", True),
        ("test/path/file.bin", "test/", "file.bin", True),
        ("test/path/file.bin", None, "file.bin", True),
        ("test/path/file.bin", "diff", None, False),
        ("test/path/file.bin", "test//", None, False),
        ("test/path/file.bin", None, ".txt", False),
        ("test/path/file.bin", "diff", ".txt", False),
    ])
    def test_path_match(self, path, prefix, delimiter, match):
        outcome = self.hook._is_path_match(path=path,
                                           prefix=prefix,
                                           delimiter=delimiter)
        self.assertEqual(outcome, match)

    def test_get_tree_map(self):
        files, dirs, unknowns = self.hook.get_tree_map(path=self._scratch())
        self.assertEqual(files,
                         [self._scratch(SUB_DIR, TMP_FILE_FOR_TESTS)])
        self.assertEqual(dirs, [self._scratch(SUB_DIR)])
        self.assertEqual(unknowns, [])

    def tearDown(self):
        shutil.rmtree(self._scratch())
        os.remove(self._in_tmp(TMP_FILE_FOR_TESTS))
        self.update_connection(self.old_login)
def execute(self, context):
    """Remove every entry listed in ``self.dir_path`` on the SFTP server.

    :param context: not used by this method.
    """
    # Function-scope import: remote SFTP paths use "/" as the separator.
    import posixpath

    conn = SFTPHook(ftp_conn_id=self.conn_id)
    my_conn = conn.get_conn()
    files_to_be_removed = my_conn.listdir(self.dir_path)
    for file_name in files_to_be_removed:
        # Plain concatenation produced "dirfile" whenever dir_path had no
        # trailing slash; join inserts the separator only when needed.
        my_conn.remove(posixpath.join(self.dir_path, file_name))
class SFTPHookTest(unittest.TestCase):
    """Tests for SFTPHook that work on files and folders under TMP_PATH."""

    @staticmethod
    def _work_dir(*parts):
        """Return a path below the directory created for each test."""
        return os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, *parts)

    @staticmethod
    def _tmp_file(name):
        """Return the path of *name* directly under TMP_PATH."""
        return os.path.join(TMP_PATH, name)

    def _expect_flag(self, get_connection, expected, **conn_kwargs):
        """Patch in a connection, build a hook and compare its flag."""
        get_connection.return_value = Connection(login='******',
                                                 host='host',
                                                 **conn_kwargs)
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, expected)

    def setUp(self):
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(self._work_dir())
        with open(self._tmp_file(TMP_FILE_FOR_TESTS), 'a') as fixture:
            fixture.write('Test file')

    def test_get_conn(self):
        result = self.hook.get_conn()
        self.assertEqual(type(result), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertTrue(self.hook.conn is not None)
        self.hook.close_conn()
        self.assertTrue(self.hook.conn is None)

    def test_describe_directory(self):
        result = self.hook.describe_directory(TMP_PATH)
        self.assertTrue(TMP_DIR_FOR_TESTS in result)

    def test_list_directory(self):
        result = self.hook.list_directory(path=self._work_dir())
        self.assertEqual(result, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        self.hook.create_directory(self._work_dir(new_dir_name))
        result = self.hook.describe_directory(self._work_dir())
        self.assertTrue(new_dir_name in result)
        self.hook.delete_directory(self._work_dir(new_dir_name))
        result = self.hook.describe_directory(self._work_dir())
        self.assertTrue(new_dir_name not in result)

    def test_store_retrieve_and_delete_file(self):
        # Store the fixture file remotely.
        self.hook.store_file(
            remote_full_path=self._work_dir(TMP_FILE_FOR_TESTS),
            local_full_path=self._tmp_file(TMP_FILE_FOR_TESTS))
        result = self.hook.list_directory(path=self._work_dir())
        self.assertEqual(result, [TMP_FILE_FOR_TESTS])
        # Fetch it back under another local name.
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=self._work_dir(TMP_FILE_FOR_TESTS),
            local_full_path=self._tmp_file(retrieved_file_name))
        self.assertTrue(retrieved_file_name in os.listdir(TMP_PATH))
        os.remove(self._tmp_file(retrieved_file_name))
        # Delete the stored copy and check nothing is left.
        self.hook.delete_file(path=self._work_dir(TMP_FILE_FOR_TESTS))
        result = self.hook.list_directory(path=self._work_dir())
        self.assertEqual(result, [])

    def test_get_mod_time(self):
        self.hook.store_file(
            remote_full_path=self._work_dir(TMP_FILE_FOR_TESTS),
            local_full_path=self._tmp_file(TMP_FILE_FOR_TESTS))
        result = self.hook.get_mod_time(
            path=self._work_dir(TMP_FILE_FOR_TESTS))
        self.assertEqual(len(result), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        self._expect_flag(get_connection, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        self._expect_flag(get_connection, True,
                          extra='{"no_host_key_check": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        self._expect_flag(get_connection, False,
                          extra='{"no_host_key_check": false}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        self._expect_flag(get_connection, False,
                          extra='{"no_host_key_check": "foo"}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        self._expect_flag(get_connection, True,
                          extra='{"ignore_hostkey_verification": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        self._expect_flag(get_connection, False,
                          extra='{"ignore_hostkey_verification": false}')

    def tearDown(self):
        shutil.rmtree(self._work_dir())
        os.remove(self._tmp_file(TMP_FILE_FOR_TESTS))
class SFTPHookTest(unittest.TestCase):
    """Tests for SFTPHook that work on files and folders under TMP_PATH."""

    @staticmethod
    def _remote(*parts):
        """Return a path below the directory created for each test."""
        return os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, *parts)

    @staticmethod
    def _local(name):
        """Return the path of *name* directly under TMP_PATH."""
        return os.path.join(TMP_PATH, name)

    def setUp(self):
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(self._remote())
        with open(self._local(TMP_FILE_FOR_TESTS), 'a') as seed:
            seed.write('Test file')

    def test_get_conn(self):
        opened = self.hook.get_conn()
        self.assertEqual(type(opened), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertTrue(self.hook.conn is not None)
        self.hook.close_conn()
        self.assertTrue(self.hook.conn is None)

    def test_describe_directory(self):
        entries = self.hook.describe_directory(TMP_PATH)
        self.assertTrue(TMP_DIR_FOR_TESTS in entries)

    def test_list_directory(self):
        entries = self.hook.list_directory(path=self._remote())
        self.assertEqual(entries, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        self.hook.create_directory(self._remote(new_dir_name))
        entries = self.hook.describe_directory(self._remote())
        self.assertTrue(new_dir_name in entries)
        self.hook.delete_directory(self._remote(new_dir_name))
        entries = self.hook.describe_directory(self._remote())
        self.assertTrue(new_dir_name not in entries)

    def test_store_retrieve_and_delete_file(self):
        # Store the seed file in the test directory.
        self.hook.store_file(
            remote_full_path=self._remote(TMP_FILE_FOR_TESTS),
            local_full_path=self._local(TMP_FILE_FOR_TESTS))
        entries = self.hook.list_directory(path=self._remote())
        self.assertEqual(entries, [TMP_FILE_FOR_TESTS])
        # Retrieve it under a new local name.
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=self._remote(TMP_FILE_FOR_TESTS),
            local_full_path=self._local(retrieved_file_name))
        self.assertTrue(retrieved_file_name in os.listdir(TMP_PATH))
        os.remove(self._local(retrieved_file_name))
        # Delete the stored file; the directory must be empty again.
        self.hook.delete_file(path=self._remote(TMP_FILE_FOR_TESTS))
        entries = self.hook.list_directory(path=self._remote())
        self.assertEqual(entries, [])

    def test_get_mod_time(self):
        self.hook.store_file(
            remote_full_path=self._remote(TMP_FILE_FOR_TESTS),
            local_full_path=self._local(TMP_FILE_FOR_TESTS))
        stamp = self.hook.get_mod_time(path=self._remote(TMP_FILE_FOR_TESTS))
        self.assertEqual(len(stamp), 14)

    def tearDown(self):
        shutil.rmtree(self._remote())
        os.remove(self._local(TMP_FILE_FOR_TESTS))
class SFTPHookTest(unittest.TestCase):
    """Tests for SFTPHook that work on files and folders under TMP_PATH."""

    @staticmethod
    def _sandbox(*parts):
        """Return a path below the directory created for each test."""
        return os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, *parts)

    @staticmethod
    def _beside_sandbox(name):
        """Return the path of *name* directly under TMP_PATH."""
        return os.path.join(TMP_PATH, name)

    def setUp(self):
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(self._sandbox())
        with open(self._beside_sandbox(TMP_FILE_FOR_TESTS), 'a') as sample:
            sample.write('Test file')

    def test_get_conn(self):
        session = self.hook.get_conn()
        self.assertEqual(type(session), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertTrue(self.hook.conn is not None)
        self.hook.close_conn()
        self.assertTrue(self.hook.conn is None)

    def test_describe_directory(self):
        contents = self.hook.describe_directory(TMP_PATH)
        self.assertTrue(TMP_DIR_FOR_TESTS in contents)

    def test_list_directory(self):
        contents = self.hook.list_directory(path=self._sandbox())
        self.assertEqual(contents, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        self.hook.create_directory(self._sandbox(new_dir_name))
        contents = self.hook.describe_directory(self._sandbox())
        self.assertTrue(new_dir_name in contents)
        self.hook.delete_directory(self._sandbox(new_dir_name))
        contents = self.hook.describe_directory(self._sandbox())
        self.assertTrue(new_dir_name not in contents)

    def test_store_retrieve_and_delete_file(self):
        # Put the sample file into the sandbox.
        self.hook.store_file(
            remote_full_path=self._sandbox(TMP_FILE_FOR_TESTS),
            local_full_path=self._beside_sandbox(TMP_FILE_FOR_TESTS))
        contents = self.hook.list_directory(path=self._sandbox())
        self.assertEqual(contents, [TMP_FILE_FOR_TESTS])
        # Pull it back out under a new local name.
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=self._sandbox(TMP_FILE_FOR_TESTS),
            local_full_path=self._beside_sandbox(retrieved_file_name))
        self.assertTrue(retrieved_file_name in os.listdir(TMP_PATH))
        os.remove(self._beside_sandbox(retrieved_file_name))
        # Delete the stored file; the sandbox must be empty again.
        self.hook.delete_file(path=self._sandbox(TMP_FILE_FOR_TESTS))
        contents = self.hook.list_directory(path=self._sandbox())
        self.assertEqual(contents, [])

    def test_get_mod_time(self):
        self.hook.store_file(
            remote_full_path=self._sandbox(TMP_FILE_FOR_TESTS),
            local_full_path=self._beside_sandbox(TMP_FILE_FOR_TESTS))
        modified = self.hook.get_mod_time(
            path=self._sandbox(TMP_FILE_FOR_TESTS))
        self.assertEqual(len(modified), 14)

    def tearDown(self):
        shutil.rmtree(self._sandbox())
        os.remove(self._beside_sandbox(TMP_FILE_FOR_TESTS))