コード例 #1
0
ファイル: sftp_to_s3.py プロジェクト: zx-ventures/airflow
    def execute(self, context):
        """Copy the files of an SFTP folder to S3 through a local directory.

        Lists ``self.ftp_folder`` on the SFTP server, optionally narrows the
        listing with ``self.filter``, then downloads each file into
        ``self.tmp_directory`` and uploads it to ``self.s3_bucket`` under the
        key ``self.s3_key + "/" + file_name``. Keys that already exist in S3
        are skipped unless ``self.replace`` is truthy.

        :param context: task context supplied by the caller (unused here).
        """
        sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
        s3_hook = S3Hook(s3_conn_id=self.s3_conn_id)
        sftp_hook.get_conn()
        file_list = sftp_hook.list_directory(self.ftp_folder)
        if self.filter:
            # filter() never mutates its input, so its result must be kept;
            # the previous code called it and threw the result away.
            # NOTE(review): assumes self.filter is a predicate callable - confirm.
            file_list = list(filter(self.filter, file_list))

        # Create the local staging directory on first use.
        if not os.path.exists(self.tmp_directory):
            os.makedirs(self.tmp_directory)

        for file_name in file_list:
            s3_key_file = self.s3_key + "/" + str(file_name)
            exists = s3_hook.check_for_key(s3_key_file, self.s3_bucket)

            # Leave existing objects alone unless overwriting was requested.
            if exists and not self.replace:
                continue

            ftp_file_fullpath = self.ftp_folder + "/" + str(file_name)
            local_file_fullpath = self.tmp_directory + "/" + str(file_name)

            logging.info("Dowloading file [%s] from sftp to local [%s]",
                         ftp_file_fullpath, local_file_fullpath)
            sftp_hook.get_file(ftp_file_fullpath, local_file_fullpath)
            logging.info("Done.")
            logging.info("Uploading file [%s] to S3 on bucket [%s] and key [%s]",
                         local_file_fullpath, self.s3_bucket, s3_key_file)
            s3_hook.load_file(local_file_fullpath, s3_key_file,
                              self.s3_bucket, self.replace)
            logging.info("Done.")
コード例 #2
0
 def execute(self, context):
     """Copy one chunk of a remote file from one SFTP server to another.

     Reads ``self.chunk_size`` bytes starting at
     ``self.chunk_number * self.chunk_size`` from ``self.file_source_path``
     on the source connection and writes them at the same offset of
     ``self.file_destination_path`` on the destination connection.

     :param context: task context supplied by the caller (unused here).
     """
     conn_source = SFTPHook(ftp_conn_id=self.conn_id_source)
     my_conn_source = conn_source.get_conn()
     conn_destination = SFTPHook(ftp_conn_id=self.conn_id_destination)
     my_conn_destination = conn_destination.get_conn()

     offset = self.chunk_number * self.chunk_size

     # Open both remote files as context managers so the handles are closed
     # (and the buffered write is flushed) even if seek/read/write raises.
     with my_conn_source.sftp_client.file(self.file_source_path,
                                          'r') as source_file:
         source_file.seek(offset)
         payload = source_file.read(self.chunk_size)

     # 'r+' does not create the file, so the destination must already exist.
     with my_conn_destination.sftp_client.file(
             self.file_destination_path, 'r+') as destination_file:
         destination_file.seek(offset)
         destination_file.write(payload)
コード例 #3
0
    def execute(self, context):
        """Move a batch of remote files into ``self.dest_path`` by renaming them.

        The batch comes from ``self.source_files`` (a sequence, or the string
        literal of one) unless ``self.source_path`` is set, in which case the
        listing of that directory is used instead. ``self.file_limit``, when
        truthy, caps how many files are processed.

        Each rename is attempted up to five times: an ``IOError`` is treated
        as "source missing" and the file is skipped; any other exception
        triggers a reconnect and a backed-off retry, and the exception from
        the final attempt is re-raised.

        :param context: task context supplied by the caller (unused here).
        :return: destination paths of the files that were renamed.
        """
        max_attempts = 5

        self.log.info("Going to start Bulk Rename sftp operator")
        sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
        sftp_hook.no_host_key_check = True

        # Always bind the list: previously a non-string source_files (or no
        # source at all) left the name undefined and crashed further down.
        source_files_list = []
        if self.source_files:
            if isinstance(self.source_files, str):
                source_files_list = ast.literal_eval(self.source_files)
            else:
                source_files_list = list(self.source_files)

        # A source directory, when given, takes precedence over source_files.
        if self.source_path:
            source_files_list = sftp_hook.list_directory(self.source_path)
            source_files_list = [
                os.path.join(self.source_path, x) for x in source_files_list
            ]

        file_path_list = []
        if self.file_limit:
            source_files_list = source_files_list[: self.file_limit]
        for key in source_files_list:
            file_path = key.split("/")[-1]
            file_path = os.path.join(self.dest_path, file_path)
            self.log.info(f"Renaming {key} to {file_path}")

            conn = sftp_hook.get_conn()
            for i in range(max_attempts):
                try:
                    try:
                        # Clear any stale file at the destination first.
                        conn.remove(file_path)
                        self.log.info("Deleted duplicated file")
                    except IOError:
                        # Best effort: nothing at the destination to delete.
                        pass

                    conn.rename(key, file_path)
                    file_path_list.append(file_path)
                    break
                except IOError:
                    self.log.info("File not found, skipping")
                    break
                except Exception:
                    self.log.info(
                        f"Got no response from server, waiting for next try number {(i + 1)}"
                    )
                    if i < max_attempts - 1:
                        # Exponential back-off with jitter, then reconnect.
                        time.sleep(2 ** i + random.random())
                        sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
                        sftp_hook.no_host_key_check = True
                        conn = sftp_hook.get_conn()
                    else:
                        raise

        self.log.info("Finished executing Bulk Rename sftp operator")
        return file_path_list
コード例 #4
0
    def execute(self, context):
        """Upload S3 objects with matching extensions to an SFTP directory.

        Lists the keys under ``self.s3_prefix`` in ``self.s3_bucket``, keeps
        those whose lower-cased name ends with ``self.file_extensions``, and
        copies each one through a temporary local file to
        ``self.sftp_path`` as
        ``<self.sftp_filename_prefix>-part-<index><extension>``.

        :param context: task context supplied by the caller (unused here).
        """
        sftp_hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
        s3_hook = S3Hook(self.s3_conn_id)

        s3_client = s3_hook.get_conn()
        sftp_client = sftp_hook.get_conn()

        # NOTE(review): some S3Hook versions appear to return None instead of
        # an empty list when nothing matches; treat that as "no keys".
        s3_keys = s3_hook.list_keys(self.s3_bucket, prefix=self.s3_prefix) or []

        s3_keys_filtered_by_extensions = [
            s3_key for s3_key in s3_keys
            if s3_key.lower().endswith(self.file_extensions)
        ]

        for part_count, s3_key in enumerate(s3_keys_filtered_by_extensions):
            # Only the temp file's name is used; it is deleted on exit.
            with NamedTemporaryFile("w") as f:
                s3_client.download_file(self.s3_bucket, s3_key, f.name)

                _, file_extension = os.path.splitext(s3_key)
                remote_filename = f'{self.sftp_filename_prefix}-part-{part_count}{file_extension}'
                remote_path = os.path.join(self.sftp_path, remote_filename)

                sftp_client.put(f.name, remote_path)
コード例 #5
0
def moveFromSourceToLocal(**kwargs):
    """
    Download the file named in the triggering dag_run conf and verify it by MD5.

    Reads 'SFTP_Connection_Name' and 'File_Name' from kwargs['dag_run'].conf,
    retrieves the remote file into LOCAL_LANDING_PATH (with '.ready' removed
    from its base name), and raises Exception when the checksum taken on the
    remote side differs from the checksum of the downloaded copy.
    """
    run_conf = kwargs['dag_run'].conf
    connection_name = run_conf['SFTP_Connection_Name']
    remote_path = run_conf['File_Name']

    # The landing copy is named after the remote file minus the ".ready" marker.
    landing_name = os.path.basename(remote_path).replace('.ready', '')
    landing_path = os.path.join(LOCAL_LANDING_PATH, landing_name)

    hook = SFTPHook(ftp_conn_id=connection_name)

    # Checksum on the remote side first, so there is something to compare to.
    remote_md5 = getMD5sumRemote(hook.get_conn(), remote_path)
    logging.info('Initial MD5Sum: {}'.format(remote_md5))

    hook.retrieve_file(remote_path, landing_path)

    local_md5 = getMD5sumLocal(landing_path)
    logging.info('currentMD5Sum: {}'.format(local_md5))

    if remote_md5 != local_md5:
        logging.error(
            'MD5Sum mismatch.  Initial: {}  Post-Transfer: {}'.format(
                remote_md5, local_md5))
        raise Exception(
            'MD5Sum values before and after transfer do not match. Possible transfer issue. Initial: {} Post-Transfer: {}'
            .format(remote_md5, local_md5))
コード例 #6
0
def archive_files_in_sftp(**context):
    """Move the transferred files into ``archive/<most_recent_date>/`` on the SFTP server.

    The file list and the date are pulled from XCom values pushed by the
    ``get_list_of_alma_sftp_files_to_transer`` task. Both the ``archive``
    directory and its dated sub-directory are created when missing.

    :param context: task context; ``context['task_instance']`` is used for XCom.
    :return: number of files renamed into the archive.
    """
    sftp_conn = SFTPHook(ftp_conn_id=ALMA_SFTP_CONNECTION_ID)
    # The raw connection object exposes rename(), which the hook calls used
    # below (list_directory / create_directory) do not provide.
    paramiko_conn = sftp_conn.get_conn()

    task_instance = context['task_instance']
    most_recent_date = task_instance.xcom_pull(
        task_ids='get_list_of_alma_sftp_files_to_transer',
        key='most_recent_date')
    list_of_files = task_instance.xcom_pull(
        task_ids='get_list_of_alma_sftp_files_to_transer')
    archive_path = "archive"

    if archive_path not in sftp_conn.list_directory("./"):
        sftp_conn.create_directory(path=f"./{archive_path}")
    # Checked independently of the branch above: with the previous `elif`,
    # the run that created "archive" never created the dated folder, so every
    # rename below failed on a missing target directory.
    if str(most_recent_date) not in sftp_conn.list_directory(
            f"./{archive_path}"):
        sftp_conn.create_directory(f"./{archive_path}/{most_recent_date}")

    count = 0
    for filename in list_of_files:
        logging.info(
            f"Moving {filename} to {archive_path}/{most_recent_date}/{filename}"
        )

        paramiko_conn.rename(f"{filename}",
                             f"{archive_path}/{most_recent_date}/{filename}")
        count += 1
    return count
コード例 #7
0
def createTestFile(**kwargs):
    """
    Create and encrypt a small test file on the first configured SFTP site.

    Uses the first entry of dag_config['SFTP_Polling_Sites'] and its first
    feed group, builds a randomly numbered file name under that location,
    then runs an ``echo`` and a ``gpg`` command over the connection and
    prints the decoded output of each.
    """
    site = dag_config['SFTP_Polling_Sites'][0]
    site_name = site['SFTP_Name']
    connection_name = site['SFTP_Connection_Name']
    destination_dir = site['Feed_Groups'][0]['Feed_Group_Location']

    remote_file = os.path.join(
        destination_dir, 'testfile_{}.txt'.format(randint(0, 9999999)))
    create_command = "echo 'Hello World!' > {}".format(remote_file)
    # NOTE(review): the "-r" recipient looks like a scrubbed e-mail address;
    # confirm the intended key before relying on this command.
    gpg_command = "gpg --output {}.gpg -e -r [email protected] {}".format(remote_file, remote_file)

    hook = SFTPHook(ftp_conn_id = connection_name)

    print('SFTP_Name: {}'.format(site_name))
    print('SFTP_Connection: {}'.format(connection_name))
    print('SFTP_Destination_Path: {}'.format(destination_dir))
    print('Random Filename: {}'.format(remote_file))
    print('GPG Command: {}'.format(gpg_command))

    conn = hook.get_conn()

    # Run the two commands in order, printing each one's decoded output.
    steps = (
        ('Create File Results: {}', create_command),
        ('GPG Results: {}', gpg_command),
    )
    for report_template, command in steps:
        raw_output = conn.execute(command)
        decoded_output = [chunk.decode('utf-8') for chunk in raw_output]
        print(report_template.format(decoded_output))
 def execute(self, context):
     """Store the number of chunks needed for the remote file in a Variable.

     Looks up the size of ``self.file_path`` over SFTP, divides it by
     ``self.chunk_size`` (rounded up), and writes the result under
     ``self.chunks_variable_name`` inside the value stored as
     ``self.master_variable``. Sleeps five seconds before returning.

     :param context: task context supplied by the caller (unused here).
     """
     hook = SFTPHook(ftp_conn_id=self.conn_id)
     remote_size = hook.get_conn().lstat(self.file_path).st_size

     # NOTE(review): the item assignment below needs a mutable mapping; if
     # Variable.get returns a plain string here, this will fail - confirm how
     # the value is (de)serialized.
     chunk_registry = Variable.get(self.master_variable)
     chunk_registry[self.chunks_variable_name] = math.ceil(
         remote_size / self.chunk_size)
     Variable.set(self.master_variable, chunk_registry)

     time.sleep(5)
コード例 #9
0
 def execute(self, context):
     """Rename ``self.source_file`` to ``self.dest_file`` on the SFTP server.

     Host key checking is switched off on the hook before connecting. An
     ``IOError`` from the rename is logged and otherwise ignored.

     :param context: task context supplied by the caller (unused here).
     """
     self.log.info("Going to start Rename SFTP Operator")

     hook = SFTPHook(ftp_conn_id=self.sftp_conn_id)
     hook.no_host_key_check = True
     client = hook.get_conn()

     try:
         client.rename(self.source_file, self.dest_file)
     except IOError:
         # Deliberately non-fatal: the task still finishes normally.
         self.log.info("File not found, skipping")

     self.log.info("Finished executing RenameSFTPOperator")
コード例 #10
0
def check_for_file_py(**kwargs):
    """Return True when FILENAME appears in the SFTP listing of FILEPATH.

    Expects ``sftp_conn_id`` and ``templates_dict`` in ``kwargs``; ``path`` is
    only logged.

    NOTE(review): ``path`` and the templated ``filename`` are read from
    kwargs but the lookup uses the module-level FILEPATH / FILENAME instead -
    confirm which pair is intended.
    """
    requested_path = kwargs.get('path', None)
    logging.info('path type: {} || path value: {}'.format(
        type(requested_path), requested_path))

    connection_id = kwargs.get('sftp_conn_id', None)
    # Kept although unused: it still fails fast when templates_dict is absent.
    _requested_filename = kwargs.get('templates_dict').get('filename', None)

    hook = SFTPHook(ftp_conn_id=connection_id)
    logging.info('sftp_hook type: {} || sftp_hook value: {}'.format(
        type(hook), hook))

    # Open the connection up front, as before; the handle itself is unused.
    hook.get_conn()

    listing = hook.list_directory(FILEPATH)
    logging.info('FileList: {}'.format(listing))
    return FILENAME in listing
 def execute(self, context):
     """Upload one chunk of a remote SFTP file as a part of an S3 multipart upload.

     Reads ``self.chunk_size`` bytes starting at
     ``self.chunk_number * self.chunk_size`` from ``self.file_source_path``
     and sends them with ``upload_part`` to ``self.bucket`` / ``self.key``
     under ``self.upload_id``.

     :param context: task context supplied by the caller (unused here).
     """
     conn_source = SFTPHook(ftp_conn_id=self.conn_id_source)
     my_conn_source = conn_source.get_conn()

     # Context manager closes the remote handle even if seek/read raises;
     # previously it was left open.
     with my_conn_source.sftp_client.file(self.file_source_path,
                                          'r') as source_file:
         source_file.seek(self.chunk_number * self.chunk_size)
         payload = source_file.read(self.chunk_size)

     # No explicit keys are passed, so boto3 resolves credentials itself.
     client = boto3.client('s3')
     # NOTE(review): S3 part numbers start at 1; if chunk_number is
     # zero-based (as the seek offset suggests) the first part would be
     # rejected - confirm against the task that completes the upload.
     client.upload_part(Body=payload,
                        Bucket=self.bucket,
                        Key=self.key,
                        PartNumber=self.chunk_number,
                        UploadId=self.upload_id)
コード例 #12
0
def moveFileFromLocalToDest(**kwargs):
    """
    Upload the local file to its destination SFTP site and verify it by MD5.

    Reads 'Feed_Dest_Connection_Name', 'File_Name' and 'Feed_Dest_Location'
    from kwargs['dag_run'].conf. The local file is looked up in
    LOCAL_LANDING_PATH under the base name of 'File_Name' with '.gpg.ready'
    removed, stored at the destination under the same name, and an Exception
    is raised when the local and remote checksums differ.
    """
    run_conf = kwargs['dag_run'].conf
    dest_connection = run_conf['Feed_Dest_Connection_Name']
    base_name = os.path.basename(run_conf['File_Name']).replace(
        '.gpg.ready', '')
    dest_dir = run_conf['Feed_Dest_Location']

    local_path = os.path.join(LOCAL_LANDING_PATH, base_name)
    remote_path = os.path.join(dest_dir, base_name)

    logging.info(
        'Attempting to transfer {} on airflow host to {}@{} site'.format(
            local_path, remote_path, dest_connection))

    hook = SFTPHook(ftp_conn_id=dest_connection)
    remote_conn = hook.get_conn()

    # Checksum the local copy before sending so there is a baseline.
    local_md5 = getMD5sumLocal(local_path)
    logging.info('Local MD5Sum: {}'.format(local_md5))

    hook.store_file(remote_path, local_path)

    remote_md5 = getMD5sumRemote(remote_conn, remote_path)
    logging.info('Remote MD5Sum: {}'.format(remote_md5))

    if local_md5 != remote_md5:
        logging.error(
            'MD5Sum mismatch.  Initial: {}  Post-Transfer: {}'.format(
                local_md5, remote_md5))
        raise Exception(
            'MD5Sum values before and after transfer do not match. Possible transfer issue. Initial: {} Post-Transfer: {}'
            .format(local_md5, remote_md5))

    logging.info('Trasfer Succeeded.')
コード例 #13
0
    def execute(self, context):
        """Copy one byte range of an S3 object into a file on an SFTP server.

        Fetches bytes ``chunk_number * chunk_size`` through
        ``(chunk_number + 1) * chunk_size - 1`` of ``self.key`` in
        ``self.bucket`` and writes them at the same offset of
        ``self.file_destination_path`` on the destination connection.

        :param context: task context supplied by the caller (unused here).
        """
        conn_destination = SFTPHook(ftp_conn_id=self.conn_id_destination)
        my_conn_destination = conn_destination.get_conn()

        # HTTP byte ranges are inclusive on both ends, hence the "- 1".
        start_byte = self.chunk_number * self.chunk_size
        stop_byte = start_byte + self.chunk_size - 1

        client = boto3.client('s3',
                              aws_access_key_id=self.ACCESS_KEY,
                              aws_secret_access_key=self.SECRET_KEY,
                              aws_session_token=self.SESSION_TOKEN)

        chunk = client.get_object(Bucket=self.bucket,
                                  Key=self.key,
                                  Range='bytes={}-{}'.format(
                                      start_byte, stop_byte))
        payload = chunk['Body'].read()

        # Close the handle deterministically so the buffered write is flushed
        # here rather than whenever the object happens to be collected.
        # 'r+' does not create the file, so it must already exist.
        with my_conn_destination.sftp_client.file(
                self.file_destination_path, 'r+') as destination_file:
            destination_file.seek(start_byte)
            destination_file.write(payload)
 def execute(self, context):
     """Open ``self.file_path`` on the SFTP server in append mode and close it.

     :param context: task context supplied by the caller (unused here).
     """
     conn = SFTPHook(ftp_conn_id=self.conn_id)
     my_conn = conn.get_conn()
     # Only the open matters here; close at once instead of leaking the
     # remote file handle as the previous code did.
     my_conn.sftp_client.file(self.file_path, 'a+').close()
コード例 #15
0
ファイル: test_sftp_hook.py プロジェクト: zxiu2049/airflow
class SFTPHookTest(unittest.TestCase):
    """Tests for SFTPHook file/directory operations and host-key-check parsing.

    The filesystem tests work on a scratch directory and a scratch file under
    TMP_PATH that setUp creates and tearDown removes.
    """

    def setUp(self):
        """Load the test configuration and create the scratch directory and file."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        """get_conn returns exactly a pysftp.Connection."""
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        """close_conn resets the hook's cached connection to None."""
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        """describe_directory on TMP_PATH includes the scratch directory."""
        output = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, output)

    def test_list_directory(self):
        """The freshly created scratch directory lists as empty."""
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        """A created directory shows up in the listing and is gone after delete."""
        new_dir_name = 'new_dir'
        self.hook.create_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertIn(new_dir_name, output)
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertNotIn(new_dir_name, output)

    def test_store_retrieve_and_delete_file(self):
        """Round-trip a file: store it, retrieve it, then delete the remote copy."""
        self.hook.store_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name))
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))
        self.hook.delete_file(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        """get_mod_time returns a 14-character value for a stored file."""
        self.hook.store_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.get_mod_time(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        self.assertEqual(len(output), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        """Without any extra, no_host_key_check is False."""
        connection = Connection(login='******', host='host')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        """Extra ``no_host_key_check: true`` turns the flag on."""
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": true}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        """Extra ``no_host_key_check: false`` leaves the flag off."""
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": false}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        """A non-boolean value such as "foo" leaves the flag off."""
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": "foo"}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        """Extra ``ignore_hostkey_verification: true`` turns the flag on."""
        connection = Connection(login='******',
                                host='host',
                                extra='{"ignore_hostkey_verification": true}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        """Extra ``ignore_hostkey_verification: false`` leaves the flag off."""
        connection = Connection(login='******',
                                host='host',
                                extra='{"ignore_hostkey_verification": false}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    def tearDown(self):
        """Remove the scratch directory tree and the scratch file."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
コード例 #16
0
class TestSFTPHook(unittest.TestCase):
    """Tests for SFTPHook file/directory operations, path helpers and extras.

    setUp points the ``sftp_default`` connection at SFTP_CONNECTION_USER and
    creates a scratch directory (with one sub-directory holding a file) plus
    a scratch file under TMP_PATH; tearDown removes them and restores the
    previous login.
    """

    @provide_session
    def update_connection(self, login, session=None):
        """Set the login of the ``sftp_default`` connection.

        :param login: login to store on the connection.
        :param session: database session; injected by ``provide_session``.
        :return: the login the connection had before the update.
        """
        connection = (session.query(Connection).filter(
            Connection.conn_id == "sftp_default").first())
        old_login = connection.login
        connection.login = login
        session.commit()
        return old_login

    def setUp(self):
        """Switch the connection login and create the scratch tree and files."""
        self.old_login = self.update_connection(SFTP_CONNECTION_USER)
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR))

        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as file:
            file.write('Test file')
        with open(
                os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR,
                             TMP_FILE_FOR_TESTS), 'a') as file:
            file.write('Test file')

    def test_get_conn(self):
        """get_conn returns exactly a pysftp.Connection."""
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        """close_conn resets the hook's cached connection to None."""
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        """describe_directory on TMP_PATH includes the scratch directory."""
        output = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, output)

    def test_list_directory(self):
        """The scratch directory initially lists only its sub-directory."""
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [SUB_DIR])

    def test_create_and_delete_directory(self):
        """A created directory shows up in the listing and is gone after delete."""
        new_dir_name = 'new_dir'
        self.hook.create_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertIn(new_dir_name, output)
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertNotIn(new_dir_name, output)

    def test_create_and_delete_directories(self):
        """create_directory builds a nested path; both levels can be deleted."""
        base_dir = "base_dir"
        sub_dir = "sub_dir"
        new_dir_path = os.path.join(base_dir, sub_dir)
        self.hook.create_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_path))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertIn(base_dir, output)
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, base_dir))
        self.assertIn(sub_dir, output)
        # Delete innermost first, then its parent.
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_path))
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, base_dir))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertNotIn(new_dir_path, output)
        self.assertNotIn(base_dir, output)

    def test_store_retrieve_and_delete_file(self):
        """Round-trip a file: store it, retrieve it, then delete the remote copy."""
        self.hook.store_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [SUB_DIR, TMP_FILE_FOR_TESTS])
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name))
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))
        self.hook.delete_file(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [SUB_DIR])

    def test_get_mod_time(self):
        """get_mod_time returns a 14-character value for a stored file."""
        self.hook.store_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.get_mod_time(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        self.assertEqual(len(output), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        """Without any extra, no_host_key_check is False."""
        connection = Connection(login='******', host='host')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        """Extra ``no_host_key_check: true`` turns the flag on."""
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": true}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        """Extra ``no_host_key_check: false`` leaves the flag off."""
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": false}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        """A non-boolean value such as "foo" leaves the flag off."""
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": "foo"}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        """Extra ``ignore_hostkey_verification: true`` turns the flag on."""
        connection = Connection(login='******',
                                host='host',
                                extra='{"ignore_hostkey_verification": true}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        """Extra ``ignore_hostkey_verification: false`` leaves the flag off."""
        connection = Connection(login='******',
                                host='host',
                                extra='{"ignore_hostkey_verification": false}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @parameterized.expand([
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS), True),
        (os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), True),
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS + "abc"), False),
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, "abc"), False),
    ])
    def test_path_exists(self, path, exists):
        """path_exists is True for the scratch dir/file and False otherwise."""
        result = self.hook.path_exists(path)
        self.assertEqual(result, exists)

    @parameterized.expand([
        ("test/path/file.bin", None, None, True),
        ("test/path/file.bin", "test", None, True),
        ("test/path/file.bin", "test/", None, True),
        ("test/path/file.bin", None, "bin", True),
        ("test/path/file.bin", "test", "bin", True),
        ("test/path/file.bin", "test/", "file.bin", True),
        ("test/path/file.bin", None, "file.bin", True),
        ("test/path/file.bin", "diff", None, False),
        ("test/path/file.bin", "test//", None, False),
        ("test/path/file.bin", None, ".txt", False),
        ("test/path/file.bin", "diff", ".txt", False),
    ])
    def test_path_match(self, path, prefix, delimiter, match):
        """_is_path_match returns the expected result for each prefix/delimiter pair."""
        result = self.hook._is_path_match(path=path,
                                          prefix=prefix,
                                          delimiter=delimiter)
        self.assertEqual(result, match)

    def test_get_tree_map(self):
        """get_tree_map reports the nested file, its directory, and no unknowns."""
        tree_map = self.hook.get_tree_map(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        files, dirs, unknowns = tree_map

        self.assertEqual(files, [
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR,
                         TMP_FILE_FOR_TESTS)
        ])
        self.assertEqual(dirs,
                         [os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR)])
        self.assertEqual(unknowns, [])

    def tearDown(self):
        """Remove the scratch tree and file, then restore the original login."""
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        self.update_connection(self.old_login)
コード例 #17
0
 def execute(self, context):
     """Remove every entry listed in ``self.dir_path`` on the SFTP server.

     :param context: task context supplied by the caller (unused here).
     """
     # Local import keeps this method self-contained.
     import posixpath

     conn = SFTPHook(ftp_conn_id=self.conn_id)
     my_conn = conn.get_conn()
     files_to_be_removed = my_conn.listdir(self.dir_path)
     for file_name in files_to_be_removed:
         # Join with a separator: plain concatenation produced a wrong path
         # ("dirname" + "file") whenever dir_path lacked a trailing slash.
         # A dir_path that already ends in "/" yields the same path as before.
         my_conn.remove(posixpath.join(self.dir_path, file_name))
コード例 #18
0
class SFTPHookTest(unittest.TestCase):
    """Tests for ``SFTPHook`` that run against a scratch area in ``TMP_PATH``.

    ``setUp`` creates the ``TMP_DIR_FOR_TESTS`` directory and the
    ``TMP_FILE_FOR_TESTS`` file below ``TMP_PATH``; ``tearDown`` removes both.
    """

    def setUp(self):
        """Load the test config, build the hook and create the scratch paths."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        # Paths shared by several tests, computed once.
        self.tmp_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        self.local_file = os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        self.stored_file = os.path.join(self.tmp_dir, TMP_FILE_FOR_TESTS)
        os.makedirs(self.tmp_dir)
        with open(self.local_file, 'a') as f:
            f.write('Test file')

    def _assert_no_host_key_check(self, get_connection, expected, extra=None):
        """Build a hook from a stubbed connection and check its flag.

        :param get_connection: the patched ``SFTPHook.get_connection`` mock.
        :param expected: value ``hook.no_host_key_check`` must equal.
        :param extra: JSON string for the connection's ``extra`` field, or
            ``None`` to construct the connection without that argument.
        """
        conn_kwargs = {'login': '******', 'host': 'host'}
        if extra is not None:
            conn_kwargs['extra'] = extra
        get_connection.return_value = Connection(**conn_kwargs)
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, expected)

    def test_get_conn(self):
        """``get_conn`` returns a ``pysftp.Connection``."""
        output = self.hook.get_conn()
        # isinstance-based check instead of comparing exact types.
        self.assertIsInstance(output, pysftp.Connection)

    def test_close_conn(self):
        """``close_conn`` resets ``hook.conn`` to ``None``."""
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        """The scratch directory shows up when describing ``TMP_PATH``."""
        output = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, output)

    def test_list_directory(self):
        """The freshly created scratch directory lists as empty."""
        output = self.hook.list_directory(path=self.tmp_dir)
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        """A directory created via the hook appears, then disappears on delete."""
        new_dir_name = 'new_dir'
        new_dir_path = os.path.join(self.tmp_dir, new_dir_name)

        self.hook.create_directory(new_dir_path)
        output = self.hook.describe_directory(self.tmp_dir)
        self.assertIn(new_dir_name, output)

        self.hook.delete_directory(new_dir_path)
        output = self.hook.describe_directory(self.tmp_dir)
        self.assertNotIn(new_dir_name, output)

    def test_store_retrieve_and_delete_file(self):
        """Round-trip a file: store it, fetch it back, then delete it."""
        self.hook.store_file(
            remote_full_path=self.stored_file,
            local_full_path=self.local_file
        )
        output = self.hook.list_directory(path=self.tmp_dir)
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])

        retrieved_file_name = 'retrieved.txt'
        retrieved_path = os.path.join(TMP_PATH, retrieved_file_name)
        self.hook.retrieve_file(
            remote_full_path=self.stored_file,
            local_full_path=retrieved_path
        )
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(retrieved_path)

        self.hook.delete_file(path=self.stored_file)
        output = self.hook.list_directory(path=self.tmp_dir)
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        """``get_mod_time`` returns a value of length 14 for a stored file.

        NOTE(review): 14 characters looks like a ``YYYYMMDDHHMMSS`` stamp --
        confirm against the hook implementation.
        """
        self.hook.store_file(
            remote_full_path=self.stored_file,
            local_full_path=self.local_file
        )
        output = self.hook.get_mod_time(path=self.stored_file)
        self.assertEqual(len(output), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        """With no ``extra``, ``no_host_key_check`` is ``False``."""
        self._assert_no_host_key_check(get_connection, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        """``no_host_key_check: true`` in ``extra`` sets the flag."""
        self._assert_no_host_key_check(
            get_connection, True, extra='{"no_host_key_check": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        """``no_host_key_check: false`` in ``extra`` leaves the flag off."""
        self._assert_no_host_key_check(
            get_connection, False, extra='{"no_host_key_check": false}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        """A non-boolean ``no_host_key_check`` value leaves the flag off."""
        self._assert_no_host_key_check(
            get_connection, False, extra='{"no_host_key_check": "foo"}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        """``ignore_hostkey_verification: true`` also sets the flag."""
        self._assert_no_host_key_check(
            get_connection, True, extra='{"ignore_hostkey_verification": true}')

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        """``ignore_hostkey_verification: false`` leaves the flag off."""
        self._assert_no_host_key_check(
            get_connection, False,
            extra='{"ignore_hostkey_verification": false}')

    def tearDown(self):
        """Remove the scratch directory and file created by ``setUp``."""
        shutil.rmtree(self.tmp_dir)
        os.remove(self.local_file)
コード例 #19
0
class SFTPHookTest(unittest.TestCase):
    """Tests for ``SFTPHook`` that run against a scratch area in ``TMP_PATH``.

    ``setUp`` creates the ``TMP_DIR_FOR_TESTS`` directory and the
    ``TMP_FILE_FOR_TESTS`` file below ``TMP_PATH``; ``tearDown`` removes both.
    """

    def setUp(self):
        """Load the test config, build the hook and create the scratch paths."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        # Paths shared by several tests, computed once.
        self.tmp_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        self.local_file = os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        self.stored_file = os.path.join(self.tmp_dir, TMP_FILE_FOR_TESTS)
        os.makedirs(self.tmp_dir)
        with open(self.local_file, 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        """``get_conn`` returns a ``pysftp.Connection``."""
        output = self.hook.get_conn()
        # isinstance-based check instead of comparing exact types.
        self.assertIsInstance(output, pysftp.Connection)

    def test_close_conn(self):
        """``close_conn`` resets ``hook.conn`` to ``None``."""
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        """The scratch directory shows up when describing ``TMP_PATH``."""
        output = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, output)

    def test_list_directory(self):
        """The freshly created scratch directory lists as empty."""
        output = self.hook.list_directory(path=self.tmp_dir)
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        """A directory created via the hook appears, then disappears on delete."""
        new_dir_name = 'new_dir'
        new_dir_path = os.path.join(self.tmp_dir, new_dir_name)

        self.hook.create_directory(new_dir_path)
        output = self.hook.describe_directory(self.tmp_dir)
        self.assertIn(new_dir_name, output)

        self.hook.delete_directory(new_dir_path)
        output = self.hook.describe_directory(self.tmp_dir)
        self.assertNotIn(new_dir_name, output)

    def test_store_retrieve_and_delete_file(self):
        """Round-trip a file: store it, fetch it back, then delete it."""
        self.hook.store_file(
            remote_full_path=self.stored_file,
            local_full_path=self.local_file
        )
        output = self.hook.list_directory(path=self.tmp_dir)
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])

        retrieved_file_name = 'retrieved.txt'
        retrieved_path = os.path.join(TMP_PATH, retrieved_file_name)
        self.hook.retrieve_file(
            remote_full_path=self.stored_file,
            local_full_path=retrieved_path
        )
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(retrieved_path)

        self.hook.delete_file(path=self.stored_file)
        output = self.hook.list_directory(path=self.tmp_dir)
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        """``get_mod_time`` returns a value of length 14 for a stored file.

        NOTE(review): 14 characters looks like a ``YYYYMMDDHHMMSS`` stamp --
        confirm against the hook implementation.
        """
        self.hook.store_file(
            remote_full_path=self.stored_file,
            local_full_path=self.local_file
        )
        output = self.hook.get_mod_time(path=self.stored_file)
        self.assertEqual(len(output), 14)

    def tearDown(self):
        """Remove the scratch directory and file created by ``setUp``."""
        shutil.rmtree(self.tmp_dir)
        os.remove(self.local_file)
コード例 #20
0
class SFTPHookTest(unittest.TestCase):
    """Tests for ``SFTPHook`` that run against a scratch area in ``TMP_PATH``.

    ``setUp`` creates the ``TMP_DIR_FOR_TESTS`` directory and the
    ``TMP_FILE_FOR_TESTS`` file below ``TMP_PATH``; ``tearDown`` removes both.
    """

    def setUp(self):
        """Load the test config, build the hook and create the scratch paths."""
        configuration.load_test_config()
        self.hook = SFTPHook()
        # Paths shared by several tests, computed once.
        self.tmp_dir = os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)
        self.local_file = os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        self.stored_file = os.path.join(self.tmp_dir, TMP_FILE_FOR_TESTS)
        os.makedirs(self.tmp_dir)
        with open(self.local_file, 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        """``get_conn`` returns a ``pysftp.Connection``."""
        output = self.hook.get_conn()
        # isinstance-based check instead of comparing exact types.
        self.assertIsInstance(output, pysftp.Connection)

    def test_close_conn(self):
        """``close_conn`` resets ``hook.conn`` to ``None``."""
        self.hook.conn = self.hook.get_conn()
        self.assertIsNotNone(self.hook.conn)
        self.hook.close_conn()
        self.assertIsNone(self.hook.conn)

    def test_describe_directory(self):
        """The scratch directory shows up when describing ``TMP_PATH``."""
        output = self.hook.describe_directory(TMP_PATH)
        self.assertIn(TMP_DIR_FOR_TESTS, output)

    def test_list_directory(self):
        """The freshly created scratch directory lists as empty."""
        output = self.hook.list_directory(path=self.tmp_dir)
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        """A directory created via the hook appears, then disappears on delete."""
        new_dir_name = 'new_dir'
        new_dir_path = os.path.join(self.tmp_dir, new_dir_name)

        self.hook.create_directory(new_dir_path)
        output = self.hook.describe_directory(self.tmp_dir)
        self.assertIn(new_dir_name, output)

        self.hook.delete_directory(new_dir_path)
        output = self.hook.describe_directory(self.tmp_dir)
        self.assertNotIn(new_dir_name, output)

    def test_store_retrieve_and_delete_file(self):
        """Round-trip a file: store it, fetch it back, then delete it."""
        self.hook.store_file(
            remote_full_path=self.stored_file,
            local_full_path=self.local_file)
        output = self.hook.list_directory(path=self.tmp_dir)
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])

        retrieved_file_name = 'retrieved.txt'
        retrieved_path = os.path.join(TMP_PATH, retrieved_file_name)
        self.hook.retrieve_file(
            remote_full_path=self.stored_file,
            local_full_path=retrieved_path)
        self.assertIn(retrieved_file_name, os.listdir(TMP_PATH))
        os.remove(retrieved_path)

        self.hook.delete_file(path=self.stored_file)
        output = self.hook.list_directory(path=self.tmp_dir)
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        """``get_mod_time`` returns a value of length 14 for a stored file.

        NOTE(review): 14 characters looks like a ``YYYYMMDDHHMMSS`` stamp --
        confirm against the hook implementation.
        """
        self.hook.store_file(
            remote_full_path=self.stored_file,
            local_full_path=self.local_file)
        output = self.hook.get_mod_time(path=self.stored_file)
        self.assertEqual(len(output), 14)

    def tearDown(self):
        """Remove the scratch directory and file created by ``setUp``."""
        shutil.rmtree(self.tmp_dir)
        os.remove(self.local_file)