예제 #1
0
    def test_pickle_file_transfer_get(self):
        """Create a file remotely over SSH, GET it via SFTP and compare contents.

        The remote file is produced with a shell ``echo`` run through an
        ``SSHOperator``; an ``SFTPOperator`` in GET mode then copies it to
        ``self.test_local_filepath``, which is read back and compared.
        """
        expected_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF"
        )

        # Step 1: write the expected content to the remote path.
        remote_write_cmd = "echo '{0}' > {1}".format(
            expected_content, self.test_remote_filepath)
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command=remote_write_cmd,
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(create_file_task)
        create_ti = TaskInstance(
            task=create_file_task, execution_date=timezone.utcnow())
        create_ti.run()

        # Step 2: download the remote file to the local path.
        get_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            dag=self.dag,
        )
        self.assertIsNotNone(get_test_task)
        get_ti = TaskInstance(
            task=get_test_task, execution_date=timezone.utcnow())
        get_ti.run()

        # Step 3: the downloaded text (ignoring surrounding whitespace, e.g.
        # the newline appended by echo) must equal what was written remotely.
        with open(self.test_local_filepath, 'r') as local_file:
            downloaded = local_file.read()
        self.assertEqual(downloaded.strip(), expected_content)
예제 #2
0
    def test_pickle_file_transfer_put(self):
        """Write a local file, PUT it via SFTP, then ``cat`` it remotely and compare.

        The remote content is obtained from the XCom ``return_value`` pushed
        by an ``SSHOperator`` running ``cat`` on the uploaded file.
        """
        payload = (
            b"This is local file content \n which is multiline "
            b"continuing....with other character\nanother line here \n this is last line"
        )

        # Step 1: materialise the payload on the local filesystem.
        with open(self.test_local_filepath, 'wb') as local_file:
            local_file.write(payload)

        # Step 2: upload it, letting the operator create missing remote dirs.
        put_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.PUT,
            create_intermediate_dirs=True,
            dag=self.dag,
        )
        self.assertIsNotNone(put_test_task)
        put_ti = TaskInstance(
            task=put_test_task, execution_date=timezone.utcnow())
        put_ti.run()

        # Step 3: read the remote file back through SSH and push it to XCom.
        check_file_task = SSHOperator(
            task_id="test_check_file",
            ssh_hook=self.hook,
            command="cat {0}".format(self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(check_file_task)
        check_ti = TaskInstance(
            task=check_file_task, execution_date=timezone.utcnow())
        check_ti.run()

        # NOTE(review): the comparison is against bytes, so this presumably
        # relies on the XCom value coming back as bytes - confirm the config.
        remote_output = check_ti.xcom_pull(
            task_ids='test_check_file', key='return_value')
        self.assertEqual(remote_output.strip(), payload)
예제 #3
0
    def test_file_transfer_no_intermediate_dir_error_get(self):
        """GET into ``test_local_filepath_int_dir`` must fail with 'No such file'.

        The remote file is created first; the GET is issued without
        ``create_intermediate_dirs`` so the local target directory is not
        created for it.
        """
        expected_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF"
        )

        # Create the source file on the remote side.
        remote_write_cmd = "echo '{0}' > {1}".format(
            expected_content, self.test_remote_filepath)
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command=remote_write_cmd,
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(create_file_task)
        create_ti = TaskInstance(
            task=create_file_task, execution_date=timezone.utcnow())
        create_ti.run()

        # Attempt the GET; the local directory does not exist, so an error
        # mentioning "No such file" is expected. Construction is kept inside
        # the context manager so any failure there is captured as well.
        with self.assertRaises(Exception) as raised:
            get_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath_int_dir,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.GET,
                dag=self.dag,
            )
            self.assertIsNotNone(get_test_task)
            get_ti = TaskInstance(
                task=get_test_task, execution_date=timezone.utcnow())
            get_ti.run()
        self.assertIn('No such file', str(raised.exception))
예제 #4
0
    def test_file_transfer_no_intermediate_dir_error_put(self):
        """PUT to ``test_remote_filepath_int_dir`` must fail with 'No such file'.

        ``create_intermediate_dirs`` is explicitly disabled, so the operator
        is not asked to create the missing remote directory.
        """
        payload = (
            b"This is local file content \n which is multiline "
            b"continuing....with other character\nanother line here \n this is last line"
        )

        # Prepare the local source file.
        with open(self.test_local_filepath, 'wb') as local_file:
            local_file.write(payload)

        # Attempt the PUT; the remote directory does not exist, so an error
        # mentioning "No such file" is expected.
        with self.assertRaises(Exception) as raised:
            put_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath,
                remote_filepath=self.test_remote_filepath_int_dir,
                operation=SFTPOperation.PUT,
                create_intermediate_dirs=False,
                dag=self.dag,
            )
            self.assertIsNotNone(put_test_task)
            put_ti = TaskInstance(
                task=put_test_task, execution_date=timezone.utcnow())
            put_ti.run()
        self.assertIn('No such file', str(raised.exception))
예제 #5
0
    def test_pickle_file_transfer_put(self):
        """Upload a local file via SFTP PUT and verify it by ``cat``-ing it remotely.

        The remote content is taken from the XCom ``return_value`` of the
        ``SSHOperator`` that runs ``cat`` on the uploaded path.
        """
        payload = (
            b"This is local file content \n which is multiline "
            b"continuing....with other character\nanother line here \n this is last line"
        )

        # Step 1: write the payload locally.
        with open(self.test_local_filepath, 'wb') as local_file:
            local_file.write(payload)

        # Step 2: upload, creating intermediate remote directories if needed.
        put_test_task = SFTPOperator(
            task_id="put_test_task",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.PUT,
            create_intermediate_dirs=True,
            dag=self.dag,
        )
        assert put_test_task is not None
        put_ti = TaskInstance(
            task=put_test_task, execution_date=timezone.utcnow())
        put_ti.run()

        # Step 3: read the uploaded file back over SSH.
        check_file_task = SSHOperator(
            task_id="check_file_task",
            ssh_hook=self.hook,
            command=f"cat {self.test_remote_filepath}",
            do_xcom_push=True,
            dag=self.dag,
        )
        assert check_file_task is not None
        check_ti = TaskInstance(
            task=check_file_task, execution_date=timezone.utcnow())
        check_ti.run()

        # NOTE(review): compared against bytes - presumably depends on the
        # XCom serialisation settings of the test run; confirm.
        remote_output = check_ti.xcom_pull(
            task_ids=check_file_task.task_id, key='return_value')
        assert remote_output.strip() == payload
예제 #6
0
    def test_file_transfer_with_intermediate_dir_error_get(self):
        """GET with ``create_intermediate_dirs=True`` and verify the downloaded text.

        The file is created remotely via SSH, fetched into
        ``self.test_local_filepath_int_dir`` and read back for comparison.
        """
        expected_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF"
        )

        # Step 1: create the source file on the remote side.
        remote_write_cmd = (
            f"echo '{expected_content}' > {self.test_remote_filepath}"
        )
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command=remote_write_cmd,
            do_xcom_push=True,
            dag=self.dag,
        )
        assert create_file_task is not None
        create_ti = TaskInstance(
            task=create_file_task, execution_date=timezone.utcnow())
        create_ti.run()

        # Step 2: download it, allowing the operator to create local dirs.
        get_test_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath_int_dir,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.GET,
            create_intermediate_dirs=True,
            dag=self.dag,
        )
        assert get_test_task is not None
        get_ti = TaskInstance(
            task=get_test_task, execution_date=timezone.utcnow())
        get_ti.run()

        # Step 3: compare the downloaded text with what was written remotely.
        with open(self.test_local_filepath_int_dir) as local_file:
            downloaded = local_file.read()
        assert downloaded.strip() == expected_content
예제 #7
0
    def test_file_transfer_no_intermediate_dir_error_get(self):
        """GET into ``test_local_filepath_int_dir`` must fail with 'No such file'.

        The GET is issued without ``create_intermediate_dirs``, so the local
        target directory is not created for it.
        """
        expected_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF"
        )

        # Create the source file on the remote side.
        remote_write_cmd = (
            f"echo '{expected_content}' > {self.test_remote_filepath}"
        )
        create_file_task = SSHOperator(
            task_id="test_create_file",
            ssh_hook=self.hook,
            command=remote_write_cmd,
            do_xcom_push=True,
            dag=self.dag,
        )
        assert create_file_task is not None
        create_ti = TaskInstance(
            task=create_file_task, execution_date=timezone.utcnow())
        create_ti.run()

        # Attempt the GET; the local directory does not exist, so an error
        # mentioning "No such file" is expected. Construction stays inside
        # the context manager so a failure there is captured too.
        with pytest.raises(Exception) as exc_info:
            get_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath_int_dir,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.GET,
                dag=self.dag,
            )
            assert get_test_task is not None
            get_ti = TaskInstance(
                task=get_test_task, execution_date=timezone.utcnow())
            get_ti.run()
        assert 'No such file' in str(exc_info.value)
예제 #8
0
    def test_arg_checking(self):
        """Check how ``SFTPOperator`` resolves ``ssh_hook`` versus ``ssh_conn_id``.

        Four cases are exercised:

        * neither argument given -> ``AirflowException`` on ``execute``;
        * invalid ``ssh_hook`` plus ``ssh_conn_id`` -> hook built from the id;
        * only ``ssh_conn_id`` -> hook built from the id;
        * valid ``ssh_hook`` plus ``ssh_conn_id`` -> the given hook is kept.

        Every operator gets its own ``task_id`` because they are all attached
        to the same ``self.dag``; reusing one id within a DAG is rejected by
        newer Airflow versions.
        """
        # Exception should be raised if neither ssh_hook nor ssh_conn_id is provided
        with self.assertRaisesRegex(
                AirflowException,
                "Cannot operate without ssh_hook or ssh_conn_id."):
            task_0 = SFTPOperator(task_id="test_sftp_0",
                                  local_filepath=self.test_local_filepath,
                                  remote_filepath=self.test_remote_filepath,
                                  operation=SFTPOperation.PUT,
                                  dag=self.dag)
            task_0.execute(None)

        # if ssh_hook is invalid/not provided, use ssh_conn_id to create SSHHook
        task_1 = SFTPOperator(
            task_id="test_sftp_1",
            ssh_hook="string_rather_than_SSHHook",  # invalid ssh_hook
            ssh_conn_id=TEST_CONN_ID,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.PUT,
            dag=self.dag)
        # Any failure of the transfer itself is deliberately ignored: only the
        # hook the operator ends up with is asserted below.
        try:
            task_1.execute(None)
        except Exception:  # pylint: disable=broad-except
            pass
        self.assertEqual(task_1.ssh_hook.ssh_conn_id, TEST_CONN_ID)

        task_2 = SFTPOperator(
            task_id="test_sftp_2",
            ssh_conn_id=TEST_CONN_ID,  # no ssh_hook provided
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.PUT,
            dag=self.dag)
        try:
            task_2.execute(None)
        except Exception:  # pylint: disable=broad-except
            pass
        self.assertEqual(task_2.ssh_hook.ssh_conn_id, TEST_CONN_ID)

        # if both valid ssh_hook and ssh_conn_id are provided, ignore ssh_conn_id
        task_3 = SFTPOperator(task_id="test_sftp_3",
                              ssh_hook=self.hook,
                              ssh_conn_id=TEST_CONN_ID,
                              local_filepath=self.test_local_filepath,
                              remote_filepath=self.test_remote_filepath,
                              operation=SFTPOperation.PUT,
                              dag=self.dag)
        try:
            task_3.execute(None)
        except Exception:  # pylint: disable=broad-except
            pass
        self.assertEqual(task_3.ssh_hook.ssh_conn_id, self.hook.ssh_conn_id)
예제 #9
0
# Query tasks created so far, in creation order; used to chain them serially.
queries = []
for service in services:
    # Task that runs delete_older_file for this service ahead of the query.
    delete = PythonOperator(
        task_id=f'delete_older_{service}_file',
        python_callable=delete_older_file,
        op_kwargs={'service': service},
        dag=dag,
    )

    # Task that runs query_narrativedx for this service.
    query = PythonOperator(
        task_id=f'query_narrativedx_{service}',
        python_callable=query_narrativedx,
        op_kwargs={'service': service},
        dag=dag,
    )

    # Upload the per-service CSV from the local base path to the SFTP server.
    sftp = SFTPOperator(
        task_id=f'upload_{service}_to_sftp',
        ssh_conn_id='coh_sftp',
        local_filepath=str(
            basepath.joinpath(f'NarrativeDX - {service} - {exec_date}.csv')
        ),
        remote_filepath=f'/sftp/NarrativeDX - {service} - {exec_date}.csv',
        operation='put',
        create_intermediate_dirs=True,
        dag=dag,
    )

    # Run each query only after the previous service's query, so the
    # database is not hit by all of them at once.
    if queries:
        queries[-1] >> query
    queries.append(query)

    # Within a service: delete -> query -> upload.
    delete >> query >> sftp
예제 #10
0
@task
def delete_sftp_file():
    """Delete the file at ``SFTP_FILE_COMPLETE_PATH`` on the SFTP server."""
    sftp_hook = SFTPHook()
    sftp_hook.delete_file(SFTP_FILE_COMPLETE_PATH)


with DAG(
        "example_sftp_to_wasb",
        schedule_interval=None,
        catchup=False,
        start_date=datetime(2021, 1, 1),  # Override to match your needs
) as dag:
    transfer_files_to_sftp_step = SFTPOperator(
        task_id="transfer_files_from_local_to_sftp",
        local_filepath=FILE_COMPLETE_PATH,
        remote_filepath=SFTP_FILE_COMPLETE_PATH,
    )

    # [START how_to_sftp_to_wasb]
    transfer_files_to_azure = SFTPToWasbOperator(
        task_id="transfer_files_from_sftp_to_wasb",
        # SFTP args
        sftp_source_path=SFTP_SRC_PATH,
        # AZURE args
        container_name=AZURE_CONTAINER_NAME,
        blob_prefix=BLOB_PREFIX,
    )
    # [END how_to_sftp_to_wasb]

    delete_blob_file_step = WasbDeleteBlobOperator(
예제 #11
0
                            command=query_cmd_patient,
                            dag=dag)

# Run the patient copy command over SSH on the 'tableau_server' connection.
copy_patient = SSHOperator(
    ssh_conn_id='tableau_server',
    task_id='copy_claro_patient',
    command=copy_cmd_patient,
    dag=dag,
)

# Run the patient encryption command in a local shell.
encrypt_patient = BashOperator(
    task_id='encrypt_file_patient',
    bash_command=encrypt_cmd_patient,
    dag=dag,
)

# Upload the resulting .gpg file to the root of the 'claro_sftp' server.
sftp_patient = SFTPOperator(
    task_id='upload_claro_to_sftp_patient',
    ssh_conn_id='claro_sftp',
    local_filepath=f'{basepath}/files/{output_file_patient}.gpg',
    remote_filepath=f'/{output_file_patient}.gpg',
    create_intermediate_dirs=True,
    dag=dag,
)

# Patient pipeline order: query -> copy -> encrypt -> upload.
query_patient >> copy_patient >> encrypt_patient >> sftp_patient

# Physician roster: output file name/path and the sqlcmd invocation that
# produces it.
output_file_roster = 'Claro_Physician_Roster_{{ next_ds_nodash }}.txt'
output_path_roster = f'C:\\Airflow\\claro\\{output_file_roster}'

# The command is assembled from space-separated pieces: server and database,
# input query file (taken from an Airflow Variable), output path, and the
# remaining sqlcmd switches.
query_cmd_roster = ' '.join([
    f'sqlcmd -S {claro_server} -d Clarity_PRD_Report',
    f'-i {Variable.get("claro_query_filepath_roster")}',
    f'-o {output_path_roster}',
    '-s"," -W -X -I -l 30 -h -1',
])