def test_execute_move_single_file(self, sftp_hook, gcs_hook):
    """Check the hook calls made when moving one non-wildcard object.

    With ``move_object=True`` the test expects both hooks to be built
    once, the object to be downloaded from the source bucket, stored
    under the destination path, and then deleted from the bucket.
    """
    operator_kwargs = dict(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_NO_WILDCARD,
        destination_path=DESTINATION_SFTP,
        move_object=True,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
    )
    GoogleCloudStorageToSFTPOperator(**operator_kwargs).execute(None)

    # Each hook must be constructed exactly once with the configured ids.
    gcs_hook.assert_called_once_with(
        gcp_conn_id=GCP_CONN_ID, delegate_to=DELEGATE_TO
    )
    sftp_hook.assert_called_once_with(SFTP_CONN_ID)

    gcs_instance = gcs_hook.return_value
    sftp_instance = sftp_hook.return_value

    # call_args is an (args, kwargs) pair; only the kwargs matter here.
    download_kwargs = gcs_instance.download.call_args[1]
    self.assertEqual(download_kwargs["bucket_name"], TEST_BUCKET)
    self.assertEqual(download_kwargs["object_name"], SOURCE_OBJECT_NO_WILDCARD)

    # The first positional argument of store_file is the remote path.
    store_positional = sftp_instance.store_file.call_args[0]
    expected_remote_path = os.path.join(
        DESTINATION_SFTP, SOURCE_OBJECT_NO_WILDCARD
    )
    self.assertEqual(store_positional[0], expected_remote_path)

    # Moving (as opposed to copying) removes the source object afterwards.
    gcs_instance.delete.assert_called_once_with(
        TEST_BUCKET, SOURCE_OBJECT_NO_WILDCARD
    )
def test_execute_more_than_one_wildcard_exception(self, sftp_hook, gcs_hook):
    """Check that a source object with multiple wildcards is rejected.

    Executing the operator with ``SOURCE_OBJECT_MULTIPLE_WILDCARDS`` is
    expected to raise ``AirflowException``.
    """
    gcs_instance = gcs_hook.return_value
    gcs_instance.list.return_value = SOURCE_FILES_LIST[:2]

    # Construction happens outside the assertion so that only execute()
    # is allowed to raise.
    task = GoogleCloudStorageToSFTPOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_MULTIPLE_WILDCARDS,
        destination_path=DESTINATION_SFTP,
        move_object=False,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
    )

    self.assertRaises(AirflowException, task.execute, None)
def test_execute_move_with_wildcard(self, sftp_hook, gcs_hook):
    """Check listing and deletion when moving objects matched by a wildcard.

    The mocked GCS hook lists two files; the test expects the list call to
    use the prefix/delimiter split of the wildcard object and expects
    exactly two delete calls, one per listed file, in order.
    """
    gcs_instance = gcs_hook.return_value
    gcs_instance.list.return_value = SOURCE_FILES_LIST[:2]

    task = GoogleCloudStorageToSFTPOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
        destination_path=DESTINATION_SFTP,
        move_object=True,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
    )
    task.execute(None)

    gcs_instance.list.assert_called_with(
        TEST_BUCKET, delimiter=".txt", prefix="test_object"
    )

    # Unpacking into two names also enforces that delete ran exactly twice.
    first_delete, second_delete = gcs_instance.delete.call_args_list
    expected_deletions = (
        (first_delete, "test_object/file1.txt"),
        (second_delete, "test_object/file2.txt"),
    )
    for recorded_call, object_name in expected_deletions:
        # Index 0 of a recorded call holds its positional arguments.
        self.assertEqual(recorded_call[0], (TEST_BUCKET, object_name))
BUCKET_SRC = os.environ.get("GCP_GCS_BUCKET_1_SRC", "test-gcs-sftp") OBJECT_SRC_1 = "parent-1.bin" OBJECT_SRC_2 = "parent-2.bin" OBJECT_SRC_3 = "subdir-1/*" DESTINATION_PATH_1 = "/tmp/single-file/" DESTINATION_PATH_2 = "/tmp/dirs/" with models.DAG( "example_gcs_to_sftp", default_args=default_args, schedule_interval=None ) as dag: # [START howto_operator_gcs_to_sftp_copy_single_file] copy_file_from_gcs_to_sftp = GoogleCloudStorageToSFTPOperator( task_id="file-copy-gsc-to-sftp", source_bucket=BUCKET_SRC, source_object=OBJECT_SRC_1, destination_path=DESTINATION_PATH_1, ) # [END howto_operator_gcs_to_sftp_copy_single_file] # [START howto_operator_gcs_to_sftp_move_single_file_destination] move_file_from_gcs_to_sftp = GoogleCloudStorageToSFTPOperator( task_id="file-move-gsc-to-sftp", source_bucket=BUCKET_SRC, source_object=OBJECT_SRC_2, destination_path=DESTINATION_PATH_1, move_object=True, ) # [END howto_operator_gcs_to_sftp_move_single_file_destination] # [START howto_operator_gcs_to_sftp_copy_directory]