def test_should_copy_single_file(self, mock_named_temporary_file, mock_gdrive, mock_gcs_hook):
    type(mock_named_temporary_file.return_value.__enter__.return_value).name = mock.PropertyMock(
        side_effect=["TMP1"]
    )
    task = GCSToGoogleDriveOperator(
        task_id="copy_single_file",
        source_bucket="data",
        source_object="sales/sales-2017/january.avro",
        destination_object="copied_sales/2017/january-backup.avro",
    )

    task.execute(mock.MagicMock())

    mock_gcs_hook.assert_has_calls(
        [
            mock.call(delegate_to=None, google_cloud_storage_conn_id="google_cloud_default"),
            mock.call().download(
                bucket_name="data", filename="TMP1", object_name="sales/sales-2017/january.avro"
            ),
        ]
    )
    mock_gdrive.assert_has_calls(
        [
            mock.call(delegate_to=None, gcp_conn_id="google_cloud_default"),
            mock.call().upload_file(
                local_location="TMP1", remote_location="copied_sales/2017/january-backup.avro"
            ),
        ]
    )

def test_should_raise_exception_on_multiple_wildcard(
    self, mock_named_temporary_file, mock_gdrive, mock_gcs_hook
):
    task = GCSToGoogleDriveOperator(
        task_id="move_files", source_bucket="data", source_object="sales/*/*.avro", move_object=True
    )
    with self.assertRaisesRegex(AirflowException, "Only one wildcard"):
        task.execute(mock.MagicMock())

def test_should_move_files(self, mock_named_temporary_file, mock_gdrive, mock_gcs_hook):
    type(mock_named_temporary_file.return_value.__enter__.return_value).name = mock.PropertyMock(
        side_effect=["TMP1", "TMP2", "TMP3"]
    )
    mock_gcs_hook.return_value.list.return_value = ["sales/A.avro", "sales/B.avro", "sales/C.avro"]
    task = GCSToGoogleDriveOperator(
        task_id="move_files",
        source_bucket="data",
        source_object="sales/sales-2017/*.avro",
        move_object=True,
    )

    task.execute(mock.MagicMock())

    mock_gcs_hook.assert_has_calls(
        [
            mock.call(delegate_to=None, google_cloud_storage_conn_id="google_cloud_default"),
            mock.call().list("data", delimiter=".avro", prefix="sales/sales-2017/"),
            mock.call().download(bucket_name="data", filename="TMP1", object_name="sales/A.avro"),
            mock.call().delete("data", "sales/A.avro"),
            mock.call().download(bucket_name="data", filename="TMP2", object_name="sales/B.avro"),
            mock.call().delete("data", "sales/B.avro"),
            mock.call().download(bucket_name="data", filename="TMP3", object_name="sales/C.avro"),
            mock.call().delete("data", "sales/C.avro"),
        ]
    )
    mock_gdrive.assert_has_calls(
        [
            mock.call(delegate_to=None, gcp_conn_id="google_cloud_default"),
            mock.call().upload_file(local_location="TMP1", remote_location="sales/A.avro"),
            mock.call().upload_file(local_location="TMP2", remote_location="sales/B.avro"),
            mock.call().upload_file(local_location="TMP3", remote_location="sales/C.avro"),
        ]
    )
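
# The test methods above receive their three mock arguments from a mock.patch
# stack that sits outside this excerpt. Below is a minimal sketch of such a
# stack; the patch targets (module path and hook class names) are assumptions
# and depend on the Airflow version in use.
import unittest
from unittest import mock

MODULE = "airflow.providers.google.suite.transfers.gcs_to_gdrive"  # hypothetical target module


class TestGCSToGoogleDriveOperatorSketch(unittest.TestCase):
    # Decorators are applied bottom-up, so the bottom-most patch becomes the
    # first mock argument after self: NamedTemporaryFile, then the Drive hook,
    # then the GCS hook.
    @mock.patch(MODULE + ".GCSHook")
    @mock.patch(MODULE + ".GoogleDriveHook")
    @mock.patch(MODULE + ".tempfile.NamedTemporaryFile")
    def test_sketch(self, mock_named_temporary_file, mock_gdrive, mock_gcs_hook):
        ...
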
from airflow.utils.dates import days_ago

GCS_TO_GDRIVE_BUCKET = os.environ.get("GCS_TO_DRIVE_BUCKET", "example-object")

default_args = {"start_date": days_ago(1)}

with models.DAG(
    "example_gcs_to_gdrive",
    default_args=default_args,
    schedule_interval=None,  # Override to match your needs
    tags=['example'],
) as dag:
    # [START howto_operator_gcs_to_gdrive_copy_single_file]
    copy_single_file = GCSToGoogleDriveOperator(
        task_id="copy_single_file",
        source_bucket=GCS_TO_GDRIVE_BUCKET,
        source_object="sales/january.avro",
        destination_object="copied_sales/january-backup.avro",
    )
    # [END howto_operator_gcs_to_gdrive_copy_single_file]

    # [START howto_operator_gcs_to_gdrive_copy_files]
    copy_files = GCSToGoogleDriveOperator(
        task_id="copy_files",
        source_bucket=GCS_TO_GDRIVE_BUCKET,
        source_object="sales/*",
        destination_object="copied_sales/",
    )
    # [END howto_operator_gcs_to_gdrive_copy_files]

    # [START howto_operator_gcs_to_gdrive_move_files]
    move_files = GCSToGoogleDriveOperator(
        task_id="move_files",
        source_bucket=GCS_TO_GDRIVE_BUCKET,
        source_object="sales/*.avro",
        move_object=True,
    )
    # [END howto_operator_gcs_to_gdrive_move_files]
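
# A rough illustration (not the operator's actual code) of how a wildcard copy
# such as copy_files above would map a GCS object name to a Drive path,
# assuming the prefix before "*" is replaced by destination_object. Object and
# path names here are examples only.
prefix, _, _ = "sales/*".partition("*")
source_name = "sales/january.avro"
drive_path = source_name.replace(prefix, "copied_sales/", 1)
assert drive_path == "copied_sales/january.avro"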