Example #1
0
    def test_execute(self, gcs_mock_hook, s3_one_mock_hook, s3_two_mock_hook):
        """Run the operator and check that every listed S3 key is uploaded.

        Both S3 hook mocks are primed to list ``MOCK_FILES``. The test then
        verifies the GCS upload calls, the arguments each hook was built with,
        and the file list returned by ``execute``.
        """
        operator = S3ToGCSOperator(
            task_id=TASK_ID,
            bucket=S3_BUCKET,
            prefix=S3_PREFIX,
            delimiter=S3_DELIMITER,
            dest_gcs_conn_id=GCS_CONN_ID,
            dest_gcs=GCS_PATH_PREFIX,
        )

        s3_hooks = (s3_one_mock_hook, s3_two_mock_hook)
        for s3_hook in s3_hooks:
            s3_hook.return_value.list_keys.return_value = MOCK_FILES

        uploaded_files = operator.execute(None)

        # The upload order is not asserted, hence any_order=True below.
        expected_objects = ('data/TEST1.csv', 'data/TEST3.csv', 'data/TEST2.csv')
        expected_uploads = [
            mock.call('gcs-bucket', object_name, mock.ANY, gzip=False)
            for object_name in expected_objects
        ]
        upload_mock = gcs_mock_hook.return_value.upload
        upload_mock.assert_has_calls(expected_uploads, any_order=True)

        # Each hook class must have been instantiated exactly once.
        for s3_hook in s3_hooks:
            s3_hook.assert_called_once_with(aws_conn_id=AWS_CONN_ID, verify=None)
        gcs_mock_hook.assert_called_once_with(
            google_cloud_storage_conn_id=GCS_CONN_ID,
            delegate_to=None,
        )

        # execute() is expected to report exactly the files in MOCK_FILES.
        expected_files = sorted(MOCK_FILES)
        actual_files = sorted(uploaded_files)
        self.assertEqual(expected_files, actual_files)
Example #2
0
    def test_execute_with_gzip(self, gcs_mock_hook, s3_one_mock_hook,
                               s3_two_mock_hook):
        """Run the operator with ``gzip=True`` and check the upload calls.

        Both S3 hook mocks are primed to list ``MOCK_FILES``. The test then
        verifies how the GCS hook was constructed and that every upload call
        was made with ``gzip=True``.
        """
        operator_kwargs = dict(
            task_id=TASK_ID,
            bucket=S3_BUCKET,
            prefix=S3_PREFIX,
            delimiter=S3_DELIMITER,
            dest_gcs_conn_id=GCS_CONN_ID,
            dest_gcs=GCS_PATH_PREFIX,
            gzip=True,
        )
        operator = S3ToGCSOperator(**operator_kwargs)

        for s3_hook in (s3_one_mock_hook, s3_two_mock_hook):
            s3_hook.return_value.list_keys.return_value = MOCK_FILES

        operator.execute(None)

        gcs_mock_hook.assert_called_once_with(
            google_cloud_storage_conn_id=GCS_CONN_ID,
            delegate_to=None,
            impersonation_chain=None,
        )

        # The upload order is not asserted, hence any_order=True below.
        expected_objects = ('data/TEST2.csv', 'data/TEST1.csv', 'data/TEST3.csv')
        expected_uploads = [
            mock.call('gcs-bucket', object_name, mock.ANY, gzip=True)
            for object_name in expected_objects
        ]
        upload_mock = gcs_mock_hook.return_value.upload
        upload_mock.assert_has_calls(expected_uploads, any_order=True)
Example #3
0
    def test_init(self):
        """Verify the operator stores each constructor argument as given."""
        # Keyword arguments double as the expected attribute values; dict
        # ordering keeps the checks in the order the arguments are listed.
        init_kwargs = dict(
            task_id=TASK_ID,
            bucket=S3_BUCKET,
            prefix=S3_PREFIX,
            delimiter=S3_DELIMITER,
            gcp_conn_id=GCS_CONN_ID,
            dest_gcs=GCS_PATH_PREFIX,
        )

        operator = S3ToGCSOperator(**init_kwargs)

        for attr_name, expected_value in init_kwargs.items():
            self.assertEqual(getattr(operator, attr_name), expected_value)
Example #4
0
    def test_init(self):
        """Verify every constructor argument ends up on the operator."""
        # (argument name, value) pairs: passed to the constructor and then
        # read back from the instance in the same order.
        expected_attrs = (
            ('task_id', TASK_ID),
            ('bucket', S3_BUCKET),
            ('prefix', S3_PREFIX),
            ('delimiter', S3_DELIMITER),
            ('gcp_conn_id', GCS_CONN_ID),
            ('dest_gcs', GCS_PATH_PREFIX),
            ('google_impersonation_chain', IMPERSONATION_CHAIN),
        )

        operator = S3ToGCSOperator(**dict(expected_attrs))

        for attr_name, expected_value in expected_attrs:
            assert getattr(operator, attr_name) == expected_value
Example #5
0
        schedule_interval='@once',
        start_date=datetime(2021, 1, 1),
        catchup=False,
        tags=['example'],
) as dag:
    # Task that creates the S3 bucket later used as the transfer source
    # (created in region us-east-1).
    create_s3_bucket = S3CreateBucketOperator(task_id="create_s3_bucket",
                                              bucket_name=S3BUCKET_NAME,
                                              region_name='us-east-1')

    # Task that creates the GCS bucket inside the configured GCP project.
    # NOTE(review): presumably GCS_BUCKET_URL (used below) points at this
    # bucket; both constants are defined outside this view, so confirm.
    create_gcs_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name=GCS_BUCKET,
        project_id=GCP_PROJECT_ID,
    )
    # Transfer task: reads from S3BUCKET_NAME restricted to PREFIX and writes
    # to the dest_gcs location.
    # [START howto_transfer_s3togcs_operator]
    transfer_to_gcs = S3ToGCSOperator(task_id='s3_to_gcs_task',
                                      bucket=S3BUCKET_NAME,
                                      prefix=PREFIX,
                                      dest_gcs=GCS_BUCKET_URL)
    # [END howto_transfer_s3togcs_operator]

    # Cleanup task for the S3 bucket.
    # NOTE(review): force_delete=True presumably allows deleting a bucket
    # that still contains objects; confirm against the operator docs.
    delete_s3_bucket = S3DeleteBucketOperator(task_id='delete_s3_bucket',
                                              bucket_name=S3BUCKET_NAME,
                                              force_delete=True)

    # Cleanup task for the GCS bucket.
    delete_gcs_bucket = GCSDeleteBucketOperator(task_id='delete_gcs_bucket',
                                                bucket_name=GCS_BUCKET)

    # Chain the tasks in a single linear sequence: create the S3 bucket, run
    # upload_file() (defined outside this view), create the GCS bucket,
    # transfer, then delete both buckets.
    (create_s3_bucket >> upload_file() >> create_gcs_bucket >> transfer_to_gcs
     >> delete_s3_bucket >> delete_gcs_bucket)