Example #1
0
    def test_upload_data(self, mock_gdrive_hook, mock_file_handle):
        """Verify ``_upload_data`` downloads the Drive file and uploads it to GCS.

        The Drive hook mock is passed to the operator directly, so the
        assertions are made against ``mock_gdrive_hook`` itself.
        """
        # NOTE(review): this configures ``mock_gdrive_hook.return_value`` while the
        # operator below receives ``mock_gdrive_hook`` itself -- confirm this
        # setup is still required.
        media_request_stub = mock.MagicMock()
        mock_gdrive_hook.return_value.get_media_request.return_value = media_request_stub

        # Metadata the operator is expected to read from the Drive hook.
        file_metadata = mock_gdrive_hook.get_file_id.return_value
        expected_file_id = file_metadata["id"]
        expected_mime_type = file_metadata["mime_type"]

        gcs_hook_stub = mock.Mock()

        operator = GoogleDriveToGCSOperator(
            task_id="test_task",
            folder_id=FOLDER_ID,
            file_name=FILE_NAME,
            drive_id=DRIVE_ID,
            destination_bucket=BUCKET,
            destination_object=OBJECT,
        )

        operator._upload_data(gcs_hook=gcs_hook_stub, gdrive_hook=mock_gdrive_hook)

        # Test writing to file
        get_media_request = mock_gdrive_hook.get_media_request
        get_media_request.assert_called_once_with(file_id=expected_file_id)
        mock_gdrive_hook.download_content_from_request.assert_called_once_with(
            file_handle=mock_file_handle(),
            request=get_media_request.return_value,
            chunk_size=104857600,  # 100 MiB
        )

        # Test upload
        gcs_hook_stub.upload.assert_called_once_with(
            bucket_name=BUCKET,
            object_name=OBJECT,
            data=mock_file_handle().getvalue(),
            mime_type=expected_mime_type,
        )
Example #2
0
    def test_execute(self, mock_gdrive_hook, mock_gcs_hook):
        """Run ``execute`` and check the Drive lookup, download and GCS upload calls."""
        operator = GoogleDriveToGCSOperator(
            task_id="test_task",
            folder_id=FOLDER_ID,
            file_name=FILE_NAME,
            drive_id=DRIVE_ID,
            destination_bucket=BUCKET,
            destination_object=OBJECT,
            gcp_conn_id=GCP_CONN_ID,
            impersonation_chain=IMPERSONATION_CHAIN,
        )

        # The patched hook classes return these instances when instantiated.
        gdrive_hook = mock_gdrive_hook.return_value
        gcs_hook = mock_gcs_hook.return_value

        file_metadata = {"id": "123xyz"}
        gdrive_hook.get_file_id.return_value = file_metadata

        operator.execute({})

        # The file is looked up by folder, name and drive.
        gdrive_hook.get_file_id.assert_called_once_with(
            folder_id=FOLDER_ID,
            file_name=FILE_NAME,
            drive_id=DRIVE_ID,
        )

        # The download uses the ID from the metadata; any file handle is accepted.
        gdrive_hook.download_file.assert_called_once_with(
            file_id=file_metadata["id"],
            file_handle=mock.ANY,
        )

        gcs_hook.provide_file_and_upload.assert_called_once_with(
            bucket_name=BUCKET,
            object_name=OBJECT,
        )
Example #3
0
    def test_execute(self, mock_upload_data, mock_gdrive_hook, mock_gcs_hook):
        """Check that ``execute`` builds both hooks with the operator's connection settings."""
        operator = GoogleDriveToGCSOperator(
            task_id="test_task",
            folder_id=FOLDER_ID,
            file_name=FILE_NAME,
            drive_id=DRIVE_ID,
            destination_bucket=BUCKET,
            destination_object=OBJECT,
            gcp_conn_id=GCP_CONN_ID,
            impersonation_chain=IMPERSONATION_CHAIN,
        )
        operator.execute({})

        # Both hook classes must be instantiated exactly once with the same arguments.
        expected_hook_kwargs = {
            "gcp_conn_id": GCP_CONN_ID,
            "delegate_to": None,
            "impersonation_chain": IMPERSONATION_CHAIN,
        }
        for hook_class in (mock_gdrive_hook, mock_gcs_hook):
            hook_class.assert_called_once_with(**expected_hook_kwargs)
from airflow.providers.google.cloud.transfers.gdrive_to_gcs import GoogleDriveToGCSOperator
from airflow.providers.google.suite.sensors.drive import GoogleDriveFileExistenceSensor

# Example settings: each value comes from an environment variable when set,
# otherwise the literal fallback is used.
BUCKET = os.getenv("GCP_GCS_BUCKET", "test28397yeo")
OBJECT = os.getenv("GCP_GCS_OBJECT", "abc123xyz")
# NOTE: the folder ID is read from the ``FILE_ID`` environment variable.
FOLDER_ID = os.getenv("FILE_ID", "1234567890qwerty")
FILE_NAME = os.getenv("FILE_NAME", "file.pdf")

# Example DAG: a Drive file-existence sensor followed by a Drive-to-GCS transfer.
with models.DAG(
    dag_id="example_gdrive_to_gcs_with_gdrive_sensor",
    schedule_interval='@once',  # Override to match your needs
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=["example"],
) as dag:
    # [START detect_file]
    detect_file = GoogleDriveFileExistenceSensor(
        task_id="detect_file",
        folder_id=FOLDER_ID,
        file_name=FILE_NAME,
    )
    # [END detect_file]
    # [START upload_gdrive_to_gcs]
    upload_gdrive_to_gcs = GoogleDriveToGCSOperator(
        task_id="upload_gdrive_object_to_gcs",
        folder_id=FOLDER_ID,
        file_name=FILE_NAME,
        bucket_name=BUCKET,
        object_name=OBJECT,
    )
    # [END upload_gdrive_to_gcs]
    # The transfer task runs downstream of the sensor task.
    detect_file >> upload_gdrive_to_gcs