Example #1
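    # Excerpt from a test class: ``mock_hook`` is assumed to be injected by a
    # @mock.patch decorator on BigQueryHook, and TASK_ID, TEST_DATASET and
    # TEST_TABLE_ID are test constants defined outside this excerpt.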
    def test_execute(self, mock_hook):
        source_project_dataset_tables = '{}.{}'.format(TEST_DATASET, TEST_TABLE_ID)
        destination_project_dataset_table = '{}.{}'.format(TEST_DATASET + '_new', TEST_TABLE_ID)
        write_disposition = 'WRITE_EMPTY'
        create_disposition = 'CREATE_IF_NEEDED'
        labels = {'k1': 'v1'}
        encryption_configuration = {'key': 'kk'}

        operator = BigQueryToBigQueryOperator(
            task_id=TASK_ID,
            source_project_dataset_tables=source_project_dataset_tables,
            destination_project_dataset_table=destination_project_dataset_table,
            write_disposition=write_disposition,
            create_disposition=create_disposition,
            labels=labels,
            encryption_configuration=encryption_configuration,
        )

        operator.execute(None)
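        # The operator should delegate to the mocked hook's cursor.run_copy()
        # with exactly the arguments it was constructed with.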
        mock_hook.return_value.get_conn.return_value.cursor.return_value.run_copy.assert_called_once_with(
            source_project_dataset_tables=source_project_dataset_tables,
            destination_project_dataset_table=destination_project_dataset_table,
            write_disposition=write_disposition,
            create_disposition=create_disposition,
            labels=labels,
            encryption_configuration=encryption_configuration,
        )
Example #2
import os

from airflow import models
from airflow.providers.google.cloud.operators.bigquery import BigQueryCreateEmptyDatasetOperator
from airflow.providers.google.cloud.transfers.bigquery_to_bigquery import BigQueryToBigQueryOperator
from airflow.providers.google.cloud.transfers.bigquery_to_gcs import BigQueryToGCSOperator
from airflow.utils.dates import days_ago

# DATASET_NAME is not defined in this excerpt; a plausible definition is assumed here.
DATASET_NAME = os.environ.get("GCP_BIGQUERY_DATASET_NAME", "test_dataset_transfer")
DATA_EXPORT_BUCKET_NAME = os.environ.get("GCP_BIGQUERY_EXPORT_BUCKET_NAME",
                                         "test-bigquery-sample-data")
ORIGIN = "origin"
TARGET = "target"

default_args = {"start_date": days_ago(1)}

with models.DAG(
        "example_bigquery_transfer",
        default_args=default_args,
        schedule_interval=None,  # Override to match your needs
        tags=["example"],
) as dag:
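    # Copy the ORIGIN table into the TARGET table within the same dataset.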
    copy_selected_data = BigQueryToBigQueryOperator(
        task_id="copy_selected_data",
        source_project_dataset_tables=f"{DATASET_NAME}.{ORIGIN}",
        destination_project_dataset_table=f"{DATASET_NAME}.{TARGET}",
    )

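    # Export the ORIGIN table to a CSV file in the export bucket on GCS.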
    bigquery_to_gcs = BigQueryToGCSOperator(
        task_id="bigquery_to_gcs",
        source_project_dataset_table=f"{DATASET_NAME}.{ORIGIN}",
        destination_cloud_storage_uris=[
            f"gs://{DATA_EXPORT_BUCKET_NAME}/export-bigquery.csv"
        ],
    )

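    # Create the dataset that holds the ORIGIN and TARGET tables.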
    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create_dataset",
        dataset_id=DATASET_NAME,
    )

    for table in [ORIGIN, TARGET]:
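        # The excerpt is truncated here. A plausible loop body (assumed, not part of
        # the original excerpt) creates an empty table for each name and wires the
        # task dependencies; BigQueryCreateEmptyTableOperator would also need to be
        # imported from airflow.providers.google.cloud.operators.bigquery.
        create_table = BigQueryCreateEmptyTableOperator(
            task_id=f"create_{table}_table",
            dataset_id=DATASET_NAME,
            table_id=table,
        )
        create_dataset >> create_table >> copy_selected_data >> bigquery_to_gcs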