Code example #1
    # mock_hook is injected by a @mock.patch on the BigQueryHook used by the
    # operator module; the decorator is omitted in this excerpt.
    def test_execute(self, mock_hook):
        source_project_dataset_table = '{}.{}'.format(TEST_DATASET,
                                                      TEST_TABLE_ID)
        destination_cloud_storage_uris = ['gs://some-bucket/some-file.txt']
        compression = 'NONE'
        export_format = 'CSV'
        field_delimiter = ','
        print_header = True
        labels = {'k1': 'v1'}

        operator = BigQueryToGCSOperator(
            task_id=TASK_ID,
            source_project_dataset_table=source_project_dataset_table,
            destination_cloud_storage_uris=destination_cloud_storage_uris,
            compression=compression,
            export_format=export_format,
            field_delimiter=field_delimiter,
            print_header=print_header,
            labels=labels)

        operator.execute(None)
        # The legacy operator runs the extract through a DB-API style cursor,
        # so the assertion walks hook -> connection -> cursor -> run_extract.
        mock_hook.return_value \
            .get_conn.return_value \
            .cursor.return_value \
            .run_extract \
            .assert_called_once_with(
                source_project_dataset_table=source_project_dataset_table,
                destination_cloud_storage_uris=destination_cloud_storage_uris,
                compression=compression,
                export_format=export_format,
                field_delimiter=field_delimiter,
                print_header=print_header,
                labels=labels
            )
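
Both this test and the one in example #2 are excerpts from a test class, so they lean on scaffolding the snippets do not show. A minimal sketch of that harness, assuming the provider-package import path and placeholder values for the constants (TASK_ID, TEST_DATASET, TEST_TABLE_ID, PROJECT_ID are not the real values):

import unittest
from unittest import mock

from airflow.providers.google.cloud.transfers.bigquery_to_gcs import (
    BigQueryToGCSOperator,
)

# Placeholder constants; the excerpts do not show the real values.
TASK_ID = "test-bq-to-gcs"
TEST_DATASET = "test-dataset"
TEST_TABLE_ID = "test-table-id"
PROJECT_ID = "test-project"


class TestBigQueryToGCSOperator(unittest.TestCase):
    # Patch BigQueryHook where the operator module looks it up, so each
    # test method receives the mock as mock_hook.
    @mock.patch(
        "airflow.providers.google.cloud.transfers.bigquery_to_gcs.BigQueryHook"
    )
    def test_execute(self, mock_hook):
        ...  # body as in the examples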
Code example #2
    # Same scenario against a newer provider release: execute() now builds a
    # BigQuery job configuration and submits it through the hook, so the
    # assertion targets insert_job instead of a cursor.
    def test_execute(self, mock_hook):
        source_project_dataset_table = '{}.{}'.format(TEST_DATASET,
                                                      TEST_TABLE_ID)
        destination_cloud_storage_uris = ['gs://some-bucket/some-file.txt']
        compression = 'NONE'
        export_format = 'CSV'
        field_delimiter = ','
        print_header = True
        labels = {'k1': 'v1'}

        # Pin the project id that the operator resolves from the mocked hook.
        mock_hook().project_id = PROJECT_ID

        # The extract job configuration the operator is expected to build.
        configuration = {
            'extract': {
                'sourceTable': {
                    'projectId': mock_hook().project_id,
                    'datasetId': TEST_DATASET,
                    'tableId': TEST_TABLE_ID,
                },
                'compression': compression,
                'destinationUris': destination_cloud_storage_uris,
                'destinationFormat': export_format,
                'fieldDelimiter': field_delimiter,
                'printHeader': print_header,
            },
            'labels': labels,
        }

        operator = BigQueryToGCSOperator(
            task_id=TASK_ID,
            source_project_dataset_table=source_project_dataset_table,
            destination_cloud_storage_uris=destination_cloud_storage_uris,
            compression=compression,
            export_format=export_format,
            field_delimiter=field_delimiter,
            print_header=print_header,
            labels=labels,
        )

        operator.execute(None)

        # The newer operator submits exactly one extract job via the hook.
        mock_hook.return_value.insert_job.assert_called_once_with(
            configuration=configuration)
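
For reference, a minimal standalone sketch of what the newer operator does under the hood: it hands an extract configuration of exactly this shape to BigQueryHook.insert_job. The connection id, project, dataset, table, and bucket below are placeholders, not values from the tests.

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")  # placeholder conn id
hook.insert_job(
    configuration={
        "extract": {
            "sourceTable": {
                "projectId": "my-project",  # placeholder
                "datasetId": "my_dataset",  # placeholder
                "tableId": "my_table",      # placeholder
            },
            "compression": "NONE",
            "destinationUris": ["gs://my-bucket/my-file.csv"],
            "destinationFormat": "CSV",
            "fieldDelimiter": ",",
            "printHeader": True,
        },
    }
)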
Code example #3
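The DAG excerpt below relies on imports and module-level constants that the snippet omits. A plausible preamble, where every name and default value is an assumption standing in for the omitted lines:

import os

from airflow import models
from airflow.providers.google.cloud.operators.bigquery import (
    BigQueryCreateEmptyDatasetOperator,
    BigQueryCreateEmptyTableOperator,
)
from airflow.providers.google.cloud.transfers.bigquery_to_bigquery import (
    BigQueryToBigQueryOperator,
)
from airflow.providers.google.cloud.transfers.bigquery_to_gcs import (
    BigQueryToGCSOperator,
)
from airflow.utils.dates import days_ago

# Placeholder values; the excerpt does not show the real ones.
DATASET_NAME = os.environ.get("GCP_DATASET_NAME", "test_dataset_transfer")
DATA_EXPORT_BUCKET_NAME = os.environ.get("GCP_EXPORT_BUCKET", "test-bucket")
ORIGIN = "origin"
TARGET = "target"
default_args = {"start_date": days_ago(1)}
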
with models.DAG(
        "example_bigquery_transfer",
        default_args=default_args,
        schedule_interval=None,  # Override to match your needs
        tags=["example"],
) as dag:
    copy_selected_data = BigQueryToBigQueryOperator(
        task_id="copy_selected_data",
        source_project_dataset_tables=f"{DATASET_NAME}.{ORIGIN}",
        destination_project_dataset_table=f"{DATASET_NAME}.{TARGET}",
    )

    bigquery_to_gcs = BigQueryToGCSOperator(
        task_id="bigquery_to_gcs",
        source_project_dataset_table=f"{DATASET_NAME}.{ORIGIN}",
        destination_cloud_storage_uris=[
            f"gs://{DATA_EXPORT_BUCKET_NAME}/export-bigquery.csv"
        ],
    )

    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create_dataset", dataset_id=DATASET_NAME)

    for table in [ORIGIN, TARGET]:
        create_table = BigQueryCreateEmptyTableOperator(
            task_id=f"create_{table}_table",
            dataset_id=DATASET_NAME,
            table_id=table,
            schema_fields=[
                {
                    "name": "emp_name",
                    # the excerpt was cut off here; minimal completion below
                    "type": "STRING",
                    "mode": "REQUIRED",
                },
            ],
        )
Code example #4
# Excerpt begins mid-call: this is the tail of the sql= argument of the
# upstream query task that materializes rating_results_{{ ds_nodash }}.
         "FROM " + os.environ["BIGQUERY_DATASET"] + ".ratings "
         "WHERE DATE(timestamp) <= DATE({{ ds }}) "
         "GROUP BY movieid "
         "ORDER BY avg_rating DESC"),
    write_disposition="WRITE_TRUNCATE",
    create_disposition="CREATE_IF_NEEDED",
    bigquery_conn_id="gcp",  # renamed gcp_conn_id in newer provider releases
    dag=dag,
)

extract_top_ratings = BigQueryToGCSOperator(
    task_id="extract_top_ratings",
    source_project_dataset_table=(os.environ["GCP_PROJECT"] + ":" +
                                  os.environ["BIGQUERY_DATASET"] + "." +
                                  "rating_results_{{ ds_nodash }}"),
    destination_cloud_storage_uris=[
        "gs://" + os.environ["RESULT_BUCKET"] + "/{{ ds_nodash }}.csv"
    ],
    export_format="CSV",
    bigquery_conn_id="gcp",
    dag=dag,
)

delete_result_table = BigQueryDeleteTableOperator(
    task_id="delete_result_table",
    deletion_dataset_table=(os.environ["GCP_PROJECT"] + ":" +
                            os.environ["BIGQUERY_DATASET"] + "." +
                            "rating_results_{{ ds_nodash }}"),
    bigquery_conn_id="gcp",
    dag=dag,
)
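
This last example is a fragment of a larger DAG: the dag object, the environment variables it reads, and the query task upstream of extract_top_ratings are defined elsewhere. A minimal sketch of that surrounding context; the dag_id, schedule, and wiring are assumptions:

import os

import airflow.utils.dates
from airflow import DAG
from airflow.providers.google.cloud.operators.bigquery import (
    BigQueryDeleteTableOperator,
)
from airflow.providers.google.cloud.transfers.bigquery_to_gcs import (
    BigQueryToGCSOperator,
)

dag = DAG(
    dag_id="movie_ratings_bigquery",  # placeholder dag_id
    start_date=airflow.utils.dates.days_ago(1),
    schedule_interval="@daily",       # assumption
)

# Assumed environment for the tasks above:
#   GCP_PROJECT      - project holding the BigQuery dataset
#   BIGQUERY_DATASET - dataset with the ratings and result tables
#   RESULT_BUCKET    - GCS bucket that receives the daily CSV export
# plus an Airflow connection named "gcp" with BigQuery/GCS credentials.

# Likely wiring: the (truncated) query task materializes the result table,
# the export reads it, and the cleanup task drops it afterwards:
extract_top_ratings >> delete_result_table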