def test_execute(self, mock_hook):
    source_project_dataset_table = '{}.{}'.format(TEST_DATASET, TEST_TABLE_ID)
    destination_cloud_storage_uris = ['gs://some-bucket/some-file.txt']
    compression = 'NONE'
    export_format = 'CSV'
    field_delimiter = ','
    print_header = True
    labels = {'k1': 'v1'}

    operator = BigQueryToGCSOperator(
        task_id=TASK_ID,
        source_project_dataset_table=source_project_dataset_table,
        destination_cloud_storage_uris=destination_cloud_storage_uris,
        compression=compression,
        export_format=export_format,
        field_delimiter=field_delimiter,
        print_header=print_header,
        labels=labels,
    )

    operator.execute(None)

    mock_hook.return_value \
        .get_conn.return_value \
        .cursor.return_value \
        .run_extract \
        .assert_called_once_with(
            source_project_dataset_table=source_project_dataset_table,
            destination_cloud_storage_uris=destination_cloud_storage_uris,
            compression=compression,
            export_format=export_format,
            field_delimiter=field_delimiter,
            print_header=print_header,
            labels=labels,
        )
def test_execute(self, mock_hook):
    source_project_dataset_table = '{}.{}'.format(TEST_DATASET, TEST_TABLE_ID)
    destination_cloud_storage_uris = ['gs://some-bucket/some-file.txt']
    compression = 'NONE'
    export_format = 'CSV'
    field_delimiter = ','
    print_header = True
    labels = {'k1': 'v1'}

    mock_hook().project_id = PROJECT_ID

    configuration = {
        'extract': {
            'sourceTable': {
                'projectId': mock_hook().project_id,
                'datasetId': TEST_DATASET,
                'tableId': TEST_TABLE_ID,
            },
            'compression': compression,
            'destinationUris': destination_cloud_storage_uris,
            'destinationFormat': export_format,
            'fieldDelimiter': field_delimiter,
            'printHeader': print_header,
        },
        'labels': labels,
    }

    operator = BigQueryToGCSOperator(
        task_id=TASK_ID,
        source_project_dataset_table=source_project_dataset_table,
        destination_cloud_storage_uris=destination_cloud_storage_uris,
        compression=compression,
        export_format=export_format,
        field_delimiter=field_delimiter,
        print_header=print_header,
        labels=labels,
    )

    operator.execute(None)

    mock_hook.return_value.insert_job.assert_called_once_with(
        configuration=configuration)
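# A minimal scaffolding sketch for running either test_execute variant above.
# The patch target and the constant values are assumptions for illustration
# (they are not given in the snippets); adjust them to your provider version.
from unittest import TestCase, mock

from airflow.providers.google.cloud.transfers.bigquery_to_gcs import BigQueryToGCSOperator

TASK_ID = 'test-bq-to-gcs'        # illustrative placeholder values
TEST_DATASET = 'test-dataset'
TEST_TABLE_ID = 'test-table-id'
PROJECT_ID = 'test-project-id'


class TestBigQueryToGCSOperator(TestCase):
    # Assumed patch path: BigQueryHook as imported inside the operator module.
    @mock.patch('airflow.providers.google.cloud.transfers.bigquery_to_gcs.BigQueryHook')
    def test_execute(self, mock_hook):
        ...  # body as shown in the tests above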
with models.DAG(
    "example_bigquery_transfer",
    default_args=default_args,
    schedule_interval=None,  # Override to match your needs
    tags=["example"],
) as dag:
    copy_selected_data = BigQueryToBigQueryOperator(
        task_id="copy_selected_data",
        source_project_dataset_tables=f"{DATASET_NAME}.{ORIGIN}",
        destination_project_dataset_table=f"{DATASET_NAME}.{TARGET}",
    )

    bigquery_to_gcs = BigQueryToGCSOperator(
        task_id="bigquery_to_gcs",
        source_project_dataset_table=f"{DATASET_NAME}.{ORIGIN}",
        destination_cloud_storage_uris=[
            f"gs://{DATA_EXPORT_BUCKET_NAME}/export-bigquery.csv"
        ],
    )

    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create_dataset", dataset_id=DATASET_NAME
    )

    for table in [ORIGIN, TARGET]:
        create_table = BigQueryCreateEmptyTableOperator(
            task_id=f"create_{table}_table",
            dataset_id=DATASET_NAME,
            table_id=table,
            schema_fields=[
                {
                    "name": "emp_name",
"FROM " + os.environ["BIGQUERY_DATASET"] + ".ratings " "WHERE DATE(timestamp) <= DATE({{ ds }}) " "GROUP BY movieid " "ORDER BY avg_rating DESC"), write_disposition="WRITE_TRUNCATE", create_disposition="CREATE_IF_NEEDED", bigquery_conn_id="gcp", dag=dag, ) extract_top_ratings = BigQueryToGCSOperator( task_id="extract_top_ratings", source_project_dataset_table=(os.environ["GCP_PROJECT"] + ":" + os.environ["BIGQUERY_DATASET"] + "." + "rating_results_{{ ds_nodash }}"), destination_cloud_storage_uris=[ "gs://" + os.environ["RESULT_BUCKET"] + "/{{ ds_nodash }}.csv" ], export_format="CSV", bigquery_conn_id="gcp", dag=dag, ) delete_result_table = BigQueryDeleteTableOperator( task_id="delete_result_table", deletion_dataset_table=(os.environ["GCP_PROJECT"] + ":" + os.environ["BIGQUERY_DATASET"] + "." + "rating_results_{{ ds_nodash }}"), bigquery_conn_id="gcp", dag=dag, )