def test_create_clustered_empty_table(self, mock_hook):
    schema_fields = [
        {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
        {"name": "date_hired", "type": "DATE", "mode": "REQUIRED"},
        {"name": "date_birth", "type": "DATE", "mode": "NULLABLE"},
    ]
    time_partitioning = {"type": "DAY", "field": "date_hired"}
    cluster_fields = ["date_birth"]
    operator = BigQueryCreateEmptyTableOperator(
        task_id=TASK_ID,
        dataset_id=TEST_DATASET,
        project_id=TEST_GCP_PROJECT_ID,
        table_id=TEST_TABLE_ID,
        schema_fields=schema_fields,
        time_partitioning=time_partitioning,
        cluster_fields=cluster_fields,
    )
    operator.execute(None)
    mock_hook.return_value.create_empty_table.assert_called_once_with(
        dataset_id=TEST_DATASET,
        project_id=TEST_GCP_PROJECT_ID,
        table_id=TEST_TABLE_ID,
        schema_fields=schema_fields,
        time_partitioning=time_partitioning,
        cluster_fields=cluster_fields,
        labels=None,
        view=None,
        encryption_configuration=None,
        table_resource=None,
        exists_ok=False,
    )
def bq_create(tablename, bqdataset, **kwargs):
    # tablename is expected in "<schema>.<table>" form
    table_schema = tablename.split('.')[0]
    table_name = tablename.split('.')[1]
    ti = kwargs['ti']
    target_dataset = bqdataset
    # The schema is produced by an upstream "getschema_<tablename>" task
    # and pulled here via XCom as a JSON string.
    schema = json.loads(
        str(ti.xcom_pull(task_ids='getschema_{}'.format(tablename))))
    create = BigQueryCreateEmptyTableOperator(
        task_id='create_bq_{}'.format(tablename),
        project_id=gcp_project,
        dataset_id=target_dataset,
        table_id=table_name,
        schema_fields=schema,
        bigquery_conn_id=bq_conn)
    print(schema)
    create.execute(context=kwargs)
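A minimal sketch of how a helper like bq_create is typically invoked from a DAG via PythonOperator, so that the task context (including ti for the xcom_pull above) reaches the **kwargs parameter. The DAG object, table name, and dataset here are illustrative assumptions, not part of the original snippet.

# Hypothetical wiring: runs bq_create(...) at task execution time.
# In Airflow 2 the context is passed into **kwargs automatically;
# Airflow 1.x would additionally need provide_context=True.
from airflow.operators.python import PythonOperator

create_task = PythonOperator(
    task_id='create_bq_myschema.mytable',
    python_callable=bq_create,
    op_kwargs={'tablename': 'myschema.mytable', 'bqdataset': 'my_dataset'},
    dag=dag,  # assumed DAG object
)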
def test_execute(self, mock_hook):
    operator = BigQueryCreateEmptyTableOperator(
        task_id=TASK_ID,
        dataset_id=TEST_DATASET,
        project_id=TEST_GCP_PROJECT_ID,
        table_id=TEST_TABLE_ID,
    )
    operator.execute(None)
    mock_hook.return_value.create_empty_table.assert_called_once_with(
        dataset_id=TEST_DATASET,
        project_id=TEST_GCP_PROJECT_ID,
        table_id=TEST_TABLE_ID,
        schema_fields=None,
        time_partitioning={},
        cluster_fields=None,
        labels=None,
        view=None,
        encryption_configuration=None,
        table_resource=None,
        exists_ok=False,
    )
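Both test snippets above (test_create_clustered_empty_table and test_execute) take a mock_hook argument that must be injected from outside the method. A minimal sketch of the patching they assume; the patch target path follows the google provider's module layout but is an assumption here:

# Sketch: patch BigQueryHook where the operator module imports it,
# so the operator's execute() hits the mock instead of the real API.
from unittest import mock

@mock.patch("airflow.providers.google.cloud.operators.bigquery.BigQueryHook")
def test_execute(self, mock_hook):
    ...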
task_id="create_bucket", bucket_name=GCS_BUCKET, project_id=GCP_PROJECT_ID, ) create_dataset = BigQueryCreateEmptyDatasetOperator( task_id="create_dataset", dataset_id=DATASET_NAME, ) create_table = BigQueryCreateEmptyTableOperator( task_id="create_table", dataset_id=DATASET_NAME, table_id=TABLE_NAME, schema_fields=[ {'name': 'campaign_name', 'type': 'STRING', 'mode': 'NULLABLE'}, {'name': 'campaign_id', 'type': 'STRING', 'mode': 'NULLABLE'}, {'name': 'ad_id', 'type': 'STRING', 'mode': 'NULLABLE'}, {'name': 'clicks', 'type': 'STRING', 'mode': 'NULLABLE'}, {'name': 'impressions', 'type': 'STRING', 'mode': 'NULLABLE'}, ], ) # [START howto_operator_facebook_ads_to_gcs] run_operator = FacebookAdsReportToGcsOperator( task_id='run_fetch_data', owner='airflow', bucket_name=GCS_BUCKET, parameters=PARAMETERS, fields=FIELDS, gcp_conn_id=GCS_CONN_ID, object_name=GCS_OBJ_PATH,
task_id="create-dataset", dataset_id=DATASET_NAME) create_dataset_with_location = BigQueryCreateEmptyDatasetOperator( task_id="create_dataset_with_location", dataset_id=LOCATION_DATASET_NAME, location=BQ_LOCATION) create_table = BigQueryCreateEmptyTableOperator( task_id="create_table", dataset_id=DATASET_NAME, table_id="test_table", schema_fields=[ { "name": "emp_name", "type": "STRING", "mode": "REQUIRED" }, { "name": "salary", "type": "INTEGER", "mode": "NULLABLE" }, ], ) create_table_with_location = BigQueryCreateEmptyTableOperator( task_id="create_table_with_location", dataset_id=LOCATION_DATASET_NAME, table_id="test_table", schema_fields=[ {
    dag_id,
    schedule_interval=None,  # Override to match your needs
    start_date=days_ago(1),
    tags=["example"],
    user_defined_macros={"DATASET": DATASET_NAME, "TABLE": TABLE_1},
) as dag_with_locations:
    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create-dataset",
        dataset_id=DATASET_NAME,
        location=location,
    )

    create_table_1 = BigQueryCreateEmptyTableOperator(
        task_id="create_table_1",
        dataset_id=DATASET_NAME,
        table_id=TABLE_1,
        schema_fields=SCHEMA,
        location=location,
    )

    create_table_2 = BigQueryCreateEmptyTableOperator(
        task_id="create_table_2",
        dataset_id=DATASET_NAME,
        table_id=TABLE_2,
        schema_fields=SCHEMA,
        location=location,
    )

    create_dataset >> [create_table_1, create_table_2]

    delete_dataset = BigQueryDeleteDatasetOperator(
    user_defined_macros={"DATASET": DATASET_NAME, "TABLE": TABLE_NAME},
    default_args={"project_id": PROJECT_ID},
) as dag_with_locations:
    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create-dataset",
        dataset_id=DATASET_NAME,
        project_id=PROJECT_ID)

    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id=DATASET_NAME,
        table_id=TABLE_NAME,
        schema_fields=SCHEMA,
        time_partitioning={
            "type": "DAY",
            "field": "ds",
        },
    )

    # [START howto_sensor_bigquery_table]
    check_table_exists = BigQueryTableExistenceSensor(
        task_id="check_table_exists",
        project_id=PROJECT_ID,
        dataset_id=DATASET_NAME,
        table_id=TABLE_NAME)
    # [END howto_sensor_bigquery_table]

    execute_insert_query = BigQueryExecuteQueryOperator(
        task_id="execute_insert_query",
        sql=INSERT_ROWS_QUERY,
    ],
)

create_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id="create_dataset", dataset_id=DATASET_NAME)

for table in [ORIGIN, TARGET]:
    create_table = BigQueryCreateEmptyTableOperator(
        task_id=f"create_{table}_table",
        dataset_id=DATASET_NAME,
        table_id=table,
        schema_fields=[
            {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
            {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
        ],
    )
    # Wire the dependency inside the loop so that *both* generated
    # table-creation tasks sit between the dataset creation and the
    # downstream copy/export tasks (outside the loop, only the last
    # table would be wired).
    create_dataset >> create_table >> [copy_selected_data, bigquery_to_gcs]

delete_dataset = BigQueryDeleteDatasetOperator(
    task_id="delete_dataset",
    dataset_id=DATASET_NAME,
    delete_contents=True)

[copy_selected_data, bigquery_to_gcs] >> delete_dataset
with models.DAG(
    "example_bigquery_operations",
    schedule_interval='@once',  # Override to match your needs
    start_date=days_ago(1),
    tags=["example"],
) as dag:
    # [START howto_operator_bigquery_create_table]
    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id=DATASET_NAME,
        table_id="test_table",
        schema_fields=[
            {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
            {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
        ],
    )
    # [END howto_operator_bigquery_create_table]

    # [START howto_operator_bigquery_update_table_schema]
    update_table_schema = BigQueryUpdateTableSchemaOperator(
        task_id="update_table_schema",
        dataset_id=DATASET_NAME,
        table_id="test_table",
    dag=dag,
)
# [END howto_operator_salesforce_to_gcs]

create_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id="create_dataset",
    dataset_id=DATASET_NAME,
    project_id=GCP_PROJECT_ID,
    gcp_conn_id=GCS_CONN_ID,
)

create_table = BigQueryCreateEmptyTableOperator(
    task_id="create_table",
    dataset_id=DATASET_NAME,
    table_id=TABLE_NAME,
    schema_fields=[
        {'name': 'id', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'company', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'phone', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'email', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'createddate', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'lastmodifieddate', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'isdeleted', 'type': 'BOOL', 'mode': 'NULLABLE'},
    ],
)

load_csv = GCSToBigQueryOperator(
    task_id='gcs_to_bq',
    bucket=GCS_BUCKET,
    source_objects=[GCS_OBJ_PATH],
    destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
    write_disposition='WRITE_TRUNCATE',
)
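A plausible dependency chain for this Salesforce-to-GCS-to-BigQuery flow, sketched under the assumption that the truncated operator above the [END] marker is named gcs_upload_task (a hypothetical stand-in); the dataset and table must exist before the CSV in GCS is loaded:

# Hypothetical wiring; gcs_upload_task stands in for the truncated
# Salesforce-to-GCS export task above.
gcs_upload_task >> create_dataset >> create_table >> load_csv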
    gcp_conn_id='gr_bigquery_conn',
    ignore_if_missing=True)

drop_stage_reviews_data = BigQueryDeleteTableOperator(
    task_id="drop_stage_reviews_data",
    deletion_dataset_table=f"{GCP_PROJECT_ID}.{GCP_BQ_DATASET_STAGE}.{GCP_BQ_TABLE_REVIEWS}",
    gcp_conn_id='gr_bigquery_conn',
    ignore_if_missing=True)

create_stage_table = DummyOperator(task_id='create_stage_table')

create_stage_users_table = BigQueryCreateEmptyTableOperator(
    task_id='create_stage_users_table',
    dataset_id=GCP_BQ_DATASET_STAGE,
    table_id=GCP_BQ_TABLE_USERS,
    project_id=GCP_PROJECT_ID,
    bigquery_conn_id='gr_bigquery_conn',
    schema_fields=stage.users_schema)

create_stage_authors_table = BigQueryCreateEmptyTableOperator(
    task_id='create_stage_authors_table',
    dataset_id=GCP_BQ_DATASET_STAGE,
    table_id=GCP_BQ_TABLE_AUTHORS,
    project_id=GCP_PROJECT_ID,
    bigquery_conn_id='gr_bigquery_conn',
    schema_fields=stage.authors_schema)

create_stage_books_table = BigQueryCreateEmptyTableOperator(
    task_id='create_stage_books_table',
    dataset_id=GCP_BQ_DATASET_STAGE,