def test_execute(self, mock_hook):
    operator = BigQueryCreateEmptyDatasetOperator(
        task_id=TASK_ID,
        dataset_id=TEST_DATASET,
        project_id=TEST_GCP_PROJECT_ID,
        location=TEST_DATASET_LOCATION,
    )
    operator.execute(None)
    mock_hook.return_value.create_empty_dataset.assert_called_once_with(
        dataset_id=TEST_DATASET,
        project_id=TEST_GCP_PROJECT_ID,
        location=TEST_DATASET_LOCATION,
        dataset_reference={},
        exists_ok=False,
    )
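The excerpt above shows only the test body; the `mock_hook` argument implies a patch decorator that is not shown. A minimal sketch of the surrounding test class, assuming the hook is patched at `airflow.providers.google.cloud.operators.bigquery.BigQueryHook` and with placeholder constants (both are assumptions, not part of the excerpt):

from unittest import TestCase, mock

from airflow.providers.google.cloud.operators.bigquery import BigQueryCreateEmptyDatasetOperator

# Hypothetical constants for illustration; the excerpt does not show their values.
TASK_ID = "test-bq-create-empty-dataset"
TEST_DATASET = "test-dataset"
TEST_GCP_PROJECT_ID = "test-project"
TEST_DATASET_LOCATION = "EU"


class TestBigQueryCreateEmptyDatasetOperator(TestCase):
    # Assumed patch target: the hook is mocked where the operator module imports it.
    @mock.patch("airflow.providers.google.cloud.operators.bigquery.BigQueryHook")
    def test_execute(self, mock_hook):
        operator = BigQueryCreateEmptyDatasetOperator(
            task_id=TASK_ID,
            dataset_id=TEST_DATASET,
            project_id=TEST_GCP_PROJECT_ID,
            location=TEST_DATASET_LOCATION,
        )
        operator.execute(None)
        # The operator should delegate to BigQueryHook.create_empty_dataset exactly once.
        mock_hook.return_value.create_empty_dataset.assert_called_once_with(
            dataset_id=TEST_DATASET,
            project_id=TEST_GCP_PROJECT_ID,
            location=TEST_DATASET_LOCATION,
            dataset_reference={},
            exists_ok=False,
        )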
tags=["example"], ) as dag: # [START howto_operator_export_database_to_gcs] export_database_to_gcs = CloudFirestoreExportDatabaseOperator( task_id="export_database_to_gcs", project_id=FIRESTORE_PROJECT_ID, body={ "outputUriPrefix": EXPORT_DESTINATION_URL, "collectionIds": [EXPORT_COLLECTION_ID] }, ) # [END howto_operator_export_database_to_gcs] create_dataset = BigQueryCreateEmptyDatasetOperator( task_id="create_dataset", dataset_id=DATASET_NAME, location=DATASET_LOCATION, project_id=GCP_PROJECT_ID, ) delete_dataset = BigQueryDeleteDatasetOperator(task_id="delete_dataset", dataset_id=DATASET_NAME, project_id=GCP_PROJECT_ID, delete_contents=True) # [START howto_operator_create_external_table_multiple_types] create_external_table_multiple_types = BigQueryCreateExternalTableOperator( task_id="create_external_table", bucket=BUCKET_NAME, table_resource={ "tableReference": { "projectId": GCP_PROJECT_ID,
with models.DAG(
    "example_facebook_ads_to_gcs",
    schedule_interval='@once',  # Override to match your needs
    start_date=datetime(2021, 1, 1),
    catchup=False,
) as dag:
    create_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name=GCS_BUCKET,
        project_id=GCP_PROJECT_ID,
    )

    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create_dataset",
        dataset_id=DATASET_NAME,
    )

    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id=DATASET_NAME,
        table_id=TABLE_NAME,
        schema_fields=[
            {'name': 'campaign_name', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'campaign_id', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'ad_id', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'clicks', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'impressions', 'type': 'STRING', 'mode': 'NULLABLE'},
        ],
    )
        source_project_dataset_tables="{}.selected_data_from_external_table".format(DATASET_NAME),
        destination_project_dataset_table="{}.copy_of_selected_data_from_external_table".format(DATASET_NAME),
    )

    bigquery_to_gcs = BigQueryToGCSOperator(
        task_id="bigquery_to_gcs",
        source_project_dataset_table="{}.selected_data_from_external_table".format(DATASET_NAME),
        destination_cloud_storage_uris=[
            "gs://{}/export-bigquery.csv".format(DATA_EXPORT_BUCKET_NAME)
        ],
    )

    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create-dataset", dataset_id=DATASET_NAME
    )

    create_dataset_with_location = BigQueryCreateEmptyDatasetOperator(
        task_id="create_dataset_with_location",
        dataset_id=LOCATION_DATASET_NAME,
        location=BQ_LOCATION,
    )

    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id=DATASET_NAME,
        table_id="test_table",
        schema_fields=[
            {
                "name": "emp_name",
                "type": "STRING",
                "mode": "REQUIRED"
        gcs['updated_at']) + dt.timedelta(hours=7)
        json_gcs.append(gcs)

    storage_client = storage.Client()
    bucket = storage_client.get_bucket("airflow-postgres")
    blob = bucket.blob("users.csv")
    df = pd.DataFrame(data=json_gcs).to_csv(
        sep=",",
        header=False,
        index=False,
        quotechar='"',
        quoting=csv.QUOTE_ALL,
        encoding='utf-8',
    )
    blob.upload_from_string(data=df)


create_users_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id='users_dataset', dataset_id=DATASET_NAME, dag=dag
)

convert_input_file = PythonOperator(
    task_id='convert_users', python_callable=users_converter, dag=dag
)

load_users = GCSToBigQueryOperator(
    task_id='gcs_to_bigquery_users',
    bucket='airflow-postgres',
    source_objects=['users.csv'],
    destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
    schema_fields=[
        {
            'name': 'created_at',
            'type': 'TIMESTAMP',
            'mode': 'REQUIRED'
dag_id = "example_bigquery_sensors" with models.DAG( dag_id, schedule_interval=None, # Override to match your needs start_date=days_ago(1), tags=["example"], user_defined_macros={ "DATASET": DATASET_NAME, "TABLE": TABLE_NAME }, default_args={"project_id": PROJECT_ID}, ) as dag_with_locations: create_dataset = BigQueryCreateEmptyDatasetOperator( task_id="create-dataset", dataset_id=DATASET_NAME, project_id=PROJECT_ID) create_table = BigQueryCreateEmptyTableOperator( task_id="create_table", dataset_id=DATASET_NAME, table_id=TABLE_NAME, schema_fields=SCHEMA, time_partitioning={ "type": "DAY", "field": "ds", }, ) # [START howto_sensor_bigquery_table] check_table_exists = BigQueryTableExistenceSensor( task_id="check_table_exists",
    BigQueryDeleteDatasetOperator,
)
from airflow.providers.google.cloud.operators.gcs_to_bigquery import GCSToBigQueryOperator
from airflow.utils.dates import days_ago

DATASET_NAME = os.environ.get("GCP_DATASET_NAME", 'airflow_test')
TABLE_NAME = os.environ.get("GCP_TABLE_NAME", 'gcs_to_bq_table')

args = {'start_date': days_ago(2)}

dag = models.DAG(
    dag_id='example_gcs_to_bigquery_operator',
    default_args=args,
    schedule_interval=None,
    tags=['example'],
)

create_test_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id='create_airflow_test_dataset', dataset_id=DATASET_NAME, dag=dag
)

# [START howto_operator_gcs_to_bigquery]
load_csv = GCSToBigQueryOperator(
    task_id='gcs_to_bigquery_example',
    bucket='cloud-samples-data',
    source_objects=['bigquery/us-states/us-states.csv'],
    destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
    schema_fields=[
        {
            'name': 'name',
            'type': 'STRING',
            'mode': 'NULLABLE'
        },
        {
            'name': 'post_abbr',
    query=QUERY,
    include_deleted=True,
    bucket_name=GCS_BUCKET,
    object_name=GCS_OBJ_PATH,
    salesforce_conn_id=SALESFORCE_CONN_ID,
    export_format='csv',
    coerce_to_timestamp=False,
    record_time_added=False,
    gcp_conn_id=GCS_CONN_ID,
    task_id="upload_to_gcs",
    dag=dag,
)
# [END howto_operator_salesforce_to_gcs]

create_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id="create_dataset",
    dataset_id=DATASET_NAME,
    project_id=GCP_PROJECT_ID,
    gcp_conn_id=GCS_CONN_ID,
)

create_table = BigQueryCreateEmptyTableOperator(
    task_id="create_table",
    dataset_id=DATASET_NAME,
    table_id=TABLE_NAME,
    schema_fields=[
        {'name': 'id', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'company', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'phone', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'email', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'createddate', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'lastmodifieddate', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'isdeleted', 'type': 'BOOL', 'mode': 'NULLABLE'},
f"(42, 'monthy python', '{INSERT_DATE}'), " f"(42, 'fishy fish', '{INSERT_DATE}');" ) # [END howto_operator_bigquery_query] with models.DAG( dag_id, schedule_interval='@once', # Override to match your needs start_date=datetime(2021, 1, 1), catchup=False, tags=["example"], user_defined_macros={"DATASET": DATASET, "TABLE": TABLE_1}, ) as dag_with_locations: create_dataset = BigQueryCreateEmptyDatasetOperator( task_id="create-dataset", dataset_id=DATASET, location=location, ) create_table_1 = BigQueryCreateEmptyTableOperator( task_id="create_table_1", dataset_id=DATASET, table_id=TABLE_1, schema_fields=SCHEMA, location=location, ) create_table_2 = BigQueryCreateEmptyTableOperator( task_id="create_table_2", dataset_id=DATASET, table_id=TABLE_2,
    resource="{{ task_instance.xcom_pull(task_ids='call_parameters', key='endPoint') }}",
    object_name='data/{{execution_date.strftime("%Y") }}/{{execution_date.strftime("%m") }}/{{execution_date.strftime("%d") }}',
    bucket_name="{{ var.json.env.bucket }}",
    partitioned_key="{{ task_instance.xcom_pull(task_ids='call_parameters', key='pickup_dimension_name') }}",
    query="{% include 'api_params.sql' %}",
    max_rows="{{ task_instance.xcom_pull(task_ids='total_record', key='return_value') }}",
    dag=dag,
)

create_staging_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id="create_staging_dataset",
    gcp_conn_id="my_gcp_connection",
    dataset_id="{{ var.json.env.stg }}",
    dag=dag,
)

create_production_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id="create_production_dataset",
    gcp_conn_id="my_gcp_connection",
    dataset_id="{{ var.json.env.production }}",
    dag=dag,
)

create_fact_table = BigQueryInsertJobOperator(
    task_id="create_fact_table",
    configuration={
        "query": {
            "query": "{% include 'create_fact_table.sql' %}",
IMPERSONATION_CHAIN = f"impersonated_account@{PROJECT_ID}.iam.gserviceaccount.com" DATA_SAMPLE_GCS_URL_PARTS = urlparse(DATA_SAMPLE_GCS_URL) DATA_SAMPLE_GCS_BUCKET_NAME = DATA_SAMPLE_GCS_URL_PARTS.netloc DATA_SAMPLE_GCS_OBJECT_NAME = DATA_SAMPLE_GCS_URL_PARTS.path[1:] with models.DAG( "example_bigquery_operations", schedule_interval=None, # Override to match your needs start_date=days_ago(1), tags=["example"], ) as dag: # [START howto_operator_bigquery_create_dataset] create_dataset = BigQueryCreateEmptyDatasetOperator( task_id="create-dataset", dataset_id=DATASET_NAME, impersonation_chain=IMPERSONATION_CHAIN ) # [END howto_operator_bigquery_create_dataset] # [START howto_operator_bigquery_delete_dataset] delete_dataset = BigQueryDeleteDatasetOperator( task_id="delete_dataset", dataset_id=DATASET_NAME, delete_contents=True, impersonation_chain=IMPERSONATION_CHAIN ) # [END howto_operator_bigquery_delete_dataset]