) # [START howto_operator_bigquery_check] check_count = BigQueryCheckOperator( task_id="check_count", sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}", use_legacy_sql=False, location=location, ) # [END howto_operator_bigquery_check] # [START howto_operator_bigquery_value_check] check_value = BigQueryValueCheckOperator( task_id="check_value", sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}", pass_value=4, use_legacy_sql=False, location=location, ) # [END howto_operator_bigquery_value_check] # [START howto_operator_bigquery_interval_check] check_interval = BigQueryIntervalCheckOperator( task_id="check_interval", table=f"{DATASET_NAME}.{TABLE_1}", days_back=1, metrics_thresholds={"COUNT(*)": 1.5}, use_legacy_sql=False, location=location, ) # [END howto_operator_bigquery_interval_check]
fivetran_conn_id='fivetran_default', connector_id='{{ var.value.connector_id }}', poke_interval=5) """ #### BigQuery row validation task Ensure that data was copied to BigQuery correctly, i.e. the table and dataset exist. """ validate_bigquery = BigQueryTableExistenceSensor( task_id='validate_bigquery', project_id='{{ var.value.gcp_project_id }}', dataset_id=DATASET, table_id='forestfires', ) """ #### Row-level data quality check Run a data quality check on a few rows, ensuring that the data in BigQuery matches the ground truth in the correspoding JSON file. """ check_bq_row_count = BigQueryValueCheckOperator( task_id="check_row_count", sql=f"SELECT COUNT(*) FROM {DATASET}.{TABLE}", pass_value=516, use_legacy_sql=False, ) done = DummyOperator(task_id='done') fivetran_sync_start >> fivetran_sync_wait >> validate_bigquery validate_bigquery >> check_bq_row_count >> done
"expirationTime": (int(time.time()) + 300) * 1000 }) # [END howto_operator_bigquery_upsert_table] # [START howto_operator_bigquery_check] check_count = BigQueryCheckOperator( task_id="check_count", sql="SELECT COUNT(*) FROM {}.save_query_result".format(DATASET_NAME), use_legacy_sql=False, ) # [END howto_operator_bigquery_check] # [START howto_operator_bigquery_value_check] check_value = BigQueryValueCheckOperator( task_id="check_value", sql="SELECT COUNT(*) FROM {}.save_query_result".format(DATASET_NAME), pass_value=1000, use_legacy_sql=False, ) # [END howto_operator_bigquery_value_check] # [START howto_operator_bigquery_interval_check] check_interval = BigQueryIntervalCheckOperator( task_id="check_interval", table="{}.save_query_result".format(DATASET_NAME), days_back=1, metrics_thresholds={'COUNT(*)': 1.5}, use_legacy_sql=False, ) # [END howto_operator_bigquery_interval_check] create_dataset >> execute_query_save >> delete_dataset
{"mode": "NULLABLE", "name": "tbd2", "type": "STRING"},
    ]
)

##############################################################
# Check mandatory columns for NULL values.                   #
# Asserts that exactly one row (pass_value=1) has a NULL     #
# title; the task fails if the count differs.                #
##############################################################
validateTitleNull = BigQueryValueCheckOperator(
    task_id="validateNullTitle",
    sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_NAME} WHERE title IS NULL",
    pass_value=1,
    use_legacy_sql=False
)

##############################################################
# Update every row to set tbd1 to the LOWERCASE value of     #
# title (the SQL uses LOWER(); an earlier comment said       #
# "uppercase", which contradicted the statement).            #
##############################################################
UPDATE_STATEMENT = f"""UPDATE `{DATASET_NAME}.{TABLE_NAME}` SET tbd1 = LOWER(title) WHERE TRUE"""
# Runs the UPDATE as a standard (non-legacy) SQL query job.
# NOTE(review): task_id "select_query_job" is misleading for an UPDATE job,
# but it is left unchanged since task ids may be referenced elsewhere.
updateTableColumns = BigQueryInsertJobOperator(
    task_id="select_query_job",
    configuration={
        "query": {
            "query": UPDATE_STATEMENT,
            "useLegacySql": False,