)

        # [START howto_operator_bigquery_check]
        # Data-quality gate: runs a standard-SQL (use_legacy_sql=False) row
        # count against {DATASET_NAME}.{TABLE_1} in the configured `location`.
        # Per the BigQueryCheckOperator documentation the task fails when any
        # value in the first result row is falsy, so a count of 0 fails here.
        check_count = BigQueryCheckOperator(
            task_id="check_count",
            sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}",
            use_legacy_sql=False,
            location=location,
        )
        # [END howto_operator_bigquery_check]

        # [START howto_operator_bigquery_value_check]
        # Runs a standard-SQL row count against {DATASET_NAME}.{TABLE_1} and
        # compares the result with `pass_value`. No tolerance is passed, so
        # (per the operator documentation) the count has to match 4.
        check_value = BigQueryValueCheckOperator(
            task_id="check_value",
            sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}",
            pass_value=4,
            use_legacy_sql=False,
            location=location,
        )
        # [END howto_operator_bigquery_value_check]

        # [START howto_operator_bigquery_interval_check]
        # Interval check on {DATASET_NAME}.{TABLE_1}: the COUNT(*) metric is
        # compared with its value `days_back=1` day earlier, using 1.5 as the
        # threshold. See the operator documentation for the ratio formula and
        # for the date column used to select each day's rows.
        check_interval = BigQueryIntervalCheckOperator(
            task_id="check_interval",
            table=f"{DATASET_NAME}.{TABLE_1}",
            days_back=1,
            metrics_thresholds={"COUNT(*)": 1.5},
            use_legacy_sql=False,
            location=location,
        )
        # [END howto_operator_bigquery_interval_check]
Esempio n. 2
0
        fivetran_conn_id='fivetran_default',
        connector_id='{{ var.value.connector_id }}',
        poke_interval=5)
    """
    #### BigQuery row validation task
    Ensure that data was copied to BigQuery correctly, i.e. the table and dataset
    exist.
    """
    # Sensor on table `forestfires` in DATASET. The project id is a Jinja
    # template that reads the Airflow Variable `gcp_project_id`.
    # This only confirms that the table exists; row contents are not
    # inspected by this task.
    validate_bigquery = BigQueryTableExistenceSensor(
        task_id='validate_bigquery',
        project_id='{{ var.value.gcp_project_id }}',
        dataset_id=DATASET,
        table_id='forestfires',
    )
    """
    #### Row-count data quality check
    Run a data quality check on the loaded table, ensuring that the number of
    rows in BigQuery matches the ground truth in the corresponding JSON file.
    """
    # Counts the rows of {DATASET}.{TABLE} with standard SQL and compares the
    # result with `pass_value`. No tolerance is passed, so (per the operator
    # documentation) the task passes only when the count is 516.
    check_bq_row_count = BigQueryValueCheckOperator(
        task_id="check_row_count",
        sql=f"SELECT COUNT(*) FROM {DATASET}.{TABLE}",
        pass_value=516,
        use_legacy_sql=False,
    )

    # Terminal no-op task that closes the pipeline.
    done = DummyOperator(task_id='done')

    # Single linear chain; each task runs after the one listed before it.
    (
        fivetran_sync_start
        >> fivetran_sync_wait
        >> validate_bigquery
        >> check_bq_row_count
        >> done
    )
Esempio n. 3
0
            "expirationTime": (int(time.time()) + 300) * 1000
        })
    # [END howto_operator_bigquery_upsert_table]

    # [START howto_operator_bigquery_check]
    # Row-count check on the `save_query_result` table of DATASET_NAME,
    # executed with standard (non-legacy) SQL.
    check_count = BigQueryCheckOperator(
        task_id="check_count",
        sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.save_query_result",
        use_legacy_sql=False,
    )
    # [END howto_operator_bigquery_check]

    # [START howto_operator_bigquery_value_check]
    # Compares the row count of `save_query_result` in DATASET_NAME with the
    # expected value given as `pass_value` (1000).
    check_value = BigQueryValueCheckOperator(
        task_id="check_value",
        sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.save_query_result",
        pass_value=1000,
        use_legacy_sql=False,
    )
    # [END howto_operator_bigquery_value_check]

    # [START howto_operator_bigquery_interval_check]
    # Interval check on `save_query_result` in DATASET_NAME: the COUNT(*)
    # metric is evaluated against its value `days_back=1` day earlier with a
    # threshold of 1.5.
    check_interval = BigQueryIntervalCheckOperator(
        task_id="check_interval",
        table=f"{DATASET_NAME}.save_query_result",
        days_back=1,
        metrics_thresholds={"COUNT(*)": 1.5},
        use_legacy_sql=False,
    )
    # [END howto_operator_bigquery_interval_check]

    # Ordering: create the dataset, run the saved-result query, then delete
    # the dataset.
    # NOTE(review): check_count, check_value and check_interval are not wired
    # in by this line -- confirm their dependencies are declared elsewhere.
    create_dataset >> execute_query_save >> delete_dataset
Esempio n. 4
0
	  {
	    "mode": "NULLABLE",
	    "name": "tbd2",
	    "type": "STRING"
          }, 
          ]
        )

    ##############################################################
    # Check mandatory columns for null values                    #
    # This task checks that the number of rows with a NULL value #
    # in the title column equals 1                               #
    ##############################################################
        # Counts rows whose `title` is NULL in {DATASET_NAME}.{TABLE_NAME}
        # and compares that count with `pass_value` (1), using standard SQL.
        validateTitleNull = BigQueryValueCheckOperator(
            task_id="validateNullTitle",
            sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_NAME} WHERE title IS NULL",
            pass_value=1,
            use_legacy_sql=False,
        )


    ##############################################################
    # Update rows to set tbd1 to the lowercase value of title    #
    ##############################################################
        # DML run against every row (WHERE TRUE): stores LOWER(title) in tbd1.
        UPDATE_STATEMENT = (
            f"UPDATE `{DATASET_NAME}.{TABLE_NAME}` SET tbd1 = LOWER(title) WHERE TRUE"
        )
        
        updateTableColumns = BigQueryInsertJobOperator(
          task_id="select_query_job",
          configuration={
            "query": {
              "query": UPDATE_STATEMENT,
              "useLegacySql": False,