Example #1
    def test_execute(self, mock_hook):

        max_results = 100
        selected_fields = 'DATE'
        operator = BigQueryGetDataOperator(task_id=TASK_ID,
                                           dataset_id=TEST_DATASET,
                                           table_id=TEST_TABLE_ID,
                                           max_results=max_results,
                                           selected_fields=selected_fields,
                                           )
        operator.execute(None)
        mock_hook.return_value \
            .get_tabledata \
            .assert_called_once_with(
                dataset_id=TEST_DATASET,
                table_id=TEST_TABLE_ID,
                max_results=max_results,
                selected_fields=selected_fields,
            )
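
For context, this fragment only works under a mock.patch decorator that injects mock_hook in place of the real BigQuery hook. A minimal harness sketch, assuming the Airflow 1.10-era contrib module layout (the patch target and constants are assumptions, not part of the original snippet):

    import unittest
    from unittest import mock

    from airflow.contrib.operators.bigquery_get_data import BigQueryGetDataOperator

    # Assumed test constants; any values work, since the hook is mocked.
    TASK_ID = 'test-bq-get-data'
    TEST_DATASET = 'test-dataset'
    TEST_TABLE_ID = 'test-table-id'


    class TestBigQueryGetDataOperator(unittest.TestCase):
        # Patch the hook where the operator module imports it, so it is
        # handed to the test method as mock_hook.
        @mock.patch('airflow.contrib.operators.bigquery_get_data.BigQueryHook')
        def test_execute(self, mock_hook):
            ...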
Example #2
    def test_execute(self, mock_hook):

        max_results = 100
        selected_fields = 'DATE'
        operator = BigQueryGetDataOperator(
            task_id=TASK_ID,
            dataset_id=TEST_DATASET,
            table_id=TEST_TABLE_ID,
            max_results=max_results,
            selected_fields=selected_fields,
            location=TEST_DATASET_LOCATION,
        )
        operator.execute(None)
        mock_hook.return_value.list_rows.assert_called_once_with(
            dataset_id=TEST_DATASET,
            table_id=TEST_TABLE_ID,
            max_results=max_results,
            selected_fields=selected_fields,
            location=TEST_DATASET_LOCATION,
        )
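
Example #2 is the same test against the newer google provider package, where the hook method is list_rows and the operator also forwards a location. A sketch of the matching patch target (module path assumed from the provider layout):

    from unittest import mock

    from airflow.providers.google.cloud.operators.bigquery import BigQueryGetDataOperator


    class TestBigQueryGetDataOperator:
        # The hook is patched in the provider module that defines the
        # operator, mirroring the contrib-era pattern above.
        @mock.patch('airflow.providers.google.cloud.operators.bigquery.BigQueryHook')
        def test_execute(self, mock_hook):
            ...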
Example #3
        destination_dataset_table="{}.save_query_result".format(DATASET_NAME),
        query_params=[{
            "name": "to_address",
            "parameterType": {
                "type": "STRING"
            },
            "parameterValue": {
                "value": WALLET_ADDRESS
            },
        }],
    )
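
The query_params entry above binds a named standard-SQL query parameter, which the (truncated) sql argument would reference as @to_address. A hypothetical reconstruction of the operator head, with an invented source table name:

    execute_query_save = BigQueryExecuteQueryOperator(
        task_id="execute_query_save",
        sql=(
            "SELECT value, to_address "
            "FROM `{}.transactions` "         # hypothetical source table
            "WHERE to_address = @to_address"  # bound via query_params below
        ).format(DATASET_NAME),
        use_legacy_sql=False,  # named @parameters require standard SQL
        destination_dataset_table="{}.save_query_result".format(DATASET_NAME),
        query_params=[{
            "name": "to_address",
            "parameterType": {"type": "STRING"},
            "parameterValue": {"value": WALLET_ADDRESS},
        }],
    )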

    get_data = BigQueryGetDataOperator(
        task_id="get_data",
        dataset_id=DATASET_NAME,
        table_id="save_query_result",
        max_results="10",
        selected_fields="value,to_address",
    )

    get_data_result = BashOperator(
        task_id="get_data_result",
        bash_command="echo \"{{ task_instance.xcom_pull('get-data') }}\"")

    create_external_table = BigQueryCreateExternalTableOperator(
        task_id="create_external_table",
        bucket=DATA_SAMPLE_GCS_BUCKET_NAME,
        source_objects=[DATA_SAMPLE_GCS_OBJECT_NAME],
        destination_project_dataset_table="{}.external_table".format(
            DATASET_NAME),
        skip_leading_rows=1,
            location=location,
        )

        execute_query_save = BigQueryExecuteQueryOperator(
            task_id="execute_query_save",
            sql=f"SELECT * FROM {DATASET_NAME}.{TABLE_1}",
            use_legacy_sql=False,
            destination_dataset_table=f"{DATASET_NAME}.{TABLE_2}",
            location=location,
        )

        # [START howto_operator_bigquery_get_data]
        get_data = BigQueryGetDataOperator(
            task_id="get_data",
            dataset_id=DATASET_NAME,
            table_id=TABLE_1,
            max_results=10,
            selected_fields="value,name",
            location=location,
        )
        # [END howto_operator_bigquery_get_data]

        get_data_result = BashOperator(
            task_id="get_data_result",
            bash_command="echo \"{{ task_instance.xcom_pull('get_data') }}\"",
        )

        # [START howto_operator_bigquery_check]
        check_count = BigQueryCheckOperator(
            task_id="check_count",
            sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}",
            use_legacy_sql=False,
        )
        # [END howto_operator_bigquery_check]

Example #5
CONFIG_TABLE_NAME = 'config'

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG('read_config_frombq_a',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:
    import logging

    ##############################################################
    # Use this to fetch the data from BQ config data             #
    ##############################################################
    get_data_bq = BigQueryGetDataOperator(
        task_id="get_config",
        dataset_id=DATASET_NAME,
        table_id=CONFIG_TABLE_NAME,
        max_results=1,
        selected_fields="schema_nm, table_nm, file_name, path, data_topic_nm")

    ##############################################################
    # The code below demonstrates how to use the config data    #
    # returned by the get_config task                           #
    ##############################################################


    def formatConfig(**kwargs):
        ti = kwargs['ti']
        data = ti.xcom_pull(task_ids='get_config')
        configdict = {}
        configdict['CVS_CAREMARK_MEMBER'] = {}
        for row in data:
            # Assumed completion: the source snippet is cut off here; each row
            # holds the five selected fields, in the order requested above.
            schema_nm, table_nm, file_name, path, data_topic_nm = row
            configdict['CVS_CAREMARK_MEMBER'][table_nm] = {
                'schema_nm': schema_nm, 'file_name': file_name,
                'path': path, 'data_topic_nm': data_topic_nm}
        return configdict
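
formatConfig only takes effect once it is registered as a task downstream of get_config; a minimal wiring sketch (the task name and placement are assumptions):

    # Assumes: from airflow.operators.python_operator import PythonOperator
    format_config = PythonOperator(
        task_id="format_config",
        python_callable=formatConfig,
        provide_context=True,  # required on Airflow 1.x so **kwargs gets ti
    )

    get_data_bq >> format_config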
Example #6

    train_model = BigQueryExecuteQueryOperator(
        task_id="train_model",
        sql=TRAINING_QUERY,
        use_legacy_sql=False
    )


    get_preds = BigQueryExecuteQueryOperator(
        task_id="get_predictions",
        sql=SERVING_QUERY,
        use_legacy_sql=False,
        destination_dataset_table=DATASET_NAME + "." + DESTINATION_TABLE,
        write_disposition="WRITE_APPEND"
    )


    print_preds = BigQueryGetDataOperator(
        task_id="print_predictions",
        dataset_id=DATASET_NAME,
        table_id=DESTINATION_TABLE
    )

    linkedin_sync >> linkedin_sensor
    twitter_sync >> twitter_sensor
    
    [linkedin_sensor, twitter_sensor] >> dbt_run

    dbt_run >> ml_branch >> [train_model, get_preds]
    get_preds >> print_preds
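
Example #6 references several tasks defined elsewhere in its DAG (linkedin_sync, twitter_sync, the two sensors, dbt_run, ml_branch). Since ml_branch fans out to train_model and get_predictions, it is presumably a BranchPythonOperator; a hypothetical sketch of such a branch, with an invented retraining rule:

    from airflow.operators.python import BranchPythonOperator


    def _choose_ml_path(**context):
        # Hypothetical rule: retrain on the first day of the month,
        # otherwise only serve fresh predictions.
        if context["execution_date"].day == 1:
            return ["train_model", "get_predictions"]
        return "get_predictions"


    ml_branch = BranchPythonOperator(
        task_id="ml_branch",
        python_callable=_choose_ml_path,
    )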