    def test_execute(self, mock_hook):
        source_project_dataset_tables = '{}.{}'.format(
            TEST_DATASET, TEST_TABLE_ID)
        destination_project_dataset_table = '{}.{}'.format(
            TEST_DATASET + '_new', TEST_TABLE_ID)
        write_disposition = 'WRITE_EMPTY'
        create_disposition = 'CREATE_IF_NEEDED'
        labels = {'k1': 'v1'}

        operator = BigQueryToBigQueryOperator(
            task_id=TASK_ID,
            source_project_dataset_tables=source_project_dataset_tables,
            destination_project_dataset_table=destination_project_dataset_table,
            write_disposition=write_disposition,
            create_disposition=create_disposition,
            labels=labels
        )

        operator.execute(None)
        mock_hook.return_value \
            .get_conn.return_value \
            .cursor.return_value \
            .run_copy \
            .assert_called_once_with(
                source_project_dataset_tables=source_project_dataset_tables,
                destination_project_dataset_table=destination_project_dataset_table,
                write_disposition=write_disposition,
                create_disposition=create_disposition,
                labels=labels
            )

    def test_execute(self, mock_hook):
        source_project_dataset_tables = '{}.{}'.format(TEST_DATASET,
                                                       TEST_TABLE_ID)
        destination_project_dataset_table = '{}.{}'.format(
            TEST_DATASET + '_new', TEST_TABLE_ID)
        write_disposition = 'WRITE_EMPTY'
        create_disposition = 'CREATE_IF_NEEDED'
        labels = {'k1': 'v1'}
        encryption_configuration = {'key': 'kk'}

        operator = BigQueryToBigQueryOperator(
            task_id=TASK_ID,
            source_project_dataset_tables=source_project_dataset_tables,
            destination_project_dataset_table=destination_project_dataset_table,
            write_disposition=write_disposition,
            create_disposition=create_disposition,
            labels=labels,
            encryption_configuration=encryption_configuration)

        operator.execute(None)
        mock_hook.return_value \
            .get_conn.return_value \
            .cursor.return_value \
            .run_copy \
            .assert_called_once_with(
                source_project_dataset_tables=source_project_dataset_tables,
                destination_project_dataset_table=destination_project_dataset_table,
                write_disposition=write_disposition,
                create_disposition=create_disposition,
                labels=labels,
                encryption_configuration=encryption_configuration
            )
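
Both test methods above receive a mock_hook argument, which implies they sit inside a test class whose methods are decorated with mock.patch over the BigQuery hook. A minimal sketch of that wrapper is shown below; the patch target, class name and constant values are assumptions, not taken from the original test module.

import unittest
from unittest import mock

from airflow.contrib.operators.bigquery_to_bigquery import BigQueryToBigQueryOperator

# Placeholder constants; the real test module defines its own values.
TASK_ID = 'test-bq-to-bq-operator'
TEST_DATASET = 'test_dataset'
TEST_TABLE_ID = 'test_table_id'


class TestBigQueryToBigQueryOperator(unittest.TestCase):
    # The patch target assumes the contrib import path; the patched hook is
    # injected into each decorated test method as `mock_hook`.
    @mock.patch('airflow.contrib.operators.bigquery_to_bigquery.BigQueryHook')
    def test_execute(self, mock_hook):
        ...  # body as in the examples above
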
Example #3

    execute_query_external_table = BigQueryOperator(
        task_id="execute-query-external-table",
        destination_dataset_table="{}.selected_data_from_external_table".format(
            DATASET_NAME),
        sql='SELECT * FROM `{}.external_table` WHERE name LIKE "W%"'.format(
            DATASET_NAME),
        use_legacy_sql=False,
    )

    copy_from_selected_data = BigQueryToBigQueryOperator(
        task_id="copy-from-selected-data",
        source_project_dataset_tables="{}.selected_data_from_external_table".format(
            DATASET_NAME),
        destination_project_dataset_table=(
            "{}.copy_of_selected_data_from_external_table".format(DATASET_NAME)),
    )

    bigquery_to_gcs = BigQueryToCloudStorageOperator(
        task_id="bigquery-to-gcs",
        source_project_dataset_table="{}.selected_data_from_external_table".format(
            DATASET_NAME),
        destination_cloud_storage_uris=[
            "gs://{}/export-bigquery.csv".format(DATA_EXPORT_BUCKET_NAME)
        ],
    )

    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create-dataset", dataset_id=DATASET_NAME)
Example #4
    step1 = DataflowTemplateOperator(
        task_id=TASK_ID,
        template=TEMPLATE,
        parameters=PARAMETERS,
        dataflow_default_options=DEFAULT_OPTIONS_TEMPLATE,
        poll_sleep=POLL_SLEEP)

    # Step 2 - Merge the BigQuery batch tables (CSV, JSON, XML) into a standardized BigQuery table
    TASK_ID = 'step2_json_xml_csv_to_merged'
    # Note: unlike DataflowTemplateOperator above, BigQueryToBigQueryOperator
    # does not accept dataflow_default_options or poll_sleep.
    step2 = BigQueryToBigQueryOperator(
        task_id=TASK_ID,
        source_project_dataset_tables=[
            'health-club-demo:healthclub.batch_googlefit_json',
            'health-club-demo:healthclub.batch_applehealth_xml',
            'health-club-demo:healthclub.batch_fitbit_csv'
        ],
        destination_project_dataset_table=(
            'health-club-demo:healthclub.merged_member_daily'),
        write_disposition='WRITE_APPEND',
        create_disposition='CREATE_NEVER')

    # Step 3 - Pull the Firestore "std_member" collection into BigQuery; mocked due to time constraints
    TASK_ID = 'step3_std_member_firestore_to_bigquery'
    step3 = bash_operator.BashOperator(
        task_id=TASK_ID,
        bash_command='echo step3_std_member_firestore_to_bigquery completed.')
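
Because step 3 is only mocked with an echo, a hedged sketch of what a real Firestore-to-BigQuery pull could look like follows; the export bucket, prefix and dataset paths are placeholders, not values from the original DAG:

    # Hypothetical, non-mocked variant of step 3: export the Firestore
    # "std_member" collection to GCS, then load the export into BigQuery.
    # Bucket, prefix and dataset names are illustrative placeholders.
    step3_real = bash_operator.BashOperator(
        task_id='step3_std_member_firestore_to_bigquery_real',
        bash_command=(
            'gcloud firestore export gs://PLACEHOLDER_EXPORT_BUCKET/firestore '
            '--collection-ids=std_member && '
            'bq load --source_format=DATASTORE_BACKUP '
            'healthclub.std_member '
            'gs://PLACEHOLDER_EXPORT_BUCKET/firestore/all_namespaces/'
            'kind_std_member/all_namespaces_kind_std_member.export_metadata'
        ),
    )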

    # Step 4 - Retain only 1 year of data, counting from the membership start date
    TASK_ID = 'step4_retent_1year_memberdata'
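
The excerpt stops right after the step 4 task id, so purely as an illustration of the retention described in the comment, a sketch follows; the operator choice, table and column names are assumptions rather than the original implementation:

    # Hypothetical sketch only (assumes BigQueryOperator is imported and that
    # a `membership_start_date` column exists): keep just the last year of data.
    step4_sketch = BigQueryOperator(
        task_id=TASK_ID,
        sql=(
            "DELETE FROM `health-club-demo.healthclub.merged_member_daily` "
            "WHERE membership_start_date < "
            "DATE_SUB(CURRENT_DATE(), INTERVAL 1 YEAR)"
        ),
        use_legacy_sql=False,
    )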