def test_execute(self, mock_hook):
    source_project_dataset_tables = '{}.{}'.format(
        TEST_DATASET, TEST_TABLE_ID)
    destination_project_dataset_table = '{}.{}'.format(
        TEST_DATASET + '_new', TEST_TABLE_ID)
    write_disposition = 'WRITE_EMPTY'
    create_disposition = 'CREATE_IF_NEEDED'
    labels = {'k1': 'v1'}

    operator = BigQueryToBigQueryOperator(
        task_id=TASK_ID,
        source_project_dataset_tables=source_project_dataset_tables,
        destination_project_dataset_table=destination_project_dataset_table,
        write_disposition=write_disposition,
        create_disposition=create_disposition,
        labels=labels
    )

    operator.execute(None)
    mock_hook.return_value \
        .get_conn.return_value \
        .cursor.return_value \
        .run_copy \
        .assert_called_once_with(
            source_project_dataset_tables=source_project_dataset_tables,
            destination_project_dataset_table=destination_project_dataset_table,
            write_disposition=write_disposition,
            create_disposition=create_disposition,
            labels=labels
        )
def test_execute(self, mock_hook):
    source_project_dataset_tables = '{}.{}'.format(TEST_DATASET, TEST_TABLE_ID)
    destination_project_dataset_table = '{}.{}'.format(
        TEST_DATASET + '_new', TEST_TABLE_ID)
    write_disposition = 'WRITE_EMPTY'
    create_disposition = 'CREATE_IF_NEEDED'
    labels = {'k1': 'v1'}
    encryption_configuration = {'key': 'kk'}

    operator = BigQueryToBigQueryOperator(
        task_id=TASK_ID,
        source_project_dataset_tables=source_project_dataset_tables,
        destination_project_dataset_table=destination_project_dataset_table,
        write_disposition=write_disposition,
        create_disposition=create_disposition,
        labels=labels,
        encryption_configuration=encryption_configuration)

    operator.execute(None)
    mock_hook.return_value \
        .get_conn.return_value \
        .cursor.return_value \
        .run_copy \
        .assert_called_once_with(
            source_project_dataset_tables=source_project_dataset_tables,
            destination_project_dataset_table=destination_project_dataset_table,
            write_disposition=write_disposition,
            create_disposition=create_disposition,
            labels=labels,
            encryption_configuration=encryption_configuration
        )
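# Both test_execute methods above take a mock_hook argument, which implies a
# mock.patch decorator and an enclosing TestCase (presumably one class per
# variant, since the method names collide). A minimal sketch of that
# scaffolding, assuming the Airflow 1.10 contrib import path for the operator
# and hook; the patch target and the TASK_ID/TEST_DATASET/TEST_TABLE_ID
# constants below are assumptions, not copied from the source.
import unittest
from unittest import mock

from airflow.contrib.operators.bigquery_to_bigquery import BigQueryToBigQueryOperator

TASK_ID = 'test-bq-to-bq-operator'  # assumed value
TEST_DATASET = 'test-dataset'       # assumed value
TEST_TABLE_ID = 'test-table-id'     # assumed value


class BigQueryToBigQueryOperatorTest(unittest.TestCase):
    # Patch BigQueryHook where the operator module imports it, so execute()
    # never reaches real BigQuery; the patched class is injected as mock_hook,
    # and run_copy calls are recorded on its mock cursor.
    @mock.patch('airflow.contrib.operators.bigquery_to_bigquery.BigQueryHook')
    def test_execute(self, mock_hook):
        ...  # method body as in the tests above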
    }],
)

execute_query_external_table = BigQueryOperator(
    task_id="execute-query-external-table",
    destination_dataset_table="{}.selected_data_from_external_table".format(DATASET_NAME),
    sql='SELECT * FROM `{}.external_table` WHERE name LIKE "W%"'.format(DATASET_NAME),
    use_legacy_sql=False,
)

copy_from_selected_data = BigQueryToBigQueryOperator(
    task_id="copy-from-selected-data",
    source_project_dataset_tables="{}.selected_data_from_external_table".format(DATASET_NAME),
    destination_project_dataset_table="{}.copy_of_selected_data_from_external_table".format(DATASET_NAME),
)

bigquery_to_gcs = BigQueryToCloudStorageOperator(
    task_id="bigquery-to-gcs",
    source_project_dataset_table="{}.selected_data_from_external_table".format(DATASET_NAME),
    destination_cloud_storage_uris=[
        "gs://{}/export-bigquery.csv".format(DATA_EXPORT_BUCKET_NAME)
    ],
)

create_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id="create-dataset", dataset_id=DATASET_NAME)
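# The operators above still need an explicit dependency chain to run in order.
# A minimal sketch, assuming the truncated operator whose closing brackets open
# this excerpt is named create_external_table (that name is an assumption) and
# that the chain follows the data flow:
create_dataset >> create_external_table >> execute_query_external_table
execute_query_external_table >> copy_from_selected_data >> bigquery_to_gcs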
step1 = DataflowTemplateOperator(
    task_id=TASK_ID,
    template=TEMPLATE,
    parameters=PARAMETERS,
    dataflow_default_options=DEFAULT_OPTIONS_TEMPLATE,
    poll_sleep=POLL_SLEEP)

# Step 2 - BigQuery batch tables (CSV, JSON, XML) to standardized BigQuery table
TASK_ID = 'step2_json_xml_csv_to_merged'
step2 = BigQueryToBigQueryOperator(
    task_id=TASK_ID,
    source_project_dataset_tables=[
        'health-club-demo:healthclub.batch_googlefit_json',
        'health-club-demo:healthclub.batch_applehealth_xml',
        'health-club-demo:healthclub.batch_fitbit_csv'
    ],
    destination_project_dataset_table='health-club-demo:healthclub.merged_member_daily',
    write_disposition='WRITE_APPEND',
    create_disposition='CREATE_NEVER')

# Step 3 - Pull Firestore "std_member" into BigQuery; mock implementation due to time constraints
TASK_ID = 'step3_std_member_firestore_to_bigquery'
step3 = bash_operator.BashOperator(
    task_id=TASK_ID,
    bash_command='echo step3_std_member_firestore_to_bigquery completed.')

# Step 4 - Retain only 1 year of data, counting from membership start date
TASK_ID = 'step4_retent_1year_memberdata'
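# Step 4's operator is not shown in this excerpt; a hypothetical placeholder
# mirroring step 3's mocked pattern, plus the dependency chain that runs the
# steps in order (the chain itself is an assumption based on the step numbers):
step4 = bash_operator.BashOperator(
    task_id=TASK_ID,
    bash_command='echo step4_retent_1year_memberdata completed.')

step1 >> step2 >> step3 >> step4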