def test_load_all_tables_concurrently(self, mock_start, mock_wait):
    """Test that start_table_load THEN wait_for_table load are called."""
    # One (load_job, table_ref) pair per county table; fed to the patched
    # start function so each start call returns the next pair in order.
    start_load_jobs = [
        (self.mock_load_job, self.mock_dataset.table(table.name))
        for table in self.mock_export_config.COUNTY_TABLES_TO_EXPORT
    ]
    mock_start.side_effect = start_load_jobs
    # Attach both patched functions to a single parent mock so that
    # assert_has_calls below can verify their relative ordering
    # (all 'start' calls recorded before any 'wait' call).
    mock_parent = mock.Mock()
    mock_parent.attach_mock(mock_start, 'start')
    mock_parent.attach_mock(mock_wait, 'wait')
    # NOTE(review): this call passes 3 arguments, but another call site in
    # this file invokes bq_load.load_all_tables_concurrently with 4
    # (big_query_client first) — confirm which signature is current.
    bq_load.load_all_tables_concurrently(
        self.mock_dataset,
        self.mock_export_config.COUNTY_TABLES_TO_EXPORT,
        self.schema_type)
    # Expected recorded calls: one start per table...
    start_calls = [
        mock.call.start(self.mock_dataset, table.name, self.schema_type)
        for table in self.mock_export_config.COUNTY_TABLES_TO_EXPORT
    ]
    # ...followed by one wait per table, each with the job/table pair
    # produced by the corresponding start call.
    wait_calls = [
        mock.call.wait(self.mock_load_job, self.mock_dataset.table(table.name))
        for table in self.mock_export_config.COUNTY_TABLES_TO_EXPORT
    ]
    # assert_has_calls checks the calls appear in this exact sequence.
    mock_parent.assert_has_calls(start_calls + wait_calls)
def export_all_then_load_all(big_query_client: BigQueryClient,
                             schema_type: SchemaType):
    """Export all tables from Cloud SQL in the given schema, then load all
    tables to BigQuery.

    Exports happen in sequence (one at a time); once every export has
    completed, the BigQuery loads run in parallel. For example, for tables
    A, B, C:
        1. Export Table A
        2. Export Table B
        3. Export Table C
        4. Load Tables A, B, C in parallel.

    Args:
        big_query_client: client used to resolve the destination dataset
            and perform the loads.
        schema_type: which schema to export; only JAILS and STATE are
            supported. Any other value is logged as an error and the
            function returns without doing any work.
    """
    # Guard clause: bail out early on an unsupported schema rather than
    # nesting the happy path inside an if/elif/else ladder.
    if schema_type not in (SchemaType.JAILS, SchemaType.STATE):
        logging.error("Invalid schema_type requested. Must be either"
                      " SchemaType.JAILS or SchemaType.STATE.")
        return

    # Select the per-schema table list, destination dataset, and export
    # queries.
    if schema_type == SchemaType.JAILS:
        tables_to_export = export_config.COUNTY_TABLES_TO_EXPORT
        export_queries = export_config.COUNTY_TABLE_EXPORT_QUERIES
        base_tables_dataset_ref = big_query_client.dataset_ref_for_id(
            county_dataset_config.COUNTY_BASE_DATASET)
    else:
        tables_to_export = export_config.STATE_TABLES_TO_EXPORT
        export_queries = export_config.STATE_TABLE_EXPORT_QUERIES
        base_tables_dataset_ref = big_query_client.dataset_ref_for_id(
            state_dataset_config.STATE_BASE_DATASET)

    logging.info("Beginning CloudSQL export")
    cloudsql_export.export_all_tables(schema_type,
                                      tables_to_export,
                                      export_queries)

    logging.info("Beginning BQ table load")
    bq_load.load_all_tables_concurrently(
        big_query_client, base_tables_dataset_ref, tables_to_export,
        schema_type)