Code example #1
    def test_load_all_tables_concurrently(self, mock_start, mock_wait):
        """Verify every start_table_load call happens before the wait_for_table_load calls."""
        tables = self.mock_export_config.COUNTY_TABLES_TO_EXPORT

        # Each start call yields the (load_job, table_ref) pair that the
        # corresponding wait call is expected to receive.
        mock_start.side_effect = [
            (self.mock_load_job, self.mock_dataset.table(table.name))
            for table in tables
        ]

        # Attach both mocks to one recorder so their relative call order
        # can be asserted in a single pass.
        call_recorder = mock.Mock()
        call_recorder.attach_mock(mock_start, 'start')
        call_recorder.attach_mock(mock_wait, 'wait')

        bq_load.load_all_tables_concurrently(
            self.mock_dataset, tables, self.schema_type)

        expected_calls = [
            mock.call.start(self.mock_dataset, table.name, self.schema_type)
            for table in tables
        ]
        expected_calls += [
            mock.call.wait(self.mock_load_job,
                           self.mock_dataset.table(table.name))
            for table in tables
        ]
        call_recorder.assert_has_calls(expected_calls)
Code example #2
def export_all_then_load_all(big_query_client: BigQueryClient, schema_type: SchemaType):
    """Export all tables from Cloud SQL in the given schema, then load all
    tables to BigQuery.

    Exports happen in sequence (one at a time),
    then once all exports are completed, the BigQuery loads happen in parallel.

    For example, for tables A, B, C:
    1. Export Table A
    2. Export Table B
    3. Export Table C
    4. Load Tables A, B, C in parallel.

    Args:
        big_query_client: Client used to resolve the destination dataset
            and perform the BigQuery loads.
        schema_type: Which schema to export (SchemaType.JAILS or
            SchemaType.STATE). Any other value is logged as an error and
            the function returns without doing any work.
    """

    if schema_type == SchemaType.JAILS:
        tables_to_export = export_config.COUNTY_TABLES_TO_EXPORT
        base_tables_dataset_ref = big_query_client.dataset_ref_for_id(county_dataset_config.COUNTY_BASE_DATASET)
        export_queries = export_config.COUNTY_TABLE_EXPORT_QUERIES
    elif schema_type == SchemaType.STATE:
        tables_to_export = export_config.STATE_TABLES_TO_EXPORT
        base_tables_dataset_ref = big_query_client.dataset_ref_for_id(state_dataset_config.STATE_BASE_DATASET)
        export_queries = export_config.STATE_TABLE_EXPORT_QUERIES
    else:
        # Include the offending value so the failure is diagnosable from logs
        # alone; lazy %-formatting per logging convention.
        logging.error("Invalid schema_type requested [%s]. Must be either"
                      " SchemaType.JAILS or SchemaType.STATE.", schema_type)
        return

    logging.info("Beginning CloudSQL export")
    cloudsql_export.export_all_tables(schema_type,
                                      tables_to_export,
                                      export_queries)

    logging.info("Beginning BQ table load")
    bq_load.load_all_tables_concurrently(
        big_query_client, base_tables_dataset_ref, tables_to_export, schema_type)