Example #1
0
def _copy_regional_dataset_to_multi_region(
        config: CloudSqlToBQConfig,
        dataset_override_prefix: Optional[str]) -> None:
    """Copies the unioned regional dataset for a schema to the multi-region dataset
    that contains the same data. Backs up the multi-region dataset before performing
    the copy. This backup dataset will get cleaned up if the copy succeeds, but
    otherwise will stick around for 1 week before tables expire.
    """
    bq_client = BigQueryClientImpl()

    source_dataset_id = config.unioned_regional_dataset(
        dataset_override_prefix)
    destination_dataset_id = config.unioned_multi_region_dataset(
        dataset_override_prefix)
    destination_dataset = bq_client.dataset_ref_for_id(destination_dataset_id)

    backup_dataset = bq_client.backup_dataset_tables_if_dataset_exists(
        destination_dataset_id)

    try:
        if bq_client.dataset_exists(destination_dataset):
            tables = bq_client.list_tables(destination_dataset_id)
            for table in tables:
                bq_client.delete_table(table.dataset_id, table.table_id)

        bq_client.create_dataset_if_necessary(
            destination_dataset,
            default_table_expiration_ms=TEMP_DATASET_DEFAULT_TABLE_EXPIRATION_MS
            if dataset_override_prefix else None,
        )

        # Copy into the canonical unioned source datasets in the US multi-region
        bq_client.copy_dataset_tables_across_regions(
            source_dataset_id=source_dataset_id,
            destination_dataset_id=destination_dataset_id,
        )
    except Exception as e:
        logging.info(
            "Failed to flash [%s] to [%s] - contents backup can be found at [%s]",
            source_dataset_id,
            destination_dataset_id,
            backup_dataset.dataset_id if backup_dataset else "NO BACKUP",
        )
        raise e

    if backup_dataset:
        bq_client.delete_dataset(backup_dataset,
                                 delete_contents=True,
                                 not_found_ok=True)
Example #2
0
    def __init__(
        self,
        *,
        config: CloudSqlToBQConfig,
        table: Table,
        state_codes: List[StateCode],
    ):
        if not config.is_state_segmented_refresh_schema():
            raise ValueError(
                f"Unexpected schema type [{config.schema_type.name}]")

        self.config = config
        self.table = table
        self.state_codes = state_codes
        # Dataset prefixing will ge handled automatically by view building logic
        self.dataset_id = config.unioned_regional_dataset(
            dataset_override_prefix=None)
        self.view_id = f"{table.name}_view"
        self.materialized_address_override = BigQueryAddress(
            dataset_id=self.dataset_id,
            table_id=table.name,
        )