def _copy_regional_dataset_to_multi_region(
    config: CloudSqlToBQConfig, dataset_override_prefix: Optional[str]
) -> None:
    """Copies the unioned regional dataset for a schema to the multi-region
    dataset that contains the same data.

    Backs up the multi-region dataset before performing the copy. This backup
    dataset will get cleaned up if the copy succeeds, but otherwise will stick
    around for 1 week before tables expire.

    Args:
        config: The refresh config whose unioned datasets are being copied.
        dataset_override_prefix: Optional sandbox prefix; when set, the
            destination dataset is treated as temporary and gets a default
            table expiration.

    Raises:
        Exception: Re-raises any error encountered during the copy, after
            logging where the pre-copy backup (if any) can be found.
    """
    bq_client = BigQueryClientImpl()

    source_dataset_id = config.unioned_regional_dataset(dataset_override_prefix)
    destination_dataset_id = config.unioned_multi_region_dataset(
        dataset_override_prefix
    )
    destination_dataset = bq_client.dataset_ref_for_id(destination_dataset_id)

    # Snapshot the current destination contents before we start deleting, so a
    # failed copy never leaves us with no recoverable data.
    backup_dataset = bq_client.backup_dataset_tables_if_dataset_exists(
        destination_dataset_id
    )

    try:
        # Clear any existing tables first so the destination ends up as an
        # exact mirror of the source (no stale tables left behind).
        if bq_client.dataset_exists(destination_dataset):
            tables = bq_client.list_tables(destination_dataset_id)
            for table in tables:
                bq_client.delete_table(table.dataset_id, table.table_id)

        bq_client.create_dataset_if_necessary(
            destination_dataset,
            # Sandbox (prefixed) datasets are temporary — let their tables
            # expire automatically; the canonical dataset never expires.
            default_table_expiration_ms=TEMP_DATASET_DEFAULT_TABLE_EXPIRATION_MS
            if dataset_override_prefix
            else None,
        )

        # Copy into the canonical unioned source datasets in the US multi-region
        bq_client.copy_dataset_tables_across_regions(
            source_dataset_id=source_dataset_id,
            destination_dataset_id=destination_dataset_id,
        )
    except Exception:
        # A failed copy is an operational error, not informational — log it at
        # ERROR level so it surfaces under standard log filtering.
        logging.error(
            "Failed to flash [%s] to [%s] - contents backup can be found at [%s]",
            source_dataset_id,
            destination_dataset_id,
            backup_dataset.dataset_id if backup_dataset else "NO BACKUP",
        )
        # Bare `raise` preserves the original exception and traceback without
        # appending an extra re-raise frame (unlike `raise e`).
        raise

    # Copy succeeded — the backup is no longer needed.
    if backup_dataset:
        bq_client.delete_dataset(
            backup_dataset, delete_contents=True, not_found_ok=True
        )
def __init__(
    self,
    *,
    config: CloudSqlToBQConfig,
    table: Table,
    state_codes: List[StateCode],
):
    """Stores the refresh config, source table, and states to union, and
    derives the view/materialization addresses for the unioned view.
    """
    # Guard clause: this view builder only supports state-segmented schemas.
    if not config.is_state_segmented_refresh_schema():
        raise ValueError(f"Unexpected schema type [{config.schema_type.name}]")

    self.config = config
    self.table = table
    self.state_codes = state_codes

    # Dataset prefixing will be handled automatically by view building logic,
    # so no override prefix is passed here.
    self.dataset_id = config.unioned_regional_dataset(dataset_override_prefix=None)
    self.view_id = f"{table.name}_view"

    # Materialize the view into a table named after the source table itself.
    self.materialized_address_override = BigQueryAddress(
        dataset_id=self.dataset_id,
        table_id=table.name,
    )