Python CloudSqlToBQConfig.unioned_regional_dataset Examples

Programming Language: Python

Namespace/Package Name: recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config

Method/Function: unioned_regional_dataset

Examples at hotexamples.com: 2

Python CloudSqlToBQConfig.unioned_regional_dataset - 2 examples found. These are the top-rated real-world Python examples of recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.CloudSqlToBQConfig.unioned_regional_dataset, extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

for_schema_type(30)

is_valid_schema_type(5)

is_state_segmented_refresh_schema(4)

get_dataset_ref(3)

get_bq_schema_for_table(2)

get_gcs_export_uri_for_table(2)

get_tables_to_export(2)

unioned_regional_dataset(2)

default_config_path(1)

get_stale_bq_rows_for_excluded_regions_query_builder(1)

get_table_export_query(1)

materialized_dataset_for_segment(1)

unioned_multi_region_dataset(1)

Example #1

Show file

def _copy_regional_dataset_to_multi_region(
        config: CloudSqlToBQConfig,
        dataset_override_prefix: Optional[str]) -> None:
    """Replaces a schema's multi-region dataset with its unioned regional dataset.

    Steps, in order:
      1. Back up the tables of the multi-region (destination) dataset, if that
         dataset exists.
      2. Delete every table currently in the destination dataset.
      3. Create the destination dataset if necessary.
      4. Copy the tables of the unioned regional (source) dataset across regions
         into the destination dataset.
      5. On success, delete the backup dataset and its contents.

    If any of steps 2-4 raises, the backup is left in place, its dataset id is
    logged, and the original exception is re-raised.
    NOTE(review): backup tables are expected to expire on their own after about
    a week; that expiration is configured outside this function - confirm there.

    Args:
        config: Refresh config used to resolve the source and destination
            dataset ids.
        dataset_override_prefix: Optional prefix forwarded when resolving both
            dataset ids. When truthy, the destination dataset is created with
            TEMP_DATASET_DEFAULT_TABLE_EXPIRATION_MS as its default table
            expiration; otherwise no default expiration is passed.
    """
    client = BigQueryClientImpl()

    regional_dataset_id = config.unioned_regional_dataset(dataset_override_prefix)
    multi_region_dataset_id = config.unioned_multi_region_dataset(
        dataset_override_prefix)
    multi_region_dataset = client.dataset_ref_for_id(multi_region_dataset_id)

    # Taken before anything destructive happens so a failed copy can be recovered.
    backup = client.backup_dataset_tables_if_dataset_exists(multi_region_dataset_id)

    try:
        # Clear out the destination so that only freshly copied tables remain.
        if client.dataset_exists(multi_region_dataset):
            for stale_table in client.list_tables(multi_region_dataset_id):
                client.delete_table(stale_table.dataset_id, stale_table.table_id)

        # Only prefixed (override) datasets get a default table expiration.
        if dataset_override_prefix:
            expiration_ms = TEMP_DATASET_DEFAULT_TABLE_EXPIRATION_MS
        else:
            expiration_ms = None
        client.create_dataset_if_necessary(
            multi_region_dataset,
            default_table_expiration_ms=expiration_ms,
        )

        # Copy into the canonical unioned source datasets in the US multi-region
        client.copy_dataset_tables_across_regions(
            source_dataset_id=regional_dataset_id,
            destination_dataset_id=multi_region_dataset_id,
        )
    except Exception as e:
        backup_label = backup.dataset_id if backup else "NO BACKUP"
        logging.info(
            "Failed to flash [%s] to [%s] - contents backup can be found at [%s]",
            regional_dataset_id,
            multi_region_dataset_id,
            backup_label,
        )
        raise e
    else:
        # The copy succeeded, so the safety backup is no longer needed.
        if backup:
            client.delete_dataset(backup,
                                  delete_contents=True,
                                  not_found_ok=True)

Example #2

Show file

    def __init__(
        self,
        *,
        config: CloudSqlToBQConfig,
        table: Table,
        state_codes: List[StateCode],
    ):
        """Stores the inputs and derives the dataset, view id and materialized address.

        Args:
            config: Refresh config for the schema; must report a state-segmented
                refresh schema.
            table: Table this builder is for. Its name is used for both the view
                id (``<name>_view``) and the materialized table id.
            state_codes: State codes stored on the instance unchanged.

        Raises:
            ValueError: If ``config.is_state_segmented_refresh_schema()`` is falsy.
        """
        schema_is_state_segmented = config.is_state_segmented_refresh_schema()
        if not schema_is_state_segmented:
            message = f"Unexpected schema type [{config.schema_type.name}]"
            raise ValueError(message)

        self.config = config
        self.table = table
        self.state_codes = state_codes

        # Dataset prefixing will be handled automatically by view building logic,
        # so the un-prefixed dataset id is requested here.
        self.dataset_id = config.unioned_regional_dataset(
            dataset_override_prefix=None)

        table_name = table.name
        self.view_id = f"{table_name}_view"
        # The view materializes into a table named after the source table, in
        # the same dataset as the view itself.
        self.materialized_address_override = BigQueryAddress(
            dataset_id=self.dataset_id,
            table_id=table_name,
        )