Esempio n. 1
0
def _federated_bq_regional_dataset_refresh(
    config: CloudSqlToBQConfig,
    dataset_override_prefix: Optional[str] = None,
) -> None:
    """Refreshes the regional BigQuery datasets for the schema described by |config|.

    Data is queried from the appropriate CloudSQL instance for the given schema /
    config and loaded into a single, unified dataset **in the same** region as the
    CloudSQL instance. Along the way, this creates / updates the views that provide
    direct federated connections to the CloudSQL instance, as well as intermediate
    state-segmented datasets (where appropriate).

    Example resulting datasets (OPERATIONS schema):
      operations_cloudsql_connection  <-- Federated views
      us_xx_operations_regional  <-- Materialized data from most recent export for state
      us_yy_operations_regional
      operations_regional  <-- Materialized data from most recent export for each state

    Args:
        config: Export config for the schema being refreshed. It selects the view
            collector (state-segmented vs. unsegmented) and the BigQuery region.
        dataset_override_prefix: When truthy, dataset overrides are built from this
            prefix for the collected view builders and passed to the deploy step.
            The prefix is also forwarded to the unioned-dataset hydration step.
    """

    # The collector flavor depends on whether this schema is refreshed per-state.
    view_collector: BigQueryViewCollector[
        FederatedCloudSQLTableBigQueryViewBuilder
    ] = (
        StateSegmentedSchemaFederatedBigQueryViewCollector(config)
        if config.is_state_segmented_refresh_schema()
        else UnsegmentedSchemaFederatedBigQueryViewCollector(config)
    )
    builders = view_collector.collect_view_builders()

    # TODO(#7285): Migrate Justice Counts connection to be in same region as instance
    # Until then, Justice Counts gets no region override; every other schema uses
    # the region of its CloudSQL instance.
    region_override = (
        None
        if config.schema_type == SchemaType.JUSTICE_COUNTS
        else SQLAlchemyEngineManager.get_cloudsql_instance_region(
            config.schema_type
        )
    )

    # Overrides are only computed when a prefix was supplied.
    overrides = (
        dataset_overrides_for_view_builders(
            view_dataset_override_prefix=dataset_override_prefix,
            view_builders=builders,
        )
        if dataset_override_prefix
        else None
    )

    create_managed_dataset_and_deploy_views_for_view_builders(
        view_source_table_datasets=set(),
        view_builders_to_update=builders,
        dataset_overrides=overrides,
        bq_region_override=region_override,
        force_materialize=True,
    )

    # Only state-segmented schemas need the per-state data unioned afterwards.
    if not config.is_state_segmented_refresh_schema():
        return

    _hydrate_unioned_regional_dataset_for_schema(
        config, region_override, dataset_override_prefix
    )
Esempio n. 2
0
    def testGetAllStrippedCloudSqlRegion(
        self, mock_secrets: mock.MagicMock
    ) -> None:
        """Checks that the region is extracted from the OPERATIONS instance id.

        |mock_secrets| is injected from outside this method (presumably by a patch
        decorator on the secrets lookup -- confirm against the enclosing class).
        """
        # Arrange: the single secret lookup yields a fully-qualified instance id.
        mock_secrets.side_effect = ["project:us-central1:111"]

        # Act
        actual_region = SQLAlchemyEngineManager.get_cloudsql_instance_region(
            schema_type=SchemaType.OPERATIONS
        )

        # Assert: only the middle (region) component is returned, and the lookup
        # was made with the OPERATIONS instance id secret key.
        self.assertEqual(actual_region, "us-central1")
        mock_secrets.assert_called_with("operations_cloudsql_instance_id")
Esempio n. 3
0
 def connection_region(self) -> str:
     """Returns the region of the BigQuery CloudSQL external connection.

     Justice Counts is pinned to "US"; every other schema type uses the region
     reported for its CloudSQL instance.
     """
     # TODO(#7285): Migrate Justice Counts connection to be in same region as instance
     if self.schema_type == SchemaType.JUSTICE_COUNTS:
         return "US"

     return SQLAlchemyEngineManager.get_cloudsql_instance_region(
         self.schema_type
     )