def test_dag_two_views_same_materialized_address(self) -> None:
    """The DAG walker must reject two views whose materialized addresses collide."""
    colliding_views = [
        BigQueryView(
            dataset_id="dataset_1",
            view_id="table_1",
            description="table_1 description",
            should_materialize=True,
            materialized_address_override=BigQueryAddress(
                dataset_id="other_dataset", table_id="other_table"
            ),
            view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
        ),
        BigQueryView(
            dataset_id="dataset_2",
            view_id="table_2",
            description="table_2 description",
            should_materialize=True,
            materialized_address_override=BigQueryAddress(
                dataset_id="other_dataset", table_id="other_table"
            ),
            view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table_2`",
        ),
    ]
    with self.assertRaises(ValueError) as ctx:
        _ = BigQueryViewDagWalker(colliding_views)
    expected_prefix = (
        "Found materialized view address for view [('dataset_2', 'table_2')] "
        "that matches materialized_address of another view: "
        "[('dataset_1', 'table_1')]."
    )
    self.assertTrue(str(ctx.exception).startswith(expected_prefix))
def test_parse_view_multiple_parents(self) -> None:
    """A view whose query joins two tables should report both as parent keys."""
    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="my_view_id",
        description="my view description",
        view_query_template="""SELECT * FROM `{project_id}.some_dataset.some_table`
        LEFT OUTER JOIN `{project_id}.some_dataset.other_table`
        USING (some_col);
        """,
    )
    node = BigQueryViewDagNode(view)
    node.set_materialized_addresss({})
    expected_parents = {
        DagKey(
            view_address=BigQueryAddress(
                dataset_id="some_dataset", table_id="some_table"
            )
        ),
        DagKey(
            view_address=BigQueryAddress(
                dataset_id="some_dataset", table_id="other_table"
            )
        ),
    }
    self.assertEqual(expected_parents, node.parent_keys)
def test_union_dags_same_view_different_object(self) -> None:
    """Unioning DAGs that hold equal-but-distinct view objects dedupes them."""

    def _build_view() -> BigQueryView:
        # Produce a fresh object with an identical definition each call.
        return BigQueryView(
            dataset_id="dataset_1",
            view_id="table_1",
            description="table_1 description",
            should_materialize=True,
            materialized_address_override=BigQueryAddress(
                dataset_id="other_dataset_1", table_id="other_table_1"
            ),
            view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
        )

    view = _build_view()
    unioned_dag = BigQueryViewDagWalker.union_dags(
        BigQueryViewDagWalker([view]),
        BigQueryViewDagWalker([_build_view()]),
    )
    self.assertCountEqual([view], unioned_dag.views)
def test_dag_parents_materialized_non_default(self) -> None:
    """Checks that a view referencing another view's overridden (non-default)
    materialized address is linked to that view as a parent in the DAG."""
    self.maxDiff = None
    view_1 = BigQueryView(
        dataset_id="dataset_1",
        view_id="table_1",
        description="table_1 description",
        should_materialize=True,
        materialized_address_override=BigQueryAddress(
            dataset_id="other_dataset_1", table_id="other_table_1"
        ),
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
    )
    view_2 = BigQueryView(
        dataset_id="dataset_2",
        view_id="table_2",
        description="table_2 description",
        should_materialize=True,
        materialized_address_override=BigQueryAddress(
            dataset_id="other_dataset_2", table_id="other_table_2"
        ),
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table_2`",
    )
    # view_3 references view_1 by its view address and view_2 by its overridden
    # materialized address — both should resolve to the parent views.
    view_3 = BigQueryView(
        dataset_id="dataset_3",
        view_id="table_3",
        description="table_3 description",
        view_query_template="""
        SELECT * FROM `{project_id}.dataset_1.table_1`
        JOIN `{project_id}.other_dataset_2.other_table_2`
        USING (col)""",
    )
    walker = BigQueryViewDagWalker([view_1, view_2, view_3])

    def process_simple(
        # Annotation fixed: parent_results maps each parent view to the value
        # process_simple returned for it, which is a str (the view_id), not a
        # DagKey.
        view: BigQueryView, parent_results: Dict[BigQueryView, str]
    ) -> str:
        if view == view_3:
            # View 3 should have two parents
            self.assertEqual(
                {view_1: view_1.view_id, view_2: view_2.view_id}, parent_results
            )
        return view.view_id

    result = walker.process_dag(process_simple)
    self.assertEqual(
        {view_1: view_1.view_id, view_2: view_2.view_id, view_3: view_3.view_id},
        result,
    )
def delete_unmanaged_views_and_tables_from_dataset(
    bq_client: BigQueryClient,
    dataset_id: str,
    managed_tables: Set[BigQueryAddress],
    dry_run: bool,
) -> Set[BigQueryAddress]:
    """Deletes any views/tables that exist in BigQuery but are absent from the
    given set of managed views/tables.

    Args:
        bq_client: Client used to list and delete tables.
        dataset_id: The dataset to scan for unmanaged tables.
        managed_tables: Addresses of all views/tables that should be kept.
        dry_run: If True, only logs what would be deleted; nothing is deleted.

    Returns:
        The set of BigQueryAddress's for the unmanaged views/tables that were
        (or, in a dry run, would have been) deleted.

    Raises:
        ValueError: If the dataset does not exist in BigQuery.
    """
    dataset_ref = bq_client.dataset_ref_for_id(dataset_id)
    if not bq_client.dataset_exists(dataset_ref):
        raise ValueError(f"Dataset {dataset_id} does not exist in BigQuery")

    # No need to materialize list_tables() into a list — we only iterate it once.
    unmanaged_views_and_tables: Set[BigQueryAddress] = {
        address
        for address in (
            BigQueryAddress.from_list_item(table)
            for table in bq_client.list_tables(dataset_id)
        )
        if address not in managed_tables
    }

    for view in unmanaged_views_and_tables:
        if dry_run:
            logging.info(
                "[DRY RUN] Regular run would delete unmanaged table/view %s from dataset %s.",
                view.table_id,
                view.dataset_id,
            )
        else:
            logging.info(
                "Deleting unmanaged table/view %s from dataset %s.",
                view.table_id,
                view.dataset_id,
            )
            bq_client.delete_table(view.dataset_id, view.table_id)
    return unmanaged_views_and_tables
def test_get_managed_views_for_dataset_map_all_views_same_dataset(self) -> None:
    """All views living in one dataset should be grouped under that single key."""
    walker = BigQueryViewDagWalker(self.all_views_same_dataset)

    result_dict = get_managed_view_and_materialized_table_addresses_by_dataset(
        walker
    )

    expected_result: Dict[str, Set[BigQueryAddress]] = {
        "dataset_1": {
            BigQueryAddress(dataset_id="dataset_1", table_id=table_id)
            for table_id in ("table_1", "table_2", "table_3")
        },
    }
    self.assertEqual(expected_result, result_dict)
def test_get_managed_views_for_dataset_map_x_shaped_dag(self) -> None:
    """Each of the five datasets in the X-shaped DAG maps to its single view."""
    walker = BigQueryViewDagWalker(self.x_shaped_dag_views_list)

    result_dict = get_managed_view_and_materialized_table_addresses_by_dataset(
        walker
    )

    expected_result: Dict[str, Set[BigQueryAddress]] = {
        f"dataset_{i}": {
            BigQueryAddress(dataset_id=f"dataset_{i}", table_id=f"table_{i}")
        }
        for i in range(1, 6)
    }
    self.assertEqual(expected_result, result_dict)
def test_dataset_overrides_for_view_builders(self) -> None:
    """The override map should cover every builder dataset plus any custom
    materialized dataset, each with the prefix applied."""
    builders = [
        SimpleBigQueryViewBuilder(
            dataset_id="dataset_1",
            view_id="my_fake_view",
            description="my_fake_view description",
            view_query_template="SELECT NULL LIMIT 0",
            should_materialize=True,
        ),
        SimpleBigQueryViewBuilder(
            dataset_id="dataset_2",
            view_id="my_fake_view_2",
            description="my_fake_view_2 description",
            view_query_template="SELECT NULL LIMIT 0",
            should_materialize=True,
            materialized_address_override=BigQueryAddress(
                dataset_id="materialized_dataset", table_id="table_materialized"
            ),
        ),
    ]

    overrides = dataset_overrides_for_view_builders("my_prefix", builders)

    self.assertEqual(
        {
            "dataset_1": "my_prefix_dataset_1",
            "dataset_2": "my_prefix_dataset_2",
            "materialized_dataset": "my_prefix_materialized_dataset",
        },
        overrides,
    )
def test_build_unioned_segments_view_with_dataset_overrides(self) -> None:
    """Dataset overrides should rewrite both segment queries and the
    materialized address of the unioned view."""
    config = CloudSqlToBQConfig.for_schema_type(SchemaType.STATE)
    view_builder = UnionedStateSegmentsViewBuilder(
        config=config,
        table=StateBase.metadata.tables["state_person_external_id"],
        state_codes=[StateCode.US_XX, StateCode.US_WW],
    )
    view = view_builder.build(
        dataset_overrides={
            "state_regional": "my_prefix_state_regional",
            "us_xx_state_regional": "my_prefix_us_xx_state_regional",
            "us_ww_state_regional": "my_prefix_us_ww_state_regional",
        }
    )

    # Both segments share the same column selection; only the dataset differs.
    select_prefix = (
        "SELECT state_person_external_id.external_id,state_person_external_id.state_code,"
        "state_person_external_id.id_type,state_person_external_id.person_external_id_id,"
        "state_person_external_id.person_id FROM "
    )
    expected_query = (
        f"{select_prefix}"
        "`recidiviz-staging.my_prefix_us_xx_state_regional.state_person_external_id` state_person_external_id\n"
        "UNION ALL\n"
        f"{select_prefix}"
        "`recidiviz-staging.my_prefix_us_ww_state_regional.state_person_external_id` state_person_external_id"
    )
    self.assertEqual(expected_query, view.view_query)
    self.assertEqual(
        BigQueryAddress(
            dataset_id="my_prefix_state_regional",
            table_id="state_person_external_id",
        ),
        view.materialized_address,
    )
def test_parse_view_materialized_parent(self) -> None:
    """A view referencing a parent's materialized table should resolve to the
    parent view's own key, not the materialized address."""
    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="my_view_id",
        description="my view description",
        view_query_template="SELECT * FROM `{project_id}.some_dataset.some_table_materialized`",
    )
    parent_view = BigQueryView(
        dataset_id="some_dataset",
        view_id="some_table",
        description="my parent view description",
        view_query_template="SELECT * FROM UNNEST([])",
        should_materialize=True,
    )
    node = BigQueryViewDagNode(view)
    if not parent_view.materialized_address:
        # Bug fix: this message was missing the f-string prefix, so the raised
        # error contained the literal text "{parent_view}" instead of the repr.
        raise ValueError(f"Null materialized_address for view [{parent_view}]")
    node.set_materialized_addresss(
        {parent_view.materialized_address: DagKey.for_view(parent_view)}
    )
    self.assertEqual(
        node.parent_keys,
        {
            DagKey(
                view_address=BigQueryAddress(
                    dataset_id="some_dataset", table_id="some_table"
                )
            )
        },
    )
def parent_tables(self) -> Set[BigQueryAddress]:
    """The set of actual tables/views referenced by this view."""
    # Each match is a (dataset_id, table_id) pair pulled from a fully-qualified
    # `project.dataset.table` reference in the view query.
    matches = re.findall(r"`[\w-]*\.([\w-]*)\.([\w-]*)`", self.view.view_query)
    return {
        BigQueryAddress(dataset_id=dataset_id, table_id=table_id)
        for dataset_id, table_id in matches
    }
def test_build_view_state_legacy(self) -> None:
    """Builds a federated CloudSQL view against the legacy US_XX state database
    and checks the generated query, description, view address, and the
    explicitly overridden materialized address."""
    table = one(t for t in StateBase.metadata.sorted_tables if t.name == "state_person")
    view_builder = FederatedCloudSQLTableBigQueryViewBuilder(
        connection_region="us-east2",
        table=table,
        view_id=table.name,
        cloud_sql_query="SELECT * FROM state_person;",
        database_key=SQLAlchemyDatabaseKey.for_state_code(
            StateCode.US_XX, db_version=SQLAlchemyStateDatabaseVersion.LEGACY),
        materialized_address_override=BigQueryAddress(
            dataset_id="materialized_dataset", table_id="materialized_table"),
    )
    expected_description = """View providing a connection to the [state_person]
table in the [postgres] database in the [STATE] schema. This view is managed
outside of regular view update operations and the results can be found in the
schema-specific datasets (`state`, `jails`, `justice_counts`, etc)."""
    expected_view_query = f"""/*{expected_description}*/
SELECT
    *
FROM EXTERNAL_QUERY(
    "test-project.us-east2.state_cloudsql",
    "SELECT * FROM state_person;"
)"""

    # Build without dataset overrides
    view = view_builder.build()
    self.assertIsInstance(view, FederatedCloudSQLTableBigQueryView)
    self.assertEqual(expected_view_query, view.view_query)
    self.assertEqual(expected_description, view.description)
    # The view itself lives in the connection dataset...
    self.assertEqual(
        BigQueryAddress(
            dataset_id="state_cloudsql_connection",
            table_id="state_person",
        ),
        view.address,
    )
    # ...while materialization goes to the explicitly overridden address.
    self.assertEqual(
        BigQueryAddress(dataset_id="materialized_dataset", table_id="materialized_table"),
        view.materialized_address,
    )
def print_dfs_tree(
    dataset_id: str, view_id: str, print_downstream_tree: bool = False
) -> None:
    """Prints the parent DFS tree for the given view, or the child (downstream)
    tree when |print_downstream_tree| is True."""
    dag_walker = build_dag_walker(dataset_id, view_id)
    target_key = DagKey(
        view_address=BigQueryAddress(dataset_id=dataset_id, table_id=view_id)
    )
    node = dag_walker.nodes_by_key[target_key]
    dag_walker.populate_node_family_for_node(node=node)
    if print_downstream_tree:
        print(node.node_family.child_dfs_tree_str)
    else:
        print(node.node_family.parent_dfs_tree_str)
def test_delete_unmanaged_views_and_tables_dry_run(self) -> None:
    """In dry-run mode unmanaged tables are reported but never deleted."""
    self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset_ref_ds_1
    self.mock_client.dataset_exists.return_value = True
    self.mock_client.list_tables.return_value = [
        bigquery.table.TableListItem(self.mock_table_resource_ds_1_table_1),
        bigquery.table.TableListItem(self.mock_table_resource_ds_1_table_2),
    ]
    managed = {BigQueryAddress(dataset_id="dataset_1", table_id="table_1")}

    deleted_views = delete_unmanaged_views_and_tables_from_dataset(
        self.mock_client, "dataset_1", managed, dry_run=True
    )

    self.mock_client.dataset_ref_for_id.assert_called()
    self.mock_client.dataset_exists.assert_called()
    self.mock_client.list_tables.assert_called()
    self.mock_client.delete_table.assert_not_called()
    # Only table_2 is unmanaged, so it is the only address slated for deletion.
    self.assertEqual(
        deleted_views,
        {BigQueryAddress(dataset_id="dataset_1", table_id="table_2")},
    )
def materialized_address_for_unsegmented_table(
    self, table: Table
) -> BigQueryAddress:
    """Returns the address that data for |table| in an unsegmented schema is
    materialized into. Throws for state-segmented schemas.
    """
    if self.is_state_segmented_refresh_schema():
        raise ValueError(f"Unexpected schema type [{self.schema_type}]")
    dataset = self.unioned_regional_dataset(dataset_override_prefix=None)
    # TODO(#7285): JUSTICE_COUNTS has a custom materialized location for
    # backwards compatibility. Once we delete the legacy views at
    # `justice_counts.{table_name}` etc, we will be able to write materialized
    # tables to that location.
    table_id = (
        f"{table.name}_materialized"
        if self.schema_type == SchemaType.JUSTICE_COUNTS
        else table.name
    )
    return BigQueryAddress(dataset_id=dataset, table_id=table_id)
def test_parse_simple_view(self) -> None:
    """A single-parent view node should expose its own key, its parent key, and
    support root/child bookkeeping."""
    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="my_view_id",
        description="my view description",
        view_query_template="SELECT * FROM `{project_id}.some_dataset.some_table`",
    )
    self.assertIsNone(view.materialized_address)

    node = BigQueryViewDagNode(view)
    node.set_materialized_addresss({})
    self.assertEqual(False, node.is_root)

    expected_key = DagKey(
        view_address=BigQueryAddress(
            dataset_id="my_dataset", table_id="my_view_id"
        )
    )
    self.assertEqual(expected_key, node.dag_key)
    self.assertEqual(
        {
            DagKey(
                view_address=BigQueryAddress(
                    dataset_id="some_dataset", table_id="some_table"
                )
            )
        },
        node.parent_keys,
    )
    self.assertEqual(set(), node.child_keys)

    # Root flag and child keys are mutable bookkeeping on the node.
    node.is_root = True
    new_child_key = DagKey(
        view_address=BigQueryAddress(
            dataset_id="other_dataset", table_id="other_table"
        )
    )
    node.add_child_key(new_child_key)
    self.assertEqual(True, node.is_root)
    self.assertEqual({new_child_key}, node.child_keys)
def test_populate_node_family_full_parentage_complex_dependencies(self) -> None:
    """Full parentage should include all transitive parents plus the raw
    source table, even with diamond-shaped dependencies."""
    view_1 = BigQueryView(
        dataset_id="dataset_1",
        view_id="table_1",
        description="table_1 description",
        view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
    )
    view_2 = BigQueryView(
        dataset_id="dataset_2",
        view_id="table_2",
        description="table_2 description",
        view_query_template="SELECT * FROM `{project_id}.dataset_1.table_1`",
    )
    view_3 = BigQueryView(
        dataset_id="dataset_3",
        view_id="table_3",
        description="table_3 description",
        view_query_template="""
        SELECT * FROM `{project_id}.dataset_1.table_1`
        JOIN `{project_id}.dataset_2.table_2`
        USING (col)""",
    )
    view_4 = BigQueryView(
        dataset_id="dataset_4",
        view_id="table_4",
        description="table_4 description",
        view_query_template="""
        SELECT * FROM `{project_id}.dataset_2.table_2`
        JOIN `{project_id}.dataset_3.table_3`
        USING (col)""",
    )
    dag_walker = BigQueryViewDagWalker([view_1, view_2, view_3, view_4])
    start_node = dag_walker.node_for_view(view_4)

    dag_walker.populate_node_family_for_node(
        node=start_node, view_source_table_datasets={"source_dataset"}
    )

    source_table_key = DagKey(
        view_address=BigQueryAddress(
            dataset_id="source_dataset", table_id="source_table"
        )
    )
    expected_parentage = {source_table_key} | {
        DagKey.for_view(ancestor) for ancestor in (view_1, view_2, view_3)
    }
    self.assertEqual(expected_parentage, start_node.node_family.full_parentage)
def _get_one_way_dependencies(
    descendants: bool = False,
) -> Tuple[Set[DagKey], str]:
    """Returns a set of all dependent DagKeys in one direction, and a string
    representation of that tree. If |descendants| is True, returns info about
    the tree of views that are dependent on the view. If |descendants| is
    False, returns info about the tree of all views that this view depends on.

    NOTE(review): this is a closure — `node`, `datasets_to_skip`,
    `custom_node_formatter`, `view_source_table_datasets`, and `self` are
    presumably bound in an enclosing scope not visible in this chunk; confirm
    before moving this function.
    """
    # DFS stack of (key, depth); depth drives the indentation of the tree text.
    stack = [(
        DagKey(view_address=BigQueryAddress(
            dataset_id=node.view.dataset_id, table_id=node.view.table_id)),
        0,
    )]
    tree = ""
    full_dependencies: Set[DagKey] = set()
    while len(stack) > 0:
        dag_key, tabs = stack.pop()
        # Render this key unless its dataset is explicitly skipped.
        if not datasets_to_skip or dag_key.dataset_id not in datasets_to_skip:
            table_name = (custom_node_formatter(dag_key)
                          if custom_node_formatter
                          else f"{dag_key.dataset_id}.{dag_key.table_id}")
            tree += ("|" if tabs else "") + ("--" * tabs) + table_name + "\n"
        # Stop if we reached a source view
        if (view_source_table_datasets and not descendants
                and dag_key.dataset_id in view_source_table_datasets):
            continue
        # Keys for raw source tables have no node entry; only recurse into
        # keys that correspond to real views in the DAG.
        curr_node = self.nodes_by_key.get(dag_key)
        if curr_node:
            next_related_keys = (curr_node.child_keys if descendants
                                 else curr_node.parent_keys)
            # Sorted (reversed for descendants) so the rendered tree order is
            # deterministic.
            for related_key in sorted(
                next_related_keys,
                key=lambda key: (key.dataset_id, key.table_id),
                reverse=descendants,
            ):
                full_dependencies.add(related_key)
                stack.append((
                    related_key,
                    # We don't add a tab if we are skipping a view
                    tabs
                    if datasets_to_skip and dag_key.dataset_id in datasets_to_skip
                    else tabs + 1,
                ))
    return full_dependencies, tree
def create_mock_bq_table(
    self,
    dataset_id: str,
    table_id: str,
    mock_schema: MockTableSchema,
    mock_data: pd.DataFrame,
) -> None:
    """Registers the BQ address and loads |mock_data| into the backing Postgres
    table that stands in for it."""
    address = BigQueryAddress(dataset_id=dataset_id, table_id=table_id)
    postgres_table_name = self.register_bq_address(address=address)
    mock_data.to_sql(
        name=postgres_table_name,
        con=self.postgres_engine,
        dtype=mock_schema.data_types,
        index=False,
    )
def materialized_address_for_segment_table(
    self,
    table: Table,
    state_code: StateCode,
) -> BigQueryAddress:
    """Returns the address that data for |table| in the |state_code| segment is
    materialized into. Throws for unsegmented schemas.
    """
    if not self.is_state_segmented_refresh_schema():
        raise ValueError(f"Unexpected schema type [{self.schema_type}]")
    segment_dataset = self.materialized_dataset_for_segment(state_code)
    return BigQueryAddress(dataset_id=segment_dataset, table_id=table.name)
def test_delete_unmanaged_views_and_tables_dataset_doesnt_exist(self) -> None:
    """A missing dataset should raise before any tables are listed or deleted."""
    self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset_ref_ds_1
    self.mock_client.dataset_exists.return_value = False
    self.mock_client.list_tables.return_value = [
        bigquery.table.TableListItem(self.mock_table_resource_ds_1_table_1),
        bigquery.table.TableListItem(self.mock_table_resource_ds_1_table_2),
    ]
    managed = {BigQueryAddress(dataset_id="dataset_1", table_id="table_1")}

    with self.assertRaises(ValueError):
        delete_unmanaged_views_and_tables_from_dataset(
            self.mock_client, "dataset_bogus", managed, dry_run=False
        )

    self.mock_client.dataset_ref_for_id.assert_called()
    self.mock_client.dataset_exists.assert_called()
    self.mock_client.list_tables.assert_not_called()
    self.mock_client.delete_table.assert_not_called()
def assertIsValidEmptyParentsView(self, node: BigQueryViewDagNode) -> None:
    """Fails the test if a view that has no parents is not an expected view
    with no parents. Failures could be indicative of poorly formed view
    queries.
    """
    # Docstring fix: the previous wording ("is an expected view") described the
    # opposite of the actual behavior. Also renamed the typo'd local
    # `known_empty_parent_view_addresss`.
    known_empty_parent_view_addresses = {
        # These views unnest data from a static list
        BigQueryAddress(
            dataset_id="census_managed_views",
            table_id="charge_class_severity_ranks",
        ),
        BigQueryAddress(
            dataset_id="analyst_data",
            table_id="admission_start_reason_dedup_priority",
        ),
        BigQueryAddress(
            dataset_id="analyst_data",
            table_id="release_termination_reason_dedup_priority",
        ),
        BigQueryAddress(
            dataset_id="analyst_data", table_id="violation_type_dedup_priority"
        ),
        BigQueryAddress(
            dataset_id="analyst_data", table_id="supervision_level_dedup_priority"
        ),
        BigQueryAddress(
            dataset_id="analyst_data", table_id="compartment_level_2_dedup_priority"
        ),
        # Generate data using pure date functions
        BigQueryAddress(
            dataset_id="reference_views", table_id="covid_report_weeks"
        ),
        BigQueryAddress(
            dataset_id="population_projection_data", table_id="simulation_run_dates"
        ),
    }
    if node.dag_key.view_address in known_empty_parent_view_addresses:
        return

    # Federated queries pull from external connections rather than other BQ
    # tables, so having no BQ parents is expected.
    if "FROM EXTERNAL_QUERY" in node.view.view_query:
        return

    self.fail(node.dag_key)
def _get_all_config_view_addresses_for_product(
    product: ProductConfig,
) -> Set[BigQueryAddress]:
    """Returns a set containing a BQ address for each view listed by each export
    necessary for the given product.

    Args:
        product: The product whose export configurations should be scanned.

    Returns:
        The union of view addresses across all of the product's exports.
    """
    all_config_view_addresses: Set[BigQueryAddress] = set()
    for export in product.exports:
        collection_config = VIEW_COLLECTION_EXPORT_INDEX[export]
        # Update the set in place rather than rebuilding it via `union` on
        # every iteration.
        all_config_view_addresses.update(
            BigQueryAddress(
                dataset_id=view_builder.dataset_id,
                table_id=view_builder.view_id,
            )
            for view_builder in collection_config.view_builders_to_export
        )
    return all_config_view_addresses
def test_materialized_address_override_same_as_view_throws(self) -> None:
    """An override that points back at the view's own address must be rejected."""
    with self.assertRaises(ValueError) as ctx:
        _ = BigQueryView(
            dataset_id="view_dataset",
            view_id="my_view",
            description="my_view description",
            should_materialize=True,
            materialized_address_override=BigQueryAddress(
                dataset_id="view_dataset", table_id="my_view"
            ),
            view_query_template="SELECT * FROM `{project_id}.{some_dataset}.table`",
            some_dataset="a_dataset",
        )
    expected_message = (
        "Materialized address override "
        "[BigQueryAddress(dataset_id='view_dataset', table_id='my_view')] cannot be "
        "same as view itself."
    )
    self.assertEqual(expected_message, str(ctx.exception))
def test_materialized_address_override_no_should_materialize_throws(
    self,
) -> None:
    """Providing an override without should_materialize=True must be rejected."""
    with self.assertRaises(ValueError) as ctx:
        _ = BigQueryView(
            dataset_id="view_dataset",
            view_id="my_view",
            description="my_view description",
            materialized_address_override=BigQueryAddress(
                dataset_id="view_dataset_materialized",
                table_id="my_view_table",
            ),
            view_query_template="SELECT * FROM `{project_id}.{some_dataset}.table`",
            some_dataset="a_dataset",
        )
    expected_prefix = (
        "Found nonnull materialized_address_override ["
        "BigQueryAddress(dataset_id='view_dataset_materialized', table_id='my_view_table')] "
        "when `should_materialize` is not True"
    )
    self.assertTrue(str(ctx.exception).startswith(expected_prefix))
def _rewrite_table_references(self, query: str) -> str:
    """Maps BQ table references to the underlying Postgres tables"""
    table_reference_pattern = re.compile(
        r"`[\w-]+\.(?P<dataset_id>[\w-]+)\.(?P<table_id>[\w-]+)`"
    )
    for match in table_reference_pattern.finditer(query):
        full_reference = match.group()
        dataset_id = match.group("dataset_id")
        table_id = match.group("table_id")
        # Raw-data datasets like `us_xx_raw_data` collapse to the region code.
        raw_data_match = re.match(r"(us_[a-z]{2})_raw_data", dataset_id)
        if raw_data_match:
            dataset_id = raw_data_match.group(1)  # region_code
            table_id = table_id.lower()
        location = BigQueryAddress(dataset_id=dataset_id, table_id=table_id)
        if location not in self.mock_bq_to_postgres_tables:
            raise KeyError(
                f"BigQuery location [{location}] not properly registered - must be "
                f"created via create_mock_bq_table."
            )
        query = query.replace(
            full_reference, self.mock_bq_to_postgres_tables[location]
        )
    return query
def __init__(
    self,
    *,
    config: CloudSqlToBQConfig,
    table: Table,
    state_codes: List[StateCode],
):
    """Raises ValueError if |config| is not for a state-segmented schema."""
    if not config.is_state_segmented_refresh_schema():
        raise ValueError(f"Unexpected schema type [{config.schema_type.name}]")
    self.config = config
    self.table = table
    self.state_codes = state_codes
    # Dataset prefixing will be handled automatically by view building logic.
    self.dataset_id = config.unioned_regional_dataset(
        dataset_override_prefix=None
    )
    self.view_id = f"{table.name}_view"
    # Materialize to the plain table name within the unioned dataset.
    self.materialized_address_override = BigQueryAddress(
        dataset_id=self.dataset_id,
        table_id=table.name,
    )
# second, iterate over Sessions tables for view_builder in SESSIONS_SOURCE_VIEW_BUILDERS: # ensure view_builder has a defined materialized address if view_builder.materialized_address is None: raise ValueError( f"Materialized address not defined for view: {view_builder.view_id}" ) # append each unique view to the builder PARTNER_SHARED_SESSIONS_VIEW_BUILDERS.append( SimpleBigQueryViewBuilder( dataset_id=EXTERNALLY_SHARED_VIEWS_DATASET, view_id=view_prefix + view_builder.view_id, view_query_template=SESSIONS_QUERY_TEMPLATE, description=view_builder.description, origin_dataset_id=view_builder.dataset_id, origin_table_id=view_builder.materialized_address.table_id, allowed_states=str(allowed_states), should_materialize=True, materialized_address_override=BigQueryAddress( dataset_id=destination_dataset_id, table_id=view_builder.materialized_address.table_id, ), )) if __name__ == "__main__": with local_project_id_override(GCP_PROJECT_STAGING): for view_builder in PARTNER_SHARED_SESSIONS_VIEW_BUILDERS: view_builder.build_and_print()
SELECT *, ROUND(IEEE_DIVIDE(recidivated_releases, releases), 2) as recidivism_rate FROM recidivism_numbers UNION ALL SELECT * FROM pa_recidivism ORDER BY state_code, release_cohort, followup_years, gender, age_bucket, race_or_ethnicity """ # TODO(#7373): Manage this table automatically. PA_RECIDIVISM_ADDRESS = BigQueryAddress(dataset_id="us_pa_supplemental", table_id="recidivism") RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_BUILDER = MetricBigQueryViewBuilder( dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET, view_id=RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_NAME, view_query_template=RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_QUERY_TEMPLATE, dimensions=( "state_code", "release_cohort", "followup_years", "gender", "age_bucket", "race_or_ethnicity", ), description=RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_DESCRIPTION, materialized_metrics_dataset=dataset_config.
def test_materialized_address_dataset_overrides(self) -> None:
    """Dataset overrides apply to the default materialized address, to an
    explicit override address, and to the plain view address when the view is
    not materialized."""
    overrides = {
        "view_dataset": "my_override_view_dataset",
        "other_dataset": "my_override_other_dataset",
    }

    # Case 1: materialized with the default materialized address.
    default_materialized_view = BigQueryView(
        dataset_id="view_dataset",
        view_id="my_view",
        description="my_view description",
        should_materialize=True,
        dataset_overrides=overrides,
        view_query_template="SELECT * FROM `{project_id}.{some_dataset}.table`",
        some_dataset="a_dataset",
    )
    expected_default_address = BigQueryAddress(
        dataset_id="my_override_view_dataset", table_id="my_view_materialized"
    )
    self.assertEqual(
        expected_default_address, default_materialized_view.materialized_address
    )
    self.assertEqual(
        expected_default_address, default_materialized_view.table_for_query
    )

    # Case 2: materialized with an explicit address override.
    overridden_view = BigQueryView(
        dataset_id="view_dataset",
        view_id="my_view",
        description="my_view description",
        should_materialize=True,
        materialized_address_override=BigQueryAddress(
            dataset_id="other_dataset",
            table_id="my_view_table",
        ),
        dataset_overrides=overrides,
        view_query_template="SELECT * FROM `{project_id}.{some_dataset}.table`",
        some_dataset="a_dataset",
    )
    expected_override_address = BigQueryAddress(
        dataset_id="my_override_other_dataset", table_id="my_view_table"
    )
    self.assertEqual(expected_override_address, overridden_view.materialized_address)
    self.assertEqual(expected_override_address, overridden_view.table_for_query)

    # Case 3: not materialized at all — queries go to the (overridden) view.
    unmaterialized_view = BigQueryView(
        dataset_id="view_dataset",
        view_id="my_view",
        description="my_view description",
        dataset_overrides=overrides,
        view_query_template="SELECT * FROM `{project_id}.{some_dataset}.table`",
        some_dataset="a_dataset",
    )
    self.assertIsNone(unmaterialized_view.materialized_address)
    self.assertEqual(
        BigQueryAddress(
            dataset_id="my_override_view_dataset", table_id="my_view"
        ),
        unmaterialized_view.table_for_query,
    )