def setUp(self):
    """Start the metadata, BigQuery client and view_config patches and build export fixtures.

    Every patcher is kept on ``self`` (presumably so a tearDown outside this
    block can stop it - confirm against the enclosing test class).
    """
    project_id = "fake-recidiviz-project"
    dataset_id = "base_dataset"
    uri_template = "gs://{project_id}-dataset-location/subdirectory"

    self.mock_project_id = project_id
    self.mock_dataset_id = dataset_id
    self.mock_dataset = bigquery.dataset.DatasetReference(project_id, dataset_id)

    # Make metadata.project_id() report the fake project.
    self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = self.mock_project_id

    # Replace the BigQuery client class used by the export manager; the
    # instance it would construct is the mock's return_value.
    self.client_patcher = mock.patch(
        "recidiviz.metrics.export.metric_view_export_manager.BigQueryClientImpl"
    )
    patched_client_cls = self.client_patcher.start()
    self.mock_client = patched_client_cls.return_value
    self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset

    self.mock_view_builder = MetricBigQueryViewBuilder(
        dataset_id=self.mock_dataset.dataset_id,
        view_id="test_view",
        view_query_template="SELECT NULL LIMIT 0",
        dimensions=[],
    )
    self.views_for_dataset = [self.mock_view_builder]

    self.output_uri_template_for_dataset = {"dataset_id": uri_template}
    self.views_to_update = {self.mock_dataset_id: self.views_for_dataset}

    self.metric_dataset_export_configs = [
        ExportMetricDatasetConfig(
            dataset_id=self.mock_dataset_id,
            metric_view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template=uri_template,
            state_code_filter=mock_state_code,
            export_name=None,
        )
    ]

    # Patch the view_config module object, setting the three attributes the
    # fixtures above stand in for.
    self.view_export_config_patcher = mock.patch(
        "recidiviz.metrics.export.metric_view_export_manager.view_config",
        OUTPUT_DIRECTORY_URI_TEMPLATE_FOR_DATASET_EXPORT=self.output_uri_template_for_dataset,
        VIEW_BUILDERS_FOR_VIEWS_TO_UPDATE=self.views_to_update,
        METRIC_DATASET_EXPORT_CONFIGS=self.metric_dataset_export_configs,
    )
    self.mock_export_config = self.view_export_config_patcher.start()
def setUp(self) -> None:
    """Patch the project id and precompute the staging ".txt" output paths for two views."""
    self.metadata_patcher = patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = "project-id"

    self.mock_bq_view_namespace = BigQueryViewNamespace.STATE

    # One row per fixture view:
    # (view id, description, query, dimensions, intermediate table, staging directory)
    fixture_specs = [
        (
            "view1",
            "view1 description",
            "select * from table",
            ("a", "b", "c"),
            "intermediate_table",
            "gs://bucket1/staging/US_XX",
        ),
        (
            "view2",
            "view2 description",
            "select * from view2",
            ("d", "e", "f"),
            "intermediate_table2",
            "gs://bucket2/staging/US_XX",
        ),
    ]

    staging_configs = []
    for view_id, description, query, dims, table_name, staging_dir in fixture_specs:
        built_view = MetricBigQueryViewBuilder(
            dataset_id="dataset",
            view_id=view_id,
            description=description,
            view_query_template=query,
            dimensions=dims,
        ).build()
        staging_configs.append(
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_bq_view_namespace,
                view=built_view,
                view_filter_clause="WHERE state_code = 'US_XX'",
                intermediate_table_name=table_name,
                output_directory=GcsfsDirectoryPath.from_absolute_path(staging_dir),
            )
        )

    self.staging_paths = [config.output_path("txt") for config in staging_configs]
def setUp(self):
    """Patch the metadata project id and create a single-view builder fixture."""
    project_id = "fake-recidiviz-project"
    dataset_id = "base_dataset"

    self.mock_project_id = project_id
    self.mock_dataset_id = dataset_id
    self.mock_dataset = bigquery.dataset.DatasetReference(project_id, dataset_id)

    # metadata.project_id() reports the fake project while the patch is active.
    self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = self.mock_project_id

    self.mock_view_builder = MetricBigQueryViewBuilder(
        dataset_id=self.mock_dataset.dataset_id,
        view_id="test_view",
        view_query_template="SELECT NULL LIMIT 0",
        dimensions=[],
    )
    self.views_for_dataset = [self.mock_view_builder]
def setUp(self):
    """Patch the metadata project id and create namespace and single-view builder fixtures."""
    project_id = "fake-recidiviz-project"
    dataset_id = "base_dataset"

    self.mock_project_id = project_id
    self.mock_dataset_id = dataset_id
    self.mock_dataset = bigquery.dataset.DatasetReference(project_id, dataset_id)

    # metadata.project_id() reports the fake project while the patch is active.
    self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = self.mock_project_id

    self.mock_big_query_view_namespace = BigQueryViewNamespace.STATE

    self.mock_view_builder = MetricBigQueryViewBuilder(
        dataset_id=self.mock_dataset.dataset_id,
        view_id="test_view",
        view_query_template="SELECT NULL LIMIT 0",
        dimensions=[],
    )
    self.views_for_dataset = [self.mock_view_builder]
def setUp(self) -> None:
    """Patch the project id and precompute the staging ".txt" output paths for two views."""
    self.metadata_patcher = patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = "project-id"

    # One row per fixture view:
    # (view id, query, dimensions, intermediate table, staging directory)
    fixture_specs = [
        (
            "view1",
            "select * from table",
            ["a", "b", "c"],
            "intermediate_table",
            "gs://bucket1/staging/US_XX",
        ),
        (
            "view2",
            "select * from view2",
            ["d", "e", "f"],
            "intermediate_table2",
            "gs://bucket2/staging/US_XX",
        ),
    ]

    staging_configs = []
    for view_id, query, dims, table_name, staging_dir in fixture_specs:
        built_view = MetricBigQueryViewBuilder(
            dataset_id="dataset",
            view_id=view_id,
            view_query_template=query,
            dimensions=dims,
        ).build()
        staging_configs.append(
            ExportBigQueryViewConfig(
                view=built_view,
                view_filter_clause="WHERE state_code = 'US_XX'",
                intermediate_table_name=table_name,
                output_directory=GcsfsDirectoryPath.from_absolute_path(staging_dir),
            )
        )

    self.staging_paths = [config.output_path("txt") for config in staging_configs]
def get_view_builder(
    view_id: str,
    description: str,
    facility_type: state_specific_query_strings.SpotlightFacilityType,
) -> MetricBigQueryViewBuilder:
    """Return a population-by-facility-by-demographics view builder for one facility type.

    Args:
        view_id: Id given to the resulting view.
        description: Description attached to the resulting view.
        facility_type: Facility type handed to
            ``spotlight_state_specific_facility_filter`` to produce the
            ``facility_type_filter`` query argument.

    Returns:
        A MetricBigQueryViewBuilder in the public dashboard views dataset that
        uses POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_VIEW_QUERY_TEMPLATE.
    """
    demographic_dimensions = (
        "state_code",
        "date_of_stay",
        "facility",
        "race_or_ethnicity",
        "gender",
        "age_bucket",
    )

    # Extra keyword arguments forwarded to the builder alongside the query
    # template (presumably consumed as template format arguments - confirm
    # against MetricBigQueryViewBuilder).
    query_format_args = {
        "static_reference_dataset": dataset_config.STATIC_REFERENCE_TABLES_DATASET,
        "reference_views_dataset": dataset_config.REFERENCE_VIEWS_DATASET,
        "unnested_race_or_ethnicity_dimension": bq_utils.unnest_column(
            "race_or_ethnicity", "race_or_ethnicity"
        ),
        "gender_dimension": bq_utils.unnest_column("gender", "gender"),
        "age_dimension": bq_utils.unnest_column("age_bucket", "age_bucket"),
        "facility_dimension": bq_utils.unnest_column("facility", "facility"),
        "state_specific_race_or_ethnicity_groupings": (
            state_specific_query_strings.state_specific_race_or_ethnicity_groupings(
                race_or_ethnicity_column="prioritized_race_or_ethnicity"
            )
        ),
        "state_specific_facility_mapping": (
            state_specific_query_strings.spotlight_state_specific_facility()
        ),
        "facility_type_filter": (
            state_specific_query_strings.spotlight_state_specific_facility_filter(
                facility_type=facility_type
            )
        ),
    }

    return MetricBigQueryViewBuilder(
        view_id=view_id,
        description=description,
        dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
        view_query_template=POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_VIEW_QUERY_TEMPLATE,
        dimensions=demographic_dimensions,
        **query_format_args,
    )
COUNT(DISTINCT(person_id)) as total_population FROM `{project_id}.{reference_views_dataset}.most_recent_daily_incarceration_population_materialized`, {unnested_race_or_ethnicity_dimension}, {gender_dimension}, {age_dimension} WHERE (race_or_ethnicity != 'ALL' AND gender = 'ALL' AND age_bucket = 'ALL') -- Race breakdown OR (race_or_ethnicity = 'ALL' AND gender != 'ALL' AND age_bucket = 'ALL') -- Gender breakdown OR (race_or_ethnicity = 'ALL' AND gender = 'ALL' AND age_bucket != 'ALL') -- Age breakdown OR (race_or_ethnicity = 'ALL' AND gender = 'ALL' AND age_bucket = 'ALL') -- State-wide count GROUP BY state_code, date_of_stay, race_or_ethnicity, gender, age_bucket ORDER BY state_code, date_of_stay, race_or_ethnicity, gender, age_bucket """

# Builder for the incarceration-population-by-admission-reason view in the
# public dashboard views dataset.
# - `dimensions` lists the same columns the query groups and orders by.
# - reference_views_dataset, unnested_race_or_ethnicity_dimension,
#   gender_dimension and age_dimension share names with the `{...}`
#   placeholders in the query text.
# - state_specific_race_or_ethnicity_groupings has no matching placeholder in
#   the visible part of the query; presumably it is used earlier in the
#   template - confirm.
INCARCERATION_POPULATION_BY_ADMISSION_REASON_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=INCARCERATION_POPULATION_BY_ADMISSION_REASON_VIEW_NAME,
    view_query_template=INCARCERATION_POPULATION_BY_ADMISSION_REASON_VIEW_QUERY_TEMPLATE,
    dimensions=['state_code', 'date_of_stay', 'race_or_ethnicity', 'gender', 'age_bucket'],
    description=INCARCERATION_POPULATION_BY_ADMISSION_REASON_VIEW_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    unnested_race_or_ethnicity_dimension=bq_utils.unnest_column('prioritized_race_or_ethnicity', 'race_or_ethnicity'),
    gender_dimension=bq_utils.unnest_column('gender', 'gender'),
    age_dimension=bq_utils.unnest_column('age_bucket', 'age_bucket'),
    state_specific_race_or_ethnicity_groupings=state_specific_query_strings.state_specific_race_or_ethnicity_groupings()
)

# When run as a script, build and print the view inside a
# local_project_id_override(GCP_PROJECT_STAGING) context.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        INCARCERATION_POPULATION_BY_ADMISSION_REASON_VIEW_BUILDER.build_and_print()
""" REVOCATIONS_MATRIX_DISTRIBUTION_BY_VIOLATION_VIEW_BUILDER = MetricBigQueryViewBuilder( dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET, view_id=REVOCATIONS_MATRIX_DISTRIBUTION_BY_VIOLATION_VIEW_NAME, view_query_template= REVOCATIONS_MATRIX_DISTRIBUTION_BY_VIOLATION_QUERY_TEMPLATE, dimensions=( "state_code", "metric_period_months", "level_1_supervision_location", "level_2_supervision_location", "admission_type", "supervision_type", "supervision_level", "violation_type", "reported_violations", "charge_category", ), description=REVOCATIONS_MATRIX_DISTRIBUTION_BY_VIOLATION_DESCRIPTION, reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET, state_specific_violation_type_entry=state_specific_query_strings. state_specific_violation_type_entry(), state_specific_supervision_location_optimization_filter= state_specific_query_strings. state_specific_supervision_location_optimization_filter(), state_specific_violation_type_entry_categories=state_specific_query_strings .state_specific_violation_type_entry_categories(), ) if __name__ == "__main__":
ROUND(IEEE_DIVIDE(successful_termination_count, projected_completion_count), 2) as success_rate FROM success_counts ORDER BY state_code, projected_year, projected_month, supervision_type """

# Builder for the supervision-success-by-month view in the public dashboard
# views dataset.
# The keyword arguments after `description` are presumably substituted into
# `{...}` placeholders in the query template (none are visible in the part of
# the query shown here) - confirm against the full template.
SUPERVISION_SUCCESS_BY_MONTH_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=SUPERVISION_SUCCESS_BY_MONTH_VIEW_NAME,
    view_query_template=SUPERVISION_SUCCESS_BY_MONTH_VIEW_QUERY_TEMPLATE,
    dimensions=(
        "state_code",
        "supervision_type",
        "projected_year",
        "projected_month",
        "district",
    ),
    description=SUPERVISION_SUCCESS_BY_MONTH_VIEW_DESCRIPTION,
    materialized_metrics_dataset=dataset_config.DATAFLOW_METRICS_MATERIALIZED_DATASET,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    grouped_districts=state_specific_query_strings.state_supervision_specific_district_groupings(
        "supervising_district_external_id", "judicial_district_code"),
    district_dimension=bq_utils.unnest_district(),
    thirty_six_month_filter=bq_utils.thirty_six_month_filter(),
)

# When run as a script, build and print the view inside a
# local_project_id_override(GCP_PROJECT_STAGING) context.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        SUPERVISION_SUCCESS_BY_MONTH_VIEW_BUILDER.build_and_print()
{metric_period_dimension} WHERE methodology = 'EVENT' AND person_id IS NOT NULL AND DATE(year, month, 1) >= DATE_SUB(DATE_TRUNC(CURRENT_DATE('US/Pacific'), MONTH), INTERVAL metric_period_months - 1 MONTH) GROUP BY state_code, metric_period_months, supervision_type, district, person_id ) WHERE supervision_type in ('ALL', 'PAROLE', 'PROBATION') GROUP BY state_code, metric_period_months, supervision_type, district ORDER BY state_code, metric_period_months, district, supervision_type """

# Builder for the supervision-termination-by-type-by-period view in the
# dashboard views dataset.
# - `dimensions` lists the same columns as the outer GROUP BY of the query.
# - metric_period_dimension shares its name with the `{metric_period_dimension}`
#   placeholder in the query text; the other template keyword arguments are
#   presumably used in the part of the template not shown here - confirm.
SUPERVISION_TERMINATION_BY_TYPE_BY_PERIOD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=SUPERVISION_TERMINATION_BY_TYPE_BY_PERIOD_VIEW_NAME,
    view_query_template=SUPERVISION_TERMINATION_BY_TYPE_BY_PERIOD_QUERY_TEMPLATE,
    dimensions=['state_code', 'metric_period_months', 'supervision_type', 'district'],
    description=SUPERVISION_TERMINATION_BY_TYPE_BY_PERIOD_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    metrics_dataset=dataset_config.DATAFLOW_METRICS_DATASET,
    district_dimension=bq_utils.unnest_district(),
    supervision_type_dimension=bq_utils.unnest_supervision_type(),
    metric_period_dimension=bq_utils.unnest_metric_period_months(),
    filter_to_most_recent_job_id_for_metric=bq_utils.filter_to_most_recent_job_id_for_metric(
        reference_dataset=dataset_config.REFERENCE_VIEWS_DATASET)
)

# When run as a script, build and print the view inside a
# local_project_id_override(GCP_PROJECT_STAGING) context.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        SUPERVISION_TERMINATION_BY_TYPE_BY_PERIOD_VIEW_BUILDER.build_and_print()
metric_period_months FROM supervision FULL OUTER JOIN referrals USING (state_code, supervision_type, district, metric_period_months, gender) WHERE supervision_type in ('ALL', 'PAROLE', 'PROBATION') AND district IS NOT NULL AND state_code = 'US_ND' ORDER BY state_code, gender, district, supervision_type, metric_period_months """

# Builder for the FTR-referrals-by-gender-by-period view in the dashboard
# views dataset.
# - `dimensions` lists the same five columns the query joins on (USING ...).
# - The keyword arguments after `description` are presumably substituted into
#   `{...}` placeholders in the part of the template not shown here - confirm.
FTR_REFERRALS_BY_GENDER_BY_PERIOD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=FTR_REFERRALS_BY_GENDER_BY_PERIOD_VIEW_NAME,
    view_query_template=FTR_REFERRALS_BY_GENDER_BY_PERIOD_QUERY_TEMPLATE,
    dimensions=(
        "state_code",
        "metric_period_months",
        "district",
        "supervision_type",
        "gender",
    ),
    description=FTR_REFERRALS_BY_GENDER_BY_PERIOD_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    metric_period_dimension=bq_utils.unnest_metric_period_months(),
    metric_period_condition=bq_utils.metric_period_condition(),
)

# When run as a script, build and print the view inside a
# local_project_id_override(GCP_PROJECT_STAGING) context.
if __name__ == "__main__":
    with local_project_id_override(GCP_PROJECT_STAGING):
        FTR_REFERRALS_BY_GENDER_BY_PERIOD_VIEW_BUILDER.build_and_print()
WHERE methodology = 'PERSON' AND person_id IS NOT NULL AND m.metric_period_months = 1 AND {metric_period_condition} GROUP BY state_code, metric_period_months, district ) ret USING (state_code, metric_period_months, district) WHERE district IS NOT NULL ORDER BY state_code, metric_period_months, district """

# Builder for the reincarcerations-by-period view in the dashboard views
# dataset.
# - `dimensions` lists the same columns the query groups, joins and orders by.
# - metric_period_condition shares its name with the `{metric_period_condition}`
#   placeholder in the query text; the other template keyword arguments are
#   presumably used in the part of the template not shown here - confirm.
# - district_dimension is built from the 'county_of_residence' column.
REINCARCERATIONS_BY_PERIOD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REINCARCERATIONS_BY_PERIOD_VIEW_NAME,
    view_query_template=REINCARCERATIONS_BY_PERIOD_QUERY_TEMPLATE,
    dimensions=['state_code', 'metric_period_months', 'district'],
    description=REINCARCERATIONS_BY_PERIOD_DESCRIPTION,
    metrics_dataset=dataset_config.DATAFLOW_METRICS_DATASET,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    district_dimension=bq_utils.unnest_district(
        district_column='county_of_residence'),
    metric_period_dimension=bq_utils.unnest_metric_period_months(),
    metric_period_condition=bq_utils.metric_period_condition(),
    filter_to_most_recent_job_id_for_metric=bq_utils.filter_to_most_recent_job_id_for_metric(
        reference_dataset=dataset_config.REFERENCE_VIEWS_DATASET))

# When run as a script, build and print the view inside a
# local_project_id_override(GCP_PROJECT_STAGING) context.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        REINCARCERATIONS_BY_PERIOD_VIEW_BUILDER.build_and_print()
AND year = EXTRACT(YEAR FROM CURRENT_DATE('US/Pacific')) AND month = EXTRACT(MONTH FROM CURRENT_DATE('US/Pacific')) ORDER BY state_code, metric_period_months, violation_record """

# Builder for the revocations-matrix filtered-caseload view in the dashboard
# views dataset.
# The keyword arguments after `description` are presumably substituted into
# `{...}` placeholders in the query template (none are visible in the part of
# the query shown here) - confirm against the full template.
REVOCATIONS_MATRIX_FILTERED_CASELOAD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REVOCATIONS_MATRIX_FILTERED_CASELOAD_VIEW_NAME,
    view_query_template=REVOCATIONS_MATRIX_FILTERED_CASELOAD_QUERY_TEMPLATE,
    dimensions=[
        'state_code', 'metric_period_months', 'district', 'supervision_type',
        'supervision_level', 'charge_category', 'risk_level', 'violation_type',
        'reported_violations'
    ],
    description=REVOCATIONS_MATRIX_FILTERED_CASELOAD_DESCRIPTION,
    metrics_dataset=dataset_config.DATAFLOW_METRICS_DATASET,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    most_severe_violation_type_subtype_grouping=state_specific_query_strings.state_specific_most_severe_violation_type_subtype_grouping(),
    state_specific_officer_recommendation=state_specific_query_strings.state_specific_officer_recommendation(),
    state_specific_supervision_level=state_specific_query_strings.state_specific_supervision_level(),
    filter_to_most_recent_job_id_for_metric=bq_utils.filter_to_most_recent_job_id_for_metric(
        reference_dataset=dataset_config.REFERENCE_VIEWS_DATASET))

# When run as a script, build and print the view inside a
# local_project_id_override(GCP_PROJECT_STAGING) context.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        REVOCATIONS_MATRIX_FILTERED_CASELOAD_VIEW_BUILDER.build_and_print()
district, metric_period_months, race_or_ethnicity FROM `{project_id}.{reference_views_dataset}.event_based_revocations`, {metric_period_dimension}, {race_ethnicity_dimension} WHERE {metric_period_condition} GROUP BY state_code, supervision_type, district, metric_period_months, race_or_ethnicity ) rev USING (state_code, supervision_type, district, metric_period_months, race_or_ethnicity) WHERE supervision_type in ('ALL', 'PAROLE', 'PROBATION') AND race_or_ethnicity != 'EXTERNAL_UNKNOWN' ORDER BY state_code, race_or_ethnicity, district, supervision_type, metric_period_months """

# Builder for the revocations-by-race-and-ethnicity-by-period view in the
# dashboard views dataset.
# - `dimensions` lists the same five columns the query groups and joins on.
# - reference_views_dataset, metric_period_dimension, race_ethnicity_dimension
#   and metric_period_condition share names with the `{...}` placeholders in
#   the query text.
REVOCATIONS_BY_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REVOCATIONS_BY_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_NAME,
    view_query_template=REVOCATIONS_BY_RACE_AND_ETHNICITY_BY_PERIOD_QUERY_TEMPLATE,
    dimensions=['state_code', 'metric_period_months', 'supervision_type', 'district', 'race_or_ethnicity'],
    description=REVOCATIONS_BY_RACE_AND_ETHNICITY_BY_PERIOD_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    metric_period_dimension=bq_utils.unnest_metric_period_months(),
    race_ethnicity_dimension=bq_utils.unnest_race_and_ethnicity(),
    metric_period_condition=bq_utils.metric_period_condition(),
)

# When run as a script, build and print the view inside a
# local_project_id_override(GCP_PROJECT_STAGING) context.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        REVOCATIONS_BY_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_BUILDER.build_and_print()
LEFT JOIN revocation_counts USING (state_code, violation_type, reported_violations, gender, risk_level, supervision_type, supervision_level, charge_category, district, metric_period_months) LEFT JOIN termination_counts USING (state_code, violation_type, reported_violations, gender, risk_level, supervision_type, supervision_level, charge_category, district, metric_period_months) ORDER BY state_code, metric_period_months, district, supervision_type, supervision_level, gender, risk_level, violation_type, reported_violations, charge_category """

# Builder for the revocations-matrix distribution-by-gender view in the
# dashboard views dataset.
# - `dimensions` lists the same ten columns the query joins on and orders by.
# - reference_views_dataset is presumably substituted into a `{...}`
#   placeholder in the part of the template not shown here - confirm.
REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_NAME,
    view_query_template=REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_QUERY_TEMPLATE,
    dimensions=[
        'state_code', 'metric_period_months', 'district', 'supervision_type',
        'supervision_level', 'violation_type', 'reported_violations',
        'charge_category', 'gender', 'risk_level'
    ],
    description=REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_DESCRIPTION,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
)

# When run as a script, build and print the view inside a
# local_project_id_override(GCP_PROJECT_STAGING) context.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_BUILDER.build_and_print()
class TestExportViewCollectionConfig(unittest.TestCase):
    """Exercises filter matching and export-config generation on ExportViewCollectionConfig."""

    def setUp(self):
        project_id = "fake-recidiviz-project"
        dataset_id = "base_dataset"

        self.mock_project_id = project_id
        self.mock_dataset_id = dataset_id
        self.mock_dataset = bigquery.dataset.DatasetReference(project_id, dataset_id)

        # metadata.project_id() reports the fake project until tearDown.
        self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = self.mock_project_id

        self.mock_big_query_view_namespace = BigQueryViewNamespace.STATE

        self.mock_view_builder = MetricBigQueryViewBuilder(
            dataset_id=self.mock_dataset.dataset_id,
            view_id="test_view",
            view_query_template="SELECT NULL LIMIT 0",
            dimensions=[],
        )
        self.views_for_dataset = [self.mock_view_builder]

    def tearDown(self):
        self.metadata_patcher.stop()

    def test_matches_filter(self):
        """matches_filter accepts the configured state code or export name and
        rejects an unknown export name."""
        state_scoped = ExportViewCollectionConfig(
            view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template="gs://{project_id}-bucket",
            state_code_filter="US_XX",
            export_name="EXPORT",
            bq_view_namespace=self.mock_big_query_view_namespace,
        )
        self.assertTrue(state_scoped.matches_filter("US_XX"))

        name_scoped = ExportViewCollectionConfig(
            view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template="gs://{project_id}-bucket",
            state_code_filter=None,
            export_name="VALID_EXPORT_NAME",
            bq_view_namespace=self.mock_big_query_view_namespace,
        )
        self.assertTrue(name_scoped.matches_filter("VALID_EXPORT_NAME"))
        self.assertFalse(name_scoped.matches_filter("INVALID_EXPORT_NAME"))

    def test_matches_filter_case_insensitive(self):
        """matches_filter still matches when the filter differs from the
        configured state code or export name only by letter case."""
        state_scoped = ExportViewCollectionConfig(
            view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template="gs://{project_id}-bucket",
            state_code_filter="US_XX",
            export_name="OTHER_EXPORT",
            bq_view_namespace=self.mock_big_query_view_namespace,
        )
        self.assertTrue(state_scoped.matches_filter("US_xx"))

        name_scoped = ExportViewCollectionConfig(
            view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template="gs://{project_id}-bucket",
            state_code_filter=None,
            export_name="VALID_EXPORT_NAME",
            bq_view_namespace=self.mock_big_query_view_namespace,
        )
        self.assertTrue(name_scoped.matches_filter("valid_export_name"))

    def test_metric_export_state_agnostic(self):
        """export_configs_for_views_to_export with no state filter yields an
        unfiltered config written to the formatted template directory."""
        collection_config = ExportViewCollectionConfig(
            view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template="gs://{project_id}-bucket-without-state-codes",
            state_code_filter=None,
            export_name="ALL_STATE_TEST_PRODUCT",
            bq_view_namespace=self.mock_big_query_view_namespace,
        )

        actual_configs = collection_config.export_configs_for_views_to_export(
            project_id=self.mock_project_id
        )

        built_view = self.mock_view_builder.build()
        expected_directory = GcsfsDirectoryPath.from_absolute_path(
            collection_config.output_directory_uri_template.format(
                project_id=self.mock_project_id,
            )
        )
        expected_configs = [
            ExportBigQueryViewConfig(
                view=built_view,
                view_filter_clause=None,
                intermediate_table_name=f"{built_view.view_id}_table",
                output_directory=expected_directory,
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            )
        ]

        self.assertEqual(expected_configs, actual_configs)

    def test_metric_export_state_specific(self):
        """export_configs_for_views_to_export with a state filter adds the
        state WHERE clause, table suffix and output sub-directory."""
        collection_config = ExportViewCollectionConfig(
            view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template="gs://{project_id}-bucket",
            state_code_filter="US_XX",
            export_name="TEST_REPORT",
            bq_view_namespace=self.mock_big_query_view_namespace,
        )

        actual_configs = collection_config.export_configs_for_views_to_export(
            project_id=self.mock_project_id
        )

        built_view = self.mock_view_builder.build()
        expected_directory = GcsfsDirectoryPath.from_absolute_path(
            f"gs://{self.mock_project_id}-bucket/US_XX"
        )
        expected_configs = [
            ExportBigQueryViewConfig(
                view=built_view,
                view_filter_clause=" WHERE state_code = 'US_XX'",
                intermediate_table_name=f"{built_view.view_id}_table_US_XX",
                output_directory=expected_directory,
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            )
        ]

        self.assertEqual(expected_configs, actual_configs)

    def test_metric_export_lantern_dashboard(self):
        """Same expectations as the state-agnostic case, using the
        "TEST_EXPORT" export name and no state filter."""
        collection_config = ExportViewCollectionConfig(
            view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template="gs://{project_id}-bucket-without-state-codes",
            state_code_filter=None,
            export_name="TEST_EXPORT",
            bq_view_namespace=self.mock_big_query_view_namespace,
        )

        actual_configs = collection_config.export_configs_for_views_to_export(
            project_id=self.mock_project_id
        )

        built_view = self.mock_view_builder.build()
        expected_directory = GcsfsDirectoryPath.from_absolute_path(
            collection_config.output_directory_uri_template.format(
                project_id=self.mock_project_id,
            )
        )
        expected_configs = [
            ExportBigQueryViewConfig(
                view=built_view,
                view_filter_clause=None,
                intermediate_table_name=f"{built_view.view_id}_table",
                output_directory=expected_directory,
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            )
        ]

        self.assertEqual(expected_configs, actual_configs)

    def test_metric_export_lantern_dashboard_with_state(self):
        """Same expectations as the state-specific case, using the
        "TEST_EXPORT" export name with the US_XX state filter."""
        collection_config = ExportViewCollectionConfig(
            view_builders_to_export=self.views_for_dataset,
            output_directory_uri_template="gs://{project_id}-bucket",
            state_code_filter="US_XX",
            export_name="TEST_EXPORT",
            bq_view_namespace=self.mock_big_query_view_namespace,
        )

        actual_configs = collection_config.export_configs_for_views_to_export(
            project_id=self.mock_project_id
        )

        built_view = self.mock_view_builder.build()
        expected_directory = GcsfsDirectoryPath.from_absolute_path(
            f"gs://{self.mock_project_id}-bucket/US_XX"
        )
        expected_configs = [
            ExportBigQueryViewConfig(
                view=built_view,
                view_filter_clause=" WHERE state_code = 'US_XX'",
                intermediate_table_name=f"{built_view.view_id}_table_US_XX",
                output_directory=expected_directory,
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            )
        ]

        self.assertEqual(expected_configs, actual_configs)
SELECT state_code, year, month, CASE WHEN termination_reason = 'ABSCONSION' THEN person_id ELSE NULL END AS absconsion, CASE WHEN termination_reason = 'DEATH' THEN person_id ELSE NULL END AS death, CASE WHEN termination_reason = 'DISCHARGE' THEN person_id ELSE NULL END AS discharge, CASE WHEN termination_reason = 'EXPIRATION' THEN person_id ELSE NULL END AS expiration, CASE WHEN termination_reason = 'REVOCATION' THEN person_id ELSE NULL END AS revocation, CASE WHEN termination_reason = 'SUSPENSION' THEN person_id ELSE NULL END AS suspension, CASE WHEN termination_reason = 'EXTERNAL_UNKNOWN' THEN person_id ELSE NULL END AS other, supervision_type, district FROM case_terminations ) WHERE supervision_type IN ('ALL', 'PROBATION', 'PAROLE') AND year >= EXTRACT(YEAR FROM DATE_SUB(CURRENT_DATE('US/Pacific'), INTERVAL 3 YEAR)) GROUP BY state_code, year, month, supervision_type, district ORDER BY state_code, year, month, supervision_type, district """

# Builder for the case-terminations-by-type-by-month view in the dashboard
# views dataset. `dimensions` lists the same columns the query groups and
# orders by; no extra template keyword arguments are passed.
CASE_TERMINATIONS_BY_TYPE_BY_MONTH_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET,
    view_id=CASE_TERMINATIONS_BY_TYPE_BY_MONTH_VIEW_NAME,
    view_query_template=CASE_TERMINATIONS_BY_TYPE_BY_MONTH_QUERY_TEMPLATE,
    dimensions=['state_code', 'year', 'month', 'supervision_type', 'district'],
    description=CASE_TERMINATIONS_BY_TYPE_BY_MONTH_DESCRIPTION,
)

# When run as a script, build and print the view inside a
# local_project_id_override(GCP_PROJECT_STAGING) context.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        CASE_TERMINATIONS_BY_TYPE_BY_MONTH_VIEW_BUILDER.build_and_print()
# Builder for the supervision-success-by-period-by-demographics view in the
# public dashboard views dataset.
# The keyword arguments after `description` are presumably substituted into
# `{...}` placeholders of the query template, which is defined outside this
# statement - confirm against the template.
SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_VIEW_NAME,
    view_query_template=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_VIEW_QUERY_TEMPLATE,
    dimensions=(
        "state_code",
        "supervision_type",
        "metric_period_months",
        "district",
        "race_or_ethnicity",
        "gender",
        "age_bucket",
    ),
    description=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_VIEW_DESCRIPTION,
    materialized_metrics_dataset=dataset_config.DATAFLOW_METRICS_MATERIALIZED_DATASET,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    grouped_districts=state_specific_query_strings.state_supervision_specific_district_groupings(
        "supervising_district_external_id", "judicial_district_code"),
    # The metric period condition is requested with month_offset=1.
    metric_period_condition=bq_utils.metric_period_condition(month_offset=1),
    unnested_race_or_ethnicity_dimension=bq_utils.unnest_column(
        "race_or_ethnicity", "race_or_ethnicity"),
    gender_dimension=bq_utils.unnest_column("gender", "gender"),
    age_dimension=bq_utils.unnest_column("age_bucket", "age_bucket"),
    state_specific_race_or_ethnicity_groupings=state_specific_query_strings.state_specific_race_or_ethnicity_groupings(
        "prioritized_race_or_ethnicity"),
    state_specific_supervision_type_inclusion_filter=state_specific_query_strings.state_specific_supervision_type_inclusion_filter(),
)
GROUP BY state_code, supervision_type, race_or_ethnicity, region_id ORDER BY state_code, supervision_type, race_or_ethnicity, region_id """

# Builder for the active-program-participation-by-region view in the public
# dashboard views dataset.
# - `dimensions` lists the same columns the query groups and orders by.
# - The keyword arguments after `description` are presumably substituted into
#   `{...}` placeholders in the part of the template not shown here - confirm.
ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_BUILDER = MetricBigQueryViewBuilder(
    dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET,
    view_id=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_NAME,
    view_query_template=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_QUERY_TEMPLATE,
    dimensions=[
        'state_code', 'supervision_type', 'race_or_ethnicity', 'region_id'
    ],
    description=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_DESCRIPTION,
    base_dataset=dataset_config.STATE_BASE_DATASET,
    static_reference_dataset=dataset_config.STATIC_REFERENCE_TABLES_DATASET,
    reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET,
    metrics_dataset=dataset_config.DATAFLOW_METRICS_DATASET,
    current_month_condition=bq_utils.current_month_condition(),
    state_specific_race_or_ethnicity_groupings=state_specific_query_strings.state_specific_race_or_ethnicity_groupings(),
    race_or_ethnicity_dimension=bq_utils.unnest_race_and_ethnicity(),
    unnested_race_or_ethnicity_dimension=bq_utils.unnest_column(
        'race_or_ethnicity', 'race_or_ethnicity'),
    region_dimension=bq_utils.unnest_column('region_id', 'region_id'),
    supervision_type_dimension=bq_utils.unnest_supervision_type(),
    filter_to_most_recent_job_id_for_metric=bq_utils.filter_to_most_recent_job_id_for_metric(
        reference_dataset=dataset_config.REFERENCE_VIEWS_DATASET))

# When run as a script, build and print the view inside a
# local_project_id_override(GCP_PROJECT_STAGING) context.
if __name__ == '__main__':
    with local_project_id_override(GCP_PROJECT_STAGING):
        ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_BUILDER.build_and_print()
def test_export_happy_path(self) -> None:
    """Runs the composite exporter over two views with two delegates and
    checks the calls made to the delegates and to the GCS filesystem."""
    state_filter = "WHERE state_code = 'US_XX'"

    first_view = MetricBigQueryViewBuilder(
        dataset_id="dataset",
        view_id="view1",
        view_query_template="select * from table",
        dimensions=["a", "b", "c"],
    ).build()
    second_view = MetricBigQueryViewBuilder(
        dataset_id="dataset",
        view_id="view2",
        view_query_template="select * from view2",
        dimensions=["d", "e", "f"],
    ).build()

    # (view, intermediate table, bucket) for each exported view.
    view_specs = (
        (first_view, "intermediate_table", "bucket1"),
        (second_view, "intermediate_table2", "bucket2"),
    )

    # One list of configs pointing at the final directories and one pointing
    # at the matching staging directories.
    final_configs, staging_configs = (
        [
            ExportBigQueryViewConfig(
                view=view,
                view_filter_clause=state_filter,
                intermediate_table_name=table_name,
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    f"gs://{bucket}/{subdirectory}US_XX"),
            )
            for view, table_name, bucket in view_specs
        ]
        for subdirectory in ("", "staging/")
    )

    bq_client = create_autospec(BigQueryClient)
    gcs_fs = create_autospec(GCSFileSystem)
    gcs_fs.exists.return_value = True

    # Each delegate reports the staging paths for its own file extension.
    json_delegate = create_autospec(BigQueryViewExporter)
    json_delegate.export_and_validate.return_value = [
        config.output_path("json") for config in staging_configs
    ]
    txt_delegate = create_autospec(BigQueryViewExporter)
    txt_delegate.export_and_validate.return_value = [
        config.output_path("txt") for config in staging_configs
    ]

    # Make the actual call
    composite = CompositeBigQueryViewExporter(
        bq_client, gcs_fs, [json_delegate, txt_delegate])
    composite.export_and_validate(final_configs)

    # Assert all mocks called as expected
    for delegate in (json_delegate, txt_delegate):
        delegate.export_and_validate.assert_has_calls([
            call(staging_configs),
        ])

    # Files are expected grouped by delegate (json first, then txt), and
    # within a delegate in view order.
    exported_files = [
        (bucket, f"{view_id}.{extension}")
        for extension in ("json", "txt")
        for bucket, view_id in (("bucket1", "view1"), ("bucket2", "view2"))
    ]
    staged_paths = [
        GcsfsFilePath(bucket_name=bucket,
                      blob_name=f"staging/US_XX/{file_name}")
        for bucket, file_name in exported_files
    ]
    final_paths = [
        GcsfsFilePath(bucket_name=bucket, blob_name=f"US_XX/{file_name}")
        for bucket, file_name in exported_files
    ]

    gcs_fs.copy.assert_has_calls([
        call(staged, final)
        for staged, final in zip(staged_paths, final_paths)
    ])
    gcs_fs.delete.assert_has_calls([call(staged) for staged in staged_paths])
    gcs_fs.exists.assert_has_calls([call(final) for final in final_paths])
{unnested_race_or_ethnicity_dimension}, {region_dimension}, {supervision_type_dimension} GROUP BY state_code, supervision_type, race_or_ethnicity, region_id ORDER BY state_code, supervision_type, race_or_ethnicity, region_id """ ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_BUILDER = MetricBigQueryViewBuilder( dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET, view_id=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_NAME, view_query_template= ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_QUERY_TEMPLATE, dimensions=("state_code", "supervision_type", "race_or_ethnicity", "region_id"), description=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_DESCRIPTION, static_reference_dataset=dataset_config.STATIC_REFERENCE_TABLES_DATASET, materialized_metrics_dataset=dataset_config. DATAFLOW_METRICS_MATERIALIZED_DATASET, state_specific_race_or_ethnicity_groupings=state_specific_query_strings. state_specific_race_or_ethnicity_groupings( "prioritized_race_or_ethnicity"), unnested_race_or_ethnicity_dimension=bq_utils.unnest_column( "race_or_ethnicity", "race_or_ethnicity"), region_dimension=bq_utils.unnest_column("region_id", "region_id"), supervision_type_dimension=bq_utils.unnest_supervision_type(), ) if __name__ == "__main__": with local_project_id_override(GCP_PROJECT_STAGING): ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_VIEW_BUILDER.build_and_print()
ORDER BY state_code, metric_period_months, supervision_type, race_or_ethnicity """ SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_BUILDER = MetricBigQueryViewBuilder( dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET, view_id= SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_NAME, view_query_template= SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_QUERY_TEMPLATE, dimensions=( "state_code", "supervision_type", "metric_period_months", "race_or_ethnicity", ), description= SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_VIEW_DESCRIPTION, reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET, static_reference_dataset=dataset_config.STATIC_REFERENCE_TABLES_DATASET, metric_period_condition=bq_utils.metric_period_condition(), unnested_race_or_ethnicity_dimension=bq_utils.unnest_column( "race_or_ethnicity", "race_or_ethnicity"), gender_dimension=bq_utils.unnest_column("gender", "gender"), age_dimension=bq_utils.unnest_column("age_bucket", "age_bucket"), state_specific_race_or_ethnicity_groupings=state_specific_query_strings. state_specific_race_or_ethnicity_groupings(), state_specific_supervision_type_inclusion_filter=state_specific_query_strings .state_specific_supervision_type_inclusion_filter(), ) if __name__ == "__main__": with local_project_id_override(GCP_PROJECT_STAGING):
OR (race_or_ethnicity = 'ALL' AND gender = 'ALL' AND age_bucket = 'ALL')) -- State-wide count GROUP BY state_code, district, race_or_ethnicity, gender, age_bucket ORDER BY state_code, district, race_or_ethnicity, gender, age_bucket """ SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_VIEW_BUILDER = MetricBigQueryViewBuilder( dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET, view_id=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_VIEW_NAME, view_query_template= SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_VIEW_QUERY_TEMPLATE, dimensions=("state_code", "district", "race_or_ethnicity", "gender", "age_bucket"), description=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_VIEW_DESCRIPTION, materialized_metrics_dataset=dataset_config. DATAFLOW_METRICS_MATERIALIZED_DATASET, state_specific_race_or_ethnicity_groupings=state_specific_query_strings. state_specific_race_or_ethnicity_groupings( "prioritized_race_or_ethnicity"), unnested_race_or_ethnicity_dimension=bq_utils.unnest_column( "race_or_ethnicity", "race_or_ethnicity"), gender_dimension=bq_utils.unnest_column("gender", "gender"), age_dimension=bq_utils.unnest_column("age_bucket", "age_bucket"), district_dimension=bq_utils.unnest_district( state_specific_query_strings. state_specific_judicial_district_groupings("judicial_district_code")), ) if __name__ == "__main__": with local_project_id_override(GCP_PROJECT_STAGING): SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_VIEW_BUILDER.build_and_print( )
-- Filter out any rows that don't have a specified violation_type WHERE violation_type != 'NO_VIOLATION_TYPE' """ REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_BUILDER = MetricBigQueryViewBuilder( dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET, view_id=REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_NAME, view_query_template= REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_QUERY_TEMPLATE, dimensions=( "state_code", "metric_period_months", "level_1_supervision_location", "level_2_supervision_location", "supervision_type", "supervision_level", "violation_type", "reported_violations", "admission_type", "charge_category", "gender", ), description=REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_DESCRIPTION, reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET, gender_dimension=bq_utils.unnest_column("gender", "gender"), supported_gender_values=SUPPORTED_GENDER_VALUES, ) if __name__ == "__main__": with local_project_id_override(GCP_PROJECT_STAGING): REVOCATIONS_MATRIX_DISTRIBUTION_BY_GENDER_VIEW_BUILDER.build_and_print(
SELECT *, IEEE_DIVIDE(recidivated_releases, releases) as recidivism_rate FROM recidivism_numbers ORDER BY state_code, release_cohort, followup_years, gender, age_bucket, race_or_ethnicity """ RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_BUILDER = MetricBigQueryViewBuilder( dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET, view_id=RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_NAME, view_query_template=RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_QUERY_TEMPLATE, dimensions=[ 'state_code', 'release_cohort', 'followup_years', 'gender', 'age_bucket', 'race_or_ethnicity' ], description=RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_DESCRIPTION, metrics_dataset=dataset_config.DATAFLOW_METRICS_DATASET, reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET, state_specific_race_or_ethnicity_groupings=state_specific_query_strings. state_specific_race_or_ethnicity_groupings(), race_or_ethnicity_dimension=bq_utils.unnest_column( 'prioritized_race_or_ethnicity', 'race_or_ethnicity'), gender_dimension=bq_utils.unnest_column('gender', 'gender'), age_dimension=bq_utils.unnest_column('age_bucket', 'age_bucket'), ) if __name__ == '__main__': with local_project_id_override(GCP_PROJECT_STAGING): RECIDIVISM_RATES_BY_COHORT_BY_YEAR_VIEW_BUILDER.build_and_print()
class ViewCollectionExportManagerTest(unittest.TestCase):
    """Tests for view_export_manager.py."""

    def setUp(self) -> None:
        """Builds a Flask test app with the export blueprint and patches the
        cloud task client, uuid, project id, BigQuery client, export config
        and GCS filesystem factory used by view_export_manager."""
        self.app = Flask(__name__)
        self.app.register_blueprint(export_blueprint)
        self.app.config["TESTING"] = True
        self.headers: Dict[str, Dict[Any, Any]] = {
            "x-goog-iap-jwt-assertion": {}
        }
        self.client = self.app.test_client()

        self.mock_cloud_task_client_patcher = mock.patch(
            "google.cloud.tasks_v2.CloudTasksClient")
        self.mock_cloud_task_client_patcher.start()

        self.mock_uuid_patcher = mock.patch(
            f"{CLOUD_TASK_MANAGER_PACKAGE_NAME}.uuid")
        self.mock_uuid = self.mock_uuid_patcher.start()

        # url_for needs a request context to resolve the blueprint routes.
        with self.app.test_request_context():
            self.metric_view_data_export_url = flask.url_for(
                "export.metric_view_data_export")
            self.create_metric_view_data_export_tasks_url = flask.url_for(
                "export.create_metric_view_data_export_tasks")

        self.mock_state_code = "US_XX"
        self.mock_project_id = "test-project"
        self.mock_dataset_id = "base_dataset"
        self.mock_dataset = bigquery.dataset.DatasetReference(
            self.mock_project_id, self.mock_dataset_id)

        self.metadata_patcher = mock.patch(
            "recidiviz.utils.metadata.project_id")
        self.mock_project_id_fn = self.metadata_patcher.start()
        self.mock_project_id_fn.return_value = self.mock_project_id

        self.client_patcher = mock.patch(
            "recidiviz.metrics.export.view_export_manager.BigQueryClientImpl")
        self.mock_client = self.client_patcher.start().return_value
        self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset

        # Both builders deliberately share the same view_id ("test_view").
        self.mock_view_builder = SimpleBigQueryViewBuilder(
            dataset_id=self.mock_dataset.dataset_id,
            view_id="test_view",
            description="test_view description",
            view_query_template="SELECT NULL LIMIT 0",
        )
        self.mock_metric_view_builder = MetricBigQueryViewBuilder(
            dataset_id=self.mock_dataset.dataset_id,
            view_id="test_view",
            description="test_view description",
            view_query_template="SELECT NULL LIMIT 0",
            dimensions=tuple(),
        )

        self.view_builders_for_dataset = [
            self.mock_view_builder,
            self.mock_metric_view_builder,
        ]

        self.output_uri_template_for_dataset = {
            "dataset_id": "gs://{project_id}-dataset-location/subdirectory",
        }

        self.views_to_update = {
            self.mock_dataset_id: self.view_builders_for_dataset
        }

        self.mock_export_name = "MOCK_EXPORT_NAME"
        self.mock_big_query_view_namespace = BigQueryViewNamespace.STATE

        # Export index: one export per builder plus one containing both.
        self.metric_dataset_export_configs_index = {
            "EXPORT":
            ExportViewCollectionConfig(
                view_builders_to_export=[self.mock_view_builder],
                output_directory_uri_template=
                "gs://{project_id}-dataset-location/subdirectory",
                export_name="EXPORT",
                bq_view_namespace=self.mock_big_query_view_namespace,
            ),
            "OTHER_EXPORT":
            ExportViewCollectionConfig(
                view_builders_to_export=[self.mock_metric_view_builder],
                output_directory_uri_template=
                "gs://{project_id}-dataset-location/subdirectory",
                export_name="OTHER_EXPORT",
                bq_view_namespace=self.mock_big_query_view_namespace,
            ),
            self.mock_export_name:
            ExportViewCollectionConfig(
                view_builders_to_export=self.view_builders_for_dataset,
                output_directory_uri_template=
                "gs://{project_id}-dataset-location/subdirectory",
                export_name=self.mock_export_name,
                bq_view_namespace=self.mock_big_query_view_namespace,
            ),
        }

        export_config_values = {
            "OUTPUT_DIRECTORY_URI_TEMPLATE_FOR_DATASET_EXPORT":
            self.output_uri_template_for_dataset,
            "VIEW_COLLECTION_EXPORT_INDEX":
            self.metric_dataset_export_configs_index,
        }

        self.export_config_patcher = mock.patch(  # type: ignore[call-overload]
            "recidiviz.metrics.export.view_export_manager.export_config",
            **export_config_values,
        )
        self.mock_export_config = self.export_config_patcher.start()

        self.gcs_factory_patcher = mock.patch(
            "recidiviz.metrics.export.view_export_manager.GcsfsFactory.build")
        self.gcs_factory_patcher.start().return_value = FakeGCSFileSystem()

    def tearDown(self) -> None:
        """Stops every patcher started in setUp."""
        self.client_patcher.stop()
        self.export_config_patcher.stop()
        self.metadata_patcher.stop()
        self.gcs_factory_patcher.stop()
        self.mock_uuid_patcher.stop()
        self.mock_cloud_task_client_patcher.stop()

    @mock.patch("recidiviz.utils.environment.get_gcp_environment")
    def test_get_configs_for_export_name(
            self, mock_environment: mock.MagicMock) -> None:
        """Tests get_configs_for_export_name function to ensure that export names correctly match"""
        mock_environment.return_value = "production"
        export_configs_for_filter = view_export_manager.get_configs_for_export_name(
            export_name=self.mock_export_name,
            state_code=self.mock_state_code,
            project_id=self.mock_project_id,
        )
        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()

        expected_view_config_list = [
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=view,
                view_filter_clause=
                f" WHERE state_code = '{self.mock_state_code}'",
                intermediate_table_name=
                f"{view.view_id}_table_{self.mock_state_code}",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code=self.mock_state_code,
                    )),
                export_output_formats=[ExportOutputFormatType.JSON],
            ),
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=metric_view,
                view_filter_clause=
                f" WHERE state_code = '{self.mock_state_code}'",
                # Derived from the metric view itself rather than the sibling
                # simple view (the two ids happen to be equal in this fixture).
                intermediate_table_name=
                f"{metric_view.view_id}_table_{self.mock_state_code}",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code=self.mock_state_code,
                    )),
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            ),
        ]

        self.assertEqual(expected_view_config_list, export_configs_for_filter)

        # Test for case insensitivity
        export_configs_for_filter = view_export_manager.get_configs_for_export_name(
            export_name=self.mock_export_name.lower(),
            state_code=self.mock_state_code.lower(),
            project_id=self.mock_project_id,
        )
        self.assertEqual(expected_view_config_list, export_configs_for_filter)

    @mock.patch("recidiviz.utils.environment.get_gcp_environment")
    def test_get_configs_for_export_name_state_agnostic(
            self, mock_environment: mock.MagicMock) -> None:
        """Tests get_configs_for_export_name function to ensure that export names correctly match"""
        mock_environment.return_value = "production"
        export_configs_for_filter = view_export_manager.get_configs_for_export_name(
            export_name=self.mock_export_name,
            project_id=self.mock_project_id)
        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()

        expected_view_config_list = [
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=view,
                view_filter_clause=None,
                intermediate_table_name=f"{view.view_id}_table",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory".format(
                        project_id=self.mock_project_id, )),
                export_output_formats=[ExportOutputFormatType.JSON],
            ),
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=metric_view,
                view_filter_clause=None,
                intermediate_table_name=f"{metric_view.view_id}_table",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory".format(
                        project_id=self.mock_project_id, )),
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            ),
        ]

        self.assertEqual(expected_view_config_list, export_configs_for_filter)

        # Test for case insensitivity
        export_configs_for_filter = view_export_manager.get_configs_for_export_name(
            export_name=self.mock_export_name.lower(),
            project_id=self.mock_project_id)
        self.assertEqual(expected_view_config_list, export_configs_for_filter)

    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_rematerialize: Mock) -> None:
        """Tests the table is created from the view and then extracted."""
        view_export_manager.export_view_data_to_cloud_storage(
            self.mock_export_name, self.mock_state_code, mock_view_exporter)

        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()

        expected_view_config_list_1 = [
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=view,
                view_filter_clause=" WHERE state_code = 'US_XX'",
                intermediate_table_name=f"{view.view_id}_table_US_XX",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code="US_XX",
                    )),
                export_output_formats=[ExportOutputFormatType.JSON],
            )
        ]

        expected_view_config_list_2 = [
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=metric_view,
                view_filter_clause=" WHERE state_code = 'US_XX'",
                intermediate_table_name=f"{metric_view.view_id}_table_US_XX",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-dataset-location/subdirectory/{state_code}"
                    .format(
                        project_id=self.mock_project_id,
                        state_code="US_XX",
                    )),
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            )
        ]

        mock_view_update_manager_rematerialize.assert_called()
        mock_view_exporter.export_and_validate.assert_has_calls(
            [
                mock.call([]),  # CSV export
                mock.call([]),
                mock.call([
                    expected_view_config_list_1[0].
                    pointed_to_staging_subdirectory(),
                    expected_view_config_list_2[0].
                    pointed_to_staging_subdirectory(),
                ]),  # JSON exports
                mock.call([
                    expected_view_config_list_2[0].
                    pointed_to_staging_subdirectory()
                ]),  # METRIC export ("OTHER_EXPORT")
            ],
            any_order=True,
        )

    @mock.patch(
        "recidiviz.big_query.view_update_manager.create_managed_dataset_and_deploy_views_for_view_builders"
    )
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_raise_exception_no_export_matched(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_deploy: Mock
    ) -> None:  # pylint: disable=unused-argument
        """Tests that a ValueError mentioning the job name is raised when the
        export job name matches no export config."""
        self.mock_export_config.NAMESPACE_TO_UPDATE_FOR_EXPORT_FILTER = {
            "US_YY": "NAMESPACE"
        }

        with self.assertRaises(ValueError) as e:
            view_export_manager.export_view_data_to_cloud_storage(
                export_job_name="JOBZZZ",
                override_view_exporter=mock_view_exporter)

        # Inspect the exception only once the `with` block has exited; nothing
        # placed after the raising call inside the block would run. The job
        # name is checked with assertIn because assertEqual's third positional
        # parameter is `msg`, not part of the comparison.
        # NOTE(review): the exact message wording is owned by
        # view_export_manager — confirm before pinning the full text here.
        self.assertIn("JOBZZZ", str(e.exception))

    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage_state_agnostic(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_rematerialize: Mock) -> None:
        """Tests the table is created from the view and then extracted, where the export is not state-specific."""
        state_agnostic_dataset_export_configs = {
            self.mock_export_name:
            ExportViewCollectionConfig(
                view_builders_to_export=self.view_builders_for_dataset,
                output_directory_uri_template=
                "gs://{project_id}-bucket-without-state-codes",
                export_name=self.mock_export_name,
                bq_view_namespace=self.mock_big_query_view_namespace,
            ),
        }

        self.mock_export_config.VIEW_COLLECTION_EXPORT_INDEX = (
            state_agnostic_dataset_export_configs)

        view_export_manager.export_view_data_to_cloud_storage(
            export_job_name=self.mock_export_name,
            override_view_exporter=mock_view_exporter,
        )

        view = self.mock_view_builder.build()
        metric_view = self.mock_metric_view_builder.build()

        view_export_configs = [
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=view,
                view_filter_clause=None,
                intermediate_table_name=f"{view.view_id}_table",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-bucket-without-state-codes".format(
                        project_id=self.mock_project_id, )),
                export_output_formats=[ExportOutputFormatType.JSON],
            ),
            ExportBigQueryViewConfig(
                bq_view_namespace=self.mock_big_query_view_namespace,
                view=metric_view,
                view_filter_clause=None,
                intermediate_table_name=f"{metric_view.view_id}_table",
                output_directory=GcsfsDirectoryPath.from_absolute_path(
                    "gs://{project_id}-bucket-without-state-codes".format(
                        project_id=self.mock_project_id, )),
                export_output_formats=[
                    ExportOutputFormatType.JSON,
                    ExportOutputFormatType.METRIC,
                ],
            ),
        ]

        mock_view_update_manager_rematerialize.assert_called()
        mock_view_exporter.export_and_validate.assert_has_calls(
            [
                mock.call([]),  # CSV export
                mock.call([
                    view_export_configs[1].pointed_to_staging_subdirectory()
                ]),  # METRIC export (only the metric view lists METRIC)
                mock.call([
                    conf.pointed_to_staging_subdirectory()
                    for conf in view_export_configs
                ]),  # JSON export (both views list JSON)
            ],
            any_order=True,
        )

    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage_value_error(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_rematerialize: Mock) -> None:
        """Tests that a ValueError raised by the exporter propagates out of
        export_view_data_to_cloud_storage."""
        mock_view_exporter.export_and_validate.side_effect = ValueError
        with self.assertRaises(ValueError):
            view_export_manager.export_view_data_to_cloud_storage(
                self.mock_export_name,
                override_view_exporter=mock_view_exporter)

        # Just the metric export is attempted and then the raise stops subsequent checks from happening
        mock_view_update_manager_rematerialize.assert_called_once()

    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage_validation_error(
            self, mock_view_exporter: Mock,
            mock_view_update_manager_rematerialize: Mock) -> None:
        """Tests that a ViewExportValidationError raised by the exporter does
        not propagate out of export_view_data_to_cloud_storage."""
        mock_view_exporter.export_and_validate.side_effect = ViewExportValidationError

        # Should not throw
        view_export_manager.export_view_data_to_cloud_storage(
            self.mock_export_name, override_view_exporter=mock_view_exporter)

        # Just the metric export is attempted and then the raise stops subsequent checks from happening
        mock_view_update_manager_rematerialize.assert_called_once()

    @mock.patch("recidiviz.metrics.export.view_export_manager.deployed_views")
    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.view_update_manager.create_managed_dataset_and_deploy_views_for_view_builders"
    )
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage_update_all_views(
        self,
        mock_view_exporter: Mock,
        mock_view_update_manager_deploy: Mock,
        mock_view_update_manager_rematerialize: Mock,
        mock_deployed_views: Mock,
    ) -> None:
        """Tests that all views in the namespace are updated before the export when the export name is in
        export_config.NAMESPACES_REQUIRING_FULL_UPDATE."""
        self.mock_export_config.NAMESPACES_REQUIRING_FULL_UPDATE = [
            self.mock_big_query_view_namespace
        ]

        mock_deployed_views.DEPLOYED_VIEW_BUILDERS_BY_NAMESPACE = {
            self.mock_big_query_view_namespace: self.view_builders_for_dataset
        }

        view_export_manager.export_view_data_to_cloud_storage(
            self.mock_export_name, override_view_exporter=mock_view_exporter)

        mock_view_update_manager_deploy.assert_called_with(
            view_source_table_datasets=VIEW_SOURCE_TABLE_DATASETS,
            view_builders_to_update=self.view_builders_for_dataset,
        )
        mock_view_update_manager_rematerialize.assert_called_once()

    @mock.patch("recidiviz.metrics.export.view_export_manager.deployed_views")
    @mock.patch("recidiviz.big_query.view_update_manager.rematerialize_views")
    @mock.patch(
        "recidiviz.big_query.export.big_query_view_exporter.BigQueryViewExporter"
    )
    def test_export_dashboard_data_to_cloud_storage_update_materialized_views_only(
        self,
        mock_view_exporter: Mock,
        mock_view_update_manager_rematerialize: Mock,
        mock_deployed_views: Mock,
    ) -> None:
        """Tests that only materialized views in the namespace are updated before the export when the export name is not
        in export_config.NAMESPACES_REQUIRING_FULL_UPDATE."""
        self.mock_export_config.NAMESPACES_REQUIRING_FULL_UPDATE = [
            "OTHER_NAMESPACE"
        ]

        mock_deployed_views.DEPLOYED_VIEW_BUILDERS_BY_NAMESPACE = {
            self.mock_big_query_view_namespace: self.view_builders_for_dataset
        }

        view_export_manager.export_view_data_to_cloud_storage(
            self.mock_export_name, override_view_exporter=mock_view_exporter)

        mock_view_update_manager_rematerialize.assert_called_with(
            view_source_table_datasets=VIEW_SOURCE_TABLE_DATASETS,
            all_view_builders=DEPLOYED_VIEW_BUILDERS,
            views_to_update=[
                view_builder.build()
                for view_builder in self.view_builders_for_dataset
            ],
        )

    @mock.patch(
        "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage"
    )
    def test_metric_view_data_export_valid_request(
            self, mock_export_view_data_to_cloud_storage: Mock) -> None:
        """Tests that the export endpoint returns OK for an export job name
        plus state code, in upper or lower case."""
        with self.app.test_request_context():
            mock_export_view_data_to_cloud_storage.return_value = None
            response = self.client.get(
                self.metric_view_data_export_url,
                headers=self.headers,
                query_string="export_job_name=EXPORT&state_code=US_XX",
            )
            self.assertEqual(HTTPStatus.OK, response.status_code)

            # case insensitive
            response = self.client.get(
                self.metric_view_data_export_url,
                headers=self.headers,
                query_string="export_job_name=export&state_code=us_xx",
            )
            self.assertEqual(HTTPStatus.OK, response.status_code)

    @mock.patch(
        "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage"
    )
    def test_metric_view_data_export_state_agnostic(
            self, mock_export_view_data_to_cloud_storage: Mock) -> None:
        """Tests that the export endpoint returns OK for MOCK_EXPORT_NAME
        without a state code, in upper or lower case."""
        with self.app.test_request_context():
            mock_export_view_data_to_cloud_storage.return_value = None
            response = self.client.get(
                self.metric_view_data_export_url,
                headers=self.headers,
                query_string="export_job_name=MOCK_EXPORT_NAME",
            )
            self.assertEqual(HTTPStatus.OK, response.status_code)

            # case insensitive
            response = self.client.get(
                self.metric_view_data_export_url,
                headers=self.headers,
                query_string="export_job_name=mock_export_name",
            )
            self.assertEqual(HTTPStatus.OK, response.status_code)

    @mock.patch(
        "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage"
    )
    def test_metric_view_data_export_missing_required_state_code(
            self, mock_export_view_data_to_cloud_storage: Mock) -> None:
        """Tests that the export endpoint returns BAD_REQUEST with an
        explanatory body when EXPORT is requested without a state code."""
        with self.app.test_request_context():
            mock_export_view_data_to_cloud_storage.return_value = None
            response = self.client.get(
                self.metric_view_data_export_url,
                headers=self.headers,
                query_string="export_job_name=EXPORT",
            )
            self.assertEqual(HTTPStatus.BAD_REQUEST, response.status_code)
            self.assertEqual(
                b"Missing required state_code parameter for export_job_name EXPORT",
                response.data,
            )

            # case insensitive
            response = self.client.get(
                self.metric_view_data_export_url,
                headers=self.headers,
                query_string="export_job_name=export",
            )
            self.assertEqual(HTTPStatus.BAD_REQUEST, response.status_code)
            self.assertEqual(
                b"Missing required state_code parameter for export_job_name EXPORT",
                response.data,
            )

    @mock.patch(
        "recidiviz.metrics.export.view_export_manager.export_view_data_to_cloud_storage"
    )
    def test_metric_view_data_export_missing_export_job_name(
            self, mock_export_view_data_to_cloud_storage: Mock) -> None:
        """Tests that the export endpoint returns BAD_REQUEST when the
        export_job_name parameter is absent."""
        with self.app.test_request_context():
            mock_export_view_data_to_cloud_storage.return_value = None
            response = self.client.get(
                self.metric_view_data_export_url,
                headers=self.headers,
                query_string="state_code=US_XX",
            )
            self.assertEqual(HTTPStatus.BAD_REQUEST, response.status_code)
            self.assertEqual(b"Missing required export_job_name URL parameter",
                             response.data)

    @mock.patch(
        "recidiviz.metrics.export.view_export_cloud_task_manager.ViewExportCloudTaskManager.create_metric_view_data_export_task"
    )
    def test_create_metric_view_data_export_tasks_state_code_filter(
            self, mock_create_metric_view_data_export_task: Mock) -> None:
        """Tests that filtering by a state code creates export tasks for the
        EXPORT and OTHER_EXPORT jobs with that state code."""
        with self.app.test_request_context():
            mock_create_metric_view_data_export_task.return_value = None
            response = self.client.get(
                self.create_metric_view_data_export_tasks_url,
                headers=self.headers,
                query_string="export_job_filter=US_XX",
            )
            self.assertEqual(HTTPStatus.OK, response.status_code)
            mock_create_metric_view_data_export_task.assert_has_calls(
                [
                    mock.call(export_job_name="EXPORT", state_code="US_XX"),
                    mock.call(export_job_name="OTHER_EXPORT",
                              state_code="US_XX"),
                ],
                any_order=True,
            )

            # case insensitive
            # NOTE(review): the mock is not reset between the two requests, so
            # the assertion below is also satisfied by the calls recorded for
            # the first request; consider reset_mock() once the lower-case
            # path is confirmed to create the same tasks.
            response = self.client.get(
                self.create_metric_view_data_export_tasks_url,
                headers=self.headers,
                query_string="export_job_filter=us_xx",
            )
            self.assertEqual(HTTPStatus.OK, response.status_code)
            mock_create_metric_view_data_export_task.assert_has_calls(
                [
                    mock.call(export_job_name="EXPORT", state_code="US_XX"),
                    mock.call(export_job_name="OTHER_EXPORT",
                              state_code="US_XX"),
                ],
                any_order=True,
            )

    @mock.patch(
        "recidiviz.metrics.export.view_export_cloud_task_manager.ViewExportCloudTaskManager.create_metric_view_data_export_task"
    )
    def test_create_metric_view_data_export_tasks_export_name_filter_state_agnostic(
            self, mock_create_metric_view_data_export_task: Mock) -> None:
        """Tests that filtering by MOCK_EXPORT_NAME creates a single export
        task with state_code=None."""
        with self.app.test_request_context():
            mock_create_metric_view_data_export_task.return_value = None
            response = self.client.get(
                self.create_metric_view_data_export_tasks_url,
                headers=self.headers,
                query_string="export_job_filter=MOCK_EXPORT_NAME",
            )
            self.assertEqual(HTTPStatus.OK, response.status_code)
            mock_create_metric_view_data_export_task.assert_has_calls(
                [
                    mock.call(export_job_name="MOCK_EXPORT_NAME",
                              state_code=None),
                ],
                any_order=True,
            )

            # case insensitive
            # NOTE(review): the mock is not reset, so this assertion is also
            # satisfied by the first request's recorded call.
            response = self.client.get(
                self.create_metric_view_data_export_tasks_url,
                headers=self.headers,
                query_string="export_job_filter=mock_export_name",
            )
            self.assertEqual(HTTPStatus.OK, response.status_code)
            mock_create_metric_view_data_export_task.assert_has_calls(
                [
                    mock.call(export_job_name="MOCK_EXPORT_NAME",
                              state_code=None),
                ],
                any_order=True,
            )

    @mock.patch(
        "recidiviz.metrics.export.view_export_cloud_task_manager.ViewExportCloudTaskManager.create_metric_view_data_export_task"
    )
    def test_create_metric_view_data_export_tasks_export_name_filter(
            self, mock_create_metric_view_data_export_task: Mock) -> None:
        """Tests that filtering by the EXPORT job name creates EXPORT tasks
        for the US_XX and US_WW state codes."""
        with self.app.test_request_context():
            mock_create_metric_view_data_export_task.return_value = None
            response = self.client.get(
                self.create_metric_view_data_export_tasks_url,
                headers=self.headers,
                query_string="export_job_filter=EXPORT",
            )
            self.assertEqual(HTTPStatus.OK, response.status_code)
            mock_create_metric_view_data_export_task.assert_has_calls(
                [
                    mock.call(export_job_name="EXPORT", state_code="US_XX"),
                    mock.call(export_job_name="EXPORT", state_code="US_WW"),
                ],
                any_order=True,
            )

            # case insensitive
            # NOTE(review): the mock is not reset, so this assertion is also
            # satisfied by the first request's recorded calls.
            response = self.client.get(
                self.create_metric_view_data_export_tasks_url,
                headers=self.headers,
                query_string="export_job_filter=export",
            )
            self.assertEqual(HTTPStatus.OK, response.status_code)
            mock_create_metric_view_data_export_task.assert_has_calls(
                [
                    mock.call(export_job_name="EXPORT", state_code="US_XX"),
                    mock.call(export_job_name="EXPORT", state_code="US_WW"),
                ],
                any_order=True,
            )
""" INCARCERATION_POPULATION_BY_MONTH_BY_DEMOGRAPHICS_VIEW_BUILDER = MetricBigQueryViewBuilder( dataset_id=dataset_config.PUBLIC_DASHBOARD_VIEWS_DATASET, view_id=INCARCERATION_POPULATION_BY_MONTH_BY_DEMOGRAPHICS_VIEW_NAME, view_query_template= INCARCERATION_POPULATION_BY_MONTH_BY_DEMOGRAPHICS_VIEW_QUERY_TEMPLATE, dimensions=( "state_code", "population_date", "race_or_ethnicity", "gender", "age_bucket", ), description= INCARCERATION_POPULATION_BY_MONTH_BY_DEMOGRAPHICS_VIEW_DESCRIPTION, materialized_metrics_dataset=dataset_config. DATAFLOW_METRICS_MATERIALIZED_DATASET, reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET, static_reference_dataset=dataset_config.STATIC_REFERENCE_TABLES_DATASET, unnested_race_or_ethnicity_dimension=bq_utils.unnest_column( "race_or_ethnicity", "race_or_ethnicity"), gender_dimension=bq_utils.unnest_column("gender", "gender"), age_dimension=bq_utils.unnest_column("age_bucket", "age_bucket"), state_specific_race_or_ethnicity_groupings=state_specific_query_strings. state_specific_race_or_ethnicity_groupings( "prioritized_race_or_ethnicity"), state_specific_facility_exclusion=state_specific_query_strings. state_specific_facility_exclusion(), ) if __name__ == "__main__":
def setUp(self) -> None:
    """Create the Flask test client, start the patchers and build fixtures.

    Steps, in order:
      * build a Flask app with `export_blueprint` registered and a test
        client for it;
      * patch the Cloud Tasks client class and the `uuid` name in the
        cloud task manager package;
      * resolve the URLs of the two export endpoints;
      * patch the project id lookup and the BigQuery client used by the
        view export manager;
      * build one simple and one metric view builder plus three
        `ExportViewCollectionConfig` entries ("EXPORT", "OTHER_EXPORT"
        and `self.mock_export_name`);
      * patch `export_config` in the view export manager with those
        fixtures and make `GcsfsFactory.build` return a fake GCS
        file system.
    """
    # Flask application under test and its client.
    app = Flask(__name__)
    app.register_blueprint(export_blueprint)
    app.config["TESTING"] = True
    self.app = app
    self.headers: Dict[str, Dict[Any, Any]] = {"x-goog-iap-jwt-assertion": {}}
    self.client = self.app.test_client()

    # Cloud Tasks client class and the task manager's `uuid` are mocked.
    self.mock_cloud_task_client_patcher = mock.patch(
        "google.cloud.tasks_v2.CloudTasksClient"
    )
    self.mock_cloud_task_client_patcher.start()
    self.mock_uuid_patcher = mock.patch(f"{CLOUD_TASK_MANAGER_PACKAGE_NAME}.uuid")
    self.mock_uuid = self.mock_uuid_patcher.start()

    # url_for() is called inside a request context.
    with self.app.test_request_context():
        self.metric_view_data_export_url = flask.url_for(
            "export.metric_view_data_export"
        )
        self.create_metric_view_data_export_tasks_url = flask.url_for(
            "export.create_metric_view_data_export_tasks"
        )

    # Identifiers shared by the fixtures below.
    self.mock_state_code = "US_XX"
    self.mock_project_id = "test-project"
    self.mock_dataset_id = "base_dataset"
    self.mock_dataset = bigquery.dataset.DatasetReference(
        self.mock_project_id, self.mock_dataset_id
    )

    # metadata.project_id() returns the mock project id.
    self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = self.mock_project_id

    # BigQueryClientImpl(...) in the export manager yields self.mock_client.
    self.client_patcher = mock.patch(
        "recidiviz.metrics.export.view_export_manager.BigQueryClientImpl"
    )
    mock_client_class = self.client_patcher.start()
    self.mock_client = mock_client_class.return_value
    self.mock_client.dataset_ref_for_id.return_value = self.mock_dataset

    # One plain and one metric view builder; both use view id "test_view".
    dataset_id = self.mock_dataset.dataset_id
    self.mock_view_builder = SimpleBigQueryViewBuilder(
        dataset_id=dataset_id,
        view_id="test_view",
        description="test_view description",
        view_query_template="SELECT NULL LIMIT 0",
    )
    self.mock_metric_view_builder = MetricBigQueryViewBuilder(
        dataset_id=dataset_id,
        view_id="test_view",
        description="test_view description",
        view_query_template="SELECT NULL LIMIT 0",
        dimensions=tuple(),
    )
    self.view_builders_for_dataset = [
        self.mock_view_builder,
        self.mock_metric_view_builder,
    ]

    output_uri_template = "gs://{project_id}-dataset-location/subdirectory"
    self.output_uri_template_for_dataset = {"dataset_id": output_uri_template}
    self.views_to_update = {self.mock_dataset_id: self.view_builders_for_dataset}

    self.mock_export_name = "MOCK_EXPORT_NAME"
    self.mock_big_query_view_namespace = BigQueryViewNamespace.STATE

    def collection_config(export_name, view_builders):  # type: ignore[no-untyped-def]
        # Every export config shares the URI template and the namespace.
        return ExportViewCollectionConfig(
            view_builders_to_export=view_builders,
            output_directory_uri_template=output_uri_template,
            export_name=export_name,
            bq_view_namespace=self.mock_big_query_view_namespace,
        )

    self.metric_dataset_export_configs_index = {
        "EXPORT": collection_config("EXPORT", [self.mock_view_builder]),
        "OTHER_EXPORT": collection_config(
            "OTHER_EXPORT", [self.mock_metric_view_builder]
        ),
        self.mock_export_name: collection_config(
            self.mock_export_name, self.view_builders_for_dataset
        ),
    }

    # Keyword arguments given to mock.patch() are set as attributes on the
    # mock that replaces `export_config`.
    export_config_values = {
        "OUTPUT_DIRECTORY_URI_TEMPLATE_FOR_DATASET_EXPORT": self.output_uri_template_for_dataset,
        "VIEW_COLLECTION_EXPORT_INDEX": self.metric_dataset_export_configs_index,
    }
    self.export_config_patcher = mock.patch(  # type: ignore[call-overload]
        "recidiviz.metrics.export.view_export_manager.export_config",
        **export_config_values,
    )
    self.mock_export_config = self.export_config_patcher.start()

    # GcsfsFactory.build() returns a fake file system.
    self.gcs_factory_patcher = mock.patch(
        "recidiviz.metrics.export.view_export_manager.GcsfsFactory.build"
    )
    fake_gcs_file_system = FakeGCSFileSystem()
    mock_gcs_build = self.gcs_factory_patcher.start()
    mock_gcs_build.return_value = fake_gcs_file_system
district FROM `{project_id}.{reference_views_dataset}.event_based_supervision_populations` GROUP BY state_code, year, month, supervision_type, district ) pop LEFT JOIN ( SELECT state_code, year, month, COUNT(DISTINCT person_id) AS revocation_count, supervision_type, district FROM `{project_id}.{reference_views_dataset}.event_based_revocations` GROUP BY state_code, year, month, supervision_type, district ) rev USING (state_code, year, month, supervision_type, district) WHERE supervision_type in ('ALL', 'PAROLE', 'PROBATION') ORDER BY state_code, year, month, supervision_type, district """ REVOCATIONS_BY_MONTH_VIEW_BUILDER = MetricBigQueryViewBuilder( dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET, view_id=REVOCATIONS_BY_MONTH_VIEW_NAME, view_query_template=REVOCATIONS_BY_MONTH_QUERY_TEMPLATE, dimensions=['state_code', 'year', 'month', 'supervision_type', 'district'], description=REVOCATIONS_BY_MONTH_DESCRIPTION, reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET, ) if __name__ == '__main__': with local_project_id_override(GCP_PROJECT_STAGING): REVOCATIONS_BY_MONTH_VIEW_BUILDER.build_and_print()
supervision_type, district FROM ( SELECT state_code, year, month, COUNT(IF(most_severe_violation_type = 'NEW_ADMISSION', person_id, NULL)) AS new_admissions, COUNT(IF(most_severe_violation_type = 'TECHNICAL', person_id, NULL)) AS technicals, COUNT(IF(most_severe_violation_type IN ('ABSCONDED', 'ESCAPED', 'FELONY', 'MISDEMEANOR', 'LAW'), person_id, NULL)) AS non_technicals, COUNT(person_id) AS all_violation_types_count, supervision_type, district FROM most_recent_admission WHERE admission_rank = 1 GROUP BY state_code, year, month, supervision_type, district ) ORDER BY state_code, year, month, district, supervision_type """ ADMISSIONS_BY_TYPE_BY_MONTH_VIEW_BUILDER = MetricBigQueryViewBuilder( dataset_id=dataset_config.DASHBOARD_VIEWS_DATASET, view_id=ADMISSIONS_BY_TYPE_BY_MONTH_VIEW_NAME, view_query_template=ADMISSIONS_BY_TYPE_BY_MONTH_QUERY_TEMPLATE, dimensions=("state_code", "year", "month", "supervision_type", "district"), description=ADMISSIONS_BY_TYPE_BY_MONTH_DESCRIPTION, reference_views_dataset=dataset_config.REFERENCE_VIEWS_DATASET, ) if __name__ == "__main__": with local_project_id_override(GCP_PROJECT_STAGING): ADMISSIONS_BY_TYPE_BY_MONTH_VIEW_BUILDER.build_and_print()