def test_validation_job_returns_correct_query(self) -> None:
    # query_str() should read from the view's own dataset by default and from
    # the replacement dataset once dataset_overrides is supplied to the job.
    view_builder = SimpleBigQueryViewBuilder(
        dataset_id="my_dataset",
        view_id="test_2",
        description="test_2 description",
        view_query_template="select * from literally_anything",
    )
    overrides = {"my_dataset": "my_dataset_override"}

    # Case 1: no overrides, so the query targets `my_dataset`.
    job_without_overrides = DataValidationJob(
        validation=ExistenceDataValidationCheck(
            view_builder=view_builder,
            validation_category=ValidationCategory.INVARIANT,
        ),
        region_code="US_XX",
    )
    expected_default_query = "SELECT * FROM `recidiviz-456.my_dataset.test_2` WHERE region_code = 'US_XX';"
    self.assertEqual(expected_default_query, job_without_overrides.query_str())

    # Case 2: same check, but the job carries overrides, so the query targets
    # `my_dataset_override` instead.
    job_with_overrides = DataValidationJob(
        validation=ExistenceDataValidationCheck(
            view_builder=view_builder,
            validation_category=ValidationCategory.INVARIANT,
        ),
        region_code="US_XX",
        dataset_overrides=overrides,
    )
    expected_override_query = "SELECT * FROM `recidiviz-456.my_dataset_override.test_2` WHERE region_code = 'US_XX';"
    self.assertEqual(expected_override_query, job_with_overrides.query_str())
def get_test_validations() -> List[DataValidationJob]:
    """Builds the four existence-check jobs used as test fixtures.

    The jobs are returned in a fixed order: views ``test_1`` and ``test_2``
    for region US_UT, followed by the same two views for region US_VA. Each
    job gets its own freshly constructed view builder and check.
    """
    # (region_code, view_id, description) for each job, in output order.
    job_specs = (
        ("US_UT", "test_1", "test_1 description"),
        ("US_UT", "test_2", "test_2 description"),
        ("US_VA", "test_1", "test_1 description"),
        ("US_VA", "test_2", "test_2 description"),
    )
    return [
        DataValidationJob(
            region_code=region_code,
            validation=ExistenceDataValidationCheck(
                validation_category=ValidationCategory.INVARIANT,
                view_builder=SimpleBigQueryViewBuilder(
                    dataset_id="my_dataset",
                    view_id=view_id,
                    description=description,
                    view_query_template="select * from literally_anything",
                ),
            ),
        )
        for region_code, view_id, description in job_specs
    ]
def test_existence_check_failures_below_threshold(self) -> None:
    # The mocked query yields two rows and the check allows two rows, so the
    # job is expected to be reported as successful.
    self.mock_client.run_query_async.return_value = [
        "some result row",
        "some other result row",
    ]

    existence_check = ExistenceDataValidationCheck(
        validation_type=ValidationCheckType.EXISTENCE,
        view=BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        ),
        num_allowed_rows=2,
    )
    job = DataValidationJob(region_code="US_VA", validation=existence_check)

    actual = ExistenceValidationChecker.run_check(job)

    expected = DataValidationJobResult(
        validation_job=job,
        was_successful=True,
        failure_description=None,
    )
    self.assertEqual(actual, expected)
def test_existence_check_failures(self) -> None:
    # The mocked query yields two rows and the check is built without an
    # explicit row allowance; the result details should record two invalid
    # rows against zero allowed.
    self.mock_client.run_query_async.return_value = [
        "some result row",
        "some other result row",
    ]

    existence_check = ExistenceDataValidationCheck(
        validation_category=ValidationCategory.INVARIANT,
        validation_type=ValidationCheckType.EXISTENCE,
        view_builder=SimpleBigQueryViewBuilder(
            dataset_id="my_dataset",
            view_id="test_view",
            description="test_view description",
            view_query_template="select * from literally_anything",
        ),
    )
    job = DataValidationJob(region_code="US_VA", validation=existence_check)

    actual = ExistenceValidationChecker.run_check(job)

    expected_details = ExistenceValidationResultDetails(
        num_invalid_rows=2,
        num_allowed_rows=0,
    )
    expected = DataValidationJobResult(
        validation_job=job,
        result_details=expected_details,
    )
    self.assertEqual(actual, expected)
def test_existence_check_failures(self) -> None:
    # The mocked query yields two rows and no row allowance is configured, so
    # the job should fail with a message naming both counts.
    self.mock_client.run_query_async.return_value = [
        "some result row",
        "some other result row",
    ]

    existence_check = ExistenceDataValidationCheck(
        validation_type=ValidationCheckType.EXISTENCE,
        view=BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        ),
    )
    job = DataValidationJob(region_code="US_VA", validation=existence_check)

    actual = ExistenceValidationChecker.run_check(job)

    expected = DataValidationJobResult(
        validation_job=job,
        was_successful=False,
        failure_description="Found [2] invalid rows, though [0] were expected",
    )
    self.assertEqual(actual, expected)
def test_existence_check_no_failures(self):
    # The mocked query yields no rows, so the existence check should pass
    # with no failure description.
    self.mock_client.run_query_async.return_value = []

    existence_check = ExistenceDataValidationCheck(
        validation_type=ValidationCheckType.EXISTENCE,
        view=BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        ),
    )
    job = DataValidationJob(region_code="US_VA", validation=existence_check)

    actual = ExistenceValidationChecker.run_check(job)

    expected = DataValidationJobResult(
        validation_job=job,
        was_successful=True,
        failure_description=None,
    )
    self.assertEqual(actual, expected)
def test_check_happy_path_existence(self) -> None:
    # A job wrapping an EXISTENCE-typed check should resolve to an
    # ExistenceValidationChecker.
    existence_check = ExistenceDataValidationCheck(
        validation_type=ValidationCheckType.EXISTENCE,
        view=BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        ),
    )
    job = DataValidationJob(region_code="US_VA", validation=existence_check)

    resolved_checker = check_resolver.checker_for_validation(job)

    assert isinstance(resolved_checker, ExistenceValidationChecker)
def test_existence_check_failures(self):
    # The mocked query yields two rows and no row allowance is configured, so
    # the job should fail with a message naming both counts.
    self.mock_client.run_query_async.return_value = [
        "some result row",
        "some other result row",
    ]

    existence_check = ExistenceDataValidationCheck(
        validation_type=ValidationCheckType.EXISTENCE,
        view=BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        ),
    )
    job = DataValidationJob(region_code="US_VA", validation=existence_check)

    actual = ExistenceValidationChecker.run_check(job)

    expected = DataValidationJobResult(
        validation_job=job,
        was_successful=False,
        failure_description="Found 2 invalid rows, though 0 were expected",
    )
    self.assertEqual(actual, expected)
def get_all_validations() -> List[DataValidationCheck]:
    """Returns the full list of configured validations to perform.

    This is not built as a top-level variable because the views cannot be built locally unless
    running inside of a local_project_id_override block. Every entry below calls `.build()` on
    its view builder, so the views are only built when this function is invoked.

    Returns:
        A new list of existence checks followed by sameness checks, in the order configured below.
    """
    all_data_validations: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(view=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=INCARCERATION_RELEASE_REASON_NO_DATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=OVERLAPPING_INCARCERATION_PERIODS_VIEW_BUILDER.build()),
        # TODO(#4054): This should stop failing for MO once we fix the 600ish periods with end dates of 99999999
        ExistenceDataValidationCheck(view=INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=SUPERVISION_TERMINATION_REASON_NO_DATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=OVERLAPPING_SUPERVISION_PERIODS_VIEW_BUILDER.build()),

        SamenessDataValidationCheck(view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
                                    validation_name_suffix='absconsions',
                                    comparison_columns=['absconsions_by_month', 'absconsions_by_officer']),
        SamenessDataValidationCheck(view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
                                    validation_name_suffix='discharges',
                                    comparison_columns=['discharges_by_month', 'discharges_by_officer'],
                                    max_allowed_error=0.02),
        SamenessDataValidationCheck(view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
                                    comparison_columns=['age_bucket_sum', 'risk_level_sum', 'gender_sum', 'race_sum'],
                                    max_allowed_error=0.06),
        SamenessDataValidationCheck(view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
                                    comparison_columns=PO_REPORT_COMPARISON_COLUMNS),
        SamenessDataValidationCheck(view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
                                    comparison_columns=['cell_sum', 'caseload_sum']),
        SamenessDataValidationCheck(view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
                                    comparison_columns=['cell_sum', 'month_sum'],
                                    max_allowed_error=0.03),
        # This version of this validation excludes the race column explicitly since we have chosen to count people with
        # multiple races in counts for each individual race, so the sum of the race breakdowns will not match the total.
        # NOTE: 'race_sum' must stay out of this list; the 'with_race' check below is the one that compares it, under
        # a looser error threshold.
        SamenessDataValidationCheck(view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
                                    comparison_columns=['district_sum', 'risk_level_sum', 'gender_sum', 'officer_sum']),
        # This version of the validation checks to make sure the race sum isn't far off from the other sums, even
        # though we expect them to be different (e.g. make sure it isn't zero).
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            validation_name_suffix='with_race',
            comparison_columns=['district_sum', 'risk_level_sum', 'gender_sum', 'race_sum', 'officer_sum'],
            max_allowed_error=.05
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['dashboard_revocation_count', 'public_dashboard_revocation_count']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='termination',
            comparison_columns=['dashboard_successful_termination', 'public_dashboard_successful_termination']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='completion',
            comparison_columns=['dashboard_projected_completion', 'public_dashboard_projected_completion']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='termination',
            comparison_columns=['dashboard_successful_termination', 'public_dashboard_successful_termination']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='completion',
            comparison_columns=['dashboard_projected_completion', 'public_dashboard_projected_completion']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['covid_report_facility_population', 'public_dashboard_facility_population']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_MONTH_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['covid_report_population', 'public_dashboard_population']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['population_by_admission_reason_total_population',
                                'population_by_facility_by_demographics_total_population']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            # pylint: disable=line-too-long
            view=INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            # pylint: disable=line-too-long
            view=SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum']
        ),
        # TODO(#3743): This validation will fail until we fix the view to handle people who age into new buckets
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum', 'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum']
        ),

        # External comparison validations
        SamenessDataValidationCheck(view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
                                    comparison_columns=['external_population_count', 'internal_population_count']),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_person_external_id', 'internal_person_external_id'],
            max_allowed_error=0.02),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='facility',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_facility', 'internal_facility'],
            max_allowed_error=0.02),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_person_external_id', 'internal_person_external_id'],
            max_allowed_error=0.2),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='district',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_district', 'internal_district'],
            max_allowed_error=0.01),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='supervision_level',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_supervision_level', 'internal_supervision_level'],
            max_allowed_error=0.02),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='supervising_officer',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_supervising_officer', 'internal_supervising_officer'],
            max_allowed_error=0.02),
        SamenessDataValidationCheck(
            view=RECIDIVISM_RELEASE_COHORT_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_person_external_id', 'internal_person_external_id'],
            max_allowed_error=0.02),
        SamenessDataValidationCheck(
            view=RECIDIVISM_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=['external_recidivated', 'internal_recidivated'],
            max_allowed_error=0.02),
    ]

    return all_data_validations
def get_all_validations() -> List[DataValidationCheck]: """Returns the full list of configured validations to perform. This is not built as a top-level variable because the views cannot be built locally being run inside of a local_project_id_override block. """ all_data_validations: List[DataValidationCheck] = [ ExistenceDataValidationCheck( view_builder=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder=INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder=INCARCERATION_RELEASE_REASON_NO_DATE_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder=OVERLAPPING_INCARCERATION_PERIODS_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder= INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder=PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder=PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder=SUPERVISION_TERMINATION_REASON_NO_DATE_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder=OVERLAPPING_SUPERVISION_PERIODS_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( 
view_builder=ACTIVE_IN_POPULATION_AFTER_DEATH_DATE_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder= INVALID_ADMISSION_REASONS_FOR_TEMPORARY_CUSTODY_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder= INVALID_ADMITTED_FROM_SUPERVISION_ADMISSION_REASON_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder= INVALID_PFI_FOR_TEMPORARY_CUSTODY_ADMISSIONS_VIEW_BUILDER, validation_category=ValidationCategory.INVARIANT, ), ExistenceDataValidationCheck( view_builder=ASSESSMENT_FRESHNESS_VALIDATION_VIEW_BUILDER, validation_category=ValidationCategory.FRESHNESS, ), ExistenceDataValidationCheck( view_builder=CONTACT_FRESHNESS_VALIDATION_VIEW_BUILDER, validation_category=ValidationCategory.FRESHNESS, ), ExistenceDataValidationCheck( view_builder=EMPLOYMENT_FRESHNESS_VALIDATION_VIEW_BUILDER, validation_category=ValidationCategory.FRESHNESS, ), ExistenceDataValidationCheck( view_builder=ETL_FRESHNESS_VALIDATION_VIEW_BUILDER, validation_category=ValidationCategory.FRESHNESS, ), SamenessDataValidationCheck( view_builder=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER, validation_name_suffix="absconsions", comparison_columns=[ "absconsions_by_month", "absconsions_from_po_report" ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER, validation_name_suffix="discharges", comparison_columns=[ "discharges_by_month", "discharges_from_po_report" ], max_allowed_error=0.02, validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder=FTR_REFERRALS_COMPARISON_VIEW_BUILDER, comparison_columns=[ "age_bucket_sum", "risk_level_sum", "gender_sum", "race_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( 
view_builder=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER, comparison_columns=PO_REPORT_COMPARISON_COLUMNS, validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_CASELOAD_ADMISSION_HISTORY_VIEW_BUILDER, comparison_columns=[ "total_revocation_admissions", "total_caseload_admissions", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER, comparison_columns=[ "cell_sum", "caseload_sum", "caseload_num_rows" ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER, comparison_columns=["cell_sum", "month_sum"], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder=REVOCATION_MATRIX_COMPARISON_BY_MONTH_VIEW_BUILDER, comparison_columns=["reference_sum", "month_sum"], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER, comparison_columns=[ "district_sum", "risk_level_sum", "gender_sum", "race_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_COMPARISON_REVOCATIONS_BY_OFFICER_VIEW_BUILDER, comparison_columns=["officer_sum", "caseload_sum"], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER, validation_name_suffix="revocation", comparison_columns=[ "revocation_count_all", "revocation_count_sum" ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER, validation_name_suffix="supervision", comparison_columns=[ "supervision_count_all", 
"supervision_population_count_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER, validation_name_suffix="recommendation", comparison_columns=[ "recommended_for_revocation_count_all", "recommended_for_revocation_count_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER, validation_name_suffix="revocation", comparison_columns=[ "revocation_count_all", "revocation_count_sum" ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER, validation_name_suffix="supervision", comparison_columns=[ "supervision_count_all", "supervision_population_count_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER, validation_name_suffix="recommendation", comparison_columns=[ "recommended_for_revocation_count_all", "recommended_for_revocation_count_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER, comparison_columns=[ "dashboard_revocation_count", "public_dashboard_revocation_count", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER, validation_name_suffix="termination", comparison_columns=[ "dashboard_successful_termination", "public_dashboard_successful_termination", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER, validation_name_suffix="completion", comparison_columns=[ 
"dashboard_projected_completion", "public_dashboard_projected_completion", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER, validation_name_suffix="termination", comparison_columns=[ "dashboard_successful_termination", "public_dashboard_successful_termination", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER, validation_name_suffix="completion", comparison_columns=[ "dashboard_projected_completion", "public_dashboard_projected_completion", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER, comparison_columns=[ "population_by_admission_reason_total_population", "population_by_facility_by_demographics_total_population", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER, comparison_columns=[ "metric_total", "age_bucket_breakdown_sum", "race_or_ethnicity_breakdown_sum", "gender_breakdown_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER, comparison_columns=[ "metric_total", "age_bucket_breakdown_sum", "race_or_ethnicity_breakdown_sum", "gender_breakdown_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER, comparison_columns=[ "metric_total", "race_or_ethnicity_breakdown_sum" ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= 
SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER, comparison_columns=[ "metric_total", "race_or_ethnicity_breakdown_sum" ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER, comparison_columns=[ "metric_total", "age_bucket_breakdown_sum", "race_or_ethnicity_breakdown_sum", "gender_breakdown_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER, comparison_columns=[ "metric_total", "age_bucket_breakdown_sum", "race_or_ethnicity_breakdown_sum", "gender_breakdown_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER, comparison_columns=[ "metric_total", "age_bucket_breakdown_sum", "race_or_ethnicity_breakdown_sum", "gender_breakdown_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER, comparison_columns=[ "metric_total", "age_bucket_breakdown_sum", "race_or_ethnicity_breakdown_sum", "gender_breakdown_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER, comparison_columns=[ "metric_total", "age_bucket_breakdown_sum", "race_or_ethnicity_breakdown_sum", "gender_breakdown_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), # TODO(#3743): This validation will fail until we fix the view to handle people who age into new buckets SamenessDataValidationCheck( view_builder= SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER, 
comparison_columns=[ "metric_total", "race_or_ethnicity_breakdown_sum", "gender_breakdown_sum", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER, comparison_columns=[ "metric_total", "race_or_ethnicity_breakdown_sum" ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= MOST_RECENT_ASSESSMENT_DATE_BY_PERSON_BY_STATE_COMPARISON_VIEW_BUILDER, comparison_columns=[ "most_recent_etl_date", "most_recent_state_date", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= MOST_RECENT_ASSESSMENT_SCORE_BY_PERSON_BY_STATE_COMPARISON_VIEW_BUILDER, comparison_columns=[ "most_recent_etl_score", "most_recent_state_score", ], validation_category=ValidationCategory.CONSISTENCY, ), SamenessDataValidationCheck( view_builder= MOST_RECENT_FACE_TO_FACE_CONTACT_DATE_BY_PERSON_BY_STATE_COMPARISON_VIEW_BUILDER, comparison_columns=[ "most_recent_etl_face_to_face_contact_date", "most_recent_state_face_to_face_contact_date", ], validation_category=ValidationCategory.CONSISTENCY, ), # External comparison validations SamenessDataValidationCheck( view_builder= INCARCERATION_ADMISSION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_data_person_id", "internal_data_person_id", ], partition_columns=["region_code", "admission_date"], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER, comparison_columns=[ "external_population_count", "internal_population_count", ], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_AGGREGATE, ), SamenessDataValidationCheck( view_builder= 
INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_data_person_id", "internal_data_person_id", ], partition_columns=["region_code", "date_of_stay"], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER, validation_name_suffix="facility", sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=["external_facility", "internal_facility"], partition_columns=["region_code", "date_of_stay"], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= INCARCERATION_RELEASE_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_data_person_id", "internal_data_person_id", ], partition_columns=["region_code", "release_date"], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_person_external_id", "internal_person_external_id", ], partition_columns=["region_code", "date_of_supervision"], max_allowed_error=0.2, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER, validation_name_suffix="district", sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=["external_district", "internal_district"], partition_columns=["region_code", "date_of_supervision"], max_allowed_error=0.01, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, 
), SamenessDataValidationCheck( view_builder= SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER, validation_name_suffix="supervision_level", sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_supervision_level", "internal_supervision_level", ], partition_columns=["region_code", "date_of_supervision"], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER, validation_name_suffix="supervising_officer", sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_supervising_officer", "internal_supervising_officer", ], partition_columns=["region_code", "date_of_supervision"], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= RECIDIVISM_RELEASE_COHORT_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_person_external_id", "internal_person_external_id", ], partition_columns=[ "region_code", "release_cohort", "follow_up_period" ], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= RECIDIVISM_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.NUMBERS, comparison_columns=[ "external_recidivated", "internal_recidivated" ], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_AGGREGATE, ), SamenessDataValidationCheck( view_builder= SUPERVISION_START_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_person_external_id", "internal_person_external_id", ], partition_columns=["region_code", "start_date"], 
max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= SUPERVISION_TERMINATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_person_external_id", "internal_person_external_id", ], partition_columns=["region_code", "termination_date"], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_person_external_id", "internal_person_external_id", ], partition_columns=["region_code", "date_of_stay"], max_allowed_error=0.02, validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER, validation_name_suffix="facility", sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=["external_facility", "internal_facility"], partition_columns=["region_code", "date_of_stay"], validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER, validation_name_suffix="legal_status", sameness_check_type=SamenessDataValidationCheckType.STRINGS, comparison_columns=[ "external_legal_status", "internal_legal_status" ], partition_columns=["region_code", "date_of_stay"], validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL, ), SamenessDataValidationCheck( view_builder= POPULATION_PROJECTION_MONTHLY_POPULATION_EXTERNAL_COMPARISON_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.NUMBERS, comparison_columns=[ "external_total_population", "internal_total_population", ], max_allowed_error=0.02, 
validation_category=ValidationCategory.EXTERNAL_AGGREGATE, ), SamenessDataValidationCheck( view_builder= INCARCERATION_POPULATION_BY_STATE_BY_DATE_JUSTICE_COUNTS_COMPARISON_VIEW_BUILDER, sameness_check_type=SamenessDataValidationCheckType.NUMBERS, comparison_columns=[ "justice_counts_total_population", "internal_total_population", ], max_allowed_error=0.06, validation_category=ValidationCategory.EXTERNAL_AGGREGATE, ), ] return all_data_validations
def get_all_validations() -> List[DataValidationCheck]:
    """Builds and returns every configured data validation check.

    The checks are assembled on each call instead of being stored in a
    module-level variable: per the original note, the views cannot be built
    locally unless the code is running inside a local_project_id_override
    block.
    """
    # Local shorthand for the enum member used by almost every sameness check.
    numbers = SamenessDataValidationCheckType.NUMBERS

    # Existence checks.
    existence_checks: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(
            view=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER.build(),
        ),
        ExistenceDataValidationCheck(
            view=INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER.build(),
        ),
        ExistenceDataValidationCheck(
            view=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER.build(),
        ),
        # TODO(2981): This should stop failing for MO once we fix the 600ish periods with end dates of 99999999
        ExistenceDataValidationCheck(
            view=INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER.build(),
        ),
        ExistenceDataValidationCheck(
            view=PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER.build(),
        ),
        ExistenceDataValidationCheck(
            view=PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER.build(),
        ),
        ExistenceDataValidationCheck(
            view=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER.build(),
        ),
    ]

    # Sameness checks comparing the listed columns of each view.
    sameness_checks: List[DataValidationCheck] = [
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=["absconsions_by_month", "absconsions_by_officer"],
            sameness_check_type=numbers,
        ),
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=["discharges_by_month", "discharges_by_officer"],
            sameness_check_type=numbers,
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=[
                "age_bucket_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
            sameness_check_type=numbers,
            max_allowed_error=0.06,
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "external_population_count",
                "internal_population_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["cell_sum", "caseload_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["cell_sum", "month_sum"],
            max_allowed_error=0.03,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "district_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_EOM_POPULATION_PERSON_LEVEL_DISTRICT_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_district", "internal_district"],
            max_allowed_error=0.01,
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_revocation_count",
                "public_dashboard_revocation_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "covid_report_facility_population",
                "public_dashboard_facility_population",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "population_by_admission_reason_total_population",
                "population_by_facility_by_demographics_total_population",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            # pylint: disable=line-too-long
            view=INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
        SamenessDataValidationCheck(
            # pylint: disable=line-too-long
            view=SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        # TODO(3743): This validation will fail until we fix the view to handle people who age into new buckets
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
    ]

    # Concatenation keeps the original ordering: existence first, then sameness.
    return existence_checks + sameness_checks
PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER from recidiviz.validation.views.state.po_report_missing_fields import PO_REPORT_MISSING_FIELDS_VIEW_BUILDER, \ PO_REPORT_COMPARISON_COLUMNS from recidiviz.validation.views.state.revocation_matrix_comparison_revocation_cell_vs_caseload import \ REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER from recidiviz.validation.views.state.revocation_matrix_comparison_revocation_cell_vs_month import \ REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER from recidiviz.validation.views.state.revocation_matrix_comparison_supervision_population import \ REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER from recidiviz.validation.views.state.supervision_eom_population_person_level_district_external_comparison import \ SUPERVISION_EOM_POPULATION_PERSON_LEVEL_DISTRICT_EXTERNAL_COMPARISON_VIEW_BUILDER from recidiviz.validation.views.state.supervision_termination_prior_to_start import \ SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER _ALL_DATA_VALIDATIONS: List[DataValidationCheck] = [ ExistenceDataValidationCheck( view=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER.build()), ExistenceDataValidationCheck( view=INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER.build()), ExistenceDataValidationCheck( view=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER.build()), ExistenceDataValidationCheck( view=PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER.build()), ExistenceDataValidationCheck( view=PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER.build()), ExistenceDataValidationCheck( view=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER.build()), SamenessDataValidationCheck( view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(), comparison_columns=['absconsions_by_month', 'absconsions_by_officer'], sameness_check_type=SamenessDataValidationCheckType.NUMBERS), SamenessDataValidationCheck(
def get_all_validations() -> List[DataValidationCheck]:
    """Builds and returns every configured data validation check.

    The checks are assembled on each call instead of being stored in a
    module-level variable: per the original note, the views cannot be built
    locally unless the code is running inside a local_project_id_override
    block.
    """
    # Local shorthand for the enum member used by almost every sameness check.
    numbers = SamenessDataValidationCheckType.NUMBERS

    # Existence checks.
    existence_checks: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(
            view=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER.build(),
        ),
        ExistenceDataValidationCheck(
            view=INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER.build(),
        ),
        ExistenceDataValidationCheck(
            view=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER.build(),
        ),
        # TODO(2981): This should stop failing for MO once we fix the 600ish periods with end dates of 99999999
        ExistenceDataValidationCheck(
            view=INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER.build(),
        ),
        ExistenceDataValidationCheck(
            view=PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER.build(),
        ),
        ExistenceDataValidationCheck(
            view=PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER.build(),
        ),
        ExistenceDataValidationCheck(
            view=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER.build(),
        ),
    ]

    # Sameness checks comparing the listed columns of each view.
    sameness_checks: List[DataValidationCheck] = [
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=["absconsions_by_month", "absconsions_by_officer"],
            sameness_check_type=numbers,
        ),
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=["discharges_by_month", "discharges_by_officer"],
            sameness_check_type=numbers,
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=[
                "age_bucket_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
            sameness_check_type=numbers,
            max_allowed_error=0.06,
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "external_population_count",
                "internal_population_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["cell_sum", "caseload_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["cell_sum", "month_sum"],
            max_allowed_error=0.03,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "district_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_EOM_POPULATION_PERSON_LEVEL_DISTRICT_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_district", "internal_district"],
            max_allowed_error=0.01,
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_VIOLATION_TYPE_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_absconsion_count",
                "public_dashboard_absconsion_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_VIOLATION_TYPE_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_new_crime_count",
                "public_dashboard_new_crime_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_VIOLATION_TYPE_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_technical_count",
                "public_dashboard_technical_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_VIOLATION_TYPE_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_unknown_count",
                "public_dashboard_unknown_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_DISTRICT_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_supervision_count",
                "public_dashboard_supervision_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "covid_report_facility_population",
                "public_dashboard_facility_population",
            ],
        ),
    ]

    # Concatenation keeps the original ordering: existence first, then sameness.
    return existence_checks + sameness_checks
def test_fetch_validation_jobs_to_perform_applies_configs(
    self,
    mock_get_all_validations_fn: MagicMock,
    mock_get_region_configs_fn: MagicMock,
) -> None:
    """Verifies that region-config overrides end up on the fetched jobs.

    The mocked validation getter serves one existence check and one sameness
    check; the mocked region-config getter serves two regions. US_XX overrides
    both thresholds while US_YY overrides nothing, so the expected jobs carry
    the overridden values for US_XX and 0 / 0.0 for US_YY.
    """
    rows_view_builder = SimpleBigQueryViewBuilder(
        project_id="my_project",
        dataset_id="my_dataset",
        view_id="existence_view",
        description="existence_view description",
        view_query_template="SELECT NULL LIMIT 0",
    )
    error_view_builder = SimpleBigQueryViewBuilder(
        project_id="my_project",
        dataset_id="my_dataset",
        view_id="sameness_view",
        description="sameness_view description",
        view_query_template="SELECT NULL LIMIT 1",
    )

    # The two validations that the mocked getter hands back.
    mock_get_all_validations_fn.return_value = [
        ExistenceDataValidationCheck(
            view_builder=rows_view_builder,
            validation_category=ValidationCategory.INVARIANT,
        ),
        SamenessDataValidationCheck(
            view_builder=error_view_builder,
            comparison_columns=["col1", "col2"],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
    ]

    # US_XX overrides both thresholds, keyed by the view ids.
    us_xx_config = ValidationRegionConfig(
        region_code="US_XX",
        exclusions={},
        num_allowed_rows_overrides={
            rows_view_builder.view_id: ValidationNumAllowedRowsOverride(
                region_code="US_XX",
                validation_name=rows_view_builder.view_id,
                num_allowed_rows_override=10,
                override_reason="This is broken",
            )
        },
        max_allowed_error_overrides={
            error_view_builder.view_id: ValidationMaxAllowedErrorOverride(
                region_code="US_XX",
                validation_name=error_view_builder.view_id,
                max_allowed_error_override=0.3,
                override_reason="This is also broken",
            )
        },
    )
    # US_YY has no exclusions and no overrides.
    us_yy_config = ValidationRegionConfig(
        region_code="US_YY",
        exclusions={},
        num_allowed_rows_overrides={},
        max_allowed_error_overrides={},
    )
    mock_get_region_configs_fn.return_value = {
        "US_XX": us_xx_config,
        "US_YY": us_yy_config,
    }

    result = _fetch_validation_jobs_to_perform()

    # Expected checks, built in the same order as the expected job list.
    existence_check_us_xx = ExistenceDataValidationCheck(
        validation_category=ValidationCategory.INVARIANT,
        view_builder=rows_view_builder,
        validation_name_suffix=None,
        validation_type=ValidationCheckType.EXISTENCE,
        num_allowed_rows=10,
    )
    existence_check_us_yy = ExistenceDataValidationCheck(
        validation_category=ValidationCategory.INVARIANT,
        view_builder=rows_view_builder,
        validation_name_suffix=None,
        validation_type=ValidationCheckType.EXISTENCE,
        num_allowed_rows=0,
    )  # No override
    sameness_check_us_xx = SamenessDataValidationCheck(
        validation_category=ValidationCategory.CONSISTENCY,
        view_builder=error_view_builder,
        validation_name_suffix=None,
        comparison_columns=["col1", "col2"],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        max_allowed_error=0.3,
        validation_type=ValidationCheckType.SAMENESS,
    )
    sameness_check_us_yy = SamenessDataValidationCheck(
        validation_category=ValidationCategory.CONSISTENCY,
        view_builder=error_view_builder,
        validation_name_suffix=None,
        comparison_columns=["col1", "col2"],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        max_allowed_error=0.0,
        validation_type=ValidationCheckType.SAMENESS,
    )

    expected_jobs = [
        DataValidationJob(validation=existence_check_us_xx, region_code="US_XX"),
        DataValidationJob(validation=existence_check_us_yy, region_code="US_YY"),
        DataValidationJob(validation=sameness_check_us_xx, region_code="US_XX"),
        DataValidationJob(validation=sameness_check_us_yy, region_code="US_YY"),
    ]
    self.assertEqual(expected_jobs, result)
def get_all_validations() -> List[DataValidationCheck]:
    """Builds and returns every configured data validation check.

    The checks are assembled on each call instead of being stored in a
    module-level variable: per the original note, the views cannot be built
    locally unless the code is running inside a local_project_id_override
    block.
    """
    # Local shorthands for the explicitly requested sameness check types.
    strings = SamenessDataValidationCheckType.STRINGS
    numbers = SamenessDataValidationCheckType.NUMBERS

    # Existence checks.
    existence_checks: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(view=builder.build())
        for builder in (
            INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER,
            INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER,
            INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER,
            INCARCERATION_RELEASE_REASON_NO_DATE_VIEW_BUILDER,
            OVERLAPPING_INCARCERATION_PERIODS_VIEW_BUILDER,
            INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER,
            PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER,
            PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER,
            SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER,
            SUPERVISION_TERMINATION_REASON_NO_DATE_VIEW_BUILDER,
            OVERLAPPING_SUPERVISION_PERIODS_VIEW_BUILDER,
        )
    ]

    # Sameness checks that precede the external comparison section.
    internal_sameness_checks: List[DataValidationCheck] = [
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="absconsions",
            comparison_columns=["absconsions_by_month", "absconsions_by_officer"],
        ),
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="discharges",
            comparison_columns=["discharges_by_month", "discharges_by_officer"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=[
                "age_bucket_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
            comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
            comparison_columns=["cell_sum", "caseload_sum", "caseload_num_rows"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
            comparison_columns=["cell_sum", "month_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            comparison_columns=[
                "district_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATIONS_BY_OFFICER_VIEW_BUILDER.build(),
            comparison_columns=["officer_sum", "caseload_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="revocation",
            comparison_columns=["revocation_count_all", "revocation_count_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="supervision",
            comparison_columns=[
                "supervision_count_all",
                "supervision_population_count_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="recommendation",
            comparison_columns=[
                "recommended_for_revocation_count_all",
                "recommended_for_revocation_count_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="revocation",
            comparison_columns=["revocation_count_all", "revocation_count_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="supervision",
            comparison_columns=[
                "supervision_count_all",
                "supervision_population_count_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="recommendation",
            comparison_columns=[
                "recommended_for_revocation_count_all",
                "recommended_for_revocation_count_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=[
                "dashboard_revocation_count",
                "public_dashboard_revocation_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="termination",
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="completion",
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="termination",
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="completion",
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=[
                "covid_report_facility_population",
                "public_dashboard_facility_population",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_MONTH_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=[
                "covid_report_population",
                "public_dashboard_population",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=[
                "population_by_admission_reason_total_population",
                "population_by_facility_by_demographics_total_population",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        # TODO(#3743): This validation will fail until we fix the view to handle people who age into new buckets
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
    ]

    # External comparison validations
    external_comparison_checks: List[DataValidationCheck] = [
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=[
                "external_population_count",
                "internal_population_count",
            ],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="facility",
            sameness_check_type=strings,
            comparison_columns=["external_facility", "internal_facility"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            max_allowed_error=0.2,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="district",
            sameness_check_type=strings,
            comparison_columns=["external_district", "internal_district"],
            max_allowed_error=0.01,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="supervision_level",
            sameness_check_type=strings,
            comparison_columns=[
                "external_supervision_level",
                "internal_supervision_level",
            ],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="supervising_officer",
            sameness_check_type=strings,
            comparison_columns=[
                "external_supervising_officer",
                "internal_supervising_officer",
            ],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=RECIDIVISM_RELEASE_COHORT_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=RECIDIVISM_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["external_recidivated", "internal_recidivated"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_TERMINATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="facility",
            sameness_check_type=strings,
            comparison_columns=["external_facility", "internal_facility"],
        ),
        SamenessDataValidationCheck(
            view=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="legal_status",
            sameness_check_type=strings,
            comparison_columns=["external_legal_status", "internal_legal_status"],
        ),
        SamenessDataValidationCheck(
            view=POPULATION_PROJECTION_MONTHLY_POPULATION_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "external_total_population",
                "internal_total_population",
            ],
            max_allowed_error=0.02,
        ),
    ]

    # Concatenation keeps the original ordering of the three groups.
    return existence_checks + internal_sameness_checks + external_comparison_checks
from flask import Flask from mock import patch from recidiviz.big_query.big_query_view import BigQueryView from recidiviz.tests.utils.matchers import UnorderedCollection from recidiviz.validation.checks.existence_check import ExistenceDataValidationCheck from recidiviz.validation.configured_validations import get_all_validations, STATES_TO_VALIDATE from recidiviz.validation.validation_manager import validation_manager_blueprint, _fetch_validation_jobs_to_perform from recidiviz.validation.validation_models import DataValidationJob, DataValidationJobResult from recidiviz.validation.views import view_config _TEST_VALIDATIONS: List[DataValidationJob] = [ DataValidationJob( region_code='US_UT', validation=ExistenceDataValidationCheck(view=BigQueryView( dataset_id='my_dataset', view_id='test_1', view_query_template='select * from literally_anything'))), DataValidationJob( region_code='US_UT', validation=ExistenceDataValidationCheck(view=BigQueryView( dataset_id='my_dataset', view_id='test_2', view_query_template='select * from literally_anything'))), DataValidationJob( region_code='US_VA', validation=ExistenceDataValidationCheck(view=BigQueryView( dataset_id='my_dataset', view_id='test_1', view_query_template='select * from literally_anything'))), DataValidationJob( region_code='US_VA',
INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW from recidiviz.validation.views.state.incarceration_release_prior_to_admission import \ INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW from recidiviz.validation.views.state.revocation_matrix_comparison_revocation_cell_vs_caseload import \ REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW from recidiviz.validation.views.state.revocation_matrix_comparison_revocation_cell_vs_month import \ REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW from recidiviz.validation.views.state.revocation_matrix_comparison_supervision_population import \ REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW from recidiviz.validation.views.state.supervision_eom_population_person_level_district_external_comparison import \ SUPERVISION_EOM_POPULATION_PERSON_LEVEL_DISTRICT_EXTERNAL_COMPARISON_VIEW from recidiviz.validation.views.state.supervision_termination_prior_to_start import \ SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW _ALL_DATA_VALIDATIONS: List[DataValidationCheck] = [ ExistenceDataValidationCheck( view=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW), ExistenceDataValidationCheck(view=INCARCERATION_ADMISSION_NULLS_VIEW), ExistenceDataValidationCheck( view=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW), ExistenceDataValidationCheck( view=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW), SamenessDataValidationCheck( view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW, comparison_columns=['absconsions_by_month', 'absconsions_by_officer'], sameness_check_type=SamenessDataValidationCheckType.NUMBERS), SamenessDataValidationCheck( view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW, comparison_columns=['discharges_by_month', 'discharges_by_officer'], sameness_check_type=SamenessDataValidationCheckType.NUMBERS, max_allowed_error=0.02), SamenessDataValidationCheck(