def test_samneness_check_validation_name(self) -> None:
    """validation_name is the view id, optionally followed by "_<suffix>"."""
    plain_view = BigQueryView(
        dataset_id="my_dataset",
        view_id="test_view",
        view_query_template="select * from literally_anything",
    )
    plain_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        comparison_columns=["a", "b", "c"],
        view=plain_view,
    )
    # Without a suffix the name is just the view id.
    self.assertEqual(plain_check.validation_name, "test_view")

    suffixed_view = BigQueryView(
        dataset_id="my_dataset",
        view_id="test_view",
        view_query_template="select * from literally_anything",
    )
    suffixed_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        validation_name_suffix="b_c_only",
        comparison_columns=["b", "c"],
        view=suffixed_view,
    )
    # With a suffix the name is "<view_id>_<suffix>".
    self.assertEqual(suffixed_check.validation_name, "test_view_b_c_only")
def test_sameness_check_validation_name(self) -> None:
    """validation_name is the view id, optionally followed by "_<suffix>"."""
    plain_builder = SimpleBigQueryViewBuilder(
        dataset_id="my_dataset",
        view_id="test_view",
        description="test_view description",
        view_query_template="select * from literally_anything",
    )
    plain_check = SamenessDataValidationCheck(
        validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
        validation_type=ValidationCheckType.SAMENESS,
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        comparison_columns=["a", "b", "c"],
        view_builder=plain_builder,
    )
    # Without a suffix the name is just the view id.
    self.assertEqual(plain_check.validation_name, "test_view")

    suffixed_builder = SimpleBigQueryViewBuilder(
        dataset_id="my_dataset",
        view_id="test_view",
        description="test_view description",
        view_query_template="select * from literally_anything",
    )
    suffixed_check = SamenessDataValidationCheck(
        validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
        validation_type=ValidationCheckType.SAMENESS,
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        validation_name_suffix="b_c_only",
        comparison_columns=["b", "c"],
        view_builder=suffixed_builder,
    )
    # With a suffix the name is "<view_id>_<suffix>".
    self.assertEqual(suffixed_check.validation_name, "test_view_b_c_only")
def test_string_sameness_check_different_values_above_margin(self):
    """A STRINGS check fails when the error rate exceeds max_allowed_error."""
    num_bad_rows = 5
    # Threshold sits just below the error rate the check is expected to report.
    max_allowed_error = (num_bad_rows - 1) / 100
    actual_expected_error = num_bad_rows / 100

    self.mock_client.run_query_async.return_value = (
        self.return_string_values_with_num_bad_rows(num_bad_rows))

    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=['a', 'b', 'c'],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        max_allowed_error=max_allowed_error,
        view=BigQueryView(
            dataset_id='my_dataset',
            view_id='test_view',
            view_query_template='select * from literally_anything'))
    validation_job = DataValidationJob(region_code='US_VA',
                                       validation=sameness_check)

    actual = SamenessValidationChecker.run_check(validation_job)

    expected_description = (
        f'{num_bad_rows} out of 100 row(s) did not contain matching strings. '
        f'The acceptable margin of error is only {max_allowed_error}, but the '
        f'validation returned an error rate of {actual_expected_error}.')
    expected = DataValidationJobResult(
        validation_job=validation_job,
        was_successful=False,
        failure_description=expected_description)
    self.assertEqual(actual, expected)
def test_string_sameness_check_different_values_handle_non_string_type(
        self):
    """A STRINGS check raises ValueError when a compared value is an int."""
    query_rows = [{'a': 'same', 'b': 'same', 'c': 1245}]
    self.mock_client.run_query_async.return_value = query_rows

    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=['a', 'b', 'c'],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        view=BigQueryView(
            dataset_id='my_dataset',
            view_id='test_view',
            view_query_template='select * from literally_anything'))
    validation_job = DataValidationJob(region_code='US_VA',
                                       validation=sameness_check)

    with self.assertRaises(ValueError) as raised:
        SamenessValidationChecker.run_check(validation_job)

    # The message names the offending type, the value and the validation.
    expected_message = (
        'Unexpected type [<class \'int\'>] for value [1245] in STRING validation [test_view].'
    )
    self.assertEqual(str(raised.exception), expected_message)
def test_string_sameness_check_different_values_handle_empty_string(self):
    """A row mixing strings with a None value counts as a mismatch for STRINGS checks."""
    query_rows = [{'a': 'same', 'b': 'same', 'c': None}]
    self.mock_client.run_query_async.return_value = query_rows

    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=['a', 'b', 'c'],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        view=BigQueryView(
            dataset_id='my_dataset',
            view_id='test_view',
            view_query_template='select * from literally_anything'))
    validation_job = DataValidationJob(region_code='US_VA',
                                       validation=sameness_check)

    actual = SamenessValidationChecker.run_check(validation_job)

    # No max_allowed_error was passed; the reported threshold is 0.0.
    expected_description = (
        '1 out of 1 row(s) did not contain matching strings. '
        'The acceptable margin of error is only 0.0, but the '
        'validation returned an error rate of 1.0.')
    expected = DataValidationJobResult(
        validation_job=validation_job,
        was_successful=False,
        failure_description=expected_description)
    self.assertEqual(actual, expected)
def test_string_sameness_check_different_values_above_margin(self) -> None:
    """A STRINGS check fails when the error rate exceeds max_allowed_error."""
    num_bad_rows = 5
    # Threshold sits just below the error rate the check is expected to report.
    max_allowed_error = (num_bad_rows - 1) / 100
    actual_expected_error = num_bad_rows / 100

    query_rows = self.return_string_values_with_num_bad_rows(num_bad_rows)
    self.mock_client.run_query_async.return_value = query_rows

    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="test_view",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        max_allowed_error=max_allowed_error,
        view=view,
    )
    validation_job = DataValidationJob(
        region_code="US_VA", validation=sameness_check
    )

    actual = SamenessValidationChecker.run_check(validation_job)

    expected_description = (
        f"{num_bad_rows} out of 100 row(s) did not contain matching strings. "
        f"The acceptable margin of error is only {max_allowed_error}, but the "
        f"validation returned an error rate of {actual_expected_error}."
    )
    expected = DataValidationJobResult(
        validation_job=validation_job,
        was_successful=False,
        failure_description=expected_description,
    )
    self.assertEqual(actual, expected)
def test_string_sameness_check_different_values_within_margin(self) -> None:
    """A STRINGS check succeeds when max_allowed_error equals num_bad_rows / 100."""
    num_bad_rows = 2
    max_allowed_error = num_bad_rows / 100

    query_rows = self.return_string_values_with_num_bad_rows(num_bad_rows)
    self.mock_client.run_query_async.return_value = query_rows

    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="test_view",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        max_allowed_error=max_allowed_error,
        view=view,
    )
    validation_job = DataValidationJob(
        region_code="US_VA", validation=sameness_check
    )

    actual = SamenessValidationChecker.run_check(validation_job)

    expected = DataValidationJobResult(
        validation_job=validation_job,
        was_successful=True,
        failure_description=None,
    )
    self.assertEqual(actual, expected)
def test_string_sameness_check_numbers_values_all_none(self) -> None:
    """A NUMBERS check raises ValueError naming column [a] when every value is None."""
    query_rows = [{"a": None, "b": None, "c": None}]
    self.mock_client.run_query_async.return_value = query_rows

    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="test_view",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        view=view,
    )
    validation_job = DataValidationJob(
        region_code="US_VA", validation=sameness_check
    )

    with self.assertRaises(ValueError) as raised:
        SamenessValidationChecker.run_check(validation_job)

    expected_message = (
        "Unexpected None value for column [a] in validation [test_view]."
    )
    self.assertEqual(str(raised.exception), expected_message)
def test_string_sameness_check_strings_values_all_none(self) -> None:
    """A STRINGS check treats a row whose values are all None as matching."""
    query_rows = [{"a": None, "b": None, "c": None}]
    self.mock_client.run_query_async.return_value = query_rows

    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="test_view",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        view=view,
    )
    validation_job = DataValidationJob(
        region_code="US_VA", validation=sameness_check
    )

    actual = SamenessValidationChecker.run_check(validation_job)

    expected = DataValidationJobResult(
        validation_job=validation_job,
        was_successful=True,
        failure_description=None,
    )
    self.assertEqual(actual, expected)
def test_sameness_check_numbers_different_values_within_margin(self):
    """A NUMBERS check succeeds for values 98/100/99 with max_allowed_error=0.02."""
    query_rows = [{'a': 98, 'b': 100, 'c': 99}]
    self.mock_client.run_query_async.return_value = query_rows

    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=['a', 'b', 'c'],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        max_allowed_error=0.02,
        view=BigQueryView(
            dataset_id='my_dataset',
            view_id='test_view',
            view_query_template='select * from literally_anything'))
    validation_job = DataValidationJob(region_code='US_VA',
                                       validation=sameness_check)

    actual = SamenessValidationChecker.run_check(validation_job)

    expected = DataValidationJobResult(
        validation_job=validation_job,
        was_successful=True,
        failure_description=None)
    self.assertEqual(actual, expected)
def test_string_sameness_check_different_values_handle_non_string_type(
    self,
) -> None:
    """A STRINGS check raises ValueError when a compared value is an int."""
    query_rows = [{"a": "same", "b": "same", "c": 1245}]
    self.mock_client.run_query_async.return_value = query_rows

    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="test_view",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        view=view,
    )
    validation_job = DataValidationJob(
        region_code="US_VA", validation=sameness_check
    )

    with self.assertRaises(ValueError) as raised:
        SamenessValidationChecker.run_check(validation_job)

    # The message names the offending type, the value and the validation.
    expected_message = (
        "Unexpected type [<class 'int'>] for value [1245] in STRING validation [test_view]."
    )
    self.assertEqual(str(raised.exception), expected_message)
def test_sameness_check_numbers_multiple_rows_above_margin(self) -> None:
    """A NUMBERS check fails and reports the row count and the highest error seen."""
    query_rows = [
        {"a": 97, "b": 100, "c": 99},
        {"a": 14, "b": 21, "c": 14},
    ]
    self.mock_client.run_query_async.return_value = query_rows

    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="test_view",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        max_allowed_error=0.02,
        view=view,
    )
    validation_job = DataValidationJob(
        region_code="US_VA", validation=sameness_check
    )

    actual = SamenessValidationChecker.run_check(validation_job)

    expected_description = (
        "2 row(s) had unacceptable margins of error. The acceptable margin "
        "of error is only 0.02, but the validation returned rows with "
        "errors as high as 0.3333."
    )
    expected = DataValidationJobResult(
        validation_job=validation_job,
        was_successful=False,
        failure_description=expected_description,
    )
    self.assertEqual(actual, expected)
def test_sameness_check_numbers_multiple_rows_above_margin(self):
    """A NUMBERS check fails and reports the row count and the highest error seen."""
    query_rows = [
        {'a': 97, 'b': 100, 'c': 99},
        {'a': 14, 'b': 21, 'c': 14},
    ]
    self.mock_client.run_query_async.return_value = query_rows

    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=['a', 'b', 'c'],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        max_allowed_error=0.02,
        view=BigQueryView(
            dataset_id='my_dataset',
            view_id='test_view',
            view_query_template='select * from literally_anything'))
    validation_job = DataValidationJob(region_code='US_VA',
                                       validation=sameness_check)

    actual = SamenessValidationChecker.run_check(validation_job)

    expected_description = (
        '2 row(s) had unacceptable margins of error. The acceptable margin '
        'of error is only 0.02, but the validation returned rows with '
        'errors as high as 0.3333.')
    expected = DataValidationJobResult(
        validation_job=validation_job,
        was_successful=False,
        failure_description=expected_description)
    self.assertEqual(actual, expected)
def test_sameness_check_numbers_one_none(self) -> None:
    """A NUMBERS check raises ValueError naming the column that holds None."""
    query_rows = [{"a": 3, "b": 3, "c": None}]
    self.mock_client.run_query_async.return_value = query_rows

    view_builder = SimpleBigQueryViewBuilder(
        dataset_id="my_dataset",
        view_id="test_view",
        description="test_view description",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        view_builder=view_builder,
    )
    validation_job = DataValidationJob(
        region_code="US_XX", validation=sameness_check
    )

    with self.assertRaises(ValueError) as raised:
        SamenessValidationChecker.run_check(validation_job)

    expected_message = (
        "Unexpected None value for column [c] in validation [test_view]."
    )
    self.assertEqual(str(raised.exception), expected_message)
def test_sameness_check_strings_different_values_no_allowed_error(self) -> None:
    """A single mismatching row is reported as one error row out of one."""
    query_rows = [{"a": "a", "b": "b", "c": "c"}]
    self.mock_client.run_query_async.return_value = query_rows

    view_builder = SimpleBigQueryViewBuilder(
        dataset_id="my_dataset",
        view_id="test_view",
        description="test_view description",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        partition_columns=[],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        view_builder=view_builder,
    )
    validation_job = DataValidationJob(
        region_code="US_XX", validation=sameness_check
    )

    actual = SamenessValidationChecker.run_check(validation_job)

    # With no partition columns, the only partition key is the empty tuple.
    expected_details = SamenessStringsValidationResultDetails(
        num_error_rows=1,
        total_num_rows=1,
        max_allowed_error=0.0,
        non_null_counts_per_column_per_partition=[
            (tuple(), {"a": 1, "b": 1, "c": 1}),
        ],
    )
    expected = DataValidationJobResult(
        validation_job=validation_job, result_details=expected_details
    )
    self.assertEqual(actual, expected)
def test_sameness_check_numbers_different_values_within_margin(self) -> None:
    """Values 98/100/99 with max_allowed_error=0.02 produce no failed rows."""
    query_rows = [{"a": 98, "b": 100, "c": 99}]
    self.mock_client.run_query_async.return_value = query_rows

    view_builder = SimpleBigQueryViewBuilder(
        dataset_id="my_dataset",
        view_id="test_view",
        description="test_view description",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        max_allowed_error=0.02,
        view_builder=view_builder,
    )
    validation_job = DataValidationJob(
        region_code="US_XX", validation=sameness_check
    )

    actual = SamenessValidationChecker.run_check(validation_job)

    expected_details = SamenessNumbersValidationResultDetails(
        failed_rows=[], max_allowed_error=0.02
    )
    expected = DataValidationJobResult(
        validation_job=validation_job, result_details=expected_details
    )
    self.assertEqual(actual, expected)
def test_string_sameness_check_different_values_handle_empty_string(self) -> None:
    """A row mixing strings with a None value counts as a mismatch for STRINGS checks."""
    query_rows = [{"a": "same", "b": "same", "c": None}]
    self.mock_client.run_query_async.return_value = query_rows

    view = BigQueryView(
        dataset_id="my_dataset",
        view_id="test_view",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        view=view,
    )
    validation_job = DataValidationJob(
        region_code="US_VA", validation=sameness_check
    )

    actual = SamenessValidationChecker.run_check(validation_job)

    # No max_allowed_error was passed; the reported threshold is 0.0.
    expected_description = (
        "1 out of 1 row(s) did not contain matching strings. "
        "The acceptable margin of error is only 0.0, but the "
        "validation returned an error rate of 1.0."
    )
    expected = DataValidationJobResult(
        validation_job=validation_job,
        was_successful=False,
        failure_description=expected_description,
    )
    self.assertEqual(actual, expected)
def test_samneness_check_no_comparison_columns(self):
    """Constructing a sameness check without comparison_columns raises ValueError."""
    expected_message = 'Found only [0] comparison columns, expected at least 2.'

    # comparison_columns is deliberately omitted from the constructor call.
    with self.assertRaises(ValueError) as raised:
        SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            view=BigQueryView(
                dataset_id='my_dataset',
                view_id='test_view',
                view_query_template='select * from literally_anything'))

    self.assertEqual(str(raised.exception), expected_message)
def test_samneness_check_validation_name(self):
    """validation_name is the view id, optionally followed by '_<suffix>'."""
    plain_view = BigQueryView(
        dataset_id='my_dataset',
        view_id='test_view',
        view_query_template='select * from literally_anything')
    plain_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        comparison_columns=['a', 'b', 'c'],
        view=plain_view)
    # Without a suffix the name is just the view id.
    self.assertEqual(plain_check.validation_name, 'test_view')

    suffixed_view = BigQueryView(
        dataset_id='my_dataset',
        view_id='test_view',
        view_query_template='select * from literally_anything')
    suffixed_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        validation_name_suffix='b_c_only',
        comparison_columns=['b', 'c'],
        view=suffixed_view)
    # With a suffix the name is '<view_id>_<suffix>'.
    self.assertEqual(suffixed_check.validation_name, 'test_view_b_c_only')
def test_samneness_check_bad_max_error(self):
    """Constructing a sameness check with max_allowed_error=1.5 raises ValueError."""
    expected_message = (
        'Allowed error value must be between 0.0 and 1.0. Found instead: [1.5]'
    )

    with self.assertRaises(ValueError) as raised:
        SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=['a', 'b', 'c'],
            view=BigQueryView(
                dataset_id='my_dataset',
                view_id='test_view',
                view_query_template='select * from literally_anything'),
            max_allowed_error=1.5)

    self.assertEqual(str(raised.exception), expected_message)
def test_samneness_check_no_comparison_columns(self) -> None:
    """Constructing a sameness check without comparison_columns raises ValueError."""
    expected_message = "Found only [0] comparison columns, expected at least 2."

    # comparison_columns is deliberately omitted from the constructor call.
    with self.assertRaises(ValueError) as raised:
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
            validation_type=ValidationCheckType.SAMENESS,
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            view_builder=SimpleBigQueryViewBuilder(
                dataset_id="my_dataset",
                view_id="test_view",
                description="test_view description",
                view_query_template="select * from literally_anything",
            ),
        )

    self.assertEqual(str(raised.exception), expected_message)
def test_sameness_check_strings_multiple_dates(self) -> None:
    """Non-null counts are reported per (region, date) partition for STRINGS checks."""
    january_rows = [
        # January 2021
        {"region": "US_XX", "date": "2021-01-31", "a": "00", "b": "00"},
        {"region": "US_XX", "date": "2021-01-31", "a": "01", "b": None},
        {"region": "US_XX", "date": "2021-01-31", "a": "02", "b": "02"},
        {"region": "US_XX", "date": "2021-01-31", "a": None, "b": "03"},
    ]
    december_rows = [
        # December 2020
        {"region": "US_XX", "date": "2020-12-31", "a": "00", "b": "00"},
        {"region": "US_XX", "date": "2020-12-31", "a": "02", "b": "02"},
        {"region": "US_XX", "date": "2020-12-31", "a": None, "b": "04"},
    ]
    self.mock_client.run_query_async.return_value = january_rows + december_rows

    view_builder = SimpleBigQueryViewBuilder(
        dataset_id="my_dataset",
        view_id="test_view",
        description="test_view description",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b"],
        partition_columns=["region", "date"],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        max_allowed_error=0.0,
        view_builder=view_builder,
    )
    validation_job = DataValidationJob(
        region_code="US_XX", validation=sameness_check
    )

    actual = SamenessValidationChecker.run_check(validation_job)

    # Three of the seven rows have a None on one side and count as errors.
    expected_details = SamenessStringsValidationResultDetails(
        num_error_rows=3,
        total_num_rows=7,
        max_allowed_error=0.0,
        non_null_counts_per_column_per_partition=[
            (("US_XX", "2021-01-31"), {"a": 3, "b": 3}),
            (("US_XX", "2020-12-31"), {"a": 2, "b": 3}),
        ],
    )
    expected = DataValidationJobResult(
        validation_job=validation_job, result_details=expected_details
    )
    self.assertEqual(actual, expected)
def test_samneness_check_bad_max_error(self) -> None:
    """Constructing a sameness check with max_allowed_error=1.5 raises ValueError."""
    expected_message = (
        "Allowed error value must be between 0.0 and 1.0. Found instead: [1.5]"
    )

    with self.assertRaises(ValueError) as raised:
        SamenessDataValidationCheck(
            validation_type=ValidationCheckType.SAMENESS,
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=["a", "b", "c"],
            view=BigQueryView(
                dataset_id="my_dataset",
                view_id="test_view",
                view_query_template="select * from literally_anything",
            ),
            max_allowed_error=1.5,
        )

    self.assertEqual(str(raised.exception), expected_message)
def test_string_sameness_check_strings_values_all_none(self):
    """A STRINGS check treats a row whose values are all None as matching."""
    query_rows = [{'a': None, 'b': None, 'c': None}]
    self.mock_client.run_query_async.return_value = query_rows

    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=['a', 'b', 'c'],
        sameness_check_type=SamenessDataValidationCheckType.STRINGS,
        view=BigQueryView(
            dataset_id='my_dataset',
            view_id='test_view',
            view_query_template='select * from literally_anything'))
    validation_job = DataValidationJob(region_code='US_VA',
                                       validation=sameness_check)

    actual = SamenessValidationChecker.run_check(validation_job)

    expected = DataValidationJobResult(
        validation_job=validation_job,
        was_successful=True,
        failure_description=None)
    self.assertEqual(actual, expected)
def test_string_sameness_check_numbers_one_none(self):
    """A NUMBERS check raises ValueError naming the column that holds None."""
    query_rows = [{'a': 3, 'b': 3, 'c': None}]
    self.mock_client.run_query_async.return_value = query_rows

    sameness_check = SamenessDataValidationCheck(
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=['a', 'b', 'c'],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        view=BigQueryView(
            dataset_id='my_dataset',
            view_id='test_view',
            view_query_template='select * from literally_anything'))
    validation_job = DataValidationJob(region_code='US_VA',
                                       validation=sameness_check)

    with self.assertRaises(ValueError) as raised:
        SamenessValidationChecker.run_check(validation_job)

    expected_message = (
        'Unexpected None value for column [c] in validation [test_view].')
    self.assertEqual(str(raised.exception), expected_message)
def get_all_validations() -> List[DataValidationCheck]:
    """Builds and returns every configured data validation check.

    The list is assembled on each call instead of being stored in a
    module-level variable, because every entry calls ``.build()`` on a view
    builder and that cannot happen at import time when running locally (the
    original note ties this to running inside a local_project_id_override
    block).

    Returns:
        The existence checks followed by the sameness checks, in their
        configured order.
    """
    # Views that are wrapped in an ExistenceDataValidationCheck, in order.
    existence_view_builders = (
        INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER,
        INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER,
        INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER,
        INCARCERATION_RELEASE_REASON_NO_DATE_VIEW_BUILDER,
        OVERLAPPING_INCARCERATION_PERIODS_VIEW_BUILDER,
        INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER,
        PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER,
        PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER,
        SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER,
        SUPERVISION_TERMINATION_REASON_NO_DATE_VIEW_BUILDER,
        OVERLAPPING_SUPERVISION_PERIODS_VIEW_BUILDER,
    )
    validations: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(view=view_builder.build())
        for view_builder in existence_view_builders
    ]

    # Sameness checks comparing columns within our own views.
    validations.extend(
        [
            SamenessDataValidationCheck(
                view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="absconsions",
                comparison_columns=[
                    "absconsions_by_month",
                    "absconsions_by_officer",
                ],
            ),
            SamenessDataValidationCheck(
                view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="discharges",
                comparison_columns=[
                    "discharges_by_month",
                    "discharges_by_officer",
                ],
                max_allowed_error=0.02,
            ),
            SamenessDataValidationCheck(
                view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
                comparison_columns=[
                    "age_bucket_sum",
                    "risk_level_sum",
                    "gender_sum",
                    "race_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
                comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
            ),
            SamenessDataValidationCheck(
                view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
                comparison_columns=[
                    "cell_sum",
                    "caseload_sum",
                    "caseload_num_rows",
                ],
            ),
            SamenessDataValidationCheck(
                view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
                comparison_columns=["cell_sum", "month_sum"],
            ),
            SamenessDataValidationCheck(
                view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
                comparison_columns=[
                    "district_sum",
                    "risk_level_sum",
                    "gender_sum",
                    "race_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=REVOCATION_MATRIX_COMPARISON_REVOCATIONS_BY_OFFICER_VIEW_BUILDER.build(),
                comparison_columns=["officer_sum", "caseload_sum"],
            ),
            SamenessDataValidationCheck(
                view=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="revocation",
                comparison_columns=[
                    "revocation_count_all",
                    "revocation_count_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="supervision",
                comparison_columns=[
                    "supervision_count_all",
                    "supervision_population_count_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="recommendation",
                comparison_columns=[
                    "recommended_for_revocation_count_all",
                    "recommended_for_revocation_count_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="revocation",
                comparison_columns=[
                    "revocation_count_all",
                    "revocation_count_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="supervision",
                comparison_columns=[
                    "supervision_count_all",
                    "supervision_population_count_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="recommendation",
                comparison_columns=[
                    "recommended_for_revocation_count_all",
                    "recommended_for_revocation_count_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
                comparison_columns=[
                    "dashboard_revocation_count",
                    "public_dashboard_revocation_count",
                ],
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="termination",
                comparison_columns=[
                    "dashboard_successful_termination",
                    "public_dashboard_successful_termination",
                ],
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="completion",
                comparison_columns=[
                    "dashboard_projected_completion",
                    "public_dashboard_projected_completion",
                ],
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="termination",
                comparison_columns=[
                    "dashboard_successful_termination",
                    "public_dashboard_successful_termination",
                ],
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
                validation_name_suffix="completion",
                comparison_columns=[
                    "dashboard_projected_completion",
                    "public_dashboard_projected_completion",
                ],
            ),
            SamenessDataValidationCheck(
                view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
                comparison_columns=[
                    "covid_report_facility_population",
                    "public_dashboard_facility_population",
                ],
            ),
            SamenessDataValidationCheck(
                view=INCARCERATION_POPULATION_BY_MONTH_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
                comparison_columns=[
                    "covid_report_population",
                    "public_dashboard_population",
                ],
            ),
            SamenessDataValidationCheck(
                view=INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
                comparison_columns=[
                    "population_by_admission_reason_total_population",
                    "population_by_facility_by_demographics_total_population",
                ],
            ),
            SamenessDataValidationCheck(
                view=INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "age_bucket_breakdown_sum",
                    "race_or_ethnicity_breakdown_sum",
                    "gender_breakdown_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "age_bucket_breakdown_sum",
                    "race_or_ethnicity_breakdown_sum",
                    "gender_breakdown_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "race_or_ethnicity_breakdown_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "race_or_ethnicity_breakdown_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "age_bucket_breakdown_sum",
                    "race_or_ethnicity_breakdown_sum",
                    "gender_breakdown_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "age_bucket_breakdown_sum",
                    "race_or_ethnicity_breakdown_sum",
                    "gender_breakdown_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "age_bucket_breakdown_sum",
                    "race_or_ethnicity_breakdown_sum",
                    "gender_breakdown_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "age_bucket_breakdown_sum",
                    "race_or_ethnicity_breakdown_sum",
                    "gender_breakdown_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "age_bucket_breakdown_sum",
                    "race_or_ethnicity_breakdown_sum",
                    "gender_breakdown_sum",
                ],
            ),
            # TODO(#3743): This validation will fail until we fix the view to handle people who age into new buckets
            SamenessDataValidationCheck(
                view=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "race_or_ethnicity_breakdown_sum",
                    "gender_breakdown_sum",
                ],
            ),
            SamenessDataValidationCheck(
                view=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
                comparison_columns=[
                    "metric_total",
                    "race_or_ethnicity_breakdown_sum",
                ],
            ),
        ]
    )

    # External comparison validations
    validations.extend(
        [
            SamenessDataValidationCheck(
                view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
                comparison_columns=[
                    "external_population_count",
                    "internal_population_count",
                ],
                max_allowed_error=0.02,
            ),
            SamenessDataValidationCheck(
                view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=[
                    "external_person_external_id",
                    "internal_person_external_id",
                ],
                max_allowed_error=0.02,
            ),
            SamenessDataValidationCheck(
                view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
                validation_name_suffix="facility",
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=["external_facility", "internal_facility"],
                max_allowed_error=0.02,
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=[
                    "external_person_external_id",
                    "internal_person_external_id",
                ],
                max_allowed_error=0.2,
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
                validation_name_suffix="district",
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=["external_district", "internal_district"],
                max_allowed_error=0.01,
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
                validation_name_suffix="supervision_level",
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=[
                    "external_supervision_level",
                    "internal_supervision_level",
                ],
                max_allowed_error=0.02,
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
                validation_name_suffix="supervising_officer",
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=[
                    "external_supervising_officer",
                    "internal_supervising_officer",
                ],
                max_allowed_error=0.02,
            ),
            SamenessDataValidationCheck(
                view=RECIDIVISM_RELEASE_COHORT_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=[
                    "external_person_external_id",
                    "internal_person_external_id",
                ],
                max_allowed_error=0.02,
            ),
            SamenessDataValidationCheck(
                view=RECIDIVISM_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
                sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
                comparison_columns=[
                    "external_recidivated",
                    "internal_recidivated",
                ],
                max_allowed_error=0.02,
            ),
            SamenessDataValidationCheck(
                view=SUPERVISION_TERMINATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=[
                    "external_person_external_id",
                    "internal_person_external_id",
                ],
                max_allowed_error=0.02,
            ),
            SamenessDataValidationCheck(
                view=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=[
                    "external_person_external_id",
                    "internal_person_external_id",
                ],
                max_allowed_error=0.02,
            ),
            SamenessDataValidationCheck(
                view=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
                validation_name_suffix="facility",
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=["external_facility", "internal_facility"],
            ),
            SamenessDataValidationCheck(
                view=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
                validation_name_suffix="legal_status",
                sameness_check_type=SamenessDataValidationCheckType.STRINGS,
                comparison_columns=[
                    "external_legal_status",
                    "internal_legal_status",
                ],
            ),
            SamenessDataValidationCheck(
                view=POPULATION_PROJECTION_MONTHLY_POPULATION_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
                sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
                comparison_columns=[
                    "external_total_population",
                    "internal_total_population",
                ],
                max_allowed_error=0.02,
            ),
        ]
    )

    return validations
def get_all_validations() -> List[DataValidationCheck]:
    """Returns the full list of configured validations to perform.

    The list is assembled inside this function, not as a top-level variable, so
    that every `.build()` call below runs at call time rather than import time.
    The original note gives the reason as views not being buildable locally
    outside of a local_project_id_override block (NOTE(review): the original
    wording was ambiguous - confirm).

    Returns:
        The existence checks, followed by the internal sameness checks, followed
        by the external comparison sameness checks.
    """
    all_data_validations: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(view=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=INCARCERATION_RELEASE_REASON_NO_DATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=OVERLAPPING_INCARCERATION_PERIODS_VIEW_BUILDER.build()),
        # TODO(#4054): This should stop failing for MO once we fix the 600ish periods with end dates of 99999999
        ExistenceDataValidationCheck(view=INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=SUPERVISION_TERMINATION_REASON_NO_DATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=OVERLAPPING_SUPERVISION_PERIODS_VIEW_BUILDER.build()),

        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='absconsions',
            comparison_columns=['absconsions_by_month', 'absconsions_by_officer'],
        ),
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='discharges',
            comparison_columns=['discharges_by_month', 'discharges_by_officer'],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['age_bucket_sum', 'risk_level_sum', 'gender_sum', 'race_sum'],
            max_allowed_error=0.06,
        ),
        SamenessDataValidationCheck(
            view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
            comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
            comparison_columns=['cell_sum', 'caseload_sum'],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
            comparison_columns=['cell_sum', 'month_sum'],
            max_allowed_error=0.03,
        ),
        # This version of this validation excludes the race column explicitly since we have chosen to count people with
        # multiple races in counts for each individual race, so the sum of the race breakdowns will not match the total.
        # 'race_sum' must therefore NOT appear in this zero-tolerance check; it is covered by the 'with_race' check
        # below, which allows a margin of error.
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            comparison_columns=['district_sum', 'risk_level_sum', 'gender_sum', 'officer_sum'],
        ),
        # This version of the validation checks to make sure the race sum isn't far off from the other sums, even
        # though we expect them to be different (e.g. make sure it isn't zero).
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            validation_name_suffix='with_race',
            comparison_columns=['district_sum', 'risk_level_sum', 'gender_sum', 'race_sum', 'officer_sum'],
            max_allowed_error=.05,
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['dashboard_revocation_count', 'public_dashboard_revocation_count'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='termination',
            comparison_columns=['dashboard_successful_termination', 'public_dashboard_successful_termination'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='completion',
            comparison_columns=['dashboard_projected_completion', 'public_dashboard_projected_completion'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='termination',
            comparison_columns=['dashboard_successful_termination', 'public_dashboard_successful_termination'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='completion',
            comparison_columns=['dashboard_projected_completion', 'public_dashboard_projected_completion'],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['covid_report_facility_population', 'public_dashboard_facility_population'],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_MONTH_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['covid_report_population', 'public_dashboard_population'],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['population_by_admission_reason_total_population',
                                'population_by_facility_by_demographics_total_population'],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum'],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum'],
        ),
        SamenessDataValidationCheck(
            # pylint: disable=line-too-long
            view=INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum'],
        ),
        SamenessDataValidationCheck(
            # pylint: disable=line-too-long
            view=SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum'],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum'],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum'],
        ),
        SamenessDataValidationCheck(
            view=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum', 'race_or_ethnicity_breakdown_sum',
                                'gender_breakdown_sum'],
        ),
        # TODO(#3743): This validation will fail until we fix the view to handle people who age into new buckets
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum', 'gender_breakdown_sum'],
        ),
        SamenessDataValidationCheck(
            view=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum'],
        ),

        # External comparison validations
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['external_population_count', 'internal_population_count'],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_person_external_id', 'internal_person_external_id'],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='facility',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_facility', 'internal_facility'],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_person_external_id', 'internal_person_external_id'],
            max_allowed_error=0.2,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='district',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_district', 'internal_district'],
            max_allowed_error=0.01,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='supervision_level',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_supervision_level', 'internal_supervision_level'],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='supervising_officer',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_supervising_officer', 'internal_supervising_officer'],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=RECIDIVISM_RELEASE_COHORT_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_person_external_id', 'internal_person_external_id'],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=RECIDIVISM_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=['external_recidivated', 'internal_recidivated'],
            max_allowed_error=0.02,
        ),
    ]

    return all_data_validations
def get_all_validations() -> List[DataValidationCheck]:
    """Assembles and returns every configured data validation check.

    The checks are constructed on each call instead of being stored in a
    module-level constant. The rationale given in the original note is that
    views cannot be built locally outside of a local_project_id_override block.
    NOTE(review): the checks here receive view builders rather than built
    views, so confirm that rationale still applies.

    Returns:
        The existence checks (invariant, then freshness), followed by the
        internal-consistency sameness checks, followed by the external
        comparison sameness checks.
    """
    invariant_builders = (
        INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER,
        INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER,
        INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER,
        INCARCERATION_RELEASE_REASON_NO_DATE_VIEW_BUILDER,
        OVERLAPPING_INCARCERATION_PERIODS_VIEW_BUILDER,
        INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER,
        PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER,
        PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER,
        SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER,
        SUPERVISION_TERMINATION_REASON_NO_DATE_VIEW_BUILDER,
        OVERLAPPING_SUPERVISION_PERIODS_VIEW_BUILDER,
        ACTIVE_IN_POPULATION_AFTER_DEATH_DATE_VIEW_BUILDER,
        INVALID_ADMISSION_REASONS_FOR_TEMPORARY_CUSTODY_VIEW_BUILDER,
        INVALID_ADMITTED_FROM_SUPERVISION_ADMISSION_REASON_VIEW_BUILDER,
        INVALID_PFI_FOR_TEMPORARY_CUSTODY_ADMISSIONS_VIEW_BUILDER,
    )
    freshness_builders = (
        ASSESSMENT_FRESHNESS_VALIDATION_VIEW_BUILDER,
        CONTACT_FRESHNESS_VALIDATION_VIEW_BUILDER,
        EMPLOYMENT_FRESHNESS_VALIDATION_VIEW_BUILDER,
        ETL_FRESHNESS_VALIDATION_VIEW_BUILDER,
    )

    # Existence checks: one per builder, invariants first, then freshness.
    existence_checks: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(
            validation_category=ValidationCategory.INVARIANT,
            view_builder=builder,
        )
        for builder in invariant_builders
    ]
    existence_checks.extend(
        ExistenceDataValidationCheck(
            validation_category=ValidationCategory.FRESHNESS,
            view_builder=builder,
        )
        for builder in freshness_builders
    )

    # Sameness checks comparing internally-produced numbers with each other.
    consistency_checks: List[DataValidationCheck] = [
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="absconsions",
            comparison_columns=["absconsions_by_month", "absconsions_from_po_report"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="discharges",
            comparison_columns=["discharges_by_month", "discharges_from_po_report"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=FTR_REFERRALS_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "age_bucket_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER,
            comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_CASELOAD_ADMISSION_HISTORY_VIEW_BUILDER,
            comparison_columns=[
                "total_revocation_admissions",
                "total_caseload_admissions",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER,
            comparison_columns=["cell_sum", "caseload_sum", "caseload_num_rows"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER,
            comparison_columns=["cell_sum", "month_sum"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_COMPARISON_BY_MONTH_VIEW_BUILDER,
            comparison_columns=["reference_sum", "month_sum"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER,
            comparison_columns=[
                "district_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_COMPARISON_REVOCATIONS_BY_OFFICER_VIEW_BUILDER,
            comparison_columns=["officer_sum", "caseload_sum"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="revocation",
            comparison_columns=["revocation_count_all", "revocation_count_sum"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="supervision",
            comparison_columns=[
                "supervision_count_all",
                "supervision_population_count_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="recommendation",
            comparison_columns=[
                "recommended_for_revocation_count_all",
                "recommended_for_revocation_count_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="revocation",
            comparison_columns=["revocation_count_all", "revocation_count_sum"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="supervision",
            comparison_columns=[
                "supervision_count_all",
                "supervision_population_count_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="recommendation",
            comparison_columns=[
                "recommended_for_revocation_count_all",
                "recommended_for_revocation_count_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "dashboard_revocation_count",
                "public_dashboard_revocation_count",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="termination",
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="completion",
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="termination",
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="completion",
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "population_by_admission_reason_total_population",
                "population_by_facility_by_demographics_total_population",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        # TODO(#3743): This validation will fail until we fix the view to handle people who age into new buckets
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=MOST_RECENT_ASSESSMENT_DATE_BY_PERSON_BY_STATE_COMPARISON_VIEW_BUILDER,
            comparison_columns=["most_recent_etl_date", "most_recent_state_date"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=MOST_RECENT_ASSESSMENT_SCORE_BY_PERSON_BY_STATE_COMPARISON_VIEW_BUILDER,
            comparison_columns=["most_recent_etl_score", "most_recent_state_score"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.CONSISTENCY,
            view_builder=MOST_RECENT_FACE_TO_FACE_CONTACT_DATE_BY_PERSON_BY_STATE_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "most_recent_etl_face_to_face_contact_date",
                "most_recent_state_face_to_face_contact_date",
            ],
        ),
    ]

    # External comparison validations
    external_checks: List[DataValidationCheck] = [
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=INCARCERATION_ADMISSION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_data_person_id", "internal_data_person_id"],
            partition_columns=["region_code", "admission_date"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
            view_builder=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "external_population_count",
                "internal_population_count",
            ],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_data_person_id", "internal_data_person_id"],
            partition_columns=["region_code", "date_of_stay"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="facility",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_facility", "internal_facility"],
            partition_columns=["region_code", "date_of_stay"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=INCARCERATION_RELEASE_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_data_person_id", "internal_data_person_id"],
            partition_columns=["region_code", "release_date"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            partition_columns=["region_code", "date_of_supervision"],
            max_allowed_error=0.2,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="district",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_district", "internal_district"],
            partition_columns=["region_code", "date_of_supervision"],
            max_allowed_error=0.01,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="supervision_level",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_supervision_level",
                "internal_supervision_level",
            ],
            partition_columns=["region_code", "date_of_supervision"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="supervising_officer",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_supervising_officer",
                "internal_supervising_officer",
            ],
            partition_columns=["region_code", "date_of_supervision"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=RECIDIVISM_RELEASE_COHORT_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            partition_columns=["region_code", "release_cohort", "follow_up_period"],
            max_allowed_error=0.02,
        ),
        # NOTE(review): this view is person-level, yet the check is categorized
        # EXTERNAL_AGGREGATE while the other person-level checks in this list
        # use EXTERNAL_INDIVIDUAL - confirm this is intentional.
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
            view_builder=RECIDIVISM_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=["external_recidivated", "internal_recidivated"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=SUPERVISION_START_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            partition_columns=["region_code", "start_date"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=SUPERVISION_TERMINATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            partition_columns=["region_code", "termination_date"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            partition_columns=["region_code", "date_of_stay"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="facility",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_facility", "internal_facility"],
            partition_columns=["region_code", "date_of_stay"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
            view_builder=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="legal_status",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_legal_status", "internal_legal_status"],
            partition_columns=["region_code", "date_of_stay"],
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
            view_builder=POPULATION_PROJECTION_MONTHLY_POPULATION_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                "external_total_population",
                "internal_total_population",
            ],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
            view_builder=INCARCERATION_POPULATION_BY_STATE_BY_DATE_JUSTICE_COUNTS_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                "justice_counts_total_population",
                "internal_total_population",
            ],
            max_allowed_error=0.06,
        ),
    ]

    return existence_checks + consistency_checks + external_checks
def test_from_successful_result(self) -> None:
    """A successful sameness job result converts to a storage record and serializes to the expected dict."""
    # Arrange
    view_builder = SimpleBigQueryViewBuilder(
        dataset_id="my_dataset",
        view_id="test_view",
        description="test_view description",
        view_query_template="select * from literally_anything",
    )
    sameness_check = SamenessDataValidationCheck(
        validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
        validation_type=ValidationCheckType.SAMENESS,
        comparison_columns=["a", "b", "c"],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        view_builder=view_builder,
    )
    job_result = DataValidationJobResult(
        validation_job=DataValidationJob(
            region_code="US_XX", validation=sameness_check
        ),
        result_details=SamenessNumbersValidationResultDetails(
            failed_rows=[], max_allowed_error=0.0
        ),
    )
    run_datetime = datetime.datetime(2000, 1, 1, 0, 0, 0)

    # Act
    result = ValidationResultForStorage.from_validation_result(
        run_id="abc123",
        run_datetime=run_datetime,
        result=job_result,
    )

    # Assert: the storage record carries over every field of the job result.
    expected_record = ValidationResultForStorage(
        run_id="abc123",
        run_date=datetime.date(2000, 1, 1),
        run_datetime=run_datetime,
        system_version="v1.0.0",
        check_type=ValidationCheckType.SAMENESS,
        validation_name="test_view",
        region_code="US_XX",
        did_run=True,
        was_successful=True,
        failure_description=None,
        result_details_type="SamenessNumbersValidationResultDetails",
        result_details=SamenessNumbersValidationResultDetails(
            failed_rows=[], max_allowed_error=0.0
        ),
        validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
    )
    self.assertEqual(expected_record, result)

    # Assert: the serializable form flattens dates, enums and details to plain values.
    expected_serialized = {
        "run_id": "abc123",
        "run_date": "2000-01-01",
        "run_datetime": "2000-01-01T00:00:00",
        "system_version": "v1.0.0",
        "check_type": "SAMENESS",
        "validation_name": "test_view",
        "region_code": "US_XX",
        "did_run": True,
        "was_successful": True,
        "failure_description": None,
        "result_details_type": "SamenessNumbersValidationResultDetails",
        "result_details": '{"failed_rows": [], "max_allowed_error": 0.0}',
        "validation_category": "EXTERNAL_AGGREGATE",
    }
    self.assertEqual(expected_serialized, result.to_serializable())
def get_all_validations() -> List[DataValidationCheck]:
    """Builds and returns every configured data validation check.

    The list is assembled on each call rather than stored in a module-level
    variable, because every entry calls `.build()` on its view builder. The
    original note on this function ties that to local runs needing a
    local_project_id_override block (NOTE(review): confirm the exact
    constraint with the view-building code).

    Returns:
        The existence checks followed by the sameness checks, in a fixed order.
    """
    # Column sets shared by the internal-consistency checks. They are tuples that
    # get copied with list() per check, so each check still owns a separate list.
    all_breakdown_sums = (
        'metric_total',
        'age_bucket_breakdown_sum',
        'race_or_ethnicity_breakdown_sum',
        'gender_breakdown_sum',
    )
    race_breakdown_sums = ('metric_total', 'race_or_ethnicity_breakdown_sum')

    existence_view_builders = (
        INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER,
        INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER,
        INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER,
        # TODO(2981): This should stop failing for MO once we fix the 600ish periods with end dates of 99999999
        INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER,
        PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER,
        PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER,
        SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER,
    )
    checks: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(view=builder.build())
        for builder in existence_view_builders
    ]

    # Comparison checks whose columns and error margins are specific to each view.
    checks.extend([
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'absconsions_by_month',
                'absconsions_by_officer',
            ],
        ),
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'discharges_by_month',
                'discharges_by_officer',
            ],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'age_bucket_sum',
                'risk_level_sum',
                'gender_sum',
                'race_sum',
            ],
            max_allowed_error=0.06,
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'external_population_count',
                'internal_population_count',
            ],
        ),
        SamenessDataValidationCheck(
            view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=['cell_sum', 'caseload_sum'],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=['cell_sum', 'month_sum'],
            max_allowed_error=0.03,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'district_sum',
                'risk_level_sum',
                'gender_sum',
                'race_sum',
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_EOM_POPULATION_PERSON_LEVEL_DISTRICT_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_district', 'internal_district'],
            max_allowed_error=0.01,
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'dashboard_revocation_count',
                'public_dashboard_revocation_count',
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'dashboard_successful_termination',
                'public_dashboard_successful_termination',
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'dashboard_projected_completion',
                'public_dashboard_projected_completion',
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'dashboard_successful_termination',
                'public_dashboard_successful_termination',
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'dashboard_projected_completion',
                'public_dashboard_projected_completion',
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'covid_report_facility_population',
                'public_dashboard_facility_population',
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                'population_by_admission_reason_total_population',
                'population_by_facility_by_demographics_total_population',
            ],
        ),
    ])

    # Internal-consistency checks all compare a metric total against its
    # breakdown sums with NUMBERS sameness and no explicit error margin; only the
    # view and the set of breakdown columns vary. Order here is the output order.
    internal_consistency_specs = (
        (
            INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            all_breakdown_sums,
        ),
        (
            INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            all_breakdown_sums,
        ),
        (
            # pylint: disable=line-too-long
            INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            race_breakdown_sums,
        ),
        (
            # pylint: disable=line-too-long
            SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            race_breakdown_sums,
        ),
        (
            INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            all_breakdown_sums,
        ),
        (
            INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            all_breakdown_sums,
        ),
        (
            SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            all_breakdown_sums,
        ),
        (
            SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            all_breakdown_sums,
        ),
        (
            SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            all_breakdown_sums,
        ),
        # TODO(3743): This validation will fail until we fix the view to handle people who age into new buckets
        (
            SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            all_breakdown_sums,
        ),
        (
            ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            race_breakdown_sums,
        ),
    )
    checks.extend(
        SamenessDataValidationCheck(
            view=builder.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=list(columns),
        )
        for builder, columns in internal_consistency_specs
    )

    return checks