def test_validation_job_returns_correct_query(self) -> None:
        """Checks the query string rendered by an existence validation job.

        The same view builder is wrapped in two jobs: one without dataset
        overrides and one where `my_dataset` is overridden to
        `my_dataset_override`. Each job's `query_str()` is compared against
        the expected region-filtered SELECT statement.
        """
        view_builder = SimpleBigQueryViewBuilder(
            dataset_id="my_dataset",
            view_id="test_2",
            description="test_2 description",
            view_query_template="select * from literally_anything",
        )

        # Case 1: no overrides, so the query targets the builder's own dataset.
        job_without_overrides = DataValidationJob(
            validation=ExistenceDataValidationCheck(
                view_builder=view_builder,
                validation_category=ValidationCategory.INVARIANT,
            ),
            region_code="US_XX",
        )
        self.assertEqual(
            "SELECT * FROM `recidiviz-456.my_dataset.test_2` WHERE region_code = 'US_XX';",
            job_without_overrides.query_str(),
        )

        # Case 2: the override mapping should swap the dataset in the query.
        job_with_overrides = DataValidationJob(
            validation=ExistenceDataValidationCheck(
                view_builder=view_builder,
                validation_category=ValidationCategory.INVARIANT,
            ),
            region_code="US_XX",
            dataset_overrides={"my_dataset": "my_dataset_override"},
        )
        self.assertEqual(
            "SELECT * FROM `recidiviz-456.my_dataset_override.test_2` WHERE region_code = 'US_XX';",
            job_with_overrides.query_str(),
        )
def get_test_validations() -> List[DataValidationJob]:
    """Builds the fixed set of existence validation jobs used as test fixtures.

    One job is produced for every (region, view) pair, in this order:
    US_UT/test_1, US_UT/test_2, US_VA/test_1, US_VA/test_2. Each job gets its
    own freshly constructed view builder and validation check.
    """
    return [
        DataValidationJob(
            region_code=region_code,
            validation=ExistenceDataValidationCheck(
                validation_category=ValidationCategory.INVARIANT,
                view_builder=SimpleBigQueryViewBuilder(
                    dataset_id="my_dataset",
                    view_id=view_id,
                    description=f"{view_id} description",
                    view_query_template="select * from literally_anything",
                ),
            ),
        )
        # Outer loop over regions, inner loop over views, to keep the
        # region-major ordering of the returned list.
        for region_code in ("US_UT", "US_VA")
        for view_id in ("test_1", "test_2")
    ]
Ejemplo n.º 3
0
    def test_existence_check_failures_below_threshold(self) -> None:
        """Two returned rows still pass when the check allows two rows."""
        # The mocked query yields two rows.
        self.mock_client.run_query_async.return_value = [
            "some result row",
            "some other result row",
        ]

        view = BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        )
        existence_check = ExistenceDataValidationCheck(
            validation_type=ValidationCheckType.EXISTENCE,
            view=view,
            num_allowed_rows=2,
        )
        job = DataValidationJob(region_code="US_VA", validation=existence_check)

        actual = ExistenceValidationChecker.run_check(job)

        expected = DataValidationJobResult(
            validation_job=job,
            was_successful=True,
            failure_description=None,
        )
        self.assertEqual(actual, expected)
Ejemplo n.º 4
0
    def test_existence_check_failures(self) -> None:
        """Two returned rows are reported as 2 invalid rows against 0 allowed."""
        # The mocked query yields two rows.
        self.mock_client.run_query_async.return_value = [
            "some result row",
            "some other result row",
        ]

        view_builder = SimpleBigQueryViewBuilder(
            dataset_id="my_dataset",
            view_id="test_view",
            description="test_view description",
            view_query_template="select * from literally_anything",
        )
        existence_check = ExistenceDataValidationCheck(
            validation_category=ValidationCategory.INVARIANT,
            validation_type=ValidationCheckType.EXISTENCE,
            view_builder=view_builder,
        )
        job = DataValidationJob(region_code="US_VA", validation=existence_check)

        actual = ExistenceValidationChecker.run_check(job)

        expected_details = ExistenceValidationResultDetails(
            num_invalid_rows=2,
            num_allowed_rows=0,
        )
        expected = DataValidationJobResult(
            validation_job=job,
            result_details=expected_details,
        )
        self.assertEqual(actual, expected)
Ejemplo n.º 5
0
    def test_existence_check_failures(self) -> None:
        """Two returned rows make the check fail with a bracketed row-count message."""
        # The mocked query yields two rows.
        self.mock_client.run_query_async.return_value = [
            "some result row",
            "some other result row",
        ]

        view = BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        )
        existence_check = ExistenceDataValidationCheck(
            validation_type=ValidationCheckType.EXISTENCE,
            view=view,
        )
        job = DataValidationJob(region_code="US_VA", validation=existence_check)

        actual = ExistenceValidationChecker.run_check(job)

        expected = DataValidationJobResult(
            validation_job=job,
            was_successful=False,
            failure_description="Found [2] invalid rows, though [0] were expected",
        )
        self.assertEqual(actual, expected)
Ejemplo n.º 6
0
    def test_existence_check_no_failures(self):
        """An empty query result is reported as a successful validation."""
        # The mocked query yields no rows at all.
        self.mock_client.run_query_async.return_value = []

        view = BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        )
        existence_check = ExistenceDataValidationCheck(
            validation_type=ValidationCheckType.EXISTENCE,
            view=view,
        )
        job = DataValidationJob(region_code="US_VA", validation=existence_check)

        actual = ExistenceValidationChecker.run_check(job)

        expected = DataValidationJobResult(
            validation_job=job,
            was_successful=True,
            failure_description=None,
        )
        self.assertEqual(actual, expected)
Ejemplo n.º 7
0
    def test_check_happy_path_existence(self) -> None:
        """The resolver returns an ExistenceValidationChecker for an EXISTENCE job."""
        view = BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        )
        existence_check = ExistenceDataValidationCheck(
            validation_type=ValidationCheckType.EXISTENCE,
            view=view,
        )
        job = DataValidationJob(region_code="US_VA", validation=existence_check)

        resolved_checker = check_resolver.checker_for_validation(job)

        assert isinstance(resolved_checker, ExistenceValidationChecker)
Ejemplo n.º 8
0
    def test_existence_check_failures(self):
        """Two returned rows make the check fail with a plain row-count message."""
        # The mocked query yields two rows.
        self.mock_client.run_query_async.return_value = [
            "some result row",
            "some other result row",
        ]

        view = BigQueryView(
            dataset_id="my_dataset",
            view_id="test_view",
            view_query_template="select * from literally_anything",
        )
        existence_check = ExistenceDataValidationCheck(
            validation_type=ValidationCheckType.EXISTENCE,
            view=view,
        )
        job = DataValidationJob(region_code="US_VA", validation=existence_check)

        actual = ExistenceValidationChecker.run_check(job)

        expected = DataValidationJobResult(
            validation_job=job,
            was_successful=False,
            failure_description="Found 2 invalid rows, though 0 were expected",
        )
        self.assertEqual(actual, expected)
Ejemplo n.º 9
0
def get_all_validations() -> List[DataValidationCheck]:
    """Returns the full list of configured validations to perform.

    The list is assembled inside this function rather than as a top-level
    variable because every entry calls `.build()` on its view builder while the
    list is being constructed.
    NOTE(review): the original wording suggests the views can only be built
    locally when running inside of a local_project_id_override block - confirm.

    Returns:
        A new list holding the existence checks, then the internal sameness
        (consistency) checks, then the external comparison sameness checks.
    """

    all_data_validations: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(view=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=INCARCERATION_RELEASE_REASON_NO_DATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=OVERLAPPING_INCARCERATION_PERIODS_VIEW_BUILDER.build()),

        # TODO(#4054): This should stop failing for MO once we fix the 600ish periods with end dates of 99999999
        ExistenceDataValidationCheck(view=INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER.build()),

        ExistenceDataValidationCheck(view=PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=SUPERVISION_TERMINATION_REASON_NO_DATE_VIEW_BUILDER.build()),
        ExistenceDataValidationCheck(view=OVERLAPPING_SUPERVISION_PERIODS_VIEW_BUILDER.build()),

        SamenessDataValidationCheck(view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
                                    validation_name_suffix='absconsions',
                                    comparison_columns=['absconsions_by_month', 'absconsions_by_officer']),
        SamenessDataValidationCheck(view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
                                    validation_name_suffix='discharges',
                                    comparison_columns=['discharges_by_month', 'discharges_by_officer'],
                                    max_allowed_error=0.02),
        SamenessDataValidationCheck(view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
                                    comparison_columns=['age_bucket_sum', 'risk_level_sum', 'gender_sum', 'race_sum'],
                                    max_allowed_error=0.06),
        SamenessDataValidationCheck(view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
                                    comparison_columns=PO_REPORT_COMPARISON_COLUMNS),
        SamenessDataValidationCheck(view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
                                    comparison_columns=['cell_sum', 'caseload_sum']),
        SamenessDataValidationCheck(view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
                                    comparison_columns=['cell_sum', 'month_sum'],
                                    max_allowed_error=0.03),
        # This version of this validation excludes the race column explicitly since we have chosen to count people with
        # multiple races in counts for each individual race, so the sum of the race breakdowns will not match the total.
        # 'race_sum' must therefore NOT appear in the columns below; it is covered by the 'with_race' variant that
        # follows, which compares it under an explicit error tolerance.
        SamenessDataValidationCheck(view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
                                    comparison_columns=['district_sum', 'risk_level_sum', 'gender_sum', 'officer_sum']),
        # This version of the validation checks to make sure the race sum isn't far off from the other sums, even
        # though we expect them to be different (e.g. make sure it isn't zero).
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            validation_name_suffix='with_race',
            comparison_columns=['district_sum', 'risk_level_sum', 'gender_sum', 'race_sum', 'officer_sum'],
            max_allowed_error=.05
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['dashboard_revocation_count', 'public_dashboard_revocation_count']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='termination',
            comparison_columns=['dashboard_successful_termination', 'public_dashboard_successful_termination']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='completion',
            comparison_columns=['dashboard_projected_completion', 'public_dashboard_projected_completion']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='termination',
            comparison_columns=['dashboard_successful_termination', 'public_dashboard_successful_termination']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix='completion',
            comparison_columns=['dashboard_projected_completion', 'public_dashboard_projected_completion']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['covid_report_facility_population', 'public_dashboard_facility_population']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_MONTH_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['covid_report_population', 'public_dashboard_population']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['population_by_admission_reason_total_population',
                                'population_by_facility_by_demographics_total_population']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum',
                                'race_or_ethnicity_breakdown_sum', 'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum',
                                'race_or_ethnicity_breakdown_sum', 'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            # pylint: disable=line-too-long
            view=INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            # pylint: disable=line-too-long
            view=SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum',
                                'race_or_ethnicity_breakdown_sum', 'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum',
                                'race_or_ethnicity_breakdown_sum', 'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum',
                                'race_or_ethnicity_breakdown_sum', 'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum',
                                'race_or_ethnicity_breakdown_sum', 'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'age_bucket_breakdown_sum',
                                'race_or_ethnicity_breakdown_sum', 'gender_breakdown_sum']
        ),
        # TODO(#3743): This validation will fail until we fix the view to handle people who age into new buckets
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum', 'gender_breakdown_sum']
        ),
        SamenessDataValidationCheck(
            view=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=['metric_total', 'race_or_ethnicity_breakdown_sum']
        ),

        # External comparison validations
        SamenessDataValidationCheck(view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
                                    comparison_columns=['external_population_count', 'internal_population_count']),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_person_external_id', 'internal_person_external_id'],
            max_allowed_error=0.02),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='facility',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_facility', 'internal_facility'],
            max_allowed_error=0.02),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_person_external_id', 'internal_person_external_id'],
            max_allowed_error=0.2),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='district',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_district', 'internal_district'],
            max_allowed_error=0.01),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='supervision_level',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_supervision_level', 'internal_supervision_level'],
            max_allowed_error=0.02),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix='supervising_officer',
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_supervising_officer', 'internal_supervising_officer'],
            max_allowed_error=0.02),
        SamenessDataValidationCheck(
            view=RECIDIVISM_RELEASE_COHORT_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=['external_person_external_id', 'internal_person_external_id'],
            max_allowed_error=0.02),
        SamenessDataValidationCheck(
            view=RECIDIVISM_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=['external_recidivated', 'internal_recidivated'],
            max_allowed_error=0.02),
    ]

    return all_data_validations
Ejemplo n.º 10
0
def get_all_validations() -> List[DataValidationCheck]:
    """Returns the full list of configured validations to perform. This is not built as a top-level variable because the
    views cannot be built locally being run inside of a local_project_id_override block.
    """

    all_data_validations: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(
            view_builder=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=INCARCERATION_RELEASE_REASON_NO_DATE_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=OVERLAPPING_INCARCERATION_PERIODS_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=
            INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=SUPERVISION_TERMINATION_REASON_NO_DATE_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=OVERLAPPING_SUPERVISION_PERIODS_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=ACTIVE_IN_POPULATION_AFTER_DEATH_DATE_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=
            INVALID_ADMISSION_REASONS_FOR_TEMPORARY_CUSTODY_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=
            INVALID_ADMITTED_FROM_SUPERVISION_ADMISSION_REASON_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=
            INVALID_PFI_FOR_TEMPORARY_CUSTODY_ADMISSIONS_VIEW_BUILDER,
            validation_category=ValidationCategory.INVARIANT,
        ),
        ExistenceDataValidationCheck(
            view_builder=ASSESSMENT_FRESHNESS_VALIDATION_VIEW_BUILDER,
            validation_category=ValidationCategory.FRESHNESS,
        ),
        ExistenceDataValidationCheck(
            view_builder=CONTACT_FRESHNESS_VALIDATION_VIEW_BUILDER,
            validation_category=ValidationCategory.FRESHNESS,
        ),
        ExistenceDataValidationCheck(
            view_builder=EMPLOYMENT_FRESHNESS_VALIDATION_VIEW_BUILDER,
            validation_category=ValidationCategory.FRESHNESS,
        ),
        ExistenceDataValidationCheck(
            view_builder=ETL_FRESHNESS_VALIDATION_VIEW_BUILDER,
            validation_category=ValidationCategory.FRESHNESS,
        ),
        SamenessDataValidationCheck(
            view_builder=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="absconsions",
            comparison_columns=[
                "absconsions_by_month", "absconsions_from_po_report"
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="discharges",
            comparison_columns=[
                "discharges_by_month", "discharges_from_po_report"
            ],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=FTR_REFERRALS_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "age_bucket_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER,
            comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_CASELOAD_ADMISSION_HISTORY_VIEW_BUILDER,
            comparison_columns=[
                "total_revocation_admissions",
                "total_caseload_admissions",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER,
            comparison_columns=[
                "cell_sum", "caseload_sum", "caseload_num_rows"
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER,
            comparison_columns=["cell_sum", "month_sum"],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=REVOCATION_MATRIX_COMPARISON_BY_MONTH_VIEW_BUILDER,
            comparison_columns=["reference_sum", "month_sum"],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER,
            comparison_columns=[
                "district_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_COMPARISON_REVOCATIONS_BY_OFFICER_VIEW_BUILDER,
            comparison_columns=["officer_sum", "caseload_sum"],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="revocation",
            comparison_columns=[
                "revocation_count_all", "revocation_count_sum"
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="supervision",
            comparison_columns=[
                "supervision_count_all",
                "supervision_population_count_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="recommendation",
            comparison_columns=[
                "recommended_for_revocation_count_all",
                "recommended_for_revocation_count_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="revocation",
            comparison_columns=[
                "revocation_count_all", "revocation_count_sum"
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="supervision",
            comparison_columns=[
                "supervision_count_all",
                "supervision_population_count_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="recommendation",
            comparison_columns=[
                "recommended_for_revocation_count_all",
                "recommended_for_revocation_count_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "dashboard_revocation_count",
                "public_dashboard_revocation_count",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="termination",
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="completion",
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="termination",
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER,
            validation_name_suffix="completion",
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "population_by_admission_reason_total_population",
                "population_by_facility_by_demographics_total_population",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total", "race_or_ethnicity_breakdown_sum"
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total", "race_or_ethnicity_breakdown_sum"
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        # TODO(#3743): This validation will fail until we fix the view to handle people who age into new buckets
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER,
            comparison_columns=[
                "metric_total", "race_or_ethnicity_breakdown_sum"
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            MOST_RECENT_ASSESSMENT_DATE_BY_PERSON_BY_STATE_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "most_recent_etl_date",
                "most_recent_state_date",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            MOST_RECENT_ASSESSMENT_SCORE_BY_PERSON_BY_STATE_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "most_recent_etl_score",
                "most_recent_state_score",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        SamenessDataValidationCheck(
            view_builder=
            MOST_RECENT_FACE_TO_FACE_CONTACT_DATE_BY_PERSON_BY_STATE_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "most_recent_etl_face_to_face_contact_date",
                "most_recent_state_face_to_face_contact_date",
            ],
            validation_category=ValidationCategory.CONSISTENCY,
        ),
        # External comparison validations
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_ADMISSION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_data_person_id",
                "internal_data_person_id",
            ],
            partition_columns=["region_code", "admission_date"],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER,
            comparison_columns=[
                "external_population_count",
                "internal_population_count",
            ],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_data_person_id",
                "internal_data_person_id",
            ],
            partition_columns=["region_code", "date_of_stay"],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="facility",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_facility", "internal_facility"],
            partition_columns=["region_code", "date_of_stay"],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_RELEASE_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_data_person_id",
                "internal_data_person_id",
            ],
            partition_columns=["region_code", "release_date"],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            partition_columns=["region_code", "date_of_supervision"],
            max_allowed_error=0.2,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="district",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_district", "internal_district"],
            partition_columns=["region_code", "date_of_supervision"],
            max_allowed_error=0.01,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="supervision_level",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_supervision_level",
                "internal_supervision_level",
            ],
            partition_columns=["region_code", "date_of_supervision"],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="supervising_officer",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_supervising_officer",
                "internal_supervising_officer",
            ],
            partition_columns=["region_code", "date_of_supervision"],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            RECIDIVISM_RELEASE_COHORT_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            partition_columns=[
                "region_code", "release_cohort", "follow_up_period"
            ],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            RECIDIVISM_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                "external_recidivated", "internal_recidivated"
            ],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_START_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            partition_columns=["region_code", "start_date"],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            SUPERVISION_TERMINATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            partition_columns=["region_code", "termination_date"],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_person_external_id",
                "internal_person_external_id",
            ],
            partition_columns=["region_code", "date_of_stay"],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="facility",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=["external_facility", "internal_facility"],
            partition_columns=["region_code", "date_of_stay"],
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER,
            validation_name_suffix="legal_status",
            sameness_check_type=SamenessDataValidationCheckType.STRINGS,
            comparison_columns=[
                "external_legal_status", "internal_legal_status"
            ],
            partition_columns=["region_code", "date_of_stay"],
            validation_category=ValidationCategory.EXTERNAL_INDIVIDUAL,
        ),
        SamenessDataValidationCheck(
            view_builder=
            POPULATION_PROJECTION_MONTHLY_POPULATION_EXTERNAL_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                "external_total_population",
                "internal_total_population",
            ],
            max_allowed_error=0.02,
            validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
        ),
        SamenessDataValidationCheck(
            view_builder=
            INCARCERATION_POPULATION_BY_STATE_BY_DATE_JUSTICE_COUNTS_COMPARISON_VIEW_BUILDER,
            sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
            comparison_columns=[
                "justice_counts_total_population",
                "internal_total_population",
            ],
            max_allowed_error=0.06,
            validation_category=ValidationCategory.EXTERNAL_AGGREGATE,
        ),
    ]

    return all_data_validations
Ejemplo n.º 11
0
def get_all_validations() -> List[DataValidationCheck]:
    """Builds and returns every configured data validation check, in a fixed order.

    The checks are assembled inside this function instead of in a module-level constant because each entry
    calls `.build()` on its view builder. Per the original note on this function (whose wording was ambiguous),
    the views apparently cannot be built locally unless that happens inside of a local_project_id_override block.

    Returns:
        A new list holding the existence checks first, followed by the sameness checks.
    """

    # Existence checks take nothing but the built view, so they are generated from the ordered builders.
    existence_checks: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(view=view_builder.build())
        for view_builder in (
            INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER,
            INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER,
            INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER,
            # TODO(2981): This should stop failing for MO once we fix the 600ish periods with end dates of 99999999
            INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER,
            PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER,
            PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER,
            SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER,
        )
    ]

    numbers = SamenessDataValidationCheckType.NUMBERS
    strings = SamenessDataValidationCheckType.STRINGS

    # Column groups that recur across several checks. They are kept as tuples and expanded with list(...) at
    # each use so that every check still receives its own independent list object.
    demographic_breakdown_columns = (
        "metric_total",
        "age_bucket_breakdown_sum",
        "race_or_ethnicity_breakdown_sum",
        "gender_breakdown_sum",
    )
    race_breakdown_columns = ("metric_total", "race_or_ethnicity_breakdown_sum")
    termination_columns = (
        "dashboard_successful_termination",
        "public_dashboard_successful_termination",
    )
    completion_columns = (
        "dashboard_projected_completion",
        "public_dashboard_projected_completion",
    )

    sameness_checks: List[DataValidationCheck] = [
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["absconsions_by_month", "absconsions_by_officer"],
        ),
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["discharges_by_month", "discharges_by_officer"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "age_bucket_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
            max_allowed_error=0.06,
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "external_population_count",
                "internal_population_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            # Passed through as-is (not copied), exactly like the shared module-level constant it is.
            comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["cell_sum", "caseload_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["cell_sum", "month_sum"],
            max_allowed_error=0.03,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "district_sum",
                "risk_level_sum",
                "gender_sum",
                "race_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_EOM_POPULATION_PERSON_LEVEL_DISTRICT_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=["external_district", "internal_district"],
            max_allowed_error=0.01,
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "dashboard_revocation_count",
                "public_dashboard_revocation_count",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(termination_columns),
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(completion_columns),
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(termination_columns),
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(completion_columns),
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "covid_report_facility_population",
                "public_dashboard_facility_population",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=[
                "population_by_admission_reason_total_population",
                "population_by_facility_by_demographics_total_population",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(demographic_breakdown_columns),
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(demographic_breakdown_columns),
        ),
        SamenessDataValidationCheck(
            # pylint: disable=line-too-long
            view=INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(race_breakdown_columns),
        ),
        SamenessDataValidationCheck(
            # pylint: disable=line-too-long
            view=SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(race_breakdown_columns),
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(demographic_breakdown_columns),
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(demographic_breakdown_columns),
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(demographic_breakdown_columns),
        ),
        SamenessDataValidationCheck(
            view=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(demographic_breakdown_columns),
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(demographic_breakdown_columns),
        ),
        # TODO(3743): This validation will fail until we fix the view to handle people who age into new buckets
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(demographic_breakdown_columns),
        ),
        SamenessDataValidationCheck(
            view=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=list(race_breakdown_columns),
        ),
    ]

    return existence_checks + sameness_checks
Ejemplo n.º 12
0
    PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER
from recidiviz.validation.views.state.po_report_missing_fields import PO_REPORT_MISSING_FIELDS_VIEW_BUILDER, \
    PO_REPORT_COMPARISON_COLUMNS
from recidiviz.validation.views.state.revocation_matrix_comparison_revocation_cell_vs_caseload import \
    REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER
from recidiviz.validation.views.state.revocation_matrix_comparison_revocation_cell_vs_month import \
    REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER
from recidiviz.validation.views.state.revocation_matrix_comparison_supervision_population import \
    REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER
from recidiviz.validation.views.state.supervision_eom_population_person_level_district_external_comparison import \
    SUPERVISION_EOM_POPULATION_PERSON_LEVEL_DISTRICT_EXTERNAL_COMPARISON_VIEW_BUILDER
from recidiviz.validation.views.state.supervision_termination_prior_to_start import \
    SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER

_ALL_DATA_VALIDATIONS: List[DataValidationCheck] = [
    ExistenceDataValidationCheck(
        view=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER.build()),
    ExistenceDataValidationCheck(
        view=INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER.build()),
    ExistenceDataValidationCheck(
        view=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER.build()),
    ExistenceDataValidationCheck(
        view=PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER.build()),
    ExistenceDataValidationCheck(
        view=PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER.build()),
    ExistenceDataValidationCheck(
        view=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER.build()),
    SamenessDataValidationCheck(
        view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
        comparison_columns=['absconsions_by_month', 'absconsions_by_officer'],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS),
    SamenessDataValidationCheck(
Ejemplo n.º 13
0
def get_all_validations() -> List[DataValidationCheck]:
    """Builds and returns every configured data validation check.

    The checks are assembled on each call instead of being stored in a module-level variable: per the original
    author's note, the underlying views cannot be built locally outside of a local_project_id_override block.

    Returns:
        A new list holding all existence checks first, followed by all sameness checks, in a fixed order.
    """
    # Short aliases for the two sameness comparison modes used below.
    numbers = SamenessDataValidationCheckType.NUMBERS
    strings = SamenessDataValidationCheckType.STRINGS

    # Builders whose views back an existence check, in the order the checks are returned.
    existence_view_builders = (
        INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER,
        INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER,
        INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER,
        # TODO(2981): This should stop failing for MO once we fix the 600ish periods with end dates of 99999999
        INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER,
        PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER,
        PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER,
        SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER,
    )
    existence_checks: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(view=view_builder.build())
        for view_builder in existence_view_builders
    ]

    # A view is rebuilt for every check that uses it, so no two checks share a view object.
    sameness_checks: List[DataValidationCheck] = [
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['absconsions_by_month', 'absconsions_by_officer'],
            sameness_check_type=numbers,
        ),
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['discharges_by_month', 'discharges_by_officer'],
            sameness_check_type=numbers,
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=['age_bucket_sum', 'risk_level_sum', 'gender_sum', 'race_sum'],
            sameness_check_type=numbers,
            max_allowed_error=0.06,
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['external_population_count', 'internal_population_count'],
        ),
        SamenessDataValidationCheck(
            view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['cell_sum', 'caseload_sum'],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['cell_sum', 'month_sum'],
            max_allowed_error=0.03,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['district_sum', 'risk_level_sum', 'gender_sum', 'race_sum'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_EOM_POPULATION_PERSON_LEVEL_DISTRICT_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=['external_district', 'internal_district'],
            max_allowed_error=0.01,
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_VIOLATION_TYPE_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['dashboard_absconsion_count', 'public_dashboard_absconsion_count'],
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_VIOLATION_TYPE_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['dashboard_new_crime_count', 'public_dashboard_new_crime_count'],
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_VIOLATION_TYPE_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['dashboard_technical_count', 'public_dashboard_technical_count'],
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_VIOLATION_TYPE_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['dashboard_unknown_count', 'public_dashboard_unknown_count'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_DISTRICT_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['dashboard_supervision_count', 'public_dashboard_supervision_count'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['dashboard_successful_termination', 'public_dashboard_successful_termination'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['dashboard_projected_completion', 'public_dashboard_projected_completion'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['dashboard_successful_termination', 'public_dashboard_successful_termination'],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['dashboard_projected_completion', 'public_dashboard_projected_completion'],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=['covid_report_facility_population', 'public_dashboard_facility_population'],
        ),
    ]

    return existence_checks + sameness_checks
Ejemplo n.º 14
0
    def test_fetch_validation_jobs_to_perform_applies_configs(
        self,
        mock_get_all_validations_fn: MagicMock,
        mock_get_region_configs_fn: MagicMock,
    ) -> None:
        """Verifies that region config overrides show up on the fetched validation jobs.

        One existence check and one sameness check are configured. Region US_XX overrides the allowed row count
        (10) and the max allowed error (0.3); region US_YY has no overrides and is expected to produce jobs with
        num_allowed_rows=0 and max_allowed_error=0.0.
        """
        existence_builder = SimpleBigQueryViewBuilder(
            project_id="my_project",
            dataset_id="my_dataset",
            view_id="existence_view",
            description="existence_view description",
            view_query_template="SELECT NULL LIMIT 0",
        )
        sameness_builder = SimpleBigQueryViewBuilder(
            project_id="my_project",
            dataset_id="my_dataset",
            view_id="sameness_view",
            description="sameness_view description",
            view_query_template="SELECT NULL LIMIT 1",
        )

        # The two validations the mocked configuration reports.
        configured_existence_check = ExistenceDataValidationCheck(
            view_builder=existence_builder,
            validation_category=ValidationCategory.INVARIANT,
        )
        configured_sameness_check = SamenessDataValidationCheck(
            view_builder=sameness_builder,
            comparison_columns=["col1", "col2"],
            validation_category=ValidationCategory.CONSISTENCY,
        )
        mock_get_all_validations_fn.return_value = [
            configured_existence_check,
            configured_sameness_check,
        ]

        # Region configs: US_XX overrides both thresholds, US_YY overrides nothing.
        existence_name = existence_builder.view_id
        sameness_name = sameness_builder.view_id
        region_with_overrides = ValidationRegionConfig(
            region_code="US_XX",
            exclusions={},
            num_allowed_rows_overrides={
                existence_name: ValidationNumAllowedRowsOverride(
                    region_code="US_XX",
                    validation_name=existence_name,
                    num_allowed_rows_override=10,
                    override_reason="This is broken",
                )
            },
            max_allowed_error_overrides={
                sameness_name: ValidationMaxAllowedErrorOverride(
                    region_code="US_XX",
                    validation_name=sameness_name,
                    max_allowed_error_override=0.3,
                    override_reason="This is also broken",
                )
            },
        )
        region_without_overrides = ValidationRegionConfig(
            region_code="US_YY",
            exclusions={},
            num_allowed_rows_overrides={},
            max_allowed_error_overrides={},
        )
        mock_get_region_configs_fn.return_value = {
            "US_XX": region_with_overrides,
            "US_YY": region_without_overrides,
        }

        actual_jobs = _fetch_validation_jobs_to_perform()

        def expected_existence_job(
            region_code: str, num_allowed_rows: int
        ) -> DataValidationJob:
            # Existence job for one region with the given allowed-row threshold.
            return DataValidationJob(
                validation=ExistenceDataValidationCheck(
                    validation_category=ValidationCategory.INVARIANT,
                    view_builder=existence_builder,
                    validation_name_suffix=None,
                    validation_type=ValidationCheckType.EXISTENCE,
                    num_allowed_rows=num_allowed_rows,
                ),
                region_code=region_code,
            )

        def expected_sameness_job(
            region_code: str, max_allowed_error: float
        ) -> DataValidationJob:
            # Sameness job for one region with the given error threshold.
            return DataValidationJob(
                validation=SamenessDataValidationCheck(
                    validation_category=ValidationCategory.CONSISTENCY,
                    view_builder=sameness_builder,
                    validation_name_suffix=None,
                    comparison_columns=["col1", "col2"],
                    sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
                    max_allowed_error=max_allowed_error,
                    validation_type=ValidationCheckType.SAMENESS,
                ),
                region_code=region_code,
            )

        expected_jobs = [
            expected_existence_job("US_XX", 10),
            expected_existence_job("US_YY", 0),  # No override
            expected_sameness_job("US_XX", 0.3),
            expected_sameness_job("US_YY", 0.0),  # No override
        ]
        self.assertEqual(expected_jobs, actual_jobs)
Ejemplo n.º 15
0
def get_all_validations() -> List[DataValidationCheck]:
    """Builds and returns every configured data validation check.

    The checks are assembled on each call instead of being stored in a module-level variable: per the original
    author's note, the underlying views cannot be built locally outside of a local_project_id_override block.

    Returns:
        A new list with the existence checks first, then the internal sameness checks, then the external
        comparison sameness checks, each group in a fixed order.
    """
    # Short aliases for the two sameness comparison modes passed explicitly below.
    numbers = SamenessDataValidationCheckType.NUMBERS
    strings = SamenessDataValidationCheckType.STRINGS

    # Builders whose views back an existence check, in the order the checks are returned.
    existence_view_builders = (
        INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW_BUILDER,
        INCARCERATION_ADMISSION_NULLS_VIEW_BUILDER,
        INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW_BUILDER,
        INCARCERATION_RELEASE_REASON_NO_DATE_VIEW_BUILDER,
        OVERLAPPING_INCARCERATION_PERIODS_VIEW_BUILDER,
        INCARCERATION_RELEASE_REASON_NO_RELEASE_DATE_VIEW_BUILDER,
        PO_REPORT_AVGS_PER_DISTRICT_STATE_VIEW_BUILDER,
        PO_REPORT_DISTINCT_BY_OFFICER_MONTH_VIEW_BUILDER,
        SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW_BUILDER,
        SUPERVISION_TERMINATION_REASON_NO_DATE_VIEW_BUILDER,
        OVERLAPPING_SUPERVISION_PERIODS_VIEW_BUILDER,
    )
    existence_checks: List[DataValidationCheck] = [
        ExistenceDataValidationCheck(view=view_builder.build())
        for view_builder in existence_view_builders
    ]

    # Sameness checks that do not pass an explicit sameness_check_type. A view is rebuilt for every check that
    # uses it; checks sharing a view are told apart by validation_name_suffix.
    internal_sameness_checks: List[DataValidationCheck] = [
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="absconsions",
            comparison_columns=["absconsions_by_month", "absconsions_by_officer"],
        ),
        SamenessDataValidationCheck(
            view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="discharges",
            comparison_columns=["discharges_by_month", "discharges_by_officer"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=FTR_REFERRALS_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=["age_bucket_sum", "risk_level_sum", "gender_sum", "race_sum"],
        ),
        SamenessDataValidationCheck(
            view=PO_REPORT_MISSING_FIELDS_VIEW_BUILDER.build(),
            comparison_columns=PO_REPORT_COMPARISON_COLUMNS,
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW_BUILDER.build(),
            comparison_columns=["cell_sum", "caseload_sum", "caseload_num_rows"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW_BUILDER.build(),
            comparison_columns=["cell_sum", "month_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW_BUILDER.build(),
            comparison_columns=["district_sum", "risk_level_sum", "gender_sum", "race_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_COMPARISON_REVOCATIONS_BY_OFFICER_VIEW_BUILDER.build(),
            comparison_columns=["officer_sum", "caseload_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="revocation",
            comparison_columns=["revocation_count_all", "revocation_count_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="supervision",
            comparison_columns=["supervision_count_all", "supervision_population_count_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_RACE_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="recommendation",
            comparison_columns=[
                "recommended_for_revocation_count_all",
                "recommended_for_revocation_count_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="revocation",
            comparison_columns=["revocation_count_all", "revocation_count_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="supervision",
            comparison_columns=["supervision_count_all", "supervision_population_count_sum"],
        ),
        SamenessDataValidationCheck(
            view=REVOCATION_MATRIX_DISTRIBUTION_BY_GENDER_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="recommendation",
            comparison_columns=[
                "recommended_for_revocation_count_all",
                "recommended_for_revocation_count_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=REVOCATIONS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=["dashboard_revocation_count", "public_dashboard_revocation_count"],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="termination",
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_MONTH_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="completion",
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="termination",
            comparison_columns=[
                "dashboard_successful_termination",
                "public_dashboard_successful_termination",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_DASHBOARD_COMPARISON_VIEW_BUILDER.build(),
            validation_name_suffix="completion",
            comparison_columns=[
                "dashboard_projected_completion",
                "public_dashboard_projected_completion",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=[
                "covid_report_facility_population",
                "public_dashboard_facility_population",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_MONTH_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=["covid_report_population", "public_dashboard_population"],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_DEMOGRAPHIC_INTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=[
                "population_by_admission_reason_total_population",
                "population_by_facility_by_demographics_total_population",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_ADMISSION_REASON_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_PRIORITIZED_RACE_AND_ETHNICITY_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_LENGTHS_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_RELEASES_BY_TYPE_BY_PERIOD_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_REVOCATIONS_BY_PERIOD_BY_TYPE_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SENTENCE_TYPE_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_BY_DISTRICT_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "age_bucket_breakdown_sum",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        # TODO(#3743): This validation will fail until we fix the view to handle people who age into new buckets
        SamenessDataValidationCheck(
            view=SUPERVISION_SUCCESS_BY_PERIOD_BY_DEMOGRAPHICS_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=[
                "metric_total",
                "race_or_ethnicity_breakdown_sum",
                "gender_breakdown_sum",
            ],
        ),
        SamenessDataValidationCheck(
            view=ACTIVE_PROGRAM_PARTICIPATION_BY_REGION_INTERNAL_CONSISTENCY_VIEW_BUILDER.build(),
            comparison_columns=["metric_total", "race_or_ethnicity_breakdown_sum"],
        ),
    ]

    # External comparison validations
    external_comparison_checks: List[DataValidationCheck] = [
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            comparison_columns=["external_population_count", "internal_population_count"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=["external_person_external_id", "internal_person_external_id"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=INCARCERATION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="facility",
            sameness_check_type=strings,
            comparison_columns=["external_facility", "internal_facility"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=["external_person_external_id", "internal_person_external_id"],
            max_allowed_error=0.2,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="district",
            sameness_check_type=strings,
            comparison_columns=["external_district", "internal_district"],
            max_allowed_error=0.01,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="supervision_level",
            sameness_check_type=strings,
            comparison_columns=["external_supervision_level", "internal_supervision_level"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="supervising_officer",
            sameness_check_type=strings,
            comparison_columns=["external_supervising_officer", "internal_supervising_officer"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=RECIDIVISM_RELEASE_COHORT_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=["external_person_external_id", "internal_person_external_id"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=RECIDIVISM_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["external_recidivated", "internal_recidivated"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=SUPERVISION_TERMINATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=["external_person_external_id", "internal_person_external_id"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=strings,
            comparison_columns=["external_person_external_id", "internal_person_external_id"],
            max_allowed_error=0.02,
        ),
        SamenessDataValidationCheck(
            view=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="facility",
            sameness_check_type=strings,
            comparison_columns=["external_facility", "internal_facility"],
        ),
        SamenessDataValidationCheck(
            view=COUNTY_JAIL_POPULATION_PERSON_LEVEL_EXTERNAL_COMPARISON_MATCHING_PEOPLE_VIEW_BUILDER.build(),
            validation_name_suffix="legal_status",
            sameness_check_type=strings,
            comparison_columns=["external_legal_status", "internal_legal_status"],
        ),
        SamenessDataValidationCheck(
            view=POPULATION_PROJECTION_MONTHLY_POPULATION_EXTERNAL_COMPARISON_VIEW_BUILDER.build(),
            sameness_check_type=numbers,
            comparison_columns=["external_total_population", "internal_total_population"],
            max_allowed_error=0.02,
        ),
    ]

    return existence_checks + internal_sameness_checks + external_comparison_checks
Ejemplo n.º 16
0
from flask import Flask
from mock import patch

from recidiviz.big_query.big_query_view import BigQueryView
from recidiviz.tests.utils.matchers import UnorderedCollection
from recidiviz.validation.checks.existence_check import ExistenceDataValidationCheck
from recidiviz.validation.configured_validations import get_all_validations, STATES_TO_VALIDATE
from recidiviz.validation.validation_manager import validation_manager_blueprint, _fetch_validation_jobs_to_perform
from recidiviz.validation.validation_models import DataValidationJob, DataValidationJobResult
from recidiviz.validation.views import view_config

_TEST_VALIDATIONS: List[DataValidationJob] = [
    DataValidationJob(
        region_code='US_UT',
        validation=ExistenceDataValidationCheck(view=BigQueryView(
            dataset_id='my_dataset',
            view_id='test_1',
            view_query_template='select * from literally_anything'))),
    DataValidationJob(
        region_code='US_UT',
        validation=ExistenceDataValidationCheck(view=BigQueryView(
            dataset_id='my_dataset',
            view_id='test_2',
            view_query_template='select * from literally_anything'))),
    DataValidationJob(
        region_code='US_VA',
        validation=ExistenceDataValidationCheck(view=BigQueryView(
            dataset_id='my_dataset',
            view_id='test_1',
            view_query_template='select * from literally_anything'))),
    DataValidationJob(
        region_code='US_VA',
    INCARCERATION_POPULATION_BY_FACILITY_EXTERNAL_COMPARISON_VIEW
from recidiviz.validation.views.state.incarceration_release_prior_to_admission import \
    INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW
from recidiviz.validation.views.state.revocation_matrix_comparison_revocation_cell_vs_caseload import \
    REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_CASELOAD_VIEW
from recidiviz.validation.views.state.revocation_matrix_comparison_revocation_cell_vs_month import \
    REVOCATION_MATRIX_COMPARISON_REVOCATION_CELL_VS_MONTH_VIEW
from recidiviz.validation.views.state.revocation_matrix_comparison_supervision_population import \
    REVOCATION_MATRIX_COMPARISON_SUPERVISION_POPULATION_VIEW
from recidiviz.validation.views.state.supervision_eom_population_person_level_district_external_comparison import \
    SUPERVISION_EOM_POPULATION_PERSON_LEVEL_DISTRICT_EXTERNAL_COMPARISON_VIEW
from recidiviz.validation.views.state.supervision_termination_prior_to_start import \
    SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW

_ALL_DATA_VALIDATIONS: List[DataValidationCheck] = [
    ExistenceDataValidationCheck(
        view=INCARCERATION_ADMISSION_AFTER_OPEN_PERIOD_VIEW),
    ExistenceDataValidationCheck(view=INCARCERATION_ADMISSION_NULLS_VIEW),
    ExistenceDataValidationCheck(
        view=INCARCERATION_RELEASE_PRIOR_TO_ADMISSION_VIEW),
    ExistenceDataValidationCheck(
        view=SUPERVISION_TERMINATION_PRIOR_TO_START_VIEW),
    SamenessDataValidationCheck(
        view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW,
        comparison_columns=['absconsions_by_month', 'absconsions_by_officer'],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS),
    SamenessDataValidationCheck(
        view=CASE_TERMINATIONS_BY_TYPE_COMPARISON_VIEW,
        comparison_columns=['discharges_by_month', 'discharges_by_officer'],
        sameness_check_type=SamenessDataValidationCheckType.NUMBERS,
        max_allowed_error=0.02),
    SamenessDataValidationCheck(