Ejemplo n.º 1
0
def _fetch_validation_jobs_to_perform(
    region_code_filter: Optional[str] = None,
    validation_name_filter: Optional[Pattern] = None,
    dataset_overrides: Optional[Dict[str, str]] = None,
) -> List[DataValidationJob]:
    """Build one DataValidationJob per (validation, region) pair that should run.

    A validation is skipped entirely when its name appears in the global
    config's `disabled` collection, or when `validation_name_filter` is given
    and `re.search` finds no match in the validation name. A region is skipped
    when `region_code_filter` is truthy and differs from the region code, or
    when the validation name appears in that region's `exclusions`.

    Args:
        region_code_filter: If truthy, only this region code produces jobs.
        validation_name_filter: If not None, only validations whose name
            matches this pattern (via `re.search`) produce jobs.
        dataset_overrides: Passed through unchanged to every job.

    Returns:
        The jobs, ordered by validation and then by region config iteration
        order.
    """
    all_validations = get_all_validations()
    configs_by_region = get_validation_region_configs()
    global_config = get_validation_global_config()

    jobs: List[DataValidationJob] = []
    for validation in all_validations:
        if validation.validation_name in global_config.disabled:
            continue

        name_is_selected = validation_name_filter is None or re.search(
            validation_name_filter, validation.validation_name
        )
        if not name_is_selected:
            continue

        for code, config in configs_by_region.items():
            if region_code_filter and code != region_code_filter:
                continue
            if validation.validation_name in config.exclusions:
                continue

            # Apply the region's configuration to the check before scheduling.
            region_validation = validation.updated_for_region(config)
            job = DataValidationJob(
                validation=region_validation,
                region_code=code,
                dataset_overrides=dataset_overrides,
            )
            jobs.append(job)

    return jobs
    def test_cross_product_states_and_checks_production(
        self, _mock_get_environment: MagicMock
    ) -> None:
        """Verify the number of jobs built equals validations x states minus exclusions.

        Also pins the exact set of state codes that have validation region
        configs. `_mock_get_environment` is unused here; presumably it is
        injected by a patch decorator outside this block that selects the
        production environment (TODO confirm).
        """
        validations = get_all_validations()
        configs_by_state = get_validation_region_configs()
        global_config = get_validation_global_config()
        state_codes = configs_by_state.keys()

        # When you promote a state to production, we will start running validations against that state - add it to this
        # list to confirm you've updated all relevant external data validation tables in production to include
        # validation data for the newly promoted region.
        self.assertCountEqual(state_codes, ["US_ID", "US_MO", "US_ND", "US_PA"])

        state_count = len(state_codes)

        # Every state-level exclusion removes one job; every globally disabled
        # validation removes one job per state.
        state_level_exclusions = sum(
            len(config.exclusions) for config in configs_by_state.values()
        )
        globally_disabled_jobs = len(global_config.disabled) * state_count

        expected_job_count = len(validations) * state_count - (
            state_level_exclusions + globally_disabled_jobs
        )

        jobs = _fetch_validation_jobs_to_perform()
        self.assertEqual(expected_job_count, len(jobs))
    def test_cross_product_states_and_checks_staging(
        self, _mock_get_environment: MagicMock
    ) -> None:
        """Verify the number of jobs built equals validations x regions minus exclusions.

        `_mock_get_environment` is unused here; presumably it is injected by a
        patch decorator outside this block that selects the staging
        environment (TODO confirm).
        """
        validations = get_all_validations()
        configs_by_region = get_validation_region_configs()
        global_config = get_validation_global_config()
        region_codes = configs_by_region.keys()

        region_count = len(region_codes)

        # Every region-level exclusion removes one job; every globally
        # disabled validation removes one job per region.
        region_level_exclusions = sum(
            len(config.exclusions) for config in configs_by_region.values()
        )
        globally_disabled_jobs = len(global_config.disabled) * region_count

        expected_job_count = len(validations) * region_count - (
            region_level_exclusions + globally_disabled_jobs
        )

        jobs = _fetch_validation_jobs_to_perform()
        self.assertEqual(expected_job_count, len(jobs))
    def test_configs_all_reference_real_validations(self) -> None:
        """Verify every validation name referenced by a config actually exists.

        Checks the global config's `disabled` entries and, for each region
        config, its `exclusions`, `max_allowed_error_overrides` and
        `num_allowed_rows_overrides` entries against the names returned by
        `get_all_validations()`.
        """
        validation_names = {
            validation.validation_name for validation in get_all_validations()
        }
        region_configs_to_validate = get_validation_region_configs()
        global_config = get_validation_global_config()

        global_disabled_names = {
            validation.validation_name for validation in global_config.disabled.values()
        }

        global_names_not_in_validations_list = global_disabled_names.difference(
            validation_names
        )
        self.assertEqual(
            set(),
            global_names_not_in_validations_list,
            "Found views referenced in global config that do not exist in validations list: "
            f"{global_names_not_in_validations_list}",
        )

        for region_code, region_config in region_configs_to_validate.items():
            # Gather every validation name this region's config mentions,
            # whichever section it appears in.
            region_validation_names = {
                exclusion.validation_name
                for exclusion in region_config.exclusions.values()
            }
            region_validation_names.update(
                {
                    override.validation_name
                    for override in region_config.max_allowed_error_overrides.values()
                }
            )
            region_validation_names.update(
                {
                    override.validation_name
                    for override in region_config.num_allowed_rows_overrides.values()
                }
            )
            region_names_not_in_validations_list = region_validation_names.difference(
                validation_names
            )
            # Report the region's own unknown names; the global set is
            # guaranteed empty by the assertion above and would hide the
            # offending entries.
            self.assertEqual(
                set(),
                region_names_not_in_validations_list,
                f"Found views referenced in region [{region_code}] config that do not exist in validations"
                f" list: {region_names_not_in_validations_list}",
            )
Ejemplo n.º 5
0
def _fetch_validation_jobs_to_perform(
    region_code_filter: Optional[str] = None,
) -> List[DataValidationJob]:
    """Build one DataValidationJob per (validation, region) pair that should run.

    Validations whose name is in the global config's `disabled` collection are
    skipped for all regions. Within a validation, a region is skipped when
    `region_code_filter` is truthy and differs from the region code, or when
    the validation name is in that region's `exclusions`.

    Args:
        region_code_filter: If truthy, only this region code produces jobs.

    Returns:
        The jobs, ordered by validation and then by region config iteration
        order.
    """
    all_validations = get_all_validations()
    configs_by_region = get_validation_region_configs()
    global_config = get_validation_global_config()

    jobs: List[DataValidationJob] = []
    for validation in all_validations:
        if validation.validation_name in global_config.disabled:
            continue

        for code, config in configs_by_region.items():
            if region_code_filter and code != region_code_filter:
                continue
            if validation.validation_name in config.exclusions:
                continue

            # Apply the region's configuration to the check before scheduling.
            region_validation = validation.updated_for_region(config)
            jobs.append(
                DataValidationJob(validation=region_validation, region_code=code)
            )

    return jobs
Ejemplo n.º 6
0
 def test_parse_global_config_parses(self) -> None:
     """Smoke test: loading the global validation config yields a non-None value."""
     # Reaching the assertion at all means the loader did not raise.
     global_config = get_validation_global_config()
     self.assertIsNotNone(global_config)