def test_all_builders_referenced_by_validations_are_in_view_config(self) -> None:
    """Check that every view builder used by a validation is in the view config.

    Collects the ``view_builder`` of each validation returned by
    ``get_all_validations()`` and fails if any of them is absent from
    ``validation_view_config.VIEW_BUILDERS_FOR_VIEWS_TO_UPDATE``.
    """
    referenced_builders = set()
    for validation in get_all_validations():
        referenced_builders.add(validation.view_builder)

    registered_builders = validation_view_config.VIEW_BUILDERS_FOR_VIEWS_TO_UPDATE
    missing_builders = referenced_builders.difference(registered_builders)

    # An empty set means every referenced builder is registered.
    self.assertEqual(set(), missing_builders)
def test_cross_product_states_and_checks(self):
    """Expect one job per (validation, state) pair from the job fetcher.

    The expected job count is the number of validations multiplied by the
    number of entries in ``STATES_TO_VALIDATE``.
    """
    num_validations = len(get_all_validations())
    num_states = len(STATES_TO_VALIDATE)

    jobs = _fetch_validation_jobs_to_perform()

    self.assertEqual(num_validations * num_states, len(jobs))
def _fetch_validation_jobs_to_perform(
    region_code_filter: Optional[str] = None,
    validation_name_filter: Optional[Pattern] = None,
    dataset_overrides: Optional[Dict[str, str]] = None,
) -> List[DataValidationJob]:
    """Build the validation jobs that should be run.

    A job is produced for every (validation, region) pair that passes all of
    the following:

    * the validation name is not in the global config's ``disabled``
      collection;
    * ``validation_name_filter`` is None, or ``re.search`` finds a match for
      it in the validation name;
    * ``region_code_filter`` is falsy, or it equals the region code;
    * the validation name is not in the region config's ``exclusions``.

    Each job receives the check as updated for its region
    (``check.updated_for_region``) together with ``dataset_overrides``.

    Returns:
        The jobs, ordered by validation first and then by region, following
        the iteration order of the underlying collections.
    """
    all_checks = get_all_validations()
    configs_by_region = get_validation_region_configs()
    global_config = get_validation_global_config()

    jobs: List[DataValidationJob] = []
    for check in all_checks:
        name = check.validation_name

        name_selected = validation_name_filter is None or re.search(
            validation_name_filter, name
        )
        if name in global_config.disabled or not name_selected:
            continue

        for region_code, region_config in configs_by_region.items():
            region_selected = (
                not region_code_filter or region_code == region_code_filter
            )
            if not region_selected or name in region_config.exclusions:
                continue

            jobs.append(
                DataValidationJob(
                    validation=check.updated_for_region(region_config),
                    region_code=region_code,
                    dataset_overrides=dataset_overrides,
                )
            )

    return jobs
def test_all_validations_no_overlapping_names(self) -> None:
    """Ensure no two validations share the same ``validation_name``.

    Walks the validations in order and fails on the first name that has
    already been seen.
    """
    seen_names: Set[str] = set()
    for name in (v.validation_name for v in get_all_validations()):
        self.assertNotIn(name, seen_names)
        seen_names.add(name)
def test_cross_product_states_and_checks_production(
    self, _mock_get_environment: MagicMock
) -> None:
    """Verify the configured regions and the resulting job count.

    The expected job count is (validations x regions) minus the per-region
    exclusions and minus one job per region for every globally disabled
    validation.

    ``_mock_get_environment`` is not used in the body; presumably it is
    injected by a patch decorator outside this block -- TODO confirm.
    """
    num_validations = len(get_all_validations())
    region_configs = get_validation_region_configs()
    global_config = get_validation_global_config()
    state_codes = region_configs.keys()

    # Promoting a state to production means validations start running against
    # it. Add the state to this list to confirm that all relevant external
    # data validation tables in production have been updated to include
    # validation data for the newly promoted region.
    self.assertCountEqual(state_codes, ["US_ID", "US_MO", "US_ND", "US_PA"])

    num_states = len(state_codes)
    region_exclusion_count = sum(
        len(region_config.exclusions) for region_config in region_configs.values()
    )
    globally_disabled_count = len(global_config.disabled) * num_states
    expected_job_count = num_validations * num_states - (
        region_exclusion_count + globally_disabled_count
    )

    jobs = _fetch_validation_jobs_to_perform()
    self.assertEqual(expected_job_count, len(jobs))
def _fetch_validation_jobs_to_perform() -> List[DataValidationJob]:
    """Build one validation job for every (validation, state) pair.

    Returns:
        Jobs ordered by validation first, then by the order of
        ``STATES_TO_VALIDATE``.
    """
    jobs: List[DataValidationJob] = []
    for check in get_all_validations():
        jobs.extend(
            DataValidationJob(validation=check, region_code=state_code)
            for state_code in STATES_TO_VALIDATE
        )
    return jobs
def test_cross_product_states_and_checks_staging(
    self, _mock_get_environment: MagicMock
) -> None:
    """Verify the job count across every configured region.

    The expected job count is (validations x regions) minus the per-region
    exclusions and minus one job per region for every globally disabled
    validation.

    ``_mock_get_environment`` is not used in the body; presumably it is
    injected by a patch decorator outside this block -- TODO confirm.
    """
    num_validations = len(get_all_validations())
    region_configs = get_validation_region_configs()
    global_config = get_validation_global_config()

    num_regions = len(region_configs.keys())
    region_exclusion_count = sum(
        len(region_config.exclusions) for region_config in region_configs.values()
    )
    globally_disabled_count = len(global_config.disabled) * num_regions
    expected_job_count = num_validations * num_regions - (
        region_exclusion_count + globally_disabled_count
    )

    jobs = _fetch_validation_jobs_to_perform()
    self.assertEqual(expected_job_count, len(jobs))
def test_configs_all_reference_real_validations(self) -> None:
    """Check that every validation named in the configs actually exists.

    Compares the names from ``get_all_validations()`` against:

    * the entries of the global config's ``disabled`` mapping, and
    * for each region config, the entries of ``exclusions``,
      ``max_allowed_error_overrides`` and ``num_allowed_rows_overrides``.

    Fails with the set of unknown names if any config entry refers to a
    validation that is not in the validations list.
    """
    validation_names = {
        validation.validation_name for validation in get_all_validations()
    }
    region_configs_to_validate = get_validation_region_configs()
    global_config = get_validation_global_config()

    global_disabled_names = {
        validation.validation_name for validation in global_config.disabled.values()
    }
    global_names_not_in_validations_list = global_disabled_names.difference(
        validation_names
    )
    self.assertEqual(
        set(),
        global_names_not_in_validations_list,
        f"Found views referenced in global config that do not exist in validations list: "
        f"{global_names_not_in_validations_list}",
    )

    for region_code, region_config in region_configs_to_validate.items():
        # Gather every validation name this region's config mentions, from
        # all three per-region sections.
        region_validation_names = {
            exclusion.validation_name
            for exclusion in region_config.exclusions.values()
        }
        region_validation_names.update(
            override.validation_name
            for override in region_config.max_allowed_error_overrides.values()
        )
        region_validation_names.update(
            override.validation_name
            for override in region_config.num_allowed_rows_overrides.values()
        )

        region_names_not_in_validations_list = region_validation_names.difference(
            validation_names
        )
        # Report the region's own unknown names. The global set was already
        # asserted empty above, so printing it here would always show set().
        self.assertEqual(
            set(),
            region_names_not_in_validations_list,
            f"Found views referenced in region [{region_code}] config that do not exist in validations"
            f" list: {region_names_not_in_validations_list}",
        )
def _fetch_validation_jobs_to_perform(
    region_code_filter: Optional[str] = None,
) -> List[DataValidationJob]:
    """Build the validation jobs that should be run.

    A job is produced for every (validation, region) pair where the
    validation name is not in the global config's ``disabled`` collection,
    the region passes ``region_code_filter`` (a falsy filter admits every
    region), and the validation name is not in the region config's
    ``exclusions``. Each job carries the check as updated for its region
    (``check.updated_for_region``).

    Returns:
        The jobs, ordered by validation first and then by region, following
        the iteration order of the underlying collections.
    """
    all_checks = get_all_validations()
    configs_by_region = get_validation_region_configs()
    global_config = get_validation_global_config()

    jobs: List[DataValidationJob] = []
    for check in all_checks:
        name = check.validation_name
        if name in global_config.disabled:
            continue

        for region_code, region_config in configs_by_region.items():
            region_selected = (
                not region_code_filter or region_code == region_code_filter
            )
            if not region_selected or name in region_config.exclusions:
                continue

            jobs.append(
                DataValidationJob(
                    validation=check.updated_for_region(region_config),
                    region_code=region_code,
                )
            )

    return jobs