def _check_gebruiksdoel_plus(self, entity, gebruiksdoelen):
    """
    Check that the gebruiksdoel_plus attributes are only filled for a matching gebruiksdoel.

    gebruiksdoel_woonfunctie and gebruiksdoel_gezondheidszorgfunctie may only have an
    'omschrijving' when the corresponding value (woonfunctie or gezondheidszorgfunctie)
    is present in gebruiksdoelen. A data warning is logged for every attribute that
    violates this rule.

    :param entity: the entity (dict) that is checked
    :param gebruiksdoelen: the gebruiksdoel values of the entity
    :return: None
    """
    qa_checks = {
        'woonfunctie': QA_CHECK.Value_gebruiksdoel_woonfunctie_should_match,
        'gezondheidszorgfunctie': QA_CHECK.Value_gebruiksdoel_gezondheidszorgfunctie_should_match,
    }

    # Check both woonfunctie and gezondheidszorgfunctie
    for check_value, qa_check in qa_checks.items():
        attribute_name = f'gebruiksdoel_{check_value}'

        # dict.get only uses its default when the key is missing. An attribute that is
        # present but None is treated as "not filled" instead of raising an AttributeError
        attribute_value = (entity.get(attribute_name) or {}).get('omschrijving')

        if attribute_value and check_value not in gebruiksdoelen:
            log_issue(
                logger,
                QA_LEVEL.WARNING,
                Issue(qa_check, entity, self.source_id, attribute_name, compared_to='gebruiksdoel'))
def _query_missing(query, check, attr):
    """
    Report the rows returned for query as issues for a missing attribute

    Rows without an eind_geldigheid are logged as data warnings for the given check.
    Rows that have an eind_geldigheid are only counted; the total is reported in one info line.

    :param query: query to execute
    :param check: the check that has failed, its 'msg' is used in the summary line
    :param attr: name of the attribute that has the failing bronwaarde
    :return: None
    """
    historic_count = 0

    for row in _get_data(query):
        if row.get('eind_geldigheid') is not None:
            # Historic rows are not reported individually, only counted
            historic_count += 1
            continue

        # The row is the entity in error, its id-attribute is named id
        # and the value in error is found in bronwaarde
        issue = Issue(check, row, 'id', 'bronwaarde')
        # Report the issue on the attribute that has the failing bronwaarde
        issue.attribute = attr
        log_issue(logger, QA_LEVEL.WARNING, issue)

    summary_label = f"{attr} {check['msg']}"
    if historic_count > 0:
        logger.data_info(f"{summary_label}: {historic_count} historical errors")
def _check_gebruiksdoelen_duplicates(self, entity: dict, gebruiksdoelen: list[str]): counts = reduce(lambda d, x: d | {x: d[x] + 1}, gebruiksdoelen, defaultdict(int)) if [v for v in counts.values() if v > 1]: log_issue( logger, QA_LEVEL.WARNING, Issue(QA_CHECK.Value_duplicates, entity, self.source_id, 'gebruiksdoel'))
def _check_gebruiksdoelen_exist(self, entity: dict, gebruiksdoelen: list[str]): for gebruiksdoel in gebruiksdoelen: if gebruiksdoel not in VALID_GEBRUIKSDOEL_DOMAIN: log_issue( logger, QA_LEVEL.WARNING, Issue(QA_CHECK.Value_gebruiksdoel_in_domain, entity, self.source_id, 'gebruiksdoel')) # Stop checking if the issue has occured, the whole list will be in the data warning break
def test_log_issue_no_entity(self):
    """An issue for an entity that has no value for its id-attribute is not added to the logger."""
    # The id-attribute of the issue is 'id', but the entity does not have an 'id'
    entity_without_id = {'attr': 'any attr'}
    check = {'id': 'any_check', 'msg': 'any msg'}
    issue = Issue(check, entity_without_id, 'id', 'attr')

    logger_mock = MagicMock()
    logger_mock.get_name.return_value = "any name"

    log_issue(logger_mock, QA_LEVEL.INFO, issue)

    logger_mock.add_issue.assert_not_called()
def _validate_begin_geldigheid(self, entity):
    """
    Validate the start of the validity of the entity.

    - An empty start validity is logged as an error and marks the validation as failed
    - A start validity that is after a filled end validity is logged as a warning

    :param entity: the entity to validate
    :return: None
    """
    begin_geldigheid = entity[FIELD.START_VALIDITY]

    if not begin_geldigheid:
        # Start-Validity is mandatory
        issue = Issue(QA_CHECK.Value_not_empty, entity, self.source_id, FIELD.START_VALIDITY)
        log_issue(logger, QA_LEVEL.ERROR, issue)
        self.validated = False
        return

    eind_geldigheid = entity[FIELD.END_VALIDITY]
    if eind_geldigheid and begin_geldigheid > eind_geldigheid:
        # Start-Validity cannot be after End-Validity
        issue = Issue(QA_CHECK.Value_not_after, entity, self.source_id,
                      FIELD.START_VALIDITY, compared_to=FIELD.END_VALIDITY)
        log_issue(logger, QA_LEVEL.WARNING, issue)
def check_relation_conflicts(catalog_name, collection_name, attribute_name):
    """
    Log a data warning for every conflicting relation of the given attribute.

    The conflicts are obtained from a Relater for the attribute. Only rows with a
    row_number greater than 1 are reported.

    :param catalog_name: name of the catalog
    :param collection_name: name of the collection
    :param attribute_name: name of the relation attribute, reported as the attribute in error
    :return: None
    """
    relater = Relater(catalog_name, collection_name, attribute_name)

    for result_row in relater.get_conflicts():
        conflict = dict(result_row)

        # A missing or empty row_number counts as 0
        row_number = conflict.get("row_number") or 0
        if not row_number > 1:
            continue

        # Log conflicting relations
        conflict['volgnummer'] = conflict.get('src_volgnummer')
        issue = Issue(QA_CHECK.Unique_destination, conflict, 'src_id', 'bronwaarde')
        issue.attribute = attribute_name
        log_issue(logger, QA_LEVEL.WARNING, issue)
def enrich_nummeraanduiding(self, nummeraanduiding):
    """
    Reduce a multi-valued ligt_in_bag_woonplaats to its last value.

    Multiple values are separated by ';'. The nummeraanduiding is updated in place.
    A data warning is logged the first time this occurs; self.multiple_values_logged
    prevents any further warnings.

    :param nummeraanduiding: the nummeraanduiding entity (dict) to enrich
    :return: None
    """
    attribute = 'ligt_in_bag_woonplaats'
    bronwaarde = nummeraanduiding.get(attribute)

    # Nothing to do for an empty value or a single value
    if not bronwaarde or ';' not in bronwaarde:
        return

    # Use the last of the values
    *_, last_value = bronwaarde.split(';')
    nummeraanduiding[attribute] = last_value

    if self.multiple_values_logged:
        return

    issue = Issue(QA_CHECK.Value_1_1_reference, nummeraanduiding, None, attribute)
    log_issue(logger, QA_LEVEL.WARNING, issue)
    self.multiple_values_logged = True
def _attr_check(self, check, attr, entity):
    """
    Check that the (nested) attribute attr is available in the entity.

    A missing attribute is logged as an Attribute_exists issue at the level of the check.
    When that level is fatal, the validation is marked as fatal.

    :param check: the check, only its level is used
    :param attr: the (nested) field reference of the attribute
    :param entity: the entity to check
    :return: True if the attribute is available, False otherwise
    """
    level = check["level"]

    # Descend into the entity, one key of the field reference at a time
    node = entity
    for key in split_field_reference(attr):
        if key not in node:
            # If a fatal check has failed, mark the validation as fatal
            if level == QA_LEVEL.FATAL:
                self.fatal = True
            issue = Issue(QA_CHECK.Attribute_exists, entity, self.entity_id, attr)
            log_issue(logger, level, issue)
            return False
        node = node[key]

    return True
def _qa_check(self, check, attr, entity):  # noqa: C901
    """
    Apply a QA check to the value of the (nested) attribute attr of the entity.

    The validate function is selected from self.validate_functions by the type of the check.
    A failing check is logged as an issue at the level of the check.
    When that level is fatal, the validation is marked as fatal.

    :param check: the check to perform, its 'level' and 'type' are used
    :param attr: the (nested) field reference of the attribute
    :param entity: the entity to check
    :return: True if the value passes the check, False otherwise
    """
    level = check["level"]

    value = get_nested_item(entity, *split_field_reference(attr))
    validate_function = self.validate_functions.get(check['type'])

    if validate_function(check, value):
        return True

    # The value doesn't pass the qa check
    if level == QA_LEVEL.FATAL:
        # If a fatal check has failed, mark the validation as fatal
        self.fatal = True
    issue = Issue(check, entity, self.entity_id, attr)
    log_issue(logger, level, issue)
    return False
def validate_bouwblok(self, entity):
    """
    Validate bouwblok

    Checks that are being performed:
    - begin_geldigheid can not be in the future (not fatal)

    :param entity: the bouwblok entity to validate
    :return: None
    """
    begin_geldigheid = entity[FIELD.START_VALIDITY]

    # An empty begin_geldigheid can not be compared with a datetime (TypeError),
    # there is nothing to check in that case
    if begin_geldigheid is None:
        return

    # begin_geldigheid can not be in the future
    # NOTE(review): utcnow() is a naive datetime, so begin_geldigheid is presumably
    # a naive UTC datetime as well - confirm
    if begin_geldigheid > datetime.datetime.utcnow():
        log_issue(
            logger,
            QA_LEVEL.WARNING,
            Issue(QA_CHECK.Value_not_in_future, entity, self.source_id, FIELD.START_VALIDITY))
def validate_pand(self, entity):
    """
    Validate pand

    Checks that are being performed:
    - aantal_bouwlagen does not match the highest and lowest bouwlagen
    - aantal_bouwlagen isn't filled but hoogste and laagste bouwlaag is

    Both checks result in a data warning.

    :param entity: the pand entity (dict) to validate
    :return: None
    """
    laagste_bouwlaag = entity.get('laagste_bouwlaag')
    hoogste_bouwlaag = entity.get('hoogste_bouwlaag')
    aantal_bouwlagen = entity.get('aantal_bouwlagen')

    bouwlagen_known = laagste_bouwlaag is not None and hoogste_bouwlaag is not None

    counted_bouwlagen = None
    if bouwlagen_known:
        # One extra bouwlaag is counted when the lowest bouwlaag is below 1
        ground_floor = 1 if laagste_bouwlaag < 1 else 0
        counted_bouwlagen = hoogste_bouwlaag + ground_floor - laagste_bouwlaag

    # aantal_bouwlagen should match the highest and lowest value
    # The comparison is only made when both numbers are truthy (not None and not 0)
    if aantal_bouwlagen and counted_bouwlagen and aantal_bouwlagen != counted_bouwlagen:
        mismatch = Issue(
            QA_CHECK.Value_aantal_bouwlagen_should_match,
            entity,
            self.source_id,
            "aantal_bouwlagen",
            compared_to="hoogste_bouwlaag and laagste_bouwlaag combined",
            compared_to_value=counted_bouwlagen)
        log_issue(logger, QA_LEVEL.WARNING, mismatch)

    # aantal_bouwlagen should be filled when the highest and lowest value are filled
    if not aantal_bouwlagen and bouwlagen_known:
        not_filled = Issue(
            QA_CHECK.Value_aantal_bouwlagen_not_filled,
            entity,
            self.source_id,
            "aantal_bouwlagen")
        log_issue(logger, QA_LEVEL.WARNING, not_filled)
def date_comparison_issue(self, entity, date_field, compare_date_field):
    """
    Log date comparison

    Logs a Value_not_after warning for a date comparison between 2 fields

    :param entity: the entity which is compared
    :param date_field: field name of the date
    :param compare_date_field: field name of the compared date
    :return: None
    """
    issue = Issue(
        QA_CHECK.Value_not_after,
        entity,
        self.source_id,
        date_field,
        compared_to=compare_date_field)
    log_issue(logger, QA_LEVEL.WARNING, issue)
def test_log_issue(self):
    """log_issue adds an issue for an entity with an id, and only logs an issue for an entity without one."""
    logger_mock = MagicMock()
    logger_mock.get_name.return_value = "any name"

    # Entity with an id: the issue is added to the logger, nothing is written with data_info
    entity_with_id = {'id': 'any id', 'attr': 'any attr'}
    issue = Issue({'id': 'any_check', 'msg': 'any msg'}, entity_with_id, 'id', 'attr')

    log_issue(logger_mock, QA_LEVEL.INFO, issue)

    logger_mock.add_issue.assert_called()
    logger_mock.data_info.assert_not_called()

    # Entity without an id: the issue is not added, but it is written with data_info
    logger_mock.reset_mock()
    entity_without_id = {}
    issue = Issue({'id': 'any_check', 'msg': 'any msg'}, entity_without_id, 'id', 'attr')

    log_issue(logger_mock, QA_LEVEL.INFO, issue)

    logger_mock.add_issue.assert_not_called()
    logger_mock.data_info.assert_called()
def validate(self, entity):
    """
    Validate entity with state to see if generic validations for states are correct.

    Checks that are being performed:
    - begin_geldigheid should be filled and not be after eind_geldigheid (when filled)
    - volgnummer should be a positive number and unique in the collection
    - eind_geldigheid may be empty only once per entity (warning)

    A failing volgnummer check is logged as an error and sets self.validated to False.

    :param entity: a GOB entity
    :return: None
    """
    def issue_for(qa_check, attribute):
        # All issues are reported on this entity, identified by its source id
        return Issue(qa_check, entity, self.source_id, attribute)

    self._validate_begin_geldigheid(entity)

    # volgnummer should be a positive number and unique in the collection
    volgnummer = entity[FIELD.SEQNR]
    if volgnummer < 1:
        log_issue(logger, QA_LEVEL.ERROR, issue_for(QA_CHECK.Format_numeric, FIELD.SEQNR))
        self.validated = False

    identificatie = str(entity[self.source_id])
    if volgnummer in self.volgnummers[identificatie]:
        log_issue(logger, QA_LEVEL.ERROR, issue_for(QA_CHECK.Value_unique, FIELD.SEQNR))
        self.validated = False

    # Only one eind_geldigheid may be empty per entity
    if entity[FIELD.END_VALIDITY] is None:
        already_open_ended = self.end_date.get(identificatie)
        if already_open_ended:
            log_issue(logger, QA_LEVEL.WARNING, issue_for(QA_CHECK.Value_empty_once, FIELD.END_VALIDITY))
        # Remember that an empty eind_geldigheid has been seen for this identificatie
        self.end_date[identificatie] = True

    # Add the volgnummer to the set for this entity identificatie
    self.volgnummers[identificatie].add(volgnummer)
def _check_aantal_eenheden_complex(self, entity): aantal_eenheden_complex = entity.get('aantal_eenheden_complex') check_attributes = [ 'gebruiksdoel_woonfunctie', 'gebruiksdoel_gezondheidszorgfunctie' ] check_values = [ entity.get(attr, {}).get('omschrijving', '') or '' for attr in check_attributes ] # If aantal_eenheden_complex is filled and complex not in the check values log a data warning if aantal_eenheden_complex is not None and all( 'complex' not in value.lower() for value in check_values): log_issue( logger, QA_LEVEL.WARNING, Issue( QA_CHECK.Value_aantal_eenheden_complex_should_be_empty, entity, self.source_id, 'aantal_eenheden_complex', compared_to= 'gebruiksdoel_woonfunctie and gebruiksdoel_gezondheidszorgfunctie', compared_to_value=', '.join(check_values))) # If complex in one of the check values, but aantal_eenheden_complex is not filled, log a data warning if any('complex' in value.lower() for value in check_values) and not aantal_eenheden_complex: log_issue( logger, QA_LEVEL.WARNING, Issue( QA_CHECK.Value_aantal_eenheden_complex_should_be_filled, entity, self.source_id, 'aantal_eenheden_complex', compared_to= 'gebruiksdoel_woonfunctie and gebruiksdoel_gezondheidszorgfunctie', compared_to_value=', '.join(check_values)))