def big_has_versioning(file):
    """Check FP 4 (versioning) on a large ontology.

    The version IRI is obtained from ``dash_utils.get_version_iri`` instead
    of from a loaded OWLOntology object, then matched against the
    module-level version pattern ``pat``.

    Args:
        file (str): path to ontology

    Return:
        PASS if the version IRI matches the pattern, WARN if it does not,
        ERROR if the version IRI is an empty string, INFO for any other
        falsy value (e.g. None).
    """
    # per the original notes: '' means the version IRI is missing,
    # None means the ontology could not be parsed
    version_iri = dash_utils.get_version_iri(file)

    if not version_iri:
        if version_iri == '':
            return format_msg('ERROR', [missing_version])
        return format_msg('INFO', ['unable to parse ontology'])

    # a version IRI is present - compare it to the regex pattern
    if re.search(pat, version_iri):
        return 'PASS'
    return format_msg('WARN', [bad_format.format(version_iri)])
def big_is_maintained(file):
    """Check FP 16 (maintenance) on a large ontology.

    The version IRI is obtained from ``dash_utils.get_version_iri`` instead
    of from a loaded OWLOntology object, and is then handed to
    ``check_version_iri`` to evaluate its date.

    Args:
        file (str): path to ontology file

    Return:
        The result of ``check_version_iri`` when a version IRI is present,
        otherwise an INFO message.
    """
    # per the original notes: '' means the version IRI is missing,
    # None means the ontology could not be parsed
    version_iri = dash_utils.get_version_iri(file)

    if not version_iri:
        if version_iri == '':
            # no version IRI to check
            reason = 'missing version IRI to check date'
        else:
            # ontology was in bad format
            reason = 'unable to parse ontology'
        return format_msg('INFO', [reason])

    return check_version_iri(version_iri)
def save_invalid_uris(ns, error, warn):
    """Write invalid IRIs to a TSV report and summarize the outcome.

    When there is at least one ERROR or WARN IRI, the report is written to
    reports/principles/fp3-<ns>.tsv with one ``LEVEL<tab>IRI`` row per IRI
    (errors first, then warnings).

    Args:
        ns (str): ontology ID
        error (list): list of ERROR IRIs
        warn (list): list of WARN IRIs

    Return:
        ERROR or WARN with detailed message, or PASS if no errors or
        warnings.
    """
    n_errors = len(error)
    n_warns = len(warn)

    # nothing to report - no file is written
    if n_errors == 0 and n_warns == 0:
        return 'PASS'

    report_path = 'reports/principles/fp3-{0}.tsv'.format(ns)
    with open(report_path, 'w+') as out:
        for iri in error:
            out.write('ERROR\t{0}\n'.format(iri))
        for iri in warn:
            out.write('WARN\t{0}\n'.format(iri))

    # errors take precedence over warnings for the returned level
    if n_errors > 0 and n_warns > 0:
        return format_msg('ERROR', [
            error_msg.format(n_errors),
            warn_msg.format(n_warns),
            help_msg
        ])
    if n_errors > 0:
        return format_msg('ERROR', [error_msg.format(n_errors), help_msg])
    return format_msg('WARN', [warn_msg.format(n_warns), help_msg])
def has_scope(data, domain_map):
    '''Check fp 5 - scope.

    Retrieve the "domain" entry from the data and compare it to the domains
    of the other ontologies in the map. If domains overlap, return INFO with
    a list of overlapping ontologies. If the domain is missing, ERROR.
    Otherwise, PASS.

    The caller's ``domain_map`` is never modified, and an ontology that is
    not itself listed in the map is handled without raising.

    Args:
        data (dict): ontology data from registry; must contain 'id'
        domain_map (dict): map of ontology to domain

    Return:
        PASS, INFO, or ERROR with optional message
    '''
    ns = data['id']
    if 'domain' not in data:
        return format_msg('ERROR', ['missing domain (scope)'])
    domain = data['domain']

    # collect every *other* ontology sharing this domain; this NS is
    # skipped because it would always match itself
    same_domain = [
        ont_id for ont_id, other_domain in domain_map.items()
        if ont_id != ns and other_domain == domain
    ]

    if same_domain:
        same_domain_str = " ".join(same_domain)
        return format_msg('INFO', [info_msg.format(domain, same_domain_str)])
    return 'PASS'
def has_versioning(ontology):
    """Check FP 4 (versioning) on a loaded ontology.

    Reads the version IRI from the OWLOntology object and matches it
    against the module-level version pattern ``pat``.

    Args:
        ontology (OWLOntology): ontology object

    Return:
        INFO if the ontology is None, ERROR if there is no version IRI,
        WARN if the version IRI does not match the pattern, PASS otherwise.
    """
    if ontology is None:
        return format_msg('INFO', ['unable to load ontology'])

    # retrieve version IRI or None from ontology
    version_iri = ontology.getOntologyID().getVersionIRI().orNull()
    if not version_iri:
        return format_msg('ERROR', [missing_version])

    # compare version IRI to the regex pattern
    version_iri_str = version_iri.toString()
    if re.search(pat, version_iri_str):
        return 'PASS'
    return format_msg('WARN', [bad_format.format(version_iri_str)])
def has_users(data):
    """Check FP 9 (users).

    The check passes when the registry data has both a 'tracker' entry and
    a 'usages' entry whose values are not None.

    Args:
        data (dict): ontology registry data from YAML file

    Return:
        PASS or ERROR with optional help message
    """
    tracker = data['tracker'] if 'tracker' in data else None
    # TODO: usages should have a valid user that resolves
    #       and a description
    usages = data['usages'] if 'usages' in data else None

    no_tracker = tracker is None
    no_usages = usages is None

    if not (no_tracker or no_usages):
        return 'PASS'

    # tracker is required?
    if no_tracker and no_usages:
        return format_msg('ERROR', ['missing tracker and usages'])
    if no_tracker:
        return format_msg('ERROR', ['missing tracker'])
    return format_msg('ERROR', ['missing usages'])
def check_properties(namespace, props, ro_props):
    """Compare the properties from an ontology to the RO properties.

    A property whose IRI is an RO IRI is always accepted. Otherwise it is
    recorded either as sharing a label with an RO property (ERROR level) or
    as a non-RO property (INFO level). When anything is recorded, a report
    is saved through ``save_invalid_relations``.

    Args:
        namespace (str): ontology ID
        props (dict): map of ontology property label to IRI
        ro_props (dict): map of RO property label to IRI
            (IRIs are expected to be hashable, e.g. str)

    Return:
        PASS or violation level with optional help message
    """
    # build the RO IRI lookup once so each membership test is O(1)
    # instead of scanning ro_props.values() for every property
    ro_iris = set(ro_props.values())

    # properties that share an RO label
    # but have a different IRI
    same_label = {}
    # properties that do not have an RO IRI
    # and do not share a label with an RO prop
    not_ro = {}

    for label, iri in props.items():
        if iri in ro_iris:
            # uses an RO IRI - nothing to report
            continue
        if label in ro_props:
            same_label[iri] = label
        else:
            not_ro[iri] = label

    # maybe save a report file
    if same_label or not_ro:
        save_invalid_relations(namespace, ro_props, same_label, not_ro)

    # return the results
    if same_label and not_ro:
        return format_msg('ERROR', [
            ro_match.format(len(same_label)),
            non_ro.format(len(not_ro)),
            help_msg
        ])
    if same_label:
        return format_msg('ERROR',
                          [ro_match.format(len(same_label)), help_msg])
    if not_ro:
        return format_msg('INFO', [non_ro.format(len(not_ro)), help_msg])
    return 'PASS'
def big_is_common_format(good_format):
    """Check FP 2 (common format) on a large ontology.

    Args:
        good_format (bool): parse outcome; None means the ontology could not
            be loaded, False means it could not be parsed

    Return:
        INFO if good_format is None, ERROR if it is False, PASS otherwise.
    """
    # identity checks on purpose: only the exact None / False singletons
    # are treated as failures
    if good_format is None:
        return format_msg('INFO',
                          ['unable to load ontology (may be too large)'])
    if good_format is False:
        return format_msg('ERROR', ['unable to parse ontology'])
    return 'PASS'
def has_valid_labels(report):
    """Check FP 12 (naming conventions).

    Reads the duplicate, missing and multiple label violation counts from
    the ROBOT report. Any non-zero count is an ERROR; the message lists the
    violations in the order duplicate, multiple, missing, followed by the
    help message.

    Args:
        report (Report): complete ROBOT report

    Return:
        PASS, INFO, or ERROR with optional help message
    """
    if report is None:
        return format_msg('INFO', ['report could not be generated'])

    # all error level
    duplicates = report.getViolationCount('duplicate_label')
    missing = report.getViolationCount('missing_label')
    multiples = report.getViolationCount('multiple_labels')

    # one message per violation type that actually occurred
    details = []
    if duplicates > 0:
        details.append(duplicate_msg.format(duplicates))
    if multiples > 0:
        details.append(multiple_msg.format(multiples))
    if missing > 0:
        details.append(missing_msg.format(missing))

    if not details:
        # no label violations present
        return 'PASS'

    details.append(help_msg)
    return format_msg('ERROR', details)
def process_report(robot_gateway, ns, report):
    """Save the ROBOT report and return its overall status.

    The report is processed with ROBOT's default report options and written
    to reports/robot/<ns>.tsv. The returned level is the most severe level
    that has at least one violation.

    Args:
        robot_gateway (Gateway): py4j gateway to ROBOT
        ns (str): ontology namespace
        report (Report): completed Report object

    Return:
        ERROR, WARN, INFO, or PASS with optional help message
    """
    if report is None:
        return 'INFO|report failed'

    outfile = 'reports/robot/{0}.tsv'.format(ns)

    # print summary to terminal and save to report file
    operation = robot_gateway.ReportOperation
    report_options = operation.getDefaultOptions()
    operation.processReport(report, outfile, report_options)
    print('See {0} for details\n'.format(outfile))

    # count the violations at each level
    errs = report.getTotalViolations('ERROR')
    warns = report.getTotalViolations('WARN')
    info = report.getTotalViolations('INFO')

    # the summary starts at the returned level and lists every lower one
    if errs > 0:
        level = 'ERROR'
        summary = [
            '{0} errors'.format(errs),
            '{0} warnings'.format(warns),
            '{0} info messages'.format(info),
        ]
    elif warns > 0:
        level = 'WARN'
        summary = [
            '{0} warnings'.format(warns),
            '{0} info messages'.format(info),
        ]
    elif info > 0:
        level = 'INFO'
        summary = ['{0} info messages'.format(info)]
    else:
        return 'PASS'

    summary.append('Click to download report')
    return format_msg(level, summary)
def check_version_iri(version_iri):
    """Check if the version IRI is in date format, then check how long ago
    that date was.

    The date is taken from the first group of the module-level pattern
    ``vpat`` and is expected to be '-'-separated year, month and day. A
    matched string that is not a real calendar date (e.g. month 13) is
    reported as INFO instead of raising.

    Args:
        version_iri (str): version IRI from ontology header

    Return:
        ERROR if the date is more than 3*365 days old, WARN if more than
        2*365 days, INFO if more than 365 days or if no usable date is
        found, PASS otherwise.
    """
    no_date = 'version IRI does not have date information'

    # search for date in version IRI using regex pattern
    search = re.search(vpat, version_iri)
    if not search:
        # version IRI is not in the expected date format
        return format_msg('INFO', [no_date])

    date = search.group(1)
    splits = date.split('-')
    try:
        version_date = datetime.datetime(
            int(splits[0]), int(splits[1]), int(splits[2]))
    except (ValueError, IndexError):
        # matched the pattern but is not a valid calendar date
        return format_msg('INFO', [no_date])

    # take "now" once so all three thresholds share a reference point
    now = datetime.datetime.now()

    # most severe threshold first: 3 years (error), 2 (warn), 1 (info)
    thresholds = (
        (3, 'ERROR', 'three'),
        (2, 'WARN', 'two'),
        (1, 'INFO', 'one'),
    )
    for years, level, word in thresholds:
        if version_date < now - datetime.timedelta(days=years * 365):
            return format_msg(level, [old_version_msg.format(date, word)])

    # has valid version IRI and it has been updated within the past year
    return 'PASS'
def is_maintained(ontology):
    """Check FP 16 (maintenance) on a loaded ontology.

    Reads the version IRI through the OWL API and hands its string form to
    ``check_version_iri`` to evaluate the date.

    Args:
        ontology (OWLOntology): ontology object

    Return:
        PASS, INFO, WARN, or ERROR with optional help message
    """
    if ontology is None:
        return format_msg('INFO', ['unable to load ontology'])

    version_iri = ontology.getOntologyID().getVersionIRI().orNull()
    if not version_iri:
        # no version IRI (is Null)
        return format_msg('INFO', ['missing version IRI to check date'])

    return check_version_iri(version_iri.toString())
def has_contact(data):
    """Check FP 11 (locus of authority).

    Validates the registry data against the module-level ``contact_schema``
    to check for a valid contact entry.

    Args:
        data (dict): ontology registry data from YAML file

    Return:
        PASS or ERROR with optional help message
    """
    try:
        jsonschema.validate(data, contact_schema)
    except jsonschema.exceptions.ValidationError:
        if 'contact' not in data:
            # contact entry is missing from data
            problem = 'missing contact information'
        else:
            # contact is in data but is not proper format
            problem = 'invalid contact information'
        return format_msg('ERROR', [problem])
    else:
        return 'PASS'
def is_common_format(ontology):
    """Check FP 2 (common format) on a loaded ontology.

    Args:
        ontology (OWLOntology): ontology object

    Return:
        PASS if OWLOntology is not None, ERROR otherwise.
    """
    # only the None singleton counts as a failed load
    return (
        'PASS' if ontology is not None
        else format_msg('ERROR', ['unable to load ontology'])
    )
def has_valid_uris(robot_gateway, namespace, ontology):
    """Check FP 3 (URIs) on a loaded ontology.

    Every entity that is not an annotation property and is not obsolete has
    its lower-cased IRI passed to ``check_uri``. IRIs reported as ERROR or
    WARN are collected and handed to ``save_invalid_uris``.

    Annotation properties are skipped because many are in legacy OBO format
    and use #LOCALID. LOCALID should not be semantically meaningful,
    therefore numeric IDs should be used.

    Args:
        robot_gateway (Gateway): py4j gateway to ROBOT
        namespace (str): ontology ID
        ontology (OWLOntology): ontology object

    Return:
        INFO if the ontology is falsy (e.g. None). Otherwise the result of
        ``save_invalid_uris``: ERROR if any errors, WARN if any warns, PASS
        otherwise.
    """
    if not ontology:
        return format_msg('INFO', ['unable to load ontology'])

    error = []
    warn = []

    for entity in robot_gateway.OntologyHelper.getEntities(ontology):
        if entity.isOWLAnnotationProperty():
            # allow legacy annotation properties
            continue

        # look through the annotations for owl:deprecated; if there are
        # several, the last one decides
        is_obsolete = False
        for axiom in ontology.getAnnotationAssertionAxioms(entity.getIRI()):
            if axiom.getProperty().getIRI().toString() == owl_deprecated:
                is_obsolete = dash_utils.is_obsolete(axiom)

        # obsolete entities are not checked
        if is_obsolete:
            continue

        iri = entity.getIRI().toString().lower()
        level = check_uri(namespace, iri)
        if level == 'ERROR':
            error.append(iri)
        elif level == 'WARN':
            warn.append(iri)

    return save_invalid_uris(namespace, error, warn)
def has_valid_definitions(report):
    """Check fp 6 - textual definitions.

    If the ontology passes all ROBOT report definition checks, PASS.
    Duplicate and multiple definitions are error level; missing definitions
    are warn level. The returned level is ERROR if any error-level violation
    is present, otherwise WARN if definitions are missing. The message lists
    each violation that occurred (duplicate, multiple, missing) with its own
    count, followed by the help message.

    Args:
        report (Report): ROBOT report object

    Return:
        PASS, INFO, WARN, or ERROR with optional help message
    """
    if report is None:
        return 'INFO|report could not be generated'

    # error level violations
    duplicates = report.getViolationCount('duplicate_definition')
    multiples = report.getViolationCount('multiple_definitions')
    # warn level violation
    missing = report.getViolationCount('missing_definition')

    # one message per violation type, each formatted with its OWN count
    # (the multiples-only case previously reported the missing count)
    details = []
    if duplicates > 0:
        details.append(duplicate_msg.format(duplicates))
    if multiples > 0:
        details.append(multiple_msg.format(multiples))
    if missing > 0:
        details.append(missing_msg.format(missing))

    if not details:
        # no violations found
        return 'PASS'

    # missing definitions alone are only a warning
    level = 'ERROR' if duplicates > 0 or multiples > 0 else 'WARN'
    details.append(help_msg)
    return format_msg(level, details)
def big_has_valid_uris(namespace, file):
    """Check FP 3 - URIs on a big ontology.

    The file is read line by line in three stages: the prefix declarations
    (where the OWL and RDF prefixes are picked up), the Ontology header, and
    the entities. Each entity line that has an ``about`` attribute and is
    not an AnnotationProperty has its lower-cased IRI passed to
    ``check_uri``.

    This check ensures that all ontology entities follow NS_LOCALID.
    Annotation properties are not checked, as many are in legacy OBO format
    and use #LOCALID. LOCALID should not be semantically meaningful,
    therefore numeric IDs should be used. Obsolete entities are not yet
    excluded by this line-based check (see TODO below).

    Args:
        namespace (str): ontology ID
        file (str): path to ontology file

    Return:
        INFO if ontology IRIs cannot be parsed. Otherwise the result of
        ``save_invalid_uris``: ERROR if any errors, WARN if any warns, PASS
        otherwise.
    """
    error = []
    warn = []

    # parser state: still reading prefixes / still inside Ontology header /
    # an Ontology declaration was found
    prefixes = True
    header = True
    valid = False

    # prefixes
    owl = None
    rdf = None

    with open(file, 'r') as f:
        # TODO: rework to exclude deprecated classes
        for line in f:
            # Both words must be tested explicitly: the previous
            # `'Ontology' and 'about' in line` reduced to `'about' in line`,
            # so every entity line was swallowed here and no IRI was ever
            # checked. Restricting to the prefix stage keeps later entity
            # IRIs that merely contain "Ontology" from being mistaken for
            # the declaration.
            if prefixes and 'Ontology' in line and 'about' in line:
                # NOTE(review): this only bails out when BOTH prefixes are
                # missing; confirm whether a single missing prefix should
                # also be treated as unparseable
                if not owl and not rdf:
                    # did not find OWL and RDF - end now
                    return format_msg('INFO', ['unable to parse ontology'])
                # end prefixes
                prefixes = False
                # valid ontology to parse (found Ontology declaration)
                valid = True
                if line.strip().endswith('/>'):
                    # no ontology annotations - end header now
                    header = False

            elif prefixes and 'http://www.w3.org/2002/07/owl#' in line:
                # set the OWL prefix
                owl = dash_utils.get_prefix(line)

            elif prefixes and 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'\
                    in line:
                # set the RDF prefix
                rdf = dash_utils.get_prefix(line)

            elif header and '</{0}:Ontology>'.format(owl) in line:
                # end of Ontology annotations = end of header
                header = False

            elif not header and '{0}:about'.format(rdf) in line \
                    and '{0}:AnnotationProperty'.format(owl) not in line:
                # non-AP entity found - check the IRI
                iri = dash_utils.get_resource_value(line).lower()
                check = check_uri(namespace, iri)
                if check == 'ERROR':
                    error.append(iri)
                elif check == 'WARN':
                    warn.append(iri)

    if not valid:
        # not valid ontology
        return format_msg('INFO', ['unable to parse ontology'])

    return save_invalid_uris(namespace, error, warn)