Beispiel #1
0
def big_has_versioning(file):
    """Check fp 4 - versioning - on large ontologies.

    Intended for large ontologies: the version IRI is obtained from the file
    path through ``dash_utils.get_version_iri`` instead of from a loaded
    OWLOntology object. The IRI is then matched against the expected
    version pattern.

    Args:
        file (str): path to ontology

    Return:
        PASS, INFO, WARN, or ERROR with optional message
    """
    # the helper is expected to yield '' when the version IRI is missing
    # and None when the ontology cannot be parsed
    iri = dash_utils.get_version_iri(file)

    if not iri:
        if iri == '':
            # ontology was read, but it declares no version IRI
            return format_msg('ERROR', [missing_version])
        # nothing usable came back - treat as a parse failure
        return format_msg('INFO', ['unable to parse ontology'])

    # a version IRI exists; it should follow the recommended pattern
    if re.search(pat, iri):
        return 'PASS'
    return format_msg('WARN', [bad_format.format(iri)])
Beispiel #2
0
def big_is_maintained(file):
    """Check fp 16 - maintenance - on large ontologies.

    Intended for large ontologies: the version IRI is obtained from the file
    path through ``dash_utils.get_version_iri`` instead of from a loaded
    OWLOntology object. When a version IRI is present, the date check is
    delegated to ``check_version_iri``.

    Args:
        file (str): path to ontology file

    Return:
        PASS, INFO, WARN, or ERROR with optional help message
    """
    # the helper is expected to yield '' when the version IRI is missing
    # and None when the ontology cannot be parsed
    iri = dash_utils.get_version_iri(file)

    if not iri:
        if iri == '':
            # there is no version IRI, so there is no date to inspect
            reason = 'missing version IRI to check date'
        else:
            # the ontology was in a bad format
            reason = 'unable to parse ontology'
        return format_msg('INFO', [reason])

    return check_version_iri(iri)
def save_invalid_uris(ns, error, warn):
    """Write invalid IRIs to a report file and summarize the outcome.

    When at least one ERROR or WARN IRI exists, every IRI is written to
    reports/principles/fp3-<ns>.tsv, one per line, prefixed by its level.

    Args:
        ns (str): ontology ID
        error (list): list of ERROR IRIs
        warn (list): list of WARN IRIs

    Return:
        ERROR or WARN with detailed message, or PASS if no errors or warnings.
    """
    error_count = len(error)
    warn_count = len(warn)

    # nothing to report: no file is written at all
    if error_count == 0 and warn_count == 0:
        return 'PASS'

    report_path = 'reports/principles/fp3-{0}.tsv'.format(ns)
    with open(report_path, 'w+') as out:
        for iri in error:
            out.write('ERROR\t{0}\n'.format(iri))
        for iri in warn:
            out.write('WARN\t{0}\n'.format(iri))

    # errors dominate the overall level; warnings are still mentioned
    details = []
    if error_count > 0:
        details.append(error_msg.format(error_count))
    if warn_count > 0:
        details.append(warn_msg.format(warn_count))
    details.append(help_msg)

    level = 'ERROR' if error_count > 0 else 'WARN'
    return format_msg(level, details)
Beispiel #4
0
def has_scope(data, domain_map):
    """Check fp 5 - scope.

    Read the "domain" entry from the registry data and compare it to the
    domains of all other ontologies in the map. If another ontology has the
    same domain, return INFO listing the overlapping ontologies. If the
    domain is missing, ERROR. Otherwise, PASS.

    The domain map is only read: it is never modified, and the ontology
    being checked does not have to be present in it.

    Args:
        data (dict): ontology data from registry
        domain_map (dict): map of ontology to domain

    Return:
        PASS, INFO, or ERROR with optional message
    """
    ns = data['id']
    if 'domain' not in data:
        return format_msg('ERROR', ['missing domain (scope)'])
    domain = data['domain']

    # skip this ontology's own entry (it would always match itself) by
    # filtering on the ID instead of popping it from the caller's dict
    same_domain = [
        ont_id for ont_id, other_domain in domain_map.items()
        if ont_id != ns and other_domain == domain
    ]

    if same_domain:
        same_domain_str = " ".join(same_domain)
        return format_msg('INFO', [info_msg.format(domain, same_domain_str)])

    return 'PASS'
Beispiel #5
0
def has_versioning(ontology):
    """Check fp 4 - versioning.

    Read the version IRI from the OWLOntology object. A missing version IRI
    is an ERROR. A version IRI that does not match the recommended pattern
    is a WARN. A matching version IRI is a PASS.

    Args:
        ontology (OWLOntology): ontology object

    Return:
        PASS, INFO, WARN, or ERROR with optional message
    """
    if ontology is None:
        return format_msg('INFO', ['unable to load ontology'])

    # orNull() hands back the version IRI, or a null value when absent
    iri = ontology.getOntologyID().getVersionIRI().orNull()
    if not iri:
        return format_msg('ERROR', [missing_version])

    # test the textual form of the IRI against the regex pattern
    iri_text = iri.toString()
    if re.search(pat, iri_text) is None:
        return format_msg('WARN', [bad_format.format(iri_text)])
    return 'PASS'
Beispiel #6
0
def has_users(data):
    """Check fp 9 - users.

    The check passes when the registry data carries both a 'tracker' and a
    'usages' entry whose values are not None.

    Args:
        data (dict): ontology registry data from YAML file

    Return:
        PASS or ERROR with optional help message
    """
    tracker = data['tracker'] if 'tracker' in data else None
    # TODO: usages should have a valid user that resolves
    #       and a description
    usages = data['usages'] if 'usages' in data else None

    no_tracker = tracker is None
    no_usages = usages is None

    if not no_tracker and not no_usages:
        return 'PASS'

    # tracker is required?
    if no_tracker and no_usages:
        problem = 'missing tracker and usages'
    elif no_tracker:
        problem = 'missing tracker'
    else:
        problem = 'missing usages'
    return format_msg('ERROR', [problem])
Beispiel #7
0
def check_properties(namespace, props, ro_props):
    """Compare the properties from an ontology to the RO properties.

    Each ontology property falls into one of three groups:
      * its IRI is an RO IRI: fine, nothing is recorded;
      * its IRI is not an RO IRI but its label is an RO label: recorded as
        a label clash (ERROR level);
      * neither its IRI nor its label is known to RO: recorded as a non-RO
        property (INFO level).

    When anything was recorded, a report is saved through
    ``save_invalid_relations``.

    Args:
        namespace (str): ontology ID
        props (dict): map of ontology property label to IRI
        ro_props (dict): map of RO property label to IRI

    Return:
        PASS or violation level with optional help message
    """
    # collect the RO IRIs once so each membership test is a hash lookup
    # instead of a scan over ro_props.values() per property
    ro_iris = set(ro_props.values())

    # properties that share an RO label but have a different IRI
    same_label = {}

    # properties that do not have an RO IRI
    # and do not share a label with an RO prop
    not_ro = {}

    for label, iri in props.items():
        if iri in ro_iris:
            # the IRI itself is an RO IRI - nothing to flag
            continue
        if label in ro_props:
            # the IRI is not an RO IRI, so it necessarily differs from the
            # IRI that RO assigns to this label
            same_label[iri] = label
        else:
            not_ro[iri] = label

    if not same_label and not not_ro:
        return 'PASS'

    # something was flagged: save a report file
    save_invalid_relations(namespace, ro_props, same_label, not_ro)

    if same_label:
        # a label clash is an error; non-RO properties are listed alongside
        messages = [ro_match.format(len(same_label))]
        if not_ro:
            messages.append(non_ro.format(len(not_ro)))
        messages.append(help_msg)
        return format_msg('ERROR', messages)

    return format_msg('INFO', [non_ro.format(len(not_ro)), help_msg])
def big_is_common_format(good_format):
    """Check FP 2 - Common Format on large ontologies.

    Args:
        good_format (bool): parse outcome for the ontology; None means the
            ontology could not be loaded at all

    Return:
        INFO if good_format is None, ERROR if it is False, PASS otherwise.
    """
    # identity checks on purpose: only the None and False singletons fail
    if good_format is not None and good_format is not False:
        return 'PASS'

    if good_format is None:
        return format_msg('INFO',
                          ['unable to load ontology (may be too large)'])

    return format_msg('ERROR', ['unable to parse ontology'])
Beispiel #9
0
def has_valid_labels(report):
    """Check fp 12 - naming conventions.

    The ROBOT report is queried for duplicate, missing and multiple label
    violations. Any of them is an ERROR; the message lists every violation
    type that occurred, in the order duplicate, multiple, missing.

    Args:
        report (Report): complete ROBOT report

    Return:
        PASS, INFO, or ERROR with optional help message
    """
    if report is None:
        return format_msg('INFO', ['report could not be generated'])

    # all error level
    n_duplicate = report.getViolationCount('duplicate_label')
    n_missing = report.getViolationCount('missing_label')
    n_multiple = report.getViolationCount('multiple_labels')

    # gather one message per violation type that actually occurred
    found = []
    if n_duplicate > 0:
        found.append(duplicate_msg.format(n_duplicate))
    if n_multiple > 0:
        found.append(multiple_msg.format(n_multiple))
    if n_missing > 0:
        found.append(missing_msg.format(n_missing))

    if not found:
        # no label violations present
        return 'PASS'

    found.append(help_msg)
    return format_msg('ERROR', found)
def process_report(robot_gateway, ns, report):
    """Save the Report and return the status.

    The report is handed to ROBOT's ReportOperation with its default
    options, targeting reports/robot/<ns>.tsv. The returned status is the
    most severe level that has at least one violation, together with the
    counts for that level and every less severe one.

    Args:
        robot_gateway (Gateway): py4j gateway to ROBOT
        ns (str): ontology namespace
        report (Report): completed Report object

    Return:
        ERROR, WARN, INFO, or PASS with optional help message
    """
    if report is None:
        return 'INFO|report failed'

    outfile = 'reports/robot/{0}.tsv'.format(ns)

    # print summary to terminal and save to report file
    options = robot_gateway.ReportOperation.getDefaultOptions()
    robot_gateway.ReportOperation.processReport(report, outfile, options)
    print('See {0} for details\n'.format(outfile))

    # violation totals, ordered from most to least severe
    errs = report.getTotalViolations('ERROR')
    warns = report.getTotalViolations('WARN')
    info = report.getTotalViolations('INFO')
    tallies = [
        ('ERROR', errs, '{0} errors'),
        ('WARN', warns, '{0} warnings'),
        ('INFO', info, '{0} info messages'),
    ]

    for position, (level, count, _) in enumerate(tallies):
        if count > 0:
            # report this level plus all less severe ones
            lines = [text.format(total)
                     for _, total, text in tallies[position:]]
            lines.append('Click to download report')
            return format_msg(level, lines)

    return 'PASS'
Beispiel #11
0
def check_version_iri(version_iri):
    """Check that the version IRI carries a date and how old that date is.

    The date is taken from the first group of the version pattern and is
    split on '-' into year, month and day. A date older than three years is
    an ERROR, older than two years a WARN, older than one year an INFO.

    Args:
        version_iri (str): version IRI from ontology header

    Return:
        PASS, INFO, WARN, or ERROR with optional help message
    """
    # search for a date in the version IRI using the regex pattern
    found = re.search(vpat, version_iri)
    if not found:
        # version IRI is not in the expected date format
        return format_msg('INFO',
                          ['version IRI does not have date information'])

    date = found.group(1)
    pieces = date.split('-')
    year, month, day = int(pieces[0]), int(pieces[1]), int(pieces[2])
    version_date = datetime.datetime(year, month, day)

    # age limits from most to least severe; a year counts as 365 days
    limits = (
        (3, 'ERROR', 'three'),
        (2, 'WARN', 'two'),
        (1, 'INFO', 'one'),
    )
    for years, level, word in limits:
        cutoff = datetime.datetime.now() \
            - datetime.timedelta(days=years * 365)
        if version_date < cutoff:
            return format_msg(level, [old_version_msg.format(date, word)])

    # has valid version IRI and it has been updated within the past year
    return 'PASS'
Beispiel #12
0
def is_maintained(ontology):
    """Check fp 16 - maintenance.

    The version IRI is read from the OWLOntology object and, when present,
    its string form is passed to ``check_version_iri`` for the date check.

    Args:
        ontology (OWLOntology): ontology object

    Return:
        PASS, INFO, WARN, or ERROR with optional help message
    """
    if ontology is None:
        return format_msg('INFO', ['unable to load ontology'])

    iri = ontology.getOntologyID().getVersionIRI().orNull()
    if not iri:
        # no version IRI (is Null), so there is no date to check
        return format_msg('INFO', ['missing version IRI to check date'])

    return check_version_iri(iri.toString())
Beispiel #13
0
def has_contact(data):
    """Check fp 11 - locus of authority.

    Validate the registry data against the contact schema. On a validation
    failure, the message distinguishes a malformed contact entry from a
    missing one.

    Args:
        data (dict): ontology registry data from YAML file

    Return:
        PASS or ERROR with optional help message
    """
    try:
        jsonschema.validate(data, contact_schema)
    except jsonschema.exceptions.ValidationError:
        if 'contact' not in data:
            # contact entry is missing from data
            reason = 'missing contact information'
        else:
            # contact is in data but is not proper format
            reason = 'invalid contact information'
        return format_msg('ERROR', [reason])
    else:
        return 'PASS'
Beispiel #14
0
def is_common_format(ontology):
    """Check FP 2 - Common Format.

    Args:
        ontology (OWLOntology): ontology object

    Return:
        PASS if OWLOntology is not None, ERROR otherwise.
    """
    # any non-None value means the ontology was loaded
    if ontology is not None:
        return 'PASS'
    return format_msg('ERROR', ['unable to load ontology'])
Beispiel #15
0
def has_valid_uris(robot_gateway, namespace, ontology):
    """Check FP 3 - URIs.

    This check ensures that all ontology entities follow NS_LOCALID.
    Annotation properties are not checked, as many are in legacy OBO format
    and use #LOCALID. Obsolete entities are also ignored. LOCALID should
    not be semantically meaningful, therefore numeric IDs should be used.
    Each remaining entity IRI is lower-cased and classified by
    ``check_uri``; IRIs classified as ERROR or WARN are collected and
    handed to ``save_invalid_uris``.

    Args:
        robot_gateway (Gateway): py4j gateway to ROBOT
        namespace (str): ontology ID
        ontology (OWLOntology): ontology object

    Return:
        INFO if ontology is None. ERROR if any errors, WARN if any warns, PASS
        otherwise.
    """
    if not ontology:
        return format_msg('INFO', ['unable to load ontology'])

    error = []
    warn = []

    for entity in robot_gateway.OntologyHelper.getEntities(ontology):
        # allow legacy annotation properties
        if entity.isOWLAnnotationProperty():
            continue

        # look at the deprecation annotations to decide whether the entity
        # is obsolete; the last such annotation seen determines the result
        is_obsolete = False
        annotations = ontology.getAnnotationAssertionAxioms(entity.getIRI())
        for annotation in annotations:
            prop_iri = annotation.getProperty().getIRI().toString()
            if prop_iri == owl_deprecated:
                is_obsolete = dash_utils.is_obsolete(annotation)

        if not is_obsolete:
            iri = entity.getIRI().toString().lower()
            level = check_uri(namespace, iri)
            if level == 'ERROR':
                error.append(iri)
            elif level == 'WARN':
                warn.append(iri)

    return save_invalid_uris(namespace, error, warn)
Beispiel #16
0
def has_valid_definitions(report):
    """Check fp 6 - textual definitions.

    If the ontology passes all ROBOT report definition checks, PASS. If there
    are any violations, return the most severe level together with a summary
    of every violation type that occurred. Duplicate and multiple
    definitions are ERROR level; missing definitions are WARN level.

    Args:
        report (Report): ROBOT report object

    Return:
        PASS, INFO, WARN, or ERROR with optional help message
    """
    if report is None:
        return 'INFO|report could not be generated'

    # error level violations
    duplicates = report.getViolationCount('duplicate_definition')
    multiples = report.getViolationCount('multiple_definitions')

    # warn level violation
    missing = report.getViolationCount('missing_definition')

    # one message per violation type present, each formatted with its own
    # count (the multiple-definitions message uses `multiples`)
    messages = []
    if duplicates > 0:
        messages.append(duplicate_msg.format(duplicates))
    if multiples > 0:
        messages.append(multiple_msg.format(multiples))
    if missing > 0:
        messages.append(missing_msg.format(missing))

    if not messages:
        # no violations found
        return 'PASS'

    # missing definitions alone are only a warning
    level = 'ERROR' if duplicates > 0 or multiples > 0 else 'WARN'
    messages.append(help_msg)
    return format_msg(level, messages)
Beispiel #17
0
def big_has_valid_uris(namespace, file):
    """Check FP 3 - URIs on a big ontology.

    This check ensures that all ontology entities follow NS_LOCALID.
    Annotation properties are not checked, as many are in legacy OBO format
    and use #LOCALID. LOCALID should not be semantically meaningful,
    therefore numeric IDs should be used. Each entity IRI found after the
    ontology header is lower-cased and classified by ``check_uri``; IRIs
    classified as ERROR or WARN are collected and handed to
    ``save_invalid_uris``.

    The file is scanned line by line in three phases: prefix declarations
    (to learn the OWL and RDF prefixes), the ontology header, and the
    entity declarations.

    Args:
        namespace (str): ontology ID
        file (str): path to ontology file

    Return:
        INFO if ontology IRIs cannot be parsed. ERROR if any errors, WARN if
        any warns, PASS otherwise.
    """
    error = []
    warn = []

    # scanning state: still reading prefixes / still inside the header /
    # an Ontology declaration has been seen
    prefixes = True
    header = True
    valid = False

    # prefixes
    owl = None
    rdf = None

    with open(file, 'r') as f:
        # TODO: rework to exclude deprecated classes
        for line in f:
            # Both substrings are tested explicitly, and only until the
            # Ontology declaration has been found; otherwise every
            # "rdf:about" entity line would be consumed by this branch and
            # no IRI would ever reach the check below.
            if prefixes and 'Ontology' in line and 'about' in line:
                if not owl and not rdf:
                    # did not find OWL and RDF - end now
                    return format_msg('INFO', ['unable to parse ontology'])

                # end prefixes
                prefixes = False
                # valid ontology to parse (found Ontology declaration)
                valid = True

                if line.strip().endswith('/>'):
                    # no ontology annotations - end header now
                    header = False

            elif prefixes and 'http://www.w3.org/2002/07/owl#' in line:
                # set the OWL prefix
                owl = dash_utils.get_prefix(line)

            elif prefixes and 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'\
                    in line:
                # set the RDF prefix
                rdf = dash_utils.get_prefix(line)

            elif header and '</{0}:Ontology>'.format(owl) in line:
                # end of Ontology annotations = end of header
                header = False

            elif not header and '{0}:about'.format(rdf) in line \
                    and '{0}:AnnotationProperty'.format(owl) not in line:
                # non-AP entity found - check the IRI
                iri = dash_utils.get_resource_value(line).lower()
                check = check_uri(namespace, iri)
                if check == 'ERROR':
                    error.append(iri)
                elif check == 'WARN':
                    warn.append(iri)

    if not valid:
        # not valid ontology
        return format_msg('INFO', ['unable to parse ontology'])

    return save_invalid_uris(namespace, error, warn)