Beispiel #1
0
    def contract(self, uri: str, fallback: bool = True) -> str:
        """
        Contract a given URI to a CURIE, based on mappings from `prefix_map`.

        An exact entry for the URI in `reverse_prefix_map` always wins.
        Otherwise the URI is contracted against `prefix_map` and, when that
        yields nothing and `fallback` is set, against the default prefix
        mappings of `prefixcommons.curie_util`.

        Parameters
        ----------
        uri: str
            A URI

        fallback: bool
            Determines whether to fallback to default prefix mappings, as determined
            by `prefixcommons.curie_util`, when URI prefix is not found in `reverse_prefix_map`.

        Returns
        -------
        str
            A CURIE corresponding to the URI, or ``None`` when no mapping
            is able to contract it

        """
        # always prioritize non-CURIE shortform
        if uri in self.reverse_prefix_map:
            return self.reverse_prefix_map[uri]

        curie_list = cu.contract_uri(uri, [self.prefix_map])
        if not curie_list and fallback:
            curie_list = cu.contract_uri(uri)

        # An empty result yields None whether or not the fallback was tried;
        # indexing the empty list here used to raise IndexError.
        return curie_list[0] if curie_list else None
Beispiel #2
0
def test_prefixes():
    """Round-trip a known IRI/CURIE pair and check strict vs. lenient lookups."""
    assert contract_uri(bp_iri) == [bp_id]
    assert expand_uri(bp_id) == bp_iri

    # Lenient mode reports an unknown prefix as an empty result ...
    assert contract_uri("FAKE", strict=False) == []

    # ... while strict mode must raise NoPrefix.
    raised_no_prefix = False
    try:
        contract_uri("FAKE", strict=True)
    except NoPrefix:
        raised_no_prefix = True
    assert raised_no_prefix
Beispiel #3
0
    def contract_uri(self, uri):
        """
        Contract `uri` to a single CURIE.

        The instance's own `context` is consulted first (when it is not
        empty); if it gives no result the module-level `contract_uri` is
        called with its defaults. When nothing matches, `uri` is returned
        unchanged.
        """
        if self.context:
            local_hits = contract_uri(uri, cmaps=[self.context])
            if local_hits:
                return local_hits[0]

        default_hits = contract_uri(uri)
        return default_hits[0] if default_hits else uri
Beispiel #4
0
    def contract_uri(self, uri):
        """
        Contract `uri` to its shortest CURIE.

        The instance's own `context` is consulted first (when it is not
        empty); if it gives no result the module-level `contract_uri` is
        called with its defaults. Among several candidates the shortest one
        wins, the first such candidate on ties. When nothing matches, `uri`
        is returned unchanged.
        """
        if self.context:
            local_hits = contract_uri(uri, cmaps=[self.context])
            if local_hits:
                # min() returns the first shortest entry, exactly what a
                # stable sort by length would put in front.
                return min(local_hits, key=len)

        default_hits = list(contract_uri(uri))
        if not default_hits:
            return uri
        return min(default_hits, key=len)
Beispiel #5
0
 def contract(self, uri):
     """
     Return the short form for `uri`, or None if there is none.

     Lookup order: an exact entry in `rprefixmap`, then contraction against
     `prefixmap`, then (only when `self.fallback` is set) contraction with
     the library defaults.
     """
     # always prioritize non-CURIE shortform
     if uri in self.rprefixmap:
         return self.rprefixmap[uri]

     candidates = cu.contract_uri(uri, [self.prefixmap])
     if candidates == [] and self.fallback:
         candidates = cu.contract_uri(uri)

     return None if candidates == [] else candidates[0]
Beispiel #6
0
    def from_str(ExtensionUnit, entity: str) -> Union:
        """
        Attempts to parse string entity as an ExtensionUnit
        If the `relation(term)` is not formatted correctly, an Error is returned.
        If the `relation` cannot be found in the `relations` dictionary, or its
        URI cannot be contracted to a CURIE, then an error is also returned.
        """
        parsed = relation_tuple.findall(entity)
        if len(parsed) != 1:
            # The text does not look like exactly one `relation(term)`.
            return Error(entity)

        rel, term = parsed[0]
        rel_uri = relations.lookup_label(rel)
        if rel_uri is None:
            # The relation label is unknown to the relations lookup.
            return Error(entity)

        term_curie = Curie.from_str(term)
        if isinstance(term_curie, Error):
            # The term inside the parentheses is not a well formed CURIE.
            return Error("`{}`: {}".format(term, term_curie.info))

        # With strict=False an unmatched URI comes back as an empty list;
        # report that as an Error instead of failing with IndexError.
        rel_curies = curie_util.contract_uri(rel_uri, strict=False)
        if not rel_curies:
            return Error(entity)

        rel_curie = Curie.from_str(rel_curies[0])
        return ExtensionUnit(rel_curie, term_curie)
Beispiel #7
0
def short_label(uri: URIRef):
    """
    Return a short label for `uri`.

    The result of `curie_util.contract_uri` is returned as-is when it is not
    empty; otherwise a warning is logged and the last `/`-separated segment
    of the URI is returned.

    NOTE(review): the two paths return different shapes (the contraction
    result vs. a plain string). Also, with ``strict=True`` the library may
    raise instead of returning an empty result, which would make the
    fallback unreachable -- confirm against the prefixcommons version in use.
    """
    contracted = curie_util.contract_uri(uri, prefix_map, strict=True)
    if contracted:
        return contracted

    logging.warning("contract_uri failed for URI {}".format(uri))
    # TODO: replace '_' in curie with ':'
    return uri.split('/')[-1]
Beispiel #8
0
def contract(uri: str,
             prefix_maps: Optional[List[Dict]] = None,
             fallback: bool = True) -> str:
    """
    Contract a URI to a CURIE using `prefix_maps`.

    Without any prefix map the default contexts (``monarch_context`` and
    ``obo_context``) are used directly. With prefix maps, the defaults are
    only consulted when the given maps produce nothing and `fallback` is set.

    If no mapping applies, the URI itself is returned.

    Parameters
    ----------
    uri: str
        A URI
    prefix_maps: Optional[List[Dict]]
        A list of prefix maps to use for mapping
    fallback: bool
        Whether to retry with the default contexts when `prefix_maps` does
        not contain a matching prefix

    Returns
    -------
    str
        The first CURIE found for the URI, or the URI unchanged

    """
    default_curie_maps = [
        get_jsonld_context("monarch_context"),
        get_jsonld_context("obo_context"),
    ]

    if prefix_maps:
        matches = contract_uri(uri, prefix_maps)
        if not matches and fallback:
            matches = contract_uri(uri, default_curie_maps)
    else:
        matches = contract_uri(uri, default_curie_maps)

    return matches[0] if matches else uri
Beispiel #9
0
def contract(uri: URIRef) -> str:
    """
    Contract `uri` against the module-level `cmaps`.

    The candidates are sorted so the same one is picked on every call.
    Returns None when there is no candidate at all.
    """
    candidates = contract_uri(str(uri), cmaps=cmaps)
    if not candidates:
        return None

    candidates.sort()
    return candidates[0]
Beispiel #10
0
 def _uri2id(self, uri):
     s = "{:s}".format(str(uri))
     for prefix,uribase in self.prefixmap.items():
         if (s.startswith(uribase)):
             s = s.replace(uribase,prefix+":")
             return s
     curies = contract_uri(uri)
     if len(curies) > 0:
         return curies[0]
     return s
Beispiel #11
0
def to_association(gaf_line: List[str],
                   report=None,
                   group="unknown",
                   dataset="unknown") -> assocparser.ParseResult:
    """
    Convert one GAF row, given as a list of column values, into a ParseResult.

    Rows with more than 17 columns are truncated (with a warning) and rows
    with 15 or 16 columns are padded with empty strings up to 17. Every
    validation failure is recorded on `report` and produces a ParseResult
    with no associations and its third positional flag set to True.

    Parameters
    ----------
    gaf_line: List[str]
        The column values of a single GAF line
    report:
        Report that collects errors and warnings; when None a new `Report`
        is created from `group` and `dataset`
    group: str
        Group name used for a newly created report
    dataset: str
        Dataset name used for a newly created report

    Returns
    -------
    assocparser.ParseResult
        Carries one GoAssociation on success and an empty list on failure
    """
    report = Report(group=group, dataset=dataset) if report is None else report
    source_line = "\t".join(gaf_line)

    if source_line == "":
        report.error(source_line,
                     "Blank Line",
                     "EMPTY",
                     "Blank lines are not allowed",
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    if len(gaf_line) > 17:
        # If we see more than 17 columns, we will just cut off the columns after column 17
        report.warning(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were more than 17 columns in this line. Proceeding by cutting off extra columns after column 17.",
            rule=1)
        gaf_line = gaf_line[:17]

    # 15 and 16 column rows are accepted; the missing trailing columns
    # are treated as empty.
    if 17 > len(gaf_line) >= 15:
        gaf_line += [""] * (17 - len(gaf_line))

    if len(gaf_line) != 17:
        report.error(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were {columns} columns found in this line, and there should be 15 (for GAF v1) or 17 (for GAF v2)"
            .format(columns=len(gaf_line)),
            rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    ## check for missing columns
    ## We use indices here because we run GO RULES before we split the vals into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    TAXON_INDEX = 12
    REFERENCE_INDEX = 5
    if gaf_line[DB_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_IDSPACE,
                     "EMPTY",
                     "col1 is empty",
                     taxon=gaf_line[TAXON_INDEX],
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    if gaf_line[DB_OBJECT_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_ID,
                     "EMPTY",
                     "col2 is empty",
                     taxon=gaf_line[TAXON_INDEX],
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    if gaf_line[TAXON_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_TAXON,
                     "EMPTY",
                     "taxon column is empty",
                     taxon=gaf_line[TAXON_INDEX],
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    if gaf_line[REFERENCE_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_ID,
                     "EMPTY",
                     "reference column 6 is empty",
                     taxon=gaf_line[TAXON_INDEX],
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    # The taxon column holds one taxon, or two separated by "|"; the second
    # one is the interacting taxon.
    taxon = gaf_line[TAXON_INDEX].split("|")
    taxon_curie = taxon[0].replace("taxon", "NCBITaxon")
    interacting_taxon = taxon[1].replace(
        "taxon", "NCBITaxon") if len(taxon) == 2 else None
    subject_curie = "{db}:{id}".format(db=gaf_line[DB_INDEX],
                                       id=gaf_line[DB_OBJECT_INDEX])
    subject = association.Subject(subject_curie, gaf_line[2], gaf_line[9],
                                  gaf_line[10].split("|"), gaf_line[11],
                                  taxon_curie)
    aspect = gaf_line[8]
    negated, relation, qualifiers = assocparser._parse_qualifier(
        gaf_line[3], aspect)

    # For allowed, see http://geneontology.org/docs/go-annotations/#annotation-qualifiers
    for q in qualifiers:

        if q not in allowed_qualifiers:
            report.error(
                source_line,
                Report.INVALID_QUALIFIER,
                q,
                "Qualifiers must be `contributes_to`, `colocalizes_with`, or `NOT`",
                taxon=gaf_line[TAXON_INDEX],
                rule=1)
            return assocparser.ParseResult(source_line, [],
                                           True,
                                           report=report)

    term_object = association.Term(gaf_line[4], taxon_curie)
    evidence = association.Evidence(
        ecomap.coderef_to_ecoclass(gaf_line[6]),
        [e for e in gaf_line[REFERENCE_INDEX].split("|") if e],
        [e for e in gaf_line[7].split("|") if e])
    subject_extensions = [
        association.ExtensionUnit("rdfs:subClassOf", gaf_line[16])
    ] if gaf_line[16] else []

    # Column 16 is a "|" separated list of conjunctions, each of which is a
    # "," separated list of `relation(term)` units.
    conjunctions = []
    if gaf_line[15]:
        for conjuncts in gaf_line[15].split("|"):
            extension_units = []
            for u in conjuncts.split(","):
                parsed = relation_tuple.findall(u)
                if len(parsed) == 1:
                    rel, term = parsed[0]
                    extension_units.append(association.ExtensionUnit(
                        rel, term))
                else:
                    # Otherwise, something went bad with the regex, and it's a bad parse
                    # The raw taxon column is reported, as in every other
                    # error above, not the split list.
                    report.error(source_line,
                                 Report.EXTENSION_SYNTAX_ERROR,
                                 u,
                                 "extensions should be relation(curie)",
                                 taxon=gaf_line[TAXON_INDEX],
                                 rule=1)
                    return assocparser.ParseResult(source_line, [],
                                                   True,
                                                   report=report)

            conjunction = association.ExtensionConjunctions(extension_units)
            conjunctions.append(conjunction)
    object_extensions = association.ExtensionExpression(conjunctions)
    looked_up_rel = relations.lookup_label(relation)
    if looked_up_rel is None:
        report.error(
            source_line,
            assocparser.Report.INVALID_QUALIFIER,
            relation,
            "Qualifer must be \"colocalizes_with\", \"contributes_to\", or \"NOT\"",
            taxon=gaf_line[TAXON_INDEX],
            rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    a = association.GoAssociation(
        source_line="\t".join(gaf_line),
        subject=subject,
        relation=curie_util.contract_uri(looked_up_rel)[0],
        object=term_object,
        negated=negated,
        qualifiers=qualifiers,
        aspect=aspect,
        interacting_taxon=interacting_taxon,
        evidence=evidence,
        subject_extensions=subject_extensions,
        object_extensions=object_extensions,
        provided_by=gaf_line[14],
        date=gaf_line[13],
        properties={})

    return assocparser.ParseResult(source_line, [a], False, report=report)
Beispiel #12
0
def curiefy(r):
    """
    Replace URI values in the binding row `r` with CURIEs, in place.

    Every entry whose ``'type'`` is ``'uri'`` has its ``'value'`` replaced by
    the first CURIE that `contract_uri` finds for it; entries without a
    match, and entries of any other type, are left untouched.
    """
    for binding in r.values():
        if binding['type'] != 'uri':
            continue
        compact = contract_uri(binding['value'])
        if compact:
            binding['value'] = compact[0]
Beispiel #13
0
def _triple_to_association(digraph, subject, predicate, obj):
    """
    Convert triple to association object

    Builds a dict with ``subject``, ``relation`` and ``object`` entries (each
    with ``id``, ``label`` and ``iri``), the contracted equivalent node ids
    found on the predicate (``subject_eq`` / ``object_eq``), ``provided_by``,
    and the ``evidence_types`` and ``publications`` collected from the
    association nodes returned by `_get_association_nodes`.

    :param digraph: graph holding the nodes and their ``lbl`` attribute
    :param subject: id of the subject node
    :param predicate: edge data dict; ``pred``, ``lbl`` and ``isDefinedBy``
        are read, ``equivalentOriginalNodeSource`` /
        ``equivalentOriginalNodeTarget`` are optional
    :param obj: id of the object node
    :return: the association dict
    """
    # Fetch the curie map once instead of once per lookup.
    curie_maps = [get_curie_map()]

    equivalents = {}
    for key in ('equivalentOriginalNodeTarget', 'equivalentOriginalNodeSource'):
        contracted = []
        if key in predicate:
            for eq in predicate[key]:
                curies = contract_uri(eq, curie_maps, shortest=True)
                if len(curies) != 0:
                    contracted.append(curies[0])
        equivalents[key] = contracted
    object_eq = equivalents['equivalentOriginalNodeTarget']
    subject_eq = equivalents['equivalentOriginalNodeSource']

    relation_lbl = predicate['lbl'][0] if predicate['lbl'] else None

    association = {
        'subject': {
            'id': subject,
            'label': digraph.node[subject]['lbl'],
            'iri': expand_uri(subject, curie_maps)
        },
        'subject_eq': subject_eq,
        'relation': {
            'id': predicate['pred'],
            'label': relation_lbl,
            'iri': expand_uri(predicate['pred'], curie_maps)
        },
        'object': {
            'id': obj,
            'label': digraph.node[obj]['lbl'],
            'iri': expand_uri(obj, curie_maps)
        },
        'object_eq': object_eq,
        'provided_by': predicate['isDefinedBy'],
        'evidence_types': [],
        'publications': []
    }

    # get association node linked to ECO codes and publications
    # Materialize once: if the helper hands back a one-shot iterator, counting
    # it first would otherwise leave nothing for the loop below.
    association_nodes = list(
        _get_association_nodes(digraph, subject, predicate, obj))

    if len(association_nodes) > 1:
        # This can happen with clique merging, for now log it
        # and combine both in association results
        logging.debug("Ambiguous association for %s, %s, %s",
                      subject, predicate, obj)

    # Ids already emitted, kept up to date while iterating so that parallel
    # edges to the same node cannot add it twice.
    seen_eco_codes = set()
    seen_pubs = set()
    for association_node in association_nodes:
        for neighbor, edges in digraph.adj[association_node].items():
            for edge in edges.values():
                if edge['pred'] == 'RO:0002558' and neighbor not in seen_eco_codes:
                    seen_eco_codes.add(neighbor)
                    association['evidence_types'].append({
                        'id': neighbor,
                        'label': digraph.node[neighbor]['lbl']
                    })
                elif edge['pred'] == 'dc:source' and neighbor not in seen_pubs:
                    seen_pubs.add(neighbor)
                    association['publications'].append({
                        'id': neighbor,
                        'label': digraph.node[neighbor]['lbl']
                    })

    return association
Beispiel #14
0
def contract_uri_wrap(uri):
    """Return the first CURIE `contract_uri` finds for `uri`, or `uri` itself if there is none."""
    matches = contract_uri(uri)
    return matches[0] if matches else uri
Beispiel #15
0
def to_association(gaf_line: List[str], report=None, group="unknown", dataset="unknown", qualifier_parser=assocparser.Qualifier2_1(), bio_entities=None) -> assocparser.ParseResult:
    """
    Convert one GAF row, given as a list of column values, into a ParseResult.

    Rows with more than 17 columns are truncated (with a warning) and rows
    with 15 or 16 columns are padded with empty strings up to 17. Every
    validation failure is recorded on `report` and produces a ParseResult
    with no associations and its third positional flag set to True.

    Parameters
    ----------
    gaf_line: List[str]
        The column values of a single GAF line
    report:
        Report that collects errors and warnings; when None a new `Report`
        is created from `group` and `dataset`
    group: str
        Group name used for a newly created report
    dataset: str
        Dataset name used for a newly created report
    qualifier_parser:
        Validator applied to the qualifier column (column 4)
    bio_entities:
        Lookup of known entities by subject CURIE; when it has an entry for
        the row's subject, that entry replaces the subject built from the
        row. An empty collection is used when None.

    Returns
    -------
    assocparser.ParseResult
        Carries one GoAssociation on success and an empty list on failure
    """
    report = Report(group=group, dataset=dataset) if report is None else report
    bio_entities = collections.BioEntities(dict()) if bio_entities is None else bio_entities
    source_line = "\t".join(gaf_line)

    if source_line == "":
        report.error(source_line, "Blank Line", "EMPTY", "Blank lines are not allowed", rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    if len(gaf_line) > 17:
        # If we see more than 17 columns, we will just cut off the columns after column 17
        report.warning(source_line, assocparser.Report.WRONG_NUMBER_OF_COLUMNS, "",
            msg="There were more than 17 columns in this line. Proceeding by cutting off extra columns after column 17.",
            rule=1)
        gaf_line = gaf_line[:17]

    # 15 and 16 column rows are accepted; the missing trailing columns
    # are treated as empty.
    if 17 > len(gaf_line) >= 15:
        gaf_line += [""] * (17 - len(gaf_line))

    if len(gaf_line) != 17:
        report.error(source_line, assocparser.Report.WRONG_NUMBER_OF_COLUMNS, "",
            msg="There were {columns} columns found in this line, and there should be 15 (for GAF v1) or 17 (for GAF v2)".format(columns=len(gaf_line)), rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    ## check for missing columns
    ## We use indices here because we run GO RULES before we split the vals into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    TAXON_INDEX = 12
    REFERENCE_INDEX = 5
    if gaf_line[DB_INDEX] == "":
        report.error(source_line, Report.INVALID_IDSPACE, "EMPTY", "col1 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    if gaf_line[DB_OBJECT_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "col2 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    if gaf_line[REFERENCE_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "reference column 6 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    parsed_taxons_result = gaf_line_validators["taxon"].validate(gaf_line[TAXON_INDEX])  # type: assocparser.ValidateResult
    if not parsed_taxons_result.valid:
        report.error(source_line, Report.INVALID_TAXON, parsed_taxons_result.original, parsed_taxons_result.message, taxon=parsed_taxons_result.original, rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    taxon = parsed_taxons_result.parsed[0]

    date = assocparser.parse_date(gaf_line[13], report, source_line)
    if date is None:
        return assocparser.ParseResult(source_line, [], True, report=report)

    # A second parsed taxon, when present, is the interacting taxon.
    interacting_taxon = parsed_taxons_result.parsed[1] if len(parsed_taxons_result.parsed) == 2 else None
    subject_curie = association.Curie(gaf_line[DB_INDEX], gaf_line[DB_OBJECT_INDEX])
    subject = association.Subject(subject_curie, gaf_line[2], [gaf_line[9]], gaf_line[10].split("|"), [association.map_gp_type_label_to_curie(gaf_line[11])], taxon)
    # Prefer the already known entity over the one assembled from this row.
    gpi_entity = bio_entities.get(subject_curie)
    if gpi_entity is not None and subject != gpi_entity:
        subject = gpi_entity

    # column 4 is qualifiers -> index 3
    # For allowed, see http://geneontology.org/docs/go-annotations/#annotation-qualifiers
    # We use the below validate to check validity of qualifiers, not as much to *parse* them into the GoAssociation object.
    # For GoAssociation we will use the qualifiers list from _parse_qualifier below. This is fine because that list does not include `NOT`, etc
    # This is confusing, and we can fix later on by consolidating qualifier and relation in GoAssociation.
    parsed_qualifiers = qualifier_parser.validate(gaf_line[3])
    if not parsed_qualifiers.valid:
        report.error(source_line, Report.INVALID_QUALIFIER, parsed_qualifiers.original, parsed_qualifiers.message, taxon=gaf_line[TAXON_INDEX], rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    aspect = gaf_line[8]
    negated, relation_label, qualifiers = assocparser._parse_qualifier(gaf_line[3], aspect)
    # Note: Relation label is grabbed from qualifiers, if any exist in _parse_qualifier
    qualifiers = [association.Curie.from_str(curie_util.contract_uri(relations.lookup_label(q), strict=False)[0]) for q in qualifiers]

    # Check the parsed curie itself: a Term wrapping it is never an Error, so
    # testing the Term could not catch a malformed GO id.
    go_term = association.Curie.from_str(gaf_line[4])
    if isinstance(go_term, association.Error):
        report.error(source_line, Report.INVALID_SYMBOL, gaf_line[4], "Problem parsing GO Term", taxon=gaf_line[TAXON_INDEX], rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    term_object = association.Term(go_term, taxon)

    # References
    references = [association.Curie.from_str(e) for e in gaf_line[REFERENCE_INDEX].split("|") if e]
    for r in references:
        if isinstance(r, association.Error):
            report.error(source_line, Report.INVALID_SYMBOL, gaf_line[REFERENCE_INDEX], "Problem parsing references", taxon=gaf_line[TAXON_INDEX], rule=1)
            return assocparser.ParseResult(source_line, [], True, report=report)

    # The trailing None guarantees gorefs[0] exists when there is no GO_REF.
    gorefs = [ref for ref in references if ref.namespace == "GO_REF"] + [None]
    eco_curie = ecomap.coderef_to_ecoclass(gaf_line[6], reference=gorefs[0])
    if eco_curie is None:
        report.error(source_line, Report.UNKNOWN_EVIDENCE_CLASS, gaf_line[6], msg="Expecting a known ECO GAF code, e.g ISS", rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    withfroms = association.ConjunctiveSet.str_to_conjunctions(gaf_line[7])
    if isinstance(withfroms, association.Error):
        report.error(source_line, Report.INVALID_SYMBOL, gaf_line[7], "Problem parsing with/from", taxon=gaf_line[TAXON_INDEX], rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    evidence_type = association.Curie.from_str(eco_curie)
    if isinstance(evidence_type, association.Error):
        report.error(source_line, Report.INVALID_SYMBOL, gaf_line[6], "Problem parsing evidence type", taxon=gaf_line[TAXON_INDEX], rule=1)
        # Stop here instead of building an association around an Error.
        return assocparser.ParseResult(source_line, [], True, report=report)

    evidence = association.Evidence(evidence_type, references, withfroms)
    reference_errors = [e for e in evidence.has_supporting_reference if isinstance(e, association.Error)]
    if reference_errors:
        report.error(source_line, Report.INVALID_SYMBOL, gaf_line[REFERENCE_INDEX], reference_errors[0].info, taxon=str(taxon), rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    subject_extensions = []
    if gaf_line[16]:
        subject_filler = association.Curie.from_str(gaf_line[16])
        if isinstance(subject_filler, association.Error):
            report.error(source_line, assocparser.Report.INVALID_ID, gaf_line[16], subject_filler.info, taxon=str(taxon), rule=1)
            return assocparser.ParseResult(source_line, [], True, report=report)
        # filler is not an Error, so keep moving
        subject_extensions.append(association.ExtensionUnit(association.Curie.from_str("rdfs:subClassOf"), subject_filler))

    conjunctions = []
    if gaf_line[15]:
        conjunctions = association.ConjunctiveSet.str_to_conjunctions(
            gaf_line[15],
            conjunct_element_builder=lambda el: association.ExtensionUnit.from_str(el))

        if isinstance(conjunctions, association.Error):
            report.error(source_line, Report.EXTENSION_SYNTAX_ERROR, conjunctions.info, "extensions should be relation(curie) and relation should have corresponding URI", taxon=str(taxon), rule=1)
            return assocparser.ParseResult(source_line, [], True, report=report)

    relation_uri = relations.lookup_label(relation_label)
    if relation_uri is None:
        report.error(source_line, assocparser.Report.INVALID_QUALIFIER, relation_label, "Could not find CURIE for relation `{}`".format(relation_label), taxon=str(taxon), rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    # We don't have to check that this is well formed because we're grabbing it from the known relations URI map.
    relation_curie = association.Curie.from_str(curie_util.contract_uri(relation_uri)[0])

    a = association.GoAssociation(
        source_line="\t".join(gaf_line),
        subject=subject,
        relation=relation_curie,
        object=term_object,
        negated=negated,
        qualifiers=qualifiers,
        aspect=aspect,
        interacting_taxon=interacting_taxon,
        evidence=evidence,
        subject_extensions=subject_extensions,
        object_extensions=conjunctions,
        provided_by=gaf_line[14],
        date=date,
        properties={})

    return assocparser.ParseResult(source_line, [a], False, report=report)
Beispiel #16
0
def from_1_2(gpad_line: List[str],
             report=None,
             group="unknown",
             dataset="unknown",
             bio_entities=None):
    """
    Convert one GPAD row, given as a list of column values, into a ParseResult.

    Rows with more than 12 columns are truncated (with a warning) and rows
    with 10 or 11 columns are padded with empty strings up to 12. Every
    validation failure is recorded on `report` and produces a ParseResult
    with no associations and its third positional flag set to True.

    Parameters
    ----------
    gpad_line: List[str]
        The column values of a single GPAD line
    report:
        Report that collects errors and warnings; when None a new `Report`
        is created from `group` and `dataset`
    group: str
        Group name used for a newly created report
    dataset: str
        Dataset name used for a newly created report
    bio_entities:
        Optional lookup of known entities by subject CURIE; a hit supplies
        the subject and its taxon. Without it (or without a hit) a
        placeholder subject with taxon ``NCBITaxon:0`` is used.

    Returns
    -------
    assocparser.ParseResult
        Carries one GoAssociation on success and an empty list on failure
    """
    # `report` defaults to None but is used on every error path below.
    report = Report(group=group, dataset=dataset) if report is None else report
    source_line = "\t".join(gpad_line)

    if source_line == "":
        report.error(source_line,
                     "Blank Line",
                     "EMPTY",
                     "Blank lines are not allowed",
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    if len(gpad_line) > 12:
        report.warning(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were more than 12 columns in this line. Proceeding by cutting off extra columns.",
            rule=1)

        gpad_line = gpad_line[:12]

    # 10 and 11 column rows are accepted; the missing trailing columns
    # are treated as empty.
    if 12 > len(gpad_line) >= 10:
        gpad_line += [""] * (12 - len(gpad_line))

    if len(gpad_line) != 12:
        report.error(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were {columns} columns found in this line, and there should be between 10 and 12"
            .format(columns=len(gpad_line)),
            rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    ## check for missing columns
    ## We use indices here because we run GO RULES before we split the vals into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    QUALIFIER = 2
    REFERENCE_INDEX = 4
    EVIDENCE_INDEX = 5
    if gpad_line[DB_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_IDSPACE,
                     "EMPTY",
                     "col1 is empty",
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    if gpad_line[DB_OBJECT_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_ID,
                     "EMPTY",
                     "col2 is empty",
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    if gpad_line[QUALIFIER] == "":
        # An empty qualifier is a qualifier problem, not a taxon problem.
        report.error(source_line,
                     Report.INVALID_QUALIFIER,
                     "EMPTY",
                     "qualifier column is empty",
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    if gpad_line[REFERENCE_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_ID,
                     "EMPTY",
                     "reference column is empty",
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    if gpad_line[EVIDENCE_INDEX] == "":
        report.error(source_line,
                     Report.INVALID_ID,
                     "EMPTY",
                     "Evidence column is empty",
                     rule=1)
        # Like the other required columns, an empty one ends the parse.
        return assocparser.ParseResult(source_line, [], True, report=report)

    # Placeholder subject and taxon, replaced below when the entity is known.
    taxon = association.Curie("NCBITaxon", "0")
    subject_curie = association.Curie(gpad_line[DB_INDEX],
                                      gpad_line[DB_OBJECT_INDEX])
    subject = association.Subject(subject_curie, "", [""], [], [], taxon)

    # `bio_entities` is optional, so only consult it when it was supplied.
    entity = bio_entities.get(
        subject_curie) if bio_entities is not None else None
    if entity is not None:
        subject = entity
        taxon = subject.taxon

    go_term = association.Curie.from_str(gpad_line[3])
    if go_term.is_error():
        report.error(source_line,
                     Report.INVALID_SYMBOL,
                     gpad_line[3],
                     "Problem parsing GO Term",
                     taxon=str(taxon),
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    term_object = association.Term(go_term, taxon)

    evidence_type = association.Curie.from_str(gpad_line[EVIDENCE_INDEX])
    if evidence_type.is_error():
        report.error(source_line,
                     Report.INVALID_SYMBOL,
                     gpad_line[EVIDENCE_INDEX],
                     "Problem parsing Evidence ECO Curie",
                     taxon=str(taxon),
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    references = [
        association.Curie.from_str(e)
        for e in gpad_line[REFERENCE_INDEX].split("|") if e
    ]
    for r in references:
        if r.is_error():
            report.error(source_line,
                         Report.INVALID_SYMBOL,
                         gpad_line[REFERENCE_INDEX],
                         "Problem parsing references",
                         taxon=str(taxon),
                         rule=1)
            return assocparser.ParseResult(source_line, [],
                                           True,
                                           report=report)

    withfroms = association.ConjunctiveSet.str_to_conjunctions(
        gpad_line[6])  # Returns a list of ConjuctiveSets or Error
    if isinstance(withfroms, association.Error):
        report.error(source_line,
                     Report.INVALID_SYMBOL,
                     gpad_line[6],
                     "Problem parsing With/From column",
                     taxon=str(taxon),
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    evidence = association.Evidence(evidence_type, references, withfroms)

    # Guaranteed to have at least one element, from above check
    raw_qs = gpad_line[QUALIFIER].split("|")
    negated = "NOT" in raw_qs

    looked_up_qualifiers = [
        relations.lookup_label(q) for q in raw_qs if q != "NOT"
    ]
    if None in looked_up_qualifiers:
        report.error(source_line,
                     Report.INVALID_QUALIFIER,
                     raw_qs,
                     "Could not find a URI for qualifier",
                     taxon=str(taxon),
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)
    if not looked_up_qualifiers:
        # The column held nothing but `NOT`, so there is no relation to put
        # on the association; report it rather than fail on qualifiers[0].
        report.error(source_line,
                     Report.INVALID_QUALIFIER,
                     raw_qs,
                     "Qualifier column does not contain a relation",
                     taxon=str(taxon),
                     rule=1)
        return assocparser.ParseResult(source_line, [], True, report=report)

    qualifiers = [
        association.Curie.from_str(curie_util.contract_uri(q)[0])
        for q in looked_up_qualifiers
    ]

    date = assocparser.parse_date(gpad_line[8], report, source_line)
    if date is None:
        return assocparser.ParseResult(source_line, [], True, report=report)

    interacting_taxon = None
    if gpad_line[7]:
        taxon_result = gpad_line_validators["taxon"].validate(gpad_line[7])
        if not taxon_result.valid:
            report.error(source_line,
                         Report.INVALID_TAXON,
                         taxon_result.original,
                         taxon_result.message,
                         taxon=str(taxon_result.original),
                         rule=1)
            return assocparser.ParseResult(source_line, [],
                                           True,
                                           report=report)
        else:
            interacting_taxon = taxon_result.parsed[0]

    conjunctions = []
    if gpad_line[10]:
        conjunctions = association.ConjunctiveSet.str_to_conjunctions(
            gpad_line[10],
            conjunct_element_builder=lambda el: association.ExtensionUnit.
            from_str(el))

        if isinstance(conjunctions, association.Error):
            report.error(source_line,
                         Report.EXTENSION_SYNTAX_ERROR,
                         conjunctions.info,
                         "extensions should be relation(curie)",
                         taxon=str(taxon),
                         rule=1)
            return assocparser.ParseResult(source_line, [],
                                           True,
                                           report=report)

    properties_list = association.parse_annotation_properties(gpad_line[11])

    # The first qualifier doubles as the association's relation.
    a = association.GoAssociation(source_line=source_line,
                                  subject=subject,
                                  relation=qualifiers[0],
                                  object=term_object,
                                  negated=negated,
                                  qualifiers=qualifiers,
                                  aspect=None,
                                  interacting_taxon=interacting_taxon,
                                  evidence=evidence,
                                  subject_extensions=[],
                                  object_extensions=conjunctions,
                                  provided_by=gpad_line[9],
                                  date=date,
                                  properties=properties_list)

    return assocparser.ParseResult(source_line, [a], False, report=report)
Beispiel #17
0
def to_association(
    gaf_line: List[str],
    report=None,
    group="unknown",
    dataset="unknown",
    qualifier_parser=Qualifier2_1()) -> assocparser.ParseResult:
    """
    Build a `GoAssociation` from one GAF line that is already split into columns.

    Parameters
    ----------
    gaf_line: List[str]
        Column values of a single GAF line. Lines with 15 or 16 columns are
        padded with empty strings up to 17; columns beyond the 17th are
        dropped after a warning. The list passed in is not modified.

    report:
        Report that receives warnings and errors. When None, a new
        `Report(group=group, dataset=dataset)` is created.

    group: str
        Only used to construct the `Report` when `report` is None.

    dataset: str
        Only used to construct the `Report` when `report` is None.

    qualifier_parser:
        Object whose `validate(str)` result exposes `valid`, `original` and
        `message`; it is applied to the qualifier column (index 3).

    Returns
    -------
    assocparser.ParseResult
        On success, a result holding the single parsed association and
        `False` as third positional argument. After any reported error, a
        result with an empty association list and `True` as third positional
        argument (presumably the "skipped" flag -- confirm against
        `assocparser.ParseResult`).

    """
    report = Report(group=group, dataset=dataset) if report is None else report
    source_line = "\t".join(gaf_line)

    def _rejected():
        # Shared shape of every early exit once an error has been reported.
        return assocparser.ParseResult(source_line, [], True, report=report)

    if source_line == "":
        report.error(source_line,
                     "Blank Line",
                     "EMPTY",
                     "Blank lines are not allowed",
                     rule=1)
        return _rejected()

    if len(gaf_line) > 17:
        # If we see more than 17 columns, we will just cut off the columns after column 17
        report.warning(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were more than 17 columns in this line. Proceeding by cutting off extra columns after column 17.",
            rule=1)
        gaf_line = gaf_line[:17]

    if 17 > len(gaf_line) >= 15:
        # Build a new list instead of `+=` so the caller's list is left untouched.
        gaf_line = gaf_line + [""] * (17 - len(gaf_line))

    if len(gaf_line) != 17:
        report.error(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were {columns} columns found in this line, and there should be 15 (for GAF v1) or 17 (for GAF v2)"
            .format(columns=len(gaf_line)),
            rule=1)
        return _rejected()

    ## check for missing columns
    ## We use indices here because we run GO RULES before we split the vals into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    TAXON_INDEX = 12
    REFERENCE_INDEX = 5
    # (column index, error type, message) for every column that must be filled;
    # checked in this order, the first empty one rejects the line.
    required_columns = (
        (DB_INDEX, Report.INVALID_IDSPACE, "col1 is empty"),
        (DB_OBJECT_INDEX, Report.INVALID_ID, "col2 is empty"),
        (TAXON_INDEX, Report.INVALID_TAXON, "taxon column is empty"),
        (REFERENCE_INDEX, Report.INVALID_ID, "reference column 6 is empty"),
    )
    for column_index, error_type, message in required_columns:
        if gaf_line[column_index] == "":
            report.error(source_line,
                         error_type,
                         "EMPTY",
                         message,
                         taxon=gaf_line[TAXON_INDEX],
                         rule=1)
            return _rejected()

    # The taxon column may hold "taxon:A|taxon:B"; the second entry, when
    # present, becomes the interacting taxon.
    taxon = gaf_line[12].split("|")
    taxon_curie = taxon[0].replace("taxon", "NCBITaxon")
    date = assocparser._normalize_gaf_date(gaf_line[13], report, taxon_curie,
                                           source_line)
    if date is None:
        return _rejected()

    interacting_taxon = taxon[1].replace(
        "taxon", "NCBITaxon") if len(taxon) == 2 else None
    subject_curie = "{db}:{id}".format(db=gaf_line[0], id=gaf_line[1])
    subject = association.Subject(subject_curie, gaf_line[2], gaf_line[9],
                                  gaf_line[10].split("|"), gaf_line[11],
                                  taxon_curie)
    aspect = gaf_line[8]
    negated, relation, qualifiers = assocparser._parse_qualifier(
        gaf_line[3], aspect)

    # column 4 is qualifiers -> index 3
    # For allowed, see http://geneontology.org/docs/go-annotations/#annotation-qualifiers
    parsed_qualifiers = qualifier_parser.validate(gaf_line[3])
    if not parsed_qualifiers.valid:
        report.error(source_line,
                     Report.INVALID_QUALIFIER,
                     parsed_qualifiers.original,
                     parsed_qualifiers.message,
                     taxon=gaf_line[TAXON_INDEX],
                     rule=1)
        return _rejected()

    object = association.Term(gaf_line[4], taxon_curie)
    evidence = association.Evidence(
        ecomap.coderef_to_ecoclass(gaf_line[6]),
        [e for e in gaf_line[5].split("|") if e],
        association.ConjunctiveSet.str_to_conjunctions(gaf_line[7]))

    subject_extensions = [
        association.ExtensionUnit("rdfs:subClassOf", gaf_line[16])
    ] if gaf_line[16] else []

    conjunctions = []
    if gaf_line[15]:
        conjunctions = association.ConjunctiveSet.str_to_conjunctions(
            gaf_line[15],
            conjunct_element_builder=lambda el: association.ExtensionUnit.
            from_str(el))

        if isinstance(conjunctions, association.Error):
            # Report the raw taxon column (a string) like every other error
            # in this function, not the split list held in `taxon`.
            report.error(source_line,
                         Report.EXTENSION_SYNTAX_ERROR,
                         conjunctions.info,
                         "extensions should be relation(curie)",
                         taxon=gaf_line[TAXON_INDEX],
                         rule=1)
            return _rejected()

    looked_up_rel = relations.lookup_label(relation)
    # contract_uri can come back empty when no prefix matches; treat that the
    # same as an unknown relation instead of failing on `[0]`.
    relation_curies = (curie_util.contract_uri(looked_up_rel)
                       if looked_up_rel is not None else [])
    if not relation_curies:
        report.error(source_line,
                     assocparser.Report.INVALID_QUALIFIER,
                     relation,
                     "Could not find CURIE for relation `{}`".format(relation),
                     taxon=gaf_line[TAXON_INDEX],
                     rule=1)
        return _rejected()

    a = association.GoAssociation(
        # Re-joined after padding/truncation, so it can differ from source_line.
        source_line="\t".join(gaf_line),
        subject=subject,
        relation=relation_curies[0],
        object=object,
        negated=negated,
        qualifiers=qualifiers,
        aspect=aspect,
        interacting_taxon=interacting_taxon,
        evidence=evidence,
        subject_extensions=subject_extensions,
        object_extensions=conjunctions,
        provided_by=gaf_line[14],
        date=date,
        properties={})

    return assocparser.ParseResult(source_line, [a], False, report=report)
Beispiel #18
0
 def curie(self, uri: UriString) -> str:
     """
     Return the first CURIE that `contract_uri` produces for `uri`.

     The URI is converted with `str()` before contraction; when no CURIE is
     produced, that string form is returned instead.
     """
     uri_text = str(uri)
     candidates = contract_uri(uri_text)
     if not candidates:
         return uri_text
     return candidates[0]
def uri_to_curie(uri: str, curie_map=NAMESPACES) -> str:
    """
    Contract `uri` using only `curie_map`, asking `contract_uri` for the
    shortest match.

    Returns the first CURIE produced, or `uri` itself when there is none.
    """
    matches = contract_uri(uri, [curie_map], shortest=True)
    return matches[0] if matches else uri
Beispiel #20
0
def contract_uri_wrapper(id):
    """
    Contract `id` against the module-level `prefix_context` only.

    The result of `contract_uri` is returned unchanged (other callers treat
    it as a list of candidate CURIEs -- confirm before indexing into it).
    """
    context_maps = [prefix_context]
    return contract_uri(id, cmaps=context_maps)
Beispiel #21
0
def id(uri):
    """
    Return the first CURIE that `contract_uri` produces for `uri`.

    When nothing is produced, fall back to `uri.toPython()`, so `uri` must
    provide that method.
    """
    candidates = contract_uri(uri)
    if not candidates:
        return uri.toPython()
    return candidates[0]
Beispiel #22
0
def to_association(gpad_line: List[str],
                   report=None,
                   group="unknown",
                   dataset="unknown") -> assocparser.ParseResult:
    """
    Build a `GoAssociation` from one GPAD line that is already split into columns.

    Parameters
    ----------
    gpad_line: List[str]
        Column values of a single GPAD line. Lines with 10 or 11 columns are
        padded with empty strings up to 12; columns beyond the 12th are
        dropped after a warning. The list passed in is not modified.

    report:
        Report that receives warnings and errors. When None, a new
        `Report(group=group, dataset=dataset)` is created.

    group: str
        Only used to construct the `Report` when `report` is None.

    dataset: str
        Only used to construct the `Report` when `report` is None.

    Returns
    -------
    assocparser.ParseResult
        On success, a result holding the single parsed association and
        `False` as third positional argument. After any reported error, a
        result with an empty association list and `True` as third positional
        argument (presumably the "skipped" flag -- confirm against
        `assocparser.ParseResult`).

    """
    report = Report(group=group, dataset=dataset) if report is None else report

    source_line = "\t".join(gpad_line)

    def _rejected():
        # Shared shape of every early exit once an error has been reported.
        return assocparser.ParseResult(source_line, [], True, report=report)

    if len(gpad_line) > 12:
        report.warning(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were more than 12 columns in this line. Proceeding by cutting off extra columns.",
            rule=1)

        gpad_line = gpad_line[:12]

    if 12 > len(gpad_line) >= 10:
        # Build a new list instead of `+=` so the caller's list is left untouched.
        gpad_line = gpad_line + [""] * (12 - len(gpad_line))

    if len(gpad_line) != 12:
        report.error(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg=
            "There were {columns} columns found in this line, and there should be between 10 and 12"
            .format(columns=len(gpad_line)))
        return _rejected()

    ## check for missing columns
    ## We use indices here because we run GO RULES before we split the vals into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    QUALIFIER = 2
    REFERENCE_INDEX = 4
    EVIDENCE_INDEX = 5
    EXTENSION_INDEX = 10
    PROPERTIES_INDEX = 11
    # (column index, error type, message) for every column that must be filled;
    # checked in this order, the first empty one rejects the line.
    required_columns = (
        (DB_INDEX, Report.INVALID_IDSPACE, "col1 is empty"),
        (DB_OBJECT_INDEX, Report.INVALID_ID, "col2 is empty"),
        (QUALIFIER, Report.INVALID_QUALIFIER, "qualifier column is empty"),
        (REFERENCE_INDEX, Report.INVALID_ID, "reference column is empty"),
        (EVIDENCE_INDEX, Report.INVALID_ID, "Evidence column is empty"),
    )
    for column_index, error_type, message in required_columns:
        if gpad_line[column_index] == "":
            report.error(source_line,
                         error_type,
                         "EMPTY",
                         message,
                         rule=1)
            return _rejected()

    # No taxon is read from the line here; errors below report an empty taxon.
    taxon = ""
    subject_curie = "{db}:{id}".format(db=gpad_line[0], id=gpad_line[1])
    subject = association.Subject(subject_curie, "", "", [], "", "")
    object = association.Term(gpad_line[3], "")
    evidence = association.Evidence(gpad_line[5],
                                    [e for e in gpad_line[4].split("|") if e],
                                    [e for e in gpad_line[6].split("|") if e])

    raw_qs = gpad_line[2].split("|")
    negated = "NOT" in raw_qs
    looked_up_qualifiers = [
        relations.lookup_label(q) for q in raw_qs if q != "NOT"
    ]
    if None in looked_up_qualifiers:
        report.error(source_line,
                     Report.INVALID_QUALIFIER,
                     raw_qs,
                     "Could not find a URI for qualifier",
                     taxon=taxon,
                     rule=1)
        return _rejected()

    qualifiers = []
    for qualifier_uri in looked_up_qualifiers:
        # contract_uri can come back empty when no prefix matches; report it
        # instead of failing on `[0]`.
        qualifier_curies = curie_util.contract_uri(qualifier_uri)
        if not qualifier_curies:
            report.error(source_line,
                         Report.INVALID_QUALIFIER,
                         raw_qs,
                         "Could not find a CURIE for qualifier",
                         taxon=taxon,
                         rule=1)
            return _rejected()
        qualifiers.append(qualifier_curies[0])

    # Annotation extensions are column 11 (index 10); index 11 holds the
    # annotation properties parsed further down.
    conjunctions = []
    if gpad_line[EXTENSION_INDEX]:
        for conjuncts in gpad_line[EXTENSION_INDEX].split("|"):
            extension_units = []
            for u in conjuncts.split(","):
                parsed = relation_tuple.findall(u)
                if len(parsed) == 1:
                    rel, term = parsed[0]
                    extension_units.append(association.ExtensionUnit(
                        rel, term))
                else:
                    # Otherwise, something went bad with the regex, and it's a bad parse
                    report.error(source_line,
                                 Report.EXTENSION_SYNTAX_ERROR,
                                 u,
                                 "extensions should be relation(curie)",
                                 taxon=taxon,
                                 rule=1)
                    return _rejected()

            conjunction = association.ExtensionConjunctions(extension_units)
            conjunctions.append(conjunction)
    object_extensions = association.ExtensionExpression(conjunctions)

    # "key=value" pairs separated by "|". Splitting on the first "=" only keeps
    # values that contain "=" intact, and a bare key maps to "".
    properties = {}
    for prop in gpad_line[PROPERTIES_INDEX].split("|"):
        if prop:
            key, _, value = prop.partition("=")
            properties[key] = value

    a = association.GoAssociation(
        # Re-joined after padding/truncation, so it can differ from source_line.
        source_line="\t".join(gpad_line),
        subject=subject,
        relation="",
        object=object,
        negated=negated,
        qualifiers=qualifiers,
        aspect=None,
        interacting_taxon=gpad_line[7],
        evidence=evidence,
        subject_extensions=[],
        object_extensions=object_extensions,
        provided_by=gpad_line[9],
        date=gpad_line[8],
        properties=properties)

    return assocparser.ParseResult(source_line, [a], False, report=report)
Beispiel #23
0
 def get(self, uri):
     """
     Contract `uri` by delegating to `contract_uri`.

     The result of `contract_uri` is handed back unchanged.
     """
     contracted = contract_uri(uri)
     return contracted