Exemple #1
0
def make_keys_from_gaf(gaf: association.GoAssociation) -> List[AnnotationKey]:
    """
    Build the AnnotationKeys described by a single GoAssociation.

    The object id, relation and object taxon are stringified and expanded to
    URIs using `prefix_context`. One key is produced per conjunction found in
    `gaf.object_extensions`; when there are no extensions, a single key with
    an empty ConjunctiveSet is produced instead.
    """

    def to_uri(value):
        # Stringify first, then expand against the shared prefix context
        return curie_util.expand_uri(str(value), cmaps=[prefix_context])

    term = to_uri(gaf.object.id)
    relation = to_uri(gaf.relation)
    taxon = to_uri(gaf.object.taxon)
    extensions = gaf.object_extensions  # type: List[association.ConjunctiveSet]

    if not extensions:
        # No extensions: exactly one key, carrying an empty conjunction
        empty_conjunction = association.ConjunctiveSet(frozenset([]))
        return [
            AnnotationKey(RelationTo(relation, term), taxon, empty_conjunction)
        ]

    keys = []  # type: List[AnnotationKey]
    for conjunction in extensions:
        # Each conjunction (e.g. foo(bar),hello(world)) is rebuilt on top of a
        # frozenset of its elements instead of a list
        frozen = association.ConjunctiveSet(frozenset(conjunction.elements))
        keys.append(AnnotationKey(RelationTo(relation, term), taxon, frozen))
    return keys
Exemple #2
0
def make_keys_from_gaf(gaf: List[str]) -> List[AnnotationKey]:
    """
    Build AnnotationKeys from one GAF row given as a list of column strings.

    Columns read (0-based): 4 (term, expanded with `prefix_context`),
    8 (looked up in `aspect_relation_map`), 12 (taxon) and 15 (extensions).
    The extension column is split on "|" into conjunctions and each
    conjunction on "," into units; units that do not match `relation_tuple`
    are skipped. One AnnotationKey is produced per conjunction.
    """
    term = curie_util.expand_uri(gaf[4], cmaps=[prefix_context])
    relation = aspect_relation_map[gaf[8]]
    # Only the first "|"-separated taxon is used; its local id follows the ":"
    first_taxon = gaf[12].split("|")[0]
    taxon_local_id = first_taxon.split(":")[1]
    taxon = "http://purl.obolibrary.org/obo/NCBITaxon_{}".format(
        taxon_local_id)
    extension = gaf[15]

    annotation_keys = []  # type: List[AnnotationKey]
    for conjunction in extension.split("|"):
        # conjunction is foo(bar),hello(world)
        units = []  # type: List[association.ExtensionUnit]
        for raw_unit in conjunction.split(","):
            # raw_unit is foo(bar)
            found_rel = relation_tuple.match(raw_unit)
            if not found_rel:
                continue
            rel_label, filler = found_rel.groups()
            ext_relation = lookup_relation(rel_label)  # type: Uri
            fill_id = curie_util.expand_uri(
                filler, cmaps=[prefix_context])  # type: Uri
            units.append(association.ExtensionUnit(ext_relation, fill_id))

        extension_conjunction = association.ExtensionConjunctions(
            frozenset(units))
        annotation_keys.append(
            AnnotationKey(RelationTo(relation, term), taxon,
                          extension_conjunction))

    return annotation_keys
Exemple #3
0
    def expand(self, curie: str, fallback: bool = True) -> str:
        """
        Expand a given CURIE to an URI, based on mappings from `prefix_map`.

        If `curie` is itself a key of `prefix_map` and the mapped value is a
        string, that value is returned directly. If the mapped value is not a
        string it is returned unchanged (no further expansion is attempted).

        Parameters
        ----------
        curie: str
            A CURIE
        fallback: bool
            Determines whether to fallback to default prefix mappings, as determined
            by `prefixcommons.curie_util`, when CURIE prefix is not found in `prefix_map`.

        Returns
        -------
        str
            A URI corresponding to the CURIE

        """
        # Imported locally so this method does not rely on a module-level import
        import logging

        if curie in self.prefix_map:
            uri = self.prefix_map[curie]
            # TODO: prefixcommons.curie_util will not unfold objects in json-ld context
            if isinstance(uri, str):
                return uri
        else:
            uri = cu.expand_uri(curie, [self.prefix_map])
            # An unchanged result means no prefix in `prefix_map` matched
            if uri == curie and fallback:
                uri = cu.expand_uri(curie)
        # Diagnostic only: goes to the logging system rather than stdout
        logging.getLogger(__name__).debug("CURIE %s to IRI %s", curie, uri)
        return uri
Exemple #4
0
def expand(curie: str, prefix_maps: Optional[List[dict]] = None, fallback: bool = True) -> str:
    """
    Turn a CURIE into a URI.

    When `prefix_maps` is given it is tried first; the default contexts
    (`monarch_context`, then `obo_context`) are used when no `prefix_maps`
    were supplied, or when the supplied maps left the CURIE unchanged and
    `fallback` is true.

    If no mapping applies, the CURIE itself is returned as the IRI.

    Parameters
    ----------
    curie: str
        A CURIE
    prefix_maps: Optional[List[dict]]
        Prefix maps to try before the defaults
    fallback: bool
        Whether to retry with the default contexts when `prefix_maps` did not
        expand the CURIE

    Returns
    -------
    str
        The URI for the CURIE

    """
    fallback_maps = [get_jsonld_context('monarch_context'), get_jsonld_context('obo_context')]

    if not prefix_maps:
        return expand_uri(curie, fallback_maps)

    expanded = expand_uri(curie, prefix_maps)
    if fallback and expanded == curie:
        # Caller-supplied maps did not recognise the prefix
        expanded = expand_uri(curie, fallback_maps)
    return expanded
Exemple #5
0
 def expand(self, id):
     """
     Expand `id` to a URI using `self.prefixmap`.

     A string value stored directly under `id` in the prefix map is returned
     as-is. Otherwise the id is expanded with the prefix map, and, if that
     leaves it unchanged and `self.fallback` is set, with the default maps
     of the curie utility.
     """
     direct = self.prefixmap[id] if id in self.prefixmap else None
     # todo: curie util will not unfold objects in json-ld context
     if isinstance(direct, str):
         return direct

     expanded = cu.expand_uri(id, [self.prefixmap])
     if self.fallback and expanded == id:
         expanded = cu.expand_uri(id)
     return expanded
Exemple #6
0
    def to_gaf_2_2_tsv(self) -> List:
        """
        Render this association as a GAF 2.2 row (a list of column values).

        Side effect: the namespace of the object taxon, and of the interacting
        taxon when present, is overwritten with "taxon".
        """
        gp_isoforms = self.subject_extensions[0].term if self.subject_extensions else ""

        # Qualifier CURIE -> URI -> label; "NOT" leads the column when negated
        qual_labels = []
        for q in self.qualifiers:
            qual_uri = curie_util.expand_uri(str(q), strict=False)
            qual_labels.append(relations.lookup_uri(qual_uri))
        if self.negated:
            qual_labels = ["NOT"] + qual_labels
        qualifier = "|".join(qual_labels)

        primary_taxon = self.object.taxon
        primary_taxon.namespace = "taxon"
        taxon = str(primary_taxon)
        interacting = self.interacting_taxon
        if interacting:
            interacting.namespace = "taxon"
            taxon = "{taxon}|{interacting}".format(
                taxon=taxon, interacting=str(interacting))

        return [
            self.subject.id.namespace,
            self.subject.id.identity,
            self.subject.label,
            qualifier,
            str(self.object.id),
            "|".join(
                [str(ref) for ref in self.evidence.has_supporting_reference]),
            ecomap.ecoclass_to_coderef(str(self.evidence.type))[0],
            ConjunctiveSet.list_to_str(self.evidence.with_support_from),
            self.aspect if self.aspect else "",
            self.subject.fullname,
            "|".join(self.subject.synonyms),
            self.subject.type,
            taxon,
            self.date,
            self.provided_by,
            ConjunctiveSet.list_to_str(self.object_extensions),
            gp_isoforms,
        ]
 def _predicate(self, name: SlotDefinitionName) -> IRIREF:
     """
     Return the predicate IRI for the slot called `name`.

     The slot's first mapping, expanded to a URI, is used when the slot has
     mappings; otherwise the IRI is taken from the BIOENTITY namespace using
     the underscored slot name.
     """
     mappings = self.schema.slots[name].mappings
     if not mappings:
         # TODO: look at the RDF to figure out what URI's go here
         return IRIREF(BIOENTITY[underscore(name)])
     return IRIREF(cu.expand_uri(mappings[0]))
    def full_statement_bnode_in_model(self, model):
        """
        Look for an existing "full statement" in the model's graph.

        A full statement consists of:
          - mechanism["term"] ENABLED_BY full_id_a()
          - regulated_activity["term"] ENABLED_BY full_id_b()
          - a triple linking the subjects of those two statements through
            `self.relation`

        Returns the first (mechanism, relation, regulated activity) triple
        found in the graph, or None when there is none.
        """
        graph = model.writer.writer.graph

        a_enables_triples = model.triples_by_ids(self.mechanism["term"], ENABLED_BY, self.full_id_a())
        # Materialised so it can be re-iterated for every a-triple even if
        # triples_by_ids hands back a one-shot iterator
        b_enables_triples = list(
            model.triples_by_ids(self.regulated_activity["term"], ENABLED_BY, self.full_id_b()))

        # The relation does not change between candidates: expand it once
        relation_uri = URIRef(expand_uri(self.relation))

        for a_triple in a_enables_triples:
            for b_triple in b_enables_triples:
                candidate_reg_triple = (a_triple[0], relation_uri, b_triple[0])
                if candidate_reg_triple in graph:
                    return candidate_reg_triple
        return None
Exemple #9
0
    def _do_conjunctions_match_constraint(self, conjunction, term, constraints, conjunction_counts):
        """
        Check every extension of `conjunction` against `constraints`.

        An extension is accepted by a constraint when the relation URIs are
        equal, the extension term's namespace is listed in the constraint's
        "namespaces", `str(term)` is listed in its "primary_terms" and, if the
        constraint has a "cardinality", no count in `conjunction_counts`
        exceeds it.

        Returns True only if every extension is accepted by at least one
        constraint (trivially True when the conjunction has no elements).
        """
        for ext in conjunction.elements:
            # Does not depend on the constraint: expand once per extension
            ext_relation_uri = curie_util.expand_uri(str(ext.relation))

            extension_good = False
            for constraint in constraints:
                constraint_relation_uri = association.relations.lookup_label(constraint["relation"])
                if ext_relation_uri != constraint_relation_uri:
                    continue
                if not (ext.term.namespace in constraint["namespaces"] and str(term) in constraint["primary_terms"]):
                    continue

                # Namespace and GO term match; apply the cardinality limit if there is one
                if "cardinality" in constraint:
                    limit = constraint["cardinality"]
                    extension_good = not any(
                        count > limit for count in dict(conjunction_counts).values())
                else:
                    extension_good = True

                if extension_good:
                    # This extension is satisfied; move on to the next one
                    break

            # No constraint accepted `ext`, so the whole conjunction fails
            if not extension_good:
                return False

        # Every extension was accepted by some constraint
        return True
    def get_uri(self, ncname: str) -> Optional[str]:
        """ Return the URI that the prefix `ncname` expands to

        The prefix is expanded as the CURIE ``ncname:`` against
        ``self.curi_maps``. The result is returned only when it is non-empty
        and starts with ``http``; otherwise None is returned.

        @param ncname: prefix to look up
        """
        expanded = cu.expand_uri(ncname + ':', self.curi_maps)
        if expanded and expanded.startswith('http'):
            return expanded
        return None
Exemple #11
0
    def sparql(self, select='*', body=None, inject_prefixes=None, single_column=False):
        """
        Execute a SPARQL query.

        The query is specified using `select` and `body` parameters.
        The argument for the Named Graph is injected into the query.

        The select parameter should be either '*', a single var name, or a list of
        vars (not prefixed with '?').

         - If '*' is passed, then the result is a list of dicts, { $var: {value: $val } }
         - If a single var name (str) is passed, then the result is a simple list of values
         - If a list of vars is passed, then the result is a list of lists (one inner list per row)
         - Unless single_column=True, in which case the results are a simple list of values from the first var

        The inject_prefixes argument can be used to inject a list of prefixes - these are expanded
        using the prefixcommons library

        An empty list is returned when the query yields no bindings.
        """
        if inject_prefixes is None:
            inject_prefixes = []
        namedGraph = get_named_graph(self.handle)

        rows_as_lists = False
        if select is None or select == '*':
            # cols=None means "hand back the raw bindings"
            cols = [] if single_column else None
            select_val = '*'
        else:
            if isinstance(select, str):
                cols = [select]
            else:
                cols = list(select)
                rows_as_lists = True
            # SPARQL separates projected variables with whitespace, not commas
            select_val = " ".join('?' + c for c in cols)

        plist = ["prefix {}: <{}> ".format(p, expand_uri(p + ":"))
                 for p in inject_prefixes if p is not None and p != ""]
        prefixes = "\n".join(plist)
        query = """
        {prefixes}
        SELECT {s} WHERE {{
        GRAPH <{g}>  {{
        {b}
        }}
        }}
        """.format(prefixes=prefixes, s=select_val, b=body, g=namedGraph)
        bindings = run_sparql(query)
        if len(bindings) == 0:
            return []
        if cols is None:
            return bindings
        if single_column:
            # Values of whichever variable comes first in the first binding
            c = list(bindings[0].keys())[0]
            return [r[c]['value'] for r in bindings]
        if rows_as_lists:
            return [[r[c]['value'] for c in cols] for r in bindings]
        # A single var name given as a str keeps yielding a flat list
        only_col = cols[0]
        return [r[only_col]['value'] for r in bindings]
def test_prefixes():
    """
    Check CURIE <-> IRI conversion for the `bp_id` / `bp_iri` pair, and that
    contracting an unknown string yields [] when lenient and raises NoPrefix
    when strict.
    """
    assert contract_uri(bp_iri) == [bp_id]
    assert expand_uri(bp_id) == bp_iri
    assert contract_uri("FAKE", strict=False) == []

    raised_no_prefix = False
    try:
        contract_uri("FAKE", strict=True)
    except NoPrefix:
        raised_no_prefix = True
    assert raised_no_prefix
Exemple #13
0
    def to_gaf_2_1_tsv(self) -> List:
        """
        Render this GoAssociation as a GAF 2.1 row: a list of column strings.

        A lone qualifier other than `contributes_to` / `colocalizes_with` is
        dropped with a warning. Side effect: the namespace of the object taxon,
        and of the interacting taxon when present, is overwritten with "taxon".
        """
        gp_isoforms = self.subject_extensions[0].term if self.subject_extensions else ""

        allowed_qualifiers = {"contributes_to", "colocalizes_with"}

        # Curie Object -> CURIE Str -> URI -> Label
        qual_labels = []
        for q in self.qualifiers:
            qual_uri = curie_util.expand_uri(str(q), strict=False)
            qual_labels.append(relations.lookup_uri(qual_uri))

        has_single_bad_qualifier = (len(qual_labels) == 1
                                    and qual_labels[0] not in allowed_qualifiers)
        if has_single_bad_qualifier:
            logger.warning(
                "Cannot write qualifier `{}` in GAF version 2.1 since only {} are allowed: skipping"
                .format(self.qualifiers[0], ", ".join(allowed_qualifiers)))
            # The qualifier cannot be written, so the column is blanked out
            qual_labels = []

        if self.negated:
            qual_labels.append("NOT")

        qualifier = "|".join(qual_labels)

        primary_taxon = self.object.taxon
        primary_taxon.namespace = "taxon"
        taxon = str(primary_taxon)
        interacting = self.interacting_taxon
        if interacting:
            interacting.namespace = "taxon"
            taxon = "{taxon}|{interacting}".format(
                taxon=taxon, interacting=str(interacting))

        # Extensions are rendered through each conjunct's `display` method with
        # relation labels instead of CURIEs; list_to_str assembles the column.
        return [
            self.subject.id.namespace,
            self.subject.id.identity,
            self.subject.label,
            qualifier,
            str(self.object.id),
            "|".join(
                [str(ref) for ref in self.evidence.has_supporting_reference]),
            self.evidence.gaf_evidence_code(),
            ConjunctiveSet.list_to_str(self.evidence.with_support_from),
            self.aspect if self.aspect else "",
            self.subject.fullname_field(),
            "|".join(self.subject.synonyms),
            gp_type_label_to_curie(self.subject.type[0]),
            taxon,
            ymd_str(self.date, ""),
            self.provided_by,
            ConjunctiveSet.list_to_str(
                self.object_extensions,
                conjunct_to_str=lambda conj: conj.display(use_rel_label=True)),
            gp_isoforms,
        ]
Exemple #14
0
    def to_gpad_1_2_tsv(self) -> List:
        """
        Render this GoAssociation as a GPAD 1.2 row: a list of column strings.

        The qualifier column is built from the qualifiers, falling back to the
        relation when there are none, and is prefixed with "NOT" when negated.
        """
        # Curie Object -> CURIE Str -> URI -> Label
        qual_labels = []
        for q in self.qualifiers:
            qual_uri = curie_util.expand_uri(str(q), strict=False)
            qual_labels.append(relations.lookup_uri(qual_uri))

        # Qualifiers win over the relation: coming from a GAF, an explicit
        # qualifier is more specific than the relation calculated from the
        # aspect/Go term. Gpad needs at least one qualifier, so with none
        # present the relation is used.
        if not qual_labels:
            relation_uri = curie_util.expand_uri(str(self.relation),
                                                 strict=False)
            qual_labels.append(relations.lookup_uri(relation_uri))

        if self.negated:
            qual_labels.insert(0, "NOT")

        qualifier = "|".join(qual_labels)

        props_list = []
        for key, value in self.properties:
            props_list.append("{key}={value}".format(key=key, value=value))

        return [
            self.subject.id.namespace,
            self.subject.id.identity,
            qualifier,
            str(self.object.id),
            "|".join(
                [str(ref) for ref in self.evidence.has_supporting_reference]),
            str(self.evidence.type),
            ConjunctiveSet.list_to_str(self.evidence.with_support_from),
            str(self.interacting_taxon) if self.interacting_taxon else "",
            ymd_str(self.date, ""),
            self.provided_by,
            ConjunctiveSet.list_to_str(
                self.object_extensions,
                conjunct_to_str=lambda conj: conj.display(use_rel_label=True)),
            "|".join(props_list),
        ]
    def add_prefix(self, ncname: str) -> None:
        """ Register `ncname` in the prefix map unless it is already there

        The prefix is expanded as the CURIE ``ncname:`` against
        ``self.curi_maps``. If the expansion does not contain ``://`` a message
        is written to stderr and a placeholder ``http://example.org/unknown/``
        URI is stored instead.

        @param ncname: name to add
        """
        if ncname in self.prefixmap:
            return

        uri = cu.expand_uri(ncname + ':', self.curi_maps)
        if not (uri and '://' in uri):
            print(f"No expansion for {ncname}", file=sys.stderr)
            uri = f"http://example.org/unknown/{ncname}/"
        self.prefixmap[ncname] = uri
Exemple #16
0
def anyont_fetch_label(id):
    """
    Fetch the rdfs:label of an entity.

    `id` is expanded to an IRI (non-strict) and all rdfs:label assertions for
    that IRI are queried. The first label returned by the query is the result;
    None is returned when the query yields no label.
    """
    iri = expand_uri(id, strict=False)
    query = """
    SELECT ?label WHERE {{
    <{iri}> rdfs:label ?label
    }}
    """.format(iri=iri)
    bindings = run_sparql(query)
    rows = [r['label']['value'] for r in bindings]
    # An unlabelled IRI is not an error: report the absence of a label
    return rows[0] if rows else None
Exemple #17
0
    def uri(self, id):
        """
        Return a URIRef for `id`, which may be an identifier or a dict holding
        the identifier under the key 'id'.

        When expansion against `prefix_context` changes the identifier, the
        prefix (the text before the first ":") is also bound on the writer's
        graph.
        """
        # allow either atoms or objects
        if isinstance(id, dict):
            return self.uri(id['id'])
        logging.info("Expand: {}".format(id))

        expanded = curie_util.expand_uri(id, cmaps=[prefix_context])
        if expanded == id:
            # Nothing was expanded, so there is no prefix to bind
            return URIRef(expanded)

        prefix = id.split(":")[0]
        self.writer.graph.bind(prefix, prefix_context[prefix])
        return URIRef(expanded)
Exemple #18
0
def make_keys_from_gaf(gaf: association.GoAssociation) -> List[AnnotationKey]:
    """
    Build the AnnotationKeys described by a single GoAssociation.

    The object id, relation and object taxon are expanded to URIs using
    `prefix_context`. One key is produced per entry of
    `gaf.object_extensions.conjunctions`; when there are none, a single key
    with an empty ExtensionConjunctions is produced instead.
    """

    def to_uri(value):
        return curie_util.expand_uri(value, cmaps=[prefix_context])

    term = to_uri(gaf.object.id)
    relation = to_uri(gaf.relation)
    taxon = to_uri(gaf.object.taxon)
    extensions = gaf.object_extensions

    if not extensions.conjunctions:
        # No extensions: exactly one key, carrying an empty conjunction
        no_extensions = association.ExtensionConjunctions(frozenset([]))
        return [
            AnnotationKey(RelationTo(relation, term), taxon, no_extensions)
        ]

    keys = []  # type: List[AnnotationKey]
    for conjunction in extensions.conjunctions:
        # conjunction is foo(bar),hello(world)
        frozen = association.ExtensionConjunctions(
            frozenset(conjunction.extensions))
        keys.append(AnnotationKey(RelationTo(relation, term), taxon, frozen))
    return keys
Exemple #19
0
    def translate_evidence(self, association, stmt):
        """
        Emit the evidence of `association` as an OWL reification of `stmt`.

        ``
        _:1 a Axiom
            owl:annotatedSource s
            owl:annotatedProperty p
            owl:annotatedTarget o
            evidence [ a ECO ; ...]
        ``

        `stmt` is the (s, p, o) triple being annotated. The evidence node uses
        the evidence's own 'id' when it has one and a generated id otherwise.
        Supporting references that do not expand to a URI are emitted as
        literals.
        """
        ev = association['evidence']
        if 'id' in ev:
            ev_id = self.uri(ev['id'])
        else:
            ev_id = genid(base=self.writer.base + '/')

        stmt_id = self.blanknode()  ## OWL reification: must be blank
        (s, p, o) = stmt
        self.emit_type(stmt_id, OWL.Axiom)

        self.emit(stmt_id, OWL.annotatedSource, s)
        self.emit(stmt_id, OWL.annotatedProperty, p)
        self.emit(stmt_id, OWL.annotatedTarget, o)

        self.emit(stmt_id, self.uri(evt.axiom_has_evidence), ev_id)

        ev_cls = self.eco_class(self.uri(ev['type']))
        self.emit_type(ev_id, OWL.NamedIndividual)
        self.emit_type(ev_id, ev_cls)

        # with_support_from is emitted exactly once per entry
        if 'with_support_from' in ev:
            for w in ev['with_support_from']:
                self.emit(ev_id, self.uri(evt.evidence_with_support_from),
                          self.uri(w))

        for ref in ev['has_supporting_reference']:
            ref_node = self.uri(ref)
            if ref == expand_uri(ref):
                # Not expandable to a URI: keep the reference as a literal
                ref_node = Literal(ref)
            self.emit(ev_id, HAS_SUPPORTING_REFERENCE, ref_node)
Exemple #20
0
    def __post_init__(self):
        """
        Fill in fields that are still unset from `lbl` and `meta`.

        Logic moved from scigraph_util.map_tuple
        """
        meta = self.meta

        if self.lbl:
            self.label = self.lbl

        if 'category' in meta and not self.category:
            self.category = meta['category']

        if not self.iri:
            self.iri = expand_uri(self.id, [get_curie_map()])

        if 'synonym' in meta and not self.synonyms:
            self.synonyms = [
                SynonymPropertyValue(pred='synonym', val=s)
                for s in meta['synonym']
            ]

        # Only the first definition is used, and only when there is one
        if meta.get('definition') and not self.description:
            self.description = meta['definition'][0]
Exemple #21
0
    def to_gaf_2_2_tsv(self) -> List:
        """
        Render this GoAssociation as a GAF 2.2 row: a list of column strings.

        Side effect: the namespace of the object taxon, and of the interacting
        taxon when present, is overwritten with "taxon".
        """
        gp_isoforms = self.subject_extensions[0].term if self.subject_extensions else ""

        # Qualifier CURIE -> URI -> label; "NOT" leads the column when negated
        qual_labels = []
        for q in self.qualifiers:
            qual_uri = curie_util.expand_uri(str(q), strict=False)
            qual_labels.append(relations.lookup_uri(qual_uri))
        if self.negated:
            qual_labels = ["NOT"] + qual_labels

        qualifier = "|".join(qual_labels)

        primary_taxon = self.object.taxon
        primary_taxon.namespace = "taxon"
        taxon = str(primary_taxon)
        interacting = self.interacting_taxon
        if interacting:
            interacting.namespace = "taxon"
            taxon = "{taxon}|{interacting}".format(
                taxon=taxon, interacting=str(interacting))

        return [
            self.subject.id.namespace,
            self.subject.id.identity,
            self.subject.label,
            qualifier,
            str(self.object.id),
            "|".join(
                [str(ref) for ref in self.evidence.has_supporting_reference]),
            self.evidence.gaf_evidence_code(),
            ConjunctiveSet.list_to_str(self.evidence.with_support_from),
            self.aspect if self.aspect else "",
            self.subject.fullname_field(),
            "|".join(self.subject.synonyms),
            gp_type_label_to_curie(self.subject.type[0]),
            taxon,
            ymd_str(self.date, ""),
            self.provided_by,
            ConjunctiveSet.list_to_str(
                self.object_extensions,
                conjunct_to_str=lambda conj: conj.display(use_rel_label=True)),
            gp_isoforms,
        ]
Exemple #22
0
    def map_tuple(self, id, lbl, meta):
        """
        Build a plain dict describing a node from its id, label and metadata.

        The dict always has 'id', 'label', 'category', 'xrefs' and 'iri'
        (the id expanded with the curie map). 'synonyms' is added when `meta`
        has a 'synonym' entry, and 'description' (the first definition) when
        `meta` has a non-empty 'definition' entry.
        """
        obj = {
            'id':
            id,
            'label':
            lbl,
            'category':
            meta.get('category'),
            'xrefs':
            meta.get('http://www.geneontology.org/formats/oboInOwl#hasDbXref'),
            'iri':
            expand_uri(id, [get_curie_map()])
        }
        if 'synonym' in meta:
            obj['synonyms'] = [
                SynonymPropertyValue(pred='synonym', val=s)
                for s in meta['synonym']
            ]
        # An empty or missing definition list means there is no description;
        # indexing it would raise
        definitions = meta.get('definition')
        if definitions:
            obj['description'] = definitions[0]

        return obj
Exemple #23
0
 def emit_not(self, s, t):
     """
     Type `s` as the complement of class `t`.

     A blank node is declared an OWL class whose complementOf is the expanded
     URI of `t`, and `s` is then given that blank node as its type. Returns
     whatever `emit_type` returns for that last statement.
     """
     complement = self.blanknode()
     self.emit_type(complement, OWL.Class)
     negated_class = URIRef(expand_uri(t))
     self.emit(complement, OWL.complementOf, negated_class)
     return self.emit_type(s, complement)
Exemple #24
0
# Vocabulary helper instances; their attributes supply the identifiers that are
# expanded into the URIRef constants below.
ro = OboRO()
evt = Evidence()
upt = UpperLevel()

# Pull the go_context file from prefixcommons.
# NOTE: this is a temporary measure. We will build the go json ld context as part of the pipeline in future
# See https://github.com/geneontology/go-site/issues/617
# All contexts are flattened into one dict; for a key present in several
# contexts the one listed last (go_context) wins.
prefix_context = {
    key: value
    for context in curie_util.default_curie_maps +
    [curie_util.read_biocontext("go_context")]
    for key, value in context.items()
}

# Expanded with the evidence vocabulary's own prefix map rather than the defaults.
HAS_SUPPORTING_REFERENCE = URIRef(
    expand_uri(evt.has_supporting_reference, cmaps=[evt._prefixmap]))

# Frequently used relations/classes, expanded to URIRefs once at import time.
ENABLED_BY = URIRef(expand_uri(ro.enabled_by))
ENABLES = URIRef(expand_uri(ro.enables))
INVOLVED_IN = URIRef(expand_uri(ro.involved_in))
PART_OF = URIRef(expand_uri(ro.part_of))
OCCURS_IN = URIRef(expand_uri(ro.occurs_in))
COLOCALIZES_WITH = URIRef(expand_uri(ro.colocalizes_with))
MOLECULAR_FUNCTION = URIRef(expand_uri(upt.molecular_function))

logger = logging.getLogger(__name__)


def genid(base=None):
    """Return a URIRef built from a freshly generated random UUID (uuid4), passing `base` through to URIRef."""
    fresh_id = str(uuid.uuid4())
    return URIRef(fresh_id, base=base)
Exemple #25
0
def _triple_to_association(digraph, subject, predicate, obj):
    """
    Convert a (subject, predicate, obj) triple into an association dict.

    `predicate` is the edge data dict; it must provide 'pred', 'lbl' and
    'isDefinedBy', and may provide 'equivalentOriginalNodeSource' /
    'equivalentOriginalNodeTarget' IRIs, which are contracted to CURIEs.

    Evidence types (edges with predicate RO:0002558) and publications (edges
    with predicate dc:source) are collected from the neighbours of the
    association nodes returned by `_get_association_nodes`; each neighbour is
    listed at most once per list.
    """
    # One curie map is enough for every expansion/contraction below
    curie_map = get_curie_map()

    def _first_curies(iris):
        # Shortest CURIE for each IRI that can be contracted at all
        found = []
        for iri in iris:
            curies = contract_uri(iri, [curie_map], shortest=True)
            if curies:
                found.append(curies[0])
        return found

    object_eq = []
    subject_eq = []
    if 'equivalentOriginalNodeTarget' in predicate:
        object_eq = _first_curies(predicate['equivalentOriginalNodeTarget'])

    if 'equivalentOriginalNodeSource' in predicate:
        subject_eq = _first_curies(predicate['equivalentOriginalNodeSource'])

    relation_lbl = predicate['lbl'][0] if predicate['lbl'] else None

    # NOTE(review): `digraph.node` is the pre-2.4 networkx spelling of
    # `digraph.nodes` - confirm against the pinned networkx version.
    association = {
        'subject': {
            'id': subject,
            'label': digraph.node[subject]['lbl'],
            'iri': expand_uri(subject, [curie_map])
        },
        'subject_eq': subject_eq,
        'relation': {
            'id': predicate['pred'],
            'label': relation_lbl,
            'iri': expand_uri(predicate['pred'], [curie_map])
        },
        'object': {
            'id': obj,
            'label': digraph.node[obj]['lbl'],
            'iri': expand_uri(obj, [curie_map])
        },
        'object_eq': object_eq,
        'provided_by': predicate['isDefinedBy'],
        'evidence_types': [],
        'publications': []
    }

    # get association node linked to ECO codes and publications.
    # Materialised once so that the length check below cannot exhaust a
    # one-shot iterator before the loop runs.
    association_nodes = list(
        _get_association_nodes(digraph, subject, predicate, obj))

    if len(association_nodes) > 1:
        # This can happen with clique merging, for now log it
        # and combine both in association results
        logging.debug("Ambiguous association for %s, %s, %s",
                      subject, predicate, obj)

    # Ids already recorded; updated as entries are appended so that parallel
    # edges to the same neighbour cannot add it twice.
    seen_evidence = set()
    seen_publications = set()

    for association_node in association_nodes:
        for neighbor, edges in digraph.adj[association_node].items():
            for edge in edges.values():
                if edge['pred'] == 'RO:0002558' and neighbor not in seen_evidence:
                    seen_evidence.add(neighbor)
                    association['evidence_types'].append({
                        'id': neighbor,
                        'label': digraph.node[neighbor]['lbl']
                    })
                elif edge['pred'] == 'dc:source' and neighbor not in seen_publications:
                    seen_publications.add(neighbor)
                    association['publications'].append({
                        'id': neighbor,
                        'label': digraph.node[neighbor]['lbl']
                    })

    return association
import itertools
import yaml
import datetime
from typing import List
from copy import copy
from ontobio.vocabulary.relations import OboRO
from rdflib.term import URIRef
from prefixcommons.curie_util import expand_uri
from entity_factories import SignorEntityFactory
from entity_models import SignorEntity
from ontobio.rdfgen.gocamgen import gocamgen
from util import OntologyTerm

# Relation vocabulary helper; supplies the identifier of `enabled_by`.
ro = OboRO()

# URIRef of the enabled_by relation, expanded once at import time.
ENABLED_BY = URIRef(expand_uri(ro.enabled_by))


class MechanismToGoMapping:
    """Plain record tying a mechanism to its MI id, GO id and relation."""

    def __init__(self, mechanism, mi_id, go_id, relation):
        """Store the four values unchanged on the instance."""
        # Identifiers first, then the mechanism/relation pair
        self.mi_id = mi_id
        self.go_id = go_id
        self.mechanism = mechanism
        self.relation = relation


class MechanismToGoMappingSet:
    def __init__(self, mapping_file=None):
        self.mappings = []
        if mapping_file:
            with open(mapping_file) as mf:
Exemple #27
0
 def get(self, id):
     """
     Expand `id` with `expand_uri` (default arguments) and return the result.
     """
     expanded = expand_uri(id)
     return expanded
Exemple #28
0
def clique_merge(graph: nx.Graph, report=False) -> nx.Graph:
    """
    Builds up cliques using the `same_as` attribute of each node. Uses those
    cliques to build up a mapping for relabelling nodes. Chooses labels so as
    to preserve the original nodes, rather than taking xrefs that don't appear
    as nodes in the graph.

    This method will also expand the `same_as` attribute of the nodes to
    include the discovered clique.

    Clique edges between nodes whose categories are not related through the
    toolkit's ancestor hierarchy are dropped before merging. Progress and
    summary figures are printed to stdout. `report` is currently unused.

    Returns the relabelled graph, with `same_as` edges removed and each
    node's `iri` set from its (possibly new) identifier.
    """
    original_size = len(graph)
    print('original graph has {} nodes'.format(original_size))

    cliqueGraph = nx.Graph()

    with click.progressbar(
            graph.nodes(data=True),
            label='building cliques from same_as node property') as bar:
        for n, attr_dict in bar:
            if 'same_as' in attr_dict:
                for m in attr_dict['same_as']:
                    cliqueGraph.add_edge(n, m)

    with click.progressbar(graph.edges(data=True),
                           label='building cliques from same_as edges') as bar:
        for u, v, attr_dict in bar:
            if 'edge_label' in attr_dict and attr_dict[
                    'edge_label'] == 'same_as':
                cliqueGraph.add_edge(u, v)

    # Fetched once instead of on every loop iteration below
    toolkit = get_toolkit()

    edges = []
    with click.progressbar(cliqueGraph.edges(),
                           label='Breaking invalid cliques') as bar:
        for u, v in bar:
            try:
                u_categories = graph.node[u].get('category', [])
                v_categories = graph.node[v].get('category', [])
            except KeyError:
                # One end of the clique edge is not a node of the graph
                continue

            incompatible = False
            for a in u_categories:
                if toolkit.get_element(a) is None:
                    continue
                # Independent of `b`: look up once per `a`
                a_ancestors = toolkit.ancestors(a)
                for b in v_categories:
                    if toolkit.get_element(b) is None:
                        continue
                    b_ancestors = toolkit.ancestors(b)
                    if a_ancestors == b_ancestors == []:
                        continue
                    elif a not in b_ancestors and b not in a_ancestors:
                        incompatible = True
                        break
                if incompatible:
                    break

            if incompatible:
                # Recorded at most once per clique edge
                edges.append((u, v))

    print('breaking {} many edges'.format(len(edges)))
    cliqueGraph.remove_edges_from(edges)

    mapping = {}

    connected_components = list(nx.connected_components(cliqueGraph))

    print('Discovered {} cliques'.format(len(connected_components)))

    with click.progressbar(connected_components,
                           label='building mapping') as bar:
        for nodes in bar:
            nodes = list(nodes)
            categories = set()
            for n in nodes:
                if not graph.has_node(n):
                    continue

                attr_dict = graph.node[n]

                attr_dict['same_as'] = nodes

                if 'category' in attr_dict:
                    categories.update(listify(attr_dict['category']))

                if 'categories' in attr_dict:
                    categories.update(listify(attr_dict['categories']))

            list_of_prefixes = []
            for category in categories:
                try:
                    list_of_prefixes.append(
                        toolkit.get_element(category).id_prefixes)
                except AttributeError:
                    # No element (or no id_prefixes) for this category
                    pass

            # Alphabetical first, then a stable sort by prefix preference
            nodes.sort()
            nodes.sort(key=build_sort_key(list_of_prefixes))

            for n in nodes:
                if n != nodes[0]:
                    mapping[n] = nodes[0]

    g = relabel_nodes(graph, mapping)

    edges = []
    for u, v, key, data in g.edges(keys=True, data=True):
        if data.get('edge_label') == 'same_as':
            edges.append((u, v, key))
    g.remove_edges_from(edges)

    for n, data in g.nodes(data=True):
        data['iri'] = expand_uri(n)
        if 'id' in data and data['id'] != n:
            data['id'] = n
        if 'same_as' in data and n in data['same_as']:
            data['same_as'].remove(n)
            if data['same_as'] == []:
                del data['same_as']

    final_size = len(g)
    print('Resulting graph has {} nodes'.format(final_size))
    print('Eliminated {} nodes'.format(original_size - final_size))

    return g
Exemple #29
0
def expand_uri_wrapper(id):
    """
    Expand `id` using `prefix_context` plus a GOREL prefix.

    The GOREL mapping is supplied as a separate map listed ahead of
    `prefix_context`, so it takes precedence for the GOREL prefix while the
    shared module-level `prefix_context` dict is left unmodified.
    """
    gorel_map = {'GOREL': "http://purl.obolibrary.org/obo/GOREL_"}
    return expand_uri(id, cmaps=[gorel_map, prefix_context])
Exemple #30
0
 def uri(self, id):
     """
     Return a URIRef for `id`, which may be an identifier or a dict holding
     the identifier under the key 'id'.
     """
     # allow either atoms or objects
     if isinstance(id, dict):
         return self.uri(id['id'])

     logging.info("Expand: {}".format(id))
     expanded = expand_uri(id)
     return URIRef(expanded)