def make_keys_from_gaf(gaf: association.GoAssociation) -> List[AnnotationKey]:
    """Build one AnnotationKey per object-extension conjunction of `gaf`.

    When the association carries no extensions, a single key with an empty
    ConjunctiveSet is returned so the annotation still participates in lookups.
    """
    term_uri = curie_util.expand_uri(str(gaf.object.id), cmaps=[prefix_context])
    relation_uri = curie_util.expand_uri(str(gaf.relation), cmaps=[prefix_context])
    taxon_uri = curie_util.expand_uri(str(gaf.object.taxon), cmaps=[prefix_context])
    relation_to = RelationTo(relation_uri, term_uri)

    extensions = gaf.object_extensions  # type: List[association.ConjunctiveSet]
    if not extensions:
        # No extensions: key on an empty (frozen) conjunction.
        empty = association.ConjunctiveSet(frozenset([]))
        return [AnnotationKey(relation_to, taxon_uri, empty)]

    # One key per conjunction (e.g. foo(bar),hello(world)); freeze the element
    # list so the resulting ConjunctiveSet is hashable.
    return [
        AnnotationKey(relation_to, taxon_uri,
                      association.ConjunctiveSet(frozenset(conjunction.elements)))
        for conjunction in extensions
    ]
def make_keys_from_gaf(gaf: List[str]) -> List[AnnotationKey]:
    """Build AnnotationKeys from a raw GAF row given as a list of column strings.

    Columns read (0-based): 4 = GO term CURIE, 8 = aspect (mapped to a relation),
    12 = taxon field, 15 = annotation extensions ("|"-separated conjunctions of
    ","-separated relation(filler) units).
    """
    term = curie_util.expand_uri(gaf[4], cmaps=[prefix_context])
    relation = aspect_relation_map[gaf[8]]
    # Taxon column looks like "taxon:1234|taxon:5678"; keep only the first ID
    # and build the NCBITaxon PURL directly.
    taxon = "http://purl.obolibrary.org/obo/NCBITaxon_{}".format(
        gaf[12].split("|")[0].split(":")[1])
    extension = gaf[15]
    annotation_keys = []  # type: List[AnnotationKey]
    for conjunction in extension.split("|"):
        # conjunction is foo(bar),hello(world)
        conjunctions = []  # type: List[association.ExtensionUnit]
        for extension_unit in conjunction.split(","):
            # extension_unit is foo(bar)
            found_rel = relation_tuple.match(extension_unit)
            if found_rel:
                rel_label, filler = found_rel.groups()
                ext_relation = lookup_relation(rel_label)  # type: Uri
                fill_id = curie_util.expand_uri(filler,
                                                cmaps=[prefix_context])  # type: Uri
                extension_unit = association.ExtensionUnit(
                    ext_relation, fill_id)  # type: association.ExtensionUnit
                # Append the extensions unit to the list of conjunctions
                conjunctions.append(extension_unit)
        extension_conjunction = association.ExtensionConjunctions(
            frozenset(conjunctions))
        # Build the Key now
        annotation_keys.append(
            AnnotationKey(RelationTo(relation, term), taxon,
                          extension_conjunction))
    return annotation_keys
def expand(self, curie: str, fallback: bool = True) -> str:
    """
    Expand a given CURIE to an URI, based on mappings from `prefix_map`.

    Parameters
    ----------
    curie: str
        A CURIE
    fallback: bool
        Determines whether to fallback to default prefix mappings, as determined
        by `prefixcommons.curie_util`, when CURIE prefix is not found in `prefix_map`.

    Returns
    -------
    str
        A URI corresponding to the CURIE
    """
    uri = None
    if curie in self.prefix_map:
        uri = self.prefix_map[curie]
        # TODO: prefixcommons.curie_util will not unfold objects in json-ld context
        if isinstance(uri, str):
            return uri
    else:
        uri = cu.expand_uri(curie, [self.prefix_map])
        if uri == curie and fallback:
            # Not expanded by our map; retry with the library's default maps.
            uri = cu.expand_uri(curie)
    # FIX: removed leftover debug `print("CURIE {} to IRI {}"...)` that wrote
    # to stdout on every expansion call.
    return uri
def expand(curie: str, prefix_maps: Optional[List[dict]] = None, fallback: bool = True) -> str:
    """
    Expand a given CURIE to an URI, based on mappings from `prefix_map`.

    This method will return the CURIE as the IRI if there is no mapping found.

    Parameters
    ----------
    curie: str
        A CURIE
    prefix_maps: Optional[List[dict]]
        A list of prefix maps to use for mapping
    fallback: bool
        Determines whether to fallback to default prefix mappings, as determined
        by `prefixcommons.curie_util`, when CURIE prefix is not found in `prefix_maps`.

    Returns
    -------
    str
        A URI corresponding to the CURIE
    """
    default_curie_maps = [get_jsonld_context('monarch_context'),
                          get_jsonld_context('obo_context')]
    if not prefix_maps:
        # No caller-supplied maps: go straight to the defaults.
        return expand_uri(curie, default_curie_maps)
    uri = expand_uri(curie, prefix_maps)
    if fallback and uri == curie:
        # Caller maps didn't expand it; retry with the default maps.
        uri = expand_uri(curie, default_curie_maps)
    return uri
def expand(self, id):
    """Expand a CURIE `id` to a URI via this object's prefixmap, falling back
    to the default prefixcommons maps when `self.fallback` is set."""
    mapped = self.prefixmap.get(id)
    # todo: curie util will not unfold objects in json-ld context,
    # so only a plain-string mapping can be returned directly here.
    if isinstance(mapped, str):
        return mapped
    uri = cu.expand_uri(id, [self.prefixmap])
    if self.fallback and uri == id:
        uri = cu.expand_uri(id)
    return uri
def to_gaf_2_2_tsv(self) -> List:
    """Convert this GoAssociation into a GAF 2.2 row as a list of column strings."""
    # Column 17 (gp_form_id): first subject-extension term, if any.
    gp_isoforms = "" if not self.subject_extensions else self.subject_extensions[
        0].term
    # Qualifier CURIE -> URI -> human-readable relation label.
    qual_labels = [
        relations.lookup_uri(curie_util.expand_uri(str(q), strict=False))
        for q in self.qualifiers
    ]
    if self.negated:
        qual_labels.insert(0, "NOT")
    qualifier = "|".join(qual_labels)
    # GAF spells taxa in the "taxon:" namespace; force it before stringifying.
    self.object.taxon.namespace = "taxon"
    taxon = str(self.object.taxon)
    if self.interacting_taxon:
        self.interacting_taxon.namespace = "taxon"
        taxon = "{taxon}|{interacting}".format(taxon=taxon, interacting=str(
            self.interacting_taxon))
    # Column order follows the GAF 2.2 specification.
    return [
        self.subject.id.namespace, self.subject.id.identity,
        self.subject.label, qualifier,
        str(self.object.id), "|".join(
            [str(ref) for ref in self.evidence.has_supporting_reference]),
        ecomap.ecoclass_to_coderef(str(self.evidence.type))[0],
        ConjunctiveSet.list_to_str(self.evidence.with_support_from),
        self.aspect if self.aspect else "", self.subject.fullname,
        "|".join(self.subject.synonyms), self.subject.type, taxon, self.date,
        self.provided_by,
        ConjunctiveSet.list_to_str(self.object_extensions), gp_isoforms
    ]
def _predicate(self, name: SlotDefinitionName) -> IRIREF:
    """Return the predicate IRI for slot `name`: the first declared mapping when
    one exists, otherwise a BIOENTITY IRI derived from the underscored name."""
    slot = self.schema.slots[name]
    if not slot.mappings:
        # TODO: look at the RDF to figure out what URI's go here
        return IRIREF(BIOENTITY[underscore(name)])
    return IRIREF(cu.expand_uri(slot.mappings[0]))
def full_statement_bnode_in_model(self, model):
    """Return the existing (mechanism, relation, regulated_activity) triple from
    `model`'s graph when the full statement is already asserted; implicitly
    returns None otherwise."""
    # Find all existing URI's for IDA, IDB, mech, and reg. Check if statements
    # exist for these URI combos. Might need SPARQL or further triple querying
    # refinement (e.g. triple annotated with "owl:NamedIndividual")
    # mechanism["term"] ENABLED_BY self.id_a
    # regulated_activity["term"] ENABLED_BY self.id_b
    # mechanism["term"] REGULATES regulated_activity["term"]
    graph = model.writer.writer.graph
    # Superseded manual scan, kept for reference:
    # a_enables_triples = []
    # for id_a in model.uri_list_for_individual(self.full_id_a()):
    #     for mech_uri in model.uri_list_for_individual(self.mechanism["term"]):
    #         if (mech_uri, ENABLED_BY, id_a) in graph:
    #             a_enables_triples.append((mech_uri, ENABLED_BY, id_a))
    a_enables_triples = model.triples_by_ids(self.mechanism["term"], ENABLED_BY,
                                             self.full_id_a())
    # b_enables_triples = []
    # for id_b in model.uri_list_for_individual(self.full_id_b()):
    #     for reg_act in model.uri_list_for_individual(self.regulated_activity["term"]):
    #         if (reg_act, ENABLED_BY, id_b) in graph:
    #             b_enables_triples.append((reg_act, ENABLED_BY, id_b))
    b_enables_triples = model.triples_by_ids(self.regulated_activity["term"],
                                             ENABLED_BY, self.full_id_b())
    # For each (mechanism-enabled-by-A) x (activity-enabled-by-B) pairing,
    # check whether the connecting relation triple exists; return the first hit.
    for a_triple in a_enables_triples:
        for b_triple in b_enables_triples:
            candidate_reg_triple = (a_triple[0],
                                    URIRef(expand_uri(self.relation)),
                                    b_triple[0])
            if candidate_reg_triple in graph:
                return candidate_reg_triple
def _do_conjunctions_match_constraint(self, conjunction, term, constraints,
                                      conjunction_counts):
    """Return True iff every extension in `conjunction` satisfies at least one
    entry in `constraints` for the annotated `term`; bail out False on the first
    extension that matches none."""
    # Check each extension in the conjunctions
    for ext in conjunction.elements:
        extension_good = False
        for constraint in constraints:
            constraint_relation_uri = association.relations.lookup_label(
                constraint["relation"])
            ext_relation_uri = curie_util.expand_uri(str(ext.relation))
            if ext_relation_uri == constraint_relation_uri:
                if (ext.term.namespace in constraint["namespaces"]
                        and str(term) in constraint["primary_terms"]):
                    # If we match namespace and go term, then if we there is a
                    # cardinality constraint, check that.
                    if "cardinality" in constraint:
                        # NOTE(review): this comprehension rebinds `ext`,
                        # shadowing the outer loop variable — confirm intended.
                        cardinality_violations = [
                            (ext, num)
                            for ext, num in dict(conjunction_counts).items()
                            if num > constraint["cardinality"]
                        ]
                        extension_good = len(cardinality_violations) == 0
                    else:
                        extension_good = True
            if extension_good:
                # If things are good for this extension, break and go to the next one
                break
        # if we get through all constraints and we found no constraint match for `ext`
        # Then we know that `ext` is wrong, making the whole conjunction wrong,
        # and we can bail here.
        if not extension_good:
            return False
    # If we get to the end of all extensions without failing early, then the
    # conjunction is good!
    return True
def get_uri(self, ncname: str) -> Optional[str]:
    """
    Get the URI associated with ncname
    @param ncname:
    """
    expanded = cu.expand_uri(ncname + ':', self.curi_maps)
    # Only accept real http(s) expansions; anything else means "not found".
    if expanded and expanded.startswith('http'):
        return expanded
    return None
def sparql(self, select='*', body=None, inject_prefixes=None, single_column=False):
    """
    Execute a SPARQL query.

    The query is specified using `select` and `body` parameters.
    The argument for the Named Graph is injected into the query.

    The select parameter should be either '*' or a list of vars (not prefixed with '?').

    - If '*' is passed, then the result is a list of dicts, { $var: {value: $val } }
    - If a list of vars is passed, then the result is a list of lists
    - Unless single_column=True, in which case the results are a simple list of
      values from the first var

    The inject_prefixes argument can be used to inject a list of prefixes -
    these are expanded using the prefixcommons library
    """
    if inject_prefixes is None:
        inject_prefixes = []
    namedGraph = get_named_graph(self.handle)
    cols = []
    select_val = None
    if select is None or select == '*':
        if not single_column:
            cols = None
        select_val = '*'
    else:
        # BUG FIX: the original tested `isinstance(cols, list)`, which is always
        # True for the just-initialized `[]`, so a list-valued `select` was
        # wrapped as [select] and `'?' + c` then failed on the inner list.
        # Test the *argument* instead.
        if isinstance(select, list):
            cols = select
        else:
            cols = [select]
        select_val = ", ".join(['?' + c for c in cols])
    prefixes = ""
    if inject_prefixes is not None:
        plist = ["prefix {}: <{}> ".format(p, expand_uri(p + ":"))
                 for p in inject_prefixes if p != "" and p is not None]
        prefixes = "\n".join(plist)
    query = """
    {prefixes}
    SELECT {s} WHERE {{
        GRAPH <{g}> {{
            {b}
        }}
    }}
    """.format(prefixes=prefixes, s=select_val, b=body, g=namedGraph)
    bindings = run_sparql(query)
    if len(bindings) == 0:
        return []
    if cols is None:
        return bindings
    if single_column:
        # Pull values of the first (only) projected variable.
        c = list(bindings[0].keys())[0]
        return [r[c]['value'] for r in bindings]
    # NOTE: returns a flat list over (col, row) pairs, not a list of lists as
    # the docstring suggests; preserved for caller compatibility.
    return [r[c]['value'] for c in cols for r in bindings]
def test_prefixes():
    """Round-trip contraction/expansion, plus strict-mode error behavior."""
    assert contract_uri(bp_iri) == [bp_id]
    assert expand_uri(bp_id) == bp_iri
    assert contract_uri("FAKE", strict=False) == []
    # Strict mode must raise NoPrefix for an unknown prefix.
    raised = False
    try:
        contract_uri("FAKE", strict=True)
    except NoPrefix:
        raised = True
    assert raised
def to_gaf_2_1_tsv(self) -> List:
    """
    Converts the GoAssociation into a "TSV" columnar GAF 2.1 row as a list of strings.
    """
    # Column 17 (gp_form_id): first subject-extension term, if any.
    gp_isoforms = "" if not self.subject_extensions else self.subject_extensions[
        0].term
    # GAF 2.1 only permits these two qualifiers (plus NOT).
    allowed_qualifiers = {"contributes_to", "colocalizes_with"}
    # Curie Object -> CURIE Str -> URI -> Label
    qual_labels = [
        relations.lookup_uri(curie_util.expand_uri(str(q), strict=False))
        for q in self.qualifiers
    ]
    if len(qual_labels) == 1 and qual_labels[0] not in allowed_qualifiers:
        logger.warning(
            "Cannot write qualifier `{}` in GAF version 2.1 since only {} are allowed: skipping"
            .format(self.qualifiers[0], ", ".join(allowed_qualifiers)))
        # If the qualifier is wrong, blank out the qualifiers
        qual_labels = []
    if self.negated:
        qual_labels.append("NOT")
    qualifier = "|".join(qual_labels)
    # GAF spells taxa in the "taxon:" namespace; force it before stringifying.
    self.object.taxon.namespace = "taxon"
    taxon = str(self.object.taxon)
    if self.interacting_taxon:
        self.interacting_taxon.namespace = "taxon"
        taxon = "{taxon}|{interacting}".format(taxon=taxon, interacting=str(
            self.interacting_taxon))
    # For extensions, we provide the to string function on ConjunctElement that
    # calls its `display` method, with the flag to use labels instead of the CURIE.
    # This function is used to turn the whole column correctly into a string
    return [
        self.subject.id.namespace, self.subject.id.identity,
        self.subject.label, qualifier,
        str(self.object.id), "|".join(
            [str(ref) for ref in self.evidence.has_supporting_reference]),
        self.evidence.gaf_evidence_code(),
        ConjunctiveSet.list_to_str(self.evidence.with_support_from),
        self.aspect if self.aspect else "", self.subject.fullname_field(),
        "|".join(self.subject.synonyms),
        gp_type_label_to_curie(self.subject.type[0]), taxon,
        ymd_str(self.date, ""), self.provided_by,
        ConjunctiveSet.list_to_str(
            self.object_extensions,
            conjunct_to_str=lambda conj: conj.display(use_rel_label=True)),
        gp_isoforms
    ]
def to_gpad_1_2_tsv(self) -> List: """ Converts the GoAssociation into a "TSV" columnar GPAD 1.2 row as a list of strings. """ # Curie Object -> CURIE Str -> URI -> Label qual_labels = [ relations.lookup_uri(curie_util.expand_uri(str(q), strict=False)) for q in self.qualifiers ] # Try qualifiers first since, if we are going from GAF -> GPAD and the GAF had a qualifier, that would be # more specific than the relation, which is calculated from the aspect/Go term. if qual_labels == []: # If there were no qualifiers, then we'll use the Relation. Gpad requires at least one qualifier (which is the relation) qual_labels.append( relations.lookup_uri( curie_util.expand_uri(str(self.relation), strict=False))) if self.negated: qual_labels = ["NOT"] + qual_labels qualifier = "|".join(qual_labels) props_list = [ "{key}={value}".format(key=key, value=value) for key, value in self.properties ] return [ self.subject.id.namespace, self.subject.id.identity, qualifier, str(self.object.id), "|".join( [str(ref) for ref in self.evidence.has_supporting_reference]), str(self.evidence.type), ConjunctiveSet.list_to_str(self.evidence.with_support_from), str(self.interacting_taxon) if self.interacting_taxon else "", ymd_str(self.date, ""), self.provided_by, ConjunctiveSet.list_to_str( self.object_extensions, conjunct_to_str=lambda conj: conj.display(use_rel_label=True)), "|".join(props_list) ]
def add_prefix(self, ncname: str) -> None:
    """
    Look up ncname and add it to the prefix map if necessary
    @param ncname: name to add
    """
    if ncname in self.prefixmap:
        return
    uri = cu.expand_uri(ncname + ':', self.curi_maps)
    if uri and '://' in uri:
        self.prefixmap[ncname] = uri
    else:
        # Unknown prefix: warn and register a placeholder namespace.
        print(f"No expansion for {ncname}", file=sys.stderr)
        self.prefixmap[ncname] = f"http://example.org/unknown/{ncname}/"
def anyont_fetch_label(id):
    """ fetch all rdfs:label assertions for a URI """
    iri = expand_uri(id, strict=False)
    query = """
    SELECT ?label WHERE {{
    <{iri}> rdfs:label ?label
    }}
    """.format(iri=iri)
    # NOTE: although all labels are fetched, only the first is returned.
    labels = [binding['label']['value'] for binding in run_sparql(query)]
    return labels[0]
def uri(self, id):
    """Expand `id` (a CURIE string or an {'id': ...} object) to a URIRef,
    binding the CURIE prefix on the graph when an expansion was found."""
    # allow either atoms or objects
    if isinstance(id, dict):
        return self.uri(id['id'])
    logging.info("Expand: {}".format(id))
    expanded = curie_util.expand_uri(id, cmaps=[prefix_context])
    if expanded != id:
        # If URI is different, then that means we found a curie expansion,
        # and we should add the prefix binding to the graph.
        prefix = id.split(":")[0]
        self.writer.graph.bind(prefix, prefix_context[prefix])
    return URIRef(expanded)
def make_keys_from_gaf(gaf: association.GoAssociation) -> List[AnnotationKey]:
    """Build one AnnotationKey per object-extension conjunction of `gaf`; when
    there are none, a single key with empty ExtensionConjunctions is returned."""
    term = curie_util.expand_uri(gaf.object.id, cmaps=[prefix_context])
    relation = curie_util.expand_uri(gaf.relation, cmaps=[prefix_context])
    taxon = curie_util.expand_uri(gaf.object.taxon, cmaps=[prefix_context])
    rel_to = RelationTo(relation, term)

    conjunctions = gaf.object_extensions.conjunctions
    if not conjunctions:
        empty = association.ExtensionConjunctions(frozenset([]))
        return [AnnotationKey(rel_to, taxon, empty)]

    # conjunction is foo(bar),hello(world); freeze its extensions for hashing.
    return [
        AnnotationKey(rel_to, taxon,
                      association.ExtensionConjunctions(
                          frozenset(conjunction.extensions)))
        for conjunction in conjunctions
    ]
def translate_evidence(self, association, stmt):
    """
    Reify `stmt` as an owl:Axiom and attach its evidence node.

    ``
    _:1 a Axiom
        owl:annotatedSource s
        owl:annotatedProperty p
        owl:annotatedTarget o
        evidence [ a ECO ; ...]
    ``
    """
    ev = association['evidence']
    ev_id = None
    if 'id' in ev:
        ev_id = self.uri(ev['id'])
    else:
        # No supplied ID: mint a fresh UUID-based URI under the writer's base.
        ev_id = genid(base=self.writer.base + '/')
    stmt_id = self.blanknode()  ## OWL reification: must be blank
    (s, p, o) = stmt
    self.emit_type(stmt_id, OWL.Axiom)
    self.emit(stmt_id, OWL.annotatedSource, s)
    self.emit(stmt_id, OWL.annotatedProperty, p)
    self.emit(stmt_id, OWL.annotatedTarget, o)
    self.emit(stmt_id, self.uri(evt.axiom_has_evidence), ev_id)
    # Type the evidence individual with its ECO class.
    ev_cls = self.eco_class(self.uri(ev['type']))
    self.emit_type(ev_id, OWL.NamedIndividual)
    self.emit_type(ev_id, ev_cls)
    if 'with_support_from' in ev:
        for w in ev['with_support_from']:
            self.emit(ev_id, self.uri(evt.evidence_with_support_from),
                      self.uri(w))
    for ref in ev['has_supporting_reference']:
        o = self.uri(ref)
        # A reference that fails to expand is not a known CURIE; emit literally.
        if ref == expand_uri(ref):
            o = Literal(ref)
        self.emit(ev_id, HAS_SUPPORTING_REFERENCE, o)
    # NOTE(review): with_support_from is emitted a second time below; the
    # duplicate triples collapse in an RDF graph, but this looks redundant
    # with the loop above — confirm before removing.
    if 'with_support_from' in ev:
        for ref in ev['with_support_from']:
            self.emit(ev_id, self.uri(evt.evidence_with_support_from),
                      self.uri(ref))
def __post_init__(self):
    """ Logic moved from scigraph_util.map_tuple """
    meta = self.meta
    if self.lbl:
        self.label = self.lbl
    if 'category' in meta and not self.category:
        self.category = meta['category']
    if not self.iri:
        self.iri = expand_uri(self.id, [get_curie_map()])
    if 'synonym' in meta and not self.synonyms:
        self.synonyms = [
            SynonymPropertyValue(pred='synonym', val=s)
            for s in meta['synonym']
        ]
    # Only take a description when 'definition' is present and non-empty.
    if meta.get('definition') and not self.description:
        self.description = meta['definition'][0]
def to_gaf_2_2_tsv(self) -> List:
    """
    Converts the GoAssociation into a "TSV" columnar GAF 2.2 row as a list of strings.
    """
    # Column 17 (gp_form_id): first subject-extension term, if any.
    gp_isoforms = "" if not self.subject_extensions else self.subject_extensions[
        0].term
    # Qualifier CURIE -> URI -> human-readable relation label.
    qual_labels = [
        relations.lookup_uri(curie_util.expand_uri(str(q), strict=False))
        for q in self.qualifiers
    ]
    if self.negated:
        qual_labels.insert(0, "NOT")
    qualifier = "|".join(qual_labels)
    # GAF spells taxa in the "taxon:" namespace; force it before stringifying.
    self.object.taxon.namespace = "taxon"
    taxon = str(self.object.taxon)
    if self.interacting_taxon:
        self.interacting_taxon.namespace = "taxon"
        taxon = "{taxon}|{interacting}".format(taxon=taxon, interacting=str(
            self.interacting_taxon))
    # Column order follows the GAF 2.2 specification; extensions are rendered
    # via ConjunctElement.display with relation labels instead of CURIEs.
    return [
        self.subject.id.namespace, self.subject.id.identity,
        self.subject.label, qualifier,
        str(self.object.id), "|".join(
            [str(ref) for ref in self.evidence.has_supporting_reference]),
        self.evidence.gaf_evidence_code(),
        ConjunctiveSet.list_to_str(self.evidence.with_support_from),
        self.aspect if self.aspect else "", self.subject.fullname_field(),
        "|".join(self.subject.synonyms),
        gp_type_label_to_curie(self.subject.type[0]), taxon,
        ymd_str(self.date, ""), self.provided_by,
        ConjunctiveSet.list_to_str(
            self.object_extensions,
            conjunct_to_str=lambda conj: conj.display(use_rel_label=True)),
        gp_isoforms
    ]
def map_tuple(self, id, lbl, meta):
    """Build a node dict (id/label/category/xrefs/iri, plus optional synonyms
    and description) from a SciGraph (id, label, meta) tuple."""
    obj = {
        'id': id,
        'label': lbl,
        'category': meta.get('category'),
        'xrefs': meta.get('http://www.geneontology.org/formats/oboInOwl#hasDbXref'),
        'iri': expand_uri(id, [get_curie_map()])
    }
    if 'synonym' in meta:
        obj['synonyms'] = [
            SynonymPropertyValue(pred='synonym', val=s)
            for s in meta['synonym']
        ]
    # FIX: guard against 'definition' being present but None/empty — the old
    # `meta.get('definition')[0]` raised TypeError/IndexError in that case
    # (the dataclass __post_init__ version already guards the same way).
    if meta.get('definition'):
        obj['description'] = meta['definition'][0]
    return obj
def emit_not(self, s, t):
    """Emit `s rdf:type [ owl:complementOf <t> ]` (negation via an anonymous
    complement class) and return the result of the final type emission."""
    complement_cls = self.blanknode()
    self.emit_type(complement_cls, OWL.Class)
    self.emit(complement_cls, OWL.complementOf, URIRef(expand_uri(t)))
    return self.emit_type(s, complement_cls)
# Vocabulary singletons used to look up relation CURIEs below.
ro = OboRO()
evt = Evidence()
upt = UpperLevel()

# Pull the go_context file from prefixcommons.
# NOTE: this is a temporary measure. We will build the go json ld context as part of the pipeline in future
# See https://github.com/geneontology/go-site/issues/617
prefix_context = {
    key: value
    for context in curie_util.default_curie_maps + [curie_util.read_biocontext("go_context")]
    for key, value in context.items()
}

# Commonly used relation URIs, pre-expanded once at import time.
HAS_SUPPORTING_REFERENCE = URIRef(
    expand_uri(evt.has_supporting_reference, cmaps=[evt._prefixmap]))
ENABLED_BY = URIRef(expand_uri(ro.enabled_by))
ENABLES = URIRef(expand_uri(ro.enables))
INVOLVED_IN = URIRef(expand_uri(ro.involved_in))
PART_OF = URIRef(expand_uri(ro.part_of))
OCCURS_IN = URIRef(expand_uri(ro.occurs_in))
COLOCALIZES_WITH = URIRef(expand_uri(ro.colocalizes_with))
MOLECULAR_FUNCTION = URIRef(expand_uri(upt.molecular_function))

logger = logging.getLogger(__name__)


def genid(base=None):
    # Mint a fresh random (UUID4) URI under `base`.
    return URIRef(str(uuid.uuid4()), base=base)
def _triple_to_association(digraph, subject, predicate, obj):
    """
    Convert triple to association object
    """
    object_eq = []
    subject_eq = []
    # Collect clique-merged equivalent IDs (as shortest CURIEs) for each side.
    if 'equivalentOriginalNodeTarget' in predicate:
        for eq in predicate['equivalentOriginalNodeTarget']:
            curies = contract_uri(eq, [get_curie_map()], shortest=True)
            if len(curies) != 0:
                object_eq.append(curies[0])
    if 'equivalentOriginalNodeSource' in predicate:
        for eq in predicate['equivalentOriginalNodeSource']:
            curies = contract_uri(eq, [get_curie_map()], shortest=True)
            if len(curies) != 0:
                subject_eq.append(curies[0])
    relation_lbl = predicate['lbl'][0] if predicate['lbl'] else None
    association = {
        'subject': {
            'id': subject,
            'label': digraph.node[subject]['lbl'],
            'iri': expand_uri(subject, [get_curie_map()])
        },
        'subject_eq': subject_eq,
        'relation': {
            'id': predicate['pred'],
            'label': relation_lbl,
            'iri': expand_uri(predicate['pred'], [get_curie_map()])
        },
        'object': {
            'id': obj,
            'label': digraph.node[obj]['lbl'],
            'iri': expand_uri(obj, [get_curie_map()])
        },
        'object_eq': object_eq,
        'provided_by': predicate['isDefinedBy'],
        'evidence_types': [],
        'publications': []
    }
    # get association node linked to ECO codes and publications
    association_nodes = _get_association_nodes(digraph, subject, predicate, obj)
    if len(list(association_nodes)) > 1:
        # This can happen with clique merging, for now log it
        # and combine both in association results
        logging.debug("Ambiguous association for %s, %s, %s",
                      subject, predicate, obj)
    for association_node in list(association_nodes):
        # NOTE(review): this loop rebinds `obj`, shadowing the parameter above.
        for obj, edges in digraph.adj[association_node].items():
            eco_codes = [eco['id'] for eco in association['evidence_types']]
            pubs = [pub['id'] for pub in association['publications']]
            for edge in edges.values():
                if edge['pred'] == 'RO:0002558' and obj not in eco_codes:
                    association['evidence_types'].append({
                        'id': obj,
                        'label': digraph.node[obj]['lbl']
                    })
                elif edge['pred'] == 'dc:source' and obj not in pubs:
                    association['publications'].append({
                        'id': obj,
                        'label': digraph.node[obj]['lbl']
                    })
    return association
import itertools import yaml import datetime from typing import List from copy import copy from ontobio.vocabulary.relations import OboRO from rdflib.term import URIRef from prefixcommons.curie_util import expand_uri from entity_factories import SignorEntityFactory from entity_models import SignorEntity from ontobio.rdfgen.gocamgen import gocamgen from util import OntologyTerm ro = OboRO() ENABLED_BY = URIRef(expand_uri(ro.enabled_by)) class MechanismToGoMapping: def __init__(self, mechanism, mi_id, go_id, relation): self.mechanism = mechanism self.mi_id = mi_id self.go_id = go_id self.relation = relation class MechanismToGoMappingSet: def __init__(self, mapping_file=None): self.mappings = [] if mapping_file: with open(mapping_file) as mf:
def get(self, id):
    """ Returns expanded URI """
    expanded = expand_uri(id)
    return expanded
def clique_merge(graph: nx.Graph, report=False) -> nx.Graph:
    """
    Builds up cliques using the `same_as` attribute of each node. Uses those
    cliques to build up a mapping for relabelling nodes. Chooses labels so as
    to preserve the original nodes, rather than taking xrefs that don't appear
    as nodes in the graph.

    This method will also expand the `same_as` attribute of the nodes to
    include the discovered clique.
    """
    original_size = len(graph)
    print('original graph has {} nodes'.format(original_size))
    cliqueGraph = nx.Graph()
    # Seed the clique graph from node-level same_as properties...
    with click.progressbar(
            graph.nodes(data=True),
            label='building cliques from same_as node property') as bar:
        for n, attr_dict in bar:
            if 'same_as' in attr_dict:
                for m in attr_dict['same_as']:
                    cliqueGraph.add_edge(n, m)
    # ...and from explicit same_as edges.
    with click.progressbar(graph.edges(data=True),
                           label='building cliques from same_as edges') as bar:
        for u, v, attr_dict in bar:
            if 'edge_label' in attr_dict and attr_dict[
                    'edge_label'] == 'same_as':
                cliqueGraph.add_edge(u, v)
    edges = []
    with click.progressbar(cliqueGraph.edges(),
                           label='Breaking invalid cliques') as bar:
        for u, v in bar:
            try:
                u_categories = graph.node[u].get('category', [])
                v_categories = graph.node[v].get('category', [])
            except:
                # NOTE(review): bare except — presumably skips clique members
                # that are not nodes of `graph`; confirm intent.
                continue
            l = len(edges)
            for a in u_categories:
                if len(edges) > l:
                    # Already decided to break this edge; stop scanning.
                    break
                if get_toolkit().get_element(a) is None:
                    continue
                for b in v_categories:
                    if get_toolkit().get_element(b) is None:
                        continue
                    a_ancestors = get_toolkit().ancestors(a)
                    b_ancestors = get_toolkit().ancestors(b)
                    if a_ancestors == b_ancestors == []:
                        continue
                    elif a not in b_ancestors and b not in a_ancestors:
                        # Unrelated categories: the edge joins incompatible
                        # nodes, so the clique must be broken here.
                        edges.append((u, v))
                        break
    print('breaking {} many edges'.format(len(edges)))
    cliqueGraph.remove_edges_from(edges)
    mapping = {}
    connected_components = list(nx.connected_components(cliqueGraph))
    print('Discovered {} cliques'.format(len(connected_components)))
    with click.progressbar(connected_components, label='building mapping') as bar:
        for nodes in bar:
            nodes = list(nodes)
            categories = set()
            for n in nodes:
                if not graph.has_node(n):
                    continue
                attr_dict = graph.node[n]
                # Expand each member's same_as to cover the whole clique.
                attr_dict['same_as'] = nodes
                if 'category' in attr_dict:
                    categories.update(listify(attr_dict['category']))
                if 'categories' in attr_dict:
                    categories.update(listify(attr_dict['categories']))
            list_of_prefixes = []
            for category in categories:
                try:
                    list_of_prefixes.append(
                        get_toolkit().get_element(category).id_prefixes)
                except:
                    pass
            # Sort so the preferred (prefix-favored) node leads the clique.
            nodes.sort()
            nodes.sort(key=build_sort_key(list_of_prefixes))
            for n in nodes:
                if n != nodes[0]:
                    mapping[n] = nodes[0]
    g = relabel_nodes(graph, mapping)
    # Drop the now-redundant same_as edges from the relabelled graph.
    edges = []
    for u, v, key, data in g.edges(keys=True, data=True):
        if data.get('edge_label') == 'same_as':
            edges.append((u, v, key))
    g.remove_edges_from(edges)
    for n, data in g.nodes(data=True):
        data['iri'] = expand_uri(n)
        if 'id' in data and data['id'] != n:
            data['id'] = n
        # A node should not list itself as its own equivalent.
        if 'same_as' in data and n in data['same_as']:
            data['same_as'].remove(n)
            if data['same_as'] == []:
                del data['same_as']
    final_size = len(g)
    print('Resulting graph has {} nodes'.format(final_size))
    print('Eliminated {} nodes'.format(original_size - final_size))
    return g
def expand_uri_wrapper(id):
    """Expand `id` to a URI using `prefix_context` plus a GOREL mapping.

    FIX: copy the context before injecting the 'GOREL' key. The original
    aliased the shared module-level `prefix_context` dict (`c = prefix_context`)
    and permanently mutated it on first call — a hidden global side effect.
    """
    cmap = dict(prefix_context)
    cmap['GOREL'] = "http://purl.obolibrary.org/obo/GOREL_"
    uri = expand_uri(id, cmaps=[cmap])
    return uri
def uri(self, id):
    """Expand `id` (a CURIE string or an {'id': ...} object) to a URIRef."""
    # allow either atoms or objects
    if isinstance(id, dict):
        return self.uri(id['id'])
    logging.info("Expand: {}".format(id))
    expanded = expand_uri(id)
    return URIRef(expanded)