def load_edge(self, edge_record: List) -> Tuple:
    """
    Turn a raw edge record into the tuple form used to populate a BaseGraph.

    The edge data found in the record is modified in place: it receives
    the graph-level ``provided_by`` value (when the graph metadata has one
    and the edge does not) and a generated ``id`` (when it has none).

    Parameters
    ----------
    edge_record: List
        An edge record; position 0 is the subject node, position 1 the
        edge data and position 2 the object node

    Returns
    -------
    Tuple
        A tuple with subject ID, object ID, edge key, and edge data

    """
    source_node, edge_attrs, target_node = (
        edge_record[0],
        edge_record[1],
        edge_record[2],
    )

    # Fall back to the graph-level provenance only when the edge has none.
    if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_attrs:
        edge_attrs['provided_by'] = self.graph_metadata['provided_by']

    if 'id' not in edge_attrs:
        edge_attrs['id'] = generate_uuid()

    # The key is derived from the record as given, before validation runs.
    source_id = source_node['id']
    edge_key = generate_edge_key(source_id, edge_attrs['predicate'], target_node['id'])

    # Sanitise a copy so the validated edge itself is left untouched.
    cleaned = sanitize_import(validate_edge(edge_attrs).copy())
    self.edge_properties.update(cleaned.keys())
    return source_id, target_node['id'], edge_key, cleaned
def read_edge(self, edge: Dict) -> Optional[Tuple]:
    """
    Validate and normalise one edge record for loading into a BaseGraph.

    The edge is validated, a sanitised copy is made (using
    ``self.list_delimiter``), an ``id`` is generated when missing, and
    provenance is applied via ``self.set_edge_provenance``. The keys of
    the resulting edge are recorded in ``self.edge_properties`` whether or
    not the edge passes the edge filter.

    Parameters
    ----------
    edge: Dict
        An edge

    Returns
    -------
    Optional[Tuple]
        A tuple that contains subject id, object id, edge key, and edge
        data, or ``None`` when the edge fails validation or is rejected
        by ``self.check_edge_filter``

    Raises
    ------
    KeyError
        If the sanitised edge lacks ``subject``, ``object`` or ``predicate``

    """
    edge = self.validate_edge(edge)
    if not edge:
        return None

    edge_data = sanitize_import(edge.copy(), self.list_delimiter)
    if "id" not in edge_data:
        edge_data["id"] = generate_uuid()
    s = edge_data["subject"]
    o = edge_data["object"]

    self.set_edge_provenance(edge_data)

    key = generate_edge_key(s, edge_data["predicate"], o)
    self.edge_properties.update(edge_data.keys())

    # Edge field names must not be added to self.node_properties: that set
    # describes nodes, and the edge keys are already recorded just above.
    if not self.check_edge_filter(edge_data):
        return None
    return s, o, key, edge_data
def load_edge(self, edge_record: List) -> Tuple:
    """
    Turn a raw edge record into the tuple form used to populate a BaseGraph.

    The edge data found in the record is modified in place: provenance is
    applied through ``self.set_edge_provenance`` and an ``id`` is
    generated when the edge has none.

    Parameters
    ----------
    edge_record: List
        An edge record; position 0 is the subject node, position 1 the
        edge data and position 2 the object node

    Returns
    -------
    Tuple
        A tuple with subject ID, object ID, edge key, and edge data, or an
        empty tuple when the edge does not pass validation

    """
    source_node, attrs, target_node = (
        edge_record[0],
        edge_record[1],
        edge_record[2],
    )

    self.set_edge_provenance(attrs)
    if "id" not in attrs:
        attrs["id"] = generate_uuid()

    # The key is derived from the record as given, before validation runs.
    source_id = source_node["id"]
    predicate = attrs["predicate"]
    target_id = target_node["id"]
    edge_key = generate_edge_key(source_id, predicate, target_id)

    validated = self.validate_edge(attrs)
    if not validated:
        return ()

    # Sanitise a copy so the validated edge itself is left untouched.
    cleaned = sanitize_import(validated.copy())
    self.edge_properties.update(cleaned.keys())
    return source_id, target_id, edge_key, cleaned
def read_edge(self, edge: Dict) -> Optional[Tuple]:
    """
    Validate and normalise one edge record for loading into a BaseGraph.

    The edge is validated, a sanitised copy is made, an ``id`` is
    generated when missing, and the graph-level ``provided_by`` value is
    applied when the graph metadata has one and the edge does not. The
    keys of the resulting edge are recorded in ``self.edge_properties``
    whether or not the edge passes the edge filter.

    Parameters
    ----------
    edge: Dict
        An edge

    Returns
    -------
    Optional[Tuple]
        A tuple that contains subject id, object id, edge key, and edge
        data, or ``None`` when the edge is rejected by
        ``self.check_edge_filter``

    Raises
    ------
    KeyError
        If the sanitised edge lacks ``subject``, ``object`` or ``predicate``

    """
    edge = validate_edge(edge)
    edge_data = sanitize_import(edge.copy())
    if 'id' not in edge_data:
        edge_data['id'] = generate_uuid()
    s = edge_data['subject']
    o = edge_data['object']

    # Fall back to the graph-level provenance only when the edge has none.
    if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data:
        edge_data['provided_by'] = self.graph_metadata['provided_by']

    key = generate_edge_key(s, edge_data['predicate'], o)
    self.edge_properties.update(edge_data.keys())

    # Edge field names must not be added to self.node_properties: that set
    # describes nodes, and the edge keys are already recorded just above.
    if not self.check_edge_filter(edge_data):
        return None
    return s, o, key, edge_data
def reify(self, u: str, v: str, data: Dict) -> Dict:
    """
    Build a node-shaped dictionary that represents an edge.

    The result is a shallow copy of ``data`` without its ``category``
    entry, with ``id``, ``type``, ``subject``, ``predicate`` and ``object``
    set. All of these except ``type`` are passed through ``self.uriref``.
    ``data`` itself is not modified.

    Parameters
    ----------
    u: str
        Subject
    v: str
        Object
    data: Dict
        Edge data; must contain ``predicate``

    Returns
    -------
    Dict
        The reified node

    """
    subject_ref = self.uriref(u)
    predicate_ref = self.uriref(data["predicate"])
    object_ref = self.uriref(v)

    # Reuse the edge's own identifier when it has one, otherwise mint a UUID.
    raw_id = data["id"] if "id" in data else generate_uuid()
    node_ref = self.uriref(raw_id)

    association = data.copy()
    association.pop("category", None)
    association.update(
        {
            "id": node_ref,
            "type": "biolink:Association",
            "subject": subject_ref,
            "predicate": predicate_ref,
            "object": object_ref,
        }
    )
    return association
def reify(self, u: str, v: str, data: Dict) -> Dict:
    """
    Build a node-shaped dictionary that represents an edge.

    The result is a shallow copy of ``data`` without its ``category``
    entry, with ``id``, ``type``, ``subject``, ``predicate`` and ``object``
    set. All of these except ``type`` are passed through ``self.uriref``.
    ``data`` itself is not modified.

    Parameters
    ----------
    u: str
        Subject
    v: str
        Object
    data: Dict
        Edge data; must contain ``predicate``

    Returns
    -------
    Dict
        The reified node

    """
    subject_ref = self.uriref(u)
    predicate_ref = self.uriref(data['predicate'])
    object_ref = self.uriref(v)

    # Reuse the edge's own identifier when it has one, otherwise mint a UUID.
    node_ref = self.uriref(data['id'] if 'id' in data else generate_uuid())

    overrides = (
        ('id', node_ref),
        ('type', 'biolink:Association'),
        ('subject', subject_ref),
        ('predicate', predicate_ref),
        ('object', object_ref),
    )
    association = data.copy()
    association.pop('category', None)
    for field, value in overrides:
        association[field] = value
    return association
def load_graph(self, rdfgraph: rdflib.Graph, **kwargs: Any) -> None:
    """
    Walk through the rdflib.Graph and load all triples into kgx.graph.base_graph.BaseGraph

    Although annotated ``-> None``, this function is a generator: it
    yields whatever ``self.triple`` yields while the triples are walked,
    then ``(key, node_data)`` pairs for the cached nodes, then
    ``(k[0], k[1], k[2], edge_data)`` tuples for the cached edges.

    The triples are handled in four passes, in this order:

    1. ``rdfs:subClassOf`` triples, including restrictions expressed
       through a blank-node object (``owl:onProperty`` together with
       ``owl:someValuesFrom`` / ``owl:allValuesFrom``).
    2. ``owl:equivalentClass`` triples whose object is not a blank node.
    3. All triples whose subject is typed ``owl:ObjectProperty``.
    4. Every remaining triple whose predicate was not recorded in
       ``seen`` by the earlier passes and is not in
       ``self.excluded_predicates``.

    Afterwards the reified nodes are removed from ``self.node_cache`` and
    passed to ``self.dereify``, and the node and edge caches are
    validated, sanitised, given provenance, filtered, yielded and cleared.

    Parameters
    ----------
    rdfgraph: rdflib.Graph
        Graph containing nodes and edges
    kwargs: Any
        Any additional arguments (not referenced in this function body)

    """
    # Predicates (and object-property subjects) already handled by a
    # dedicated pass; consulted by the catch-all pass further down.
    seen = set()
    seen.add(RDFS.subClassOf)
    for s, p, o in rdfgraph.triples((None, RDFS.subClassOf, None)):
        # ignoring blank nodes
        if isinstance(s, rdflib.term.BNode):
            continue
        pred = None
        parent = None
        os_interpretation = None
        if isinstance(o, rdflib.term.BNode):
            # C SubClassOf R some D
            # If several values exist, the last one iterated wins.
            for x in rdfgraph.objects(o, OWL.onProperty):
                pred = x
            # owl:someValuesFrom
            for x in rdfgraph.objects(o, OWL.someValuesFrom):
                os_interpretation = self.OWLSTAR.term("AllSomeInterpretation")
                parent = x
            # owl:allValuesFrom
            # Checked after someValuesFrom, so it overrides it when both exist.
            for x in rdfgraph.objects(o, OWL.allValuesFrom):
                os_interpretation = self.OWLSTAR.term("AllOnlyInterpretation")
                parent = x
            if pred is None or parent is None:
                # Incomplete restriction: warn and skip this triple.
                # Note that `parent` may come from either someValuesFrom
                # or allValuesFrom, although the message names only one.
                log.warning(
                    f"{s} {p} {o} has OWL.onProperty {pred} and OWL.someValuesFrom {parent}"
                )
                log.warning("Do not know how to handle BNode: {}".format(o))
                continue
        else:
            # C rdfs:subClassOf D (where C and D are named classes)
            pred = p
            parent = o
        if os_interpretation:
            # reify edges that have logical interpretation
            # The generated id is remembered so the node can be handed to
            # self.dereify once all triples have been processed.
            eid = generate_uuid()
            self.reified_nodes.add(eid)
            yield from self.triple(URIRef(eid), self.BIOLINK.term("category"), self.BIOLINK.Association)
            yield from self.triple(URIRef(eid), self.BIOLINK.term("subject"), s)
            yield from self.triple(URIRef(eid), self.BIOLINK.term("predicate"), pred)
            yield from self.triple(URIRef(eid), self.BIOLINK.term("object"), parent)
            yield from self.triple(
                URIRef(eid),
                self.BIOLINK.term("logical_interpretation"),
                os_interpretation,
            )
        else:
            yield from self.triple(s, pred, parent)

    seen.add(OWL.equivalentClass)
    for s, p, o in rdfgraph.triples((None, OWL.equivalentClass, None)):
        # A owl:equivalentClass B (where A and B are named classes)
        if not isinstance(o, rdflib.term.BNode):
            yield from self.triple(s, p, o)

    for relation in rdfgraph.subjects(RDF.type, OWL.ObjectProperty):
        # Recording the property in `seen` makes the catch-all pass below
        # skip triples that use it as their predicate.
        seen.add(relation)
        for s, p, o in rdfgraph.triples((relation, None, None)):
            if not isinstance(o, rdflib.term.BNode):
                if p not in self.excluded_predicates:
                    yield from self.triple(s, p, o)

    # Catch-all pass over every triple not covered above.
    for s, p, o in rdfgraph.triples((None, None, None)):
        if isinstance(s, rdflib.term.BNode) or isinstance(o, rdflib.term.BNode):
            continue
        if p in seen:
            continue
        if p in self.excluded_predicates:
            continue
        yield from self.triple(s, p, o)

    # Reified nodes are taken out of the node cache and dereified.
    # NOTE(review): pop() without a default raises KeyError if a reified
    # id is missing from node_cache - confirm self.triple always caches it.
    for n in self.reified_nodes:
        data = self.node_cache.pop(n)
        self.dereify(n, data)

    # Emit the cached nodes that validate and pass the node filter.
    for k, data in self.node_cache.items():
        node_data = self.validate_node(data)
        if not node_data:
            continue

        node_data = sanitize_import(node_data)
        self.set_node_provenance(node_data)
        if self.check_node_filter(node_data):
            self.node_properties.update(node_data.keys())
            yield k, node_data
    self.node_cache.clear()

    # Emit the cached edges that validate and pass the edge filter; the
    # cache key is indexed as a 3-item sequence.
    for k, data in self.edge_cache.items():
        edge_data = self.validate_edge(data)
        if not edge_data:
            continue

        edge_data = sanitize_import(edge_data)
        self.set_edge_provenance(edge_data)
        if self.check_edge_filter(edge_data):
            self.edge_properties.update(edge_data.keys())
            yield k[0], k[1], k[2], edge_data
    self.edge_cache.clear()
def load_edge(self, edge: Dict) -> Generator:
    """
    Convert one mapping record into node and edge records.

    The record's ``predicate_id`` is resolved through ``process_predicate``
    and then removed from ``edge`` (the caller's dictionary is modified).
    Fields listed in ``SSSOM_NODE_PROPERTY_MAPPING`` are routed to the
    subject or object node according to their name prefix; every other
    field is kept on the edge. Graph metadata (except ``curie_map``) is
    copied onto the edge as well.

    Nothing is yielded when the edge fails validation or when either node
    fails to load. When the sanitised edge lacks ``subject`` or ``object``
    an error is logged and only the two nodes are yielded.

    Parameters
    ----------
    edge : Dict
        An edge

    Returns
    -------
    Generator
        A generator for node and edge records

    """
    element_uri, canonical_uri, predicate, property_name = process_predicate(
        self.prefix_manager, edge["predicate_id"], self.predicate_mapping
    )

    # First truthy of element URI and predicate, else the property name.
    edge_predicate = element_uri or predicate or property_name
    if canonical_uri:
        edge_predicate = element_uri

    candidate = {
        "subject": edge["subject_id"],
        "predicate": edge_predicate,
        "object": edge["object_id"],
    }
    del edge["predicate_id"]
    data = self.validate_edge(candidate)
    if not data:
        return  # ?

    subject_props = {}
    object_props = {}
    for field, value in edge.items():
        if field not in SSSOM_NODE_PROPERTY_MAPPING:
            # Not a node property: keep it on the edge.
            data[field] = value
            continue

        if field.startswith("subject"):
            target = subject_props
        elif field.startswith("object"):
            target = object_props
        else:
            log.info(f"Ignoring {field} {value}")
            continue

        mapped_field = SSSOM_NODE_PROPERTY_MAPPING[field]
        # A category that is not a CURIE is replaced by a fixed class.
        if mapped_field == "category" and not PrefixManager.is_curie(value):
            value = "biolink:OntologyClass"
        target[mapped_field] = value

    subject_node = self.load_node(subject_props)
    object_node = self.load_node(object_props)
    if not subject_node or not object_node:
        return  # ?

    records = [subject_node, object_node]

    for meta_key, meta_value in self.graph_metadata.items():
        if meta_key == "curie_map":
            continue
        data[meta_key] = meta_value

    edge_data = sanitize_import(data.copy())
    if "subject" not in edge_data or "object" not in edge_data:
        self.owner.log_error(
            entity=str(edge_data),
            error_type=ErrorType.MISSING_NODE,
            message="Ignoring edge with either a missing 'subject' or 'object'",
            message_level=MessageLevel.WARNING
        )
    else:
        if "id" not in edge_data:
            edge_data["id"] = generate_uuid()
        subject_id = edge_data["subject"]
        object_id = edge_data["object"]
        self.set_edge_provenance(edge_data)
        edge_key = generate_edge_key(subject_id, edge_data["predicate"], object_id)
        self.edge_properties.update(list(edge_data.keys()))
        records.append((subject_id, object_id, edge_key, edge_data))

    yield from records
def load_edge(self, edge: Dict) -> Generator:
    """
    Convert one mapping record into node and edge records.

    The record's ``predicate_id`` is resolved through ``process_predicate``
    and then removed from ``edge`` (the caller's dictionary is modified).
    Fields listed in ``SSSOM_NODE_PROPERTY_MAPPING`` are routed to the
    subject or object node according to their name prefix; every other
    field is kept on the edge. Graph metadata (except ``curie_map``) is
    copied onto the edge as well.

    The two loaded nodes are always yielded. The edge tuple follows only
    when the sanitised edge has both ``subject`` and ``object``;
    otherwise a message is logged.

    Parameters
    ----------
    edge : Dict
        An edge

    Returns
    -------
    Generator
        A generator for node and edge records

    """
    element_uri, canonical_uri, predicate, property_name = process_predicate(
        self.prefix_manager, edge['predicate_id'], self.predicate_mapping
    )

    # First truthy of element URI and predicate, else the property name.
    edge_predicate = element_uri or predicate or property_name
    if canonical_uri:
        edge_predicate = element_uri

    candidate = {
        'subject': edge['subject_id'],
        'predicate': edge_predicate,
        'object': edge['object_id'],
    }
    del edge['predicate_id']
    data = validate_edge(candidate)

    subject_props = {}
    object_props = {}
    for field, value in edge.items():
        if field not in SSSOM_NODE_PROPERTY_MAPPING:
            # Not a node property: keep it on the edge.
            data[field] = value
            continue

        if field.startswith('subject'):
            target = subject_props
        elif field.startswith('object'):
            target = object_props
        else:
            log.info(f"Ignoring {field} {value}")
            continue

        mapped_field = SSSOM_NODE_PROPERTY_MAPPING[field]
        # A category that is not a CURIE is replaced by a fixed class.
        if mapped_field == 'category' and not PrefixManager.is_curie(value):
            value = "biolink:OntologyClass"
        target[mapped_field] = value

    records = [self.load_node(subject_props), self.load_node(object_props)]

    for meta_key, meta_value in self.graph_metadata.items():
        if meta_key == 'curie_map':
            continue
        data[meta_key] = meta_value

    edge_data = sanitize_import(data.copy())
    if 'subject' not in edge_data or 'object' not in edge_data:
        log.info(
            "Ignoring edge with either a missing 'subject' or 'object': {}".format(edge_data)
        )
    else:
        if 'id' not in edge_data:
            edge_data['id'] = generate_uuid()
        subject_id = edge_data['subject']
        object_id = edge_data['object']
        # Fall back to the graph-level provenance only when the edge has none.
        if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data:
            edge_data['provided_by'] = self.graph_metadata['provided_by']
        edge_key = generate_edge_key(subject_id, edge_data['predicate'], object_id)
        self.edge_properties.update(list(edge_data.keys()))
        records.append((subject_id, object_id, edge_key, edge_data))

    yield from records
def test_generate_uuid():
    """
    Check that generate_uuid returns a string carrying the URN UUID prefix.
    """
    expected_prefix = 'urn:uuid:'
    generated = generate_uuid()
    assert generated.startswith(expected_prefix)