def load_node(self, node: Dict) -> Optional[Tuple[str, Dict]]:
    """
    Load a node into an instance of BaseGraph.

    Parameters
    ----------
    node: Dict
        A node

    Returns
    -------
    Optional[Tuple[str, Dict]]
        A tuple that contains node id and node data, or ``None`` when
        the node has no 'id'
    """
    # Fix: the return annotation was ``Tuple[str, Dict]`` although the
    # function implicitly returns None for id-less nodes (the docstring
    # already said Optional).
    node = validate_node(node)
    node_data = sanitize_import(node.copy())
    if 'id' in node_data:
        n = node_data['id']
        # Inherit the graph-level 'provided_by' when the node lacks its own.
        if 'provided_by' in self.graph_metadata and 'provided_by' not in node_data.keys():
            node_data['provided_by'] = self.graph_metadata['provided_by']
        self.node_properties.update(list(node_data.keys()))
        return n, node_data
    else:
        log.info("Ignoring node with no 'id': {}".format(node))
def read_node(self, node: Dict) -> Optional[Tuple[str, Dict]]:
    """
    Prepare a node.

    Parameters
    ----------
    node: Dict
        A node

    Returns
    -------
    Optional[Tuple[str, Dict]]
        A tuple that contains node id and node data, or ``None`` when the
        node has no 'id' or fails the node filter
    """
    node = validate_node(node)
    node_data = sanitize_import(node.copy())
    if 'id' in node_data:
        n = node_data['id']
        # Inherit the graph-level 'provided_by' when the node lacks its own.
        if 'provided_by' in self.graph_metadata and 'provided_by' not in node_data.keys():
            node_data['provided_by'] = self.graph_metadata['provided_by']
        self.node_properties.update(list(node_data.keys()))
        if self.check_node_filter(node_data):
            # Fix: removed a redundant second node_properties.update() here;
            # the same keys were already added just above.
            return n, node_data
    else:
        log.info(f"Ignoring node with no 'id': {node}")
def read_nodes(self) -> Generator:
    """
    Read nodes as records from the graph.

    Returns
    -------
    Generator
        A generator for nodes
    """
    for node_id, attributes in self.graph.nodes(data=True):
        # Every record must carry its own 'id' property.
        attributes.setdefault("id", node_id)
        validated = self.validate_node(attributes)
        if not validated:
            continue
        record = sanitize_import(validated.copy())
        self.set_node_provenance(record)
        if self.check_node_filter(record):
            self.node_properties.update(record.keys())
            yield node_id, record
def load_node(self, node_data: Dict) -> Optional[Tuple[str, Dict]]:
    """
    Load a node into an instance of BaseGraph

    Parameters
    ----------
    node_data: Dict
        A node

    Returns
    -------
    Optional[Tuple[str, Dict]]
        A tuple that contains node id and node data
    """
    validated = self.validate_node(node_data)
    if not validated:
        return None
    record = sanitize_import(validated.copy())
    if "id" not in record:
        # An id-less node cannot be keyed into the graph; report and drop it.
        self.owner.log_error(
            entity=str(record),
            error_type=ErrorType.MISSING_NODE_PROPERTY,
            message="Ignoring node with no 'id'",
            message_level=MessageLevel.WARNING
        )
        return None
    self.set_node_provenance(record)
    self.node_properties.update(list(record.keys()))
    return record["id"], record
def read_node(self, node: Dict) -> Optional[Tuple[str, Dict]]:
    """
    Prepare a node.

    Parameters
    ----------
    node: Dict
        A node

    Returns
    -------
    Optional[Tuple[str, Dict]]
        A tuple that contains node id and node data, or ``None`` when
        validation or the node filter rejects the node
    """
    node = self.validate_node(node)
    if node:
        # if not None, assumed to have an "id" here...
        node_data = sanitize_import(node.copy(), self.list_delimiter)
        n = node_data["id"]
        self.set_node_provenance(node_data)
        self.node_properties.update(list(node_data.keys()))
        if self.check_node_filter(node_data):
            # Fix: removed a redundant second node_properties.update() here;
            # the same keys were already recorded just above.
            return n, node_data
def read_edge(self, edge: Dict) -> Optional[Tuple]:
    """
    Load an edge into an instance of BaseGraph.

    Parameters
    ----------
    edge: Dict
        An edge

    Returns
    -------
    Optional[Tuple]
        A tuple that contains subject id, object id, edge key, and edge data,
        or ``None`` when validation or the edge filter rejects the edge
    """
    edge = self.validate_edge(edge)
    if not edge:
        return None
    edge_data = sanitize_import(edge.copy(), self.list_delimiter)
    # Synthesize an edge 'id' when the source data does not supply one.
    if "id" not in edge_data:
        edge_data["id"] = generate_uuid()
    s = edge_data["subject"]
    o = edge_data["object"]
    self.set_edge_provenance(edge_data)
    key = generate_edge_key(s, edge_data["predicate"], o)
    self.edge_properties.update(list(edge_data.keys()))
    if self.check_edge_filter(edge_data):
        # Fix: this branch previously added edge keys to *node*_properties
        # (copy-paste bug) -- and redundantly so, since edge_properties was
        # already updated just above.
        return s, o, key, edge_data
def load_edge(self, edge_record: List) -> Tuple:
    """
    Load an edge into an instance of BaseGraph

    Parameters
    ----------
    edge_record: List
        A 4-tuple edge record

    Returns
    -------
    Tuple
        A tuple with subject ID, object ID, edge key, and edge data
    """
    subject_node = edge_record[0]
    data = edge_record[1]
    object_node = edge_record[2]
    self.set_edge_provenance(data)
    # Synthesize an edge 'id' when the record does not supply one.
    if "id" not in data:
        data["id"] = generate_uuid()
    edge_key = generate_edge_key(
        subject_node["id"], data["predicate"], object_node["id"]
    )
    data = self.validate_edge(data)
    if not data:
        # Invalid edge: signal with an empty tuple.
        return ()
    data = sanitize_import(data.copy())
    self.edge_properties.update(data.keys())
    return subject_node["id"], object_node["id"], edge_key, data
def load_node(self, node_data: Dict) -> Optional[Tuple]:
    """
    Load node into an instance of BaseGraph

    Parameters
    ----------
    node_data: Dict
        A node

    Returns
    -------
    Optional[Tuple]
        A tuple with node ID and node data, or ``None`` when validation fails
    """
    self.node_count += 1
    # TODO: remove the seen_nodes
    self.seen_nodes.add(node_data["id"])
    self.set_node_provenance(node_data)
    validated = self.validate_node(node_data)
    if not validated:
        return None
    record = sanitize_import(validated.copy())
    self.node_properties.update(record.keys())
    return record["id"], record
def load_edge(self, edge_record: List) -> Tuple:
    """
    Load an edge into an instance of BaseGraph

    Parameters
    ----------
    edge_record: List
        A 4-tuple edge record

    Returns
    -------
    Tuple
        A tuple with subject ID, object ID, edge key, and edge data
    """
    subject_node = edge_record[0]
    edge = edge_record[1]
    object_node = edge_record[2]
    # Inherit the graph-level 'provided_by' when the edge lacks its own.
    if 'provided_by' not in edge and 'provided_by' in self.graph_metadata:
        edge['provided_by'] = self.graph_metadata['provided_by']
    # Synthesize an edge 'id' when the record does not supply one.
    if 'id' not in edge:
        edge['id'] = generate_uuid()
    key = generate_edge_key(
        subject_node['id'], edge['predicate'], object_node['id']
    )
    edge = sanitize_import(validate_edge(edge).copy())
    self.edge_properties.update(edge.keys())
    return subject_node['id'], object_node['id'], key, edge
def read_edge(self, edge: Dict) -> Optional[Tuple]:
    """
    Load an edge into an instance of BaseGraph.

    Parameters
    ----------
    edge: Dict
        An edge

    Returns
    -------
    Optional[Tuple]
        A tuple that contains subject id, object id, edge key, and edge data,
        or ``None`` when the edge filter rejects the edge
    """
    edge = validate_edge(edge)
    edge_data = sanitize_import(edge.copy())
    # Synthesize an edge 'id' when the source data does not supply one.
    if 'id' not in edge_data:
        edge_data['id'] = generate_uuid()
    s = edge_data['subject']
    o = edge_data['object']
    # Inherit the graph-level 'provided_by' when the edge lacks its own.
    if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data.keys():
        edge_data['provided_by'] = self.graph_metadata['provided_by']
    key = generate_edge_key(s, edge_data['predicate'], o)
    self.edge_properties.update(list(edge_data.keys()))
    if self.check_edge_filter(edge_data):
        # Fix: this branch previously added edge keys to *node*_properties
        # (copy-paste bug) -- and redundantly so, since edge_properties was
        # already updated just above.
        return s, o, key, edge_data
def test_sanitize_import1(query):
    """
    Verify that sanitize_import yields the expected key/value pairs.
    """
    sanitized = sanitize_import(query[0], list_delimiter='|')
    expected = query[1]
    for key, value in expected.items():
        assert key in sanitized
        assert sanitized[key] == value
def read_edges(self) -> Generator:
    """
    Read edges as records from the graph.

    Returns
    -------
    Generator
        A generator for edges
    """
    for u, v, k, data in self.graph.edges(keys=True, data=True):
        edge_data = validate_edge(data)
        edge_data = sanitize_import(edge_data.copy())
        # Inherit the graph-level 'provided_by' when the edge lacks its own.
        if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data.keys():
            edge_data['provided_by'] = self.graph_metadata['provided_by']
        if self.check_edge_filter(edge_data):
            # Fix: edge property names were being added to node_properties
            # (copy-paste bug); they belong in edge_properties.
            self.edge_properties.update(edge_data.keys())
            yield u, v, k, edge_data
def read_nodes(self) -> Generator:
    """
    Read nodes as records from the graph.

    Returns
    -------
    Generator
        A generator for nodes
    """
    for node_id, attributes in self.graph.nodes(data=True):
        # Every record must carry its own 'id' property.
        attributes.setdefault('id', node_id)
        record = sanitize_import(validate_node(attributes).copy())
        # Inherit the graph-level 'provided_by' when the node lacks its own.
        if 'provided_by' not in record and 'provided_by' in self.graph_metadata:
            record['provided_by'] = self.graph_metadata['provided_by']
        if self.check_node_filter(record):
            self.node_properties.update(record.keys())
            yield node_id, record
def read_edges(self) -> Generator:
    """
    Read edges as records from the graph.

    Returns
    -------
    Generator
        A generator for edges
    """
    for u, v, k, data in self.graph.edges(keys=True, data=True):
        edge_data = self.validate_edge(data)
        if not edge_data:
            continue
        edge_data = sanitize_import(edge_data.copy())
        self.set_edge_provenance(edge_data)
        if self.check_edge_filter(edge_data):
            # Fix: edge property names were being added to node_properties
            # (copy-paste bug); they belong in edge_properties.
            self.edge_properties.update(edge_data.keys())
            yield u, v, k, edge_data
def load_node(self, node: Dict) -> Tuple:
    """
    Load node into an instance of BaseGraph

    Parameters
    ----------
    node: Dict
        A node

    Returns
    -------
    Tuple
        A tuple with node ID and node data
    """
    self.node_count += 1
    # TODO: remove the seen_nodes
    self.seen_nodes.add(node['id'])
    # Inherit the graph-level 'provided_by' when the node lacks its own.
    if 'provided_by' not in node and 'provided_by' in self.graph_metadata:
        node['provided_by'] = self.graph_metadata['provided_by']
    record = sanitize_import(validate_node(node).copy())
    self.node_properties.update(record.keys())
    return record['id'], record
def load_edge(self, edge: Dict) -> Generator:
    """
    Load an edge into an instance of BaseGraph

    Parameters
    ----------
    edge : Dict
        An edge

    Returns
    -------
    Generator
        A generator for node and edge records
    """
    # Resolve the SSSOM predicate_id into a Biolink edge predicate,
    # preferring element URI, then predicate, then plain property name.
    (element_uri, canonical_uri, predicate, property_name) = process_predicate(
        self.prefix_manager, edge['predicate_id'], self.predicate_mapping)
    if element_uri:
        edge_predicate = element_uri
    elif predicate:
        edge_predicate = predicate
    else:
        edge_predicate = property_name
    if canonical_uri:
        # A canonical mapping exists; use the element URI form.
        edge_predicate = element_uri
    data = {
        'subject': edge['subject_id'],
        'predicate': edge_predicate,
        'object': edge['object_id'],
    }
    del edge['predicate_id']
    data = validate_edge(data)
    # Split remaining SSSOM columns into subject-node properties,
    # object-node properties, and edge properties.
    subject_node = {}
    object_node = {}
    for k, v in edge.items():
        if k in SSSOM_NODE_PROPERTY_MAPPING:
            if k.startswith('subject'):
                mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                # Non-CURIE categories are coerced to a generic OntologyClass.
                if mapped_k == 'category' and not PrefixManager.is_curie(v):
                    v = f"biolink:OntologyClass"
                subject_node[mapped_k] = v
            elif k.startswith('object'):
                mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                if mapped_k == 'category' and not PrefixManager.is_curie(v):
                    v = f"biolink:OntologyClass"
                object_node[mapped_k] = v
            else:
                log.info(f"Ignoring {k} {v}")
        else:
            # Anything not a mapped node property becomes an edge property.
            data[k] = v
    # Emit both endpoint node records before the edge record.
    objs = [self.load_node(subject_node), self.load_node(object_node)]
    # Propagate graph metadata (except the curie_map) onto the edge.
    for k, v in self.graph_metadata.items():
        if k not in {'curie_map'}:
            data[k] = v
    edge_data = sanitize_import(data.copy())
    if 'subject' in edge_data and 'object' in edge_data:
        if 'id' not in edge_data:
            # Synthesize an edge 'id' when the record does not supply one.
            edge_data['id'] = generate_uuid()
        s = edge_data['subject']
        o = edge_data['object']
        if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data.keys():
            edge_data['provided_by'] = self.graph_metadata['provided_by']
        key = generate_edge_key(s, edge_data['predicate'], o)
        self.edge_properties.update(list(edge_data.keys()))
        objs.append((s, o, key, edge_data))
    else:
        log.info(
            "Ignoring edge with either a missing 'subject' or 'object': {}"
            .format(edge_data))
    for o in objs:
        yield o
def parse(
    self,
    filename: str,
    format: str = 'nt',
    compression: Optional[str] = None,
    provided_by: Optional[str] = None,
    **kwargs: Any,
) -> Generator:
    """
    This method reads from RDF N-Triples and yields records.

    .. note::
        To ensure proper parsing of N-Triples and a relatively low memory
        footprint, it is recommended that the N-Triples be sorted based on
        the subject IRIs.

        ```sort -k 1,2 -t ' ' data.nt > data_sorted.nt```

    Parameters
    ----------
    filename: str
        The filename to parse
    format: str
        The format (``nt``)
    compression: Optional[str]
        The compression type (``gz``)
    provided_by: Optional[str]
        The name of the source providing the input file
    kwargs: Any
        Any additional arguments

    Returns
    -------
    Generator
        A generator for records
    """
    p = CustomNTriplesParser(self)
    if provided_by:
        self.graph_metadata['provided_by'] = [provided_by]
    # Stream records straight out of the parser; records are produced
    # incrementally as triples are consumed.
    # NOTE(review): the file handle opened here is never explicitly
    # closed -- confirm whether the parser takes ownership.
    if compression == 'gz':
        yield from p.parse(gzip.open(filename, 'rb'))
    else:
        yield from p.parse(open(filename, 'rb'))
    log.info(f"Done parsing (unknown)")
    # Dereify any reified nodes accumulated during parsing back into edges.
    for n in self.reified_nodes:
        data = self.node_cache.pop(n)
        self.dereify(n, data)
    # Flush remaining cached nodes, defaulting category to NamedThing.
    for k in self.node_cache.keys():
        data = self.node_cache[k]
        if 'category' in data:
            if 'biolink:NamedThing' not in set(data['category']):
                data['category'].append('biolink:NamedThing')
        else:
            data['category'] = ["biolink:NamedThing"]
        data = validate_node(data)
        data = sanitize_import(data)
        if 'provided_by' in self.graph_metadata and 'provided_by' not in data.keys():
            data['provided_by'] = self.graph_metadata['provided_by']
        if self.check_node_filter(data):
            self.node_properties.update(data.keys())
            yield k, data
    self.node_cache.clear()
    # Flush remaining cached edges; cache keys are (subject, object, key).
    for k in self.edge_cache.keys():
        data = self.edge_cache[k]
        data = validate_edge(data)
        data = sanitize_import(data)
        if 'provided_by' in self.graph_metadata and 'provided_by' not in data.keys():
            data['provided_by'] = self.graph_metadata['provided_by']
        if self.check_edge_filter(data):
            self.edge_properties.update(data.keys())
            yield k[0], k[1], k[2], data
    self.edge_cache.clear()
def load_graph(self, rdfgraph: rdflib.Graph, **kwargs: Any) -> Generator:
    """
    Walk through the rdflib.Graph and load all triples into
    kgx.graph.base_graph.BaseGraph

    Parameters
    ----------
    rdfgraph: rdflib.Graph
        Graph containing nodes and edges
    kwargs: Any
        Any additional arguments

    Returns
    -------
    Generator
        A generator of records produced via ``self.triple``
    """
    # Predicates handled explicitly below are tracked in `seen` so the
    # catch-all pass at the end does not emit them twice.
    seen = set()
    seen.add(RDFS.subClassOf)
    for s, p, o in rdfgraph.triples((None, RDFS.subClassOf, None)):
        # ignoring blank nodes
        if isinstance(s, rdflib.term.BNode):
            continue
        pred = None
        parent = None
        os_interpretation = None
        if isinstance(o, rdflib.term.BNode):
            # C SubClassOf R some D -- unpack the OWL restriction bnode.
            for x in rdfgraph.objects(o, OWL.onProperty):
                pred = x
            # owl:someValuesFrom
            for x in rdfgraph.objects(o, OWL.someValuesFrom):
                os_interpretation = self.OWLSTAR.term("AllSomeInterpretation")
                parent = x
            # owl:allValuesFrom
            for x in rdfgraph.objects(o, OWL.allValuesFrom):
                os_interpretation = self.OWLSTAR.term("AllOnlyInterpretation")
                parent = x
            if pred is None or parent is None:
                # Restriction we do not know how to unpack; skip it.
                log.warning(
                    f"{s} {p} {o} has OWL.onProperty {pred} and OWL.someValuesFrom {parent}"
                )
                log.warning("Do not know how to handle BNode: {}".format(o))
                continue
        else:
            # C rdfs:subClassOf D (where C and D are named classes)
            pred = p
            parent = o
        if os_interpretation:
            # reify edges that have logical interpretation
            eid = generate_uuid()
            self.reified_nodes.add(eid)
            yield from self.triple(URIRef(eid), self.BIOLINK.term("category"),
                                   self.BIOLINK.Association)
            yield from self.triple(URIRef(eid), self.BIOLINK.term("subject"), s)
            yield from self.triple(URIRef(eid), self.BIOLINK.term("predicate"), pred)
            yield from self.triple(URIRef(eid), self.BIOLINK.term("object"), parent)
            yield from self.triple(
                URIRef(eid),
                self.BIOLINK.term("logical_interpretation"),
                os_interpretation,
            )
        else:
            yield from self.triple(s, pred, parent)
    seen.add(OWL.equivalentClass)
    for s, p, o in rdfgraph.triples((None, OWL.equivalentClass, None)):
        # A owl:equivalentClass B (where A and B are named classes)
        if not isinstance(o, rdflib.term.BNode):
            yield from self.triple(s, p, o)
    # Emit all triples about each declared object property.
    for relation in rdfgraph.subjects(RDF.type, OWL.ObjectProperty):
        seen.add(relation)
        for s, p, o in rdfgraph.triples((relation, None, None)):
            if not isinstance(o, rdflib.term.BNode):
                if p not in self.excluded_predicates:
                    yield from self.triple(s, p, o)
    # Catch-all pass over everything not already handled above.
    for s, p, o in rdfgraph.triples((None, None, None)):
        if isinstance(s, rdflib.term.BNode) or isinstance(o, rdflib.term.BNode):
            continue
        if p in seen:
            continue
        if p in self.excluded_predicates:
            continue
        yield from self.triple(s, p, o)
    # Dereify accumulated reified nodes back into edges.
    for n in self.reified_nodes:
        data = self.node_cache.pop(n)
        self.dereify(n, data)
    # Flush remaining cached nodes.
    for k, data in self.node_cache.items():
        node_data = self.validate_node(data)
        if not node_data:
            continue
        node_data = sanitize_import(node_data)
        self.set_node_provenance(node_data)
        if self.check_node_filter(node_data):
            self.node_properties.update(node_data.keys())
            yield k, node_data
    self.node_cache.clear()
    # Flush remaining cached edges; cache keys are (subject, object, key).
    for k, data in self.edge_cache.items():
        edge_data = self.validate_edge(data)
        if not edge_data:
            continue
        edge_data = sanitize_import(edge_data)
        self.set_edge_provenance(edge_data)
        if self.check_edge_filter(edge_data):
            self.edge_properties.update(edge_data.keys())
            yield k[0], k[1], k[2], edge_data
    self.edge_cache.clear()
def load_edge(self, edge: Dict) -> Generator:
    """
    Load an edge into an instance of BaseGraph

    Parameters
    ----------
    edge : Dict
        An edge

    Returns
    -------
    Generator
        A generator for node and edge records
    """
    # Resolve the SSSOM predicate_id into a Biolink edge predicate,
    # preferring element URI, then predicate, then plain property name.
    (element_uri, canonical_uri, predicate, property_name) = process_predicate(
        self.prefix_manager, edge["predicate_id"], self.predicate_mapping
    )
    if element_uri:
        edge_predicate = element_uri
    elif predicate:
        edge_predicate = predicate
    else:
        edge_predicate = property_name
    if canonical_uri:
        # A canonical mapping exists; use the element URI form.
        edge_predicate = element_uri
    data = {
        "subject": edge["subject_id"],
        "predicate": edge_predicate,
        "object": edge["object_id"],
    }
    del edge["predicate_id"]
    data = self.validate_edge(data)
    if not data:
        return  # ?
    # Split remaining SSSOM columns into subject-node properties,
    # object-node properties, and edge properties.
    subject_node = {}
    object_node = {}
    for k, v in edge.items():
        if k in SSSOM_NODE_PROPERTY_MAPPING:
            if k.startswith("subject"):
                mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                # Non-CURIE categories are coerced to a generic OntologyClass.
                if mapped_k == "category" and not PrefixManager.is_curie(v):
                    v = f"biolink:OntologyClass"
                subject_node[mapped_k] = v
            elif k.startswith("object"):
                mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                if mapped_k == "category" and not PrefixManager.is_curie(v):
                    v = f"biolink:OntologyClass"
                object_node[mapped_k] = v
            else:
                log.info(f"Ignoring {k} {v}")
        else:
            # Anything not a mapped node property becomes an edge property.
            data[k] = v
    # Both endpoint nodes must load successfully before the edge is emitted.
    subject_node = self.load_node(subject_node)
    object_node = self.load_node(object_node)
    if not (subject_node and object_node):
        return  # ?
    objs = [subject_node, object_node]
    # Propagate graph metadata (except the curie_map) onto the edge.
    for k, v in self.graph_metadata.items():
        if k not in {"curie_map"}:
            data[k] = v
    edge_data = sanitize_import(data.copy())
    if "subject" in edge_data and "object" in edge_data:
        if "id" not in edge_data:
            # Synthesize an edge 'id' when the record does not supply one.
            edge_data["id"] = generate_uuid()
        s = edge_data["subject"]
        o = edge_data["object"]
        self.set_edge_provenance(edge_data)
        key = generate_edge_key(s, edge_data["predicate"], o)
        self.edge_properties.update(list(edge_data.keys()))
        objs.append((s, o, key, edge_data))
    else:
        self.owner.log_error(
            entity=str(edge_data),
            error_type=ErrorType.MISSING_NODE,
            message="Ignoring edge with either a missing 'subject' or 'object'",
            message_level=MessageLevel.WARNING
        )
    # Yield the node records first, then (if built) the edge record.
    for o in objs:
        yield o
def triple(self, s: URIRef, p: URIRef, o: URIRef) -> Generator:
    """
    Parse a triple.

    Classifies the triple as either a node attribute or an edge, and
    flushes the edge cache (yielding edge records) once it reaches
    ``self.CACHE_SIZE``.

    Parameters
    ----------
    s: URIRef
        Subject
    p: URIRef
        Predicate
    o: URIRef
        Object
    """
    self.count += 1
    (element_uri, canonical_uri, predicate, property_name) = self.process_predicate(p)
    # Pick the most specific available representation of the predicate.
    if element_uri:
        prop_uri = element_uri
    elif predicate:
        prop_uri = predicate
    else:
        prop_uri = property_name
    s_curie = self.prefix_manager.contract(s)
    if s_curie.startswith("biolink") or s_curie.startswith("OBAN"):
        log.warning(f"Skipping {s} {p} {o}")
    elif s_curie in self.reified_nodes:
        # subject is a reified node
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif p in self.reification_predicates:
        # subject is a reified node
        self.reified_nodes.add(s_curie)
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif property_name in {
        "subject",
        "predicate",
        "object",
        "predicate",  # NOTE(review): "predicate" is listed twice in this set
        "relation",
    }:
        # subject is a reified node
        self.reified_nodes.add(s_curie)
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif o in self.reification_types:
        # subject is a reified node
        self.reified_nodes.add(s_curie)
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif element_uri and element_uri in self.node_property_predicates:
        # treating predicate as a node property
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif (p in self.node_property_predicates
          or predicate in self.node_property_predicates
          or property_name in self.node_property_predicates):
        # treating predicate as a node property
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif isinstance(o, rdflib.term.Literal):
        # literal objects are always node properties
        self.add_node_attribute(s, key=prop_uri, value=o)
    else:
        # treating predicate as an edge
        self.add_edge(s, o, p)
    if len(self.edge_cache) >= self.CACHE_SIZE:
        # Cache is full: dereify pending reified nodes, then flush edges.
        while self.reified_nodes:
            n = self.reified_nodes.pop()
            data = self.node_cache.pop(n)
            try:
                self.dereify(n, data)
            except ValueError as e:
                self.owner.log_error(
                    entity=str(data),
                    error_type=ErrorType.INVALID_EDGE_PROPERTY,
                    message=str(e),
                    message_level=MessageLevel.WARNING)
                # Keep incomplete reified nodes around for a later pass.
                self._incomplete_nodes[n] = data
        for n in self._incomplete_nodes.keys():
            self.node_cache[n] = self._incomplete_nodes[n]
            self.reified_nodes.add(n)
        self._incomplete_nodes.clear()
        for k in self.edge_cache.keys():
            if ("id" not in self.edge_cache[k]
                    and "association_id" not in self.edge_cache[k]):
                # Synthesize a deterministic edge id from s/p/o.
                edge_key = generate_edge_key(
                    self.edge_cache[k]["subject"],
                    self.edge_cache[k]["predicate"],
                    self.edge_cache[k]["object"],
                )
                self.edge_cache[k]["id"] = edge_key
            data = self.edge_cache[k]
            data = self.validate_edge(data)
            data = sanitize_import(data)
            self.set_edge_provenance(data)
            if self.check_edge_filter(data):
                self.edge_properties.update(data.keys())
                yield k[0], k[1], k[2], data
        self.edge_cache.clear()
    # NOTE(review): trailing ``yield None`` -- presumably so every call
    # produces at least one record for the driving parser; confirm that
    # downstream consumers filter out the None records.
    yield None
def triple(self, s: URIRef, p: URIRef, o: URIRef) -> Generator:
    """
    Parse a triple.

    Classifies the triple as either a node attribute or an edge, and
    flushes the edge cache (yielding edge records) once it reaches
    ``self.CACHE_SIZE``.

    Parameters
    ----------
    s: URIRef
        Subject
    p: URIRef
        Predicate
    o: URIRef
        Object
    """
    self.count += 1
    (element_uri, canonical_uri, predicate, property_name) = self.process_predicate(p)
    # Pick the most specific available representation of the predicate.
    if element_uri:
        prop_uri = element_uri
    elif predicate:
        prop_uri = predicate
    else:
        prop_uri = property_name
    s_curie = self.prefix_manager.contract(s)
    if s_curie.startswith('biolink') or s_curie.startswith('OBAN'):
        log.warning(f"Skipping {s} {p} {o}")
    elif s_curie in self.reified_nodes:
        # subject is a reified node
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif p in self.reification_predicates:
        # subject is a reified node
        self.reified_nodes.add(s_curie)
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif property_name in {'subject', 'predicate', 'object', 'predicate', 'relation'}:
        # subject is a reified node
        # NOTE(review): 'predicate' is listed twice in the set above
        self.reified_nodes.add(s_curie)
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif o in self.reification_types:
        # subject is a reified node
        self.reified_nodes.add(s_curie)
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif element_uri and element_uri in self.node_property_predicates:
        # treating predicate as a node property
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif (
        p in self.node_property_predicates
        or predicate in self.node_property_predicates
        or property_name in self.node_property_predicates
    ):
        # treating predicate as a node property
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif isinstance(o, rdflib.term.Literal):
        # literal objects are always node properties
        self.add_node_attribute(s, key=prop_uri, value=o)
    else:
        # treating predicate as an edge
        self.add_edge(s, o, p)
    if len(self.edge_cache) >= self.CACHE_SIZE:
        # Cache is full: dereify pending reified nodes, then flush edges.
        while self.reified_nodes:
            n = self.reified_nodes.pop()
            data = self.node_cache.pop(n)
            try:
                self.dereify(n, data)
            except ValueError as e:
                log.info(e)
                # Keep incomplete reified nodes around for a later pass.
                self._incomplete_nodes[n] = data
        for n in self._incomplete_nodes.keys():
            self.node_cache[n] = self._incomplete_nodes[n]
            self.reified_nodes.add(n)
        self._incomplete_nodes.clear()
        for k in self.edge_cache.keys():
            if 'id' not in self.edge_cache[k] and 'association_id' not in self.edge_cache[k]:
                # Synthesize a deterministic edge id from s/p/o.
                edge_key = generate_edge_key(
                    self.edge_cache[k]['subject'],
                    self.edge_cache[k]['predicate'],
                    self.edge_cache[k]['object'],
                )
                self.edge_cache[k]['id'] = edge_key
            data = self.edge_cache[k]
            data = validate_edge(data)
            data = sanitize_import(data)
            # Inherit the graph-level 'provided_by' when the edge lacks one.
            if 'provided_by' in self.graph_metadata and 'provided_by' not in data.keys():
                data['provided_by'] = self.graph_metadata['provided_by']
            if self.check_edge_filter(data):
                self.edge_properties.update(data.keys())
                yield k[0], k[1], k[2], data
        self.edge_cache.clear()
    # NOTE(review): trailing ``yield None`` -- presumably so every call
    # produces at least one record for the driving parser; confirm that
    # downstream consumers filter out the None records.
    yield None
def parse(
    self,
    filename: str,
    format: str = "nt",
    compression: Optional[str] = None,
    **kwargs: Any,
) -> Generator:
    """
    This method reads from RDF N-Triples and yields records.

    .. note::
        To ensure proper parsing of N-Triples and a relatively low memory
        footprint, it is recommended that the N-Triples be sorted based on
        the subject IRIs.

        ```sort -k 1,2 -t ' ' data.nt > data_sorted.nt```

    Parameters
    ----------
    filename: str
        The filename to parse
    format: str
        The format (``nt``)
    compression: Optional[str]
        The compression type (``gz``)
    kwargs: Any
        Any additional arguments

    Returns
    -------
    Generator
        A generator for records
    """
    p = CustomNTriplesParser(self)
    self.set_provenance_map(kwargs)
    # Stream records straight out of the parser; records are produced
    # incrementally as triples are consumed.
    # NOTE(review): the file handle opened here is never explicitly
    # closed -- confirm whether the parser takes ownership.
    if compression == "gz":
        yield from p.parse(gzip.open(filename, "rb"))
    else:
        yield from p.parse(open(filename, "rb"))
    log.info(f"Done parsing (unknown)")
    # Dereify any reified nodes accumulated during parsing back into edges.
    for n in self.reified_nodes:
        data = self.node_cache.pop(n)
        self.dereify(n, data)
    # Flush remaining cached nodes, defaulting category to NAMED_THING.
    for k in self.node_cache.keys():
        node_data = self.node_cache[k]
        if "category" in node_data:
            if NAMED_THING not in set(node_data["category"]):
                node_data["category"].append(NAMED_THING)
        else:
            node_data["category"] = [NAMED_THING]
        node_data = self.validate_node(node_data)
        if not node_data:
            continue
        node_data = sanitize_import(node_data)
        self.set_node_provenance(node_data)
        if self.check_node_filter(node_data):
            self.node_properties.update(node_data.keys())
            yield k, node_data
    self.node_cache.clear()
    # Flush remaining cached edges; cache keys are (subject, object, key).
    for k in self.edge_cache.keys():
        edge_data = self.edge_cache[k]
        edge_data = self.validate_edge(edge_data)
        if not edge_data:
            continue
        edge_data = sanitize_import(edge_data)
        self.set_edge_provenance(edge_data)
        if self.check_edge_filter(edge_data):
            self.edge_properties.update(edge_data.keys())
            yield k[0], k[1], k[2], edge_data
    self.edge_cache.clear()