def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
    """
    Add an edge for a triple to the networkx.MultiDiGraph.

    This method should be used by all derived classes when adding an edge
    to the networkx.MultiDiGraph. The subject and object are registered via
    ``self.add_node`` and the values it returns are used as the node
    identifiers of the edge. The edge label is derived from the predicate
    IRI: a leading ``self.BIOLINK`` prefix is stripped, and a label that is
    still a CURIE is replaced by its ``curie_lookup`` mapping, or by
    ``self.DEFAULT_EDGE_LABEL`` when no mapping exists.

    The edge is only added when no edge with the same generated key already
    exists between the two nodes; an existing edge is left untouched.

    Parameters
    ----------
    subject_iri: rdflib.URIRef
        Subject IRI for the subject in a triple
    object_iri: rdflib.URIRef
        Object IRI for the object in a triple
    predicate_iri: rdflib.URIRef
        Predicate IRI for the predicate in a triple

    Returns
    -------
    Tuple[str, str, str]
        A 3-nary tuple (of the form subject, object, edge_label) that
        represents the edge. The returned ``edge_label`` is the bare label;
        the ``edge_label`` attribute stored on the edge carries an
        additional ``biolink:`` prefix.

    """
    s = self.add_node(subject_iri)
    o = self.add_node(object_iri)

    relation = self.prefix_manager.contract(predicate_iri)
    edge_label = process_iri(predicate_iri)
    if ' ' in edge_label:
        # NOTE(review): the message announces a replacement, but no replacement is
        # performed here. Left as-is because other code may recompute the edge key
        # from the unmodified label -- confirm the intended behaviour.
        logging.debug("predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'".format(predicate_iri, edge_label))
    if edge_label.startswith(self.BIOLINK):
        logging.debug("predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix".format(predicate_iri, edge_label, self.BIOLINK))
        # Drop only the leading prefix; str.replace would also remove any later
        # occurrence of the same text inside the label.
        edge_label = edge_label[len(self.BIOLINK):]

    if PrefixManager.is_curie(edge_label):
        name = curie_lookup(edge_label)
        if name:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; Using its mapping instead: {}".format(predicate_iri, edge_label, name))
            edge_label = name
        else:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}".format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
            edge_label = self.DEFAULT_EDGE_LABEL

    kwargs = {
        'subject': s,
        'predicate': str(predicate_iri),
        'object': o,
        'relation': relation,
        'edge_label': f"biolink:{edge_label}"
    }
    if 'provided_by' in self.graph_metadata:
        kwargs['provided_by'] = self.graph_metadata['provided_by']

    # The key is built from the bare label, not the 'biolink:'-prefixed attribute.
    key = generate_edge_key(s, edge_label, o)
    if not self.graph.has_edge(s, o, key=key):
        self.graph.add_edge(s, o, key=key, **kwargs)
    # TODO: support append
    return s, o, edge_label
def add_edge_attribute(self, subject_iri: Union[URIRef, str], object_iri: URIRef, predicate_iri: URIRef, key: str, value: str) -> None:
    """
    Adds an attribute to an edge, while taking into account whether the attribute
    should be multi-valued.
    Multi-valued properties will not contain duplicates.

    The key may be a rdflib.URIRef or a URI string that maps onto a property name
    as defined in `rdf_utils.property_mapping`. Keys that cannot be resolved to a
    property name are ignored and the graph is left untouched.

    If the edge cannot be found under the key computed from the given IRIs, it is
    looked up again (and created together with its nodes if it does not exist)
    through ``self.add_edge`` using subject_iri, object_iri and predicate_iri.

    Parameters
    ----------
    subject_iri: [rdflib.URIRef, str]
        The IRI of the subject node of an edge in rdflib.Graph
    object_iri: rdflib.URIRef
        The IRI of the object node of an edge in rdflib.Graph
    predicate_iri: rdflib.URIRef
        The IRI of the predicate representing an edge in rdflib.Graph
    key: str
        The name of the attribute. Can be a rdflib.URIRef or URI string
    value: str
        The value of the attribute

    """
    lowered_key = key.lower()
    if lowered_key in is_property_multivalued:
        key = lowered_key
    else:
        if not isinstance(key, URIRef):
            key = URIRef(key)
        key = property_mapping.get(key)

    if key is None:
        # Unknown property: nothing to record.
        return

    subject_curie = make_curie(subject_iri)
    object_curie = make_curie(object_iri)
    edge_label = process_iri(predicate_iri)
    if is_curie(edge_label):
        edge_label = curie_lookup(edge_label)
    edge_key = generate_edge_key(subject_curie, edge_label, object_curie)
    attr_dict = self.graph.get_edge_data(subject_curie, object_curie, key=edge_key)

    if attr_dict is None:
        # No edge under the locally computed key. add_edge applies its own label
        # normalisation and only inserts the edge when it is missing, so use the
        # identifiers it returns to locate the edge's attribute dict.
        subject_curie, object_curie, edge_label = self.add_edge(subject_iri, object_iri, predicate_iri)
        edge_key = generate_edge_key(subject_curie, edge_label, object_curie)
        attr_dict = self.graph.get_edge_data(subject_curie, object_curie, key=edge_key)

    self._add_attribute(attr_dict, key, value)
def add_node_attribute(
    self, iri: Union[URIRef, str], key: str, value: Union[str, List]
) -> None:
    """
    Record an attribute for a node in ``self.node_record`` and ``self.node_cache``.

    The ``key`` may be a rdflib.URIRef or an URI string. It is contracted to a
    CURIE when it is an IRI, swapped for its ``curie_lookup`` mapping when one
    exists, and finally reduced to its reference part when it is still a CURIE.

    Values that are rdflib identifiers are converted first: URIRefs are
    contracted through the prefix manager, other identifiers are converted with
    ``toPython()``. Values of properties flagged as multi-valued in
    ``is_property_multivalued`` are wrapped in a list.

    Parameters
    ----------
    iri: Union[rdflib.URIRef, str]
        The IRI of a node in the rdflib.Graph
    key: str
        The name of the attribute. Can be a rdflib.URIRef or URI string
    value: Union[str, List]
        The value of the attribute

    Returns
    -------
    None
        Nothing is returned; the node record and the node cache are updated in place.

    """
    pm = self.prefix_manager

    # Resolve the attribute key down to a plain property name.
    key_curie = pm.contract(key) if pm.is_iri(key) else key
    looked_up = curie_lookup(key_curie)
    if looked_up:
        key_curie = looked_up
    # property names will always be just the reference
    mapped_key = pm.get_reference(key_curie) if pm.is_curie(key_curie) else key_curie

    # Convert rdflib terms into plain values (URIRef is itself an Identifier,
    # so it is tested first).
    if isinstance(value, rdflib.term.URIRef):
        value = pm.contract(value)
    elif isinstance(value, rdflib.term.Identifier):
        value = value.toPython()

    if mapped_key in is_property_multivalued and is_property_multivalued[mapped_key]:
        value = [value]

    # Accumulate the value on the running node record.
    record = self.node_record
    if mapped_key not in record:
        record[mapped_key] = [value]
    else:
        if isinstance(record[mapped_key], str):
            # Promote a lone string to a list before appending.
            record[mapped_key] = [record[mapped_key]]
        record[mapped_key].append(value)

    # Mirror the value into the per-node cache, keyed by the node's CURIE.
    curie = pm.contract(iri)
    cache = self.node_cache
    if curie not in cache:
        cache[curie] = {'id': curie, mapped_key: value}
    elif mapped_key in cache[curie]:
        cache[curie] = prepare_data_dict(cache[curie], {mapped_key: value})
    else:
        cache[curie][mapped_key] = value
def test_curie_lookup(query):
    """
    Check that looking up a CURIE yields the expected value.

    ``query`` is indexed as a pair: element 0 is handed to ``curie_lookup`` and
    element 1 is the value the lookup is expected to return.
    """
    looked_up = curie_lookup(query[0])
    expected = query[1]
    assert looked_up == expected
def test_curie_lookup(query):
    """
    Verify ``curie_lookup`` against an (input, expected) pair.

    ``query[0]`` is the value looked up and ``query[1]`` is the result it must equal.
    """
    assert curie_lookup(query[0]) == query[1]