def get_biolink_element(prefix_manager: PrefixManager, predicate: Any) -> Optional[Element]:
    """
    Look up the Biolink Model element that corresponds to a predicate.

    The predicate is first normalised (IRI -> CURIE -> reference) and looked
    up directly in the toolkit. If that yields nothing, the toolkit is asked
    whether the original predicate is a known mapping of some element.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    predicate: Any
        The predicate, given as an IRI, a CURIE or a plain reference

    Returns
    -------
    Optional[Element]
        The corresponding Biolink Model element, if one was found

    """
    toolkit = get_toolkit()

    # IRI -> CURIE (anything that is not an IRI is used unchanged)
    curie = prefix_manager.contract(predicate) if prefix_manager.is_iri(predicate) else predicate
    # CURIE -> reference (anything that is not a CURIE is used unchanged)
    reference = prefix_manager.get_reference(curie) if prefix_manager.is_curie(curie) else curie

    element = toolkit.get_element(reference)
    if element:
        return element

    # Direct lookup failed; fall back to a lookup by mapping on the
    # original, un-normalised predicate.
    try:
        mapped_name = toolkit.get_element_by_mapping(predicate)
        if mapped_name:
            element = toolkit.get_element(mapped_name)
    except ValueError as err:
        log.error(err)
    return element
class RdfGraphMixin(object):
    """
    A mixin that defines the following methods,
        - load_networkx_graph(): template method that all deriving classes should implement
        - add_node(): method to add a node from a RDF form to property graph form
        - add_node_attribute(): method to add a node attribute from a RDF form to property graph form
        - add_edge(): method to add an edge from a RDF form to property graph form
        - add_edge_attribute(): method to add an edge attribute from an RDF form to property graph form

    """

    DEFAULT_EDGE_LABEL = 'related_to'

    def __init__(self, source_graph: nx.MultiDiGraph = None):
        """
        Initialise the mixin.

        Parameters
        ----------
        source_graph: nx.MultiDiGraph
            An existing graph to populate. When omitted a new, empty
            networkx.MultiDiGraph is created.

        """
        # Compare against None explicitly: an empty networkx graph has
        # len() == 0 and is therefore falsy, so a plain truthiness test
        # would discard an empty graph supplied by the caller.
        if source_graph is not None:
            self.graph = source_graph
        else:
            self.graph = nx.MultiDiGraph()

        self.graph_metadata = {}
        self.prefix_manager = PrefixManager()
        self.DEFAULT = Namespace(self.prefix_manager.prefix_map[':'])
        # TODO: use OBO IRI from biolink model context once https://github.com/biolink/biolink-model/issues/211 is resolved
        self.OBO = Namespace('http://purl.obolibrary.org/obo/')
        self.OBAN = Namespace(self.prefix_manager.prefix_map['OBAN'])
        self.PMID = Namespace(self.prefix_manager.prefix_map['PMID'])
        self.BIOLINK = Namespace(self.prefix_manager.prefix_map['biolink'])

    def load_networkx_graph(self, rdfgraph: rdflib.Graph = None, predicates: Set[URIRef] = None, **kwargs) -> None:
        """
        This method should be overridden and be implemented by the derived class,
        and should load all desired nodes and edges from rdflib.Graph into networkx.MultiDiGraph

        It is preferred that this method does not use the networkx API directly
        when adding nodes, edges, and their attributes. Instead, use the following methods,

        - ``add_node()``
        - ``add_node_attribute()``
        - ``add_edge()``
        - ``add_edge_attribute()``

        to ensure that nodes, edges, and their attributes are added in conformance
        with the BioLink Model, and that URIRef's are translated into CURIEs or
        BioLink Model elements whenever appropriate.

        Parameters
        ----------
        rdfgraph: rdflib.Graph
            Graph containing nodes and edges
        predicates: list
            A list of rdflib.URIRef representing predicates to be loaded
        kwargs: dict
            Any additional arguments

        Raises
        ------
        NotImplementedError
            Always; deriving classes must provide the implementation.

        """
        raise NotImplementedError("Method not implemented.")

    def add_node(self, iri: URIRef) -> str:
        """
        This method should be used by all derived classes when adding a node to
        the networkx.MultiDiGraph. The node identifier is the contracted (CURIE)
        form of ``iri``; when the IRI cannot be contracted the IRI itself is used.

        A node that already exists is left untouched.

        Parameters
        ----------
        iri : rdflib.URIRef
            IRI of a node

        Returns
        -------
        str
            The identifier of the node in the networkx.MultiDiGraph

        """
        n = self.prefix_manager.contract(iri)
        if not n:
            # no known prefix for this IRI; fall back to the IRI itself
            n = iri
        if not self.graph.has_node(n):
            kwargs = {'id': n, 'category': ["biolink:NamedThing"]}
            if 'provided_by' in self.graph_metadata:
                kwargs['provided_by'] = self.graph_metadata['provided_by']
            self.graph.add_node(n, **kwargs)
        return n

    def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
        """
        This method should be used by all derived classes when adding an edge to
        the networkx.MultiDiGraph. The subject and object nodes are added via
        ``add_node()`` and the ``edge_label`` is derived from ``predicate_iri``.

        An edge that already exists (same subject, object and edge key) is left
        untouched.

        Parameters
        ----------
        subject_iri: rdflib.URIRef
            Subject IRI for the subject in a triple
        object_iri: rdflib.URIRef
            Object IRI for the object in a triple
        predicate_iri: rdflib.URIRef
            Predicate IRI for the predicate in a triple

        Returns
        -------
        Tuple[str, str, str]
            A 3-nary tuple (of the form subject, object, predicate) that represents the edge.
            The predicate is the processed edge label without the ``biolink:`` prefix.

        """
        s = self.add_node(subject_iri)
        o = self.add_node(object_iri)

        relation = self.prefix_manager.contract(predicate_iri)
        edge_label = process_iri(predicate_iri)

        if ' ' in edge_label:
            logging.debug(
                "predicate IRI '%s' yields edge_label '%s' that not in snake_case form; replacing ' ' with '_'",
                predicate_iri, edge_label
            )
            # Perform the replacement that the log message announces.
            edge_label = edge_label.replace(' ', '_')

        if edge_label.startswith(self.BIOLINK):
            logging.debug(
                "predicate IRI '%s' yields edge_label '%s' that starts with '%s'; removing IRI prefix",
                predicate_iri, edge_label, self.BIOLINK
            )
            edge_label = edge_label.replace(self.BIOLINK, '')

        if PrefixManager.is_curie(edge_label):
            name = curie_lookup(edge_label)
            if name:
                logging.debug(
                    "predicate IRI '%s' yields edge_label '%s' that is actually a CURIE; Using its mapping instead: %s",
                    predicate_iri, edge_label, name
                )
                edge_label = name
            else:
                logging.debug(
                    "predicate IRI '%s' yields edge_label '%s' that is actually a CURIE; defaulting back to %s",
                    predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL
                )
                edge_label = self.DEFAULT_EDGE_LABEL

        kwargs = {
            'subject': s,
            'predicate': str(predicate_iri),
            'object': o,
            'relation': relation,
            'edge_label': f"biolink:{edge_label}"
        }
        if 'provided_by' in self.graph_metadata:
            kwargs['provided_by'] = self.graph_metadata['provided_by']

        # The edge key is built from the un-prefixed edge label.
        key = generate_edge_key(s, edge_label, o)
        if not self.graph.has_edge(s, o, key=key):
            self.graph.add_edge(s, o, key=key, **kwargs)
        # TODO: support append
        return s, o, edge_label

    def add_node_attribute(self, iri: Union[URIRef, str], key: str, value: str) -> None:
        """
        Add an attribute to a node, while taking into account whether the attribute
        should be multi-valued.
        Multi-valued properties will not contain duplicates.

        The ``key`` may be a rdflib.URIRef or a URI string that maps onto a
        property name as defined in ``rdf_utils.property_mapping``. A key with
        no mapping is used as-is.

        If the node does not exist then it is created using the given ``iri``.

        Parameters
        ----------
        iri: Union[rdflib.URIRef, str]
            The IRI of a node in the rdflib.Graph
        key: str
            The name of the attribute. Can be a rdflib.URIRef or URI string
        value: str
            The value of the attribute

        """
        if not isinstance(key, URIRef):
            key = URIRef(key)
        mapped_key = property_mapping.get(key)
        if not mapped_key:
            logging.debug(f"{key} could not be mapped; using {key}")
            mapped_key = key

        n = self.prefix_manager.contract(str(iri))
        if not self.graph.has_node(n):
            # add_node() returns the identifier it actually stored the node
            # under; when contraction failed, hand it the original IRI.
            n = self.add_node(n or iri)
        # Always write into the graph's own attribute dict so that the
        # attribute is persisted, including for a node that was just created.
        attr_dict = self.graph.nodes[n]
        self._add_attribute(attr_dict, mapped_key, str(value))

    def add_edge_attribute(self, subject_iri: Union[URIRef, str], object_iri: URIRef, predicate_iri: URIRef, key: str, value: str) -> None:
        """
        Adds an attribute to an edge, while taking into account whether the attribute
        should be multi-valued.
        Multi-valued properties will not contain duplicates.

        The ``key`` may be a rdflib.URIRef or a URI string that maps onto a property name
        as defined in ``rdf_utils.property_mapping``. Nothing is added when the key
        cannot be resolved or when ``value`` is None.

        If the nodes in the edge do not exist then they will be created
        using ``subject_iri`` and ``object_iri``.

        If the edge itself does not exist then it will be created using
        ``subject_iri``, ``object_iri`` and ``predicate_iri``.

        Parameters
        ----------
        subject_iri: [rdflib.URIRef, str]
            The IRI of the subject node of an edge in rdflib.Graph
        object_iri: rdflib.URIRef
            The IRI of the object node of an edge in rdflib.Graph
        predicate_iri: rdflib.URIRef
            The IRI of the predicate representing an edge in rdflib.Graph
        key: str
            The name of the attribute. Can be a rdflib.URIRef or URI string
        value: str
            The value of the attribute

        """
        if key.lower() in is_property_multivalued:
            key = key.lower()
        else:
            if not isinstance(key, URIRef):
                key = URIRef(key)
            key = property_mapping.get(key)

        if key is not None and value is not None:
            # add_edge() leaves existing nodes and edges untouched, so it both
            # creates whatever is missing and yields the exact identifiers and
            # label under which the edge is stored in the graph.
            subject_curie, object_curie, edge_label = self.add_edge(subject_iri, object_iri, predicate_iri)
            edge_key = generate_edge_key(subject_curie, edge_label, object_curie)
            attr_dict = self.graph.get_edge_data(subject_curie, object_curie, key=edge_key)
            self._add_attribute(attr_dict, key, value)

    def _add_attribute(self, attr_dict: Dict, key: str, value: str) -> None:
        """
        Adds an attribute to the attribute dictionary, respecting whether or not
        that attribute should be multi-valued.
        Multi-valued attributes will not contain duplicates.

        Some attributes are singular form of others. In such cases overflowing values
        will be placed into the correlating multi-valued attribute.
        For example, `name` attribute will hold only one value while any additional
        value will be stored as `synonym` attribute.

        Any other single-valued attribute is overwritten by the new value.

        Parameters
        ----------
        attr_dict: dict
            Dictionary representing the attribute set of a node or an edge in a networkx graph
        key: str
            The name of the attribute
        value: str
            The value of the attribute

        """
        if PrefixManager.is_iri(value):
            value = process_iri(value)

        if key in is_property_multivalued and is_property_multivalued[key]:
            if key not in attr_dict:
                attr_dict[key] = [value]
            elif value not in attr_dict[key]:
                attr_dict[key].append(value)
        elif key == 'name' and 'name' in attr_dict:
            # The first name wins; further, different names overflow into
            # `synonym`. Repeating the current name adds nothing.
            if value != attr_dict['name']:
                self._add_attribute(attr_dict, 'synonym', value)
        else:
            attr_dict[key] = value
def process_predicate(
    prefix_manager: PrefixManager,
    p: Union[URIRef, str],
    predicate_mapping: Optional[Dict] = None,
) -> Tuple:
    """
    Resolve a predicate against the Biolink Model, falling back to the
    supplied predicate mappings when the model has no matching element.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    p: Union[URIRef, str]
        The predicate
    predicate_mapping: Optional[Dict]
        Predicate mappings, keyed by predicate

    Returns
    -------
    Tuple[str, str, str, str]
        A tuple that contains the Biolink CURIE (if available), the Biolink
        slot_uri CURIE (if available), the CURIE form of p, the reference of p

    """
    contracted = prefix_manager.contract(str(p)) if prefix_manager.is_iri(p) else None

    # Work out the CURIE form of `p` and its reference.
    if prefix_manager.is_curie(p):
        predicate = p
        property_name = prefix_manager.get_reference(p)
    elif contracted and prefix_manager.is_curie(contracted):
        predicate = contracted
        property_name = prefix_manager.get_reference(contracted)
    else:
        # neither a CURIE nor a contractible IRI: use the default prefix
        predicate = f":{p}"
        property_name = p

    element = get_biolink_element(prefix_manager, p)
    element_uri = None
    canonical_uri = None

    if not element:
        # no mapping to biolink model; look at predicate mappings
        if predicate_mapping and p in predicate_mapping:
            property_name = predicate_mapping[p]
            predicate = f":{property_name}"
        return element_uri, canonical_uri, predicate, property_name

    if isinstance(element, SlotDefinition):
        # predicate corresponds to a biolink slot
        if element.definition_uri:
            element_uri = prefix_manager.contract(element.definition_uri)
        else:
            element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
        if element.slot_uri:
            canonical_uri = element.slot_uri
    elif isinstance(element, ClassDefinition):
        # this will happen only when the IRI is actually
        # a reference to a class
        element_uri = prefix_manager.contract(element.class_uri)
    else:
        element_uri = f"biolink:{sentencecase_to_camelcase(element.name)}"

    # Descendants of biolink:Attribute always use the snake_case form,
    # overriding whatever was chosen above.
    if "biolink:Attribute" in get_biolink_ancestors(element.name):
        element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"

    if not predicate:
        predicate = element_uri

    return element_uri, canonical_uri, predicate, property_name
def test_prefix_manager_contract(query):
    """
    Check that ``PrefixManager.contract`` turns ``query[0]`` into
    the expected value ``query[1]``.
    """
    manager = PrefixManager()
    contracted = manager.contract(query[0])
    assert contracted == query[1]