Example #1
0
def get_biolink_element(prefix_manager: PrefixManager,
                        predicate: Any) -> Optional[Element]:
    """
    Look up the Biolink Model element that corresponds to a predicate.

    The predicate is first reduced to a reference (IRI -> CURIE -> reference)
    and looked up directly in the toolkit. If that yields nothing, the
    original ``predicate`` is tried against the toolkit's mappings.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    predicate: Any
        The predicate, given as an IRI, a CURIE or a plain reference

    Returns
    -------
    Optional[Element]
        The matching Biolink Model element, or the falsy result of the
        direct lookup when no element could be resolved

    """
    toolkit = get_toolkit()

    # IRI -> CURIE (anything that is not an IRI is passed through untouched)
    curie = prefix_manager.contract(predicate) if prefix_manager.is_iri(predicate) else predicate
    # CURIE -> reference (anything that is not a CURIE is passed through untouched)
    reference = prefix_manager.get_reference(curie) if prefix_manager.is_curie(curie) else curie

    element = toolkit.get_element(reference)
    if element:
        return element

    # Direct lookup failed; fall back to a mapping-based lookup using the
    # predicate exactly as it was supplied by the caller.
    try:
        mapping = toolkit.get_element_by_mapping(predicate)
        if mapping:
            element = toolkit.get_element(mapping)
    except ValueError as e:
        # A failed mapping lookup is logged and otherwise ignored.
        log.error(e)
    return element
Example #2
0
class RdfGraphMixin(object):
    """
    A mixin that defines the following methods,
        - load_networkx_graph(): template method that all deriving classes should implement
        - add_node(): method to add a node from a RDF form to property graph form
        - add_node_attribute(): method to add a node attribute from a RDF form to property graph form
        - add_edge(): method to add an edge from a RDF form to property graph form
        - add_edge_attribute(): method to add an edge attribute from an RDF form to property graph form

    """

    # Edge label used when a predicate resolves to a CURIE that has no known mapping
    DEFAULT_EDGE_LABEL = 'related_to'

    def __init__(self, source_graph: nx.MultiDiGraph = None):
        """
        Set up the target graph and the namespaces used while translating RDF.

        Parameters
        ----------
        source_graph: nx.MultiDiGraph
            An existing graph to populate. When ``None`` a new, empty
            networkx.MultiDiGraph is created.

        """
        # Compare against None instead of relying on truthiness: an empty
        # networkx graph has length 0 and is therefore falsy, so an empty
        # graph handed in by the caller would otherwise be silently replaced.
        if source_graph is not None:
            self.graph = source_graph
        else:
            self.graph = nx.MultiDiGraph()

        self.graph_metadata = {}
        self.prefix_manager = PrefixManager()
        self.DEFAULT = Namespace(self.prefix_manager.prefix_map[':'])
        # TODO: use OBO IRI from biolink model context once https://github.com/biolink/biolink-model/issues/211 is resolved
        self.OBO = Namespace('http://purl.obolibrary.org/obo/')
        self.OBAN = Namespace(self.prefix_manager.prefix_map['OBAN'])
        self.PMID = Namespace(self.prefix_manager.prefix_map['PMID'])
        self.BIOLINK = Namespace(self.prefix_manager.prefix_map['biolink'])

    def load_networkx_graph(self, rdfgraph: rdflib.Graph = None, predicates: Set[URIRef] = None, **kwargs) -> None:
        """
        This method should be overridden and be implemented by the derived class,
        and should load all desired nodes and edges from rdflib.Graph into networkx.MultiDiGraph

        Its preferred that this method does not use the networkx API directly
        when adding nodes, edges, and their attributes.

        Instead, Using the following methods,
            - ``add_node()``
            - ``add_node_attribute()``
            - ``add_edge()``
            - ``add_edge_attribute()``

        to ensure that nodes, edges, and their attributes
        are added in conformance with the BioLink Model, and that URIRef's are
        translated into CURIEs or BioLink Model elements whenever appropriate.

        Parameters
        ----------
        rdfgraph: rdflib.Graph
            Graph containing nodes and edges
        predicates: list
            A list of rdflib.URIRef representing predicates to be loaded
        kwargs: dict
            Any additional arguments

        Raises
        ------
        NotImplementedError
            Always, unless overridden by the derived class

        """
        raise NotImplementedError("Method not implemented.")

    def add_node(self, iri: URIRef) -> str:
        """
        This method should be used by all derived classes when adding a node to
        the networkx.MultiDiGraph. The node identifier is the contracted (CURIE)
        form of ``iri`` when the prefix manager can contract it, and the ``iri``
        itself otherwise.

        A node that already exists in the graph is left untouched.

        Parameters
        ----------
        iri : rdflib.URIRef
            IRI of a node

        Returns
        -------
        str
            The identifier of the node in the networkx.MultiDiGraph

        """
        n = self.prefix_manager.contract(iri)
        if not n:
            # No CURIE form available; fall back to the IRI as the identifier
            n = iri
        if not self.graph.has_node(n):
            kwargs = {'id': n}
            if 'provided_by' in self.graph_metadata:
                kwargs['provided_by'] = self.graph_metadata['provided_by']
            # Every newly created node starts out with the most generic category
            kwargs['category'] = ["biolink:NamedThing"]
            self.graph.add_node(n, **kwargs)

        return n

    def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
        """
        This method should be used by all derived classes when adding an edge to the networkx.MultiDiGraph.
        The subject and object nodes are created via ``add_node()`` when missing,
        and the ``edge_label`` is derived from the predicate.

        An edge that already exists (same subject, object and edge key) is left untouched.

        Parameters
        ----------
        subject_iri: rdflib.URIRef
            Subject IRI for the subject in a triple
        object_iri: rdflib.URIRef
            Object IRI for the object in a triple
        predicate_iri: rdflib.URIRef
            Predicate IRI for the predicate in a triple

        Returns
        -------
        Tuple[str, str, str]
            A 3-nary tuple (of the form subject, object, predicate) that represents the edge.
            The returned label does not carry the ``biolink:`` prefix that is stored
            in the edge's ``edge_label`` attribute.

        """
        s = self.add_node(subject_iri)
        o = self.add_node(object_iri)
        relation = self.prefix_manager.contract(predicate_iri)
        edge_label = process_iri(predicate_iri)
        if ' ' in edge_label:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'".format(predicate_iri, edge_label))
            # Actually perform the replacement announced by the log message
            edge_label = edge_label.replace(' ', '_')
        if edge_label.startswith(self.BIOLINK):
            logging.debug("predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix".format(predicate_iri, edge_label, self.BIOLINK))
            edge_label = edge_label.replace(self.BIOLINK, '')

        if PrefixManager.is_curie(edge_label):
            name = curie_lookup(edge_label)
            if name:
                logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; Using its mapping instead: {}".format(predicate_iri, edge_label, name))
                edge_label = name
            else:
                logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}".format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
                edge_label = self.DEFAULT_EDGE_LABEL

        kwargs = {
            'subject': s,
            'predicate': str(predicate_iri),
            'object': o,
            'relation': relation,
            'edge_label': f"biolink:{edge_label}"
        }
        if 'provided_by' in self.graph_metadata:
            kwargs['provided_by'] = self.graph_metadata['provided_by']

        key = generate_edge_key(s, edge_label, o)
        if not self.graph.has_edge(s, o, key=key):
            self.graph.add_edge(s, o, key=key, **kwargs)
        # TODO: support append
        return s, o, edge_label

    def add_node_attribute(self, iri: Union[URIRef, str], key: str, value: str) -> None:
        """
        Add an attribute to a node, while taking into account whether the attribute
        should be multi-valued.
        Multi-valued properties will not contain duplicates.

        The ``key`` may be a rdflib.URIRef or a URI string that maps onto a property name
        as defined in ``rdf_utils.property_mapping``. A key without a mapping is
        used as-is.

        If the node does not exist then it is created using the given ``iri``.

        Parameters
        ----------
        iri: Union[rdflib.URIRef, str]
            The IRI of a node in the rdflib.Graph
        key: str
            The name of the attribute. Can be a rdflib.URIRef or URI string
        value: str
            The value of the attribute

        """
        if not isinstance(key, URIRef):
            key = URIRef(key)
        mapped_key = property_mapping.get(key)
        if not mapped_key:
            logging.debug(f"{key} could not be mapped; using {key}")
            mapped_key = key

        n = self.prefix_manager.contract(str(iri))
        if not self.graph.has_node(n):
            # Create the node from the original IRI and use the identifier
            # that add_node() actually registered in the graph.
            n = self.add_node(iri)
        # Always write into the graph's own attribute dictionary so that the
        # attribute is persisted even when the node has just been created.
        attr_dict = self.graph.nodes[n]
        self._add_attribute(attr_dict, mapped_key, str(value))

    def add_edge_attribute(self, subject_iri: Union[URIRef, str], object_iri: URIRef, predicate_iri: URIRef, key: str, value: str) -> None:
        """
        Adds an attribute to an edge, while taking into account whether the attribute
        should be multi-valued.
        Multi-valued properties will not contain duplicates.

        The ``key`` may be a known property name, a rdflib.URIRef or a URI string
        that maps onto a property name as defined in ``rdf_utils.property_mapping``.
        Nothing is added when the key cannot be mapped or when ``value`` is ``None``.

        If the nodes in the edge does not exist then they will be created
        using ``subject_iri`` and ``object_iri``.

        If the edge itself does not exist then it will be created using
        ``subject_iri``, ``object_iri`` and ``predicate_iri``.

        Parameters
        ----------
        subject_iri: [rdflib.URIRef, str]
            The IRI of the subject node of an edge in rdflib.Graph
        object_iri: rdflib.URIRef
            The IRI of the object node of an edge in rdflib.Graph
        predicate_iri: rdflib.URIRef
            The IRI of the predicate representing an edge in rdflib.Graph
        key: str
            The name of the attribute. Can be a rdflib.URIRef or URI string
        value: str
            The value of the attribute

        """
        if key.lower() in is_property_multivalued:
            key = key.lower()
        else:
            if not isinstance(key, URIRef):
                key = URIRef(key)
            key = property_mapping.get(key)

        if key is not None and value is not None:
            # Delegate to add_edge() so that (a) missing nodes and the edge
            # itself are created as documented, and (b) the edge key is derived
            # from exactly the same label normalisation that add_edge() applies.
            s, o, edge_label = self.add_edge(subject_iri, object_iri, predicate_iri)
            edge_key = generate_edge_key(s, edge_label, o)
            attr_dict = self.graph.get_edge_data(s, o, key=edge_key)
            self._add_attribute(attr_dict, key, value)

    def _add_attribute(self, attr_dict: Dict, key: str, value: str) -> None:
        """
        Adds an attribute to the attribute dictionary, respecting whether or not
        that attribute should be multi-valued.
        Multi-valued attributes will not contain duplicates.

        Some attributes are singular form of others. In such cases overflowing values
        will be placed into the correlating multi-valued attribute.
        For example, `name` attribute will hold only one value while any additional
        value will be stored as `synonym` attribute.

        Any other single-valued attribute is overwritten by the latest value.

        Parameters
        ----------
        attr_dict: dict
            Dictionary representing the attribute set of a node or an edge in a networkx graph
        key: str
            The name of the attribute
        value: str
            The value of the attribute

        """
        if PrefixManager.is_iri(value):
            value = process_iri(value)
        if key in is_property_multivalued and is_property_multivalued[key]:
            if key not in attr_dict:
                attr_dict[key] = [value]
            elif value not in attr_dict[key]:
                attr_dict[key].append(value)
        elif key == 'name' and 'name' in attr_dict:
            # The first name is kept; only additional, different names
            # overflow into `synonym`.
            if attr_dict['name'] != value:
                self._add_attribute(attr_dict, 'synonym', value)
        else:
            attr_dict[key] = value
Example #3
0
def process_predicate(
    prefix_manager: PrefixManager,
    p: Union[URIRef, str],
    predicate_mapping: Optional[Dict] = None,
) -> Tuple:
    """
    Resolve a predicate against the Biolink Model, falling back to the
    caller-supplied predicate mappings when the model has no matching element.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    p: Union[URIRef, str]
        The predicate
    predicate_mapping: Optional[Dict]
        Predicate mappings, keyed by predicate

    Returns
    -------
    Tuple[str, str, str, str]
        A tuple of the form (element_uri, canonical_uri, predicate, property_name):
        the Biolink CURIE (``None`` if there is no Biolink element), the Biolink
        slot_uri (``None`` if not available), the CURIE form of p, and the
        reference of p

    """
    # Contract p up front, but only when it is an IRI.
    contracted = prefix_manager.contract(str(p)) if prefix_manager.is_iri(p) else None

    if prefix_manager.is_curie(p):
        # p is already a CURIE: use it directly
        predicate = p
        property_name = prefix_manager.get_reference(p)
    elif contracted and prefix_manager.is_curie(contracted):
        # p was an IRI that contracted into a CURIE
        predicate = contracted
        property_name = prefix_manager.get_reference(contracted)
    else:
        # neither form yields a CURIE: place p in the default (empty) prefix
        predicate = f":{p}"
        property_name = p

    element = get_biolink_element(prefix_manager, p)
    canonical_uri = None

    if not element:
        # no mapping to biolink model;
        # look at predicate mappings
        element_uri = None
        if predicate_mapping and p in predicate_mapping:
            property_name = predicate_mapping[p]
            predicate = f":{property_name}"
        return element_uri, canonical_uri, predicate, property_name

    if isinstance(element, SlotDefinition):
        # predicate corresponds to a biolink slot
        element_uri = (
            prefix_manager.contract(element.definition_uri)
            if element.definition_uri
            else f"biolink:{sentencecase_to_snakecase(element.name)}"
        )
        if element.slot_uri:
            canonical_uri = element.slot_uri
    elif isinstance(element, ClassDefinition):
        # this will happen only when the IRI is actually
        # a reference to a class
        element_uri = prefix_manager.contract(element.class_uri)
    else:
        element_uri = f"biolink:{sentencecase_to_camelcase(element.name)}"

    # Elements with biolink:Attribute among their ancestors always get the
    # snake_case form, overriding whatever was chosen above.
    if "biolink:Attribute" in get_biolink_ancestors(element.name):
        element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"

    if not predicate:
        predicate = element_uri
    return element_uri, canonical_uri, predicate, property_name
Example #4
0
def test_prefix_manager_contract(query):
    """
    Check that PrefixManager.contract maps the input in ``query[0]``
    to the expected value in ``query[1]``.
    """
    manager = PrefixManager()
    contracted = manager.contract(query[0])
    expected = query[1]
    assert contracted == expected