예제 #1
0
    def _add_attribute(self, attr_dict: Dict, key: str, value: str) -> None:
        """
        Add an attribute to the attribute dictionary, respecting whether or not
        that attribute should be multi-valued.
        Multi-valued attributes will not contain duplicates.

        Some attributes are the singular form of others. In such cases overflowing
        values are placed into the correlating multi-valued attribute.
        For example, the `name` attribute holds only one value (the first one
        recorded) while any additional, different value is stored under the
        `synonym` attribute.

        Every other single-valued attribute is overwritten by the latest value.

        Parameters
        ----------
        attr_dict: dict
            Dictionary representing the attribute set of a node or an edge in a networkx graph.
            It is modified in place.
        key: str
            The name of the attribute
        value: str
            The value of the attribute. If it is recognised as an IRI it is
            passed through `process_iri` before being stored.

        """
        if PrefixManager.is_iri(value):
            value = process_iri(value)

        if key in is_property_multivalued and is_property_multivalued[key]:
            # Multi-valued: keep a list and append only values not seen before.
            if key not in attr_dict:
                attr_dict[key] = [value]
            elif value not in attr_dict[key]:
                attr_dict[key].append(value)
            return

        if key != 'name':
            # Plain single-valued attribute: the latest value wins.
            attr_dict[key] = value
            return

        if 'name' not in attr_dict:
            # The first name is kept as the name itself; previously it was
            # routed to `synonym` and `name` was never populated.
            attr_dict['name'] = value
        elif value != attr_dict['name']:
            # Additional names overflow into `synonym`; repeating the current
            # name is not an additional value, so it is skipped.
            self._add_attribute(attr_dict, 'synonym', value)
예제 #2
0
    def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
        """
        This method should be used by all derived classes when adding an edge to the networkx.MultiDiGraph.
        This ensures that the `subject` and `object` identifiers are CURIEs, and that `edge_label` is in the correct form.

        The edge label is derived from the predicate IRI in these steps:

        1. `process_iri` is applied to the predicate IRI.
        2. Spaces are replaced with underscores.
        3. A leading `self.BIOLINK` prefix is stripped.
        4. If the result is still a CURIE, it is replaced by its `curie_lookup`
           mapping, or by `self.DEFAULT_EDGE_LABEL` when no mapping is found.

        The edge is only added when no edge with the same generated key exists
        between the two nodes; an existing edge is left untouched.

        Returns the CURIE identifiers used for the `subject` and `object` in the
        networkx.MultiDiGraph, and the processed `edge_label`. The returned label
        does not carry the `biolink:` prefix that is stored in the edge's
        `edge_label` attribute.

        Parameters
        ----------
        subject_iri: rdflib.URIRef
            Subject IRI for the subject in a triple
        object_iri: rdflib.URIRef
            Object IRI for the object in a triple
        predicate_iri: rdflib.URIRef
            Predicate IRI for the predicate in a triple

        Returns
        -------
        Tuple[str, str, str]
            A 3-nary tuple (of the form subject, object, predicate) that represents the edge

        """
        s = self.add_node(subject_iri)
        o = self.add_node(object_iri)
        relation = self.prefix_manager.contract(predicate_iri)
        edge_label = process_iri(predicate_iri)
        if ' ' in edge_label:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'".format(predicate_iri, edge_label))
            # Perform the replacement the message announces; it was only logged before.
            edge_label = edge_label.replace(' ', '_')
        if edge_label.startswith(self.BIOLINK):
            logging.debug("predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix".format(predicate_iri, edge_label, self.BIOLINK))
            # Strip only the leading prefix; str.replace would also remove any
            # later occurrence of the same text inside the label.
            edge_label = edge_label[len(self.BIOLINK):]

        if PrefixManager.is_curie(edge_label):
            name = curie_lookup(edge_label)
            if name:
                logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; Using its mapping instead: {}".format(predicate_iri, edge_label, name))
                edge_label = name
            else:
                logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}".format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
                edge_label = self.DEFAULT_EDGE_LABEL

        kwargs = {
            'subject': s,
            'predicate': str(predicate_iri),
            'object': o,
            'relation': relation,
            'edge_label': f"biolink:{edge_label}"
        }
        if 'provided_by' in self.graph_metadata:
            kwargs['provided_by'] = self.graph_metadata['provided_by']

        # The edge key is built from the un-prefixed label.
        key = generate_edge_key(s, edge_label, o)
        if not self.graph.has_edge(s, o, key=key):
            self.graph.add_edge(s, o, key=key, **kwargs)
        # TODO: support append
        return s, o, edge_label
예제 #3
0
    def add_edge_attribute(self, subject_iri: Union[URIRef, str],
                           object_iri: URIRef, predicate_iri: URIRef, key: str,
                           value: str) -> None:
        """
        Adds an attribute to an edge, while taking into account whether the attribute
        should be multi-valued.
        Multi-valued properties will not contain duplicates.

        The key may be a rdflib.URIRef or a URI string that maps onto a property name
        as defined in `rdf_utils.property_mapping`. A key that is neither a known
        property name nor present in the property mapping is ignored.

        If the edge itself does not exist then it will be created using
        subject_iri, object_iri and predicate_iri (via `self.add_edge`).

        Parameters
        ----------
        subject_iri: [rdflib.URIRef, str]
            The IRI of the subject node of an edge in rdflib.Graph
        object_iri: rdflib.URIRef
            The IRI of the object node of an edge in rdflib.Graph
        predicate_iri: rdflib.URIRef
            The IRI of the predicate representing an edge in rdflib.Graph
        key: str
            The name of the attribute. Can be a rdflib.URIRef or URI string
        value: str
            The value of the attribute

        """
        # Function-scope import so this method does not rely on a module-level one.
        import logging

        if key.lower() in is_property_multivalued:
            key = key.lower()
        else:
            if not isinstance(key, URIRef):
                key = URIRef(key)
            key = property_mapping.get(key)

        if key is None:
            # No property corresponds to this key; nothing to record.
            return

        subject_curie = make_curie(subject_iri)
        object_curie = make_curie(object_iri)
        edge_label = process_iri(predicate_iri)
        if is_curie(edge_label):
            # NOTE(review): curie_lookup presumably returns a falsy value when it
            # has no mapping; keep the CURIE in that case instead of a None label.
            edge_label = curie_lookup(edge_label) or edge_label
        edge_key = generate_edge_key(subject_curie, edge_label,
                                     object_curie)
        attr_dict = self.graph.get_edge_data(subject_curie,
                                             object_curie,
                                             key=edge_key)
        if attr_dict is None:
            # get_edge_data yields None when no such edge exists. Create the
            # edge as the docstring promises, then look it up under the
            # identifiers add_edge actually used.
            s, o, label = self.add_edge(subject_iri, object_iri, predicate_iri)
            attr_dict = self.graph.get_edge_data(
                s, o, key=generate_edge_key(s, label, o))

        if attr_dict is None:
            # Still nothing to attach to: report it instead of failing with a
            # TypeError inside _add_attribute.
            logging.warning(
                "Unable to locate edge {} -- {} --> {}; discarding attribute {}".format(
                    subject_curie, edge_label, object_curie, key))
            return

        self._add_attribute(attr_dict, key, value)
예제 #4
0
    def _add_attribute(self, attr_dict: Dict, key: str, value: str) -> None:
        """
        Record a value for an attribute in ``attr_dict``, honouring whether the
        attribute is declared single- or multi-valued in `is_property_multivalued`.

        Keys that are ``None`` or not declared in `is_property_multivalued` are
        discarded with a warning. The value is passed through `process_iri` and
        then `make_curie` before being stored.

        Multi-valued attributes are kept as lists without duplicates.
        Single-valued attributes keep the first value they receive; for 'name'
        any later value is redirected to the 'synonym' attribute, while later
        values of other single-valued attributes are dropped.

        Parameters
        ----------
        attr_dict: dict
            Dictionary representing the attribute set of a node or an edge in a
            networkx graph. It is modified in place.
        key: str
            The name of the attribute
        value: str
            The value of the attribute

        """
        recognised = key is not None and key in is_property_multivalued
        if not recognised:
            logging.warning(
                "Discarding key {} as it is not a valid property.".format(key))
            return

        normalized = make_curie(process_iri(value))
        seen_before = key in attr_dict

        if is_property_multivalued[key]:
            if not seen_before:
                attr_dict[key] = [normalized]
                return
            if normalized not in attr_dict[key]:
                attr_dict[key].append(normalized)
            return

        if not seen_before:
            attr_dict[key] = normalized
            return

        # Single-valued and already set: only 'name' has an overflow attribute.
        if key == 'name':
            self._add_attribute(attr_dict, 'synonym', normalized)
예제 #5
0
    def add_edge(self, subject_iri: URIRef, object_iri: URIRef,
                 predicate_iri: URIRef) -> Tuple[str, str, str]:
        """
        This method should be used by all derived classes when adding an edge to the networkx.MultiDiGraph.
        This ensures that the subject and object identifiers are CURIEs, and that edge_label is in the correct form.

        The edge label is derived from the predicate IRI in these steps:

        1. `process_iri` is applied to the predicate IRI.
        2. Spaces are replaced with underscores.
        3. A leading `self.BIOLINK` prefix is stripped.
        4. If the result still contains ':', it is treated as a CURIE and
           replaced by `self.DEFAULT_EDGE_LABEL`.

        The edge label doubles as the edge key; when an edge with that key
        already exists between the two nodes, it is left untouched.

        Returns the CURIE identifiers used for the subject and object in the
        networkx.MultiDiGraph, and the processed edge_label.

        Parameters
        ----------
        subject_iri: rdflib.URIRef
            Subject IRI for the subject in a triple
        object_iri: rdflib.URIRef
            Object IRI for the object in a triple
        predicate_iri: rdflib.URIRef
            Predicate IRI for the predicate in a triple

        Returns
        -------
        Tuple[str, str, str]
            A 3-nary tuple (of the form subject, object, predicate) that represents the edge

        """
        s = self.add_node(subject_iri)
        o = self.add_node(object_iri)

        relation = make_curie(predicate_iri)
        edge_label = process_iri(predicate_iri)
        if ' ' in edge_label:
            logging.debug(
                "predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'"
                .format(predicate_iri, edge_label))
            # Perform the replacement the message announces; it was only logged before.
            edge_label = edge_label.replace(' ', '_')
        # TODO: shouldn't this move to the utilities function process_uri()
        if edge_label.startswith(self.BIOLINK):
            logging.debug(
                "predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix"
                .format(predicate_iri, edge_label, self.BIOLINK))
            # Strip only the leading prefix; str.replace would also remove any
            # later occurrence of the same text inside the label.
            edge_label = edge_label[len(self.BIOLINK):]

        # TODO: is there no way to get label of a CURIE?
        # TODO: this should also move to the utilities function
        # Any service? or preload required ontologies by prefix?
        if ':' in edge_label:
            # One message per event; the earlier duplicate hard-coded
            # 'related_to' instead of reporting DEFAULT_EDGE_LABEL.
            logging.debug(
                "predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}"
                .format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
            edge_label = self.DEFAULT_EDGE_LABEL

        kwargs = {'relation': relation, 'edge_label': edge_label}
        if 'provided_by' in self.graph_metadata:
            kwargs['provided_by'] = self.graph_metadata['provided_by']

        if self.graph.has_edge(s, o, key=edge_label):
            logging.debug("{} -- {} --> {} edge already exists".format(
                s, edge_label, o))
        else:
            self.graph.add_edge(s, o, key=edge_label, **kwargs)

        return s, o, edge_label
예제 #6
0
def test_process_iri(query):
    """
    Check that `process_iri` maps an input to its expected output.

    Parameters
    ----------
    query:
        An indexable pair whose first item is the input handed to
        `process_iri` and whose second item is the expected result.

    """
    actual = process_iri(query[0])
    expected = query[1]
    assert actual == expected