예제 #1
0
    def _add_attribute(self, attr_dict: Dict, key: str, value: str) -> None:
        """
        Add an attribute to the attribute dictionary, respecting whether or not
        that attribute should be multi-valued.
        Multi-valued attributes will not contain duplicates.

        Some attributes are the singular form of others. In such cases overflowing
        values are placed into the correlating multi-valued attribute.
        For example, the `name` attribute holds only one value (the first one seen)
        while any additional, different value is stored under `synonym`.

        All other single-valued attributes are simply overwritten by the latest value.

        Parameters
        ----------
        attr_dict: dict
            Dictionary representing the attribute set of a node or an edge in a networkx graph.
            It is modified in place.
        key: str
            The name of the attribute
        value: str
            The value of the attribute. IRIs are passed through ``process_iri``
            before being stored.

        """
        if PrefixManager.is_iri(value):
            value = process_iri(value)
        if key in is_property_multivalued and is_property_multivalued[key]:
            if key not in attr_dict:
                attr_dict[key] = [value]
            elif value not in attr_dict[key]:
                attr_dict[key].append(value)
        elif key == 'name' and 'name' in attr_dict:
            # A name is already recorded: keep it and route the additional value
            # to `synonym`. Re-adding the very same name is a no-op so a node does
            # not end up listed as its own synonym.
            if value != attr_dict['name']:
                self._add_attribute(attr_dict, 'synonym', value)
        else:
            # First `name`, or any other single-valued attribute (last write wins).
            attr_dict[key] = value
예제 #2
0
    def _prepare_object(self, prop: str, prop_type: str, value: Any) -> rdflib.term.Identifier:
        """
        Build the rdflib term to be used as the object of a triple.

        For URI-like property types (``uriorcurie`` / ``xsd:anyURI``) a string
        value that is a CURIE is resolved through ``self.uriref``; a string value
        that is an IRI becomes a ``URIRef`` when ``_is_valid_uri`` accepts it.
        Every other value for those types becomes an untyped ``Literal``.
        For any other property type the value becomes a typed ``Literal``: the
        datatype is the expanded ``prop_type`` when it starts with ``xsd``,
        otherwise the expanded ``xsd:string``.

        Parameters
        ----------
        prop: str
            property name (not consulted by this method)
        prop_type: str
            property type
        value: Any
            property value

        Returns
        -------
        rdflib.term.Identifier
            An instance of rdflib.term.Identifier

        """
        if prop_type in ('uriorcurie', 'xsd:anyURI'):
            # Only strings can be interpreted as CURIEs or IRIs.
            if not isinstance(value, str):
                return Literal(value)
            if PrefixManager.is_curie(value):
                return self.uriref(value)
            if PrefixManager.is_iri(value) and _is_valid_uri(value):
                return URIRef(value)
            # Neither a CURIE nor a usable IRI: fall back to a plain literal.
            return Literal(value)

        datatype_curie = prop_type if prop_type.startswith('xsd') else "xsd:string"
        return Literal(value, datatype=self.prefix_manager.expand(datatype_curie))
예제 #3
0
def get_biolink_element(prefix_manager: PrefixManager,
                        predicate: Any) -> Optional[Element]:
    """
    Look up the Biolink Model element that corresponds to a predicate.

    The predicate is contracted to a CURIE when it is an IRI, and reduced to
    its reference when it is a CURIE. The result is looked up by name in the
    toolkit. If nothing is found, the original predicate is tried as a mapping.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    predicate: Any
        The predicate, as an IRI, a CURIE or a plain name

    Returns
    -------
    Optional[Element]
        The corresponding Biolink Model element, or whatever falsy value the
        toolkit returned when no element could be resolved

    """
    toolkit = get_toolkit()
    predicate_curie = (
        prefix_manager.contract(predicate)
        if prefix_manager.is_iri(predicate)
        else predicate
    )
    reference = (
        prefix_manager.get_reference(predicate_curie)
        if prefix_manager.is_curie(predicate_curie)
        else predicate_curie
    )

    element = toolkit.get_element(reference)
    if element:
        return element

    # Direct lookup failed: try resolving the original predicate as a mapping.
    try:
        mapping = toolkit.get_element_by_mapping(predicate)
        if mapping:
            element = toolkit.get_element(mapping)
    except ValueError as e:
        # A failed mapping lookup is logged, not propagated.
        log.error(e)
    return element
예제 #4
0
def process_predicate(
    prefix_manager: PrefixManager,
    p: Union[URIRef, str],
    predicate_mapping: Optional[Dict] = None,
) -> Tuple:
    """
    Resolve a predicate against the Biolink Model, falling back to custom mappings.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    p: Union[URIRef, str]
        The predicate
    predicate_mapping: Optional[Dict]
        Predicate mappings, consulted only when no Biolink element matches ``p``

    Returns
    -------
    Tuple[str, str, str, str]
        ``(element_uri, canonical_uri, predicate, property_name)``: the Biolink CURIE
        (``None`` if there is no Biolink element), the Biolink slot_uri (``None`` if
        not available), the CURIE form of p, and the reference of p

    """
    # Step 1: derive the CURIE form and the bare reference of `p`.
    contracted = prefix_manager.contract(str(p)) if prefix_manager.is_iri(p) else None
    if prefix_manager.is_curie(p):
        predicate = p
        property_name = prefix_manager.get_reference(p)
    elif contracted and prefix_manager.is_curie(contracted):
        predicate = contracted
        property_name = prefix_manager.get_reference(contracted)
    else:
        # Not expressible as a prefixed CURIE: use the default (empty) prefix.
        predicate = f":{p}"
        property_name = p

    # Step 2: look for a matching Biolink Model element.
    element = get_biolink_element(prefix_manager, p)
    if not element:
        # No mapping to the Biolink Model; look at the user-supplied predicate mappings.
        if predicate_mapping and p in predicate_mapping:
            property_name = predicate_mapping[p]
            predicate = f":{property_name}"
        return None, None, predicate, property_name

    canonical_uri = None
    if isinstance(element, SlotDefinition):
        # The predicate corresponds to a Biolink slot.
        if element.definition_uri:
            element_uri = prefix_manager.contract(element.definition_uri)
        else:
            element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
        if element.slot_uri:
            canonical_uri = element.slot_uri
    elif isinstance(element, ClassDefinition):
        # Happens only when the IRI is actually a reference to a class.
        element_uri = prefix_manager.contract(element.class_uri)
    else:
        element_uri = f"biolink:{sentencecase_to_camelcase(element.name)}"

    # Descendants of biolink:Attribute are always rendered in snake case.
    if "biolink:Attribute" in get_biolink_ancestors(element.name):
        element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
    if not predicate:
        predicate = element_uri
    return element_uri, canonical_uri, predicate, property_name
예제 #5
0
    def read_edge(self, edge: Dict) -> Dict:
        """
        Normalize a raw edge record and hand it to the parent reader.

        The ``sub``/``pred``/``obj`` fields are rewritten into
        ``subject``/``predicate``/``relation``/``object``. Every other field of
        the record is copied over unchanged.

        Parameters
        ----------
        edge: Dict
            The edge record, with at least the keys ``sub``, ``pred`` and ``obj``

        Returns
        -------
        Dict
            The processed edge, as returned by the parent class's ``read_edge``

        """
        normalized = {'subject': self.prefix_manager.contract(edge['sub'])}
        raw_predicate = edge['pred']

        if PrefixManager.is_iri(raw_predicate):
            relation = self.prefix_manager.contract(raw_predicate)
            if relation in self.ecache:
                predicate = self.ecache[relation]
            else:
                element = get_biolink_element(relation)
                if not element:
                    # Fall back to resolving the original IRI as a mapping.
                    try:
                        mapping = self.toolkit.get_element_by_mapping(raw_predicate)
                        if mapping:
                            element = self.toolkit.get_element(mapping)
                    except ValueError as e:
                        log.error(e)

                if element:
                    predicate = format_biolink_slots(element.name.replace(',', ''))
                else:
                    # Nothing matched: use the generic predicate.
                    predicate = 'biolink:related_to'
                # Remember the outcome so the lookup runs once per CURIE.
                self.ecache[relation] = predicate
        else:
            # Shorthand predicates with a fixed (predicate, relation) pair.
            well_known = {
                'is_a': ('biolink:subclass_of', 'rdfs:subClassOf'),
                'has_part': ('biolink:has_part', "BFO:0000051"),
                'part_of': ('biolink:part_of', "BFO:0000050"),
            }
            if raw_predicate in well_known:
                predicate, relation = well_known[raw_predicate]
            else:
                predicate = f"biolink:{raw_predicate.replace(' ', '_')}"
                relation = raw_predicate

        normalized['predicate'] = predicate
        normalized['relation'] = relation
        normalized['object'] = self.prefix_manager.contract(edge['obj'])

        # Carry over every remaining field of the record as-is.
        consumed = {'sub', 'pred', 'obj'}
        normalized.update({k: v for k, v in edge.items() if k not in consumed})
        return super().read_edge(normalized)
예제 #6
0
    def read_edge(self, edge: Dict) -> Optional[Tuple]:
        """
        Normalize a raw edge record and hand it to the parent reader.

        The ``sub``/``pred``/``obj`` fields are rewritten into
        ``subject``/``predicate``/``relation``/``object``. Every other field of
        the record is copied over unchanged.

        Parameters
        ----------
        edge: Dict
            The edge record, with at least the keys ``sub``, ``pred`` and ``obj``

        Returns
        -------
        Optional[Tuple]
            Whatever the parent class's ``read_edge`` returns for the
            normalized record

        """
        normalized = {"subject": self.prefix_manager.contract(edge["sub"])}
        raw_predicate = edge["pred"]

        if PrefixManager.is_iri(raw_predicate):
            relation = self.prefix_manager.contract(raw_predicate)
            if relation in self.ecache:
                predicate = self.ecache[relation]
            else:
                element = get_biolink_element(relation)
                if not element:
                    # Fall back to resolving the original IRI as a mapping.
                    try:
                        mapping = self.toolkit.get_element_by_mapping(raw_predicate)
                        if mapping:
                            element = self.toolkit.get_element(mapping)
                    # TODO: unclear under which conditions this exception is raised here.
                    except ValueError as e:
                        self.owner.log_error(
                            entity=str(raw_predicate),
                            error_type=ErrorType.INVALID_EDGE_PREDICATE,
                            message=str(e),
                        )
                        element = None

                if element:
                    predicate = format_biolink_slots(element.name.replace(",", ""))
                else:
                    # Nothing matched: use the generic predicate.
                    predicate = "biolink:related_to"
                # Remember the outcome so the lookup runs once per CURIE.
                self.ecache[relation] = predicate
        else:
            # Shorthand predicates with a fixed (predicate, relation) pair.
            well_known = {
                "is_a": ("biolink:subclass_of", "rdfs:subClassOf"),
                "has_part": ("biolink:has_part", "BFO:0000051"),
                "part_of": ("biolink:part_of", "BFO:0000050"),
            }
            if raw_predicate in well_known:
                predicate, relation = well_known[raw_predicate]
            else:
                predicate = f"biolink:{raw_predicate.replace(' ', '_')}"
                relation = raw_predicate

        normalized["predicate"] = predicate
        normalized["relation"] = relation
        normalized["object"] = self.prefix_manager.contract(edge["obj"])

        # Carry over every remaining field of the record as-is.
        consumed = {"sub", "pred", "obj"}
        normalized.update({k: v for k, v in edge.items() if k not in consumed})
        return super().read_edge(normalized)
예제 #7
0
    def export_edges(self) -> Set[URIRef]:
        """
        Export all edges from networkx.MultiDiGraph as triples.

        This method is a generator. An edge whose ``edge_label`` is listed in
        ``self.edge_properties`` is emitted as one direct (s, p, o) triple.
        Any other edge is reified: one triple is emitted per edge property,
        with the edge's ``id`` (or a freshly generated UUID) as the subject.
        The plain (s, p, o) triples of the reified edges are emitted last,
        after all edges have been visited.

        Returns
        -------
        Set[rdflib.term.URIRef]
            Despite the annotation, triples are yielded one at a time as
            3-tuples of rdflib terms

        """
        def _as_term(item):
            # CURIE strings are expanded, IRI strings are wrapped as-is,
            # and everything else is emitted as a literal.
            if isinstance(item, str):
                if PrefixManager.is_curie(item):
                    return self.uriref(item)
                if PrefixManager.is_iri(item):
                    return URIRef(item)
            return Literal(item)

        # Properties that are never emitted for a reified edge.
        skipped_props = {'id', 'association_id', 'edge_key'}
        deferred = []

        for u, v, k, data in self.graph.edges(data=True, keys=True):
            label = data['edge_label']
            is_direct = label in self.edge_properties
            base_triple = (self.uriref(u), self.uriref(label), self.uriref(v))
            if is_direct:
                # treat as a direct edge
                yield base_triple
                continue

            # reify: hold back the plain triple and describe the edge as a node
            deferred.append(base_triple)
            # Without an explicit id, generate a UUID for the reified node.
            node_id = data['id'] if 'id' in data else generate_uuid()
            node = self.uriref(node_id)

            all_data = data.copy()
            all_data['type'] = 'biolink:Association'
            for prop, value in all_data.items():
                if prop in skipped_props:
                    continue
                prop_ref = self.uriref(prop)
                # Treat scalars as one-element lists so both cases share one loop.
                items = value if isinstance(value, list) else [value]
                for item in items:
                    yield (node, prop_ref, _as_term(item))

        yield from deferred
예제 #8
0
def test_is_iri(query):
    """
    Check that PrefixManager.is_iri returns the expected verdict.

    ``query`` is indexed as a pair: the candidate value at position 0 and the
    expected result at position 1.
    """
    candidate = query[0]
    expected = query[1]
    assert PrefixManager.is_iri(candidate) == expected