def _add_attribute(self, attr_dict: Dict, key: str, value: str) -> None:
    """
    Add an attribute value to an attribute dictionary, respecting whether
    or not that attribute is multi-valued.

    Multi-valued attributes are stored as lists without duplicates.
    Single-valued attributes hold one value; setting them again overwrites
    the previous value, with one exception: `name` is the singular form of
    `synonym`. The first name seen is stored under `name`; any additional,
    different name overflows into `synonym` instead of replacing it.

    Parameters
    ----------
    attr_dict: dict
        Dictionary representing the attribute set of a node or an edge
        in a networkx graph. It is modified in place.
    key: str
        The name of the attribute
    value: str
        The value of the attribute. If ``PrefixManager.is_iri`` accepts it,
        it is first passed through ``process_iri``.

    """
    if PrefixManager.is_iri(value):
        value = process_iri(value)

    if key in is_property_multivalued and is_property_multivalued[key]:
        if key not in attr_dict:
            attr_dict[key] = [value]
        elif value not in attr_dict[key]:
            attr_dict[key].append(value)
    elif key == 'name' and 'name' in attr_dict:
        # `name` is already occupied: keep it and push the extra value into
        # `synonym`. A repeat of the current name is not an additional value,
        # so it is ignored.
        if attr_dict['name'] != value:
            self._add_attribute(attr_dict, 'synonym', value)
    else:
        # First `name`, or any other single-valued attribute.
        attr_dict[key] = value
def _prepare_object(self, prop: str, prop_type: str, value: Any) -> rdflib.term.Identifier:
    """
    Build the RDF term used as the object of a triple.

    Parameters
    ----------
    prop: str
        property name (not consulted when building the term)
    prop_type: str
        property type; ``uriorcurie`` and ``xsd:anyURI`` request a resource,
        any other type starting with ``xsd`` is used as the literal datatype,
        and everything else falls back to ``xsd:string``
    value: Any
        property value

    Returns
    -------
    rdflib.term.Identifier
        A URIRef when a resource was requested and the value is a CURIE or a
        valid IRI string; otherwise a Literal

    """
    if prop_type in ('uriorcurie', 'xsd:anyURI'):
        if isinstance(value, str):
            if PrefixManager.is_curie(value):
                return self.uriref(value)
            if PrefixManager.is_iri(value) and _is_valid_uri(value):
                return URIRef(value)
        # Non-string values, strings that are neither CURIE nor IRI, and
        # IRIs that fail validation are all emitted as untyped literals.
        return Literal(value)

    datatype_curie = prop_type if prop_type.startswith('xsd') else "xsd:string"
    return Literal(value, datatype=self.prefix_manager.expand(datatype_curie))
def get_biolink_element(prefix_manager: PrefixManager, predicate: Any) -> Optional[Element]:
    """
    Look up the Biolink Model element that corresponds to a predicate.

    The predicate is contracted to a CURIE when it is an IRI, and reduced to
    its reference when it is a CURIE, before the toolkit is queried. If that
    lookup finds nothing, the toolkit's mappings are consulted using the
    predicate exactly as it was passed in.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    predicate: Any
        The predicate, as an IRI, a CURIE, or a plain name

    Returns
    -------
    Optional[Element]
        The corresponding Biolink Model element, or whatever falsy value the
        toolkit returned when nothing matched

    """
    toolkit = get_toolkit()

    curie = (
        prefix_manager.contract(predicate)
        if prefix_manager.is_iri(predicate)
        else predicate
    )
    reference = (
        prefix_manager.get_reference(curie)
        if prefix_manager.is_curie(curie)
        else curie
    )

    element = toolkit.get_element(reference)
    if element:
        return element

    # Direct lookup failed: fall back to the mappings. A ValueError from the
    # toolkit is logged and treated as "no element".
    try:
        mapping = toolkit.get_element_by_mapping(predicate)
        if mapping:
            element = toolkit.get_element(mapping)
    except ValueError as e:
        log.error(e)
    return element
def process_predicate(
    prefix_manager: PrefixManager,
    p: Union[URIRef, str],
    predicate_mapping: Optional[Dict] = None,
) -> Tuple:
    """
    Resolve a predicate against the Biolink Model and the supplied mappings.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    p: Union[URIRef, str]
        The predicate
    predicate_mapping: Optional[Dict]
        Predicate mappings, consulted only when no Biolink element is found

    Returns
    -------
    Tuple[str, str, str, str]
        A tuple that contains the Biolink CURIE (if available), the Biolink
        slot_uri CURIE (if available), the CURIE form of p, the reference of p

    """
    # Step 1: derive the CURIE form and the reference from `p` itself.
    contracted = prefix_manager.contract(str(p)) if prefix_manager.is_iri(p) else None
    if prefix_manager.is_curie(p):
        property_name = prefix_manager.get_reference(p)
        predicate = p
    elif contracted and prefix_manager.is_curie(contracted):
        property_name = prefix_manager.get_reference(contracted)
        predicate = contracted
    else:
        # Neither a CURIE nor contractible to one: use a default-prefix form.
        property_name = p
        predicate = f":{p}"

    # Step 2: look for a matching Biolink Model element.
    element = get_biolink_element(prefix_manager, p)
    canonical_uri = None

    if not element:
        # no mapping to biolink model;
        # look at predicate mappings
        element_uri = None
        if predicate_mapping and p in predicate_mapping:
            property_name = predicate_mapping[p]
            predicate = f":{property_name}"
        return element_uri, canonical_uri, predicate, property_name

    if isinstance(element, SlotDefinition):
        # predicate corresponds to a biolink slot
        if element.definition_uri:
            element_uri = prefix_manager.contract(element.definition_uri)
        else:
            element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
        if element.slot_uri:
            canonical_uri = element.slot_uri
    elif isinstance(element, ClassDefinition):
        # this will happen only when the IRI is actually
        # a reference to a class
        element_uri = prefix_manager.contract(element.class_uri)
    else:
        element_uri = f"biolink:{sentencecase_to_camelcase(element.name)}"

    # Anything descending from biolink:Attribute is written in snake_case,
    # overriding whichever form was chosen above.
    if "biolink:Attribute" in get_biolink_ancestors(element.name):
        element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"

    if not predicate:
        predicate = element_uri

    return element_uri, canonical_uri, predicate, property_name
def read_edge(self, edge: Dict) -> Dict:
    """
    Normalize a raw edge record and hand it to the parent reader.

    The ``sub``/``pred``/``obj`` keys are translated into ``subject``,
    ``predicate``, ``relation`` and ``object``; every other key is copied
    over unchanged.

    Parameters
    ----------
    edge: Dict
        The edge record

    Returns
    -------
    Dict
        The result of the parent class's ``read_edge`` for the normalized edge

    """
    # (label, predicate, relation) for the shorthand predicates handled explicitly
    shorthand = (
        ('is_a', 'biolink:subclass_of', 'rdfs:subClassOf'),
        ('has_part', 'biolink:has_part', "BFO:0000051"),
        ('part_of', 'biolink:part_of', "BFO:0000050"),
    )

    fixed_edge = {'subject': self.prefix_manager.contract(edge['sub'])}
    raw_predicate = edge['pred']

    if PrefixManager.is_iri(raw_predicate):
        curie = self.prefix_manager.contract(raw_predicate)
        if curie in self.ecache:
            edge_predicate = self.ecache[curie]
        else:
            # NOTE(review): single-argument call; a two-parameter
            # get_biolink_element(prefix_manager, predicate) also appears in
            # this source - confirm which helper is in scope here.
            element = get_biolink_element(curie)
            if not element:
                try:
                    mapping = self.toolkit.get_element_by_mapping(raw_predicate)
                    if mapping:
                        element = self.toolkit.get_element(mapping)
                except ValueError as e:
                    log.error(e)
            if element:
                edge_predicate = format_biolink_slots(element.name.replace(',', ''))
            else:
                # Nothing in the model matches: use the generic predicate.
                edge_predicate = 'biolink:related_to'
            self.ecache[curie] = edge_predicate
        predicate, relation = edge_predicate, curie
    else:
        for label, predicate, relation in shorthand:
            if raw_predicate == label:
                break
        else:
            predicate = f"biolink:{raw_predicate.replace(' ', '_')}"
            relation = raw_predicate

    fixed_edge['predicate'] = predicate
    fixed_edge['relation'] = relation
    fixed_edge['object'] = self.prefix_manager.contract(edge['obj'])
    fixed_edge.update(
        (key, val) for key, val in edge.items() if key not in {'sub', 'pred', 'obj'}
    )
    return super().read_edge(fixed_edge)
def read_edge(self, edge: Dict) -> Optional[Tuple]:
    """
    Normalize a raw edge record and hand it to the parent reader.

    The ``sub``/``pred``/``obj`` keys are translated into ``subject``,
    ``predicate``, ``relation`` and ``object``; every other key is copied
    over unchanged.

    Parameters
    ----------
    edge: Dict
        The edge record

    Returns
    -------
    Optional[Tuple]
        The result of the parent class's ``read_edge`` for the normalized edge

    """
    # (label, predicate, relation) for the shorthand predicates handled explicitly
    shorthand = (
        ("is_a", "biolink:subclass_of", "rdfs:subClassOf"),
        ("has_part", "biolink:has_part", "BFO:0000051"),
        ("part_of", "biolink:part_of", "BFO:0000050"),
    )

    fixed_edge = {"subject": self.prefix_manager.contract(edge["sub"])}
    raw_predicate = edge["pred"]

    if PrefixManager.is_iri(raw_predicate):
        curie = self.prefix_manager.contract(raw_predicate)
        if curie in self.ecache:
            edge_predicate = self.ecache[curie]
        else:
            # NOTE(review): single-argument call; a two-parameter
            # get_biolink_element(prefix_manager, predicate) also appears in
            # this source - confirm which helper is in scope here.
            element = get_biolink_element(curie)
            if not element:
                try:
                    mapping = self.toolkit.get_element_by_mapping(raw_predicate)
                    if mapping:
                        element = self.toolkit.get_element(mapping)
                # TODO: not sure how this exception would be thrown here.. under what conditions?
                except ValueError as e:
                    self.owner.log_error(
                        entity=str(raw_predicate),
                        error_type=ErrorType.INVALID_EDGE_PREDICATE,
                        message=str(e),
                    )
                    element = None
            if element:
                edge_predicate = format_biolink_slots(element.name.replace(",", ""))
            else:
                # Nothing in the model matches: use the generic predicate.
                edge_predicate = "biolink:related_to"
            self.ecache[curie] = edge_predicate
        predicate, relation = edge_predicate, curie
    else:
        for label, predicate, relation in shorthand:
            if raw_predicate == label:
                break
        else:
            predicate = f"biolink:{raw_predicate.replace(' ', '_')}"
            relation = raw_predicate

    fixed_edge["predicate"] = predicate
    fixed_edge["relation"] = relation
    fixed_edge["object"] = self.prefix_manager.contract(edge["obj"])
    fixed_edge.update(
        (key, val) for key, val in edge.items() if key not in {"sub", "pred", "obj"}
    )
    return super().read_edge(fixed_edge)
def export_edges(self) -> Set[URIRef]:
    """
    Export all edges from networkx.MultiDiGraph.

    This is a generator. An edge whose ``edge_label`` is listed in
    ``self.edge_properties`` is emitted immediately as one direct triple.
    Any other edge is reified: one triple per property value is emitted
    with the reified node as subject, and the edge's direct triple is held
    back and emitted after all edges have been processed.

    Returns
    -------
    Set[rdflib.term.URIRef]
        Yields (subject, predicate, object) triples one at a time

    """
    def as_term(item):
        # CURIE strings go through self.uriref, IRI strings become URIRef,
        # and everything else is a plain literal.
        if isinstance(item, str):
            if PrefixManager.is_curie(item):
                return self.uriref(item)
            if PrefixManager.is_iri(item):
                return URIRef(item)
        return Literal(item)

    # Properties that are never written out for a reified edge
    skipped = {'id', 'association_id', 'edge_key'}
    deferred = []

    for u, v, _key, data in self.graph.edges(data=True, keys=True):
        is_direct = data['edge_label'] in self.edge_properties
        triple = (self.uriref(u), self.uriref(data['edge_label']), self.uriref(v))
        if is_direct:
            # treat as a direct edge
            yield triple
            continue

        # reify
        deferred.append(triple)
        if 'id' in data:
            node = self.uriref(data['id'])
        else:
            # generate a UUID for the reified node
            node = self.uriref(generate_uuid())

        all_data = data.copy()
        all_data['type'] = 'biolink:Association'
        for prop, value in all_data.items():
            if prop in skipped:
                continue
            p = self.uriref(prop)
            items = value if isinstance(value, list) else [value]
            for item in items:
                yield (node, p, as_term(item))

    yield from deferred
def test_is_iri(query):
    """
    Check that PrefixManager.is_iri returns the expected result.

    ``query`` is indexed as a pair: the candidate value at position 0 and
    the expected outcome at position 1.
    """
    observed = PrefixManager.is_iri(query[0])
    expected = query[1]
    assert observed == expected