def validate_edge_property_values(self, subject: str, object: str, data: dict) -> list:
    """
    Check the identifiers carried by an edge.

    ``subject`` and ``object`` are always checked; ``data['relation']`` is
    checked only when that key is present. Each value must be a CURIE, and
    its prefix must be among the known prefixes.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties

    Returns
    -------
    list
        The ``ValidationError`` records collected for this edge
        (empty when nothing is wrong)

    """
    collected = []
    error_type = ErrorType.INVALID_EDGE_PROPERTY_VALUE
    edge_id = f"{subject}-{object}"

    def report(message):
        collected.append(ValidationError(edge_id, error_type, message, MessageLevel.ERROR))

    # --- subject ---
    if not PrefixManager.is_curie(subject):
        report(f"Edge property 'subject' has a value '{subject}' which is not a proper CURIE")
    else:
        prefix = PrefixManager.get_prefix(subject)
        # a falsy prefix is let through for the subject (and only for the subject)
        if prefix and prefix not in self.get_all_prefixes():
            report(f"Edge property 'subject' has a value '{subject}' with a CURIE prefix '{prefix}' that is not represented in Biolink Model JSON-LD context")

    # --- object ---
    if not PrefixManager.is_curie(object):
        report(f"Edge property 'object' has a value '{object}' which is not a proper CURIE")
    else:
        prefix = PrefixManager.get_prefix(object)
        if prefix not in self.prefixes:
            report(f"Edge property 'object' has a value '{object}' with a CURIE prefix '{prefix}' that is not represented in Biolink Model JSON-LD context")

    # --- relation (optional) ---
    if 'relation' in data:
        if not PrefixManager.is_curie(data['relation']):
            report(f"Edge property 'relation' has a value '{data['relation']}' which is not a proper CURIE")
        else:
            prefix = PrefixManager.get_prefix(data['relation'])
            if prefix not in self.prefixes:
                report(f"Edge property 'relation' has a value '{data['relation']}' with a CURIE prefix '{prefix}' that is not represented in Biolink Model JSON-LD context")

    return collected
def uriref(self, identifier: str) -> URIRef:
    """
    Build a rdflib.URIRef from an identifier string.

    Resolution order: ``urn:uuid:`` identifiers are used verbatim; known
    property names are looked up in ``reverse_property_mapping``; anything
    else is treated as an entity and expanded through the prefix manager,
    falling back to the ``DEFAULT`` namespace when expansion changes nothing.

    Parameters
    ----------
    identifier: str
        Identifier as string.

    Returns
    -------
    rdflib.URIRef
        URIRef form of the input ``identifier``

    """
    if identifier.startswith('urn:uuid:'):
        return URIRef(identifier)

    if identifier in reverse_property_mapping:
        # identifier is a property
        return URIRef(reverse_property_mapping[identifier])

    # identifier is an entity
    if identifier.startswith(':'):
        # TODO: this should be handled upstream by prefixcommons-py
        expanded = self.DEFAULT.term(identifier.replace(':', '', 1))
    else:
        expanded = self.prefix_manager.expand(identifier)

    if identifier == expanded:
        # expansion was a no-op: mint a term in the default namespace,
        # after making the identifier safe to use as a local name
        if PrefixManager.is_curie(identifier):
            identifier = identifier.replace(':', '_')
        identifier = identifier.replace(' ', '_')
        expanded = self.DEFAULT.term(identifier)

    return URIRef(expanded)
def _prepare_object(self, prop: str, prop_type: str, value: Any) -> rdflib.term.Identifier:
    """
    Turn a property value into the object term of a triple.

    Parameters
    ----------
    prop: str
        property name (not consulted by this method)
    prop_type: str
        property type
    value: Any
        property value

    Returns
    -------
    rdflib.term.Identifier
        A ``URIRef`` when ``prop_type`` is a URI type and ``value`` is a CURIE
        or a valid IRI; otherwise a ``Literal`` (typed for ``xsd*`` types,
        ``xsd:string`` for everything else)

    """
    if prop_type in ('uriorcurie', 'xsd:anyURI'):
        if isinstance(value, str):
            if PrefixManager.is_curie(value):
                return self.uriref(value)
            if PrefixManager.is_iri(value) and _is_valid_uri(value):
                return URIRef(value)
        # not a string, not CURIE/IRI shaped, or an invalid URI
        return Literal(value)

    if prop_type.startswith('xsd'):
        datatype = self.prefix_manager.expand(prop_type)
    else:
        datatype = self.prefix_manager.expand("xsd:string")
    return Literal(value, datatype=datatype)
def validate_node_property_values(node: str, data: dict) -> list:
    """
    Check that a node identifier is a CURIE with a known prefix.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties (not inspected here)

    Returns
    -------
    list
        Zero or one ``ValidationError`` for the given node

    """
    error_type = ErrorType.INVALID_NODE_PROPERTY_VALUE

    if not PrefixManager.is_curie(node):
        message = f"Node property 'id' expected to be of type 'CURIE'"
        return [ValidationError(node, error_type, message, MessageLevel.ERROR)]

    prefix = PrefixManager.get_prefix(node)
    if prefix and prefix not in Validator.get_all_prefixes():
        message = f"Node property 'id' has a value '{node}' with a CURIE prefix '{prefix}' is not represented in Biolink Model JSON-LD context"
        return [ValidationError(node, error_type, message, MessageLevel.ERROR)]

    return []
def validate_node_property_values(
    self,
    node: str,
    data: dict
):
    """
    Check that a node identifier is a CURIE with a known prefix.

    Problems are reported through ``self.log_error``; nothing is returned.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties (not inspected here)

    """
    error_type = ErrorType.INVALID_NODE_PROPERTY_VALUE

    if PrefixManager.is_curie(node):
        prefix = PrefixManager.get_prefix(node)
        if not prefix or prefix in self.get_all_prefixes():
            return
        message = f"Node property 'id' has a value '{node}' with a CURIE prefix '{prefix}'" + \
                  f" is not represented in Biolink Model JSON-LD context"
    else:
        message = f"Node property 'id' is expected to be of type 'CURIE'"

    self.log_error(node, error_type, message, MessageLevel.ERROR)
def validate_edge_predicate(
    self,
    subject: str,
    object: str,
    data: dict,
    toolkit: Optional[Toolkit] = None
):
    """
    Check the ``predicate`` property of an edge.

    The predicate must be present, be a string, be written in snake_case
    (after dropping any CURIE prefix), and resolve to a Biolink Model
    element under its own name rather than an alias. Each failure is
    reported through ``self.log_error``.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties
    toolkit: Optional[Toolkit]
        Optional externally provided toolkit (default: use Validator class defined toolkit)

    """
    toolkit = toolkit or Validator.get_toolkit()
    error_type = ErrorType.INVALID_EDGE_PREDICATE
    edge_id = f"{subject}->{object}"
    edge_predicate = data.get("predicate")

    if edge_predicate is None:
        message = "Edge does not have an 'predicate' property"
        self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
        return

    if not isinstance(edge_predicate, str):
        message = f"Edge property 'edge_predicate' is expected to be of type 'string'"
        self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
        return

    # compare on the local part only when the predicate is a CURIE
    if PrefixManager.is_curie(edge_predicate):
        edge_predicate = PrefixManager.get_reference(edge_predicate)

    if not re.match(r"^([a-z_][^A-Z\s]+_?[a-z_][^A-Z\s]+)+$", edge_predicate):
        message = f"Edge predicate '{edge_predicate}' is not in snake_case form"
        self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
        return

    p = toolkit.get_element(snakecase_to_sentencecase(edge_predicate))
    if p is None:
        message = f"Edge predicate '{edge_predicate}' is not in Biolink Model"
    elif edge_predicate != p.name and edge_predicate in p.aliases:
        message = f"Edge predicate '{edge_predicate}' is actually an alias for {p.name}; " + \
                  f"Should replace {edge_predicate} with {p.name}"
    else:
        # known element, referenced by its own name
        return

    self.log_error(
        edge_id,
        error_type,
        message,
        MessageLevel.ERROR,
    )
def validate_categories(self, node: str, data: dict, toolkit: Optional[Toolkit] = None):
    """
    Check the ``category`` property of a node.

    The property must exist and be a list. Every entry is then checked
    for CamelCase form and looked up in the Biolink Model: mixins, unknown
    categories and aliases are each reported through ``self.log_error``.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties
    toolkit: Optional[Toolkit]
        Optional externally provided toolkit (default: use Validator class defined toolkit)

    """
    toolkit = toolkit or Validator.get_toolkit()
    error_type = ErrorType.INVALID_CATEGORY
    categories = data.get("category")

    if categories is None:
        self.log_error(node, error_type, "Node does not have a 'category' property", MessageLevel.ERROR)
        return

    if not isinstance(categories, list):
        message = f"Node property 'category' is expected to be of type {list}"
        self.log_error(node, error_type, message, MessageLevel.ERROR)
        return

    for category in categories:
        if PrefixManager.is_curie(category):
            category = PrefixManager.get_reference(category)

        # a non-CamelCase name is reported, but the model lookup below
        # still runs for it
        if re.match(r"^([A-Z][a-z\d]+)+$", category) is None:
            message = f"Category '{category}' is not in CamelCase form"
            self.log_error(node, error_type, message, MessageLevel.ERROR)

        formatted_category = camelcase_to_sentencecase(category)

        if toolkit.is_mixin(formatted_category):
            message = f"Category '{category}' is a mixin in the Biolink Model"
            self.log_error(node, error_type, message, MessageLevel.ERROR)
            continue

        if not toolkit.is_category(formatted_category):
            message = (
                f"Category '{category}' is unknown in the current Biolink Model"
            )
            self.log_error(node, error_type, message, MessageLevel.ERROR)
            continue

        c = toolkit.get_element(formatted_category.lower())
        if c and category != c.name and category in c.aliases:
            message = f"Category {category} is actually an alias for {c.name}; " + \
                      f"Should replace '{category}' with '{c.name}'"
            self.log_error(node, error_type, message, MessageLevel.ERROR)
def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
    """
    Add an edge to the graph, normalising its label on the way.

    Both endpoints are registered through ``self.add_node``. The edge
    label is derived from ``predicate_iri`` and then cleaned up: spaces
    become underscores, a leading ``self.BIOLINK`` IRI prefix is removed,
    and a label that is still a CURIE is replaced by its looked-up name
    (or ``self.DEFAULT_EDGE_LABEL`` when the lookup yields nothing).

    The edge is only added when no edge with the same generated key
    already exists between the two nodes.

    Parameters
    ----------
    subject_iri: rdflib.URIRef
        Subject IRI for the subject in a triple
    object_iri: rdflib.URIRef
        Object IRI for the object in a triple
    predicate_iri: rdflib.URIRef
        Predicate IRI for the predicate in a triple

    Returns
    -------
    Tuple[str, str, str]
        ``(subject, object, edge_label)``. The returned label carries no
        ``biolink:`` prefix, whereas the ``edge_label`` stored on the
        edge does.

    """
    s = self.add_node(subject_iri)
    o = self.add_node(object_iri)

    relation = self.prefix_manager.contract(predicate_iri)
    edge_label = process_iri(predicate_iri)

    if ' ' in edge_label:
        logging.debug("predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'".format(predicate_iri, edge_label))
        # actually perform the replacement the message announces; without
        # it the label (and the edge key built from it) keeps the spaces
        edge_label = edge_label.replace(' ', '_')

    if edge_label.startswith(self.BIOLINK):
        logging.debug("predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix".format(predicate_iri, edge_label, self.BIOLINK))
        edge_label = edge_label.replace(self.BIOLINK, '')

    if PrefixManager.is_curie(edge_label):
        name = curie_lookup(edge_label)
        if name:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; Using its mapping instead: {}".format(predicate_iri, edge_label, name))
            edge_label = name
        else:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}".format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
            edge_label = self.DEFAULT_EDGE_LABEL

    kwargs = {
        'subject': s,
        'predicate': str(predicate_iri),
        'object': o,
        'relation': relation,
        'edge_label': f"biolink:{edge_label}",
    }
    if 'provided_by' in self.graph_metadata:
        kwargs['provided_by'] = self.graph_metadata['provided_by']

    key = generate_edge_key(s, edge_label, o)
    if not self.graph.has_edge(s, o, key=key):
        self.graph.add_edge(s, o, key=key, **kwargs)
    # TODO: support append
    return s, o, edge_label
def validate_edge_predicate(subject: str, object: str, data: dict) -> list:
    """
    Check the ``predicate`` property of an edge.

    The predicate must be present, be a string, be written in snake_case
    (after dropping any CURIE prefix), and resolve to a Biolink Model
    element under its own name rather than an alias.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties

    Returns
    -------
    list
        Zero or one ``ValidationError`` for the given edge

    """
    toolkit = get_toolkit()
    error_type = ErrorType.INVALID_EDGE_PREDICATE
    edge_id = f"{subject}-{object}"

    def failure(message):
        return [ValidationError(edge_id, error_type, message, MessageLevel.ERROR)]

    edge_predicate = data.get('predicate')
    if edge_predicate is None:
        return failure("Edge does not have an 'predicate' property")
    if not isinstance(edge_predicate, str):
        return failure(f"Edge property 'edge_predicate' expected to be of type 'string'")

    # compare on the local part only when the predicate is a CURIE
    if PrefixManager.is_curie(edge_predicate):
        edge_predicate = PrefixManager.get_reference(edge_predicate)

    if not re.match(r"^([a-z_][^A-Z\s]+_?[a-z_][^A-Z\s]+)+$", edge_predicate):
        return failure(f"Edge label '{edge_predicate}' is not in snake_case form")

    p = toolkit.get_element(snakecase_to_sentencecase(edge_predicate))
    if p is None:
        return failure(f"Edge label '{edge_predicate}' not in Biolink Model")
    if edge_predicate != p.name and edge_predicate in p.aliases:
        return failure(f"Edge label '{edge_predicate}' is actually an alias for {p.name}; Should replace {edge_predicate} with {p.name}")

    return []
def validate_categories(node: str, data: dict) -> list:
    """
    Validate ``category`` field of a given node.

    The property must exist and be a list. Every entry is checked for
    CamelCase form and looked up in the Biolink Model; unknown categories
    and aliases are reported.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties

    Returns
    -------
    list
        A list of errors for a given node

    """
    toolkit = get_toolkit()
    error_type = ErrorType.INVALID_CATEGORY
    errors = []
    categories = data.get('category')

    if categories is None:
        message = "Node does not have a 'category' property"
        errors.append(ValidationError(node, error_type, message, MessageLevel.ERROR))
    elif not isinstance(categories, list):
        message = f"Node property 'category' expected to be of type {list}"
        errors.append(ValidationError(node, error_type, message, MessageLevel.ERROR))
    else:
        for category in categories:
            if PrefixManager.is_curie(category):
                category = PrefixManager.get_reference(category)

            m = re.match(r"^([A-Z][a-z\d]+)+$", category)
            if not m:
                # category is not CamelCase; the model lookup below still runs
                message = f"Category '{category}' is not in CamelCase form"
                errors.append(ValidationError(node, error_type, message, MessageLevel.ERROR))

            formatted_category = camelcase_to_sentencecase(category)
            if not toolkit.is_category(formatted_category):
                message = f"Category '{category}' not in Biolink Model"
                errors.append(ValidationError(node, error_type, message, MessageLevel.ERROR))
            else:
                c = toolkit.get_element(formatted_category.lower())
                # the lookup may come back empty even for a recognised
                # category; skip the alias check rather than fail on c.name
                if c and category != c.name and category in c.aliases:
                    message = f"Category {category} is actually an alias for {c.name}; Should replace '{category}' with '{c.name}'"
                    errors.append(ValidationError(node, error_type, message, MessageLevel.ERROR))

    return errors
def validate_edge_property_values(
    self,
    subject: str,
    object: str,
    data: dict
):
    """
    Check that the subject and object of an edge are CURIEs with known
    prefixes, reporting each problem through ``self.log_error``.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties (not inspected here)

    """
    error_type = ErrorType.INVALID_EDGE_PROPERTY_VALUE
    prefixes = self.get_all_prefixes()
    edge_id = f"{subject}->{object}"

    # --- subject ---
    if not PrefixManager.is_curie(subject):
        message = f"Edge property 'subject' has a value '{subject}' which is not a proper CURIE"
        self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
    else:
        prefix = PrefixManager.get_prefix(subject)
        # a falsy prefix is let through for the subject (and only for the subject)
        if prefix and prefix not in prefixes:
            message = f"Edge property 'subject' has a value '{subject}' with a CURIE prefix " + \
                      f"'{prefix}' that is not represented in Biolink Model JSON-LD context"
            self.log_error(edge_id, error_type, message, MessageLevel.ERROR)

    # --- object ---
    if not PrefixManager.is_curie(object):
        message = f"Edge property 'object' has a value '{object}' which is not a proper CURIE"
        self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
    else:
        prefix = PrefixManager.get_prefix(object)
        if prefix not in prefixes:
            message = f"Edge property 'object' has a value '{object}' with a CURIE " + \
                      f"prefix '{prefix}' that is not represented in Biolink Model JSON-LD context"
            self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
def get_category_via_superclass(graph: BaseGraph, curie: str, load_ontology: bool = True) -> Set[str]:
    """
    Derive categories for a CURIE from its ``subclass_of`` ancestors.

    Ancestors are walked in the order ``get_ancestors`` returns them until
    one maps onto the Biolink Model. The result then holds the ``name`` of
    every ancestor visited so far (where the graph has one) plus the
    Biolink ancestors of the mapped element.

    Parameters
    ----------
    graph: kgx.graph.base_graph.BaseGraph
        Graph to traverse
    curie: str
        Input CURIE
    load_ontology: bool
        When the graph yields no ancestors, retry once against the
        ontology graph of the CURIE lookup service

    Returns
    -------
    Set[str]
        A set containing one (or more) category for the given CURIE;
        empty when ``curie`` is not a CURIE or nothing maps

    """
    log.debug("curie: {}".format(curie))
    collected = []
    toolkit = get_toolkit()

    if not PrefixManager.is_curie(curie):
        return set(collected)

    ancestors = get_ancestors(graph, curie, relations=['subclass_of'])
    if len(ancestors) == 0 and load_ontology:
        # nothing in the supplied graph: retry once on the ontology graph
        # (load_ontology=False prevents further recursion)
        lookup_service = get_curie_lookup_service()
        collected.extend(
            get_category_via_superclass(lookup_service.ontology_graph, curie, False)
        )

    log.debug("Ancestors for CURIE {} via subClassOf: {}".format(
        curie, ancestors))

    visited = []
    for anc in ancestors:
        mapping = toolkit.get_by_mapping(anc)
        visited.append(anc)
        if not mapping:
            continue

        # there is direct mapping to BioLink Model
        log.debug("Ancestor {} mapped to {}".format(anc, mapping))
        collected.extend(
            graph.nodes()[x]['name'] for x in visited if 'name' in graph.nodes()[x]
        )
        collected.extend(toolkit.ancestors(mapping))
        break

    return set(collected)
def add_edge_attribute(self, subject_iri: Union[URIRef, str], object_iri: URIRef, predicate_iri: URIRef, key: str, value: str) -> None:
    """
    Attach an attribute to an edge of the graph.

    ``key`` is first normalised: if its lower-cased form is a known entry
    of ``is_property_multivalued`` that form is used as the property name;
    otherwise ``key`` is treated as a URI and translated through
    ``property_mapping``. When no property name results, or ``value`` is
    None, nothing happens.

    The edge is located by contracting the subject/object IRIs to CURIEs
    and regenerating the edge key from the predicate; its attribute dict
    is then handed to ``self._add_attribute``.

    NOTE(review): this method itself creates neither nodes nor edges. For
    an edge that is absent, ``_add_attribute`` receives whatever
    ``get_edge_data`` returns in that case - confirm this is handled.

    Parameters
    ----------
    subject_iri: [rdflib.URIRef, str]
        The IRI of the subject node of an edge in rdflib.Graph
    object_iri: rdflib.URIRef
        The IRI of the object node of an edge in rdflib.Graph
    predicate_iri: rdflib.URIRef
        The IRI of the predicate representing an edge in rdflib.Graph
    key: str
        The name of the attribute. Can be a rdflib.URIRef or URI string
    value: str
        The value of the attribute

    """
    lowered = key.lower()
    if lowered in is_property_multivalued:
        key = lowered
    else:
        uri_key = key if isinstance(key, URIRef) else URIRef(key)
        key = property_mapping.get(uri_key)

    if key is None or value is None:
        return

    subject_curie = self.prefix_manager.contract(subject_iri)
    object_curie = self.prefix_manager.contract(object_iri)

    edge_label = process_iri(predicate_iri)
    if PrefixManager.is_curie(edge_label):
        edge_label = curie_lookup(edge_label)

    edge_key = generate_edge_key(subject_curie, edge_label, object_curie)
    attributes = self.graph.get_edge_data(subject_curie, object_curie, key=edge_key)
    self._add_attribute(attributes, key, value)
def get_biolink_element(prefix_manager: PrefixManager, predicate: Any) -> Optional[Element]:
    """
    Look up the Biolink Model element that corresponds to a predicate.

    The predicate is reduced to a bare reference (IRI -> CURIE -> local
    part) and looked up by name. If that fails, a lookup by mapping is
    attempted with the predicate as originally given.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    predicate: Any
        The CURIE of a predicate

    Returns
    -------
    Optional[Element]
        The corresponding Biolink Model element, or the falsy result of
        the name lookup when nothing matches

    """
    toolkit = get_toolkit()

    predicate_curie = (
        prefix_manager.contract(predicate)
        if prefix_manager.is_iri(predicate)
        else predicate
    )
    reference = (
        prefix_manager.get_reference(predicate_curie)
        if prefix_manager.is_curie(predicate_curie)
        else predicate_curie
    )

    element = toolkit.get_element(reference)
    if element:
        return element

    # fall back to the external mappings declared in the model
    try:
        mapping = toolkit.get_element_by_mapping(predicate)
        if mapping:
            element = toolkit.get_element(mapping)
    except ValueError as e:
        log.error(e)
    return element
def process_predicate(
    prefix_manager: PrefixManager,
    p: Union[URIRef, str],
    predicate_mapping: Optional[Dict] = None,
) -> Tuple:
    """
    Resolve a predicate against the Biolink Model and, failing that,
    against a caller-supplied mapping.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    p: Union[URIRef, str]
        The predicate, as an IRI, a CURIE, or a bare name
    predicate_mapping: Optional[Dict]
        Predicate mappings, consulted only when ``p`` has no Biolink
        Model element

    Returns
    -------
    Tuple[str, str, str, str]
        ``(element_uri, canonical_uri, predicate, property_name)``:
        the Biolink CURIE of the matched element (None when there is no
        match), the element's ``slot_uri`` (None unless the element is a
        slot that declares one), the CURIE form of ``p``, and the
        reference (local name) of ``p``

    """
    # Step 1: derive a CURIE (`predicate`) and a local name (`property_name`).
    if prefix_manager.is_iri(p):
        predicate = prefix_manager.contract(str(p))
    else:
        predicate = None
    if prefix_manager.is_curie(p):
        # p is already a CURIE: use it as is
        property_name = prefix_manager.get_reference(p)
        predicate = p
    else:
        if predicate and prefix_manager.is_curie(predicate):
            # p was an IRI that contracted to a CURIE
            property_name = prefix_manager.get_reference(predicate)
        else:
            # neither a CURIE nor a contractible IRI: keep p as the name
            # and build a CURIE with an empty prefix
            property_name = p
            predicate = f":{p}"
    # Step 2: look for a Biolink Model element (lookup uses the original p).
    element = get_biolink_element(prefix_manager, p)
    canonical_uri = None
    if element:
        if isinstance(element, SlotDefinition):
            # predicate corresponds to a biolink slot
            if element.definition_uri:
                element_uri = prefix_manager.contract(element.definition_uri)
            else:
                element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
            if element.slot_uri:
                canonical_uri = element.slot_uri
        elif isinstance(element, ClassDefinition):
            # this will happen only when the IRI is actually
            # a reference to a class
            element_uri = prefix_manager.contract(element.class_uri)
        else:
            element_uri = f"biolink:{sentencecase_to_camelcase(element.name)}"
        # anything descending from biolink:Attribute gets a snake_case
        # CURIE, overriding whichever branch above set element_uri
        if "biolink:Attribute" in get_biolink_ancestors(element.name):
            element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
        if not predicate:
            predicate = element_uri
    else:
        # no mapping to biolink model;
        # look at predicate mappings
        element_uri = None
        if predicate_mapping:
            if p in predicate_mapping:
                property_name = predicate_mapping[p]
                predicate = f":{property_name}"
    # (disabled) result caching:
    # cache[p] = {'element_uri': element_uri, 'canonical_uri': canonical_uri,
    #             'predicate': predicate, 'property_name': property_name}
    return element_uri, canonical_uri, predicate, property_name
def load_edge(self, edge: Dict) -> Generator:
    """
    Turn one SSSOM mapping record into node and edge records.

    The record's ``predicate_id`` is resolved and removed from ``edge``
    (the caller's dict is modified). Keys listed in
    ``SSSOM_NODE_PROPERTY_MAPPING`` are routed to the subject or object
    node; all remaining keys become edge properties. Nothing is yielded
    when edge validation or either node load comes back empty.

    Parameters
    ----------
    edge : Dict
        An edge

    Returns
    -------
    Generator
        Yields the subject node, the object node and, when the edge has
        both endpoints, a ``(subject, object, key, edge_data)`` tuple

    """
    (element_uri, canonical_uri, predicate, property_name) = process_predicate(
        self.prefix_manager, edge["predicate_id"], self.predicate_mapping
    )
    edge_predicate = element_uri or predicate or property_name
    if canonical_uri:
        edge_predicate = element_uri

    data = {
        "subject": edge["subject_id"],
        "predicate": edge_predicate,
        "object": edge["object_id"],
    }
    del edge["predicate_id"]

    data = self.validate_edge(data)
    if not data:
        return  # ?

    subject_node = {}
    object_node = {}
    for k, v in edge.items():
        if k not in SSSOM_NODE_PROPERTY_MAPPING:
            # not a node property: keep it on the edge
            data[k] = v
            continue

        if k.startswith("subject"):
            target = subject_node
        elif k.startswith("object"):
            target = object_node
        else:
            log.info(f"Ignoring {k} {v}")
            continue

        mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
        # a category that is not a CURIE is replaced by a generic class
        if mapped_k == "category" and not PrefixManager.is_curie(v):
            v = f"biolink:OntologyClass"
        target[mapped_k] = v

    subject_node = self.load_node(subject_node)
    object_node = self.load_node(object_node)
    if not (subject_node and object_node):
        return  # ?
    objs = [subject_node, object_node]

    for k, v in self.graph_metadata.items():
        if k != "curie_map":
            data[k] = v

    edge_data = sanitize_import(data.copy())
    if "subject" not in edge_data or "object" not in edge_data:
        self.owner.log_error(
            entity=str(edge_data),
            error_type=ErrorType.MISSING_NODE,
            message="Ignoring edge with either a missing 'subject' or 'object'",
            message_level=MessageLevel.WARNING
        )
    else:
        if "id" not in edge_data:
            edge_data["id"] = generate_uuid()
        s = edge_data["subject"]
        o = edge_data["object"]
        self.set_edge_provenance(edge_data)
        key = generate_edge_key(s, edge_data["predicate"], o)
        self.edge_properties.update(list(edge_data.keys()))
        objs.append((s, o, key, edge_data))

    yield from objs
def export_edges(self) -> Set[URIRef]:
    """
    Export every edge of the graph as RDF triples.

    This is a generator. An edge whose ``edge_label`` is listed in
    ``self.edge_properties`` is emitted as a single direct triple. Any
    other edge is reified: one triple per edge property hangs off a node
    named by the edge ``id`` (or a fresh UUID), and the plain
    ``(subject, predicate, object)`` triple is held back and emitted
    after all edges have been processed.

    Returns
    -------
    Set[rdflib.term.URIRef]
        Yields ``(subject, predicate, object)`` 3-tuples, one at a time

    """
    def as_term(item):
        # strings that look like CURIEs/IRIs become URIRefs, the rest Literals
        if isinstance(item, str):
            if PrefixManager.is_curie(item):
                return self.uriref(item)
            if PrefixManager.is_iri(item):
                return URIRef(item)
        return Literal(item)

    skipped_props = {'id', 'association_id', 'edge_key'}
    deferred = []

    for u, v, k, data in self.graph.edges(data=True, keys=True):
        is_direct = data['edge_label'] in self.edge_properties
        triple = (self.uriref(u), self.uriref(data['edge_label']), self.uriref(v))

        if is_direct:
            # treat as a direct edge
            yield triple
            continue

        # reify
        deferred.append(triple)
        if 'id' in data:
            s = self.uriref(data['id'])
        else:
            # generate a UUID for the reified node
            s = self.uriref(generate_uuid())

        all_data = data.copy()
        all_data['type'] = 'biolink:Association'
        for prop, value in all_data.items():
            if prop in skipped_props:
                continue
            p = self.uriref(prop)
            if isinstance(value, list):
                for x in value:
                    yield (s, p, as_term(x))
            else:
                yield (s, p, as_term(value))

    for triple in deferred:
        yield triple
def load_edge(self, edge: Dict) -> Generator:
    """
    Turn one SSSOM mapping record into node and edge records.

    The record's ``predicate_id`` is resolved and removed from ``edge``
    (the caller's dict is modified). Keys listed in
    ``SSSOM_NODE_PROPERTY_MAPPING`` are routed to the subject or object
    node; all remaining keys become edge properties.

    Parameters
    ----------
    edge : Dict
        An edge

    Returns
    -------
    Generator
        Yields the loaded subject node, the loaded object node and, when
        the edge has both endpoints, a ``(subject, object, key, edge_data)``
        tuple

    """
    (element_uri, canonical_uri, predicate, property_name) = process_predicate(
        self.prefix_manager, edge['predicate_id'], self.predicate_mapping
    )
    edge_predicate = element_uri or predicate or property_name
    if canonical_uri:
        edge_predicate = element_uri

    data = {
        'subject': edge['subject_id'],
        'predicate': edge_predicate,
        'object': edge['object_id'],
    }
    del edge['predicate_id']
    data = validate_edge(data)

    subject_node = {}
    object_node = {}
    for k, v in edge.items():
        if k not in SSSOM_NODE_PROPERTY_MAPPING:
            # not a node property: keep it on the edge
            data[k] = v
            continue

        if k.startswith('subject'):
            target = subject_node
        elif k.startswith('object'):
            target = object_node
        else:
            log.info(f"Ignoring {k} {v}")
            continue

        mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
        # a category that is not a CURIE is replaced by a generic class
        if mapped_k == 'category' and not PrefixManager.is_curie(v):
            v = f"biolink:OntologyClass"
        target[mapped_k] = v

    objs = [self.load_node(subject_node), self.load_node(object_node)]

    for k, v in self.graph_metadata.items():
        if k != 'curie_map':
            data[k] = v

    edge_data = sanitize_import(data.copy())
    if 'subject' not in edge_data or 'object' not in edge_data:
        log.info(
            "Ignoring edge with either a missing 'subject' or 'object': {}"
            .format(edge_data))
    else:
        if 'id' not in edge_data:
            edge_data['id'] = generate_uuid()
        s = edge_data['subject']
        o = edge_data['object']
        if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data:
            edge_data['provided_by'] = self.graph_metadata['provided_by']
        key = generate_edge_key(s, edge_data['predicate'], o)
        self.edge_properties.update(list(edge_data.keys()))
        objs.append((s, o, key, edge_data))

    yield from objs
def add_edge(
    self,
    subject_iri: URIRef,
    object_iri: URIRef,
    predicate_iri: URIRef,
    data: Optional[Dict[Any, Any]] = None,
) -> Dict:
    """
    Add an edge to the edge cache, or update it if already cached.

    Endpoints are taken from ``self.node_cache`` when present and created
    through ``self.add_node`` otherwise. The edge predicate is the first
    non-empty value among the Biolink element URI, the predicate CURIE and
    the bare property name. A CURIE predicate whose prefix is not one of
    ``biolink``, ``rdf``, ``rdfs``, ``skos``, ``owl`` is replaced by
    ``DEFAULT_EDGE_PREDICATE``.

    Parameters
    ----------
    subject_iri: rdflib.URIRef
        Subject IRI for the subject in a triple
    object_iri: rdflib.URIRef
        Object IRI for the object in a triple
    predicate_iri: rdflib.URIRef
        Predicate IRI for the predicate in a triple
    data: Optional[Dict[Any, Any]]
        Additional edge properties. For a new edge a non-empty ``data``
        is updated in place and becomes the cached edge record.

    Returns
    -------
    Dict
        The edge data

    """
    (element_uri, canonical_uri, predicate, property_name) = self.process_predicate(
        predicate_iri
    )

    subject_curie = self.prefix_manager.contract(subject_iri)
    object_curie = self.prefix_manager.contract(object_iri)
    subject_node = (
        self.node_cache[subject_curie]
        if subject_curie in self.node_cache
        else self.add_node(subject_iri)
    )
    object_node = (
        self.node_cache[object_curie]
        if object_curie in self.node_cache
        else self.add_node(object_iri)
    )

    edge_predicate = element_uri or predicate or property_name

    if ' ' in edge_predicate:
        # NOTE(review): the message says spaces are replaced, but no
        # replacement is performed here - confirm which one is intended
        log.debug(
            f"predicate IRI '{predicate_iri}' yields edge_predicate '{edge_predicate}' that not in snake_case form; replacing ' ' with '_'"
        )

    edge_predicate_prefix = self.prefix_manager.get_prefix(edge_predicate)
    unrecognised_prefix = edge_predicate_prefix not in {'biolink', 'rdf', 'rdfs', 'skos', 'owl'}
    if unrecognised_prefix and PrefixManager.is_curie(edge_predicate):
        # CURIE from a vocabulary we do not keep predicates for: fall back
        # (a curie_lookup-based name resolution used to live here)
        edge_predicate = DEFAULT_EDGE_PREDICATE

    subject_id = subject_node['id']
    object_id = object_node['id']
    edge_key = generate_edge_key(subject_id, edge_predicate, object_id)
    cache_key = (subject_id, object_id, edge_key)

    if cache_key in self.edge_cache:
        # edge already exists; process kwargs and update the edge
        return self.update_edge(subject_id, object_id, edge_key, data)

    # add a new edge
    edge_data = data or {}
    edge_data.update(
        {
            'subject': subject_id,
            'predicate': f"{edge_predicate}",
            'object': object_id,
        }
    )
    if 'relation' not in edge_data:
        edge_data['relation'] = predicate
    if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data:
        edge_data['provided_by'] = self.graph_metadata['provided_by']
    self.edge_cache[cache_key] = edge_data
    return edge_data
def test_is_curie(query):
    """
    Check that ``PrefixManager.is_curie`` gives the expected answer.

    ``query`` is a pair: the candidate string at index 0 and the expected
    result at index 1.
    """
    candidate = query[0]
    expected = query[1]
    assert PrefixManager.is_curie(candidate) == expected