def validate_node_property_values(node: str, data: dict) -> list:
    """
    Check that a node identifier is a CURIE whose prefix is known.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties (not inspected by this check)

    Returns
    -------
    list
        The validation errors raised for the node; empty when the identifier passes

    """
    kind = ErrorType.INVALID_NODE_PROPERTY_VALUE

    # Guard clause: an identifier that is not a CURIE yields exactly one error.
    if not PrefixManager.is_curie(node):
        message = f"Node property 'id' expected to be of type 'CURIE'"
        return [ValidationError(node, kind, message, MessageLevel.ERROR)]

    found = []
    prefix = PrefixManager.get_prefix(node)
    # The prefix list is only consulted when a non-empty prefix was extracted.
    if prefix and prefix not in Validator.get_all_prefixes():
        message = (
            f"Node property 'id' has a value '{node}' with a CURIE prefix '{prefix}'"
            f" is not represented in Biolink Model JSON-LD context"
        )
        found.append(ValidationError(node, kind, message, MessageLevel.ERROR))
    return found
def __init__(self):
    """Start with empty metadata, filters and property sets, plus a new PrefixManager."""
    self.graph_metadata: Dict = {}
    # Filters are dictionaries; property names are collected in sets.
    self.node_filters, self.edge_filters = {}, {}
    self.node_properties, self.edge_properties = set(), set()
    self.prefix_manager = PrefixManager()
def _prepare_object(self, prop: str, prop_type: str, value: Any) -> rdflib.term.Identifier:
    """
    Build the object term of a triple from a property value.

    Parameters
    ----------
    prop: str
        property name
    prop_type: str
        property type
    value: Any
        property value

    Returns
    -------
    rdflib.term.Identifier
        A URIRef for CURIE / valid IRI values of URI-typed properties,
        otherwise a Literal

    """
    if prop_type in ('uriorcurie', 'xsd:anyURI'):
        is_text = isinstance(value, str)
        if is_text and PrefixManager.is_curie(value):
            return self.uriref(value)
        # An IRI-looking string only becomes a URIRef when it also passes
        # the stricter validity check; everything else stays a plain literal.
        if is_text and PrefixManager.is_iri(value) and _is_valid_uri(value):
            return URIRef(value)
        return Literal(value)

    # Typed literal: keep an explicit xsd type, fall back to xsd:string.
    datatype_curie = prop_type if prop_type.startswith('xsd') else "xsd:string"
    return Literal(value, datatype=self.prefix_manager.expand(datatype_curie))
class Sink(object):
    """
    Base class for writers that emit records to a store,
    where the store is a file or a database.

    Parameters
    ----------
    owner: Transformer
        Transformer to which the GraphSink belongs

    """

    def __init__(self, owner):
        self.owner = owner
        self.prefix_manager = PrefixManager()
        # Property names are collected in sets.
        self.node_properties, self.edge_properties = set(), set()

    def set_reverse_prefix_map(self, m: Dict) -> None:
        """
        Pass IRI-to-prefix mappings on to this sink's prefix manager.

        Parameters
        ----------
        m: Dict
            A dictionary with IRI to prefix mappings

        """
        self.prefix_manager.update_reverse_prefix_map(m)

    def write_node(self, record) -> None:
        """
        Write a node record to the underlying store (no-op in this base class).

        Parameters
        ----------
        record: Any
            A node record

        """

    def write_edge(self, record) -> None:
        """
        Write an edge record to the underlying store (no-op in this base class).

        Parameters
        ----------
        record: Any
            An edge record

        """

    def finalize(self) -> None:
        """
        Hook for operations that ought to run after all incoming data
        has been written (no-op in this base class).
        """
def validate_node_property_values(self, node: str, data: dict):
    """
    Log an error when a node identifier is not a CURIE, or is a CURIE
    whose prefix is not among the known prefixes.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties (not inspected by this check)

    """
    error_type = ErrorType.INVALID_NODE_PROPERTY_VALUE

    if not PrefixManager.is_curie(node):
        message = f"Node property 'id' is expected to be of type 'CURIE'"
        self.log_error(node, error_type, message, MessageLevel.ERROR)
        return

    prefix = PrefixManager.get_prefix(node)
    # Nothing to report for an empty prefix or one that is already known.
    if not prefix or prefix in self.get_all_prefixes():
        return

    message = (
        f"Node property 'id' has a value '{node}' with a CURIE prefix '{prefix}'"
        f" is not represented in Biolink Model JSON-LD context"
    )
    self.log_error(node, error_type, message, MessageLevel.ERROR)
def validate_edge_predicate(
    self,
    subject: str,
    object: str,
    data: dict,
    toolkit: Optional[Toolkit] = None
):
    """
    Check the ``predicate`` property of an edge and log every problem found.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties
    toolkit: Optional[Toolkit]
        Optional externally provided toolkit (default: use Validator class defined toolkit)

    """
    if not toolkit:
        toolkit = Validator.get_toolkit()
    error_type = ErrorType.INVALID_EDGE_PREDICATE
    edge_id = f"{subject}->{object}"

    def report(message):
        # Every finding of this check shares the same entity, type and level.
        self.log_error(edge_id, error_type, message, MessageLevel.ERROR)

    edge_predicate = data.get("predicate")
    if edge_predicate is None:
        report("Edge does not have an 'predicate' property")
        return
    if not isinstance(edge_predicate, str):
        report(f"Edge property 'edge_predicate' is expected to be of type 'string'")
        return

    # Only the local part of a CURIE is checked against the model.
    if PrefixManager.is_curie(edge_predicate):
        edge_predicate = PrefixManager.get_reference(edge_predicate)

    if not re.match(r"^([a-z_][^A-Z\s]+_?[a-z_][^A-Z\s]+)+$", edge_predicate):
        report(f"Edge predicate '{edge_predicate}' is not in snake_case form")
        return

    p = toolkit.get_element(snakecase_to_sentencecase(edge_predicate))
    if p is None:
        report(f"Edge predicate '{edge_predicate}' is not in Biolink Model")
    elif edge_predicate != p.name and edge_predicate in p.aliases:
        report(
            f"Edge predicate '{edge_predicate}' is actually an alias for {p.name}; "
            f"Should replace {edge_predicate} with {p.name}"
        )
def validate_categories(self, node: str, data: dict, toolkit: Optional[Toolkit] = None):
    """
    Check the ``category`` property of a node and log every problem found.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties
    toolkit: Optional[Toolkit]
        Optional externally provided toolkit (default: use Validator class defined toolkit)

    """
    if not toolkit:
        toolkit = Validator.get_toolkit()
    error_type = ErrorType.INVALID_CATEGORY

    def report(message):
        # Every finding of this check shares the same entity, type and level.
        self.log_error(node, error_type, message, MessageLevel.ERROR)

    categories = data.get("category")
    if categories is None:
        report("Node does not have a 'category' property")
        return
    if not isinstance(categories, list):
        report(f"Node property 'category' is expected to be of type {list}")
        return

    for category in categories:
        # Only the local part of a CURIE is checked against the model.
        if PrefixManager.is_curie(category):
            category = PrefixManager.get_reference(category)

        # A casing problem is reported, but the model lookups below still run.
        if not re.match(r"^([A-Z][a-z\d]+)+$", category):
            report(f"Category '{category}' is not in CamelCase form")

        formatted_category = camelcase_to_sentencecase(category)
        if toolkit.is_mixin(formatted_category):
            report(f"Category '{category}' is a mixin in the Biolink Model")
            continue
        if not toolkit.is_category(formatted_category):
            report(f"Category '{category}' is unknown in the current Biolink Model")
            continue

        c = toolkit.get_element(formatted_category.lower())
        if c and category != c.name and category in c.aliases:
            report(
                f"Category {category} is actually an alias for {c.name}; "
                f"Should replace '{category}' with '{c.name}'"
            )
def __init__(self, owner):
    """Remember the owner and start with empty metadata, filters and property sets."""
    self.owner = owner
    self.graph_metadata: Dict = {}
    # Filters are dictionaries; property names are collected in sets.
    self.node_filters, self.edge_filters = {}, {}
    self.node_properties, self.edge_properties = set(), set()
    self.prefix_manager = PrefixManager()
    # No InfoRes context is attached at construction time.
    self.infores_context: Optional[InfoResContext] = None
def validate_edge_predicate(subject: str, object: str, data: dict) -> list:
    """
    Check the ``predicate`` property of an edge.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties

    Returns
    -------
    list
        A list of errors for a given edge

    """
    toolkit = get_toolkit()
    error_type = ErrorType.INVALID_EDGE_PREDICATE
    errors = []
    edge_id = f"{subject}-{object}"

    def flag(message):
        # Every finding of this check shares the same entity, type and level.
        errors.append(ValidationError(edge_id, error_type, message, MessageLevel.ERROR))

    edge_predicate = data.get('predicate')
    if edge_predicate is None:
        flag("Edge does not have an 'predicate' property")
        return errors
    if not isinstance(edge_predicate, str):
        flag(f"Edge property 'edge_predicate' expected to be of type 'string'")
        return errors

    # Only the local part of a CURIE is checked against the model.
    if PrefixManager.is_curie(edge_predicate):
        edge_predicate = PrefixManager.get_reference(edge_predicate)

    if not re.match(r"^([a-z_][^A-Z\s]+_?[a-z_][^A-Z\s]+)+$", edge_predicate):
        flag(f"Edge label '{edge_predicate}' is not in snake_case form")
        return errors

    p = toolkit.get_element(snakecase_to_sentencecase(edge_predicate))
    if p is None:
        flag(f"Edge label '{edge_predicate}' not in Biolink Model")
    elif edge_predicate != p.name and edge_predicate in p.aliases:
        flag(
            f"Edge label '{edge_predicate}' is actually an alias for {p.name}; "
            f"Should replace {edge_predicate} with {p.name}"
        )
    return errors
def validate_categories(node: str, data: dict) -> list:
    """
    Validate ``category`` field of a given node.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties

    Returns
    -------
    list
        A list of errors for a given node

    """
    toolkit = get_toolkit()
    error_type = ErrorType.INVALID_CATEGORY
    errors = []

    def flag(message):
        # Every finding of this check shares the same entity, type and level.
        errors.append(ValidationError(node, error_type, message, MessageLevel.ERROR))

    categories = data.get('category')
    if categories is None:
        flag("Node does not have a 'category' property")
        return errors
    if not isinstance(categories, list):
        flag(f"Node property 'category' expected to be of type {list}")
        return errors

    for category in categories:
        # Only the local part of a CURIE is checked against the model.
        if PrefixManager.is_curie(category):
            category = PrefixManager.get_reference(category)

        # A casing problem is reported, but the model lookup below still runs.
        if not re.match(r"^([A-Z][a-z\d]+)+$", category):
            flag(f"Category '{category}' is not in CamelCase form")

        formatted_category = camelcase_to_sentencecase(category)
        if not toolkit.is_category(formatted_category):
            flag(f"Category '{category}' not in Biolink Model")
            continue

        c = toolkit.get_element(formatted_category.lower())
        # The lookup may come back empty; only inspect name/aliases when an
        # element was actually returned, instead of failing on ``c.name``.
        if c and category != c.name and category in c.aliases:
            flag(
                f"Category {category} is actually an alias for {c.name}; "
                f"Should replace '{category}' with '{c.name}'"
            )
    return errors
def __init__(self, source_graph: nx.MultiDiGraph = None):
    """
    Bind to an existing graph, or create a new one, and set up namespaces.

    Parameters
    ----------
    source_graph: networkx.MultiDiGraph
        Graph to operate on; a new empty ``MultiDiGraph`` is created when omitted

    """
    # Compare against None explicitly: a networkx graph without nodes is
    # falsy, so a truthiness test would silently discard an empty graph
    # that the caller passed in and still holds a reference to.
    if source_graph is not None:
        self.graph = source_graph
    else:
        self.graph = nx.MultiDiGraph()

    self.graph_metadata = {}
    self.prefix_manager = PrefixManager()

    prefix_map = self.prefix_manager.prefix_map
    self.DEFAULT = Namespace(prefix_map[':'])
    # TODO: use OBO IRI from biolink model context once https://github.com/biolink/biolink-model/issues/211 is resolved
    self.OBO = Namespace('http://purl.obolibrary.org/obo/')
    self.OBAN = Namespace(prefix_map['OBAN'])
    self.PMID = Namespace(prefix_map['PMID'])
    self.BIOLINK = Namespace(prefix_map['biolink'])
def __init__(
    self,
    verbose: bool = False,
    progress_monitor: Optional[Callable[[GraphEntityType, List], None]] = None,
    schema: Optional[str] = None,
    error_log: str = None
):
    """
    Initialise error logging, store the caller's settings and load the
    toolkit, JSON-LD context, prefixes and required-property lists.

    Parameters
    ----------
    verbose: bool
        Stored as ``self.verbose``
    progress_monitor: Optional[Callable[[GraphEntityType, List], None]]
        Stored as ``self.progress_monitor``
    schema: Optional[str]
        Stored as ``self.schema``
    error_log: str
        Forwarded to ``ErrorDetecting.__init__``

    """
    ErrorDetecting.__init__(self, error_log)

    # settings supplied by the caller
    self.verbose: bool = verbose
    self.progress_monitor: Optional[
        Callable[[GraphEntityType, List], None]
    ] = progress_monitor

    # TODO: fix... this attribute is not used anywhere at the moment?
    self.schema: Optional[str] = schema

    # state derived at construction time
    self.validating_toolkit = self.get_toolkit()
    self.prefix_manager = PrefixManager()
    context = get_jsonld_context()
    self.jsonld = context
    self.prefixes = self.get_all_prefixes(context)
    self.required_node_properties = self.get_required_node_properties()
    self.required_edge_properties = self.get_required_edge_properties()
def uriref(self, identifier: str) -> URIRef:
    """
    Turn an identifier string into a rdflib.URIRef.

    Parameters
    ----------
    identifier: str
        Identifier as string.

    Returns
    -------
    rdflib.URIRef
        URIRef form of the input ``identifier``

    """
    # UUID URNs are used verbatim.
    if identifier.startswith('urn:uuid:'):
        return URIRef(identifier)

    # identifier is a property
    if identifier in reverse_property_mapping:
        return URIRef(reverse_property_mapping[identifier])

    # identifier is an entity
    if identifier.startswith(':'):
        # TODO: this should be handled upstream by prefixcommons-py
        return URIRef(self.DEFAULT.term(identifier.replace(':', '', 1)))

    expanded = self.prefix_manager.expand(identifier)
    if identifier != expanded:
        return URIRef(expanded)

    # Expansion left the identifier untouched: mint a term in the default
    # namespace after swapping out characters that do not belong in a local name.
    local_name = identifier
    if PrefixManager.is_curie(local_name):
        local_name = local_name.replace(':', '_')
    if ' ' in local_name:
        local_name = local_name.replace(' ', '_')
    return URIRef(self.DEFAULT.term(local_name))
def curie_lookup(curie: str) -> Optional[str]:
    """
    Find the label for a CURIE.

    Predefined prefixes are handled first; after that the CURIE is looked up
    in the CurieLookupService's CURIE map and then in its ontology graph.

    Parameters
    ----------
    curie: str
        A CURIE

    Returns
    -------
    Optional[str]
        The label corresponding to the given CURIE, or None when no source has it

    """
    service = get_curie_lookup_service()
    prefix = PrefixManager.get_prefix(curie)

    # For these vocabularies the label is the snake_cased local part.
    if prefix in ('OIO', 'OWL', 'owl', 'OBO', 'rdfs'):
        return stringcase.snakecase(curie.split(':', 1)[1])
    if curie in service.curie_map:
        return service.curie_map[curie]
    if curie in service.ontology_graph:
        return service.ontology_graph.nodes()[curie]['name']
    return None
def get_category(self, curie: str, node: dict) -> Optional[str]:
    """
    Work out the category of a node from its OBO namespace annotation,
    falling back to a mapping keyed on the CURIE prefix.

    Parameters
    ----------
    curie: str
        Curie for node
    node: dict
        Node data

    Returns
    -------
    Optional[str]
        Category for the given node CURIE.

    """
    # TODO: the mapping should be via biolink-model lookups
    category_by_prefix = {
        "HP": "biolink:PhenotypicFeature",
        "CHEBI": "biolink:ChemicalSubstance",
        "MONDO": "biolink:Disease",
        "UBERON": "biolink:AnatomicalEntity",
        "SO": "biolink:SequenceFeature",
        "CL": "biolink:Cell",
        "PR": "biolink:Protein",
        "NCBITaxon": "biolink:OrganismalEntity",
    }

    category = None
    # use meta.basicPropertyValues
    if "meta" in node and "basicPropertyValues" in node["meta"]:
        for p in node["meta"]["basicPropertyValues"]:
            if p["pred"] != self.HAS_OBO_NAMESPACE:
                continue
            category = p["val"]
            element = self.toolkit.get_element(category)
            if element:
                category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(element.name))}"
                continue
            # No element under that name: try the value as a mapping instead.
            element = self.toolkit.get_element_by_mapping(category)
            if element:
                category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(element))}"
            else:
                category = "biolink:OntologyClass"

    if not category or category == "biolink:OntologyClass":
        prefix = PrefixManager.get_prefix(curie)
        if prefix in category_by_prefix:
            category = category_by_prefix[prefix]
        else:
            # Unmapped prefix: the category is left as it is and a warning is logged.
            self.owner.log_error(
                entity=f"{str(category)} for node {curie}",
                error_type=ErrorType.MISSING_CATEGORY,
                message=f"Missing category; Defaulting to 'biolink:OntologyClass'",
                message_level=MessageLevel.WARNING
            )
    return category
def _add_attribute(self, attr_dict: Dict, key: str, value: str) -> None:
    """
    Adds an attribute to the attribute dictionary, respecting whether or not
    that attribute should be multi-valued.
    Multi-valued attributes will not contain duplicates.
    Some attributes are singular form of others. In such cases overflowing values
    will be placed into the correlating multi-valued attribute.
    For example, `name` attribute will hold only one value while any additional
    value will be stored as `synonym` attribute.

    Parameters
    ----------
    attr_dict: dict
        Dictionary representing the attribute set of a node or an edge in a networkx graph
    key: str
        The name of the attribute
    value: str
        The value of the attribute

    """
    if PrefixManager.is_iri(value):
        value = process_iri(value)

    if key in is_property_multivalued and is_property_multivalued[key]:
        if key not in attr_dict:
            attr_dict[key] = [value]
        elif value not in attr_dict[key]:
            attr_dict[key].append(value)
        return

    # The first `name` is kept as the name; only later, different values
    # overflow into `synonym`. Previously every `name` value was diverted to
    # `synonym`, so `name` itself was never stored.
    if key == 'name' and 'name' in attr_dict:
        if value != attr_dict['name']:
            self._add_attribute(attr_dict, 'synonym', value)
        return

    attr_dict[key] = value
def get_category(self, curie: str, node: dict) -> Optional[str]:
    """
    Get category for a given CURIE.

    The OBO namespace annotation in ``meta.basicPropertyValues`` is tried
    first; when that gives nothing more specific than
    ``biolink:OntologyClass``, a mapping keyed on the CURIE prefix is used.

    Parameters
    ----------
    curie: str
        Curie for node
    node: dict
        Node data

    Returns
    -------
    Optional[str]
        Category for the given node CURIE.

    """
    # TODO: the mapping should be via biolink-model lookups
    category_by_prefix = {
        'HP': "biolink:PhenotypicFeature",
        'CHEBI': "biolink:ChemicalSubstance",
        'MONDO': "biolink:Disease",
        'UBERON': "biolink:AnatomicalEntity",
        'SO': "biolink:SequenceFeature",
        'CL': "biolink:Cell",
        'PR': "biolink:Protein",
        'NCBITaxon': "biolink:OrganismalEntity",
    }

    category = None
    # use meta.basicPropertyValues
    if 'meta' in node and 'basicPropertyValues' in node['meta']:
        for p in node['meta']['basicPropertyValues']:
            if p['pred'] != self.HAS_OBO_NAMESPACE:
                continue
            category = p['val']
            element = self.toolkit.get_element(category)
            if element:
                category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(element.name))}"
                continue
            element = self.toolkit.get_element_by_mapping(category)
            if element:
                # NOTE(review): the mapping lookup appears to return the
                # element *name* (a plain string) rather than an element
                # object, in which case ``element.name`` raises
                # AttributeError. Accept either shape.
                element_name = getattr(element, 'name', element)
                category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(element_name))}"
            else:
                category = 'biolink:OntologyClass'

    if not category or category == 'biolink:OntologyClass':
        prefix = PrefixManager.get_prefix(curie)
        if prefix in category_by_prefix:
            category = category_by_prefix[prefix]
        else:
            # Unmapped prefix: the category is left as it is.
            log.debug(
                f"{curie} Could not find a category mapping for '{category}'; Defaulting to 'biolink:OntologyClass'"
            )
    return category
def test_prefix_manager():
    """
    A fresh PrefixManager has non-empty forward and reverse prefix maps,
    and its prefix map contains both 'biolink' and the empty prefix.
    """
    manager = PrefixManager()
    for mapping in (manager.prefix_map, manager.reverse_prefix_map):
        assert mapping
    for expected_prefix in ('biolink', ''):
        assert expected_prefix in manager.prefix_map
def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
    """
    This method should be used by all derived classes when adding an edge to the networkx.MultiDiGraph.
    This ensures that the `subject` and `object` identifiers are CURIEs, and that `edge_label` is in the correct form.

    Returns the CURIE identifiers used for the `subject` and `object` in the
    networkx.MultiDiGraph, and the processed `edge_label`.

    Parameters
    ----------
    subject_iri: rdflib.URIRef
        Subject IRI for the subject in a triple
    object_iri: rdflib.URIRef
        Object IRI for the object in a triple
    predicate_iri: rdflib.URIRef
        Predicate IRI for the predicate in a triple

    Returns
    -------
    Tuple[str, str, str]
        A 3-nary tuple (of the form subject, object, predicate) that represents the edge

    """
    s = self.add_node(subject_iri)
    o = self.add_node(object_iri)
    relation = self.prefix_manager.contract(predicate_iri)
    edge_label = process_iri(predicate_iri)

    if ' ' in edge_label:
        logging.debug("predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'".format(predicate_iri, edge_label))
        # Actually perform the replacement that the message announces;
        # previously the label was logged but left unchanged.
        edge_label = edge_label.replace(' ', '_')

    if edge_label.startswith(self.BIOLINK):
        logging.debug("predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix".format(predicate_iri, edge_label, self.BIOLINK))
        edge_label = edge_label.replace(self.BIOLINK, '')

    if PrefixManager.is_curie(edge_label):
        name = curie_lookup(edge_label)
        if name:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; Using its mapping instead: {}".format(predicate_iri, edge_label, name))
            edge_label = name
        else:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}".format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
            edge_label = self.DEFAULT_EDGE_LABEL

    kwargs = {
        'subject': s,
        'predicate': str(predicate_iri),
        'object': o,
        'relation': relation,
        'edge_label': f"biolink:{edge_label}",
    }
    if 'provided_by' in self.graph_metadata:
        kwargs['provided_by'] = self.graph_metadata['provided_by']

    # The edge key is built from the un-prefixed label; an edge already
    # stored under that key is left untouched.
    key = generate_edge_key(s, edge_label, o)
    if not self.graph.has_edge(s, o, key=key):
        self.graph.add_edge(s, o, key=key, **kwargs)
    # TODO: support append
    return s, o, edge_label
def __init__(self, verbose: bool = False):
    """
    Load the toolkit, JSON-LD context, prefixes and required-property lists.

    Parameters
    ----------
    verbose: bool
        Stored as ``self.verbose``

    """
    self.toolkit = get_toolkit()
    self.prefix_manager = PrefixManager()

    # The prefix list is derived from the JSON-LD context fetched here.
    context = get_jsonld_context()
    self.jsonld = context
    self.prefixes = Validator.get_all_prefixes(context)

    self.required_node_properties = Validator.get_required_node_properties()
    self.required_edge_properties = Validator.get_required_edge_properties()
    self.verbose = verbose
def test_process_predicate(query):
    """
    The first four items returned by process_predicate for ``query[0]``
    must equal ``query[1]`` through ``query[4]``.
    """
    manager = PrefixManager()
    outcome = process_predicate(manager, query[0])
    for position in range(4):
        assert outcome[position] == query[position + 1]
def _compile_prefix_stats(self, n: str):
    """
    Record the CURIE prefix of a node identifier, or log a warning
    when the identifier has no prefix.

    Parameters
    ----------
    n: str
        Node identifier

    """
    prefix = PrefixManager.get_prefix(n)
    if prefix:
        seen_prefixes = self.category_stats["id_prefixes"]
        if prefix not in seen_prefixes:
            seen_prefixes.add(prefix)
        return

    # No prefix could be extracted: report it as a warning.
    self.mkg.log_error(
        entity=n,
        error_type=ErrorType.MISSING_NODE_CURIE_PREFIX,
        message="Node 'id' has no CURIE prefix",
        message_level=MessageLevel.WARNING
    )
def validate_edge_property_values(
    self,
    subject: str,
    object: str,
    data: dict
):
    """
    Validate an edge property's value.

    Both endpoints get the same check: each must be a CURIE, and a
    non-empty CURIE prefix must be one of the known prefixes.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties (not inspected by this check)

    """
    error_type = ErrorType.INVALID_EDGE_PROPERTY_VALUE
    prefixes = self.get_all_prefixes()
    edge_id = f"{subject}->{object}"

    # One loop for both endpoints keeps the two checks identical. The object
    # check used to omit the ``prefix and`` guard that the subject check has,
    # so an empty prefix was reported for objects but not for subjects.
    for slot, identifier in (("subject", subject), ("object", object)):
        if not PrefixManager.is_curie(identifier):
            message = f"Edge property '{slot}' has a value '{identifier}' which is not a proper CURIE"
        else:
            prefix = PrefixManager.get_prefix(identifier)
            if not prefix or prefix in prefixes:
                continue
            message = (
                f"Edge property '{slot}' has a value '{identifier}' with a CURIE prefix "
                f"'{prefix}' that is not represented in Biolink Model JSON-LD context"
            )
        self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
def __init__(self, verbose: bool = False):
    """
    Set up the toolkit and prefix manager and download the JSON-LD context.

    Parameters
    ----------
    verbose: bool
        Stored as ``self.verbose``

    Raises
    ------
    Exception
        When the JSON-LD context cannot be fetched or parsed; the underlying
        error is attached as the cause

    """
    self.toolkit = get_toolkit()
    self.prefix_manager = PrefixManager()
    # These start out unset.
    self.prefixes = None
    self.required_node_properties = None
    self.required_edge_properties = None
    self.verbose = verbose
    try:
        self.jsonld = requests.get(CONTEXT_JSONLD).json()
    except Exception as exc:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit propagate, and chain the original
        # error so the real cause stays visible in the traceback.
        raise Exception('Unable to download JSON-LD context from {}'.format(CONTEXT_JSONLD)) from exc
def get_category_via_superclass(graph: BaseGraph, curie: str, load_ontology: bool = True) -> Set[str]:
    """
    Derive categories for a CURIE by walking its ``subclass_of`` ancestors
    until one of them maps onto the Biolink Model.

    Parameters
    ----------
    graph: kgx.graph.base_graph.BaseGraph
        Graph to traverse
    curie: str
        Input CURIE
    load_ontology: bool
        Determines whether to load ontology, based on CURIE prefix, or to simply
        rely on ``subclass_of`` hierarchy from graph

    Returns
    -------
    Set[str]
        A set containing one (or more) category for the given CURIE

    """
    log.debug("curie: {}".format(curie))
    collected = []
    toolkit = get_toolkit()

    if not PrefixManager.is_curie(curie):
        return set(collected)

    ancestors = get_ancestors(graph, curie, relations=['subclass_of'])
    if len(ancestors) == 0 and load_ontology:
        # Nothing in this graph: repeat the search once on the lookup
        # service's ontology graph, with further loading switched off.
        ontology_graph = get_curie_lookup_service().ontology_graph
        collected.extend(get_category_via_superclass(ontology_graph, curie, False))

    log.debug("Ancestors for CURIE {} via subClassOf: {}".format(curie, ancestors))

    visited = []
    for anc in ancestors:
        mapping = toolkit.get_by_mapping(anc)
        visited.append(anc)
        if not mapping:
            continue
        # there is direct mapping to BioLink Model
        log.debug("Ancestor {} mapped to {}".format(anc, mapping))
        # Names of every ancestor visited so far, then the mapped class's own ancestors.
        collected.extend(
            graph.nodes()[x]['name'] for x in visited if 'name' in graph.nodes()[x]
        )
        collected.extend(toolkit.ancestors(mapping))
        break

    return set(collected)
def analyse_node_category(self, n, data):
    """
    Update the category statistics for one node: total count, id prefixes
    seen, and per-source counts.

    Parameters
    ----------
    n:
        Node identifier
    data:
        Node properties; ``provided_by`` is read when present

    """
    prefix = PrefixManager.get_prefix(n)
    stats = self.category_stats
    stats['count'] += 1
    if prefix not in stats['id_prefixes']:
        stats['id_prefixes'].add(prefix)

    # Nodes without provenance are tallied under 'unknown'.
    if 'provided_by' not in data:
        stats['count_by_source']['unknown'] += 1
        return

    for s in data['provided_by']:
        by_source = stats['count_by_source']
        by_source[s] = by_source.get(s, 0) + 1
def _capture_prefix(self, n: str):
    """
    Count the CURIE prefix of a node identifier, or log a warning
    when the identifier has no prefix.

    Parameters
    ----------
    n: str
        Node identifier

    """
    prefix = PrefixManager.get_prefix(n)
    if prefix:
        counts = self.category_stats["count_by_id_prefix"]
        counts[prefix] = counts.get(prefix, 0) + 1
        return

    # No prefix could be extracted: report it as a warning.
    self.summary.log_error(
        entity=n,
        error_type=ErrorType.MISSING_NODE_CURIE_PREFIX,
        message="Node 'id' has no CURIE prefix",
        message_level=MessageLevel.WARNING
    )
def get_biolink_element(prefix_manager: PrefixManager, predicate: Any) -> Optional[Element]:
    """
    Look up the Biolink Model element for a predicate.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    predicate: Any
        The CURIE of a predicate

    Returns
    -------
    Optional[Element]
        The corresponding Biolink Model element

    """
    toolkit = get_toolkit()

    # Reduce the predicate to a bare reference: IRI -> CURIE -> local part.
    predicate_curie = (
        prefix_manager.contract(predicate) if prefix_manager.is_iri(predicate) else predicate
    )
    reference = (
        prefix_manager.get_reference(predicate_curie)
        if prefix_manager.is_curie(predicate_curie)
        else predicate_curie
    )

    element = toolkit.get_element(reference)
    if element:
        return element

    # Direct lookup failed: try the original predicate as a mapping.
    try:
        mapping = toolkit.get_element_by_mapping(predicate)
        if mapping:
            element = toolkit.get_element(mapping)
    except ValueError as e:
        log.error(e)
    return element
def add_edge_attribute(self, subject_iri: Union[URIRef, str], object_iri: URIRef, predicate_iri: URIRef, key: str, value: str) -> None:
    """
    Adds an attribute to an edge, while taking into account whether the attribute
    should be multi-valued.
    Multi-valued properties will not contain duplicates.

    The ``key`` may be a rdflib.URIRef or a URI string that maps onto a property name
    as defined in ``rdf_utils.property_mapping``.

    If the nodes in the edge does not exist then they will be created
    using ``subject_iri`` and ``object_iri``.

    If the edge itself does not exist then it will be created using
    ``subject_iri``, ``object_iri`` and ``predicate_iri``.

    Parameters
    ----------
    subject_iri: [rdflib.URIRef, str]
        The IRI of the subject node of an edge in rdflib.Graph
    object_iri: rdflib.URIRef
        The IRI of the object node of an edge in rdflib.Graph
    predicate_iri: rdflib.URIRef
        The IRI of the predicate representing an edge in rdflib.Graph
    key: str
        The name of the attribute. Can be a rdflib.URIRef or URI string
    value: str
        The value of the attribute

    """
    if key.lower() in is_property_multivalued:
        key = key.lower()
    else:
        if not isinstance(key, URIRef):
            key = URIRef(key)
        key = property_mapping.get(key)

    # Nothing to record for an unmapped property or a missing value.
    if key is None or value is None:
        return

    subject_curie = self.prefix_manager.contract(subject_iri)
    object_curie = self.prefix_manager.contract(object_iri)
    edge_label = process_iri(predicate_iri)
    if PrefixManager.is_curie(edge_label):
        edge_label = curie_lookup(edge_label)
    edge_key = generate_edge_key(subject_curie, edge_label, object_curie)
    attr_dict = self.graph.get_edge_data(subject_curie, object_curie, key=edge_key)

    if attr_dict is None:
        # The edge is not in the graph yet. Create it, as documented above,
        # and look it up under the identifiers and label that add_edge used.
        # Previously None was handed to _add_attribute here.
        s, o, label = self.add_edge(subject_iri, object_iri, predicate_iri)
        attr_dict = self.graph.get_edge_data(s, o, key=generate_edge_key(s, label, o))

    self._add_attribute(attr_dict, key, value)
def __init__(self, verbose: bool = False, progress_monitor: Optional[Callable[[GraphEntityType, List], None]] = None):
    """
    Store the caller's settings and load the toolkit, JSON-LD context,
    prefixes and required-property lists.

    Parameters
    ----------
    verbose: bool
        Stored as ``self.verbose``
    progress_monitor: Optional[Callable[[GraphEntityType, List], None]]
        Stored as ``self.progress_monitor``

    """
    # settings supplied by the caller
    self.verbose: bool = verbose
    self.progress_monitor: Optional[Callable[[GraphEntityType, List], None]] = progress_monitor

    # state derived at construction time
    self.toolkit = get_toolkit()
    self.prefix_manager = PrefixManager()
    context = get_jsonld_context()
    self.jsonld = context
    self.prefixes = Validator.get_all_prefixes(context)
    self.required_node_properties = Validator.get_required_node_properties()
    self.required_edge_properties = Validator.get_required_edge_properties()

    # Validation errors accumulate in this list.
    self.errors: List[ValidationError] = []