def validate_edge_predicate(
    self,
    subject: str,
    object: str,
    data: dict,
    toolkit: Optional[Toolkit] = None,
):
    """
    Check the ``predicate`` property of an edge and log any problem found.

    An error is logged through ``self.log_error`` when the predicate is
    missing, is not a string, is not written in snake_case, is not known
    to the toolkit, or is an alias of another element.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties
    toolkit: Optional[Toolkit]
        Optional externally provided toolkit
        (default: use Validator class defined toolkit)

    """
    if not toolkit:
        toolkit = Validator.get_toolkit()
    error_type = ErrorType.INVALID_EDGE_PREDICATE

    def report(message):
        # The edge identifier is only formatted when an error is emitted.
        self.log_error(
            f"{subject}->{object}", error_type, message, MessageLevel.ERROR
        )

    edge_predicate = data.get("predicate")
    if edge_predicate is None:
        report("Edge does not have an 'predicate' property")
        return
    if not isinstance(edge_predicate, str):
        report(
            "Edge property 'edge_predicate' is expected to be of type 'string'"
        )
        return

    # Validate the local part only when the predicate is given as a CURIE.
    if PrefixManager.is_curie(edge_predicate):
        edge_predicate = PrefixManager.get_reference(edge_predicate)

    if not re.match(r"^([a-z_][^A-Z\s]+_?[a-z_][^A-Z\s]+)+$", edge_predicate):
        report(f"Edge predicate '{edge_predicate}' is not in snake_case form")
        return

    element = toolkit.get_element(snakecase_to_sentencecase(edge_predicate))
    if element is None:
        report(f"Edge predicate '{edge_predicate}' is not in Biolink Model")
    elif edge_predicate != element.name and edge_predicate in element.aliases:
        report(
            f"Edge predicate '{edge_predicate}' is actually an alias for {element.name}; "
            f"Should replace {edge_predicate} with {element.name}"
        )
def validate_categories(
    self, node: str, data: dict, toolkit: Optional[Toolkit] = None
):
    """
    Check the ``category`` property of a node and log any problem found.

    An error is logged through ``self.log_error`` when the property is
    missing or is not a list. Each listed category is then checked for
    CamelCase form, for being a mixin, for being a category known to the
    toolkit, and for being an alias of another element.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties
    toolkit: Optional[Toolkit]
        Optional externally provided toolkit
        (default: use Validator class defined toolkit)

    """
    if not toolkit:
        toolkit = Validator.get_toolkit()
    error_type = ErrorType.INVALID_CATEGORY

    def report(message):
        self.log_error(node, error_type, message, MessageLevel.ERROR)

    categories = data.get("category")
    if categories is None:
        report("Node does not have a 'category' property")
        return
    if not isinstance(categories, list):
        report(f"Node property 'category' is expected to be of type {list}")
        return

    for category in categories:
        # Validate the local part only when the category is given as a CURIE.
        if PrefixManager.is_curie(category):
            category = PrefixManager.get_reference(category)

        if not re.match(r"^([A-Z][a-z\d]+)+$", category):
            # A badly-cased category is reported, then still checked
            # against the model below.
            report(f"Category '{category}' is not in CamelCase form")

        formatted_category = camelcase_to_sentencecase(category)
        if toolkit.is_mixin(formatted_category):
            report(f"Category '{category}' is a mixin in the Biolink Model")
            continue
        if not toolkit.is_category(formatted_category):
            report(
                f"Category '{category}' is unknown in the current Biolink Model"
            )
            continue

        element = toolkit.get_element(formatted_category.lower())
        if element and category != element.name and category in element.aliases:
            report(
                f"Category {category} is actually an alias for {element.name}; "
                f"Should replace '{category}' with '{element.name}'"
            )
def validate_edge_predicate(subject: str, object: str, data: dict) -> list:
    """
    Check the ``predicate`` property of an edge and collect any problem found.

    A ``ValidationError`` is produced when the predicate is missing, is not
    a string, is not written in snake_case, is not known to the toolkit,
    or is an alias of another element.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties

    Returns
    -------
    list
        A list of errors for a given edge

    """
    toolkit = get_toolkit()
    error_type = ErrorType.INVALID_EDGE_PREDICATE
    errors = []

    def record(message):
        # The edge identifier is only formatted when an error is recorded.
        errors.append(
            ValidationError(
                f"{subject}-{object}", error_type, message, MessageLevel.ERROR
            )
        )

    edge_predicate = data.get('predicate')
    if edge_predicate is None:
        record("Edge does not have an 'predicate' property")
        return errors
    if not isinstance(edge_predicate, str):
        record("Edge property 'edge_predicate' expected to be of type 'string'")
        return errors

    # Validate the local part only when the predicate is given as a CURIE.
    if PrefixManager.is_curie(edge_predicate):
        edge_predicate = PrefixManager.get_reference(edge_predicate)

    if not re.match(r"^([a-z_][^A-Z\s]+_?[a-z_][^A-Z\s]+)+$", edge_predicate):
        record(f"Edge label '{edge_predicate}' is not in snake_case form")
        return errors

    element = toolkit.get_element(snakecase_to_sentencecase(edge_predicate))
    if element is None:
        record(f"Edge label '{edge_predicate}' not in Biolink Model")
    elif edge_predicate != element.name and edge_predicate in element.aliases:
        record(
            f"Edge label '{edge_predicate}' is actually an alias for {element.name}; "
            f"Should replace {edge_predicate} with {element.name}"
        )
    return errors
def validate_categories(node: str, data: dict) -> list:
    """
    Validate ``category`` field of a given node.

    A ``ValidationError`` is produced when the property is missing or is
    not a list. Each listed category is then checked for CamelCase form,
    for being a category known to the toolkit, and for being an alias of
    another element.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties

    Returns
    -------
    list
        A list of errors for a given node

    """
    toolkit = get_toolkit()
    error_type = ErrorType.INVALID_CATEGORY
    errors = []
    categories = data.get('category')
    if categories is None:
        message = "Node does not have a 'category' property"
        errors.append(
            ValidationError(node, error_type, message, MessageLevel.ERROR)
        )
    elif not isinstance(categories, list):
        message = f"Node property 'category' expected to be of type {list}"
        errors.append(
            ValidationError(node, error_type, message, MessageLevel.ERROR)
        )
    else:
        for category in categories:
            # Validate the local part only when the category is a CURIE.
            if PrefixManager.is_curie(category):
                category = PrefixManager.get_reference(category)
            m = re.match(r"^([A-Z][a-z\d]+)+$", category)
            if not m:
                # category is not CamelCase; it is still checked against
                # the model below
                message = f"Category '{category}' is not in CamelCase form"
                errors.append(
                    ValidationError(node, error_type, message, MessageLevel.ERROR)
                )
            formatted_category = camelcase_to_sentencecase(category)
            if not toolkit.is_category(formatted_category):
                message = f"Category '{category}' not in Biolink Model"
                errors.append(
                    ValidationError(node, error_type, message, MessageLevel.ERROR)
                )
            else:
                c = toolkit.get_element(formatted_category.lower())
                # The lookup may yield nothing even for a known category;
                # skip the alias check instead of failing on ``c.name``.
                if c is not None and category != c.name and category in c.aliases:
                    message = f"Category {category} is actually an alias for {c.name}; Should replace '{category}' with '{c.name}'"
                    errors.append(
                        ValidationError(node, error_type, message, MessageLevel.ERROR)
                    )
    return errors
def get_biolink_element(
    prefix_manager: PrefixManager, predicate: Any
) -> Optional[Element]:
    """
    Look up the Biolink Model element that corresponds to a predicate.

    The predicate is contracted when it is an IRI and reduced to its
    reference when it is a CURIE; the result is looked up in the toolkit.
    If that yields nothing, a lookup by mapping on the original predicate
    is attempted.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    predicate: Any
        The CURIE of a predicate

    Returns
    -------
    Optional[Element]
        The corresponding Biolink Model element

    """
    toolkit = get_toolkit()

    predicate_curie = (
        prefix_manager.contract(predicate)
        if prefix_manager.is_iri(predicate)
        else predicate
    )
    reference = (
        prefix_manager.get_reference(predicate_curie)
        if prefix_manager.is_curie(predicate_curie)
        else predicate_curie
    )

    element = toolkit.get_element(reference)
    if element:
        return element

    # Fall back to a mapping-based lookup; a ValueError raised here is
    # logged and the (empty) direct lookup result is returned.
    try:
        mapped_name = toolkit.get_element_by_mapping(predicate)
        if mapped_name:
            element = toolkit.get_element(mapped_name)
    except ValueError as err:
        log.error(err)
    return element
def process_predicate(
    prefix_manager: PrefixManager,
    p: Union[URIRef, str],
    predicate_mapping: Optional[Dict] = None,
) -> Tuple:
    """
    Resolve a predicate against the Biolink Model, falling back to
    user-supplied predicate mappings when no element is found.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    p: Union[URIRef, str]
        The predicate
    predicate_mapping: Optional[Dict]
        Predicate mappings

    Returns
    -------
    Tuple[str, str, str, str]
        A tuple that contains the Biolink CURIE (if available), the Biolink slot_uri CURIE (if available),
        the CURIE form of p, the reference of p

    """
    # NOTE(review): block nesting was reconstructed from a flattened
    # source line - confirm against the original layout.
    predicate = prefix_manager.contract(str(p)) if prefix_manager.is_iri(p) else None

    if prefix_manager.is_curie(p):
        property_name = prefix_manager.get_reference(p)
        predicate = p
    elif predicate and prefix_manager.is_curie(predicate):
        property_name = prefix_manager.get_reference(predicate)
    else:
        # Neither p nor its contracted form is a CURIE: use p verbatim
        # and build a CURIE with an empty prefix.
        property_name = p
        predicate = f":{p}"

    element = get_biolink_element(prefix_manager, p)
    canonical_uri = None

    if not element:
        # no mapping to biolink model;
        # look at predicate mappings
        element_uri = None
        if predicate_mapping and p in predicate_mapping:
            property_name = predicate_mapping[p]
            predicate = f":{property_name}"
        return element_uri, canonical_uri, predicate, property_name

    if isinstance(element, SlotDefinition):
        # predicate corresponds to a biolink slot
        if element.definition_uri:
            element_uri = prefix_manager.contract(element.definition_uri)
        else:
            element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
        if element.slot_uri:
            canonical_uri = element.slot_uri
    elif isinstance(element, ClassDefinition):
        # this will happen only when the IRI is actually
        # a reference to a class
        element_uri = prefix_manager.contract(element.class_uri)
    else:
        element_uri = f"biolink:{sentencecase_to_camelcase(element.name)}"

    # Elements with biolink:Attribute among their ancestors get a
    # snake_case URI, overriding whatever was chosen above.
    if "biolink:Attribute" in get_biolink_ancestors(element.name):
        element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
    if not predicate:
        predicate = element_uri
    return element_uri, canonical_uri, predicate, property_name
def test_get_reference(query):
    """
    Check that ``PrefixManager.get_reference`` maps ``query[0]`` to ``query[1]``.
    """
    actual = PrefixManager.get_reference(query[0])
    assert actual == query[1]