def get_required_edge_properties(toolkit: Optional[Toolkit] = None) -> list:
    """
    Collect the names of all required edge properties, as defined by Biolink Model.

    Deprecated elements and elements the toolkit cannot resolve are ignored.
    Names are returned in snake_case.

    Parameters
    ----------
    toolkit: Optional[Toolkit]
        Optional externally provided toolkit
        (default: the toolkit returned by ``Validator.get_toolkit()``)

    Returns
    -------
    list
        A list of required edge properties

    """
    if not toolkit:
        toolkit = Validator.get_toolkit()

    required = []
    for slot_name in toolkit.get_all_edge_properties():
        slot = toolkit.get_element(slot_name)
        # Skip anything unresolved or marked as deprecated.
        if not slot or slot.deprecated is not None:
            continue
        if getattr(slot, "required", False):
            required.append(sentencecase_to_snakecase(slot.name))
    return required
def get_required_node_properties() -> list:
    """
    Collect the names of all required node properties, as defined by Biolink Model.

    Deprecated elements and elements the toolkit cannot resolve are ignored.
    The ``category`` element is always included, whether or not it is
    flagged as required. Names are returned in snake_case.

    Returns
    -------
    list
        A list of required node properties

    """
    toolkit = get_toolkit()
    required = []
    for slot_name in toolkit.get_all_node_properties():
        slot = toolkit.get_element(slot_name)
        # Skip anything unresolved or marked as deprecated.
        if not slot or slot.deprecated is not None:
            continue
        # 'category' is treated as required even without the 'required' flag.
        if getattr(slot, 'required', False) or slot.name == 'category':
            required.append(sentencecase_to_snakecase(slot.name))
    return required
def validate_categories(self, clique: list) -> Tuple[str, list]:
    """
    For nodes in a clique, validate the category for each node to make
    sure that all nodes in a clique are of the same type.

    Only the first entry of each node's ``category`` list is considered.
    Nodes with a missing or empty ``category`` do not contribute to the
    clique category and are skipped during validation, since there is
    nothing to compare against the clique category's ancestors.

    Parameters
    ----------
    clique: list
        A list of nodes from a clique

    Returns
    -------
    tuple[str, list]
        A tuple of clique category string and a list of invalid nodes.
        ``(None, None)`` is returned when no node in the clique has a category.

    """
    invalid_nodes = []
    all_categories = []
    # First category of every node that has one; reused by the validation pass.
    node_categories = {}
    for node in clique:
        logging.info(node)
        node_data = self.clique_graph.nodes[node]
        if 'category' in node_data and len(node_data['category']) > 0:
            first_category = node_data['category'][0]
            all_categories.append(first_category)
            node_categories[node] = first_category

    if len(all_categories) == 0:
        return None, None

    (clique_category, clique_category_ancestors) = self.get_the_most_specific_category(all_categories)
    logging.debug("Most specific category: {}".format(clique_category))
    logging.debug("Most specific category ancestors: {}".format(clique_category_ancestors))

    # TODO: this sentencecase to snakecase transition needs to be handled properly
    # The ancestor list is the same for every node, so it is computed once.
    ancestors = [sentencecase_to_snakecase(x) for x in clique_category_ancestors]
    logging.debug("clique ancestors: {}".format(ancestors))

    for node in clique:
        if node not in node_categories:
            # Indexing data['category'][0] here would raise for such nodes.
            logging.debug("node {} has no category; skipping category validation".format(node))
            continue
        data = self.clique_graph.nodes[node]
        node_category = node_categories[node]
        logging.debug("node_category: {}".format(node_category))
        if node_category not in ancestors:
            invalid_nodes.append(node)
            logging.info("clique category '{}' does not match node: {}".format(clique_category, data))
        # TODO: check if node category is a subclass of any of the ancestors via other ontologies

    logging.info("Invalid Nodes: {}".format(invalid_nodes))
    return clique_category, invalid_nodes
def get_required_edge_properties() -> list:
    """
    Collect the names of all required edge properties, as defined by Biolink Model.

    The candidates are the children of the ``association slot`` element.
    Names are returned in snake_case.

    Returns
    -------
    list
        A list of required edge properties

    """
    toolkit = get_toolkit()
    # Lazily resolve each child so lookups stay interleaved with the filtering.
    elements = (toolkit.get_element(name) for name in toolkit.children('association slot'))
    # TODO: this should be handled by bmt
    return [
        sentencecase_to_snakecase(element.name)
        for element in elements
        if getattr(element, 'required', False)
    ]
def get_required_node_properties(self) -> list:
    """
    Collect the names of all required node properties, as defined by Biolink Model.

    The candidates are the children of the ``node property`` element.
    The result is computed once and stored on
    ``self.required_node_properties``; later calls return the stored list.

    Returns
    -------
    list
        A list of required node properties

    """
    # Cache hit: nothing to compute.
    if self.required_node_properties is not None:
        return self.required_node_properties

    found = []
    for name in self.toolkit.children('node property'):
        element = self.toolkit.get_element(name)
        if getattr(element, 'required', False):
            # TODO: this should be handled by bmt
            found.append(sentencecase_to_snakecase(element.name))
    self.required_node_properties = found
    return self.required_node_properties
def test_sentencecase_to_snakecase():
    """
    Check that a space-separated name is converted to snake_case.
    """
    converted = sentencecase_to_snakecase('named thing')
    expected = 'named_thing'
    assert converted == expected
def categorize(self):
    """
    Find and validate category for every node in self.graph

    For a node that has a ``category`` field, each category is resolved in
    this order: a direct Biolink Model mapping, then a mapping of its
    ``ADDITIONAL_LABELS`` entry, otherwise a lookup via its superclasses.
    For a node without a ``category`` field, categories are inferred from
    the targets of its ``subclass_of`` edges.

    The resulting categories are written to the ``category`` node attribute
    in snake_case, falling back to ``named_thing`` when nothing was found.
    The prior ``category`` value of each node that had one is kept under
    ``_old_category``.
    """
    resolved = {}
    previous = {}
    for node_id, attrs in self.graph.nodes(data=True):
        logging.info("Processing node {}".format(node_id))
        found = set()
        if 'category' not in attrs:
            # try via subClassOf
            # subClassOf traversal required
            # assuming that the graph contains subClassOf edges
            # and the node subClassOf x
            logging.info("node doesn't have a category field; trying to infer category via subclass_of axiom")
            for u, v, edge_data in self.graph.edges(node_id, data=True):
                logging.info("u: {} v: {} data: {}".format(u, v, edge_data))
                if edge_data['edge_label'] == 'subclass_of':
                    found.update(get_category_via_superclass(self.graph, v))
        else:
            categories = attrs['category']
            previous[node_id] = categories
            for category in categories:
                element = get_biolink_mapping(category)
                if element is not None:
                    # there is a direct mapping to a BioLink Model class
                    mapped = element['name']
                    logging.debug(
                        "Category: {} has a direct mapping to BioLink Model class {}".format(category, mapped)
                    )
                    found.add(mapped)
                elif category in ADDITIONAL_LABELS:
                    # take a look at an additional list of mappings
                    alias = ADDITIONAL_LABELS[category]
                    element = get_biolink_mapping(alias)
                    if element is not None:
                        mapped = element['name']
                        logging.debug(
                            "Category: {} mapped over to {} has a direct mapping to BioLink Model class {}".format(
                                category, ADDITIONAL_LABELS[category], mapped
                            )
                        )
                        found.add(mapped)
                else:
                    # subClassOf traversal required
                    # assuming that the graph contains subClassOf edges
                    # and the node subClassOf x
                    found.update(get_category_via_superclass(self.graph, category))

        # Default to the root category when nothing could be resolved.
        output = [sentencecase_to_snakecase(name) for name in found] or ['named_thing']
        logging.debug("Output categories: {}".format(output))
        resolved[node_id] = output

    nx.set_node_attributes(self.graph, resolved, 'category')
    nx.set_node_attributes(self.graph, previous, '_old_category')
def test_sentencecase_to_snakecase():
    """
    Verify that sentence case text is converted to snake_case.
    """
    assert sentencecase_to_snakecase('named thing') == 'named_thing'