コード例 #1
0
ファイル: validator.py プロジェクト: biolink/kgx
    def get_required_edge_properties(toolkit: Optional[Toolkit] = None) -> list:
        """
        Collect the names of every required, non-deprecated edge property.

        Each edge property reported by the toolkit is looked up as an element;
        elements that are missing or carry a deprecation marker are ignored.
        The names of the remaining elements flagged as required are converted
        with ``sentencecase_to_snakecase`` before being returned.

        Parameters
        ----------
        toolkit: Optional[Toolkit]
            Toolkit to query. When omitted (or falsy) the toolkit returned by
            ``Validator.get_toolkit()`` is used instead.

        Returns
        -------
        list
            Converted names of the required edge properties, in the order the
            toolkit reported them

        """
        if not toolkit:
            toolkit = Validator.get_toolkit()

        required = []
        for property_name in toolkit.get_all_edge_properties():
            element = toolkit.get_element(property_name)
            # Skip unknown properties and anything marked as deprecated.
            if not element or element.deprecated is not None:
                continue
            # Elements without a `required` attribute count as optional.
            if getattr(element, "required", False):
                required.append(sentencecase_to_snakecase(element.name))
        return required
コード例 #2
0
ファイル: validator.py プロジェクト: STARInformatics/kgx
    def get_required_node_properties() -> list:
        """
        Collect the names of every required, non-deprecated node property.

        The toolkit obtained from ``get_toolkit()`` is asked for all node
        properties. An element is kept when it is flagged as required, or when
        its name is ``category`` (which is always treated as required here).
        Missing and deprecated elements are ignored.

        Returns
        -------
        list
            Names of the required node properties, converted with
            ``sentencecase_to_snakecase``

        """
        toolkit = get_toolkit()
        required = []
        for property_name in toolkit.get_all_node_properties():
            element = toolkit.get_element(property_name)
            # Skip unknown properties and anything marked as deprecated.
            if not element or element.deprecated is not None:
                continue
            # 'category' is included even when the model does not flag it.
            must_have = getattr(element, 'required', False) or element.name == 'category'
            if must_have:
                required.append(sentencecase_to_snakecase(element.name))
        return required
コード例 #3
0
ファイル: clique_merge.py プロジェクト: YaphetKG/kgx
    def validate_categories(self, clique: list) -> Tuple[str, list]:
        """
        For nodes in a clique, validate the category for each node to make sure that all nodes in a clique
        are of the same type.

        The first category of every categorised node is collected and handed to
        ``self.get_the_most_specific_category`` to pick the clique category and
        its ancestors. A node is reported as invalid when its first category is
        not among those ancestors (after ``sentencecase_to_snakecase``
        conversion). Nodes with a missing or empty ``category`` cannot be
        checked and are skipped rather than raising.

        Parameters
        ----------
        clique: list
            A list of nodes from a clique

        Returns
        -------
        tuple[str, list]
            A tuple of clique category string and a list of invalid nodes.
            ``(None, None)`` is returned when no node in the clique has a category.

        """
        invalid_nodes = []
        all_categories = []
        for node in clique:
            logging.info(node)
            node_data = self.clique_graph.nodes[node]
            if 'category' in node_data and len(node_data['category']) > 0:
                all_categories.append(node_data['category'][0])
        if len(all_categories) == 0:
            return None, None
        (clique_category, clique_category_ancestors
         ) = self.get_the_most_specific_category(all_categories)
        logging.debug("Most specific category: {}".format(clique_category))
        logging.debug("Most specific category ancestors: {}".format(
            clique_category_ancestors))
        # The ancestors do not depend on the node, so convert them only once.
        # TODO: this sentencecase to snakecase transition needs to be handled properly
        ancestors = [
            sentencecase_to_snakecase(x) for x in clique_category_ancestors
        ]
        for node in clique:
            data = self.clique_graph.nodes[node]
            # Mirror the guard used while collecting categories: a node without
            # a category has nothing to compare, so it must not crash the check.
            if 'category' not in data or len(data['category']) == 0:
                logging.debug(
                    "node {} has no category; skipping validation".format(node))
                continue
            node_category = data['category'][0]
            logging.debug("node_category: {}".format(node_category))
            logging.debug("clique ancestors: {}".format(ancestors))
            if node_category not in ancestors:
                invalid_nodes.append(node)
                logging.info(
                    "clique category '{}' does not match node: {}".format(
                        clique_category, data))
            # TODO: check if node category is a subclass of any of the ancestors via other ontologies
        logging.info("Invalid Nodes: {}".format(invalid_nodes))
        return clique_category, invalid_nodes
コード例 #4
0
    def get_required_edge_properties() -> list:
        """
        Collect the names of every required edge property.

        The children of the ``association slot`` element are looked up through
        the toolkit returned by ``get_toolkit()``; those flagged as required
        contribute their name to the result.

        Returns
        -------
        list
            Names of the required edge properties, converted with
            ``sentencecase_to_snakecase``

        """
        toolkit = get_toolkit()
        elements = (
            toolkit.get_element(slot)
            for slot in toolkit.children('association slot')
        )
        # TODO: this should be handled by bmt
        return [
            sentencecase_to_snakecase(element.name)
            for element in elements
            if getattr(element, 'required', False)
        ]
コード例 #5
0
    def get_required_node_properties(self) -> list:
        """
        Return the required node properties, computing them on first use.

        The result is stored on ``self.required_node_properties``; later calls
        return that stored list without querying the toolkit again. On the
        first call the children of the ``node property`` element are looked up
        via ``self.toolkit`` and those flagged as required are kept.

        Returns
        -------
        list
            Names of the required node properties, converted with
            ``sentencecase_to_snakecase``

        """
        if self.required_node_properties is not None:
            return self.required_node_properties

        elements = (
            self.toolkit.get_element(slot)
            for slot in self.toolkit.children('node property')
        )
        # TODO: this should be handled by bmt
        self.required_node_properties = [
            sentencecase_to_snakecase(element.name)
            for element in elements
            if getattr(element, 'required', False)
        ]
        return self.required_node_properties
コード例 #6
0
ファイル: test_kgx_utils.py プロジェクト: vemonet/kgx
def test_sentencecase_to_snakecase():
    """
    Check that a space-separated phrase is converted to snake_case.
    """
    assert sentencecase_to_snakecase('named thing') == 'named_thing'
コード例 #7
0
ファイル: transformer.py プロジェクト: LucaCappelletti94/kgx
    def categorize(self):
        """
        Find and validate category for every node in self.graph

        For a node that has a ``category`` field, each listed category is
        resolved in this order: a direct ``get_biolink_mapping`` hit, then a
        mapping of its ``ADDITIONAL_LABELS`` alias, otherwise a lookup through
        ``get_category_via_superclass``. For a node without the field, the
        targets of its ``subclass_of`` edges are resolved through
        ``get_category_via_superclass`` instead. Edges that carry no
        ``edge_label`` are ignored.

        The resolved names are converted with ``sentencecase_to_snakecase`` and
        written to the ``category`` node attribute, falling back to
        ``named_thing`` when nothing was resolved. The original value of
        ``category`` is kept under ``_old_category``.
        """
        node_to_categories = {}
        preserve = {}
        for n, data in self.graph.nodes(data=True):
            logging.info("Processing node {}".format(n))
            new_categories = set()
            if 'category' in data:
                categories = data['category']
                preserve[n] = categories
                for category in categories:
                    element = get_biolink_mapping(category)
                    if element is not None:
                        # there is a direct mapping to a BioLink Model class
                        mapped_category = element['name']
                        logging.debug(
                            "Category: {} has a direct mapping to BioLink Model class {}"
                            .format(category, mapped_category))
                        new_categories.add(mapped_category)
                    elif category in ADDITIONAL_LABELS:
                        # take a look at an additional list of mappings
                        alias = ADDITIONAL_LABELS[category]
                        element = get_biolink_mapping(alias)
                        if element is not None:
                            mapped_category = element['name']
                            logging.debug(
                                "Category: {} mapped over to {} has a direct mapping to BioLink Model class {}"
                                .format(category, alias, mapped_category))
                            new_categories.add(mapped_category)
                    else:
                        # subClassOf traversal required
                        # assuming that the graph contains subClassOf edges
                        # and the node subClassOf x
                        new_categories.update(
                            get_category_via_superclass(self.graph, category))
            else:
                # try via subClassOf
                # subClassOf traversal required
                # assuming that the graph contains subClassOf edges
                # and the node subClassOf x
                logging.info(
                    "node doesn't have a category field; trying to infer category via subclass_of axiom"
                )
                for u, v, edge_data in self.graph.edges(n, data=True):
                    logging.info("u: {} v: {} data: {}".format(
                        u, v, edge_data))
                    # .get(): an edge without a label cannot be a subclass_of
                    # edge and must not abort categorization with a KeyError.
                    if edge_data.get('edge_label') == 'subclass_of':
                        new_categories.update(
                            get_category_via_superclass(self.graph, v))

            output_categories = [
                sentencecase_to_snakecase(x) for x in new_categories
            ]
            if not output_categories:
                # Nothing could be resolved: fall back to the default category.
                output_categories.append('named_thing')
            logging.debug("Output categories: {}".format(output_categories))
            node_to_categories[n] = output_categories
        nx.set_node_attributes(self.graph, node_to_categories, 'category')
        nx.set_node_attributes(self.graph, preserve, '_old_category')
コード例 #8
0
ファイル: test_kgx_utils.py プロジェクト: STARInformatics/kgx
def test_sentencecase_to_snakecase():
    """
    Check that a space-separated phrase is converted to snake_case.
    """
    converted = sentencecase_to_snakecase('named thing')
    expected = 'named_thing'
    assert converted == expected