예제 #1
0
def test_get_biolink_element():
    """
    Look up one element under two spellings and check both lookups agree.

    The plain name ``'gene'`` must resolve to an element whose ``name`` is
    ``'gene'``, and the prefixed form ``'biolink:Gene'`` must resolve to an
    element that compares equal to it.
    """
    # TODO: Parameterize
    by_plain_name = get_biolink_element('gene')
    assert by_plain_name is not None
    assert by_plain_name.name == 'gene'

    by_prefixed_name = get_biolink_element('biolink:Gene')
    assert by_prefixed_name is not None
    assert by_prefixed_name == by_plain_name
예제 #2
0
파일: clique_merge.py 프로젝트: vemonet/kgx
    def get_the_most_specific_category(self, categories: list) -> Tuple[str, list]:
        """
        Pick the category whose ancestor list is the longest.

        Each category is resolved with ``get_biolink_element``; categories that
        do not resolve are skipped. For the rest, the ancestors of the resolved
        element's name are fetched and the category with strictly more
        ancestors than any seen before it wins (the first one wins a tie).

        Parameters
        ----------
        categories: list
            A list of categories

        Returns
        -------
        tuple[str, list]
            The winning category, exactly as it was given in ``categories``, and
            its list of ancestors. ``(None, [])`` when no category resolves to
            an element with at least one ancestor.

        """
        # TODO: could be integrated into update_categories method
        best_category = None
        best_ancestors = []
        for candidate in categories:
            logging.debug("category: {}".format(candidate))
            element = get_biolink_element(candidate)
            if not element:
                # not a BioLink Model class (nor an alias of one): nothing to compare
                continue
            ancestors = get_biolink_ancestors(element['name'])
            logging.debug("ancestors: {}".format(ancestors))
            if len(best_ancestors) < len(ancestors):
                # longer ancestor list == more specific category
                best_category, best_ancestors = candidate, ancestors
        return best_category, best_ancestors
예제 #3
0
def test_get_biolink_element(query):
    """
    Check that a lookup key resolves to an element with the expected name.

    ``query[0]`` is the key handed to ``get_biolink_element`` and ``query[1]``
    is the ``name`` the resulting element must carry.
    """
    element = get_biolink_element(query[0])
    assert element is not None
    actual_name = element.name
    assert actual_name == query[1]
예제 #4
0
def check_categories(
    categories: List, closure: List, category_mapping: Optional[Dict[str, str]] = None
) -> Tuple[List, List, List]:
    """
    Sort the entries of ``categories`` into valid / invalid buckets.

    Each entry is handled as follows, in this order:

    1. If the toolkit reports it as a mixin, it goes to the invalid categories.
    2. If ``get_biolink_element`` finds no element for it, it goes to the
       invalid categories.
    3. If the formatted name of its element is in ``closure``, it goes to the
       valid biolink categories.
    4. Otherwise, without a ``category_mapping`` it goes to the invalid biolink
       categories. With a ``category_mapping`` it goes to the invalid biolink
       categories only when its mapped value (or the entry itself when it has
       no mapping) is not in ``closure``; if that value is in ``closure`` the
       entry is not added to any of the three lists.

    Parameters
    ----------
    categories: List
        A list of categories to check
    closure: List
        A list of nodes in a clique
    category_mapping: Optional[Dict[str, str]]
        A map that provides mapping from a non-biolink category to a biolink category

    Returns
    -------
    Tuple[List, List, List]
        A tuple consisting of valid biolink categories, invalid biolink categories, and invalid categories

    """
    accepted = []
    rejected_biolink = []
    rejected = []
    toolkit = get_toolkit()
    for candidate in categories:
        # a declared category that is actually a mixin is rejected outright
        if toolkit.is_mixin(candidate):
            rejected.append(candidate)
            continue

        element = get_biolink_element(candidate)
        if not element:
            log.warning(f"category '{candidate}' is not in Biolink Model")
            rejected.append(candidate)
            continue

        mapped_category = format_biolink_category(element["name"])
        if mapped_category in closure:
            accepted.append(candidate)
            continue

        log.warning(f"category '{mapped_category}' not in closure: {closure}")
        if not category_mapping:
            rejected_biolink.append(candidate)
            continue

        # fall back to the caller-supplied mapping; unmapped entries map to themselves
        mapped = category_mapping[candidate] if candidate in category_mapping else candidate
        if mapped not in closure:
            log.warning(
                f"category '{mapped_category}' is not in category_mapping."
            )
            rejected_biolink.append(candidate)
    return accepted, rejected_biolink, rejected
예제 #5
0
def check_categories(
    categories: List,
    closure: List,
    category_mapping: Optional[Dict[str,
                                    str]] = None) -> Tuple[List, List, List]:
    """
    Sort the entries of ``categories`` into valid / invalid buckets.

    Each entry is handled as follows, in this order:

    1. If ``get_biolink_element`` finds no element for it, it goes to the
       invalid categories.
    2. If the formatted name of its element is in ``closure``, it goes to the
       valid biolink categories.
    3. Otherwise, without a ``category_mapping`` it goes to the invalid biolink
       categories. With a ``category_mapping`` it goes to the invalid biolink
       categories only when its mapped value (or the entry itself when it has
       no mapping) is not in ``closure``; if that value is in ``closure`` the
       entry is not added to any of the three lists.

    Parameters
    ----------
    categories: List
        A list of categories to check
    closure: List
        A list of nodes in a clique
    category_mapping: Optional[Dict[str, str]]
        A map that provides mapping from a non-biolink category to a biolink category

    Returns
    -------
    Tuple[List, List, List]
        A tuple consisting of valid biolink categories, invalid biolink categories, and invalid categories

    """
    accepted = []
    rejected_biolink = []
    rejected = []
    for candidate in categories:
        element = get_biolink_element(candidate)
        if not element:
            log.warning(f"category '{candidate}' is not in Biolink Model")
            rejected.append(candidate)
            continue

        mapped_category = format_biolink_category(element['name'])
        if mapped_category in closure:
            accepted.append(candidate)
            continue

        log.warning(
            f"category '{mapped_category}' not in closure: {closure}")
        if not category_mapping:
            rejected_biolink.append(candidate)
            continue

        # fall back to the caller-supplied mapping; unmapped entries map to themselves
        mapped = category_mapping[candidate] if candidate in category_mapping else candidate
        if mapped not in closure:
            log.warning(
                f"category '{mapped_category}' is not in category_mapping."
            )
            rejected_biolink.append(candidate)
    return accepted, rejected_biolink, rejected
예제 #6
0
파일: clique_merge.py 프로젝트: vemonet/kgx
    def update_categories(self, clique: list):
        """
        Recompute the ``category`` attribute of every node in a clique.

        For each node, the declared categories are read from the node's
        ``category`` attribute in ``self.clique_graph`` or, when that attribute
        is absent, obtained from ``self.get_category_from_equivalence``. Every
        category is resolved with ``get_biolink_element``; among those that
        resolve, the longest list returned by ``get_biolink_ancestors`` replaces
        the node's ``category`` value. If none resolves, the value is an empty
        set.

        A category is recorded under ``_invalid_category`` when it does not
        resolve to an element, or when neither its element name nor its
        ``MAPPING`` substitute (the category itself when it has no entry in
        ``MAPPING``) appears in the chosen ancestor list.

        The resulting attributes are written to both ``self.clique_graph`` and
        ``self.target_graph``.

        Parameters
        ----------
        clique: list
            A list of nodes from a clique

        """
        updated_node_categories = {}
        for node in clique:
            data = self.clique_graph.nodes[node]
            # diagnostic output belongs in the debug log, not on stdout
            logging.debug("Node data: {}".format(data))
            if 'category' in data:
                categories = data['category']
            else:
                # get category from equivalence
                categories = self.get_category_from_equivalence(node, data)

            # First pass: keep the longest ancestor list seen so far and note
            # every category that the BioLink Model does not know about.
            extended_categories = set()
            invalid_categories = []
            for category in categories:
                logging.debug("Looking at category: {}".format(category))
                element = get_biolink_element(category)
                if element:
                    # category exists in BioLink Model as a class or as an alias to a class
                    mapped_category = element['name']
                    ancestors = get_biolink_ancestors(mapped_category)
                    if len(ancestors) > len(extended_categories):
                        # the category with the longest list of ancestors will be the most specific category
                        logging.debug("Ancestors for {} is larger than previous one".format(mapped_category))
                        extended_categories = ancestors
                else:
                    logging.warning("[1] category '{}' not in BioLink Model".format(category))
                    invalid_categories.append(category)
            logging.debug("Invalid categories: {}".format(invalid_categories))

            # Second pass: flag resolvable categories that fall outside the
            # chosen ancestor list, even after applying the custom MAPPING.
            for x in categories:
                element = get_biolink_element(x)
                if element is None:
                    # already recorded as invalid by the first pass
                    logging.warning("[2] category '{}' is not in BioLink Model".format(x))
                    continue
                mapped_category = element['name']
                if mapped_category not in extended_categories:
                    logging.warning("category '{}' not in ancestor closure: {}".format(mapped_category, extended_categories))
                    mapped = MAPPING[x] if x in MAPPING else x
                    if mapped not in extended_categories:
                        logging.warning("category '{}' is not even in any custom defined mapping. ".format(mapped_category))
                        invalid_categories.append(x)

            update_dict = {'category': extended_categories}
            if invalid_categories:
                update_dict['_invalid_category'] = invalid_categories
            updated_node_categories[node] = update_dict
        logging.debug("Updating nodes in clique with: {}".format(updated_node_categories))
        nx.set_node_attributes(self.clique_graph, updated_node_categories)
        nx.set_node_attributes(self.target_graph, updated_node_categories)
예제 #7
0
    def read_edge(self, edge: Dict) -> Dict:
        """
        Normalise a raw edge record and pass it on to the parent reader.

        The ``sub`` and ``obj`` values are contracted with the prefix manager
        and stored as ``subject`` and ``object``. The ``pred`` value is turned
        into a ``predicate`` / ``relation`` pair:

        - If it is an IRI, it is contracted to a CURIE (the ``relation``). The
          predicate is taken from ``self.ecache`` when present; otherwise it is
          derived from the matching Biolink element (looked up directly, then
          via the toolkit's mapping lookup), defaulting to
          ``biolink:related_to``, and stored in the cache.
        - Otherwise ``is_a``, ``has_part`` and ``part_of`` get fixed pairs, and
          any other value becomes ``biolink:<value with spaces as underscores>``
          with the raw value as the relation.

        All remaining keys of ``edge`` are copied over unchanged.

        Parameters
        ----------
        edge: Dict
            The edge record

        Returns
        -------
        Dict
            The processed edge

        """
        fixed_edge = {'subject': self.prefix_manager.contract(edge['sub'])}
        raw_predicate = edge['pred']

        if not PrefixManager.is_iri(raw_predicate):
            # (shorthand, predicate, relation) for the predicates given special treatment
            well_known = (
                ('is_a', 'biolink:subclass_of', 'rdfs:subClassOf'),
                ('has_part', 'biolink:has_part', "BFO:0000051"),
                ('part_of', 'biolink:part_of', "BFO:0000050"),
            )
            for shorthand, predicate, relation in well_known:
                if raw_predicate == shorthand:
                    break
            else:
                # no shorthand matched: build the predicate from the raw value
                predicate = f"biolink:{raw_predicate.replace(' ', '_')}"
                relation = raw_predicate
        else:
            relation = self.prefix_manager.contract(raw_predicate)
            if relation in self.ecache:
                predicate = self.ecache[relation]
            else:
                element = get_biolink_element(relation)
                if not element:
                    # second chance: resolve the IRI through the toolkit's mappings
                    try:
                        mapping = self.toolkit.get_element_by_mapping(
                            raw_predicate)
                        if mapping:
                            element = self.toolkit.get_element(mapping)
                    except ValueError as e:
                        log.error(e)

                predicate = (
                    format_biolink_slots(element.name.replace(',', ''))
                    if element
                    else 'biolink:related_to'
                )
                self.ecache[relation] = predicate

        fixed_edge['predicate'] = predicate
        fixed_edge['relation'] = relation
        fixed_edge['object'] = self.prefix_manager.contract(edge['obj'])

        # carry over every other field of the record untouched
        consumed = {'sub', 'pred', 'obj'}
        fixed_edge.update(
            (key, value) for key, value in edge.items() if key not in consumed
        )
        return super().read_edge(fixed_edge)
예제 #8
0
    def read_edge(self, edge: Dict) -> Optional[Tuple]:
        """
        Normalise a raw edge record and pass it on to the parent reader.

        The ``sub`` and ``obj`` values are contracted with the prefix manager
        and stored as ``subject`` and ``object``. The ``pred`` value is turned
        into a ``predicate`` / ``relation`` pair:

        - If it is an IRI, it is contracted to a CURIE (the ``relation``). The
          predicate is taken from ``self.ecache`` when present; otherwise it is
          derived from the matching Biolink element (looked up directly, then
          via the toolkit's mapping lookup), defaulting to
          ``biolink:related_to``, and stored in the cache. A ``ValueError``
          raised by the toolkit lookup is reported through
          ``self.owner.log_error`` and treated as "no element found".
        - Otherwise ``is_a``, ``has_part`` and ``part_of`` get fixed pairs, and
          any other value becomes ``biolink:<value with spaces as underscores>``
          with the raw value as the relation.

        All remaining keys of ``edge`` are copied over unchanged.

        Parameters
        ----------
        edge: Dict
            The edge record

        Returns
        -------
        Optional[Tuple]
            Whatever the parent class's ``read_edge`` returns for the
            normalised record

        """
        fixed_edge = {"subject": self.prefix_manager.contract(edge["sub"])}
        raw_predicate = edge["pred"]

        if not PrefixManager.is_iri(raw_predicate):
            # (shorthand, predicate, relation) for the predicates given special treatment
            well_known = (
                ("is_a", "biolink:subclass_of", "rdfs:subClassOf"),
                ("has_part", "biolink:has_part", "BFO:0000051"),
                ("part_of", "biolink:part_of", "BFO:0000050"),
            )
            for shorthand, predicate, relation in well_known:
                if raw_predicate == shorthand:
                    break
            else:
                # no shorthand matched: build the predicate from the raw value
                predicate = f"biolink:{raw_predicate.replace(' ', '_')}"
                relation = raw_predicate
        else:
            relation = self.prefix_manager.contract(raw_predicate)
            if relation in self.ecache:
                predicate = self.ecache[relation]
            else:
                element = get_biolink_element(relation)
                if not element:
                    # second chance: resolve the IRI through the toolkit's mappings
                    try:
                        mapping = self.toolkit.get_element_by_mapping(
                            raw_predicate)
                        if mapping:
                            element = self.toolkit.get_element(mapping)

                    #  TODO: not sure how this exception would be thrown here.. under what conditions?
                    except ValueError as e:
                        self.owner.log_error(
                            entity=str(raw_predicate),
                            error_type=ErrorType.INVALID_EDGE_PREDICATE,
                            message=str(e))
                        element = None

                predicate = (
                    format_biolink_slots(element.name.replace(",", ""))
                    if element
                    else "biolink:related_to"
                )
                self.ecache[relation] = predicate

        fixed_edge["predicate"] = predicate
        fixed_edge["relation"] = relation
        fixed_edge["object"] = self.prefix_manager.contract(edge["obj"])

        # carry over every other field of the record untouched
        consumed = {"sub", "pred", "obj"}
        fixed_edge.update(
            (key, value) for key, value in edge.items() if key not in consumed
        )
        return super().read_edge(fixed_edge)