コード例 #1
0
def set_all_information_content_values(ontology: Ontology,
                                       relations: List[str] = None):
    """compute and store information content ("IC") for all terms in the ontology

    Runs three preparatory passes over every root's subgraph (subsumer
    counts, leaf counts, depths) and then derives the IC value of each node
    from those statistics.

    Args:
        ontology (Ontology): the ontology to annotate in place
        relations (List[str]): the list of relations to be used
    """
    logger.info("calculating information content for all terms in ontology")
    root_ids = ontology.get_roots(relations=relations)
    # pass 1: number of subsumers per node
    for rid in root_ids:
        if "num_subsumers" not in ontology.node(rid):
            _set_num_subsumers_in_subgraph(ontology=ontology,
                                           root_id=rid,
                                           relations=relations)
    # pass 2: number of leaves per node
    for rid in root_ids:
        if "num_leaves" not in ontology.node(rid):
            _set_num_leaves_in_subgraph(ontology=ontology,
                                        root_id=rid,
                                        relations=relations)
    # pass 3: node depths
    for rid in root_ids:
        if "depth" not in ontology.node(rid):
            set_all_depths_in_subgraph(ontology=ontology,
                                       root_id=rid,
                                       relations=relations)
    # final pass: derive IC from the collected statistics
    for rid in root_ids:
        _set_information_content_in_subgraph(
            ontology=ontology,
            root_id=rid,
            maxleaves=ontology.node(rid)["num_leaves"],
            relations=relations)
コード例 #2
0
def get_all_paths_to_root(node_id: str,
                          ontology: Ontology,
                          min_distance_from_root: int = 0,
                          relations: List[str] = None,
                          nodeids_blacklist: List[str] = None,
                          previous_path: Union[None, List[str]] = None,
                          root_node=None) -> Set[Tuple[str]]:
    """get all possible paths connecting a go term to its root terms

    Args:
        node_id (str): a valid GO id for the starting term
        ontology (Ontology): the go ontology
        min_distance_from_root (int): return only terms at a specified minimum distance from root terms
        relations (List[str]): the list of relations to be used
        nodeids_blacklist (List[str]): a list of node ids to exclude from the paths
        previous_path (Union[None, List[str]]): the path to get to the current node
        root_node: if set, only follow parents whose OBO namespace matches this root
    Returns:
        Set[Tuple[str]]: the set of paths connecting the specified term to its root terms, each of which contains a
        sequence of terms ids
    """
    if previous_path is None:
        previous_path = []
    new_path = previous_path[:]
    if not nodeids_blacklist or node_id not in nodeids_blacklist:
        new_path.append(node_id)
    parents = [
        parent
        for parent in ontology.parents(node=node_id, relations=relations)
        if ontology.node(parent)["depth"] >= min_distance_from_root
    ]
    parents_same_root = []
    if root_node:
        for parent in parents:
            # bug fix: `parent` is just a node id string, so the namespace
            # metadata must be read from the node object, not from the id
            parent_node = ontology.node(parent)
            parent_root = None
            if "meta" in parent_node and \
                    "basicPropertyValues" in parent_node["meta"]:
                for basic_prop_val in parent_node["meta"]["basicPropertyValues"]:
                    if basic_prop_val["pred"] == "OIO:hasOBONamespace":
                        parent_root = basic_prop_val["val"]
            if parent_root and parent_root == root_node:
                parents_same_root.append(parent)
        parents = parents_same_root

    if len(parents) > 0:
        # go up the tree, following a depth first visit
        paths_to_return = set()
        for parent in parents:
            for path in get_all_paths_to_root(
                    node_id=parent,
                    ontology=ontology,
                    previous_path=new_path,
                    min_distance_from_root=min_distance_from_root,
                    relations=relations,
                    nodeids_blacklist=nodeids_blacklist,
                    root_node=root_node):
                paths_to_return.add(path)
        return paths_to_return
    if len(new_path) == 0:
        return {(node_id, )}
    else:
        return {tuple(new_path)}
コード例 #3
0
def translate_file_to_ontology(handle, **args):
    """build an Ontology from a file path, choosing a parser by extension

    JSON files are parsed directly as obograph-json; turtle files go through
    the RDF mapper; anything else is converted to obograph-json via the
    owltools command line tool, cached under /tmp keyed on the sha256 of the
    path.
    """
    if handle.endswith(".json"):
        payload = obograph_util.convert_json_file(handle, **args)
        return Ontology(handle=handle, payload=payload)
    if handle.endswith(".ttl"):
        from ontobio.sparql.rdf2nx import RdfMapper
        logging.info("RdfMapper: {}".format(args))
        return RdfMapper(**args).convert(handle, 'ttl')
    if not (handle.endswith(".obo") or handle.endswith(".owl")):
        logging.info(
            "Attempting to parse non obo or owl file with owltools: " +
            handle)
    digest = hashlib.sha256(handle.encode()).hexdigest()
    logging.info(" encoded: " + str(digest))
    cached_fn = '/tmp/' + digest
    if os.path.isfile(cached_fn):
        logging.info("using cached file: " + cached_fn)
    else:
        completed = subprocess.run(
            ['owltools', handle, '-o', '-f', 'json', cached_fn], check=True)
        logging.info(completed)
    payload = obograph_util.convert_json_file(cached_fn, **args)
    return Ontology(handle=handle, payload=payload)
コード例 #4
0
def get_all_common_ancestors(node_ids: List[str],
                             ontology: Ontology,
                             min_distance_from_root: int = 0,
                             nodeids_blacklist: List[str] = None):
    """collect the common ancestors of the provided nodes

    Returns a list of (ancestor_id, ancestor_label, covered_node_ids) tuples
    for every ancestor that covers more than one starting node or is itself
    one of the starting nodes. Raises ValueError when the starting nodes are
    connected to different OBO namespace roots.
    """

    def obo_namespace(node_dict):
        # last OIO:hasOBONamespace value recorded in the node metadata, if any
        namespace = None
        if "meta" in node_dict and "basicPropertyValues" in node_dict["meta"]:
            for prop in node_dict["meta"]["basicPropertyValues"]:
                if prop["pred"] == "OIO:hasOBONamespace":
                    namespace = prop["val"]
        return namespace

    # check if all ids are connected to the same root node
    common_root = None
    for nid in node_ids:
        start_node = ontology.node(nid)
        if "meta" in start_node and "basicPropertyValues" in start_node["meta"]:
            for prop in start_node["meta"]["basicPropertyValues"]:
                if prop["pred"] == "OIO:hasOBONamespace":
                    if common_root and common_root != prop["val"]:
                        raise ValueError(
                            "Cannot get common ancestors of nodes connected to different roots"
                        )
                    common_root = prop["val"]
    ancestors = defaultdict(list)
    for nid in node_ids:
        for anc_id in ontology.ancestors(node=nid, reflexive=True):
            anc_node = ontology.node(anc_id)
            anc_root = obo_namespace(anc_node)
            deep_enough = anc_node["depth"] >= min_distance_from_root
            same_root = not anc_root or anc_root == common_root
            allowed = not nodeids_blacklist or anc_id not in nodeids_blacklist
            if deep_enough and same_root and allowed:
                ancestors[anc_id].append(nid)
    return [(anc_id, ontology.label(anc_id), set(covered))
            for anc_id, covered in ancestors.items()
            if len(covered) > 1 or anc_id == covered[0]]
コード例 #5
0
def get_best_nodes_ic(
    node_ids: List[str],
    ontology: Ontology,
    max_number_of_terms: int = 3,
    min_distance_from_root: int = 0,
    slim_terms_ic_bonus_perc: int = 0,
    slim_set: set = None,
    nodeids_blacklist: List[str] = None
) -> Tuple[bool, List[Tuple[str, Set[str]]]]:
    """trim the list of terms by selecting the best combination of terms from the initial list or their common
    ancestors based on information content

    Args:
        node_ids (List[str]): the list of nodes to merge by common ancestor
        max_number_of_terms (int): minimum number of terms above which the merge operation is performed
        ontology (Ontology): the ontology
        min_distance_from_root (int): consider only nodes at a minimum distance from root as potential candidate for
            trimming
        slim_terms_ic_bonus_perc (int): boost the IC value for terms that appear in the slim set by the provided
            percentage
        slim_set (set): set of terms that belong to the slim for the provided ontology
        nodeids_blacklist (List[str]): a list of node ids to be excluded from common ancestors list
    Returns:
        Set[str]: the set of trimmed terms, together with the set of original terms that each of them covers
    """
    common_ancestors = get_all_common_ancestors(
        node_ids=node_ids,
        ontology=ontology,
        nodeids_blacklist=nodeids_blacklist)
    if "IC" not in ontology.node(common_ancestors[0][0]):
        logger.warning(
            "ontology terms do not have information content values set")
        set_all_information_content_values(ontology=ontology)
    # score every candidate: 0 for ancestors that are too close to the root
    # (unless they are one of the starting terms), otherwise the term's IC,
    # boosted for terms belonging to the slim set
    values = []
    for candidate in common_ancestors:
        cand_node = ontology.node(candidate[0])
        if candidate[0] not in node_ids and \
                cand_node["depth"] < min_distance_from_root:
            values.append(0)
        elif slim_set and candidate[0] in slim_set:
            values.append(cand_node["IC"] * (1 + slim_terms_ic_bonus_perc))
        else:
            values.append(cand_node["IC"])
    if slim_set and any(candidate[0] in slim_set
                        for candidate in common_ancestors):
        logger.debug("some candidates are present in the slim set")
    # remove ancestors with zero IC
    kept = [(candidate, score)
            for candidate, score in zip(common_ancestors, values)
            if score > 0]
    common_ancestors = [candidate for candidate, _ in kept]
    values = [score for _, score in kept]
    best_terms = find_set_covering(subsets=common_ancestors,
                                   max_num_subsets=max_number_of_terms,
                                   value=values,
                                   ontology=ontology)
    covered_terms = {elem for _, covered in best_terms for elem in covered}
    return covered_terms != set(node_ids), best_terms
コード例 #6
0
def create_ontology(handle=None, **args):
    """load an Ontology, picking a fetch strategy from the handle format

    Supported handles: a "+"-separated list of handles (each loaded and
    merged), local files (dispatched by extension), "obo:" PURLs, "wdq:"
    Wikidata, "scigraph:" remote SciGraph, "http:" URLs (converted and
    cached under /tmp via owltools), or a SPARQL endpoint handle as a
    fallback.

    Args:
        handle: the ontology handle/locator
        **args: extra options forwarded to the underlying parser
    Returns:
        Ontology: the loaded (possibly merged) ontology
    """
    ont = None
    logging.info("Determining strategy to load '{}' into memory...".format(handle))

    if handle.find("+") > -1:
        handles = handle.split("+")
        # bug fix: forward **args to each sub-handle instead of dropping them
        onts = [create_ontology(sub_handle, **args) for sub_handle in handles]
        ont = onts.pop()
        ont.merge(onts)
        return ont

    # TODO: consider replacing with plugin architecture
    if handle.find(".") > 0 and os.path.isfile(handle):
        logging.info("Fetching obograph-json file from filesystem")
        ont = translate_file_to_ontology(handle, **args)
    elif handle.startswith("obo:"):
        logging.info("Fetching from OBO PURL")
        if handle.find(".") == -1:
            handle += '.owl'
        fn = '/tmp/'+handle
        if not os.path.isfile(fn):
            url = handle.replace("obo:","http://purl.obolibrary.org/obo/")
            cmd = ['owltools',url,'-o','-f','json',fn]
            cp = subprocess.run(cmd, check=True)
            logging.info(cp)
        else:
            logging.info("using cached file: "+fn)
        g = obograph_util.convert_json_file(fn)
        ont = Ontology(handle=handle, payload=g)
    elif handle.startswith("wdq:"):
        from ontobio.sparql.wikidata_ontology import EagerWikidataOntology
        logging.info("Fetching from Wikidata")
        ont = EagerWikidataOntology(handle=handle)
    elif handle.startswith("scigraph:"):
        from ontobio.neo.scigraph_ontology import RemoteScigraphOntology
        logging.info("Fetching from SciGraph")
        ont = RemoteScigraphOntology(handle=handle)
    elif handle.startswith("http:"):
        logging.info("Fetching from Web PURL: "+handle)
        # cache the converted file under a name derived from the URL's hash
        encoded = hashlib.sha256(handle.encode()).hexdigest()
        logging.info(" encoded: "+str(encoded))
        fn = '/tmp/'+encoded
        if not os.path.isfile(fn):
            cmd = ['owltools',handle,'-o','-f','json',fn]
            cp = subprocess.run(cmd, check=True)
            logging.info(cp)
        else:
            logging.info("using cached file: "+fn)
        g = obograph_util.convert_json_file(fn)
        ont = Ontology(handle=handle, payload=g)
    else:
        logging.info("Fetching from SPARQL")
        ont = EagerRemoteSparqlOntology(handle=handle)
    return ont
コード例 #7
0
    def set_ontology(self,
                     ontology_type: DataType,
                     ontology: Ontology,
                     config: GenedescConfigParser,
                     slim_cache_path: str = None) -> None:
        """set the go ontology and apply terms renaming

        Stores the (optionally relation-filtered) ontology on the matching
        attribute, then renames terms per config, sets node depths, and, when
        the "ic" trimming algorithm is configured, precomputes information
        content; finally loads the slim if a cache path is given.

        Args:
            ontology_type (DataType): the type of ontology to set
            ontology (Ontology): an ontology object to set as go ontology
            config (GenedescConfigParser): configuration object where to read properties
            slim_cache_path (str): path to slim file to use
        """
        if ontology_type == DataType.GO:
            logger.info("Setting GO ontology")
            # restrict to the configured relations, when any were provided
            if self.go_relations:
                self.go_ontology = ontology.subontology(
                    relations=self.go_relations)
            else:
                self.go_ontology = ontology
        elif ontology_type == DataType.DO:
            logger.info("Setting DO ontology")
            if self.do_relations:
                self.do_ontology = ontology.subontology(
                    relations=self.do_relations)
            else:
                self.do_ontology = ontology
        elif ontology_type == DataType.EXPR:
            logger.info("Setting Expression ontology")
            if self.expr_relations:
                self.expression_ontology = ontology.subontology(
                    relations=self.expr_relations)
            else:
                self.expression_ontology = ontology
        module = get_module_from_data_type(ontology_type)
        # re-read the stored ontology so the post-processing below operates on
        # the (possibly filtered) copy, not on the caller's argument
        ontology = self.get_ontology(data_type=ontology_type)
        terms_replacement_regex = config.get_module_property(
            module=module, prop=ConfigModuleProperty.RENAME_TERMS)
        if terms_replacement_regex:
            self.rename_ontology_terms(
                ontology=ontology,
                terms_replacement_regex=terms_replacement_regex)
        # depths must be set before any IC computation that relies on them
        set_all_depths(ontology=ontology,
                       relations=self.get_relations(ontology_type))
        if config.get_module_property(
                module=module,
                prop=ConfigModuleProperty.TRIMMING_ALGORITHM) == "ic":
            set_ic_ontology_struct(ontology=ontology,
                                   relations=self.get_relations(ontology_type))
        if slim_cache_path:
            slim_url = config.get_module_property(
                module=module, prop=ConfigModuleProperty.SLIM_URL)
            self.load_slim(module=module,
                           slim_url=slim_url,
                           slim_cache_path=slim_cache_path)
コード例 #8
0
def set_all_depths(ontology: Ontology,
                   relations: List[str] = None,
                   comparison_func=max):
    """set a "depth" value on every node of the ontology

    Depths are propagated from each root whose node is either untyped or of
    type CLASS; any node left untouched by those traversals defaults to 0.
    """
    for root_id in ontology.get_roots():
        untyped = "type" not in ontology.node(root_id)
        if untyped or ontology.node_type(root_id) == "CLASS":
            set_all_depths_in_subgraph(ontology=ontology,
                                       root_id=root_id,
                                       relations=relations,
                                       comparison_func=comparison_func)
    # nodes never reached from an eligible root get a default depth
    for node_content in ontology.nodes().values():
        node_content.setdefault("depth", 0)
コード例 #9
0
def _set_information_content_in_subgraph(ontology: Ontology,
                                         root_id: str,
                                         maxleaves: int,
                                         relations: List[str] = None):
    """recursively compute and store the "IC" value for a subgraph

    Args:
        ontology (Ontology): the ontology to annotate in place
        root_id (str): the ID of the root term of the branch to process
        maxleaves (int): the total number of leaves under the branch root
        relations (List[str]): the list of relations to be used
    """
    node = ontology.node(root_id)
    if "num_leaves" in node and "num_subsumers" in node:
        node["IC"] = -math.log(
            (float(node["num_leaves"]) / node["num_subsumers"] + 1) /
            (maxleaves + 1))
    else:
        # robustness fix, consistent with the guarded variant of this
        # function elsewhere in the file: a node that never received its
        # leaf/subsumer counts is disconnected; assign IC 0 instead of
        # raising KeyError
        logger.warning("Disconnected node: " + root_id)
        node["IC"] = 0
    for child_id in ontology.children(node=root_id, relations=relations):
        _set_information_content_in_subgraph(ontology=ontology,
                                             root_id=child_id,
                                             maxleaves=maxleaves,
                                             relations=relations)
コード例 #10
0
ファイル: rdflib_bridge.py プロジェクト: valearna/ontobio
def rdfgraph_to_ontol(rg):
    """
    Return an Ontology object from an rdflib graph object

    Status: Incomplete
    """
    from rdflib.namespace import RDF
    graph = networkx.MultiDiGraph()
    label_map = {}
    for cls in rg.subjects(RDF.type, OWL.Class):
        class_id = contract_uri_wrap(cls)
        logger.info("C={}".format(class_id))
        for lit in rg.objects(cls, RDFS.label):
            label_map[class_id] = lit.value
            graph.add_node(class_id, label=lit.value)
        for superclass in rg.objects(cls, RDFS.subClassOf):
            # todo - blank nodes
            graph.add_edge(contract_uri_wrap(superclass), class_id,
                           pred='subClassOf')

    logger.info("G={}".format(graph))
    payload = {'graph': graph}
    return Ontology(handle='wd', payload=payload)
コード例 #11
0
def node_is_in_branch(ontology: Ontology, node_id: str,
                      branch_root_ids: List[str]):
    """return True if the node or any of its ancestors is one of the branch roots

    Args:
        ontology (Ontology): the ontology containing the node
        node_id (str): the id of the node to test
        branch_root_ids (List[str]): ids of the branch root terms
    """
    branch_root_ids = set(branch_root_ids)
    # generator expression short-circuits without materializing a full list
    return any(ancestor_id in branch_root_ids
               for ancestor_id in ontology.ancestors(node=node_id,
                                                     reflexive=True))
コード例 #12
0
 def convert(self, filename=None, format='ttl'):
     """Parse an RDF file (if given) and convert the accumulated triples to an Ontology.

     Args:
         filename: optional path to an RDF file to parse before converting
         format: RDF serialization format passed to the parser (default 'ttl')
     Returns:
         Ontology: a new ontology backed by a fresh MultiDiGraph populated via add_triples
     """
     if filename is not None:
         self.parse_rdf(filename=filename, format=format)
     g = networkx.MultiDiGraph()
     ont = Ontology(graph=g)
     self.add_triples(ont)
     return ont
コード例 #13
0
def protein_complex_sublcass_closure(ontology: Ontology) -> Set[str]:
    """return every term in the subClassOf closure of GO:0032991 (protein-containing complex)

    NOTE: the "sublcass" misspelling in the name is kept for caller compatibility.
    """
    complex_root = association.Curie(namespace="GO", identity="0032991")
    return set(
        ontology.descendants(str(complex_root),
                             relations=["subClassOf"],
                             reflexive=True))
コード例 #14
0
def set_all_depths_in_subgraph(ontology: Ontology,
                               root_id: str,
                               relations: List[str] = None,
                               comparison_func=max,
                               current_depth: int = 0):
    """recursively set the depth (distance from a root term) for every node in a branch

    Args:
        ontology (Ontology): the ontology
        root_id (str): the ID of the root term of the branch to process
        relations (List[str]): list of relations to consider
        comparison_func: combines an already-set depth with the depth of the
            current path when a node is reachable through multiple paths; max
            keeps the longest path, min the shortest
        current_depth (int): the current depth in the ontology
    """
    node = ontology.node(root_id)
    if "depth" in node:
        node["depth"] = comparison_func(node["depth"], current_depth)
    else:
        node["depth"] = current_depth
    for child_id in ontology.children(node=root_id, relations=relations):
        set_all_depths_in_subgraph(ontology=ontology,
                                   root_id=child_id,
                                   relations=relations,
                                   comparison_func=comparison_func,
                                   current_depth=current_depth + 1)
コード例 #15
0
def get_all_common_ancestors(node_ids: List[str],
                             ontology: Ontology,
                             min_distance_from_root: int = 0,
                             nodeids_blacklist: List[str] = None):
    """
    Retrieve all common ancestors for the provided list of nodes

    Args:
        node_ids (List[str]): list of starting nodes
        ontology (Ontology): the ontology to which the provided nodes belong
        min_distance_from_root (int): minimum distance from root node
        nodeids_blacklist (List[str]): node ids to be excluded from the result

    Returns:
        List[CommonAncestor]: list of common ancestors
    """

    def obo_namespace(node_dict):
        # last OIO:hasOBONamespace value recorded in the node metadata, if any
        namespace = None
        if "meta" in node_dict and "basicPropertyValues" in node_dict["meta"]:
            for prop in node_dict["meta"]["basicPropertyValues"]:
                if prop["pred"] == "OIO:hasOBONamespace":
                    namespace = prop["val"]
        return namespace

    common_root = nodes_have_same_root(node_ids=node_ids, ontology=ontology)
    if common_root is False:
        raise ValueError(
            "Cannot get common ancestors of nodes connected to different roots"
        )
    ancestors = defaultdict(list)
    for nid in node_ids:
        for anc_id in ontology.ancestors(node=nid, reflexive=True):
            anc_node = ontology.node(anc_id)
            anc_root = obo_namespace(anc_node)
            # starting nodes are always kept, others must be deep enough
            relevant = anc_id in node_ids or \
                anc_node["depth"] >= min_distance_from_root
            same_root = not anc_root or anc_root == common_root
            allowed = not nodeids_blacklist or anc_id not in nodeids_blacklist
            if relevant and same_root and allowed:
                ancestors[anc_id].append(nid)
    return [
        CommonAncestor(node_id=anc_id,
                       node_label=ontology.label(anc_id),
                       covered_starting_nodes=set(covered))
        for anc_id, covered in ancestors.items()
        if len(covered) > 1 or anc_id == covered[0]
    ]
コード例 #16
0
    def rename_ontology_terms(
            ontology: Ontology,
            terms_replacement_regex: Dict[str, str] = None) -> None:
        """rename ontology terms based on regular expression matching

        Args:
            ontology (Ontology): the ontology containing the terms to be renamed
            terms_replacement_regex (Dict[str, str]): a dictionary containing the regular expression to be applied for
                renaming terms. Each key must be a regular expression to search for terms and the associated value
                another regular expression that defines the final result
        """
        logger.info("Renaming ontology terms")
        if not terms_replacement_regex:
            return
        for search_regex, replacement in terms_replacement_regex.items():
            for node_id in ontology.search(search_regex, is_regex=True):
                node = ontology.node(node_id)
                node["label"] = re.sub(search_regex, replacement,
                                       node["label"])
コード例 #17
0
def materialize_inferences(ontology_graph: ontol.Ontology, annotation):
    """expand an annotation along has_part relations of its MF term's ancestors

    For each ancestor of the annotation's molecular function (MF) term, find
    the terms related via HAS_PART and materialize a new annotation for each
    of them, logging the inference chain.

    Args:
        ontology_graph (ontol.Ontology): ontology used for ancestor/label lookups
        annotation: association dict carrying "subject"/"object" term ids
    Returns:
        list: the newly materialized annotations
    """
    materialized_annotations = []  #(gp, new_mf)

    mf = annotation["object"]["id"]
    gp = annotation["subject"]["id"]
    global __ancestors_cache
    mf_ancestors = ancestors(mf, ontology_graph, __ancestors_cache)

    messages = []

    for mf_anc in mf_ancestors:
        has_part_mfs = neighbor_by_relation(ontology_graph, mf_anc, HAS_PART)
        # only non-empty has_part results are worth reporting
        if has_part_mfs:
            messages.append((gp, mf, mf_anc, has_part_mfs))

        for new_mf in has_part_mfs:
            new_annotation = transform_relation(annotation, new_mf,
                                                ontology_graph)
            materialized_annotations.append(new_annotation)

    for gp_id, mf_id, mf_anc_id, parts in messages:
        logger.info("\nFor {gp} -> {term} \"{termdef}\":".format(
            gp=gp_id,
            term=mf_id,
            termdef=ontology_graph.label(mf_id)))
        # bug fix: the parent line previously logged the annotated MF term
        # itself (message[1]) instead of its ancestor (message[2])
        logger.info("\tHas Parent --> {parent} \"{parentdef}\"".format(
            parent=mf_anc_id, parentdef=ontology_graph.label(mf_anc_id)))
        for part in parts:
            logger.info("\t\t has_part --> {part} \"{partdef}\"".format(
                part=part, partdef=ontology_graph.label(part)))

    return materialized_annotations
コード例 #18
0
ファイル: lexmap.py プロジェクト: meftaul/ontobio
 def __init__(self, wsmap=None, config=None):
     """
     Arguments
     ---------
     wsmap: dict
         maps words to normalized synonyms; defaults to default_wsmap()
     config: dict
         A configuration conforming to LexicalMapConfigSchema
     """
     # bug fix: evaluate the default per instance instead of once at class
     # definition time, so instances never share (and mutate) one dict
     self.wsmap = default_wsmap() if wsmap is None else wsmap
     # maps label or syn value to Synonym object
     self.lmap = {}
     # maps node id to synonym objects
     self.smap = {}
     # raw string avoids the invalid-escape warning for \W
     self.npattern = re.compile(r'[\W_]+')
     self.exclude_obsolete = True
     self.ontology_pairs = None
     self.id_to_ontology_map = defaultdict(list)
     self.merged_ontology = Ontology()
     self.config = config if config is not None else {}
     self.stats = {}
コード例 #19
0
def _set_num_subsumers_in_subgraph(ontology: Ontology,
                                   root_id: str,
                                   relations: List[str] = None):
    """recursively compute "num_subsumers" and "set_subsumers" for a subgraph

    A node is processed only once all of its parents carry their subsumer
    sets; otherwise the traversal stops here and resumes when the remaining
    parents are reached from another root.

    Args:
        ontology (Ontology): the ontology to annotate in place
        root_id (str): the ID of the root term of the branch to process
        relations (List[str]): the list of relations to be used
    """
    if "num_subsumers" in ontology.node(root_id):
        return
    # bug fix: the relations filter was accepted but never applied to the
    # parents/children queries, only forwarded to the recursive call
    parents = set(ontology.parents(root_id, relations=relations))
    parents.discard(root_id)
    if not parents or all(
            "set_subsumers" in ontology.node(parent) for parent in parents):
        subsumers = {subsumer
                     for parent in parents
                     for subsumer in ontology.node(parent)["set_subsumers"]}
        subsumers.add(root_id)
        ontology.node(root_id)["num_subsumers"] = len(subsumers)
        ontology.node(root_id)["set_subsumers"] = subsumers
        for child_id in ontology.children(node=root_id, relations=relations):
            _set_num_subsumers_in_subgraph(ontology, child_id, relations)
コード例 #20
0
def _set_information_content_in_subgraph(ontology: Ontology,
                                         root_id: str,
                                         maxleaves: int,
                                         relations: List[str] = None):
    """recursively compute and store the "IC" value for all nodes in a subgraph

    Artificial nodes and disconnected nodes (those missing their leaf or
    subsumer counts) receive an IC of 0.
    """
    node = ontology.node(root_id)
    is_artificial = str(root_id) == root_id and "ARTIFICIAL_NODE:" in root_id
    if is_artificial:
        node["IC"] = 0
    elif "num_leaves" in node and "num_subsumers" in node:
        node["IC"] = -math.log(
            (float(node["num_leaves"]) / node["num_subsumers"] + 1) /
            (maxleaves + 1))
    else:
        logger.warning("Disconnected node: " + root_id)
        node["IC"] = 0
    # exclude the node itself to avoid infinite recursion on self-loops
    child_ids = set(ontology.children(node=root_id, relations=relations))
    child_ids.discard(root_id)
    for child_id in child_ids:
        _set_information_content_in_subgraph(ontology=ontology,
                                             root_id=child_id,
                                             maxleaves=maxleaves,
                                             relations=relations)
コード例 #21
0
def _set_num_subsumers_in_subgraph(ontology: Ontology,
                                   root_id: str,
                                   relations: List[str] = None):
    """recursively set "num_subsumers" (number of ancestors, including the
    node itself) for every node in the branch rooted at root_id

    Args:
        ontology (Ontology): the ontology to annotate in place
        root_id (str): the ID of the root term of the branch to process
        relations (List[str]): the list of relations to be used

    NOTE(review): the single-parent fast path derives the count from the
    parent's value, which assumes the parent was already processed -- confirm
    callers always traverse top-down. Nodes reachable through multiple paths
    are revisited once per path, which can be expensive on dense DAGs.
    """
    parents = ontology.parents(root_id)
    if len(parents) == 1:
        # fast path: with a single parent the subsumers are exactly the
        # parent's subsumers plus the node itself
        ontology.node(root_id)["num_subsumers"] = ontology.node(
            parents[0])["num_subsumers"] + 1
    else:
        # zero or multiple parents: count the full ancestor closure directly
        ontology.node(root_id)["num_subsumers"] = len(
            ontology.ancestors(node=root_id,
                               relations=relations,
                               reflexive=True))
    for child_id in ontology.children(node=root_id, relations=relations):
        _set_num_subsumers_in_subgraph(ontology=ontology,
                                       root_id=child_id,
                                       relations=relations)
コード例 #22
0
    def set_ontology(self,
                     ontology_type: DataType,
                     ontology: Ontology,
                     terms_replacement_regex: Dict[str, str] = None) -> None:
        """set the go ontology and apply terms renaming

        Args:
            ontology_type (DataType): the type of ontology to set
            ontology (Ontology): an ontology object to set as go ontology
            terms_replacement_regex (Dict[str, str]): a dictionary containing the regular expression to be applied for
                renaming terms. Each key must be a regular expression to search for terms and the associated value
                another regular expression that defines the final result
        """
        new_ontology = None
        if ontology_type == DataType.GO:
            logger.info("Setting GO ontology")
            # a subontology restricted to the configured GO relations
            self.go_ontology = ontology.subontology(
                relations=self.go_relations)
            new_ontology = self.go_ontology
        elif ontology_type == DataType.DO:
            logger.info("Setting DO ontology")
            self.do_ontology = ontology.subontology(
                relations=self.do_relations)
            new_ontology = self.do_ontology
        elif ontology_type == DataType.EXPR:
            logger.info("Setting Expression ontology")
            # expression keeps all relations but gets article decorations
            self.expression_ontology = ontology.subontology()
            DataManager.add_article_to_expression_nodes(
                self.expression_ontology)
            new_ontology = self.expression_ontology
        self.rename_ontology_terms(
            ontology=new_ontology,
            terms_replacement_regex=terms_replacement_regex)
        # precompute node depths for every branch of the stored subontology
        for root_id in new_ontology.get_roots():
            set_all_depths_in_subgraph(ontology=new_ontology,
                                       root_id=root_id,
                                       relations=None)
コード例 #23
0
def ancestors(term: str, ontology: ontol.Ontology, cache) -> Set[str]:
    """return the subClassOf ancestor closure of term, memoized in cache

    The MF root term is treated as having no ancestors.
    """
    click.echo("Computing ancestors for {}".format(term))
    if term == MF:
        click.echo("Found 0")
        return set()

    if term in cache:
        anc = cache[term]
        click.echo("Found {} (from cache)".format(len(anc)))
    else:
        anc = set(
            ontology.ancestors(term, relations=["subClassOf"], reflexive=True))
        cache[term] = anc
        click.echo("Found {} (from adding to cache: {} terms added)".format(
            len(anc), len(cache)))
    return anc
コード例 #24
0
def find_set_covering(
        subsets: List[Tuple[str, str, Set[str]]],
        value: List[float] = None,
        max_num_subsets: int = None,
        ontology: Ontology = None) -> Union[None, List[Tuple[str, Set[str]]]]:
    """greedy algorithm to solve set covering problem

    Args:
        subsets (List[Tuple[str, str, Set[str]]]): list of subsets, each of which must contain a tuple with the first
        element being the ID of the subset, the second being the name, and the third the actual set of elements
        value (List[float]): list of costs of the subsets
        max_num_subsets (int): maximum number of subsets in the final list
        ontology (Ontology): ontology to use to remove possible parent-child relationships in the result set
    Returns:
        Union[None, List[Tuple[str, Set[str]]]]: the (id, covered elements) pairs of the subsets that maximize
        coverage of the universe, or None if value does not match subsets in length
    """
    logger.debug("starting set covering optimization")
    elem_to_process = {subset[0] for subset in subsets}
    if value and len(value) != len(elem_to_process):
        return None
    universe = {e for subset in subsets for e in subset[2]}
    included_elmts = set()
    included_sets = []
    while len(elem_to_process) > 0 and included_elmts != universe and \
            (not max_num_subsets or len(included_sets) < max_num_subsets):
        # rank remaining subsets by marginal coverage (cost-weighted when
        # values are provided), breaking ties on subset name
        if value:
            effect_sets = sorted(
                [(v * len(s[2] - included_elmts), s[2], s[1], s[0])
                 for s, v in zip(subsets, value) if s[0] in elem_to_process],
                key=lambda x: (-x[0], x[2]))
        else:
            effect_sets = sorted(
                [(len(s[2] - included_elmts), s[2], s[1], s[0])
                 for s in subsets if s[0] in elem_to_process],
                key=lambda x: (-x[0], x[2]))
        best_id, best_set = effect_sets[0][3], effect_sets[0][1]
        elem_to_process.remove(best_id)
        if ontology:
            # bug fix: the original removed items from included_sets while
            # iterating over it, which skips elements; rebuild the list
            # instead, dropping previously chosen descendants of the new pick
            included_sets = [
                prev for prev in included_sets
                if best_id not in ontology.ancestors(prev[0])
            ]
        included_elmts |= best_set
        included_sets.append((best_id, best_set))
    logger.debug("finished set covering optimization")
    return included_sets
コード例 #25
0
ファイル: tsv_expander.py プロジェクト: deepakunni3/ontobio
def expand_tsv(input: str,
               ontology: "Ontology" = None,
               outfile=None,
               sep='\t',
               cols: List[str] = None) -> None:
    """
    Adds additional columns to a TSV by performing additional ontology lookups

    For example, given a TSV with a column `term`, this adds a column
    `term_label` (immediately after `term`) holding the ontology label for
    the ID found in `term`. In future it may also add closures.

    :param input: filename of a TSV (must have column headers)
    :param ontology: used to look up labels for IDs found in `cols`
    :param outfile: open, writable file-like object for the expanded output
    :param sep: delimiter used for both reading and writing
    :param cols: names of the ID columns to expand; None means no expansion
    :return: None
    """
    # Default to "no columns to expand" rather than crashing on `k in None`.
    if cols is None:
        cols = []
    with open(input, newline='') as infile:
        # BUG FIX: the reader previously hard-coded '\t' and ignored `sep`.
        reader = csv.DictReader(infile, delimiter=sep)
        outwriter = csv.writer(outfile, delimiter=sep)
        first = True
        for row in reader:
            if first:
                # Emit the header once, inserting a `<col>_label` column
                # immediately after each column being expanded.
                first = False
                hdr = []
                for k in row.keys():
                    hdr.append(k)
                    if k in cols:
                        hdr.append(f'{k}_label')
                outwriter.writerow(hdr)
            vals = []
            for k, v in row.items():
                vals.append(v)
                if k in cols:
                    term_id = row[k]
                    label = ontology.label(term_id)
                    vals.append(label)
                    if label is None:
                        logging.warning(f"No id: {term_id}")
            outwriter.writerow(vals)
コード例 #26
0
def _set_tot_annots_in_subgraph(ontology: Ontology,
                                root_id: str,
                                relations: List[str] = None):
    if "tot_annot_genes" not in ontology.node(root_id):
        children = set(ontology.children(root_id, relations=relations))
        children.discard(root_id)
        children = list(children)
        ontology.node(root_id)["tot_annot_genes"] = ontology.node(
            root_id)["rel_annot_genes"] | set([
                annot_gene for child_id in children
                for annot_gene in _set_tot_annots_in_subgraph(
                    ontology, child_id)
            ])
    return ontology.node(root_id)["tot_annot_genes"]
コード例 #27
0
def _set_num_leaves_in_subgraph(ontology: Ontology,
                                root_id: str,
                                relations: List[str] = None):
    num_leaves = 0
    for child_id in ontology.children(node=root_id):
        if "num_leaves" not in ontology.node(child_id):
            _set_num_leaves_in_subgraph(ontology=ontology,
                                        root_id=child_id,
                                        relations=relations)
        if ontology.node(child_id)["num_leaves"] == 0:
            num_leaves += 1
        else:
            num_leaves += ontology.node(child_id)["num_leaves"]
    ontology.node(root_id)["num_leaves"] = num_leaves
コード例 #28
0
def nodes_have_same_root(node_ids: List[str],
                         ontology: Ontology) -> Union[bool, str]:
    """
    Check whether all provided nodes are connected to the same root only

    The root is identified through the "OIO:hasOBONamespace" basic property
    value stored in each node's metadata.

    Args:
        node_ids (List[str]): List of nodes to be checked
        ontology (Ontology): the ontology to which the provided nodes belong

    Returns:
        Union[bool, str]: the namespace shared by all nodes if they agree on
                          a single one, False if two nodes disagree, or None
                          if no node carries a namespace property
    """
    shared_namespace = None
    for curr_id in node_ids:
        curr_node = ontology.node(curr_id)
        # skip nodes without the metadata block that holds the namespace
        if "meta" not in curr_node or \
                "basicPropertyValues" not in curr_node["meta"]:
            continue
        for prop in curr_node["meta"]["basicPropertyValues"]:
            if prop["pred"] != "OIO:hasOBONamespace":
                continue
            if shared_namespace and shared_namespace != prop["val"]:
                # two different namespaces found: not a common root
                return False
            shared_namespace = prop["val"]
    return shared_namespace
コード例 #29
0
def _set_num_leaves_in_subgraph(ontology: Ontology,
                                root_id: str,
                                relations: List[str] = None):
    if "set_leaves" in ontology.node(root_id):
        return ontology.node(root_id)["set_leaves"]
    children = set(ontology.children(node=root_id))
    children.discard(root_id)
    children = list(children)
    if not children:
        leaves = {root_id}
        num_leaves = 0
    else:
        leaves = {
            leaf
            for child_id in children for leaf in _set_num_leaves_in_subgraph(
                ontology=ontology, root_id=child_id, relations=relations)
        }
        num_leaves = len(leaves)
    ontology.node(root_id)["num_leaves"] = num_leaves
    ontology.node(root_id)["set_leaves"] = leaves
    return leaves
コード例 #30
0
def neighbor_by_relation(ontology_graph: ontol.Ontology, term, relation):
    """Return the parents of `term` reachable through the given relation only."""
    relation_filter = [relation]
    return ontology_graph.parents(term, relations=relation_filter)