def _obo_namespaces(onto_node) -> List[str]:
    """Return every "OIO:hasOBONamespace" value stored on an ontology node.

    Args:
        onto_node: node attribute mapping as returned by ``ontology.node``

    Returns:
        List[str]: the namespace values in declaration order, empty when the
            node carries no "meta"/"basicPropertyValues" section
    """
    if "meta" not in onto_node or "basicPropertyValues" not in onto_node["meta"]:
        return []
    return [prop["val"] for prop in onto_node["meta"]["basicPropertyValues"]
            if prop["pred"] == "OIO:hasOBONamespace"]


def get_all_common_ancestors(node_ids: List[str], ontology: Ontology, min_distance_from_root: int = 0,
                             nodeids_blacklist: List[str] = None):
    """Retrieve all common ancestors for the provided list of nodes

    Args:
        node_ids (List[str]): list of starting nodes
        ontology (Ontology): the ontology to which the provided nodes belong
        min_distance_from_root (int): minimum value of the "depth" attribute an
            ancestor must have to be kept
        nodeids_blacklist (List[str]): node ids to be excluded from the result

    Returns:
        list of ``(ancestor_id, ancestor_label, covered_starting_nodes)``
        tuples. An ancestor is reported when it covers more than one starting
        node, or when it is itself the starting node it covers.

    Raises:
        ValueError: if the starting nodes declare different OBO namespaces
    """
    # check if all ids are connected to the same root node
    common_root = None
    for node_id in node_ids:
        for namespace in _obo_namespaces(ontology.node(node_id)):
            if common_root and common_root != namespace:
                raise ValueError(
                    "Cannot get common ancestors of nodes connected to different roots")
            common_root = namespace

    # built once so the per-ancestor membership test below is O(1)
    blacklist = set(nodeids_blacklist) if nodeids_blacklist else set()

    ancestors = defaultdict(list)
    for node_id in node_ids:
        for ancestor in ontology.ancestors(node=node_id, reflexive=True):
            onto_anc = ontology.node(ancestor)
            anc_namespaces = _obo_namespaces(onto_anc)
            # when several namespaces are declared the last one wins
            onto_anc_root = anc_namespaces[-1] if anc_namespaces else None
            if (onto_anc["depth"] >= min_distance_from_root
                    and (not onto_anc_root or onto_anc_root == common_root)
                    and ancestor not in blacklist):
                ancestors[ancestor].append(node_id)

    return [(ancestor, ontology.label(ancestor), set(covered_nodes))
            for ancestor, covered_nodes in ancestors.items()
            if len(covered_nodes) > 1 or ancestor == covered_nodes[0]]
def node_is_in_branch(ontology: Ontology, node_id: str, branch_root_ids: List[str]):
    """Tell whether a node lies in any of the branches rooted at the given ids

    The node itself counts as part of its own branch, because the ancestor
    lookup is reflexive.

    Args:
        ontology (Ontology): the ontology the node belongs to
        node_id (str): the node to test
        branch_root_ids (List[str]): ids of the branch roots

    Returns:
        bool: True if the node or one of its ancestors is a branch root
    """
    branch_roots = set(branch_root_ids)
    # isdisjoint stops at the first shared id and never materialises a list
    return not branch_roots.isdisjoint(ontology.ancestors(node=node_id, reflexive=True))
def _set_num_subsumers_in_subgraph(ontology: Ontology, root_id: str, relations: List[str] = None):
    """Store a "num_subsumers" attribute on every node of the subgraph under root_id

    A node's count is the size of its reflexive ancestor set over
    ``relations``. When a node has exactly one parent whose count is already
    known, the count is derived as ``parent + 1`` instead of recomputing the
    ancestor set.

    The traversal is iterative and visits each node once, so deep hierarchies
    do not hit the recursion limit and nodes reachable through several parents
    do not have their subtree walked repeatedly.

    Args:
        ontology (Ontology): the ontology whose nodes are annotated in place
        root_id (str): id of the node where the traversal starts
        relations (List[str]): relations to follow when collecting ancestors
            and children; None means all relations
    """
    visited = set()
    pending = [root_id]
    while pending:
        node_id = pending.pop()
        if node_id in visited:
            continue
        visited.add(node_id)

        parents = ontology.parents(node_id)
        only_parent = ontology.node(parents[0]) if len(parents) == 1 else None
        if only_parent is not None and "num_subsumers" in only_parent:
            ontology.node(node_id)["num_subsumers"] = only_parent["num_subsumers"] + 1
        else:
            # several parents, no parent, or a parent outside the traversed
            # subgraph that has no count yet: count the ancestors directly
            ontology.node(node_id)["num_subsumers"] = len(
                ontology.ancestors(node=node_id, relations=relations, reflexive=True))

        # children are queued only after their parent has its count
        pending.extend(ontology.children(node=node_id, relations=relations))
def ancestors(term: str, ontology: ontol.Ontology, cache) -> Set[str]:
    """Return the reflexive "subClassOf" ancestors of a term, memoised in cache

    Progress is reported through ``click.echo``. A term equal to the
    module-level ``MF`` constant is treated as having no ancestors and is
    never cached.

    Args:
        term (str): id of the term to look up
        ontology (ontol.Ontology): ontology queried on a cache miss
        cache: mapping from term id to its ancestor set; updated in place on
            a miss

    Returns:
        Set[str]: the ancestor ids (the same set object that is stored in
            ``cache``), or an empty set for ``MF``
    """
    click.echo("Computing ancestors for {}".format(term))

    if term == MF:
        click.echo("Found 0")
        return set()

    if term in cache:
        cached = cache[term]
        click.echo("Found {} (from cache)".format(len(cached)))
        return cached

    computed = set(
        ontology.ancestors(term, relations=["subClassOf"], reflexive=True))
    cache[term] = computed
    click.echo("Found {} (from adding to cache: {} terms added)".format(
        len(computed), len(cache)))
    return computed
def find_set_covering(
        subsets: List[Tuple[str, str, Set[str]]],
        value: List[float] = None,
        max_num_subsets: int = None,
        ontology: Ontology = None) -> Union[None, List[Tuple[str, Set[str]]]]:
    """greedy algorithm to solve set covering problem

    At every round the remaining subset with the highest score is selected,
    where the score is the number of not-yet-covered elements it contains,
    multiplied by its entry in ``value`` when provided. Ties are broken by
    subset name. Selection stops when all subsets have been used, the universe
    is covered, or ``max_num_subsets`` subsets have been selected.

    Args:
        subsets (List[Tuple[str, str, Set[str]]]): list of subsets, each of which must contain a tuple with the first
            element being the ID of the subset, the second being the name, and the third the actual set of elements
        value (List[float]): list of weights of the subsets, aligned with ``subsets``
        max_num_subsets (int): maximum number of subsets in the final list
        ontology (Ontology): ontology to use to remove possible parent-child relationships in the result set. When a
            newly selected subset is an ancestor of previously selected ones, those are dropped from the result

    Returns:
        Union[None, List[Tuple[str, Set[str]]]]: the selected subsets as (ID, elements) tuples in selection order, or
            None if ``value`` is provided and its length differs from the number of distinct subset IDs
    """
    logger.debug("starting set covering optimization")
    elem_to_process = {subset[0] for subset in subsets}
    if value and len(value) != len(elem_to_process):
        return None
    universe = {e for subset in subsets for e in subset[2]}
    # a neutral weight keeps a single scoring expression for both modes
    weights = value if value else [1] * len(subsets)
    included_elmts = set()
    included_sets = []
    while (elem_to_process and included_elmts != universe
           and (not max_num_subsets or len(included_sets) < max_num_subsets)):
        candidates = [(weight * len(subset[2] - included_elmts), subset)
                      for subset, weight in zip(subsets, weights)
                      if subset[0] in elem_to_process]
        if not candidates:
            break
        # min() returns the first best candidate, like element 0 of a stable sort
        best = min(candidates, key=lambda c: (-c[0], c[1][1]))[1]
        best_id, best_elements = best[0], best[2]
        elem_to_process.remove(best_id)
        if ontology:
            # rebuild instead of removing while iterating, which skipped the
            # entry following each removed one
            included_sets = [kept for kept in included_sets
                             if best_id not in ontology.ancestors(kept[0])]
        included_elmts |= best_elements
        included_sets.append((best_id, best_elements))
    logger.debug("finished set covering optimization")
    return included_sets
def get_all_common_ancestors(node_ids: List[str], ontology: Ontology, min_distance_from_root: int = 0,
                             nodeids_blacklist: List[str] = None):
    """
    Retrieve all common ancestors for the provided list of nodes

    Args:
        node_ids (List[str]): list of starting nodes
        ontology (Ontology): the ontology to which the provided nodes belong
        min_distance_from_root (int): minimum value of the "depth" attribute an ancestor must have to be kept. The
            starting nodes themselves are exempt from this filter
        nodeids_blacklist (List[str]): node ids to be excluded from the result

    Returns:
        List[CommonAncestor]: list of common ancestors. An ancestor is reported when it covers more than one starting
            node, or when it is itself the starting node it covers

    Raises:
        ValueError: if ``nodes_have_same_root`` reports that the starting nodes do not share a root
    """
    common_root = nodes_have_same_root(node_ids=node_ids, ontology=ontology)
    if common_root is False:
        raise ValueError(
            "Cannot get common ancestors of nodes connected to different roots")

    # built once so the membership tests in the nested loop are O(1)
    starting_nodes = set(node_ids)
    blacklist = set(nodeids_blacklist) if nodeids_blacklist else set()

    ancestors = defaultdict(list)
    for node_id in node_ids:
        for ancestor in ontology.ancestors(node=node_id, reflexive=True):
            onto_anc = ontology.node(ancestor)
            onto_anc_root = None
            if "meta" in onto_anc and "basicPropertyValues" in onto_anc["meta"]:
                for basic_prop_val in onto_anc["meta"]["basicPropertyValues"]:
                    if basic_prop_val["pred"] == "OIO:hasOBONamespace":
                        # when several namespaces are declared the last one wins
                        onto_anc_root = basic_prop_val["val"]
            if ((ancestor in starting_nodes or onto_anc["depth"] >= min_distance_from_root)
                    and (not onto_anc_root or onto_anc_root == common_root)
                    and ancestor not in blacklist):
                ancestors[ancestor].append(node_id)

    return [
        CommonAncestor(node_id=ancestor,
                       node_label=ontology.label(ancestor),
                       covered_starting_nodes=set(covered_nodes))
        for ancestor, covered_nodes in ancestors.items()
        if len(covered_nodes) > 1 or ancestor == covered_nodes[0]
    ]