def info_content_sim(gene0, gene1, term_list0, term_list1):
    """Return a size-based score for two term lists.

    The returned value is ``(len(term_list0) + len(term_list1)) /
    (len(Gene_Term_Info_List) + 1)``.  ``gene0`` and ``gene1`` are accepted
    but never read.

    NOTE(review): the nesting below (return placed after both loops) is the
    most plausible reading of the code -- confirm it is the intended one.
    """
    # Every (term0, term1) pair is looked up in the BP and MF graphs, but the
    # resulting ``lca`` is never used afterwards.
    # NOTE(review): this looks like unfinished work -- confirm whether the
    # lowest common ancestor was meant to feed into the score.
    for term0 in term_list0:
        for term1 in term_list1:
            if check_node_exist(BP_Graph, term0, term1):
                lca = nx.lowest_common_ancestor(BP_Graph, term0, term1)
            if check_node_exist(MF_Graph, term0, term1):
                lca = nx.lowest_common_ancestor(MF_Graph, term0, term1)
    return (len(term_list0) + len(term_list1)) / (len(Gene_Term_Info_List) + 1)
    pass  # unreachable: follows the return
def simplify_bubbles(graph):
    """Remove the bubbles found in the graph.

    A node with more than one predecessor may close a bubble.  For such a
    node, consecutive predecessors are compared pairwise and the first pair
    that has a lowest common ancestor defines the bubble ``(ancestor, node)``.
    All bubbles are collected first and resolved afterwards so the graph is
    not modified while its nodes are being iterated.

    :Parameters:
        graph: (nx.DiGraph)

    Returns: graph (nx.DiGraph)
    """
    bubbles = []
    for node in graph.nodes:
        predecessors = list(graph.predecessors(node))
        if len(predecessors) < 2:
            continue
        # Compare each predecessor with the next one.
        for first, second in zip(predecessors, predecessors[1:]):
            ancestor = nx.lowest_common_ancestor(graph, first, second)
            if ancestor is not None:
                # Keep the two extremities of the bubble; one pair is enough.
                bubbles.append((ancestor, node))
                break

    for ancestor, descendant in bubbles:
        # An earlier resolution may already have removed one extremity.
        if ancestor in graph.nodes and descendant in graph.nodes:
            graph = solve_bubble(graph, ancestor, descendant)
    return graph
def solve_out_tips(graph, ending_nodes):
    """Remove tips linked to bad ending nodes.

    Consecutive pairs of ending nodes are examined.  When both nodes are
    still in the graph and share a lowest common ancestor, every simple path
    from that ancestor to either ending node is collected and handed to
    ``select_best_path`` together with its length and average weight.

    :Parameters:
        graph: (nx.DiGraph)
        ending_nodes: list of sink nodes (list)

    Returns: graph (nx.DiGraph)
    """
    for first, second in zip(ending_nodes, ending_nodes[1:]):
        # A previous iteration may already have deleted one of the nodes.
        if first not in graph.nodes or second not in graph.nodes:
            continue
        ancestor = nx.lowest_common_ancestor(graph, first, second)
        if ancestor is None:
            continue
        # All paths ancestor -> first sink, then all paths ancestor -> second.
        path_list = [
            path
            for sink in (first, second)
            for path in nx.all_simple_paths(graph, source=ancestor, target=sink)
        ]
        path_length = [len(path) for path in path_list]
        weight_avg_list = [path_average_weight(graph, path) for path in path_list]
        # Per the original comment: drop every path that is not the best one,
        # including its ending node.
        graph = select_best_path(graph, path_list, path_length,
                                 weight_avg_list, delete_sink_node=True)
    return graph
def simplify_bubbles(graph):
    """Resolve every bubble of the graph and return the resulting graph.

    For each node with several predecessors, every pair of predecessors is
    tested for a lowest common ancestor; each distinct ancestor found forms a
    bubble ``(ancestor, node)`` that is later passed to ``solve_bubble``.
    """
    bubbles = []
    for sink in graph.nodes:
        parents = list(graph.predecessors(sink))
        if len(parents) <= 1:
            continue
        found = []
        for left, right in itertools.combinations(parents, 2):
            source = nx.lowest_common_ancestor(graph, left, right, None)
            if source is not None:
                found.append(source)
        # Deduplicate the ancestors before pairing them with the sink.
        bubbles.extend((source, sink) for source in set(found))

    for source, sink in bubbles:
        # Skip bubbles whose extremities were removed by an earlier resolution.
        if source in graph.nodes and sink in graph.nodes:
            graph = solve_bubble(graph, source, sink)
    return graph
def simplify_bubbles(grap):
    """
    Simplify the bubbles of a given graph.

    Only the first two predecessors of a node are used to look for the
    lowest common ancestor that opens the bubble.

    Parameters
    ----------
    grap : networkX graph
        Graph obtained from the NetworkX module.

    Returns
    -------
    grap : networkX graph
        Graph obtained from the NetworkX module with bubbles simplified.
    """
    bubbles = []
    for des_nd in grap.nodes:
        pred_list = list(grap.predecessors(des_nd))
        if len(pred_list) > 1:
            anc_nd = nx.lowest_common_ancestor(grap, pred_list[0], pred_list[1])
            # No common ancestor means there is no bubble to resolve.
            if anc_nd is not None:
                bubbles.append((anc_nd, des_nd))

    for anc_nd, des_nd in bubbles:
        # Resolving one bubble can remove nodes that belong to another one.
        if anc_nd in grap.nodes and des_nd in grap.nodes:
            grap = solve_bubble(grap, anc_nd, des_nd)
    return grap
def sim_wup(G, i, j):
    """Wu-Palmer style similarity between nodes ``i`` and ``j`` of ``G``.

    Depths are shortest-path lengths from the root ``"owl.Thing"`` measured
    on the undirected version of ``G``.  The score is
    ``2 * depth(lcs) / (depth(i) + depth(j))``, ``0`` when that denominator
    is zero, and always ``1.0`` when ``i == j``.
    """
    # Root node of the tree.
    root = "owl.Thing"
    # Least common subsumer (lowest common ancestor) of the two nodes.
    lcs = nx.lowest_common_ancestor(G, i, j)
    undirected = G.to_undirected()
    # Depth of a node = shortest path from the root to that node.
    depth_lcs = shortest_path_length(undirected, root, lcs)
    depth_i = shortest_path_length(undirected, root, i)
    depth_j = shortest_path_length(undirected, root, j)
    try:
        score = (2 * depth_lcs) / (depth_i + depth_j)
    except ZeroDivisionError:
        score = 0
    return 1.0 if i == j else score
def save_wordnet_data(root_output_path, wordnet_tree, synsetmap, examples=5, max_combinations=10):
    """Draw sample WordNet subgraphs for randomly chosen label pairs.

    For each of ``examples`` rounds, two labels are drawn at random from
    ``synsetmap``.  Up to ``max_combinations`` synset pairs (one synset per
    label) are sampled; for each pair the subgraph made of the shortest paths
    from their lowest common ancestor to both synsets is collected.  The
    collected subgraphs are passed to ``draw_subgraphs`` along with the
    output directory, which is created if missing.
    """
    output_path = os.path.join(root_output_path, 'wordnedSubgraphs')
    os.makedirs(output_path, exist_ok=True)

    labels = list(synsetmap.keys())
    for _ in range(examples):
        first_label = labels[random.randint(0, len(labels) - 1)]
        second_label = labels[random.randint(0, len(labels) - 1)]

        all_pairs = list(itertools.product(synsetmap[first_label], synsetmap[second_label]))
        sample_size = min(len(all_pairs), max_combinations)
        sampled_pairs = random.sample(all_pairs, sample_size)

        subgraphs = []
        for synset_a, synset_b in sampled_pairs:
            name_a = synset_a.name()
            name_b = synset_b.name()
            ancestor = nx.lowest_common_ancestor(wordnet_tree, name_a, name_b)
            path_to_a = nx.shortest_path(wordnet_tree, ancestor, name_a)
            path_to_b = nx.shortest_path(wordnet_tree, ancestor, name_b)
            # Nodes on either branch below the common ancestor.
            branch_nodes = set(path_to_b).union(set(path_to_a))
            subgraphs.append(wordnet_tree.subgraph(branch_nodes))
        draw_subgraphs(output_path, first_label, second_label, subgraphs, synsetmap)
def hyponymy_score_slow(self, hypernym, hyponym, dtype: Type = float):
    """Return a signed distance describing how ``hyponym`` relates to ``hypernym``.

    Four cases are tried in order on ``self.dag``:

    1. ``hypernym`` reaches ``hyponym``: the shortest path length (>= 0).
    2. ``hyponym`` reaches ``hypernym``: minus the shortest path length.
    3. The nodes share a lowest common ancestor: minus the shortest path
       length from that ancestor to ``hypernym``.
    4. Otherwise: minus ``self.depth(hypernym)``.

    :param hypernym: candidate ancestor node
    :param hyponym: candidate descendant node
    :param dtype: callable applied to the distance before returning
    :raises ValueError: if either node is not in the graph
    """
    graph = self.dag
    for node in (hypernym, hyponym):
        if node not in graph:
            raise ValueError(f"invalid node is specified: {node}")

    # 1) hypernym is the ancestor of the hyponym (=hyponymy)
    if nx.has_path(graph, hypernym, hyponym):
        dist = nx.shortest_path_length(graph, hypernym, hyponym)
    # 2) hyponym is the ancestor of the hypernym (=reverse hyponymy)
    elif nx.has_path(graph, hyponym, hypernym):
        dist = -nx.shortest_path_length(graph, hyponym, hypernym)
    else:
        # The lowest common ancestor is only needed once both direct
        # relations have been ruled out, so it is computed here rather than
        # up front.
        lowest_common_ancestor = nx.lowest_common_ancestor(
            graph, hypernym, hyponym)
        # 3) these two entities are co-hyponyms
        if lowest_common_ancestor is not None:
            dist = -nx.shortest_path_length(
                graph, lowest_common_ancestor, hypernym)
        # 4) other
        else:
            dist = -self.depth(hypernym)
    return dtype(dist)
def sim_jcn(G, i, j):
    """Jiang-Conrath style similarity between nodes ``i`` and ``j`` of ``G``.

    Identical nodes score ``1.0``.  Otherwise the score is
    ``1 / (IC(i) + IC(j) - 2 * IC(lcs))`` where ``lcs`` is the lowest common
    ancestor of the two nodes; a zero denominator yields ``1``.
    """
    if i == j:
        return 1.0

    lcs = nx.lowest_common_ancestor(G, i, j)
    ic_i = information_content(G, i)
    ic_j = information_content(G, j)
    ic_lcs = information_content(G, lcs)
    try:
        return 1 / (ic_i + ic_j - 2 * ic_lcs)
    except ZeroDivisionError:
        return 1
def simplify_bubbles(graph):
    """Take a graph and return it without bubbles.

    Only the first two predecessors of a node are considered: per the
    original author's note, if more of them lead to the same most recent
    common ancestor (MRCA) the shortest path is chosen anyway.

    Parameters :
    ------------
    graph : networkx DiGraph()

    Returns :
    ------------
    graph : networkx DiGraph()
    """
    # Build every (MRCA, node) couple delimiting a bubble.
    couple_bubble = []
    for node in graph:
        pred_node = list(graph.predecessors(node))
        if len(pred_node) < 2:
            continue
        mrca = nx.lowest_common_ancestor(graph, pred_node[0], pred_node[1])
        # Without a common ancestor there is no bubble.
        if mrca is None:
            continue
        couple_bubble.append((mrca, node))

    # Resolve the bubbles, skipping those whose ends have since been removed.
    for mrca, node in couple_bubble:
        if mrca in graph and node in graph:
            graph = solve_bubble(graph, mrca, node)
    return graph
def simplify_bubbles(graph):
    """
    Simplify the graph by removing its bubbles.

    A bubble is recorded for every node with at least two predecessors whose
    first two predecessors share a lowest common ancestor.

    Parameter:
    ---------
    graph : object
        networkx DiGraph().

    Return:
    ------
    graph : object
        networkx DiGraph().
    """
    bubbles_nodes = []
    # Find all the bubbles.
    for node in graph:
        node_predecessors = list(graph.predecessors(node))
        if len(node_predecessors) > 1:
            ancestor = nx.lowest_common_ancestor(graph, node_predecessors[0],
                                                 node_predecessors[1])
            # ``None`` means the two predecessors have no common ancestor.
            if ancestor is not None:
                bubbles_nodes.append((ancestor, node))

    # Keep the best path of each bubble.
    for ancestor, node in bubbles_nodes:
        # A previously solved bubble may have deleted one of the two ends.
        if ancestor in graph and node in graph:
            graph = solve_bubble(graph, ancestor, node)
    return graph
def harmonize(self, g_matching):
    """
    Helper method to harmonize greedy alignments.

    Step 1 deals with out-of-pedigree nodes, in two cases:

    [1] if a node is matched to a founder node in the pedigree, then all of
        its ancestors in the tree sequence are out-of-pedigree nodes;
    [2] if two sibling nodes of the tree sequence are matched to pedigree
        nodes that have no lowest common ancestor in the pedigree graph,
        the tree-sequence ancestors of both are out-of-pedigree nodes.

    Out-of-pedigree nodes are recorded by mapping them to ``None``.

    :param g_matching: mapping from tree-sequence node to pedigree node
        (or ``None``); it is updated in place.
    :return: the same ``g_matching`` mapping.
    """

    def mark_ancestors_out_of_pedigree(ts_node):
        # Breadth-first walk over every ancestor of ``ts_node`` in the tree
        # sequence.  The queue is a private list so the list handed back by
        # ``self.ts.predecessors`` is never extended in place, and each
        # ancestor is expanded only once.
        queue = list(self.ts.predecessors(ts_node))
        seen = set(queue)
        position = 0
        while position < len(queue):
            current = queue[position]
            position += 1
            g_matching[current] = None
            for parent in self.ts.predecessors(current):
                if parent not in seen:
                    seen.add(parent)
                    queue.append(parent)

    # Case [1]: matched to a pedigree founder.
    ped_founders = self.ped.founders()
    for ts_n in list(g_matching):
        if g_matching[ts_n] in ped_founders:
            mark_ancestors_out_of_pedigree(ts_n)

    # Case [2]: siblings whose pedigree matches are not connected.
    for ts_n1 in list(g_matching):
        ped_n1 = g_matching[ts_n1]
        if ped_n1 is None:
            continue
        for ts_n2 in self.ts.siblings(ts_n1):
            if ts_n2 not in g_matching or g_matching[ts_n2] is None:
                continue
            ped_n2 = g_matching[ts_n2]
            if nx.lowest_common_ancestor(self.ped.graph, ped_n1, ped_n2) is None:
                for ts_n in (ts_n1, ts_n2):
                    mark_ancestors_out_of_pedigree(ts_n)

    return g_matching
def integrate_base_sim(gene0, gene1, term_list0, term_list1):
    """Score the first term pair that is found in the BP or MF graph.

    Term pairs are scanned in order; for the first pair present in
    ``BP_Graph`` (checked first) or ``MF_Graph``, the result is
    ``get_lca_info2`` of their lowest common ancestor, or ``0`` when they
    have none.  ``0`` is also returned when no pair is found in either
    graph.  ``gene0`` and ``gene1`` are accepted but not used.
    """
    for term0 in term_list0:
        for term1 in term_list1:
            for ontology in (BP_Graph, MF_Graph):
                if not check_node_exist(ontology, term0, term1):
                    continue
                lca = nx.lowest_common_ancestor(ontology, term0, term1)
                return 0 if lca is None else get_lca_info2(lca)
    return 0
def wu_palmer(self, first_node: int, second_node: int) -> float:
    """Wu-Palmer similarity between two nodes of ``self.graph``.

    Nodes are looked up in ``self.nodes_set`` as given and in the graph by
    their ``str`` form.  Identical nodes score ``1.0``.  When the nodes share
    a lowest common ancestor (LCA) the score is
    ``2 * depth(lca) / (depth(first) + depth(second))`` with each node depth
    measured through the LCA; otherwise it is
    ``2 / (depth(first) + depth(second))``.  If either node is unknown the
    result of ``self.not_found_in_graph`` is returned.
    """
    if first_node not in self.nodes_set or second_node not in self.nodes_set:
        return self.not_found_in_graph(first_node, second_node)
    if first_node == second_node:
        return 1.0

    def depth_from_roots(node):
        # Shortest distance from any in-degree-0 ancestor, offset by 2;
        # a node without ancestors has depth 2.
        ancestors = nx.algorithms.dag.ancestors(self.graph, node)
        if len(ancestors) == 0:
            return 2
        return min(
            nx.shortest_path_length(self.graph, anc, node)
            for anc in ancestors
            if self.graph.in_degree(anc) == 0
        ) + 2

    first_key = str(first_node)
    second_key = str(second_node)
    lca = nx.lowest_common_ancestor(self.graph, first_key, second_key)

    if lca is None:
        return 2 / (depth_from_roots(first_key) + depth_from_roots(second_key))

    lca_depth = depth_from_roots(lca)
    first_depth = nx.shortest_path_length(self.graph, lca, first_key) + lca_depth
    second_depth = nx.shortest_path_length(self.graph, lca, second_key) + lca_depth
    return (2 * lca_depth) / (first_depth + second_depth)
def _common_ancestor(types): g = nx.DiGraph() for t in types: g.add_edges_from(pairwise(reversed(inspect.getmro(t)))) while len(types) != 1: types, pairs = [], pairwise(types) for pair in pairs: types.append(nx.lowest_common_ancestor(g, *pair)) return types[0]
def sim_resnik(G, node1, node2):
    """Resnik style similarity between ``node1`` and ``node2`` in ``G``.

    The score is the information content of the nodes' lowest common
    ancestor, except that identical nodes always score ``1.0``.  The
    ancestor and its information content are computed in both cases.
    """
    lcs = nx.lowest_common_ancestor(G, node1, node2)
    ic_of_lcs = information_content(G, lcs)
    return 1.0 if node1 == node2 else ic_of_lcs
def quick_ancestry_multiple_sets(list_of_termsets, ontology, intersection_ratio, iterations):
    """Generalize each term set by replacing random term pairs with their ancestor.

    :param list_of_termsets: termsets as found by explanations
    :param ontology: a nx graph
    :param intersection_ratio: maximum ratio of connected terms of other
        classes to a newly generalized term
    :param iterations: number of generalization passes
    :return: a new list with one (possibly generalized) term set per input set
    """
    generalized = list_of_termsets.copy()
    for _ in range(iterations):
        for index, termset in enumerate(generalized):
            terms = list(termset)
            term_count = len(terms)
            replacement = set()
            # Whether each term contributed to an accepted pair ancestor.
            consumed = [False] * term_count

            for left in range(term_count):
                right = random.randint(0, term_count - 1)
                if left == right:
                    continue
                ancestor = nx.lowest_common_ancestor(
                    ontology, terms[left], terms[right])
                if ancestor is None:
                    continue

                # Count how many terms of the *other* original term sets fall
                # under the candidate ancestor.
                covered = nx.descendants(ontology, ancestor)
                overlap = 0
                foreign_total = 0
                for other_index, other_terms in enumerate(list_of_termsets):
                    if other_index == index:
                        continue
                    overlap += len(covered.intersection(other_terms))
                    foreign_total += len(other_terms)

                if foreign_total == 0 or intersection_ratio >= float(overlap) / float(foreign_total):
                    replacement.add(ancestor)
                    consumed[left] = True
                    consumed[right] = True

            # Terms that were never generalized are kept as they are.
            replacement.update(
                term for term, was_used in zip(terms, consumed) if not was_used)
            if replacement:
                generalized[index] = replacement
    return generalized
def get_least_common_ancestor(self, first: N, second: N) -> N:
    """Return the least (lowest) common ancestor of two nodes of the graph.

    The lookup is delegated to ``lowest_common_ancestor`` on ``self._graph``.
    Both nodes have to be part of the graph.

    :param first: The first node
    :param second: The second node
    :return: The least common ancestor node of the two nodes
    """
    ancestor = lowest_common_ancestor(self._graph, first, second)
    return ancestor
def simplify_bubbles(kmer_tree):
    """Take a graph as argument and return a graph without bubbles.

    For every node with at least two predecessors, the lowest common
    ancestor of its first two predecessors marks the start of a bubble that
    ends at the node.  Bubbles are collected first, then resolved one by one
    with ``solve_bubble``.
    """
    bubble_nodes = []
    for node in kmer_tree:
        predecessors = list(kmer_tree.predecessors(node))
        if len(predecessors) < 2:
            continue
        ancestor = nx.lowest_common_ancestor(kmer_tree, predecessors[0],
                                             predecessors[1])
        # No common ancestor: the two branches never diverged from one node.
        if ancestor is not None:
            bubble_nodes.append((ancestor, node))

    for ancestor, node in bubble_nodes:
        # Either end may have been removed while resolving another bubble.
        if ancestor in kmer_tree and node in kmer_tree:
            kmer_tree = solve_bubble(kmer_tree, ancestor, node)
    return kmer_tree
def sim_resnik(G, i, j):
    """Resnik style similarity between nodes ``i`` and ``j`` of ``G``.

    Identical nodes score ``1.0``.  Otherwise the score is the information
    content of their lowest common ancestor; the ancestor and the score are
    printed along the way.
    """
    if i == j:
        return 1.0

    lcs = nx.lowest_common_ancestor(G, i, j)
    print('lcs: ' + str(lcs))
    score = information_content(G, lcs)
    print('sim_res: ' + str(score))
    return score
def lowest_common_ancestor(self, nodes):
    """
    Compute the lowest common ancestor (LCA) of several nodes.

    The LCA is folded left to right: the running result is combined with
    each following node in ``self.tree``.

    :param nodes: tuple of nodes (at least one)
    :return: the Lowest Common Ancestor of the nodes.
    """
    # TODO change that using
    # networkx::tree_all_pairs_lowest_common_ancestor
    head, tail = nodes[0], nodes[1:]
    running = head
    for candidate in tail:
        running = nx.lowest_common_ancestor(self.tree, running, candidate)
    return running
def simplify_bubbles(graph):
    """Resolve the first bubble found in the graph, if any.

    Every pair of distinct predecessors of a node is tested.  A bubble exists
    when the pair has a lowest common ancestor that is neither of the two
    predecessors.  The graph is returned right after the first bubble is
    resolved, because resolving it modifies the graph being iterated.

    Returns the graph produced by ``solve_bubble``, or the unchanged graph
    when nothing was simplified.
    """
    for node in graph.nodes:
        predecessors = list(graph.predecessors(node))
        for index, first in enumerate(predecessors):
            # Start after ``first``: a node paired with itself is its own
            # ancestor and can never form a bubble.
            for second in predecessors[index + 1:]:
                ancestor = nx.lowest_common_ancestor(graph, first, second)
                # ``None`` means the two predecessors share no ancestor.
                if ancestor is None or ancestor == first or ancestor == second:
                    continue
                return solve_bubble(graph, ancestor, node)
    return graph  # If nothing is simplified return the same graph
def lowest_common_ancestor(self, node_1, node_2):
    """Return the lowest common ancestor of two nodes of ``self.graph_``.

    The node with fewer ancestors is passed first; on a tie ``node_2`` goes
    first.  Per the original author's note, the lookup on a directed graph
    has to be made in that order.
    """
    ancestor_count_1 = len(nx.ancestors(self.graph_, node_1))
    ancestor_count_2 = len(nx.ancestors(self.graph_, node_2))
    if ancestor_count_1 < ancestor_count_2:
        ordered = (node_1, node_2)
    else:
        ordered = (node_2, node_1)
    # Node id of the lowest common ancestor.
    return nx.lowest_common_ancestor(self.graph_, *ordered)
def sim_wup(Graph, node1, node2):
    """Wu-Palmer style similarity between two nodes of ``Graph``.

    Depths are shortest-path lengths from the root ``"Thing"``.  The score is
    ``2 * depth(lcs) / (depth(node1) + depth(node2))`` where ``lcs`` is the
    lowest common ancestor of the two nodes.  When both depths are zero
    (both nodes are the root itself) the score is ``1.0``.
    """
    # Root node of the tree.
    root = "Thing"
    # Least common subsumer (lowest common ancestor).
    lcs = nx.lowest_common_ancestor(Graph, node1, node2)
    # Depth of a node = shortest path from the root to that node.
    depth_lcs = nx.shortest_path_length(Graph, root, lcs)
    depth_node1 = nx.shortest_path_length(Graph, root, node1)
    depth_node2 = nx.shortest_path_length(Graph, root, node2)

    total_depth = depth_node1 + depth_node2
    if total_depth == 0:
        # Only the root has depth 0, so both nodes are the root: identical
        # nodes are maximally similar (and the ratio would divide by zero).
        return 1.0
    return (2 * depth_lcs) / total_depth
def lca(tree: nx.DiGraph, a: Hashable, b: Hashable) -> Optional[Hashable]:
    """Lowest common ancestor that tolerates nodes missing from the tree.

    Wraps the NetworkX ``lowest_common_ancestor``: when either node is not
    in ``tree`` the result is ``None`` instead of a lookup on the graph.

    :param tree: Node hierarchy
    :type tree: nx.DiGraph
    :param a: First node
    :type a: Hashable
    :param b: Second node
    :type b: Hashable
    :return: Common ancestor if it exists
    :rtype: Optional[Hashable]
    """
    if a not in tree or b not in tree:
        return None
    return nx.lowest_common_ancestor(tree, a, b)
def gen_hits(tree: LCATaxonomy, nx_tree: nx.Graph, num_reads_to_simulate: int=1_000):
    """Yield simulated hits together with their lowest common ancestor.

    For each simulated read, the number of hits is ``1 + Poisson(1.0)``.
    That many tips are drawn without replacement from the keys of
    ``tree.ref_to_taxa_name`` and mapped to node ids through
    ``tree.ref_to_node_id_ix_level``.  The LCA is the single node id, or the
    left-to-right fold of pairwise LCAs in ``nx_tree``.

    Yields ``(node_ids, lca, choices)`` tuples.
    """
    hit_counts = np.random.poisson(lam=1.0, size=num_reads_to_simulate) + 1
    tips = np.array(list(tree.ref_to_taxa_name.keys()))

    def pair_lca(left, right):
        # LCA of two node ids in the taxonomy graph.
        return nx.lowest_common_ancestor(nx_tree, left, right)

    for hit_count in hit_counts:
        choices = np.random.choice(tips, size=hit_count, replace=False)
        node_ids = [tree.ref_to_node_id_ix_level[ref][0] for ref in choices]
        lca = node_ids[0] if len(node_ids) == 1 else reduce(pair_lca, node_ids)
        yield node_ids, lca, choices
def simplify_bubbles(graph):
    """Remove all bubbles from the graph.

    A bubble is recorded for each node having at least two predecessors
    whose first two predecessors share a lowest common ancestor; it spans
    from that ancestor to the node.  Returns the simplified graph.
    """
    bubbles = []
    for node in graph.nodes:
        middle = list(graph.predecessors(node))
        if len(middle) < 2:
            continue
        c_ancestor = nx.lowest_common_ancestor(graph, middle[0], middle[1])
        # ``None`` means the two branches have no common starting node.
        if c_ancestor is not None:
            bubbles.append((c_ancestor, node))

    for start, end in bubbles:
        # Skip bubbles whose ends were deleted by an earlier resolution.
        if start in graph.nodes and end in graph.nodes:
            graph = solve_bubble(graph, start, end)
    return graph
def simplify_bubbles(graph):
    """Remove the first bubble found in a graph.

    For each node with several predecessors, every pair of predecessors is
    checked.  When a pair has a lowest common ancestor that is not one of
    the two predecessors, the bubble between that ancestor and the node is
    resolved and the resulting graph is returned immediately (the graph must
    not keep being iterated after it has been modified).  The graph is
    returned unchanged when no bubble is found.
    """
    for node_1 in graph.nodes:
        nodes_ancestor = list(graph.predecessors(node_1))
        if len(nodes_ancestor) <= 1:
            continue
        for i, first in enumerate(nodes_ancestor[:-1]):
            for second in nodes_ancestor[i + 1:]:
                path = nx.lowest_common_ancestor(graph, first, second)
                # No common ancestor at all: nothing to resolve for this pair.
                if path is None:
                    continue
                if path != first and path != second:
                    return solve_bubble(graph, path, node_1)
    return graph
def simplify_bubbles(graph):
    """Take a graph and return it without bubbles."""
    new_g = graph
    bubble = []
    # Bubbles are found by looking at nodes with more than one predecessor.
    for node in new_g.nodes():
        pred = list(graph.predecessors(node))
        # When a node has several predecessors, check whether the first two
        # have a common ancestor; if so, record the start and end nodes of
        # the bubble.
        if len(pred) > 1:
            anc = nx.lowest_common_ancestor(new_g, pred[0], pred[1])
            if anc is not None:
                bubble.append((anc, node))

    # Resolve each bubble by passing its start and end to solve_bubble,
    # skipping bubbles whose ends were removed by an earlier resolution.
    for start, end in bubble:
        if start in new_g.nodes() and end in new_g.nodes():
            new_g = solve_bubble(new_g, start, end)
    return new_g
def simplify_bubbles(graph):
    """Resolve every bubble of the graph and return the resulting graph.

    For each node with more than one predecessor, every pair of
    predecessors that has a lowest common ancestor contributes one
    ``(ancestor, node)`` bubble.  Bubbles are resolved in the order found,
    skipping those whose ends are no longer in the graph.
    """
    bubbles = []
    for node in graph.nodes:
        parents = list(graph.predecessors(node))
        if len(parents) <= 1:
            continue
        for left, right in itertools.combinations(parents, 2):
            shared = nx.lowest_common_ancestor(graph, left, right, None)
            if shared is not None:
                bubbles.append((shared, node))

    for start, end in bubbles:
        if start in graph.nodes and end in graph.nodes:
            graph = solve_bubble(graph, start, end)
    return graph
def test_lowest_common_ancestor1(self):
    """Test that the one-pair function works on default."""
    # Nodes 0 and 2 both point to 1 and share no ancestor, so the supplied
    # default must come back unchanged.
    G = nx.DiGraph([(0, 1), (2, 1)])
    sentinel = object()
    assert nx.lowest_common_ancestor(G, 0, 2, default=sentinel) is sentinel
def test_lowest_common_ancestor2(self):
    """Test that the one-pair function works on identity."""
    # A node is its own lowest common ancestor.
    G = nx.DiGraph()
    G.add_node(3)
    assert nx.lowest_common_ancestor(G, 3, 3) == 3