def randomBipartiteGraph(n, m, k, directed=False, seed=None):
    """Return a random bipartite graph with ``k`` randomly weighted edges.

    Nodes are created by ``add_nodes_with_bipartite_label(G, n, m)``; nodes
    whose ``'bipartite'`` attribute equals 0 form the "top" set and all
    remaining nodes form the "bottom" set.  Edges are drawn uniformly at
    random between the two sets (rejecting duplicates) until ``k`` distinct
    edges exist.  Each sampled edge gets an integer ``'weight'`` in [1, 10].

    Parameters
    ----------
    n, m : int
        Sizes handed to ``add_nodes_with_bipartite_label`` and to the
        complete-graph generator; ``n * m`` is used as the maximum number
        of edges.
    k : int
        Number of edges to sample.
    directed : bool, optional
        If true, the graph is converted to ``nx.DiGraph`` before edges are
        added, so every sampled edge points from a top node to a bottom node.
    seed : int or None, optional
        Seed for the private random generator used for sampling.  ``None``
        (default) seeds from system entropy.  The process-wide ``random``
        state is never touched, so a caller's own ``random.seed(...)`` is
        no longer clobbered by calling this function.

    Returns
    -------
    networkx.Graph or networkx.DiGraph
        The generated graph.  When ``k >= n * m`` the result of
        ``bipartite.complete_bipartite_graph(n, m, create_using=G)`` is
        returned instead of a sampled graph.
    """
    G = nx.Graph()
    G = add_nodes_with_bipartite_label(G, n, m)
    if directed:
        G = nx.DiGraph(G)

    max_edges = n * m  # max_edges for bipartite networks
    if k >= max_edges:  # Maybe we should raise an exception here
        # NOTE(review): edges created on this path carry no 'weight'
        # attribute, unlike the sampled edges below -- confirm that callers
        # do not rely on weights for saturated graphs.
        return bipartite.complete_bipartite_graph(n, m, create_using=G)

    # A private generator keeps sampling reproducible for a given seed
    # without reseeding the global ``random`` module state.
    rng = random.Random(seed)

    top = [node for node, data in G.nodes(data=True) if data['bipartite'] == 0]
    top_lookup = set(top)
    # Preserve graph node order so a fixed seed yields the same graph
    # regardless of set iteration order.
    bottom = [node for node in G if node not in top_lookup]

    edge_count = 0
    while edge_count < k:
        # Draw a candidate edge (u, v); skip it if it already exists.
        u = rng.choice(top)
        v = rng.choice(bottom)
        if v in G[u]:
            continue
        G.add_edge(u, v, weight=rng.randint(1, 10))
        edge_count += 1
    return G
def test_not_enough_neighbors():
    """Expect NetworkXError when computing node redundancy on K(1, 2).

    In K(1, 2) the two nodes on the larger side each have a single
    neighbor.
    """
    with pytest.raises(NetworkXError):
        node_redundancy(complete_bipartite_graph(1, 2))
def test_no_redundant_nodes():
    """Every node of K(2, 2) must have a redundancy coefficient of 1."""
    graph = complete_bipartite_graph(2, 2)
    coefficients = node_redundancy(graph)
    for coefficient in coefficients.values():
        assert coefficient == 1
def test_not_enough_neighbors():
    """Expect NetworkXError when computing node redundancy on K(1, 2).

    In K(1, 2) the two nodes on the larger side each have a single
    neighbor.  Without an explicit exception expectation the error would
    propagate and fail the test instead of verifying it.
    """
    G = complete_bipartite_graph(1, 2)
    # Only the call under test sits inside the ``raises`` block, so an
    # unexpected error while building the graph is not mistaken for a pass.
    with pytest.raises(NetworkXError):
        node_redundancy(G)
def break_mwop(self, tree, allow_gene_copies="No"):
    """
    Break a homology group into orthogroups where all proteins
    are orthologous to all proteins, using the Minimum Weight
    Orthogonal Partition (MWOP) criterion.
    See DOI:10.1007/978-3-642-23038-7_30 for details.

    The graph itself (``self``) supplies the genes: every node must carry
    a ``'genome'`` attribute and edges are read for their ``'weight'``.
    The method also reads ``self.gene_tree``, ``self.ref_genome_name``
    and ``self.orthogroup``.

    Parameters
    ----------
    tree
        Phylogenetic *species* tree that guides the order in which
        genomes are merged.  It is copied first, so the caller's tree is
        not modified; leaves of the copy whose name is not a genome of
        this graph are deleted.
        NOTE(review): the calls used (``get_tree_root``, ``search_nodes``,
        ``tree & name``, ``.dist``) look like the ete3 tree API -- confirm.
    allow_gene_copies
        ``"No"`` (default) is treated as ``False`` and disables gene-copy
        handling.  Any other truthy value, combined with a truthy
        ``self.gene_tree``, groups recent gene copies (monophyletic,
        same-genome, non-leaf clades of the *gene* tree) into a single
        set before matching.  The value ``"exclude_ref"`` additionally
        skips the genome named ``self.ref_genome_name``.

    Returns
    -------
    list of nx.Graph
        One graph per resulting orthogroup.  Each graph contains the
        orthogroup's genes as nodes (with their ``'genome'`` attribute
        copied over) and no edges, and has an ``orthogroup`` attribute set
        to ``"<self.orthogroup>.<index>"``.
    """
    # Penalty magnitude for gene pairs that have no edge in this graph,
    # and the weight given to matches that involve an empty set.
    eps = 0.001
    tree = tree.copy()
    if allow_gene_copies == "No":
        allow_gene_copies = False
    # initialize gene sets: V maps genome -> list of gene sets,
    # starting with one singleton set per gene
    genomes = [self.nodes[g]['genome'] for g in self.nodes]
    V = {genome: [] for genome in genomes}
    for gene in self.nodes:
        genome = self.nodes[gene]['genome']
        V[genome].append(set([gene]))
    # if allow_gene_copies mode is on, use gene tree
    # to cluster recent gene copies together by searching
    # for monophyletic groups (from the same genome)
    if allow_gene_copies and self.gene_tree:
        all_genomes = set([gene.genome for gene in self.gene_tree])
        if allow_gene_copies == "exclude_ref" and self.ref_genome_name and self.ref_genome_name in all_genomes:
            all_genomes.remove(self.ref_genome_name)
        # each entry is a list of gene names forming one copy group
        gene_copies = []
        for genome in all_genomes:
            for node in self.gene_tree.get_monophyletic(values=[genome], target_attr="genome"):
                # a leaf is a single gene, not a group of copies
                if not node.is_leaf():
                    gene_copies.append([gene.name for gene in node.get_leaves()])
        # create sets of gene copies and remove single-gene sets
        # all genes in gene copies groups:
        rm = set(chain.from_iterable(gene_copies))
        for genome in V:
            new_Vi = []
            for s in V[genome]:
                # keep a set unless its (single) gene belongs to a copy group
                if s == set() or next(iter(s)) not in rm:
                    new_Vi.append(s)
            V[genome] = new_Vi
        # add one multi-gene set per copy group to the genome of its first gene
        for cp in gene_copies:
            cp_genome = self.nodes[cp[0]]['genome']
            V[cp_genome].append(set(cp))
    # complete with empty sets so every genome has the same number (nm)
    # of sets
    # NOTE(review): presumably fill_list_to_length pads the list with the
    # given value up to length nm -- confirm against the helper
    nm = max([len(l) for l in V.values()])
    for genome in V:
        V[genome] = fill_list_to_length(V[genome], nm, set())
    # remove tree leaves with no genes
    for genome in tree:
        if genome.name not in V:
            genome.delete()
    # traverse tree and create new orthogroups
    n_leaves = len(tree.get_tree_root())
    while n_leaves > 1:
        # find two closest genomes based on tree
        dist_matrix = tree_to_distance_matrix(tree, sister_only=True)
        closest_genomes = row_col_min(dist_matrix)
        # create bipartite graph (BG)
        # (BG has integers as labels, so Vi_d and Vj_d
        # store the integer --> gene set mapping)
        # labels 0..nm-1 map to sets of the first genome,
        # labels nm..2*nm-1 map to sets of the second genome
        Vi = V[closest_genomes[0]]
        Vi_d = {n: Vi[n] for n in range(nm)}
        Vj = V[closest_genomes[1]]
        Vj_d = {n: Vj[n-nm] for n in range(nm,nm*2)}
        bipartite_graph = bipartite.complete_bipartite_graph(nm,nm)
        # assign weights to BG edges
        for edge in bipartite_graph.edges:
            # look for corresponding edge in homology graph (HG)
            # first, translate BG integers to gene sets
            g1 = Vi_d[edge[0]]
            g2 = Vj_d[edge[1]]
            # calculate edge weights as mean of weights between sets
            pairs = product(g1,g2)
            total_weight = 0
            pairs_involved = 0
            for p in pairs:
                if p in self.edges:
                    # orthologs
                    total_weight += self.edges[p]['weight']
                else:
                    # paralogs: no edge in HG, apply a small penalty
                    total_weight += -eps
                pairs_involved += 1
            if pairs_involved == 0:
                # when using an empty set (no gene pairs to average over)
                bipartite_graph.edges[edge]['weight'] = eps
            else:
                bipartite_graph.edges[edge]['weight'] = total_weight/pairs_involved
        # find BG maximum weight matching (MWM)
        # ensure matches are always from Vi to Vj
        # (smaller label first, i.e. the Vi side comes first in each pair)
        mwm = matching.max_weight_matching(bipartite_graph,maxcardinality=True)
        mwm = {(min(x),max(x)) for x in mwm}
        # update gene sets according to MWM
        # this is done by set unions on every matching
        new_Vi = []
        for match in mwm:
            g1 = Vi_d[match[0]]
            g2 = Vj_d[match[1]]
            g1 = g1.union(g2)
            new_Vi.append(g1)
        V[closest_genomes[0]] = new_Vi
        # then remove other genome from V
        del(V[closest_genomes[1]])
        # update tree - remove leaf corresponding to Vj
        # but update branch length of Vi to mean of branch
        # lengths of Vi and Vj
        Vj_bl = (tree&closest_genomes[1]).dist
        j = tree.search_nodes(name=closest_genomes[1])[0]
        j.delete()
        # Vi's branch length is read after Vj was deleted from the tree
        Vi_bl = (tree&closest_genomes[0]).dist
        new_Vi_bl = (Vi_bl + Vj_bl)/2
        (tree&closest_genomes[0]).dist = new_Vi_bl
        # calculate number of leaves left
        n_leaves = len(tree.get_tree_root())
    # finish when all leaves were merged
    # return a list of graphs, each is an OG
    # NOTE(review): this takes the first remaining entry of V; it is the
    # fully merged one only if every genome in V was a leaf of the tree
    # -- confirm against callers
    ogs_list = list(V.values())[0]
    # create list of empty graphs
    ogs_graph_list = [ nx.Graph() for og in ogs_list ]
    for i in range(len(ogs_list)):
        # populate with nodes
        ogs_graph_list[i].add_nodes_from([(n, {'genome': self.nodes[n]['genome']}) for n in ogs_list[i]])
        # assign OG name
        ogs_graph_list[i].orthogroup = "%s.%s" %(self.orthogroup, i)
    return ogs_graph_list