def randomBipartiteGraph(n, m, k, directed=False, seed=None):
    """Build a random bipartite graph with ``k`` weighted edges.

    Nodes are created by ``add_nodes_with_bipartite_label(G, n, m)``. Nodes
    whose ``bipartite`` attribute equals 0 form the "top" side; every other
    node forms the "bottom" side. Edges are picked by repeatedly choosing a
    random top node and a random bottom node and skipping pairs that are
    already connected. Every edge gets an integer ``weight`` in 1..10.

    Parameters
    ----------
    n, m : passed through to ``add_nodes_with_bipartite_label``
        (presumably the sizes of the two node sets -- helper is defined
        elsewhere).
    k : number of edges requested. If ``k`` is at least the number of
        possible top-bottom pairs, every pair is connected.
    directed : if true, the result is an ``nx.DiGraph`` whose edges point
        from the top side to the bottom side.
    seed : optional seed for a private random generator. ``None`` (default)
        seeds from system entropy, so results differ between calls.

    Returns
    -------
    The populated ``nx.Graph`` / ``nx.DiGraph``.
    """
    G = add_nodes_with_bipartite_label(nx.Graph(), n, m)
    if directed:
        G = nx.DiGraph(G)

    # A private generator keeps the caller's global ``random`` state intact
    # (the module-level RNG is no longer reseeded on every call).
    rng = random.Random(seed)

    top = [node for node, data in G.nodes(data=True) if data['bipartite'] == 0]
    top_members = set(top)
    # Preserve node insertion order so a fixed seed gives a fixed graph.
    bottom = [node for node in G if node not in top_members]

    max_edges = len(top) * len(bottom)  # max edges for a bipartite network
    if k >= max_edges:
        # Saturated request: connect every pair directly. Doing it here
        # (instead of via complete_bipartite_graph) keeps the ``weight``
        # attribute on every edge and also works for directed graphs.
        for u in top:
            for v in bottom:
                G.add_edge(u, v, weight=rng.randint(1, 10))
        return G

    edge_count = 0
    while edge_count < k:
        u = rng.choice(top)
        v = rng.choice(bottom)
        if v in G[u]:
            # Pair already connected; draw again.
            continue
        G.add_edge(u, v, weight=rng.randint(1, 10))
        edge_count += 1
    return G
Esempio n. 2
0
def test_not_enough_neighbors():
    """node_redundancy on K(1,2) is expected to raise NetworkXError."""
    # Build the graph outside the ``raises`` context so that only the call
    # under test can satisfy the expected exception.
    G = complete_bipartite_graph(1, 2)
    with pytest.raises(NetworkXError):
        node_redundancy(G)
Esempio n. 3
0
def test_no_redundant_nodes():
    """Every redundancy value reported for K(2,2) must equal 1."""
    graph = complete_bipartite_graph(2, 2)
    coefficients = node_redundancy(graph)
    for value in coefficients.values():
        assert value == 1
Esempio n. 4
0
def test_not_enough_neighbors():
    """Run node_redundancy on the complete bipartite graph K(1,2).

    NOTE(review): no assertion or expected-exception wrapper is visible
    here; if the call is meant to raise, that expectation must be declared
    elsewhere (e.g. a decorator not shown) -- confirm.
    """
    node_redundancy(complete_bipartite_graph(1, 2))
Esempio n. 5
0
  def break_mwop(self, tree, allow_gene_copies="No"):
    """
    Break a homology group into orthogroups
    where all proteins are orthologous to
    all proteins, using the
    Minimum Weight Orthogonal Partition (MWOP)
    criterion. A phylogenetic *species* tree
    is used to guide the order of actions in the
    algorithm. Optionally, the *gene* tree is used
    for keeping together recent gene copies.
    See DOI:10.1007/978-3-642-23038-7_30 for
    details.

    Parameters:
    tree - species tree. A copy is taken and only
      the copy is pruned while the algorithm runs.
      Leaf names are compared with the 'genome'
      attribute of the graph nodes.
    allow_gene_copies - "No" (default) or any falsy
      value disables gene-copy clustering. Any other
      truthy value enables it, provided self.gene_tree
      is set. The value "exclude_ref" additionally
      skips the genome named self.ref_genome_name.

    Returns a list of nx.Graph objects, one per
    orthogroup. Each graph contains the genes of
    the orthogroup as nodes (carrying their 'genome'
    attribute, no edges) and has an `orthogroup`
    attribute of the form "<self.orthogroup>.<index>".
    """
    # magnitude of the token weights: -eps for a gene pair
    # with no edge in the homology graph, +eps for a pair
    # of sets where at least one set is empty
    eps = 0.001
    tree = tree.copy()
    # the string "No" is the caller-facing way to say False
    if allow_gene_copies == "No":
      allow_gene_copies = False

    # initialize gene sets
    # V maps genome --> list of gene sets; every gene
    # starts in its own singleton set
    genomes = [self.nodes[g]['genome'] for g in self.nodes]
    V = {genome: [] for genome in genomes}
    for gene in self.nodes:
      genome = self.nodes[gene]['genome']
      V[genome].append(set([gene]))
    # if allow_gene_copies mode is on, use gene tree
    # to cluster recent gene copies together by searching
    # for monophyletic groups (from the same genome)
    if allow_gene_copies and self.gene_tree:
      all_genomes = set([gene.genome for gene in self.gene_tree])
      if allow_gene_copies == "exclude_ref" and self.ref_genome_name and self.ref_genome_name in all_genomes:
        all_genomes.remove(self.ref_genome_name)
      gene_copies = []
      for genome in all_genomes:
        for node in self.gene_tree.get_monophyletic(values=[genome], target_attr="genome"):
          # a leaf is a single gene, not a group of copies
          if not node.is_leaf():
            gene_copies.append([gene.name for gene in node.get_leaves()])

      # create sets of gene copies and remove single-gene sets
      # all genes in gene copies groups:
      rm = set(chain.from_iterable(gene_copies))
      for genome in V:
        new_Vi = []
        for s in V[genome]:
          # sets are still singletons here, so the first
          # element is the only gene of the set
          if s == set() or next(iter(s)) not in rm:
            new_Vi.append(s)
        V[genome] = new_Vi
      # each copy group is filed under the genome of its first gene
      for cp in gene_copies:
        cp_genome = self.nodes[cp[0]]['genome']
        V[cp_genome].append(set(cp))
    # complete with empty sets
    # nm is the largest number of gene sets held by any genome
    # (fill_list_to_length is defined elsewhere; presumably it
    # pads each list to length nm with the given filler)
    nm = max([len(l) for l in V.values()])
    for genome in V:
      V[genome] = fill_list_to_length(V[genome], nm, set())

    # remove tree leaves with no genes
    for genome in tree:
      if genome.name not in V:
        genome.delete()

    # traverse tree and create new orthogroups
    n_leaves = len(tree.get_tree_root())
    while n_leaves > 1:
      # find two closest genomes based on tree
      # (both helpers are defined elsewhere; closest_genomes is
      # used below as a pair of genome names)
      dist_matrix = tree_to_distance_matrix(tree, sister_only=True)
      closest_genomes = row_col_min(dist_matrix)
      # create bipartite graph (BG)
      # (BG has integers as labels, so Vi_d and Vj_d
      # store the integer --> gene set mapping)
      # labels 0..nm-1 stand for the sets of Vi and
      # labels nm..2*nm-1 for the sets of Vj
      Vi = V[closest_genomes[0]]
      Vi_d = {n: Vi[n] for n in range(nm)}
      Vj = V[closest_genomes[1]]
      Vj_d = {n: Vj[n-nm] for n in range(nm,nm*2)}
      bipartite_graph = bipartite.complete_bipartite_graph(nm,nm)
      # assign weights to BG edges
      for edge in bipartite_graph.edges:
        # look for corresponding edge in homology graph (HG)
        # first, translate BG integers to gene sets
        g1 = Vi_d[edge[0]]
        g2 = Vj_d[edge[1]]
        # calculate edge weights as mean of weights between sets
        pairs = product(g1,g2)
        total_weight = 0
        pairs_involved = 0
        for p in pairs:
          if p in self.edges: # orthologs
            total_weight += self.edges[p]['weight']
          else: # paralogs
            total_weight += -eps
          pairs_involved += 1
        if pairs_involved == 0: # when using an empty set
          bipartite_graph.edges[edge]['weight'] = eps
        else:
          bipartite_graph.edges[edge]['weight'] = total_weight/pairs_involved
      # find BG maximum weight matching (MWM)
      # ensure matches are always from Vi to Vj
      # (Vi labels are all smaller than Vj labels, so sorting
      # each matched pair puts the Vi label first)
      mwm = matching.max_weight_matching(bipartite_graph,maxcardinality=True)
      mwm = {(min(x),max(x)) for x in mwm}
      # update gene sets according to MWM
      # this is done by set unions on every matching
      new_Vi = []
      for match in mwm:
        g1 = Vi_d[match[0]]
        g2 = Vj_d[match[1]]
        # union() builds a new set, the original sets stay untouched
        g1 = g1.union(g2)
        new_Vi.append(g1)
      V[closest_genomes[0]] = new_Vi
      # then remove other genome from V
      del(V[closest_genomes[1]])
      # update tree - remove leaf corresponding to Vj
      # but update branch length of Vi to mean of branch
      # lengths of Vi and Vj
      # (Vj's branch length is read before its leaf is deleted;
      # Vi's branch length is read after the deletion)
      Vj_bl = (tree&closest_genomes[1]).dist
      j = tree.search_nodes(name=closest_genomes[1])[0]
      j.delete()
      Vi_bl = (tree&closest_genomes[0]).dist
      new_Vi_bl = (Vi_bl + Vj_bl)/2
      (tree&closest_genomes[0]).dist = new_Vi_bl
      # calculate number of leaves left
      n_leaves = len(tree.get_tree_root())

    # finish when all leaves were merged
    # return a list of graphs, each is an OG
    # the gene sets of the first genome left in V are taken as
    # the orthogroups (presumably the only entry remaining when
    # every genome was present in the tree)
    ogs_list = list(V.values())[0]
    # create list of empty graphs
    ogs_graph_list = [ nx.Graph() for og in ogs_list ]
    for i in range(len(ogs_list)):
      # populate with nodes
      ogs_graph_list[i].add_nodes_from([(n, {'genome': self.nodes[n]['genome']}) for n in ogs_list[i]])
      # assign OG name
      ogs_graph_list[i].orthogroup = "%s.%s" %(self.orthogroup, i)
    return ogs_graph_list