def do_test_coal_sim(self, stree, gene2species, n, ntrees=10000,
                     tabsize=30):
    """Perform a coal gene tree simulation test.

    Samples `ntrees` gene trees with coal.sample_multicoal_tree, tabulates
    their topology hashes with histtab, and evaluates
    coal.prob_coal_recon_topology for one sampled tree per topology.

    Arguments:
    stree        -- species tree handed to the coal sampling and
                    probability routines
    gene2species -- not used by this function; kept so existing callers
                    keep working
    n            -- passed unchanged to the coal routines (presumably the
                    population size; confirm against the coal module)
    ntrees       -- number of gene trees to sample
    tabsize      -- not used by this function; kept so existing callers
                    keep working

    Returns (hist, probs): the table produced by histtab and a list holding
    exp(p) for each row of that table, in row order.

    Side effects: the sampled trees are simplified in place with
    treelib.remove_single_children and their reconciliations are pruned to
    the surviving nodes.  If anything fails while processing a topology,
    the offending tree is drawn and the exception is re-raised.
    """
    tops = []
    lookup = {}

    util.tic("simulating %d trees" % ntrees)
    for _ in xrange(ntrees):
        tree, recon = coal.sample_multicoal_tree(
            stree, n, namefunc=lambda x: x)
        top = phylo.hash_tree(tree)
        tops.append(top)
        # Later samples overwrite earlier ones, so each topology keeps the
        # last tree/recon pair that produced it.
        lookup[top] = (tree, recon)
    util.toc()

    hist = histtab(tops)

    probs = []
    for row in hist:
        tree, recon = lookup[row["item"]]
        try:
            treelib.remove_single_children(tree)
            nodes = set(tree.postorder())

            # Iterate over a snapshot of the keys: entries are deleted from
            # recon inside the loop, which must not disturb the iteration.
            for node in list(recon.keys()):
                if node not in nodes:
                    del recon[node]

            p = coal.prob_coal_recon_topology(tree, recon, stree, n)
        except Exception:
            # Show the tree that triggered the failure, then propagate.
            draw_tree(tree, maxlen=5, minlen=5)
            raise
        probs.append(exp(p))

    return hist, probs
def _test_multicoal_tree(stree, n, nsamples):
    """Sample multicoal trees and tabulate their topologies.

    Draws `nsamples` trees with coal.sample_multicoal_tree and groups them
    by phylo.hash_tree.  For every distinct topology one table row is added
    with the topology hash, the hash after removing single-child nodes, the
    observed fraction of samples, and
    exp(coal.prob_multicoal_recon_topology(...)) for the first tree/recon
    pair seen with that topology.

    Returns (tab, tops): the table sorted by "prob" in descending order, and
    a dict mapping each topology hash to [count, tree, recon].
    """
    tops = {}
    for _ in xrange(nsamples):
        tree, recon = coal.sample_multicoal_tree(
            stree, n, namefunc=lambda x: x)
        key = phylo.hash_tree(tree)
        if key not in tops:
            # Remember the first tree/recon sampled for this topology.
            tops[key] = [0, tree, recon]
        tops[key][0] += 1

    tab = Table(headers=["top", "simple_top", "percent", "prob"])
    total = float(nsamples)
    for top, entry in tops.items():
        count, tree, recon = entry

        # Simplify a copy so the stored tree stays untouched.
        simplified = tree.copy()
        treelib.remove_single_children(simplified)

        # Parenthesised single-argument form; prints the same text as the
        # Python 2 print statement.
        print(phylo.hash_tree(simplified))
        print(phylo.hash_tree(stree))

        simple_top = phylo.hash_tree(simplified)
        fraction = count / total
        prob = exp(coal.prob_multicoal_recon_topology(
            tree, recon, stree, n))
        tab.add(top=top, simple_top=simple_top, percent=fraction, prob=prob)

    tab.sort(col="prob", reverse=True)
    return tab, tops
def test_cdf_bmc(self):
    # Compare the MRCA-time CDF reported by
    # coal.cdf_mrca_bounded_multicoal (BMC) with the fraction of sampled
    # multicoal trees whose root timestamp falls below the same bound.
    species_tree = treelib.parse_newick(
        "((A:1000, B:1000):500, (C:700, D:700):800);")
    popsize = 1000

    # A gene count of 1 for every leaf name of the species tree.
    leaf_counts = dict.fromkeys(species_tree.leaf_names(), 1)
    time_bound = 2000
    expected = exp(coal.cdf_mrca_bounded_multicoal(
        leaf_counts, time_bound, species_tree, popsize))

    nsamples = 5000
    hits = 0
    for _ in xrange(nsamples):
        tree, recon = coal.sample_multicoal_tree(species_tree, popsize)
        root_time = treelib.get_tree_timestamps(tree)[tree.root]
        if root_time < time_bound:
            hits += 1
    observed = hits / float(nsamples)

    # Third argument is presumably the tolerance used by fequal.
    fequal(expected, observed, .05)
def do_test_coal_sim(self, stree, gene2species, n, ntrees=10000,
                     tabsize=30):
    """Perform a coal gene tree simulation test.

    Samples `ntrees` gene trees with coal.sample_multicoal_tree, tabulates
    their topology hashes with histtab, and evaluates
    coal.prob_coal_recon_topology for one sampled tree per topology.

    Arguments:
    stree        -- species tree handed to the coal sampling and
                    probability routines
    gene2species -- not used by this function; kept so existing callers
                    keep working
    n            -- passed unchanged to the coal routines (presumably the
                    population size; confirm against the coal module)
    ntrees       -- number of gene trees to sample
    tabsize      -- not used by this function; kept so existing callers
                    keep working

    Returns (hist, probs): the table produced by histtab and a list holding
    exp(p) for each row of that table, in row order.

    Side effects: the sampled trees are simplified in place with
    treelib.remove_single_children and their reconciliations are pruned to
    the surviving nodes.  If anything fails while processing a topology,
    the offending tree is drawn and the exception is re-raised.
    """
    tops = []
    lookup = {}

    util.tic("simulating %d trees" % ntrees)
    for _ in xrange(ntrees):
        tree, recon = coal.sample_multicoal_tree(
            stree, n, namefunc=lambda x: x)
        top = phylo.hash_tree(tree)
        tops.append(top)
        # Later samples overwrite earlier ones, so each topology keeps the
        # last tree/recon pair that produced it.
        lookup[top] = (tree, recon)
    util.toc()

    hist = histtab(tops)

    probs = []
    for row in hist:
        tree, recon = lookup[row["item"]]
        try:
            treelib.remove_single_children(tree)
            nodes = set(tree.postorder())

            # Iterate over a snapshot of the keys: entries are deleted from
            # recon inside the loop, which must not disturb the iteration.
            for node in list(recon.keys()):
                if node not in nodes:
                    del recon[node]

            p = coal.prob_coal_recon_topology(tree, recon, stree, n)
        except Exception:
            # Show the tree that triggered the failure, then propagate.
            draw_tree(tree, maxlen=5, minlen=5)
            raise
        probs.append(exp(p))

    return hist, probs