def test_dlcoal_tree(locus_tree, n, daughters, nsamples):
    """Compare sampled coalescent-tree topologies with their computed
    probabilities.

    Draws `nsamples` trees with dlcoal.sample_locus_coal_tree, groups them by
    their phylo.hash_tree() topology hash and, for every distinct topology,
    records the observed sample fraction next to
    exp(dlcoal.prob_locus_coal_recon_topology(...)).

    Arguments:
    locus_tree -- locus tree handed to both the sampler and the probability
                  function
    n          -- forwarded unchanged as the `n` argument of both dlcoal
                  calls (presumably the population size(s) -- confirm)
    daughters  -- forwarded unchanged to both dlcoal calls
    nsamples   -- number of trees to sample

    Returns (tab, tops):
    tab  -- Table with columns "top", "simple_top", "percent" and "prob",
            sorted by "prob" in descending order
    tops -- dict mapping a topology hash to [count, tree, recon], where tree
            and recon are the first sample seen with that topology

    The sample index is printed about every 1% of the run as a progress
    indicator.
    """
    # nsamples // 100 is 0 when fewer than 100 samples are requested; clamp
    # the reporting interval to 1 so the modulo below never divides by zero.
    report_every = max(1, nsamples // 100)

    tops = {}
    for i in xrange(nsamples):
        if i % report_every == 0:
            print(i)

        coal_tree, coal_recon = dlcoal.sample_locus_coal_tree(
            locus_tree, n, leaf_counts=None,
            daughters=daughters, namefunc=lambda x: x)

        # the first tree/recon seen for a topology is kept as its
        # representative; later hits only bump the counter
        top = phylo.hash_tree(coal_tree)
        tops.setdefault(top, [0, coal_tree, coal_recon])[0] += 1

    tab = Table(headers=["top", "simple_top", "percent", "prob"])

    for top, (num, tree, recon) in tops.items():
        # hash a copy with single-child nodes removed so the stored
        # representative tree stays untouched
        tree2 = tree.copy()
        treelib.remove_single_children(tree2)
        tab.add(top=top,
                simple_top=phylo.hash_tree(tree2),
                percent=num / float(nsamples),
                prob=exp(dlcoal.prob_locus_coal_recon_topology(
                    tree, recon, locus_tree, n, daughters)))

    tab.sort(col="prob", reverse=True)

    return tab, tops
def sample_dlcoal_no_ifix(stree, n, freq, duprate, lossrate, freqdup, freqloss,
                          forcetime, namefunc=lambda x: x,
                          remove_single=True, name_internal="n", minsize=0):
    """Sample a gene tree from the DLCoal model using the new simulator.

    A locus tree is drawn with sim_DLILS_gene_tree (redrawn until it has at
    least `minsize` leaves), expanded with locus_to_logged_tree, and a
    coalescent tree is then sampled inside the expanded tree with
    dlcoal.sample_locus_coal_tree.

    Arguments:
    stree, n, freq, duprate, lossrate, freqdup, freqloss, forcetime
                  -- forwarded positionally, in this order, to
                     sim_DLILS_gene_tree; `n` is also passed as `popsize`
                     to locus_to_logged_tree
    namefunc      -- accepted for interface compatibility; this function
                     never reads it (coalescent leaves are always named
                     log_recon[name] + '_' + str(name))
    remove_single -- if true, remove single-child nodes from the coalescent
                     tree and subset the reconciliation accordingly
    name_internal -- if true-valued, passed to dlcoal.rename_nodes for both
                     the coalescent tree and the locus tree
    minsize       -- minimum number of locus-tree leaves to accept a draw

    Returns (coal_tree, extra) where extra is a dict with the keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree", "coal_recon"
    and "daughters".
    """
    def _draw_locus_tree():
        return sim_DLILS_gene_tree(stree, n, freq,
                                   duprate, lossrate,
                                   freqdup, freqloss,
                                   forcetime)

    # generate the locus tree, rejecting draws with too few leaves
    locus_tree, locus_extras = _draw_locus_tree()
    while len(locus_tree.leaves()) < minsize:
        locus_tree, locus_extras = _draw_locus_tree()

    if len(locus_tree.nodes) > 1:  # TODO: check 1 value
        # simulate coalescence on the new (expanded) locus tree
        expanded_tree, expanded_info = locus_to_logged_tree(locus_tree,
                                                            popsize=n)
        daughters = expanded_info[0]
        pops = expanded_info[1]
        log_recon = expanded_info[2]

        def _leaf_name(logged_name):
            return log_recon[logged_name] + '_' + str(logged_name)

        coal_tree, coal_recon = dlcoal.sample_locus_coal_tree(
            expanded_tree, n=pops, daughters=daughters,
            namefunc=_leaf_name)

        treelib.assert_tree(coal_tree)
    else:
        # total extinction: a lone root reconciled to the locus-tree root
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    # clean up coal tree
    if remove_single:
        treelib.remove_single_children(coal_tree)
        phylo.subset_recon(coal_tree, coal_recon)

    if name_internal:
        for tree in (coal_tree, locus_tree):
            dlcoal.rename_nodes(tree, name_internal)

    # store extra information
    # TODO: update this now that we're using logged locus tree, new sample
    # function
    extra = {}
    extra["locus_tree"] = locus_tree
    extra["locus_recon"] = locus_extras['recon']
    extra["locus_events"] = locus_extras['events']
    extra["coal_tree"] = coal_tree
    extra["coal_recon"] = coal_recon
    extra["daughters"] = daughters

    return coal_tree, extra
def debug_test3():
    """Run one simulation on 'examples/nbin.stree' and print diagnostics.

    Reads the species tree, scales every branch length by 1e7, simulates a
    locus tree with sim_DLILS_gene_tree, expands it with
    locus_to_logged_tree and samples a coalescent tree inside it. The nodes
    of the species tree and of the locus tree are printed, followed by the
    leaf names of the coalescent tree.

    If treelib.assert_tree rejects the coalescent tree, it additionally
    prints, for every key of util.hist_dict over the postorder node names,
    the key when its value exceeds 1 (an empty string otherwise), and then
    the difference between len(coal_tree.nodes) and the postorder length.

    Must be run from the parent directory of this file so the relative
    example path resolves.
    """
    def _dump_nodes(tree):
        # one "name dist nchildren" line per node, then a blank line
        for node in tree:
            print('%s %s %s' % (node.name, node.dist, len(node.children)))
        print('')

    # run from ../ of this directory
    stree = treelib.read_tree('examples/nbin.stree')
    for branch in stree:
        branch.dist *= 1e7  # gen per myr

    popsize = 2e7
    freq = 1e0
    dr = .0000012 / 1e7  # .0012/1e7
    lr = .0000011 / 1e7  # .0006/1e7
    freqdup = freqloss = .05
    forcetime = 1e7

    _dump_nodes(stree)

    locus_tree, locus_extras = sim_DLILS_gene_tree(stree, popsize, freq,
                                                   dr, lr,
                                                   freqdup, freqloss,
                                                   forcetime)
    _dump_nodes(locus_tree)

    logged_locus_tree, logged_extras = locus_to_logged_tree(locus_tree,
                                                            popsize)
    daughters = logged_extras[0]
    pops = logged_extras[1]

    def _leaf_name(logged_name):
        return logged_extras[2][logged_name] + '_' + str(logged_name)

    coal_tree, _coal_recon = dlcoal.sample_locus_coal_tree(
        logged_locus_tree, n=pops, daughters=daughters,
        namefunc=_leaf_name)

    # begin debug
    print(coal_tree.leaf_names())
    try:
        treelib.assert_tree(coal_tree)
    except AssertionError:
        print('assertion error thrown on coal_tree being a proper tree')
        from rasmus import util
        name_hist = util.hist_dict(x.name for x in coal_tree.postorder())
        # one space-separated line: the key where its value is > 1, an
        # empty field otherwise
        fields = ['%s' % (key,) if name_hist[key] > 1 else ''
                  for key in name_hist.keys()]
        print(' '.join(fields))
        print(len(coal_tree.nodes) - len(list(coal_tree.postorder())))