def test_top(self):
    """
    Compare topology frequencies of the two bounded multicoal samplers.

    Draws 'nsamples' trees from coal.sample_bounded_multicoal_tree_reject
    and from coal.sample_bounded_multicoal_tree, tallies how often each
    topology hash occurs for each sampler, asserts that the log counts
    correlate (> 0.9), and saves a scatter plot under 'outdir'.
    """
    outdir = 'test/tmp/test_coal/BMC_test_top/'
    make_clean_dir(outdir)

    species_tree = treelib.parse_newick(
        "(((A:200, E:200):800, B:1000):500, (C:700, D:700):800);")
    n = 500
    T = 2000
    nsamples = 4000

    # topology hash -> [count, first tree seen, its reconciliation]
    reject_tops = {}
    direct_tops = {}

    for _ in xrange(nsamples):
        # rejection sampler is drawn first, then the direct sampler
        reject_tree, reject_recon = \
            coal.sample_bounded_multicoal_tree_reject(
                species_tree, n, T, namefunc=lambda x: x)
        direct_tree, direct_recon = coal.sample_bounded_multicoal_tree(
            species_tree, n, T, namefunc=lambda x: x)

        reject_key = phylo.hash_tree(reject_tree)
        direct_key = phylo.hash_tree(direct_tree)

        # count the topology in its own table and register it (with a
        # zero count) in the other, so both tables share the same keys
        reject_entry = reject_tops.setdefault(
            reject_key, [0, reject_tree, reject_recon])
        reject_entry[0] += 1
        reject_tops.setdefault(direct_key, [0, direct_tree, direct_recon])

        direct_entry = direct_tops.setdefault(
            direct_key, [0, direct_tree, direct_recon])
        direct_entry[0] += 1
        direct_tops.setdefault(reject_key, [0, reject_tree, reject_recon])

    # compare top hist with simpler rejection sampling
    keys = reject_tops.keys()
    x = [safelog(reject_tops[key][0], default=0) for key in keys]
    y = [safelog(direct_tops[key][0], default=0) for key in keys]

    self.assertTrue(stats.corr(x, y) > .9)

    # scatter of log counts plus the diagonal for reference
    lo = min(x)
    hi = max(x)
    plot = Gnuplot()
    plot.enableOutput(False)
    plot.plot(x, y)
    plot.plot([lo, hi], [lo, hi], style="lines")
    plot.enableOutput(True)
    plot.save(outdir + 'plot.png')
def walk(node):
    """
    Fill 'doomtable' for 'node' and everything below it.

    Leaves get -util.INF.  An internal node gets
    util.safelog(p, e, -util.INF), where p is the product over its
    children of
        sum_{d=0..maxdoom} prob_birth_death1(d, child.dist, birth, death)
                           * exp(doomtable[child]) ** d

    Uses 'doomtable', 'nodelookup', 'maxdoom', 'birth' and 'death' from
    the enclosing scope.
    """
    if node.is_leaf():
        doomtable[nodelookup[node]] = -util.INF
        return

    # children must be filled in before this node can be computed
    for child in node.children:
        walk(child)

    index = nodelookup[node]
    product = 1.0
    for child in node:
        total = 0
        for ndoom in range(0, maxdoom + 1):
            total += (
                birthdeath.prob_birth_death1(
                    ndoom, child.dist, birth, death) *
                exp(doomtable[nodelookup[child]]) ** ndoom)
        product *= total

    doomtable[index] = util.safelog(product, e, -util.INF)
def walk(node):
    """
    Post-order pass that stores one 'doomtable' entry per node.

    An internal node's entry is util.safelog(p, e, -util.INF), where p
    multiplies, over the node's children, the sum for d in 0..maxdoom of
    birthdeath.prob_birth_death1(d, child.dist, birth, death) times
    exp(child's doomtable entry) raised to the power d.  A leaf's entry
    is -util.INF.

    'doomtable', 'nodelookup', 'maxdoom', 'birth' and 'death' come from
    the enclosing scope.
    """
    if not node.is_leaf():
        # fill in the subtrees first
        for kid in node.children:
            walk(kid)

        slot = nodelookup[node]
        prob = 1.0
        for kid in node:
            terms = [
                birthdeath.prob_birth_death1(k, kid.dist, birth, death) *
                exp(doomtable[nodelookup[kid]]) ** k
                for k in range(0, maxdoom + 1)]
            prob *= sum(terms)

        doomtable[slot] = util.safelog(prob, e, -util.INF)
    else:
        doomtable[nodelookup[node]] = -util.INF
def prob_locus_coal_recon_topology_samples(
        coal_tree, coal_recon, locus_tree, locus_recon, locus_events,
        popsizes, stree, stimes, daughters, duprate, lossrate, nsamples,
        pretime=None, premean=None):
    """
    Log probability of the coalescent tree topology, estimated over
    'nsamples' sampled sets of duplication times.

    When the module-level flag 'dlcoalc' is true the computation is
    delegated to coal.prob_locus_coal_recon_topology_samples().
    Otherwise the pure-python path averages
    exp(prob_locus_coal_recon_topology(...)) over the samples and returns
    util.safelog() of that average.

    Side effect: the branch lengths of 'locus_tree' are overwritten via
    treelib.set_dists_from_timestamps() on either path.
    """

    def resample_locus_dists():
        # draw duplication times and write them onto locus_tree
        times = duploss.sample_dup_times(
            locus_tree, stree, locus_recon, duprate, lossrate,
            pretime, premean, events=locus_events)
        treelib.set_dists_from_timestamps(locus_tree, times)

    if dlcoalc:
        # sample some reasonable branch lengths just for logging purposes
        resample_locus_dists()

        # use C code
        return coal.prob_locus_coal_recon_topology_samples(
            coal_tree, coal_recon, locus_tree, locus_recon, locus_events,
            popsizes, stree, stimes, daughters, duprate, lossrate,
            nsamples, pretime, premean)

    # python backup
    total = 0.0
    for _ in xrange(nsamples):
        # sample duplication times
        resample_locus_dists()

        # coal topology probability
        log_prob = prob_locus_coal_recon_topology(
            coal_tree, coal_recon, locus_tree, popsizes, daughters)
        total += exp(log_prob)

    return util.safelog(total / nsamples)
def prob_coal_recon_topology(tree, recon, locus_tree, n, daughters):
    """
    Returns the log probability of a reconciled gene tree ('tree', 'recon')
    from the coalescent model given a locus_tree 'locus_tree',
    population sizes 'n', and daughters set 'daughters'

    tree       -- coalescent (gene) tree
    recon      -- dict mapping each node of 'tree' to a node of 'locus_tree'
    locus_tree -- locus tree; its branch lengths (node.dist) are read
    n          -- population sizes, passed to coal.init_popsizes()
    daughters  -- collection of locus tree nodes; for these branches the
                  coal.prob_coal_counts() term is skipped and exactly one
                  lineage must leave the branch (asserted)

    Raises an Exception if 'recon' contains a node that is not in 'tree'.
    """
    # init population sizes
    popsizes = coal.init_popsizes(locus_tree, n)

    # log probability
    lnp = 0.0

    nodes = set(tree.postorder())

    # init reverse reconciliation
    rev_recon = {}
    for node, snode in recon.iteritems():
        if node not in nodes:
            raise Exception("node '%s' not in tree" % node.name)
        rev_recon.setdefault(snode, []).append(node)

    # init lineage counts
    lineages = {}
    for snode in locus_tree:
        if snode.is_leaf():
            lineages[snode] = len(
                [x for x in rev_recon[snode] if x.is_leaf()])
        else:
            lineages[snode] = 0

    # iterate through locus tree branches
    for snode in locus_tree.postorder():
        if snode.parent:
            # non root branch
            u = lineages[snode]

            # subtract number of coals in branch
            v = u - len([x for x in rev_recon.get(snode, [])
                         if not x.is_leaf()])
            lineages[snode.parent] += v

            if snode not in daughters:
                try:
                    lnp += util.safelog(
                        coal.prob_coal_counts(u, v, snode.dist,
                                              popsizes[snode.name]))
                except Exception:
                    # dump the offending inputs, then let the error
                    # propagate unchanged
                    print("%s %s %s %s" % (
                        u, v, snode.dist, popsizes[snode.name]))
                    raise
            else:
                assert v == 1
            lnp -= util.safelog(coal.num_labeled_histories(u, v))
        else:
            # normal coalescent
            u = lineages[snode]
            lnp -= util.safelog(coal.num_labeled_histories(u, 1))

    # correct for topologies H(T)
    # find connected subtrees that are in the same locus tree branch
    subtrees = []
    subtree_root = {}
    for node in tree.preorder():
        if node.parent and recon[node] == recon[node.parent]:
            subtree_root[node] = subtree_root[node.parent]
        else:
            subtrees.append(node)
            subtree_root[node] = node

    # find leaves through recursion
    def walk(node, subtree, leaves):
        if node.is_leaf():
            leaves.append(node)
        elif (subtree_root[node.children[0]] != subtree and
              subtree_root[node.children[1]] != subtree):
            # both children belong to other subtrees, so this node is a
            # leaf of the current subtree
            leaves.append(node)
        else:
            for child in node.children:
                walk(child, subtree, leaves)

    # apply correction for each subtree
    for subtree in subtrees:
        leaves = []
        # Walk each child exactly once.  Re-walking the subtree root once
        # per child would record every leaf multiple times and inflate
        # the len(leaves) check below.
        for child in subtree.children:
            walk(child, subtree, leaves)
        if len(leaves) > 2:
            lnp += util.safelog(
                birthdeath.num_topology_histories(subtree, leaves))

    return lnp
def prob_dlcoal_recon_topology(coal_tree, coal_recon,
                               locus_tree, locus_recon, locus_events,
                               daughters,
                               stree, n, duprate, lossrate,
                               pretime=None, premean=None,
                               maxdoom=20, nsamples=100,
                               add_spec=True):
    """
    Probability of a reconciled gene tree in the DLCoal model.

    coal_tree    -- coalescent tree
    coal_recon   -- reconciliation of coalescent tree to locus tree
    locus_tree   -- locus tree (has dup-loss)
    locus_recon  -- reconciliation of locus tree to species tree
    locus_events -- events dict for locus tree
    daughters    -- forwarded to prob_coal_recon_topology()
    stree        -- species tree
    n            -- population sizes in species tree
    duprate      -- duplication rate
    lossrate     -- loss rate
    maxdoom      -- forwarded to spidir.calc_birth_death_prior()
    nsamples     -- number of duplication-time samples to average over
    add_spec     -- if true, phylo.add_implied_spec_nodes() is called on
                    the locus tree first

    You must also specify one of the following
    pretime      -- starting time before species tree
    premean      -- mean starting time before species tree

    Note: locus tree must have implied speciation nodes present
    (add_spec=True adds them).  The branch lengths of 'locus_tree' are
    overwritten for every sample.

    Returns the sum of the birth-death prior, the daughters term
    (dups * log(.5)) and util.safelog() of the sampled average of
    exp(coalescent topology log probability).
    """
    dup_count = phylo.count_dup(locus_tree, locus_events)

    # ensure implicit speciations are present
    if add_spec:
        phylo.add_implied_spec_nodes(locus_tree, stree,
                                     locus_recon, locus_events)

    # init popsizes for locus tree: each locus node takes the size of the
    # species branch it reconciles to
    stree_popsizes = coal.init_popsizes(stree, n)
    popsizes = dict(
        (lnode.name, stree_popsizes[locus_recon[lnode].name])
        for lnode in locus_tree)

    # duploss probability
    util.tic("top")
    dl_prob = spidir.calc_birth_death_prior(
        locus_tree, stree, locus_recon, duprate, lossrate,
        maxdoom=maxdoom)
    util.toc()

    # daughters probability
    d_prob = dup_count * log(.5)

    # integrate over duplication times using sampling
    total = 0.0
    for _ in xrange(nsamples):
        # sample duplication times
        locus_times = spidir.topology_prior.sample_dup_times(
            locus_tree, stree, locus_recon, duprate, lossrate,
            pretime, premean, events=locus_events)
        ntimes = len(locus_times)
        nnodes = len(locus_tree.nodes)
        assert ntimes == nnodes, (ntimes, nnodes)
        treelib.set_dists_from_timestamps(locus_tree, locus_times)

        # coal topology probability
        coal_prob = prob_coal_recon_topology(
            coal_tree, coal_recon, locus_tree, popsizes, daughters)
        total += exp(coal_prob)
        # NOTE(review): per-sample output, looks like leftover debugging
        print(coal_prob)

    return dl_prob + d_prob + util.safelog(total / nsamples)