def test_prob_coal2(self):
    """Compare samples from coal.sample_coal against coal.prob_coal.

    Draws 200 samples for k=2 and n=1000, plots their distribution on top
    of the curve of coal.prob_coal, saves the figure under the test output
    directory, and finally checks the samples against the same function
    with eq_sample_pdf.
    """
    outdir = 'test/tmp/test_coal/Coal_test_prob_coal2/'
    make_clean_dir(outdir)

    k, n = 2, 1000

    def density(t):
        # the function the samples are plotted and checked against
        return coal.prob_coal(t, k, n)

    plot = Gnuplot()
    plot.enableOutput(False)
    plot.plotfunc(density, 0, 4000, 10, ymin=0)

    # draw single coal samples
    samples = []
    for _ in xrange(200):
        samples.append(coal.sample_coal(k, n))
    plotdistrib(samples, 40, plot=plot)

    plot.enableOutput(True)
    plot.save(outdir + 'plot.png')

    eq_sample_pdf(samples, density, 40)
def test_prob_coal_cond_counts_simple(self):
    """Check prob_coal_cond_counts_simple against prob_bounded_coal for b=1.

    When we condition on b=1, the result should be the same as the bounded
    coal PDF (prob_coal_cond_counts is a more general version of
    prob_bounded_coal).  Both functions are evaluated on the same grid of
    times and compared with assertAlmostEqual.
    """
    # Directory name follows the 'Coal_<test name>/' convention used by the
    # other coal tests.
    outdir = 'test/tmp/test_coal/Coal_test_prob_coal_cond_counts_simple/'
    make_clean_dir(outdir)

    a = 5
    b = 1
    t = 2000
    n = 1000

    # This test draws nothing, so no Gnuplot object is created here.
    for x in frange(0, 2000, 10):
        y = coal.prob_coal_cond_counts_simple(x, a, b, t, n)
        y2 = coal.prob_bounded_coal(x, a, n, t)
        self.assertAlmostEqual(y, y2)
def test_cdf_mrca(self):
    """Plot a summed-up coal.prob_mrca next to coal.cdf_mrca and check it.

    coal.prob_mrca is evaluated on a grid with spacing `step`, scaled by
    `step`, and accumulated with cumsum; that curve and coal.cdf_mrca on
    the same grid are plotted together and saved.  The test finishes with
    an eq_sample_pdf call.
    """
    outdir = 'test/tmp/test_coal/Coal_test_cdf_mrca/'
    make_clean_dir(outdir)

    n = 1000
    k = 6
    step = 10

    def cdf(t):
        return coal.cdf_mrca(t, k, n)

    grid = list(frange(0, 5000, step))

    # running sum of density * step, i.e. a numerical stand-in for the CDF
    summed = cumsum([coal.prob_mrca(i, k, n) * step for i in grid])
    exact = [cdf(t) for t in grid]

    plot = Gnuplot()
    plot.enableOutput(False)
    for curve in (summed, exact):
        plot.plot(grid, curve, style="lines")
    plot.enableOutput(True)
    plot.save(outdir + 'plot.png')

    # NOTE(review): this passes the evaluation grid (not random samples)
    # together with the CDF to eq_sample_pdf; the two plotted curves are
    # never compared directly.  Confirm this is the intended check.
    eq_sample_pdf(grid, cdf, 40)
def test_plot_prob_bounded_coal(self):
    """Compare rejection-sampled waiting times with coal.prob_bounded_coal.

    Coalescent time series for k=4 lineages are sampled repeatedly and only
    kept when the last time is below t.  The distribution of the first
    waiting time is plotted together with coal.prob_bounded_coal, and the
    two curves are compared with fequals.
    """
    n = 1000
    k = 4
    t = 800

    # sample times, retrying each draw until the whole series stays below t
    alltimes = []
    for _ in xrange(5000):
        accepted = False
        while not accepted:
            times = [0]
            for j in xrange(k, 1, -1):
                times.append(times[-1] + coal.sample_coal(j, n))
                if times[-1] >= t:
                    # already past the bound; stop extending this series
                    break
            accepted = times[-1] < t
        alltimes.append(times)

    plot = Gnuplot()

    # distribution of the first waiting time (times[1] - times[0])
    first_waits = [q[1] - q[0] for q in alltimes]
    x, y = distrib(first_waits, width=20)
    plot.plot(x, y, style="lines", xmax=500)

    x = list(frange(0, 500, 10))
    y2 = [coal.prob_bounded_coal(j, k, n, t) for j in x]
    plot.plot(x, y2, style="lines", xmax=500)

    fequals(y, y2, rel=.05, eabs=.01)
def test_coin_sample_post(self):
    """Check that posterior state samples correlate with the true states.

    Samples 100 states and matching data from the coin model, then draws
    eight state paths with hmm.sample_posterior and asserts each one has a
    correlation above 0.5 with the true states.  All paths are plotted and
    the figure is saved.
    """
    outdir = 'test/tmp/test_hmm/test_coin_sample_post/'
    make_clean_dir(outdir)
    model = make_coin_model()

    # sample states and data
    ndata = 100
    states = list(islice(hmm.sample_hmm_states(model), ndata))
    data = list(hmm.sample_hmm_data(model, states))

    def prob_emission(pos, state):
        # emission probability of the sampled datum at position pos
        return model.prob_emission_data(state, data[pos])

    model.prob_emission = prob_emission

    plot = Gnuplot()
    plot.enableOutput(False)
    plot.plot(states, style="lines")

    # exp() is applied to column 1 of each posterior row before plotting
    probs = hmm.get_posterior_probs(model, len(data))
    posterior = []
    for pos in xrange(len(data)):
        posterior.append(exp(probs[pos][1]))
    plot.plot(util.vadds(posterior, 1.5), style="lines", miny=-1, maxy=12)

    for row in range(2, 10):
        sampled = hmm.sample_posterior(model, ndata)
        self.assertTrue(stats.corr(states, sampled) > .5)
        # shift each sampled path upward so the curves do not overlap
        shifted = util.vadds(sampled, 1.5 * row)
        plot.plot(shifted, style="lines", miny=-1, maxy=12)

    plot.enableOutput(True)
    plot.save(outdir + 'plot.png')
def test_coin(self):
    """Check that Viterbi and posterior decoding follow the true states.

    Samples 100 states and matching data from the coin model, decodes them
    with hmm.viterbi and with hmm.get_posterior_probs, and asserts that
    both results have a correlation above 0.5 with the true states.  The
    true and inferred paths are plotted and the figure is saved.
    """
    outdir = 'test/tmp/test_hmm/test_coin/'
    make_clean_dir(outdir)
    model = make_coin_model()

    # sample states
    ndata = 100
    states = list(islice(hmm.sample_hmm_states(model), ndata))

    plot = Gnuplot()
    plot.enableOutput(False)
    plot.plot(states, style="lines")

    # sample data
    data = list(hmm.sample_hmm_data(model, states))

    def prob_emission(pos, state):
        # emission probability of the sampled datum at position pos
        return model.prob_emission_data(state, data[pos])

    model.prob_emission = prob_emission

    # viterbi
    viterbi_path = hmm.viterbi(model, len(data))

    # posterior: exp() of column 1 of each row
    probs = hmm.get_posterior_probs(model, len(data))
    posterior = []
    for pos in xrange(len(data)):
        posterior.append(exp(probs[pos][1]))

    # assert that inferences correlate with the true states
    self.assertTrue(stats.corr(states, viterbi_path) > .5)
    self.assertTrue(stats.corr(states, posterior) > .5)

    # plot inference, each curve shifted upward by its own offset
    for curve, offset in ((viterbi_path, 1.5), (posterior, 2.5)):
        plot.plot(util.vadds(curve, offset), style="lines", miny=-1, maxy=4)

    plot.enableOutput(True)
    plot.save(outdir + 'plot.png')
def test_coin_sample_post(self):
    """Verify that paths drawn from the posterior track the true states.

    A run of 100 states and data is sampled from the coin model.  Eight
    paths are then drawn with hmm.sample_posterior; each must correlate
    with the true states above 0.5.  The true path, the posterior curve
    and every sampled path are plotted and saved.
    """
    outdir = 'test/tmp/test_hmm/test_coin_sample_post/'
    make_clean_dir(outdir)
    model = make_coin_model()

    # sample states and data
    ndata = 100
    true_states = list(islice(hmm.sample_hmm_states(model), ndata))
    data = list(hmm.sample_hmm_data(model, true_states))

    def emission(pos, state):
        # look up the observed datum by position
        return model.prob_emission_data(state, data[pos])

    model.prob_emission = emission

    gp = Gnuplot()
    gp.enableOutput(False)
    gp.plot(true_states, style="lines")

    npos = len(data)
    probs = hmm.get_posterior_probs(model, npos)
    # exp() of column 1 of every posterior row
    post_curve = [exp(probs[pos][1]) for pos in xrange(npos)]
    gp.plot(util.vadds(post_curve, 1.5), style="lines", miny=-1, maxy=12)

    for i in range(2, 10):
        path = hmm.sample_posterior(model, ndata)
        self.assertTrue(stats.corr(true_states, path) > .5)
        # stack the sampled paths vertically in the plot
        gp.plot(util.vadds(path, 1.5*i), style="lines", miny=-1, maxy=12)

    gp.enableOutput(True)
    gp.save(outdir + 'plot.png')
def test_coin(self):
    """Verify Viterbi and posterior decoding on data from the coin model.

    A run of 100 states and data is sampled from the coin model.  The
    states are inferred with hmm.viterbi and from hmm.get_posterior_probs;
    both inferences must correlate with the true states above 0.5.  All
    three curves are plotted and the figure is saved.
    """
    outdir = 'test/tmp/test_hmm/test_coin/'
    make_clean_dir(outdir)
    model = make_coin_model()

    # sample states
    ndata = 100
    true_states = list(islice(hmm.sample_hmm_states(model), ndata))

    gp = Gnuplot()
    gp.enableOutput(False)
    gp.plot(true_states, style="lines")

    # sample data
    data = list(hmm.sample_hmm_data(model, true_states))
    npos = len(data)

    def emission(pos, state):
        # look up the observed datum by position
        return model.prob_emission_data(state, data[pos])

    model.prob_emission = emission

    # viterbi
    decoded = hmm.viterbi(model, npos)

    # posterior: exp() of column 1 of every row
    probs = hmm.get_posterior_probs(model, npos)
    post_curve = [exp(probs[pos][1]) for pos in xrange(npos)]

    # both inferences must correlate with the true states
    for inferred in (decoded, post_curve):
        self.assertTrue(stats.corr(true_states, inferred) > .5)

    # plot inference
    gp.plot(util.vadds(decoded, 1.5), style="lines", miny=-1, maxy=4)
    gp.plot(util.vadds(post_curve, 2.5), style="lines", miny=-1, maxy=4)

    gp.enableOutput(True)
    gp.save(outdir + 'plot.png')
def test_top(self):
    """Compare topology counts from the two bounded multicoal samplers.

    Trees are sampled nsamples times with both
    coal.sample_bounded_multicoal_tree_reject and
    coal.sample_bounded_multicoal_tree.  Each tree is hashed with
    phylo.hash_tree and counted per sampler; the safelog of the counts
    must correlate above 0.9 between the two samplers.  A scatter plot
    with a diagonal reference line is saved.
    """
    outdir = 'test/tmp/test_coal/BMC_test_top/'
    make_clean_dir(outdir)

    stree = treelib.parse_newick(
        "(((A:200, E:200):800, B:1000):500, (C:700, D:700):800);")
    n = 500
    T = 2000
    nsamples = 4000

    def same_name(x):
        return x

    def record(table, key, tree, recon, hit):
        # Make sure `key` has an entry in `table`; count it only when the
        # tree came from the sampler that `table` belongs to.
        entry = table.setdefault(key, [0, tree, recon])
        if hit:
            entry[0] += 1

    # compare top hist with simpler rejection sampling
    tops = {}
    tops2 = {}
    for _ in xrange(nsamples):
        # use rejection sampling
        tree, recon = coal.sample_bounded_multicoal_tree_reject(
            stree, n, T, namefunc=same_name)

        # sample tree
        tree2, recon2 = coal.sample_bounded_multicoal_tree(
            stree, n, T, namefunc=same_name)

        top = phylo.hash_tree(tree)
        top2 = phylo.hash_tree(tree2)

        # both tables end up with the same key set, so they can be
        # indexed by the same keys below
        record(tops, top, tree, recon, True)
        record(tops, top2, tree2, recon2, False)
        record(tops2, top2, tree2, recon2, True)
        record(tops2, top, tree, recon, False)

    keys = tops.keys()
    x = []
    y = []
    for key in keys:
        x.append(safelog(tops[key][0], default=0))
    for key in keys:
        y.append(safelog(tops2[key][0], default=0))

    self.assertTrue(stats.corr(x, y) > .9)

    plot = Gnuplot()
    plot.enableOutput(False)
    plot.plot(x, y)
    # diagonal reference line spanning the range of x
    lo, hi = min(x), max(x)
    plot.plot([lo, hi], [lo, hi], style="lines")
    plot.enableOutput(True)
    plot.save(outdir + 'plot.png')
def test_fast_sample_bounded_coal(self):
    """Plot waiting times from rejection sampling and sample_bounded_coal.

    Coalescent time series for k=5 lineages bounded by t are produced in
    two ways: by rejection sampling with coal.sample_coal, and directly
    with coal.sample_bounded_coal.  The distribution of each successive
    waiting time is plotted for both sets on the same Gnuplot object.
    The test makes no assertions.
    """
    # sample bounded coal times efficiently
    n = 1000
    k = 5
    t = 500

    plot = Gnuplot()

    def plot_waits(samples):
        # one curve per coalescent interval, then refresh the plot
        for i in range(1, k):
            waits = [q[i] - q[i-1] for q in samples]
            x, y = distrib(waits, width=30)
            plot.plot(x, y, style="lines", xmax=500)
        plot.enableOutput(True)
        plot.replot()

    # sample times, retrying each draw until the whole series stays below t
    alltimes = []
    for _ in xrange(5000):
        accepted = False
        while not accepted:
            times = [0]
            for j in xrange(k, 1, -1):
                times.append(times[-1] + coal.sample_coal(j, n))
                if times[-1] >= t:
                    break
            accepted = times[-1] < t
        alltimes.append(times)

    plot_waits(alltimes)

    # sample times efficiently: each step is drawn with the time remaining
    # before t as its bound
    alltimes2 = []
    for _ in xrange(5000):
        times = [0]
        for j in xrange(k, 1, -1):
            remaining = t - times[-1]
            times.append(times[-1] + coal.sample_bounded_coal(j, n, remaining))
        alltimes2.append(times)

    plot_waits(alltimes2)
def test_cdf_coal_cond_counts(self):
    """Test the coalescent CDF when conditioned on future lineage counts.

    For every b from 2 up to a-1, the curve of coal.cdf_coal_cond_counts is
    plotted together with the empirical CDF (stats.cdf) of first coalescent
    times obtained by rejection sampling.  One figure is saved per b, and
    the values returned by stats.cdf are checked against
    coal.prob_coal_cond_counts with eq_sample_pdf.
    """
    outdir = 'test/tmp/test_coal/Coal_test_cdf_coal_cond_counts/'
    make_clean_dir(outdir)

    a = 5
    t = 500
    n = 1000

    for b in xrange(2, a):

        def cond_cdf(x):
            return coal.cdf_coal_cond_counts(x, a, b, t, n)

        def cond_pdf(x):
            return coal.prob_coal_cond_counts(x, a, b, t, n)

        plot = Gnuplot()
        plot.enableOutput(False)
        plot.plotfunc(cond_cdf, 0, t, 10)

        # draw single coal samples using rejection sampling: redraw until
        # times[a-b-1] is before t and (unless b == 1) times[a-b] is after t
        firsts = []
        for _ in xrange(1000):
            times = coal.sample_coal_times(a, n)
            while not (times[a-b-1] < t and (b == 1 or times[a-b] > t)):
                times = coal.sample_coal_times(a, n)
            firsts.append(times[0])

        x2, y2 = stats.cdf(firsts)
        plot.plot(x2, y2, style='lines')
        plot.enableOutput(True)
        plot.save(outdir + 'plot-%d.png' % b)

        eq_sample_pdf(x2, cond_pdf, 40)