def test_nlineages(self): """ Test lineage counting """ k = 4 n = 1e4 rho = 1.5e-8 * 1 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) times = arghmm.get_time_points(ntimes=6) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) nlineages, nrecombs, ncoals = arghmm.get_nlineages_recomb_coal( tree, times) treelib.draw_tree_names(tree.get_tree(), scale=4e-3) print list(arghmm.iter_coal_states(tree, times)) print nlineages self.assert_(nlineages == sorted(nlineages, reverse=True)) print nlineages print nrecombs print ncoals
def test_prior(self): """ Calculate state priors """ k = 10 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points() arghmm.discretize_arg(arg, times) new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) prior = [model.prob_prior(0, j) for j in xrange(model.get_num_states(0))] print prior print sum(map(exp, prior)) fequal(sum(map(exp, prior)), 1.0, rel=.01)
def test_trans(): """ Calculate transition probabilities """ create_data = False if create_data: make_clean_dir('test/data/test_trans') k = 8 n = 1e4 rho = 1.5e-8 * 20 length = 1000 times = argweaver.get_time_points(ntimes=10, maxtime=200000) popsizes = [n] * len(times) ntests = 40 # generate test data if create_data: for i in range(ntests): arg = arglib.sample_arg(k, 2*n, rho, start=0, end=length) argweaver.discretize_arg(arg, times) arg.write('test/data/test_trans/%d.arg' % i) for i in range(ntests): print 'arg', i arg = arglib.read_arg('test/data/test_trans/%d.arg' % i) argweaver.discretize_arg(arg, times) pos = 10 tree = arg.get_marginal_tree(pos) assert argweaverc.assert_transition_probs(tree, times, popsizes, rho)
def test_post(self): k = 6 n = 1e4 rho = 1.5e-8 * 10 mu = 2.5e-8 * 10 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print "muts", len(muts) print "recombs", len(arglib.get_recomb_pos(arg)) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom new_name = "n%d" % (k - 1) keep = set(arg.leaf_names()) - set([new_name]) arglib.subarg_by_leaf_names(arg, keep) arg = arglib.smcify_arg(arg) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) probs = arghmm.get_posterior_probs(model, length, verbose=True) for pcol in probs: p = sum(map(exp, pcol)) print p, " ".join("%.3f" % f for f in map(exp, pcol)) fequal(p, 1.0, rel=1e-2)
def test_trans_single(self): """ Calculate transition probabilities Only calculate a single matrix """ k = 4 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) print "recomb", arglib.get_recomb_pos(arg) new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) pos = 10 tree = arg.get_marginal_tree(pos) mat = arghmm.calc_transition_probs(tree, model.states[pos], model.nlineages, model.times, model.time_steps, model.popsizes, rho) print model.states[pos] pc(mat) for row in mat: print sum(map(exp, row))
def test_trans_single(self): """ Calculate transition probabilities Only calculate a single matrix """ k = 4 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) print "recomb", arglib.get_recomb_pos(arg) new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) pos = 10 tree = arg.get_marginal_tree(pos) mat = arghmm.calc_transition_probs( tree, model.states[pos], model.nlineages, model.times, model.time_steps, model.popsizes, rho) print model.states[pos] pc(mat) for row in mat: print sum(map(exp, row))
def test_prior(self): """ Calculate state priors """ k = 10 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points() arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) prior = [ model.prob_prior(0, j) for j in xrange(model.get_num_states(0)) ] print prior print sum(map(exp, prior)) fequal(sum(map(exp, prior)), 1.0, rel=.01)
def test_post_plot(self): k = 6 n = 1e4 rho = 1.5e-8 * 50 mu = 2.5e-8 * 50 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=30) arghmm.discretize_arg(arg, times) pause() # save #arglib.write_arg("test/data/k4.arg", arg) #fasta.write_fasta("test/data/k4.fa", seqs) new_name = "n%d" % (k - 1) thread = list( arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False)) p = plot(cget(thread, 1), style="lines", ymin=times[1], ylog=10) # remove chrom new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] p.plot(model.recomb_pos, [10000] * len(model.recomb_pos), style="points") probs = arghmm.get_posterior_probs(model, length, verbose=True) print "done" high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.gnuplot("set linestyle 2") p.plot(high, style="lines") p.gnuplot("set linestyle 2") p.plot(low, style="lines") #write_list("test/data/post_real.txt", cget(thread, 1)) #write_list("test/data/post_high.txt", high) #write_list("test/data/post_low.txt", low) pause()
def test_post_plot(self): k = 6 n = 1e4 rho = 1.5e-8 * 50 mu = 2.5e-8 * 50 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=30) arghmm.discretize_arg(arg, times) pause() # save #arglib.write_arg("test/data/k4.arg", arg) #fasta.write_fasta("test/data/k4.fa", seqs) new_name = "n%d" % (k-1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False)) p = plot(cget(thread, 1), style="lines", ymin=times[1], ylog=10) # remove chrom new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] p.plot(model.recomb_pos, [10000] * len(model.recomb_pos), style="points") probs = arghmm.get_posterior_probs(model, length, verbose=True) print "done" high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.gnuplot("set linestyle 2") p.plot(high, style="lines") p.gnuplot("set linestyle 2") p.plot(low, style="lines") #write_list("test/data/post_real.txt", cget(thread, 1)) #write_list("test/data/post_high.txt", high) #write_list("test/data/post_low.txt", low) pause()
def test_norecomb_plot(self): k = 50 n = 1e4 rho = 1.5e-8 * .0001 rho2 = 1.5e-8 * 10 mu = 2.5e-8 * 100 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=20) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # get thread new_name = "n%d" % (k - 1) keep = ["n%d" % i for i in range(k - 1)] arglib.subarg_by_leaf_names(arg, keep) arg.set_ancestral() arg.prune() model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho2, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) # simulate a new thread states = list(islice(hmm.sample_hmm_states(model), 0, arg.end)) data = list(hmm.sample_hmm_data(model, states)) seqs[new_name] = "".join(data) #alignlib.print_align(seqs) thread = [ model.times[model.states[i][s][1]] for i, s in enumerate(states) ] p = plot(thread, style="lines") probs = arghmm.get_posterior_probs(model, length, verbose=True) print "done" high = list(arghmm.iter_posterior_times(model, probs, .75)) low = list(arghmm.iter_posterior_times(model, probs, .25)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_local_trees(self):
    """
    Check that the blocks reported by arglib.iter_local_trees() over
    [200, 1200] equal those from arglib.iter_recomb_blocks().
    """
    rho = 1.5e-8   # recomb/site/gen
    l = 10000      # length of locus
    k = 10         # number of lineages
    n = 2*1e4      # effective popsize
    start, end = 200, 1200

    arg = arglib.sample_arg(k, n, rho, 0, l)

    # first element of each item yielded by iter_local_trees
    tree_blocks = util.cget(arglib.iter_local_trees(arg, start, end), 0)
    recomb_blocks = list(arglib.iter_recomb_blocks(arg, start, end))

    self.assertEqual(tree_blocks, recomb_blocks)
def test_post_real(self): k = 3 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 100000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) #arg = arglib.read_arg("test/data/real.arg") #seqs = fasta.read_fasta("test/data/real.fa") #arglib.write_arg("test/data/real.arg", arg) #fasta.write_fasta("test/data/real.fa", seqs) times = arghmm.get_time_points(maxtime=50000, ntimes=20) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) thread = list( arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False)) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) p = plot(cget(thread, 1), style="lines", ymin=10, ylog=10) #alignlib.print_align(seqs) # remove chrom keep = ["n%d" % i for i in range(k - 1)] arglib.subarg_by_leaf_names(arg, keep) arg = arglib.smcify_arg(arg) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) #print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] probs = arghmm.get_posterior_probs(model, length, verbose=True) high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_iter_sprs(self):
    """
    Check that arglib.iter_arg_sprs() and arglib.iter_arg_sprs_simple()
    yield equal items, compared pairwise with izip.
    """
    rho = 1.5e-8   # recomb/site/gen
    l = 100000     # length of locus
    k = 6          # number of lineages
    n = 2*10000    # effective popsize

    arg = arglib.sample_arg(k, n, rho, 0, l)

    fast = arglib.iter_arg_sprs(arg)
    simple = arglib.iter_arg_sprs_simple(arg)
    for spr, spr_simple in izip(fast, simple):
        self.assertEqual(spr, spr_simple)
def test_smcify_arg(self):
    """
    Check that, after arglib.smcify_arg(), every SPR yielded by
    arglib.iter_arg_sprs() has different recomb and coal nodes.
    """
    rho = 1.5e-8   # recomb/site/gen
    l = 100000     # length of locus
    k = 6          # number of lineages
    n = 2*10000    # effective popsize

    arg = arglib.smcify_arg(arglib.sample_arg(k, n, rho, 0, l))

    for _pos, (recomb_node, _rtime), (coal_node, _ctime) in (
            arglib.iter_arg_sprs(arg)):
        self.assertNotEqual(recomb_node, coal_node)
def test_trans_switch_single(self): """ Calculate transitions probabilities for switching between blocks Only calculate a single matrix """ k = 5 n = 1e4 rho = 1.5e-8 * 100 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) #arglib.write_arg("tmp/a.arg", arg) #arg = arglib.read_arg("tmp/a.arg") #arg.set_ancestral() muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(5) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) # get recombs recombs = list(x.pos for x in arghmm.iter_visible_recombs(arg)) print "recomb", recombs pos = recombs[0] + 1 tree = arg.get_marginal_tree(pos - .5) last_tree = arg.get_marginal_tree(pos - 1 - .5) print "states1>>", model.states[pos - 1] print "states2>>", model.states[pos] treelib.draw_tree_names(last_tree.get_tree(), minlen=5, maxlen=5) treelib.draw_tree_names(tree.get_tree(), minlen=5, maxlen=5) print "pos>>", pos recomb = [x for x in tree if x.event == "recomb" and x.pos + 1 == pos][0] mat = arghmm.calc_transition_probs_switch(tree, last_tree, recomb.name, model.states[pos - 1], model.states[pos], model.nlineages, model.times, model.time_steps, model.popsizes, rho) pc(mat)
def test_norecomb_plot(self): k = 50 n = 1e4 rho = 1.5e-8 * .0001 rho2 = 1.5e-8 * 10 mu = 2.5e-8 * 100 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=20) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # get thread new_name = "n%d" % (k-1) keep = ["n%d" % i for i in range(k-1)] arglib.subarg_by_leaf_names(arg, keep) arg.set_ancestral() arg.prune() model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho2, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) # simulate a new thread states = list(islice(hmm.sample_hmm_states(model), 0, arg.end)) data = list(hmm.sample_hmm_data(model, states)) seqs[new_name] = "".join(data) #alignlib.print_align(seqs) thread = [model.times[model.states[i][s][1]] for i, s in enumerate(states)] p = plot(thread, style="lines") probs = arghmm.get_posterior_probs(model, length, verbose=True) print "done" high = list(arghmm.iter_posterior_times(model, probs, .75)) low = list(arghmm.iter_posterior_times(model, probs, .25)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_trans_switch_single(self): """ Calculate transitions probabilities for switching between blocks Only calculate a single matrix """ k = 5 n = 1e4 rho = 1.5e-8 * 100 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) #arglib.write_arg("tmp/a.arg", arg) #arg = arglib.read_arg("tmp/a.arg") #arg.set_ancestral() muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(5) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) # get recombs recombs = list(x.pos for x in arghmm.iter_visible_recombs(arg)) print "recomb", recombs pos = recombs[0] + 1 tree = arg.get_marginal_tree(pos-.5) last_tree = arg.get_marginal_tree(pos-1-.5) print "states1>>", model.states[pos-1] print "states2>>", model.states[pos] treelib.draw_tree_names(last_tree.get_tree(), minlen=5, maxlen=5) treelib.draw_tree_names(tree.get_tree(), minlen=5, maxlen=5) print "pos>>", pos recomb = [x for x in tree if x.event == "recomb" and x.pos+1 == pos][0] mat = arghmm.calc_transition_probs_switch( tree, last_tree, recomb.name, model.states[pos-1], model.states[pos], model.nlineages, model.times, model.time_steps, model.popsizes, rho) pc(mat)
def test_post_c(self): k = 3 n = 1e4 rho = 1.5e-8 * 30 mu = 2.5e-8 * 100 length = 100 arg = arglib.sample_arg(k, n, rho, start=0, end=length) arg.prune() muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print arglib.get_recomb_pos(arg) print "muts", len(muts) print "recomb", len(arglib.get_recomb_pos(arg)) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom keep = ["n%d" % i for i in range(k - 1)] arglib.subarg_by_leaf_names(arg, keep) model = arghmm.ArgHmm(arg, seqs, new_name="n%d" % (k - 1), times=times, rho=rho, mu=mu) print "states", len(model.states[0]) util.tic("C") probs1 = list(arghmm.get_posterior_probs(model, length, verbose=True)) util.toc() util.tic("python") probs2 = list(hmm.get_posterior_probs(model, length, verbose=True)) util.toc() print "probs1" pc(probs1) print "probs2" pc(probs2) for col1, col2 in izip(probs1, probs2): for a, b in izip(col1, col2): fequal(a, b)
def test_smcify_arg_remove_thread(self):
    """
    Run arglib.smcify_arg() on an ARG from which one leaf has been
    removed.

    Smoke test: there are no assertions, so it passes when no exception
    is raised.
    """
    rho = 1.5e-8   # recomb/site/gen
    l = 100000     # length of locus
    k = 6          # number of lineages
    n = 2*10000    # effective popsize

    arg = arglib.sample_arg(k, n, rho, 0, l)

    # Wrap the name in a list: set("n5") would be the character set
    # {"n", "5"}, which matches no leaf name and so removes nothing.
    remove_chroms = set(["n%d" % (k-1)])
    keep = [x for x in arg.leaf_names() if x not in remove_chroms]

    # work on a copy because subarg_by_leaf_names is called for its
    # effect on the ARG passed in
    arg = arg.copy()
    arglib.subarg_by_leaf_names(arg, keep)
    arg = arglib.smcify_arg(arg)
def test_post_real(self): k = 3 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 100000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) #arg = arglib.read_arg("test/data/real.arg") #seqs = fasta.read_fasta("test/data/real.fa") #arglib.write_arg("test/data/real.arg", arg) #fasta.write_fasta("test/data/real.fa", seqs) times = arghmm.get_time_points(maxtime=50000, ntimes=20) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False)) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) p = plot(cget(thread, 1), style="lines", ymin=10, ylog=10) #alignlib.print_align(seqs) # remove chrom keep = ["n%d" % i for i in range(k-1)] arglib.subarg_by_leaf_names(arg, keep) arg = arglib.smcify_arg(arg) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) #print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] probs = arghmm.get_posterior_probs(model, length, verbose=True) high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_iter_sprs_time(self):
    """
    Exhaust arglib.iter_arg_sprs() and arglib.iter_arg_sprs_simple(),
    each with and without use_leaves=True.

    The results are discarded and nothing is asserted.
    """
    rho = 1.5e-8   # recomb/site/gen
    l = 100000     # length of locus
    k = 40         # number of lineages
    n = 2*10000    # effective popsize

    arg = arglib.sample_arg(k, n, rho, 0, l)

    sprs = list(arglib.iter_arg_sprs(arg))
    sprs = list(arglib.iter_arg_sprs_simple(arg))
    sprs = list(arglib.iter_arg_sprs(arg, use_leaves=True))
    sprs = list(arglib.iter_arg_sprs_simple(arg, use_leaves=True))

    # bare reference kept from the original; it has no effect
    sprs
def test_post_c(self): k = 3 n = 1e4 rho = 1.5e-8 * 30 mu = 2.5e-8 * 100 length = 100 arg = arglib.sample_arg(k, n, rho, start=0, end=length) arg.prune() muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print arglib.get_recomb_pos(arg) print "muts", len(muts) print "recomb", len(arglib.get_recomb_pos(arg)) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom keep = ["n%d" % i for i in range(k-1)] arglib.subarg_by_leaf_names(arg, keep) model = arghmm.ArgHmm(arg, seqs, new_name="n%d" % (k-1), times=times, rho=rho, mu=mu) print "states", len(model.states[0]) util.tic("C") probs1 = list(arghmm.get_posterior_probs(model, length, verbose=True)) util.toc() util.tic("python") probs2 = list(hmm.get_posterior_probs(model, length, verbose=True)) util.toc() print "probs1" pc(probs1) print "probs2" pc(probs2) for col1, col2 in izip(probs1, probs2): for a, b in izip(col1, col2): fequal(a, b)
def test_post3(self): k = 3 n = 1e4 rho = 1.5e-8 * 3 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) arg.prune() muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) thread = list(arghmm.iter_chrom_thread(arg, arg["n2"], by_block=False)) p = plot(cget(thread, 1), style="lines", ymin=0) # remove chrom keep = ["n0", "n1"] arglib.subarg_by_leaf_names(arg, keep) arg.set_ancestral() arg.prune() model = arghmm.ArgHmm(arg, seqs, new_name="n2", times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] p.plot(model.recomb_pos, [1000] * len(model.recomb_pos), style="points") probs = arghmm.get_posterior_probs(model, length, verbose=True) high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_iter_sprs_remove_thread(self):
    """
    Check that arglib.iter_arg_sprs() and arglib.iter_arg_sprs_simple()
    yield equal items on an ARG from which one leaf has been removed.
    """
    rho = 1.5e-8   # recomb/site/gen
    l = 100000     # length of locus
    k = 6          # number of lineages
    n = 2*10000    # effective popsize

    arg = arglib.sample_arg(k, n, rho, 0, l)

    # Wrap the name in a list: set("n5") would be the character set
    # {"n", "5"}, which matches no leaf name and so removes nothing.
    remove_chroms = set(["n%d" % (k-1)])
    keep = [x for x in arg.leaf_names() if x not in remove_chroms]

    # work on a copy because subarg_by_leaf_names is called for its
    # effect on the ARG passed in
    arg = arg.copy()
    arglib.subarg_by_leaf_names(arg, keep)

    for a, b in izip(arglib.iter_arg_sprs(arg),
                     arglib.iter_arg_sprs_simple(arg)):
        self.assertEqual(a, b)
def test_iter_sprs_leaves(self):
    """
    Check that arglib.iter_arg_sprs() and arglib.iter_arg_sprs_simple()
    agree when called with use_leaves=True.

    Element [0] of items 1 and 2 of each SPR is sorted in place before
    the two SPRs are compared.
    """
    rho = 1.5e-8   # recomb/site/gen
    l = 100000     # length of locus
    k = 40         # number of lineages
    n = 2*10000    # effective popsize

    arg = arglib.sample_arg(k, n, rho, 0, l)

    fast = arglib.iter_arg_sprs(arg, use_leaves=True)
    simple = arglib.iter_arg_sprs_simple(arg, use_leaves=True)
    for spr, spr_simple in izip(fast, simple):
        for item in (spr, spr_simple):
            item[1][0].sort()
            item[2][0].sort()
        self.assertEqual(spr, spr_simple)
def test_marginal_leaves(self):
    """
    Check arglib.get_marginal_leaves() against the leaves of each local
    tree.

    For every node of every local tree (after remove_single_lineages),
    the set from tree.leaves(node) must equal the set from
    get_marginal_leaves() evaluated at the midpoint of the tree's block.
    """
    rho = 1.5e-8   # recomb/site/gen
    l = 10000      # length of locus
    k = 10         # number of lineages
    n = 2*10000    # effective popsize

    arg = arglib.sample_arg(k, n, rho, 0, l)

    for (start, end), tree in arglib.iter_local_trees(arg):
        arglib.remove_single_lineages(tree)
        mid = (start + end) / 2.0
        for node in tree:
            expected = set(tree.leaves(node))
            found = set(arglib.get_marginal_leaves(arg, node, mid))
            self.assertEqual(expected, found)
def test_post2(self): k = 2 n = 1e4 rho = 1.5e-8 * 10 mu = 2.5e-8 * 10 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print "muts", len(muts) times = arghmm.get_time_points() arghmm.discretize_arg(arg, times) thread = list(arghmm.iter_chrom_thread(arg, arg["n1"], by_block=False)) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) p = plot(cget(thread, 1), style="lines", ymin=0) #alignlib.print_align(seqs) # remove chrom keep = ["n0"] arglib.subarg_by_leaf_names(arg, keep) arg = arglib.smcify_arg(arg) model = arghmm.ArgHmm(arg, seqs, new_name="n1", times=times, rho=rho, mu=mu) print "states", len(model.states[0]) probs = arghmm.get_posterior_probs(model, length, verbose=True) high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_plot_thread(self):
    """
    Test thread retrieval

    Plots element 1 of every item of the thread of the first leaf
    against position.  Interactive: ends in pause(), no assertions.
    """
    nchroms = 60
    popsize = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(1000e3) / 20

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)

    # first leaf yielded by the ARG
    node = next(arg.leaves())

    positions = range(length)
    values = cget(arghmm.iter_chrom_thread(arg, node, by_block=False), 1)

    p = plot(positions, values, style='lines')
    pause()
def test_trans():
    """
    Calculate transition probabilities

    Samples one ARG, discretizes it and asserts
    argweaverc.assert_transition_probs() on the marginal tree at
    position 10.
    """
    nchroms = 4
    popsize = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    pos = 10

    times = argweaver.get_time_points(ntimes=4, maxtime=200000)
    popsizes = [popsize] * len(times)

    arg = arglib.sample_arg(nchroms, 2 * popsize, rho, start=0, end=length)
    argweaver.discretize_arg(arg, times)

    tree = arg.get_marginal_tree(pos)
    assert argweaverc.assert_transition_probs(tree, times, popsizes, rho)
def test_trans():
    """
    Calculate transition probabilities

    Samples one ARG, discretizes it and asserts
    argweaverc.assert_transition_probs() on the marginal tree at
    position 10.
    """
    nchroms = 4
    popsize = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    pos = 10

    times = argweaver.get_time_points(ntimes=4, maxtime=200000)
    popsizes = [popsize] * len(times)

    arg = arglib.sample_arg(nchroms, 2 * popsize, rho, start=0, end=length)
    argweaver.discretize_arg(arg, times)

    tree = arg.get_marginal_tree(pos)
    assert argweaverc.assert_transition_probs(tree, times, popsizes, rho)
def test_read_write(self):
    """Write an ARG to an in-memory stream, read it back, and compare"""
    rho = 1.5e-8   # recomb/site/gen
    l = 10000      # length of locus
    k = 10         # number of lineages
    n = 2*10000    # effective popsize

    arg = arglib.sample_arg(k, n, rho, 0, l)

    # round ages and pos for easy equality
    for node in arg:
        node.age, node.pos = round(node.age), round(node.pos)

    buf = StringIO.StringIO()
    arglib.write_arg(buf, arg)
    buf.seek(0)  # rewind so the ARG is read from the start
    arg2 = arglib.read_arg(buf)

    self.assertTrue(arg.equal(arg2))
def test_thread(self): """ Test thread retrieval """ k = 10 n = 1e4 rho = 1.5e-8 * 10 mu = 2.5e-8 * 100 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) for (block, tree), threadi in izip( arglib.iter_tree_tracks(arg), arghmm.iter_chrom_thread(arg, arg["n9"], by_block=True)): print block print threadi treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4)
def test_states(self): """ Test state enumeration """ k = 2 n = 1e4 rho = 1.5e-8 * 100 length = 1000 for i in xrange(20): arg = arglib.sample_arg(k, n, rho, start=0, end=length) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) states = list(arghmm.iter_coal_states(tree, times)) treelib.draw_tree_names(tree.get_tree(), scale=4e-4, minlen=6, maxlen=6) print states
def test_emit_argmax(self): """ Calculate emission probabilities """ k = 10 n = 1e4 rho = 0.0 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k-1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name])) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) nstates = model.get_num_states(1) probs = [0.0 for j in xrange(nstates)] for i in xrange(1, length): if i % 100 == 0: print i for j in xrange(nstates): probs[j] += model.prob_emission(i, j) print # is the maximum likelihood emission matching truth data = sorted(zip(probs, model.states[0]), reverse=True) pc(data[:20]) state = (thread[0][0], times.index(thread[0][1])) print data[0][1], state assert data[0][1] == state
def test_emit_argmax(self): """ Calculate emission probabilities """ k = 10 n = 1e4 rho = 0.0 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name])) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) nstates = model.get_num_states(1) probs = [0.0 for j in xrange(nstates)] for i in xrange(1, length): if i % 100 == 0: print i for j in xrange(nstates): probs[j] += model.prob_emission(i, j) print # is the maximum likelihood emission matching truth data = sorted(zip(probs, model.states[0]), reverse=True) pc(data[:20]) state = (thread[0][0], times.index(thread[0][1])) print data[0][1], state assert data[0][1] == state
def test_backward(self): """ Run backward algorithm """ k = 3 n = 1e4 rho = 1.5e-8 * 100 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "recomb", model.recomb_pos print "muts", len(muts) probs = hmm.backward_algorithm(model, length, verbose=True) for pcol in probs: p = sum(map(exp, pcol)) print p, " ".join("%.3f" % f for f in map(exp, pcol))
def test_trans(self):
    """
    Calculate transition probabilities for k=4

    Only calculate a single matrix: samples one ARG, discretizes it and
    asserts arghmm.assert_transition_probs() on the marginal tree at
    position 10.
    """
    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = arghmm.get_time_points(ntimes=4, maxtime=200000)
    popsizes = [n] * len(times)

    # the ARG is sampled with 2*n while popsizes holds n per time point
    arg = arglib.sample_arg(k, 2*n, rho, start=0, end=length)
    arghmm.discretize_arg(arg, times)

    pos = 10
    tree = arg.get_marginal_tree(pos)

    assert arghmm.assert_transition_probs(tree, times, popsizes, rho)
def test_trans_internal():
    """
    Calculate transition probabilities for internal branch re-sampling

    Only calculate a single matrix: asserts
    argweaverc.assert_transition_probs_internal() on the marginal tree
    at position 10.
    """
    nchroms = 5
    popsize = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    pos = 10

    times = argweaver.get_time_points(ntimes=5, maxtime=200000)
    popsizes = [popsize] * len(times)

    arg = arglib.sample_arg(nchroms, 2 * popsize, rho, start=0, end=length)
    argweaver.discretize_arg(arg, times)

    tree = arg.get_marginal_tree(pos)
    assert argweaverc.assert_transition_probs_internal(
        tree, times, popsizes, rho)
def test_trans_internal():
    """
    Calculate transition probabilities for internal branch re-sampling

    Only calculate a single matrix: asserts
    argweaverc.assert_transition_probs_internal() on the marginal tree
    at position 10.
    """
    nchroms = 5
    popsize = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    pos = 10

    times = argweaver.get_time_points(ntimes=5, maxtime=200000)
    popsizes = [popsize] * len(times)

    arg = arglib.sample_arg(nchroms, 2 * popsize, rho, start=0, end=length)
    argweaver.discretize_arg(arg, times)

    tree = arg.get_marginal_tree(pos)
    assert argweaverc.assert_transition_probs_internal(
        tree, times, popsizes, rho)
def test_trans(self):
    """
    Calculate transition probabilities for k=4

    Only calculate a single matrix: samples one ARG, discretizes it and
    asserts arghmm.assert_transition_probs() on the marginal tree at
    position 10.
    """
    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = arghmm.get_time_points(ntimes=4, maxtime=200000)
    popsizes = [n] * len(times)

    # the ARG is sampled with 2*n while popsizes holds n per time point
    arg = arglib.sample_arg(k, 2 * n, rho, start=0, end=length)
    arghmm.discretize_arg(arg, times)

    pos = 10
    tree = arg.get_marginal_tree(pos)

    assert arghmm.assert_transition_probs(tree, times, popsizes, rho)
def test_pars_seq(self): """ Test parsimony ancestral sequence inference """ k = 10 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 * 100 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) pos = int(muts[0][2]) tree = arg.get_marginal_tree(pos) print "pos =", pos treelib.draw_tree_names(tree.get_tree(), scale=4e-4, minlen=5) arglib.remove_single_lineages(tree) ancestral = arghmm.emit.parsimony_ancestral_seq(tree, seqs, pos) util.print_dict(ancestral)
def test_recomb(self): """ Investigate the fact that some recombinations are not visible """ k = 3 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) arg.set_ancestral() arg.prune() recombs = arglib.get_recomb_pos(arg) # find recombs by walking recombs2 = [] i = 0 while True: tree = arg.get_marginal_tree(i-.5) recomb = arghmm.find_tree_next_recomb(tree, i+1, tree=True) if recomb: recombs2.append(recomb.pos) i = recomb.pos else: break # these are suppose to differ because some recombination occur # in the hole of ancestral sequence intervals print recombs print recombs2 arglib.write_arg("tmp/b.arg", arg)