def test_post(self): k = 6 n = 1e4 rho = 1.5e-8 * 10 mu = 2.5e-8 * 10 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print "muts", len(muts) print "recombs", len(arglib.get_recomb_pos(arg)) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom new_name = "n%d" % (k - 1) keep = set(arg.leaf_names()) - set([new_name]) arglib.subarg_by_leaf_names(arg, keep) arg = arglib.smcify_arg(arg) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) probs = arghmm.get_posterior_probs(model, length, verbose=True) for pcol in probs: p = sum(map(exp, pcol)) print p, " ".join("%.3f" % f for f in map(exp, pcol)) fequal(p, 1.0, rel=1e-2)
def test_post_plot(self): k = 6 n = 1e4 rho = 1.5e-8 * 50 mu = 2.5e-8 * 50 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=30) arghmm.discretize_arg(arg, times) pause() # save #arglib.write_arg("test/data/k4.arg", arg) #fasta.write_fasta("test/data/k4.fa", seqs) new_name = "n%d" % (k - 1) thread = list( arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False)) p = plot(cget(thread, 1), style="lines", ymin=times[1], ylog=10) # remove chrom new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] p.plot(model.recomb_pos, [10000] * len(model.recomb_pos), style="points") probs = arghmm.get_posterior_probs(model, length, verbose=True) print "done" high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.gnuplot("set linestyle 2") p.plot(high, style="lines") p.gnuplot("set linestyle 2") p.plot(low, style="lines") #write_list("test/data/post_real.txt", cget(thread, 1)) #write_list("test/data/post_high.txt", high) #write_list("test/data/post_low.txt", low) pause()
def test_norecomb_plot(self): k = 50 n = 1e4 rho = 1.5e-8 * .0001 rho2 = 1.5e-8 * 10 mu = 2.5e-8 * 100 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=20) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # get thread new_name = "n%d" % (k - 1) keep = ["n%d" % i for i in range(k - 1)] arglib.subarg_by_leaf_names(arg, keep) arg.set_ancestral() arg.prune() model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho2, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) # simulate a new thread states = list(islice(hmm.sample_hmm_states(model), 0, arg.end)) data = list(hmm.sample_hmm_data(model, states)) seqs[new_name] = "".join(data) #alignlib.print_align(seqs) thread = [ model.times[model.states[i][s][1]] for i, s in enumerate(states) ] p = plot(thread, style="lines") probs = arghmm.get_posterior_probs(model, length, verbose=True) print "done" high = list(arghmm.iter_posterior_times(model, probs, .75)) low = list(arghmm.iter_posterior_times(model, probs, .25)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_post_plot(self): k = 6 n = 1e4 rho = 1.5e-8 * 50 mu = 2.5e-8 * 50 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=30) arghmm.discretize_arg(arg, times) pause() # save #arglib.write_arg("test/data/k4.arg", arg) #fasta.write_fasta("test/data/k4.fa", seqs) new_name = "n%d" % (k-1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False)) p = plot(cget(thread, 1), style="lines", ymin=times[1], ylog=10) # remove chrom new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] p.plot(model.recomb_pos, [10000] * len(model.recomb_pos), style="points") probs = arghmm.get_posterior_probs(model, length, verbose=True) print "done" high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.gnuplot("set linestyle 2") p.plot(high, style="lines") p.gnuplot("set linestyle 2") p.plot(low, style="lines") #write_list("test/data/post_real.txt", cget(thread, 1)) #write_list("test/data/post_high.txt", high) #write_list("test/data/post_low.txt", low) pause()
def test_post_real(self): k = 3 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 100000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) #arg = arglib.read_arg("test/data/real.arg") #seqs = fasta.read_fasta("test/data/real.fa") #arglib.write_arg("test/data/real.arg", arg) #fasta.write_fasta("test/data/real.fa", seqs) times = arghmm.get_time_points(maxtime=50000, ntimes=20) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) thread = list( arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False)) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) p = plot(cget(thread, 1), style="lines", ymin=10, ylog=10) #alignlib.print_align(seqs) # remove chrom keep = ["n%d" % i for i in range(k - 1)] arglib.subarg_by_leaf_names(arg, keep) arg = arglib.smcify_arg(arg) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) #print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] probs = arghmm.get_posterior_probs(model, length, verbose=True) high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_post_c(self): k = 3 n = 1e4 rho = 1.5e-8 * 30 mu = 2.5e-8 * 100 length = 100 arg = arglib.sample_arg(k, n, rho, start=0, end=length) arg.prune() muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print arglib.get_recomb_pos(arg) print "muts", len(muts) print "recomb", len(arglib.get_recomb_pos(arg)) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom keep = ["n%d" % i for i in range(k - 1)] arglib.subarg_by_leaf_names(arg, keep) model = arghmm.ArgHmm(arg, seqs, new_name="n%d" % (k - 1), times=times, rho=rho, mu=mu) print "states", len(model.states[0]) util.tic("C") probs1 = list(arghmm.get_posterior_probs(model, length, verbose=True)) util.toc() util.tic("python") probs2 = list(hmm.get_posterior_probs(model, length, verbose=True)) util.toc() print "probs1" pc(probs1) print "probs2" pc(probs2) for col1, col2 in izip(probs1, probs2): for a, b in izip(col1, col2): fequal(a, b)
def test_norecomb_plot(self): k = 50 n = 1e4 rho = 1.5e-8 * .0001 rho2 = 1.5e-8 * 10 mu = 2.5e-8 * 100 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=20) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # get thread new_name = "n%d" % (k-1) keep = ["n%d" % i for i in range(k-1)] arglib.subarg_by_leaf_names(arg, keep) arg.set_ancestral() arg.prune() model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho2, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) # simulate a new thread states = list(islice(hmm.sample_hmm_states(model), 0, arg.end)) data = list(hmm.sample_hmm_data(model, states)) seqs[new_name] = "".join(data) #alignlib.print_align(seqs) thread = [model.times[model.states[i][s][1]] for i, s in enumerate(states)] p = plot(thread, style="lines") probs = arghmm.get_posterior_probs(model, length, verbose=True) print "done" high = list(arghmm.iter_posterior_times(model, probs, .75)) low = list(arghmm.iter_posterior_times(model, probs, .25)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_post_real(self): k = 3 n = 1e4 rho = 1.5e-8 mu = 2.5e-8 length = 100000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) #arg = arglib.read_arg("test/data/real.arg") #seqs = fasta.read_fasta("test/data/real.fa") #arglib.write_arg("test/data/real.arg", arg) #fasta.write_fasta("test/data/real.fa", seqs) times = arghmm.get_time_points(maxtime=50000, ntimes=20) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name], by_block=False)) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) p = plot(cget(thread, 1), style="lines", ymin=10, ylog=10) #alignlib.print_align(seqs) # remove chrom keep = ["n%d" % i for i in range(k-1)] arglib.subarg_by_leaf_names(arg, keep) arg = arglib.smcify_arg(arg) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) #print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] probs = arghmm.get_posterior_probs(model, length, verbose=True) high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_post_c(self): k = 3 n = 1e4 rho = 1.5e-8 * 30 mu = 2.5e-8 * 100 length = 100 arg = arglib.sample_arg(k, n, rho, start=0, end=length) arg.prune() muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print arglib.get_recomb_pos(arg) print "muts", len(muts) print "recomb", len(arglib.get_recomb_pos(arg)) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom keep = ["n%d" % i for i in range(k-1)] arglib.subarg_by_leaf_names(arg, keep) model = arghmm.ArgHmm(arg, seqs, new_name="n%d" % (k-1), times=times, rho=rho, mu=mu) print "states", len(model.states[0]) util.tic("C") probs1 = list(arghmm.get_posterior_probs(model, length, verbose=True)) util.toc() util.tic("python") probs2 = list(hmm.get_posterior_probs(model, length, verbose=True)) util.toc() print "probs1" pc(probs1) print "probs2" pc(probs2) for col1, col2 in izip(probs1, probs2): for a, b in izip(col1, col2): fequal(a, b)
def test_post3(self): k = 3 n = 1e4 rho = 1.5e-8 * 3 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) arg.prune() muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) thread = list(arghmm.iter_chrom_thread(arg, arg["n2"], by_block=False)) p = plot(cget(thread, 1), style="lines", ymin=0) # remove chrom keep = ["n0", "n1"] arglib.subarg_by_leaf_names(arg, keep) arg.set_ancestral() arg.prune() model = arghmm.ArgHmm(arg, seqs, new_name="n2", times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "muts", len(muts) print "recomb", len(model.recomb_pos) - 2, model.recomb_pos[1:-1] p.plot(model.recomb_pos, [1000] * len(model.recomb_pos), style="points") probs = arghmm.get_posterior_probs(model, length, verbose=True) high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.plot(high, style="lines") p.plot(low, style="lines") pause()
def test_post2(self): k = 2 n = 1e4 rho = 1.5e-8 * 10 mu = 2.5e-8 * 10 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print "muts", len(muts) times = arghmm.get_time_points() arghmm.discretize_arg(arg, times) thread = list(arghmm.iter_chrom_thread(arg, arg["n1"], by_block=False)) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) p = plot(cget(thread, 1), style="lines", ymin=0) #alignlib.print_align(seqs) # remove chrom keep = ["n0"] arglib.subarg_by_leaf_names(arg, keep) arg = arglib.smcify_arg(arg) model = arghmm.ArgHmm(arg, seqs, new_name="n1", times=times, rho=rho, mu=mu) print "states", len(model.states[0]) probs = arghmm.get_posterior_probs(model, length, verbose=True) high = list(arghmm.iter_posterior_times(model, probs, .95)) low = list(arghmm.iter_posterior_times(model, probs, .05)) p.plot(high, style="lines") p.plot(low, style="lines") pause()