def test_sample_arg_popsizes_trees(self):
    """
    Simulate an ARG under a varying population size, then re-estimate it.

    An ARG for two chromosomes is drawn with arghmm.sample_arg_dsmc, the
    population sizes are estimated back with arghmm.est_popsizes_trees,
    and the configured and estimated values are plotted together.
    """
    nchroms = 2
    rho = 1.5e-8
    mu = 2.5e-8
    length = int(20e6)
    times = arghmm.get_time_points(ntimes=30, maxtime=160000)

    # One size per time point, falling linearly with the time index.
    # (A constant alternative would be 1e4 for every time point.)
    popsizes = []
    for idx in range(len(times)):
        popsizes.append(1e4 * (61. - idx) / 60.)
    refine = 0

    util.tic("sim ARG")
    # The simulator is handed twice each configured population size.
    # (arglib.sample_arg_smc with 2 * popsizes[0] was a previous alternative.)
    doubled = [2 * size for size in popsizes]
    arg = arghmm.sample_arg_dsmc(nchroms, doubled, rho,
                                 start=0, end=length, times=times)
    util.toc()

    util.tic("estimate popsizes")
    estimated = arghmm.est_popsizes_trees(
        arg, times=times, step=length // 1000, verbose=True)
    util.toc()

    print(estimated)

    # The estimates are plotted against every time point except the first.
    chart = plot(times, popsizes, xlog=10, xmin=10, ymin=0, ymax=20000)
    chart.plot(times[1:], estimated)

    pause()
def test_trans_switch(self):
    """
    Check the transition probabilities for switching between local trees.

    ARGs of 12 chromosomes are sampled until one contains a recombination
    node; a single switch is then validated at the position of the first
    recombination node found.
    """
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)

    # Resample until the ARG has at least one recombination node.
    recombs = []
    while not recombs:
        arg = arghmm.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                     times=times)
        recombs = [node.pos for node in arg if node.event == "recomb"]
    pos = recombs[0]

    # Local tree half a position before the recombination, and the first
    # SPR reported when iterating from that same coordinate.
    tree = arg.get_marginal_tree(pos - .5)
    # next(...) replaces the Python-2-only iterator method .next().
    rpos, r, c = next(arglib.iter_arg_sprs(arg, start=pos - .5))
    spr = (r, c)

    # A bare assert is skipped under "python -O"; use the TestCase method
    # so the check always runs.
    self.assertTrue(arghmm.assert_transition_switch_probs(
        tree, spr, times, popsizes, rho))
def test_trans_switch(self):
    """
    Check the transition probabilities for switching between local trees.

    ARGs of 12 chromosomes are sampled until one contains a recombination
    node; a single switch is then validated at the position of the first
    recombination node found.
    """
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)

    # Resample until the ARG has at least one recombination node.
    recombs = []
    while not recombs:
        arg = arghmm.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                     times=times)
        recombs = [node.pos for node in arg if node.event == "recomb"]
    pos = recombs[0]

    # Local tree half a position before the recombination, and the first
    # SPR reported when iterating from that same coordinate.
    tree = arg.get_marginal_tree(pos - .5)
    # next(...) replaces the Python-2-only iterator method .next().
    rpos, r, c = next(arglib.iter_arg_sprs(arg, start=pos - .5))
    spr = (r, c)

    # A bare assert is skipped under "python -O"; use the TestCase method
    # so the check always runs.
    self.assertTrue(arghmm.assert_transition_switch_probs(
        tree, spr, times, popsizes, rho))
def test_arg_joint(self):
    """
    Print the joint probability of a simulated ARG and its sequences.

    Simulates an ARG for two chromosomes, drops mutations on it, builds
    the alignment and reports arghmm.calc_joint_prob for the result.
    """
    nchroms = 2
    popsize = 1e4
    rho = 1.5e-8 * 20
    rho2 = rho
    mu = 2.5e-8 * 20
    length = 10000
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    refine = 0

    # Simulate the genealogy, then mutations, then the alignment.
    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                 start=0, end=length, times=times)
    mutations = arghmm.sample_arg_mutations(arg, mu, times=times)
    alignment = arglib.make_alignment(arg, mutations)

    joint = arghmm.calc_joint_prob(arg, alignment, mu=mu, rho=rho,
                                   times=times)
    print(joint)
def test_nlineages(self):
    """
    Test lineage counting.

    Samples a four-chromosome ARG, discretizes it onto six time points and
    checks that the lineage counts of the first marginal tree are already
    in non-increasing order.
    """
    k = 4
    n = 1e4
    rho = 1.5e-8 * 1
    length = 1000
    arg = arglib.sample_arg(k, n, rho, start=0, end=length)
    times = arghmm.get_time_points(ntimes=6)
    arghmm.discretize_arg(arg, times)
    tree = arg.get_marginal_tree(0)

    nlineages, nrecombs, ncoals = arghmm.get_nlineages_recomb_coal(
        tree, times)

    treelib.draw_tree_names(tree.get_tree(), scale=4e-3)
    print(list(arghmm.iter_coal_states(tree, times)))
    print(nlineages)

    # assertEqual replaces the deprecated assert_ alias and reports both
    # sequences when the comparison fails.
    self.assertEqual(nlineages, sorted(nlineages, reverse=True))

    print(nlineages)
    print(nrecombs)
    print(ncoals)
def test_emit_internal(self):
    """
    Check the emission probabilities via arghmm_assert_emit_internal.

    A ten-chromosome ARG with mutations is simulated, converted with
    arghmm.arg2ctrees / arghmm.seqs2cseqs, and handed to the checker.
    """
    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps the length an integer (500).
    length = int(10e3) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)

    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                 times=times)
    muts = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, muts)

    trees, names = arghmm.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = arghmm.seqs2cseqs(seqs, names)

    # A bare assert is skipped under "python -O"; use the TestCase method
    # so the check always runs.
    self.assertTrue(arghmm.arghmm_assert_emit_internal(
        trees, len(times), times, mu, seqs2, nseqs, seqlen))
def test_compress_align(self):
    """
    Exercise arghmm.compress_align on a simulated alignment.

    Prints the compressed alignment length next to length / compress,
    plots the kept columns and the gaps between them, and prints
    histtab summaries of arghmm.is_variant over all columns and over
    the kept columns.
    """
    nchroms = 12
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 200e3
    times = arghmm.get_time_points(ntimes=20, maxtime=200e3)
    compress = 20

    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                 start=0, end=length, times=times)
    mutations = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arglib.make_alignment(arg, mutations)

    compressed, cols = arghmm.compress_align(seqs, compress)
    # Explicit formatting prints the two values separated by one space.
    print("%s %s" % (compressed.alignlen(), length / compress))

    # Distance between each kept column and the one before it.
    gaps = []
    for idx in range(1, len(cols)):
        gaps.append(cols[idx] - cols[idx - 1])

    plot(cols)
    plothist(gaps, width=1)

    is_var = []
    for col in range(seqs.alignlen()):
        is_var.append(arghmm.is_variant(seqs, col))
    print(histtab(is_var))
    print(histtab(mget(is_var, cols)))

    pause()
def test_prior(self):
    """
    Check that the state priors at position 0 sum to one.

    The values returned by model.prob_prior are exponentiated before
    summing; the total must equal 1.0 within a relative tolerance of 1%.
    """
    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 1000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)
    times = arghmm.get_time_points()
    arghmm.discretize_arg(arg, times)

    # Take the last chromosome out of the ARG and model it as the new one.
    new_name = "n%d" % (nchroms - 1)
    arg = arghmm.remove_arg_thread(arg, new_name)
    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

    prior = []
    for state in range(model.get_num_states(0)):
        prior.append(model.prob_prior(0, state))
    print(prior)

    total = sum(map(exp, prior))
    print(total)
    fequal(total, 1.0, rel=.01)
def test_sample_arg_popsizes_trees(self):
    """
    Simulate an ARG under a varying population size, then re-estimate it.

    An ARG for two chromosomes is drawn with arghmm.sample_arg_dsmc, the
    population sizes are estimated back with arghmm.est_popsizes_trees,
    and the configured and estimated values are plotted together.
    """
    nchroms = 2
    rho = 1.5e-8
    mu = 2.5e-8
    length = int(20e6)
    times = arghmm.get_time_points(ntimes=30, maxtime=160000)

    # One size per time point, falling linearly with the time index.
    # (A constant alternative would be 1e4 for every time point.)
    popsizes = []
    for idx in range(len(times)):
        popsizes.append(1e4 * (61. - idx) / 60.)
    refine = 0

    util.tic("sim ARG")
    # The simulator is handed twice each configured population size.
    # (arglib.sample_arg_smc with 2 * popsizes[0] was a previous alternative.)
    doubled = [2 * size for size in popsizes]
    arg = arghmm.sample_arg_dsmc(nchroms, doubled, rho,
                                 start=0, end=length, times=times)
    util.toc()

    util.tic("estimate popsizes")
    estimated = arghmm.est_popsizes_trees(
        arg, times=times, step=length // 1000, verbose=True)
    util.toc()

    print(estimated)

    # The estimates are plotted against every time point except the first.
    chart = plot(times, popsizes, xlog=10, xmin=10, ymin=0, ymax=20000)
    chart.plot(times[1:], estimated)

    pause()
def test_popsizes_over_time(self):
    """
    Estimate population sizes over time from a simulated ARG.

    A 20-chromosome ARG is drawn under a linearly falling population-size
    schedule, arghmm.est_arg_popsizes is run on it, and the configured and
    estimated values are plotted together.
    """
    nchroms = 20
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(1e6) // 20
    times = arghmm.get_time_points(ntimes=30, maxtime=160000)
    a = 60.
    b = 15

    # Active schedule: 1e4 * (a - i) / a for time index i.
    # Earlier alternatives were 1e4 * (a - b + abs(i - b)) / a and a
    # constant 1e4.
    popsizes = []
    for idx in range(len(times)):
        popsizes.append(1e4 * (a - idx) / a)
    refine = 0

    util.tic("sim ARG")
    # The simulator is handed twice each configured population size.
    # (arglib.sample_arg_smc with 2 * popsizes[0] was a previous alternative.)
    doubled = [2 * size for size in popsizes]
    arg = arghmm.sample_arg_dsmc(nchroms, doubled, rho,
                                 start=0, end=length, times=times)
    util.toc()

    util.tic("estimate popsizes")
    estimated = arghmm.est_arg_popsizes(arg, times=times)
    util.toc()

    print(estimated)

    # The estimates are plotted against every time point except the first.
    chart = plot(times, popsizes, xlog=10, xmin=10, ymin=0, ymax=20000)
    chart.plot(times[1:], estimated)

    pause()
def test_est_popsize(self):
    """
    Plot a per-tree population-size estimate along a simulated ARG.

    For every marginal tree of a 50-chromosome ARG, single lineages are
    removed and mle_popsize_tree is evaluated; the estimates are plotted
    with a horizontal line at the simulated population size.
    """
    nchroms = 50
    rho = 1.5e-8
    mu = 2.5e-8
    length = int(1e6)
    times = arghmm.get_time_points(ntimes=30, maxtime=200000)
    popsize = 1e4
    refine = 0

    util.tic("sim ARG")
    # arglib.sample_arg_smc / arglib.sample_arg with the same arguments
    # were previous alternatives to the simulator used here.
    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                 start=0, end=length, times=times)
    util.toc()

    estimates = []
    for local_tree in arglib.iter_marginal_trees(arg):
        arglib.remove_single_lineages(local_tree)
        estimates.append(mle_popsize_tree(local_tree, mintime=0))

    chart = plot(estimates, ymin=0)
    # Reference line at the simulated population size.
    chart.plot([0, len(estimates)], [popsize, popsize], style='lines')

    pause()
def test_trans_single(self):
    """
    Print one transition matrix and the sum of each exponentiated row.

    Only the matrix for the marginal tree at position 10 is calculated.
    """
    nchroms = 4
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 1000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)
    times = arghmm.get_time_points(10)
    arghmm.discretize_arg(arg, times)
    # Explicit formatting prints label and value separated by one space.
    print("%s %s" % ("recomb", arglib.get_recomb_pos(arg)))

    # Take the last chromosome out of the ARG and model it as the new one.
    new_name = "n%d" % (nchroms - 1)
    arg = arghmm.remove_arg_thread(arg, new_name)
    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

    pos = 10
    local_tree = arg.get_marginal_tree(pos)
    matrix = arghmm.calc_transition_probs(
        local_tree, model.states[pos], model.nlineages,
        model.times, model.time_steps, model.popsizes, rho)

    print(model.states[pos])
    pc(matrix)

    for row in matrix:
        print(sum(exp(entry) for entry in row))
def test_popsizes_over_time(self):
    """
    Estimate population sizes over time from a simulated ARG.

    A 20-chromosome ARG is drawn under a linearly falling population-size
    schedule, arghmm.est_arg_popsizes is run on it, and the configured and
    estimated values are plotted together.
    """
    nchroms = 20
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(1e6) // 20
    times = arghmm.get_time_points(ntimes=30, maxtime=160000)
    a = 60.
    b = 15

    # Active schedule: 1e4 * (a - i) / a for time index i.
    # Earlier alternatives were 1e4 * (a - b + abs(i - b)) / a and a
    # constant 1e4.
    popsizes = []
    for idx in range(len(times)):
        popsizes.append(1e4 * (a - idx) / a)
    refine = 0

    util.tic("sim ARG")
    # The simulator is handed twice each configured population size.
    # (arglib.sample_arg_smc with 2 * popsizes[0] was a previous alternative.)
    doubled = [2 * size for size in popsizes]
    arg = arghmm.sample_arg_dsmc(nchroms, doubled, rho,
                                 start=0, end=length, times=times)
    util.toc()

    util.tic("estimate popsizes")
    estimated = arghmm.est_arg_popsizes(arg, times=times)
    util.toc()

    print(estimated)

    # The estimates are plotted against every time point except the first.
    chart = plot(times, popsizes, xlog=10, xmin=10, ymin=0, ymax=20000)
    chart.plot(times[1:], estimated)

    pause()
def test_emit(self):
    """
    Check the emission probabilities via arghmm.arghmm_assert_emit.

    A ten-chromosome ARG with mutations is simulated, the last chromosome
    is removed from the ARG, and the remaining trees plus all sequences
    (removed chromosome last) are handed to the checker.
    """
    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps the length an integer (50).
    length = int(1e3) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)

    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                 times=times)
    muts = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, muts)

    new_name = "n%d" % (k - 1)
    arg = arghmm.remove_arg_thread(arg, new_name)

    trees, names = arghmm.arg2ctrees(arg, times)
    # The removed chromosome's sequence is appended after the tree leaves.
    seqs2, nseqs, seqlen = arghmm.seqs2cseqs(seqs, names + [new_name])

    # A bare assert is skipped under "python -O"; use the TestCase method
    # so the check always runs.
    self.assertTrue(arghmm.arghmm_assert_emit(
        trees, len(times), times, mu, seqs2, nseqs, seqlen))
def test_prior(self):
    """
    Check that the state priors at position 0 sum to one.

    The values returned by model.prob_prior are exponentiated before
    summing; the total must equal 1.0 within a relative tolerance of 1%.
    """
    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 1000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)
    times = arghmm.get_time_points()
    arghmm.discretize_arg(arg, times)

    # Take the last chromosome out of the ARG and model it as the new one.
    new_name = "n%d" % (nchroms - 1)
    arg = arghmm.remove_arg_thread(arg, new_name)
    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

    prior = []
    for state in range(model.get_num_states(0)):
        prior.append(model.prob_prior(0, state))
    print(prior)

    total = sum(map(exp, prior))
    print(total)
    fequal(total, 1.0, rel=.01)
def test_emit_parsimony(self):
    """
    Compare arghmm.calc_likelihood with calc_likelihood_parsimony.

    Twenty independent ARGs are simulated; both likelihoods are computed
    for each and scattered against one another, with a y = x reference
    line spanning the range of the first likelihood.
    """
    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)

    full_lk = []
    parsimony_lk = []
    for rep in range(20):
        print(rep)

        arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                     start=0, end=length, times=times)
        mutations = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arghmm.make_alignment(arg, mutations)

        full_lk.append(arghmm.calc_likelihood(
            arg, seqs, mu=mu, times=times, delete_arg=False))
        parsimony_lk.append(arghmm.calc_likelihood_parsimony(
            arg, seqs, mu=mu, times=times, delete_arg=False))

    chart = plot(full_lk, parsimony_lk,
                 xlab="true likelihood", ylab="parsimony likelihood")
    # Diagonal reference line.
    chart.plot([min(full_lk), max(full_lk)],
               [min(full_lk), max(full_lk)], style="lines")

    pause()
def test_compress_align(self):
    """
    Exercise arghmm.compress_align on a simulated alignment.

    Prints the compressed alignment length next to length / compress,
    plots the kept columns and the gaps between them, and prints
    histtab summaries of arghmm.is_variant over all columns and over
    the kept columns.
    """
    nchroms = 12
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 200e3
    times = arghmm.get_time_points(ntimes=20, maxtime=200e3)
    compress = 20

    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                 start=0, end=length, times=times)
    mutations = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arglib.make_alignment(arg, mutations)

    compressed, cols = arghmm.compress_align(seqs, compress)
    # Explicit formatting prints the two values separated by one space.
    print("%s %s" % (compressed.alignlen(), length / compress))

    # Distance between each kept column and the one before it.
    gaps = []
    for idx in range(1, len(cols)):
        gaps.append(cols[idx] - cols[idx - 1])

    plot(cols)
    plothist(gaps, width=1)

    is_var = []
    for col in range(seqs.alignlen()):
        is_var.append(arghmm.is_variant(seqs, col))
    print(histtab(is_var))
    print(histtab(mget(is_var, cols)))

    pause()
def test_post(self):
    """
    Check that every posterior column sums to one.

    A six-chromosome ARG is simulated, the last chromosome is dropped from
    it, and arghmm.get_posterior_probs is run for that chromosome. Each
    returned column is exponentiated, printed, and its total compared with
    1.0 at a relative tolerance of 1e-2.
    """
    nchroms = 6
    popsize = 1e4
    rho = 1.5e-8 * 10
    mu = 2.5e-8 * 10
    length = 10000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    # Explicit formatting prints label and value separated by one space.
    print("%s %s" % ("muts", len(mutations)))
    print("%s %s" % ("recombs", len(arglib.get_recomb_pos(arg))))

    times = arghmm.get_time_points(ntimes=10)
    arghmm.discretize_arg(arg, times)

    first_tree = arg.get_marginal_tree(0)
    treelib.draw_tree_names(first_tree.get_tree(), minlen=5, scale=4e-4)

    # Remove the last chromosome: keep every leaf except new_name.
    new_name = "n%d" % (nchroms - 1)
    keep = set(arg.leaf_names())
    keep.discard(new_name)
    arglib.subarg_by_leaf_names(arg, keep)
    arg = arglib.smcify_arg(arg)

    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                          rho=rho, mu=mu)
    print("%s %s" % ("states", len(model.states[0])))

    probs = arghmm.get_posterior_probs(model, length, verbose=True)

    for column in probs:
        total = sum(exp(entry) for entry in column)
        rendered = " ".join("%.3f" % exp(entry) for entry in column)
        print("%s %s" % (total, rendered))
        fequal(total, 1.0, rel=1e-2)
def test_trans_single(self):
    """
    Print one transition matrix and the sum of each exponentiated row.

    Only the matrix for the marginal tree at position 10 is calculated.
    """
    nchroms = 4
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 1000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)
    times = arghmm.get_time_points(10)
    arghmm.discretize_arg(arg, times)
    # Explicit formatting prints label and value separated by one space.
    print("%s %s" % ("recomb", arglib.get_recomb_pos(arg)))

    # Take the last chromosome out of the ARG and model it as the new one.
    new_name = "n%d" % (nchroms - 1)
    arg = arghmm.remove_arg_thread(arg, new_name)
    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

    pos = 10
    local_tree = arg.get_marginal_tree(pos)
    matrix = arghmm.calc_transition_probs(
        local_tree, model.states[pos], model.nlineages,
        model.times, model.time_steps, model.popsizes, rho)

    print(model.states[pos])
    pc(matrix)

    for row in matrix:
        print(sum(exp(entry) for entry in row))
def test_trans_switch_internal(self):
    """
    Check switch transition probabilities via the arghmm checker.

    A ten-chromosome ARG is simulated, converted with arghmm.arg2ctrees
    and passed to arghmm.assert_transition_probs_switch_internal.
    """
    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    # Floor division keeps the length an integer (5000).
    length = int(100e3) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)

    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                 times=times)
    trees, names = arghmm.arg2ctrees(arg, times)

    # A bare assert is skipped under "python -O"; use the TestCase method
    # so the check always runs.
    self.assertTrue(arghmm.assert_transition_probs_switch_internal(
        trees, times, popsizes, rho))
def test_ld_block(self):
    """
    Draw a heatmap of arghmm.calc_ld_matrix over simulated sites.

    Uses arghmm.calc_ld_Dp as the pairwise measure and at most the first
    1000 site columns of a 30-chromosome simulation.
    """
    nchroms = 30
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 200e3
    times = arghmm.get_time_points(ntimes=20, maxtime=200e3)
    compress = 20

    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                 start=0, end=length, times=times)
    mutations = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, mutations)
    sites = arghmm.seqs2sites(seqs)

    # Columns are looked up by site position, then truncated to 1000.
    # (Sub-sampling transpose(seqs.values())[::10000] was a previous
    # alternative.)
    site_cols = mget(sites, sites.positions)[:1000]

    ld_matrix = arghmm.calc_ld_matrix(site_cols, arghmm.calc_ld_Dp)

    heatmap(ld_matrix, width=2, height=2)
    pause()
def test_post_plot(self):
    """
    Plot the thread of the last chromosome with posterior time bounds.

    The thread of chromosome n5 is plotted from the full ARG; the
    chromosome is then removed, the posterior is computed for it, and the
    0.95 and 0.05 outputs of arghmm.iter_posterior_times are overlaid.
    """
    nchroms = 6
    popsize = 1e4
    rho = 1.5e-8 * 50
    mu = 2.5e-8 * 50
    length = 10000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    times = arghmm.get_time_points(ntimes=30)
    arghmm.discretize_arg(arg, times)
    pause()

    # To save this data set for later:
    #   arglib.write_arg("test/data/k4.arg", arg)
    #   fasta.write_fasta("test/data/k4.fa", seqs)

    new_name = "n%d" % (nchroms - 1)
    thread = list(arghmm.iter_chrom_thread(arg, arg[new_name],
                                           by_block=False))
    chart = plot(cget(thread, 1), style="lines", ymin=times[1], ylog=10)

    # Remove the chromosome and model it as the new thread.
    arg = arghmm.remove_arg_thread(arg, new_name)
    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                          rho=rho, mu=mu)

    # Explicit formatting prints the values separated by single spaces.
    print("%s %s" % ("states", len(model.states[0])))
    print("%s %s" % ("muts", len(mutations)))
    print("%s %s %s" % ("recomb", len(model.recomb_pos) - 2,
                        model.recomb_pos[1:-1]))

    # Mark every entry of model.recomb_pos at height 10000.
    chart.plot(model.recomb_pos, [10000] * len(model.recomb_pos),
               style="points")

    probs = arghmm.get_posterior_probs(model, length, verbose=True)
    print("done")

    upper = list(arghmm.iter_posterior_times(model, probs, .95))
    lower = list(arghmm.iter_posterior_times(model, probs, .05))
    for bound in (upper, lower):
        chart.gnuplot("set linestyle 2")
        chart.plot(bound, style="lines")

    # To dump the curves:
    #   write_list("test/data/post_real.txt", cget(thread, 1))
    #   write_list("test/data/post_high.txt", upper)
    #   write_list("test/data/post_low.txt", lower)

    pause()
def test_norecomb_plot(self):
    """
    Sample a new thread from the HMM and plot it with posterior bounds.

    The ARG is simulated with a recombination rate of 1.5e-8 * .0001,
    while the model is built with rho2 = 1.5e-8 * 10. States and sequence
    data for the last chromosome are sampled from the model itself, and
    the 0.75 / 0.25 outputs of arghmm.iter_posterior_times are overlaid
    on the sampled thread.
    """
    nchroms = 50
    popsize = 1e4
    rho = 1.5e-8 * .0001
    rho2 = 1.5e-8 * 10
    mu = 2.5e-8 * 100
    length = 1000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    times = arghmm.get_time_points(ntimes=20)
    arghmm.discretize_arg(arg, times)

    first_tree = arg.get_marginal_tree(0)
    treelib.draw_tree_names(first_tree.get_tree(), minlen=5, scale=4e-4)

    # Keep every chromosome but the last one in the ARG.
    new_name = "n%d" % (nchroms - 1)
    keep = []
    for idx in range(nchroms - 1):
        keep.append("n%d" % idx)
    arglib.subarg_by_leaf_names(arg, keep)
    arg.set_ancestral()
    arg.prune()

    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                          rho=rho2, mu=mu)
    # Explicit formatting prints label and value separated by one space.
    print("%s %s" % ("states", len(model.states[0])))
    print("%s %s" % ("muts", len(mutations)))

    # Simulate a new thread: the first arg.end states, then data for them.
    sampled = list(islice(hmm.sample_hmm_states(model), 0, arg.end))
    emitted = list(hmm.sample_hmm_data(model, sampled))
    seqs[new_name] = "".join(emitted)
    # (alignlib.print_align(seqs) can be used here to inspect the result.)

    # Element 1 of each sampled state indexes into model.times.
    thread = []
    for site, state_idx in enumerate(sampled):
        time_idx = model.states[site][state_idx][1]
        thread.append(model.times[time_idx])
    chart = plot(thread, style="lines")

    probs = arghmm.get_posterior_probs(model, length, verbose=True)
    print("done")

    upper = list(arghmm.iter_posterior_times(model, probs, .75))
    lower = list(arghmm.iter_posterior_times(model, probs, .25))
    for bound in (upper, lower):
        chart.plot(bound, style="lines")

    pause()
def test_post_plot(self):
    """
    Plot the thread of the last chromosome with posterior time bounds.

    The thread of chromosome n5 is plotted from the full ARG; the
    chromosome is then removed, the posterior is computed for it, and the
    0.95 and 0.05 outputs of arghmm.iter_posterior_times are overlaid.
    """
    nchroms = 6
    popsize = 1e4
    rho = 1.5e-8 * 50
    mu = 2.5e-8 * 50
    length = 10000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    times = arghmm.get_time_points(ntimes=30)
    arghmm.discretize_arg(arg, times)
    pause()

    # To save this data set for later:
    #   arglib.write_arg("test/data/k4.arg", arg)
    #   fasta.write_fasta("test/data/k4.fa", seqs)

    new_name = "n%d" % (nchroms - 1)
    thread = list(arghmm.iter_chrom_thread(arg, arg[new_name],
                                           by_block=False))
    chart = plot(cget(thread, 1), style="lines", ymin=times[1], ylog=10)

    # Remove the chromosome and model it as the new thread.
    arg = arghmm.remove_arg_thread(arg, new_name)
    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                          rho=rho, mu=mu)

    # Explicit formatting prints the values separated by single spaces.
    print("%s %s" % ("states", len(model.states[0])))
    print("%s %s" % ("muts", len(mutations)))
    print("%s %s %s" % ("recomb", len(model.recomb_pos) - 2,
                        model.recomb_pos[1:-1]))

    # Mark every entry of model.recomb_pos at height 10000.
    chart.plot(model.recomb_pos, [10000] * len(model.recomb_pos),
               style="points")

    probs = arghmm.get_posterior_probs(model, length, verbose=True)
    print("done")

    upper = list(arghmm.iter_posterior_times(model, probs, .95))
    lower = list(arghmm.iter_posterior_times(model, probs, .05))
    for bound in (upper, lower):
        chart.gnuplot("set linestyle 2")
        chart.plot(bound, style="lines")

    # To dump the curves:
    #   write_list("test/data/post_real.txt", cget(thread, 1))
    #   write_list("test/data/post_high.txt", upper)
    #   write_list("test/data/post_low.txt", lower)

    pause()
def test_post_real(self):
    """
    Plot the thread of chromosome n2 with posterior time bounds.

    Uses three chromosomes over 100000 positions with unscaled rho and mu;
    the 0.95 and 0.05 outputs of arghmm.iter_posterior_times are overlaid
    on the thread taken from the full ARG.
    """
    nchroms = 3
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 100000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    # A stored data set can be swapped in or written out with:
    #   arg = arglib.read_arg("test/data/real.arg")
    #   seqs = fasta.read_fasta("test/data/real.fa")
    #   arglib.write_arg("test/data/real.arg", arg)
    #   fasta.write_fasta("test/data/real.fa", seqs)

    times = arghmm.get_time_points(maxtime=50000, ntimes=20)
    arghmm.discretize_arg(arg, times)

    new_name = "n%d" % (nchroms - 1)
    thread = list(arghmm.iter_chrom_thread(arg, arg[new_name],
                                           by_block=False))
    first_tree = arg.get_marginal_tree(0)
    print(first_tree.root.age)
    treelib.draw_tree_names(first_tree.get_tree(), minlen=5, scale=4e-4)

    chart = plot(cget(thread, 1), style="lines", ymin=10, ylog=10)
    # (alignlib.print_align(seqs) can be used here to inspect the data.)

    # Remove the last chromosome from the ARG.
    keep = []
    for idx in range(nchroms - 1):
        keep.append("n%d" % idx)
    arglib.subarg_by_leaf_names(arg, keep)
    arg = arglib.smcify_arg(arg)

    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                          rho=rho, mu=mu)
    # Explicit formatting prints the values separated by single spaces.
    print("%s %s" % ("states", len(model.states[0])))
    print("%s %s %s" % ("recomb", len(model.recomb_pos) - 2,
                        model.recomb_pos[1:-1]))

    probs = arghmm.get_posterior_probs(model, length, verbose=True)

    upper = list(arghmm.iter_posterior_times(model, probs, .95))
    lower = list(arghmm.iter_posterior_times(model, probs, .05))
    for bound in (upper, lower):
        chart.plot(bound, style="lines")

    pause()
def test_norecomb_plot(self):
    """
    Sample a new thread from the HMM and plot it with posterior bounds.

    The ARG is simulated with a recombination rate of 1.5e-8 * .0001,
    while the model is built with rho2 = 1.5e-8 * 10. States and sequence
    data for the last chromosome are sampled from the model itself, and
    the 0.75 / 0.25 outputs of arghmm.iter_posterior_times are overlaid
    on the sampled thread.
    """
    nchroms = 50
    popsize = 1e4
    rho = 1.5e-8 * .0001
    rho2 = 1.5e-8 * 10
    mu = 2.5e-8 * 100
    length = 1000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    times = arghmm.get_time_points(ntimes=20)
    arghmm.discretize_arg(arg, times)

    first_tree = arg.get_marginal_tree(0)
    treelib.draw_tree_names(first_tree.get_tree(), minlen=5, scale=4e-4)

    # Keep every chromosome but the last one in the ARG.
    new_name = "n%d" % (nchroms - 1)
    keep = []
    for idx in range(nchroms - 1):
        keep.append("n%d" % idx)
    arglib.subarg_by_leaf_names(arg, keep)
    arg.set_ancestral()
    arg.prune()

    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                          rho=rho2, mu=mu)
    # Explicit formatting prints label and value separated by one space.
    print("%s %s" % ("states", len(model.states[0])))
    print("%s %s" % ("muts", len(mutations)))

    # Simulate a new thread: the first arg.end states, then data for them.
    sampled = list(islice(hmm.sample_hmm_states(model), 0, arg.end))
    emitted = list(hmm.sample_hmm_data(model, sampled))
    seqs[new_name] = "".join(emitted)
    # (alignlib.print_align(seqs) can be used here to inspect the result.)

    # Element 1 of each sampled state indexes into model.times.
    thread = []
    for site, state_idx in enumerate(sampled):
        time_idx = model.states[site][state_idx][1]
        thread.append(model.times[time_idx])
    chart = plot(thread, style="lines")

    probs = arghmm.get_posterior_probs(model, length, verbose=True)
    print("done")

    upper = list(arghmm.iter_posterior_times(model, probs, .75))
    lower = list(arghmm.iter_posterior_times(model, probs, .25))
    for bound in (upper, lower):
        chart.plot(bound, style="lines")

    pause()
def test_trans_switch_single(self):
    """
    Print the transition matrix for switching between two blocks.

    Only one matrix is calculated: the one at the position right after
    the first recombination yielded by arghmm.iter_visible_recombs.
    """
    nchroms = 5
    popsize = 1e4
    rho = 1.5e-8 * 100
    mu = 2.5e-8
    length = 1000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    # For a reproducible case the ARG can be stored and reloaded:
    #   arglib.write_arg("tmp/a.arg", arg)
    #   arg = arglib.read_arg("tmp/a.arg")
    #   arg.set_ancestral()
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    times = arghmm.get_time_points(5)
    arghmm.discretize_arg(arg, times)

    # Take the last chromosome out of the ARG and model it as the new one.
    new_name = "n%d" % (nchroms - 1)
    arg = arghmm.remove_arg_thread(arg, new_name)
    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

    # Positions of the visible recombinations.
    recombs = [node.pos for node in arghmm.iter_visible_recombs(arg)]
    # Explicit formatting prints label and value separated by one space.
    print("%s %s" % ("recomb", recombs))

    pos = recombs[0] + 1
    tree = arg.get_marginal_tree(pos - .5)
    last_tree = arg.get_marginal_tree(pos - 1 - .5)

    print("%s %s" % ("states1>>", model.states[pos - 1]))
    print("%s %s" % ("states2>>", model.states[pos]))

    for shown in (last_tree, tree):
        treelib.draw_tree_names(shown.get_tree(), minlen=5, maxlen=5)

    print("%s %s" % ("pos>>", pos))

    # The recombination node of `tree` located one position before pos.
    matching = [node for node in tree
                if node.event == "recomb" and node.pos + 1 == pos]
    recomb = matching[0]

    matrix = arghmm.calc_transition_probs_switch(
        tree, last_tree, recomb.name,
        model.states[pos - 1], model.states[pos],
        model.nlineages, model.times,
        model.time_steps, model.popsizes, rho)
    pc(matrix)
def test_trans_switch_single(self):
    """
    Print the transition matrix for switching between two blocks.

    Only one matrix is calculated: the one at the position right after
    the first recombination yielded by arghmm.iter_visible_recombs.
    """
    nchroms = 5
    popsize = 1e4
    rho = 1.5e-8 * 100
    mu = 2.5e-8
    length = 1000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    # For a reproducible case the ARG can be stored and reloaded:
    #   arglib.write_arg("tmp/a.arg", arg)
    #   arg = arglib.read_arg("tmp/a.arg")
    #   arg.set_ancestral()
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    times = arghmm.get_time_points(5)
    arghmm.discretize_arg(arg, times)

    # Take the last chromosome out of the ARG and model it as the new one.
    new_name = "n%d" % (nchroms - 1)
    arg = arghmm.remove_arg_thread(arg, new_name)
    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times)

    # Positions of the visible recombinations.
    recombs = [node.pos for node in arghmm.iter_visible_recombs(arg)]
    # Explicit formatting prints label and value separated by one space.
    print("%s %s" % ("recomb", recombs))

    pos = recombs[0] + 1
    tree = arg.get_marginal_tree(pos - .5)
    last_tree = arg.get_marginal_tree(pos - 1 - .5)

    print("%s %s" % ("states1>>", model.states[pos - 1]))
    print("%s %s" % ("states2>>", model.states[pos]))

    for shown in (last_tree, tree):
        treelib.draw_tree_names(shown.get_tree(), minlen=5, maxlen=5)

    print("%s %s" % ("pos>>", pos))

    # The recombination node of `tree` located one position before pos.
    matching = [node for node in tree
                if node.event == "recomb" and node.pos + 1 == pos]
    recomb = matching[0]

    matrix = arghmm.calc_transition_probs_switch(
        tree, last_tree, recomb.name,
        model.states[pos - 1], model.states[pos],
        model.nlineages, model.times,
        model.time_steps, model.popsizes, rho)
    pc(matrix)
def test_post_c(self):
    """
    Compare arghmm.get_posterior_probs with hmm.get_posterior_probs.

    Both are run on the same model (timed under the labels "C" and
    "python"), printed, and then compared entry by entry with fequal.
    """
    nchroms = 3
    popsize = 1e4
    rho = 1.5e-8 * 30
    mu = 2.5e-8 * 100
    length = 100

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    arg.prune()
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    # Explicit formatting prints label and value separated by one space.
    print(arglib.get_recomb_pos(arg))
    print("%s %s" % ("muts", len(mutations)))
    print("%s %s" % ("recomb", len(arglib.get_recomb_pos(arg))))

    times = arghmm.get_time_points(ntimes=10)
    arghmm.discretize_arg(arg, times)

    first_tree = arg.get_marginal_tree(0)
    print(first_tree.root.age)
    treelib.draw_tree_names(first_tree.get_tree(), minlen=5, scale=4e-4)

    # Remove the last chromosome from the ARG.
    keep = []
    for idx in range(nchroms - 1):
        keep.append("n%d" % idx)
    arglib.subarg_by_leaf_names(arg, keep)

    model = arghmm.ArgHmm(arg, seqs, new_name="n%d" % (nchroms - 1),
                          times=times, rho=rho, mu=mu)
    print("%s %s" % ("states", len(model.states[0])))

    util.tic("C")
    probs_c = list(arghmm.get_posterior_probs(model, length, verbose=True))
    util.toc()

    util.tic("python")
    probs_py = list(hmm.get_posterior_probs(model, length, verbose=True))
    util.toc()

    print("probs1")
    pc(probs_c)
    print("probs2")
    pc(probs_py)

    # Every pair of corresponding entries must agree.
    for column_c, column_py in zip(probs_c, probs_py):
        for value_c, value_py in zip(column_c, column_py):
            fequal(value_c, value_py)
def test_prior_tree(self):
    """
    Print arghmm.arghmm_tree_prior_prob for a simulated ARG.

    The ARG covers ten positions and is drawn with a recombination rate
    of 1e-50.
    """
    nchroms = 10
    popsize = 1e4
    popsizes = [popsize] * 20
    length = 10
    times = arghmm.get_time_points(ntimes=20, maxtime=1000000)

    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, 1e-50,
                                 start=0, end=length, times=times)
    trees, names = arghmm.arg2ctrees(arg, times)

    tree_prior = arghmm.arghmm_tree_prior_prob(trees, times, len(times),
                                               popsizes)
    print(tree_prior)
def test_post_real(self):
    """
    Plot the thread of chromosome n2 with posterior time bounds.

    Uses three chromosomes over 100000 positions with unscaled rho and mu;
    the 0.95 and 0.05 outputs of arghmm.iter_posterior_times are overlaid
    on the thread taken from the full ARG.
    """
    nchroms = 3
    popsize = 1e4
    rho = 1.5e-8
    mu = 2.5e-8
    length = 100000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    # A stored data set can be swapped in or written out with:
    #   arg = arglib.read_arg("test/data/real.arg")
    #   seqs = fasta.read_fasta("test/data/real.fa")
    #   arglib.write_arg("test/data/real.arg", arg)
    #   fasta.write_fasta("test/data/real.fa", seqs)

    times = arghmm.get_time_points(maxtime=50000, ntimes=20)
    arghmm.discretize_arg(arg, times)

    new_name = "n%d" % (nchroms - 1)
    thread = list(arghmm.iter_chrom_thread(arg, arg[new_name],
                                           by_block=False))
    first_tree = arg.get_marginal_tree(0)
    print(first_tree.root.age)
    treelib.draw_tree_names(first_tree.get_tree(), minlen=5, scale=4e-4)

    chart = plot(cget(thread, 1), style="lines", ymin=10, ylog=10)
    # (alignlib.print_align(seqs) can be used here to inspect the data.)

    # Remove the last chromosome from the ARG.
    keep = []
    for idx in range(nchroms - 1):
        keep.append("n%d" % idx)
    arglib.subarg_by_leaf_names(arg, keep)
    arg = arglib.smcify_arg(arg)

    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                          rho=rho, mu=mu)
    # Explicit formatting prints the values separated by single spaces.
    print("%s %s" % ("states", len(model.states[0])))
    print("%s %s %s" % ("recomb", len(model.recomb_pos) - 2,
                        model.recomb_pos[1:-1]))

    probs = arghmm.get_posterior_probs(model, length, verbose=True)

    upper = list(arghmm.iter_posterior_times(model, probs, .95))
    lower = list(arghmm.iter_posterior_times(model, probs, .05))
    for bound in (upper, lower):
        chart.plot(bound, style="lines")

    pause()
def test_post_c(self):
    """
    Compare arghmm.get_posterior_probs with hmm.get_posterior_probs.

    Both are run on the same model (timed under the labels "C" and
    "python"), printed, and then compared entry by entry with fequal.
    """
    nchroms = 3
    popsize = 1e4
    rho = 1.5e-8 * 30
    mu = 2.5e-8 * 100
    length = 100

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    arg.prune()
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    # Explicit formatting prints label and value separated by one space.
    print(arglib.get_recomb_pos(arg))
    print("%s %s" % ("muts", len(mutations)))
    print("%s %s" % ("recomb", len(arglib.get_recomb_pos(arg))))

    times = arghmm.get_time_points(ntimes=10)
    arghmm.discretize_arg(arg, times)

    first_tree = arg.get_marginal_tree(0)
    print(first_tree.root.age)
    treelib.draw_tree_names(first_tree.get_tree(), minlen=5, scale=4e-4)

    # Remove the last chromosome from the ARG.
    keep = []
    for idx in range(nchroms - 1):
        keep.append("n%d" % idx)
    arglib.subarg_by_leaf_names(arg, keep)

    model = arghmm.ArgHmm(arg, seqs, new_name="n%d" % (nchroms - 1),
                          times=times, rho=rho, mu=mu)
    print("%s %s" % ("states", len(model.states[0])))

    util.tic("C")
    probs_c = list(arghmm.get_posterior_probs(model, length, verbose=True))
    util.toc()

    util.tic("python")
    probs_py = list(hmm.get_posterior_probs(model, length, verbose=True))
    util.toc()

    print("probs1")
    pc(probs_c)
    print("probs2")
    pc(probs_py)

    # Every pair of corresponding entries must agree.
    for column_c, column_py in zip(probs_c, probs_py):
        for value_c, value_py in zip(column_c, column_py):
            fequal(value_c, value_py)
def test_post3(self):
    """
    Plot the thread of chromosome n2 with posterior time bounds.

    Chromosomes n0 and n1 are kept in the ARG, n2 is modelled as the new
    chromosome, and the 0.95 / 0.05 outputs of
    arghmm.iter_posterior_times are overlaid on its thread.
    """
    nchroms = 3
    popsize = 1e4
    rho = 1.5e-8 * 3
    mu = 2.5e-8 * 100
    length = 10000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    arg.prune()
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    times = arghmm.get_time_points(ntimes=10)
    arghmm.discretize_arg(arg, times)

    first_tree = arg.get_marginal_tree(0)
    treelib.draw_tree_names(first_tree.get_tree(), minlen=5, scale=4e-4)

    thread = list(arghmm.iter_chrom_thread(arg, arg["n2"], by_block=False))
    chart = plot(cget(thread, 1), style="lines", ymin=0)

    # Remove chromosome n2 from the ARG.
    arglib.subarg_by_leaf_names(arg, ["n0", "n1"])
    arg.set_ancestral()
    arg.prune()

    model = arghmm.ArgHmm(arg, seqs, new_name="n2", times=times,
                          rho=rho, mu=mu)
    # Explicit formatting prints the values separated by single spaces.
    print("%s %s" % ("states", len(model.states[0])))
    print("%s %s" % ("muts", len(mutations)))
    print("%s %s %s" % ("recomb", len(model.recomb_pos) - 2,
                        model.recomb_pos[1:-1]))

    # Mark every entry of model.recomb_pos at height 1000.
    chart.plot(model.recomb_pos, [1000] * len(model.recomb_pos),
               style="points")

    probs = arghmm.get_posterior_probs(model, length, verbose=True)

    upper = list(arghmm.iter_posterior_times(model, probs, .95))
    lower = list(arghmm.iter_posterior_times(model, probs, .05))
    for bound in (upper, lower):
        chart.plot(bound, style="lines")

    pause()
def get_time_points(arg, ntimes=20):
    """
    Return a time grid in which values close to an ARG node age are
    replaced by that age.

    The grid comes from arghmm.get_time_points(ntimes=ntimes). For each
    grid value, the two indices returned by util.binsearch over the sorted
    unique node ages are tried in order; the first age closer than 1 to
    the grid value replaces it, otherwise the grid value is kept.

    NOTE(review): presumably util.binsearch returns the indices on either
    side of the value, with None on a side that falls outside the list --
    confirm against util.
    """
    grid = arghmm.get_time_points(ntimes=ntimes)
    ages = sorted(unique([node.age for node in arg]))

    snapped = []
    for value in grid:
        low, high = util.binsearch(ages, value)
        # When one side is missing, fall back to the other one.
        low = high if low is None else low
        high = low if high is None else high

        chosen = value
        for idx in (low, high):
            if abs(ages[idx] - value) < 1:
                chosen = ages[idx]
                break
        snapped.append(chosen)

    return snapped
def test_state_corr(self):
    """
    Plot how the forward-algorithm output depends on the starting prior.

    Three runs of arghmm.forward_algorithm are compared at the last
    position: with the default prior, with a prior concentrated on one
    randomly chosen state, and with that same prior after scaling
    model.rho by 1e-9. Each plotted column has its mean subtracted.
    """
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps the length an integer (50); it is used below
    # to index the forward table.
    length = int(1e3) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200e3)

    arg = arghmm.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                 times=times)
    muts = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arglib.make_alignment(arg, muts)

    # Remove the last chromosome and model it as the new thread.
    new_name = "n%d" % (k - 1)
    arg = arghmm.remove_arg_thread(arg, new_name)

    model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times,
                          rho=rho, mu=mu)
    # Explicit formatting prints label and value separated by one space.
    print("%s %s" % ("states", len(model.states[0])))

    # Prior of -INF everywhere except one randomly chosen state set to 0.
    nstates = len(model.states[0])
    prior = [-util.INF] * nstates
    # The standard library's random.randint includes its upper bound, so
    # the bound must be nstates - 1 to stay inside the list.
    # NOTE(review): assumes `random` is the stdlib module -- confirm.
    prior[random.randint(0, nstates - 1)] = 0.0

    probs1 = list(arghmm.forward_algorithm(model, length, verbose=True))
    probs2 = list(arghmm.forward_algorithm(model, length, prior=prior,
                                           verbose=True))

    model.rho *= 1e-9
    probs3 = list(arghmm.forward_algorithm(model, length, prior=prior,
                                           verbose=True))

    # Mean-centered final columns of the three runs.
    last = length - 1
    p = plot(vsubs(probs1[last], mean(probs1[last])))
    p.plot(vsubs(probs2[last], mean(probs2[last])))
    p.plot(vsubs(probs3[last], mean(probs3[last])))

    pause()
def test_arg_treelens(self):
    """
    Print the values arghmm.get_treelens writes for a simulated ARG.

    A list with one 0.0 entry per local tree is allocated and handed to
    arghmm.get_treelens together with the converted trees.
    """
    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = 10000
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)

    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                 start=0, end=length, times=times)

    # Convert the ARG with arg2ctrees, then size the output buffer from
    # the number of local trees.
    trees, names = arghmm.arg2ctrees(arg, times)
    ntrees = arghmm.get_local_trees_ntrees(trees)
    treelens = [0.0] * ntrees
    arghmm.get_treelens(trees, times, len(times), treelens)

    print(treelens)
def test_prior_tree(self):
    """
    Print arghmm.arghmm_tree_prior_prob for a simulated ARG.

    The ARG covers ten positions and is drawn with a recombination rate
    of 1e-50.
    """
    nchroms = 10
    popsize = 1e4
    popsizes = [popsize] * 20
    length = 10
    times = arghmm.get_time_points(ntimes=20, maxtime=1000000)

    arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, 1e-50,
                                 start=0, end=length, times=times)
    trees, names = arghmm.arg2ctrees(arg, times)

    tree_prior = arghmm.arghmm_tree_prior_prob(trees, times, len(times),
                                               popsizes)
    print(tree_prior)
def test_post2(self):
    """
    Plot the thread of chromosome n1 with posterior time bounds.

    Only chromosome n0 is kept in the ARG; n1 is modelled as the new
    chromosome and the 0.95 / 0.05 outputs of
    arghmm.iter_posterior_times are overlaid on its thread.
    """
    nchroms = 2
    popsize = 1e4
    rho = 1.5e-8 * 10
    mu = 2.5e-8 * 10
    length = 10000

    arg = arglib.sample_arg(nchroms, popsize, rho, start=0, end=length)
    mutations = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, mutations)

    # Explicit formatting prints label and value separated by one space.
    print("%s %s" % ("muts", len(mutations)))

    times = arghmm.get_time_points()
    arghmm.discretize_arg(arg, times)

    thread = list(arghmm.iter_chrom_thread(arg, arg["n1"], by_block=False))
    first_tree = arg.get_marginal_tree(0)
    print(first_tree.root.age)
    treelib.draw_tree_names(first_tree.get_tree(), minlen=5, scale=4e-4)

    chart = plot(cget(thread, 1), style="lines", ymin=0)
    # (alignlib.print_align(seqs) can be used here to inspect the data.)

    # Remove chromosome n1 from the ARG.
    arglib.subarg_by_leaf_names(arg, ["n0"])
    arg = arglib.smcify_arg(arg)

    model = arghmm.ArgHmm(arg, seqs, new_name="n1", times=times,
                          rho=rho, mu=mu)
    print("%s %s" % ("states", len(model.states[0])))

    probs = arghmm.get_posterior_probs(model, length, verbose=True)

    upper = list(arghmm.iter_posterior_times(model, probs, .95))
    lower = list(arghmm.iter_posterior_times(model, probs, .05))
    for bound in (upper, lower):
        chart.plot(bound, style="lines")

    pause()
def test_emit_parsimony(self):
    """
    Compare arghmm.calc_likelihood with calc_likelihood_parsimony.

    Twenty independent ARGs are simulated; both likelihoods are computed
    for each and scattered against one another, with a y = x reference
    line spanning the range of the first likelihood.
    """
    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)

    full_lk = []
    parsimony_lk = []
    for rep in range(20):
        print(rep)

        arg = arghmm.sample_arg_dsmc(nchroms, 2 * popsize, rho,
                                     start=0, end=length, times=times)
        mutations = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arghmm.make_alignment(arg, mutations)

        full_lk.append(arghmm.calc_likelihood(
            arg, seqs, mu=mu, times=times, delete_arg=False))
        parsimony_lk.append(arghmm.calc_likelihood_parsimony(
            arg, seqs, mu=mu, times=times, delete_arg=False))

    chart = plot(full_lk, parsimony_lk,
                 xlab="true likelihood", ylab="parsimony likelihood")
    # Diagonal reference line.
    chart.plot([min(full_lk), max(full_lk)],
               [min(full_lk), max(full_lk)], style="lines")

    pause()
def test_arg_treelens(self): k = 10 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 * 20 length = 10000 times = arghmm.get_time_points(ntimes=20, maxtime=200000) arg = arghmm.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length, times=times) # convert to C++ and back trees, names = arghmm.arg2ctrees(arg, times) treelens = [0.0] * arghmm.get_local_trees_ntrees(trees) arghmm.get_treelens(trees, times, len(times), treelens) print treelens
def test_arg_joint(self): """ Compute joint probability of an ARG """ k = 2 n = 1e4 rho = 1.5e-8 * 20 rho2 = rho mu = 2.5e-8 * 20 length = 10000 times = arghmm.get_time_points(ntimes=20, maxtime=200000) refine = 0 arg = arghmm.sample_arg_dsmc(k, 2*n, rho, start=0, end=length, times=times) muts = arghmm.sample_arg_mutations(arg, mu, times=times) seqs = arglib.make_alignment(arg, muts) lk = arghmm.calc_joint_prob(arg, seqs, mu=mu, rho=rho, times=times) print lk
def test_trans_switch_internal(self):
    """
    Check internal switch transition probabilities for k=10

    Runs arghmm.assert_transition_probs_switch_internal on the local
    trees of a single sampled ARG.
    """
    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    seqlen = int(100e3) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [popsize] * len(times)

    arg = arghmm.sample_arg_dsmc(
        nchroms, 2 * popsize, rho, start=0, end=seqlen, times=times)
    ctrees, _names = arghmm.arg2ctrees(arg, times)

    passed = arghmm.assert_transition_probs_switch_internal(
        ctrees, times, popsizes, rho)
    assert passed
def test_states(self): """ Test state enumeration """ k = 2 n = 1e4 rho = 1.5e-8 * 100 length = 1000 for i in xrange(20): arg = arglib.sample_arg(k, n, rho, start=0, end=length) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) states = list(arghmm.iter_coal_states(tree, times)) treelib.draw_tree_names(tree.get_tree(), scale=4e-4, minlen=6, maxlen=6) print states
def test_sample_arg_popsizes_trees_infer(self):
    """
    Sample an ARG from scratch using the API and estimate popsizes

    Simulates sequences under linearly changing population sizes,
    samples an ARG from the sequences, and plots the population sizes
    estimated from the sampled ARG over the simulated ones.
    """
    nchroms = 6
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    seqlen = int(10e6) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=160000)
    popsizes = [1e4 * (61. - idx) / 60.
                for idx, _time in enumerate(times)]
    refine = 5

    # simulate the true ARG and its sequences
    util.tic("sim ARG")
    arg = arghmm.sample_arg_dsmc(
        nchroms, [2 * size for size in popsizes], rho,
        start=0, end=seqlen, times=times)
    util.toc()

    muts = arghmm.sample_arg_mutations(arg, mu, times=times)
    seqs = arglib.make_alignment(arg, muts)

    # accumulate estimates over the sampled ARGs, then average
    nsamples = 1
    popsizes_sum = [0] * (len(times) - 1)
    for _sample in range(nsamples):
        sampled_arg = arghmm.sample_arg(
            seqs, rho=rho, mu=mu, times=times, popsizes=popsizes,
            refine=refine, verbose=True, carg=True)
        estimate = arghmm.est_popsizes_trees(
            sampled_arg, times, seqlen // 1000, verbose=True)
        print(estimate)
        popsizes_sum = vadd(popsizes_sum, estimate)
    popsizes_mean = vdivs(popsizes_sum, float(nsamples))

    print(popsizes_mean)
    fig = plot(times, popsizes, xlog=10, xmin=10)
    fig.plot(times[1:], popsizes_mean)

    pause()
def test_state_corr(self): k = 12 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 * 20 length = int(1e3) / 20 times = arghmm.get_time_points(ntimes=20, maxtime=200e3) arg = arghmm.sample_arg_dsmc(k, 2*n, rho, start=0, end=length, times=times) muts = arghmm.sample_arg_mutations(arg, mu, times) seqs = arglib.make_alignment(arg, muts) # remove chrom new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) nstates = len(model.states[0]) prior = [-util.INF] * nstates prior[random.randint(0, nstates)] = 0.0 probs1 = list(arghmm.forward_algorithm(model, length, verbose=True)) probs2 = list(arghmm.forward_algorithm(model, length, prior=prior, verbose=True)) model.rho *= 1e-9 probs3 = list(arghmm.forward_algorithm(model, length, prior=prior, verbose=True)) p = plot(vsubs(probs1[length-1], mean(probs1[length-1]))) p.plot(vsubs(probs2[length-1], mean(probs2[length-1]))) p.plot(vsubs(probs3[length-1], mean(probs3[length-1]))) pause()
def test_sample_arg_popsizes_trees_infer(self): """ Fully sample an ARG from stratch using API """ k = 6 rho = 1.5e-8 * 20 mu = 2.5e-8 * 20 length = int(10e6) / 20 times = arghmm.get_time_points(ntimes=20, maxtime=160000) popsizes = [1e4 * (61.-i)/60. for i in range(len(times))] refine = 5 util.tic("sim ARG") #arg = arglib.sample_arg_smc(k, 2 * popsizes[0], # rho, start=0, end=length) arg = arghmm.sample_arg_dsmc(k, [2*p for p in popsizes], rho, start=0, end=length, times=times) util.toc() muts = arghmm.sample_arg_mutations(arg, mu, times=times) seqs = arglib.make_alignment(arg, muts) popsizes2 = [0] * (len(times) - 1) nsamples = 1 for i in range(nsamples): arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times, popsizes=popsizes, refine=refine, verbose=True, carg=True) popsizes3 = arghmm.est_popsizes_trees(arg2, times, length/1000, verbose=True) print popsizes3 popsizes2 = vadd(popsizes2, popsizes3) popsizes2 = vdivs(popsizes2, float(nsamples)) print popsizes2 p = plot(times, popsizes, xlog=10, xmin=10) p.plot(times[1:], popsizes2) pause()
def test_emit_argmax(self): """ Calculate emission probabilities """ k = 10 n = 1e4 rho = 0.0 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k-1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name])) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) nstates = model.get_num_states(1) probs = [0.0 for j in xrange(nstates)] for i in xrange(1, length): if i % 100 == 0: print i for j in xrange(nstates): probs[j] += model.prob_emission(i, j) print # is the maximum likelihood emission matching truth data = sorted(zip(probs, model.states[0]), reverse=True) pc(data[:20]) state = (thread[0][0], times.index(thread[0][1])) print data[0][1], state assert data[0][1] == state
def test_emit_argmax(self): """ Calculate emission probabilities """ k = 10 n = 1e4 rho = 0.0 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) new_name = "n%d" % (k - 1) thread = list(arghmm.iter_chrom_thread(arg, arg[new_name])) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times) nstates = model.get_num_states(1) probs = [0.0 for j in xrange(nstates)] for i in xrange(1, length): if i % 100 == 0: print i for j in xrange(nstates): probs[j] += model.prob_emission(i, j) print # is the maximum likelihood emission matching truth data = sorted(zip(probs, model.states[0]), reverse=True) pc(data[:20]) state = (thread[0][0], times.index(thread[0][1])) print data[0][1], state assert data[0][1] == state
def test_backward(self): """ Run backward algorithm """ k = 3 n = 1e4 rho = 1.5e-8 * 100 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom new_name = "n%d" % (k - 1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "recomb", model.recomb_pos print "muts", len(muts) probs = hmm.backward_algorithm(model, length, verbose=True) for pcol in probs: p = sum(map(exp, pcol)) print p, " ".join("%.3f" % f for f in map(exp, pcol))
def test_trans(self):
    """
    Check transition probabilities for k=4

    Only a single matrix is checked, using the marginal tree at
    position 10.
    """
    nchroms = 4
    popsize = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    seqlen = 1000
    times = arghmm.get_time_points(ntimes=4, maxtime=200000)
    popsizes = [popsize] * len(times)

    arg = arglib.sample_arg(nchroms, 2 * popsize, rho,
                            start=0, end=seqlen)
    arghmm.discretize_arg(arg, times)

    tree = arg.get_marginal_tree(10)

    passed = arghmm.assert_transition_probs(tree, times, popsizes, rho)
    assert passed
def test_trans(self):
    """
    Calculate transition probabilities for k=4

    Samples one ARG, discretizes it, and runs
    arghmm.assert_transition_probs on one of its marginal trees.
    """
    k, n = 4, 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = 1000
    check_pos = 10

    times = arghmm.get_time_points(ntimes=4, maxtime=200000)
    popsizes = [n for _time in times]

    arg = arglib.sample_arg(k, 2 * n, rho, start=0, end=length)
    arghmm.discretize_arg(arg, times)

    marginal = arg.get_marginal_tree(check_pos)
    assert arghmm.assert_transition_probs(
        marginal, times, popsizes, rho)
def test_recomb(self): """ Investigate the fact that some recombinations are not visible """ k = 3 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 length = 1000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) times = arghmm.get_time_points(10) arghmm.discretize_arg(arg, times) arg.set_ancestral() arg.prune() recombs = arglib.get_recomb_pos(arg) # find recombs by walking recombs2 = [] i = 0 while True: tree = arg.get_marginal_tree(i-.5) recomb = arghmm.find_tree_next_recomb(tree, i+1, tree=True) if recomb: recombs2.append(recomb.pos) i = recomb.pos else: break # these are suppose to differ because some recombination occur # in the hole of ancestral sequence intervals print recombs print recombs2 arglib.write_arg("tmp/b.arg", arg)
def test_backward(self): """ Run backward algorithm """ k = 3 n = 1e4 rho = 1.5e-8 * 100 mu = 2.5e-8 * 100 length = 10000 arg = arglib.sample_arg(k, n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) times = arghmm.get_time_points(ntimes=10) arghmm.discretize_arg(arg, times) tree = arg.get_marginal_tree(0) print tree.root.age treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4) # remove chrom new_name = "n%d" % (k-1) arg = arghmm.remove_arg_thread(arg, new_name) model = arghmm.ArgHmm(arg, seqs, new_name=new_name, times=times, rho=rho, mu=mu) print "states", len(model.states[0]) print "recomb", model.recomb_pos print "muts", len(muts) probs = hmm.backward_algorithm(model, length, verbose=True) for pcol in probs: p = sum(map(exp, pcol)) print p, " ".join("%.3f" % f for f in map(exp, pcol))