def test_emit_internal():
    """
    Calculate emission probabilities for internal branches

    A random ARG is sampled with argweaver.sample_arg_dsmc, mutations are
    sampled on it and an alignment is built from them.  The ARG and the
    alignment are then converted with argweaverc.arg2ctrees and
    argweaverc.seqs2cseqs, and the result of
    argweaverc.argweaver_assert_emit_internal is asserted.
    """
    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division: the region end stays an int even when true division
    # (Python 3 or `from __future__ import division`) is in effect.
    length = int(10e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    # simulate ARG, mutations and the resulting alignment
    arg = argweaver.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                    times=times)
    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    # convert ARG and sequences for the C routines; the sequences are
    # ordered by the names returned from arg2ctrees
    trees, names = argweaverc.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names)

    assert argweaverc.argweaver_assert_emit_internal(
        trees, len(times), times, mu, seqs2, nseqs, seqlen)
def test_emit():
    """
    Calculate emission probabilities

    A random ARG is sampled together with mutations and an alignment.  The
    thread named "n<k-1>" is then removed from the ARG, the remaining ARG is
    converted with argweaverc.arg2ctrees, and the sequences are converted
    with the removed name appended last.  The result of
    argweaverc.argweaver_assert_emit is asserted.
    """
    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division: the region end stays an int even when true division
    # (Python 3 or `from __future__ import division`) is in effect.
    length = int(1e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    # simulate ARG, mutations and the resulting alignment
    arg = argweaver.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                    times=times)
    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    # the alignment above was built before this removal, so it still
    # contains the sequence for new_name
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    trees, names = argweaverc.arg2ctrees(arg, times)
    # removed sequence goes after the names still present in the ARG
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names + [new_name])

    assert argweaverc.argweaver_assert_emit(
        trees, len(times), times, mu, seqs2, nseqs, seqlen)
def test_emit():
    """
    Calculate emission probabilities

    Steps:
      1. sample an ARG, mutations on it, and the alignment they imply;
      2. remove the thread named "n<k-1>" from the ARG;
      3. convert the reduced ARG with argweaverc.arg2ctrees and the full
         alignment with argweaverc.seqs2cseqs (removed name listed last);
      4. assert the result of argweaverc.argweaver_assert_emit.
    """
    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # `//` rather than `/`: keeps the length integral under true division
    # (Python 3 or `from __future__ import division`) as well.
    length = int(1e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    # 1. simulate
    arg = argweaver.sample_arg_dsmc(
        k, 2 * n, rho, start=0, end=length, times=times)
    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    # 2. remove one thread; seqs still holds its sequence
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    # 3. convert for the C routines
    trees, names = argweaverc.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(
        seqs, names + [new_name])

    # 4. check
    assert argweaverc.argweaver_assert_emit(
        trees, len(times), times, mu, seqs2, nseqs, seqlen)
def test_arg_convert():
    """
    Test conversion for python to C args

    A sampled ARG is converted with argweaverc.arg2ctrees, converted back
    with argweaverc.ctrees2arg, and the original and the reconstructed ARG
    are passed to arg_equal.
    """
    time_points = argweaver.get_time_points(ntimes=20, maxtime=200000)

    nchroms = 10
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    region_end = 10000

    original = argweaver.sample_arg_dsmc(
        nchroms, 2 * popsize, recomb_rate,
        start=0, end=region_end, times=time_points)

    # round trip: Python -> C++ -> Python
    ctrees, ctree_names = argweaverc.arg2ctrees(original, time_points)
    restored = argweaverc.ctrees2arg(ctrees, ctree_names, time_points)

    arg_equal(original, restored)
def test_arg_convert():
    """
    Test conversion for python to C args

    Samples an ARG, sends it through arg2ctrees followed by ctrees2arg, and
    calls arg_equal on the ARG before and after the round trip.
    """
    sim = dict(k=10, n=1e4, rho=1.5e-8 * 20, length=10000)
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg_before = argweaver.sample_arg_dsmc(
        sim["k"], 2 * sim["n"], sim["rho"],
        start=0, end=sim["length"], times=times)

    # to C++ ...
    converted = argweaverc.arg2ctrees(arg_before, times)
    trees, names = converted
    # ... and back again
    arg_after = argweaverc.ctrees2arg(trees, names, times)

    arg_equal(arg_before, arg_after)
def test_trans_switch_internal():
    """
    Calculate transition probabilities for switch matrix and internal branches

    Only calculate a single matrix

    A random ARG is sampled, converted with argweaverc.arg2ctrees, and the
    result of argweaverc.assert_transition_probs_switch_internal is
    asserted.  The same population size `n` is used for every time point.
    """
    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    # Floor division: the region end stays an int even when true division
    # (Python 3 or `from __future__ import division`) is in effect.
    length = int(100e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    # one population size per time point
    popsizes = [n] * len(times)

    arg = argweaver.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                    times=times)
    # only the trees are needed; the returned names are unused here
    trees, names = argweaverc.arg2ctrees(arg, times)

    assert argweaverc.assert_transition_probs_switch_internal(
        trees, times, popsizes, rho)
def test_forward(): k = 4 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 * 20 length = int(100e3 / 20) times = argweaver.get_time_points(ntimes=100) arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print "muts", len(muts) print "recomb", len(arglib.get_recomb_pos(arg)) argweaver.discretize_arg(arg, times) # remove chrom new_name = "n%d" % (k - 1) arg = argweaver.remove_arg_thread(arg, new_name) carg = argweaverc.arg2ctrees(arg, times) util.tic("C fast") probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times) util.toc() util.tic("C slow") probs2 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times, slow=True) util.toc() for i, (col1, col2) in enumerate(izip(probs1, probs2)): for a, b in izip(col1, col2): fequal(a, b, rel=.0001)
def test_emit_internal():
    """
    Calculate emission probabilities for internal branches

    Steps:
      1. sample an ARG, mutations on it, and the alignment they imply;
      2. convert the ARG with argweaverc.arg2ctrees and the alignment with
         argweaverc.seqs2cseqs (ordered by the names from step 2's trees);
      3. assert the result of argweaverc.argweaver_assert_emit_internal.
    """
    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # `//` rather than `/`: keeps the length integral under true division
    # (Python 3 or `from __future__ import division`) as well.
    length = int(10e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    # 1. simulate
    arg = argweaver.sample_arg_dsmc(
        k, 2 * n, rho, start=0, end=length, times=times)
    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    # 2. convert for the C routines
    trees, names = argweaverc.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names)

    # 3. check
    assert argweaverc.argweaver_assert_emit_internal(
        trees, len(times), times, mu, seqs2, nseqs, seqlen)
def test_forward(): k = 4 n = 1e4 rho = 1.5e-8 * 20 mu = 2.5e-8 * 20 length = int(100e3 / 20) times = argweaver.get_time_points(ntimes=100) arg = arglib.sample_arg_smc(k, 2*n, rho, start=0, end=length) muts = arglib.sample_arg_mutations(arg, mu) seqs = arglib.make_alignment(arg, muts) print "muts", len(muts) print "recomb", len(arglib.get_recombs(arg)) argweaver.discretize_arg(arg, times) # remove chrom new_name = "n%d" % (k - 1) arg = argweaver.remove_arg_thread(arg, new_name) carg = argweaverc.arg2ctrees(arg, times) util.tic("C fast") probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times) util.toc() util.tic("C slow") probs2 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times, slow=True) util.toc() for i, (col1, col2) in enumerate(izip(probs1, probs2)): for a, b in izip(col1, col2): fequal(a, b, rel=.0001)
def show_plots(arg_file, sites_file, stats_file, output_prefix,
               rho, mu, popsize, ntimes=20, maxtime=200000):
    """
    Show plots of convergence.

    For each of five statistics (joint probability, likelihood, prior
    probability, number of recombinations, ARG branch length) the
    corresponding column of `stats_file` is plotted as a line against
    iteration index, and the value computed from the true ARG is overlaid
    as a gray horizontal line.  Each plot is started with
    rplot_start(output_prefix + ".trace.<name>.pdf").

    arg_file      -- true ARG, read with arglib.read_arg
    sites_file    -- sites, read with argweaver.read_sites
    stats_file    -- stats table, read with read_table; must provide the
                     columns "joint", "likelihood", "prior", "recombs"
                     and "arglen"
    output_prefix -- prefix for the output filenames
    rho           -- forwarded to argweaverc.calc_prior_prob as `rho`
    mu            -- forwarded to argweaverc.calc_likelihood as `mu`
    popsize       -- forwarded to argweaverc.calc_prior_prob as `popsizes`
    ntimes        -- forwarded to argweaver.get_time_points
    maxtime       -- forwarded to argweaver.get_time_points
    """
    # read true arg and seqs
    times = argweaver.get_time_points(ntimes=ntimes, maxtime=maxtime)
    arg = arglib.read_arg(arg_file)
    argweaver.discretize_arg(arg, times, ignore_top=False, round_age="closer")
    arg = arglib.smcify_arg(arg)
    seqs = argweaver.sites2seqs(argweaver.read_sites(sites_file))

    # compute true stats
    # arglen must be taken before `arg` is replaced by its C conversion
    arglen = arglib.arglen(arg)
    arg = argweaverc.arg2ctrees(arg, times)
    nrecombs = argweaverc.get_local_trees_ntrees(arg[0]) - 1
    # delete_arg=False: `arg` is reused by the next call
    lk = argweaverc.calc_likelihood(
        arg, seqs, mu=mu, times=times, delete_arg=False)
    prior = argweaverc.calc_prior_prob(
        arg, rho=rho, times=times, popsizes=popsize, delete_arg=False)
    joint = lk + prior

    data = read_table(stats_file)

    # One entry per plot, in the order they are drawn:
    # (stats column, true value, filename suffix, title and y-axis label)
    traces = [
        ("joint", joint, ".trace.joint.pdf", "joint probability"),
        ("likelihood", lk, ".trace.lk.pdf", "likelihood"),
        ("prior", prior, ".trace.prior.pdf", "prior probability"),
        ("recombs", nrecombs, ".trace.nrecombs.pdf",
         "number of recombinations"),
        ("arglen", arglen, ".trace.arglen.pdf", "ARG branch length"),
    ]

    for column, truth, suffix, label in traces:
        y = data.cget(column)
        rplot_start(output_prefix + suffix, width=8, height=5)
        # widen the y-range so the true value is always visible
        rp.plot(y, t="l",
                ylim=[min(min(y), truth), max(max(y), truth)],
                main=label,
                xlab="iterations",
                ylab=label)
        # horizontal reference line at the true value
        rp.lines([0, len(y)], [truth, truth], col="gray")
        rplot_end(True)