def _test_prog_infsites(): make_clean_dir("test/tmp/test_prog_infsites") run_cmd("""bin/arg-sim \ -k 40 -L 200000 \ -N 1e4 -r 1.5e-8 -m 2.5e-8 --infsites \ --ntimes 20 --maxtime 400e3 \ -o test/tmp/test_prog_infsites/0""") make_clean_dir("test/tmp/test_prog_infsites/0.sample") run_cmd("""bin/arg-sample \ -s test/tmp/test_prog_infsites/0.sites \ -N 1e4 -r 1.5e-8 -m 2.5e-8 \ --ntimes 5 --maxtime 100e3 -c 1 \ --climb 0 -n 20 --infsites \ -x 1 \ -o test/tmp/test_prog_infsites/0.sample/out""") arg = argweaver.read_arg( "test/tmp/test_prog_infsites/0.sample/out.0.smc.gz") sites = argweaver.read_sites("test/tmp/test_prog_infsites/0.sites") print "names", sites.names print noncompats = [] for block, tree in arglib.iter_local_trees(arg): tree = tree.get_tree() treelib.remove_single_children(tree) phylo.hash_order_tree(tree) for pos, col in sites.iter_region(block[0] + 1, block[1] + 1): assert block[0] + 1 <= pos <= block[1] + 1, (block, pos) split = sites_split(sites.names, col) node = arglib.split_to_tree_branch(tree, split) if node is None: noncompats.append(pos) print "noncompat", block, pos, col print phylo.hash_tree(tree) print tree.leaf_names() print "".join(col[sites.names.index(name)] for name in tree.leaf_names()) print split print print "num noncompats", len(noncompats)
def _test_prog_infsites(): make_clean_dir("test/data/test_prog_infsites") run_cmd("""bin/arg-sim \ -k 40 -L 200000 \ -N 1e4 -r 1.5e-8 -m 2.5e-8 --infsites \ --ntimes 20 --maxtime 400e3 \ -o test/data/test_prog_infsites/0""") make_clean_dir("test/data/test_prog_infsites/0.sample") run_cmd("""bin/arg-sample \ -s test/data/test_prog_infsites/0.sites \ -N 1e4 -r 1.5e-8 -m 2.5e-8 \ --ntimes 5 --maxtime 100e3 -c 1 \ --climb 0 -n 20 --infsites \ -x 1 \ -o test/data/test_prog_infsites/0.sample/out""") arg = argweaver.read_arg( "test/data/test_prog_infsites/0.sample/out.0.smc.gz") sites = argweaver.read_sites("test/data/test_prog_infsites/0.sites") print "names", sites.names print noncompats = [] for block, tree in arglib.iter_local_trees(arg): tree = tree.get_tree() treelib.remove_single_children(tree) phylo.hash_order_tree(tree) for pos, col in sites.iter_region(block[0]+1, block[1]+1): assert block[0]+1 <= pos <= block[1]+1, (block, pos) split = sites_split(sites.names, col) node = arglib.split_to_tree_branch(tree, split) if node is None: noncompats.append(pos) print "noncompat", block, pos, col print phylo.hash_tree(tree) print tree.leaf_names() print "".join(col[sites.names.index(name)] for name in tree.leaf_names()) print split print print "num noncompats", len(noncompats)
def _plot_trace(pdf_file, trace, truth, label):
    """
    Plot one sampled statistic as a line with its true value in gray.

    pdf_file -- output file handed to rplot_start()
    trace    -- sequence of sampled values, one per iteration
    truth    -- value of the same statistic computed from the true ARG
    label    -- text used for both the plot title and the y-axis label
    """
    rplot_start(pdf_file, width=8, height=5)
    # widen the y-range so the true value is always inside the plot
    rp.plot(trace, t="l",
            ylim=[min(min(trace), truth), max(max(trace), truth)],
            main=label,
            xlab="iterations",
            ylab=label)
    rp.lines([0, len(trace)], [truth, truth], col="gray")
    rplot_end(True)


def show_plots(arg_file, sites_file, stats_file, output_prefix,
               rho, mu, popsize, ntimes=20, maxtime=200000):
    """
    Show plots of convergence.

    For each of the statistics joint, likelihood, prior, number of
    recombinations and ARG branch length, the column read from `stats_file`
    is plotted against iteration, with a gray horizontal line at the value
    computed from the ARG in `arg_file`.

    arg_file      -- ARG file read with arglib.read_arg()
    sites_file    -- sites file read with argweaver.read_sites()
    stats_file    -- table read with read_table(); must provide the columns
                     "joint", "likelihood", "prior", "recombs" and "arglen"
    output_prefix -- prefix of the five "<prefix>.trace.<stat>.pdf" outputs
    rho           -- passed as `rho` to argweaverc.calc_prior_prob()
    mu            -- passed as `mu` to argweaverc.calc_likelihood()
    popsize       -- passed as `popsizes` to argweaverc.calc_prior_prob()
    ntimes        -- passed to argweaver.get_time_points()
    maxtime       -- passed to argweaver.get_time_points()
    """
    # read true arg and seqs
    times = argweaver.get_time_points(ntimes=ntimes, maxtime=maxtime)
    arg = arglib.read_arg(arg_file)
    argweaver.discretize_arg(arg, times, ignore_top=False, round_age="closer")
    arg = arglib.smcify_arg(arg)
    seqs = argweaver.sites2seqs(argweaver.read_sites(sites_file))

    # compute true stats
    # (arglen must be taken before `arg` is rebound to the converted trees)
    arglen = arglib.arglen(arg)
    arg = argweaverc.arg2ctrees(arg, times)
    nrecombs = argweaverc.get_local_trees_ntrees(arg[0]) - 1
    # NOTE(review): delete_arg=False is passed to both calls and nothing in
    # this function releases `arg` afterwards -- confirm whether the
    # converted trees need explicit cleanup.
    lk = argweaverc.calc_likelihood(
        arg, seqs, mu=mu, times=times, delete_arg=False)
    prior = argweaverc.calc_prior_prob(
        arg, rho=rho, times=times, popsizes=popsize, delete_arg=False)
    joint = lk + prior

    data = read_table(stats_file)

    # (stats column, true value, output file suffix, title / y-axis label)
    traces = [
        ("joint", joint, ".trace.joint.pdf", "joint probability"),
        ("likelihood", lk, ".trace.lk.pdf", "likelihood"),
        ("prior", prior, ".trace.prior.pdf", "prior probability"),
        ("recombs", nrecombs, ".trace.nrecombs.pdf",
         "number of recombinations"),
        ("arglen", arglen, ".trace.arglen.pdf", "ARG branch length"),
    ]
    for column, truth, suffix, label in traces:
        trace = data.cget(column)
        _plot_trace(output_prefix + suffix, trace, truth, label)