def test_trans():
    """
    Calculate transition probabilities

    Reads the stored ARGs under test/data/test_trans, discretizes each one
    onto the shared time grid, takes the marginal tree at a fixed position
    and fails if argweaverc.assert_transition_probs returns a false value
    for that tree.

    Flip `create_data` to True to regenerate the stored ARGs first.
    """
    create_data = False
    if create_data:
        make_clean_dir('test/data/test_trans')

    # model parameters
    k = 8                # first argument to arglib.sample_arg
    n = 1e4              # per-time-interval population size
    rho = 1.5e-8 * 20    # recombination rate handed to sampler and checker
    length = 1000        # sampled region is [0, length)
    times = argweaver.get_time_points(ntimes=10, maxtime=200000)
    popsizes = [n] * len(times)
    ntests = 40
    pos = 10             # position of the marginal tree that is checked

    # generate test data
    if create_data:
        for i in range(ntests):
            arg = arglib.sample_arg(k, 2*n, rho, start=0, end=length)
            argweaver.discretize_arg(arg, times)
            arg.write('test/data/test_trans/%d.arg' % i)

    for i in range(ntests):
        # Single-argument print call: same output on Python 2 and Python 3.
        print('arg %d' % i)
        filename = 'test/data/test_trans/%d.arg' % i
        arg = arglib.read_arg(filename)
        argweaver.discretize_arg(arg, times)
        tree = arg.get_marginal_tree(pos)

        assert argweaverc.assert_transition_probs(
            tree, times, popsizes, rho), (
            'transition probability check failed for %s' % filename)
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only a single matrix is examined per stored ARG: the one at the first
    recombination node encountered when iterating over the ARG.
    """
    arg_path = 'test/data/test_trans_switch/%d.arg'

    regenerate = False
    if regenerate:
        make_clean_dir('test/data/test_trans_switch')

    # model parameters
    nlineages = 12
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    region_len = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [popsize] * len(times)
    narg = 100

    def draw_arg():
        # One DSMC sample over [0, region_len) on the shared time grid.
        return argweaver.sample_arg_dsmc(
            nlineages, 2 * popsize, recomb_rate,
            start=0, end=region_len, times=times)

    # generate test data
    if regenerate:
        for idx in range(narg):
            # Keep resampling until the ARG has at least one recombination.
            sampled = draw_arg()
            while not any(node.event == "recomb" for node in sampled):
                sampled = draw_arg()
            sampled.write(arg_path % idx)

    for idx in range(narg):
        print('arg', idx)
        arg = arglib.read_arg(arg_path % idx)
        argweaver.discretize_arg(arg, times)

        recomb_positions = [
            node.pos for node in arg if node.event == "recomb"]
        # Look just to the left of the first listed recombination.
        query = recomb_positions[0] - .5
        tree = arg.get_marginal_tree(query)
        _rpos, spr_r, spr_c = next(arglib.iter_arg_sprs(arg, start=query))

        if argweaverc.assert_transition_switch_probs(
                tree, (spr_r, spr_c), times, popsizes, recomb_rate):
            continue

        # Check failed: draw the offending tree before failing the test.
        drawable = tree.get_tree()
        treelib.remove_single_children(drawable)
        treelib.draw_tree_names(drawable, maxlen=5, minlen=5)
        assert False
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only calculate a single matrix per stored ARG: the one at the first
    recombination node encountered when iterating over the ARG.

    Flip `create_data` to True to regenerate the stored ARGs under
    test/data/test_trans_switch first.  On failure the offending marginal
    tree is drawn and an AssertionError naming the ARG file is raised.
    """
    create_data = False
    if create_data:
        make_clean_dir('test/data/test_trans_switch')

    # model parameters
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)
    ntests = 100

    # generate test data
    if create_data:
        for i in range(ntests):
            # Sample ARG with at least one recombination.
            while True:
                arg = argweaver.sample_arg_dsmc(
                    k, 2*n, rho, start=0, end=length, times=times)
                if any(x.event == "recomb" for x in arg):
                    break
            arg.write('test/data/test_trans_switch/%d.arg' % i)

    for i in range(ntests):
        # Single-argument print call: same output on Python 2 and Python 3.
        print('arg %d' % i)
        filename = 'test/data/test_trans_switch/%d.arg' % i
        arg = arglib.read_arg(filename)
        argweaver.discretize_arg(arg, times)
        recombs = [x.pos for x in arg if x.event == "recomb"]
        pos = recombs[0]
        tree = arg.get_marginal_tree(pos-.5)
        # next() builtin (Python 2.6+) instead of the Python 2-only
        # iterator method .next().
        rpos, r, c = next(arglib.iter_arg_sprs(arg, start=pos-.5))
        spr = (r, c)

        if not argweaverc.assert_transition_switch_probs(
                tree, spr, times, popsizes, rho):
            # Draw the failing tree to help diagnose the mismatch.
            tree2 = tree.get_tree()
            treelib.remove_single_children(tree2)
            treelib.draw_tree_names(tree2, maxlen=5, minlen=5)
            raise AssertionError(
                'transition switch probability check failed for %s'
                % filename)
def test_read_write(self):
    """
    Read and write an ARG

    Samples an ARG, rounds node ages and positions, writes it to an
    in-memory stream, reads it back and asserts the two ARGs compare equal
    via `arg.equal`.
    """
    # The StringIO module exists only on Python 2; fall back to io.StringIO
    # elsewhere so the test does not depend on a Python 2-only import.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    rho = 1.5e-8   # recomb/site/gen
    length = 10000  # length of locus
    k = 10         # number of lineages
    n = 2*10000    # effective popsize

    arg = arglib.sample_arg(k, n, rho, 0, length)

    # round ages and pos for easy equality
    for node in arg:
        node.age = round(node.age)
        node.pos = round(node.pos)

    stream = StringIO()
    arglib.write_arg(stream, arg)
    stream.seek(0)  # rewind so read_arg starts at the beginning
    arg2 = arglib.read_arg(stream)

    self.assertTrue(arg.equal(arg2))
def _plot_trace(output_prefix, suffix, y, y2, label):
    """
    Plot one sampled trace with a gray reference line at the true value.

    The figure is written to `output_prefix + suffix`.  `y` is the sequence
    of per-iteration values, `y2` the single true value, and `label` is used
    for both the plot title and the y-axis label.
    """
    rplot_start(output_prefix + suffix, width=8, height=5)
    # Widen the y-range so the true value is always inside the plot.
    rp.plot(y, t="l", ylim=[min(min(y), y2), max(max(y), y2)],
            main=label,
            xlab="iterations",
            ylab=label)
    rp.lines([0, len(y)], [y2, y2], col="gray")
    rplot_end(True)


def show_plots(arg_file, sites_file, stats_file, output_prefix,
               rho, mu, popsize, ntimes=20, maxtime=200000):
    """
    Show plots of convergence.

    Computes statistics of the true ARG in `arg_file` (joint probability,
    likelihood, prior, number of recombinations, ARG branch length) and, for
    each one, plots the matching column of the table in `stats_file` against
    iteration number with the true value drawn as a gray line.

    Output files are `output_prefix` plus ".trace.joint.pdf",
    ".trace.lk.pdf", ".trace.prior.pdf", ".trace.nrecombs.pdf" and
    ".trace.arglen.pdf".

    `rho`, `mu` and `popsize` are passed to the prior / likelihood
    calculations; `ntimes` and `maxtime` define the discretized time grid.
    """
    # read true arg and seqs
    times = argweaver.get_time_points(ntimes=ntimes, maxtime=maxtime)
    arg = arglib.read_arg(arg_file)
    argweaver.discretize_arg(arg, times, ignore_top=False, round_age="closer")
    arg = arglib.smcify_arg(arg)
    seqs = argweaver.sites2seqs(argweaver.read_sites(sites_file))

    # compute true stats
    # Branch length is taken before the ARG is converted for argweaverc.
    arglen = arglib.arglen(arg)
    ctrees = argweaverc.arg2ctrees(arg, times)
    nrecombs = argweaverc.get_local_trees_ntrees(ctrees[0]) - 1
    lk = argweaverc.calc_likelihood(
        ctrees, seqs, mu=mu, times=times,
        delete_arg=False)
    prior = argweaverc.calc_prior_prob(
        ctrees, rho=rho, times=times, popsizes=popsize,
        delete_arg=False)
    joint = lk + prior

    data = read_table(stats_file)

    # (file suffix, stats column, true value, title / y-axis label)
    traces = [
        (".trace.joint.pdf", "joint", joint, "joint probability"),
        (".trace.lk.pdf", "likelihood", lk, "likelihood"),
        (".trace.prior.pdf", "prior", prior, "prior probability"),
        (".trace.nrecombs.pdf", "recombs", nrecombs,
         "number of recombinations"),
        (".trace.arglen.pdf", "arglen", arglen, "ARG branch length"),
    ]
    for suffix, column, truth, label in traces:
        _plot_trace(output_prefix, suffix, data.cget(column), truth, label)
i, j = util.binsearch(times3, x) if i is None: i = j if j is None: j = i if abs(times3[i] - x) < 1: times.append(times3[i]) elif abs(times3[j] - x) < 1: times.append(times3[j]) else: times.append(x) return times #============================================================================= if 1: #times = arghmm.get_time_points(ntimes=20) arg = arglib.read_arg("test/data/sample.arg") seqs = read_fasta("test/data/sample.fa") trees = list(arglib.iter_tree_tracks(arg, convert=True)) # draw mappings win = argvis.show_tree_track(trees) nleaves = ilen(arg.leaves()) for i in xrange(len(trees)-1): block1, _tree1 = trees[i] block2, _tree2 = trees[i + 1] pos = block2[0] tree1 = arg.get_marginal_tree(pos-.5) tree2 = arg.get_marginal_tree(pos+.5) layout1 = argvis.layout_arg(tree1)
i, j = util.binsearch(times3, x) if i is None: i = j if j is None: j = i if abs(times3[i] - x) < 1: times.append(times3[i]) elif abs(times3[j] - x) < 1: times.append(times3[j]) else: times.append(x) return times #============================================================================= if 1: #times = arghmm.get_time_points(ntimes=20) arg = arglib.read_arg("test/data/sample.arg") seqs = read_fasta("test/data/sample.fa") trees = list(arglib.iter_tree_tracks(arg, convert=True)) # draw mappings win = argvis.show_tree_track(trees) nleaves = ilen(arg.leaves()) for i in xrange(len(trees) - 1): block1, _tree1 = trees[i] block2, _tree2 = trees[i + 1] pos = block2[0] tree1 = arg.get_marginal_tree(pos - .5) tree2 = arg.get_marginal_tree(pos + .5) layout1 = argvis.layout_arg(tree1)