def sample_arg_mutations(arg, mu, times=None):
    """
    Simulate mutations on an ARG.

    Mutations are represented as (node, parent, site, time).

    arg   -- ARG on which to simulate mutations
    mu    -- mutation rate (mutations/site/gen)
    times -- optional list of discretized time points

    Returns a list of (node, parent, site, time) tuples.  'site' is the
    integer-truncated position of the mutation within the local block and
    'time' is drawn uniformly along the branch above 'node'.

    Each local tree yielded by arglib.iter_tree_tracks() is passed through
    arglib.remove_single_lineages() before mutations are placed on it.
    Branches whose mutation rate (branch length * mu) is not positive are
    skipped and receive no mutations.
    """
    mutations = []

    # With discretized times every branch is treated as at least one tenth
    # of the second time point long (presumably so that branches collapsed
    # to zero length by the discretization can still carry mutations).
    minlen = times[1] * .1 if times else 0.0

    for (start, end), tree in arglib.iter_tree_tracks(arg):
        arglib.remove_single_lineages(tree)
        for node in tree:
            if not node.parents:
                # no branch above this node (e.g. the root)
                continue

            blen = max(node.get_dist(), minlen)
            rate = blen * mu
            if rate <= 0:
                # random.expovariate() divides by its argument, so a zero
                # rate raises ZeroDivisionError and a negative rate would
                # move 'i' backwards and may never leave the loop below.
                continue

            # Step along the block with exponentially distributed gaps;
            # every step that lands inside the block is one mutation.
            i = start
            while i < end:
                i += random.expovariate(rate)
                if i < end:
                    t = random.uniform(node.age, node.age + blen)
                    mutations.append((node, node.parents[0], int(i), t))

    return mutations
def sample_arg_mutations(arg, mu, times=None):
    """
    Simulate mutations on an ARG.

    Mutations are represented as (node, parent, site, time).

    arg   -- ARG on which to simulate mutations
    mu    -- mutation rate (mutations/site/gen)
    times -- optional list of discretized time points

    Returns a list of (node, parent, site, time) tuples.  'site' is the
    integer-truncated position of the mutation within the local block and
    'time' is drawn uniformly along the branch above 'node'.

    Each local tree yielded by arglib.iter_tree_tracks() is passed through
    arglib.remove_single_lineages() before mutations are placed on it.
    Branches whose mutation rate (branch length * mu) is not positive are
    skipped and receive no mutations.
    """
    mutations = []

    # With discretized times every branch is treated as at least one tenth
    # of the second time point long (presumably so that branches collapsed
    # to zero length by the discretization can still carry mutations).
    minlen = times[1] * 0.1 if times else 0.0

    for (start, end), tree in arglib.iter_tree_tracks(arg):
        arglib.remove_single_lineages(tree)
        for node in tree:
            if not node.parents:
                # no branch above this node (e.g. the root)
                continue

            blen = max(node.get_dist(), minlen)
            rate = blen * mu
            if rate <= 0:
                # random.expovariate() divides by its argument, so a zero
                # rate raises ZeroDivisionError and a negative rate would
                # move 'i' backwards and may never leave the loop below.
                continue

            # Step along the block with exponentially distributed gaps;
            # every step that lands inside the block is one mutation.
            i = start
            while i < end:
                i += random.expovariate(rate)
                if i < end:
                    t = random.uniform(node.age, node.age + blen)
                    mutations.append((node, node.parents[0], int(i), t))

    return mutations
def test_arg_equal(arg, arg2):
    """
    Assert that two ARGs are equal.

    Three things are compared, in this order:
      1. the sorted positions of all nodes whose event is "recomb"
      2. the topology hash of the local tree at the midpoint of every
         block of 'arg' against the marginal tree of 'arg2' there
      3. the SPR operations of both ARGs, pairwise (position, recomb
         leaves/time, coal leaves/time)

    arg  -- first ARG
    arg2 -- second ARG

    Intermediate values are printed to stdout for debugging.
    """

    def _canonical(event):
        # sort the leaf names so the comparison ignores their order
        return (sorted(event[0]), event[1])

    # test recomb points
    recomb_pos1 = [node.pos for node in arg if node.event == "recomb"]
    recomb_pos1.sort()
    recomb_pos2 = [node.pos for node in arg2 if node.event == "recomb"]
    recomb_pos2.sort()
    assert recomb_pos1 == recomb_pos2

    # check local tree topologies
    for (start, end), local in arglib.iter_tree_tracks(arg):
        pos = (start + end) / 2.0

        arglib.remove_single_lineages(local)
        tree1 = local.get_tree()

        marginal = arg2.get_marginal_tree(pos)
        arglib.remove_single_lineages(marginal)
        tree2 = marginal.get_tree()

        hash1 = phylo.hash_tree(tree1)
        hash2 = phylo.hash_tree(tree2)

        print("")
        print(pos)
        print(hash1)
        print(hash2)
        assert hash1 == hash2

    # check sprs
    sprs1 = arglib.iter_arg_sprs(arg, use_leaves=True)
    sprs2 = arglib.iter_arg_sprs(arg2, use_leaves=True)

    for spr1, spr2 in izip(sprs1, sprs2):
        pos1, recomb1, coal1 = spr1
        pos2, recomb2, coal2 = spr2

        recomb1 = _canonical(recomb1)
        recomb2 = _canonical(recomb2)
        coal1 = _canonical(coal1)
        coal2 = _canonical(coal2)

        print("")
        print(str((pos1, recomb1, coal1)))
        print(str((pos2, recomb2, coal2)))

        # check pos, leaves, time
        assert pos1 == pos2
        assert recomb1 == recomb2
        assert coal1 == coal2
def test_arg_equal(arg, arg2):
    """
    Assert that two ARGs are equal.

    Three things are compared, in this order:
      1. the sorted positions of all nodes whose event is "recomb"
      2. the topology hash of the local tree at the midpoint of every
         block of 'arg' against the marginal tree of 'arg2' there
      3. the SPR operations of both ARGs, pairwise (position, recomb
         leaves/time, coal leaves/time)

    arg  -- first ARG
    arg2 -- second ARG

    Intermediate values are printed to stdout for debugging.
    """

    def _canonical(event):
        # sort the leaf names so the comparison ignores their order
        return (sorted(event[0]), event[1])

    # test recomb points
    recomb_pos1 = [node.pos for node in arg if node.event == "recomb"]
    recomb_pos1.sort()
    recomb_pos2 = [node.pos for node in arg2 if node.event == "recomb"]
    recomb_pos2.sort()
    assert recomb_pos1 == recomb_pos2

    # check local tree topologies
    for (start, end), local in arglib.iter_tree_tracks(arg):
        pos = (start + end) / 2.0

        arglib.remove_single_lineages(local)
        tree1 = local.get_tree()

        marginal = arg2.get_marginal_tree(pos)
        arglib.remove_single_lineages(marginal)
        tree2 = marginal.get_tree()

        hash1 = phylo.hash_tree(tree1)
        hash2 = phylo.hash_tree(tree2)

        print("")
        print(pos)
        print(hash1)
        print(hash2)
        assert hash1 == hash2

    # check sprs
    sprs1 = arglib.iter_arg_sprs(arg, use_leaves=True)
    sprs2 = arglib.iter_arg_sprs(arg2, use_leaves=True)

    for spr1, spr2 in izip(sprs1, sprs2):
        pos1, recomb1, coal1 = spr1
        pos2, recomb2, coal2 = spr2

        recomb1 = _canonical(recomb1)
        recomb2 = _canonical(recomb2)
        coal1 = _canonical(coal1)
        coal2 = _canonical(coal2)

        print("")
        print(str((pos1, recomb1, coal1)))
        print(str((pos2, recomb2, coal2)))

        # check pos, leaves, time
        assert pos1 == pos2
        assert recomb1 == recomb2
        assert coal1 == coal2
def test_thread(self):
    """
    Test thread retrieval

    Samples a small ARG, simulates mutations and an alignment on it, then
    walks the local trees in step with the thread of leaf "n9", printing
    each block and thread and drawing each local tree.
    """
    # simulation parameters
    nseqs = 10
    popsize = 1e4
    rho = 1.5e-8 * 10
    mu = 2.5e-8 * 100
    seqlen = 1000

    arg = arglib.sample_arg(nseqs, popsize, rho, start=0, end=seqlen)
    muts = arglib.sample_arg_mutations(arg, mu)
    # the alignment itself is not inspected below; the call is kept so the
    # simulation steps run exactly as before
    arglib.make_alignment(arg, muts)

    tracks = arglib.iter_tree_tracks(arg)
    threads = arghmm.iter_chrom_thread(arg, arg["n9"], by_block=True)

    for track, threadi in izip(tracks, threads):
        block, tree = track
        print(block)
        print(threadi)
        treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4)
def test_est_popsize2(self):
    """
    Estimate population size along a simulated ARG and plot it

    An ARG is simulated in three consecutive segments with
    arglib.sample_arg_smc(); the middle segment [a, b) uses half the
    population size of the outer two.  mle_popsize_tree() is evaluated on
    every local tree, and the raw estimates, a smoothed curve and the
    population sizes used for the simulation are plotted together.
    """
    nseqs = 20
    rho = 1.5e-8
    seqlen = int(4e6)
    popsize = 1e4
    popsize2 = 1e4 * .5

    # boundaries of the middle (reduced population size) segment
    a = int(.3 * seqlen)
    b = int(.7 * seqlen)

    util.tic("sim ARG")
    arg = arglib.sample_arg_smc(nseqs, 2 * popsize, rho,
                                start=0, end=a)
    arg = arglib.sample_arg_smc(nseqs, 2 * popsize2, rho,
                                start=a, end=b, init_tree=arg)
    arg = arglib.sample_arg_smc(nseqs, 2 * popsize, rho,
                                start=b, end=seqlen, init_tree=arg)
    util.toc()

    # one estimate per local tree, keyed by the block start position
    starts = []
    estimates = []
    for (start, end), tree in arglib.iter_tree_tracks(arg):
        arglib.remove_single_lineages(tree)
        starts.append(start)
        estimates.append(mle_popsize_tree(tree, mintime=0))

    smooth_x, smooth_y = stats.smooth2(starts, estimates, 100e3)

    p = plot(starts, estimates, ymin=0)
    p.plot(smooth_x, smooth_y, style='lines')

    # step function of the population sizes used in the simulation
    true_x = [0, a, a, b, b, seqlen]
    true_y = [popsize, popsize, popsize2, popsize2, popsize, popsize]
    p.plot(true_x, true_y, style='lines')

    pause()
if abs(times3[i] - x) < 1: times.append(times3[i]) elif abs(times3[j] - x) < 1: times.append(times3[j]) else: times.append(x) return times #============================================================================= if 1: #times = arghmm.get_time_points(ntimes=20) arg = arglib.read_arg("test/data/sample.arg") seqs = read_fasta("test/data/sample.fa") trees = list(arglib.iter_tree_tracks(arg, convert=True)) # draw mappings win = argvis.show_tree_track(trees) nleaves = ilen(arg.leaves()) for i in xrange(len(trees)-1): block1, _tree1 = trees[i] block2, _tree2 = trees[i + 1] pos = block2[0] tree1 = arg.get_marginal_tree(pos-.5) tree2 = arg.get_marginal_tree(pos+.5) layout1 = argvis.layout_arg(tree1) layout2 = argvis.layout_arg(tree2) #layout1 = treelib.layout_tree_vertical( # treelib.layout_tree(tree1, xscale=1, yscale=1), leaves=0)
def test_est_arg_popsize(self):
    """
    Estimate population size along an ARG sampled from simulated sequences

    An ARG is simulated in three consecutive segments with
    arglib.sample_arg_smc(); the middle segment [a, b) uses half the
    population size of the outer two.  Mutations and an alignment are
    simulated on it, a new ARG is sampled and resampled from the alignment
    with arghmm, and mle_popsize_tree() is evaluated on every local tree
    of the sampled ARG.  The raw estimates, a thinned and smoothed curve,
    and the population sizes used for the simulation are plotted together.
    """
    k = 20
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps 'length' an integer even under true division;
    # it is used below as a range() bound and step.
    length = int(2e6) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    popsize = 1e4
    popsize2 = 1e4 * .5

    # boundaries of the middle (reduced population size) segment
    a = int(.3 * length)
    b = int(.7 * length)

    util.tic("sim ARG")
    arg = arglib.sample_arg_smc(k, 2 * popsize, rho,
                                start=0, end=a)
    arg = arglib.sample_arg_smc(k, 2 * popsize2, rho,
                                start=a, end=b, init_tree=arg)
    arg = arglib.sample_arg_smc(k, 2 * popsize, rho,
                                start=b, end=length, init_tree=arg)

    # sim seq
    mut = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, mut)
    util.toc()

    # sample arg
    util.tic("sample arg")
    arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                             popsizes=1e4, carg=True)
    arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=1e4,
                                     rho=rho, mu=mu, times=times,
                                     refine=200)
    arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=1e4,
                                   rho=rho, mu=mu, times=times,
                                   refine=200)
    util.toc()

    # one estimate per local tree, keyed by the block start position
    x = []
    y = []
    for (start, end), tree in arglib.iter_tree_tracks(arg2):
        arglib.remove_single_lineages(tree)
        x.append(start)
        y.append(mle_popsize_tree(tree, mintime=0))

    # thin popsizes: resample the estimates on a regular grid, taking for
    # each grid point the first block that starts at or after it (or the
    # last block once the blocks run out)
    x2 = list(range(0, length, length // 5000))
    y2 = []
    j = 0
    for grid_pos in x2:
        while j < len(x) and x[j] < grid_pos:
            j += 1
        y2.append(y[min(j, len(y) - 1)])

    x3, y3 = stats.smooth2(x2, y2, 100e3)

    p = plot(x, y, ymin=0)
    p.plot(x3, y3, style='lines')
    # step function of the population sizes used in the simulation
    p.plot([0, a, a, b, b, length],
           [popsize, popsize, popsize2, popsize2, popsize, popsize],
           style='lines')

    pause()
if abs(times3[i] - x) < 1: times.append(times3[i]) elif abs(times3[j] - x) < 1: times.append(times3[j]) else: times.append(x) return times #============================================================================= if 1: #times = arghmm.get_time_points(ntimes=20) arg = arglib.read_arg("test/data/sample.arg") seqs = read_fasta("test/data/sample.fa") trees = list(arglib.iter_tree_tracks(arg, convert=True)) # draw mappings win = argvis.show_tree_track(trees) nleaves = ilen(arg.leaves()) for i in xrange(len(trees) - 1): block1, _tree1 = trees[i] block2, _tree2 = trees[i + 1] pos = block2[0] tree1 = arg.get_marginal_tree(pos - .5) tree2 = arg.get_marginal_tree(pos + .5) layout1 = argvis.layout_arg(tree1) layout2 = argvis.layout_arg(tree2) #layout1 = treelib.layout_tree_vertical( # treelib.layout_tree(tree1, xscale=1, yscale=1), leaves=0)
def test_est_arg_popsize(self):
    """
    Estimate population size along an ARG sampled from simulated sequences

    An ARG is simulated in three consecutive segments with
    arglib.sample_arg_smc(); the middle segment [a, b) uses half the
    population size of the outer two.  Mutations and an alignment are
    simulated on it, a new ARG is sampled and resampled from the alignment
    with arghmm, and mle_popsize_tree() is evaluated on every local tree
    of the sampled ARG.  The raw estimates, a thinned and smoothed curve,
    and the population sizes used for the simulation are plotted together.
    """
    k = 20
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps 'length' an integer even under true division;
    # it is used below as a range() bound and step.
    length = int(2e6) // 20
    times = arghmm.get_time_points(ntimes=20, maxtime=200000)
    popsize = 1e4
    popsize2 = 1e4 * .5

    # boundaries of the middle (reduced population size) segment
    a = int(.3 * length)
    b = int(.7 * length)

    util.tic("sim ARG")
    arg = arglib.sample_arg_smc(k, 2 * popsize, rho,
                                start=0, end=a)
    arg = arglib.sample_arg_smc(k, 2 * popsize2, rho,
                                start=a, end=b, init_tree=arg)
    arg = arglib.sample_arg_smc(k, 2 * popsize, rho,
                                start=b, end=length, init_tree=arg)

    # sim seq
    mut = arghmm.sample_arg_mutations(arg, mu, times)
    seqs = arghmm.make_alignment(arg, mut)
    util.toc()

    # sample arg
    util.tic("sample arg")
    arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                             popsizes=1e4, carg=True)
    arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=1e4,
                                     rho=rho, mu=mu, times=times,
                                     refine=200)
    arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=1e4,
                                   rho=rho, mu=mu, times=times,
                                   refine=200)
    util.toc()

    # one estimate per local tree, keyed by the block start position
    x = []
    y = []
    for (start, end), tree in arglib.iter_tree_tracks(arg2):
        arglib.remove_single_lineages(tree)
        x.append(start)
        y.append(mle_popsize_tree(tree, mintime=0))

    # thin popsizes: resample the estimates on a regular grid, taking for
    # each grid point the first block that starts at or after it (or the
    # last block once the blocks run out)
    x2 = list(range(0, length, length // 5000))
    y2 = []
    j = 0
    for grid_pos in x2:
        while j < len(x) and x[j] < grid_pos:
            j += 1
        y2.append(y[min(j, len(y) - 1)])

    x3, y3 = stats.smooth2(x2, y2, 100e3)

    p = plot(x, y, ymin=0)
    p.plot(x3, y3, style='lines')
    # step function of the population sizes used in the simulation
    p.plot([0, a, a, b, b, length],
           [popsize, popsize, popsize2, popsize2, popsize, popsize],
           style='lines')

    pause()