def test_coal_counts2(self):
    """Check ``coal.prob_coal_counts`` against ``coal.prob_coal_counts_slow``.

    For every pair ``(a, b)`` with ``1 <= b <= a <= 9`` both functions are
    called with the same ``(a, b, t, n)`` arguments and their results are
    compared with ``fequal``.
    """
    t = 1000.0
    n = 1000

    # `a` starts at `b`, so only pairs with a >= b are exercised.
    for b in xrange(1, 10):
        for a in xrange(b, 10):
            fast = coal.prob_coal_counts(a, b, t, n)
            slow = coal.prob_coal_counts_slow(a, b, t, n)
            fequal(fast, slow)
def test_coal_counts(self):
    """Sanity-check ``coal.prob_coal_counts`` in two ways.

    1. With ``b = 1`` its value must equal ``coal.cdf_mrca(t, a, n)`` for
       every ``a`` in 1..99.
    2. For every ``a`` in 1..9, its values summed over ``b`` in 1..a must
       equal 1.0.
    """
    t = 1000.0
    n = 1000

    # Part 1: b fixed at 1, compared against cdf_mrca.
    b = 1
    for a in xrange(1, 100):
        fequal(coal.prob_coal_counts(a, b, t, n), coal.cdf_mrca(t, a, n))

    # Part 2: accumulate over every b from 1 to a inclusive.
    for a in xrange(1, 10):
        total = 0
        for nfinal in xrange(1, a + 1):
            total += coal.prob_coal_counts(a, nfinal, t, n)
        fequal(total, 1.0)
def test_cdf_bmc(self):
    """Compare ``coal.cdf_mrca_bounded_multicoal`` with a sampling estimate.

    The exponentiated return value of ``cdf_mrca_bounded_multicoal`` is
    checked against the fraction of 5000 trees drawn with
    ``coal.sample_multicoal_tree`` whose root timestamp is below ``T``.
    The two numbers are compared with ``fequal`` using ``.05`` as the third
    argument.
    """
    species_tree = treelib.parse_newick(
        "((A:1000, B:1000):500, (C:700, D:700):800);")
    n = 1000
    # One gene per leaf of the species tree.
    gene_counts = dict.fromkeys(species_tree.leaf_names(), 1)
    T = 2000

    # The library value is passed through exp() before the comparison.
    expected = exp(
        coal.cdf_mrca_bounded_multicoal(gene_counts, T, species_tree, n))

    nsamples = 5000
    hits = 0
    for _ in xrange(nsamples):
        gene_tree, _recon = coal.sample_multicoal_tree(species_tree, n)
        root_time = treelib.get_tree_timestamps(gene_tree)[gene_tree.root]
        if root_time < T:
            hits += 1

    # float() keeps the ratio from being truncated by integer division.
    observed = hits / float(nsamples)
    fequal(expected, observed, .05)
def test_forward():
    """Check that the fast and slow C forward algorithms produce equal tables.

    An ARG is sampled with ``arglib.sample_arg_smc``; mutations and an
    alignment are generated from it.  After ``argweaver.discretize_arg`` and
    removal of thread ``"n3"``, ``argweaverc.argweaver_forward_algorithm`` is
    run once with default settings and once with ``slow=True``.  Every pair
    of corresponding entries is compared with ``fequal(..., rel=.0001)``.
    """
    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    # Single-argument print calls: identical output to `print a, b`.
    print("%s %s" % ("muts", len(muts)))
    print("%s %s" % ("recomb", len(arglib.get_recomb_pos(arg))))

    argweaver.discretize_arg(arg, times)

    # remove the last chromosome's thread ("n<k-1>")
    removed_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, removed_name)
    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    fast_probs = argweaverc.argweaver_forward_algorithm(
        carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    slow_probs = argweaverc.argweaver_forward_algorithm(
        carg, seqs, times=times, slow=True)
    util.toc()

    # Walk both tables in lockstep, column by column, entry by entry.
    for fast_col, slow_col in izip(fast_probs, slow_probs):
        for fast_val, slow_val in izip(fast_col, slow_col):
            fequal(fast_val, slow_val, rel=.0001)
def test_forward():
    """Check that the fast and slow C forward algorithms produce equal tables.

    An ARG is sampled with ``arglib.sample_arg_smc``; mutations and an
    alignment are generated from it.  After ``argweaver.discretize_arg`` and
    removal of thread ``"n3"``, ``argweaverc.argweaver_forward_algorithm`` is
    run once with default settings and once with ``slow=True``.  Every pair
    of corresponding entries is compared with ``fequal(..., rel=.0001)``.
    """
    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    # Single-argument print calls: identical output to `print a, b`.
    print("%s %s" % ("muts", len(muts)))
    print("%s %s" % ("recomb", len(arglib.get_recombs(arg))))

    argweaver.discretize_arg(arg, times)

    # remove the last chromosome's thread ("n<k-1>")
    removed_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, removed_name)
    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    fast_probs = argweaverc.argweaver_forward_algorithm(
        carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    slow_probs = argweaverc.argweaver_forward_algorithm(
        carg, seqs, times=times, slow=True)
    util.toc()

    # Walk both tables in lockstep, column by column, entry by entry.
    for fast_col, slow_col in izip(fast_probs, slow_probs):
        for fast_val, slow_val in izip(fast_col, slow_col):
            fequal(fast_val, slow_val, rel=.0001)
def test_trans_two():
    """
    Compare argweaver's transition matrix for k=2 with a direct formula.

    Only a single matrix is computed: the one for the marginal tree at one
    position of a trunk ARG.  Each entry of the matrix returned by
    ``argweaver.calc_transition_probs`` is compared with the log of
    ``trans(i, j)``, which is assembled from the local helpers below.
    Several sums over those helpers are also checked for each state index.
    """
    k = 2
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000

    times = argweaver.get_time_points(ntimes=5, maxtime=200000)
    time_steps = []
    for idx in range(1, len(times)):
        time_steps.append(times[idx] - times[idx - 1])
    # One extra, very long step so there is one step per time point.
    time_steps.append(200000 * 10000.0)
    popsizes = [n] * len(times)

    # This sampled ARG is only used for the printout; `arg` is rebound to a
    # trunk ARG right after.
    arg = arglib.sample_arg(k, 2 * n, rho, start=0, end=length)
    argweaver.discretize_arg(arg, times)
    print("%s %s" % ("recomb", arglib.get_recomb_pos(arg)))

    arg = argweaver.make_trunk_arg(0, length, "n0")
    pos = 10
    tree = arg.get_marginal_tree(pos)
    nlineages = argweaver.get_nlineages_recomb_coal(tree, times)
    states = list(argweaver.iter_coal_states(tree, times))
    mat = argweaver.calc_transition_probs(
        tree, states, nlineages, times, time_steps, popsizes, rho)
    nstates = len(states)

    def coal(j):
        # 1 - exp(-step_j / 2n)
        return 1.0 - exp(-time_steps[j] / (2.0 * n))

    def recoal2(first, last):
        # Product form built from coal(); not called anywhere in this test.
        prob = coal(last)
        for step in range(first, last):
            prob *= 1.0 - coal(step)
        return prob

    def recoal(first, last):
        # exp(-sum of step/2n over [first, last)), multiplied by the
        # 1 - exp(-step_last / 2n) factor unless `last` is the final state.
        survive = exp(-sum(time_steps[m] / (2.0 * n)
                           for m in range(first, last)))
        if last == nstates - 1:
            return survive
        return (1.0 - exp(-time_steps[last] / (2.0 * n))) * survive

    def isrecomb(i):
        return 1.0 - exp(-max(rho * 2.0 * times[i], rho))

    def recomb(i, k):
        treelen = 2 * times[i] + time_steps[i]
        # Steps strictly below i are weighted by 2.0; step i itself is not.
        if k < i:
            return 2.0 * time_steps[k] / treelen / 2.0
        return time_steps[k] / treelen / 2.0

    def trans(i, j):
        a = states[i][1]
        b = states[j][1]
        stop = min(a, b) + 1
        # The same sum is counted twice in the transition probability.
        one_term = sum(recoal(m, b) * recomb(a, m) for m in range(0, stop))
        p = one_term + one_term
        prob_recomb = isrecomb(a)
        p *= prob_recomb
        if i == j:
            # staying in the same state also covers the no-recombination case
            p += 1.0 - prob_recomb
        return p

    # NOTE(review): the source's indentation was lost; the per-state checks
    # are placed in the outer loop because they depend only on `i` -- confirm.
    for i in range(nstates):
        for j in range(nstates):
            print(isrecomb(states[i][1]))
            observed = mat[i][j]
            expected = log(trans(i, j))
            print("%s %s %s %s" % (states[i], states[j], observed, expected))
            fequal(observed, expected)

        # recomb(i, .) sums to 0.5
        recomb_total = sum(recomb(i, m) for m in range(i + 1))
        fequal(recomb_total, 0.5)

        # recoal(i, .) sums to 1
        recoal_total = sum(recoal(i, q) for q in range(i, nstates))
        fequal(recoal_total, 1.0)

        # recomb * recoal sums to .5
        joint_total = sum(
            sum(recoal(m, q) * recomb(i, m)
                for m in range(0, min(i, q) + 1))
            for q in range(0, nstates))
        fequal(joint_total, 0.5)

        # each row of transition probabilities sums to 1
        row_total = sum(trans(i, q) for q in range(nstates))
        fequal(row_total, 1.0)
def test_trans_two():
    """
    Compare argweaver's transition matrix for k=2 with a direct formula.

    Only a single matrix is computed: the one for the marginal tree at one
    position of a trunk ARG.  Each entry of the matrix returned by
    ``argweaver.calc_transition_probs`` is compared with the log of
    ``trans(i, j)``, which is assembled from the local helpers below.
    Several sums over those helpers are also checked for each state index.
    """
    k = 2
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000

    times = argweaver.get_time_points(ntimes=5, maxtime=200000)
    time_steps = []
    for idx in range(1, len(times)):
        time_steps.append(times[idx] - times[idx - 1])
    # One extra, very long step so there is one step per time point.
    time_steps.append(200000 * 10000.0)
    popsizes = [n] * len(times)

    # This sampled ARG is only used for the printout; `arg` is rebound to a
    # trunk ARG right after.
    arg = arglib.sample_arg(k, 2 * n, rho, start=0, end=length)
    argweaver.discretize_arg(arg, times)
    print("%s %s" % ("recomb", arglib.get_recombs(arg)))

    arg = argweaver.make_trunk_arg(0, length, "n0")
    pos = 10
    tree = arg.get_marginal_tree(pos)
    nlineages = argweaver.get_nlineages_recomb_coal(tree, times)
    states = list(argweaver.iter_coal_states(tree, times))
    mat = argweaver.calc_transition_probs(
        tree, states, nlineages, times, time_steps, popsizes, rho)
    nstates = len(states)

    def coal(j):
        # 1 - exp(-step_j / 2n)
        return 1.0 - exp(-time_steps[j] / (2.0 * n))

    def recoal2(first, last):
        # Product form built from coal(); not called anywhere in this test.
        prob = coal(last)
        for step in range(first, last):
            prob *= 1.0 - coal(step)
        return prob

    def recoal(first, last):
        # exp(-sum of step/2n over [first, last)), multiplied by the
        # 1 - exp(-step_last / 2n) factor unless `last` is the final state.
        survive = exp(-sum(time_steps[m] / (2.0 * n)
                           for m in range(first, last)))
        if last == nstates - 1:
            return survive
        return (1.0 - exp(-time_steps[last] / (2.0 * n))) * survive

    def isrecomb(i):
        return 1.0 - exp(-max(rho * 2.0 * times[i], rho))

    def recomb(i, k):
        treelen = 2 * times[i] + time_steps[i]
        # Steps strictly below i are weighted by 2.0; step i itself is not.
        if k < i:
            return 2.0 * time_steps[k] / treelen / 2.0
        return time_steps[k] / treelen / 2.0

    def trans(i, j):
        a = states[i][1]
        b = states[j][1]
        stop = min(a, b) + 1
        # The same sum is counted twice in the transition probability.
        one_term = sum(recoal(m, b) * recomb(a, m) for m in range(0, stop))
        p = one_term + one_term
        prob_recomb = isrecomb(a)
        p *= prob_recomb
        if i == j:
            # staying in the same state also covers the no-recombination case
            p += 1.0 - prob_recomb
        return p

    # NOTE(review): the source's indentation was lost; the per-state checks
    # are placed in the outer loop because they depend only on `i` -- confirm.
    for i in range(nstates):
        for j in range(nstates):
            print(isrecomb(states[i][1]))
            observed = mat[i][j]
            expected = log(trans(i, j))
            print("%s %s %s %s" % (states[i], states[j], observed, expected))
            fequal(observed, expected)

        # recomb(i, .) sums to 0.5
        recomb_total = sum(recomb(i, m) for m in range(i + 1))
        fequal(recomb_total, 0.5)

        # recoal(i, .) sums to 1
        recoal_total = sum(recoal(i, q) for q in range(i, nstates))
        fequal(recoal_total, 1.0)

        # recomb * recoal sums to .5
        joint_total = sum(
            sum(recoal(m, q) * recomb(i, m)
                for m in range(0, min(i, q) + 1))
            for q in range(0, nstates))
        fequal(joint_total, 0.5)

        # each row of transition probabilities sums to 1
        row_total = sum(trans(i, q) for q in range(nstates))
        fequal(row_total, 1.0)