def test_emit_internal():
    """
    Check the emission probabilities computed for internal branches.

    An ARG and a matching alignment are simulated, both are converted to
    their C representations, and the result of
    argweaverc.argweaver_assert_emit_internal is asserted.
    """
    nchroms = 10
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    mut_rate = 2.5e-8 * 20
    region_len = int(10e3) / 20
    time_points = argweaver.get_time_points(ntimes=20, maxtime=200000)

    # simulate an ARG and an alignment with mutations placed on it
    sim_arg = argweaver.sample_arg_dsmc(
        nchroms, 2 * popsize, recomb_rate,
        start=0, end=region_len, times=time_points)
    mutations = argweaver.sample_arg_mutations(sim_arg, mut_rate, time_points)
    alignment = argweaver.make_alignment(sim_arg, mutations)

    # hand both over to the C layer
    ctrees, leaf_names = argweaverc.arg2ctrees(sim_arg, time_points)
    cseqs, nseqs, seqlen = argweaverc.seqs2cseqs(alignment, leaf_names)

    ok = argweaverc.argweaver_assert_emit_internal(
        ctrees, len(time_points), time_points, mut_rate,
        cseqs, nseqs, seqlen)
    assert ok
def sample_arg_dsmc(k, popsize, rho, recombmap=None, start=0.0, end=0.0,
                    times=None, times2=None, init_tree=None, names=None,
                    make_names=True):
    """
    Returns an ARG sampled from the Discrete Sequentially Markov Coalescent

    k          -- chromosomes
    popsize    -- effective population size
    rho        -- recombination rate (recombinations / site / generation)
    recombmap  -- map for variable recombination rate
    start      -- starting chromosome coordinate
    end        -- ending chromosome coordinate
    times      -- time points; when None they are generated with
                  get_time_points(20, 160000, .01)
    times2     -- forwarded unchanged to sample_dsmc_sprs
    init_tree  -- forwarded unchanged to sample_dsmc_sprs
    names      -- names to use for leaves (default: None)
    make_names -- make names using strings (default: True)
    """
    if times is None:
        maxtime = 160000
        delta = .01
        ntimes = 20
        times = argweaver.get_time_points(ntimes, maxtime, delta)

    it = sample_dsmc_sprs(
        k, popsize, rho, recombmap=recombmap,
        start=start, end=end, times=times, times2=times2,
        init_tree=init_tree, names=names, make_names=make_names)

    # The first item produced is the initial tree; the rest of the iterator
    # is consumed by make_arg_from_sprs.  The next() builtin works on both
    # Python 2 and Python 3 iterators, unlike the Python 2-only it.next().
    tree = next(it)
    arg = arglib.make_arg_from_sprs(tree, it)

    return arg
def test_emit():
    """
    Check the emission probabilities.

    An ARG and alignment are simulated, the thread named
    "n%d" % (nchroms - 1) is removed from the ARG, and
    argweaverc.argweaver_assert_emit is asserted with that sequence
    listed after the ones still present in the ARG.
    """
    nchroms = 10
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    mut_rate = 2.5e-8 * 20
    region_len = int(1e3) / 20
    time_points = argweaver.get_time_points(ntimes=20, maxtime=200000)

    # simulate the full ARG and its alignment
    full_arg = argweaver.sample_arg_dsmc(
        nchroms, 2 * popsize, recomb_rate,
        start=0, end=region_len, times=time_points)
    mutations = argweaver.sample_arg_mutations(
        full_arg, mut_rate, time_points)
    alignment = argweaver.make_alignment(full_arg, mutations)

    # drop one chromosome from the ARG but keep its sequence
    removed = "n%d" % (nchroms - 1)
    partial_arg = argweaver.remove_arg_thread(full_arg, removed)

    ctrees, leaf_names = argweaverc.arg2ctrees(partial_arg, time_points)
    seq_order = leaf_names + [removed]
    cseqs, nseqs, seqlen = argweaverc.seqs2cseqs(alignment, seq_order)

    ok = argweaverc.argweaver_assert_emit(
        ctrees, len(time_points), time_points, mut_rate,
        cseqs, nseqs, seqlen)
    assert ok
def test_node_numbering():
    """
    Test node numbering across ARG.

    A node should keep the same numbering until it is broken by
    recombination.  The new recoal node should take the index of the
    broken node.
    """
    nchroms = 10
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    region_len = 10000
    time_points = argweaver.get_time_points(ntimes=20, maxtime=200000)

    sim_arg = argweaver.sample_arg_dsmc(
        nchroms, 2 * popsize, recomb_rate,
        start=0, end=region_len, times=time_points)

    treeset, all_nodes = argweaverc.get_treeset(sim_arg, time_points)
    ptrees, ages, sprs, blocks = treeset

    # compare each node list against the one before it
    node_count = len(all_nodes[0])
    previous = None
    for tree_idx, current in enumerate(all_nodes):
        if previous:
            recomb_node = sprs[tree_idx][0]
            broken_node = ptrees[tree_idx][recomb_node]
            # every index except the broken node must be unchanged
            for node_idx in range(node_count):
                if node_idx == broken_node:
                    continue
                nose.tools.assert_equal(
                    previous[node_idx], current[node_idx])
        previous = current
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only calculate a single matrix: the one at the first recombination
    position of a freshly sampled ARG.
    """
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)

    # resample until the ARG contains at least one recombination
    recombs = []
    while not recombs:
        arg = argweaver.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                        times=times)
        recombs = [x.pos for x in arg if x.event == "recomb"]

    pos = recombs[0]
    tree = arg.get_marginal_tree(pos - .5)

    # next() works on both Python 2 and 3 iterators, unlike .next()
    rpos, r, c = next(arglib.iter_arg_sprs(arg, start=pos - .5))
    spr = (r, c)

    assert argweaverc.assert_transition_switch_probs(
        tree, spr, times, popsizes, rho)
def sample_thread(arg, seqs, rho=1.5e-8, mu=2.5e-8, popsize=1e4,
                  times=None, ntimes=20, maxtime=200000, verbose=False):
    """
    Add the one sequence missing from `arg` via argweaver_sample_thread.

    arg     -- ARG to extend; converted with arg2ctrees
    seqs    -- mapping from sequence name to sequence; the first key not
               already a leaf of `arg` is the sequence that gets added
    popsize -- single population size, repeated for every time point
    times   -- time points; generated from ntimes/maxtime when None

    Returns the ARG rebuilt by ctrees2arg.  Raises IndexError when every
    key of `seqs` is already a leaf of `arg`.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=maxtime, delta=.01)
    popsizes = [popsize] * len(times)

    if verbose:
        util.tic("sample thread")

    trees, names = arg2ctrees(arg, times)

    # sequences already in the ARG come first, in leaf order
    ordered_seqs = [seqs[leaf] for leaf in names]

    # the new sequence goes last
    missing = [key for key in seqs.keys() if key not in names]
    new_name = missing[0]
    names.append(new_name)
    ordered_seqs.append(seqs[new_name])

    nseqs = len(ordered_seqs)
    seqlen = len(ordered_seqs[0])
    cseqs = (C.c_char_p * nseqs)(*ordered_seqs)

    trees = argweaver_sample_thread(
        trees, times, len(times), popsizes, rho, mu,
        cseqs, nseqs, seqlen, None)
    result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
def test_trans():
    """
    Calculate transition probabilities

    Reads `ntests` stored ARGs from test/data/test_trans and asserts the
    transition probabilities for the marginal tree at position 10 of each.
    Set `create_data` to True to regenerate the stored ARGs first.
    """
    create_data = False
    if create_data:
        make_clean_dir('test/data/test_trans')

    k = 8
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=10, maxtime=200000)
    popsizes = [n] * len(times)
    ntests = 40

    # generate test data
    if create_data:
        for i in range(ntests):
            arg = arglib.sample_arg(k, 2*n, rho, start=0, end=length)
            argweaver.discretize_arg(arg, times)
            arg.write('test/data/test_trans/%d.arg' % i)

    for i in range(ntests):
        # a single formatted string prints identically on Python 2 and 3
        print('arg %d' % i)
        arg = arglib.read_arg('test/data/test_trans/%d.arg' % i)
        argweaver.discretize_arg(arg, times)
        pos = 10
        tree = arg.get_marginal_tree(pos)

        assert argweaverc.assert_transition_probs(
            tree, times, popsizes, rho)
def calc_joint_prob(arg, seqs, ntimes=20, mu=2.5e-8, rho=1.5e-8,
                    popsizes=1e4, times=None, verbose=False,
                    delete_arg=True):
    """
    Calculate arg_joint_prob

    popsizes   -- a single number (repeated for every time point) or a
                  per-time-point sequence
    times      -- time points; generated from ntimes when None
    delete_arg -- when True, the converted local trees are deleted before
                  returning

    Returns the value produced by argweaver_joint_prob.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)

    # expand a scalar population size to one entry per time point
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("calc likelihood")

    ctrees, leaf_names = arg2ctrees(arg, times)
    cseqs, nseqs, seqlen = seqs2cseqs(seqs, leaf_names)

    prob = argweaver_joint_prob(
        ctrees, times, len(times), popsizes, mu, rho,
        cseqs, nseqs, seqlen)

    if delete_arg:
        delete_local_trees(ctrees)

    if verbose:
        util.toc()

    return prob
def sample_arg_dsmc(k, popsize, rho, recombmap=None, start=0.0, end=0.0,
                    times=None, init_tree=None, names=None,
                    make_names=True):
    """
    Returns an ARG sampled from the Discrete Sequentially Markov Coalescent

    k          -- chromosomes
    popsize    -- effective population size
    rho        -- recombination rate (recombinations / site / generation)
    recombmap  -- map for variable recombination rate
    start      -- starting chromosome coordinate
    end        -- ending chromosome coordinate
    times      -- time points; when None they are generated with
                  get_time_points(20, 160000, .01)
    init_tree  -- forwarded unchanged to sample_dsmc_sprs
    names      -- names to use for leaves (default: None)
    make_names -- make names using strings (default: True)
    """
    if times is None:
        maxtime = 160000
        delta = .01
        ntimes = 20
        times = argweaver.get_time_points(ntimes, maxtime, delta)

    it = sample_dsmc_sprs(
        k, popsize, rho, recombmap=recombmap,
        start=start, end=end, times=times,
        init_tree=init_tree, names=names, make_names=make_names)

    # The first item produced is the initial tree; the rest of the iterator
    # is consumed by make_arg_from_sprs.  The next() builtin works on both
    # Python 2 and Python 3 iterators, unlike the Python 2-only it.next().
    tree = next(it)
    arg = arglib.make_arg_from_sprs(tree, it)

    return arg
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only calculate a single matrix: the one at the first recombination
    position of a freshly sampled ARG.
    """
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)

    # resample until the ARG contains at least one recombination
    recombs = []
    while not recombs:
        arg = argweaver.sample_arg_dsmc(k, 2*n, rho, start=0, end=length,
                                        times=times)
        recombs = [x.pos for x in arg if x.event == "recomb"]

    pos = recombs[0]
    tree = arg.get_marginal_tree(pos-.5)

    # next() works on both Python 2 and 3 iterators, unlike .next()
    rpos, r, c = next(arglib.iter_arg_sprs(arg, start=pos-.5))
    spr = (r, c)

    assert argweaverc.assert_transition_switch_probs(
        tree, spr, times, popsizes, rho)
def test_emit():
    """
    Check the emission probabilities.

    Simulates an ARG with an alignment, removes the thread named
    "n%d" % (num_chroms - 1) from the ARG and asserts
    argweaverc.argweaver_assert_emit, passing the removed sequence after
    the sequences that remain in the ARG.
    """
    num_chroms = 10
    pop = 1e4
    rho_scaled = 1.5e-8 * 20
    mu_scaled = 2.5e-8 * 20
    seq_span = int(1e3) / 20
    tpoints = argweaver.get_time_points(ntimes=20, maxtime=200000)

    original = argweaver.sample_arg_dsmc(
        num_chroms, 2 * pop, rho_scaled,
        start=0, end=seq_span, times=tpoints)
    muts = argweaver.sample_arg_mutations(original, mu_scaled, tpoints)
    aln = argweaver.make_alignment(original, muts)

    # take one chromosome out of the ARG; its sequence stays in `aln`
    thread_name = "n%d" % (num_chroms - 1)
    reduced = argweaver.remove_arg_thread(original, thread_name)

    ctrees, leaves = argweaverc.arg2ctrees(reduced, tpoints)
    cseqs, nseqs, seqlen = argweaverc.seqs2cseqs(
        aln, leaves + [thread_name])

    passed = argweaverc.argweaver_assert_emit(
        ctrees, len(tpoints), tpoints, mu_scaled, cseqs, nseqs, seqlen)
    assert passed
def resample_arg_region(arg, seqs, region_start, region_end, ntimes=20,
                        rho=1.5e-8, mu=2.5e-8, popsizes=1e4, times=None,
                        carg=False, refine=1, verbose=False):
    """
    Sample ARG for sequences

    Calls argweaver_resample_arg_region on the converted ARG for the
    interval given by region_start and region_end.

    popsizes -- a single number (repeated for every time point) or a
                per-time-point sequence
    times    -- time points; generated from ntimes when None
    carg     -- when True return the (trees, names) pair instead of
                converting back with ctrees2arg
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("resample arg")
        util.tic("convert arg")

    # convert arg to c++
    trees, names = arg2ctrees(arg, times)

    if verbose:
        util.toc()

    # keep the ARG's leaf order, then append every sequence that is not
    # in the ARG yet
    known = set(names)
    names.extend([name for name, _ in seqs.items() if name not in known])
    seqs2, nseqs, seqlen = seqs2cseqs(seqs, names)

    # the length passed on is taken from the first named sequence
    seqlen = len(seqs[names[0]])

    # resample arg
    trees = argweaver_resample_arg_region(
        trees, times, len(times),
        popsizes, rho, mu, seqs2, nseqs, seqlen,
        region_start, region_end, refine)

    # convert arg back to python unless the caller wants the C form
    if carg:
        result = (trees, names)
    else:
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
def sample_all_arg(seqs, ntimes=20, rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
                   refine=1, times=None, verbose=False, carg=False,
                   prob_path_switch=.1):
    """
    Sample ARG for sequences

    Starts from a trunk ARG made for the first sequence in `seqs` and
    calls argweaver_resample_all_arg with all sequences.

    seqs     -- mapping from sequence name to sequence
    popsizes -- a single number (repeated for every time point) or a
                per-time-point sequence
    times    -- time points; generated from ntimes when None
    carg     -- when True return the (trees, names) pair instead of
                converting back with ctrees2arg
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("resample arg")

    # convert arg to c++
    if verbose:
        util.tic("convert arg")

    # list(...) makes the lookups work when keys()/values() return views
    # (Python 3) as well as lists (Python 2).
    first_name = list(seqs.keys())[0]
    first_seq = list(seqs.values())[0]
    arg = argweaver.make_trunk_arg(0, len(first_seq), name=first_name)
    trees, names = arg2ctrees(arg, times)
    if verbose:
        util.toc()

    # get sequences in same order
    # and add all other sequences not in arg yet
    seqs2 = [seqs[name] for name in names]
    leaves = set(names)
    for name, seq in seqs.items():
        if name not in leaves:
            names.append(name)
            seqs2.append(seq)

    # resample arg
    seqlen = len(seqs[names[0]])

    # NOTE(review): ctypes.c_char_p takes bytes on Python 3 -- confirm the
    # values of `seqs` are byte strings when running there.
    trees = argweaver_resample_all_arg(
        trees, times, len(times),
        popsizes, rho, mu,
        (C.c_char_p * len(seqs2))(*seqs2), len(seqs2),
        seqlen, refine, prob_path_switch)

    if carg:
        arg = (trees, names)
    else:
        # convert arg back to python
        arg = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return arg
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only one matrix is calculated per stored ARG: the one at its first
    recombination.  On a failure the offending tree is drawn before the
    test fails.  Set `create_data` to True to regenerate the stored ARGs.
    """
    create_data = False
    if create_data:
        make_clean_dir('test/data/test_trans_switch')

    # model parameters
    nchroms = 12
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    region_len = 1000
    time_points = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [popsize] * len(time_points)
    ntests = 100

    # generate test data
    if create_data:
        for test_idx in range(ntests):
            # keep sampling until the ARG has at least one recombination
            while True:
                sampled = argweaver.sample_arg_dsmc(
                    nchroms, 2 * popsize, recomb_rate,
                    start=0, end=region_len, times=time_points)
                if any(node.event == "recomb" for node in sampled):
                    break
            sampled.write('test/data/test_trans_switch/%d.arg' % test_idx)

    for test_idx in range(ntests):
        print('arg', test_idx)
        stored = arglib.read_arg(
            'test/data/test_trans_switch/%d.arg' % test_idx)
        argweaver.discretize_arg(stored, time_points)

        recomb_positions = [
            node.pos for node in stored if node.event == "recomb"]
        first_recomb = recomb_positions[0]
        tree = stored.get_marginal_tree(first_recomb - .5)
        spr_iter = arglib.iter_arg_sprs(stored, start=first_recomb - .5)
        rpos, recomb_pt, coal_pt = next(spr_iter)
        spr = (recomb_pt, coal_pt)

        ok = argweaverc.assert_transition_switch_probs(
            tree, spr, time_points, popsizes, recomb_rate)
        if not ok:
            # show the tree that failed before failing the test
            drawable = tree.get_tree()
            treelib.remove_single_children(drawable)
            treelib.draw_tree_names(drawable, maxlen=5, minlen=5)
            assert False
def resample_mcmc_arg(arg, seqs, ntimes=20, rho=1.5e-8, mu=2.5e-8,
                      popsizes=1e4, refine=1, times=None, verbose=False,
                      carg=False, window=200000, niters2=5):
    """
    Sample ARG for sequences

    Calls argweaver_resample_mcmc_arg on the converted ARG.

    popsizes -- a single number (repeated for every time point) or a
                per-time-point sequence
    times    -- time points; generated from ntimes when None
    carg     -- when True return the (trees, names) pair instead of
                converting back with ctrees2arg
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("resample arg")
        util.tic("convert arg")

    # convert arg to c++
    trees, names = arg2ctrees(arg, times)

    if verbose:
        util.toc()

    # keep the ARG's leaf order, then append every sequence name that is
    # not in the ARG yet
    known = set(names)
    names = list(names)
    names.extend([name for name in seqs if name not in known])
    seqs2, nseqs, seqlen = seqs2cseqs(seqs, names)

    # resample arg
    trees = argweaver_resample_mcmc_arg(
        trees, times, len(times),
        popsizes, rho, mu, seqs2, nseqs, seqlen,
        refine, niters2, window)

    # convert arg back to python unless the caller wants the C form
    if carg:
        result = (trees, names)
    else:
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only calculate a single matrix per stored ARG: the one at its first
    recombination.  On a failure the offending tree is drawn before the
    test fails.  Set `create_data` to True to regenerate the stored ARGs.
    """
    create_data = False
    if create_data:
        make_clean_dir('test/data/test_trans_switch')

    # model parameters
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)
    ntests = 100

    # generate test data
    if create_data:
        for i in range(ntests):
            # Sample ARG with at least one recombination.
            while True:
                arg = argweaver.sample_arg_dsmc(
                    k, 2*n, rho, start=0, end=length, times=times)
                if any(x.event == "recomb" for x in arg):
                    break
            arg.write('test/data/test_trans_switch/%d.arg' % i)

    for i in range(ntests):
        # a single formatted string prints identically on Python 2 and 3
        print('arg %d' % i)
        arg = arglib.read_arg('test/data/test_trans_switch/%d.arg' % i)
        argweaver.discretize_arg(arg, times)
        recombs = [x.pos for x in arg if x.event == "recomb"]
        pos = recombs[0]
        tree = arg.get_marginal_tree(pos-.5)

        # next() works on both Python 2 and 3 iterators, unlike .next()
        rpos, r, c = next(arglib.iter_arg_sprs(arg, start=pos-.5))
        spr = (r, c)

        if not argweaverc.assert_transition_switch_probs(
                tree, spr, times, popsizes, rho):
            # show the tree that failed before failing the test
            tree2 = tree.get_tree()
            treelib.remove_single_children(tree2)
            treelib.draw_tree_names(tree2, maxlen=5, minlen=5)
            assert False
def test_arg_convert():
    """
    Test the Python -> C -> Python round trip of an ARG.

    A sampled ARG is converted with arg2ctrees, converted back with
    ctrees2arg and compared to the original with arg_equal.
    """
    nchroms = 10
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    region_len = 10000
    time_points = argweaver.get_time_points(ntimes=20, maxtime=200000)

    original = argweaver.sample_arg_dsmc(
        nchroms, 2 * popsize, recomb_rate,
        start=0, end=region_len, times=time_points)

    # convert to C++ and back
    ctrees, leaf_names = argweaverc.arg2ctrees(original, time_points)
    round_tripped = argweaverc.ctrees2arg(ctrees, leaf_names, time_points)

    arg_equal(original, round_tripped)
def argweaver_forward_algorithm(arg, seqs, rho=1.5e-8, mu=2.5e-8,
                                popsizes=1e4, times=None, ntimes=20,
                                maxtime=180000, verbose=False, prior=None,
                                internal=False, slow=False):
    """
    Run argweaver_forward_alg and return the forward table as lists.

    arg      -- either a C ARG accepted by is_carg, used as a
                (trees, names) pair, or an ARG converted with arg2ctrees
    seqs     -- mapping from sequence name to sequence; sequences named
                in the ARG are passed first, all others after them
    popsizes -- a single number (repeated for every time point) or a
                per-time-point sequence
    times    -- time points; generated from ntimes/maxtime when None
    prior    -- optional prior passed to argweaver_forward_alg; None
                (the default) is treated as an empty list
    internal, slow -- forwarded to argweaver_forward_alg

    Returns one list per site, each truncated to the state count that
    argweaver_get_nstates reports for that site.
    """
    # None stands in for the empty list so that no mutable object is
    # shared between calls through the default argument.
    if prior is None:
        prior = []

    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=maxtime, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("forward")

    if is_carg(arg):
        trees, names = arg
    else:
        trees, names = arg2ctrees(arg, times)

    # sequences in the ARG first (leaf order), then all remaining ones
    seqs2 = [seqs[node] for node in names]
    in_arg = set(names)
    for name in seqs.keys():
        if name not in in_arg:
            seqs2.append(seqs[name])

    seqlen = len(seqs2[0])

    fw = argweaver_forward_alg(trees, times, len(times),
                               popsizes, rho, mu,
                               (C.c_char_p * len(seqs2))(*seqs2),
                               len(seqs2), seqlen, len(prior) > 0, prior,
                               internal, slow)

    # copy each row out, truncated to its state count, before the C
    # matrix is deleted
    nstates = [0] * seqlen
    argweaver_get_nstates(trees, len(times), internal, nstates)
    probs = [row[:n] for row, n in zip(fw, nstates)]

    delete_forward_matrix(fw, seqlen)

    if verbose:
        util.toc()

    return probs
def test_arg_convert():
    """
    Test that an ARG survives conversion to C trees and back.

    The ARG rebuilt by ctrees2arg is compared with the sampled one using
    arg_equal.
    """
    num_chroms = 10
    pop = 1e4
    rho_scaled = 1.5e-8 * 20
    seq_span = 10000
    tpoints = argweaver.get_time_points(ntimes=20, maxtime=200000)

    sampled = argweaver.sample_arg_dsmc(
        num_chroms, 2 * pop, rho_scaled,
        start=0, end=seq_span, times=tpoints)

    # convert to C++ and back
    c_form = argweaverc.arg2ctrees(sampled, tpoints)
    trees, names = c_form
    rebuilt = argweaverc.ctrees2arg(trees, names, tpoints)

    arg_equal(sampled, rebuilt)
def test_arg_joint():
    """
    Compute joint probability of an ARG

    Samples an ARG and an alignment for two chromosomes and prints the
    value returned by argweaver.calc_joint_prob.  There is no assertion;
    the test passes when the computation runs without raising.
    """
    k = 2
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = 10000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(k, 2*n, rho, start=0, end=length,
                                    times=times)
    muts = argweaver.sample_arg_mutations(arg, mu, times=times)
    seqs = arglib.make_alignment(arg, muts)

    lk = argweaver.calc_joint_prob(arg, seqs, mu=mu, rho=rho, times=times)

    # print(x) with one argument behaves the same on Python 2 and 3
    print(lk)
def test_trans():
    """
    Check the transition probabilities.

    Uses the marginal tree at position 10 of a sampled, discretized ARG.
    """
    nchroms = 4
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    region_len = 1000
    time_points = argweaver.get_time_points(ntimes=4, maxtime=200000)
    popsizes = [popsize] * len(time_points)

    sim_arg = arglib.sample_arg(
        nchroms, 2 * popsize, recomb_rate, start=0, end=region_len)
    argweaver.discretize_arg(sim_arg, time_points)

    site = 10
    local_tree = sim_arg.get_marginal_tree(site)

    ok = argweaverc.assert_transition_probs(
        local_tree, time_points, popsizes, recomb_rate)
    assert ok
def test_trans():
    """
    Check the transition probabilities.

    A coalescent ARG is sampled and discretized; the check runs on its
    marginal tree at position 10.
    """
    num_chroms = 4
    pop = 1e4
    rho_scaled = 1.5e-8 * 20
    seq_span = 1000
    tpoints = argweaver.get_time_points(ntimes=4, maxtime=200000)
    pops = [pop] * len(tpoints)

    sampled = arglib.sample_arg(
        num_chroms, 2 * pop, rho_scaled, start=0, end=seq_span)
    argweaver.discretize_arg(sampled, tpoints)

    marginal = sampled.get_marginal_tree(10)

    passed = argweaverc.assert_transition_probs(
        marginal, tpoints, pops, rho_scaled)
    assert passed
def test_trans_switch_internal():
    """
    Check switch-matrix transition probabilities for internal branches.

    Asserts argweaverc.assert_transition_probs_switch_internal on the C
    trees of a sampled ARG.
    """
    nchroms = 10
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    region_len = int(100e3) / 20
    time_points = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [popsize] * len(time_points)

    sim_arg = argweaver.sample_arg_dsmc(
        nchroms, 2 * popsize, recomb_rate,
        start=0, end=region_len, times=time_points)

    ctrees, leaf_names = argweaverc.arg2ctrees(sim_arg, time_points)

    ok = argweaverc.assert_transition_probs_switch_internal(
        ctrees, time_points, popsizes, recomb_rate)
    assert ok
def test_trans_internal():
    """
    Check transition probabilities for internal branch re-sampling.

    Only one matrix is checked: the one for the marginal tree at
    position 10 of a sampled, discretized ARG.
    """
    nchroms = 5
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    region_len = 1000
    time_points = argweaver.get_time_points(ntimes=5, maxtime=200000)
    popsizes = [popsize] * len(time_points)

    sim_arg = arglib.sample_arg(
        nchroms, 2 * popsize, recomb_rate, start=0, end=region_len)
    argweaver.discretize_arg(sim_arg, time_points)

    site = 10
    local_tree = sim_arg.get_marginal_tree(site)

    ok = argweaverc.assert_transition_probs_internal(
        local_tree, time_points, popsizes, recomb_rate)
    assert ok
def test_trans_internal():
    """
    Check transition probabilities for internal branch re-sampling.

    A single matrix is checked, built from the marginal tree at
    position 10 of a sampled ARG after discretization.
    """
    num_chroms = 5
    pop = 1e4
    rho_scaled = 1.5e-8 * 20
    seq_span = 1000
    tpoints = argweaver.get_time_points(ntimes=5, maxtime=200000)
    pops = [pop] * len(tpoints)

    sampled = arglib.sample_arg(
        num_chroms, 2 * pop, rho_scaled, start=0, end=seq_span)
    argweaver.discretize_arg(sampled, tpoints)

    marginal = sampled.get_marginal_tree(10)

    passed = argweaverc.assert_transition_probs_internal(
        marginal, tpoints, pops, rho_scaled)
    assert passed
def test_forward():
    """
    Compare the fast and slow forward algorithm implementations.

    An ARG and alignment are simulated, the thread "n%d" % (k - 1) is
    removed, and the forward tables computed with and without slow=True
    are required to agree entry by entry (relative tolerance .0001).
    """
    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    # single formatted strings print identically on Python 2 and 3
    print("muts %d" % len(muts))
    print("recomb %d" % len(arglib.get_recomb_pos(arg)))

    argweaver.discretize_arg(arg, times)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    probs2 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times,
                                                    slow=True)
    util.toc()

    # built-in zip replaces itertools.izip, which Python 3 does not have
    for col1, col2 in zip(probs1, probs2):
        for a, b in zip(col1, col2):
            fequal(a, b, rel=.0001)
def sample_arg(seqs, ntimes=20, rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
               refine=0, nremove=1, times=None, verbose=False, carg=False):
    """
    Sample ARG for sequences

    Calls argweaver_sample_arg_refine with the sequences in the order
    seqs.items() yields them.

    seqs     -- mapping from sequence name to sequence
    popsizes -- a single number (repeated for every time point) or a
                per-time-point sequence
    times    -- time points; generated from ntimes when None
    carg     -- when True return the (trees, names) pair instead of
                converting with ctrees2arg
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("sample arg")

    # split the mapping into parallel name / sequence lists
    entries = list(seqs.items())
    names = [name for name, _ in entries]
    seqs2 = [seq for _, seq in entries]

    # sample arg
    cseqs = (C.c_char_p * len(seqs))(*seqs2)
    trees = argweaver_sample_arg_refine(
        times, len(times),
        popsizes, rho, mu,
        cseqs, len(seqs), len(seqs2[0]), refine, nremove)

    # convert to python unless the caller wants the C form
    if carg:
        result = (trees, names)
    else:
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
def test_emit_internal():
    """
    Check the emission probabilities for internal branches.

    Simulates an ARG plus alignment, converts both for the C layer and
    asserts argweaverc.argweaver_assert_emit_internal.
    """
    num_chroms = 10
    pop = 1e4
    rho_scaled = 1.5e-8 * 20
    mu_scaled = 2.5e-8 * 20
    seq_span = int(10e3) / 20
    tpoints = argweaver.get_time_points(ntimes=20, maxtime=200000)

    sampled = argweaver.sample_arg_dsmc(
        num_chroms, 2 * pop, rho_scaled,
        start=0, end=seq_span, times=tpoints)
    muts = argweaver.sample_arg_mutations(sampled, mu_scaled, tpoints)
    aln = argweaver.make_alignment(sampled, muts)

    trees, leaves = argweaverc.arg2ctrees(sampled, tpoints)
    cseqs, nseqs, seqlen = argweaverc.seqs2cseqs(aln, leaves)

    passed = argweaverc.argweaver_assert_emit_internal(
        trees, len(tpoints), tpoints, mu_scaled, cseqs, nseqs, seqlen)
    assert passed
def test_forward():
    """
    Compare the fast and slow forward algorithm implementations.

    An ARG and alignment are simulated, the thread "n%d" % (k - 1) is
    removed, and the forward tables computed with and without slow=True
    are required to agree entry by entry (relative tolerance .0001).
    """
    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2*n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    # single formatted strings print identically on Python 2 and 3
    print("muts %d" % len(muts))
    print("recomb %d" % len(arglib.get_recombs(arg)))

    argweaver.discretize_arg(arg, times)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    probs2 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times,
                                                    slow=True)
    util.toc()

    # built-in zip replaces itertools.izip, which Python 3 does not have
    for col1, col2 in zip(probs1, probs2):
        for a, b in zip(col1, col2):
            fequal(a, b, rel=.0001)
def calc_likelihood_parsimony(arg, seqs, ntimes=20, mu=2.5e-8,
                              times=None, delete_arg=True, verbose=False):
    """
    Calculate arg_likelihood

    times      -- time points; generated from ntimes when None
    delete_arg -- when True, the converted local trees are deleted before
                  returning

    Returns the value produced by argweaver_likelihood_parsimony.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)

    if verbose:
        util.tic("calc likelihood")

    ctrees, leaf_names = arg2ctrees(arg, times)
    cseqs, nseqs, seqlen = seqs2cseqs(seqs, leaf_names)

    likelihood = argweaver_likelihood_parsimony(
        ctrees, times, len(times), mu, cseqs, nseqs, seqlen)

    if delete_arg:
        delete_local_trees(ctrees)

    if verbose:
        util.toc()

    return likelihood
def show_plots(arg_file, sites_file, stats_file, output_prefix,
               rho, mu, popsize, ntimes=20, maxtime=200000):
    """
    Show plots of convergence.

    For each statistic (joint, likelihood, prior, number of
    recombinations, ARG branch length) the per-iteration values read from
    `stats_file` are plotted as a line, with a gray horizontal line at
    the value computed from the ARG in `arg_file`.

    arg_file      -- ARG file read with arglib.read_arg
    sites_file    -- sites file read with argweaver.read_sites
    stats_file    -- table with columns "joint", "likelihood", "prior",
                     "recombs" and "arglen"
    output_prefix -- prefix of the PDF files that are written
                     (<prefix>.trace.<stat>.pdf)
    """
    # read true arg and seqs
    times = argweaver.get_time_points(ntimes=ntimes, maxtime=maxtime)
    arg = arglib.read_arg(arg_file)
    argweaver.discretize_arg(arg, times, ignore_top=False, round_age="closer")
    arg = arglib.smcify_arg(arg)
    seqs = argweaver.sites2seqs(argweaver.read_sites(sites_file))

    # compute true stats
    arglen = arglib.arglen(arg)
    arg = argweaverc.arg2ctrees(arg, times)
    nrecombs = argweaverc.get_local_trees_ntrees(arg[0]) - 1
    lk = argweaverc.calc_likelihood(
        arg, seqs, mu=mu, times=times,
        delete_arg=False)
    prior = argweaverc.calc_prior_prob(
        arg, rho=rho, times=times, popsizes=popsize,
        delete_arg=False)
    joint = lk + prior

    data = read_table(stats_file)

    # (stats column, output file suffix, true value, title and y label),
    # listed in the order the plots are produced
    traces = [
        ("joint", ".trace.joint.pdf", joint, "joint probability"),
        ("likelihood", ".trace.lk.pdf", lk, "likelihood"),
        ("prior", ".trace.prior.pdf", prior, "prior probability"),
        ("recombs", ".trace.nrecombs.pdf", nrecombs,
         "number of recombinations"),
        ("arglen", ".trace.arglen.pdf", arglen, "ARG branch length"),
    ]

    for column, suffix, y2, label in traces:
        y = data.cget(column)
        rplot_start(output_prefix + suffix, width=8, height=5)
        # the y range always includes the true value
        rp.plot(y, t="l", ylim=[min(min(y), y2), max(max(y), y2)],
                main=label,
                xlab="iterations",
                ylab=label)
        rp.lines([0, len(y)], [y2, y2], col="gray")
        rplot_end(True)
def test_trans_two():
    """
    Calculate transition probabilities for k=2

    Only calculate a single matrix.  The log transition matrix from
    argweaver.calc_transition_probs for a trunk ARG is compared entry by
    entry with a direct calculation (`trans` below), and the recomb /
    recoal pieces are checked to sum to the expected totals.
    """
    k = 2
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=5, maxtime=200000)
    time_steps = [times[i] - times[i-1] for i in range(1, len(times))]
    time_steps.append(200000*10000.0)
    popsizes = [n] * len(times)

    arg = arglib.sample_arg(k, 2*n, rho, start=0, end=length)
    argweaver.discretize_arg(arg, times)
    # formatted via %s so the output matches on Python 2 and 3
    print("recomb %s" % (arglib.get_recombs(arg),))

    # the sampled ARG is replaced by a trunk ARG for the actual check
    arg = argweaver.make_trunk_arg(0, length, "n0")

    pos = 10
    tree = arg.get_marginal_tree(pos)
    nlineages = argweaver.get_nlineages_recomb_coal(tree, times)
    states = list(argweaver.iter_coal_states(tree, times))
    mat = argweaver.calc_transition_probs(
        tree, states, nlineages, times, time_steps, popsizes, rho)

    nstates = len(states)

    # coal and recoal2 are not called by the checks below
    def coal(j):
        return 1.0 - exp(-time_steps[j]/(2.0 * n))

    def recoal2(k, j):
        p = coal(j)
        for m in range(k, j):
            p *= 1.0 - coal(m)
        return p

    def recoal(k, j):
        if j == nstates-1:
            return exp(- sum(time_steps[m] / (2.0 * n)
                             for m in range(k, j)))
        else:
            return ((1.0 - exp(-time_steps[j]/(2.0 * n))) *
                    exp(- sum(time_steps[m] / (2.0 * n)
                              for m in range(k, j))))

    def isrecomb(i):
        return 1.0 - exp(-max(rho * 2.0 * times[i], rho))

    def recomb(i, k):
        treelen = 2*times[i] + time_steps[i]
        if k < i:
            return 2.0 * time_steps[k] / treelen / 2.0
        else:
            return time_steps[k] / treelen / 2.0

    def trans(i, j):
        a = states[i][1]
        b = states[j][1]

        # the same sum is added twice, as in the original calculation
        p = sum(recoal(k, b) * recomb(a, k)
                for k in range(0, min(a, b)+1))
        p += sum(recoal(k, b) * recomb(a, k)
                 for k in range(0, min(a, b)+1))
        p *= isrecomb(a)
        if i == j:
            p += 1.0 - isrecomb(a)
        return p

    for i in range(len(states)):
        for j in range(len(states)):
            expected = log(trans(i, j))
            print(isrecomb(states[i][1]))
            print("%s %s %s %s" % (states[i], states[j], mat[i][j],
                                   expected))
            fequal(mat[i][j], expected)

        # recombs add up to 1
        fequal(sum(recomb(i, k) for k in range(i+1)), 0.5)

        # recoal add up to 1
        fequal(sum(recoal(i, j) for j in range(i, nstates)), 1.0)

        # recomb * recoal add up to .5
        fequal(sum(sum(recoal(k, j) * recomb(i, k)
                       for k in range(0, min(i, j)+1))
                   for j in range(0, nstates)), 0.5)

        fequal(sum(trans(i, j) for j in range(len(states))), 1.0)
def test_trans_two():
    """
    Calculate transition probabilities for k=2

    Only calculate a single matrix.  The log transition matrix from
    argweaver.calc_transition_probs for a trunk ARG is compared entry by
    entry with a direct calculation (`trans` below), and the recomb /
    recoal pieces are checked to sum to the expected totals.
    """
    k = 2
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=5, maxtime=200000)
    time_steps = [times[i] - times[i - 1] for i in range(1, len(times))]
    time_steps.append(200000 * 10000.0)
    popsizes = [n] * len(times)

    arg = arglib.sample_arg(k, 2 * n, rho, start=0, end=length)
    argweaver.discretize_arg(arg, times)
    # formatted via %s so the output matches on Python 2 and 3
    print("recomb %s" % (arglib.get_recomb_pos(arg),))

    # the sampled ARG is replaced by a trunk ARG for the actual check
    arg = argweaver.make_trunk_arg(0, length, "n0")

    pos = 10
    tree = arg.get_marginal_tree(pos)
    nlineages = argweaver.get_nlineages_recomb_coal(tree, times)
    states = list(argweaver.iter_coal_states(tree, times))
    mat = argweaver.calc_transition_probs(tree, states, nlineages, times,
                                          time_steps, popsizes, rho)

    nstates = len(states)

    # coal and recoal2 are not called by the checks below
    def coal(j):
        return 1.0 - exp(-time_steps[j] / (2.0 * n))

    def recoal2(k, j):
        p = coal(j)
        for m in range(k, j):
            p *= 1.0 - coal(m)
        return p

    def recoal(k, j):
        if j == nstates - 1:
            return exp(-sum(time_steps[m] / (2.0 * n)
                            for m in range(k, j)))
        else:
            return ((1.0 - exp(-time_steps[j] / (2.0 * n))) *
                    exp(-sum(time_steps[m] / (2.0 * n)
                             for m in range(k, j))))

    def isrecomb(i):
        return 1.0 - exp(-max(rho * 2.0 * times[i], rho))

    def recomb(i, k):
        treelen = 2 * times[i] + time_steps[i]
        if k < i:
            return 2.0 * time_steps[k] / treelen / 2.0
        else:
            return time_steps[k] / treelen / 2.0

    def trans(i, j):
        a = states[i][1]
        b = states[j][1]

        # the same sum is added twice, as in the original calculation
        p = sum(recoal(k, b) * recomb(a, k)
                for k in range(0, min(a, b) + 1))
        p += sum(recoal(k, b) * recomb(a, k)
                 for k in range(0, min(a, b) + 1))
        p *= isrecomb(a)
        if i == j:
            p += 1.0 - isrecomb(a)
        return p

    for i in range(len(states)):
        for j in range(len(states)):
            expected = log(trans(i, j))
            print(isrecomb(states[i][1]))
            print("%s %s %s %s" % (states[i], states[j], mat[i][j],
                                   expected))
            fequal(mat[i][j], expected)

        # recombs add up to 1
        fequal(sum(recomb(i, k) for k in range(i + 1)), 0.5)

        # recoal add up to 1
        fequal(sum(recoal(i, j) for j in range(i, nstates)), 1.0)

        # recomb * recoal add up to .5
        fequal(
            sum(
                sum(
                    recoal(k, j) * recomb(i, k)
                    for k in range(0, min(i, j) + 1))
                for j in range(0, nstates)), 0.5)

        fequal(sum(trans(i, j) for j in range(len(states))), 1.0)