Esempio n. 1
0
def test_emit_internal():
    """
    Calculate emission probabilities for internal branches

    Samples an ARG with sample_arg_dsmc, samples mutations on it, builds
    the alignment, converts both to their C representations and asserts
    that argweaverc.argweaver_assert_emit_internal() returns a true value.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps the region length an integer even where "/"
    # performs true division.
    length = int(10e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(
        k, 2 * n, rho, start=0, end=length, times=times)

    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    # convert the ARG and the alignment to their C representations
    trees, names = argweaverc.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names)

    assert argweaverc.argweaver_assert_emit_internal(
        trees, len(times), times, mu, seqs2, nseqs, seqlen)
Esempio n. 2
0
def sample_arg_dsmc(k, popsize, rho, recombmap=None,
                    start=0.0, end=0.0, times=None, times2=None,
                    init_tree=None, names=None, make_names=True):
    """
    Returns an ARG sampled from the Discrete Sequentially Markov Coalescent

    k   -- chromosomes
    popsize -- effective population size
    rho -- recombination rate (recombinations / site / generation)
    recombmap -- map for variable recombination rate
    start -- starting chromosome coordinate
    end   -- ending chromosome coordinate
    times -- discretized time points (default: None, in which case
             argweaver.get_time_points(20, 160000, .01) is used)
    times2 -- passed through unchanged to sample_dsmc_sprs
    init_tree -- passed through unchanged to sample_dsmc_sprs

    names -- names to use for leaves (default: None)
    make_names -- make names using strings (default: True)
    """
    if times is None:
        maxtime = 160000
        delta = .01
        ntimes = 20
        times = argweaver.get_time_points(ntimes, maxtime, delta)

    it = sample_dsmc_sprs(
        k, popsize, rho, recombmap=recombmap,
        start=start, end=end, times=times, times2=times2,
        init_tree=init_tree, names=names, make_names=make_names)

    # The first item yielded is used as the tree; the rest of the iterator
    # is consumed by make_arg_from_sprs.  The builtin next() works on both
    # Python 2.6+ and Python 3, unlike the .next() method.
    tree = next(it)
    arg = arglib.make_arg_from_sprs(tree, it)

    return arg
Esempio n. 3
0
def test_emit():
    """
    Calculate emission probabilities

    Samples an ARG and an alignment for k sequences, removes the thread
    named "n%d" % (k - 1) from the ARG, and asserts that
    argweaverc.argweaver_assert_emit() returns a true value for the
    remaining trees and the full set of sequences.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps the region length an integer even where "/"
    # performs true division.
    length = int(1e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                    times=times)

    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    # remove one thread from the ARG; its sequence stays in 'seqs'
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    # the removed sequence is placed last in the C sequence array
    trees, names = argweaverc.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names + [new_name])

    assert argweaverc.argweaver_assert_emit(trees, len(times), times, mu,
                                            seqs2, nseqs, seqlen)
Esempio n. 4
0
def test_node_numbering():
    """
    Test node numbering across ARG.

    A node should keep the same numbering until it is broken by
    recombination. The new recoal node should take the index of the broken
    node.

    For every pair of consecutive node lists returned by get_treeset,
    all entries except the one at the broken node's index must be equal.
    """

    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8 * 20
    region_end = 10000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(
        nchroms, 2 * popsize, rho, start=0, end=region_end, times=times)

    treeset, all_nodes = argweaverc.get_treeset(arg, times)
    ptrees, _ages, sprs, _blocks = treeset

    # check nodes list
    nnodes = len(all_nodes[0])
    previous = None
    for i, current in enumerate(all_nodes):
        if previous:
            # the parent of the recombining node is the one that is broken
            broken = ptrees[i][sprs[i][0]]
            for j in range(nnodes):
                if j == broken:
                    continue
                nose.tools.assert_equal(previous[j], current[j])

        previous = current
Esempio n. 5
0
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only calculate a single matrix

    ARGs are re-sampled until one contains at least one recombination
    event; the tree just left of the first listed recombination and the
    first SPR found from that position are then checked with
    argweaverc.assert_transition_switch_probs().
    """

    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)

    # keep sampling until the ARG has a recombination to test
    recombs = []
    while not recombs:
        arg = argweaver.sample_arg_dsmc(
            k, 2 * n, rho, start=0, end=length, times=times)
        recombs = [x.pos for x in arg if x.event == "recomb"]

    pos = recombs[0]
    tree = arg.get_marginal_tree(pos - .5)
    # builtin next() works on Python 2.6+ and Python 3; .next() does not
    rpos, r, c = next(arglib.iter_arg_sprs(arg, start=pos - .5))
    spr = (r, c)

    assert argweaverc.assert_transition_switch_probs(tree, spr, times,
                                                     popsizes, rho)
Esempio n. 6
0
def test_node_numbering():
    """
    Test node numbering across ARG.

    A node should keep the same numbering until it is broken by
    recombination. The new recoal node should take the index of the broken
    node.

    Consecutive node lists from get_treeset are compared entry by entry,
    skipping only the index of the broken node.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(
        k, 2 * n, rho, start=0, end=10000, times=times)

    (ptrees, ages, sprs, blocks), all_nodes = argweaverc.get_treeset(
        arg, times)

    # check nodes list
    nnodes = len(all_nodes[0])
    last_nodes = None
    for i, nodes in enumerate(all_nodes):
        if last_nodes:
            recombj = sprs[i][0]
            brokenj = ptrees[i][recombj]
            # every index except the broken one must be unchanged
            unchanged = [j for j in range(nnodes) if j != brokenj]
            for j in unchanged:
                nose.tools.assert_equal(last_nodes[j], nodes[j])

        last_nodes = nodes
Esempio n. 7
0
def sample_thread(arg, seqs, rho=1.5e-8, mu=2.5e-8, popsize=1e4,
                  times=None, ntimes=20, maxtime=200000, verbose=False):
    """
    Sample a thread for a sequence that is not yet in the ARG

    arg -- ARG to convert with arg2ctrees
    seqs -- mapping from sequence name to sequence; the first key that is
            not among the ARG's names is the sequence that gets added
            (IndexError is raised if there is no such key)
    rho, mu -- passed to argweaver_sample_thread
    popsize -- population size, repeated once per time point
    times -- time points (default: None, in which case they are built by
             argweaver.get_time_points from ntimes and maxtime, delta=.01)
    verbose -- if True, time the call with util.tic/util.toc

    Returns the ARG built by ctrees2arg from the sampled trees.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=maxtime, delta=.01)
    popsizes = [popsize] * len(times)

    if verbose:
        util.tic("sample thread")

    trees, names = arg2ctrees(arg, times)

    # sequences already in the ARG, in the order of 'names'
    ordered_seqs = [seqs[name] for name in names]

    # the new sequence goes last
    unplaced = [key for key in seqs.keys() if key not in names]
    new_name = unplaced[0]
    names.append(new_name)
    ordered_seqs.append(seqs[new_name])

    nseqs = len(ordered_seqs)
    seqlen = len(ordered_seqs[0])
    cseqs = (C.c_char_p * nseqs)(*ordered_seqs)

    trees = argweaver_sample_thread(
        trees, times, len(times),
        popsizes, rho, mu,
        cseqs, nseqs, seqlen, None)
    arg = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return arg
Esempio n. 8
0
def test_trans():
    """
    Calculate transition probabilities

    Reads ntests stored ARGs from test/data/test_trans, discretizes each
    one and asserts that argweaverc.assert_transition_probs() returns a
    true value for the marginal tree at position 10.

    create_data is hard-coded to False; setting it to True regenerates
    the stored ARGs before they are read.
    """
    create_data = False
    if create_data:
        make_clean_dir('test/data/test_trans')

    k = 8
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=10, maxtime=200000)
    popsizes = [n] * len(times)
    ntests = 40

    # generate test data
    if create_data:
        for i in range(ntests):
            arg = arglib.sample_arg(k, 2 * n, rho, start=0, end=length)
            argweaver.discretize_arg(arg, times)
            arg.write('test/data/test_trans/%d.arg' % i)

    for i in range(ntests):
        # single-argument call form prints "arg <i>" on Python 2 and 3
        print('arg %d' % i)
        arg = arglib.read_arg('test/data/test_trans/%d.arg' % i)
        argweaver.discretize_arg(arg, times)
        pos = 10
        tree = arg.get_marginal_tree(pos)

        assert argweaverc.assert_transition_probs(tree, times, popsizes, rho)
Esempio n. 9
0
def calc_joint_prob(arg, seqs, ntimes=20, mu=2.5e-8, rho=1.5e-8, popsizes=1e4,
                    times=None, verbose=False, delete_arg=True):
    """
    Calculate arg_joint_prob

    arg -- ARG to convert with arg2ctrees
    seqs -- sequences, converted with seqs2cseqs in the ARG's name order
    ntimes -- number of time points to build when 'times' is None
    mu, rho -- passed to argweaver_joint_prob
    popsizes -- a single float/int (repeated once per time point) or a
                per-time-point sequence
    times -- time points (default: None, in which case they are built by
             argweaver.get_time_points with maxtime=80000, delta=.01)
    verbose -- if True, time the call with util.tic/util.toc
    delete_arg -- if True, call delete_local_trees on the converted trees

    Returns the value of argweaver_joint_prob.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("calc likelihood")

    trees, names = arg2ctrees(arg, times)
    cseqs, nseqs, seqlen = seqs2cseqs(seqs, names)

    joint = argweaver_joint_prob(
        trees, times, len(times), popsizes, mu, rho, cseqs, nseqs, seqlen)
    if delete_arg:
        delete_local_trees(trees)

    if verbose:
        util.toc()

    return joint
Esempio n. 10
0
def sample_arg_dsmc(k, popsize, rho, recombmap=None,
                    start=0.0, end=0.0, times=None,
                    init_tree=None, names=None, make_names=True):
    """
    Returns an ARG sampled from the Discrete Sequentially Markov Coalescent

    k   -- chromosomes
    popsize -- effective population size
    rho -- recombination rate (recombinations / site / generation)
    recombmap -- map for variable recombination rate
    start -- starting chromosome coordinate
    end   -- ending chromosome coordinate
    times -- discretized time points (default: None, in which case
             argweaver.get_time_points(20, 160000, .01) is used)
    init_tree -- passed through unchanged to sample_dsmc_sprs

    names -- names to use for leaves (default: None)
    make_names -- make names using strings (default: True)
    """
    if times is None:
        maxtime = 160000
        delta = .01
        ntimes = 20
        times = argweaver.get_time_points(ntimes, maxtime, delta)

    it = sample_dsmc_sprs(
        k, popsize, rho, recombmap=recombmap,
        start=start, end=end, times=times,
        init_tree=init_tree, names=names, make_names=make_names)

    # The first item yielded is used as the tree; the rest of the iterator
    # is consumed by make_arg_from_sprs.  The builtin next() works on both
    # Python 2.6+ and Python 3, unlike the .next() method.
    tree = next(it)
    arg = arglib.make_arg_from_sprs(tree, it)

    return arg
Esempio n. 11
0
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only calculate a single matrix

    ARGs are re-sampled until one contains at least one recombination
    event; the tree just left of the first listed recombination and the
    first SPR found from that position are then checked with
    argweaverc.assert_transition_switch_probs().
    """

    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)

    # keep sampling until the ARG has a recombination to test
    recombs = []
    while not recombs:
        arg = argweaver.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                        times=times)
        recombs = [x.pos for x in arg if x.event == "recomb"]

    pos = recombs[0]
    tree = arg.get_marginal_tree(pos - .5)
    # builtin next() works on Python 2.6+ and Python 3; .next() does not
    rpos, r, c = next(arglib.iter_arg_sprs(arg, start=pos - .5))
    spr = (r, c)

    assert argweaverc.assert_transition_switch_probs(
        tree, spr, times, popsizes, rho)
Esempio n. 12
0
def test_emit():
    """
    Calculate emission probabilities

    Samples an ARG and an alignment for k sequences, removes the thread
    named "n%d" % (k - 1) from the ARG, and asserts that
    argweaverc.argweaver_assert_emit() returns a true value for the
    remaining trees and the full set of sequences.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps the region length an integer even where "/"
    # performs true division.
    length = int(1e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(
        k, 2 * n, rho, start=0, end=length, times=times)

    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    # remove one thread from the ARG; its sequence stays in 'seqs'
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    # the removed sequence is placed last in the C sequence array
    trees, names = argweaverc.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names + [new_name])

    assert argweaverc.argweaver_assert_emit(trees, len(times), times, mu,
                                            seqs2, nseqs, seqlen)
Esempio n. 13
0
def resample_arg_region(arg, seqs, region_start, region_end,
                        ntimes=20, rho=1.5e-8, mu=2.5e-8,
                        popsizes=1e4, times=None, carg=False,
                        refine=1, verbose=False):
    """
    Sample ARG for sequences

    arg -- ARG to convert with arg2ctrees
    seqs -- mapping from sequence name to sequence; names not in the ARG
            are appended after the ARG's own names
    region_start, region_end, refine -- passed to
            argweaver_resample_arg_region
    ntimes -- number of time points to build when 'times' is None
    rho, mu -- passed to argweaver_resample_arg_region
    popsizes -- a single float/int (repeated once per time point) or a
                per-time-point sequence
    times -- time points (default: None, in which case they are built by
             argweaver.get_time_points with maxtime=80000, delta=.01)
    carg -- if True, return the (trees, names) pair instead of converting
            back with ctrees2arg
    verbose -- if True, time the steps with util.tic/util.toc
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("resample arg")
        # convert arg to c++
        util.tic("convert arg")
    trees, names = arg2ctrees(arg, times)
    if verbose:
        util.toc()

    # get sequences in same order
    # and add all other sequences not in arg yet
    in_arg = set(names)
    names.extend([name for name, _ in seqs.items() if name not in in_arg])
    seqs2, nseqs, _ = seqs2cseqs(seqs, names)

    # resample arg
    # the sequence length is taken from the first named sequence rather
    # than from the value returned by seqs2cseqs
    seqlen = len(seqs[names[0]])

    trees = argweaver_resample_arg_region(
        trees, times, len(times),
        popsizes, rho, mu, seqs2, nseqs, seqlen,
        region_start, region_end, refine)

    # convert arg back to python
    result = (trees, names) if carg else ctrees2arg(
        trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
Esempio n. 14
0
def sample_all_arg(seqs, ntimes=20, rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
                   refine=1, times=None, verbose=False, carg=False,
                   prob_path_switch=.1):
    """
    Sample ARG for sequences

    seqs -- mapping from sequence name to sequence; the first key names
            the trunk ARG and the first value sets its length
    ntimes -- number of time points to build when 'times' is None
    rho, mu, refine, prob_path_switch -- passed to
            argweaver_resample_all_arg
    popsizes -- a single float/int (repeated once per time point) or a
                per-time-point sequence
    times -- time points (default: None, in which case they are built by
             argweaver.get_time_points with maxtime=80000, delta=.01)
    verbose -- if True, time the steps with util.tic/util.toc
    carg -- if True, return the (trees, names) pair instead of converting
            back with ctrees2arg
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("resample arg")

    # convert arg to c++
    if verbose:
        util.tic("convert arg")

    # list() makes the indexing work when keys()/values() return views
    # instead of lists.
    first_seq = list(seqs.values())[0]
    first_name = list(seqs.keys())[0]
    arg = argweaver.make_trunk_arg(0, len(first_seq), name=first_name)
    trees, names = arg2ctrees(arg, times)
    if verbose:
        util.toc()

    # get sequences in same order
    # and add all other sequences not in arg yet
    seqs2 = [seqs[name] for name in names]
    leaves = set(names)
    for name, seq in seqs.items():
        if name not in leaves:
            names.append(name)
            seqs2.append(seq)

    # resample arg
    seqlen = len(seqs[names[0]])
    trees = argweaver_resample_all_arg(
        trees, times, len(times),
        popsizes, rho, mu,
        (C.c_char_p * len(seqs2))(*seqs2), len(seqs2),
        seqlen, refine, prob_path_switch)

    if carg:
        arg = (trees, names)
    else:
        # convert arg back to python
        arg = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return arg
Esempio n. 15
0
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only calculate a single matrix

    Reads ntests stored ARGs from test/data/test_trans_switch and, for
    each, checks the tree just left of the first listed recombination and
    the first SPR found from that position with
    argweaverc.assert_transition_switch_probs().  On failure the tree is
    drawn before the test fails.

    create_data is hard-coded to False; setting it to True regenerates
    the stored ARGs before they are read.
    """
    data_dir = 'test/data/test_trans_switch'
    create_data = False
    if create_data:
        make_clean_dir('test/data/test_trans_switch')

    # model parameters
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)
    ntests = 100

    # generate test data
    if create_data:
        for i in range(ntests):
            # Sample ARG with at least one recombination.
            arg = None
            while arg is None or not any(x.event == "recomb" for x in arg):
                arg = argweaver.sample_arg_dsmc(
                    k, 2 * n, rho, start=0, end=length, times=times)
            arg.write(data_dir + '/%d.arg' % i)

    for i in range(ntests):
        print('arg', i)
        arg = arglib.read_arg(data_dir + '/%d.arg' % i)
        argweaver.discretize_arg(arg, times)
        pos = [x.pos for x in arg if x.event == "recomb"][0]
        left = pos - .5
        tree = arg.get_marginal_tree(left)
        rpos, r, c = next(arglib.iter_arg_sprs(arg, start=left))
        spr = (r, c)

        if argweaverc.assert_transition_switch_probs(
                tree, spr, times, popsizes, rho):
            continue

        # show the offending tree before failing
        tree2 = tree.get_tree()
        treelib.remove_single_children(tree2)
        treelib.draw_tree_names(tree2, maxlen=5, minlen=5)
        assert False
Esempio n. 16
0
def resample_mcmc_arg(arg, seqs, ntimes=20,
                      rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
                      refine=1, times=None, verbose=False, carg=False,
                      window=200000, niters2=5):
    """
    Sample ARG for sequences

    arg -- ARG to convert with arg2ctrees
    seqs -- mapping from sequence name to sequence; names not in the ARG
            are appended after the ARG's own names
    ntimes -- number of time points to build when 'times' is None
    rho, mu, refine, niters2, window -- passed to
            argweaver_resample_mcmc_arg
    popsizes -- a single float/int (repeated once per time point) or a
                per-time-point sequence
    times -- time points (default: None, in which case they are built by
             argweaver.get_time_points with maxtime=80000, delta=.01)
    verbose -- if True, time the steps with util.tic/util.toc
    carg -- if True, return the (trees, names) pair instead of converting
            back with ctrees2arg
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("resample arg")
        # convert arg to c++
        util.tic("convert arg")
    trees, names = arg2ctrees(arg, times)
    if verbose:
        util.toc()

    # get sequences in same order
    # and add all other sequences not in arg yet
    names = list(names)
    in_arg = set(names)
    names += [name for name in seqs if name not in in_arg]
    seqs2, nseqs, seqlen = seqs2cseqs(seqs, names)

    # resample arg
    trees = argweaver_resample_mcmc_arg(
        trees, times, len(times),
        popsizes, rho, mu,
        seqs2, nseqs, seqlen, refine, niters2, window)

    # convert arg back to python unless the C form was requested
    result = (trees, names) if carg else ctrees2arg(
        trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
Esempio n. 17
0
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only calculate a single matrix

    Reads ntests stored ARGs from test/data/test_trans_switch and, for
    each, checks the tree just left of the first listed recombination and
    the first SPR found from that position with
    argweaverc.assert_transition_switch_probs().  On failure the tree is
    drawn before the test fails.

    create_data is hard-coded to False; setting it to True regenerates
    the stored ARGs before they are read.
    """
    create_data = False
    if create_data:
        make_clean_dir('test/data/test_trans_switch')

    # model parameters
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)
    ntests = 100

    # generate test data
    if create_data:
        for i in range(ntests):
            # Sample ARG with at least one recombination.
            while True:
                arg = argweaver.sample_arg_dsmc(
                    k, 2 * n, rho, start=0, end=length, times=times)
                if any(x.event == "recomb" for x in arg):
                    break
            arg.write('test/data/test_trans_switch/%d.arg' % i)

    for i in range(ntests):
        # single-argument call form prints "arg <i>" on Python 2 and 3
        print('arg %d' % i)
        arg = arglib.read_arg('test/data/test_trans_switch/%d.arg' % i)
        argweaver.discretize_arg(arg, times)
        recombs = [x.pos for x in arg if x.event == "recomb"]
        pos = recombs[0]
        tree = arg.get_marginal_tree(pos - .5)
        # builtin next() works on Python 2.6+ and Python 3; .next() does not
        rpos, r, c = next(arglib.iter_arg_sprs(arg, start=pos - .5))
        spr = (r, c)

        if not argweaverc.assert_transition_switch_probs(
                tree, spr, times, popsizes, rho):
            # show the offending tree before failing
            tree2 = tree.get_tree()
            treelib.remove_single_children(tree2)
            treelib.draw_tree_names(tree2, maxlen=5, minlen=5)
            assert False
Esempio n. 18
0
def test_arg_convert():
    """
    Test conversion for python to C args

    A sampled ARG is converted with arg2ctrees, converted back with
    ctrees2arg, and the result is compared to the original with arg_equal.
    """

    nchroms = 10
    popsize = 1e4
    rho = 1.5e-8 * 20
    region_end = 10000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    original = argweaver.sample_arg_dsmc(
        nchroms, 2 * popsize, rho, start=0, end=region_end, times=times)

    # convert to C++ and back
    trees, names = argweaverc.arg2ctrees(original, times)
    roundtrip = argweaverc.ctrees2arg(trees, names, times)

    arg_equal(original, roundtrip)
Esempio n. 19
0
def argweaver_forward_algorithm(arg, seqs, rho=1.5e-8,
                                mu=2.5e-8, popsizes=1e4, times=None,
                                ntimes=20, maxtime=180000,
                                verbose=False,
                                prior=None, internal=False, slow=False):
    """
    Run the forward algorithm and return the forward table as lists

    arg -- an ARG, or an already converted (trees, names) pair as
           recognized by is_carg
    seqs -- mapping from sequence name to sequence; sequences whose names
            are not in the ARG are appended after the ARG's own sequences
    rho, mu -- passed to argweaver_forward_alg
    popsizes -- a single float/int (repeated once per time point) or a
                per-time-point sequence
    times -- time points (default: None, in which case they are built by
             argweaver.get_time_points from ntimes and maxtime, delta=.01)
    verbose -- if True, time the call with util.tic/util.toc
    prior -- prior passed to argweaver_forward_alg; None (the default) or
             an empty sequence means no prior is flagged
    internal, slow -- passed to argweaver_forward_alg

    Returns one row per site, each truncated to the number of states
    reported by argweaver_get_nstates for that site.
    """
    # A fresh list per call avoids sharing one mutable default object
    # between calls.
    if prior is None:
        prior = []

    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=maxtime, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("forward")

    if is_carg(arg):
        trees, names = arg
    else:
        trees, names = arg2ctrees(arg, times)

    # ARG sequences first (in 'names' order), then every other sequence
    seqs2 = [seqs[node] for node in names]
    for name in seqs.keys():
        if name not in names:
            seqs2.append(seqs[name])
    seqlen = len(seqs2[0])

    fw = argweaver_forward_alg(trees, times, len(times),
                               popsizes, rho, mu,
                               (C.c_char_p * len(seqs2))(*seqs2), len(seqs2),
                               seqlen, len(prior) > 0, prior, internal,
                               slow)

    # presumably argweaver_get_nstates fills 'nstates' in place -- confirm
    # against the wrapper's definition
    nstates = [0] * seqlen
    argweaver_get_nstates(trees, len(times), internal, nstates)

    # copy the used part of each row before the C matrix is freed
    probs = [row[:n] for row, n in zip(fw, nstates)]

    delete_forward_matrix(fw, seqlen)

    if verbose:
        util.toc()

    return probs
Esempio n. 20
0
def test_arg_convert():
    """
    Test conversion for python to C args

    Round-trips a sampled ARG through arg2ctrees and ctrees2arg and
    compares the result to the original with arg_equal.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(
        k, 2 * n, rho, start=0, end=10000, times=times)

    # convert to C++ and back
    ctrees, leaf_names = argweaverc.arg2ctrees(arg, times)
    arg_equal(arg, argweaverc.ctrees2arg(ctrees, leaf_names, times))
Esempio n. 21
0
def test_arg_joint():
    """
    Compute joint probability of an ARG

    Samples an ARG and an alignment for two chromosomes and prints the
    value returned by argweaver.calc_joint_prob(); nothing is asserted.
    """

    k = 2
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = 10000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                    times=times)
    muts = argweaver.sample_arg_mutations(arg, mu, times=times)
    seqs = arglib.make_alignment(arg, muts)

    lk = argweaver.calc_joint_prob(arg, seqs, mu=mu, rho=rho, times=times)
    # single-argument call form is valid on both Python 2 and Python 3
    print(lk)
Esempio n. 22
0
def test_trans():
    """
    Calculate transition probabilities

    Samples and discretizes an ARG, then asserts that
    argweaverc.assert_transition_probs() returns a true value for the
    marginal tree at position 10.
    """

    nchroms = 4
    popsize = 1e4
    rho = 1.5e-8 * 20
    times = argweaver.get_time_points(ntimes=4, maxtime=200000)

    arg = arglib.sample_arg(nchroms, 2 * popsize, rho, start=0, end=1000)
    argweaver.discretize_arg(arg, times)
    tree = arg.get_marginal_tree(10)

    # one population size per time point
    assert argweaverc.assert_transition_probs(
        tree, times, [popsize] * len(times), rho)
Esempio n. 23
0
def test_trans():
    """
    Calculate transition probabilities

    The marginal tree at position 10 of a sampled, discretized ARG is
    checked with argweaverc.assert_transition_probs().
    """

    k, n = 4, 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=4, maxtime=200000)
    popsizes = [n for _ in range(len(times))]

    arg = arglib.sample_arg(k, 2 * n, rho, start=0, end=length)
    argweaver.discretize_arg(arg, times)

    tree = arg.get_marginal_tree(10)
    ok = argweaverc.assert_transition_probs(tree, times, popsizes, rho)
    assert ok
Esempio n. 24
0
def test_trans_switch_internal():
    """
    Calculate transition probabilities for switch matrix and internal branches

    Only calculate a single matrix

    Samples an ARG, converts it with arg2ctrees and asserts that
    argweaverc.assert_transition_probs_switch_internal() returns a true
    value.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    # Floor division keeps the region length an integer even where "/"
    # performs true division.
    length = int(100e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)

    arg = argweaver.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                    times=times)
    trees, names = argweaverc.arg2ctrees(arg, times)

    assert argweaverc.assert_transition_probs_switch_internal(
        trees, times, popsizes, rho)
Esempio n. 25
0
def test_trans_internal():
    """
    Calculate transition probabilities for internal branch re-sampling

    Only calculate a single matrix

    Samples and discretizes an ARG, then asserts that
    argweaverc.assert_transition_probs_internal() returns a true value
    for the marginal tree at position 10.
    """

    nchroms = 5
    popsize = 1e4
    rho = 1.5e-8 * 20
    times = argweaver.get_time_points(ntimes=5, maxtime=200000)

    arg = arglib.sample_arg(nchroms, 2 * popsize, rho, start=0, end=1000)
    argweaver.discretize_arg(arg, times)
    tree = arg.get_marginal_tree(10)

    # one population size per time point
    assert argweaverc.assert_transition_probs_internal(
        tree, times, [popsize] * len(times), rho)
Esempio n. 26
0
def test_trans_internal():
    """
    Calculate transition probabilities for internal branch re-sampling

    Only calculate a single matrix

    The marginal tree at position 10 of a sampled, discretized ARG is
    checked with argweaverc.assert_transition_probs_internal().
    """

    k, n = 5, 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=5, maxtime=200000)
    popsizes = [n for _ in range(len(times))]

    arg = arglib.sample_arg(k, 2 * n, rho, start=0, end=length)
    argweaver.discretize_arg(arg, times)

    tree = arg.get_marginal_tree(10)
    ok = argweaverc.assert_transition_probs_internal(
        tree, times, popsizes, rho)
    assert ok
Esempio n. 27
0
def test_forward():
    """
    Compare the default and slow=True forward algorithm results

    Samples an ARG and an alignment, removes one thread from the ARG and
    runs argweaverc.argweaver_forward_algorithm() twice on the same C
    trees, once with the defaults and once with slow=True.  Every pair of
    corresponding entries is compared with fequal (rel=.0001).
    """

    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    # single-argument call form prints the same text on Python 2 and 3
    print("muts %d" % len(muts))
    print("recomb %d" % len(arglib.get_recomb_pos(arg)))

    argweaver.discretize_arg(arg, times)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    probs2 = argweaverc.argweaver_forward_algorithm(carg,
                                                    seqs,
                                                    times=times,
                                                    slow=True)
    util.toc()

    # builtin zip is available on both Python 2 and Python 3
    for col1, col2 in zip(probs1, probs2):
        for a, b in zip(col1, col2):
            fequal(a, b, rel=.0001)
Esempio n. 28
0
def sample_arg(seqs, ntimes=20, rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
               refine=0, nremove=1, times=None, verbose=False,
               carg=False):
    """
    Sample ARG for sequences

    seqs -- mapping from sequence name to sequence
    ntimes -- number of time points to build when 'times' is None
    rho, mu, refine, nremove -- passed to argweaver_sample_arg_refine
    popsizes -- a single float/int (repeated once per time point) or a
                per-time-point sequence
    times -- time points (default: None, in which case they are built by
             argweaver.get_time_points with maxtime=80000, delta=.01)
    verbose -- if True, time the call with util.tic/util.toc
    carg -- if True, return the (trees, names) pair instead of converting
            with ctrees2arg
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("sample arg")

    # names and sequences in matching order, taken from one items() pass
    pairs = list(seqs.items())
    names = [name for name, _ in pairs]
    seqs2 = [seq for _, seq in pairs]
    nseqs = len(seqs)

    # sample arg
    trees = argweaver_sample_arg_refine(
        times, len(times),
        popsizes, rho, mu,
        (C.c_char_p * nseqs)(*seqs2), nseqs, len(seqs2[0]), refine,
        nremove)

    # convert to python unless the C form was requested
    result = (trees, names) if carg else ctrees2arg(
        trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
Esempio n. 29
0
def test_emit_internal():
    """
    Calculate emission probabilities for internal branches

    Samples an ARG with sample_arg_dsmc, samples mutations on it, builds
    the alignment, converts both to their C representations and asserts
    that argweaverc.argweaver_assert_emit_internal() returns a true value.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps the region length an integer even where "/"
    # performs true division.
    length = int(10e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(k, 2 * n, rho, start=0, end=length,
                                    times=times)

    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    # convert the ARG and the alignment to their C representations
    trees, names = argweaverc.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names)

    assert argweaverc.argweaver_assert_emit_internal(
        trees, len(times), times, mu, seqs2, nseqs, seqlen)
Esempio n. 30
0
def test_forward():
    """
    Compare the default and slow=True forward algorithm results

    Samples an ARG and an alignment, removes one thread from the ARG and
    runs argweaverc.argweaver_forward_algorithm() twice on the same C
    trees, once with the defaults and once with slow=True.  Every pair of
    corresponding entries is compared with fequal (rel=.0001).
    """

    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    # single-argument call form prints the same text on Python 2 and 3
    print("muts %d" % len(muts))
    print("recomb %d" % len(arglib.get_recombs(arg)))

    argweaver.discretize_arg(arg, times)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    probs2 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times,
                                                    slow=True)
    util.toc()

    # builtin zip is available on both Python 2 and Python 3
    for col1, col2 in zip(probs1, probs2):
        for a, b in zip(col1, col2):
            fequal(a, b, rel=.0001)
Esempio n. 31
0
def calc_likelihood_parsimony(arg, seqs, ntimes=20, mu=2.5e-8,
                              times=None, delete_arg=True, verbose=False):
    """
    Calculate arg_likelihood

    arg -- ARG to convert with arg2ctrees
    seqs -- sequences, converted with seqs2cseqs in the ARG's name order
    ntimes -- number of time points to build when 'times' is None
    mu -- passed to argweaver_likelihood_parsimony
    times -- time points (default: None, in which case they are built by
             argweaver.get_time_points with maxtime=80000, delta=.01)
    delete_arg -- if True, call delete_local_trees on the converted trees
    verbose -- if True, time the call with util.tic/util.toc

    Returns the value of argweaver_likelihood_parsimony.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)

    if verbose:
        util.tic("calc likelihood")

    trees, names = arg2ctrees(arg, times)
    cseqs, nseqs, seqlen = seqs2cseqs(seqs, names)

    likelihood = argweaver_likelihood_parsimony(
        trees, times, len(times), mu, cseqs, nseqs, seqlen)
    if delete_arg:
        delete_local_trees(trees)

    if verbose:
        util.toc()

    return likelihood
Esempio n. 32
0
def show_plots(arg_file, sites_file, stats_file, output_prefix,
               rho, mu, popsize, ntimes=20, maxtime=200000):
    """
    Show plots of convergence.

    arg_file -- file with the true ARG, read with arglib.read_arg
    sites_file -- sites file, read with argweaver.read_sites
    stats_file -- table of per-iteration statistics, read with read_table;
                  the columns "joint", "likelihood", "prior", "recombs"
                  and "arglen" are plotted
    output_prefix -- prefix of the PDF files that are written
    rho, mu, popsize -- model parameters used for the true ARG's
                        likelihood and prior
    ntimes, maxtime -- passed to argweaver.get_time_points

    One trace plot is written per statistic, with a gray horizontal line
    at the value computed from the true ARG.
    """

    def plot_trace(column, suffix, label, truth):
        # Plot one stats column against iterations, with the y-range
        # widened to include 'truth', which is drawn as a gray line.
        y = data.cget(column)
        rplot_start(output_prefix + suffix, width=8, height=5)
        rp.plot(y, t="l", ylim=[min(min(y), truth), max(max(y), truth)],
                main=label,
                xlab="iterations",
                ylab=label)
        rp.lines([0, len(y)], [truth, truth], col="gray")
        rplot_end(True)

    # read true arg and seqs
    times = argweaver.get_time_points(ntimes=ntimes, maxtime=maxtime)
    arg = arglib.read_arg(arg_file)
    argweaver.discretize_arg(arg, times, ignore_top=False, round_age="closer")
    arg = arglib.smcify_arg(arg)
    seqs = argweaver.sites2seqs(argweaver.read_sites(sites_file))

    # compute true stats
    arglen = arglib.arglen(arg)
    # from here on 'arg' is the pair returned by arg2ctrees
    arg = argweaverc.arg2ctrees(arg, times)
    nrecombs = argweaverc.get_local_trees_ntrees(arg[0]) - 1
    lk = argweaverc.calc_likelihood(
        arg, seqs, mu=mu, times=times,
        delete_arg=False)
    prior = argweaverc.calc_prior_prob(
        arg, rho=rho, times=times, popsizes=popsize,
        delete_arg=False)
    joint = lk + prior

    data = read_table(stats_file)

    plot_trace("joint", ".trace.joint.pdf", "joint probability", joint)
    plot_trace("likelihood", ".trace.lk.pdf", "likelihood", lk)
    plot_trace("prior", ".trace.prior.pdf", "prior probability", prior)
    plot_trace("recombs", ".trace.nrecombs.pdf",
               "number of recombinations", nrecombs)
    plot_trace("arglen", ".trace.arglen.pdf", "ARG branch length", arglen)
Esempio n. 33
0
def show_plots(arg_file, sites_file, stats_file, output_prefix,
               rho, mu, popsize, ntimes=20, maxtime=200000):
    """
    Show plots of convergence.

    Computes reference statistics from the true ARG and, for each one, writes
    a PDF trace plot of the matching column of the stats table with the
    reference value drawn as a gray horizontal line.

    arg_file -- ARG file read with arglib.read_arg (the "true" ARG)
    sites_file -- sites file read with argweaver.read_sites
    stats_file -- table read with read_table; the columns "joint",
                  "likelihood", "prior", "recombs" and "arglen" are plotted
    output_prefix -- prefix of the output files; each plot is written to
                     output_prefix + ".trace.<name>.pdf"
    rho -- passed to argweaverc.calc_prior_prob
    mu -- passed to argweaverc.calc_likelihood
    popsize -- passed to argweaverc.calc_prior_prob as popsizes
    ntimes -- number of time points requested from get_time_points
    maxtime -- maximum time requested from get_time_points
    """

    # read true arg and seqs
    times = argweaver.get_time_points(ntimes=ntimes, maxtime=maxtime)
    arg = arglib.read_arg(arg_file)
    argweaver.discretize_arg(arg, times, ignore_top=False, round_age="closer")
    arg = arglib.smcify_arg(arg)
    seqs = argweaver.sites2seqs(argweaver.read_sites(sites_file))

    # compute true stats
    arglen = arglib.arglen(arg)
    # arg2ctrees' result is indexed below, so keep it under its own name
    # instead of rebinding `arg`
    ctrees = argweaverc.arg2ctrees(arg, times)
    nrecombs = argweaverc.get_local_trees_ntrees(ctrees[0]) - 1
    lk = argweaverc.calc_likelihood(
        ctrees, seqs, mu=mu, times=times,
        delete_arg=False)
    prior = argweaverc.calc_prior_prob(
        ctrees, rho=rho, times=times, popsizes=popsize,
        delete_arg=False)
    # NOTE(review): summing presumably means both values are log-scale --
    # confirm against argweaverc
    joint = lk + prior

    data = read_table(stats_file)

    # (stats column, true value, output file suffix, plot title / y label)
    traces = [
        ("joint", joint, "joint", "joint probability"),
        ("likelihood", lk, "lk", "likelihood"),
        ("prior", prior, "prior", "prior probability"),
        ("recombs", nrecombs, "nrecombs", "number of recombinations"),
        ("arglen", arglen, "arglen", "ARG branch length"),
    ]

    for column, truth, suffix, label in traces:
        y = data.cget(column)
        rplot_start(output_prefix + ".trace." + suffix + ".pdf",
                    width=8, height=5)
        # widen the y range so the reference line is always visible
        rp.plot(y, t="l", ylim=[min(min(y), truth), max(max(y), truth)],
                main=label,
                xlab="iterations",
                ylab=label)
        rp.lines([0, len(y)], [truth, truth], col="gray")
        rplot_end(True)
Esempio n. 34
0
def test_trans_two():
    """
    Calculate transition probabilities for k=2

    Only calculate a single matrix

    The matrix returned by argweaver.calc_transition_probs for a trunk ARG is
    compared, entry by entry, with the log of a closed-form expression built
    from the local helpers recoal, isrecomb and recomb.  Several
    normalization identities of those helpers are checked as well.
    """

    k = 2
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=5, maxtime=200000)
    time_steps = [times[i] - times[i-1]
                  for i in range(1, len(times))]
    # one extra, very long step so time_steps has an entry per time point
    time_steps.append(200000*10000.0)
    popsizes = [n] * len(times)

    # This sampled ARG is only discretized and reported; the transition
    # matrix below is computed on the trunk ARG that replaces it.
    arg = arglib.sample_arg(k, 2*n, rho, start=0, end=length)

    argweaver.discretize_arg(arg, times)
    # single-argument print yields the same text on Python 2 and Python 3
    print("recomb %s" % (arglib.get_recombs(arg),))

    arg = argweaver.make_trunk_arg(0, length, "n0")

    pos = 10
    tree = arg.get_marginal_tree(pos)
    nlineages = argweaver.get_nlineages_recomb_coal(tree, times)
    states = list(argweaver.iter_coal_states(tree, times))
    mat = argweaver.calc_transition_probs(
        tree, states, nlineages,
        times, time_steps, popsizes, rho)

    nstates = len(states)

    def recoal(r, c):
        # exp(-sum of time_steps[m]/(2n) for m in [r, c)), multiplied by
        # 1 - exp(-time_steps[c]/(2n)) unless c is the last state index
        # (presumably: no coalescence from r up to c, then coalescence in c)
        nocoal = exp(- sum(time_steps[m] / (2.0 * n)
                           for m in range(r, c)))
        if c == nstates-1:
            return nocoal
        return (1.0 - exp(-time_steps[c]/(2.0 * n))) * nocoal

    def isrecomb(i):
        return 1.0 - exp(-max(rho * 2.0 * times[i], rho))

    def recomb(i, r):
        treelen = 2*times[i] + time_steps[i]
        if r < i:
            return 2.0 * time_steps[r] / treelen / 2.0
        else:
            return time_steps[r] / treelen / 2.0

    def trans(i, j):
        a = states[i][1]
        b = states[j][1]

        # recomb() sums to 0.5 (checked below), so the sum is doubled to
        # obtain a distribution that sums to 1
        p = 2.0 * sum(recoal(r, b) * recomb(a, r)
                      for r in range(0, min(a, b)+1))
        p *= isrecomb(a)
        if i == j:
            p += 1.0 - isrecomb(a)
        return p

    for i in range(nstates):
        for j in range(nstates):
            print(isrecomb(states[i][1]))
            expected = log(trans(i, j))
            print("%s %s %s %s" % (states[i], states[j], mat[i][j], expected))
            fequal(mat[i][j], expected)

        # recombs add up to .5
        fequal(sum(recomb(i, r) for r in range(i+1)), 0.5)

        # recoal add up to 1
        fequal(sum(recoal(i, j) for j in range(i, nstates)), 1.0)

        # recomb * recoal add up to .5
        fequal(sum(sum(recoal(r, j) * recomb(i, r)
                       for r in range(0, min(i, j)+1))
                   for j in range(0, nstates)), 0.5)

        # each row of the expected transition matrix adds up to 1
        fequal(sum(trans(i, j) for j in range(nstates)), 1.0)
Esempio n. 35
0
def test_trans_two():
    """
    Calculate transition probabilities for k=2

    Only calculate a single matrix

    Each entry of the matrix from argweaver.calc_transition_probs (computed
    on a trunk ARG) is checked against the log of a closed-form value
    assembled from the local helpers recoal, isrecomb and recomb; the
    helpers' own normalization identities are checked too.
    """

    k = 2
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=5, maxtime=200000)
    time_steps = [times[i] - times[i - 1] for i in range(1, len(times))]
    # one extra, very long step so time_steps has an entry per time point
    time_steps.append(200000 * 10000.0)
    popsizes = [n] * len(times)

    # The sampled ARG is only discretized and reported here; the matrix
    # under test is computed on the trunk ARG created afterwards.
    arg = arglib.sample_arg(k, 2 * n, rho, start=0, end=length)

    argweaver.discretize_arg(arg, times)
    # single-argument print yields the same text on Python 2 and Python 3
    print("recomb %s" % (arglib.get_recomb_pos(arg),))

    arg = argweaver.make_trunk_arg(0, length, "n0")

    pos = 10
    tree = arg.get_marginal_tree(pos)
    nlineages = argweaver.get_nlineages_recomb_coal(tree, times)
    states = list(argweaver.iter_coal_states(tree, times))
    mat = argweaver.calc_transition_probs(tree, states, nlineages, times,
                                          time_steps, popsizes, rho)

    nstates = len(states)

    def recoal(r, c):
        # exp(-sum of time_steps[m]/(2n) for m in [r, c)), multiplied by
        # 1 - exp(-time_steps[c]/(2n)) unless c is the last state index
        # (presumably: no coalescence from r up to c, then coalescence in c)
        nocoal = exp(-sum(time_steps[m] / (2.0 * n) for m in range(r, c)))
        if c == nstates - 1:
            return nocoal
        return (1.0 - exp(-time_steps[c] / (2.0 * n))) * nocoal

    def isrecomb(i):
        return 1.0 - exp(-max(rho * 2.0 * times[i], rho))

    def recomb(i, r):
        treelen = 2 * times[i] + time_steps[i]
        if r < i:
            return 2.0 * time_steps[r] / treelen / 2.0
        else:
            return time_steps[r] / treelen / 2.0

    def trans(i, j):
        a = states[i][1]
        b = states[j][1]

        # recomb() sums to 0.5 (checked below), so the sum is doubled to
        # obtain a distribution that sums to 1
        p = 2.0 * sum(
            recoal(r, b) * recomb(a, r) for r in range(0, min(a, b) + 1))
        p *= isrecomb(a)
        if i == j:
            p += 1.0 - isrecomb(a)
        return p

    for i in range(nstates):
        for j in range(nstates):
            print(isrecomb(states[i][1]))
            expected = log(trans(i, j))
            print("%s %s %s %s" % (states[i], states[j], mat[i][j], expected))
            fequal(mat[i][j], expected)

        # recombs add up to .5
        fequal(sum(recomb(i, r) for r in range(i + 1)), 0.5)

        # recoal add up to 1
        fequal(sum(recoal(i, j) for j in range(i, nstates)), 1.0)

        # recomb * recoal add up to .5
        fequal(
            sum(
                sum(
                    recoal(r, j) * recomb(i, r)
                    for r in range(0, min(i, j) + 1))
                for j in range(0, nstates)), 0.5)

        # each row of the expected transition matrix adds up to 1
        fequal(sum(trans(i, j) for j in range(nstates)), 1.0)