예제 #1
0
def test_emit_internal():
    """
    Check emission probabilities for internal branches.

    Samples a 10-chromosome ARG with ``argweaver.sample_arg_dsmc``, samples
    mutations on it and builds the matching alignment.  The ARG and the
    sequences are then converted to their C representations and handed to
    ``argweaverc.argweaver_assert_emit_internal``; the test passes when
    that call returns a truthy value.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps the region end an integer even when true
    # division is in effect (Python 3 or ``from __future__ import division``).
    length = int(10e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(k,
                                    2 * n,
                                    rho,
                                    start=0,
                                    end=length,
                                    times=times)

    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    # The sequences are converted using the name order returned by
    # arg2ctrees.
    trees, names = argweaverc.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names)

    assert argweaverc.argweaver_assert_emit_internal(trees, len(times), times,
                                                     mu, seqs2, nseqs, seqlen)
예제 #2
0
def test_emit():
    """
    Check emission probabilities.

    Samples a 10-chromosome ARG and an alignment for it, then removes the
    thread of the last chromosome (``n9``) from the ARG.  The reduced ARG
    and the full set of sequences are converted to their C representations
    and passed to ``argweaverc.argweaver_assert_emit``; the test passes
    when that call returns a truthy value.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Floor division keeps the region end an integer even when true
    # division is in effect (Python 3 or ``from __future__ import division``).
    length = int(1e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(k, 2*n, rho, start=0, end=length,
                                    times=times)

    # The alignment is built before the thread is removed, so it still
    # holds the sequence of the removed chromosome.
    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    new_name = "n%d" % (k-1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    # The removed chromosome is appended after the names still in the ARG,
    # so its sequence comes last in the converted sequence set.
    trees, names = argweaverc.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names + [new_name])

    assert argweaverc.argweaver_assert_emit(trees, len(times), times, mu,
                                            seqs2, nseqs, seqlen)
예제 #3
0
def test_emit():
    """
    Check emission probabilities for threading one chromosome.

    A 10-chromosome ARG is sampled together with mutations and an
    alignment.  The thread of chromosome ``n9`` is then removed from the
    ARG, and the remaining trees plus all sequences (removed chromosome
    last) are given to ``argweaverc.argweaver_assert_emit``, whose return
    value is asserted to be truthy.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # Use floor division so the region end stays an int regardless of
    # whether ``/`` means integer or true division in this interpreter.
    length = int(1e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(k,
                                    2 * n,
                                    rho,
                                    start=0,
                                    end=length,
                                    times=times)

    # Sequences are generated from the full ARG, before any thread removal.
    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    trees, names = argweaverc.arg2ctrees(arg, times)
    # names no longer has new_name's thread; add it back at the end so the
    # converted sequences cover every chromosome in the alignment.
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names + [new_name])

    assert argweaverc.argweaver_assert_emit(trees, len(times), times, mu,
                                            seqs2, nseqs, seqlen)
예제 #4
0
def test_arg_convert():
    """
    Round-trip an ARG through the C tree representation.

    A sampled ARG is converted with ``argweaverc.arg2ctrees`` and rebuilt
    with ``argweaverc.ctrees2arg``; the rebuilt ARG is compared with the
    original through ``arg_equal``.
    """

    nchroms = 10
    popsize = 1e4
    recomb_rate = 1.5e-8 * 20
    region_end = 10000
    time_points = argweaver.get_time_points(ntimes=20, maxtime=200000)

    original = argweaver.sample_arg_dsmc(
        nchroms,
        2 * popsize,
        recomb_rate,
        start=0,
        end=region_end,
        times=time_points)

    # Python -> C++ ...
    ctrees, leaf_names = argweaverc.arg2ctrees(original, time_points)
    # ... and C++ -> Python again.
    restored = argweaverc.ctrees2arg(ctrees, leaf_names, time_points)

    arg_equal(original, restored)
예제 #5
0
def test_arg_convert():
    """
    Verify that converting an ARG to C trees and back preserves it.

    The comparison itself is delegated to ``arg_equal``, which receives
    the sampled ARG and the ARG rebuilt from the C trees.
    """

    k, n = 10, 1e4
    rho = 1.5e-8 * 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    # Keyword arguments for the sampler: region [0, 10000] on the
    # discretized time grid.
    region = dict(start=0, end=10000, times=times)
    arg = argweaver.sample_arg_dsmc(k, 2 * n, rho, **region)

    # convert to C++ and back
    converted = argweaverc.arg2ctrees(arg, times)
    trees, names = converted
    arg2 = argweaverc.ctrees2arg(trees, names, times)

    arg_equal(arg, arg2)
예제 #6
0
def test_trans_switch_internal():
    """
    Check transition probabilities for switch matrix and internal branches.

    Only calculate a single matrix.

    A 10-chromosome ARG is sampled and converted to C trees, which are
    passed with the time points, a constant population-size list (one
    entry per time point) and the recombination rate to
    ``argweaverc.assert_transition_probs_switch_internal``.  The test
    passes when that call returns a truthy value.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    # Floor division keeps the region end an integer even when true
    # division is in effect (Python 3 or ``from __future__ import division``).
    length = int(100e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    # Same population size at every time point.
    popsizes = [n] * len(times)

    arg = argweaver.sample_arg_dsmc(k, 2*n, rho, start=0, end=length,
                                    times=times)
    # names is returned by the conversion but not needed for this check.
    trees, names = argweaverc.arg2ctrees(arg, times)

    assert argweaverc.assert_transition_probs_switch_internal(
        trees, times, popsizes, rho)
예제 #7
0
def test_forward():
    """
    Compare the fast and slow C forward algorithms.

    Samples a 4-chromosome ARG with ``arglib.sample_arg_smc``, generates
    an alignment, discretizes the ARG onto 100 time points and removes
    the thread of the last chromosome (``n3``).  The forward algorithm is
    then run twice on the same input, once normally and once with
    ``slow=True``, and every pair of corresponding entries is compared
    with ``fequal`` at a relative tolerance of 1e-4.
    """

    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    # Single pre-formatted string: prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("muts %d" % len(muts))
    print("recomb %d" % len(arglib.get_recomb_pos(arg)))

    argweaver.discretize_arg(arg, times)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    # carg is whatever arg2ctrees returns; it is passed on unmodified.
    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    probs2 = argweaverc.argweaver_forward_algorithm(carg,
                                                    seqs,
                                                    times=times,
                                                    slow=True)
    util.toc()

    # Both runs must agree entry by entry.
    for col1, col2 in izip(probs1, probs2):
        for a, b in izip(col1, col2):
            fequal(a, b, rel=.0001)
예제 #8
0
def test_emit_internal():
    """
    Check emission probabilities for internal branches.

    An ARG for 10 chromosomes is sampled, mutations are sampled on it and
    an alignment is built.  The C versions of the trees and sequences are
    passed to ``argweaverc.argweaver_assert_emit_internal`` and its return
    value is asserted to be truthy.
    """

    k = 10
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    # ``//`` rather than ``/``: the region end must stay an int even if
    # ``/`` performs true division (Python 3 / ``__future__.division``).
    length = int(10e3) // 20
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)

    arg = argweaver.sample_arg_dsmc(k, 2*n, rho, start=0, end=length,
                                    times=times)

    muts = argweaver.sample_arg_mutations(arg, mu, times)
    seqs = argweaver.make_alignment(arg, muts)

    # Convert the sequences in the name order reported for the C trees.
    trees, names = argweaverc.arg2ctrees(arg, times)
    seqs2, nseqs, seqlen = argweaverc.seqs2cseqs(seqs, names)

    assert argweaverc.argweaver_assert_emit_internal(
        trees, len(times), times, mu, seqs2, nseqs, seqlen)
예제 #9
0
def test_forward():
    """
    Check that the fast and slow forward algorithms agree.

    A 4-chromosome ARG and alignment are sampled with ``arglib``, the ARG
    is discretized onto 100 time points and the thread of chromosome
    ``n3`` is removed.  ``argweaverc.argweaver_forward_algorithm`` is run
    with and without ``slow=True`` on the same input, and corresponding
    entries of the two results are compared with ``fequal`` using a
    relative tolerance of 1e-4.
    """

    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2*n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    # Formatting into one string gives identical output under both the
    # Python 2 print statement and the Python 3 print function.
    print("muts %d" % len(muts))
    print("recomb %d" % len(arglib.get_recombs(arg)))

    argweaver.discretize_arg(arg, times)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    probs2 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times,
                                                    slow=True)
    util.toc()

    # The column index was never used, so iterate over the pairs directly.
    for col1, col2 in izip(probs1, probs2):
        for a, b in izip(col1, col2):
            fequal(a, b, rel=.0001)
예제 #10
0
def show_plots(arg_file, sites_file, stats_file, output_prefix,
               rho, mu, popsize, ntimes=20, maxtime=200000):
    """
    Show plots of convergence.

    For each of five statistics, the values read from ``stats_file`` are
    drawn as a line against iteration, with a gray horizontal line at the
    value computed from the true ARG.  Each plot is written to
    ``<output_prefix>.trace.<name>.pdf``.

    Parameters:
      arg_file      -- true ARG, read with ``arglib.read_arg``
      sites_file    -- sites, read with ``argweaver.read_sites``
      stats_file    -- table read with ``read_table``; the columns
                       "joint", "likelihood", "prior", "recombs" and
                       "arglen" are used
      output_prefix -- prefix of the PDF files that are written
      rho           -- passed to ``argweaverc.calc_prior_prob``
      mu            -- passed to ``argweaverc.calc_likelihood``
      popsize       -- passed as ``popsizes`` to ``calc_prior_prob``
      ntimes        -- number of time points for ``get_time_points``
      maxtime       -- maximum time for ``get_time_points``
    """

    # read true arg and seqs
    times = argweaver.get_time_points(ntimes=ntimes, maxtime=maxtime)
    arg = arglib.read_arg(arg_file)
    argweaver.discretize_arg(arg, times, ignore_top=False, round_age="closer")
    arg = arglib.smcify_arg(arg)
    seqs = argweaver.sites2seqs(argweaver.read_sites(sites_file))

    # compute true stats
    # arglen is taken from the Python ARG, before ``arg`` is rebound to
    # the C representation.
    arglen = arglib.arglen(arg)
    arg = argweaverc.arg2ctrees(arg, times)
    nrecombs = argweaverc.get_local_trees_ntrees(arg[0]) - 1
    # NOTE(review): delete_arg=False is passed to both calls, presumably
    # so the C trees survive for the second one; nothing in this function
    # releases them afterwards -- confirm whether that is intended.
    lk = argweaverc.calc_likelihood(
        arg, seqs, mu=mu, times=times,
        delete_arg=False)
    prior = argweaverc.calc_prior_prob(
        arg, rho=rho, times=times, popsizes=popsize,
        delete_arg=False)
    joint = lk + prior

    data = read_table(stats_file)

    # One row per plot: (stats column, file name part, title/axis label,
    # value for the true ARG).  The order matches the original sequence of
    # plots.
    traces = [
        ("joint", "joint", "joint probability", joint),
        ("likelihood", "lk", "likelihood", lk),
        ("prior", "prior", "prior probability", prior),
        ("recombs", "nrecombs", "number of recombinations", nrecombs),
        ("arglen", "arglen", "ARG branch length", arglen),
    ]

    for column, suffix, label, truth in traces:
        y = data.cget(column)
        rplot_start(output_prefix + ".trace." + suffix + ".pdf",
                    width=8, height=5)
        # Extend the y-range so the true value is always visible.
        rp.plot(y, t="l", ylim=[min(min(y), truth), max(max(y), truth)],
                main=label,
                xlab="iterations",
                ylab=label)
        # Horizontal reference line at the true value.
        rp.lines([0, len(y)], [truth, truth], col="gray")
        rplot_end(True)
예제 #11
0
def show_plots(arg_file, sites_file, stats_file, output_prefix,
               rho, mu, popsize, ntimes=20, maxtime=200000):
    """
    Show plots of convergence.

    Reads the true ARG and sites, computes the true likelihood, prior,
    joint probability, recombination count and ARG length, and plots
    each corresponding column of ``stats_file`` against iteration with a
    gray line marking the true value.  The output files are
    ``<output_prefix>.trace.{joint,lk,prior,nrecombs,arglen}.pdf``.

    Parameters:
      arg_file      -- true ARG, read with ``arglib.read_arg``
      sites_file    -- sites, read with ``argweaver.read_sites``
      stats_file    -- table read with ``read_table``
      output_prefix -- prefix of the PDF files that are written
      rho           -- passed to ``argweaverc.calc_prior_prob``
      mu            -- passed to ``argweaverc.calc_likelihood``
      popsize       -- passed as ``popsizes`` to ``calc_prior_prob``
      ntimes        -- number of time points for ``get_time_points``
      maxtime       -- maximum time for ``get_time_points``
    """

    # read true arg and seqs
    times = argweaver.get_time_points(ntimes=ntimes, maxtime=maxtime)
    arg = arglib.read_arg(arg_file)
    argweaver.discretize_arg(arg, times, ignore_top=False, round_age="closer")
    arg = arglib.smcify_arg(arg)
    seqs = argweaver.sites2seqs(argweaver.read_sites(sites_file))

    # compute true stats
    # The ARG length must be read before ``arg`` is replaced by the result
    # of arg2ctrees.
    arglen = arglib.arglen(arg)
    arg = argweaverc.arg2ctrees(arg, times)
    nrecombs = argweaverc.get_local_trees_ntrees(arg[0]) - 1
    lk = argweaverc.calc_likelihood(
        arg, seqs, mu=mu, times=times,
        delete_arg=False)
    prior = argweaverc.calc_prior_prob(
        arg, rho=rho, times=times, popsizes=popsize,
        delete_arg=False)
    joint = lk + prior

    data = read_table(stats_file)

    def plot_trace(column, suffix, label, truth):
        # Plot one stats column as a line and mark the true value.
        y = data.cget(column)
        rplot_start(output_prefix + ".trace." + suffix + ".pdf",
                    width=8, height=5)
        # The y-range is widened to include the true value.
        rp.plot(y, t="l", ylim=[min(min(y), truth), max(max(y), truth)],
                main=label,
                xlab="iterations",
                ylab=label)
        rp.lines([0, len(y)], [truth, truth], col="gray")
        rplot_end(True)

    # Same five plots, in the same order, as the original stanzas.
    plot_trace("joint", "joint", "joint probability", joint)
    plot_trace("likelihood", "lk", "likelihood", lk)
    plot_trace("prior", "prior", "prior probability", prior)
    plot_trace("recombs", "nrecombs", "number of recombinations", nrecombs)
    plot_trace("arglen", "arglen", "ARG branch length", arglen)