Esempio n. 1
0
def test_trans():
    """
    Check transition probabilities on stored test ARGs.

    For each of the `ntests` ARG files under test/data/test_trans, the ARG
    is read, discretized to the model time points, and its marginal tree at
    position 10 is passed to argweaverc.assert_transition_probs(), whose
    result must be true.

    Set `create_data` to True to regenerate the ARG files before checking.
    """
    create_data = False
    if create_data:
        make_clean_dir('test/data/test_trans')

    # model parameters
    k = 8
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=10, maxtime=200000)
    popsizes = [n] * len(times)  # same population size at every time point
    ntests = 40

    # generate test data
    if create_data:
        for i in range(ntests):
            arg = arglib.sample_arg(k, 2*n, rho, start=0, end=length)
            argweaver.discretize_arg(arg, times)
            arg.write('test/data/test_trans/%d.arg' % i)

    for i in range(ntests):
        # A single pre-formatted argument prints "arg <i>" both as a
        # Python 2 print statement and as a Python 3 print() call.
        print('arg %d' % i)
        arg = arglib.read_arg('test/data/test_trans/%d.arg' % i)
        argweaver.discretize_arg(arg, times)
        pos = 10
        tree = arg.get_marginal_tree(pos)

        assert argweaverc.assert_transition_probs(tree, times, popsizes, rho)
Esempio n. 2
0
def test_trans_switch():
    """
    Check the switch transition matrix on stored test ARGs.

    Only one matrix is checked per ARG: the one for the first recombination
    position found when iterating over the ARG's nodes.
    """
    regenerate = False
    if regenerate:
        make_clean_dir('test/data/test_trans_switch')

    # model parameters
    nlineages = 12
    popsize = 1e4
    rho = 1.5e-8 * 20
    seqlen = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [popsize] * len(times)
    ntests = 100

    # optionally rebuild the stored ARG files
    if regenerate:
        for idx in range(ntests):
            # Keep resampling until the ARG has at least one recombination.
            has_recomb = False
            while not has_recomb:
                arg = argweaver.sample_arg_dsmc(
                    nlineages, 2 * popsize, rho,
                    start=0, end=seqlen, times=times)
                has_recomb = any(node.event == "recomb" for node in arg)
            arg.write('test/data/test_trans_switch/%d.arg' % idx)

    for idx in range(ntests):
        print('arg', idx)
        arg = arglib.read_arg('test/data/test_trans_switch/%d.arg' % idx)
        argweaver.discretize_arg(arg, times)

        # Look just to the left of the first recombination position.
        recomb_positions = [node.pos for node in arg
                            if node.event == "recomb"]
        left_of_recomb = recomb_positions[0] - .5
        tree = arg.get_marginal_tree(left_of_recomb)

        # First SPR at or after that point; its position is not needed.
        _rpos, spr_a, spr_b = next(
            arglib.iter_arg_sprs(arg, start=left_of_recomb))
        spr = (spr_a, spr_b)

        if argweaverc.assert_transition_switch_probs(
                tree, spr, times, popsizes, rho):
            continue

        # Check failed: draw the tree before failing the test.
        drawn = tree.get_tree()
        treelib.remove_single_children(drawn)
        treelib.draw_tree_names(drawn, maxlen=5, minlen=5)
        assert False
Esempio n. 3
0
def test_trans_switch():
    """
    Calculate transition probabilities for switch matrix

    Only calculate a single matrix: for each stored ARG, the marginal tree
    just left of the first recombination position and the first SPR from
    that point are passed to argweaverc.assert_transition_switch_probs().
    If that returns a false value, the tree is drawn and the test fails.

    Set `create_data` to True to regenerate the ARG files before checking.
    """
    create_data = False
    if create_data:
        make_clean_dir('test/data/test_trans_switch')

    # model parameters
    k = 12
    n = 1e4
    rho = 1.5e-8 * 20
    length = 1000
    times = argweaver.get_time_points(ntimes=20, maxtime=200000)
    popsizes = [n] * len(times)  # same population size at every time point
    ntests = 100

    # generate test data
    if create_data:
        for i in range(ntests):
            # Sample ARG with at least one recombination.
            while True:
                arg = argweaver.sample_arg_dsmc(
                    k, 2*n, rho, start=0, end=length, times=times)
                if any(x.event == "recomb" for x in arg):
                    break
            arg.write('test/data/test_trans_switch/%d.arg' % i)

    for i in range(ntests):
        # A single pre-formatted argument prints "arg <i>" both as a
        # Python 2 print statement and as a Python 3 print() call.
        print('arg %d' % i)
        arg = arglib.read_arg('test/data/test_trans_switch/%d.arg' % i)
        argweaver.discretize_arg(arg, times)
        recombs = [x.pos for x in arg if x.event == "recomb"]
        pos = recombs[0]
        tree = arg.get_marginal_tree(pos-.5)
        # Builtin next() works on Python 2.6+ and Python 3; the iterator's
        # .next() method exists only on Python 2.
        rpos, r, c = next(arglib.iter_arg_sprs(arg, start=pos-.5))
        spr = (r, c)

        if not argweaverc.assert_transition_switch_probs(
                tree, spr, times, popsizes, rho):
            # Draw the offending tree before failing the test.
            tree2 = tree.get_tree()
            treelib.remove_single_children(tree2)
            treelib.draw_tree_names(tree2, maxlen=5, minlen=5)
            assert False
Esempio n. 4
0
    def test_read_write(self):
        """Write a sampled ARG to a stream, read it back, and compare."""

        recomb_rate = 1.5e-8       # recomb/site/gen
        locus_len = 10000          # length of locus
        nlineages = 10             # number of lineages
        eff_popsize = 2*10000      # effective popsize

        arg = arglib.sample_arg(
            nlineages, eff_popsize, recomb_rate, 0, locus_len)

        # Round ages and positions so the equality check is easy.
        for node in arg:
            node.age = round(node.age)
            node.pos = round(node.pos)

        # Round-trip through an in-memory stream.
        buf = StringIO.StringIO()
        arglib.write_arg(buf, arg)
        buf.seek(0)
        reloaded = arglib.read_arg(buf)

        self.assertTrue(arg.equal(reloaded))
Esempio n. 5
0
def show_plots(arg_file, sites_file, stats_file, output_prefix,
               rho, mu, popsize, ntimes=20, maxtime=200000):
    """
    Plot traces of sampled statistics against the true ARG's values.

    The ARG in `arg_file` and the sites in `sites_file` are used to compute
    the joint probability, likelihood, prior, recombination count and ARG
    length. For each of them, the matching column of `stats_file` is drawn
    as a line, with a gray horizontal line at the computed value. Each plot
    is started with rplot_start() on output_prefix + ".trace.<stat>.pdf".
    """

    # read true arg and seqs
    times = argweaver.get_time_points(ntimes=ntimes, maxtime=maxtime)
    arg = arglib.read_arg(arg_file)
    argweaver.discretize_arg(arg, times, ignore_top=False, round_age="closer")
    arg = arglib.smcify_arg(arg)
    seqs = argweaver.sites2seqs(argweaver.read_sites(sites_file))

    # compute true stats (arglen is taken before the ARG is converted)
    arglen = arglib.arglen(arg)
    arg = argweaverc.arg2ctrees(arg, times)
    nrecombs = argweaverc.get_local_trees_ntrees(arg[0]) - 1
    lk = argweaverc.calc_likelihood(
        arg, seqs, mu=mu, times=times, delete_arg=False)
    prior = argweaverc.calc_prior_prob(
        arg, rho=rho, times=times, popsizes=popsize, delete_arg=False)
    joint = lk + prior

    data = read_table(stats_file)

    # (file suffix, stats column, true value, plot title / y-axis label)
    traces = [
        (".trace.joint.pdf", "joint", joint, "joint probability"),
        (".trace.lk.pdf", "likelihood", lk, "likelihood"),
        (".trace.prior.pdf", "prior", prior, "prior probability"),
        (".trace.nrecombs.pdf", "recombs", nrecombs,
         "number of recombinations"),
        (".trace.arglen.pdf", "arglen", arglen, "ARG branch length"),
    ]

    for suffix, column, truth, label in traces:
        series = data.cget(column)
        # The y-range must cover both the trace and the true value.
        lower = min(min(series), truth)
        upper = max(max(series), truth)

        rplot_start(output_prefix + suffix, width=8, height=5)
        rp.plot(series, t="l", ylim=[lower, upper],
                main=label,
                xlab="iterations",
                ylab=label)
        rp.lines([0, len(series)], [truth, truth], col="gray")
        rplot_end(True)
Esempio n. 6
0
        i, j = util.binsearch(times3, x)
        if i is None: i = j
        if j is None: j = i
        if abs(times3[i] - x) < 1:
            times.append(times3[i])
        elif abs(times3[j] - x) < 1:
            times.append(times3[j])
        else:
            times.append(x)
    return times
    

#=============================================================================
# Script section: load a sample ARG, display its tree track, then walk over
# each pair of adjacent local trees. The `if 1:` guard is always true.
if 1:
    #times = arghmm.get_time_points(ntimes=20)
    arg = arglib.read_arg("test/data/sample.arg")
    seqs = read_fasta("test/data/sample.fa")

    # materialize the (block, tree) pairs so they can be indexed below
    trees = list(arglib.iter_tree_tracks(arg, convert=True))

    # draw mappings
    win = argvis.show_tree_track(trees)

    nleaves = ilen(arg.leaves())
    # visit each pair of adjacent entries in the tree track
    for i in xrange(len(trees)-1):
        block1, _tree1 = trees[i]
        block2, _tree2 = trees[i + 1]
        # presumably block2[0] is the start coordinate of the second block
        # -- TODO confirm against arglib.iter_tree_tracks
        pos = block2[0]
        # marginal trees half a unit to either side of that position
        tree1 = arg.get_marginal_tree(pos-.5)
        tree2 = arg.get_marginal_tree(pos+.5)
        layout1 = argvis.layout_arg(tree1)
Esempio n. 7
0
def show_plots(arg_file, sites_file, stats_file, output_prefix,
               rho, mu, popsize, ntimes=20, maxtime=200000):
    """
    Draw convergence traces for the statistics recorded in `stats_file`.

    Each trace is a line plot of one column from `stats_file`, with a gray
    horizontal reference line at the value computed from the ARG in
    `arg_file` (and, for the likelihood, the sequences in `sites_file`).
    Five plots are produced, each started with rplot_start() on
    output_prefix + ".trace.<stat>.pdf".
    """

    # read true arg and seqs
    times = argweaver.get_time_points(ntimes=ntimes, maxtime=maxtime)
    arg = arglib.read_arg(arg_file)
    argweaver.discretize_arg(arg, times, ignore_top=False, round_age="closer")
    arg = arglib.smcify_arg(arg)
    seqs = argweaver.sites2seqs(argweaver.read_sites(sites_file))

    # compute true stats
    arglen = arglib.arglen(arg)
    arg = argweaverc.arg2ctrees(arg, times)
    nrecombs = argweaverc.get_local_trees_ntrees(arg[0]) - 1
    lk = argweaverc.calc_likelihood(
        arg, seqs, mu=mu, times=times, delete_arg=False)
    prior = argweaverc.calc_prior_prob(
        arg, rho=rho, times=times, popsizes=popsize, delete_arg=False)
    joint = lk + prior

    data = read_table(stats_file)

    def draw_trace(suffix, column, reference, title):
        """Plot one stats column with a gray line at `reference`."""
        values = data.cget(column)
        rplot_start(output_prefix + suffix, width=8, height=5)
        rp.plot(values, t="l",
                ylim=[min(min(values), reference),
                      max(max(values), reference)],
                main=title,
                xlab="iterations",
                ylab=title)
        rp.lines([0, len(values)], [reference, reference], col="gray")
        rplot_end(True)

    draw_trace(".trace.joint.pdf", "joint", joint, "joint probability")
    draw_trace(".trace.lk.pdf", "likelihood", lk, "likelihood")
    draw_trace(".trace.prior.pdf", "prior", prior, "prior probability")
    draw_trace(".trace.nrecombs.pdf", "recombs", nrecombs,
               "number of recombinations")
    draw_trace(".trace.arglen.pdf", "arglen", arglen, "ARG branch length")
Esempio n. 8
0
        i, j = util.binsearch(times3, x)
        if i is None: i = j
        if j is None: j = i
        if abs(times3[i] - x) < 1:
            times.append(times3[i])
        elif abs(times3[j] - x) < 1:
            times.append(times3[j])
        else:
            times.append(x)
    return times


#=============================================================================
# Script section: load a sample ARG, display its tree track, then walk over
# each pair of adjacent local trees. The `if 1:` guard is always true.
if 1:
    #times = arghmm.get_time_points(ntimes=20)
    arg = arglib.read_arg("test/data/sample.arg")
    seqs = read_fasta("test/data/sample.fa")

    # materialize the (block, tree) pairs so they can be indexed below
    trees = list(arglib.iter_tree_tracks(arg, convert=True))

    # draw mappings
    win = argvis.show_tree_track(trees)

    nleaves = ilen(arg.leaves())
    # visit each pair of adjacent entries in the tree track
    for i in xrange(len(trees) - 1):
        block1, _tree1 = trees[i]
        block2, _tree2 = trees[i + 1]
        # presumably block2[0] is the start coordinate of the second block
        # -- TODO confirm against arglib.iter_tree_tracks
        pos = block2[0]
        # marginal trees half a unit to either side of that position
        tree1 = arg.get_marginal_tree(pos - .5)
        tree2 = arg.get_marginal_tree(pos + .5)
        layout1 = argvis.layout_arg(tree1)