Esempio n. 1
0
    def test_sample_arg_smc(self):
        """Draw a single ARG with the SMC sampler and run assert_arg on it."""

        # simulation parameters
        num_lineages = 5       # number of lineages
        popsize = 1e4          # effective population size
        recomb_rate = 1.5e-8   # recombinations per site per generation
        locus_length = 10000   # length of the locus

        sampled = arglib.sample_arg_smc(
            num_lineages, popsize, recomb_rate, 0, locus_length)
        arglib.assert_arg(sampled)
Esempio n. 2
0
    def test_est_popsize2(self):
        """
        Simulate an ARG with a popsize change and plot per-tree estimates.

        The ARG is simulated with the SMC sampler in three consecutive
        segments (0..a, a..b, b..length) that use popsize, popsize2 and
        popsize again, each segment continuing from the previous one via
        init_tree.  mle_popsize_tree is then evaluated on every local tree,
        and the raw values, a smoothed curve and the simulated step function
        are plotted together.  The test ends in pause(), so it is meant to
        be inspected visually.
        """

        k = 20
        rho = 1.5e-8
        length = int(4e6)
        popsize = 1e4
        popsize2 = 1e4 * .5
        a = int(.3 * length)
        b = int(.7 * length)

        util.tic("sim ARG")
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=0, end=a)
        arg = arglib.sample_arg_smc(k, 2 * popsize2,
                                    rho, start=a, end=b,
                                    init_tree=arg)
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=b, end=length,
                                    init_tree=arg)
        util.toc()

        # one estimate per local tree, keyed by the tree's start position
        x = []
        y = []
        for (start, _end), tree in arglib.iter_tree_tracks(arg):
            arglib.remove_single_lineages(tree)
            x.append(start)
            y.append(mle_popsize_tree(tree, mintime=0))

        x2, y2 = stats.smooth2(x, y, 100e3)
        p = plot(x, y, ymin=0)
        p.plot(x2, y2, style='lines')
        # step function of the population sizes used in the simulation
        p.plot([0, a, a, b, b, length],
               [popsize, popsize, popsize2, popsize2, popsize, popsize],
               style='lines')

        pause()
Esempio n. 3
0
    def test_est_popsize2(self):
        """
        Simulate an ARG with a popsize change and plot per-tree estimates.

        The ARG is simulated with the SMC sampler in three consecutive
        segments (0..a, a..b, b..length) that use popsize, popsize2 and
        popsize again, each segment continuing from the previous one via
        init_tree.  mle_popsize_tree is then evaluated on every local tree,
        and the raw values, a smoothed curve and the simulated step function
        are plotted together.  The test ends in pause(), so it is meant to
        be inspected visually.
        """

        k = 20
        rho = 1.5e-8
        length = int(4e6)
        popsize = 1e4
        popsize2 = 1e4 * .5
        a = int(.3 * length)
        b = int(.7 * length)

        util.tic("sim ARG")
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=0, end=a)
        arg = arglib.sample_arg_smc(k, 2 * popsize2,
                                    rho, start=a, end=b,
                                    init_tree=arg)
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=b, end=length,
                                    init_tree=arg)
        util.toc()

        # one estimate per local tree, keyed by the tree's start position
        x = []
        y = []
        for (start, _end), tree in arglib.iter_tree_tracks(arg):
            arglib.remove_single_lineages(tree)
            x.append(start)
            y.append(mle_popsize_tree(tree, mintime=0))

        x2, y2 = stats.smooth2(x, y, 100e3)
        p = plot(x, y, ymin=0)
        p.plot(x2, y2, style='lines')
        # step function of the population sizes used in the simulation
        p.plot([0, a, a, b, b, length],
               [popsize, popsize, popsize2, popsize2, popsize, popsize],
               style='lines')

        pause()
Esempio n. 4
0
def test_forward():

    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    print "muts", len(muts)
    print "recomb", len(arglib.get_recomb_pos(arg))

    argweaver.discretize_arg(arg, times)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    probs2 = argweaverc.argweaver_forward_algorithm(carg,
                                                    seqs,
                                                    times=times,
                                                    slow=True)
    util.toc()

    for i, (col1, col2) in enumerate(izip(probs1, probs2)):
        for a, b in izip(col1, col2):
            fequal(a, b, rel=.0001)
Esempio n. 5
0
def test_forward():

    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2*n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    print "muts", len(muts)
    print "recomb", len(arglib.get_recombs(arg))

    argweaver.discretize_arg(arg, times)

    # remove chrom
    new_name = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, new_name)

    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    probs1 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    probs2 = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times,
                                                    slow=True)
    util.toc()

    for i, (col1, col2) in enumerate(izip(probs1, probs2)):
        for a, b in izip(col1, col2):
            fequal(a, b, rel=.0001)
Esempio n. 6
0
    smc_coals_list = []

    for i in range(20):
        k = 10
        n = 10e3
        length = 500e3
        rho = 1.5e-8

        # simulate an ARG from the CwR and convert it into SMC-style
        tic("simulate %d" % i)
        cwr_arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        cwr_arg_converted = arglib.smcify_arg(cwr_arg)
        toc()

        # simulate an ARG directly from SMC process
        smc_arg = arglib.sample_arg_smc(k, n, rho, start=0, end=length)

        # gather all coalescence times
        cwr_coals = [node.age for node in cwr_arg_converted
                     if node.event == 'coal']
        smc_coals = [node.age for node in smc_arg
                     if node.event == 'coal']
        print len(cwr_coals), len(smc_coals)

        cwr_coals_list.append(cwr_coals)
        smc_coals_list.append(smc_coals)


    rplot_start('figures/cwr-smc-coals.pdf')
    rp.plot([], main='Comparison of CwR and SMC coalescence times',
            xlab='generations', ylab='', xlim=[50, 100e3], ylim=[0, 1],
Esempio n. 7
0
    def test_est_arg_popsize(self):
        """
        Sample an ARG from simulated sequences and plot popsize estimates.

        An ARG is simulated with the SMC sampler in three consecutive
        segments (0..a, a..b, b..length) that use popsize, popsize2 and
        popsize again.  Sequences are simulated from it, a new ARG is
        sampled from those sequences with arghmm and resampled twice, and
        mle_popsize_tree is evaluated on each local tree of the sampled
        ARG.  The raw values, a smoothed curve over a thinned grid and the
        simulated step function are plotted together.  The test ends in
        pause(), so it is meant to be inspected visually.
        """

        k = 20
        rho = 1.5e-8 * 20
        mu = 2.5e-8 * 20
        # Floor division keeps length an int under both Python 2 and true
        # division; it is used below as a range() bound and step.
        length = int(2e6) // 20
        times = arghmm.get_time_points(ntimes=20, maxtime=200000)
        popsize = 1e4
        popsize2 = 1e4 * .5
        a = int(.3 * length)
        b = int(.7 * length)

        util.tic("sim ARG")
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=0, end=a)
        arg = arglib.sample_arg_smc(k, 2 * popsize2,
                                    rho, start=a, end=b,
                                    init_tree=arg)
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=b, end=length,
                                    init_tree=arg)

        # sim seq
        mut = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arghmm.make_alignment(arg, mut)
        util.toc()

        # sample arg
        util.tic("sample arg")
        arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                                 popsizes=popsize, carg=True)
        arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=popsize,
                                         rho=rho, mu=mu, times=times,
                                         refine=200)
        arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=popsize,
                                       rho=rho, mu=mu, times=times,
                                       refine=200)
        util.toc()

        # one estimate per local tree, keyed by the tree's start position
        x = []
        y = []
        for (start, _end), tree in arglib.iter_tree_tracks(arg2):
            arglib.remove_single_lineages(tree)
            x.append(start)
            y.append(mle_popsize_tree(tree, mintime=0))

        # thin popsizes: for each grid position take the estimate of the
        # first tree starting at or after it (the last tree once exhausted)
        x2 = list(range(0, length, length // 5000))
        y2 = []
        j = 0
        for pos in x2:
            while j < len(x) and x[j] < pos:
                j += 1
            y2.append(y[min(j, len(y) - 1)])

        x3, y3 = stats.smooth2(x2, y2, 100e3)
        p = plot(x, y, ymin=0)
        p.plot(x3, y3, style='lines')
        # step function of the population sizes used in the simulation
        p.plot([0, a, a, b, b, length],
               [popsize, popsize, popsize2, popsize2, popsize, popsize],
               style='lines')

        pause()
Esempio n. 8
0
    def test_est_arg_popsize(self):
        """
        Sample an ARG from simulated sequences and plot popsize estimates.

        An ARG is simulated with the SMC sampler in three consecutive
        segments (0..a, a..b, b..length) that use popsize, popsize2 and
        popsize again.  Sequences are simulated from it, a new ARG is
        sampled from those sequences with arghmm and resampled twice, and
        mle_popsize_tree is evaluated on each local tree of the sampled
        ARG.  The raw values, a smoothed curve over a thinned grid and the
        simulated step function are plotted together.  The test ends in
        pause(), so it is meant to be inspected visually.
        """

        k = 20
        rho = 1.5e-8 * 20
        mu = 2.5e-8 * 20
        # Floor division keeps length an int under both Python 2 and true
        # division; it is used below as a range() bound and step.
        length = int(2e6) // 20
        times = arghmm.get_time_points(ntimes=20, maxtime=200000)
        popsize = 1e4
        popsize2 = 1e4 * .5
        a = int(.3 * length)
        b = int(.7 * length)

        util.tic("sim ARG")
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=0, end=a)
        arg = arglib.sample_arg_smc(k, 2 * popsize2,
                                    rho, start=a, end=b,
                                    init_tree=arg)
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=b, end=length,
                                    init_tree=arg)

        # sim seq
        mut = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arghmm.make_alignment(arg, mut)
        util.toc()

        # sample arg
        util.tic("sample arg")
        arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                                 popsizes=popsize, carg=True)
        arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=popsize,
                                         rho=rho, mu=mu, times=times,
                                         refine=200)
        arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=popsize,
                                       rho=rho, mu=mu, times=times,
                                       refine=200)
        util.toc()

        # one estimate per local tree, keyed by the tree's start position
        x = []
        y = []
        for (start, _end), tree in arglib.iter_tree_tracks(arg2):
            arglib.remove_single_lineages(tree)
            x.append(start)
            y.append(mle_popsize_tree(tree, mintime=0))

        # thin popsizes: for each grid position take the estimate of the
        # first tree starting at or after it (the last tree once exhausted).
        # The grid is materialized as a list so it is a plain sequence on
        # every Python version.
        x2 = list(range(0, length, length // 5000))
        y2 = []
        j = 0
        for pos in x2:
            while j < len(x) and x[j] < pos:
                j += 1
            y2.append(y[min(j, len(y) - 1)])

        x3, y3 = stats.smooth2(x2, y2, 100e3)
        p = plot(x, y, ymin=0)
        p.plot(x3, y3, style='lines')
        # step function of the population sizes used in the simulation
        p.plot([0, a, a, b, b, length],
               [popsize, popsize, popsize2, popsize2, popsize, popsize],
               style='lines')

        pause()