예제 #1
0
파일: sim.py 프로젝트: jjberg2/argweaver
def sample_arg_mutations(arg, mu, times=None):
    """
    Simulate mutations on an ARG.

    Mutations are represented as (node, parent, site, time).

    arg -- ARG on which to simulate mutations
    mu -- mutation rate (mutations/site/gen)
    times -- optional list of discretized time points

    When 'times' is given, every branch length is floored at 10% of
    times[1] before the mutation rate is computed.  Branches whose
    mutation rate (branch length * mu) is not positive are skipped.

    Returns a list of (node, parent, site, time) tuples.
    """
    mutations = []
    minlen = times[1] * .1 if times else 0.0

    for (start, end), tree in arglib.iter_tree_tracks(arg):
        arglib.remove_single_lineages(tree)
        for node in tree:
            if not node.parents:
                # a node without a parent has no branch to mutate
                continue
            blen = max(node.get_dist(), minlen)
            rate = blen * mu
            if rate <= 0.0:
                # random.expovariate() divides by its argument, so a zero
                # rate would raise ZeroDivisionError and a negative one
                # would walk away from 'end' forever; no mutation can
                # fall on such a branch anyway
                continue

            # walk along the block, drawing exponentially distributed gaps
            # between successive mutated sites
            i = start
            while i < end:
                i += random.expovariate(rate)
                if i < end:
                    # mutation time is uniform along the branch
                    t = random.uniform(node.age, node.age + blen)
                    mutations.append((node, node.parents[0], int(i), t))
    return mutations
예제 #2
0
파일: sim.py 프로젝트: jeffhsu3/argweaver
def sample_arg_mutations(arg, mu, times=None):
    """
    Simulate mutations on an ARG.

    Mutations are represented as (node, parent, site, time).

    arg -- ARG on which to simulate mutations
    mu -- mutation rate (mutations/site/gen)
    times -- optional list of discretized time points

    When 'times' is given, every branch length is floored at 10% of
    times[1] before the mutation rate is computed.  Branches whose
    mutation rate (branch length * mu) is not positive are skipped.

    Returns a list of (node, parent, site, time) tuples.
    """
    mutations = []
    minlen = times[1] * 0.1 if times else 0.0

    for (start, end), tree in arglib.iter_tree_tracks(arg):
        arglib.remove_single_lineages(tree)
        for node in tree:
            if not node.parents:
                # a node without a parent has no branch to mutate
                continue
            blen = max(node.get_dist(), minlen)
            rate = blen * mu
            if rate <= 0.0:
                # random.expovariate() divides by its argument, so a zero
                # rate would raise ZeroDivisionError and a negative one
                # would walk away from 'end' forever; no mutation can
                # fall on such a branch anyway
                continue

            # walk along the block, drawing exponentially distributed gaps
            # between successive mutated sites
            i = start
            while i < end:
                i += random.expovariate(rate)
                if i < end:
                    # mutation time is uniform along the branch
                    t = random.uniform(node.age, node.age + blen)
                    mutations.append((node, node.parents[0], int(i), t))
    return mutations
예제 #3
0
def test_arg_equal(arg, arg2):

    # test recomb points
    recombs = sorted(x.pos for x in arg if x.event == "recomb")
    recombs2 = sorted(x.pos for x in arg2 if x.event == "recomb")
    assert recombs == recombs2


    # check local tree topologies
    for (start, end), tree in arglib.iter_tree_tracks(arg):
        pos = (start + end) / 2.0

        arglib.remove_single_lineages(tree)
        tree1 = tree.get_tree()

        tree2 = arg2.get_marginal_tree(pos)
        arglib.remove_single_lineages(tree2)
        tree2 = tree2.get_tree()

        hash1 = phylo.hash_tree(tree1)
        hash2 = phylo.hash_tree(tree2)
        print
        print pos
        print hash1
        print hash2
        assert hash1 == hash2

    # check sprs
    sprs1 = arglib.iter_arg_sprs(arg, use_leaves=True)
    sprs2 = arglib.iter_arg_sprs(arg2, use_leaves=True)

    for (pos1, recomb1, coal1), (pos2, recomb2, coal2) in izip(sprs1, sprs2):
        recomb1 = (sorted(recomb1[0]), recomb1[1])
        recomb2 = (sorted(recomb2[0]), recomb2[1])
        coal1 = (sorted(coal1[0]), coal1[1])
        coal2 = (sorted(coal2[0]), coal2[1])

        print
        print (pos1, recomb1, coal1)
        print (pos2, recomb2, coal2)

        # check pos, leaves, time
        assert pos1 == pos2
        assert recomb1 == recomb2
        assert coal1 == coal2
예제 #4
0
def test_arg_equal(arg, arg2):

    # test recomb points
    recombs = sorted(x.pos for x in arg if x.event == "recomb")
    recombs2 = sorted(x.pos for x in arg2 if x.event == "recomb")
    assert recombs == recombs2

    # check local tree topologies
    for (start, end), tree in arglib.iter_tree_tracks(arg):
        pos = (start + end) / 2.0

        arglib.remove_single_lineages(tree)
        tree1 = tree.get_tree()

        tree2 = arg2.get_marginal_tree(pos)
        arglib.remove_single_lineages(tree2)
        tree2 = tree2.get_tree()

        hash1 = phylo.hash_tree(tree1)
        hash2 = phylo.hash_tree(tree2)
        print
        print pos
        print hash1
        print hash2
        assert hash1 == hash2

    # check sprs
    sprs1 = arglib.iter_arg_sprs(arg, use_leaves=True)
    sprs2 = arglib.iter_arg_sprs(arg2, use_leaves=True)

    for (pos1, recomb1, coal1), (pos2, recomb2, coal2) in izip(sprs1, sprs2):
        recomb1 = (sorted(recomb1[0]), recomb1[1])
        recomb2 = (sorted(recomb2[0]), recomb2[1])
        coal1 = (sorted(coal1[0]), coal1[1])
        coal2 = (sorted(coal2[0]), coal2[1])

        print
        print(pos1, recomb1, coal1)
        print(pos2, recomb2, coal2)

        # check pos, leaves, time
        assert pos1 == pos2
        assert recomb1 == recomb2
        assert coal1 == coal2
예제 #5
0
    def test_thread(self):
        """
        Test thread retrieval
        """

        k = 10
        n = 1e4
        rho = 1.5e-8 * 10
        mu = 2.5e-8 * 100
        length = 1000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        for (block, tree), threadi in izip(
                arglib.iter_tree_tracks(arg),
                arghmm.iter_chrom_thread(arg, arg["n9"], by_block=True)):
            print block
            print threadi
            treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4)
예제 #6
0
    def test_thread(self):
        """
        Test thread retrieval
        """

        k = 10
        n = 1e4
        rho = 1.5e-8 * 10
        mu = 2.5e-8 * 100
        length = 1000
        arg = arglib.sample_arg(k, n, rho, start=0, end=length)
        muts = arglib.sample_arg_mutations(arg, mu)
        seqs = arglib.make_alignment(arg, muts)

        for (block, tree), threadi in izip(
            arglib.iter_tree_tracks(arg),
            arghmm.iter_chrom_thread(arg, arg["n9"], by_block=True)):
            print block
            print threadi
            treelib.draw_tree_names(tree.get_tree(), minlen=5, scale=4e-4)
예제 #7
0
    def test_est_popsize2(self):
        """
        Plot per-tree population size estimates along a simulated ARG.

        The ARG is sampled with arglib.sample_arg_smc in three consecutive
        segments; the middle segment [a, b) uses half the population size
        of the two outer ones.  mle_popsize_tree is evaluated on every
        local tree and the estimates are plotted together with a smoothed
        curve and the population sizes used for the simulation.
        """

        k = 20
        rho = 1.5e-8
        length = int(4e6)
        popsize = 1e4
        popsize2 = 1e4 * .5
        a = int(.3 * length)
        b = int(.7 * length)

        util.tic("sim ARG")
        # first segment starts from scratch, the later ones extend it
        arg = arglib.sample_arg_smc(k, 2 * popsize, rho, start=0, end=a)
        for left, right, size in ((a, b, popsize2), (b, length, popsize)):
            arg = arglib.sample_arg_smc(k, 2 * size, rho,
                                        start=left, end=right,
                                        init_tree=arg)
        util.toc()

        # one estimate per local tree, keyed by the block start
        starts = []
        estimates = []
        for (start, end), tree in arglib.iter_tree_tracks(arg):
            arglib.remove_single_lineages(tree)
            starts.append(start)
            estimates.append(mle_popsize_tree(tree, mintime=0))

        smooth_x, smooth_y = stats.smooth2(starts, estimates, 100e3)
        p = plot(starts, estimates, ymin=0)
        p.plot(smooth_x, smooth_y, style='lines')

        # step function of the simulated population sizes
        truth_x = [0, a, a, b, b, length]
        truth_y = [popsize, popsize, popsize2, popsize2, popsize, popsize]
        p.plot(truth_x, truth_y, style='lines')

        pause()
예제 #8
0
    def test_est_popsize2(self):
        """
        Plot per-tree population size estimates along a simulated ARG.

        The ARG is sampled with arglib.sample_arg_smc in three consecutive
        segments; the middle segment [a, b) uses half the population size
        of the two outer ones.  mle_popsize_tree is evaluated on every
        local tree and the estimates are plotted together with a smoothed
        curve and the population sizes used for the simulation.
        """

        k = 20
        rho = 1.5e-8
        length = int(4e6)
        popsize = 1e4
        popsize2 = 1e4 * .5
        a = int(.3 * length)
        b = int(.7 * length)

        util.tic("sim ARG")
        # first segment starts from scratch, the later ones extend it
        arg = arglib.sample_arg_smc(k, 2 * popsize, rho, start=0, end=a)
        for left, right, size in ((a, b, popsize2), (b, length, popsize)):
            arg = arglib.sample_arg_smc(k, 2 * size, rho,
                                        start=left, end=right,
                                        init_tree=arg)
        util.toc()

        # one estimate per local tree, keyed by the block start
        starts = []
        estimates = []
        for (start, end), tree in arglib.iter_tree_tracks(arg):
            arglib.remove_single_lineages(tree)
            starts.append(start)
            estimates.append(mle_popsize_tree(tree, mintime=0))

        smooth_x, smooth_y = stats.smooth2(starts, estimates, 100e3)
        p = plot(starts, estimates, ymin=0)
        p.plot(smooth_x, smooth_y, style='lines')

        # step function of the simulated population sizes
        truth_x = [0, a, a, b, b, length]
        truth_y = [popsize, popsize, popsize2, popsize2, popsize, popsize]
        p.plot(truth_x, truth_y, style='lines')

        pause()
예제 #9
0
        if abs(times3[i] - x) < 1:
            times.append(times3[i])
        elif abs(times3[j] - x) < 1:
            times.append(times3[j])
        else:
            times.append(x)
    return times
    

#=============================================================================
if 1:
    # Always-enabled script section: load the sample ARG and alignment,
    # display the local trees, then visit every boundary between two
    # consecutive blocks.
    #times = arghmm.get_time_points(ntimes=20)
    arg = arglib.read_arg("test/data/sample.arg")
    seqs = read_fasta("test/data/sample.fa")
    
    # materialize the (block, tree) pairs so they can be indexed below
    trees = list(arglib.iter_tree_tracks(arg, convert=True))

    # draw mappings
    win = argvis.show_tree_track(trees)

    # presumably the number of leaves in the ARG -- confirm what ilen does
    nleaves = ilen(arg.leaves())
    for i in xrange(len(trees)-1):
        block1, _tree1 = trees[i]
        block2, _tree2 = trees[i + 1]
        # first coordinate of the right-hand block; presumably its start
        pos = block2[0]
        # marginal trees half a unit to either side of that position
        tree1 = arg.get_marginal_tree(pos-.5)
        tree2 = arg.get_marginal_tree(pos+.5)
        layout1 = argvis.layout_arg(tree1)
        layout2 = argvis.layout_arg(tree2)
        #layout1 = treelib.layout_tree_vertical(
        #    treelib.layout_tree(tree1, xscale=1, yscale=1), leaves=0)
예제 #10
0
    def test_est_arg_popsize(self):
        """
        Plot population size estimates from an ARG inferred from simulated data.

        An ARG is sampled with arglib.sample_arg_smc in three consecutive
        segments; the middle segment [a, b) uses half the population size
        of the two outer ones.  Mutations and an alignment are simulated
        on it, a new ARG is sampled from the alignment with
        arghmm.sample_arg and then passed through
        arghmm.resample_climb_arg and arghmm.resample_all_arg.
        mle_popsize_tree is evaluated on every local tree of the inferred
        ARG; the estimates are thinned onto a regular grid, smoothed, and
        plotted against the population sizes used for the simulation.
        """

        k = 20
        rho = 1.5e-8 * 20
        mu = 2.5e-8 * 20
        # floor division keeps 'length' an integer under true division as
        # well; it is used below as a range() bound and step
        length = int(2e6) // 20
        times = arghmm.get_time_points(ntimes=20, maxtime=200000)
        popsize = 1e4
        popsize2 = 1e4 * .5
        a = int(.3 * length)
        b = int(.7 * length)

        util.tic("sim ARG")
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=0, end=a)
        arg = arglib.sample_arg_smc(k, 2 * popsize2,
                                    rho, start=a, end=b,
                                    init_tree=arg)
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=b, end=length,
                                    init_tree=arg)

        # sim seq
        mut = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arghmm.make_alignment(arg, mut)
        util.toc()

        # sample arg
        util.tic("sample arg")
        arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                                 popsizes=1e4, carg=True)
        arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=1e4,
                                         rho=rho, mu=mu, times=times,
                                         refine=200)
        arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=1e4,
                                       rho=rho, mu=mu, times=times,
                                       refine=200)
        util.toc()

        # one estimate per local tree of the inferred ARG
        x = []
        y = []
        for (start, end), tree in arglib.iter_tree_tracks(arg2):
            arglib.remove_single_lineages(tree)
            x.append(start)
            y.append(mle_popsize_tree(tree, mintime=0))

        # thin popsizes: for each grid position take the estimate of the
        # first local tree starting at or beyond it (the last tree when
        # there is none)
        x2 = list(range(0, length, length // 5000))
        y2 = []
        j = 0
        for grid_pos in x2:
            while j < len(x) and x[j] < grid_pos:
                j += 1
            y2.append(y[min(j, len(y) - 1)])

        x3, y3 = stats.smooth2(x2, y2, 100e3)
        p = plot(x, y, ymin=0)
        p.plot(x3, y3, style='lines')
        # step function of the simulated population sizes
        p.plot([0, a, a, b, b, length],
               [popsize, popsize, popsize2, popsize2, popsize, popsize],
               style='lines')

        pause()
예제 #11
0
        if abs(times3[i] - x) < 1:
            times.append(times3[i])
        elif abs(times3[j] - x) < 1:
            times.append(times3[j])
        else:
            times.append(x)
    return times


#=============================================================================
if 1:
    # Always-enabled script section: load the sample ARG and alignment,
    # display the local trees, then visit every boundary between two
    # consecutive blocks.
    #times = arghmm.get_time_points(ntimes=20)
    arg = arglib.read_arg("test/data/sample.arg")
    seqs = read_fasta("test/data/sample.fa")

    # materialize the (block, tree) pairs so they can be indexed below
    trees = list(arglib.iter_tree_tracks(arg, convert=True))

    # draw mappings
    win = argvis.show_tree_track(trees)

    # presumably the number of leaves in the ARG -- confirm what ilen does
    nleaves = ilen(arg.leaves())
    for i in xrange(len(trees) - 1):
        block1, _tree1 = trees[i]
        block2, _tree2 = trees[i + 1]
        # first coordinate of the right-hand block; presumably its start
        pos = block2[0]
        # marginal trees half a unit to either side of that position
        tree1 = arg.get_marginal_tree(pos - .5)
        tree2 = arg.get_marginal_tree(pos + .5)
        layout1 = argvis.layout_arg(tree1)
        layout2 = argvis.layout_arg(tree2)
        #layout1 = treelib.layout_tree_vertical(
        #    treelib.layout_tree(tree1, xscale=1, yscale=1), leaves=0)
예제 #12
0
    def test_est_arg_popsize(self):
        """
        Plot population size estimates from an ARG inferred from simulated data.

        An ARG is sampled with arglib.sample_arg_smc in three consecutive
        segments; the middle segment [a, b) uses half the population size
        of the two outer ones.  Mutations and an alignment are simulated
        on it, a new ARG is sampled from the alignment with
        arghmm.sample_arg and then passed through
        arghmm.resample_climb_arg and arghmm.resample_all_arg.
        mle_popsize_tree is evaluated on every local tree of the inferred
        ARG; the estimates are thinned onto a regular grid, smoothed, and
        plotted against the population sizes used for the simulation.
        """

        k = 20
        rho = 1.5e-8 * 20
        mu = 2.5e-8 * 20
        # floor division keeps 'length' an integer under true division as
        # well; it is used below as a range() bound and step
        length = int(2e6) // 20
        times = arghmm.get_time_points(ntimes=20, maxtime=200000)
        popsize = 1e4
        popsize2 = 1e4 * .5
        a = int(.3 * length)
        b = int(.7 * length)

        util.tic("sim ARG")
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=0, end=a)
        arg = arglib.sample_arg_smc(k, 2 * popsize2,
                                    rho, start=a, end=b,
                                    init_tree=arg)
        arg = arglib.sample_arg_smc(k, 2 * popsize,
                                    rho, start=b, end=length,
                                    init_tree=arg)

        # sim seq
        mut = arghmm.sample_arg_mutations(arg, mu, times)
        seqs = arghmm.make_alignment(arg, mut)
        util.toc()

        # sample arg
        util.tic("sample arg")
        arg2 = arghmm.sample_arg(seqs, rho=rho, mu=mu, times=times,
                                 popsizes=1e4, carg=True)
        arg2 = arghmm.resample_climb_arg(arg2, seqs, popsizes=1e4,
                                         rho=rho, mu=mu, times=times,
                                         refine=200)
        arg2 = arghmm.resample_all_arg(arg2, seqs, popsizes=1e4,
                                       rho=rho, mu=mu, times=times,
                                       refine=200)
        util.toc()

        # one estimate per local tree of the inferred ARG
        x = []
        y = []
        for (start, end), tree in arglib.iter_tree_tracks(arg2):
            arglib.remove_single_lineages(tree)
            x.append(start)
            y.append(mle_popsize_tree(tree, mintime=0))

        # thin popsizes: for each grid position take the estimate of the
        # first local tree starting at or beyond it (the last tree when
        # there is none)
        x2 = list(range(0, length, length // 5000))
        y2 = []
        j = 0
        for grid_pos in x2:
            while j < len(x) and x[j] < grid_pos:
                j += 1
            y2.append(y[min(j, len(y) - 1)])

        x3, y3 = stats.smooth2(x2, y2, 100e3)
        p = plot(x, y, ymin=0)
        p.plot(x3, y3, style='lines')
        # step function of the simulated population sizes
        p.plot([0, a, a, b, b, length],
               [popsize, popsize, popsize2, popsize2, popsize, popsize],
               style='lines')

        pause()