Beispiel #1
0
def runcoasim(outputdir,
              populationSpec,
              NeRef,
              L,
              r,
              outgroup,
              migrationSpec=False):
    """Simulate an ARG with CoaSim and dump its local trees to a file.

    One line per interval of the simulated ARG is written to
    ``<outputdir>/trees.dnd`` in the form ``<start> <end> <newick>``.

    When ``outgroup`` is positive, every tree is wrapped in a new root that
    joins it with an extra leaf named 'outgroup'.  The outgroup leaf gets
    branch length ``outgroup`` and the original tree hangs on a branch of
    length ``outgroup - m``, where ``m`` is the largest number found in the
    tree's Newick string.

    Args:
        outputdir: Directory that receives ``trees.dnd``.
        populationSpec: Passed to ``CoaSim.simulate`` as its second argument.
        NeRef: Factor of the recombination intensity (see ``rho`` below).
        L: Factor of the recombination intensity.
        r: Factor of the recombination intensity.
        outgroup: Branch length of the added outgroup leaf; values <= 0
            leave the trees untouched.
        migrationSpec: Forwarded to ``CoaSim.simulate`` only when truthy.

    Raises:
        ValueError: If, with ``outgroup > 0``, the Newick string contains
            anything besides numbers after the leaf labels '0', '1' and '2'
            and the punctuation have been blanked out (e.g. other labels).
    """
    simulate_options = {
        'rho': 4 * NeRef * L * r,
        'keepEmptyIntervals': True,
    }
    if migrationSpec:
        simulate_options['migrationSpec'] = migrationSpec

    # Everything that is not a branch length: Newick punctuation and the
    # three leaf labels this function knows about.
    non_numeric = (')', '(', "'0'", "'1'", "'2'", ':', ';', ',')

    # The context manager closes the file even if simulation or parsing fails.
    with open(outputdir + "/trees.dnd", "w") as out:
        arg = CoaSim.simulate([], populationSpec, **simulate_options)

        for interval in arg.intervals:
            newick = str(interval.tree)
            if outgroup > 0:
                numbers = newick
                for token in non_numeric:
                    numbers = numbers.replace(token, ' ')

                # NOTE(review): this is the longest single branch, which
                # presumably stands in for the tree height (true when a leaf
                # hangs directly off the root) -- confirm.
                longest = 0
                for field in numbers.split():
                    value = float(field)
                    if value > longest:
                        longest = value

                stem = outgroup - longest
                newick = ('(' + newick.replace(';', '') + ' : ' + str(stem) +
                          ", 'outgroup' : " + str(outgroup) + ');')

            out.write(str(interval.start) + " " + str(interval.end) + " " +
                      newick + "\n")
Beispiel #2
0
#!/bin/env python

import CoaSim
from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \
                                Migration as Mi

# Two sampled populations ('1' and '2', four lineages each) joined by a merge
# at 1.5, with migration in both directions between them.
popSpec = P(1, M(1.5, [P(1, S(4), name='1'),
                       P(1, S(4), name='2')]))
migSpec = [Mi('1', '2', 1),
           Mi('2', '1', 1)]

# Only the tree of the first interval is inspected.
simulated = CoaSim.simulate([], popSpec,
                            migrationSpec=migSpec,
                            seed=10,
                            keepMigrationEvents=True,
                            keepEmptyIntervals=True)
tree = simulated.intervals[0].tree

# Regression check: migration events show up as [&migration=...] annotations.
expected_newick = "(((('5' : 0.259312,('0' : 0.171585,'3' : 0.171585) : [&migration={1,0.218047,0}] 0.0877268) : 0.13414,('2' : 0.218047,'1' : 0.218047) : [&migration={1,0.218047,0}] 0.175404) : 0.491754,('7' : 0.0821288,'4' : 0.0821288) : 0.803077) : 0.117671,'6' : [&migration={1,0.205103,0,0.171585,1}] 1.00288);"
assert str(tree) == expected_newick
def simulateCoasim(spec, **args):
    """Simulate local trees with CoaSim and pass them on to ``bppSeqGen``.

    Args:
        spec: Callable invoked as ``spec(**args)``; its result is handed to
            ``CoaSim.simulate`` as the population structure.
        **args: Simulation parameters.  Required: ``length``, ``NeRef``,
            ``r``, ``g``, ``u``, ``optionsFile``.  For every split-time key
            present (``T1``, ``T12``, ``T123``, ...) the matching size keys
            (``N1``, ``N2``, ... and ``N1``, ``N12``, ``N123``, ...) are
            required as well.  Optional: ``migration`` (default ``[]``),
            ``keepEmptyIntervals`` (default ``True``), ``recMap``,
            ``addOutGroup``, ``tree_height_file``, ``tree_visitor``,
            ``hook``.

    Returns:
        Whatever ``bppSeqGen(trees, **args)`` returns.

    Raises:
        Exception: If a required parameter is missing.
    """
    # Mandatory keywords, checked in this order; the first missing one aborts.
    required = (
        ("length", "Sequence 'length' parameter required"),
        ("NeRef", "'NeRef' parameter required"),
        ("r", "'r' (recombination rate?) parameter required"),
        ("g", "'g' (Generation time) parameter required"),
        ("u", "'u' (mutation rate) parameter required"),
        ("optionsFile", "'optionsFile' parameter required"),
    )
    for key, message in required:
        if key not in args:
            raise Exception(message)

    args.setdefault("migration", [])

    # Count the split times: the keys grow as T1, T12, T123, ...
    count = 0
    t = "T1"
    while t in args:
        count += 1
        t = t + str(len(t))

    # Each split time needs a size Ni and a cumulative size N1, N12, N123, ...
    n = "N"
    for i in range(1, count + 1):
        if "N" + str(i) not in args:
            raise Exception("'N" + str(i) + "' parameter required")
        n = n + str(i)
        if n not in args:
            raise Exception("'" + n + "' parameter required")

    # Run the simulation.
    st = spec(**args)
    kei = args.get("keepEmptyIntervals", True)

    # NOTE: the original author questioned the factor 4 in rho ("what about 4").
    arg = CoaSim.simulate(
        [],
        st,
        rho=4 * args["NeRef"] * args["length"] * args["r"],
        keepEmptyIntervals=kei,
        migrationSpec=args["migration"],
    )

    if "recMap" in args:
        # Map intervals and parse trees in one go.
        trees = mapIntervalsAndParseTrees(arg.intervals, args["recMap"])
    else:
        trees = [(interval.start, interval.end,
                  parse_tree(str(interval.tree)))
                 for interval in arg.intervals]

    # Re-root the trees on an outgroup if requested.
    if args.get("addOutGroup"):
        trees = addOutGroup(trees, **args)

    # Scale the trees.
    trees = scale_trees(trees, 2.0 * args["NeRef"] * args["g"] * args["u"])

    if "tree_height_file" in args:
        bps = args["length"]
        th = args["tree_visitor"] if "tree_visitor" in args else TreeHeight()
        # One line per base pair covered by each tree; the context manager
        # closes the file even if a visitor raises.
        with open(args["tree_height_file"], "w") as fout:
            for (start, end, tree) in trees:
                th.reset()
                tree.dfs_traverse(th)
                to_write = "%s\n" % str(th.get_result())
                for _ in range(int(start * bps), int(end * bps)):
                    fout.write(to_write)

    if "hook" in args:
        args["hook"].run(arg, trees, args)

    return bppSeqGen(trees, **args)
Beispiel #4
0
    def mergeEvent(self, pops, t, k):
        """Record one merge event under the 'me' key of ``self.counts``."""
        tally = self.counts
        tally['me'] = tally['me'] + 1



from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \
                                Bottleneck as B, Growth as G, \
                                Migration as Mi

# Callback object that is handed to the simulator below.
cb = callbacks()

# Two sampled populations merging at 1.5, with Bottleneck and Growth epochs
# on the merged population and migration in both directions.
population = P(1,
               M(1.5, [P(1, S(2), name='1'), P(1, S(2), name='2')]),
               epochs=[B(.2, 1.5, 2), G(10, 2)])
migration = [Mi('1', '2', 0.001), Mi('2', '1', 0.002)]

CoaSim.simulate([], population, migration,
                rho=40, Q=10, gamma=2,
                keepEmptyIntervals=True,
                seed=10,
                callbacks=cb)

# this is just regression testing, not proper testing :-(
expected = {
    'me': 1,
    'bn_leave': 1,
    'mig': 4,
    'g_enter': 1,
    'coa': 173,
    'gc': 10,
    'bn_enter': 1,
Beispiel #5
0
#!/bin/env python

import CoaSim

markers = [
    CoaSim.SNPMarker(0.2, 0.1, 0.9),
    CoaSim.SNPMarker(0.3, 0.1, 0.9),
]

intervals = CoaSim.simulate(markers, 5, rho=4, seed=10).intervals

# Work with the root of the first interval's tree.
i1 = intervals[0]
t1 = i1.tree
r = t1.root

# The assertions below treat the interval as half-open: start is ancestral,
# end is not.
epsilon = 1e-10
midpoint = i1.start + (i1.end - i1.start) / 2

assert r.isAncestral(i1.start)
assert not r.isAncestral(i1.end)
assert r.isAncestral(midpoint)
assert not r.isAncestral(i1.start - epsilon)
assert r.isAncestral(i1.end - epsilon)

# Two separate reads of the children must compare equal.
assert r.children == r.children

# Regression testing of the tree shape.
assert len(r.children) == 2
first_child = r.children[0]
assert len(first_child.children) == 1
Beispiel #6
0
#!/bin/env python

import CoaSim

markers = [CoaSim.SNPMarker(0.2, 0.1, 0.9), CoaSim.SNPMarker(0.3, 0.1, 0.9)]

intervals = CoaSim.simulate(markers, 5, rho=4, seed=10).intervals
trees = [i.tree for i in intervals]

# Every tree must point back at the interval it came from.
assert intervals == [t.interval for t in trees]

# regression testing... FIXME: dependent on local random number generator...
assert [t.branchLength
        for t in trees] == [6.8694553479721083, 4.8932502412522147]
assert [t.height for t in trees] == [2.0935405459452552, 1.2403594539408294]

# The original checked `t`, the variable leaked by the list comprehension
# above (Python 2 only).  That is the last tree, so name it explicitly.
assert str(
    trees[-1]
) == "(('3' : 0.970517,'0' : 0.970517) : 0.269843,(('4' : 0.638217,'2' : 0.638217) : 0.165581,'1' : 0.803798) : 0.436561);"
Beispiel #7
0
#!/bin/env python

import CoaSim

markers = [CoaSim.SNPMarker(0.2, 0.1, 0.9),
           CoaSim.SNPMarker(0.3, 0.1, 0.9)]

# first a very simple regression test ... not really thorough
seqs = CoaSim.simulate(markers, 5, rho=40, seed=10).sequences
assert seqs == [[0, 0], [0, 0], [1, 0], [0, 0], [0, 1]]

# test incorrect input
# (`except ... as e` is valid on Python 2.6+ and Python 3; the comma form
# is Python-2-only.)

# Markers given out of positional order.
try:
    CoaSim.simulate([markers[1], markers[0]], 5)
    assert False
except ValueError as e:
    assert str(e) == 'Marker positions out of sequence.'

# Negative sample size.
try:
    CoaSim.simulate(markers, -5)
    assert False
except ValueError as e:
    assert str(e) == 'Non-positive sample size.'


# Negative recombination intensity.
try:
    CoaSim.simulate(markers, 5, rho=-2)
    assert False
except ValueError as e:
    assert str(e) == 'Negative rate or intensity: -2.'
Beispiel #8
0
#!/bin/env python

import CoaSim
# Simulate with recombination (rho) and gene conversion (gamma, Q) enabled.
arg = CoaSim.simulate([], 5,
                      seed=10,
                      rho=20, gamma=10, Q=2,
                      keepEmptyIntervals=True)

# Tally the ARG's nodes by their class.
counter = {}
for node in arg.nodes:
    kind = type(node)
    counter[kind] = counter.get(kind, 0) + 1

# Regression values for seed=10: four node classes in total.
assert len(counter) == 4
assert counter[CoaSim.LeafNode] == 5
assert counter[CoaSim.CoalescentNode] == 72
assert counter[CoaSim.RecombinationNode] == 88
assert counter[CoaSim.GeneConversionNode] == 82




from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \
                                Bottleneck as B, Growth as G, \
                                Migration as Mi
Beispiel #9
0
#!/bin/env python

import CoaSim

markers = [
    CoaSim.SNPMarker(0.2, 0.1, 0.9),
    CoaSim.SNPMarker(0.3, 0.1, 0.9),
]

# Without recombination there is exactly one interval.
no_recomb = CoaSim.simulate(markers, 5)
assert len(no_recomb.intervals) == 1

# Check that interval lists compare equal.  Identity would not hold, since
# each access to .intervals returns new objects:
#   assert arg.intervals[0] is arg.intervals[0]   # fails
arg = CoaSim.simulate(markers, 5, rho=4)
assert arg.intervals == arg.intervals

# regression testing... FIXME: dependent on local random number generator...
arg = CoaSim.simulate(markers, 5, rho=4, seed=10)

expected_starts = [0.092338229572557953, 0.2567803758460937]
expected_ends = [0.2458365472247063, 0.42215607660922971]
expected_lengths = [0.15349831765214833, 0.16537570076313601]

assert [i.start for i in arg.intervals] == expected_starts
assert [i.end for i in arg.intervals] == expected_ends
assert [i.length for i in arg.intervals] == expected_lengths

Beispiel #10
0
#!/bin/env python

import CoaSim

markers = [CoaSim.SNPMarker(0.2, 0.1, 0.9),
           CoaSim.SNPMarker(0.3, 0.1, 0.9)]

# No recombination: the whole sequence is a single interval.
single = CoaSim.simulate(markers, 5).intervals
assert len(single) == 1

arg = CoaSim.simulate(markers, 5, rho=4)
# Equality works across two reads of .intervals; identity would not,
# because every read yields new objects.
assert arg.intervals == arg.intervals

# regression testing... FIXME: dependent on local random number generator...
arg = CoaSim.simulate(markers, 5, rho=4, seed=10)
expected = {
    'start': [0.092338229572557953, 0.2567803758460937],
    'end': [0.2458365472247063, 0.42215607660922971],
    'length': [0.15349831765214833, 0.16537570076313601],
}
for attribute in ('start', 'end', 'length'):
    observed = [getattr(i, attribute) for i in arg.intervals]
    assert observed == expected[attribute]
Beispiel #11
0
#!/bin/env python

import CoaSim
# ARG with both recombination (rho) and gene conversion (gamma, Q).
arg = CoaSim.simulate([], 5,
                      rho=20,
                      gamma=10,
                      Q=2,
                      seed=10,
                      keepEmptyIntervals=True)

# Count how many nodes of each class the ARG contains.
counter = dict()
for node in arg.nodes:
    node_class = type(node)
    if node_class in counter:
        counter[node_class] += 1
    else:
        counter[node_class] = 1

# Regression values for seed=10.
assert len(counter) == 4
assert counter[CoaSim.LeafNode] == 5
assert counter[CoaSim.CoalescentNode] == 72
assert counter[CoaSim.RecombinationNode] == 88
assert counter[CoaSim.GeneConversionNode] == 82




from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \
                                Bottleneck as B, Growth as G, \
                                Migration as Mi



# Two sampled populations merging at 1.5, with migration in both directions.
structure = P(1, M(1.5, [P(1, S(2), name='1'), P(1, S(2), name='2')]))
migrations = [Mi('1', '2', 0.001), Mi('2', '1', 0.002)]
arg = CoaSim.simulate([], structure, migrations,
                      keepEmptyIntervals=True,
                      seed=10)
Beispiel #12
0
#!/bin/env python

import CoaSim
from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \
                                Migration as Mi

# Population structure: samples of four from populations '1' and '2',
# merged at 1.5; migration runs in both directions.
sampled = [P(1, S(4), name='1'), P(1, S(4), name='2')]
popSpec = P(1, M(1.5, sampled))
migSpec = [Mi('1', '2', 1), Mi('2', '1', 1)]

first_interval = CoaSim.simulate([], popSpec, migSpec,
                                 keepEmptyIntervals=True,
                                 keepMigrationEvents=True,
                                 seed=10).intervals[0]
tree = first_interval.tree

# Regression check of the Newick output, including migration annotations.
newick = str(tree)
assert newick == "(((('5' : 0.259312,('0' : 0.171585,'3' : 0.171585) : [&migration={1,0.218047,0}] 0.0877268) : 0.13414,('2' : 0.218047,'1' : 0.218047) : [&migration={1,0.218047,0}] 0.175404) : 0.491754,('7' : 0.0821288,'4' : 0.0821288) : 0.803077) : 0.117671,'6' : [&migration={1,0.205103,0,0.171585,1}] 1.00288);"
def simulateCoasim(spec, **args):
    """Simulate local trees with CoaSim and pass them on to ``bppSeqGen``.

    Args:
        spec: Callable invoked as ``spec(**args)``; its result is handed to
            ``CoaSim.simulate`` as the population structure.
        **args: Simulation parameters.  Required: ``length``, ``NeRef``,
            ``r``, ``g``, ``u``, ``optionsFile``.  For every split-time key
            present (``T1``, ``T12``, ``T123``, ...) the matching size keys
            (``N1``, ``N2``, ... and ``N1``, ``N12``, ``N123``, ...) are
            required as well.  Optional: ``migration`` (default ``[]``),
            ``keepEmptyIntervals`` (default ``True``), ``recMap``,
            ``addOutGroup``, ``tree_height_file``, ``tree_visitor``,
            ``hook``.

    Returns:
        Whatever ``bppSeqGen(trees, **args)`` returns.

    Raises:
        Exception: If a required parameter is missing.
    """
    # Mandatory keywords, checked in this order; the first missing one aborts.
    required = (
        ("length", "Sequence 'length' parameter required"),
        ("NeRef", "'NeRef' parameter required"),
        ("r", "'r' (recombination rate?) parameter required"),
        ("g", "'g' (Generation time) parameter required"),
        ("u", "'u' (mutation rate) parameter required"),
        ("optionsFile", "'optionsFile' parameter required"),
    )
    for key, message in required:
        if key not in args:
            raise Exception(message)

    args.setdefault("migration", [])

    # Count the split times: the keys grow as T1, T12, T123, ...
    count = 0
    t = "T1"
    while t in args:
        count += 1
        t = t + str(len(t))

    # Each split time needs a size Ni and a cumulative size N1, N12, N123, ...
    n = "N"
    for i in range(1, count + 1):
        if "N" + str(i) not in args:
            raise Exception("'N" + str(i) + "' parameter required")
        n = n + str(i)
        if n not in args:
            raise Exception("'" + n + "' parameter required")

    # Run the simulation.
    st = spec(**args)
    kei = args.get("keepEmptyIntervals", True)

    # NOTE: the original author questioned the factor 4 in rho ("what about 4").
    arg = CoaSim.simulate(
        [],
        st,
        rho=4 * args["NeRef"] * args["length"] * args["r"],
        keepEmptyIntervals=kei,
        migrationSpec=args["migration"],
    )

    # FIXME: possibly collapse runs of identical consecutive trees into a
    # single (start, end, tree) entry before mapping/parsing.

    if "recMap" in args:
        # Map intervals and parse trees; the mapper receives plain tuples.
        trees = mapIntervalsAndParseTrees(
            [(x.start, x.end, x.tree) for x in arg.intervals],
            args["recMap"])
    else:
        trees = [(interval.start, interval.end,
                  parse_tree(str(interval.tree)))
                 for interval in arg.intervals]

    # Re-root the trees on an outgroup if requested.
    if args.get("addOutGroup"):
        trees = addOutGroup(trees, **args)

    # Scale the trees.
    trees = scale_trees(trees, 2.0 * args["NeRef"] * args["g"] * args["u"])

    if "tree_height_file" in args:
        bps = args["length"]
        th = args["tree_visitor"] if "tree_visitor" in args else TreeHeight()
        # One line per base pair covered by each tree; the context manager
        # closes the file even if a visitor raises.
        with open(args["tree_height_file"], "w") as fout:
            for (start, end, tree) in trees:
                th.reset()
                tree.dfs_traverse(th)
                to_write = "%s\n" % str(th.get_result())
                for _ in range(int(start * bps), int(end * bps)):
                    fout.write(to_write)

    if "hook" in args:
        args["hook"].run(arg, trees, args)

    return bppSeqGen(trees, **args)
def simulateCoasim(spec, **args):
    """Simulate local trees with CoaSim and pass them on to ``bppSeqGen``.

    When both ``hotspotWidth`` and ``hotspotRatio`` are given, the interval
    coordinates are rescaled through a three-segment map (background,
    hotspot, background) and ``args["hotSpotBackgroundRate"]`` is set before
    the trees are passed on.

    Args:
        spec: Callable invoked as ``spec(**args)``; its result is handed to
            ``CoaSim.simulate`` as the population structure.
        **args: Simulation parameters.  Required: ``length``, ``NeRef``,
            ``r``, ``g``, ``u``, ``optionsFile``.  For every split-time key
            present (``T1``, ``T12``, ``T123``, ...) the matching size keys
            (``N1``, ``N2``, ... and ``N1``, ``N12``, ``N123``, ...) are
            required as well.  Optional: ``migration`` (default ``[]``),
            ``keepEmptyIntervals`` (default ``True``), ``hotspotWidth``,
            ``hotspotRatio``, ``addOutGroup``, ``tree_height_file``,
            ``tree_visitor``, ``hook``.

    Returns:
        Whatever ``bppSeqGen(trees, **args)`` returns.

    Raises:
        Exception: If a required parameter is missing.
    """
    # Mandatory keywords, checked in this order; the first missing one aborts.
    required = (
        ("length", "Sequence 'length' parameter required"),
        ("NeRef", "'NeRef' parameter required"),
        ("r", "'r' (recombination rate?) parameter required"),
        ("g", "'g' (Generation time) parameter required"),
        ("u", "'u' (mutation rate) parameter required"),
        ("optionsFile", "'optionsFile' parameter required"),
    )
    for key, message in required:
        if key not in args:
            raise Exception(message)

    args.setdefault("migration", [])

    # Count the split times: the keys grow as T1, T12, T123, ...
    count = 0
    t = "T1"
    while t in args:
        count += 1
        t = t + str(len(t))

    # Each split time needs a size Ni and a cumulative size N1, N12, N123, ...
    n = "N"
    for i in range(1, count + 1):
        if "N" + str(i) not in args:
            raise Exception("'N" + str(i) + "' parameter required")
        n = n + str(i)
        if n not in args:
            raise Exception("'" + n + "' parameter required")

    # Run the simulation.
    st = spec(**args)
    kei = args.get("keepEmptyIntervals", True)

    # NOTE: the original author questioned the factor 4 in rho ("what about 4").
    arg = CoaSim.simulate([], st,
                          rho=4 * args["NeRef"] * args["length"] * args["r"],
                          keepEmptyIntervals=kei,
                          migrationSpec=args["migration"])

    trees = []
    if "hotspotWidth" in args and "hotspotRatio" in args:
        # hotspot width
        hw = args["hotspotWidth"]
        # ratio of hotspot to non-hotspot recombination rate
        F = args["hotspotRatio"]
        p = hw / (1 - hw)
        # width of hotspot region before contraction
        ohw = (F*p)/(F*p+1)
        # expansion factor
        e = (1-hw) / (1-ohw)
        # contraction factor
        c = hw / ohw

        args["hotSpotBackgroundRate"] = args["r"] * (1-hw)/(1-ohw)
        # Same text the former Python-2-only `print >>sys.stderr` produced.
        sys.stderr.write("Simulating hotspot " +
                         str(args["hotSpotBackgroundRate"]) + "\n")

        # Map of (segment start, scale factor): background, hotspot centred
        # on 0.5, background, and a sentinel at 1.0.
        m = [(0, e), ((1-ohw)/2.0, c), (((1-ohw)/2.0)+ohw, e), (1.0, None)]
        j = 0
        prevEnd = 0
        for interval in arg.intervals:
            # Advance to the map segment that contains the interval start.
            while j < len(m)-1 and interval.start >= m[j+1][0]:
                j += 1
            start = prevEnd
            if j < len(m)-1 and interval.end > m[j+1][0]:
                # Interval straddles the next boundary: scale each part with
                # its own factor.
                # NOTE(review): only one boundary crossing is handled per
                # interval -- confirm intervals never span the whole hotspot.
                end = (prevEnd
                       + (m[j+1][0] - interval.start) * m[j][1]
                       + (interval.end - m[j+1][0]) * m[j+1][1])
            else:
                end = prevEnd + (interval.end - interval.start) * m[j][1]
            prevEnd = end
            trees.append((start, end, parse_tree(str(interval.tree))))
    else:
        for interval in arg.intervals:
            trees.append((interval.start, interval.end,
                          parse_tree(str(interval.tree))))

    # Re-root the trees on an outgroup if requested.
    if args.get("addOutGroup"):
        trees = addOutGroup(trees, **args)

    # Scale the trees.
    trees = scale_trees(trees, 2.0*args["NeRef"]*args["g"]*args["u"])

    if "tree_height_file" in args:
        bps = args["length"]
        th = args["tree_visitor"] if "tree_visitor" in args else TreeHeight()
        # One line per base pair covered by each tree; the context manager
        # closes the file even if a visitor raises.
        with open(args["tree_height_file"], "w") as fout:
            for (start, end, tree) in trees:
                th.reset()
                tree.dfs_traverse(th)
                to_write = "%s\n" % str(th.get_result())
                for _ in range(int(start*bps), int(end*bps)):
                    fout.write(to_write)

    if "hook" in args:
        args["hook"].parseARG(arg, trees, args)

    return bppSeqGen(trees, **args)
Beispiel #15
0
    def migrationEvent(self, pop1, pop2, t, k):
        """Record one migration event under the 'mig' key of ``self.counts``."""
        tally = self.counts
        tally['mig'] = tally['mig'] + 1
    def mergeEvent(self, pops, t, k):
        """Record one merge event under the 'me' key of ``self.counts``."""
        tally = self.counts
        tally['me'] = tally['me'] + 1
        


from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \
                                Bottleneck as B, Growth as G, \
                                Migration as Mi

# Callback object whose per-event counts are checked below.
cb = callbacks()

# Two sampled populations merging at 1.5, Bottleneck and Growth epochs on the
# merged population, and migration in both directions.
structure = P(1,
              M(1.5, [P(1, S(2), name='1'), P(1, S(2), name='2')]),
              epochs=[B(.2, 1.5, 2), G(10, 2)])
migrations = [Mi('1', '2', 0.001), Mi('2', '1', 0.002)]

CoaSim.simulate([], structure, migrations,
                rho=40, Q=10, gamma=2,
                keepEmptyIntervals=True,
                seed=10,
                callbacks=cb)

# this is just regression testing, not proper testing :-(
expected = dict(me=1, bn_leave=1, bn_enter=1,
                g_enter=1, g_leave=0,
                mig=4, coa=173, gc=10, rec=168)
assert cb.counts == expected

class AllDone(Exception):
    """Raised from a callback to stop the simulation early.

    Derives from Exception so it can be raised on Python 3 as well; a plain
    class can only be raised on Python 2.
    """
class ex_callbacks(callbacks):
    """Callbacks that abort the simulation at the 120th coalescent event."""

    def coalescentEvent(self, n, k):
        """Count the event via the base class, then stop once 'coa' hits 120."""
        callbacks.coalescentEvent(self, n, k)
        limit_reached = self.counts['coa'] == 120
        if limit_reached:
            raise AllDone
Beispiel #16
0
#!/bin/env python

import CoaSim

markers = [CoaSim.SNPMarker(0.2, 0.1, 0.9), CoaSim.SNPMarker(0.3, 0.1, 0.9)]

# first a very simple regression test ... not really thorough
seqs = CoaSim.simulate(markers, 5, rho=40, seed=10).sequences
assert seqs == [[0, 0], [0, 0], [1, 0], [0, 0], [0, 1]]

# test incorrect input
# (`except ... as e` is valid on Python 2.6+ and Python 3; the comma form
# is Python-2-only.)

# Markers given out of positional order.
try:
    CoaSim.simulate([markers[1], markers[0]], 5)
    assert False
except ValueError as e:
    assert str(e) == 'Marker positions out of sequence.'

# Negative sample size.
try:
    CoaSim.simulate(markers, -5)
    assert False
except ValueError as e:
    assert str(e) == 'Non-positive sample size.'

# Negative recombination intensity.
try:
    CoaSim.simulate(markers, 5, rho=-2)
    assert False
except ValueError as e:
    assert str(e) == 'Negative rate or intensity: -2.'

try:
    CoaSim.simulate(markers, 5, gamma=-2)