def _longest_branch_length(newick):
    """Return the largest number found in a three-leaf Newick string.

    All Newick punctuation and the leaf labels '0', '1' and '2' are blanked
    out and the remaining whitespace-separated fields are read as floats.
    Any other leaf label survives the blanking and makes ``float()`` raise
    ``ValueError``.  The result is 0 when no field is larger than zero.
    """
    stripped = newick
    for token in (")", "(", "'0'", "'1'", "'2'", ":", ";", ","):
        stripped = stripped.replace(token, " ")
    longest = 0
    for field in stripped.split():
        value = float(field)
        if value > longest:
            longest = value
    return longest


def runcoasim(outputdir, populationSpec, NeRef, L, r, outgroup, migrationSpec=False):
    """Simulate an ARG with CoaSim and write its local trees to ``trees.dnd``.

    One line per interval of the simulated ARG is written to
    ``<outputdir>/trees.dnd`` in the form ``<start> <end> <tree>``.

    Parameters:
        outputdir: directory in which ``trees.dnd`` is created (overwritten).
        populationSpec: forwarded to ``CoaSim.simulate`` as its second
            positional argument.
        NeRef, L, r: combined into ``rho = 4 * NeRef * L * r``.
        outgroup: when greater than zero, every tree is wrapped in a new
            root holding the original tree (with branch length
            ``outgroup - longest``, ``longest`` being the largest branch
            length in the tree string) and a leaf ``'outgroup'`` with branch
            length ``outgroup``.  Otherwise trees are written unchanged.
        migrationSpec: forwarded as ``migrationSpec=`` only when truthy.

    Raises:
        ValueError: if ``outgroup > 0`` and a tree contains leaf labels other
            than '0', '1' and '2' (see ``_longest_branch_length``).
    """
    rho = 4 * NeRef * L * r
    # The file is opened before simulating (as before), but the context
    # manager now guarantees it is closed even if the simulation fails.
    with open(outputdir + "/trees.dnd", "w") as out:
        if migrationSpec:
            arg = CoaSim.simulate([], populationSpec,
                                  migrationSpec=migrationSpec,
                                  rho=rho, keepEmptyIntervals=True)
        else:
            arg = CoaSim.simulate([], populationSpec,
                                  rho=rho, keepEmptyIntervals=True)

        for interval in arg.intervals:
            newick = str(interval.tree)
            if outgroup > 0:
                val = outgroup - _longest_branch_length(newick)
                newick = ('(' + newick.replace(';', '') + ' : ' + str(val)
                          + ", 'outgroup' : " + str(outgroup) + ');')
            out.write(str(interval.start) + " " + str(interval.end)
                      + " " + newick + "\n")
#!/bin/env python
# Regression test: Newick output of the first local tree when migration
# events are kept in the simulated ARG.

import CoaSim
from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \
    Migration as Mi

# Two named sub-populations, four samples each, joined by a Merge.
subPopulations = [P(1, S(4), name=label) for label in ('1', '2')]
popSpec = P(1, M(1.5, subPopulations))

# Migration specified in both directions with the same rate.
migSpec = [Mi(source, target, 1)
           for source, target in (('1', '2'), ('2', '1'))]

arg = CoaSim.simulate([], popSpec, migSpec,
                      keepEmptyIntervals=True,
                      keepMigrationEvents=True,
                      seed=10)
tree = arg.intervals[0].tree

# Expected output for seed=10; migration events show up as [&migration=...].
expectedNewick = "(((('5' : 0.259312,('0' : 0.171585,'3' : 0.171585) : [&migration={1,0.218047,0}] 0.0877268) : 0.13414,('2' : 0.218047,'1' : 0.218047) : [&migration={1,0.218047,0}] 0.175404) : 0.491754,('7' : 0.0821288,'4' : 0.0821288) : 0.803077) : 0.117671,'6' : [&migration={1,0.205103,0,0.171585,1}] 1.00288);"
assert str(tree) == expectedNewick
def simulateCoasim(spec, **args):
    """Simulate local trees with CoaSim and pass them on to ``bppSeqGen``.

    Parameters:
        spec: callable invoked as ``spec(**args)``; its result is handed to
            ``CoaSim.simulate`` as the population specification.
        **args: simulation parameters.

            Required: ``length``, ``NeRef``, ``r``, ``g``, ``u`` and
            ``optionsFile``.  Time parameters are discovered by probing the
            keys ``"T1"``, ``"T12"``, ``"T123"``, ... until one is missing;
            for the i-th key found, both ``"N<i>"`` and the cumulative key
            (``"N1"``, ``"N12"``, ...) must be present.

            Optional: ``migration`` (default ``[]``), ``keepEmptyIntervals``
            (default ``True``), ``recMap``, ``addOutGroup``,
            ``tree_height_file``, ``tree_visitor`` and ``hook``.

    Returns:
        Whatever ``bppSeqGen(trees, **args)`` returns.

    Raises:
        Exception: when a required parameter is missing.
    """
    # test if all args are there
    required = (
        ("length", "Sequence 'length' parameter required"),
        ("NeRef", "'NeRef' parameter required"),
        ("r", "'r' (recombination rate?) parameter required"),
        ("g", "'g' (Generation time) parameter required"),
        ("u", "'u' (mutation rate) parameter required"),
        ("optionsFile", "'optionsFile' parameter required"),
    )
    for key, message in required:
        if key not in args:
            raise Exception(message)
    args.setdefault("migration", [])

    # test T's and N's: count the consecutive keys "T1", "T12", "T123", ...
    count = 0
    t = "T1"
    while t in args:
        count = count + 1
        t = t + str(len(t))

    # Test N: every T needs its own "N<i>" and the cumulative "N12...i"
    n = "N"
    for i in range(1, count + 1):
        if "N" + str(i) not in args:
            raise Exception("'N" + str(i) + "' parameter required")
        n = n + str(i)
        if n not in args:
            raise Exception("'" + n + "' parameter required")

    # run the stuff
    st = spec(**args)
    kei = args.get("keepEmptyIntervals", True)
    arg = CoaSim.simulate(
        [],
        st,
        rho=4 * args["NeRef"] * args["length"] * args["r"],
        keepEmptyIntervals=kei,
        migrationSpec=args["migration"],
    )  # what about 4

    if "recMap" in args:
        # map intervals and parse trees
        trees = mapIntervalsAndParseTrees(arg.intervals, args["recMap"])
    else:
        # parse trees
        trees = [
            (interval.start, interval.end, parse_tree(str(interval.tree)))
            for interval in arg.intervals
        ]

    # reroot trees
    if args.get("addOutGroup"):
        trees = addOutGroup(trees, **args)

    # scale trees
    trees = scale_trees(trees, 2.0 * args["NeRef"] * args["g"] * args["u"])

    if "tree_height_file" in args:
        bps = args["length"]
        # Only build the default visitor when the caller did not supply one.
        if "tree_visitor" in args:
            th = args["tree_visitor"]
        else:
            th = TreeHeight()
        # The context manager closes the file even if a visitor raises.
        with open(args["tree_height_file"], "w") as fout:
            for (start, end, tree) in trees:
                th.reset()
                tree.dfs_traverse(th)
                to_write = "%s\n" % str(th.get_result())
                # one output line per integer position covered by the tree
                for _ in range(int(start * bps), int(end * bps)):
                    fout.write(to_write)

    if "hook" in args:
        args["hook"].run(arg, trees, args)

    return bppSeqGen(trees, **args)
def mergeEvent(self, pops, t, k): self.counts['me'] += 1 from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \ Bottleneck as B, Growth as G, \ Migration as Mi cb = callbacks() CoaSim.simulate( [], P(1, M(1.5, [P(1, S(2), name='1'), P(1, S(2), name='2')]), epochs=[B(.2, 1.5, 2), G(10, 2)]), [Mi('1', '2', 0.001), Mi('2', '1', 0.002)], rho=40, Q=10, gamma=2, keepEmptyIntervals=True, seed=10, callbacks=cb) # this is just regression testing, not proper testing :-( expected = { 'me': 1, 'bn_leave': 1, 'mig': 4, 'g_enter': 1, 'coa': 173, 'gc': 10, 'bn_enter': 1,
#!/bin/env python
# Test of Node.isAncestral and Node.children on the root of the first
# local tree of a seeded simulation.

import CoaSim

markers = [CoaSim.SNPMarker(position, 0.1, 0.9) for position in (0.2, 0.3)]

intervals = CoaSim.simulate(markers, 5, rho=4, seed=10).intervals
i1 = intervals[0]
t1 = i1.tree
r = t1.root

# The root must be ancestral at the interval start, at its midpoint and just
# below its end, but not at the end itself nor just below the start.
midpoint = i1.start + (i1.end - i1.start) / 2
ancestryChecks = (
    (i1.start, True),
    (i1.end, False),
    (midpoint, True),
    (i1.start - 1e-10, False),
    (i1.end - 1e-10, True),
)
for position, expected in ancestryChecks:
    assert bool(r.isAncestral(position)) == expected

# testing comparison...
assert r.children == r.children

# regression testing...
assert len(r.children) == 2
assert len(r.children[0].children) == 1
#!/bin/env python
# Regression test for the local trees of a seeded simulation: the
# tree <-> interval link, branch lengths, heights and Newick output.

import CoaSim

markers = [CoaSim.SNPMarker(0.2, 0.1, 0.9),
           CoaSim.SNPMarker(0.3, 0.1, 0.9)]

intervals = CoaSim.simulate(markers, 5, rho=4, seed=10).intervals
trees = [i.tree for i in intervals]

# each tree must point back at the interval it was taken from
assert intervals == [t.interval for t in trees]

# regression testing... FIXME: dependent on local random number generator...
assert [t.branchLength for t in trees] == [6.8694553479721083,
                                           4.8932502412522147]
assert [t.height for t in trees] == [2.0935405459452552,
                                     1.2403594539408294]

# The original compared str(t), relying on Python 2 leaking the list
# comprehension variable (t was the last tree).  Name that tree explicitly so
# the check no longer depends on the leak.
lastTree = trees[-1]
assert str(lastTree) == "(('3' : 0.970517,'0' : 0.970517) : 0.269843,(('4' : 0.638217,'2' : 0.638217) : 0.165581,'1' : 0.803798) : 0.436561);"
#!/bin/env python
# Regression test of simulated sequences, plus checks that CoaSim.simulate
# rejects invalid input with the expected ValueError messages.

import CoaSim

markers = [CoaSim.SNPMarker(0.2, 0.1, 0.9),
           CoaSim.SNPMarker(0.3, 0.1, 0.9)]

# first a very simple regression test ... not really thorough
seqs = CoaSim.simulate(markers, 5, rho=40, seed=10).sequences
assert seqs == [[0, 0], [0, 0], [1, 0], [0, 0], [0, 1]]


def expectValueError(message, *args, **kwargs):
    """Call CoaSim.simulate(*args, **kwargs) and require a ValueError.

    The string form of the exception must equal ``message``.  An
    AssertionError is raised explicitly when no exception occurs, so the
    check still fires under ``python -O`` (a bare ``assert False`` would be
    stripped).
    """
    try:
        CoaSim.simulate(*args, **kwargs)
    except ValueError as e:
        assert str(e) == message
    else:
        raise AssertionError("expected ValueError: %s" % message)


# test incorrect input
expectValueError('Marker positions out of sequence.',
                 [markers[1], markers[0]], 5)
expectValueError('Non-positive sample size.', markers, -5)
expectValueError('Negative rate or intensity: -2.', markers, 5, rho=-2)
#!/bin/env python
# Regression test: number of ARG nodes of each type in a seeded simulation
# with recombination and gene conversion.

import CoaSim

arg = CoaSim.simulate([], 5, rho=20, gamma=10, Q=2, seed=10,
                      keepEmptyIntervals=True)

# Tally the nodes by their Python type.
counter = {}
for node in arg.nodes:
    nodeType = type(node)
    counter[nodeType] = counter.get(nodeType, 0) + 1

assert len(counter) == 4
expectedCounts = (
    (CoaSim.LeafNode, 5),
    (CoaSim.CoalescentNode, 72),
    (CoaSim.RecombinationNode, 88),
    (CoaSim.GeneConversionNode, 82),
)
for nodeType, expected in expectedCounts:
    assert counter[nodeType] == expected

from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \
    Bottleneck as B, Growth as G, \
    Migration as Mi
#!/bin/env python
# Tests of ARG.intervals: number of intervals without recombination,
# comparison of interval lists, and seeded regression values.

import CoaSim

markers = [CoaSim.SNPMarker(position, 0.1, 0.9) for position in (0.2, 0.3)]

# no recomb, 1 interval
noRecombination = CoaSim.simulate(markers, 5)
assert len(noRecombination.intervals) == 1

arg = CoaSim.simulate(markers, 5, rho=4)
# check that comparison works
assert arg.intervals == arg.intervals
# this wont work, however: assert arg.intervals[0] is arg.intervals[0]
# since we get new objects for each call to intervals.

# regression testing... FIXME: dependent on local random number generator...
arg = CoaSim.simulate(markers, 5, rho=4, seed=10)
expectedValues = (
    ("start", [0.092338229572557953, 0.2567803758460937]),
    ("end", [0.2458365472247063, 0.42215607660922971]),
    ("length", [0.15349831765214833, 0.16537570076313601]),
)
for attribute, values in expectedValues:
    assert [getattr(interval, attribute)
            for interval in arg.intervals] == values
#!/bin/env python
# Regression test: ARG node counts per node type for a seeded simulation,
# followed by a second simulation with a structured population and migration.

import CoaSim

arg = CoaSim.simulate([], 5, rho=20, gamma=10, Q=2, seed=10,
                      keepEmptyIntervals=True)

# Count how many nodes of each Python type the ARG holds.
counter = {}
for node in arg.nodes:
    kind = type(node)
    if kind in counter:
        counter[kind] += 1
    else:
        counter[kind] = 1

assert len(counter) == 4
for kind, expected in ((CoaSim.LeafNode, 5),
                       (CoaSim.CoalescentNode, 72),
                       (CoaSim.RecombinationNode, 88),
                       (CoaSim.GeneConversionNode, 82)):
    assert counter[kind] == expected

from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \
    Bottleneck as B, Growth as G, \
    Migration as Mi

# Two named populations with two samples each and asymmetric migration rates.
structure = P(1, M(1.5, [P(1, S(2), name='1'), P(1, S(2), name='2')]))
migrations = [Mi('1', '2', 0.001), Mi('2', '1', 0.002)]
arg = CoaSim.simulate([], structure, migrations,
                      keepEmptyIntervals=True, seed=10)
#!/bin/env python
# Regression test for the Newick string of the first local tree when the
# simulation keeps migration events.

import CoaSim
from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \
    Migration as Mi

# Population structure: populations '1' and '2' (four samples each) under
# one Merge inside an outer population.
firstPopulation = P(1, S(4), name='1')
secondPopulation = P(1, S(4), name='2')
popSpec = P(1, M(1.5, [firstPopulation, secondPopulation]))

# Migration is specified in both directions.
migSpec = [
    Mi('1', '2', 1),
    Mi('2', '1', 1),
]

simulated = CoaSim.simulate(
    [],
    popSpec,
    migSpec,
    keepEmptyIntervals=True,
    keepMigrationEvents=True,
    seed=10,
)
firstInterval = simulated.intervals[0]
tree = firstInterval.tree

# Output recorded for seed=10.
expected = "(((('5' : 0.259312,('0' : 0.171585,'3' : 0.171585) : [&migration={1,0.218047,0}] 0.0877268) : 0.13414,('2' : 0.218047,'1' : 0.218047) : [&migration={1,0.218047,0}] 0.175404) : 0.491754,('7' : 0.0821288,'4' : 0.0821288) : 0.803077) : 0.117671,'6' : [&migration={1,0.205103,0,0.171585,1}] 1.00288);"
assert str(tree) == expected
def simulateCoasim(spec, **args):
    """Simulate local trees with CoaSim and pass them on to ``bppSeqGen``.

    Parameters:
        spec: callable invoked as ``spec(**args)``; its result is handed to
            ``CoaSim.simulate`` as the population specification.
        **args: simulation parameters.

            Required: ``length``, ``NeRef``, ``r``, ``g``, ``u`` and
            ``optionsFile``.  Time parameters are discovered by probing the
            keys ``"T1"``, ``"T12"``, ``"T123"``, ... until one is missing;
            for the i-th key found, both ``"N<i>"`` and the cumulative key
            (``"N1"``, ``"N12"``, ...) must be present.

            Optional: ``migration`` (default ``[]``), ``keepEmptyIntervals``
            (default ``True``), ``recMap``, ``addOutGroup``,
            ``tree_height_file``, ``tree_visitor`` and ``hook``.

    Returns:
        Whatever ``bppSeqGen(trees, **args)`` returns.

    Raises:
        Exception: when a required parameter is missing.
    """
    # test if all args are there
    required = (
        ("length", "Sequence 'length' parameter required"),
        ("NeRef", "'NeRef' parameter required"),
        ("r", "'r' (recombination rate?) parameter required"),
        ("g", "'g' (Generation time) parameter required"),
        ("u", "'u' (mutation rate) parameter required"),
        ("optionsFile", "'optionsFile' parameter required"),
    )
    for key, message in required:
        if key not in args:
            raise Exception(message)
    args.setdefault("migration", [])

    # test T's and N's: count the consecutive keys "T1", "T12", "T123", ...
    count = 0
    t = "T1"
    while t in args:
        count = count + 1
        t = t + str(len(t))

    # Test N: every T needs its own "N<i>" and the cumulative "N12...i"
    n = "N"
    for i in range(1, count + 1):
        if "N" + str(i) not in args:
            raise Exception("'N" + str(i) + "' parameter required")
        n = n + str(i)
        if n not in args:
            raise Exception("'" + n + "' parameter required")

    # run the stuff
    st = spec(**args)
    kei = args.get("keepEmptyIntervals", True)
    arg = CoaSim.simulate(
        [],
        st,
        rho=4 * args["NeRef"] * args["length"] * args["r"],
        keepEmptyIntervals=kei,
        migrationSpec=args["migration"],
    )  # what about 4

    # FIXME (original author's note): possibly collapse all runs of identical
    # trees into single intervals before mapping/parsing them.

    if "recMap" in args:
        # map intervals and parse trees; the mapper receives the raw tree
        # objects, not parsed trees
        trees = mapIntervalsAndParseTrees(
            [(x.start, x.end, x.tree) for x in arg.intervals],
            args["recMap"],
        )
    else:
        # parse trees
        trees = [
            (interval.start, interval.end, parse_tree(str(interval.tree)))
            for interval in arg.intervals
        ]

    # reroot trees
    if args.get("addOutGroup"):
        trees = addOutGroup(trees, **args)

    # scale trees
    trees = scale_trees(trees, 2.0 * args["NeRef"] * args["g"] * args["u"])

    if "tree_height_file" in args:
        bps = args["length"]
        # Only build the default visitor when the caller did not supply one.
        if "tree_visitor" in args:
            th = args["tree_visitor"]
        else:
            th = TreeHeight()
        # The context manager closes the file even if a visitor raises.
        with open(args["tree_height_file"], "w") as fout:
            for (start, end, tree) in trees:
                th.reset()
                tree.dfs_traverse(th)
                to_write = "%s\n" % str(th.get_result())
                # one output line per integer position covered by the tree
                for _ in range(int(start * bps), int(end * bps)):
                    fout.write(to_write)

    if "hook" in args:
        args["hook"].run(arg, trees, args)

    return bppSeqGen(trees, **args)
def simulateCoasim(spec, **args):
    """Simulate local trees with CoaSim and pass them on to ``bppSeqGen``.

    Parameters:
        spec: callable invoked as ``spec(**args)``; its result is handed to
            ``CoaSim.simulate`` as the population specification.
        **args: simulation parameters.

            Required: ``length``, ``NeRef``, ``r``, ``g``, ``u`` and
            ``optionsFile``.  Time parameters are discovered by probing the
            keys ``"T1"``, ``"T12"``, ``"T123"``, ... until one is missing;
            for the i-th key found, both ``"N<i>"`` and the cumulative key
            (``"N1"``, ``"N12"``, ...) must be present.

            Optional: ``migration`` (default ``[]``), ``keepEmptyIntervals``
            (default ``True``), ``addOutGroup``, ``tree_height_file``,
            ``tree_visitor`` and ``hook``.  When both ``hotspotWidth`` and
            ``hotspotRatio`` are given, interval coordinates are rescaled
            with a piecewise-linear map that has a single centred hotspot,
            and ``args["hotSpotBackgroundRate"]`` is set before ``args`` is
            passed to the hook and to ``bppSeqGen``.

    Returns:
        Whatever ``bppSeqGen(trees, **args)`` returns.

    Raises:
        Exception: when a required parameter is missing.
    """
    # test if all args are there
    required = (
        ("length", "Sequence 'length' parameter required"),
        ("NeRef", "'NeRef' parameter required"),
        ("r", "'r' (recombination rate?) parameter required"),
        ("g", "'g' (Generation time) parameter required"),
        ("u", "'u' (mutation rate) parameter required"),
        ("optionsFile", "'optionsFile' parameter required"),
    )
    for key, message in required:
        if key not in args:
            raise Exception(message)
    args.setdefault("migration", [])

    # test T's and N's: count the consecutive keys "T1", "T12", "T123", ...
    count = 0
    t = "T1"
    while t in args:
        count = count + 1
        t = t + str(len(t))

    # Test N: every T needs its own "N<i>" and the cumulative "N12...i"
    n = "N"
    for i in range(1, count + 1):
        if "N" + str(i) not in args:
            raise Exception("'N" + str(i) + "' parameter required")
        n = n + str(i)
        if n not in args:
            raise Exception("'" + n + "' parameter required")

    # run the stuff
    st = spec(**args)
    kei = args.get("keepEmptyIntervals", True)
    arg = CoaSim.simulate([], st,
                          rho=4 * args["NeRef"] * args["length"] * args["r"],
                          keepEmptyIntervals=kei,
                          migrationSpec=args["migration"])  # what about 4

    trees = []
    if "hotspotWidth" in args and "hotspotRatio" in args:
        # hotspotWidth
        hw = args["hotspotWidth"]
        # ratio of hotspot to non-hotspot recombination rate:
        F = args["hotspotRatio"]
        p = hw / (1 - hw)
        # width of hotspot region before contraction:
        ohw = (F * p) / (F * p + 1)
        # expansion factor:
        e = (1 - hw) / (1 - ohw)
        # contraction factor:
        c = hw / ohw

        args["hotSpotBackgroundRate"] = args["r"] * (1 - hw) / (1 - ohw)
        # Same text as the former `print >>sys.stderr, ...` statement.
        sys.stderr.write("Simulating hotspot %s\n"
                         % (args["hotSpotBackgroundRate"],))

        # map: (position where a segment starts, scale factor of the segment);
        # the last entry only marks the end of the sequence.
        m = [(0, e),
             ((1 - ohw) / 2.0, c),
             (((1 - ohw) / 2.0) + ohw, e),
             (1.0, None)]
        last = len(m) - 1

        j = 0
        prevEnd = 0
        for interval in arg.intervals:
            # advance to the map segment containing the interval start
            while j < last and interval.start >= m[j + 1][0]:
                j += 1
            start = prevEnd

            # Rescale the interval piece by piece.  The original only
            # handled one map boundary inside an interval, so an interval
            # spanning the whole hotspot was scaled with the wrong factor
            # beyond the second boundary.
            end = prevEnd
            position = interval.start
            k = j
            while k < last and interval.end > m[k + 1][0]:
                end += (m[k + 1][0] - position) * m[k][1]
                position = m[k + 1][0]
                k += 1
            end += (interval.end - position) * m[k][1]

            prevEnd = end
            trees.append((start, end, parse_tree(str(interval.tree))))
    else:
        for interval in arg.intervals:
            trees.append((interval.start, interval.end,
                          parse_tree(str(interval.tree))))

    # reroot trees
    if args.get("addOutGroup"):
        trees = addOutGroup(trees, **args)

    # scale trees
    trees = scale_trees(trees, 2.0 * args["NeRef"] * args["g"] * args["u"])

    if "tree_height_file" in args:
        bps = args["length"]
        # Only build the default visitor when the caller did not supply one.
        if "tree_visitor" in args:
            th = args["tree_visitor"]
        else:
            th = TreeHeight()
        # The context manager closes the file even if a visitor raises.
        with open(args["tree_height_file"], "w") as fout:
            for (start, end, tree) in trees:
                th.reset()
                tree.dfs_traverse(th)
                to_write = "%s\n" % str(th.get_result())
                # one output line per integer position covered by the tree
                for _ in range(int(start * bps), int(end * bps)):
                    fout.write(to_write)

    if "hook" in args:
        args["hook"].parseARG(arg, trees, args)

    return bppSeqGen(trees, **args)
def migrationEvent(self, pop1, pop2, t, k): self.counts['mig'] += 1 def mergeEvent(self, pops, t, k): self.counts['me'] += 1 from CoaSim.popStructure import Population as P, Merge as M, Sample as S, \ Bottleneck as B, Growth as G, \ Migration as Mi cb = callbacks() CoaSim.simulate([], P(1,M(1.5,[P(1,S(2),name='1'),P(1,S(2),name='2')]), epochs=[B(.2,1.5,2),G(10,2)]), [Mi('1','2',0.001),Mi('2','1',0.002)], rho=40,Q=10,gamma=2, keepEmptyIntervals=True, seed=10, callbacks=cb) # this is just regression testing, not proper testing :-( expected = {'me': 1, 'bn_leave': 1, 'mig': 4, 'g_enter': 1, 'coa': 173, 'gc': 10, 'bn_enter': 1, 'g_leave': 0, 'rec': 168} assert cb.counts == expected class AllDone: pass class ex_callbacks(callbacks): def coalescentEvent(self, n, k): callbacks.coalescentEvent(self, n, k) if self.counts['coa'] == 120: raise AllDone
#!/bin/env python import CoaSim markers = [CoaSim.SNPMarker(0.2, 0.1, 0.9), CoaSim.SNPMarker(0.3, 0.1, 0.9)] # first a very simple regression test ... not really thorough seqs = CoaSim.simulate(markers, 5, rho=40, seed=10).sequences assert seqs == [[0, 0], [0, 0], [1, 0], [0, 0], [0, 1]] # test incorrect input try: CoaSim.simulate([markers[1], markers[0]], 5) assert False except ValueError, e: assert str(e) == 'Marker positions out of sequence.' try: CoaSim.simulate(markers, -5) assert False except ValueError, e: assert str(e) == 'Non-positive sample size.' try: CoaSim.simulate(markers, 5, rho=-2) assert False except ValueError, e: assert str(e) == 'Negative rate or intensity: -2.' try: CoaSim.simulate(markers, 5, gamma=-2)