class Model(object):
    """Event-driven DCJ (double-cut-and-join) simulation over a contig pool.

    The pool is a SampleTree holding live linear/circular contigs plus at
    most one dead ("garbage") contig, each weighted by its number of bases.
    Three event types (live-live, live-dead, dead-dead) are registered on
    an EventQueue and fired by simulate().
    """

    def __init__(self):
        self.pool = SampleTree()
        self.eventQueue = EventQueue()
        self.__resetCounts()

    def setParameters(self, N, rll, rld=0, rdd=0, fl=0, fg=0, pgain=0):
        """Reset the event queue and register the event types.

        N:     (fixed) number of bases in the model
        rll:   rate for dcj on the bases in the contig pool
        rld:   rate for dcj where one break is in the pool and the other
               is in the garbage
        rdd:   rate for dcj where both breaks are in the garbage
        fl:    telomere loss modifier
        fg:    telomere gain modifier
        pgain: dead gain probability

        Each rate is scaled by N before being handed to the event queue;
        an event type whose rate is not positive is not registered.
        """
        self.eventQueue.reset()
        self.N = N
        self.fl = fl
        self.fg = fg
        self.pgain = pgain
        for rate, handler in ((rll, self.__llEvent),
                              (rld, self.__ldEvent),
                              (rdd, self.__ddEvent)):
            if rate > 0:
                self.eventQueue.addEventType(N * rate, handler)

    @staticmethod
    def __splitBases(total, numLinear, numCircular):
        """Split total bases between linear and circular contigs.

        Returns (linearBases, circularBases).  The linear share is rounded
        down and the circular share receives the remainder, so the two
        always add up to total.  Exact integer arithmetic is used on
        purpose: multiplying by a float ratio can land just below an
        integer (0.58 * 200 == 115.99999999999999) and lose a base.

        Raises ZeroDivisionError when numLinear + numCircular == 0.
        """
        linearBases = (total * numLinear) // (numLinear + numCircular)
        return linearBases, total - linearBases

    @staticmethod
    def __evenSizes(total, count):
        """Return count sizes summing to total that differ by at most one.

        The first (total % count) entries get the extra base.
        """
        base, extra = divmod(total, count)
        return [base + 1] * extra + [base] * (count - extra)

    def setStartingState(self, garbageSize, numLinear, numCircular):
        """Rebuild the pool for the starting state.

        Creates one dead circular contig of garbageSize bases (when
        garbageSize > 0), then numLinear linear and numCircular circular
        contigs.  Contigs of a kind all have the same size (modulo
        rounding) so that the pool holds exactly self.N bases.

        setParameters() must have been called first (self.N is read).
        Raises ZeroDivisionError when numLinear + numCircular == 0.
        """
        assert self.N > garbageSize + numLinear + numCircular
        self.pool = SampleTree()
        numGarbage = 0
        if garbageSize > 0:
            garbage = CircularContig(garbageSize)
            garbage.setDead()
            self.pool.insert(garbage, garbage.numBases())
            numGarbage = 1

        linearBases, circularBases = self.__splitBases(
            self.N - garbageSize, numLinear, numCircular)
        assert linearBases + circularBases + garbageSize == self.N

        if numLinear > 0:
            added = 0
            for size in self.__evenSizes(linearBases, numLinear):
                # plus 1 since number of adjacencies is 1 + number of bases
                contig = LinearContig(size + 1)
                self.pool.insert(contig, contig.numBases())
                added += contig.size
            assert added == linearBases + numLinear
            assert self.pool.size() == numLinear + numGarbage
            assert self.pool.weight() == linearBases + garbageSize

        if numCircular > 0:
            added = 0
            for size in self.__evenSizes(circularBases, numCircular):
                contig = CircularContig(size)
                self.pool.insert(contig, contig.numBases())
                added += contig.size
            assert added == circularBases
            assert self.pool.size() == numLinear + numCircular + numGarbage
            assert self.pool.weight() == \
                circularBases + linearBases + garbageSize

    def simulate(self, time):
        """Run the simulation for the specified time.

        Counters are reset, then events are pulled from the queue and
        fired until the queue returns None.
        """
        self.eventQueue.begin()
        self.__resetCounts()
        while True:
            nextEvent = self.eventQueue.next(time)
            if nextEvent is None:
                break
            nextEvent()

    def __poolTooSmall(self):
        """True when the pool is empty or has total weight 1."""
        return self.pool.size() == 0 or self.pool.weight() == 1

    def __insertAll(self, contigs):
        """Insert each contig into the pool, weighted by its base count."""
        for contig in contigs:
            self.pool.insert(contig, contig.numBases())

    def __reinsert(self, c1, c2):
        """Put c1 (and c2, when it is a different object) back in the pool."""
        self.pool.insert(c1, c1.numBases())
        if c2 is not c1:
            self.pool.insert(c2, c2.numBases())

    @staticmethod
    def __chooseDeadIndex(dcjResult):
        """Pick which dcj result becomes the dead contig.

        A single result is always index 0.  With two results a random
        draw against the contig sizes decides; random is only consulted
        in the two-result case.
        """
        # NOTE(review): random.randint includes both endpoints, so index 1
        # is chosen with probability (size1 + 1) / (size0 + size1 + 1),
        # not size1 / (size0 + size1).  Kept as in the original -- confirm
        # whether exact size-proportional choice was intended.
        if len(dcjResult) == 2 and \
                random.randint(0, dcjResult[0].size + dcjResult[1].size) >= \
                dcjResult[0].size:
            return 1
        return 0

    def __drawSamples(self):
        """Draw two random adjacencies (node, offset) from the pool.

        Nothing is removed from the pool here; callers remove the nodes
        once they decide to act on them.

        Returns (sampleNode1, offset1, sampleNode2, offset2).
        """
        sampleNode1, offset1 = self.pool.uniformSample()
        sampleNode2, offset2 = self.pool.uniformSample()

        # the offset is weighted based on the number of bases.  we want to
        # translate this into number of edges, splitting the probability
        # between the two telomere edges: for linear contigs with zero
        # offset, we flip a coin to move it to the other side.
        if sampleNode1.data.isLinear() and offset1 == 0:
            if random.random() < 0.5:
                offset1 = sampleNode1.data.numBases()
        # the second sample is only flipped when it is a different node
        if sampleNode2 is not sampleNode1 and \
                sampleNode2.data.isLinear() and offset2 == 0:
            if random.random() < 0.5:
                offset2 = sampleNode2.data.numBases()

        assert offset1 < sampleNode1.data.size
        assert offset2 < sampleNode2.data.size
        return (sampleNode1, offset1, sampleNode2, offset2)

    def __llEvent(self):
        """LIVE-LIVE event.

        Is a normal DCJ operation between two live contigs unless the two
        breakpoints are identical or on telomeres, in which case the fl
        and fg parameters are used to decide on fission/fusion operations
        that modify the number of telomeres.
        """
        if self.__poolTooSmall():
            return

        sampleNode1, offset1, sampleNode2, offset2 = self.__drawSamples()
        c1 = sampleNode1.data
        c2 = sampleNode2.data

        # don't deal with dead contigs in this event
        if c1.isDead() or c2.isDead():
            return

        # the sampled contigs leave the pool; every path below puts
        # contigs back
        self.pool.remove(sampleNode1)
        if c1 is not c2:
            self.pool.remove(sampleNode2)

        # case 1) gain of telomere
        if sampleNode1 is sampleNode2 and offset1 == offset2:
            return self.__llGain(c1, c2, offset1, offset2)

        # case 2) loss of telomere
        if c1.isLinear() and c2.isLinear() and \
                (offset1 == 0 or offset1 == c1.size - 1) and \
                (offset2 == 0 or offset2 == c2.size - 1):
            return self.__llLoss(c1, c2, offset1, offset2)

        # case 3) no gain or loss
        self.llCount += 1
        forward = random.randint(0, 1) == 1
        dcjResult = dcj(c1, offset1, c2, offset2, forward)
        self.__insertAll(dcjResult)

    def __llGain(self, c1, c2, offset1, offset2):
        """Do the fission telomere gain operation (if the fg check passes).

        Only attempted when c1 is circular or the breakpoint is not on a
        telomere of c1.  When the operation is not performed the input
        contigs are put back in the pool unchanged.
        """
        if c1.isCircular() or (offset1 != 0 and offset1 != c1.size - 1):
            forward = self.fg > random.random()
            if forward:
                self.fgCount += 1
                dcjResult = dcj(c1, offset1, c2, offset2, forward)
                if c1.isCircular():
                    assert len(dcjResult) == 1 and dcjResult[0].isLinear()
                else:
                    assert len(dcjResult) == 2 and \
                        dcjResult[0].isLinear() and dcjResult[1].isLinear()
                self.__insertAll(dcjResult)
                return
        self.__reinsert(c1, c2)

    def __llLoss(self, c1, c2, offset1, offset2):
        """Do the fusion telomere loss operation (if the fl check passes).

        The acceptance threshold is fl / 4 when both breakpoints are on
        the same contig and fl / 2 otherwise.  When the operation is not
        performed the input contigs are put back in the pool unchanged.
        """
        # decided once, up front: the names are rebound to the results of
        # circularize() below, so testing identity afterwards would only
        # be valid if circularize() returns the very same object
        sameContig = c1 is c2
        divisor = 4.0 if sameContig else 2.0
        forward = self.fl / divisor > random.random()
        if not forward:
            self.__reinsert(c1, c2)
            return

        c1 = c1.circularize()
        c2 = c1 if sameContig else c2.circularize()
        dcjResult = dcj(c1, offset1, c2, offset2, forward)
        self.flCount += 1
        assert len(dcjResult) == 1
        if sameContig:
            assert dcjResult[0].isCircular()
        else:
            assert dcjResult[0].isLinear()
        self.__insertAll(dcjResult)

    def __ldEvent(self):
        """LIVE-DEAD (or DEAD-LIVE) event.

        One contig is alive and the other is the unique dead contig.
        This can result in a loss of live contigs and/or a change in the
        number of live bases.
        """
        if self.__poolTooSmall():
            return

        sampleNode1, offset1, sampleNode2, offset2 = self.__drawSamples()
        c1 = sampleNode1.data
        c2 = sampleNode2.data

        # only deal with live / dead contigs in this event
        if c1.isDead() == c2.isDead():
            return

        self.pool.remove(sampleNode1)
        if c1 is not c2:
            self.pool.remove(sampleNode2)

        # make sure c1 is alive and c2 is dead
        if c1.isDead():
            c1, c2 = c2, c1
            offset1, offset2 = offset2, offset1

        # do the dcj
        dcjResult = dcj(c1, offset1, c2, offset2, random.randint(0, 1) == 1)
        dcjResult[self.__chooseDeadIndex(dcjResult)].setDead(True)

        if len(dcjResult) == 1:
            self.ldLossCount += 1
        else:
            self.ldSwapCount += 1

        # add the resulting contigs back to the pool
        deadCount = 0
        for res in dcjResult:
            if res.isDead():
                deadCount += 1
            self.pool.insert(res, res.numBases())
        assert deadCount == 1

    def __ddEvent(self):
        """DEAD-DEAD event.

        The dead contig rearranges with itself.  pgain is used to decide
        how often this operation breaks off a new circular live
        chromosome.
        """
        if self.__poolTooSmall():
            return

        sampleNode1, offset1, sampleNode2, offset2 = self.__drawSamples()
        c1 = sampleNode1.data
        c2 = sampleNode2.data

        # only deal with dead / dead contigs in this event
        if not c1.isDead() or not c2.isDead():
            return

        # only support single dead contig
        assert c1 is c2

        # don't know what to do here
        if offset1 == offset2:
            return

        self.pool.remove(sampleNode1)
        # only reachable when the assertion above is compiled out
        if c1 is not c2:
            self.pool.remove(sampleNode2)

        # forward means do not cut
        forward = random.random() > self.pgain

        # do the dcj
        dcjResult = dcj(c1, offset1, c2, offset2, forward)
        dcjResult[self.__chooseDeadIndex(dcjResult)].setDead(True)

        if forward:
            self.ddSwapCount += 1
            assert len(dcjResult) == 1
        else:
            self.ddGainCount += 1
            assert len(dcjResult) == 2
            assert not dcjResult[0].isDead() or not dcjResult[1].isDead()

        self.__insertAll(dcjResult)

    def __resetCounts(self):
        """Set all event counters to zero."""
        self.llCount = 0
        self.fgCount = 0
        self.flCount = 0
        self.ldLossCount = 0
        self.ldSwapCount = 0
        self.ddGainCount = 0
        self.ddSwapCount = 0