def accuracy(self, xcoverage):  ### Calculate accuracy (if required) at xcoverage and returns
    '''
    Return the accuracy stored for a given X coverage, calculating missing values first.

    self.list['Accuracy'] is used as a cache indexed by X coverage. Any entries missing up to and
    including index xcoverage are appended in order, then the requested entry is returned.
    >> xcoverage:int = X coverage, used as an index into self.list['Accuracy'].
    << self.list['Accuracy'][xcoverage], or None if an error was caught and logged.
    '''
    try:  ### ~ [1] Calculate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        cache = self.list['Accuracy']   # Same list object: appends below update the stored cache
        while len(cache) <= xcoverage:
            # When (1 - previous accuracy) * GenomeSize truncates to zero, store 1.0 for this
            # coverage without calling the probability functions.
            if not int((1.0 - cache[-1]) * self.getNum('GenomeSize')):
                cache.append(1.0)
                continue
            xcov = len(cache)   # X coverage being calculated = index of the entry to be appended
            majority = int(xcov / 2.0) + 1  # Number of correct reads needed for majority
            p_correct = 1.0 - self.getNum('ErrPerBase')
            try:
                cache.append(rje.logBinomial(majority, xcov, p_correct, exact=False, callobj=self))
            except Exception:
                # Fall back to logPoisson only for real calculation failures. A bare except here
                # would also trap KeyboardInterrupt/SystemExit and silently switch method.
                cache.append(rje.logPoisson(majority, xcov * p_correct, exact=False, callobj=self))
        self.debug(self.list['Accuracy'])
        return self.list['Accuracy'][xcoverage]
    except:
        # Catch-all: any failure is reported through errorLog() and the method returns None.
        self.errorLog('%s.accuracy error' % self.prog())
def accuracy(self, xcoverage):  ### Calculate accuracy (if required) at xcoverage and returns
    '''
    Return the accuracy stored for a given X coverage, calculating missing values first.

    self.list['Accuracy'] is used as a cache indexed by X coverage. Any entries missing up to and
    including index xcoverage are appended in order, then the requested entry is returned.
    >> xcoverage:int = X coverage, used as an index into self.list['Accuracy'].
    << self.list['Accuracy'][xcoverage], or None if an error was caught and logged.
    '''
    try:  ### ~ [1] Calculate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        cache = self.list['Accuracy']   # Same list object: appends below update the stored cache
        while len(cache) <= xcoverage:
            # When (1 - previous accuracy) * GenomeSize truncates to zero, store 1.0 for this
            # coverage without calling the probability functions.
            if not int((1.0 - cache[-1]) * self.getNum('GenomeSize')):
                cache.append(1.0)
                continue
            xcov = len(cache)   # X coverage being calculated = index of the entry to be appended
            majority = int(xcov / 2.0) + 1  # Number of correct reads needed for majority
            p_correct = 1.0 - self.getNum('ErrPerBase')
            try:
                cache.append(rje.logBinomial(majority, xcov, p_correct, exact=False, callobj=self))
            except Exception:
                # Fall back to logPoisson only for real calculation failures. A bare except here
                # would also trap KeyboardInterrupt/SystemExit and silently switch method.
                cache.append(rje.logPoisson(majority, xcov * p_correct, exact=False, callobj=self))
        self.debug(self.list['Accuracy'])
        return self.list['Accuracy'][xcoverage]
    except:
        # Catch-all: any failure is reported through errorLog() and the method returns None.
        self.errorLog('%s.accuracy error' % self.prog())
def coverage(self):  ### Calculates estimated % coverage and accuracy of genome sequencing.
    '''
    Calculates estimated % coverage and accuracy of genome sequencing and saves them as a table.

    A 'coverage' table is created (keyed on 'SMRT' if BySMRT is set, else on 'XCoverage'). The
    distribution of per-base X coverage for one round of sequencing is calculated, then repeatedly
    combined with itself, adding one table entry per round until the cumulative X coverage reaches
    MaxCov. The table keys are converted to floats and the table is saved with saveToFile().
    << None. Any error is caught and reported through errorLog().
    '''
    try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        # XCoverage, SMRT, %Cov, Accuracy
        if self.getBool('BySMRT'):
            ckey = 'SMRT'
        else:
            ckey = 'XCoverage'
        cfields = ['XCoverage', 'SMRT', '%Coverage', 'Accuracy']
        for xn in self.list['XnList']:
            cfields.append('%%X%d' % xn)
        cdb = self.db().addEmptyTable('coverage', cfields, [ckey])

        ### ~ [2] Calculate stats for one round ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.progLog('\r#XCOV', 'Calculating coverage stats...')
        genome_size = self.getNum('GenomeSize')
        av_read = self.getNum('AvRead')
        cov_per_base_per_read = av_read / genome_size
        if self.getBool('BySMRT'):
            reads = self.getInt('SMRTReads')    # If going per SMRT cell
        else:
            reads = int(0.5 + genome_size / av_read)    # if going per X coverage
        # Calculate X coverage counts using binomial
        bases = int(genome_size)
        xcov = []   # List where index is X coverage and number is proportion of reads
        while bases > 1:    # Stop once the bases not yet assigned to a coverage class drop to <= 1
            try:
                xcov.append(rje.logBinomial(len(xcov), reads, cov_per_base_per_read, exact=True, callobj=self))
            except Exception:
                # Fall back to logPoisson only for real calculation failures (not Ctrl-C/SystemExit)
                xcov.append(rje.logPoisson(len(xcov), reads * cov_per_base_per_read, exact=True, callobj=self))
            bases -= genome_size * xcov[-1]
            if len(xcov) > reads:
                raise ValueError('XCoverage cannot exceed read count!')
        cyccov = xcov[0:]   # Cumulative distribution; after the first round it equals one round
        self.debug(xcov)

        ### ~ [3] Cycle through rounds, multiplying by X coverage ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        cx = 0.0
        ctot = self.getInt('MaxCov')
        xcoverage = 0.0
        min_frac = 1.0 / genome_size    # Proportion equivalent to a single base of the genome
        while xcoverage < ctot:
            self.progLog('\r#XCOV', 'Calculating coverage stats: %.1f%% (%d|%d)' % (cx / ctot, cdb.entryNum() + 1, len(cyccov)))
            cx += 100.0
            # Update xcov: calculate %bases at different X coverage
            if cdb.entryNum():  # Equivalent of starting with [1.0] (100% 0 @ 0X)
                prevcov = cyccov
                # Combine the running distribution with one more round. The result needs
                # len(prevcov) + len(xcov) - 1 slots; sizing it as 2 * len(prevcov) - 1 raised
                # IndexError whenever xcov was longer than prevcov.
                cyccov = [0.0] * (len(prevcov) + len(xcov) - 1)
                for xi, prev_p in enumerate(prevcov):
                    for xj, round_p in enumerate(xcov):
                        cyccov[xi + xj] += prev_p * round_p
                # Trim trailing coverage classes that represent less than one base
                while cyccov[-1] < min_frac:
                    cyccov.pop(-1)
            # Calculate accuracy: For each X coverage, calculate % bases with >50% correct
            # Sum over covered classes (X >= 1) so the numerator matches the sum(cyccov[1:])
            # denominator; range(len(cyccov[1:])) included 0X and dropped the deepest class.
            accuracy = 0.0
            for x in range(1, len(cyccov)):
                accuracy += cyccov[x] * self.accuracy(x)
            accuracy = 100.0 * accuracy / sum(cyccov[1:])
            # SMRT cells versus coverage
            xcoverage += av_read * reads / genome_size
            smrt = (genome_size * xcoverage) / (av_read * self.getNum('SMRTReads'))
            # Update cdb
            centry = {
                'XCoverage': rje.sf(xcoverage, 3),
                'SMRT': rje.sf(smrt, 3),
                '%Coverage': 100.0 * (1.0 - cyccov[0]),     # Everything not at 0X
                'Accuracy': accuracy,
            }
            for xn in self.list['XnList']:
                if xn <= len(cyccov):
                    centry['%%X%d' % xn] = rje.sf(100.0 * sum(cyccov[xn:]), 4)  # % bases at >= xn X
                else:
                    centry['%%X%d' % xn] = 0.000
            cdb.addEntry(centry)
        self.progLog('\r#XCOV', 'Calculated coverage stats upto %dX coverage.' % ctot)

        ### ~ [4] Save results ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        # Re-key entries on float values. Keys are snapshotted first because the data dictionary
        # is modified inside the loop.
        for xkey in list(cdb.dataKeys()):
            cdb.dict['Data'][float(xkey)] = cdb.dict['Data'].pop(xkey)
        cdb.saveToFile()
        return
    except:
        # Catch-all: any failure (including the ValueError above) is reported through errorLog()
        self.errorLog('%s.coverage error' % self.prog())
def coverage(self):  ### Calculates estimated % coverage and accuracy of genome sequencing.
    '''
    Calculates estimated % coverage and accuracy of genome sequencing and saves them as a table.

    A 'coverage' table is created (keyed on 'SMRT' if BySMRT is set, else on 'XCoverage'). The
    distribution of per-base X coverage for one round of sequencing is calculated, then repeatedly
    combined with itself, adding one table entry per round until the cumulative X coverage reaches
    MaxCov. The table keys are converted to floats and the table is saved with saveToFile().
    << None. Any error is caught and reported through errorLog().
    '''
    try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        # XCoverage, SMRT, %Cov, Accuracy
        if self.getBool('BySMRT'):
            ckey = 'SMRT'
        else:
            ckey = 'XCoverage'
        cfields = ['XCoverage', 'SMRT', '%Coverage', 'Accuracy']
        for xn in self.list['XnList']:
            cfields.append('%%X%d' % xn)
        cdb = self.db().addEmptyTable('coverage', cfields, [ckey])

        ### ~ [2] Calculate stats for one round ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        self.progLog('\r#XCOV', 'Calculating coverage stats...')
        genome_size = self.getNum('GenomeSize')
        av_read = self.getNum('AvRead')
        cov_per_base_per_read = av_read / genome_size
        if self.getBool('BySMRT'):
            reads = self.getInt('SMRTReads')    # If going per SMRT cell
        else:
            reads = int(0.5 + genome_size / av_read)    # if going per X coverage
        # Calculate X coverage counts using binomial
        bases = int(genome_size)
        xcov = []   # List where index is X coverage and number is proportion of reads
        while bases > 1:    # Stop once the bases not yet assigned to a coverage class drop to <= 1
            try:
                xcov.append(rje.logBinomial(len(xcov), reads, cov_per_base_per_read, exact=True, callobj=self))
            except Exception:
                # Fall back to logPoisson only for real calculation failures (not Ctrl-C/SystemExit)
                xcov.append(rje.logPoisson(len(xcov), reads * cov_per_base_per_read, exact=True, callobj=self))
            bases -= genome_size * xcov[-1]
            if len(xcov) > reads:
                raise ValueError('XCoverage cannot exceed read count!')
        cyccov = xcov[0:]   # Cumulative distribution; after the first round it equals one round
        self.debug(xcov)

        ### ~ [3] Cycle through rounds, multiplying by X coverage ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        cx = 0.0
        ctot = self.getInt('MaxCov')
        xcoverage = 0.0
        min_frac = 1.0 / genome_size    # Proportion equivalent to a single base of the genome
        while xcoverage < ctot:
            self.progLog('\r#XCOV', 'Calculating coverage stats: %.1f%% (%d|%d)' % (cx / ctot, cdb.entryNum() + 1, len(cyccov)))
            cx += 100.0
            # Update xcov: calculate %bases at different X coverage
            if cdb.entryNum():  # Equivalent of starting with [1.0] (100% 0 @ 0X)
                prevcov = cyccov
                # Combine the running distribution with one more round. The result needs
                # len(prevcov) + len(xcov) - 1 slots; sizing it as 2 * len(prevcov) - 1 raised
                # IndexError whenever xcov was longer than prevcov.
                cyccov = [0.0] * (len(prevcov) + len(xcov) - 1)
                for xi, prev_p in enumerate(prevcov):
                    for xj, round_p in enumerate(xcov):
                        cyccov[xi + xj] += prev_p * round_p
                # Trim trailing coverage classes that represent less than one base
                while cyccov[-1] < min_frac:
                    cyccov.pop(-1)
            # Calculate accuracy: For each X coverage, calculate % bases with >50% correct
            # Sum over covered classes (X >= 1) so the numerator matches the sum(cyccov[1:])
            # denominator; range(len(cyccov[1:])) included 0X and dropped the deepest class.
            accuracy = 0.0
            for x in range(1, len(cyccov)):
                accuracy += cyccov[x] * self.accuracy(x)
            accuracy = 100.0 * accuracy / sum(cyccov[1:])
            # SMRT cells versus coverage
            xcoverage += av_read * reads / genome_size
            smrt = (genome_size * xcoverage) / (av_read * self.getNum('SMRTReads'))
            # Update cdb
            centry = {
                'XCoverage': rje.sf(xcoverage, 3),
                'SMRT': rje.sf(smrt, 3),
                '%Coverage': 100.0 * (1.0 - cyccov[0]),     # Everything not at 0X
                'Accuracy': accuracy,
            }
            for xn in self.list['XnList']:
                if xn <= len(cyccov):
                    centry['%%X%d' % xn] = rje.sf(100.0 * sum(cyccov[xn:]), 4)  # % bases at >= xn X
                else:
                    centry['%%X%d' % xn] = 0.000
            cdb.addEntry(centry)
        self.progLog('\r#XCOV', 'Calculated coverage stats upto %dX coverage.' % ctot)

        ### ~ [4] Save results ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        # Re-key entries on float values. Keys are snapshotted first because the data dictionary
        # is modified inside the loop.
        for xkey in list(cdb.dataKeys()):
            cdb.dict['Data'][float(xkey)] = cdb.dict['Data'].pop(xkey)
        cdb.saveToFile()
        return
    except:
        # Catch-all: any failure (including the ValueError above) is reported through errorLog()
        self.errorLog('%s.coverage error' % self.prog())