def callBinPeak(self,pvalue=1e-05):
    """Flag every bin whose count reaches the Poisson significance cutoff.

    The cutoff is the smallest integer count, searched upwards from 1, for
    which ``prob.poisson_cdf(count, self.lam, False)`` is below ``pvalue``.
    ``self.pvalue`` records the p-value used and ``self.binpeak`` is rebuilt
    as one list of booleans per entry of ``self.bins`` (True where the bin
    count is at least the cutoff).  Progress is reported on stderr.

    NOTE(review): the ``False`` argument presumably selects the upper tail
    of the distribution -- confirm against the ``prob`` module.
    """
    self.pvalue = pvalue
    self.binpeak = []

    # The cutoff depends only on lam and pvalue, so it is derived once here
    # instead of evaluating the distribution for every single bin.
    cutoff = 1
    while not prob.poisson_cdf(cutoff, self.lam, False) < pvalue:
        cutoff += 1

    for idx, counts in enumerate(self.bins):
        print >>sys.stderr, "Call Bins Pvalue in", self.chrs[idx], "\r",
        self.binpeak.append(
            [True if count >= cutoff else False for count in counts]
        )

    print >>sys.stderr, "Call Bins Pvalue Done "
def iterPeaks(self):
    """Yield a refined ``Peak`` for every HMM segment that contains reads.

    For each ``(chrom, start, stop)`` produced by ``self.parse_segment()``:

    1. Reads are fetched from ``self.samfile`` and the segment is shrunk or
       grown to the extent they span (smallest ``pos`` to largest
       ``pos + qlen``).
    2. The pileup over that refined interval gives the deepest column
       (``peak_pos`` / ``peak_intensity``) and the mean depth
       (``coverage`` = summed column depth / refined width).
    3. A p-value is computed with ``prob.poisson_cdf(reads_num, lam, False)``
       where ``lam = self.mapped * width / self.total_length``.

    Each ``Peak`` is built from the list ``[chrom, refine_start,
    refine_stop, reads_num, pvalue, coverage, peak_pos, peak_intensity]``.

    Segments in which no read is found are skipped: they have no refined
    interval, and previously caused a pileup with ``None`` bounds followed
    by a ``TypeError``.

    ``self.peaks`` is reset to an empty list but is not filled here.
    Progress messages go to stderr.

    NOTE(review): ``self.samfile`` presumably is a pysam alignment file;
    ``qlen`` is used as the read's extent on the reference -- confirm.
    """
    self.peaks = []
    print >> sys.stderr, "Reading ", self.bamfilename, " HMM Segments"

    for k, (chrom, start, stop) in enumerate(self.parse_segment(), 1):
        if k % 10000 == 0:
            print >> sys.stderr, "parsed ", k, " segments\r",

        # Tighten the segment to the actual extent of the reads it holds.
        refine_start = None
        refine_stop = None
        reads_num = 0
        for read in self.samfile.fetch(chrom, start, stop):
            reads_num += 1
            read_end = read.pos + read.qlen
            if refine_start is None or read.pos < refine_start:
                refine_start = read.pos
            if refine_stop is None or read_end > refine_stop:
                refine_stop = read_end

        if reads_num == 0:
            # No reads means no refined interval; nothing to report.
            continue

        width = refine_stop - refine_start

        # Locate the deepest pileup column and accumulate total depth.
        peak_intensity = 0
        peak_pos = 0
        coverage = 0.0
        for pileupcolumn in self.samfile.pileup(chrom, refine_start, refine_stop):
            if pileupcolumn.n > peak_intensity:
                peak_intensity = pileupcolumn.n
                peak_pos = pileupcolumn.pos
            coverage += pileupcolumn.n
        coverage /= width

        # Expected read count for a region of this width.  Multiplying
        # before dividing avoids the integer floor division that
        # total_length / width performs on ints (which loses precision and
        # becomes 0 when width exceeds total_length).
        lam = float(self.mapped) * width / self.total_length
        pvalue = prob.poisson_cdf(reads_num, lam, False)

        yield Peak([
            chrom, refine_start, refine_stop, reads_num, pvalue, coverage,
            peak_pos, peak_intensity
        ])

    print >> sys.stderr, "Reading HMM Segments Done! "
def iterPeaks(self):
    """Yield a refined peak tuple for every HMM segment that contains reads.

    For each ``(chrom, start, stop)`` produced by ``self.parse_segment()``
    the reads fetched from ``self.samfile`` define a refined interval
    (smallest ``pos`` to largest ``pos + qlen``).  The pileup over that
    interval supplies the deepest column and the mean depth, and a p-value
    is computed with ``prob.poisson_cdf(reads_num, lam, False)`` where
    ``lam = self.mapped * width / self.total_length``.

    Yields:
        ``(chrom, refine_start, refine_stop, reads_num, pvalue, coverage,
        peak_pos, peak_intensity)``

    Segments in which no read is found are skipped: they have no refined
    interval, and previously caused a pileup with ``None`` bounds followed
    by a ``TypeError``.

    ``self.peaks`` is reset to an empty list but is not filled here.
    Progress messages go to stderr.

    NOTE(review): ``self.samfile`` presumably is a pysam alignment file;
    ``qlen`` is used as the read's extent on the reference -- confirm.
    """
    self.peaks=[]
    print >>sys.stderr,"Reading ",self.bamfilename," HMM Segments"

    for k, (chrom, start, stop) in enumerate(self.parse_segment(), 1):
        if k%10000==0:
            print >>sys.stderr,"parsed ",k," segments\r",

        # Tighten the segment to the actual extent of the reads it holds.
        refine_start = None
        refine_stop = None
        reads_num = 0
        for read in self.samfile.fetch(chrom, start, stop):
            reads_num += 1
            read_end = read.pos + read.qlen
            if refine_start is None or read.pos < refine_start:
                refine_start = read.pos
            if refine_stop is None or read_end > refine_stop:
                refine_stop = read_end

        if reads_num == 0:
            # No reads means no refined interval; nothing to report.
            continue

        width = refine_stop - refine_start

        # Locate the deepest pileup column and accumulate total depth.
        peak_intensity = 0
        peak_pos = 0
        coverage = 0.0
        for pileupcolumn in self.samfile.pileup(chrom, refine_start, refine_stop):
            if pileupcolumn.n > peak_intensity:
                peak_intensity = pileupcolumn.n
                peak_pos = pileupcolumn.pos
            coverage += pileupcolumn.n
        coverage /= width

        # Expected read count for a region of this width.  Multiplying
        # before dividing avoids the integer floor division that
        # total_length / width performs on ints (which loses precision and
        # becomes 0 when width exceeds total_length).
        lam = float(self.mapped) * width / self.total_length
        pvalue = prob.poisson_cdf(reads_num, lam, False)

        yield (chrom, refine_start, refine_stop, reads_num, pvalue,
               coverage, peak_pos, peak_intensity)

    print >>sys.stderr,"Reading HMM Segments Done! "