Esempio n. 1
0
 def callBinPeak(self,pvalue=1e-05):
     """Flag every bin whose count reaches the significance cutoff.

     The cutoff is the smallest integer t >= 1 for which
     prob.poisson_cdf(t, self.lam, False) is below ``pvalue``
     (presumably the upper-tail Poisson probability -- confirm against
     the prob module).

     Side effects: stores ``pvalue`` in self.pvalue and rebuilds
     self.binpeak as one list of booleans per entry of self.bins, where
     True means the bin count is >= the cutoff.  Progress is written to
     stderr.
     """
     self.pvalue = pvalue
     self.binpeak = []

     # Compute the count cutoff once, so bins only need an integer
     # comparison instead of one probability evaluation each.
     cutoff = 1
     while not prob.poisson_cdf(cutoff, self.lam, False) < pvalue:
         cutoff += 1

     for idx, counts in enumerate(self.bins):
         print >>sys.stderr,"Call Bins Pvalue in",self.chrs[idx],"\r",
         self.binpeak.append(
             [True if count >= cutoff else False for count in counts])
     print >>sys.stderr,"Call Bins Pvalue Done                         "
Esempio n. 2
0
 def callBinPeak(self,pvalue=1e-05):
     """Build self.binpeak, a per-bin True/False significance mask.

     A bin is marked True when its count is at least the cutoff, where
     the cutoff is the first integer t (starting at 1) such that
     prob.poisson_cdf(t, self.lam, False) < pvalue.  NOTE(review): the
     False flag presumably selects the upper tail -- confirm in prob.

     ``pvalue`` is also recorded in self.pvalue.  One progress line per
     entry of self.bins is written to stderr, labelled with the matching
     entry of self.chrs.
     """
     self.pvalue = pvalue
     self.binpeak = []

     # Walk upward until the probability falls under the requested level.
     min_count = 1
     while not prob.poisson_cdf(min_count, self.lam, False) < pvalue:
         min_count += 1

     for chrom_index, bin_counts in enumerate(self.bins):
         print >>sys.stderr,"Call Bins Pvalue in",self.chrs[chrom_index],"\r",
         flags = []
         for value in bin_counts:
             if value >= min_count:
                 flags.append(True)
             else:
                 flags.append(False)
         self.binpeak.append(flags)
     print >>sys.stderr,"Call Bins Pvalue Done                         "
Esempio n. 3
0
    def iterPeaks(self):
        """Yield a Peak for every segment of self.parse_segment() that has reads.

        For each (chr, start, stop) segment the reads returned by
        self.samfile.fetch() are used to tighten the boundaries to the
        smallest ``pos`` and the largest ``pos + qlen`` seen.  The pileup
        over the refined interval gives the highest column depth and its
        position, plus the mean depth over the interval.

        Each Peak is built from the list
        [chr, refine_start, refine_stop, reads_num, pvalue, coverage,
        peak_pos, peak_intensity], where pvalue is
        prob.poisson_cdf(reads_num, lam, False) and lam is
        self.mapped * interval_length / self.total_length.

        Segments without any read, or whose refined interval has no
        positive length, are skipped: they previously crashed on
        ``None - None`` or a division by zero.

        Resets self.peaks to an empty list and reports progress on stderr.
        """
        self.peaks = []
        print >> sys.stderr, "Reading ", self.bamfilename, " HMM Segments"
        for k, (chrom, start, stop) in enumerate(self.parse_segment(), 1):
            if k % 10000 == 0:
                print >> sys.stderr, "parsed ", k, " segments\r",

            # Shrink the segment to the extent actually covered by reads.
            refine_start = None
            refine_stop = None
            reads_num = 0
            for read in self.samfile.fetch(chrom, start, stop):
                reads_num += 1
                read_start = read.pos
                read_stop = read.pos + read.qlen
                if refine_start is None or read_start < refine_start:
                    refine_start = read_start
                if refine_stop is None or read_stop > refine_stop:
                    refine_stop = read_stop

            # Without reads the bounds stay None; calling pileup with them
            # and subtracting them cannot work, so skip the segment.
            if reads_num == 0:
                continue
            span = refine_stop - refine_start
            if span <= 0:
                # A zero-width interval would divide by zero below.
                continue

            peak_intensity = 0
            peak_pos = 0
            coverage = 0.0
            for pileupcolumn in self.samfile.pileup(chrom, refine_start,
                                                    refine_stop):
                if pileupcolumn.n > peak_intensity:
                    peak_intensity = pileupcolumn.n
                    peak_pos = pileupcolumn.pos
                coverage += pileupcolumn.n
            coverage /= span

            # Multiply before dividing, in floating point: the former
            # total_length / span was floor division on Python 2 integers,
            # which skewed lam and hit zero when span > total_length.
            lam = float(self.mapped) * span / self.total_length

            pvalue = prob.poisson_cdf(reads_num, lam, False)
            yield Peak([
                chrom, refine_start, refine_stop, reads_num, pvalue, coverage,
                peak_pos, peak_intensity
            ])

        print >> sys.stderr, "Reading HMM Segments Done!                   "
Esempio n. 4
0
    def iterPeaks(self):
        """Yield one result tuple per segment of self.parse_segment() with reads.

        Each yielded tuple is
        (chr, refine_start, refine_stop, reads_num, pvalue, coverage,
        peak_pos, peak_intensity):

        * refine_start / refine_stop -- smallest ``pos`` and largest
          ``pos + qlen`` among the reads from self.samfile.fetch().
        * reads_num -- number of reads fetched for the segment.
        * pvalue -- prob.poisson_cdf(reads_num, lam, False), with
          lam = self.mapped * interval_length / self.total_length.
        * coverage -- sum of pileup column depths over the refined
          interval divided by the interval length.
        * peak_pos / peak_intensity -- position and depth of the deepest
          pileup column.

        Segments without any read, or whose refined interval has no
        positive length, are skipped: they previously crashed on
        ``None - None`` or a division by zero.

        Resets self.peaks to an empty list and reports progress on stderr.
        """
        self.peaks=[]
        print >>sys.stderr,"Reading ",self.bamfilename," HMM Segments"
        for k, (chrom, start, stop) in enumerate(self.parse_segment(), 1):
            if k%10000==0 :
                print >>sys.stderr,"parsed ",k," segments\r",

            # Shrink the segment to the extent actually covered by reads.
            refine_start=None
            refine_stop=None
            reads_num=0
            for read in self.samfile.fetch(chrom,start,stop):
                reads_num+=1
                read_start=read.pos
                read_stop=read.pos+read.qlen
                if refine_start is None or read_start < refine_start:
                    refine_start=read_start
                if refine_stop is None or read_stop > refine_stop:
                    refine_stop=read_stop

            # Without reads the bounds stay None; calling pileup with them
            # and subtracting them cannot work, so skip the segment.
            if reads_num==0:
                continue
            span=refine_stop-refine_start
            if span<=0:
                # A zero-width interval would divide by zero below.
                continue

            peak_intensity=0
            peak_pos=0
            coverage=0.0
            for pileupcolumn in self.samfile.pileup(chrom,refine_start,refine_stop):
                if pileupcolumn.n > peak_intensity:
                    peak_intensity=pileupcolumn.n
                    peak_pos=pileupcolumn.pos
                coverage+=pileupcolumn.n
            coverage/=span

            # Multiply before dividing, in floating point: the former
            # total_length/span was floor division on Python 2 integers,
            # which skewed lam and hit zero when span > total_length.
            lam=float(self.mapped)*span/self.total_length

            pvalue=prob.poisson_cdf(reads_num,lam,False)
            yield (chrom,refine_start,refine_stop,reads_num,pvalue,coverage,peak_pos,peak_intensity)

        print >>sys.stderr,"Reading HMM Segments Done!                   "