def HMMToPeaks(hmmStates,treatbins,Binsize):
    '''
    Parse HMM result to Peaks.

    hmmStates -- per-chromosome state sequences, handed unchanged to
                 BamBins.parseHMM together with Binsize.
    treatbins -- mapping chrom -> per-bin signal values; each slice taken
                 from it must support argmax() (presumably numpy arrays --
                 confirm against the caller).
    Binsize   -- bin width used to convert coordinates to bin indices
                 (presumably an integer number of bases -- confirm).

    Returns a list of Peak objects, one per (chrom, start, stop) region
    yielded by BamBins.parseHMM, with score, signalvalue, pvalue and peak
    filled in.

    NOTE(review): `histmark` (peak name prefix) and `lams` (per-chromosome
    lambda) are read from module scope, not passed in; they must be set
    before this function is called.
    NOTE(review): no q-value correction is applied to the p-values here.
    '''
    print >>sys.stderr, printTime(), "Parse Peaks from HMM result."
    peaks=[]
    for cnt,(chrom,start,stop) in enumerate(BamBins.parseHMM(hmmStates,Binsize),1):
        tpeak=Peak([chrom,start,stop,histmark+"_"+str(cnt)])
        # Use the `treatbins` argument (the body used to read an undefined
        # `treatBins`).  Floor division keeps the slice indices integral
        # regardless of the division mode in effect.
        bins=treatbins[chrom][start//Binsize:stop//Binsize]
        tpeak.score=max(bins)
        tpeak.signalvalue=numpy.mean(bins)
        # The score is scaled by 100 before the Poisson call, matching the
        # x100 scaling callPeakFromBins applies when it computes lambda.
        tpeak.pvalue = -10*numpy.log10(lprob.poisson_cdf(tpeak.score*100,lams[chrom],False))
        # Offset of the centre of the highest bin, measured from the first
        # bin of this region.
        tpeak.peak=bins.argmax()*Binsize+Binsize//2
        peaks.append(tpeak)
    print >>sys.stderr, printTime(), "Parse Peaks finished."
    print >>sys.stderr
    return peaks
def callPeakFromBins(Bins,pvalue=1e-5):
    '''
    Call peaks for each chromosome with a Poisson threshold.

    Bins   -- mapping chrom -> sequence of per-bin signal values.
    pvalue -- cutoff compared against lprob.poisson_cdf(x, lambda, False).

    For every chromosome, lambda is the mean bin value multiplied by 100.
    A binary search over [0, max bin value * 100] then narrows, to within
    0.01, the threshold at which lprob.poisson_cdf(threshold, lambda, False)
    stops being greater than pvalue (this assumes that value does not
    increase with the threshold -- confirm against lprob).

    Returns the tuple (hmmStates, lams):
      hmmStates[chrom] -- list of 0/1 flags, 1 where bin >= threshold/100.
      lams[chrom]      -- the lambda used for that chromosome.
    The threshold itself is only written to stderr; it is not returned.
    '''
    print >>sys.stderr, printTime(), "Call peak for each chromosome."
    hmmStates={}
    lams={}
    for chrom in Bins:
        values=numpy.asarray(Bins[chrom])
        if values.size==0:
            # No bins on this chromosome: nothing to threshold, and the mean
            # and maximum below would be undefined.
            hmmStates[chrom]=[]
            lams[chrom]=0.0
            continue
        # All bin values are multiplied by 100, because the threshold is
        # usually integers.  mean() always divides in floating point, so
        # integer-valued bins are not truncated.
        lam=values.mean()*100
        # find the optimal threshold by binary search
        lthre=0.0
        hthre=values.max()*100.0
        while hthre-lthre>0.01:
            mid=(hthre+lthre)/2
            if lprob.poisson_cdf(mid,lam,False) > pvalue:
                lthre=mid
            else:
                hthre=mid
        # Print threshold
        print >>sys.stderr, printTime(), chrom, "lambda=",lam,"threshold=",hthre
        # convert to binary value
        hmmStates[chrom]=numpy.greater_equal(values,hthre/100).astype(int).tolist()
        lams[chrom]=lam
    print >>sys.stderr, printTime(), "Call peak finished."
    print >>sys.stderr
    return (hmmStates,lams)