def _addWigRowsToPeak(cp, cwig, i1, strand, isControl, nextPeak):
    '''
    Feed the rows of one strand's wig array that fall inside a peak to that peak.

    Parameters:
    cp: the peak being populated; must expose start, end, addData and addCtrl.
    cwig: 2-D array for one chromosome; column 0 is the position, column 1 the value.
    i1: index of the first wig row that has not been consumed by an earlier peak.
    strand: strand flag forwarded to the peak (the caller uses 0 for forward, 1 for reverse).
    isControl: when true the rows go to cp.addCtrl, otherwise to cp.addData.
    nextPeak: the peak following cp on the same chromosome, or None for the last one.

    Returns:
    The index of the first wig row at or beyond cp.end, to be used as i1
    for the next peak.
    '''
    nrows = cwig.shape[0]

    # Skip rows that lie before the peak. The position column is shifted by
    # one before comparing (presumably 1-based wig -> 0-based peak
    # coordinates; confirm against the peak loader).
    while i1 < nrows and cwig[i1, 0] - 1 < cp.start:
        i1 += 1

    # Rows inside [cp.start, cp.end) are handed to the peak.
    add = cp.addCtrl if isControl else cp.addData
    while i1 < nrows and cp.start <= cwig[i1, 0] - 1 < cp.end:
        add(cwig[i1, 0] - 1, cwig[i1, 0], cwig[i1, 1], strand)
        i1 += 1

    if strand == 1 and not isControl:
        # For non-control data with strand flag 1, keep adding rows past the
        # peak end, up to (peak length - 1) further, but never into the next
        # peak. A separate cursor is used so the returned index is unaffected
        # and the next peak can still see these rows.
        i2 = i1
        cend = cp.end - cp.start + cp.end - 1
        if nextPeak is not None and nextPeak.start < cend:
            cend = nextPeak.start
        while i2 < nrows and cwig[i2, 0] - 1 < cend:
            cp.addData(cwig[i2, 0] - 1, cwig[i2, 0], cwig[i2, 1], strand)
            i2 += 1
    return i1


def addWigToPeak(fw, rw, peaks, isControl=False):
    '''
    Populate peaks with the per-position data of a forward and a reverse wig file.

    Parameters:
    fw: the wig file of forward strand.
    rw: the wig file of reverse strand.
    peaks: mapping of chromosome -> sequence of peaks to add the data to.
           The wig cursors only ever move forward, so the peaks of each
           chromosome are assumed to be sorted by start (TODO confirm with
           the caller).
    isControl: passed to each peak's prepareData and selects addCtrl
               instead of addData when true.

    Returns:
    (fwig, rwig, chroms): the two loaded wig mappings and the list of
    chromosomes that were present in the peaks and in both wigs. The peaks
    themselves are populated in place.
    '''
    import wig
    fwig = wig.loadWig(fw, False, '+')
    rwig = wig.loadWig(rw, False, '-')
    chroms = []
    for chrom in peaks:
        # Only chromosomes covered by both strands can be processed.
        if chrom not in fwig or chrom not in rwig:
            continue
        chroms.append(chrom)
        chromPeaks = peaks[chrom]
        chromFwig = fwig[chrom]
        chromRwig = rwig[chrom]
        fi = 0  # index to track chromFwig
        ri = 0  # index to track chromRwig
        numPeaks = len(chromPeaks)
        for pi in range(numPeaks):
            p = chromPeaks[pi]
            p.prepareData(isControl)
            nextPeak = chromPeaks[pi + 1] if pi + 1 < numPeaks else None
            fi = _addWigRowsToPeak(p, chromFwig, fi, 0, isControl, nextPeak)
            ri = _addWigRowsToPeak(p, chromRwig, ri, 1, isControl, nextPeak)
    return fwig, rwig, chroms
def main():
    """Command-line entry point: pair peaks found on opposite strands.

    Reads the peak file and the forward/reverse wig files named on the
    command line, then hands everything to writePeaks together with the
    upstream/downstream search distances and the output prefix.
    """
    cli = argparse.ArgumentParser(description="Pair the peaks on different strand.")

    # Positional arguments, registered in the order they are expected on the
    # command line.
    positional_help = (
        ("peak", "The bed file contains all peaks."),
        ('forwardWig', "The forward wig file."),
        ('reverseWig', "The reverse wig file."),
        ('output', "The output prefix. Will output two files, one containing all the singletons and the other with the pairs. The pair file will be in GFF format and singletons in BED format. Output files will be prefix_singletons.bed and prefix_pairs.gff"),
    )
    for arg_name, arg_help in positional_help:
        cli.add_argument(arg_name, help=arg_help)

    # Optional distances used when looking for a mate peak.
    cli.add_argument('-d', '--downstream', type=int, default=100,
                     help="Within how far downstream to look for a mate.")
    cli.add_argument('-u', '--upstream', type=int, default=0,
                     help="Within how far upstream to look for a mate.")

    options = cli.parse_args()

    forward_peaks, reverse_peaks = loadPeaks(options.peak)
    forward_wig = WIG.loadWig(options.forwardWig, smooth=False)
    reverse_wig = WIG.loadWig(options.reverseWig, smooth=False, strand='-')

    # NOTE(review): the third argument is a hard-coded file name rather than
    # something derived from the command line; confirm against writePeaks
    # whether that is intended.
    writePeaks(
        forward_peaks,
        reverse_peaks,
        'test_peaks_smoothed_lowlim100.bed',
        forward_wig,
        reverse_wig,
        options.upstream,
        options.downstream,
        options.output,
    )
def main(args):
    """Smooth every input wig file with a pool of worker processes.

    For each file in args.inputs the wig data is loaded, one task per
    chromosome is queued, args.process workers running add_chrom_data are
    started, and the chunks they put on the output queue are written to a
    new wig file next to the input. The output name replaces the last
    dot-separated token of the input name with
    "<method>_<param0>_<param1>_smoothed.wig".

    Parameters:
        args: parsed options; this function reads inputs, process, method
            and either (bw, nbw) or, when method is 'nb', (r, mean).
    """
    for filename in args.inputs:
        # Files with 'Reverse' in their name are treated as minus strand.
        strand = '-' if 'Reverse' in filename else '+'

        wigdata = wig.loadWig(filename, False, strand)
        # Single-argument parenthesised prints give the same output whether
        # print is a statement or a function.
        print(wigdata)

        chroms = Queue()
        outQ = Queue()
        count = 0
        for chrom in wigdata:
            print(" ".join(("add ", str(chrom))))
            count += 1
            # Only the first two columns are shipped to the workers.
            chroms.put((chrom, wigdata[chrom][:, 0:2]))

        num_processes = args.process
        for process_id in range(1, num_processes + 1):
            Process(
                target=add_chrom_data,
                args=(chroms, outQ, process_id, args, strand),
            ).start()

        try:
            if args.method != 'nb':
                kargs = (args.bw, args.nbw)
            else:
                kargs = (args.r, args.mean)

            tokens = filename.split('.')
            tokens[-1] = args.method + '_' + str(kargs[0]) + '_' + str(kargs[1]) + "_smoothed.wig"

            # The context manager closes the file even if a write or a
            # queue read fails part-way through.
            with open('.'.join(tokens), 'w') as out:
                out.write('track type=wiggle_0 name=%s_%d_%f\n' % (args.method, kargs[0], kargs[1]))
                # Exactly one result is expected per queued chromosome.
                for _ in range(count):
                    out.write(''.join(outQ.get()))
        finally:
            # Always release the workers: one sentinel each, so they do not
            # stay blocked on the task queue if something above failed.
            for _ in range(num_processes):
                chroms.put('STOP')
for chrom, oriPeaks, subPeaks, chromWig in iter(tasks.get, "STOP"): print 'Process ', processID, ' is processing ', chrom outQ.put( (chrom, filter_a_chrom( chrom, oriPeaks, subPeaks, chromWig, args ) ) ) if __name__=='__main__': parser = argparse.ArgumentParser(description='Filter splitted peaks reported by PeakSplitter.') parser.add_argument('wigfile', help='Wig file for the peaks.') parser.add_argument('subpeakfile', help='Peak file in bed format.') parser.add_argument('oripeakfile', help='The original bed file contains the peaks splitted.' ) parser.add_argument('strand', choices=['+', '-', '.'], help='The strand of the peaks we are currently processing. Choices are +, -, . .') parser.add_argument( '--threshfrac', type=float, default = 0.5, help='minimum pileup of a subpeak as a fraction of the highest point of the original peak. Default is 0.5.' ) parser.add_argument('--cutoff', type=float, default = 5, help='minimum pileup of a subpeak regardless the relative fraction. Default is 5.') args = parser.parse_args() wigData = wig.loadWig( args.wigfile, smooth = False ) subPeaks = getPeaks( args.subpeakfile ) oriPeaks = getPeaks( args.oripeakfile ) chroms = set( subPeaks.keys() ).intersection(set( oriPeaks.keys() )) tasks = Queue() count = 0 for c in chroms: count += 1 tasks.put( (c, oriPeaks[c], subPeaks[c], wigData[c]) ) freeze_support() NUM_PROCESSES = 4 processID = 1 processes = [] outQ = Queue() for i in range( NUM_PROCESSES ):
chromWig = w[peak.chrom] ws = bisect.bisect_left(chromWig[:,0],peak.start+1) #The start of the region we = bisect.bisect_right(chromWig[:,0], peak.end) #The right side of the regions axis_x = [] x = [] #The data for processing for i in range(ws, we): axis_x.append(chromWig[i,0]) x += [chromWig[i,0]] * int(chromWig[i,1]) axis_x = np.array(axis_x) x = np.array(x) return axis_x, x, chromWig[ws:we,1] if __name__ == '__main__': color_iter = itertools.cycle(['k','r','g','b','c','m','y']) FILTER_VAL = [99999,100000,1000000] fwig = wig.loadWig('/home/caofan/Downloads/MJF11_hg19/1_Bam/test_apex/MAX_sc-197_SNU16_XO111_Forward.wig', smooth=False) rwig = wig.loadWig('/home/caofan/Downloads/MJF11_hg19/1_Bam/test_apex/MAX_sc-197_SNU16_XO111_Reverse.wig', smooth=False) peaks = Peak.objects.filter(run=9).order_by('-size') for i in range(10): print peaks[i] for filter_val in FILTER_VAL: if peaks[i].strand == '+': axis_x, x, orig_y = getPeakWig(fwig, peaks[i]) else: axis_x, x, orig_y = getPeakWig(rwig, peaks[i]) model = DPGMM(1) skmodel = mixture.DPGMM(n_components=8,alpha=32,n_iter=10000) min_x = axis_x[0] axis_x = axis_x - min_x
import wig
import argparse


def getArgs():
    """Parse the command line of the wig-to-bedGraph converter.

    Returns:
        The argparse namespace with three attributes: filename (input wig
        file), strand (one of '+', '-', '.') and output (output file).
    """
    # The text must be passed as description=; the first positional
    # parameter of ArgumentParser is prog, which would put the sentence in
    # the usage line as the program name and leave the description empty.
    parser = argparse.ArgumentParser(description="Convert wig file to bedgraph file.")
    parser.add_argument('filename', help="Input wig file")
    parser.add_argument('strand', choices=['+','-','.'], help="The strand of the input. Can be [+, - , . ]")
    parser.add_argument('output', help="The output file.")
    return parser.parse_args()


if __name__=='__main__':
    # Load the wig for the requested strand and write it back out as bedGraph.
    args = getArgs()
    wigData = wig.loadWig(args.filename, False, args.strand)
    wig.writeAsBedGraph(wigData, args.output)