def detect(name, ipfile, bgfile, mapfile, alpha, l_thresh, pickle_file, wav_file):
    """
    Drive the peak detection workflow.

    :param name: project name; prefix for the log and all output files
    :param ipfile: IP (ChIP) BAM file path
    :param bgfile: background (control) BAM file path
    :param mapfile: genome map (GFF) file path
    :param alpha: filter parameter passed to PiqueAnalysis.apply_filter
    :param l_thresh: length threshold passed to PiqueAnalysis.apply_filter
    :param pickle_file: if true, pickle the analysis workbench to <name>.pickle
    :param wav_file: if true, also write one WAV file per contig
    """
    # set logfile
    logfile = name + '.log'
    pique.msg(logfile, 'starting run for project : ' + name)

    # log inputs
    pique.msg(logfile, ' -> IP file : ' + ipfile)
    pique.msg(logfile, ' -> BG file : ' + bgfile)
    pique.msg(logfile, ' -> map file : ' + mapfile)
    pique.msg(logfile, ' -> alpha : ' + str(alpha))
    pique.msg(logfile, ' -> l_thresh : ' + str(l_thresh))

    # load the data
    pique.msg(logfile, 'loading data...')
    D = pique.data.PiqueData(ipfile, bgfile, mapfile, name=name)

    pique.msg(logfile, ' found contigs :')
    for contig in D.data.keys():
        pique.msg(logfile, ' ' + contig)
        pique.msg(logfile, ' length : ' + str(D.data[contig]['length']))
        for r in D.data[contig]['regions']:
            start = str(r['start'])
            stop = str(r['stop'])
            pique.msg(logfile, ' analysis region : ' + start + ':' + stop)
        for m in D.data[contig]['masks']:
            start = str(m['start'])
            stop = str(m['stop'])
            pique.msg(logfile, ' masking region : ' + start + ':' + stop)

    # start analysis workbench
    pique.msg(logfile, 'creating analysis workbench...')
    PA = pique.analysis.PiqueAnalysis(D)

    # run filters
    pique.msg(logfile, 'running filters...')
    for ar_name in PA.data.keys():
        pique.msg(logfile, ' :: applying filters to analysis region ' + ar_name)
        PA.apply_filter(ar_name, alpha, l_thresh)

    # find peaks
    pique.msg(logfile, 'finding peaks...')
    for ar_name in PA.data.keys():
        PA.find_peaks(ar_name)
        pique.msg(
            logfile,
            ' peaks ' + ar_name + ' : ' + str(len(PA.data[ar_name]['peaks'])))
        pique.msg(
            logfile,
            ' noise threshold : ' + str(PA.data[ar_name]['N_thresh']))
        pique.msg(
            logfile,
            ' filter threshold : ' + str(PA.data[ar_name]['n_thresh']))
        pique.msg(
            logfile,
            ' normalizations : ' + ', '.join(map(str, PA.data[ar_name]['norms'])))

    # if a pickle file was requested, write it
    if pickle_file:
        pique.msg(logfile, 'pickling analysis workbench...')
        # FIX: close the handle deterministically; it was previously left open
        handle = open(name + '.pickle', 'w')
        try:
            cPickle.dump(PA, handle)
        finally:
            handle.close()

    # if a WAV file was requested, write it
    if wav_file:
        for contig in D.data.keys():
            # renamed from 'file' to avoid shadowing the builtin
            wav_name = name + '_' + contig + '.wav'
            pique.msg(logfile, 'writing WAV output : ' + wav_name)
            pique.fileIO.writeWAV(wav_name, D.data, contig,
                                  track='IP', minusBG=True, amplify=True)

    # write output files
    pique.msg(logfile, 'writing output files...')
    pique.fileIO.writepeaksGFF(name + '.gff', PA.data)
    pique.fileIO.writebookmarks(name + '.bookmark', PA.data, name=name)
    pique.fileIO.writeQP(name + '.qp', PA.data)
    pique.fileIO.writepeakTSV(name + '.peak.tsv', PA.data)
    pique.fileIO.writetrack(name + '.IP.track', D.data)
    pique.fileIO.writetrack(name + '.BG.track', D.data, track='BG')

    # done!
    pique.msg(logfile, 'run completed.')
'masking_loci', \
'peak_bookmarks', \
'weed_bookmarks', \
'overlap_track', \
'binding_track' ]

# Load the YAML config named on the command line and promote every required
# option to a module-level variable, exiting if any option is missing.
# NOTE(review): yaml.load on the config without an explicit Loader; assumes
# the config file is trusted — safe_load would be safer. Confirm.
opt_dict = yaml.load( open( sys.argv[1] ).read() )
for opt in num_opts + str_opts :
    if not opt_dict.has_key( opt ) :
        print 'config file missing option : ' + opt
        quit()
    setattr( sys.modules[__name__], opt, opt_dict[opt] )

# read the track data
pique.msg( 'reading track data...' )
data_ff = pique.readtrack( forward_ChIP_track )
data_rr = pique.readtrack( reverse_ChIP_track )
b_ff = pique.readtrack( forward_bgnd_track )
b_rr = pique.readtrack( reverse_bgnd_track )

# apply mask
pique.msg( 'applying mask...' )
is_elements = []
# masking_loci file format: whitespace-separated start/stop per line,
# lines containing '#' are treated as comments and skipped
for line in open( masking_loci ) :
    if line.__contains__('#') :
        continue
    start, stop = map( int, line.split()[:2] )
    is_elements.append( { 'start':start, 'stop':stop } )
data_ff = pique.mask( data_ff, is_elements )
def makemap(name, bamfile, window, stride, highest, lowest, bins):
    """
    Drive the genome map making workflow.

    Loads a BAM file, computes a spectral histogram for each contig and
    saves one contour-plot PNG per contig.

    :param name: project name; prefix for log, map and image files
    :param bamfile: input BAM file path
    :param window: window size passed to pique.mapmaker.hist
    :param stride: stride passed to pique.mapmaker.hist
    :param highest: highest histogram bin
    :param lowest: lowest histogram bin
    :param bins: number of histogram bins
    """
    import pylab
    logfile = name + '.mapmaker.log'
    mapfile = name + '.map.gff'
    pique.msg(logfile, 'starting mapmaker for project : ' + name)
    pique.msg(logfile, ' -> BAM file : ' + bamfile)
    pique.msg(logfile, ' -> map file : ' + mapfile)
    pique.msg(logfile, ' -> window : ' + str(window))
    pique.msg(logfile, ' -> stride : ' + str(stride))
    pique.msg(logfile, ' -> bins : ' + str(bins))
    pique.msg(logfile, ' -> highest bin : ' + str(highest))
    pique.msg(logfile, ' -> lowest bin : ' + str(lowest))

    pique.msg(logfile, 'loading data...')
    data = pique.fileIO.loadBAM(bamfile)
    pique.msg(logfile, ' found contigs :')
    for contig in data.keys():
        pique.msg(logfile, ' ' + contig)
        pique.msg(logfile, ' ' + str(len(data[contig]['forward'])))

    pique.msg(logfile, 'making spectral histograms...')
    sh = {}
    for contig in data.keys():
        # FIX: corrected 'sectral' -> 'spectral' typo in the log message
        pique.msg(logfile, ' :: making spectral histogram for contig ' + contig)
        d = numpy.array(data[contig]['forward'] + data[contig]['reverse'],
                        dtype=int)
        sh[contig] = pique.mapmaker.hist(d, lowest, highest, bins, window, stride)

    # save images of spectral histograms
    pique.msg(logfile, 'saving images of spectral histograms...')
    for contig in sh.keys():
        pylab.cla()  # clean up crumbs from last plot
        pylab.clf()  # clean up crumbs from last plot
        pique.msg(logfile, ' :: saving image for contig ' + contig)
        pylab.contourf(sh[contig], bins)
        pylab.title(name + ' : ' + contig)
        imgname = name + '_' + contig + '.png'
        pylab.savefig(imgname, format='png')
def bam2wav(name, ipfile, bgfile):
    """
    Drive the creation of a WAV file from a BAM file.

    :param name: project name; prefix for the log and WAV files
    :param ipfile: IP (ChIP) BAM file path
    :param bgfile: background BAM file path
    """
    # set logfile
    logfile = name + '.log'
    pique.msg(logfile, 'converting BAM files to WAV files for project : ' + name)

    # log inputs
    pique.msg(logfile, ' -> IP file : ' + ipfile)
    pique.msg(logfile, ' -> BG file : ' + bgfile)

    # load the data (empty map file: no predefined analysis regions)
    pique.msg(logfile, 'loading data...')
    D = pique.data.PiqueData(ipfile, bgfile, '', name=name)

    pique.msg(logfile, ' found contigs :')
    for contig in D.data.keys():
        pique.msg(logfile, ' ' + contig)
        pique.msg(logfile, ' length : ' + str(D.data[contig]['length']))
        for r in D.data[contig]['regions']:
            start = str(r['start'])
            stop = str(r['stop'])
            pique.msg(logfile, ' analysis region : ' + start + ':' + stop)
        for m in D.data[contig]['masks']:
            start = str(m['start'])
            stop = str(m['stop'])
            pique.msg(logfile, ' masking region : ' + start + ':' + stop)

    # write the WAV files, one per contig
    for contig in D.data.keys():
        # renamed from 'file' to avoid shadowing the builtin
        wav_name = name + '_' + contig + '.wav'
        pique.msg(logfile, 'writing WAV output : ' + wav_name)
        pique.fileIO.writeWAV(wav_name, D.data, contig,
                              track='IP', minusBG=True, amplify=True)

    # done!
    pique.msg(logfile, 'conversion completed.')
def makemap( name, bamfile, window, stride, highest, lowest, bins ) :
    """
    Drive the genome map making workflow.

    Loads a BAM file, computes a spectral histogram for each contig and
    saves one contour-plot PNG per contig.

    :param name: project name; prefix for log, map and image files
    :param bamfile: input BAM file path
    :param window: window size passed to pique.mapmaker.hist
    :param stride: stride passed to pique.mapmaker.hist
    :param highest: highest histogram bin
    :param lowest: lowest histogram bin
    :param bins: number of histogram bins
    """
    import pylab
    logfile = name + '.mapmaker.log'
    mapfile = name + '.map.gff'
    pique.msg( logfile, 'starting mapmaker for project : ' + name )
    pique.msg( logfile, ' -> BAM file : ' + bamfile )
    pique.msg( logfile, ' -> map file : ' + mapfile )
    pique.msg( logfile, ' -> window : ' + str(window) )
    pique.msg( logfile, ' -> stride : ' + str(stride) )
    pique.msg( logfile, ' -> bins : ' + str(bins) )
    pique.msg( logfile, ' -> highest bin : ' + str(highest) )
    pique.msg( logfile, ' -> lowest bin : ' + str(lowest) )

    pique.msg( logfile, 'loading data...' )
    data = pique.fileIO.loadBAM( bamfile )
    pique.msg( logfile, ' found contigs :' )
    for contig in data.keys() :
        pique.msg( logfile, ' ' + contig )
        pique.msg( logfile, ' ' + str(len(data[contig]['forward'])) )

    pique.msg( logfile, 'making spectral histograms...' )
    sh = {}
    for contig in data.keys() :
        # FIX: corrected 'sectral' -> 'spectral' typo in the log message
        pique.msg( logfile, ' :: making spectral histogram for contig ' + contig )
        d = numpy.array( data[contig]['forward'] + data[contig]['reverse'], dtype = int )
        sh[contig] = pique.mapmaker.hist( d, lowest, highest, bins, window, stride )

    # save images of spectral histograms
    pique.msg( logfile, 'saving images of spectral histograms...' )
    for contig in sh.keys() :
        pylab.cla()   # clean up crumbs from last plot
        pylab.clf()   # clean up crumbs from last plot
        pique.msg( logfile, ' :: saving image for contig ' + contig )
        pylab.contourf( sh[contig], bins )
        pylab.title( name + ' : ' + contig )
        imgname = name + '_' + contig + '.png'
        pylab.savefig( imgname, format='png' )
def filter_all( self, alpha, l_thresh ) :
    """
    Apply the peak-detection filters to every analysis region in the
    workbench, logging each region as it is processed.

    :param alpha: filter parameter forwarded to apply_filter
    :param l_thresh: length threshold forwarded to apply_filter
    """
    for region_name in self.data.keys() :
        message = ' :: applying filters to analysis region ' + region_name
        pique.msg( message )
        self.apply_filter( region_name, alpha, l_thresh )
'forward_bgnd_track', \
'reverse_ChIP_track', \
'reverse_bgnd_track', \
'slice_bookmarks', \
'new_track_prefix', ]

# Load the YAML config named on the command line and promote every required
# option to a module-level variable, exiting if any option is missing.
# NOTE(review): yaml.load without an explicit Loader; assumes trusted config.
opt_dict = yaml.load( open( sys.argv[1] ).read() )
for opt in str_opts :
    if not opt_dict.has_key( opt ) :
        print 'config file missing option : ' + opt
        quit()
    setattr( sys.modules[__name__], opt, opt_dict[opt] )

# read the track data
pique.msg( 'reading track data...' )
data_ff = pique.readtrack( forward_ChIP_track )
data_rr = pique.readtrack( reverse_ChIP_track )
b_ff = pique.readtrack( forward_bgnd_track )
b_rr = pique.readtrack( reverse_bgnd_track )

# read bookmarks file
pique.msg( 'reading annotations...' )
slices = pique.readbookmarks( slice_bookmarks )

# write new slice tracks: cut each bookmarked interval out of all four tracks
for s in slices :
    sdata_ff = data_ff[ s['start'] : s['stop'] ]
    sdata_rr = data_rr[ s['start'] : s['stop'] ]
    sb_ff = b_ff[ s['start'] : s['stop'] ]
    sb_rr = b_rr[ s['start'] : s['stop'] ]
def bam2wav( name, ipfile, bgfile ) :
    """
    Drive the creation of a WAV file from a BAM file.

    :param name: project name; prefix for the log and WAV files
    :param ipfile: IP (ChIP) BAM file path
    :param bgfile: background BAM file path
    """
    # set logfile
    logfile = name + '.log'
    pique.msg( logfile, 'converting BAM files to WAV files for project : ' + name )

    # log inputs
    pique.msg( logfile, ' -> IP file : ' + ipfile )
    pique.msg( logfile, ' -> BG file : ' + bgfile )

    # load the data (empty map file: no predefined analysis regions)
    pique.msg( logfile, 'loading data...' )
    D = pique.data.PiqueData( ipfile, bgfile, '', name=name )

    pique.msg( logfile, ' found contigs :' )
    for contig in D.data.keys() :
        pique.msg( logfile, ' ' + contig )
        pique.msg( logfile, ' length : ' + str(D.data[contig]['length']) )
        for r in D.data[contig]['regions'] :
            start = str( r['start'] )
            stop = str( r['stop'] )
            pique.msg( logfile, ' analysis region : ' + start + ':' + stop )
        for m in D.data[contig]['masks'] :
            start = str( m['start'] )
            stop = str( m['stop'] )
            pique.msg( logfile, ' masking region : ' + start + ':' + stop )

    # write the WAV files, one per contig
    for contig in D.data.keys() :
        # renamed from 'file' to avoid shadowing the builtin
        wav_name = name + '_' + contig + '.wav'
        pique.msg( logfile, 'writing WAV output : ' + wav_name )
        pique.fileIO.writeWAV( wav_name, D.data, contig,
                               track='IP', minusBG=True, amplify=True )

    # done!
    pique.msg( logfile, 'conversion completed.' )
'forward_bgnd_track', \
'reverse_ChIP_track', \
'reverse_bgnd_track', \
'slice_bookmarks', \
'new_track_prefix', ]

# Load the YAML config named on the command line and promote every required
# option to a module-level variable, exiting if any option is missing.
# NOTE(review): yaml.load without an explicit Loader; assumes trusted config.
opt_dict = yaml.load(open(sys.argv[1]).read())
for opt in str_opts:
    if not opt_dict.has_key(opt):
        print 'config file missing option : ' + opt
        quit()
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read the track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
pique.msg('reading annotations...')
slices = pique.readbookmarks(slice_bookmarks)

# write new slice tracks: cut each bookmarked interval out of all four tracks
for s in slices:
    sdata_ff = data_ff[s['start']:s['stop']]
    sdata_rr = data_rr[s['start']:s['stop']]
    sb_ff = b_ff[s['start']:s['stop']]
    sb_rr = b_rr[s['start']:s['stop']]
'forward_bgnd_track', \
'reverse_ChIP_track', \
'reverse_bgnd_track', \
'masking_loci', \
'annotated_bookmarks' ]

# Load the YAML config named on the command line and promote every required
# option to a module-level variable, exiting if any option is missing.
# NOTE(review): yaml.load without an explicit Loader; assumes trusted config.
opt_dict = yaml.load( open( sys.argv[1] ).read() )
for opt in str_opts :
    if not opt_dict.has_key( opt ) :
        print 'config file missing option : ' + opt
        quit()
    setattr( sys.modules[__name__], opt, opt_dict[opt] )

# read track data
pique.msg( 'reading track data...' )
data_ff = pique.readtrack( forward_ChIP_track )
data_rr = pique.readtrack( reverse_ChIP_track )
b_ff = pique.readtrack( forward_bgnd_track )
b_rr = pique.readtrack( reverse_bgnd_track )

# read bookmarks file
# NOTE(review): 'peak_bookmarks' does not appear in the visible str_opts list
# (which has 'annotated_bookmarks'); if it is not set elsewhere this raises
# NameError — confirm against the full option list.
peaks = pique.readbookmarks( peak_bookmarks )

# calculate enrichment ratios: summed ChIP signal over summed background
# signal across both strands for each peak interval
for n,peak in enumerate(peaks) :
    a = sum( data_ff[ peak['start'] : peak['stop'] ] )
    a = a + sum( data_rr[ peak['start'] : peak['stop'] ] )
    b = sum( b_ff[ peak['start'] : peak['stop'] ] )
    b = b + sum( b_rr[ peak['start'] : peak['stop'] ] )
    # NOTE(review): divides by the background sum with no zero guard —
    # a peak over an empty background region raises ZeroDivisionError
    peaks[n]['annotations']['enrichment_ratio'] = float(a) / float(b)
'forward_bgnd_track', \ 'reverse_ChIP_track', \ 'reverse_bgnd_track', \ 'new_forward_bgnd_track', \ 'new_reverse_bgnd_track', \ 'non_peak_bookmarks', ] opt_dict = yaml.load( open( sys.argv[1] ).read() ) for opt in str_opts : if not opt_dict.has_key( opt ) : print 'config file missing option : ' + opt quit() setattr( sys.modules[__name__], opt, opt_dict[opt] ) pique.msg( 'reading track data...' ) data_ff = pique.readtrack( forward_ChIP_track ) data_rr = pique.readtrack( reverse_ChIP_track ) b_ff = pique.readtrack( forward_bgnd_track ) b_rr = pique.readtrack( reverse_bgnd_track ) non_peaks = pique.readbookmarks( non_peak_bookmarks ) # calculate enrichment ratios pique.msg( 'calculating enrichment ratios using ' + \ str(len(non_peaks)) + 'regions...' ) d_f, d_r, b_f, b_r = [],[],[],[] for n,region in enumerate( non_peaks ) : d_f.append( sum( data_ff[ region['start'] : region['stop'] ] ) ) d_r.append( sum( data_rr[ region['start'] : region['stop'] ] ) )
def run( self ) :
    """
    Run the full peak-detection workflow using the values currently entered
    in the GUI widgets, logging progress to <name>.log and mirroring coarse
    status in the window title.
    """
    # check inputs...
    name = self.nametext.get().strip()

    # set logfile
    logfile = name + '.log'
    pique.msg( logfile, 'starting run for project : ' + name )

    # numeric filter parameters come from the GUI text fields
    alpha = int( self.alphatext.get().strip() )
    l_thresh = int( self.lthreshtext.get().strip() )

    # log inputs
    pique.msg( logfile, ' -> IP file : ' + self.IPfile )
    pique.msg( logfile, ' -> BG file : ' + self.BGfile )
    pique.msg( logfile, ' -> map file : ' + self.mapfile )
    pique.msg( logfile, ' -> alpha : ' + str(alpha) )
    pique.msg( logfile, ' -> l_thresh : ' + str(l_thresh) )

    # load the data
    pique.msg( logfile, 'loading data...' )
    self.master.title( 'Pique : loading data...' )
    # the map file is optional; omit the argument when it was not chosen
    if not self.mapfile :
        D = pique.data.PiqueData( self.IPfile, self.BGfile, name=name )
    else :
        D = pique.data.PiqueData( self.IPfile, self.BGfile, self.mapfile, name=name )

    pique.msg( logfile, ' found contigs :' )
    for contig in D.data.keys() :
        pique.msg( logfile, ' ' + contig )
        pique.msg( logfile, ' length : ' + str(D.data[contig]['length']) )
        for r in D.data[contig]['regions'] :
            start = str( r['start'] )
            stop = str( r['stop'] )
            pique.msg( logfile, ' analysis region : ' + start + ':' + stop )
        for m in D.data[contig]['masks'] :
            start = str( m['start'] )
            stop = str( m['stop'] )
            pique.msg( logfile, ' masking region : ' + start + ':' + stop )

    # start analysis workbench
    pique.msg( logfile, 'creating analysis workbench...' )
    self.master.title( 'Pique : creating workbench...' )
    PA = pique.analysis.PiqueAnalysis( D )

    # run filters
    pique.msg( logfile, 'running filters...' )
    self.master.title( 'Pique : running filters...' )
    for ar_name in PA.data.keys() :
        pique.msg( logfile, ' :: applying filters to analysis region ' + ar_name )
        PA.apply_filter( ar_name, alpha, l_thresh )

    # find peaks
    pique.msg( logfile, 'finding peaks...' )
    self.master.title( 'Pique : finding peaks...' )
    for ar_name in PA.data.keys() :
        PA.find_peaks(ar_name)
        pique.msg( logfile, ' peaks ' + ar_name + ' : ' + str(len(PA.data[ar_name]['peaks'])) )
        pique.msg( logfile, ' noise threshold : ' + str(PA.data[ar_name]['N_thresh']) )
        pique.msg( logfile, ' filter threshold : ' + str(PA.data[ar_name]['n_thresh']) )
        pique.msg( logfile, ' normalizations : ' + ', '.join( map(str, PA.data[ar_name]['norms']) ) )

    # write output files
    pique.msg( logfile, 'writing output files...' )
    self.master.title( 'Pique : writing output...' )
    pique.fileIO.writepeaksGFF( name + '.gff', PA.data )
    pique.fileIO.writebookmarks( name + '.bookmark', PA.data, name=name )
    pique.fileIO.writeQP( name + '.qp', PA.data )
    pique.fileIO.writepeakTSV( name + '.peak.tsv', PA.data )
    pique.fileIO.writetrack( name + '.IP.track', D.data )
    pique.fileIO.writetrack( name + '.BG.track', D.data, track='BG' )

    # done!
    pique.msg( logfile, 'run completed.' )
    self.master.title( 'Pique : run completed.' )
def run( self ) :
    """
    Run the peak-detection workflow from the GUI widget values, reporting
    progress via pique.msg and the window title.

    NOTE(review): unlike the other run() variant in this codebase, this one
    logs without a logfile argument and writes only GFF and bookmark output.
    """
    # check inputs...
    name = self.nametext.get().strip()
    pique.msg( 'starting run for project : ' + name )

    # numeric filter parameters come from the GUI text fields
    alpha = int( self.alphatext.get().strip() )
    l_thresh = int( self.lthreshtext.get().strip() )

    # load the data
    pique.msg( 'loading data...' )
    self.master.title( 'Pique : loading data...' )
    # the map file is optional; omit the argument when it was not chosen
    if not self.mapfile :
        D = pique.data.PiqueData( self.IPfile, self.BGfile )
    else :
        D = pique.data.PiqueData( self.IPfile, self.BGfile, self.mapfile )

    pique.msg( ' -> found contigs :' )
    for contig in D.data.keys() :
        pique.msg( ' ' + contig )
        pique.msg( ' length : ' + str(D.data[contig]['length']) )
        for r in D.data[contig]['regions'] :
            start = str( r['start'] )
            stop = str( r['stop'] )
            pique.msg( ' analysis region : ' + start + ':' + stop )
        for m in D.data[contig]['masks'] :
            start = str( m['start'] )
            stop = str( m['stop'] )
            pique.msg( ' masking region : ' + start + ':' + stop )

    # start analysis workbench
    pique.msg( 'creating analysis workbench...' )
    self.master.title( 'Pique : creating workbench...' )
    PA = pique.analysis.PiqueAnalysis( D )

    # run filters
    pique.msg( 'running filters...' )
    self.master.title( 'Pique : running filters...' )
    pique.msg( ' -> alpha : ' + str(alpha) )
    pique.msg( ' -> l_thresh : ' + str(l_thresh) )
    PA.filter_all( alpha, l_thresh )

    # find peaks
    pique.msg( 'finding peaks...' )
    self.master.title( 'Pique : finding peaks...' )
    for ar_name in PA.data.keys() :
        PA.find_peaks(ar_name)
        pique.msg( ' :: ' + ar_name + ' : ' + str(len(PA.data[ar_name]['peaks'])) )

    # write output files
    pique.msg( 'writing output files...' )
    self.master.title( 'Pique : writing output...' )
    pique.fileIO.writepeaksGFF( name + '.gff', PA.data )
    pique.fileIO.writebookmarks( name + '.bookmark', PA.data )

    # done!
    pique.msg( 'run completed.' )
    self.master.title( 'Pique : run completed.' )
'forward_bgnd_track', \ 'reverse_ChIP_track', \ 'reverse_bgnd_track', \ 'new_forward_bgnd_track', \ 'new_reverse_bgnd_track', \ 'non_peak_bookmarks', ] opt_dict = yaml.load(open(sys.argv[1]).read()) for opt in str_opts: if not opt_dict.has_key(opt): print 'config file missing option : ' + opt quit() setattr(sys.modules[__name__], opt, opt_dict[opt]) pique.msg('reading track data...') data_ff = pique.readtrack(forward_ChIP_track) data_rr = pique.readtrack(reverse_ChIP_track) b_ff = pique.readtrack(forward_bgnd_track) b_rr = pique.readtrack(reverse_bgnd_track) non_peaks = pique.readbookmarks(non_peak_bookmarks) # calculate enrichment ratios pique.msg( 'calculating enrichment ratios using ' + \ str(len(non_peaks)) + 'regions...' ) d_f, d_r, b_f, b_r = [], [], [], [] for n, region in enumerate(non_peaks): d_f.append(sum(data_ff[region['start']:region['stop']])) d_r.append(sum(data_rr[region['start']:region['stop']]))
'masking_loci', \
'peak_bookmarks', \
'weed_bookmarks', \
'overlap_track', \
'binding_track' ]

# Load the YAML config named on the command line and promote every required
# option to a module-level variable, exiting if any option is missing.
# NOTE(review): yaml.load without an explicit Loader; assumes trusted config.
opt_dict = yaml.load(open(sys.argv[1]).read())
for opt in num_opts + str_opts:
    if not opt_dict.has_key(opt):
        print 'config file missing option : ' + opt
        quit()
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read the track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# apply mask
pique.msg('applying mask...')
is_elements = []
# masking_loci file format: whitespace-separated start/stop per line,
# lines containing '#' are treated as comments and skipped
for line in open(masking_loci):
    if line.__contains__('#'):
        continue
    start, stop = map(int, line.split()[:2])
    is_elements.append({'start': start, 'stop': stop})
data_ff = pique.mask(data_ff, is_elements)
'forward_bgnd_track', \
'reverse_ChIP_track', \
'reverse_bgnd_track', \
'masking_loci', \
'annotated_bookmarks' ]

# Load the YAML config named on the command line and promote every required
# option to a module-level variable, exiting if any option is missing.
# NOTE(review): yaml.load without an explicit Loader; assumes trusted config.
opt_dict = yaml.load(open(sys.argv[1]).read())
for opt in str_opts:
    if not opt_dict.has_key(opt):
        print 'config file missing option : ' + opt
        quit()
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
# NOTE(review): 'peak_bookmarks' does not appear in the visible str_opts list
# (which has 'annotated_bookmarks'); if it is not set elsewhere this raises
# NameError — confirm against the full option list.
peaks = pique.readbookmarks(peak_bookmarks)

# calculate enrichment ratios: summed ChIP signal over summed background
# signal across both strands for each peak interval
for n, peak in enumerate(peaks):
    a = sum(data_ff[peak['start']:peak['stop']])
    a = a + sum(data_rr[peak['start']:peak['stop']])
    b = sum(b_ff[peak['start']:peak['stop']])
    b = b + sum(b_rr[peak['start']:peak['stop']])
    # NOTE(review): divides by the background sum with no zero guard —
    # a peak over an empty background region raises ZeroDivisionError
    peaks[n]['annotations']['enrichment_ratio'] = float(a) / float(b)
# Load the YAML config named on the command line. Required options
# (num_opts + str_opts) must be present; optional options (opt_opts)
# default to None when absent. Each becomes a module-level variable.
# NOTE(review): yaml.load without an explicit Loader; assumes trusted config.
opt_dict = yaml.load( open( sys.argv[1] ).read() )
for opt in num_opts + str_opts :
    if not opt_dict.has_key( opt ) :
        print 'config file missing option : ' + opt
        quit()
    setattr( sys.modules[__name__], opt, opt_dict[opt] )
for opt in opt_opts :
    if opt_dict.has_key( opt ) :
        setattr( sys.modules[__name__], opt, opt_dict[opt] )
    else :
        setattr( sys.modules[__name__], opt, None )

pique.msg( 'reading track data...' )
data_ff = pique.readtrack( forward_ChIP_track )
data_rr = pique.readtrack( reverse_ChIP_track )
b_ff = pique.readtrack( forward_bgnd_track )
b_rr = pique.readtrack( reverse_bgnd_track )

pique.msg( 'applying mask...' )
is_elements = []
# masking_loci file format: whitespace-separated start/stop per line,
# lines containing '#' are treated as comments and skipped
for line in open( masking_loci ) :
    if line.__contains__('#') :
        continue
    start, stop = map( int, line.split()[:2] )
    is_elements.append( { 'start':start, 'stop':stop } )
# mask both ChIP strands (background tracks are left unmasked here)
data_ff = pique.mask( data_ff, is_elements )
data_rr = pique.mask( data_rr, is_elements )
def detect( name, ipfile, bgfile, mapfile, alpha, l_thresh, pickle_file, wav_file ) :
    """
    Drive the peak detection workflow.

    :param name: project name; prefix for the log and all output files
    :param ipfile: IP (ChIP) BAM file path
    :param bgfile: background (control) BAM file path
    :param mapfile: genome map (GFF) file path
    :param alpha: filter parameter passed to PiqueAnalysis.apply_filter
    :param l_thresh: length threshold passed to PiqueAnalysis.apply_filter
    :param pickle_file: if true, pickle the analysis workbench to <name>.pickle
    :param wav_file: if true, also write one WAV file per contig
    """
    # set logfile
    logfile = name + '.log'
    pique.msg( logfile, 'starting run for project : ' + name )

    # log inputs
    pique.msg( logfile, ' -> IP file : ' + ipfile )
    pique.msg( logfile, ' -> BG file : ' + bgfile )
    pique.msg( logfile, ' -> map file : ' + mapfile )
    pique.msg( logfile, ' -> alpha : ' + str(alpha) )
    pique.msg( logfile, ' -> l_thresh : ' + str(l_thresh) )

    # load the data
    pique.msg( logfile, 'loading data...' )
    D = pique.data.PiqueData( ipfile, bgfile, mapfile, name=name )

    pique.msg( logfile, ' found contigs :' )
    for contig in D.data.keys() :
        pique.msg( logfile, ' ' + contig )
        pique.msg( logfile, ' length : ' + str(D.data[contig]['length']) )
        for r in D.data[contig]['regions'] :
            start = str( r['start'] )
            stop = str( r['stop'] )
            pique.msg( logfile, ' analysis region : ' + start + ':' + stop )
        for m in D.data[contig]['masks'] :
            start = str( m['start'] )
            stop = str( m['stop'] )
            pique.msg( logfile, ' masking region : ' + start + ':' + stop )

    # start analysis workbench
    pique.msg( logfile, 'creating analysis workbench...' )
    PA = pique.analysis.PiqueAnalysis( D )

    # run filters
    pique.msg( logfile, 'running filters...' )
    for ar_name in PA.data.keys() :
        pique.msg( logfile, ' :: applying filters to analysis region ' + ar_name )
        PA.apply_filter( ar_name, alpha, l_thresh )

    # find peaks
    pique.msg( logfile, 'finding peaks...' )
    for ar_name in PA.data.keys() :
        PA.find_peaks(ar_name)
        pique.msg( logfile, ' peaks ' + ar_name + ' : ' + str(len(PA.data[ar_name]['peaks'])) )
        pique.msg( logfile, ' noise threshold : ' + str(PA.data[ar_name]['N_thresh']) )
        pique.msg( logfile, ' filter threshold : ' + str(PA.data[ar_name]['n_thresh']) )
        pique.msg( logfile, ' normalizations : ' + ', '.join( map(str, PA.data[ar_name]['norms']) ) )

    # if a pickle file was requested, write it
    if pickle_file :
        pique.msg( logfile, 'pickling analysis workbench...' )
        # FIX: close the handle deterministically; it was previously left open
        handle = open( name + '.pickle', 'w' )
        try :
            cPickle.dump( PA, handle )
        finally :
            handle.close()

    # if a WAV file was requested, write it
    if wav_file :
        for contig in D.data.keys() :
            # renamed from 'file' to avoid shadowing the builtin
            wav_name = name + '_' + contig + '.wav'
            pique.msg( logfile, 'writing WAV output : ' + wav_name )
            pique.fileIO.writeWAV( wav_name, D.data, contig,
                                   track='IP', minusBG=True, amplify=True )

    # write output files
    pique.msg( logfile, 'writing output files...' )
    pique.fileIO.writepeaksGFF( name + '.gff', PA.data )
    pique.fileIO.writebookmarks( name + '.bookmark', PA.data, name=name )
    pique.fileIO.writeQP( name + '.qp', PA.data )
    pique.fileIO.writepeakTSV( name + '.peak.tsv', PA.data )
    pique.fileIO.writetrack( name + '.IP.track', D.data )
    pique.fileIO.writetrack( name + '.BG.track', D.data, track='BG' )

    # done!
    pique.msg( logfile, 'run completed.' )
#!/usr/bin/env python
"""
A very, very basic genome browser.

usage : ./basic_browser.py forward.track \
                           reverse.track \
                           peak.bookmark \
                           genes.bed
"""
import numpy
import pique
import pylab
import sys

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(sys.argv[1])
data_rr = pique.readtrack(sys.argv[2])

# read peak data
pique.msg('reading peak bookmarks...')
peaks = pique.readbookmarks(sys.argv[3])

# draw peaks
pique.msg('drawing peak bookmarks...')
for peak in peaks:
    # FIX: axvspan was called unqualified, but only 'pylab' is imported
    # (no star-import), so the bare name raised NameError at runtime
    pylab.axvspan(peak['start'], peak['stop'], color='green', alpha=0.3)

# read BED formatted gene annotations
pique.msg('reading gene annotations...')
genes = {}
#!/usr/bin/env python
"""
A very, very basic genome browser.

usage : ./basic_browser.py forward.track \
                           reverse.track \
                           peak.bookmark \
                           genes.bed
"""
import numpy
import pique
import pylab
import sys

# read track data
pique.msg( 'reading track data...' )
data_ff = pique.readtrack( sys.argv[1] )
data_rr = pique.readtrack( sys.argv[2] )

# read peak data
pique.msg( 'reading peak bookmarks...' )
peaks = pique.readbookmarks( sys.argv[3] )

# draw peaks
pique.msg( 'drawing peak bookmarks...' )
for peak in peaks :
    # FIX: axvspan was called unqualified, but only 'pylab' is imported
    # (no star-import), so the bare name raised NameError at runtime
    pylab.axvspan( peak['start'], peak['stop'], color='green', alpha=0.3 )

# read BED formatted gene annotations
pique.msg( 'reading gene annotations...' )
genes = {}
str_opts = [ 'track_name', \ 'annotated_bookmarks', \ 'gene_annotations', \ 'new_bookmarks', ] opt_dict = yaml.load( open( sys.argv[1] ).read() ) for opt in str_opts : if not opt_dict.has_key( opt ) : print 'config file missing option : ' + opt quit() setattr( sys.modules[__name__], opt, opt_dict[opt] ) # read bookmarks file pique.msg( 'reading annotations...' ) peaks = pique.readbookmarks( annotated_bookmarks ) # read gene annotations genes = {} for line in open( gene_annotations ) : if line.__contains__( '\"' ) : continue if not line.split()[0].lower() == track_name : continue start,stop = map( int, line.split()[1:3] ) strand = line.strip().split()[5] name = line.split()[3] genes[name] = { 'start':start,'stop':stop,'strand':strand } print name print len(genes.keys())