# Load the run options from the YAML config file named by the first
# command-line argument.
# NOTE(review): yaml.load() can construct arbitrary Python objects; if the
# config file can ever come from an untrusted source, switch to
# yaml.safe_load().
with open(sys.argv[1]) as config_file:
    opt_dict = yaml.load(config_file.read())

# Every required option becomes a module-level global of the same name.
for opt in str_opts:
    if opt not in opt_dict:
        print('config file missing option : ' + opt)
        # Exit with a non-zero status so calling shells/pipelines can detect
        # the failure (quit() is an interactive helper and reports success).
        sys.exit(1)
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
peaks = pique.readbookmarks(peak_bookmarks)

# calculate enrichment ratios: summed ChIP signal over summed background,
# both strands combined, within each peak's [start, stop) interval
for peak in peaks:
    start = peak['start']
    stop = peak['stop']
    chip_total = sum(data_ff[start:stop])
    chip_total = chip_total + sum(data_rr[start:stop])
    bgnd_total = sum(b_ff[start:stop])
    bgnd_total = bgnd_total + sum(b_rr[start:stop])
    # NOTE(review): a peak whose background sums to zero raises
    # ZeroDivisionError here; decide how such peaks should be annotated.
    peak['annotations']['enrichment_ratio'] = (
        float(chip_total) / float(bgnd_total)
    )

# write new bookmark file
pique.msg('writing re-annotated bookmark file...')
pique.writebookmarks(annotated_bookmarks, track_name, peaks)
# Estimate a binding coordinate for each envelope peak: smooth both strands
# with a 10-sample moving average over the span from the forward peak's start
# to the reverse peak's stop, then take the midpoint of the two maxima.
smoothing_kernel = numpy.ones(10) / 10.0
for peak in envelope:
    span_start = peak['forward']['start']
    span_stop = peak['reverse']['stop']
    df = scipy.signal.convolve(data_ff[span_start:span_stop], smoothing_kernel)
    dr = scipy.signal.convolve(data_rr[span_start:span_stop], smoothing_kernel)
    # NOTE(review): convolve() defaults to mode='full', so the smoothed
    # arrays are len(kernel) - 1 samples longer than the input and the argmax
    # indices may be offset from the input coordinates; confirm whether
    # mode='same' was intended.
    c = span_start + (dr.argmax() + df.argmax()) / 2.0
    peak['binds'] = int(c)

# Weeds get an empty binding coordinate.
for weed in weeds:
    weed['binds'] = ''

# write output files
pique.msg('writing output files...')

# Per-strand masks: 1 inside any peak interval, 0 elsewhere.
peaks_f = numpy.zeros(len(data_f))
peaks_r = numpy.zeros(len(data_r))
for peak in forward:
    peaks_f[peak['start']:peak['stop']] = 1
for peak in reverse:
    peaks_r[peak['start']:peak['stop']] = 1

pique.write_strandless_track(peaks_f + peaks_r, overlap_track, track_name)
pique.writebookmarks(peak_bookmarks, track_name, envelope)
pique.writebookmarks(weed_bookmarks, track_name, weeds)

# One tab-separated row per envelope peak; the context manager guarantees the
# file is closed even if a write fails.
with open(binding_track, 'w') as f:
    f.write('sequence\tstrand\tposition\tvalue\n')
    for peak in envelope:
        f.write(track_name + '\t.\t' + str(peak['binds']) + '\t1\n')
# Read the YAML config file given as the first command-line argument.
# NOTE(review): yaml.load() can construct arbitrary Python objects; prefer
# yaml.safe_load() if the config file is not fully trusted.
with open(sys.argv[1]) as config_handle:
    opt_dict = yaml.load(config_handle.read())

# Publish each required option as a module-level global with the same name.
for opt in str_opts:
    if opt not in opt_dict:
        print('config file missing option : ' + opt)
        # Non-zero status signals the failure to the caller; quit() is meant
        # for interactive sessions and would exit with status 0.
        sys.exit(1)
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
peaks = pique.readbookmarks(peak_bookmarks)

# calculate enrichment ratios (ChIP sum / background sum, both strands,
# over each peak's [start, stop) interval)
for peak in peaks:
    lo = peak['start']
    hi = peak['stop']
    a = sum(data_ff[lo:hi])
    a = a + sum(data_rr[lo:hi])
    b = sum(b_ff[lo:hi])
    b = b + sum(b_rr[lo:hi])
    # NOTE(review): raises ZeroDivisionError when the background sums to
    # zero over the peak; decide how such peaks should be reported.
    peak['annotations']['enrichment_ratio'] = float(a) / float(b)

# write new bookmark file
pique.msg('writing re-annotated bookmark file...')
pique.writebookmarks(annotated_bookmarks, track_name, peaks)
# For each envelope peak, smooth the forward and reverse data with a
# 10-sample moving average and place the binding coordinate midway between
# the two smoothed maxima.
box_filter = numpy.ones(10) / 10.0
for peak in envelope:
    left = peak['forward']['start']
    right = peak['reverse']['stop']
    df = scipy.signal.convolve(data_ff[left:right], box_filter)
    dr = scipy.signal.convolve(data_rr[left:right], box_filter)
    # NOTE(review): with convolve()'s default mode='full' the output is
    # len(box_filter) - 1 samples longer than the slice, so argmax() may be
    # offset relative to the input; check whether mode='same' was intended.
    c = left + (dr.argmax() + df.argmax()) / 2.0
    peak['binds'] = int(c)

# Weeds carry an empty binding coordinate.
for weed in weeds:
    weed['binds'] = ''

# write output files
pique.msg('writing output files...')

# Build 0/1 masks marking the peak intervals on each strand.
peaks_f = numpy.zeros(len(data_f))
peaks_r = numpy.zeros(len(data_r))
for peak in forward:
    peaks_f[peak['start']:peak['stop']] = 1
for peak in reverse:
    peaks_r[peak['start']:peak['stop']] = 1

pique.write_strandless_track(peaks_f + peaks_r, overlap_track, track_name)
pique.writebookmarks(peak_bookmarks, track_name, envelope)
pique.writebookmarks(weed_bookmarks, track_name, weeds)

# Write one tab-separated row per envelope peak; `with` closes the file even
# if a write raises.
with open(binding_track, 'w') as f:
    f.write('sequence\tstrand\tposition\tvalue\n')
    for peak in envelope:
        f.write(track_name + '\t.\t' + str(peak['binds']) + '\t1\n')
# read bookmarks file
pique.msg('reading annotations...')
peaks = pique.readbookmarks(annotated_bookmarks)

# read gene annotations
# Whitespace-separated columns used below: 0 = sequence name, 1 = start,
# 2 = stop, 3 = gene name, 5 = strand.
genes = {}
with open(gene_annotations) as annotation_file:
    for line in annotation_file:
        # Lines containing a double quote are skipped.
        if '"' in line:
            continue
        fields = line.split()
        # Skip blank lines and records that belong to a different sequence.
        if not fields or fields[0].lower() != track_name:
            continue
        start, stop = map(int, fields[1:3])
        strand = fields[5]
        name = fields[3]
        genes[name] = {'start': start, 'stop': stop, 'strand': strand}
        # NOTE(review): assumed to be per-gene progress output inside the
        # loop; the original layout was lost, so confirm the placement.
        print(name)

print(len(genes))

# add gene annotation to peak list
pique.msg('finding transcript start sites in enriched regions...')
for peak in peaks:
    # NOTE(review): only gene['start'] is tested and 'strand' is ignored;
    # if stop is the transcript start for reverse-strand genes, those are
    # matched at the wrong end. Confirm against the annotation format.
    gg = [
        name
        for name, gene in genes.items()
        if peak['start'] < gene['start'] < peak['stop']
    ]
    peak['annotations']['genes'] = ','.join(gg)

# write new bookmark file
pique.msg('writing re-annotated bookmark file...')
pique.writebookmarks(new_bookmarks, track_name, peaks)