Example #1
0
# Load the run configuration from the YAML file named on the command line.
# NOTE(review): yaml.load() without an explicit Loader can construct arbitrary
# Python objects; if the config file may come from an untrusted source, switch
# to yaml.safe_load().
with open(sys.argv[1]) as config_file:
    # An empty YAML document parses to None; fall back to an empty mapping so
    # the check below reports the missing option instead of failing opaquely.
    opt_dict = yaml.load(config_file.read()) or {}

# Every option named in str_opts is required.  Each one is published as a
# module-level variable (presumably the track / bookmark names used below --
# confirm against the definition of str_opts).
for opt in str_opts:
    if opt not in opt_dict:
        print('config file missing option : ' + opt)
        # quit() is only injected by the site module and exits with status 0;
        # sys.exit(1) always exists and signals the failure to the caller.
        sys.exit(1)
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
peaks = pique.readbookmarks(peak_bookmarks)

# calculate enrichment ratios: summed ChIP signal over summed background
# signal, both strands combined, across each peak's [start, stop) interval.
# NOTE(review): a peak whose background sums to zero raises
# ZeroDivisionError here -- decide whether such peaks need special handling.
for peak in peaks:
    start, stop = peak['start'], peak['stop']
    chip = sum(data_ff[start:stop]) + sum(data_rr[start:stop])
    bgnd = sum(b_ff[start:stop]) + sum(b_rr[start:stop])
    peak['annotations']['enrichment_ratio'] = float(chip) / float(bgnd)

# write new bookmark file
pique.msg('writing re-annotated bookmark file...')
pique.writebookmarks(annotated_bookmarks, track_name, peaks)
Example #2
0
# Estimate a binding coordinate for every envelope: smooth each strand's
# signal across the envelope with a 10-sample moving average and take the
# midpoint between the two smoothed maxima.
# NOTE(review): scipy.signal.convolve defaults to mode='full', so the
# smoothed arrays are 9 samples longer than the input and argmax() is
# offset relative to the unsmoothed data -- confirm this is intended.
kernel = numpy.ones(10) / 10.0   # loop-invariant, so built once
for peak in envelope :
    left  = peak['forward']['start']
    right = peak['reverse']['stop']
    df = scipy.signal.convolve( data_ff[ left : right ], kernel )
    dr = scipy.signal.convolve( data_rr[ left : right ], kernel )
    c = left + ( dr.argmax() + df.argmax() ) / 2.0
    peak['binds'] = int(c)

# rejected regions get an empty binding coordinate
for weed in weeds :
    weed['binds'] = ''

# write output files
pique.msg( 'writing output files...' )

# mark every position covered by a forward / reverse peak with 1
peaks_f = numpy.zeros( len(data_f) )
peaks_r = numpy.zeros( len(data_r) )
for peak in forward :
    peaks_f[ peak['start'] : peak['stop'] ] = 1
for peak in reverse :
    peaks_r[ peak['start'] : peak['stop'] ] = 1

pique.write_strandless_track( peaks_f+peaks_r, overlap_track, track_name )

pique.writebookmarks( peak_bookmarks, track_name, envelope )
pique.writebookmarks( weed_bookmarks, track_name, weeds    )

# one tab-separated row per envelope; the with-block closes the file even
# if a write fails part-way through
with open( binding_track, 'w' ) as f :
    f.write( 'sequence\tstrand\tposition\tvalue\n' )
    for peak in envelope :
        f.write( track_name + '\t.\t' + str(peak['binds']) + '\t1\n' )
Example #3
0
# Parse the YAML configuration file given as the first command-line argument.
# NOTE(review): yaml.load() with no explicit Loader may instantiate arbitrary
# Python objects; prefer yaml.safe_load() if the file is not fully trusted.
with open( sys.argv[1] ) as config_file :
    # an empty file parses to None; treat that as "no options set"
    opt_dict = yaml.load( config_file.read() ) or {}

# All options listed in str_opts are mandatory; each is exported as a
# module-level name so the rest of the script can refer to it directly.
for opt in str_opts :
    if opt not in opt_dict :
        print( 'config file missing option : ' + opt )
        # exit with a failure status (quit() is a site-module convenience
        # that exits with status 0)
        sys.exit( 1 )
    setattr( sys.modules[__name__], opt, opt_dict[opt] )

# read track data
pique.msg( 'reading track data...' )
data_ff = pique.readtrack( forward_ChIP_track )
data_rr = pique.readtrack( reverse_ChIP_track )
b_ff    = pique.readtrack( forward_bgnd_track )
b_rr    = pique.readtrack( reverse_bgnd_track )

# read bookmarks file
peaks = pique.readbookmarks( peak_bookmarks )

# calculate enrichment ratios
# ratio = (ChIP signal on both strands) / (background signal on both
# strands), each summed over the peak's [start, stop) slice.
# NOTE(review): a zero background total raises ZeroDivisionError.
for peak in peaks :
    region = slice( peak['start'], peak['stop'] )
    a = sum( data_ff[region] ) + sum( data_rr[region] )
    b = sum(    b_ff[region] ) + sum(    b_rr[region] )
    peak['annotations']['enrichment_ratio'] = float(a) / float(b)

# write new bookmark file
pique.msg( 'writing re-annotated bookmark file...' )
pique.writebookmarks( annotated_bookmarks, track_name, peaks )
Example #4
0
# For each envelope, smooth the forward- and reverse-strand signal with a
# 10-sample moving average and record the midpoint of the two maxima as the
# binding coordinate.
# NOTE(review): convolve() is called with its default mode ('full'), so the
# result is 9 samples longer than the input slice and the argmax positions
# are offset relative to the raw data -- verify that this is intended.
smoothing_kernel = numpy.ones(10) / 10.0  # invariant; hoisted out of the loop
for peak in envelope:
    first = peak['forward']['start']
    last = peak['reverse']['stop']
    df = scipy.signal.convolve(data_ff[first:last], smoothing_kernel)
    dr = scipy.signal.convolve(data_rr[first:last], smoothing_kernel)
    c = first + (dr.argmax() + df.argmax()) / 2.0
    peak['binds'] = int(c)

# Regions in weeds carry an empty binding coordinate.
for weed in weeds:
    weed['binds'] = ''

# write output files
pique.msg('writing output files...')

# Build per-strand masks with 1 at every position inside a peak.
peaks_f = numpy.zeros(len(data_f))
peaks_r = numpy.zeros(len(data_r))
for peak in forward:
    peaks_f[peak['start']:peak['stop']] = 1
for peak in reverse:
    peaks_r[peak['start']:peak['stop']] = 1

pique.write_strandless_track(peaks_f + peaks_r, overlap_track, track_name)

pique.writebookmarks(peak_bookmarks, track_name, envelope)
pique.writebookmarks(weed_bookmarks, track_name, weeds)

# Write one tab-separated row per envelope.  The context manager guarantees
# the file is closed even when a write raises.
with open(binding_track, 'w') as f:
    f.write('sequence\tstrand\tposition\tvalue\n')
    for peak in envelope:
        f.write(track_name + '\t.\t' + str(peak['binds']) + '\t1\n')
Example #5
0
# read bookmarks file
pique.msg( 'reading annotations...' )
peaks = pique.readbookmarks( annotated_bookmarks )

# read gene annotations
# Whitespace-separated columns used below: 0 = sequence name, 1 = start,
# 2 = stop, 3 = gene name, 5 = strand (looks like BED column order -- TODO
# confirm against the annotation file format).
genes = {}
with open( gene_annotations ) as annotation_file :
    for line in annotation_file :
        # lines containing a double quote are skipped
        if '"' in line :
            continue
        fields = line.split()
        # a blank line has no fields; skip it rather than raise IndexError
        if not fields :
            continue
        # keep only records for this track
        # NOTE(review): the file's name is lower-cased but track_name is
        # not, so a track_name containing upper-case letters never matches.
        if fields[0].lower() != track_name :
            continue
        start,stop = map( int, fields[1:3] )
        name = fields[3]
        genes[name] = { 'start':start,'stop':stop,'strand':fields[5] }
        print( name )
print( len(genes) )

# add gene annotation to peak list
# A gene is attached to a peak when its 'start' coordinate lies strictly
# inside the peak.
# NOTE(review): 'strand' is stored but not consulted here; if reverse-strand
# genes begin at 'stop', this test should use it for them -- confirm.
pique.msg( 'finding transcript start sites in enriched regions...' )
for peak in peaks :
    hits = [ name for name,gene in genes.items()
             if peak['start'] < gene['start'] < peak['stop'] ]
    peak['annotations']['genes'] = ','.join( hits )

# write new bookmark file
pique.msg( 'writing re-annotated bookmark file...' )
pique.writebookmarks( new_bookmarks, track_name, peaks )