Example #1
0
# Load the run configuration from the YAML file named by the first
# command-line argument; the context manager closes the file handle.
# NOTE(review): yaml.load can construct arbitrary Python objects -- prefer
# yaml.safe_load if the config file may come from an untrusted source.
with open(sys.argv[1]) as config_file:
    opt_dict = yaml.load(config_file.read())

# Every name in str_opts must be present in the config; each one is
# published as a module-level variable so the code below can use it directly.
for opt in str_opts:
    if opt not in opt_dict:
        print('config file missing option : ' + opt)
        # exit with a failure status so callers can detect the bad config
        sys.exit(1)
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
peaks = pique.readbookmarks(peak_bookmarks)

# calculate enrichment ratios: for each peak, the summed forward + reverse
# data tracks divided by the summed forward + reverse background tracks,
# both taken over the peak's [start:stop] slice
for n, peak in enumerate(peaks):
    start, stop = peak['start'], peak['stop']
    a = sum(data_ff[start:stop]) + sum(data_rr[start:stop])
    b = sum(b_ff[start:stop]) + sum(b_rr[start:stop])
    # NOTE(review): raises ZeroDivisionError when the background sum is zero
    peaks[n]['annotations']['enrichment_ratio'] = float(a) / float(b)

# write new bookmark file
pique.msg('writing re-annotated bookmark file...')
pique.writebookmarks(annotated_bookmarks, track_name, peaks)
Example #2
0
                           peak.bookmark    \
                           genes.bed
"""
import numpy
import pique
import pylab
import sys

# Command-line arguments used here: argv[1] and argv[2] are the two track
# files, argv[3] the peak bookmark file, argv[4] the BED gene annotations.

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(sys.argv[1])
data_rr = pique.readtrack(sys.argv[2])

# read peak data
pique.msg('reading peak bookmarks...')
peaks = pique.readbookmarks(sys.argv[3])

# draw peaks
pique.msg('drawing peak bookmarks...')
for peak in peaks:
    # only `import pylab` is in scope, so axvspan must be qualified;
    # the bare name would raise NameError
    pylab.axvspan(peak['start'], peak['stop'], color='green', alpha=0.3)

# read BED formatted gene annotations
pique.msg('reading gene annotations...')
genes = {}
with open(sys.argv[4]) as bed_file:
    for line in bed_file:
        # only lines mentioning 'Chromosome' are treated as gene records
        if 'Chromosome' not in line:
            continue
        # split once; columns 1-2 are start/stop, 3 is the name, 5 the strand
        fields = line.split()
        start, stop = map(int, fields[1:3])
        name = fields[3]
        strand = fields[5]
        genes[name] = {'start': start, 'stop': stop, 'strand': strand}
Example #3
0
                           peak.bookmark    \
                           genes.bed
"""
import numpy
import pique
import pylab
import sys

# Command-line arguments used here: argv[1] and argv[2] are the two track
# files, argv[3] the peak bookmark file, argv[4] the BED gene annotations.

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(sys.argv[1])
data_rr = pique.readtrack(sys.argv[2])

# read peak data
pique.msg('reading peak bookmarks...')
peaks = pique.readbookmarks(sys.argv[3])

# draw peaks
pique.msg('drawing peak bookmarks...')
for peak in peaks:
    # only `import pylab` is in scope, so axvspan must be qualified;
    # the bare name would raise NameError
    pylab.axvspan(peak['start'], peak['stop'], color='green', alpha=0.3)

# read BED formatted gene annotations
pique.msg('reading gene annotations...')
genes = {}
with open(sys.argv[4]) as bed_file:
    for line in bed_file:
        # only lines mentioning 'Chromosome' are treated as gene records
        if 'Chromosome' not in line:
            continue
        # split once; columns 1-2 are start/stop, 3 is the name, 5 the strand
        fields = line.split()
        start, stop = map(int, fields[1:3])
        name = fields[3]
        strand = fields[5]
        genes[name] = {'start': start, 'stop': stop, 'strand': strand}
Example #4
0
# Load the run configuration from the YAML file named by the first
# command-line argument; the context manager closes the file handle.
# NOTE(review): yaml.load can construct arbitrary Python objects -- prefer
# yaml.safe_load if the config file may come from an untrusted source.
with open(sys.argv[1]) as config_file:
    opt_dict = yaml.load(config_file.read())

# Every name in str_opts must be present in the config; each one is
# published as a module-level variable so the code below can use it directly.
for opt in str_opts:
    if opt not in opt_dict:
        print('config file missing option : ' + opt)
        # exit with a failure status so callers can detect the bad config
        sys.exit(1)
    setattr(sys.modules[__name__], opt, opt_dict[opt])

pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

non_peaks = pique.readbookmarks(non_peak_bookmarks)

# calculate enrichment ratios
# (the leading space in ' regions...' keeps the count and the word apart)
pique.msg('calculating enrichment ratios using ' +
          str(len(non_peaks)) + ' regions...')

# per-region sums of each track over the region's [start:stop] slice
d_f, d_r, b_f, b_r = [], [], [], []
for n, region in enumerate(non_peaks):
    start, stop = region['start'], region['stop']
    d_f.append(sum(data_ff[start:stop]))
    d_r.append(sum(data_rr[start:stop]))
    b_f.append(sum(b_ff[start:stop]))
    b_r.append(sum(b_rr[start:stop]))

# ratio of total data signal to total background signal, per strand
# NOTE(review): raises ZeroDivisionError when a background total is zero
f_norm = float(sum(d_f)) / sum(b_f)
r_norm = float(sum(d_r)) / sum(b_r)
Example #5
0
# Every name in str_opts must be present in the parsed config (opt_dict);
# each one is published as a module-level variable for the code below.
for opt in str_opts:
    if opt not in opt_dict:
        print('config file missing option : ' + opt)
        # exit with a failure status so callers can detect the bad config
        sys.exit(1)
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read the track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
pique.msg('reading annotations...')
slices = pique.readbookmarks(slice_bookmarks)

# write new slice tracks
for s in slices:
    # cut every track down to this bookmark's [start:stop] window
    sdata_ff = data_ff[s['start']:s['stop']]
    sdata_rr = data_rr[s['start']:s['stop']]
    sb_ff = b_ff[s['start']:s['stop']]
    sb_rr = b_rr[s['start']:s['stop']]

    # write enrichment track
    # NOTE(review): `file` shadows the Python 2 builtin of the same name
    file = new_track_prefix + '_IP_' + s['annotations']['slice'] + '.track'
    pique.msg('writing ' + file + '...')
    pique.write_track(sdata_ff, sdata_rr, file, track_name)

    # write background track
    file = new_track_prefix + '_BG_' + s['annotations']['slice'] + '.track'
Example #6
0
# Load the run configuration from the YAML file named by the first
# command-line argument; the context manager closes the file handle.
# NOTE(review): yaml.load can construct arbitrary Python objects -- prefer
# yaml.safe_load if the config file may come from an untrusted source.
with open(sys.argv[1]) as config_file:
    opt_dict = yaml.load(config_file.read())

# Every name in str_opts must be present in the config; each one is
# published as a module-level variable so the code below can use it directly.
for opt in str_opts:
    if opt not in opt_dict:
        print('config file missing option : ' + opt)
        # exit with a failure status so callers can detect the bad config
        sys.exit(1)
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
peaks = pique.readbookmarks(peak_bookmarks)

# calculate enrichment ratios: for each peak, the summed forward + reverse
# data tracks divided by the summed forward + reverse background tracks,
# both taken over the peak's [start:stop] slice
for n, peak in enumerate(peaks):
    start, stop = peak['start'], peak['stop']
    a = sum(data_ff[start:stop]) + sum(data_rr[start:stop])
    b = sum(b_ff[start:stop]) + sum(b_rr[start:stop])
    # NOTE(review): raises ZeroDivisionError when the background sum is zero
    peaks[n]['annotations']['enrichment_ratio'] = float(a) / float(b)

# write new bookmark file
pique.msg('writing re-annotated bookmark file...')
pique.writebookmarks(annotated_bookmarks, track_name, peaks)
Example #7
0
# Every name in str_opts must be present in the parsed config (opt_dict);
# each one is published as a module-level variable for the code below.
for opt in str_opts:
    if opt not in opt_dict:
        print('config file missing option : ' + opt)
        # exit with a failure status so callers can detect the bad config
        sys.exit(1)
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read the track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
pique.msg('reading annotations...')
slices = pique.readbookmarks(slice_bookmarks)

# write new slice tracks
for s in slices:
    # cut every track down to this bookmark's [start:stop] window
    sdata_ff = data_ff[s['start']:s['stop']]
    sdata_rr = data_rr[s['start']:s['stop']]
    sb_ff = b_ff[s['start']:s['stop']]
    sb_rr = b_rr[s['start']:s['stop']]

    # write enrichment track
    # NOTE(review): `file` shadows the Python 2 builtin of the same name
    file = new_track_prefix + '_IP_' + s['annotations']['slice'] + '.track'
    pique.msg('writing ' + file + '...')
    pique.write_track(sdata_ff, sdata_rr, file, track_name)

    # write background track
    file = new_track_prefix + '_BG_' + s['annotations']['slice'] + '.track'
Example #8
0
# Load the run configuration from the YAML file named by the first
# command-line argument; the context manager closes the file handle.
# NOTE(review): yaml.load can construct arbitrary Python objects -- prefer
# yaml.safe_load if the config file may come from an untrusted source.
with open(sys.argv[1]) as config_file:
    opt_dict = yaml.load(config_file.read())

# Every name in str_opts must be present in the config; each one is
# published as a module-level variable so the code below can use it directly.
for opt in str_opts:
    if opt not in opt_dict:
        print('config file missing option : ' + opt)
        # exit with a failure status so callers can detect the bad config
        sys.exit(1)
    setattr(sys.modules[__name__], opt, opt_dict[opt])

pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

non_peaks = pique.readbookmarks(non_peak_bookmarks)

# calculate enrichment ratios
# (the leading space in ' regions...' keeps the count and the word apart)
pique.msg('calculating enrichment ratios using ' +
          str(len(non_peaks)) + ' regions...')

# per-region sums of each track over the region's [start:stop] slice
d_f, d_r, b_f, b_r = [], [], [], []
for n, region in enumerate(non_peaks):
    start, stop = region['start'], region['stop']
    d_f.append(sum(data_ff[start:stop]))
    d_r.append(sum(data_rr[start:stop]))
    b_f.append(sum(b_ff[start:stop]))
    b_r.append(sum(b_rr[start:stop]))

# ratio of total data signal to total background signal, per strand
# NOTE(review): raises ZeroDivisionError when a background total is zero
f_norm = float(sum(d_f)) / sum(b_f)
r_norm = float(sum(d_r)) / sum(b_r)
Example #9
0
# Options that must be present in the YAML config file.
str_opts = [
    'track_name',
    'annotated_bookmarks',
    'gene_annotations',
    'new_bookmarks',
]

# Load the run configuration from the YAML file named by the first
# command-line argument; the context manager closes the file handle.
# NOTE(review): yaml.load can construct arbitrary Python objects -- prefer
# yaml.safe_load if the config file may come from an untrusted source.
with open(sys.argv[1]) as config_file:
    opt_dict = yaml.load(config_file.read())

# Publish each required option as a module-level variable so the code
# below can use it directly; abort if any is missing.
for opt in str_opts:
    if opt not in opt_dict:
        print('config file missing option : ' + opt)
        # exit with a failure status so callers can detect the bad config
        sys.exit(1)
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read bookmarks file
pique.msg('reading annotations...')
peaks = pique.readbookmarks(annotated_bookmarks)

# read gene annotations
genes = {}
with open(gene_annotations) as annotation_file:
    for line in annotation_file:
        # lines containing a double quote are skipped
        if '"' in line:
            continue
        fields = line.split()
        # blank lines have no first column to compare; skip them instead of
        # failing with IndexError
        if not fields:
            continue
        # keep only records whose first column (lower-cased) is track_name
        # NOTE(review): track_name itself is not lower-cased -- confirm the
        # config always supplies it in lower case
        if fields[0].lower() != track_name:
            continue
        start, stop = map(int, fields[1:3])
        strand = fields[5]
        name = fields[3]
        genes[name] = {'start': start, 'stop': stop, 'strand': strand}
        print(name)
print(len(genes))