Esempio n. 1
0
def detect(name, ipfile, bgfile, mapfile, alpha, l_thresh, pickle_file,
           wav_file):
    """
    This function drives the peak detection workflow.

    Loads the IP, background and map data, applies the filters, finds
    peaks in each analysis region and writes the standard output files.
    Optionally pickles the analysis workbench and/or writes one WAV
    rendering of the IP track per contig.

    name        : project name, used as the prefix for all output files
    ipfile      : IP (ChIP) input file
    bgfile      : background input file
    mapfile     : genome map file
    alpha       : filter parameter forwarded to apply_filter
    l_thresh    : length threshold forwarded to apply_filter
    pickle_file : if true, pickle the workbench to <name>.pickle
    wav_file    : if true, write one WAV file per contig
    """
    # set logfile
    logfile = name + '.log'

    pique.msg(logfile, 'starting run for project : ' + name)

    # log inputs
    pique.msg(logfile, '  -> IP file  : ' + ipfile)
    pique.msg(logfile, '  -> BG file  : ' + bgfile)
    pique.msg(logfile, '  -> map file : ' + mapfile)
    pique.msg(logfile, '  -> alpha    : ' + str(alpha))
    pique.msg(logfile, '  -> l_thresh : ' + str(l_thresh))

    # load the data
    pique.msg(logfile, 'loading data...')
    D = pique.data.PiqueData(ipfile, bgfile, mapfile, name=name)

    # log the contigs and their analysis/masking regions
    pique.msg(logfile, '  found contigs :')
    for contig in D.data.keys():
        pique.msg(logfile, '    ' + contig)
        pique.msg(logfile, '      length : ' + str(D.data[contig]['length']))
        for r in D.data[contig]['regions']:
            start = str(r['start'])
            stop = str(r['stop'])
            pique.msg(logfile, '      analysis region : ' + start + ':' + stop)
        for m in D.data[contig]['masks']:
            start = str(m['start'])
            stop = str(m['stop'])
            pique.msg(logfile, '      masking region  : ' + start + ':' + stop)

    # start analysis workbench
    pique.msg(logfile, 'creating analysis workbench...')
    PA = pique.analysis.PiqueAnalysis(D)

    # run filters
    pique.msg(logfile, 'running filters...')

    for ar_name in PA.data.keys():
        pique.msg(logfile,
                  '  :: applying filters to analysis region ' + ar_name)
        PA.apply_filter(ar_name, alpha, l_thresh)

    # find peaks and log per-region statistics
    pique.msg(logfile, 'finding peaks...')
    for ar_name in PA.data.keys():
        PA.find_peaks(ar_name)
        pique.msg(
            logfile,
            '  peaks ' + ar_name + ' : ' + str(len(PA.data[ar_name]['peaks'])))
        pique.msg(
            logfile,
            '     noise threshold  : ' + str(PA.data[ar_name]['N_thresh']))
        pique.msg(
            logfile,
            '     filter threshold : ' + str(PA.data[ar_name]['n_thresh']))
        pique.msg(
            logfile, '     normalizations   : ' +
            ', '.join(map(str, PA.data[ar_name]['norms'])))

    # if a pickle file was requested, write it
    if pickle_file:
        pique.msg(logfile, 'pickling analysis workbench...')
        # open in binary mode and close the handle deterministically;
        # the original opened in text mode and leaked the file handle
        with open(name + '.pickle', 'wb') as handle:
            cPickle.dump(PA, handle)

    # if a WAV file was requested, write one file per contig
    if wav_file:
        for contig in D.data.keys():
            # 'wav_name' instead of 'file' to avoid shadowing the builtin
            wav_name = name + '_' + contig + '.wav'
            pique.msg(logfile, 'writing WAV output : ' + wav_name)
            pique.fileIO.writeWAV(wav_name,
                                  D.data,
                                  contig,
                                  track='IP',
                                  minusBG=True,
                                  amplify=True)

    # write output files
    pique.msg(logfile, 'writing output files...')
    pique.fileIO.writepeaksGFF(name + '.gff', PA.data)
    pique.fileIO.writebookmarks(name + '.bookmark', PA.data, name=name)
    pique.fileIO.writeQP(name + '.qp', PA.data)
    pique.fileIO.writepeakTSV(name + '.peak.tsv', PA.data)
    pique.fileIO.writetrack(name + '.IP.track', D.data)
    pique.fileIO.writetrack(name + '.BG.track', D.data, track='BG')

    # done!
    pique.msg(logfile, 'run completed.')
Esempio n. 2
0
                'masking_loci',         \
                'peak_bookmarks',       \
                'weed_bookmarks',       \
                'overlap_track',        \
                'binding_track' ]

opt_dict = yaml.load( open( sys.argv[1] ).read() )

for opt in num_opts + str_opts :
    if not opt_dict.has_key( opt ) :
        print 'config file missing option : ' + opt
        quit()
    setattr( sys.modules[__name__], opt, opt_dict[opt] )

# read the track data
pique.msg( 'reading track data...' )
data_ff = pique.readtrack( forward_ChIP_track )
data_rr = pique.readtrack( reverse_ChIP_track )
b_ff    = pique.readtrack( forward_bgnd_track )
b_rr    = pique.readtrack( reverse_bgnd_track )

# apply mask
pique.msg( 'applying mask...' )
is_elements = []
for line in open( masking_loci ) :
    if line.__contains__('#') :
        continue
    start, stop = map( int, line.split()[:2] )
    is_elements.append( { 'start':start, 'stop':stop } )

data_ff = pique.mask( data_ff, is_elements )
Esempio n. 3
0
def makemap(name, bamfile, window, stride, highest, lowest, bins):
    """
    This function drives the genome map making workflow.

    Loads read data from a BAM file, builds a spectral histogram for
    each contig and saves a contour plot of each histogram as a PNG.

    name    : project name, used as the prefix for output files
    bamfile : input BAM file
    window  : sliding window size for the histogram
    stride  : step between successive windows
    highest : value of the highest histogram bin
    lowest  : value of the lowest histogram bin
    bins    : number of histogram bins
    """
    # pylab is imported lazily so importing this module does not
    # require a working matplotlib backend
    import pylab

    logfile = name + '.mapmaker.log'
    mapfile = name + '.map.gff'

    pique.msg(logfile, 'starting mapmaker for project : ' + name)

    # log inputs
    pique.msg(logfile, '  -> BAM file    : ' + bamfile)
    pique.msg(logfile, '  -> map file    : ' + mapfile)
    pique.msg(logfile, '  -> window      : ' + str(window))
    pique.msg(logfile, '  -> stride      : ' + str(stride))
    pique.msg(logfile, '  -> bins        : ' + str(bins))
    pique.msg(logfile, '  -> highest bin : ' + str(highest))
    pique.msg(logfile, '  -> lowest bin  : ' + str(lowest))

    pique.msg(logfile, 'loading data...')

    data = pique.fileIO.loadBAM(bamfile)

    pique.msg(logfile, '  found contigs :')

    for contig in data.keys():
        pique.msg(logfile, '    ' + contig)
        pique.msg(logfile, '      ' + str(len(data[contig]['forward'])))

    pique.msg(logfile, 'making spectral histograms...')

    sh = {}
    for contig in data.keys():
        # typo fix : 'sectral' -> 'spectral' in the log message
        pique.msg(logfile,
                  '  :: making spectral histogram for contig ' + contig)
        # combine both strands into one integer array for the histogram
        d = numpy.array(data[contig]['forward'] + data[contig]['reverse'],
                        dtype=int)
        sh[contig] = pique.mapmaker.hist(d, lowest, highest, bins, window,
                                         stride)

    # save images of spectral histograms
    pique.msg(logfile, 'saving images of spectral histograms...')

    for contig in sh.keys():
        pylab.cla()  # clean up crumbs from last plot
        pylab.clf()  # clean up crumbs from last plot
        pique.msg(logfile, '  :: saving image for contig ' + contig)
        pylab.contourf(sh[contig], bins)
        pylab.title(name + ' : ' + contig)
        imgname = name + '_' + contig + '.png'
        pylab.savefig(imgname, format='png')
Esempio n. 4
0
def bam2wav(name, ipfile, bgfile):
    """
    This function drives the creation of a WAV file from a BAM file.

    Loads the IP and background data and writes one WAV rendering of
    the background-subtracted, amplified IP track per contig.

    name   : project name, used as the prefix for output files
    ipfile : IP (ChIP) BAM file
    bgfile : background BAM file
    """

    # set logfile
    logfile = name + '.log'

    pique.msg(logfile,
              'converting BAM files to WAV files for project : ' + name)

    # log inputs
    pique.msg(logfile, '  -> IP file  : ' + ipfile)
    pique.msg(logfile, '  -> BG file  : ' + bgfile)

    # load the data (the empty string means no genome map file)
    pique.msg(logfile, 'loading data...')
    D = pique.data.PiqueData(ipfile, bgfile, '', name=name)

    # log the contigs and their analysis/masking regions
    pique.msg(logfile, '  found contigs :')
    for contig in D.data.keys():
        pique.msg(logfile, '    ' + contig)
        pique.msg(logfile, '      length : ' + str(D.data[contig]['length']))
        for r in D.data[contig]['regions']:
            start = str(r['start'])
            stop = str(r['stop'])
            pique.msg(logfile, '      analysis region : ' + start + ':' + stop)
        for m in D.data[contig]['masks']:
            start = str(m['start'])
            stop = str(m['stop'])
            pique.msg(logfile, '      masking region  : ' + start + ':' + stop)
    # write the WAV files, one per contig
    for contig in D.data.keys():
        # 'wav_name' instead of 'file' to avoid shadowing the builtin
        wav_name = name + '_' + contig + '.wav'
        pique.msg(logfile, 'writing WAV output : ' + wav_name)
        pique.fileIO.writeWAV(wav_name,
                              D.data,
                              contig,
                              track='IP',
                              minusBG=True,
                              amplify=True)

    # done!
    pique.msg(logfile, 'conversion completed.')
Esempio n. 5
0
def makemap( name,
             bamfile,
             window,
             stride,
             highest,
             lowest,
             bins ) :
    """
    This function drives the genome map making workflow.

    Loads read data from a BAM file, builds a spectral histogram for
    each contig and saves a contour plot of each histogram as a PNG.

    name    : project name, used as the prefix for output files
    bamfile : input BAM file
    window  : sliding window size for the histogram
    stride  : step between successive windows
    highest : value of the highest histogram bin
    lowest  : value of the lowest histogram bin
    bins    : number of histogram bins
    """
    # pylab is imported lazily so importing this module does not
    # require a working matplotlib backend
    import pylab

    logfile = name + '.mapmaker.log'
    mapfile = name + '.map.gff'

    pique.msg( logfile, 'starting mapmaker for project : ' + name )

    # log inputs
    pique.msg( logfile, '  -> BAM file    : ' + bamfile      )
    pique.msg( logfile, '  -> map file    : ' + mapfile      )
    pique.msg( logfile, '  -> window      : ' + str(window)  )
    pique.msg( logfile, '  -> stride      : ' + str(stride)  )
    pique.msg( logfile, '  -> bins        : ' + str(bins)    )
    pique.msg( logfile, '  -> highest bin : ' + str(highest) )
    pique.msg( logfile, '  -> lowest bin  : ' + str(lowest)  )

    pique.msg( logfile, 'loading data...' )

    data = pique.fileIO.loadBAM( bamfile )

    pique.msg( logfile, '  found contigs :' )

    for contig in data.keys() :
        pique.msg( logfile, '    ' + contig )
        pique.msg( logfile, '      ' + str(len(data[contig]['forward'])) )

    pique.msg( logfile, 'making spectral histograms...' )

    sh = {}
    for contig in data.keys() :
        # typo fix : 'sectral' -> 'spectral' in the log message
        pique.msg( logfile, '  :: making spectral histogram for contig ' + contig )
        # combine both strands into one integer array for the histogram
        d = numpy.array( data[contig]['forward'] + data[contig]['reverse'], dtype = int )
        sh[contig] = pique.mapmaker.hist(   d,
                                            lowest,
                                            highest,
                                            bins,
                                            window,
                                            stride     )

    # save images of spectral histograms
    pique.msg( logfile, 'saving images of spectral histograms...' )

    for contig in sh.keys() :
        pylab.cla() # clean up crumbs from last plot
        pylab.clf() # clean up crumbs from last plot
        pique.msg( logfile, '  :: saving image for contig ' + contig )
        pylab.contourf( sh[contig], bins )
        pylab.title( name + ' : ' + contig )
        imgname = name + '_' + contig + '.png'
        pylab.savefig( imgname, format='png' )
Esempio n. 6
0
 def filter_all( self, alpha, l_thresh ) :
     """
     Apply the peak filters to every analysis region in self.data.

     alpha    : filter parameter forwarded to apply_filter
     l_thresh : length threshold forwarded to apply_filter
     """
     for ar_name in self.data.keys() :
         pique.msg( '  :: applying filters to analysis region ' + ar_name )
         self.apply_filter( ar_name, alpha, l_thresh )
Esempio n. 7
0
                'forward_bgnd_track',   \
                'reverse_ChIP_track',   \
                'reverse_bgnd_track',   \
                'slice_bookmarks',      \
                'new_track_prefix',     ]

# parse the YAML config file named by the first command-line argument
# NOTE(review): yaml.load can execute code via non-standard tags;
# prefer yaml.safe_load if the config file is untrusted.
opt_dict = yaml.load( open( sys.argv[1] ).read() )

# every option is required; publish each one as a module-level name
# (dict.has_key is Python 2 only)
for opt in str_opts :
    if not opt_dict.has_key( opt ) :
        print 'config file missing option : ' + opt
        quit()
    setattr( sys.modules[__name__], opt, opt_dict[opt] )

# read the track data
pique.msg( 'reading track data...' )
data_ff = pique.readtrack( forward_ChIP_track )
data_rr = pique.readtrack( reverse_ChIP_track )
b_ff    = pique.readtrack( forward_bgnd_track )
b_rr    = pique.readtrack( reverse_bgnd_track )

# read bookmarks file
pique.msg( 'reading annotations...' )
slices = pique.readbookmarks( slice_bookmarks )

# write new slice tracks : cut each bookmarked interval out of the
# four tracks (start/stop are presumably track indices -- confirm)
for s in slices :
    sdata_ff    = data_ff[ s['start'] : s['stop'] ]
    sdata_rr    = data_rr[ s['start'] : s['stop'] ]
    sb_ff       = b_ff[    s['start'] : s['stop'] ]
    sb_rr       = b_rr[    s['start'] : s['stop'] ]
Esempio n. 8
0
def bam2wav( name,
             ipfile,
             bgfile ) :
    """
    This function drives the creation of a WAV file from a BAM file.

    Loads the IP and background data and writes one WAV rendering of
    the background-subtracted, amplified IP track per contig.

    name   : project name, used as the prefix for output files
    ipfile : IP (ChIP) BAM file
    bgfile : background BAM file
    """

    # set logfile
    logfile = name + '.log'

    pique.msg( logfile, 'converting BAM files to WAV files for project : ' + name )

    # log inputs
    pique.msg( logfile, '  -> IP file  : ' + ipfile   )
    pique.msg( logfile, '  -> BG file  : ' + bgfile   )

    # load the data (the empty string means no genome map file)
    pique.msg( logfile, 'loading data...' )
    D = pique.data.PiqueData( ipfile, bgfile, '', name=name )

    # log the contigs and their analysis/masking regions
    pique.msg( logfile, '  found contigs :' )
    for contig in D.data.keys() :
        pique.msg( logfile, '    ' + contig )
        pique.msg( logfile, '      length : ' + str(D.data[contig]['length']) )
        for r in D.data[contig]['regions'] :
            start = str( r['start'] )
            stop  = str( r['stop']  )
            pique.msg( logfile, '      analysis region : ' + start + ':' + stop )
        for m in D.data[contig]['masks'] :
            start = str( m['start'] )
            stop  = str( m['stop']  )
            pique.msg( logfile, '      masking region  : ' + start + ':' + stop )
    # write the WAV files, one per contig
    for contig in D.data.keys() :
        # 'wav_name' instead of 'file' to avoid shadowing the builtin
        wav_name = name + '_' + contig + '.wav'
        pique.msg( logfile, 'writing WAV output : ' + wav_name )
        pique.fileIO.writeWAV( wav_name,
                               D.data,
                               contig,
                               track='IP',
                               minusBG=True,
                               amplify=True )

    # done!
    pique.msg( logfile, 'conversion completed.' )
Esempio n. 9
0
                'forward_bgnd_track',   \
                'reverse_ChIP_track',   \
                'reverse_bgnd_track',   \
                'slice_bookmarks',      \
                'new_track_prefix',     ]

# parse the YAML config file named by the first command-line argument
# NOTE(review): yaml.load can execute code via non-standard tags;
# prefer yaml.safe_load if the config file is untrusted.
opt_dict = yaml.load(open(sys.argv[1]).read())

# every option is required; publish each one as a module-level name
# (dict.has_key is Python 2 only)
for opt in str_opts:
    if not opt_dict.has_key(opt):
        print 'config file missing option : ' + opt
        quit()
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read the track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
pique.msg('reading annotations...')
slices = pique.readbookmarks(slice_bookmarks)

# write new slice tracks : cut each bookmarked interval out of the
# four tracks (start/stop are presumably track indices -- confirm)
for s in slices:
    sdata_ff = data_ff[s['start']:s['stop']]
    sdata_rr = data_rr[s['start']:s['stop']]
    sb_ff = b_ff[s['start']:s['stop']]
    sb_rr = b_rr[s['start']:s['stop']]
Esempio n. 10
0
                'forward_bgnd_track',   \
                'reverse_ChIP_track',   \
                'reverse_bgnd_track',   \
                'masking_loci',         \
                'annotated_bookmarks'   ]

# parse the YAML config file named by the first command-line argument
# NOTE(review): yaml.load can execute code via non-standard tags;
# prefer yaml.safe_load if the config file is untrusted.
opt_dict = yaml.load( open( sys.argv[1] ).read() )

# every option is required; publish each one as a module-level name
# (dict.has_key is Python 2 only)
for opt in str_opts :
    if not opt_dict.has_key( opt ) :
        print 'config file missing option : ' + opt
        quit()
    setattr( sys.modules[__name__], opt, opt_dict[opt] )

# read track data
pique.msg( 'reading track data...' )
data_ff = pique.readtrack( forward_ChIP_track )
data_rr = pique.readtrack( reverse_ChIP_track )
b_ff    = pique.readtrack( forward_bgnd_track )
b_rr    = pique.readtrack( reverse_bgnd_track )

# read bookmarks file
# NOTE(review): peak_bookmarks is presumably declared in the option
# list above this fragment -- confirm it is set by the loop above
peaks = pique.readbookmarks( peak_bookmarks )

# calculate enrichment ratios : ChIP signal over background, summed
# over both strands within each peak interval
for n,peak in enumerate(peaks) :
    a =     sum( data_ff[ peak['start'] : peak['stop'] ] )
    a = a + sum( data_rr[ peak['start'] : peak['stop'] ] )
    b =     sum(    b_ff[ peak['start'] : peak['stop'] ] )
    b = b + sum(    b_rr[ peak['start'] : peak['stop'] ] )
    # NOTE(review): raises ZeroDivisionError if the background sum is 0
    peaks[n]['annotations']['enrichment_ratio'] = float(a) / float(b)
Esempio n. 11
0
                'forward_bgnd_track',       \
                'reverse_ChIP_track',       \
                'reverse_bgnd_track',       \
                'new_forward_bgnd_track',   \
                'new_reverse_bgnd_track',   \
                'non_peak_bookmarks',       ]

opt_dict = yaml.load( open( sys.argv[1] ).read() )

for opt in str_opts :
    if not opt_dict.has_key( opt ) :
        print 'config file missing option : ' + opt
        quit()
    setattr( sys.modules[__name__], opt, opt_dict[opt] )

pique.msg( 'reading track data...' )
data_ff = pique.readtrack( forward_ChIP_track )
data_rr = pique.readtrack( reverse_ChIP_track )
b_ff    = pique.readtrack( forward_bgnd_track )
b_rr    = pique.readtrack( reverse_bgnd_track )

non_peaks = pique.readbookmarks( non_peak_bookmarks )

# calculate enrichment ratios
pique.msg( 'calculating enrichment ratios using ' + \
            str(len(non_peaks)) + 'regions...' )

d_f, d_r, b_f, b_r = [],[],[],[]
for n,region in enumerate( non_peaks ) :
    d_f.append( sum( data_ff[ region['start'] : region['stop'] ] ) )
    d_r.append( sum( data_rr[ region['start'] : region['stop'] ] ) )
Esempio n. 12
0
    def run( self ) :
        """
        Run the full peak detection workflow from the GUI inputs.

        Reads the project name, alpha and l_thresh from the form
        widgets, loads the data, applies filters, finds peaks and
        writes the output files, logging progress to <name>.log and
        updating the window title as it goes.
        """
        # check inputs...
        name     = self.nametext.get().strip()

        # set logfile
        logfile = name + '.log'

        pique.msg( logfile, 'starting run for project : ' + name )

        # int() raises ValueError on non-numeric form input (not handled here)
        alpha    = int( self.alphatext.get().strip() )
        l_thresh = int( self.lthreshtext.get().strip() )

        # log inputs
        pique.msg( logfile, '  -> IP file  : ' + self.IPfile   )
        pique.msg( logfile, '  -> BG file  : ' + self.BGfile   )
        pique.msg( logfile, '  -> map file : ' + self.mapfile  )
        pique.msg( logfile, '  -> alpha    : ' + str(alpha)    )
        pique.msg( logfile, '  -> l_thresh : ' + str(l_thresh) )

        # load the data ; the map file is optional
        pique.msg( logfile, 'loading data...' )
        self.master.title( 'Pique : loading data...' )
        if not self.mapfile :
            D = pique.data.PiqueData( self.IPfile, self.BGfile, name=name )
        else :
            D = pique.data.PiqueData( self.IPfile, self.BGfile, self.mapfile, name=name )

        # log the contigs and their analysis/masking regions
        pique.msg( logfile, '  found contigs :' )
        for contig in D.data.keys() :
            pique.msg( logfile, '    ' + contig )
            pique.msg( logfile, '      length : ' + str(D.data[contig]['length']) )
            for r in D.data[contig]['regions'] :
                start = str( r['start'] )
                stop  = str( r['stop']  )
                pique.msg( logfile, '      analysis region : ' + start + ':' + stop )
            for m in D.data[contig]['masks'] :
                start = str( m['start'] )
                stop  = str( m['stop']  )
                pique.msg( logfile, '      masking region  : ' + start + ':' + stop )

        # start analysis workbench
        pique.msg( logfile, 'creating analysis workbench...' )
        self.master.title( 'Pique : creating workbench...' )
        PA = pique.analysis.PiqueAnalysis( D )

        # run filters on every analysis region
        pique.msg( logfile, 'running filters...' )
        self.master.title( 'Pique : running filters...' )

        for ar_name in PA.data.keys() :
            pique.msg( logfile, '  :: applying filters to analysis region ' + ar_name )
            PA.apply_filter( ar_name, alpha, l_thresh )

        # find peaks and log per-region statistics
        pique.msg( logfile, 'finding peaks...' )
        self.master.title( 'Pique : finding peaks...' )
        for ar_name in PA.data.keys() :
            PA.find_peaks(ar_name)
            pique.msg( logfile, '  peaks ' + ar_name + ' : ' + str(len(PA.data[ar_name]['peaks'])) )
            pique.msg( logfile, '     noise threshold  : ' + str(PA.data[ar_name]['N_thresh']) )
            pique.msg( logfile, '     filter threshold : ' + str(PA.data[ar_name]['n_thresh']) )
            pique.msg( logfile, '     normalizations   : ' + ', '.join( map(str, PA.data[ar_name]['norms']) ) )

        # write output files
        pique.msg( logfile, 'writing output files...' )
        self.master.title( 'Pique : writing output...' )
        pique.fileIO.writepeaksGFF(  name + '.gff',      PA.data )
        pique.fileIO.writebookmarks( name + '.bookmark', PA.data, name=name )
        pique.fileIO.writeQP(        name + '.qp',       PA.data )
        pique.fileIO.writepeakTSV(   name + '.peak.tsv', PA.data )
        pique.fileIO.writetrack(     name + '.IP.track', D.data  )
        pique.fileIO.writetrack(     name + '.BG.track', D.data, track='BG' )

        # done!
        pique.msg( logfile, 'run completed.' )
        self.master.title( 'Pique : run completed.' )
Esempio n. 13
0
    def run( self ) :
        """
        Run the peak detection workflow from the GUI inputs.

        Reads the project name, alpha and l_thresh from the form
        widgets, loads the data, applies filters, finds peaks and
        writes the GFF and bookmark output files, updating the window
        title as it goes.
        """
        # check inputs...
        name     = self.nametext.get().strip()
        pique.msg( 'starting run for project : ' + name )

        # int() raises ValueError on non-numeric form input (not handled here)
        alpha    = int( self.alphatext.get().strip() )
        l_thresh = int( self.lthreshtext.get().strip() )

        # load the data ; the map file is optional
        pique.msg( 'loading data...' )
        self.master.title( 'Pique : loading data...' )
        if not self.mapfile :
            D = pique.data.PiqueData( self.IPfile, self.BGfile )
        else :
            D = pique.data.PiqueData( self.IPfile, self.BGfile, self.mapfile )

        # log the contigs and their analysis/masking regions
        pique.msg( '  -> found contigs :' )
        for contig in D.data.keys() :
            pique.msg( '    ' + contig )
            pique.msg( '      length : ' + str(D.data[contig]['length']) )
            for r in D.data[contig]['regions'] :
                start = str( r['start'] )
                stop  = str( r['stop']  )
                pique.msg( '      analysis region : ' + start + ':' + stop )
            for m in D.data[contig]['masks'] :
                start = str( m['start'] )
                stop  = str( m['stop']  )
                pique.msg( '      masking region  : ' + start + ':' + stop )

        # start analysis workbench
        pique.msg( 'creating analysis workbench...' )
        self.master.title( 'Pique : creating workbench...' )
        PA = pique.analysis.PiqueAnalysis( D )

        # run filters on every analysis region
        pique.msg( 'running filters...' )
        self.master.title( 'Pique : running filters...' )
        pique.msg( '  -> alpha    : ' + str(alpha) )
        pique.msg( '  -> l_thresh : ' + str(l_thresh) )
        PA.filter_all( alpha, l_thresh )

        # find peaks in each analysis region
        pique.msg( 'finding peaks...' )
        self.master.title( 'Pique : finding peaks...' )
        for ar_name in PA.data.keys() :
            PA.find_peaks(ar_name)
            pique.msg( '  :: ' + ar_name + ' : ' + str(len(PA.data[ar_name]['peaks'])) )

        # write output files
        pique.msg( 'writing output files...' )
        self.master.title( 'Pique : writing output...' )
        pique.fileIO.writepeaksGFF( name + '.gff', PA.data )
        pique.fileIO.writebookmarks( name + '.bookmark', PA.data )

        # done!
        pique.msg( 'run completed.' )
        self.master.title( 'Pique : run completed.' )
Esempio n. 14
0
                'forward_bgnd_track',       \
                'reverse_ChIP_track',       \
                'reverse_bgnd_track',       \
                'new_forward_bgnd_track',   \
                'new_reverse_bgnd_track',   \
                'non_peak_bookmarks',       ]

opt_dict = yaml.load(open(sys.argv[1]).read())

for opt in str_opts:
    if not opt_dict.has_key(opt):
        print 'config file missing option : ' + opt
        quit()
    setattr(sys.modules[__name__], opt, opt_dict[opt])

pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

non_peaks = pique.readbookmarks(non_peak_bookmarks)

# calculate enrichment ratios
pique.msg( 'calculating enrichment ratios using ' + \
            str(len(non_peaks)) + 'regions...' )

d_f, d_r, b_f, b_r = [], [], [], []
for n, region in enumerate(non_peaks):
    d_f.append(sum(data_ff[region['start']:region['stop']]))
    d_r.append(sum(data_rr[region['start']:region['stop']]))
Esempio n. 15
0
                'masking_loci',         \
                'peak_bookmarks',       \
                'weed_bookmarks',       \
                'overlap_track',        \
                'binding_track' ]

opt_dict = yaml.load(open(sys.argv[1]).read())

for opt in num_opts + str_opts:
    if not opt_dict.has_key(opt):
        print 'config file missing option : ' + opt
        quit()
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read the track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# apply mask
pique.msg('applying mask...')
is_elements = []
for line in open(masking_loci):
    if line.__contains__('#'):
        continue
    start, stop = map(int, line.split()[:2])
    is_elements.append({'start': start, 'stop': stop})

data_ff = pique.mask(data_ff, is_elements)
Esempio n. 16
0
                'forward_bgnd_track',   \
                'reverse_ChIP_track',   \
                'reverse_bgnd_track',   \
                'masking_loci',         \
                'annotated_bookmarks'   ]

opt_dict = yaml.load(open(sys.argv[1]).read())

for opt in str_opts:
    if not opt_dict.has_key(opt):
        print 'config file missing option : ' + opt
        quit()
    setattr(sys.modules[__name__], opt, opt_dict[opt])

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(forward_ChIP_track)
data_rr = pique.readtrack(reverse_ChIP_track)
b_ff = pique.readtrack(forward_bgnd_track)
b_rr = pique.readtrack(reverse_bgnd_track)

# read bookmarks file
peaks = pique.readbookmarks(peak_bookmarks)

# calculate enrichment ratios
for n, peak in enumerate(peaks):
    a = sum(data_ff[peak['start']:peak['stop']])
    a = a + sum(data_rr[peak['start']:peak['stop']])
    b = sum(b_ff[peak['start']:peak['stop']])
    b = b + sum(b_rr[peak['start']:peak['stop']])
    peaks[n]['annotations']['enrichment_ratio'] = float(a) / float(b)
Esempio n. 17
0
opt_dict = yaml.load( open( sys.argv[1] ).read() )

for opt in num_opts + str_opts :
    if not opt_dict.has_key( opt ) :
        print 'config file missing option : ' + opt
        quit()
    setattr( sys.modules[__name__], opt, opt_dict[opt] )

for opt in opt_opts :
    if opt_dict.has_key( opt ) :
        setattr( sys.modules[__name__], opt, opt_dict[opt] )
    else :
        setattr( sys.modules[__name__], opt, None )

pique.msg( 'reading track data...' )
data_ff = pique.readtrack( forward_ChIP_track )
data_rr = pique.readtrack( reverse_ChIP_track )
b_ff    = pique.readtrack( forward_bgnd_track )
b_rr    = pique.readtrack( reverse_bgnd_track )

pique.msg( 'applying mask...' )
is_elements = []
for line in open( masking_loci ) :
    if line.__contains__('#') :
        continue
    start, stop = map( int, line.split()[:2] )
    is_elements.append( { 'start':start, 'stop':stop } )

data_ff = pique.mask( data_ff, is_elements )
data_rr = pique.mask( data_rr, is_elements )
Esempio n. 18
0
def detect( name,
            ipfile,
            bgfile,
            mapfile,
            alpha,
            l_thresh,
            pickle_file,
            wav_file ) :
    """
    This function drives the peak detection workflow.

    Loads the IP, background and map data, applies the filters, finds
    peaks in each analysis region and writes the standard output
    files. Optionally pickles the analysis workbench and/or writes one
    WAV rendering of the IP track per contig.

    name        : project name, used as the prefix for all output files
    ipfile      : IP (ChIP) input file
    bgfile      : background input file
    mapfile     : genome map file
    alpha       : filter parameter forwarded to apply_filter
    l_thresh    : length threshold forwarded to apply_filter
    pickle_file : if true, pickle the workbench to <name>.pickle
    wav_file    : if true, write one WAV file per contig
    """
    # set logfile
    logfile = name + '.log'

    pique.msg( logfile, 'starting run for project : ' + name )

    # log inputs
    pique.msg( logfile, '  -> IP file  : ' + ipfile   )
    pique.msg( logfile, '  -> BG file  : ' + bgfile   )
    pique.msg( logfile, '  -> map file : ' + mapfile  )
    pique.msg( logfile, '  -> alpha    : ' + str(alpha)    )
    pique.msg( logfile, '  -> l_thresh : ' + str(l_thresh) )

    # load the data
    pique.msg( logfile, 'loading data...' )
    D = pique.data.PiqueData( ipfile, bgfile, mapfile, name=name )

    # log the contigs and their analysis/masking regions
    pique.msg( logfile, '  found contigs :' )
    for contig in D.data.keys() :
        pique.msg( logfile, '    ' + contig )
        pique.msg( logfile, '      length : ' + str(D.data[contig]['length']) )
        for r in D.data[contig]['regions'] :
            start = str( r['start'] )
            stop  = str( r['stop']  )
            pique.msg( logfile, '      analysis region : ' + start + ':' + stop )
        for m in D.data[contig]['masks'] :
            start = str( m['start'] )
            stop  = str( m['stop']  )
            pique.msg( logfile, '      masking region  : ' + start + ':' + stop )

    # start analysis workbench
    pique.msg( logfile, 'creating analysis workbench...' )
    PA = pique.analysis.PiqueAnalysis( D )

    # run filters
    pique.msg( logfile, 'running filters...' )

    for ar_name in PA.data.keys() :
        pique.msg( logfile, '  :: applying filters to analysis region ' + ar_name )
        PA.apply_filter( ar_name, alpha, l_thresh )

    # find peaks and log per-region statistics
    pique.msg( logfile, 'finding peaks...' )
    for ar_name in PA.data.keys() :
        PA.find_peaks(ar_name)
        pique.msg( logfile, '  peaks ' + ar_name + ' : ' + str(len(PA.data[ar_name]['peaks'])) )
        pique.msg( logfile, '     noise threshold  : ' + str(PA.data[ar_name]['N_thresh']) )
        pique.msg( logfile, '     filter threshold : ' + str(PA.data[ar_name]['n_thresh']) )
        pique.msg( logfile, '     normalizations   : ' + ', '.join( map(str, PA.data[ar_name]['norms']) ) )

    # if a pickle file was requested, write it
    if pickle_file :
        pique.msg( logfile, 'pickling analysis workbench...' )
        # open in binary mode and close the handle deterministically ;
        # the original opened in text mode and leaked the file handle
        with open( name + '.pickle', 'wb' ) as handle :
            cPickle.dump( PA, handle )

    # if a WAV file was requested, write one file per contig
    if wav_file :
        for contig in D.data.keys() :
            # 'wav_name' instead of 'file' to avoid shadowing the builtin
            wav_name = name + '_' + contig + '.wav'
            pique.msg( logfile, 'writing WAV output : ' + wav_name )
            pique.fileIO.writeWAV(  wav_name,
                                    D.data,
                                    contig,
                                    track='IP',
                                    minusBG=True,
                                    amplify=True )

    # write output files
    pique.msg( logfile, 'writing output files...' )
    pique.fileIO.writepeaksGFF(  name + '.gff',      PA.data )
    pique.fileIO.writebookmarks( name + '.bookmark', PA.data, name=name )
    pique.fileIO.writeQP(        name + '.qp',       PA.data )
    pique.fileIO.writepeakTSV(   name + '.peak.tsv', PA.data )
    pique.fileIO.writetrack(     name + '.IP.track', D.data  )
    pique.fileIO.writetrack(     name + '.BG.track', D.data, track='BG' )

    # done!
    pique.msg( logfile, 'run completed.' )
Esempio n. 19
0
#!/usr/bin/env python
"""
A very, very basic genome browser.

usage : ./basic_browser.py forward.track    \
                           reverse.track    \
                           peak.bookmark    \
                           genes.bed
"""
import numpy
import pique
import pylab
import sys

# read track data
pique.msg('reading track data...')
data_ff = pique.readtrack(sys.argv[1])
data_rr = pique.readtrack(sys.argv[2])

# read peak data
pique.msg('reading peak bookmarks...')
peaks = pique.readbookmarks(sys.argv[3])

# draw peaks
pique.msg('drawing peak bookmarks...')
for peak in peaks:
    # bug fix : bare axvspan is a NameError with a plain 'import pylab';
    # the call must be qualified with the module name
    pylab.axvspan(peak['start'], peak['stop'], color='green', alpha=0.3)

# read BED formatted gene annotations
pique.msg('reading gene annotations...')
genes = {}
Esempio n. 20
0
#!/usr/bin/env python
"""
A very, very basic genome browser.

usage : ./basic_browser.py forward.track    \
                           reverse.track    \
                           peak.bookmark    \
                           genes.bed
"""
import numpy
import pique
import pylab
import sys

# read track data
pique.msg( 'reading track data...' )
data_ff = pique.readtrack( sys.argv[1] )
data_rr = pique.readtrack( sys.argv[2] )

# read peak data
pique.msg( 'reading peak bookmarks...' )
peaks = pique.readbookmarks( sys.argv[3] )

# draw peaks
pique.msg( 'drawing peak bookmarks...' )
for peak in peaks :
    # bug fix : bare axvspan is a NameError with a plain 'import pylab';
    # the call must be qualified with the module name
    pylab.axvspan( peak['start'], peak['stop'], color='green', alpha=0.3 )

# read BED formatted gene annotations
pique.msg( 'reading gene annotations...' )
genes = {}
Esempio n. 21
0
str_opts = [    'track_name',           \
                'annotated_bookmarks',  \
                'gene_annotations',     \
                'new_bookmarks',        ]

opt_dict = yaml.load( open( sys.argv[1] ).read() )

for opt in str_opts :
    if not opt_dict.has_key( opt ) :
        print 'config file missing option : ' + opt
        quit()
    setattr( sys.modules[__name__], opt, opt_dict[opt] )

# read bookmarks file
pique.msg( 'reading annotations...' )
peaks = pique.readbookmarks( annotated_bookmarks )

# read gene annotations
genes = {}
for line in open( gene_annotations ) :
    if line.__contains__( '\"' ) :
        continue
    if not line.split()[0].lower() == track_name :
        continue
    start,stop = map( int, line.split()[1:3] )
    strand = line.strip().split()[5]
    name = line.split()[3]
    genes[name] = { 'start':start,'stop':stop,'strand':strand }
    print name
print len(genes.keys())