Example #1
0
    bwdir=options.b
    bws = np.array(subprocess.check_output('find %s -name "%s" | sort'%(bwdir, '*.bw'), shell=True).split()) 
    if len(bws) == 0:
        bws = np.array(subprocess.check_output('find %s -name "%s" | sort'%(bwdir, '*.bigWig'), shell=True).split())
    numSamples = len(bws)
        
    # choose output file prefix ('.npy' is appended when saving)
    if not options.o:
        outfile = 'bwvals'
    else: outfile = options.o
    print "saving to: %s"%outfile
        
    # load genome size
    if options.g is None:
        options.g = '/raid/gSizes/mm9.genomsize'
    genomeSize = filefun.getGenomeSize(options.g)
        
    # find signal around each motif site
    signals = np.zeros((numSamples, numSites))
    for i, bw in enumerate(bws):
        print "Extracting signal for %s"%(os.path.basename(bw))
        pool = Pool(processes=options.p)
        sigArray = pool.map(functools.partial(findInsertions, bw, bedFile), range(0, numSites))
        pool.close()
        signals[i] = np.array(sigArray)

    np.save(outfile+'.npy', signals)
    
    labels = np.array([os.path.splitext(os.path.basename(bw))[0].replace('wgEncodeFsuRepliChip', '').replace('WaveSignal', '_') for bw in bws])
    indx = np.array([label.find('Diff') == -1 for label in labels])
    
    

#### SCRIPT #####
# Echo the parsed options so the run can be traced from its log output.
# NOTE(review): options.o is echoed here, while the script reads options.c
# (the count file) just below -- confirm which one is meant to be shown.
print('%s\n%s\n%s' % (options.b, options.g, options.o))
bedFileName, countFile, genomeSizeFile = options.b, options.c, options.g

print('loading files...')
# Example inputs (kept for reference; not used at runtime):
#   genomeSizeFile = '/raid/gSizes/mm9.genomsize'
#   bedFileName    = '../scoring/140815_peaks.coverageCorr.all.bed'
#   countFile      = '../140815_peaks.coverageCorr.normalize.norepicates.peakCount'

# Hard-coded path to a TSS BED file; it is only assigned in this section.
tssBedFileName = '/raid/Downloaded_data/mm9_data/TSS/refSeqmm9.TSS.fix.bed'

# Read the three inputs: genome size, BED locations, and the count table
# (parsed with np.loadtxt and wrapped in filefun.Peaks).
genomeSize = filefun.getGenomeSize(genomeSizeFile)
locBed = filefun.loadBed(bedFileName)
values = filefun.Peaks(np.loadtxt(countFile))
numPeaks = values.numPeaks

print('initializing...')
# Chromosome names 'chr1' .. 'chr19'. np.linspace yields floats; the '%d'
# format renders them as integers.
chrms = np.array(['chr%d' % i for i in np.linspace(1, 19, 19)])
# Debugging aid: uncomment to restrict the run to the first chromosome.
#chrms = np.array([chrms[0]])

# go through each chromosome and find signal density
# Window and step sizes are stored as floats (1e6 and 1e5).
windowSize, stepSize = 1e6, 1e5

# Result containers, all empty at this point.
signalDensityDict = dict()
signalDensityRandomDict = dict()
windowedLocsDict = dict()
qvaluesDict = dict()