# Collect the bigWig files to process and extract a per-site signal from each.
bwdir = options.b


def _find_sorted(directory, pattern):
    """Return the paths under *directory* whose name matches *pattern*.

    Runs ``find`` piped into ``sort`` (so ordering matches the system
    ``sort``), but passes the arguments as lists instead of an interpolated
    shell string, so a directory name with spaces or shell metacharacters is
    handled literally.  As with the shell pipeline, only a failure of
    ``sort`` raises ``subprocess.CalledProcessError``; a failing ``find``
    simply yields fewer (or no) paths.
    """
    finder = subprocess.Popen(['find', directory, '-name', pattern],
                              stdout=subprocess.PIPE)
    try:
        listing = subprocess.check_output(['sort'], stdin=finder.stdout)
    finally:
        finder.stdout.close()
        finder.wait()
    # find prints one path per line; splitting on newlines (not on arbitrary
    # whitespace) keeps paths that contain spaces in one piece.
    return np.array(listing.splitlines())


bws = _find_sorted(bwdir, '*.bw')
if bws.size == 0:
    # No '*.bw' files found: fall back to the '*.bigWig' extension.
    bws = _find_sorted(bwdir, '*.bigWig')
numSamples = len(bws)

# load output file
outfile = options.o or 'bwvals'
print("saving to: %s" % outfile)

# load genome size
if options.g is None:
    options.g = '/raid/gSizes/mm9.genomsize'
genomeSize = filefun.getGenomeSize(options.g)

# find signal around each motif sites
# One row per bigWig file, one column per site.
signals = np.zeros((numSamples, numSites))
for i, bw in enumerate(bws):
    print("Extracting signal for %s" % os.path.basename(bw))
    # findInsertions is called once per site index, with the bigWig path and
    # bedFile bound as its leading arguments.
    pool = Pool(processes=options.p)
    sigArray = pool.map(functools.partial(findInsertions, bw, bedFile),
                        range(0, numSites))
    pool.close()
    # Reap this file's workers before the next iteration spawns a new pool.
    pool.join()
    signals[i] = np.array(sigArray)
np.save(outfile + '.npy', signals)

# Short sample labels: file name without extension, with the
# 'wgEncodeFsuRepliChip' prefix removed and 'WaveSignal' replaced by '_'.
labels = np.array([
    os.path.splitext(os.path.basename(bw))[0]
    .replace('wgEncodeFsuRepliChip', '')
    .replace('WaveSignal', '_')
    for bw in bws
])
# Boolean mask: True for labels that do not contain 'Diff'.
indx = np.array([label.find('Diff') == -1 for label in labels])
#### SCRIPT #####
# Echo the -b, -g and -o option values, one per line.
print('%s\n%s\n%s' % (options.b, options.g, options.o))
bedFileName = options.b
countFile = options.c
genomeSizeFile = options.g

print('loading files...')
#genomeSizeFile = '/raid/gSizes/mm9.genomsize'
#bedFileName = '../scoring/140815_peaks.coverageCorr.all.bed'
#countFile = '../140815_peaks.coverageCorr.normalize.norepicates.peakCount'
tssBedFileName = '/raid/Downloaded_data/mm9_data/TSS/refSeqmm9.TSS.fix.bed'
genomeSize = filefun.getGenomeSize(genomeSizeFile)
locBed = filefun.loadBed(bedFileName)
# The count file is read as a numeric text table and wrapped in filefun.Peaks.
values = filefun.Peaks(np.loadtxt(countFile))
numPeaks = values.numPeaks

print('initializing...')
# Chromosome names 'chr1' .. 'chr19'.  Built from an integer range rather than
# formatting np.linspace floats with %d, which depended on the floats being
# exact whole numbers.
chrms = np.array(['chr%d' % i for i in range(1, 20)])
#chrms = np.array([chrms[0]])

# go through each chromosome and find signal density
# NOTE(review): both sizes are floats (1E6, 1E5), as in the original; the
# code that consumes them is outside this section.
windowSize = 1E6
stepSize = 1E5
# Result containers, filled in by later code.
signalDensityDict = {}
signalDensityRandomDict = {}
windowedLocsDict = {}
qvaluesDict = {}