# Column order and the matching labels shared by every boxplot below.
sample_order = parameters.reorder_noreplicates
sample_labels = parameters.headers_noreplicates[sample_order]

# null peaks: rows selected by noExprChange, log-scaled y axis, saved to PDF
fig = plt.figure(figsize=(4, 3))
plotBoxplot(readCountNorm[noExprChange][:, sample_order], sample_labels)
ax = plt.gca()
ax.set_yscale('log')
plt.tight_layout()
plt.savefig('%s.boxplot.noExprChange.pdf' % outputfile)

# all peaks
# NOTE(review): despite the heading this selects peakbed.not_significant, and
# peakbed is only (re)loaded further down, so it must already exist when this
# line runs. The figure is neither log-scaled nor saved -- confirm intended.
fig = plt.figure()
plotBoxplot(readCountNorm[peakbed.not_significant][:, sample_order], sample_labels)

# peaks that go up
peakbedFile = 'scoring/140815_peaks.coverageCorr.all.bed'
peakbed = filefun.loadBedwScores(peakbedFile)
fig = plt.figure(figsize=(4, 3))
plotBoxplot(readCountNorm[peakbed.significant_up][:, sample_order], sample_labels)
ax = plt.gca()
ax.set_yscale('log')
plt.tight_layout()
plt.savefig('%s.boxplot.upwNfib.pdf' % outputfile)

# plot scatterplots for all samples versus mean of low
# Divide by the axis-0 mean of the noExprChange rows, then scale by 10.
null_mean = np.mean(readCountRed[noExprChange], 0)
readCountNorm = readCountRed / null_mean * 10
low_columns = parameters.indices_low_noreplicates
xvalues = np.mean(readCountNorm[:, low_columns], axis=1)
for i, label in enumerate(parameters.headers_noreplicates):
    yvalues = readCountNorm[:, i]
    # Mask of strictly positive values; only the disabled hexbin call uses it.
    indx = yvalues > 0
    #fig = plt.figure(figsize=(4, 4))
    #plotfun.plot_hexbin(xvalues[indx], yvalues[indx])
# os and numpy are used below; imported here so this fragment is self-contained
# (a repeated import is harmless if they are already imported above).
import os
import subprocess

import matplotlib.pyplot as plt
import numpy as np
import scipy.spatial.distance
import scipy.stats as st

import filefun
import globalvars
from heatmapfun import plotHeatMap


def _bedtools_last_column(args):
    """Run ``bedtools`` with *args* and return the last field of each output line.

    bedtools is invoked directly (no shell, no ``awk``), so a non-zero exit
    status raises ``subprocess.CalledProcessError``. In a ``bedtools | awk``
    pipeline only awk's exit status is checked, so a bedtools failure would
    silently produce an empty array.

    Returns an integer numpy array with one entry per non-empty output line.
    """
    output = subprocess.check_output(['bedtools'] + list(args))
    last_fields = [line.split()[-1] for line in output.splitlines() if line.strip()]
    return np.array(last_fields, dtype=int)


parameters = globalvars.Parameters()
wd = '/home/sarah/GreenDragonRaid1/lab/sarah/SCLC_ATAC/140908_bams/mm9/peakProcessing'

# load peak beds with scores
bedFileName = os.path.join(wd, 'scoring/140815_peaks.coverageCorr.all.bed')
peakBed = filefun.loadBedwScores(bedFileName)

# load nfib peak summits
chipBedFileName = '/home/sarah/GreenDragonRaid1/shr/Downloaded_data/mm9/nfib_chip/nfib_peak_summits.bed'
chipBed = filefun.loadBed(chipBedFileName)

# load tss
tssBedFileName = '/raid/Downloaded_data/mm9_data/TSS/refSeqmm9.TSS.fix.bed'

# find the number of chip peaks that fall into accessible peaks
# (`intersect -c` appends an overlap count; non-zero counts become True).
# Assumes one output line per row of peakBed -- TODO confirm.
peakBed.hasChipPeak = _bedtools_last_column(
    ['intersect', '-c', '-b', chipBedFileName, '-a', bedFileName]).astype(bool)

# get peak Distance to Tss
# (`closest -d` appends the distance; `-t first` keeps a single hit on ties).
peakBed.distancetoTss = _bedtools_last_column(
    ['closest', '-d', '-t', 'first', '-a', bedFileName, '-b', tssBedFileName])

# Peaks further than this distance from the nearest TSS are flagged as distal.
cutoff_distance = 5E3
peakBed.distal = peakBed.distancetoTss > cutoff_distance
np.save(outfile+'.npy', signals)

# for signal tracks, make plot
# Sample every `span`-th position of the window from -options.l to +options.r.
span = 1E4
indx = np.arange(0, options.l+options.r, span, dtype=int)
xvalues = np.arange(-options.l, options.r, span)
fig = plt.figure(figsize=(5, 4))
ax = fig.add_subplot(111)
# One faint blue trace per row of the first track ...
for signal in signals[0]:
    ax.plot(xvalues, signal[indx], 'b', alpha=0.1)
# ... and their NaN-ignoring mean in black. The scalar 0 and the index array
# are separated by a slice, so numpy puts the indexed axis first: the result
# is (len(indx), n_rows) and axis 1 averages across rows.
ax.plot(xvalues, np.nanmean(signals[0, :, indx], 1), 'k')
plt.savefig('%s.%s.pdf'%(outfile, options.interval))

# Now, for conservation, etc, plot the conservation in distal sites.
locBed = filefun.loadBedwScores(options.a)

# NOTE(review): distances are computed for bedFileName, while locBed is loaded
# from options.a -- confirm both name the same file, otherwise the rows of
# distanceToTss will not line up with locBed.
# bedtools is run directly (no shell/awk pipeline) so that a bedtools failure
# raises CalledProcessError instead of silently giving an empty array.
closest_output = subprocess.check_output(
    ['bedtools', 'closest', '-d', '-t', 'first', '-a', bedFileName, '-b', tssBedFileName])
locBed.distanceToTss = np.array(
    [line.split()[-1] for line in closest_output.splitlines() if line.strip()], dtype=int)
locBed.distal = locBed.distanceToTss > 5E3

# Row masks for the three groups being compared.
distal_up = np.all((locBed.distal, locBed.significant_up), axis=0)
distal_no_change = np.all((locBed.distal, locBed.no_change), axis=0)
promoter = np.logical_not(locBed.distal)

histogram.compare([signals[0, distal_up],
                   signals[0, distal_no_change],
                   signals[0, promoter]],
                  labels=['up, distal', 'no change, distal', 'all promoter'])
# NOTE(review): called without arguments -- looks unfinished; confirm.
plotfun.plot_barplot()