# Boxplot of normalized read counts for the rows selected by `noExprChange`
# (the "null" peaks), with sample columns taken in `reorder_noreplicates`
# order and labelled with the matching headers.
fig = plt.figure(figsize=(4, 3))
plotBoxplot(
    readCountNorm[noExprChange][:, parameters.reorder_noreplicates],
    parameters.headers_noreplicates[parameters.reorder_noreplicates],
)
ax = plt.gca()
ax.set_yscale('log')
plt.tight_layout()
plt.savefig('%s.boxplot.noExprChange.pdf' % outputfile)

# Same boxplot for the rows selected by `peakbed.not_significant`; this figure
# is drawn at the default size and is not written to disk.
# NOTE(review): `peakbed` is read here before the load a few lines below, so it
# is presumably already bound earlier in the file -- confirm.
fig = plt.figure()
plotBoxplot(
    readCountNorm[peakbed.not_significant][:, parameters.reorder_noreplicates],
    parameters.headers_noreplicates[parameters.reorder_noreplicates],
)

# (Re)load the scored peak table, then boxplot the rows flagged
# `significant_up` and save the result.
peakbedFile = 'scoring/140815_peaks.coverageCorr.all.bed'
peakbed = filefun.loadBedwScores(peakbedFile)
fig = plt.figure(figsize=(4, 3))
plotBoxplot(
    readCountNorm[peakbed.significant_up][:, parameters.reorder_noreplicates],
    parameters.headers_noreplicates[parameters.reorder_noreplicates],
)
ax = plt.gca()
ax.set_yscale('log')
plt.tight_layout()
plt.savefig('%s.boxplot.upwNfib.pdf' % outputfile)

# Scatterplot preparation: every sample versus the mean of the "low" samples.
# Counts are rescaled so that the axis-0 mean over the `noExprChange` rows
# becomes 10 in each column.
nullColumnMeans = np.mean(readCountRed[noExprChange], axis=0)
readCountNorm = readCountRed / nullColumnMeans * 10
xvalues = np.mean(
    readCountNorm[:, parameters.indices_low_noreplicates],
    axis=1,
)
for i, label in enumerate(parameters.headers_noreplicates):
    yvalues = readCountNorm[:, i]
    # Mask of rows with a strictly positive count in this sample.
    indx = yvalues > 0
    # The per-sample hexbin plot is currently disabled:
    #fig = plt.figure(figsize=(4, 4))
    #plotfun.plot_hexbin(xvalues[indx], yvalues[indx])
import scipy.stats as st
import scipy.spatial.distance
import matplotlib.pyplot as plt
import subprocess
from heatmapfun import plotHeatMap
import filefun
import globalvars
# Project-wide settings object.
parameters = globalvars.Parameters()

# Working directory that the scored-peak path below is resolved against.
wd = '/home/sarah/GreenDragonRaid1/lab/sarah/SCLC_ATAC/140908_bams/mm9/peakProcessing'

# Peak BED with scores.
bedFileName = os.path.join(wd, 'scoring/140815_peaks.coverageCorr.all.bed')
peakBed = filefun.loadBedwScores(bedFileName)

# NFIB ChIP peak summits.
chipBedFileName = '/home/sarah/GreenDragonRaid1/shr/Downloaded_data/mm9/nfib_chip/nfib_peak_summits.bed'
chipBed = filefun.loadBed(chipBedFileName)

# TSS annotation; only the path is used here, it is handed straight to bedtools.
tssBedFileName = '/raid/Downloaded_data/mm9_data/TSS/refSeqmm9.TSS.fix.bed'

# Mark which accessible peaks contain at least one ChIP summit.
# `bedtools intersect -c` appends the overlap count as the last column of each
# `-a` record; awk keeps only that column and any non-zero count becomes True.
# NOTE(review): with shell=True the pipeline's exit status is presumably awk's,
# so a bedtools failure may not raise here -- confirm.
chipOverlapCommand = "bedtools intersect -c -b %s -a %s | awk '{print $NF}'" % (
    chipBedFileName,
    bedFileName,
)
chipOverlapCounts = subprocess.check_output(chipOverlapCommand, shell=True).split()
peakBed.hasChipPeak = np.array(chipOverlapCounts, dtype=int).astype(bool)

# Distance from each peak to its closest TSS.
# `bedtools closest -d` appends the distance as the last column and
# `-t first` keeps a single record per peak when several TSSs tie.
tssDistanceCommand = "bedtools closest -d -t first -a %s -b %s | awk '{print $NF}'" % (
    bedFileName,
    tssBedFileName,
)
tssDistanceFields = subprocess.check_output(tssDistanceCommand, shell=True).split()
peakBed.distancetoTss = np.array(tssDistanceFields, dtype=int)

# Peaks further than the cutoff from the closest TSS are flagged as distal.
cutoff_distance = 5E3
peakBed.distal = peakBed.distancetoTss > cutoff_distance
    np.save(outfile+'.npy', signals)
    
    """
    for signal tracks, make plot
    """
    span = 1E4
    indx = np.arange(0, options.l+options.r, span, dtype=int)
    xvalues = np.arange(-options.l, options.r, span)
    fig = plt.figure(figsize=(5, 4))
    ax = fig.add_subplot(111)
    for signal in signals[0]:
        ax.plot(xvalues, signal[indx], 'b', alpha=0.1)
    ax.plot(xvalues, np.nanmean(signals[0, :, indx], 1), 'k')
    plt.savefig('%s.%s.pdf'%(outfile, options.interval))

    """
    Now, for conservation, etc, plot the conservation in distal sites. Dista,
    """

    locBed = filefun.loadBedwScores(options.a)
    locBed.distanceToTss = np.array(subprocess.check_output("bedtools closest -d -t first -a %s -b %s | awk '{print $NF}'"%(bedFileName, tssBedFileName), shell=True).split(), dtype=int)
    locBed.distal = locBed.distanceToTss > 5E3
    
    signals[0, np.all((locBed.distal, locBed.significant_up), axis=0)]
    signals[0, locBed.distal]
    signals[0, np.logical_not(locBed.distal)]
    histogram.compare([signals[0, np.all((locBed.distal, locBed.significant_up), axis=0)],
                       signals[0, np.all((locBed.distal, locBed.no_change), axis=0)],
                       signals[0, np.logical_not(locBed.distal)]], labels=['up, distal', 'no change, distal', 'all promoter'])
    plotfun.plot_barplot()