# Annotate accessible peaks with: overlap with NFIB ChIP summits, distance to
# the nearest TSS, and an intergenic indicator loaded from file.
#
# os / subprocess / numpy are imported here because the statements below use
# them; without these imports the first os.path.join call raises NameError.
import os
import subprocess

import numpy as np

from heatmapfun import plotHeatMap
import filefun
import globalvars

# instantiate the Parameters object provided by globalvars
parameters = globalvars.Parameters()

wd = '/home/sarah/GreenDragonRaid1/lab/sarah/SCLC_ATAC/140908_bams/mm9/peakProcessing'

# load peak beds with scores
bedFileName = os.path.join(wd, 'scoring/140815_peaks.coverageCorr.all.bed')
peakBed = filefun.loadBedwScores(bedFileName)

# load nfib peak summits
chipBedFileName = '/home/sarah/GreenDragonRaid1/shr/Downloaded_data/mm9/nfib_chip/nfib_peak_summits.bed'
chipBed = filefun.loadBed(chipBedFileName)

# path to the tss bed (only the file name is needed; it is handed to bedtools)
tssBedFileName = '/raid/Downloaded_data/mm9_data/TSS/refSeqmm9.TSS.fix.bed'

# find the number of chip peaks that fall into accessible peaks
# awk keeps only the last field of every output line; the counts are parsed
# as integers and then reduced to a boolean "has at least one" mask.
chipOverlapCmd = "bedtools intersect -c -b %s -a %s | awk '{print $NF}'" % (chipBedFileName, bedFileName)
chipOverlapCounts = np.array(
    subprocess.check_output(chipOverlapCmd, shell=True).split(), dtype=int)
peakBed.hasChipPeak = chipOverlapCounts.astype(bool)

# get peak Distance to Tss
# again only the last field of each bedtools output line is kept
tssDistanceCmd = "bedtools closest -d -t first -a %s -b %s | awk '{print $NF}'" % (bedFileName, tssBedFileName)
peakBed.distancetoTss = np.array(
    subprocess.check_output(tssDistanceCmd, shell=True).split(), dtype=int)
# peaks farther than this from the closest TSS are flagged as distal
# (presumably base pairs -- TODO confirm)
cutoff_distance = 5E3
peakBed.distal = peakBed.distancetoTss > cutoff_distance

# get Intergenic indicator from homer annotate peaks
peakIndxName = os.path.join(wd, 'scoring/140815_peaks.coverageCorr.all.ann.noheader.intergenic.peakIndx')
peakIndx = np.loadtxt(peakIndxName, dtype=bool)
    ax.set_xlabel('distance across chromosome %s (Mb)'%chrm)


#### SCRIPT #####
# NOTE(review): `options` and `smooth_windowed_genome` are not defined in the
# visible part of this file -- presumably parsed/imported earlier; confirm.
# A single pre-formatted argument in parentheses prints the same text under
# the Python 2 print statement and the Python 3 print function.
print('%s\n%s\n%s' % (options.b, options.g, options.o))
# lets look at this subset of chromosomes
# integer range (chr1..chr19) instead of float linspace values fed to '%d'
chrms = np.array(['chr%d' % i for i in range(1, 20)])

# import bedfile
stepSize = 5E5      # distance between points
windowSize = 1E6    # distance over which to average
genomeSize = filefun.getGenomeSize(options.g)

locBed = filefun.loadBed(options.b)
values = filefun.Peaks(np.loadtxt(options.c))
smoothedValues, windowedLocs = smooth_windowed_genome.main(
    locBed, values, genomeSize, windowSize, stepSize)

# normalize coverage by total number of reads on chromosome 1. Should you do that?
# Can't normalize with chromosome 4 because variable amplifications lead to craziness
smoothedValuesNorm = {}
for chrm in chrms:
    #smoothedValuesNorm[chrm] = smoothedValues[chrm]/np.mean(smoothedValues['chr1'], 0) * np.mean(smoothedValues['chr1'])
    # NOTE: not actually normalizing at all
    smoothedValuesNorm[chrm] = smoothedValues[chrm]
# what should max plotted value be?
# start from the first chromosome's values
allValues = smoothedValuesNorm[chrms[0]]
for chrm in chrms[1:]:
# given enrichment scores of how spatially distributed the peaks
# are, what are the intervals of 'boundaries', i.e. places where enrichment crosses zero?

##### IMPORT MODULES #####
# import necessary for python
import os
import sys
import numpy as np
import subprocess
import matplotlib.pyplot as plt
import filefun
import histogram

# Load per-window enrichment values and the matching window locations.
enrichment = np.loadtxt('counts_in_peaks.distal.hypervsnot.enrichment_values.iterations_500.mat')
locBed = filefun.loadBed('counts_in_peaks.distal.windowed_locs.bed')

# define boundary regions as those close to zero, where regions to either side change signs

# range of 'zero' points is within 0.1 std deviations of overall distribution from zero
stdev = np.nanstd(enrichment)   # NaN entries are ignored when computing the spread
mx = 0.1

# possible boundaries are those that are within 0.1 standard deviations from 0
# (strictly inside the open interval (-mx*stdev, mx*stdev); NaN compares False)
possible_boundaries = np.logical_and(enrichment < mx*stdev, enrichment > -mx*stdev)

# cycle through and ask if the region before that boundary region
# start with no confirmed boundaries; same shape as the candidate mask
actual_boundaries = np.zeros(possible_boundaries.shape, dtype=bool)
for indx in np.ravel(np.where(possible_boundaries)):
    y[np.argsort(y)] = meannorm.astype(int)
    return x, y

# Input locations: normalized read counts, scored peaks, TSS annotation,
# expression no-change indicator, and the enriched / depleted region beds.
wd = "/home/sarah/GreenDragonRaid1/lab/sarah/SCLC_ATAC/140908_bams/mm9/peakProcessing"
readcountNormFile = os.path.join(wd, "140815_peaks.coverageCorr.normalize.norepicates.peakCount")
bedFileName = os.path.join(wd, "scoring/140815_peaks.coverageCorr.all.bed")
tssBedFileName = "/raid/Downloaded_data/mm9_data/TSS/refSeqmm9.TSS.fix.bed"
noExprChangeFile = os.path.join(wd, "expression/diff_TvsM.filtered.noChange.tss.noExprChange")
enrichedBedFile = os.path.join(wd, "spatialCorrelation/significant_up_peaks.enriched.merged.bed")
depletedBedFile = os.path.join(wd, "spatialCorrelation/significant_up_peaks.depleted.merged.bed")

# load files
readCountNorm = np.loadtxt(readcountNormFile)
peakBed = filefun.loadBedwScores(bedFileName)
tssBed = filefun.loadBed(tssBedFileName)
enrichedBed = filefun.loadBed(enrichedBedFile)
depletedBed = filefun.loadBed(depletedBedFile)
noExprChange = np.loadtxt(noExprChangeFile, dtype=bool)

# get non promoter proximal peaks
# The original statement was truncated (unbalanced parentheses, no call that
# actually runs the command). It is restored here as: run the pipeline in a
# shell, split its output on whitespace, and parse the pieces as integers.
# NOTE(review): `cut -f14` is assumed to select the distance column that
# `bedtools closest -d` appends -- confirm against the column count of the
# two BED files.
peakBed.distancetoTss = np.array(
    subprocess.check_output(
        "bedtools closest -d -t first -a %s -b %s | cut -f14" % (bedFileName, tssBedFileName), shell=True
    ).split(),
    dtype=int)
# peaks farther than this from the closest TSS are flagged as distal
cutoff_distance = 5e3
peakBed.distal = peakBed.distancetoTss > cutoff_distance

# get region of peaks
# For every chromosome: take its entry of signalDensityDict, divide by the
# axis-0 mean of its entry of signalDensityRandomDict, and keep the log2 of
# that ratio. A coverage heat map is drawn for chr8 only.
for chrm in chrms:
    foldChange = signalDensityDict[chrm]
    foldChangeRandom = np.mean(signalDensityRandomDict[chrm], axis=0)
    enrichment = np.log2(foldChange / foldChangeRandom)

    # every chromosome other than chr8 is done at this point
    if chrm != 'chr8':
        continue
    # columns are taken in the order given by `reorder`; the row labels are
    # indexed with the same `reorder`
    heatmapfun.plotCoverageHeatMap(
        enrichment[:, reorder],
        cluster=False,
        rowlabels=parameters.headers_noreplicates[reorder])
# 11/7/14
import filefun
import subprocess
# Compare nucleosome-position scores between all positions and those that
# overlap NFI / CTCF motif beds, then plot the mean insertion profile at the
# CTCF-overlapping positions.
motifBed = '../NF1_CTF.bed'
nucBedFile = 'hyperaccessible.chr1.nucpos.bed'
nucBed = filefun.loadBed(nucBedFile)
# column index 3 (the fourth column) of the bed file is read as the z-score
nucBed.zscore = np.loadtxt(nucBedFile, usecols=(3,))

# Shell pipeline template: first slot is the -b file, second slot the -a file;
# awk keeps only the last field of every output line.
overlapCountCmd = "bedtools intersect -c -b %s -a %s | awk '{print $NF}'"

# The counts arrive as text. Parse them as integers before the boolean cast so
# that a count of "0" is reliably False (a direct string -> bool cast is not
# guaranteed to interpret the digits).
nfiCounts = np.array(
    subprocess.check_output(overlapCountCmd % (motifBed, nucBedFile), shell=True).split(),
    dtype=int)
nucBed.is_nfi_full = nfiCounts.astype(bool)

ctcfCounts = np.array(
    subprocess.check_output(overlapCountCmd % ('../../CTCF_Zf/CTCF_Zf.noheader.bed', nucBedFile), shell=True).split(),
    dtype=int)
nucBed.is_ctcf = ctcfCounts.astype(bool)

# unit-width bins with edges at -0.5, 0.5, ..., 20.5
xbins = np.arange(-0.5, 21, 1)
histogram.compare([nucBed.zscore, nucBed.zscore[nucBed.is_nfi_full], nucBed.zscore[nucBed.is_ctcf]], labels=['all', 'NFI full', 'CTCF'], xbins=xbins)
ax = plt.gca()

# first element of the saved array holds the per-position profiles
# (assumed one row per entry of nucBed -- TODO confirm)
footprints = np.load('hyperaccessible.insertions.npy')[0]
fig = plt.figure()
ax = fig.add_subplot(111)
# x axis: integer offsets -100 .. 100 inclusive
xvalues = np.arange(-100, 101)
ax.plot(xvalues, np.mean(footprints[nucBed.is_ctcf], axis=0), label='CTCF sites')
#### SCRIPT #####
# NOTE(review): `options` is not defined in the visible part of this file --
# presumably the parsed command-line options; confirm.
# A single pre-formatted argument in parentheses prints the same text under
# the Python 2 print statement and the Python 3 print function.
print('%s\n%s\n%s' % (options.b, options.g, options.o))
bedFileName = options.b
countFile = options.c
genomeSizeFile = options.g

print('loading files...')
#genomeSizeFile = '/raid/gSizes/mm9.genomsize'
#bedFileName = '../scoring/140815_peaks.coverageCorr.all.bed'
#countFile = '../140815_peaks.coverageCorr.normalize.norepicates.peakCount'

tssBedFileName = '/raid/Downloaded_data/mm9_data/TSS/refSeqmm9.TSS.fix.bed'

genomeSize = filefun.getGenomeSize(genomeSizeFile)
locBed = filefun.loadBed(bedFileName)
values = filefun.Peaks(np.loadtxt(countFile))
numPeaks = values.numPeaks

print('initializing...')
# integer range (chr1..chr19) instead of float linspace values fed to '%d'
chrms = np.array(['chr%d' % i for i in range(1, 20)])
#chrms = np.array([chrms[0]])

# go through each chromosome and find signal density
windowSize = 1E6
stepSize = 1E5
# per-chromosome result containers, filled in later
signalDensityDict = {}
signalDensityRandomDict = {}
windowedLocsDict = {}
qvaluesDict = {}