def correctedScalingPlot(resolution, filename, experiment, genome, mouse=False, **kwargs):
    """Paper figure comparing scaling before and after correction.

    Loads ``filename`` under the key ``experiment`` into a
    ``binnedDataAnalysis(resolution, genome)`` object, draws the scaling
    curve of the raw data, applies ``iterativeCorrectWithSS`` and draws the
    scaling curve again.  Both axes are log-scaled.  The figure is shown and
    then written through the module-level ``pp`` object.

    ``mouse`` and ``**kwargs`` are accepted but not used by this function.
    """
    global pp
    if options.verbose:
        print >> sys.stdout, "correctedScalingPlot: res: %d file1: %s exp1:%s gen:%s" % (
            resolution, filename, experiment, genome)

    plt.figure()
    binned = binnedDataAnalysis(resolution, genome)
    binned.simpleLoad(filename, experiment)
    binned.removePoorRegions()
    binned.removeDiagonal()

    # Same dataset drawn twice: once raw, once after correction.
    binned.plotScaling(experiment, label="Raw data", color="#A7A241")
    binned.iterativeCorrectWithSS()
    binned.plotScaling(experiment, label="Corrected", color="#344370")

    axes = plt.gca()
    plotting.removeAxes()

    fontSize = 6
    plt.xlabel("Genomic distance (MB)", fontsize=6)
    plt.ylabel("Contact probability", fontsize=6)
    for fetchLabels in (axes.get_xticklabels, axes.get_yticklabels):
        for tickLabel in fetchLabels():
            tickLabel.set_fontsize(fontSize)

    plotLegend = plt.legend(loc=0, prop={"size": 6})
    plotLegend.draw_frame(False)

    plt.xscale("log")
    plt.yscale("log")
    plt.show()
    pp.savefig()
def doArmPlot(resolution, filename, experiment, genome, mouse=False, **kwargs):
    """Plot a single average interarm map (paper figure).

    Loads the heatmap ``filename`` under the key ``experiment`` into a
    ``binnedDataAnalysis(resolution, genome)`` object, filters it and draws
    ``averageTransMap(experiment)`` on a new figure with a vertical colorbar.

    When ``mouse`` equals ``True`` a fixed list of fake translocations is
    applied and ``removeChromosome(19)`` is called; otherwise
    ``removeChromosome(22)`` is called.  Extra keyword arguments are passed
    straight to ``averageTransMap``.
    """
    global pp
    plt.figure()
    if options.verbose:
        print >> sys.stdout, "doArmPlot: res: %d file: %s exp:%s gen:%s" % (
            resolution, filename, experiment, genome)

    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename, experiment)
    if mouse == True:
        Tanay.fakeTranslocations([(0, 0, None, 12, 52000000, None),
                                  (4, 45000000, None, 12, 0, 30000000),
                                  (9, 0, 50000000, 12, 0, 35000000)])
        Tanay.removeChromosome(19)
    else:
        Tanay.removeChromosome(22)
    Tanay.removeDiagonal(1)
    Tanay.removePoorRegions()
    Tanay.truncTrans()
    Tanay.fakeCis()

    Tanay.averageTransMap(experiment, **kwargs)

    cb = plt.colorbar(orientation="vertical")
    # A vertical colorbar carries its tick labels on the y axis, so resizing
    # only the x tick labels (as the code did before) had no visible effect.
    # Both axes are handled, mirroring the tick-label loops used for the
    # scaling plots in this file.
    for fetchLabels in (cb.ax.get_xticklabels, cb.ax.get_yticklabels):
        for tickLabel in fetchLabels():
            tickLabel.set_fontsize(6)
def plotSixHeatmaps():
    """Plots 6 heatmaps for a correlation supplementary figure.

    Loads the 10 MB "HindIII", "NcoI" and "control" heatmaps, crops the block
    between the first and the second chromosome, and shows it on a 3x2 grid:
    raw HindIII / NcoI on the first row, the same two after
    ``iterativeCorrectWithoutSS`` on the second row, and corrected HindIII
    next to corrected "control" on the third row.
    """
    plt.figure(figsize=(3, 3))

    binMb = 10
    BD = binnedDataAnalysis(binMb * 1000000, "../../../data/hg18")
    BD.simpleLoad("../../../tcc/working/HindIII_%d.hm" % binMb, "HindIII")
    BD.simpleLoad("../../../tcc/working/NcoI_%d.hm" % binMb, "NcoI")
    BD.simpleLoad("../../../tcc/working/control_%d.hm" % binMb, "control")
    BD.removeDiagonal()
    BD.removePoorRegions(cutoff=5)
    BD.removeZeros()

    # Rows come from the first chromosome, columns from the second one; the
    # last bin of each chromosome is left out (ends are reduced by one).
    rowSpan = slice(BD.chromosomeStarts[0], BD.chromosomeEnds[0] - 1)
    colSpan = slice(BD.chromosomeStarts[1], BD.chromosomeEnds[1] - 1)

    def cropAll():
        "Return the cropped HindIII, NcoI and control blocks, in that order"
        return [BD.dataDict[key][rowSpan, colSpan]
                for key in ("HindIII", "NcoI", "control")]

    def sharedRange(*arrays):
        "Smallest minimum and largest maximum over all given arrays"
        lowest = min([arr.min() for arr in arrays])
        highest = max([arr.max() for arr in arrays])
        return lowest, highest

    def heatmap(position, block, lo, hi):
        "Select a grid cell and draw one block with a fixed color range"
        plt.subplot(position)
        plt.imshow(block, interpolation="nearest", vmin=lo, vmax=hi)

    # ---- raw data: color range shared by the two enzymes only ----
    hind, nco, ctrl = cropAll()
    lo, hi = sharedRange(hind, nco)

    heatmap(321, hind, lo, hi)
    plt.title("HindIII, raw")
    plt.colorbar()

    heatmap(322, nco, lo, hi)
    plt.colorbar()
    plt.title("NcoI, raw")

    # ---- corrected data: color range shared by all three datasets ----
    BD.iterativeCorrectWithoutSS()
    hind, nco, ctrl = cropAll()
    lo, hi = sharedRange(hind, nco, ctrl)

    heatmap(323, hind, lo, hi)
    plt.colorbar()
    plt.title("HindIII, IC",)

    heatmap(324, nco, lo, hi)
    plt.colorbar()
    plt.title("NcoI, IC")

    heatmap(325, hind, lo, hi)
    plt.colorbar()
    plt.title("HindIII, 50%")

    heatmap(326, ctrl, lo, hi)
    plt.colorbar()
    plt.title("HindIII, 50%")

    plt.show()
def doArmPlot(resolution, filename, experiment, genome, mouse=False, **kwargs):
    """Plot a single average interarm map (paper figure).

    Loads the heatmap ``filename`` under the key ``experiment`` into a
    ``binnedDataAnalysis(resolution, genome)`` object, filters it and draws
    ``averageTransMap(experiment)`` on a new figure with a vertical colorbar.

    When ``mouse`` equals ``True`` a fixed list of fake translocations is
    applied and ``removeChromosome(19)`` is called; otherwise
    ``removeChromosome(22)`` is called.  Extra keyword arguments are passed
    straight to ``averageTransMap``.
    """
    global pp
    plt.figure()
    if options.verbose:
        print >> sys.stdout, "doArmPlot: res: %d file: %s exp:%s gen:%s" % (
            resolution, filename, experiment, genome)

    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename, experiment)
    if mouse == True:
        Tanay.fakeTranslocations([(0, 0, None, 12, 52000000, None),
                                  (4, 45000000, None, 12, 0, 30000000),
                                  (9, 0, 50000000, 12, 0, 35000000)])
        Tanay.removeChromosome(19)
    else:
        Tanay.removeChromosome(22)
    Tanay.removeDiagonal(1)
    Tanay.removePoorRegions()
    Tanay.truncTrans()
    Tanay.fakeCis()

    Tanay.averageTransMap(experiment, **kwargs)

    cb = plt.colorbar(orientation="vertical")
    # A vertical colorbar carries its tick labels on the y axis, so resizing
    # only the x tick labels (as the code did before) had no visible effect.
    # Both axes are handled, mirroring the tick-label loops used for the
    # scaling plots in this file.
    for fetchLabels in (cb.ax.get_xticklabels, cb.ax.get_yticklabels):
        for tickLabel in fetchLabels():
            tickLabel.set_fontsize(6)
def getInterValues(filenames, genome):
    """Collect ``interchromosomalValues`` for every dataset in ``filenames``.

    For each name a fresh 1 MB ``binnedDataAnalysis`` object is built for
    ``genome``, the heatmap ``<genome>/<name>-1000k.hm`` is loaded under the
    key "test", ``truncTrans(high=0.0001)`` is applied, and the resulting
    interchromosomal values are collected.

    Returns a list with one entry per name, in input order.
    """

    def valuesFor(name):
        "Load one dataset and return its interchromosomal values"
        # NOTE(review): the genome root is an absolute, machine-specific path.
        genomePath = "/home/magus/HiC2011/data/{0}".format(genome)
        BD = binnedDataAnalysis(1000000, genomePath, readChrms=["#", "X"])
        BD.simpleLoad("{1}/{0}-1000k.hm".format(name, genome), "test")
        BD.truncTrans(high=0.0001)
        return BD.interchromosomalValues("test")

    return [valuesFor(name) for name in filenames]
def compareCorrelationOfEigenvectors():
    """Plot eigenvector correlations between three datasets (paper figure).

    Loads the "Erez" (HindIII), "NcoI" and "TCC" 1 MB heatmaps, filters them,
    computes the first 10 eigenvectors of each, and shows three 10x10
    matrices of absolute Pearson correlations between eigenvectors of each
    pair of datasets.

    NOTE(review): the function ends with a bare ``raise`` and therefore
    always errors out after the figure has been shown.
    """
    Tanay = binnedDataAnalysis(1000000, "../../../data/hg18")
    Tanay.simpleLoad("../../../ErezPaperData/hg18/GM-HindIII-hg18-1M.hm", "Erez")
    Tanay.simpleLoad("../../../ErezPaperData/hg18/GM-NcoI-hg18-1M.hm", "NcoI")
    Tanay.simpleLoad("../../../tcc/hg18/tcc-HindIII-hg18-1M.hm", "TCC")
    Tanay.removeDiagonal()
    Tanay.removePoorRegions()
    Tanay.removeZeros()
    Tanay.truncTrans()
    Tanay.fakeCis()

    M = 10
    Tanay.doEig(numPCs=M)
    erezVecs = Tanay.EigDict["Erez"]
    ncoVecs = Tanay.EigDict["NcoI"]
    tccVecs = Tanay.EigDict["TCC"]

    def absCorrelation(rowVecs, colVecs):
        "M x M matrix of |corr(rowVecs[i], colVecs[j])|"
        table = numpy.zeros((M, M))
        for i in xrange(M):
            for j in xrange(M):
                table[i][j] = abs(numpy.corrcoef(rowVecs[i], colVecs[j])[0, 1])
        return table

    # (subplot position, x label, y label, matrix); rows follow the y label.
    panels = [
        (131, "HiC 2009, HindIII", "HiC 2009, NcoI",
         absCorrelation(ncoVecs, erezVecs)),
        (132, "HiC 2009, HindIII", "TCC 2011, HindIII",
         absCorrelation(tccVecs, erezVecs)),
        (133, "HiC 2009, NcoI", "TCC 2011, HindIII",
         absCorrelation(tccVecs, ncoVecs)),
    ]

    plt.figure(figsize=(7.5, 2.5))
    plt.gcf().subplots_adjust(0.2, 0.2, 0.85, 0.85)
    for position, xText, yText, table in panels:
        plt.subplot(position)
        plt.xlabel(xText)
        plt.ylabel(yText)
        plt.imshow(table, interpolation="nearest", vmin=0, vmax=1)
        plt.colorbar()
    plt.show()

    # NOTE(review): no exception is active here, so this bare raise itself
    # fails and aborts the caller.  It looks like a deliberate debugging
    # stop - confirm before removing.
    raise
def compareCorrelationOfEigenvectors(
    resolution, filename1, filename2, experiment1, experiment2, genome, mouse=False, **kwargs
):
    """Plot eigenvector correlations between two datasets (paper figure).

    Loads ``filename1`` / ``filename2`` under the keys ``experiment1`` /
    ``experiment2`` into one ``binnedDataAnalysis(resolution, genome)``
    object, filters the data, computes the first 10 eigenvectors of each
    dataset and shows the 10x10 matrix of absolute Pearson correlations.
    Rows follow ``experiment2`` (y label), columns follow ``experiment1``
    (x label).  The figure is shown and then written through the
    module-level ``pp`` object.

    ``mouse`` and ``**kwargs`` are accepted but not used by this function.
    """
    global pp
    if options.verbose:
        print >> sys.stdout, "compareCorrelationOfEigenvectors: res: %d file1: %s file2: %s exp1:%s exp2:%s gen:%s" % (
            resolution,
            filename1,
            filename2,
            experiment1,
            experiment2,
            genome,
        )

    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename1, experiment1)
    Tanay.simpleLoad(filename2, experiment2)
    Tanay.removeDiagonal()
    Tanay.removePoorRegions()
    Tanay.removeZeros()
    Tanay.truncTrans()
    Tanay.fakeCis()

    M = 10
    Tanay.doEig(numPCs=M)
    E1 = Tanay.EigDict[experiment1]
    E2 = Tanay.EigDict[experiment2]

    data = numpy.zeros((M, M))
    for i in xrange(M):
        for j in xrange(M):
            data[i][j] = abs(numpy.corrcoef(E2[i], E1[j])[0, 1])

    # Only this figure is created.  An extra plt.figure() used to be opened
    # before loading the data; nothing was ever drawn on it, so it only
    # produced a stray empty figure.
    plt.figure(figsize=(8, 8))
    plt.gcf().subplots_adjust(0.2, 0.2, 0.85, 0.85)
    plt.subplot(111)
    plt.xlabel(experiment1)
    plt.ylabel(experiment2)
    plt.imshow(data, interpolation="nearest", vmin=0, vmax=1)
    plt.colorbar()
    plt.show()
    pp.savefig()
def compareCorrelationOfEigenvectors(resolution, filename1, filename2, experiment1, experiment2, genome, mouse=False, **kwargs):
    """Plot eigenvector correlations between two datasets (paper figure).

    Loads ``filename1`` / ``filename2`` under the keys ``experiment1`` /
    ``experiment2`` into one ``binnedDataAnalysis(resolution, genome)``
    object, filters the data, computes the first 10 eigenvectors of each
    dataset and shows the 10x10 matrix of absolute Pearson correlations.
    Rows follow ``experiment2`` (y label), columns follow ``experiment1``
    (x label).  The figure is shown and then written through the
    module-level ``pp`` object.

    ``mouse`` and ``**kwargs`` are accepted but not used by this function.
    """
    global pp
    if options.verbose:
        print >> sys.stdout, "compareCorrelationOfEigenvectors: res: %d file1: %s file2: %s exp1:%s exp2:%s gen:%s" % (
            resolution, filename1, filename2, experiment1, experiment2, genome)

    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename1, experiment1)
    Tanay.simpleLoad(filename2, experiment2)
    Tanay.removeDiagonal()
    Tanay.removePoorRegions()
    Tanay.removeZeros()
    Tanay.truncTrans()
    Tanay.fakeCis()

    M = 10
    Tanay.doEig(numPCs=M)
    E1 = Tanay.EigDict[experiment1]
    E2 = Tanay.EigDict[experiment2]

    data = numpy.zeros((M, M))
    for i in xrange(M):
        for j in xrange(M):
            data[i][j] = abs(numpy.corrcoef(E2[i], E1[j])[0, 1])

    # Only this figure is created.  An extra plt.figure() used to be opened
    # before loading the data; nothing was ever drawn on it, so it only
    # produced a stray empty figure.
    plt.figure(figsize=(8, 8))
    plt.gcf().subplots_adjust(0.2, 0.2, 0.85, 0.85)
    plt.subplot(111)
    plt.xlabel(experiment1)
    plt.ylabel(experiment2)
    plt.imshow(data, interpolation="nearest", vmin=0, vmax=1)
    plt.colorbar()
    plt.show()
    pp.savefig()
def doReconstructedArmPlot(filename=GM1M, genome=myGenome, usePCs=[0, 1], mouse=False, **kwargs): "Plot an PC2-PC3 interarm map - supp paper figure" Tanay = binnedDataAnalysis(1000000, genome) Tanay.simpleLoad(filename, "GM-all") if mouse == True: Tanay.fakeTranslocations([(0, 0, None, 12, 52000000, None), (4, 45000000, None, 12, 0, 30000000), (9, 0, 50000000, 12, 0, 35000000)]) Tanay.removeChromosome(19) else: Tanay.removeChromosome(22) Tanay.removeDiagonal() Tanay.removePoorRegions() Tanay.removeZeros() Tanay.truncTrans() Tanay.fakeCis() Tanay.doEig(numPCs=max(usePCs) + 1) print Tanay.eigEigenvalueDict Tanay.restoreZeros(value=0) PCs = Tanay.EigDict["GM-all"][usePCs] eigenvalues = Tanay.eigEigenvalueDict["GM-all"][usePCs] proj = reduce(lambda x, y: x + y, [PCs[i][:, None] * PCs[i][None, :] * \ eigenvalues[i] for i in xrange(len(PCs))]) mask = PCs[0] != 0 mask = mask[:, None] * mask[None, :] data = Tanay.dataDict["GM-all"] datamean = numpy.mean(data[mask]) proj[mask] += datamean Tanay.dataDict["BLA"] = proj Tanay.averageTransMap("BLA", **kwargs) cb = plt.colorbar(orientation="vertical") for xlabel_i in cb.ax.get_xticklabels(): xlabel_i.set_fontsize(6)
def correctedScalingPlot():
    """Paper figure comparing scaling before and after correction.

    Loads the module-level ``GM200kBreaks`` heatmap as "GM-all" at 200 kb
    resolution, draws the scaling curve of the raw data, applies
    ``iterativeCorrectWithSS`` and draws the scaling curve again.  Both axes
    are log-scaled and the figure is shown.
    """
    datasetKey = "GM-all"

    plt.figure(figsize=(4, 4))
    binned = binnedDataAnalysis(200000, genome=myGenome)
    binned.simpleLoad(GM200kBreaks, datasetKey)
    binned.removePoorRegions()
    binned.removeDiagonal()

    # Same dataset drawn twice: once raw, once after correction.
    binned.plotScaling(datasetKey, label="Raw data", color="#A7A241")
    binned.iterativeCorrectWithSS()
    binned.plotScaling(datasetKey, label="Corrected", color="#344370")

    axes = plt.gca()
    mirnylib.plotting.removeAxes()

    fontSize = 6
    plt.xlabel("Genomic distance (MB)", fontsize=6)
    plt.ylabel("Contact probability", fontsize=6)
    for fetchLabels in (axes.get_xticklabels, axes.get_yticklabels):
        for tickLabel in fetchLabels():
            tickLabel.set_fontsize(fontSize)

    plotLegend = plt.legend(loc=0, prop={"size": 6})
    plotLegend.draw_frame(False)

    plt.xscale("log")
    plt.yscale("log")
    plt.show()
def doArmPlot(filename=GM1M, genome=myGenome, mouse=False, **kwargs):
    """Plot a single average interarm map (paper figure).

    Loads ``filename`` as "GM-all" into a 1 MB ``binnedDataAnalysis`` object
    for ``genome``, filters it and draws ``averageTransMap("GM-all")`` with a
    vertical colorbar.

    When ``mouse`` equals ``True`` a fixed list of fake translocations is
    applied and ``removeChromosome(19)`` is called; otherwise
    ``removeChromosome(22)`` is called.  Extra keyword arguments are passed
    straight to ``averageTransMap``.
    """
    Tanay = binnedDataAnalysis(1000000, genome)
    Tanay.simpleLoad(filename, "GM-all")
    if mouse == True:
        Tanay.fakeTranslocations([(0, 0, None, 12, 52000000, None),
                                  (4, 45000000, None, 12, 0, 30000000),
                                  (9, 0, 50000000, 12, 0, 35000000)])
        Tanay.removeChromosome(19)
    else:
        Tanay.removeChromosome(22)
    Tanay.removeDiagonal(1)
    Tanay.removePoorRegions()
    Tanay.truncTrans()
    Tanay.fakeCis()

    Tanay.averageTransMap("GM-all", **kwargs)

    cb = plt.colorbar(orientation="vertical")
    # A vertical colorbar carries its tick labels on the y axis, so resizing
    # only the x tick labels (as the code did before) had no visible effect.
    for fetchLabels in (cb.ax.get_xticklabels, cb.ax.get_yticklabels):
        for tickLabel in fetchLabels():
            tickLabel.set_fontsize(6)
def correctedScalingPlot(resolution, filename, experiment, genome, mouse=False, **kwargs):
    """Paper figure comparing scaling before and after correction.

    Loads ``filename`` under the key ``experiment`` into a
    ``binnedDataAnalysis(resolution, genome)`` object, draws the scaling
    curve of the raw data, applies ``iterativeCorrectWithSS`` and draws the
    scaling curve again.  Both axes are log-scaled.  The figure is shown and
    then written through the module-level ``pp`` object.

    ``mouse`` and ``**kwargs`` are accepted but not used by this function.
    """
    global pp
    if options.verbose:
        print >> sys.stdout, "correctedScalingPlot: res: %d file1: %s exp1:%s gen:%s" % (
            resolution, filename, experiment, genome)

    plt.figure()
    binned = binnedDataAnalysis(resolution, genome)
    binned.simpleLoad(filename, experiment)
    binned.removePoorRegions()
    binned.removeDiagonal()

    # Same dataset drawn twice: once raw, once after correction.
    binned.plotScaling(experiment, label="Raw data", color="#A7A241")
    binned.iterativeCorrectWithSS()
    binned.plotScaling(experiment, label="Corrected", color="#344370")

    axes = plt.gca()
    plotting.removeAxes()

    fontSize = 6
    plt.xlabel("Genomic distance (MB)", fontsize=6)
    plt.ylabel("Contact probability", fontsize=6)
    for fetchLabels in (axes.get_xticklabels, axes.get_yticklabels):
        for tickLabel in fetchLabels():
            tickLabel.set_fontsize(fontSize)

    plotLegend = plt.legend(loc=0, prop={"size": 6})
    plotLegend.draw_frame(False)

    plt.xscale("log")
    plt.yscale("log")
    plt.show()
    pp.savefig()
from hiclib.binnedData import binnedDataAnalysis import os import sys workingGenome = "hg18" genomeFolder = "../../../data/hg19" if not os.path.exists(genomeFolder): try: genomeFolder = sys.argv[1] except: raise StandardError( "Please provide hg19 Genome folder in the code or as a first argument" ) a = binnedDataAnalysis(1000000, genomeFolder) a.simpleLoad("../fragmentHiC/test-1M.hm", "test") a.removeDiagonal() a.removePoorRegions() a.removeCis() a.fakeCis() a.removeZeros() a.iterativeCorrectWithoutSS() a.doEig() a.export("test", "testHeatmap") print "everything worked, but no verification of result was made, because we haven't written it yet..."
def compareInterarmMaps(resolution, filename1, filename2, experiment1, experiment2, genome, mouse=False, **kwargs):
    """Plot 8 interarm maps for two datasets (paper supplement figure).

    Both heatmaps are loaded into one ``binnedDataAnalysis(resolution,
    genome)`` object.  The 4x2 grid shows ``experiment1`` in the left column
    and ``experiment2`` in the right column.  Processing is cumulative and
    runs in this order, which is not the row order:

      row 1 - raw data
      row 3 - after ``iterativeCorrectWithSS``
      row 2 - after ``iterativeCorrectWithoutSS``
      row 4 - after ``fakeCis``

    The figure is shown and then written through the module-level ``pp``
    object.  ``mouse`` and ``**kwargs`` are accepted but not used here.
    """
    global pp
    if options.verbose:
        print >> sys.stdout, "compareInterarmMaps: res: %d file1: %s file2: %s exp1:%s exp2:%s gen:%s" % (
            resolution, filename1, filename2, experiment1, experiment2, genome)

    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename1, experiment1)
    Tanay.simpleLoad(filename2, experiment2)
    Tanay.removeDiagonal()
    Tanay.removePoorRegions(cutoff=2)

    fs = 10

    def drawPanel(position, name, suffix):
        "Draw the average trans map of one dataset in one grid cell"
        plt.subplot(position)
        plt.title(name + suffix, fontsize=fs)
        Tanay.averageTransMap(name, vmin=None, vmax=None)
        plt.colorbar()

    plt.figure(figsize=(12, 16))
    drawPanel(421, experiment1, ", raw")
    drawPanel(422, experiment2, ", raw")

    Tanay.iterativeCorrectWithSS()
    drawPanel(425, experiment1, ", with SS reads")
    drawPanel(426, experiment2, ", with SS reads")

    Tanay.iterativeCorrectWithoutSS()
    # The left panel used to plot experiment2 under an experiment1 title
    # (copy-paste slip); it now plots the dataset named in its title.
    drawPanel(423, experiment1, ", no SS reads")
    drawPanel(424, experiment2, ", no ss reads")

    Tanay.fakeCis()
    drawPanel(427, experiment1, ", trans only")
    drawPanel(428, experiment2, ", trans only")

    plt.show()
    pp.savefig()
def compareInterarmMaps():
    """Plot 8 interarm maps for two datasets (paper supplement figure).

    Loads the module-level ``GM1M`` heatmap as "GM-all" and ``GM1MNcoI`` as
    "GM-NcoI" at 1 MB resolution.  The 4x2 grid shows "GM-all" in the left
    column and "GM-NcoI" in the right column.  Processing is cumulative and
    runs in this order, which is not the row order:

      row 1 - raw data
      row 3 - after ``iterativeCorrectWithSS``
      row 2 - after ``iterativeCorrectWithoutSS``
      row 4 - after ``fakeCis``
    """
    binned = binnedDataAnalysis(1000000, myGenome)
    binned.simpleLoad(GM1M, "GM-all")
    binned.simpleLoad(GM1MNcoI, "GM-NcoI")
    binned.removeDiagonal()
    binned.removePoorRegions(cutoff=2)

    titleSize = 10

    def drawPanel(position, heading, key):
        "Draw the average trans map of one dataset in one grid cell"
        plt.subplot(position)
        plt.title(heading, fontsize=titleSize)
        binned.averageTransMap(key, vmin=None, vmax=None)
        plt.colorbar()

    drawPanel(421, "GM, HindIII, raw", "GM-all")
    drawPanel(422, "GM, NcoI, raw", "GM-NcoI")

    binned.iterativeCorrectWithSS()
    drawPanel(425, "GM, HindIII, with SS reads", "GM-all")
    drawPanel(426, "GM, NcoI, with SS reads", "GM-NcoI")

    binned.iterativeCorrectWithoutSS()
    drawPanel(423, "GM, HindIII, no SS reads", "GM-all")
    drawPanel(424, "GM, NcoI, no ss reads", "GM-NcoI")

    binned.fakeCis()
    drawPanel(427, "GM, HindIII, trans only", "GM-all")
    drawPanel(428, "GM, NcoI, trans only", "GM-NcoI")

    plt.show()
def plotCorrelationAtDifferentBinning():
    """Plots figure with correlation at different binning.
    Note the caching and creating of binned heatmaps flags below.
    Supplementary paper figure.

    For bin sizes of 1..10 MB the "HindIII", "NcoI" and "control" heatmaps
    are loaded and the Spearman correlation of trans values is computed
    between HindIII and NcoI and between HindIII and control, both before
    and after ``removeZeros`` / ``fakeCis`` / ``restoreZeros``.  Three of the
    four resulting curves are plotted (the raw HindIII-vs-control curve,
    ``p4``, is collected but not drawn).

    With ``create`` set to False (as written) the heatmap files are expected
    to exist already and the whole fragment-level section is skipped.

    NOTE(review): the function ends with ``setExceptionHook()`` followed by
    ``0 / 0``, so it always raises ZeroDivisionError after the plot is shown;
    this looks like a deliberate stop for debugging - confirm.
    NOTE(review): the block layout was recovered from whitespace-collapsed
    source; nesting of the marked statements should be checked against the
    original file.
    """
    sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    setExceptionHook()
    # cache:  rebuild the three fragment-level datasets from the raw .frag
    #         files and save them, instead of loading the saved copies.
    # create: (re)write the binned heatmap files for every size in `sizes`.
    cache = False
    create = False
    if create == True:
        if cache == True:
            #-------------------standard version code-----------------
            # FR and FR3 both start from the same HindIII file; they are
            # split into two complementary halves further down.
            FR = fragmentHiC.HiCdataset("bla", "../../../data/hg18", override=False, inMemory=True)
            FR.load("../../../ErezPaperData/hg18/GM-HindIII-hg18_refined.frag")
            FR3 = fragmentHiC.HiCdataset("bla", "../../../data/hg18", override=False, inMemory=True)
            FR3.load("../../../ErezPaperData/hg18/GM-HindIII-hg18"\
                     "_refined.frag")
            FR2 = fragmentHiC.HiCdataset("bla", "../../../data/hg18", override=False, inMemory=True)
            FR2.load("../../../ErezPaperData/hg18/GM-NcoI-hg18_refined.frag")
            #----------------------cross-check code----------------
            # FR = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
            #                             override=False, inMemory=True)
            # FR.load("../../../ErezPaperData/hg18/GM-NcoI-hg18_refined.frag")
            #
            # FR3 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
            #                              override=False, inMemory=True)
            # FR3.load("../../../ErezPaperData/hg18/GM-NcoI-hg18_refined.frag")
            #
            # FR2 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
            #                              override=False, inMemory=True)
            # FR2.load("../../../ErezPaperData/hg18/G"\
            #          "M-HindIII-hg18_refined.frag")
            #-------end cross-check code ---------------------------------
            #--------Filter only trans DS reads-----------------
            FR.maskFilter(FR.DS * (FR.chrms1 != FR.chrms2))
            FR2.maskFilter(FR2.DS * (FR2.chrms1 != FR2.chrms2))
            FR3.maskFilter(FR3.DS * (FR3.chrms1 != FR3.chrms2))
            #Now create two halves of one dataset and down-sample second dataset
            #----------------------standard version code--------
            # `fraction` scales the second dataset down to half the size of
            # the first one.
            fraction = 0.5 * len(FR.DS) / float(len(FR2.DS))
            # One random number per read: mask1 and mask3 are complementary.
            rarray = numpy.random.random(len(FR.DS))
            mask1 = rarray < 0.5
            mask3 = rarray >= 0.5
            mask2 = numpy.random.random(len(FR2.DS)) < fraction
            #-------------------- cross-check code---------
            #fraction = 0.5 * len(FR2.DS) / float(len(FR.DS))
            #rarray = numpy.random.random(len(FR.DS))
            #mask1 = rarray < fraction
            #mask3 = (rarray > fraction) * (rarray < fraction * 2)
            #mask2 = numpy.random.random(len(FR2.DS)) > 0.5
            #-----------------------------------------
            FR.maskFilter(mask1)
            FR2.maskFilter(mask2)
            FR3.maskFilter(mask3)
            FR.save("../../../tcc/working/cache1")
            FR2.save("../../../tcc/working/cache2")
            FR3.save("../../../tcc/working/cache3")
        else:
            # Reuse the datasets saved by an earlier run with cache == True.
            FR = fragmentHiC.HiCdataset("bla", "../../../data/hg18", override=False, inMemory=True)
            FR.load("../../../tcc/working/cache1")
            FR3 = fragmentHiC.HiCdataset("bla", "../../../data/hg18", override=False, inMemory=True)
            FR3.load("../../../tcc/working/cache3")
            FR2 = fragmentHiC.HiCdataset("bla", "../../../data/hg18", override=False, inMemory=True)
            FR2.load("../../../tcc/working/cache2")
        # NOTE(review): assumed to run for both branches of `cache` (it is
        # placed under `if create`) - confirm nesting.
        for size in sizes:
            FR.saveHeatmap("../../../tcc/working/HindIII_%d.hm" % size, size * 1000000)
            FR2.saveHeatmap("../../../tcc/working/NcoI_%d.hm" % size, size * 1000000)
            FR3.saveHeatmap("../../../tcc/working/control_%d.hm" % size, size * 1000000)
    # Correlations per bin size:
    #   p1 - raw, HindIII vs NcoI          p4 - raw, HindIII vs control
    #   p2 - corrected, HindIII vs NcoI    p3 - corrected, HindIII vs control
    p1 = []
    p2 = []
    p3 = []
    p4 = []
    # Interchromosomal values at 1 MB, before and after correction.
    evs = []
    for size in sizes:
        BD = binnedDataAnalysis(size * 1000000, "../../../data/hg18")
        BD.simpleLoad("../../../tcc/working/HindIII_%d.hm" % size, "HindIII")
        BD.simpleLoad("../../../tcc/working/NcoI_%d.hm" % size, "NcoI")
        BD.simpleLoad("../../../tcc/working/control_%d.hm" % size, "control")
        BD.removeDiagonal()
        BD.removePoorRegions(cutoff=2)
        BD.removeCis()
        data1 = BD.dataDict["HindIII"]
        data2 = BD.dataDict["NcoI"]
        data3 = BD.dataDict["control"]
        # Keep bins with a non-zero column sum in both HindIII and NcoI.
        mask = (numpy.sum( data1, axis=0) > 0) * (numpy.sum(data2, axis=0) > 0)
        validMask = mask[:, None] * mask[None, :]
        # True where the two bins lie on different chromosomes.
        transmask = BD.chromosomeIndex[:, None] != BD.chromosomeIndex[None, :]
        cormask = transmask * validMask
        c1 = scipy.stats.spearmanr(data1[cormask], data2[cormask])[0]
        c4 = scipy.stats.spearmanr(data1[cormask], data3[cormask])[0]
        if size == 1:
            evs.append(BD.interchromosomalValues("HindIII"))
            evs.append(BD.interchromosomalValues("NcoI"))
            evs.append(BD.interchromosomalValues("control"))
        p4.append(c4)
        p1.append(c1)
        # Trailing comma: the corrected values printed below continue on the
        # same output line.
        print "size\t%d\traw:" % size, c1,
        BD.removeZeros()
        BD.fakeCis()  # does iterative correction as well
        BD.restoreZeros(value=0)
        data1 = BD.dataDict["HindIII"]
        data2 = BD.dataDict["NcoI"]
        data3 = BD.dataDict["control"]
        # The mask computed on the raw data is reused for the corrected data.
        c2 = scipy.stats.spearmanr(data1[cormask], data2[cormask])[0]
        c3 = scipy.stats.spearmanr(data1[cormask], data3[cormask])[0]
        if size == 1:
            evs.append(BD.interchromosomalValues("HindIII"))
            evs.append(BD.interchromosomalValues("NcoI"))
            evs.append(BD.interchromosomalValues("control"))
            # NOTE(review): assumed to belong to the `size == 1` branch.
            print evs
        p3.append(c3)
        p2.append(c2)
        print "\tcorrected:", c2, "\tcontrol", c3
    plt.plot(sizes, p1, label="Raw data, between enzymes")
    plt.plot(sizes, p2, label="Iteratively corrected, between")
    plt.plot(sizes, p3, label="IC, within")
    plt.xlabel("Bin size, MB")
    plt.xticks(range(1, 11))
    plt.ylabel("Spearman correlation coefficient")
    plt.legend()
    niceShow()
    setExceptionHook()
    # NOTE(review): always raises ZeroDivisionError.
    0 / 0
def compareInterarmMaps(resolution, filename1, filename2, experiment1, experiment2, genome, mouse=False, **kwargs):
    """Plot 8 interarm maps for two datasets (paper supplement figure).

    Both heatmaps are loaded into one ``binnedDataAnalysis(resolution,
    genome)`` object.  The 4x2 grid shows ``experiment1`` in the left column
    and ``experiment2`` in the right column.  Processing is cumulative and
    runs in this order, which is not the row order:

      row 1 - raw data
      row 3 - after ``iterativeCorrectWithSS``
      row 2 - after ``iterativeCorrectWithoutSS``
      row 4 - after ``fakeCis``

    The figure is shown and then written through the module-level ``pp``
    object.  ``mouse`` and ``**kwargs`` are accepted but not used here.
    """
    global pp
    if options.verbose:
        print >> sys.stdout, "compareInterarmMaps: res: %d file1: %s file2: %s exp1:%s exp2:%s gen:%s" % (
            resolution,
            filename1,
            filename2,
            experiment1,
            experiment2,
            genome,
        )

    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename1, experiment1)
    Tanay.simpleLoad(filename2, experiment2)
    Tanay.removeDiagonal()
    Tanay.removePoorRegions(cutoff=2)

    fs = 10

    def drawPanel(position, name, suffix):
        "Draw the average trans map of one dataset in one grid cell"
        plt.subplot(position)
        plt.title(name + suffix, fontsize=fs)
        Tanay.averageTransMap(name, vmin=None, vmax=None)
        plt.colorbar()

    plt.figure(figsize=(12, 16))
    drawPanel(421, experiment1, ", raw")
    drawPanel(422, experiment2, ", raw")

    Tanay.iterativeCorrectWithSS()
    drawPanel(425, experiment1, ", with SS reads")
    drawPanel(426, experiment2, ", with SS reads")

    Tanay.iterativeCorrectWithoutSS()
    # The left panel used to plot experiment2 under an experiment1 title
    # (copy-paste slip); it now plots the dataset named in its title.
    drawPanel(423, experiment1, ", no SS reads")
    drawPanel(424, experiment2, ", no ss reads")

    Tanay.fakeCis()
    drawPanel(427, experiment1, ", trans only")
    drawPanel(428, experiment2, ", trans only")

    plt.show()
    pp.savefig()
from hiclib.binnedData import binnedDataAnalysis import os import sys workingGenome = "hg18" genomeFolder = "../../../data/hg19" if not os.path.exists(genomeFolder): try: genomeFolder = sys.argv[1] except: raise StandardError("Please provide hg19 Genome folder in the code or as a first argument") a = binnedDataAnalysis(1000000, genomeFolder) a.simpleLoad("../fragmentHiC/test-1M.hm", "test") a.removeDiagonal() a.removePoorRegions() a.removeCis() a.fakeCis() a.removeZeros() a.iterativeCorrectWithoutSS() a.doEig() a.export("test", "testHeatmap") print "everything worked, but no verification of result was made, because we haven't written it yet..."