def plotCisToTransHotFragments(dataset="../../../mouse/data/combined/"\ "mouse1_merged.frag", workingFile="../../../tcc/working/workingMouse.frag", cacheFile="../../../tcc/working/workingMouseFiltered.frag", genomeFolder="../../../data/mm9", label=None): mirnylib.systemutils.setExceptionHook() if not os.path.exists(cacheFile): print "caching parsed data" FH = HiCdataset(workingFile, genomeFolder) FH.load(dataset) FH.filterRsiteStart(offset=5) FH.filterDuplicates() #TR.save(filename[1]+".dat") FH.filterLarge() FH.maskFilter(FH.DS) FH.save(cacheFile) FH = HiCdataset(workingFile, genomeFolder) FH.load(cacheFile) fs = FH.fragmentSum() FH.saveFragments() FH.maskFilter(FH.chrms1 == FH.chrms2) FH.originalFragments() fsCis = FH.fragmentSum() args = numpy.argsort(fs) fsSort = 1. * fs[args] fsCisSort = 1. * fsCis[args] cisToTrans = fsCisSort / fsSort p1, p2, p3 = numpy.percentile(fsSort, [99, 99.5, 99.9]) bins = mirnylib.numutils.logbins(1, fsSort.max(), 1.08) counts = numpy.histogram(fsSort, bins) values = numpy.histogram(fsSort, bins, weights=cisToTrans) plt.plot(0.5 * (values[1][:-1] + values[1][1:]), values[0] / counts[0], '.', label=label) for linep in p1, p2, p3: plt.vlines(linep, 0, 1) plt.xlabel("Counts per fragment") plt.ylabel("Cis-to-trans ratio") plt.title("Vertical lines are at 99%,99.5% and 99.9% reads per fragment") niceShow()
def plotFigure2c():
    """Reproduce paper figure 2c: total coverage of 1 MB bins on
    chromosome 1 for raw, singly corrected and iteratively corrected data.

    Top panel: raw heatmap; bottom panel: fragment-density-weighted heatmap.
    """
    TR = HiCdataset()
    TR.load("GM-all.refined")
    hm = TR.buildHeatmap(1, 1, 1000000, False, False)
    TR.calculateWeights()
    # Override weights with ones to correct just by fragment density,
    # not by length dependence.
    TR.weights = np.ones(len(TR.weights), float)
    hm2 = TR.buildHeatmap(1, 1, 1000000, False, weights=True)
    hm2[np.isnan(hm2)] = 0
    mask = np.sum(hm, axis=0) > 0

    # p1-p6 are the six lines to be plotted; below is plotting only.
    p1 = np.sum(hm, axis=0)[mask]
    p3 = np.sum(correct(hm), axis=0)[mask]
    p5 = np.sum(ultracorrect(hm, 40), axis=0)[mask]
    p4 = np.sum(correct(hm2), axis=0)[mask]
    p2 = np.sum(hm2, axis=0)[mask]
    p6 = np.sum(ultracorrect(hm2, 40), axis=0)[mask]

    matplotlib.rcParams['font.sans-serif'] = 'Arial'
    dashstyle = (3, 3)
    plt.figure(figsize=(4, 4))

    # --- Top panel: raw heatmap coverage ---
    ax = plt.subplot(2, 1, 1)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.ylabel("Total coverage", fontsize=8)
    line21 = plt.plot(p1 / p1.mean(), "-", linewidth=1, color="#e5a826")[0]
    line22 = plt.plot(p3 / p3.mean(), "--", linewidth=1, color="#e5a826")[0]
    line22.set_dashes(dashstyle)
    line23 = plt.plot(p5 / p5.mean(), linewidth=1, color="grey")[0]

    for xlabel_i in ax.get_xticklabels():
        xlabel_i.set_fontsize(8)
    for xlabel_i in ax.get_yticklabels():
        xlabel_i.set_fontsize(8)
    legend = plt.legend([line21, line22, line23],
                        ["Raw data", "Single correction",
                         "Iterative correction"],
                        prop={"size": 6}, loc=1, handlelength=2)
    legend.draw_frame(False)
    removeAxes(shift=0, ax=ax)
    for i in ax.spines.values():
        i.set_color('none')
    ax.axhline(linewidth=1, color='black')
    ax.axvline(linewidth=1, color='black')

    # --- Bottom panel: fragment-density-weighted coverage ---
    ax2 = plt.subplot(2, 1, 2, sharex=ax)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    # Fixed typo in the axis label: "chom" -> "chrom".
    plt.xlabel("Position on chrom 1 (MB)", fontsize=8)
    plt.ylabel("Total coverage", fontsize=8)
    line1 = plt.plot(p4 / p4.mean(), "--", color="#9b3811", linewidth=1)[0]
    line1.set_dashes(dashstyle)
    line2 = plt.plot(p2 / p2.mean(), "-", color="#9b3811", linewidth=1)[0]
    line3 = plt.plot(p6 / p6.mean(), linewidth=1, color="grey")[0]

    for xlabel_i in ax2.get_xticklabels():
        xlabel_i.set_fontsize(8)
    for xlabel_i in ax2.get_yticklabels():
        xlabel_i.set_fontsize(8)
    legend = plt.legend([line2, line1, line3],
                        ["HindIII corrected", "Single correction",
                         "Iterative correction"],
                        prop={"size": 6}, loc=1, handlelength=2)
    legend.draw_frame(False)
    removeAxes(shift=0, ax=ax2)
    plotting.niceShow()
# NOTE(review): this is a byte-for-byte duplicate of the plotFigure2c
# definition earlier in this file; being defined second, it shadows the
# first copy. One of the two should probably be removed — confirm intent.
def plotFigure2c():
    """Plot paper figure 2c: total coverage of 1 MB bins on chromosome 1,
    raw vs. singly corrected vs. iteratively corrected.
    """
    TR = HiCdataset()
    TR.load("GM-all.refined")
    hm = TR.buildHeatmap(1, 1, 1000000, False, False)
    TR.calculateWeights()
    TR.weights = np.ones(
        len(TR.weights), float
    )  # if you want to correct just by fragment density, not by length dependence
    hm2 = TR.buildHeatmap(1, 1, 1000000, False, weights=True)
    hm2[np.isnan(hm2)] = 0
    mask = np.sum(hm, axis=0) > 0
    """p1-6 are 6 lines to be plotted, below is plotting only"""
    p1 = np.sum(hm, axis=0)[mask]
    p3 = np.sum(correct(hm), axis=0)[mask]
    p5 = np.sum(ultracorrect(hm, 40), axis=0)[mask]
    p4 = np.sum(correct(hm2), axis=0)[mask]
    p2 = np.sum(hm2, axis=0)[mask]
    p6 = np.sum(ultracorrect(hm2, 40), axis=0)[mask]
    matplotlib.rcParams['font.sans-serif'] = 'Arial'
    dashstyle = (3, 3)
    plt.figure(figsize=(4, 4))
    # Top panel: raw heatmap coverage.
    ax = plt.subplot(2, 1, 1)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.ylabel("Total coverage", fontsize=8)
    line21 = plt.plot(p1 / p1.mean(), "-", linewidth=1, color="#e5a826")[0]
    line22 = plt.plot(p3 / p3.mean(), "--", linewidth=1, color="#e5a826")[0]
    line22.set_dashes(dashstyle)
    line23 = plt.plot(p5 / p5.mean(), linewidth=1, color="grey")[0]
    for xlabel_i in ax.get_xticklabels():
        xlabel_i.set_fontsize(8)
    for xlabel_i in ax.get_yticklabels():
        xlabel_i.set_fontsize(8)
    legend = plt.legend(
        [line21, line22, line23],
        ["Raw data", "Single correction", "Iterative correction"],
        prop={"size": 6}, loc=1, handlelength=2)
    legend.draw_frame(False)
    removeAxes(shift=0, ax=ax)
    for i in ax.spines.values():
        i.set_color('none')
    ax.axhline(linewidth=1, color='black')
    ax.axvline(linewidth=1, color='black')
    # Bottom panel: fragment-density-weighted coverage.
    ax2 = plt.subplot(2, 1, 2, sharex=ax)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.xlabel("Position on chom 1 (MB)", fontsize=8)
    plt.ylabel("Total coverage", fontsize=8)
    line1 = plt.plot(p4 / p4.mean(), "--", color="#9b3811", linewidth=1)[0]
    line1.set_dashes(dashstyle)
    line2 = plt.plot(p2 / p2.mean(), "-", color="#9b3811", linewidth=1)[0]
    line3 = plt.plot(p6 / p6.mean(), linewidth=1, color="grey")[0]
    for xlabel_i in ax2.get_xticklabels():
        xlabel_i.set_fontsize(8)
    for xlabel_i in ax2.get_yticklabels():
        xlabel_i.set_fontsize(8)
    legend = plt.legend(
        [line2, line1, line3],
        ["HindIII corrected", "Single correction", "Iterative correction"],
        prop={"size": 6}, loc=1, handlelength=2)
    legend.draw_frame(False)
    removeAxes(shift=0, ax=ax2)
    plotting.niceShow()
# Top-level script fragment: plot contact-probability scalings for each
# saved polymer simulation in `filelist` (bound elsewhere in this file),
# then exit. Newlines were lost in this file; indentation below is a
# reconstruction — verify loop extent against the original script.
plt.yscale("log")
for filename in filelist:
    scalings = give_slices(
        base="{0}/blockDATA2.dat".format(filename),
        tosave=None,
        nproc=4,
        slices=[250],
        sliceParams=(200),  # NOTE(review): (200) is the int 200, not a tuple — confirm whether (200,) was intended
        multipliers=numpy.arange(0.850001, 1.0001, 0.001),
        # multipliers=[1],
        mode="chain",
        loadFunction=polymerutils.load)
    setExceptionHook()
    values = [i[0] for i in scalings]
    values = numpy.array(values)
    # Rescale distances; 2500 presumably converts monomers to MB — confirm.
    values[:, 0, :] /= 2500
    labels = [
        "{0}; time = ".format(filename) + str(i[1]["slice"])
        for i in scalings
    ]
    # Python 2 idiom: map(None, a, b) zips with None-padding for
    # unequal lengths (unlike zip, which truncates).
    for scaling, label in map(None, values, labels):
        plt.plot(*scaling[0], label=label)
# Reference power law for comparison with the measured scalings.
a = logbins(10, 10000, 1.2)
a = numpy.array(a)
plt.plot(a / 2500., 1e-5 * a**(-0.5), label="Proposed -0.5 scaling")
plt.xlabel("distance (MB)")
niceShow()
exit()
plt.plot(*value[0], color=color, linestyle=linestyle, label="L={0}; num={1}".format(len, num)) #values1[:, 0, :] /= (1000000. / 600) #cPickle.dump((scalings, values), open("consScaff", 'wb')) #exit() setExceptionHook() a = logbins(10, 10000, 1.2) a = numpy.array(a) plt.xlabel("distance (MB)") niceShow("log") exit() for scaling, label in zip(values, labels): plt.plot(*scaling[1], label=label) a = logbins(10, 10000, 1.2) a = numpy.array(a) plt.plot(a, 2 * a**(1 / 6.)) niceShow("log") for scaling, label in zip(values, labels): plt.plot(*scaling[2], label=label) niceShow("log") pc = scalings[0][0][0][1] rg = scalings[0][0][1][1]
def plotCorrelationAtDifferentBinning():
    """Plot Spearman correlation between datasets at different bin sizes.

    Note the caching and creating of binned heatmaps flags below.
    Supplementary paper figure.
    """
    sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  # bin sizes, in MB
    setExceptionHook()

    # Flags: `create` rebuilds the binned heatmaps; `cache` additionally
    # re-filters the raw datasets before caching them to disk.
    cache = False
    create = False
    if create == True:
        if cache == True:
            #-------------------standard version code-----------------
            FR = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                        override=False, inMemory=True)
            FR.load("../../../ErezPaperData/hg18/GM-HindIII-hg18_refined.frag")

            FR3 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                         override=False, inMemory=True)
            FR3.load("../../../ErezPaperData/hg18/GM-HindIII-hg18"
                     "_refined.frag")

            FR2 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                         override=False, inMemory=True)
            FR2.load("../../../ErezPaperData/hg18/GM-NcoI-hg18_refined.frag")

            #----------------------cross-check code----------------
            # FR = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
            #                             override=False, inMemory=True)
            # FR.load("../../../ErezPaperData/hg18/GM-NcoI-hg18_refined.frag")
            #
            # FR3 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
            #                              override=False, inMemory=True)
            # FR3.load("../../../ErezPaperData/hg18/GM-NcoI-hg18_refined.frag")
            #
            # FR2 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
            #                              override=False, inMemory=True)
            # FR2.load("../../../ErezPaperData/hg18/G"
            #          "M-HindIII-hg18_refined.frag")
            #-------end cross-check code ---------------------------------

            #--------Filter only trans DS reads-----------------
            FR.maskFilter(FR.DS * (FR.chrms1 != FR.chrms2))
            FR2.maskFilter(FR2.DS * (FR2.chrms1 != FR2.chrms2))
            FR3.maskFilter(FR3.DS * (FR3.chrms1 != FR3.chrms2))

            # Now create two halfs of one dataset and down-sample second
            # dataset so all three have comparable read counts.
            #----------------------standard version code--------
            fraction = 0.5 * len(FR.DS) / float(len(FR2.DS))
            rarray = numpy.random.random(len(FR.DS))
            mask1 = rarray < 0.5
            mask3 = rarray >= 0.5
            mask2 = numpy.random.random(len(FR2.DS)) < fraction

            #--------------------cross-check code---------
            #fraction = 0.5 * len(FR2.DS) / float(len(FR.DS))
            #rarray = numpy.random.random(len(FR.DS))
            #mask1 = rarray < fraction
            #mask3 = (rarray > fraction) * (rarray < fraction * 2)
            #mask2 = numpy.random.random(len(FR2.DS)) > 0.5
            #-----------------------------------------
            FR.maskFilter(mask1)
            FR2.maskFilter(mask2)
            FR3.maskFilter(mask3)

            FR.save("../../../tcc/working/cache1")
            FR2.save("../../../tcc/working/cache2")
            FR3.save("../../../tcc/working/cache3")
        else:
            # Re-use the previously cached, filtered datasets.
            FR = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                        override=False, inMemory=True)
            FR.load("../../../tcc/working/cache1")

            FR3 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                         override=False, inMemory=True)
            FR3.load("../../../tcc/working/cache3")

            FR2 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                         override=False, inMemory=True)
            FR2.load("../../../tcc/working/cache2")

        # Bin each dataset at every requested resolution.
        for size in sizes:
            FR.saveHeatmap("../../../tcc/working/HindIII_%d.hm" % size,
                           size * 1000000)
            FR2.saveHeatmap("../../../tcc/working/NcoI_%d.hm" % size,
                            size * 1000000)
            FR3.saveHeatmap("../../../tcc/working/control_%d.hm" % size,
                            size * 1000000)

    p1 = []   # raw correlation, between enzymes
    p2 = []   # corrected correlation, between enzymes
    p3 = []   # corrected correlation, within one enzyme (control)
    p4 = []   # raw correlation, within one enzyme (collected, never plotted)
    evs = []  # interchromosomal eigenvalues at 1 MB resolution
    for size in sizes:
        BD = binnedDataAnalysis(size * 1000000, "../../../data/hg18")
        BD.simpleLoad("../../../tcc/working/HindIII_%d.hm" % size, "HindIII")
        BD.simpleLoad("../../../tcc/working/NcoI_%d.hm" % size, "NcoI")
        BD.simpleLoad("../../../tcc/working/control_%d.hm" % size, "control")
        BD.removeDiagonal()
        BD.removePoorRegions(cutoff=2)
        BD.removeCis()
        data1 = BD.dataDict["HindIII"]
        data2 = BD.dataDict["NcoI"]
        data3 = BD.dataDict["control"]

        # Compare only trans pixels where both enzymes have coverage.
        mask = (numpy.sum(
            data1, axis=0) > 0) * (numpy.sum(data2, axis=0) > 0)
        validMask = mask[:, None] * mask[None, :]
        transmask = BD.chromosomeIndex[:, None] != BD.chromosomeIndex[None, :]
        cormask = transmask * validMask

        c1 = scipy.stats.spearmanr(data1[cormask], data2[cormask])[0]
        c4 = scipy.stats.spearmanr(data1[cormask], data3[cormask])[0]
        if size == 1:
            evs.append(BD.interchromosomalValues("HindIII"))
            evs.append(BD.interchromosomalValues("NcoI"))
            evs.append(BD.interchromosomalValues("control"))
        p4.append(c4)
        p1.append(c1)
        # Trailing comma keeps the "corrected" print below on the same line.
        print "size\t%d\traw:" % size, c1,

        BD.removeZeros()
        BD.fakeCis()  # does iterative correction as well
        BD.restoreZeros(value=0)
        data1 = BD.dataDict["HindIII"]
        data2 = BD.dataDict["NcoI"]
        data3 = BD.dataDict["control"]
        c2 = scipy.stats.spearmanr(data1[cormask], data2[cormask])[0]
        c3 = scipy.stats.spearmanr(data1[cormask], data3[cormask])[0]
        if size == 1:
            evs.append(BD.interchromosomalValues("HindIII"))
            evs.append(BD.interchromosomalValues("NcoI"))
            evs.append(BD.interchromosomalValues("control"))
            print evs
        p3.append(c3)
        p2.append(c2)
        print "\tcorrected:", c2, "\tcontrol", c3

    plt.plot(sizes, p1, label="Raw data, between enzymes")
    plt.plot(sizes, p2, label="Iteratively corrected, between")
    plt.plot(sizes, p3, label="IC, within")
    plt.xlabel("Bin size, MB")
    plt.xticks(range(1, 11))
    plt.ylabel("Spearman correlation coefficient")
    plt.legend()
    niceShow()
    setExceptionHook()
    # NOTE(review): deliberate ZeroDivisionError — presumably to drop into
    # the debugger installed by setExceptionHook after the figure closes.
    0 / 0
def plotTanayGenomicFeature(): """Shows how genomic feature is spawned by Eig1, not Tanay domains paper supplementary figure""" Tanay = experimentalBinnedData(1000000, myGenome) Tanay.simpleLoad(GM1M, "GM-all") Tanay.loadTanayDomains() Tanay.loadWigFile("../../../histoneMarks/hg18/wgEncodeUwDnaseSeqRawSignal"\ "Rep1Gm06990.bigWig", label="feature") #Tanay.loadWigFile("../../../histoneMarks/hg18/wgEncodeBroadChipSeqSignal"\ #"Gm12878H3k9ac.wig", label = "feature", #control = "../../../histoneMarks/hg18/wgEncodeBroadChipSeqSignal"\ #"Gm12878Control.wig") #Tanay.loadWigFile("../../../histoneMarks/hg18/wgEncodeBroadChipSeqSignal"\ #"Gm12878H3k4me3.wig", label = "feature", #control = "../../../histoneMarks/hg18/wgEncodeBroadChipSeqSignal"\ #"Gm12878Control.wig") Tanay.removeDiagonal() Tanay.removePoorRegions() Tanay.removeZeros() Tanay.fakeCis() Tanay.doEig() E1 = Tanay.EigDict["GM-all"][0] E2 = Tanay.EigDict["GM-all"][1] GC = Tanay.trackDict["GC"] if scipy.stats.spearmanr(E1, GC)[0] < 0: E1 = -E1 if scipy.stats.spearmanr(E2, GC)[0] < 0: E2 = -E2 TD = Tanay.trackDict["TanayDomains"] print scipy.stats.spearmanr(Tanay.trackDict["feature"], E1) plt.scatter(Tanay.trackDict["feature"], E1, c=TD, s=4, linewidth=0) cm = plt.cm.get_cmap("jet") print "Our 2r is", ( numpy.corrcoef(Tanay.trackDict["feature"], E1)[0, 1]) ** 2 tset = set(TD) tfeature = numpy.zeros_like(TD, dtype=float) feature = Tanay.trackDict["feature"] for i in tset: #print i #print numpy.mean(feature[TD==i]) tfeature[TD == i] = numpy.mean(feature[TD == i]) #print tfeature print "Tanay r2 is", (numpy.corrcoef(tfeature, E1)[0, 1]) ** 2 plt.legend([matplotlib.lines.Line2D([0], [0], color=cm(i), marker="o", markersize=8, linewidth=0) \ for i in [0.333, 0.666, 0.999]], ["Active", "Centromere proximal", "Centromere distal"], loc=2) plt.ylabel("Eig1GW") #plt.xlabel("UWashington DNAse") #plt.xlabel("H3K9ac ChIP-seq") plt.xlabel("H3K4me3 ChIP-seq") plt.title("Color represents domain from (Yaffe 2011)") 
niceShow(subplotAdjust=(0.13, 0.11, 0.97, 0.92))