Ejemplo n.º 1
0
def diamondScore(dataset, size=10):
    """
    Extract a so-called "diamond score" - inspired by Suzana Hadjur talks
    see Sevil Sofueva, EMBO 2013 - Supp Figure 11
    (but this is a bit different from Supp Figure 11!!!)

    For every bin ``mon`` the raw score is the sum of the contacts between
    bin ``mon + i`` and bin ``mon - j`` for all ``i, j >= 0`` with
    ``i + j < size``, i.e. all bin pairs that straddle ``mon`` and are less
    than ``size`` bins apart.  The heatmap is tiled 3x3 before the scan so
    that windows near the ends wrap around to the other end of the map.

    Parameters
    ----------
    dataset
        Passed to ``hm`` to locate the stored heatmap.
    size : int
        Side of the square window, in bins.

    Returns
    -------
    numpy.ndarray
        One value per heatmap bin: the raw score minus its Gaussian-smoothed
        (sigma = 30 bins) version.
    """
    heatmap = 1. * h5dict(hm(dataset))["heatmap"]

    # Fill rows/columns that have no counts with the preceding row/column
    # (index -1 wraps to the last bin when bin 0 is empty).
    emptyBins = np.nonzero(np.sum(heatmap, axis=0) == 0)[0]
    heatmap[emptyBins] = heatmap[emptyBins - 1]
    heatmap[:, emptyBins] = heatmap[:, emptyBins - 1]

    # Zero the main diagonal and its two neighbours before correction.
    mirnylib.numutils.fillDiagonal(heatmap, 0, 0)
    mirnylib.numutils.fillDiagonal(heatmap, 0, 1)
    mirnylib.numutils.fillDiagonal(heatmap, 0, -1)
    # NOTE(review): presumably clips the highest 0.01% of values - confirm
    # against the trunc helper.
    heatmap = trunc(heatmap, low=0, high=0.0001)
    heatmap = ultracorrect(heatmap)

    # Refill the removed diagonals with multiples of the mean of the second
    # off-diagonal, the closest one that was left untouched.
    diag2value = np.mean(np.diagonal(heatmap, 2))
    mirnylib.numutils.fillDiagonal(heatmap, 1.5 * diag2value, 0)
    mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, 1)
    mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, -1)
    # Normalise so that the mean column sum equals one.
    heatmap /= np.mean(np.sum(heatmap, axis=0))

    # 3x3 tiling: the central copy can be scanned without boundary checks.
    tiledHeatmap = np.hstack([heatmap, heatmap, heatmap])
    tiledHeatmap = np.vstack([tiledHeatmap, tiledHeatmap, tiledHeatmap])
    setExceptionHook()

    start = len(heatmap)
    end = 2 * len(heatmap)
    ratios = []
    # range() instead of xrange() so the loop also runs on Python 3.
    for mon in range(start, end):
        # Rows run forward from mon, columns run backward from mon.
        diamond = tiledHeatmap[mon:mon + size, mon:mon - size:-1]
        # Keep only the entries whose two bins are less than len(diamond) apart.
        inds = (np.arange(len(diamond))[:, None] +
                np.arange(len(diamond))[None, :]) < len(diamond)
        ratios.append(diamond[inds].sum())

    # Subtract the smoothed profile; the unreachable second return was dropped.
    return np.array(ratios) - gaussian_filter(ratios, 30)
Ejemplo n.º 2
0
    def show_heatmap(self, key, vmax=None, vmin=None, showShifted=False,
                     shiftBy=None, logColorScale=None, iterativeCorrect=False,
                     cmap='fall', fillZeros=True, observedOverExpected=False):
        """
        Draw the heatmap stored under ``key`` with ``plt.imshow``.

        A copy of ``self.HiC_genomDict[key].heatmap`` is optionally
        transformed (in this order: zero filling, observed/expected scaling,
        iterative correction, log2) and then drawn.

        Parameters
        ----------
        key
            Key into ``self.HiC_genomDict`` selecting the map to draw.
        vmax, vmin
            Colour limits forwarded to ``plt.imshow``.
        showShifted : bool
            If true, draw a shifted view of the map (see ``shiftBy``).
        shiftBy : int or None
            With ``showShifted``: number of bins to roll both axes by.
            ``None`` applies ``np.fft.fftshift`` instead.
        logColorScale : float or None
            When not ``None`` the map is replaced by
            ``log2(map + logColorScale)``.
        iterativeCorrect : bool
            If true, pass the map through ``ultracorrect``.
        cmap
            Colormap forwarded to ``plt.imshow``.
        fillZeros : bool
            If true, entries equal to zero are replaced by the map maximum.
        observedOverExpected : bool
            If true, pass the map through ``setMatrilocScaling(alpha=0)``.
        """
        # NOTE(review): presumably the chromosome length in kb - confirm units.
        L = self.HiC_genomDict['chrmLen'] // 1e3
        hmap = self.HiC_genomDict[key].heatmap.copy()

        if fillZeros:
            hmap[hmap == 0] = np.max(hmap)

        if observedOverExpected:
            hmap = setMatrilocScaling(hmap, alpha=0)

        if iterativeCorrect:
            hmap = ultracorrect(hmap)

        # logColorScale is a float only when a log colour scale is requested.
        if logColorScale is not None:
            hmap = np.log2(hmap + logColorScale)

        # Options shared by all three drawing branches.
        drawOptions = dict(cmap=cmap, vmin=vmin, vmax=vmax,
                           interpolation='nearest')

        if not showShifted:
            plt.imshow(np.flipud(hmap), extent=[0, L, 0, L], **drawOptions)
        elif shiftBy is None:
            fshift = L // 2
            plt.imshow(np.fft.fftshift(np.flipud(hmap)),
                       extent=[-fshift, fshift, -fshift, fshift],
                       **drawOptions)
        else:
            # Roll both axes by the same amount, then flip for display.
            hmap = np.flipud(
                np.roll(np.roll(hmap, -shiftBy, axis=0), -shiftBy, axis=1))
            plt.imshow(hmap,
                       extent=[-shiftBy, L - shiftBy, 0 - shiftBy, L - shiftBy],
                       **drawOptions)
Ejemplo n.º 3
0
def directionalityRatio(dataset, size=20):
    """
    Compute, for every heatmap bin, the share of its short-range contacts
    that fall in the window starting at the bin.

    For bin ``mon`` the function sums the contacts with bins
    ``mon .. mon + size - 1`` ("upstream") and with bins
    ``mon - size .. mon - 1`` ("downstream") and reports
    ``upstream / (upstream + downstream)``.

    Parameters
    ----------
    dataset
        Passed to ``hm`` to locate the stored heatmap.
    size : int
        Window length, in bins, on each side of the bin.

    Returns
    -------
    list
        One ratio per heatmap bin, in bin order.
    """
    heatmap = 1. * h5dict(hm(dataset))["heatmap"]  # extract heatmap

    # Filling in the gaps in the heatmap. Not really needed as heatmaps are
    # with overlaps, so they have no gaps.
    emptyBins = np.nonzero(np.sum(heatmap, axis=0) == 0)[0]
    heatmap[emptyBins] = heatmap[emptyBins - 1]
    heatmap[:, emptyBins] = heatmap[:, emptyBins - 1]

    # Following regular IC protocol (see 033_....py)
    mirnylib.numutils.fillDiagonal(heatmap, 0, 0)
    mirnylib.numutils.fillDiagonal(heatmap, 0, 1)
    mirnylib.numutils.fillDiagonal(heatmap, 0, -1)
    heatmap = trunc(heatmap, low=0, high=0.0001)
    heatmap = ultracorrect(heatmap)
    # Refill the zeroed diagonals from the mean of the second off-diagonal.
    diag2value = np.mean(np.diagonal(heatmap, 2))
    mirnylib.numutils.fillDiagonal(heatmap, 1.5 * diag2value, 0)
    mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, 1)
    mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, -1)
    heatmap /= np.mean(np.sum(heatmap, axis=0))

    # Put 9 copies of the heatmap in a huge square - Caulobacter is a ring.
    # This is a cheap-and-dirty way to account for that.
    tiledHeatmap = np.hstack([heatmap, heatmap, heatmap])
    tiledHeatmap = np.vstack([tiledHeatmap, tiledHeatmap, tiledHeatmap])
    setExceptionHook()  # debug only

    start = len(heatmap)
    end = 2 * len(heatmap)
    ratios = []
    # Going through the central square; range() also works on Python 3.
    for mon in range(start, end):
        upstream = tiledHeatmap[mon, mon:mon + size].sum()
        downstream = tiledHeatmap[mon - size:mon, mon].sum()
        # Fraction of the windowed contacts that is upstream.
        ratios.append(upstream / (upstream + downstream))

    return ratios
Ejemplo n.º 4
0
def directionalityRatio(dataset, size=20):
    """
    Compute, for every heatmap bin, the share of its short-range contacts
    that fall in the window starting at the bin.

    For bin ``mon`` the function sums the contacts with bins
    ``mon .. mon + size - 1`` ("upstream") and with bins
    ``mon - size .. mon - 1`` ("downstream") and reports
    ``upstream / (upstream + downstream)``.

    Parameters
    ----------
    dataset
        Passed to ``hm`` to locate the stored heatmap.
    size : int
        Window length, in bins, on each side of the bin.

    Returns
    -------
    list
        One ratio per heatmap bin, in bin order.
    """
    heatmap = 1. * h5dict(hm(dataset))["heatmap"]  # extract heatmap

    # Filling in the gaps in the heatmap. Not really needed as heatmaps are
    # with overlaps, so they have no gaps.
    emptyBins = np.nonzero(np.sum(heatmap, axis=0) == 0)[0]
    heatmap[emptyBins] = heatmap[emptyBins - 1]
    heatmap[:, emptyBins] = heatmap[:, emptyBins - 1]

    # Following regular IC protocol (see 033_....py)
    mirnylib.numutils.fillDiagonal(heatmap, 0, 0)
    mirnylib.numutils.fillDiagonal(heatmap, 0, 1)
    mirnylib.numutils.fillDiagonal(heatmap, 0, -1)
    heatmap = trunc(heatmap, low=0, high=0.0001)
    heatmap = ultracorrect(heatmap)
    # Refill the zeroed diagonals from the mean of the second off-diagonal.
    diag2value = np.mean(np.diagonal(heatmap, 2))
    mirnylib.numutils.fillDiagonal(heatmap, 1.5 * diag2value, 0)
    mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, 1)
    mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, -1)
    heatmap /= np.mean(np.sum(heatmap, axis=0))

    # Put 9 copies of the heatmap in a huge square - Caulobacter is a ring.
    # This is a cheap-and-dirty way to account for that.
    tiledHeatmap = np.hstack([heatmap, heatmap, heatmap])
    tiledHeatmap = np.vstack([tiledHeatmap, tiledHeatmap, tiledHeatmap])
    setExceptionHook()  # debug only

    start = len(heatmap)
    end = 2 * len(heatmap)
    ratios = []
    # Going through the central square; range() also works on Python 3.
    for mon in range(start, end):
        upstream = tiledHeatmap[mon, mon:mon + size].sum()
        downstream = tiledHeatmap[mon - size:mon, mon].sum()
        # Fraction of the windowed contacts that is upstream.
        ratios.append(upstream / (upstream + downstream))

    return ratios
Ejemplo n.º 5
0
    def load_text_HiC_Map(self,
                          fname,
                          source='',
                          key='map',
                          iterativeCorrect=False):
        """
        Load a whitespace-delimited text matrix and store it as a Hi-C map.

        The matrix is read with ``np.loadtxt`` and stored as the ``heatmap``
        attribute of a new ``HiC_object`` under ``self.HiC_genomDict[key]``.
        A failure while loading or correcting is reported with ``print`` and
        otherwise ignored.

        Parameters
        ----------
        fname : str
            Path of the text file. A relative path that does not exist as
            given is looked up inside ``source``.
        source : str
            Directory used as a fallback location for ``fname``.
        key : str
            Dictionary key for the map. The default ``'map'`` is replaced by
            ``'map'`` followed by the running map counter.
        iterativeCorrect : bool
            If true, the matrix is passed through ``ultracorrect`` and divided
            by the median of the column sums of the uncorrected matrix.

        Raises
        ------
        AssertionError
            If the file cannot be found.
        """
        if os.path.isabs(fname):
            assert os.path.exists(fname), \
                "file not found: {0}".format(fname)
        elif not os.path.exists(fname):
            fname = os.path.join(source, fname)
            assert os.path.exists(fname), \
                "file not found: {0}".format(fname)

        # The counter advances on every call, including calls that fail below.
        self.__mapcount += 1
        if key == 'map':
            key = 'map' + str(self.__mapcount)

        try:
            M = np.loadtxt(fname, float)

            if iterativeCorrect:
                M = ultracorrect(M) / np.median(np.nansum(M, axis=0))
            self.HiC_genomDict[key] = HiC_object()
            self.HiC_genomDict[key].heatmap = M
        except Exception:
            # Best effort: report and continue, but let KeyboardInterrupt and
            # SystemExit propagate instead of swallowing them.
            print("could not load file: {0}".format(key))
Ejemplo n.º 6
0
def diamondScore(dataset, size=10):
    """
    Extract a so-called "diamond score" - inspired by Suzana Hadjur talks
    see Sevil Sofueva, EMBO 2013 - Supp Figure 11
    (but this is a bit different from Supp Figure 11!!!)

    For every bin ``mon`` the raw score is the sum of the contacts between
    bin ``mon + i`` and bin ``mon - j`` for all ``i, j >= 0`` with
    ``i + j < size``, i.e. all bin pairs that straddle ``mon`` and are less
    than ``size`` bins apart.  The heatmap is tiled 3x3 before the scan so
    that windows near the ends wrap around to the other end of the map.

    Parameters
    ----------
    dataset
        Passed to ``hm`` to locate the stored heatmap.
    size : int
        Side of the square window, in bins.

    Returns
    -------
    numpy.ndarray
        One value per heatmap bin: the raw score minus its Gaussian-smoothed
        (sigma = 30 bins) version.
    """
    heatmap = 1. * h5dict(hm(dataset))["heatmap"]

    # Fill rows/columns that have no counts with the preceding row/column
    # (index -1 wraps to the last bin when bin 0 is empty).
    emptyBins = np.nonzero(np.sum(heatmap, axis=0) == 0)[0]
    heatmap[emptyBins] = heatmap[emptyBins - 1]
    heatmap[:, emptyBins] = heatmap[:, emptyBins - 1]

    # Zero the main diagonal and its two neighbours before correction.
    mirnylib.numutils.fillDiagonal(heatmap, 0, 0)
    mirnylib.numutils.fillDiagonal(heatmap, 0, 1)
    mirnylib.numutils.fillDiagonal(heatmap, 0, -1)
    # NOTE(review): presumably clips the highest 0.01% of values - confirm
    # against the trunc helper.
    heatmap = trunc(heatmap, low=0, high=0.0001)
    heatmap = ultracorrect(heatmap)

    # Refill the removed diagonals with multiples of the mean of the second
    # off-diagonal, the closest one that was left untouched.
    diag2value = np.mean(np.diagonal(heatmap, 2))
    mirnylib.numutils.fillDiagonal(heatmap, 1.5 * diag2value, 0)
    mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, 1)
    mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, -1)
    # Normalise so that the mean column sum equals one.
    heatmap /= np.mean(np.sum(heatmap, axis=0))

    # 3x3 tiling: the central copy can be scanned without boundary checks.
    tiledHeatmap = np.hstack([heatmap, heatmap, heatmap])
    tiledHeatmap = np.vstack([tiledHeatmap, tiledHeatmap, tiledHeatmap])
    setExceptionHook()

    start = len(heatmap)
    end = 2 * len(heatmap)
    ratios = []
    # range() instead of xrange() so the loop also runs on Python 3.
    for mon in range(start, end):
        # Rows run forward from mon, columns run backward from mon.
        diamond = tiledHeatmap[mon:mon + size, mon:mon - size:-1]
        # Keep only the entries whose two bins are less than len(diamond) apart.
        inds = (np.arange(len(diamond))[:, None] +
                np.arange(len(diamond))[None, :]) < len(diamond)
        ratios.append(diamond[inds].sum())

    # Subtract the smoothed profile; the unreachable second return was dropped.
    return np.array(ratios) - gaussian_filter(ratios, 30)
Ejemplo n.º 7
0
def plotFigure2c():
    """
    Draw a two-panel figure comparing total coverage per bin before and after
    single and iterative correction.

    The top panel uses the heatmap built without weights, the bottom panel
    the heatmap built with ``weights=True`` after all weights were set to
    one.  Every curve is the column sum of a heatmap, restricted to bins
    with non-zero raw coverage and divided by its own mean.
    """
    dataset = HiCdataset()
    dataset.load("GM-all.refined")
    rawMap = dataset.buildHeatmap(1, 1, 1000000, False, False)
    dataset.calculateWeights()
    # if you want to correct just by fragment density, not by length dependence
    dataset.weights = np.ones(len(dataset.weights), float)
    weightedMap = dataset.buildHeatmap(1, 1, 1000000, False, weights=True)
    weightedMap[np.isnan(weightedMap)] = 0
    covered = np.sum(rawMap, axis=0) > 0

    def coverage(matrix):
        # Column sums, restricted to bins that have raw coverage.
        return np.sum(matrix, axis=0)[covered]

    def shrinkTickLabels(axes):
        # Set the tick label font size on both axes, x first.
        for getLabels in (axes.get_xticklabels, axes.get_yticklabels):
            for label in getLabels():
                label.set_fontsize(8)

    # The six curves to be plotted; everything below is plotting only.
    rawCoverage = coverage(rawMap)
    rawSingle = coverage(correct(rawMap))
    rawIterative = coverage(ultracorrect(rawMap, 40))
    weightedSingle = coverage(correct(weightedMap))
    weightedCoverage = coverage(weightedMap)
    weightedIterative = coverage(ultracorrect(weightedMap, 40))

    matplotlib.rcParams['font.sans-serif'] = 'Arial'
    dashPattern = (3, 3)
    plt.figure(figsize=(4, 4))

    # ---- top panel: unweighted heatmap ----
    topAxes = plt.subplot(2, 1, 1)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.ylabel("Total coverage", fontsize=8)

    topRaw = plt.plot(rawCoverage / rawCoverage.mean(),
                      "-", linewidth=1, color="#e5a826")[0]
    topSingle = plt.plot(rawSingle / rawSingle.mean(),
                         "--", linewidth=1, color="#e5a826")[0]
    topSingle.set_dashes(dashPattern)
    topIterative = plt.plot(rawIterative / rawIterative.mean(),
                            linewidth=1, color="grey")[0]

    shrinkTickLabels(topAxes)
    legend = plt.legend(
        [topRaw, topSingle, topIterative],
        ["Raw data", "Single correction", "Iterative correction"],
        prop={"size": 6}, loc=1, handlelength=2)
    legend.draw_frame(False)
    removeAxes(shift=0, ax=topAxes)

    # Hide the spines and draw plain black axis lines instead.
    for spine in topAxes.spines.values():
        spine.set_color('none')
    topAxes.axhline(linewidth=1, color='black')
    topAxes.axvline(linewidth=1, color='black')

    # ---- bottom panel: weighted heatmap ----
    bottomAxes = plt.subplot(2, 1, 2, sharex=topAxes)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.xlabel("Position on chom 1 (MB)", fontsize=8)
    plt.ylabel("Total coverage", fontsize=8)

    bottomSingle = plt.plot(weightedSingle / weightedSingle.mean(),
                            "--", color="#9b3811", linewidth=1)[0]
    bottomSingle.set_dashes(dashPattern)
    bottomRaw = plt.plot(weightedCoverage / weightedCoverage.mean(),
                         "-", color="#9b3811", linewidth=1)[0]
    bottomIterative = plt.plot(weightedIterative / weightedIterative.mean(),
                               linewidth=1, color="grey")[0]

    shrinkTickLabels(bottomAxes)

    legend = plt.legend(
        [bottomRaw, bottomSingle, bottomIterative],
        ["HindIII corrected", "Single correction", "Iterative correction"],
        prop={"size": 6}, loc=1, handlelength=2)
    legend.draw_frame(False)
    removeAxes(shift=0, ax=bottomAxes)
    plotting.niceShow()
Ejemplo n.º 8
0
def showAllDatasets():
    """
    Plot every entry of the module-level ``datasets`` as a smoothed heatmap
    with a log-log inset, one subplot per dataset, then call ``plt.show()``.

    For each dataset the heatmap is read from
    ``h5dict(hm(dataset), 'r')["heatmap"]``; rows/columns with zero total are
    overwritten with the preceding row/column; the three central diagonals
    are zeroed; the map goes through ``trunc`` and ``ultracorrect``; the
    central diagonals are refilled with multiples of the mean of the second
    off-diagonal; and the result is passed to ``adaptiveSmoothing`` before
    being drawn with ``plt.imshow``.  An inset axes shows the output of
    ``scaling`` for the average of two diagonal sub-blocks of the map.

    Takes no arguments and returns nothing.
    """
    setExceptionHook()

    #plt.figure(figsize=(25, 15))
    fig = plt.figure()

    #size of the figure (width/height in inches times dpi, i.e. in pixels)
    fw = fig.get_figwidth() * fig.get_dpi()
    fh = fig.get_figheight() * fig.get_dpi()

    #get subplot configuration
    sx, sy = subplots(len(datasets))

    for  j, dataset in enumerate(datasets):
        # NOTE(review): curPlot is not used again in this function
        curPlot = plt.subplot(sx, sy, j + 1)
        heatmap = 1. * h5dict(hm(dataset), 'r')["heatmap"]

        #fill in gaps - obsolete, as heatmaps are with overlaps
        for _ in range(1):
            zeros = np.sum(heatmap, axis=0) == 0
            zeros = np.nonzero(zeros)[0]
            # index -1 wraps to the last row/column when bin 0 is empty
            heatmap[zeros] = heatmap[zeros - 1]
            heatmap[:, zeros] = heatmap[:, zeros - 1]

        #regular IC protocol
        mirnylib.numutils.fillDiagonal(heatmap, 0, 0)
        mirnylib.numutils.fillDiagonal(heatmap, 0, 1)
        mirnylib.numutils.fillDiagonal(heatmap, 0, -1)
        heatmap = trunc(heatmap, low=0, high=0.0001)
        heatmap = ultracorrect(heatmap)
        # refill the zeroed diagonals from the mean of the second off-diagonal
        diag2value = np.mean(np.diagonal(heatmap, 2))
        mirnylib.numutils.fillDiagonal(heatmap, 1.5 * diag2value, 0)
        mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, 1)
        mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, -1)
        newHeatmap = heatmap

        #Top highly expressed genes
        #genePos = [18, 56, 77, 117, 143, 215, 234, 256, 266, 286, 300, 326, 336, 367, 379]
        geneCoor = [1162773, 3509071, 1180887, 543099, 1953250, 2522439, 3328524, 1503879, 900483, 242693, 3677144, 3931680, 3677704, 3762707, 3480870, 3829656, 1424678, 901855, 1439056, 3678537]

        # here we committed to 10kb resolution - change below if you're not
        genePos = [i / 10000. for i in geneCoor]

        # The list is emptied right away, so the gene-line loop below
        # currently draws nothing.
        genePos = []

        #putting lines at highly expressed genes
        for lpos in genePos:
            plt.hlines(lpos , 0, 500, linewidth=0.7, color="black", alpha=0.2, zorder=1)
            plt.vlines(lpos , 0, 500, linewidth=0.7, color="black", alpha=0.2, zorder=1)
            pass

        #performing adaptive smoothing
        smoothedHeatmap = adaptiveSmoothing(newHeatmap, 20)
        # normalise by the mean column sum of the (unsmoothed) corrected map
        smoothedHeatmap /= np.mean(np.sum(heatmap, axis=0))

        #print dataset, sum([np.diagonal(smoothedHeatmap, i).sum() for i in range(60, 140)])
        #maps = [[smoothedHeatmap, smoothedHeatmap[:30]],
        #         [smoothedHeatmap[:, :30], smoothedHeatmap[:30, :30]]]
        #smoothedHeatmap = np.hstack([np.vstack(i) for i in maps])

        # NOTE(review): allx and ally are not used again in this function
        allx = []
        ally = []

        plt.title(dataset, fontsize=10)
        plt.imshow((smoothedHeatmap), interpolation="none", vmax=0.035, cmap="acidblues", zorder=0)
        #plt.imshow((smoothedHeatmap), interpolation="nearest", vmin=0, vmax=np.exp(-4.5), cmap="fall", zorder=0)
        plt.xticks([])
        plt.yticks([])





        plt.subplots_adjust(left=0.05,  # the left side of the subplots of the figure
      right=0.95,  # the right side of the subplots of the figure
      bottom=0.05,  # the bottom of the subplots of the figure
      top=0.95 ,  # the top of the subplots of the figure
      wspace=0.1,  # the amount of width reserved for blank space between subplots
      hspace=0.2)
        #cPickle.dump(scaling, open(dataset.split("/")[-1] + "scaling", 'w'))
        #plt.ylim((400, 200))
        #plt.xlim((0, 200))

        #code below just puts the P(s) over the heatmap
        N = len(smoothedHeatmap)
        # semi-transparent grey triangle with vertices (1, 0), (N, N), (N, 0)
        pts = np.array([[1, 0], [N, N], [N, 0]])
        p = Polygon(pts, closed=True, facecolor=(0.8, 0.8, 0.8), linewidth=0, alpha=0.7, zorder=2)
        ax = plt.gca()
        ax.add_patch(p)

        # Box given in axes-fraction coordinates, transformed through
        # ax.transAxes and divided by the figure size in pixels to obtain
        # the figure-fraction rectangle expected by fig.add_axes.
        Bbox = matplotlib.transforms.Bbox.from_bounds(.55, .55, .35, .42)
        tBbox = matplotlib.transforms.TransformedBbox(Bbox, ax.transAxes).get_points()
        l, b, w, h = tBbox[0, 0] / fw, tBbox[0, 1] / fh, (tBbox[1, 0] - tBbox[0, 0]) / fw, (tBbox[1, 1] - tBbox[0, 1]) / fh
        # NOTE(review): newer matplotlib releases replaced the `axisbg`
        # keyword with `facecolor` - confirm against the installed version.
        axins = fig.add_axes([l, b, w, h], axisbg=(0, 0, 0, 0), xscale="log", yscale="log")
        removeAxes(ax=axins)
        for xlabel_i in axins.get_xticklabels(): xlabel_i.set_fontsize(6)
        for xlabel_i in axins.get_yticklabels(): xlabel_i.set_fontsize(6)

        # Two diagonal sub-blocks covering 5-45% and 55-95% of the map.
        N = len(smoothedHeatmap)
        st = int(0.05 * N)
        end = int(0.45 * N)
        st2 = int(0.55 * N)
        end2 = int(0.95 * N)
        # scaling() is applied to the average of the two sub-blocks
        axins.plot(*scaling(0.5 * (smoothedHeatmap[st:end, st:end] + smoothedHeatmap[st2:end2, st2:end2])), color="blue", label="intra-arm")
        # Remember the curve of the reference datasets; the value persists
        # across loop iterations once it has been assigned.
        if (dataset in ['Wildtype_0min_BglII_rep1', "ML2000_0hr"]):
            myscaling = scaling(0.5 * (smoothedHeatmap[st:end, st:end] + smoothedHeatmap[st2:end2, st2:end2]))
        #axins.plot(*scaling(smoothedHeatmap[st:end, end2:st2:-1]), color="green", label="inter-arm")
        axins.set_xlabel("kb", fontsize=6)
        axins.set_ylabel("Pc", fontsize=6)
        axins.grid()

        # Overlay the most recent reference curve in grey, if one has been
        # recorded in this or an earlier iteration.
        if "myscaling" in locals():
            axins.plot(*myscaling, color="grey")

        #axins.set_xticks([])
        #axins.set_yticks([])
        #axins.tick_params(color="red")

        #axins.set_xlabel("Mb")
        #axins.set_ylabel("Pc")
        # hide every second tick line of the inset
        for i, line in enumerate(axins.get_xticklines() + axins.get_yticklines()):
            if i % 2 == 1:  # odd indices
                line.set_visible(False)

        #if dataset != "Wildtype_0min_BglII_rep1":
        #    data = cPickle.load(open("scalings/{0}".format(dataset)))
        #    axins.plot(*data, color="blue")

        #axins.xscale("log")
        #axins.yscale("log")

        #end strange code





    plt.show()
Ejemplo n.º 9
0
def plotFigure2c():
    """
    Draw a two-panel figure comparing total coverage per bin before and after
    single and iterative correction.

    The top panel uses the heatmap built without weights, the bottom panel
    the heatmap built with ``weights=True`` after all weights were set to
    one.  Every curve is the column sum of a heatmap, restricted to bins
    with non-zero raw coverage and divided by its own mean.
    """
    dataset = HiCdataset()
    dataset.load("GM-all.refined")
    rawMap = dataset.buildHeatmap(1, 1, 1000000, False, False)
    dataset.calculateWeights()
    # if you want to correct just by fragment density, not by length dependence
    dataset.weights = np.ones(len(dataset.weights), float)
    weightedMap = dataset.buildHeatmap(1, 1, 1000000, False, weights=True)
    weightedMap[np.isnan(weightedMap)] = 0
    covered = np.sum(rawMap, axis=0) > 0

    def coverage(matrix):
        # Column sums, restricted to bins that have raw coverage.
        return np.sum(matrix, axis=0)[covered]

    def shrinkTickLabels(axes):
        # Set the tick label font size on both axes, x first.
        for getLabels in (axes.get_xticklabels, axes.get_yticklabels):
            for label in getLabels():
                label.set_fontsize(8)

    # The six curves to be plotted; everything below is plotting only.
    rawCoverage = coverage(rawMap)
    rawSingle = coverage(correct(rawMap))
    rawIterative = coverage(ultracorrect(rawMap, 40))
    weightedSingle = coverage(correct(weightedMap))
    weightedCoverage = coverage(weightedMap)
    weightedIterative = coverage(ultracorrect(weightedMap, 40))

    matplotlib.rcParams['font.sans-serif'] = 'Arial'
    dashPattern = (3, 3)
    plt.figure(figsize=(4, 4))

    # ---- top panel: unweighted heatmap ----
    topAxes = plt.subplot(2, 1, 1)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.ylabel("Total coverage", fontsize=8)

    topRaw = plt.plot(rawCoverage / rawCoverage.mean(),
                      "-", linewidth=1, color="#e5a826")[0]
    topSingle = plt.plot(rawSingle / rawSingle.mean(),
                         "--", linewidth=1, color="#e5a826")[0]
    topSingle.set_dashes(dashPattern)
    topIterative = plt.plot(rawIterative / rawIterative.mean(),
                            linewidth=1, color="grey")[0]

    shrinkTickLabels(topAxes)
    legend = plt.legend(
        [topRaw, topSingle, topIterative],
        ["Raw data", "Single correction", "Iterative correction"],
        prop={"size": 6}, loc=1, handlelength=2)
    legend.draw_frame(False)
    removeAxes(shift=0, ax=topAxes)

    # Hide the spines and draw plain black axis lines instead.
    for spine in topAxes.spines.values():
        spine.set_color('none')
    topAxes.axhline(linewidth=1, color='black')
    topAxes.axvline(linewidth=1, color='black')

    # ---- bottom panel: weighted heatmap ----
    bottomAxes = plt.subplot(2, 1, 2, sharex=topAxes)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.xlabel("Position on chom 1 (MB)", fontsize=8)
    plt.ylabel("Total coverage", fontsize=8)

    bottomSingle = plt.plot(weightedSingle / weightedSingle.mean(),
                            "--", color="#9b3811", linewidth=1)[0]
    bottomSingle.set_dashes(dashPattern)
    bottomRaw = plt.plot(weightedCoverage / weightedCoverage.mean(),
                         "-", color="#9b3811", linewidth=1)[0]
    bottomIterative = plt.plot(weightedIterative / weightedIterative.mean(),
                               linewidth=1, color="grey")[0]

    shrinkTickLabels(bottomAxes)

    legend = plt.legend(
        [bottomRaw, bottomSingle, bottomIterative],
        ["HindIII corrected", "Single correction", "Iterative correction"],
        prop={"size": 6}, loc=1, handlelength=2)
    legend.draw_frame(False)
    removeAxes(shift=0, ax=bottomAxes)
    plotting.niceShow()
Ejemplo n.º 10
0
def showAllDatasets():
    """
    Plot every entry of the module-level ``datasets`` as a smoothed heatmap
    with a log-log inset, one subplot per dataset, then call ``plt.show()``.

    For each dataset the heatmap is read from
    ``h5dict(hm(dataset), 'r')["heatmap"]``; rows/columns with zero total are
    overwritten with the preceding row/column; the three central diagonals
    are zeroed; the map goes through ``trunc`` and ``ultracorrect``; the
    central diagonals are refilled with multiples of the mean of the second
    off-diagonal; and the result is passed to ``adaptiveSmoothing`` before
    being drawn with ``plt.imshow``.  An inset axes shows the output of
    ``scaling`` for the average of two diagonal sub-blocks of the map.

    Takes no arguments and returns nothing.
    """
    setExceptionHook()

    #plt.figure(figsize=(25, 15))
    fig = plt.figure()

    #size of the figure (width/height in inches times dpi, i.e. in pixels)
    fw = fig.get_figwidth() * fig.get_dpi()
    fh = fig.get_figheight() * fig.get_dpi()

    #get subplot configuration
    sx, sy = subplots(len(datasets))

    for j, dataset in enumerate(datasets):
        # NOTE(review): curPlot is not used again in this function
        curPlot = plt.subplot(sx, sy, j + 1)
        heatmap = 1. * h5dict(hm(dataset), 'r')["heatmap"]

        #fill in gaps - obsolete, as heatmaps are with overlaps
        for _ in range(1):
            zeros = np.sum(heatmap, axis=0) == 0
            zeros = np.nonzero(zeros)[0]
            # index -1 wraps to the last row/column when bin 0 is empty
            heatmap[zeros] = heatmap[zeros - 1]
            heatmap[:, zeros] = heatmap[:, zeros - 1]

        #regular IC protocol
        mirnylib.numutils.fillDiagonal(heatmap, 0, 0)
        mirnylib.numutils.fillDiagonal(heatmap, 0, 1)
        mirnylib.numutils.fillDiagonal(heatmap, 0, -1)
        heatmap = trunc(heatmap, low=0, high=0.0001)
        heatmap = ultracorrect(heatmap)
        # refill the zeroed diagonals from the mean of the second off-diagonal
        diag2value = np.mean(np.diagonal(heatmap, 2))
        mirnylib.numutils.fillDiagonal(heatmap, 1.5 * diag2value, 0)
        mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, 1)
        mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, -1)
        newHeatmap = heatmap

        #Top highly expressed genes
        #genePos = [18, 56, 77, 117, 143, 215, 234, 256, 266, 286, 300, 326, 336, 367, 379]
        geneCoor = [
            1162773, 3509071, 1180887, 543099, 1953250, 2522439, 3328524,
            1503879, 900483, 242693, 3677144, 3931680, 3677704, 3762707,
            3480870, 3829656, 1424678, 901855, 1439056, 3678537
        ]

        # here we committed to 10kb resolution - change below if you're not
        genePos = [i / 10000. for i in geneCoor]

        # The list is emptied right away, so the gene-line loop below
        # currently draws nothing.
        genePos = []

        #putting lines at highly expressed genes
        for lpos in genePos:
            plt.hlines(lpos,
                       0,
                       500,
                       linewidth=0.7,
                       color="black",
                       alpha=0.2,
                       zorder=1)
            plt.vlines(lpos,
                       0,
                       500,
                       linewidth=0.7,
                       color="black",
                       alpha=0.2,
                       zorder=1)
            pass

        #performing adaptive smoothing
        smoothedHeatmap = adaptiveSmoothing(newHeatmap, 20)
        # normalise by the mean column sum of the (unsmoothed) corrected map
        smoothedHeatmap /= np.mean(np.sum(heatmap, axis=0))

        #print dataset, sum([np.diagonal(smoothedHeatmap, i).sum() for i in range(60, 140)])
        #maps = [[smoothedHeatmap, smoothedHeatmap[:30]],
        #         [smoothedHeatmap[:, :30], smoothedHeatmap[:30, :30]]]
        #smoothedHeatmap = np.hstack([np.vstack(i) for i in maps])

        # NOTE(review): allx and ally are not used again in this function
        allx = []
        ally = []

        plt.title(dataset, fontsize=10)
        plt.imshow((smoothedHeatmap),
                   interpolation="none",
                   vmax=0.035,
                   cmap="acidblues",
                   zorder=0)
        #plt.imshow((smoothedHeatmap), interpolation="nearest", vmin=0, vmax=np.exp(-4.5), cmap="fall", zorder=0)
        plt.xticks([])
        plt.yticks([])

        plt.subplots_adjust(
            left=0.05,  # the left side of the subplots of the figure
            right=0.95,  # the right side of the subplots of the figure
            bottom=0.05,  # the bottom of the subplots of the figure
            top=0.95,  # the top of the subplots of the figure
            wspace=
            0.1,  # the amount of width reserved for blank space between subplots
            hspace=0.2)
        #cPickle.dump(scaling, open(dataset.split("/")[-1] + "scaling", 'w'))
        #plt.ylim((400, 200))
        #plt.xlim((0, 200))

        #code below just puts the P(s) over the heatmap
        N = len(smoothedHeatmap)
        # semi-transparent grey triangle with vertices (1, 0), (N, N), (N, 0)
        pts = np.array([[1, 0], [N, N], [N, 0]])
        p = Polygon(pts,
                    closed=True,
                    facecolor=(0.8, 0.8, 0.8),
                    linewidth=0,
                    alpha=0.7,
                    zorder=2)
        ax = plt.gca()
        ax.add_patch(p)

        # Box given in axes-fraction coordinates, transformed through
        # ax.transAxes and divided by the figure size in pixels to obtain
        # the figure-fraction rectangle expected by fig.add_axes.
        Bbox = matplotlib.transforms.Bbox.from_bounds(.55, .55, .35, .42)
        tBbox = matplotlib.transforms.TransformedBbox(
            Bbox, ax.transAxes).get_points()
        l, b, w, h = tBbox[0, 0] / fw, tBbox[0, 1] / fh, (
            tBbox[1, 0] - tBbox[0, 0]) / fw, (tBbox[1, 1] - tBbox[0, 1]) / fh
        # NOTE(review): newer matplotlib releases replaced the `axisbg`
        # keyword with `facecolor` - confirm against the installed version.
        axins = fig.add_axes([l, b, w, h],
                             axisbg=(0, 0, 0, 0),
                             xscale="log",
                             yscale="log")
        removeAxes(ax=axins)
        for xlabel_i in axins.get_xticklabels():
            xlabel_i.set_fontsize(6)
        for xlabel_i in axins.get_yticklabels():
            xlabel_i.set_fontsize(6)

        # Two diagonal sub-blocks covering 5-45% and 55-95% of the map.
        N = len(smoothedHeatmap)
        st = int(0.05 * N)
        end = int(0.45 * N)
        st2 = int(0.55 * N)
        end2 = int(0.95 * N)
        # scaling() is applied to the average of the two sub-blocks
        axins.plot(*scaling(0.5 * (smoothedHeatmap[st:end, st:end] +
                                   smoothedHeatmap[st2:end2, st2:end2])),
                   color="blue",
                   label="intra-arm")
        # Remember the curve of the reference datasets; the value persists
        # across loop iterations once it has been assigned.
        if (dataset in ['Wildtype_0min_BglII_rep1', "ML2000_0hr"]):
            myscaling = scaling(0.5 * (smoothedHeatmap[st:end, st:end] +
                                       smoothedHeatmap[st2:end2, st2:end2]))
        #axins.plot(*scaling(smoothedHeatmap[st:end, end2:st2:-1]), color="green", label="inter-arm")
        axins.set_xlabel("kb", fontsize=6)
        axins.set_ylabel("Pc", fontsize=6)
        axins.grid()

        # Overlay the most recent reference curve in grey, if one has been
        # recorded in this or an earlier iteration.
        if "myscaling" in locals():
            axins.plot(*myscaling, color="grey")

        #axins.set_xticks([])
        #axins.set_yticks([])
        #axins.tick_params(color="red")

        #axins.set_xlabel("Mb")
        #axins.set_ylabel("Pc")
        # hide every second tick line of the inset
        for i, line in enumerate(axins.get_xticklines() +
                                 axins.get_yticklines()):
            if i % 2 == 1:  # odd indices
                line.set_visible(False)

        #if dataset != "Wildtype_0min_BglII_rep1":
        #    data = cPickle.load(open("scalings/{0}".format(dataset)))
        #    axins.plot(*data, color="blue")

        #axins.xscale("log")
        #axins.yscale("log")

        #end strange code

    plt.show()
Ejemplo n.º 11
0
     heatmap, 0, 0)  # Fill main and second diagonals with zeros
 mirnylib.numutils.fillDiagonal(heatmap, 0, 1)
 mirnylib.numutils.fillDiagonal(heatmap, 0, -1)
 heatmap = trunc(
     heatmap, low=0, high=0.0001
 )  # truncate heatmap at the  top 0.01% of bins (bin pairs).
 # This is one bin-pair per 10 bins, i.e. negligible. But will help to
 # get rid of PCR blowouts and other artifacts
 # Before doing this step, I got sure that sum of reads in each row/column of the heatmap is more than 100
 """
 If I was working with eucaryotes, or with highly reprtitive genomes, I would do this: 
 mask = np.sum(heatmap, axis=0) < 100 #take all bins with less than 100 reads
 heatmap[mask,:] = 0 
 heatmap[:,mask] = 0   #set these rows and columns to zero
 """
 heatmap = ultracorrect(heatmap)  # performing iterative correction
 correctedHeatmap = np.array(
     heatmap
 )  # just making a copy (np.asarray does not make a copy, np.array does by default)
 mask = np.sum(
     correctedHeatmap, axis=0
 ) == 0  # record mask of rows/columns which have a sum of zero (e.g. no fragments).
 diag2value = np.mean(np.diagonal(
     correctedHeatmap,
     2))  # get value from the third diagonal... the first one left
 mirnylib.numutils.fillDiagonal(
     correctedHeatmap, 1.5 * diag2value,
     0)  # Fill main diagonal with 1.5 times that
 mirnylib.numutils.fillDiagonal(
     correctedHeatmap, 1.2 * diag2value,
     1)  # Fill second diagonals with 1.2 times that