Esempio n. 1
0
def main():
    """Draw raw and corrected 5C distance histograms side by side as a PDF.

    The output path is taken from ``sys.argv[1]``.  The Nora probability
    5C project is loaded from the ``Data`` tree two directories above this
    script, ``get_histograms`` is evaluated for the 'raw' and 'fragment'
    datatypes, and both panels are rendered by ``plot_distances`` using one
    shared minimum/maximum so that the single color key drawn above them
    applies to both.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    basedir = '/'.join(script_dir.split('/')[:-2])
    out_fname = sys.argv[1]
    fivec = hifive.FiveC(
        "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_prob.fcp" % basedir)

    # Layout constants (canvas units).
    num_bins = 200
    width = 16.92
    plot_width = (width) / 2.0 - 2
    left_center = plot_width * 0.5
    right_center = plot_width * 1.5 + 0.8

    c = canvas.canvas()

    # Histogram both datatypes first so they can share one color range.
    raw_hist, raw_min, raw_max, raw_ranges = get_histograms(
        fivec, num_bins, 'raw')
    corrected = get_histograms(fivec, num_bins, 'fragment')
    corrected_hist = corrected[0]
    corrected_ranges = corrected[3]
    hist_min = min(raw_min, corrected[1])
    hist_max = max(raw_max, corrected[2])

    # Left panel: raw data; right panel: corrected data, shifted sideways.
    c.insert(plot_distances(fivec, plot_width, raw_hist, hist_min, hist_max,
                            raw_ranges))
    c.insert(plot_distances(fivec, plot_width, corrected_hist, hist_min,
                            hist_max, corrected_ranges),
             [trafo.translate(plot_width + 0.8, 0)])

    # Axis labels.
    c.text(-0.6, left_center, "Log count",
           [text.halign.center, text.valign.bottom, text.size(-3),
            trafo.rotate(90)])
    for x_center in (left_center, right_center):
        c.text(x_center, -0.4, "Log distance (Kb)",
               [text.halign.center, text.valign.top, text.size(-3)])

    # Panel titles.
    for x_center, title in ((left_center, "Raw"),
                            (right_center, "Corrected")):
        c.text(x_center, plot_width + 0.1, title,
               [text.halign.center, text.valign.bottom, text.size(-2)])

    # Shared color key spanning both panels, with its caption on top.
    color_key = hifive.plotting.plot_key(hist_min,
                                         hist_max,
                                         0.4,
                                         plot_width * 2 + 0.8,
                                         "%.0f",
                                         orientation='top',
                                         min_color="000000",
                                         max_color="0000ff",
                                         mid_color=None,
                                         labelattrs=[text.size(-3)])
    c.insert(color_key, [trafo.translate(0, plot_width + 0.4)])
    c.text(plot_width * 1.0 + 0.4, plot_width + 1.3, "Interactions per bin",
           [text.halign.center, text.valign.bottom, text.size(-2)])

    c.writePDFfile(out_fname)
Esempio n. 2
0
def _bin_hic_regions(hic1, hic2):
    """Return {region: dynamically_bin(hic1, hic2, region)} for hic1's 'unbinned_counts' keys."""
    heatmaps = {}
    for key in hic1.keys():
        if 'unbinned_counts' not in key:
            continue
        # Keys are expected to start with the region number before the first '.'.
        region = int(key.split('.')[0])
        heatmaps[region] = dynamically_bin(hic1, hic2, region)
    return heatmaps


def _normalized_copy(counts, expected):
    """Divide counts by expected in place wherever expected > 0 and return a copy."""
    positive = numpy.where(expected > 0.0)[0]
    counts[positive] /= expected[positive]
    return numpy.copy(counts)


def _valid_strands(fc, fragments, regions, index):
    """Return the strand flags of the fragments in region `index` that pass fc.filter."""
    first = regions['start_frag'][index]
    last = regions['stop_frag'][index]
    valid = numpy.where(fc.filter[first:last])[0]
    return fragments['strand'][first:last][valid]


def _strand_submatrix(heatmap, strands):
    """Keep the strand == 0 rows and strand == 1 columns of a fragment heatmap."""
    # NOTE(review): presumably forward x reverse primers, mirroring the
    # 'compact' 5C arrays -- confirm against the hifive documentation.
    rows = numpy.where(strands == 0)[0]
    cols = numpy.where(strands == 1)[0]
    return heatmap[rows, :, :][:, cols, :]


def _raw_image(heatmap):
    """Plot the count layer of a heatmap with a 0/1 'count observed' layer as second channel."""
    observed = numpy.zeros((heatmap.shape[0], heatmap.shape[1]),
                           dtype=numpy.float32)
    observed[numpy.where(heatmap[:, :, 0] > 0.0)] = 1
    return hifive.plotting.plot_full_array(
        numpy.dstack((heatmap[:, :, 0], observed)), symmetricscaling=False)


def _collect_signal(fivec_fnames, hm_phillips, hm_nora):
    """Gather 5C and HiC signal vectors plus the example heatmap images.

    Returns ``(data, imgs, ratio1, ratio2)``.  ``data`` maps
    '<method>_<dataset>', 'Raw_<dataset>' and 'HiC_<dataset>' to flat arrays;
    ``imgs`` maps the same style of key to plotted heatmaps; ``ratio1`` and
    ``ratio2`` are shape[1] / shape[0] of the displayed Phillips region and
    of the Nora region respectively.
    """
    display_region = 6  # the Phillips region shown in the figure
    data = {}
    imgs = {}
    ratio1 = 0
    ratio2 = 0
    for meth in ['Prob', 'Bin', 'Exp', 'Exp-KR']:
        # Phillips: pool every region for which a heatmap is returned.
        fc = hifive.FiveC(fivec_fnames["%s_Phillips" % meth])
        fragments = fc.frags['fragments'][...]
        regions = fc.frags['regions'][...]
        counts = numpy.zeros(0, dtype=numpy.float64)
        expected = numpy.zeros(0, dtype=numpy.float64)
        hic_counts = numpy.zeros(0, dtype=numpy.float64)
        hic_expected = numpy.zeros(0, dtype=numpy.float64)
        for i in range(regions.shape[0]):
            heatmap = fc.cis_heatmap(i, datatype='fragment', arraytype='compact',
                                     binsize=0, skipfiltered=True)
            if heatmap is None:
                continue
            counts = numpy.hstack((counts, heatmap[:, :, 0].ravel()))
            expected = numpy.hstack((expected, heatmap[:, :, 1].ravel()))
            shown = (i == display_region)
            if shown:
                ratio1 = heatmap.shape[1] / float(heatmap.shape[0])
                imgs["%s_Phillips" % meth] = hifive.plotting.plot_full_array(
                    heatmap, symmetricscaling=False)
            if meth != 'Prob':
                continue
            # The raw and HiC reference data are collected once, on the
            # 'Prob' pass, and reused for every method.
            if shown:
                imgs["Raw_Phillips"] = _raw_image(heatmap)
            hic = _strand_submatrix(
                hm_phillips[i], _valid_strands(fc, fragments, regions, i))
            hic_counts = numpy.hstack((hic_counts, hic[:, :, 0].ravel()))
            hic_expected = numpy.hstack((hic_expected, hic[:, :, 1].ravel()))
            if shown:
                imgs["HiC_Phillips"] = hifive.plotting.plot_full_array(
                    hic, symmetricscaling=False)
        if meth == 'Prob':
            data["Raw_Phillips"] = numpy.copy(counts)
            data["HiC_Phillips"] = _normalized_copy(hic_counts, hic_expected)
        data["%s_Phillips" % meth] = _normalized_copy(counts, expected)

        # Nora: a single region (index 0).
        fc = hifive.FiveC(fivec_fnames["%s_Nora" % meth])
        heatmap = fc.cis_heatmap(0, datatype='fragment', arraytype='compact',
                                 binsize=0, skipfiltered=True)
        ratio2 = heatmap.shape[1] / float(heatmap.shape[0])
        imgs["%s_Nora" % meth] = hifive.plotting.plot_full_array(
            heatmap, symmetricscaling=False)
        counts = heatmap[:, :, 0].ravel()
        expected = heatmap[:, :, 1].ravel()
        if meth == 'Prob':
            imgs["Raw_Nora"] = _raw_image(heatmap)
            data["Raw_Nora"] = numpy.copy(counts)
            fragments = fc.frags['fragments'][...]
            regions = fc.frags['regions'][...]
            # Restrict the strand flags to filtered-in fragments exactly as
            # the Phillips branch does; the 5C heatmap above was built with
            # skipfiltered=True, so the unfiltered mask only lined up when no
            # fragment had been filtered out.
            hic = _strand_submatrix(
                hm_nora[0], _valid_strands(fc, fragments, regions, 0))
            imgs["HiC_Nora"] = hifive.plotting.plot_full_array(
                hic, symmetricscaling=False)
            data["HiC_Nora"] = _normalized_copy(hic[:, :, 0].ravel(),
                                                hic[:, :, 1].ravel())
        data["%s_Nora" % meth] = _normalized_copy(counts, expected)
    return data, imgs, ratio1, ratio2


def main():
    """Compare no-distance 5C normalizations against HiC and draw the figure.

    ``sys.argv[1]`` is the output PDF path.  A tab-separated table of the
    correlations is written next to it with a ``.txt`` extension.  The
    function relies on the module-level names ``methods``, ``method_colors``,
    ``painter`` and ``dynamically_bin``, which are defined outside this block.
    """
    out_fname = sys.argv[1]
    basedir = '/'.join(os.path.dirname(os.path.realpath(__file__)).split('/')[:-2])
    hic_fnames = [
        "%s/Data/HiC/HiCPipe/HM/mm9_ESC_NcoI_Phillips.hch" % basedir,
        "%s/Data/HiC/HiCPipe/HM/mm9_ESC_HindIII_Phillips.hch" % basedir,
        "%s/Data/HiC/HiCPipe/HM/mm9_ESC_NcoI_Nora.hch" % basedir,
        "%s/Data/HiC/HiCPipe/HM/mm9_ESC_HindIII_Nora.hch" % basedir,
    ]
    fivec_fnames = {
        "Prob_Phillips":"%s/Data/FiveC/HiFive/Phillips_ESC_probnodist.fcp" % basedir,
        "Prob_Nora":"%s/Data/FiveC/HiFive/Nora_ESC_male_E14_probnodist.fcp" % basedir,
        "Bin_Phillips":"%s/Data/FiveC/HiFive/Phillips_ESC_binnodist.fcp" % basedir,
        "Bin_Nora":"%s/Data/FiveC/HiFive/Nora_ESC_male_E14_binnodist.fcp" % basedir,
        "Exp_Phillips":"%s/Data/FiveC/HiFive/Phillips_ESC_expnodist.fcp" % basedir,
        "Exp_Nora":"%s/Data/FiveC/HiFive/Nora_ESC_male_E14_expnodist.fcp" % basedir,
        "Exp-KR_Phillips":"%s/Data/FiveC/HiFive/Phillips_ESC_expKRnodist.fcp" % basedir,
        "Exp-KR_Nora":"%s/Data/FiveC/HiFive/Nora_ESC_male_E14_expKRnodist.fcp" % basedir,
    }

    # Keep the HiC files open until every binned heatmap has been consumed,
    # then close them even if something fails along the way.
    hic_files = []
    try:
        for fname in hic_fnames:
            hic_files.append(h5py.File(fname, 'r'))
        hic_phillips1, hic_phillips2, hic_nora1, hic_nora2 = hic_files
        hm_phillips = _bin_hic_regions(hic_phillips1, hic_phillips2)
        hm_nora = _bin_hic_regions(hic_nora1, hic_nora2)
        data, imgs, ratio1, ratio2 = _collect_signal(
            fivec_fnames, hm_phillips, hm_nora)
    finally:
        for handle in hic_files:
            handle.close()

    # Correlate log(5C) with log(HiC) over pairs that are positive in both.
    correlations = {}
    for meth in methods:
        for name in ["Phillips", "Nora"]:
            fivec_signal = data["%s_%s" % (meth, name)]
            hic_signal = data["HiC_%s" % name]
            valid = numpy.where((fivec_signal > 0.0) & (hic_signal > 0.0))
            correlations["%s_%s" % (meth, name)] = numpy.corrcoef(
                numpy.log(fivec_signal[valid]),
                numpy.log(hic_signal[valid]))[0, 1]

    # Swap only the extension: replacing every 'pdf' substring would also
    # rewrite directory names, and would leave a name without 'pdf' unchanged
    # so that the table got overwritten by the PDF.
    summary_fname = os.path.splitext(out_fname)[0] + '.txt'
    with open(summary_fname, 'w') as output:
        output.write("Method\tPhillips\tNora\n")
        for meth in methods:
            row = [meth]
            for name in ["Phillips", "Nora"]:
                row.append(str(correlations["%s_%s" % (meth, name)]))
            output.write('\t'.join(row) + '\n')

    width = 16.8
    spacer = 0.3
    c = canvas.canvas()
    plot_width = (width - spacer * 3.0 - 0.4) / 4.0

    # Heatmap columns: Phillips in the top row, Nora in the bottom row.
    column_titles = {"Raw":"Raw", "Prob":"HiFive", "HiC":"HiC"}
    for i, meth in enumerate(["Raw", "Prob", "HiC"]):
        c.text(plot_width * (i + 1.5) + spacer * (i + 1), (ratio1 + ratio2) * plot_width + spacer + 0.1,
               "%s" % column_titles[meth], [text.halign.center, text.valign.bottom, text.size(-2)])
        c.insert(bitmap.bitmap(0, 0, imgs["%s_Phillips" % meth], width=plot_width),
                 [trafo.translate((i + 1) * (plot_width + spacer), plot_width * ratio2 + spacer)])
        c.insert(bitmap.bitmap(0, 0, imgs["%s_Nora" % meth], width=plot_width),
                 [trafo.translate((i + 1) * (plot_width + spacer), 0)])

    # Bar chart of the correlations, placed left of the Phillips row.
    g = graph.graphxy(width=plot_width - 0.8, height=plot_width * ratio1,
                      x=graph.axis.nestedbar(painter=graph.axis.painter.bar(nameattrs=None)),
                      y=graph.axis.lin(painter=painter),
                      x2=graph.axis.lin(parter=None, min=0, max=1),
                      y2=graph.axis.lin(parter=None, min=0, max=1))
    for i, meth in enumerate(methods):
        Y = numpy.zeros(2, dtype=numpy.float32)
        col = method_colors[meth]
        for j, name in enumerate(["Phillips", "Nora"]):
            Y[j] = correlations["%s_%s" % (meth, name)]
        # list(...) keeps this a real sequence on Python 3, where zip is lazy.
        bars = list(zip(zip(range(Y.shape[0]), [i] * Y.shape[0]), Y))
        g.plot(graph.data.points(bars, xname=1, y=2),
               [graph.style.changebar([col])])
    g.text(-0.8, plot_width * ratio1 * 0.5, "Correlation",
           [text.halign.center, text.valign.top, text.size(-3), trafo.rotate(90)])
    g.text((plot_width - 0.8) * 0.25, -0.1, "Phillips",
           [text.halign.center, text.valign.top, text.size(-3)])
    g.text((plot_width - 0.8) * 0.75, -0.1, "Nora",
           [text.halign.center, text.valign.top, text.size(-3)])
    c.insert(g, [trafo.translate(0.8, plot_width * ratio2 + spacer)])

    # Row labels on the right edge, centred on their rows.  The Nora row
    # spans [0, ratio2 * plot_width] and the Phillips row starts at
    # ratio2 * plot_width + spacer with height ratio1 * plot_width; the two
    # ratios used to be swapped here, which misplaced the labels whenever
    # the two heatmaps had different aspect ratios.
    c.text(width, (ratio2 + ratio1 * 0.5) * plot_width + spacer, "Phillips",
           [text.halign.center, text.valign.top, trafo.rotate(-90), text.size(-2)])
    c.text(width, ratio2 * 0.5 * plot_width, "Nora",
           [text.halign.center, text.valign.top, trafo.rotate(-90), text.size(-2)])

    # Color legend for the bar chart.
    legend_names = {"Raw":"Raw", "Prob":"HiFive-Probability", "Exp":"HiFive-Express", "Bin":"HiFive-Binning",
                    "Exp-KR":"HiFive-ExpressKR", "Exp-KR-dist":"HiFive-ExpressKR-dist"}
    for i, meth in enumerate(methods):
        c.fill(path.rect(1.0, plot_width * ratio1 - 1.0 - i * 0.5, 0.2, 0.2), [method_colors[meth]])
        c.text(1.3, plot_width * ratio1 - 0.9 - i * 0.5, "%s" % legend_names[meth],
               [text.halign.left, text.valign.middle, text.size(-3)])
    c.writePDFfile(out_fname)
def main():
    """Draw the distance vs. no-distance correlation comparison figure.

    ``sys.argv[1]`` is the output PDF path.  HiC data from the NcoI and
    HindIII projects are dynamically binned onto the filtered-in fragments of
    every Phillips 5C region and of the first Nora region, correlations are
    computed by ``find_correlations`` with and without the distance model,
    and one panel per dataset plus a key is laid out left to right.
    """
    out_fname = sys.argv[1]
    script_dir = os.path.dirname(os.path.realpath(__file__))
    basedir = '/'.join(script_dir.split('/')[:-2])
    hic_fname1 = "%s/Data/HiC/HiFive/mm9_ESC_NcoI_prob.hcp" % basedir
    hic_fname2 = "%s/Data/HiC/HiFive/mm9_ESC_HindIII_prob.hcp" % basedir
    fivec_fnames = {
        "Prob_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_prob.fcp" % basedir,
        "Prob_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_prob.fcp" % basedir,
        "Bin_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_bin.fcp" % basedir,
        "Bin_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_bin.fcp" % basedir,
        "Exp_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_exp.fcp" % basedir,
        "Exp_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_exp.fcp" % basedir,
        "Exp-KR_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_expKR.fcp" % basedir,
        "Exp-KR_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_expKR.fcp" % basedir,
    }

    def valid_fragment_bounds(fivec, frag_table, region_table, index):
        """Return the (start, stop) rows of region `index` that pass fivec.filter."""
        first = region_table['start_frag'][index]
        last = region_table['stop_frag'][index]
        bounds = numpy.hstack((frag_table['start'][first:last].reshape(-1, 1),
                               frag_table['stop'][first:last].reshape(-1, 1)))
        keep = numpy.where(fivec.filter[first:last])[0]
        return bounds[keep, :]

    hic1 = hifive.HiC(hic_fname1)
    hic2 = hifive.HiC(hic_fname2)

    # Phillips: one binned HiC heatmap per 5C region.
    phillips_hm = {}
    fc = hifive.FiveC(fivec_fnames["Prob_Phillips"])
    fragments = fc.frags['fragments'][...]
    regions = fc.frags['regions'][...]
    for i in range(regions.shape[0]):
        bounds = valid_fragment_bounds(fc, fragments, regions, i)
        phillips_hm[i] = dynamically_bin(hic1, hic2,
                                         regions['chromosome'][i], bounds)

    # Nora: only the first region is used.
    fc = hifive.FiveC(fivec_fnames["Prob_Nora"])
    fragments = fc.frags['fragments'][...]
    regions = fc.frags['regions'][...]
    bounds = valid_fragment_bounds(fc, fragments, regions, 0)
    nora_hm = dynamically_bin(hic1, hic2, regions['chromosome'][0], bounds)

    hic_hm = {'Phillips': phillips_hm, 'Nora': nora_hm}
    dist_corr = find_correlations(hic_hm, fivec_fnames, out_fname, True)
    nodist_corr = find_correlations(hic_hm, fivec_fnames, out_fname, False)

    # Layout: two equally sized square panels followed by a narrower key.
    c = canvas.canvas()
    width = 16.8
    spacer = 0.4
    plot_width = (width - spacer * 2) / 2.5
    plot_height = plot_width
    key_width = width - (plot_width + spacer) * 2
    step = plot_width + spacer

    panels = []
    for dataset in ('Phillips', 'Nora'):
        panels.append(plot_correlation_diffs(dist_corr, nodist_corr, dataset,
                                             plot_width, plot_height))
    panels.append(plot_key(key_width, plot_height))

    c.insert(panels[0])
    c.insert(panels[1], [trafo.translate(step, 0)])
    c.insert(panels[2], [trafo.translate(step * 2, 0)])

    # Panel letters at the top-left corner of each data panel.
    for x_left, letter in ((0, "a"), (step, "b")):
        c.text(x_left, plot_height, letter,
               [text.halign.left, text.valign.top, text.size(-1)])

    c.writePDFfile(out_fname)
def find_correlations(hic_hm, fivec_fnames, out_fname, dist):
    """Correlate normalized 5C signal with HiC signal per method and dataset.

    Parameters:
        hic_hm: dict with a 'Phillips' entry mapping region index to a binned
            HiC heatmap and a 'Nora' entry holding a single heatmap.  The
            last axis is read as (counts, expected).
        fivec_fnames: dict mapping '<method>_Phillips' / '<method>_Nora' to
            5C project filenames for the methods 'Prob', 'Bin', 'Exp' and
            'Exp-KR'.
        out_fname: path of the figure being produced; only used to derive
            the name of the text summary.
        dist: when true the files are used as named; when false '.fcp' is
            replaced by 'nodist.fcp' in every filename.

    Returns:
        dict mapping '<method>_<dataset>' to the correlation coefficient of
        log(5C counts/expected) against log(HiC counts/expected), computed
        over the entries that are positive in both.  The methods iterated
        come from the module-level ``methods`` sequence, which is defined
        outside this block.

    Side effects:
        When ``dist`` is false, a tab-separated table of the correlations is
        written to ``out_fname`` with its extension replaced by '.txt'.
    """
    import os  # function-scope import: only needed for the summary filename

    def open_fivec(key):
        # Load the 5C project for `key`, switching to the no-distance file
        # when requested.
        fname = fivec_fnames[key]
        if not dist:
            fname = fname.replace('.fcp', 'nodist.fcp')
        return hifive.FiveC(fname)

    def valid_strands(fc, fragments, regions, index):
        # Strand flags of the fragments in region `index` that pass fc.filter.
        first = regions['start_frag'][index]
        last = regions['stop_frag'][index]
        valid = numpy.where(fc.filter[first:last])[0]
        return fragments['strand'][first:last][valid]

    def strand_submatrix(heatmap, strands):
        # Keep strand == 0 rows and strand == 1 columns.
        # NOTE(review): presumably forward x reverse primers, mirroring the
        # 'compact' 5C arrays -- confirm against the hifive documentation.
        rows = numpy.where(strands == 0)[0]
        cols = numpy.where(strands == 1)[0]
        return heatmap[rows, :, :][:, cols, :]

    def normalized_copy(counts, expected):
        # Divide in place wherever expected > 0, then hand back a copy.
        positive = numpy.where(expected > 0.0)[0]
        counts[positive] /= expected[positive]
        return numpy.copy(counts)

    data = {}
    for meth in ['Prob', 'Bin', 'Exp', 'Exp-KR']:
        # Phillips: pool every region for which a heatmap is returned.
        fc = open_fivec('%s_Phillips' % meth)
        fragments = fc.frags['fragments'][...]
        regions = fc.frags['regions'][...]
        counts = numpy.zeros(0, dtype=numpy.float64)
        expected = numpy.zeros(0, dtype=numpy.float64)
        hic_counts = numpy.zeros(0, dtype=numpy.float64)
        hic_expected = numpy.zeros(0, dtype=numpy.float64)
        for i in range(regions.shape[0]):
            heatmap = fc.cis_heatmap(i,
                                     datatype='fragment',
                                     arraytype='compact',
                                     binsize=0,
                                     skipfiltered=True)
            if heatmap is None:
                continue
            counts = numpy.hstack((counts, heatmap[:, :, 0].ravel()))
            expected = numpy.hstack((expected, heatmap[:, :, 1].ravel()))
            if meth == 'Prob':
                # The HiC reference is collected once, on the 'Prob' pass,
                # and reused for every method.
                hic = strand_submatrix(
                    hic_hm['Phillips'][i],
                    valid_strands(fc, fragments, regions, i))
                hic_counts = numpy.hstack((hic_counts, hic[:, :, 0].ravel()))
                hic_expected = numpy.hstack((hic_expected,
                                             hic[:, :, 1].ravel()))
        if meth == 'Prob':
            data["HiC_Phillips"] = normalized_copy(hic_counts, hic_expected)
        data["%s_Phillips" % meth] = normalized_copy(counts, expected)

        # Nora: a single region (index 0).
        fc = open_fivec('%s_Nora' % meth)
        heatmap = fc.cis_heatmap(0,
                                 datatype='fragment',
                                 arraytype='compact',
                                 binsize=0,
                                 skipfiltered=True)
        counts = heatmap[:, :, 0].ravel()
        expected = heatmap[:, :, 1].ravel()
        if meth == 'Prob':
            fragments = fc.frags['fragments'][...]
            regions = fc.frags['regions'][...]
            # Restrict the strand flags to filtered-in fragments, as the
            # Phillips branch does.  The 5C heatmap above was requested with
            # skipfiltered=True, so the previous unfiltered mask only lined
            # up with it when no fragment had been filtered out.
            hic = strand_submatrix(hic_hm['Nora'],
                                   valid_strands(fc, fragments, regions, 0))
            data["HiC_Nora"] = normalized_copy(hic[:, :, 0].ravel(),
                                               hic[:, :, 1].ravel())
        data["%s_Nora" % meth] = normalized_copy(counts, expected)

    # Correlate in log space over the pairs that are positive in both sets.
    correlations = {}
    for meth in methods:
        for name in ["Phillips", "Nora"]:
            fivec_signal = data["%s_%s" % (meth, name)]
            hic_signal = data["HiC_%s" % name]
            valid = numpy.where((fivec_signal > 0.0) & (hic_signal > 0.0))
            correlations["%s_%s" % (meth, name)] = numpy.corrcoef(
                numpy.log(fivec_signal[valid]),
                numpy.log(hic_signal[valid]))[0, 1]

    if not dist:
        # Swap only the extension: replacing every 'pdf' substring would also
        # rewrite directory names, and would leave a name without 'pdf'
        # unchanged so that the table was written to the figure's own path.
        summary_fname = os.path.splitext(out_fname)[0] + '.txt'
        with open(summary_fname, 'w') as output:
            output.write("Method\tPhillips\tNora\n")
            for meth in methods:
                row = [meth]
                for name in ["Phillips", "Nora"]:
                    row.append(str(correlations["%s_%s" % (meth, name)]))
                output.write('\t'.join(row) + '\n')
    return correlations