def main():
    """Draw the raw and corrected 5C distance histograms side by side as a PDF.

    The output PDF path is read from ``sys.argv[1]``. The 5C project file is
    located relative to the directory two levels above this script. The
    helpers ``get_histograms`` and ``plot_distances`` are defined elsewhere
    in this file.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    basedir = '/'.join(script_dir.split('/')[:-2])
    out_fname = sys.argv[1]
    fivec = hifive.FiveC(
        "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_prob.fcp" % basedir)
    num_bins = 200
    width = 16.92
    plot_width = (width) / 2.0 - 2
    c = canvas.canvas()

    # Histogram both datatypes, then widen the value range so that the two
    # panels (and the key) share one scale.
    raw_hist, hist_min, hist_max, raw_ranges = get_histograms(
        fivec, num_bins, 'raw')
    corrected = get_histograms(fivec, num_bins, 'fragment')
    corrected_hist = corrected[0]
    corrected_ranges = corrected[3]
    hist_min = min(hist_min, corrected[1])
    hist_max = max(hist_max, corrected[2])

    # Left panel sits at the origin, right panel is shifted by one panel
    # width plus a 0.8 gap.
    c.insert(plot_distances(fivec, plot_width, raw_hist, hist_min, hist_max,
                            raw_ranges))
    c.insert(plot_distances(fivec, plot_width, corrected_hist, hist_min,
                            hist_max, corrected_ranges),
             [trafo.translate(plot_width + 0.8, 0)])

    # Horizontal centres of the left and right panels.
    panel_centers = (plot_width * 0.5, plot_width * 1.5 + 0.8)

    c.text(-0.6, plot_width * 0.5, "Log count",
           [text.halign.center, text.valign.bottom, text.size(-3),
            trafo.rotate(90)])
    for x_center in panel_centers:
        c.text(x_center, -0.4, "Log distance (Kb)",
               [text.halign.center, text.valign.top, text.size(-3)])
    for x_center, title in zip(panel_centers, ("Raw", "Corrected")):
        c.text(x_center, plot_width + 0.1, title,
               [text.halign.center, text.valign.bottom, text.size(-2)])

    # Shared colour key spanning both panels, drawn above them.
    key = hifive.plotting.plot_key(
        hist_min, hist_max, 0.4, plot_width * 2 + 0.8, "%.0f",
        orientation='top', min_color="000000", max_color="0000ff",
        mid_color=None, labelattrs=[text.size(-3)])
    c.insert(key, [trafo.translate(0, plot_width + 0.4)])
    c.text(plot_width * 1.0 + 0.4, plot_width + 1.3, "Interactions per bin",
           [text.halign.center, text.valign.bottom, text.size(-2)])
    c.writePDFfile(out_fname)
def _bin_hic_regions(fname1, fname2):
    """Dynamically bin every region stored in a pair of HiC heatmap files.

    Both files are opened read-only with h5py and are always closed again,
    even if binning fails. Only keys of the first file that contain
    'unbinned_counts' are used; the integer before the first '.' in the key
    is taken as the region index.

    Returns a dict mapping region index to whatever ``dynamically_bin``
    (defined elsewhere in this file) returns for that region.
    NOTE(review): this assumes ``dynamically_bin`` returns in-memory arrays
    that stay usable after the files are closed -- confirm.
    """
    hic1 = h5py.File(fname1, 'r')
    try:
        hic2 = h5py.File(fname2, 'r')
        try:
            heatmaps = {}
            for key in hic1.keys():
                if 'unbinned_counts' not in key:
                    continue
                region = int(key.split('.')[0])
                heatmaps[region] = dynamically_bin(hic1, hic2, region)
            return heatmaps
        finally:
            hic2.close()
    finally:
        hic1.close()


def _strand_submatrix(heatmap, fc, fragments, regions, region):
    """Return the strand-0 rows by strand-1 columns of a region's heatmap.

    Strand flags are taken from the region's fragments that pass
    ``fc.filter``, so the heatmap is expected to be indexed by unfiltered
    fragments only.
    """
    start = regions['start_frag'][region]
    stop = regions['stop_frag'][region]
    valid = numpy.where(fc.filter[start:stop])[0]
    strands = fragments['strand'][start:stop][valid]
    rows = numpy.where(strands == 0)[0]
    cols = numpy.where(strands == 1)[0]
    return heatmap[rows, :, :][:, cols, :]


def _raw_with_mask(heatmap):
    """Stack the count layer of a heatmap with a 0/1 mask of non-zero counts."""
    mask = numpy.zeros((heatmap.shape[0], heatmap.shape[1]),
                       dtype=numpy.float32)
    mask[numpy.where(heatmap[:, :, 0] > 0.0)] = 1
    return numpy.dstack((heatmap[:, :, 0], mask))


def _enrichment(observed, expected):
    """Return a copy of observed, divided by expected wherever expected > 0."""
    ratio = numpy.copy(observed)
    nonzero = numpy.where(expected > 0.0)[0]
    ratio[nonzero] /= expected[nonzero]
    return ratio


def main():
    """Compare 5C normalizations against HiC and write a figure plus a table.

    The output PDF path is read from ``sys.argv[1]``; a tab-separated table
    of the correlations is written next to it with a ``.txt`` extension.
    For each 5C method and each dataset (Phillips, Nora) the log
    observed/expected 5C signal is correlated with the log observed/expected
    HiC signal over fragment pairs where both are positive.

    Relies on ``methods``, ``method_colors``, ``painter`` and
    ``dynamically_bin`` being defined elsewhere in this file.
    """
    out_fname = sys.argv[1]
    basedir = '/'.join(
        os.path.dirname(os.path.realpath(__file__)).split('/')[:-2])
    hm_phillips = _bin_hic_regions(
        "%s/Data/HiC/HiCPipe/HM/mm9_ESC_NcoI_Phillips.hch" % basedir,
        "%s/Data/HiC/HiCPipe/HM/mm9_ESC_HindIII_Phillips.hch" % basedir)
    hm_nora = _bin_hic_regions(
        "%s/Data/HiC/HiCPipe/HM/mm9_ESC_NcoI_Nora.hch" % basedir,
        "%s/Data/HiC/HiCPipe/HM/mm9_ESC_HindIII_Nora.hch" % basedir)
    fivec_fnames = {
        "Prob_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_probnodist.fcp" % basedir,
        "Prob_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_probnodist.fcp" % basedir,
        "Bin_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_binnodist.fcp" % basedir,
        "Bin_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_binnodist.fcp" % basedir,
        "Exp_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_expnodist.fcp" % basedir,
        "Exp_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_expnodist.fcp" % basedir,
        "Exp-KR_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_expKRnodist.fcp" % basedir,
        "Exp-KR_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_expKRnodist.fcp" % basedir,
    }
    # Phillips region whose heatmaps are drawn in the figure.
    display_region = 6
    heatmap_args = dict(datatype='fragment', arraytype='compact', binsize=0,
                        skipfiltered=True)
    data = {}
    imgs = {}
    ratio1 = 0
    ratio2 = 0
    for meth in ['Prob', 'Bin', 'Exp', 'Exp-KR']:
        # ---- Phillips: pool all regions that yield a heatmap ----
        fc = hifive.FiveC(fivec_fnames["%s_Phillips" % meth])
        fragments = fc.frags['fragments'][...]
        regions = fc.frags['regions'][...]
        # Seeding with an empty float64 array keeps the pooled dtype float64.
        seed = numpy.zeros(0, dtype=numpy.float64)
        counts = [seed]
        expected = [seed]
        hic_counts = [seed]
        hic_expected = [seed]
        for i in range(regions.shape[0]):
            temp = fc.cis_heatmap(i, **heatmap_args)
            if temp is None:
                continue
            counts.append(temp[:, :, 0].ravel())
            expected.append(temp[:, :, 1].ravel())
            if i == display_region:
                ratio1 = temp.shape[1] / float(temp.shape[0])
                imgs["%s_Phillips" % meth] = hifive.plotting.plot_full_array(
                    temp, symmetricscaling=False)
            # Raw and HiC panels do not depend on the 5C method, so they are
            # only built during the 'Prob' pass.
            if meth != 'Prob':
                continue
            if i == display_region:
                imgs["Raw_Phillips"] = hifive.plotting.plot_full_array(
                    _raw_with_mask(temp), symmetricscaling=False)
            hic = _strand_submatrix(hm_phillips[i], fc, fragments, regions, i)
            hic_counts.append(hic[:, :, 0].ravel())
            hic_expected.append(hic[:, :, 1].ravel())
            if i == display_region:
                imgs["HiC_Phillips"] = hifive.plotting.plot_full_array(
                    hic, symmetricscaling=False)
        counts = numpy.hstack(counts)
        expected = numpy.hstack(expected)
        if meth == 'Prob':
            data["Raw_Phillips"] = numpy.copy(counts)
            data["HiC_Phillips"] = _enrichment(numpy.hstack(hic_counts),
                                               numpy.hstack(hic_expected))
        data["%s_Phillips" % meth] = _enrichment(counts, expected)

        # ---- Nora: single region (index 0) ----
        fc = hifive.FiveC(fivec_fnames["%s_Nora" % meth])
        temp = fc.cis_heatmap(0, **heatmap_args)
        ratio2 = temp.shape[1] / float(temp.shape[0])
        imgs["%s_Nora" % meth] = hifive.plotting.plot_full_array(
            temp, symmetricscaling=False)
        counts = temp[:, :, 0].ravel()
        expected = temp[:, :, 1].ravel()
        if meth == 'Prob':
            imgs["Raw_Nora"] = hifive.plotting.plot_full_array(
                _raw_with_mask(temp), symmetricscaling=False)
            data["Raw_Nora"] = numpy.copy(counts)
            fragments = fc.frags['fragments'][...]
            regions = fc.frags['regions'][...]
            # Strand flags are restricted to fragments passing fc.filter,
            # matching the Phillips handling above.
            hic = _strand_submatrix(hm_nora[0], fc, fragments, regions, 0)
            imgs["HiC_Nora"] = hifive.plotting.plot_full_array(
                hic, symmetricscaling=False)
            data["HiC_Nora"] = _enrichment(hic[:, :, 0].ravel(),
                                           hic[:, :, 1].ravel())
        data["%s_Nora" % meth] = _enrichment(counts, expected)

    # Swap only the file extension; a blanket replace('pdf', 'txt') would
    # also rewrite any "pdf" appearing elsewhere in the path.
    if out_fname.lower().endswith('.pdf'):
        txt_fname = out_fname[:-len('.pdf')] + '.txt'
    else:
        txt_fname = out_fname + '.txt'
    correlations = {}
    with open(txt_fname, 'w') as output:
        output.write("Method\tPhillips\tNora\n")
        for meth in methods:
            row = [meth]
            for name in ["Phillips", "Nora"]:
                key = "%s_%s" % (meth, name)
                fivec_signal = data[key]
                hic_signal = data["HiC_%s" % name]
                # Logs are only defined where both signals are positive.
                valid = numpy.where((fivec_signal > 0.0) & (hic_signal > 0.0))
                correlations[key] = numpy.corrcoef(
                    numpy.log(fivec_signal[valid]),
                    numpy.log(hic_signal[valid]))[0, 1]
                row.append(str(correlations[key]))
            output.write('\t'.join(row) + '\n')

    width = 16.8
    spacer = 0.3
    c = canvas.canvas()
    plot_width = (width - spacer * 3.0 - 0.4) / 4.0
    # Layout: the Nora row starts at y=0 with height plot_width * ratio2; the
    # Phillips row sits above it (offset by spacer) with height
    # plot_width * ratio1. Column 0 holds the bar chart, columns 1-3 the
    # heatmaps.
    column_titles = {"Raw": "Raw", "Prob": "HiFive", "HiC": "HiC"}
    for i, meth in enumerate(["Raw", "Prob", "HiC"]):
        c.text(plot_width * (i + 1.5) + spacer * (i + 1),
               (ratio1 + ratio2) * plot_width + spacer + 0.1,
               "%s" % column_titles[meth],
               [text.halign.center, text.valign.bottom, text.size(-2)])
        c.insert(bitmap.bitmap(0, 0, imgs["%s_Phillips" % meth],
                               width=plot_width),
                 [trafo.translate((i + 1) * (plot_width + spacer),
                                  plot_width * ratio2 + spacer)])
        c.insert(bitmap.bitmap(0, 0, imgs["%s_Nora" % meth],
                               width=plot_width),
                 [trafo.translate((i + 1) * (plot_width + spacer), 0)])
    g = graph.graphxy(
        width=plot_width - 0.8, height=plot_width * ratio1,
        x=graph.axis.nestedbar(painter=graph.axis.painter.bar(nameattrs=None)),
        y=graph.axis.lin(painter=painter),
        x2=graph.axis.lin(parter=None, min=0, max=1),
        y2=graph.axis.lin(parter=None, min=0, max=1))
    for i, meth in enumerate(methods):
        Y = numpy.zeros(2, dtype=numpy.float32)
        col = method_colors[meth]
        for j, name in enumerate(["Phillips", "Nora"]):
            Y[j] = correlations["%s_%s" % (meth, name)]
        # list() keeps the points a concrete sequence even where zip is lazy.
        points = list(zip(zip(range(Y.shape[0]), [i] * Y.shape[0]), Y))
        g.plot(graph.data.points(points, xname=1, y=2),
               [graph.style.changebar([col])])
    g.text(-0.8, plot_width * ratio1 * 0.5, "Correlation",
           [text.halign.center, text.valign.top, text.size(-3),
            trafo.rotate(90)])
    g.text((plot_width - 0.8) * 0.25, -0.1, "Phillips",
           [text.halign.center, text.valign.top, text.size(-3)])
    g.text((plot_width - 0.8) * 0.75, -0.1, "Nora",
           [text.halign.center, text.valign.top, text.size(-3)])
    c.insert(g, [trafo.translate(0.8, plot_width * ratio2 + spacer)])
    # Row labels are centred on the rows as placed above: Phillips spans
    # [ratio2 * pw + spacer, (ratio1 + ratio2) * pw + spacer] and Nora spans
    # [0, ratio2 * pw].
    c.text(width, (ratio2 + ratio1 * 0.5) * plot_width + spacer, "Phillips",
           [text.halign.center, text.valign.top, trafo.rotate(-90),
            text.size(-2)])
    c.text(width, ratio2 * 0.5 * plot_width, "Nora",
           [text.halign.center, text.valign.top, trafo.rotate(-90),
            text.size(-2)])
    meth_names = {"Raw": "Raw", "Prob": "HiFive-Probability",
                  "Exp": "HiFive-Express", "Bin": "HiFive-Binning",
                  "Exp-KR": "HiFive-ExpressKR",
                  "Exp-KR-dist": "HiFive-ExpressKR-dist"}
    # NOTE(review): the legend offset uses ratio1 although it is drawn in the
    # Nora row (height plot_width * ratio2) -- confirm the intended anchor.
    for i, meth in enumerate(methods):
        c.fill(path.rect(1.0, plot_width * ratio1 - 1.0 - i * 0.5, 0.2, 0.2),
               [method_colors[meth]])
        c.text(1.3, plot_width * ratio1 - 0.9 - i * 0.5,
               "%s" % meth_names[meth],
               [text.halign.left, text.valign.middle, text.size(-3)])
    c.writePDFfile(out_fname)
def main():
    """Build the two-panel correlation-difference figure and write it as a PDF.

    The output path is read from ``sys.argv[1]``. HiC data are dynamically
    binned to the filtered 5C fragment bounds of every Phillips region and of
    the first Nora region, then ``find_correlations`` is run once with and
    once without the distance flag. ``dynamically_bin``,
    ``find_correlations``, ``plot_correlation_diffs`` and ``plot_key`` are
    defined elsewhere in this file.
    """

    def filtered_bounds(fivec, frags, regs, index):
        # (start, stop) coordinate pairs of the region's fragments, keeping
        # only the rows whose filter entry is non-zero.
        first = regs['start_frag'][index]
        last = regs['stop_frag'][index]
        bounds = numpy.hstack((frags['start'][first:last].reshape(-1, 1),
                               frags['stop'][first:last].reshape(-1, 1)))
        keep = numpy.where(fivec.filter[first:last])[0]
        return bounds[keep, :]

    out_fname = sys.argv[1]
    script_dir = os.path.dirname(os.path.realpath(__file__))
    basedir = '/'.join(script_dir.split('/')[:-2])
    hic_fname1 = "%s/Data/HiC/HiFive/mm9_ESC_NcoI_prob.hcp" % basedir
    hic_fname2 = "%s/Data/HiC/HiFive/mm9_ESC_HindIII_prob.hcp" % basedir
    fivec_fnames = {
        "Prob_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_prob.fcp" % basedir,
        "Prob_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_prob.fcp" % basedir,
        "Bin_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_bin.fcp" % basedir,
        "Bin_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_bin.fcp" % basedir,
        "Exp_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_exp.fcp" % basedir,
        "Exp_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_exp.fcp" % basedir,
        "Exp-KR_Phillips": "%s/Data/FiveC/HiFive/Phillips_ESC_expKR.fcp" % basedir,
        "Exp-KR_Nora": "%s/Data/FiveC/HiFive/Nora_ESC_male_E14_expKR.fcp" % basedir,
    }
    hic1 = hifive.HiC(hic_fname1)
    hic2 = hifive.HiC(hic_fname2)

    # Phillips: one binned HiC heatmap per 5C region, keyed by region index.
    phillips_hm = {}
    hic_hm = {'Phillips': phillips_hm}
    fc = hifive.FiveC(fivec_fnames["Prob_Phillips"])
    fragments = fc.frags['fragments'][...]
    regions = fc.frags['regions'][...]
    for region in range(fc.frags['regions'].shape[0]):
        bounds = filtered_bounds(fc, fragments, regions, region)
        phillips_hm[region] = dynamically_bin(
            hic1, hic2, regions['chromosome'][region], bounds)

    # Nora: only region 0 is used, stored directly under the dataset name.
    fc = hifive.FiveC(fivec_fnames["Prob_Nora"])
    fragments = fc.frags['fragments'][...]
    regions = fc.frags['regions'][...]
    bounds = filtered_bounds(fc, fragments, regions, 0)
    hic_hm['Nora'] = dynamically_bin(hic1, hic2, regions['chromosome'][0],
                                     bounds)

    dist_corr = find_correlations(hic_hm, fivec_fnames, out_fname, True)
    nodist_corr = find_correlations(hic_hm, fivec_fnames, out_fname, False)

    c = canvas.canvas()
    width = 16.8
    spacer = 0.4
    plot_width = (width - spacer * 2) / 2.5
    plot_height = plot_width
    # Whatever remains to the right of the two square panels holds the key.
    key_width = width - (plot_width + spacer) * 2
    phillips_img = plot_correlation_diffs(dist_corr, nodist_corr, 'Phillips',
                                          plot_width, plot_height)
    nora_img = plot_correlation_diffs(dist_corr, nodist_corr, 'Nora',
                                      plot_width, plot_height)
    key_img = plot_key(key_width, plot_height)
    panel_step = plot_width + spacer
    c.insert(phillips_img)
    c.insert(nora_img, [trafo.translate(panel_step, 0)])
    c.insert(key_img, [trafo.translate((plot_width + spacer) * 2, 0)])
    # Panel letters at the top-left corner of each panel.
    for x_offset, letter in ((0, "a"), (panel_step, "b")):
        c.text(x_offset, plot_height, letter,
               [text.halign.left, text.valign.top, text.size(-1)])
    c.writePDFfile(out_fname)
def _valid_region_strands(fc, fragments, regions, region):
    """Return the strand flags of the region's fragments that pass fc.filter."""
    start = regions['start_frag'][region]
    stop = regions['stop_frag'][region]
    valid = numpy.where(fc.filter[start:stop])[0]
    return fragments['strand'][start:stop][valid]


def _split_by_strand(heatmap, strands):
    """Keep strand-0 rows and strand-1 columns of a fragment-pair heatmap."""
    rows = numpy.where(strands == 0)[0]
    cols = numpy.where(strands == 1)[0]
    return heatmap[rows, :, :][:, cols, :]


def _observed_over_expected(observed, expected):
    """Return a copy of observed, divided by expected wherever expected > 0."""
    ratio = numpy.copy(observed)
    nonzero = numpy.where(expected > 0.0)[0]
    ratio[nonzero] /= expected[nonzero]
    return ratio


def find_correlations(hic_hm, fivec_fnames, out_fname, dist):
    """Correlate log-enrichment of each 5C method with HiC for both datasets.

    Parameters:
        hic_hm: dict with key 'Phillips' (mapping region index to a binned
            HiC heatmap) and key 'Nora' (a single binned HiC heatmap). Layer
            0 of a heatmap is read as counts and layer 1 as expected values.
        fivec_fnames: dict mapping '<method>_<dataset>' to a 5C project file
            name ending in '.fcp'.
        out_fname: path of the figure PDF; only used to derive the name of
            the text table written when ``dist`` is false.
        dist: if true, the files named in ``fivec_fnames`` are loaded as
            given; otherwise '.fcp' is replaced by 'nodist.fcp' in each name
            and the resulting correlations are also written to a
            tab-separated text file.

    Returns:
        dict mapping '<method>_<dataset>' to the Pearson correlation between
        log(5C observed/expected) and log(HiC observed/expected) over the
        fragment pairs where both are positive, for every method in the
        module-level ``methods`` sequence (defined elsewhere in this file).
    """
    heatmap_args = dict(datatype='fragment', arraytype='compact', binsize=0,
                        skipfiltered=True)
    data = {}
    for meth in ['Prob', 'Bin', 'Exp', 'Exp-KR']:
        # The HiC reference does not depend on the 5C method, so it is only
        # assembled during the 'Prob' pass.
        need_hic = meth == 'Prob'

        # ---- Phillips: pool all regions that yield a heatmap ----
        fname = fivec_fnames['%s_Phillips' % meth]
        if not dist:
            fname = fname.replace('.fcp', 'nodist.fcp')
        fc = hifive.FiveC(fname)
        fragments = fc.frags['fragments'][...]
        regions = fc.frags['regions'][...]
        # Seeding with an empty float64 array keeps the pooled dtype float64.
        seed = numpy.zeros(0, dtype=numpy.float64)
        counts = [seed]
        expected = [seed]
        hic_counts = [seed]
        hic_expected = [seed]
        for i in range(regions.shape[0]):
            temp = fc.cis_heatmap(i, **heatmap_args)
            if temp is None:
                continue
            counts.append(temp[:, :, 0].ravel())
            expected.append(temp[:, :, 1].ravel())
            if need_hic:
                strands = _valid_region_strands(fc, fragments, regions, i)
                hic = _split_by_strand(hic_hm['Phillips'][i], strands)
                hic_counts.append(hic[:, :, 0].ravel())
                hic_expected.append(hic[:, :, 1].ravel())
        if need_hic:
            data["HiC_Phillips"] = _observed_over_expected(
                numpy.hstack(hic_counts), numpy.hstack(hic_expected))
        data["%s_Phillips" % meth] = _observed_over_expected(
            numpy.hstack(counts), numpy.hstack(expected))

        # ---- Nora: single region (index 0) ----
        fname = fivec_fnames['%s_Nora' % meth]
        if not dist:
            fname = fname.replace('.fcp', 'nodist.fcp')
        fc = hifive.FiveC(fname)
        temp = fc.cis_heatmap(0, **heatmap_args)
        if need_hic:
            fragments = fc.frags['fragments'][...]
            regions = fc.frags['regions'][...]
            # Strand flags are restricted to fragments passing fc.filter,
            # matching the Phillips handling above.
            strands = _valid_region_strands(fc, fragments, regions, 0)
            hic = _split_by_strand(hic_hm['Nora'], strands)
            data["HiC_Nora"] = _observed_over_expected(
                hic[:, :, 0].ravel(), hic[:, :, 1].ravel())
        data["%s_Nora" % meth] = _observed_over_expected(
            temp[:, :, 0].ravel(), temp[:, :, 1].ravel())

    correlations = {}
    for meth in methods:
        for name in ["Phillips", "Nora"]:
            key = "%s_%s" % (meth, name)
            fivec_signal = data[key]
            hic_signal = data["HiC_%s" % name]
            # Logs are only defined where both signals are positive.
            valid = numpy.where((fivec_signal > 0.0) & (hic_signal > 0.0))
            correlations[key] = numpy.corrcoef(
                numpy.log(fivec_signal[valid]),
                numpy.log(hic_signal[valid]))[0, 1]

    if not dist:
        # Swap only the file extension; a blanket replace('pdf', 'txt') would
        # also rewrite any "pdf" appearing elsewhere in the path.
        if out_fname.lower().endswith('.pdf'):
            txt_fname = out_fname[:-len('.pdf')] + '.txt'
        else:
            txt_fname = out_fname + '.txt'
        with open(txt_fname, 'w') as output:
            output.write("Method\tPhillips\tNora\n")
            for meth in methods:
                row = [meth]
                for name in ["Phillips", "Nora"]:
                    row.append(str(correlations["%s_%s" % (meth, name)]))
                output.write('\t'.join(row) + '\n')
    return correlations