def plot_consensus_boundary_properties(consensus_bounds):
    """Plot boundary counts and boundary sizes per conservation score.

    Builds a two-row figure with a shared x axis. The top panel counts the
    rows of ``consensus_bounds`` for each ``n_cell_types`` value; the bottom
    panel is a box plot of ``length`` for each ``n_cell_types`` value. The
    figure is written to ``figures_path / "consensus_boundaries_stats.pdf"``.

    Parameters
    ----------
    consensus_bounds : pandas.DataFrame
        Table exposing the ``n_cell_types`` and ``length`` columns.
        ``n_cell_types`` must hold integers, since its min/max feed ``range``.
    """
    figsize = get_default_figsize()
    fig, (ax_count, ax_size) = plt.subplots(
        2, 1, sharex=True, tight_layout=True,
        figsize=(figsize[0] * 1.5, figsize[1] * 2))

    scores = consensus_bounds.n_cell_types
    # Show every score between the observed extremes, even the empty ones.
    score_order = range(scores.min(), scores.max() + 1)

    # The Series is passed explicitly as ``x``: the meaning of the first
    # positional argument of countplot differs between seaborn releases.
    sns.countplot(x=scores, ax=ax_count, order=score_order, palette="Reds")
    ax_count.set_xlabel("")
    ax_count.set_ylabel("Boundaries")
    ax_count.grid(axis='y')
    ax_count.ticklabel_format(axis='y', scilimits=(0, 0), useMathText=True)

    sns.boxplot(data=consensus_bounds, x='n_cell_types', y="length",
                order=score_order, ax=ax_size, palette="Reds")
    ax_size.set_xlabel("Conservation score (s)")
    ax_size.set_ylabel("Boundary size")
    ax_size.grid(axis='y')
    ax_size.ticklabel_format(axis='y', scilimits=(0, 0), useMathText=True)

    fig.savefig(figures_path / "consensus_boundaries_stats.pdf",
                bbox_inches='tight', transparent=True)
def plot_cons_bounds_vs_grbs(grb_vs_windows, length_vs_windows, grbs,
                             min_cons=2):
    """Draw two heatmaps of per-GRB window signals, GRBs sorted by length.

    Both input tables are re-indexed by the ``grb_uid`` values of ``grbs``
    sorted by decreasing ``length``; missing entries become 0. The left
    panel shows where the value of ``grb_vs_windows`` is at least
    ``min_cons`` (binary mask); the right panel shows where
    ``length_vs_windows`` is positive, dropping rows that sum to zero.
    The figure is saved to ``figures_path / "cons_bounds_vs_GRBs.pdf"``.

    Parameters
    ----------
    grb_vs_windows : pandas.DataFrame
        Values indexed by GRB identifier, one column per window.
    length_vs_windows : pandas.DataFrame
        Values indexed by GRB identifier, one column per window.
    grbs : pandas.DataFrame
        Table exposing the ``length`` and ``grb_uid`` columns.
    min_cons : int, optional
        Minimum value of ``grb_vs_windows`` shown as "conserved" and
        reported in the left panel title. Defaults to 2.
    """
    # The row order is shared by both matrices, so compute it only once.
    grb_order = grbs.sort_values('length', ascending=False).grb_uid.tolist()

    X = grb_vs_windows.reindex(index=grb_order).fillna(0).values
    # Single-pass binarisation; stays correct for any threshold value.
    X = np.where(X >= min_cons, 1, 0)

    L = length_vs_windows.reindex(index=grb_order).fillna(0).values
    L[L > 0] = 1
    L = L[L.sum(1) != 0]

    figsize = get_default_figsize()
    fig, axes = plt.subplots(1, 2, sharey=True,
                             figsize=(figsize[0] * 2, figsize[1]))

    axes[0].imshow(X, aspect="auto", interpolation='bilinear', cmap='hot')
    axes[0].set_yticks([])
    axes[0].set_xticks([0, int(X.shape[1] / 2), X.shape[1] - 1])
    axes[0].set_xticklabels(['-2.5Mb', '0', "+2.5Mb"])
    axes[0].set_xlabel("Distance from GRB center")
    # Raw string: "\g" is not a valid escape sequence in a normal literal.
    axes[0].set_title(r"Conserved boundaries ($s \geq {}$)".format(min_cons),
                      fontsize='large')

    axes[1].imshow(L, aspect="auto", interpolation='bilinear', cmap='Blues')
    axes[1].set_yticks([])
    axes[1].set_xticks([0, int(L.shape[1] / 2), L.shape[1] - 1])
    axes[1].set_xticklabels(['-2.5Mb', '0', "+2.5Mb"])
    axes[1].set_xlabel("Distance from GRB center")
    axes[1].set_title("GRB length", fontsize='large')

    fig.savefig(figures_path / "cons_bounds_vs_GRBs.pdf",
                bbox_inches='tight', transparent=True)
def plot_n_ctcf_sites_in_boundaries(b):
    """Plot, per conservation score, how many boundaries hold how many CTCFs.

    ``b.n_ctcfs`` is binned into the labels ``0``, ``1``, ``2``, ``3-4``,
    ``5-8`` and ``9-11``; the bins are counted per ``n_cell_types`` value.
    One panel is drawn for each score from 1 to 7, holding two bars: the
    number of boundaries in bin ``0`` and a stacked bar of the other bins.
    The figure is saved to
    ``figures_path / "n_ctcf_sites_in_boundaries_by_conservation.pdf"``.

    Parameters
    ----------
    b : pandas.DataFrame
        Table exposing the ``n_ctcfs`` and ``n_cell_types`` columns. Every
        score from 1 to 7 must be present in ``n_cell_types``.

    Notes
    -----
    Counts of 12 or more map to no bin and are therefore left out of the
    totals (unchanged from the previous behaviour).
    """
    bin_labels = ['0', '1', '2', '3-4', '5-8', '9-11']

    def _to_bin(x):
        if x in [0, 1, 2]:
            # int() keeps the label stable when the counts are floats
            # (str(1.0) would give '1.0', which matches no bin column).
            return str(int(x))
        elif x in [3, 4]:
            return "3-4"
        elif x in range(5, 9):
            return "5-8"
        elif x in range(9, 12):
            return "9-11"

    bounds_with_ctcf_count = b.copy()
    bounds_with_ctcf_count['cat'] = bounds_with_ctcf_count.n_ctcfs.map(_to_bin)
    # Guarantee one indicator column per bin even when a bin is unobserved,
    # otherwise the column selection below raises KeyError.
    indicators = pd.get_dummies(bounds_with_ctcf_count.cat).reindex(
        columns=bin_labels, fill_value=0)
    bounds_with_ctcf_count = pd.concat((bounds_with_ctcf_count, indicators),
                                       axis=1)
    groups = bounds_with_ctcf_count.groupby('n_cell_types')[bin_labels].sum()

    # 8-step "Blues" palette whose first entry is replaced by white.
    # pyplot.get_cmap is used because plt.cm.get_cmap no longer exists in
    # recent Matplotlib releases.
    cmap = plt.get_cmap('Blues', 8)
    newcolors = cmap(np.linspace(0, 1, 8))
    white = np.array([1, 1, 1, 1])
    newcolors[0, :] = white
    newcmp = ListedColormap(newcolors)

    figsize = get_default_figsize()
    fig, axes = plt.subplots(1, 7, figsize=(figsize[0] * 7, figsize[1] * 2))
    for ax, g in zip(axes, range(1, 8)):
        # Two rows: '0' carries only the zero-CTCF count, g carries the
        # counts of all the other bins; missing cells are filled with 0.
        xg = groups.loc[[g], bin_labels[1:]].copy()
        xg.loc['0', '0'] = groups.loc[g, '0']
        xg = xg.fillna(0)
        xg = xg.loc[['0', g], bin_labels]
        xg.plot.bar(stacked=True, cmap=newcmp, ax=ax, edgecolor='black')
        ax.legend().set_visible(False)
        ax.set_xticklabels([])
        ax.grid(axis="y")
        ax.tick_params(axis='y')
        ax.set_xlabel("")
        ax.set_title("$s = {}$".format(g), fontweight="bold",
                     fontsize='xx-large')
    axes[0].set_ylabel("Boundaries", fontsize='xx-large')

    # All panels share the same bins; reuse the last one for the legend.
    handles, labels = axes[-1].get_legend_handles_labels()
    fig.legend(handles, labels, loc='lower center', ncol=8,
               bbox_to_anchor=(0.43, 1.05), title='CTCF binding sites')
    fig.text(x=0.53, y=-0.01, horizontalalignment='center',
             fontsize='xx-large', s="Boundary conservation score ($s$)")
    fig.savefig(figures_path / "n_ctcf_sites_in_boundaries_by_conservation.pdf",
                bbox_inches='tight', transparent=True)
def plot_conservation_CTCF_sites(n_overlaps):
    """Histogram of the values in ``n_overlaps``, one bar per integer value.

    Bin edges run from 0 to ``max(n_overlaps) + 1`` and the x tick of each
    bar is placed at the centre of its bin. The figure is saved to
    ``figures_path / "CTCF_sites_number_overlaps_with_chipseqs.pdf"``.

    Parameters
    ----------
    n_overlaps : iterable of numbers
        Values to histogram; must be non-empty because its maximum is taken.
    """
    base_w, base_h = get_default_figsize()[0], get_default_figsize()[1]
    fig = plt.figure(figsize=(base_w * 2.5, base_h * 0.5))

    highest = max(n_overlaps)
    bar_values = np.arange(highest + 1)
    bin_edges = np.arange(highest + 2)

    plt.hist(n_overlaps, bins=bin_edges, rwidth=0.9)
    plt.grid(axis="y")
    plt.xlabel("# of samples in which the CTCF site is conserved")
    plt.ylabel("# CTCF sites")
    # Centre each tick under its unit-width bar.
    plt.xticks(bar_values + 0.5, labels=bar_values)

    fig.savefig(figures_path / "CTCF_sites_number_overlaps_with_chipseqs.pdf",
                bbox_inches='tight', transparent=True)
def plot_triangle_enrichment_horizontal(pos_triangles_vs_ctcfs,
                                        pos_triangles,
                                        neg_triangles_vs_ctcfs,
                                        neg_triangles,
                                        path=figures_path /
                                        "triangle_enrichment.pdf",
                                        cmap='Greens'):
    """Draw side-by-side sign heatmaps for positive and negative triangles.

    Each ``*_vs_ctcfs`` table is re-indexed by the ``triangle_uid`` values of
    the matching triangle table sorted by decreasing ``length``; rows with
    missing values are dropped and the remaining values are reduced to their
    sign (-1, 0, 1). The figure is saved to ``path``.

    Parameters
    ----------
    pos_triangles_vs_ctcfs, neg_triangles_vs_ctcfs : pandas.DataFrame
        Values indexed by triangle identifier, one column per position.
    pos_triangles, neg_triangles : pandas.DataFrame
        Tables exposing the ``length`` and ``triangle_uid`` columns.
    path : path-like, optional
        Output file.
    cmap : str, optional
        Colormap name passed to ``matshow``.
    """

    def _sign_matrix(values_by_triangle, triangles):
        # Rows ordered by decreasing triangle length, values cut to sign.
        ordered_uids = triangles.sort_values(
            'length', ascending=False).triangle_uid.tolist()
        M = values_by_triangle.reindex(index=ordered_uids).dropna().values
        M[M < 0] = -1
        M[M > 0] = 1
        return M

    def _quarter_ticks(n_cols):
        return [0, n_cols // 4, n_cols // 2, n_cols * 3 // 4, n_cols]

    tick_labels = ['-1 Mb', '-500 Kb', '0', '+500 Kb', '+1 Mb']

    figsize = get_default_figsize()
    fig, axes = plt.subplots(1, 2, figsize=(figsize[0] * 2, figsize[1] * 1))

    X_pos = _sign_matrix(pos_triangles_vs_ctcfs, pos_triangles)
    axes[0].matshow(X_pos, aspect='auto', cmap=cmap, interpolation='bilinear')
    axes[0].set_yticklabels([])
    axes[0].set_xticks(_quarter_ticks(X_pos.shape[1]))
    axes[0].set_xticklabels(tick_labels)
    axes[0].xaxis.set_ticks_position('bottom')
    axes[0].set_xlabel("Distance from\nnegative inversion point")
    axes[0].set_yticks([])
    axes[0].set_ylabel(
        "GM12878 TADs ordered by\ntheir Positive DI region length")

    X_neg = _sign_matrix(neg_triangles_vs_ctcfs, neg_triangles)
    axes[1].matshow(X_neg, aspect='auto', cmap=cmap, interpolation='bilinear')
    axes[1].set_yticklabels([])
    axes[1].set_yticks([])
    # Ticks follow the width of the matrix drawn on this panel (they were
    # previously derived from the positive matrix).
    axes[1].set_xticks(_quarter_ticks(X_neg.shape[1]))
    axes[1].set_xticklabels(tick_labels)
    axes[1].set_xlabel("Distance from\nnegative inversion point")
    axes[1].xaxis.set_ticks_position('bottom')
    axes[1].set_ylabel(
        "GM12878 TADs ordered by\ntheir Negative DI region length",
        rotation=270, labelpad=27)
    axes[1].yaxis.set_label_position("right")

    fig.savefig(path, bbox_inches='tight', transparent=True)
def plot_aggregations_by_tad(aggregations_by_tad, tad_start_window,
                             tad_end_window,
                             path=figures_path / "aggregations_by_tad.pdf"):
    """Plot column-wise means of the aggregated CTCF signals along TADs.

    Three stacked panels share the x axis: all sites (``ctcf_id``), the
    ``forward``/``reverse`` split, and the ``S``/``C``/``D``/``CD`` classes.
    Dotted vertical lines mark ``tad_start_window`` and ``tad_end_window``
    on every panel. The figure is saved to ``path``.

    Parameters
    ----------
    aggregations_by_tad : mapping
        Maps each of the keys listed above to an object whose ``mean(0)``
        gives the curve to plot.
    tad_start_window, tad_end_window : number
        x positions of the TAD start and end.
    path : path-like, optional
        Output file.
    """
    # One entry per panel: (data key, legend label, ctcf_colors key).
    panels = (
        (('ctcf_id', 'both', 'all'),),
        (('forward', 'Forward', 'forward'),
         ('reverse', 'Reverse', 'reverse')),
        (('S', 'Same', 'S'),
         ('C', 'Convergent', 'C'),
         ('D', 'Divergent', 'D'),
         ('CD', 'Convergent-Divergent', 'CD')),
    )

    figsize = get_default_figsize()
    fig, axes = plt.subplots(3, 1, sharex=True,
                             figsize=(figsize[0] * 1.5, figsize[1] * 3))

    for panel_ax, curves in zip(axes, panels):
        for data_key, legend_label, color_key in curves:
            panel_ax.plot(aggregations_by_tad[data_key].mean(0),
                          label=legend_label, color=ctcf_colors[color_key])
        panel_ax.set_ylabel("Avg. CTCF sites\nper % of TAD")
        for edge in (tad_start_window, tad_end_window):
            panel_ax.axvline(edge, color='black', linestyle=':')
        panel_ax.grid()
        panel_ax.legend(loc='upper center')

    bottom = axes[2]
    tad_center = int(tad_start_window + tad_end_window) / 2
    bottom.set_xticks([tad_start_window, tad_center, tad_end_window])
    bottom.set_xticklabels(
        ['TAD start\n(0%)', 'TAD center\n(50%)', 'TAD end\n(100%)'])
    bottom.set_xlabel("Position on TAD")

    fig.savefig(path, bbox_inches='tight', transparent=True)
def plot_aggregations_by_boundary(aggregations_by_bound_tot, extended,
                                  window_size):
    """Plot column-wise means of the aggregated CTCF signals around boundaries.

    Three stacked panels share the x axis: all sites (``ctcf_id``), the
    ``forward``/``reverse`` split, and the ``S``/``C``/``D``/``CD`` classes.
    A vertical line at ``extended / window_size`` marks the boundary centre
    on every panel. The figure is saved to
    ``figures_path / "aggregations_by_boundary.pdf"``.

    Parameters
    ----------
    aggregations_by_bound_tot : mapping
        Maps each of the keys listed above to an object whose ``mean(0)``
        gives the curve to plot.
    extended : number
        Extension on each side of the boundary centre. The tick labels are
        built as ``extended // 1000`` followed by "kb", i.e. the value is
        treated as base pairs.
    window_size : number
        Size of one window, in the same unit as ``extended``.

    Notes
    -----
    The y label still reads "per 5kb" regardless of ``window_size``.
    """
    # One entry per panel: (data key, legend label, ctcf_colors key).
    panels = (
        (('ctcf_id', 'both', 'all'),),
        (('forward', 'Forward', 'forward'),
         ('reverse', 'Reverse', 'reverse')),
        (('S', 'Same', 'S'),
         ('C', 'Convergent', 'C'),
         ('D', 'Divergent', 'D'),
         ('CD', 'Convergent-\nDivergent', 'CD')),
    )
    center = extended / window_size

    figsize = get_default_figsize()
    fig, axes = plt.subplots(3, 1, sharex=True,
                             figsize=(figsize[0] * 1.5, figsize[1] * 3))

    for ax, curves in zip(axes, panels):
        for data_key, legend_label, color_key in curves:
            ax.plot(aggregations_by_bound_tot[data_key].mean(0),
                    label=legend_label, color=ctcf_colors[color_key])
        ax.set_ylabel("Avg. CTCF sites per 5kb")
        ax.axvline(center, color='black')
        ax.grid()
        ax.legend(loc='upper left')

    # Labels follow ``extended`` instead of a fixed 250kb, matching the way
    # plot_cons_bounds_vs_gm12878_DI labels the same kind of axis.
    span_kb = int(extended // 1000)
    axes[2].set_xticks([0, center, extended * 2 / window_size])
    axes[2].set_xticklabels(
        ['-{}kb'.format(span_kb), '0', '+{}kb'.format(span_kb)])
    axes[2].set_xlabel("Distance from boundary center")

    fig.savefig(figures_path / "aggregations_by_boundary.pdf",
                bbox_inches='tight', transparent=True)
def plot_triangle_enrichment_halves(pos_triangles_vs_ctcfs,
                                    pos_triangles,
                                    neg_triangles_vs_ctcfs,
                                    neg_triangles,
                                    path=figures_path /
                                    "triangle_enrichment.pdf",
                                    cmap='Greens'):
    """Draw the left half of the positive and right half of the negative map.

    Each ``*_vs_ctcfs`` table is re-indexed by the ``triangle_uid`` values of
    the matching triangle table sorted by decreasing ``length``; rows with
    missing values are dropped and values are reduced to their sign. The
    positive matrix keeps columns ``[:half - 1]``, the negative matrix keeps
    columns ``[half - 2:]``, where ``half`` is half the column count.
    The figure is saved to ``path``.

    Parameters
    ----------
    pos_triangles_vs_ctcfs, neg_triangles_vs_ctcfs : pandas.DataFrame
        Values indexed by triangle identifier, one column per position.
    pos_triangles, neg_triangles : pandas.DataFrame
        Tables exposing the ``length`` and ``triangle_uid`` columns.
    path : path-like, optional
        Output file.
    cmap : str, optional
        Colormap name passed to ``matshow``.
    """
    base = get_default_figsize()
    fig, (left_ax, right_ax) = plt.subplots(
        1, 2, figsize=(base[0] * 2, base[1] * 1))

    # --- positive DI regions: columns left of the centre -----------------
    pos_uids = pos_triangles.sort_values(
        'length', ascending=False).triangle_uid.tolist()
    X_pos = pos_triangles_vs_ctcfs.reindex(index=pos_uids).dropna().values
    X_pos[X_pos < 0] = -1
    X_pos[X_pos > 0] = 1
    pos_mid = X_pos.shape[1] // 2
    X_pos = X_pos[:, :pos_mid - 1]

    left_ax.matshow(X_pos, aspect='auto', cmap=cmap,
                    interpolation='bilinear')
    left_ax.set_yticklabels([])
    left_ax.set_xticks([0, X_pos.shape[1] // 2, X_pos.shape[1] - 1])
    left_ax.set_xticklabels(['-1Mb', '-500Kb', '0'])
    left_ax.xaxis.set_ticks_position('bottom')
    left_ax.set_xlabel("Distance from\nnegative inversion point")
    left_ax.set_title("Positive DI regions")

    # --- negative DI regions: columns right of the centre ----------------
    neg_uids = neg_triangles.sort_values(
        'length', ascending=False).triangle_uid.tolist()
    X_neg = neg_triangles_vs_ctcfs.reindex(index=neg_uids).dropna().values
    X_neg[X_neg < 0] = -1
    X_neg[X_neg > 0] = 1
    neg_mid = X_neg.shape[1] // 2
    X_neg = X_neg[:, neg_mid - 2:]

    right_ax.matshow(X_neg, aspect='auto', cmap=cmap,
                     interpolation='bilinear')
    right_ax.set_yticklabels([])
    right_ax.set_yticks([])
    right_ax.set_xticks([0, X_neg.shape[1] // 2, X_neg.shape[1] - 1])
    right_ax.set_xticklabels(['0', '+500Kb', '+1Mb'])
    right_ax.set_xlabel("Distance from\nnegative inversion point")
    right_ax.set_title("Negative DI regions")
    right_ax.xaxis.set_ticks_position('bottom')

    fig.savefig(path, bbox_inches='tight', transparent=True)
def plot_ctcf_patterns_vs_size(all_patterns):
    """Box plot of pattern size by number of CTCF sites and pattern class.

    Boxes are drawn for 2, 3 and 4 sites, split by ``pattern_class`` and
    without outlier markers. The figure is saved to
    ``figures_path / "CTCF_patterns_vs_size.pdf"``.

    Parameters
    ----------
    all_patterns : pandas.DataFrame
        Table exposing the ``n_ctcf_sites``, ``size`` and ``pattern_class``
        columns.
    """
    class_order = ['Same', 'Convergent', 'Divergent', "Convergent-Divergent"]
    site_counts = [2, 3, 4]

    base = get_default_figsize()
    fig = plt.figure(figsize=(base[0]*2, base[1]*1.5))

    sns.boxplot(data=all_patterns,
                x='n_ctcf_sites',
                y='size',
                order=site_counts,
                hue='pattern_class',
                hue_order=class_order,
                showfliers=False,
                palette=ctcf_colors,
                medianprops={'color': 'red'})
    # NOTE: a draft of pairwise significance annotations between pattern
    # classes (add_stat_annotation, 't-test_ind') used to sit here,
    # commented out; it is intentionally not executed.

    plt.legend(title="Pattern class")
    plt.xlabel("N. CTCF sites composing the pattern")
    plt.ylabel("Pattern size (bp)")
    plt.ticklabel_format(axis='y', style='sci', scilimits=(0, 0),
                         useMathText=True)

    fig.savefig(figures_path / "CTCF_patterns_vs_size.pdf",
                bbox_inches='tight', transparent=True)
def plot_perc_regions_bearing_CTCF_site_cat(
        pois_ext_ctcf,
        catname='point_of_interest',
        path=figures_path / "perc_regions_bearing_CTCF_site_cat.pdf"):
    """Grouped bar plot of the mean of each CTCF column per category.

    The columns ``all``, ``S``, ``CD``, ``D`` and ``C`` are averaged within
    each ``catname`` group and drawn as grouped bars, with the y axis
    limited to [0, 1] and labelled as percentages. The figure is saved to
    ``path``.

    Parameters
    ----------
    pois_ext_ctcf : pandas.DataFrame
        Table exposing ``catname`` and the five columns listed above.
    catname : str, optional
        Column used for grouping and for the x axis.
    path : path-like, optional
        Output file.
    """
    site_columns = ['all', 'S', 'CD', 'D', 'C']

    base = get_default_figsize()
    fig = plt.figure(figsize=(base[0] * 1.5, base[1]))

    group_means = pois_ext_ctcf.groupby(catname)[site_columns].mean()
    long_table = group_means.reset_index().melt(id_vars=catname)
    sns.barplot(data=long_table, x=catname, y='value', hue='variable',
                palette=ctcf_colors)

    plt.grid(axis='y')
    plt.ylim(0, 1)
    # Re-label the current tick positions as percentages.
    tick_positions, _ = plt.yticks()
    percent_labels = ["{:.0f}%".format(t * 100) for t in tick_positions]
    plt.yticks(tick_positions, percent_labels, rotation=0)
    plt.xlabel("TAD position")
    plt.ylabel("Percentage of regions\nbearing a CTCF site")
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    fig.savefig(path, bbox_inches='tight', transparent=True)
def plot_cons_bounds_vs_gm12878_DI(
        aggregations_by_bound,
        extended,
        window_size,
        measure,
        ylabel="Average Directionality\nIndex on GM12878"):
    """Plot the mean profile around boundaries, one panel per level.

    For every key of ``aggregations_by_bound`` (taken in sorted order) the
    column-wise mean is drawn as a black line, filled red where negative
    and blue where non-negative. The figure is saved to
    ``figures_path / "cons_bounds_vs_gm12878_<measure>.pdf"``.

    Parameters
    ----------
    aggregations_by_bound : mapping
        Maps an integer level to an object whose ``mean(0)`` gives the
        profile. Level ``k`` is drawn on panel ``k - 1``, so the keys are
        expected to be 1..n.
    extended : number
        Extension on each side of the boundary centre; ``extended // 1000``
        is used in the "kb" tick labels.
    window_size : number
        Size of one window, in the same unit as ``extended``.
    measure : str
        Inserted in the output file name.
    ylabel : str, optional
        y label of the first panel.

    Notes
    -----
    The figure width stays fixed at seven default widths whatever the
    number of levels.
    """
    figsize = get_default_figsize()
    levels = sorted(aggregations_by_bound.keys())
    # squeeze=False keeps ``axes`` an array even for a single level, where
    # plt.subplots would otherwise return a bare Axes and indexing fails.
    fig, axes = plt.subplots(1, len(levels),
                             sharex='col', sharey=True,
                             tight_layout=True, squeeze=False,
                             figsize=(figsize[0] * 7, figsize[1]))
    axes = axes[0]

    # Loop-invariant pieces of the x axis.
    xticks = [0, extended / window_size, extended * 2 / window_size]
    length_n = "{}".format(extended // 1000)
    xticklabels = ['-{}kb'.format(length_n), '0', '+{}kb'.format(length_n)]

    for level in levels:
        ax = axes[level - 1]
        y = aggregations_by_bound[level].mean(0)
        positions = np.arange(y.shape[0])
        ax.plot(y, linewidth=1, color='black')
        ax.fill_between(positions, y, where=y < 0, facecolor='red')
        ax.fill_between(positions, y, where=y >= 0, facecolor='blue')
        if level == 1:
            ax.set_ylabel(ylabel, fontsize="xx-large")
        ax.tick_params(axis='y', which='major')
        ax.grid()
        ax.set_title("$s = {}$".format(level), fontweight="bold")
        ax.set_xticks(xticks)
        ax.set_xticklabels(xticklabels)
        ax.set_xlabel("Distance from\nboundary center", fontsize='xx-large')

    fig.savefig(figures_path / "cons_bounds_vs_gm12878_{}.pdf".format(measure),
                bbox_inches='tight', transparent=True)
def plot_triangle_silouette(pos_triangles_vs_ctcfs, pos_triangles,
                            neg_triangles_vs_ctcfs, neg_triangles, path):
    """Draw stacked sign heatmaps for positive and negative triangles.

    Each ``*_vs_ctcfs`` table is re-indexed by the ``triangle_uid`` values of
    the matching triangle table sorted by decreasing ``length``; rows with
    missing values are dropped and the values are reduced to their sign.
    The positive matrix goes on the top panel (no ticks), the negative
    matrix on the bottom panel. The figure is saved to ``path``.

    Parameters
    ----------
    pos_triangles_vs_ctcfs, neg_triangles_vs_ctcfs : pandas.DataFrame
        Values indexed by triangle identifier, one column per position.
    pos_triangles, neg_triangles : pandas.DataFrame
        Tables exposing the ``length`` and ``triangle_uid`` columns.
    path : path-like
        Output file.
    """

    def _sign_matrix(values_by_triangle, triangles):
        # Rows ordered by decreasing triangle length, values cut to sign.
        ordered_uids = triangles.sort_values(
            'length', ascending=False).triangle_uid.tolist()
        M = values_by_triangle.reindex(index=ordered_uids).dropna().values
        M[M < 0] = -1
        M[M > 0] = 1
        return M

    figsize = get_default_figsize()
    fig, axes = plt.subplots(2, 1,
                             figsize=(figsize[0] * 1.5, figsize[1] * 2))

    X_pos = _sign_matrix(pos_triangles_vs_ctcfs, pos_triangles)
    axes[0].matshow(X_pos, aspect='auto', cmap='seismic_r',
                    interpolation='bilinear')
    axes[0].set_yticklabels([])
    axes[0].set_xticks([])
    axes[0].set_yticks([])
    axes[0].set_xticklabels([])

    X_neg = _sign_matrix(neg_triangles_vs_ctcfs, neg_triangles)
    axes[1].matshow(X_neg, aspect='auto', cmap='seismic_r',
                    interpolation='bilinear')
    axes[1].set_yticklabels([])
    axes[1].set_yticks([])
    # Ticks follow the width of the matrix drawn on this panel (they were
    # previously derived from the positive matrix).
    axes[1].set_xticks([0, X_neg.shape[1] / 2, X_neg.shape[1]])
    axes[1].set_xticklabels(['-1Mb', '0', '+1Mb'])
    axes[1].set_xlabel("Distance from (+) to (-) inversion point")
    axes[1].xaxis.set_ticks_position('bottom')

    # Honour the caller-supplied destination; it used to be ignored in
    # favour of a fixed file under figures_path.
    fig.savefig(path, bbox_inches='tight', transparent=True)
def plot_conservation_shift_test(regions_n_cross):
    """Line plot of ``count`` against ``shift``, one line per conservation.

    The ``conservation`` column is turned into labels of the form
    "<value> times"; lines are drawn for the labels "1 times" to "7 times".
    The figure is saved to ``figures_path / "cons_tads_vs_PC-HIC.pdf"``.

    Parameters
    ----------
    regions_n_cross : pandas.DataFrame
        Table exposing the ``shift``, ``count`` and ``conservation`` columns.
    """
    base = get_default_figsize()
    fig = plt.figure(figsize=(base[0] * 2, base[1] * 1.2))

    labelled = regions_n_cross.assign(
        conservation=lambda df: df.conservation.astype(str) + " times")
    level_labels = ["{} times".format(level) for level in range(1, 8)]

    sns.lineplot(data=labelled,
                 x='shift',
                 y='count',
                 hue='conservation',
                 hue_order=level_labels,
                 palette='Reds',
                 linewidth=2,
                 legend='full')

    plt.xlabel("Shift")
    plt.ticklabel_format(axis='x', style='sci', scilimits=(0, 0),
                         useMathText=True)
    plt.ylabel("Average n. overlapping\nPC-HiC interactions")
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)

    fig.savefig(figures_path / "cons_tads_vs_PC-HIC.pdf",
                bbox_inches='tight', transparent=True)
def plot_consensus_boundaries_intersection_with_GM12878(x):
    """Plot how many consensus boundaries intersect a GM12878 boundary.

    A boundary counts as intersecting when its ``n_gm12878_bounds`` value is
    positive. The top panel counts boundaries per ``n_cell_types`` split by
    that flag; the bottom panel shows, per ``n_cell_types``, the fraction of
    intersecting boundaries as a percentage. The figure is saved to
    ``figures_path / "cons_bounds_vs_gm12878_bounds.pdf"``.

    Parameters
    ----------
    x : pandas.DataFrame
        Table exposing the ``n_gm12878_bounds`` and ``n_cell_types`` columns.
    """
    hit_label = 'Intersects GM12878 boundary'
    miss_label = 'Does not intersect GM12878 boundary'

    flagged = x.assign(
        has_gm12878_bound=lambda df: df.n_gm12878_bounds.map(
            lambda n_hits: hit_label if n_hits > 0 else miss_label))

    base = get_default_figsize()
    fig, axes = plt.subplots(2, 1, sharex=True,
                             figsize=(base[0] * 1.5, base[1] * 2))

    sns.countplot(data=flagged,
                  x='n_cell_types',
                  hue='has_gm12878_bound',
                  ax=axes[0],
                  hue_order=[hit_label, miss_label])
    axes[0].grid(axis='y')
    axes[0].set_ylabel("# boundaries")
    axes[0].legend()

    # Row-normalise the counts so every conservation level sums to 1.
    counts = flagged.groupby(
        ['n_cell_types', 'has_gm12878_bound']).size().unstack()
    shares = counts.div(counts.sum(1), axis=0)
    shares[hit_label].plot.bar(ax=axes[1])

    # The pyplot calls below act on the current axes, which is the last
    # subplot created above (the bottom panel).
    plt.ylim(0, 0.8)
    tick_positions, _ = plt.yticks()
    percent_labels = ["{:.0f}%".format(t * 100) for t in tick_positions]
    plt.yticks(tick_positions, percent_labels, rotation=0)
    plt.xticks(rotation=0)
    plt.xlabel("Boundary conservation ($s$)")
    plt.ylabel("% boundaries overlapping\nwith a GM12878 boundary")
    plt.grid(axis='y')

    fig.savefig(figures_path / "cons_bounds_vs_gm12878_bounds.pdf",
                bbox_inches='tight', transparent=True)
def plot_simulation_stats(simulation_metrics, name_to_full_name,
                          name_to_color, name_to_style):
    """Plot two curves per simulation from its metric triple.

    The left panel plots ``stats[1]`` against ``stats[0]``; the right panel
    plots the two sequences returned by ``order_for_ROC(stats[1],
    stats[2])``. Axis labels describe ``stats[0]`` as the minimum threshold,
    ``stats[1]`` as recall and ``stats[2]`` as precision. The figure is
    saved to ``figures_path / "simulation_stats.pdf"``.

    Parameters
    ----------
    simulation_metrics : mapping
        Maps a simulation name to an indexable ``stats`` triple.
    name_to_full_name : mapping
        Maps a simulation name to its legend label.
    name_to_color : mapping
        Maps a simulation name to a Matplotlib colour.
    name_to_style : mapping
        Maps a simulation name to a Matplotlib line style.
    """
    figsize = get_default_figsize()
    fig, axes = plt.subplots(1, 2,
                             figsize=(figsize[0] * 2, figsize[1]),
                             tight_layout=True)

    for name, stats in simulation_metrics.items():
        # Line2D has no ``style`` property; the keyword is ``linestyle``.
        line_kwargs = dict(label=name_to_full_name[name],
                           color=name_to_color[name],
                           linestyle=name_to_style[name])
        axes[0].plot(stats[0], stats[1], **line_kwargs)
        axes[1].plot(*order_for_ROC(stats[1], stats[2]), **line_kwargs)

    # Re-label the current tick positions as percentages.
    yticks = axes[0].get_yticks()
    axes[0].set_yticklabels(["{:.0f}%".format(yi * 100) for yi in yticks])
    axes[0].legend(title='Simulation')
    axes[0].set_xlabel(
        "Minimum threshold\n(Loop occurences in the simulation)")
    axes[0].set_ylabel("RECALL (% HiCCUP loops\nrecovered by simulation)")
    axes[0].grid()

    yticks = axes[1].get_yticks()
    axes[1].set_yticklabels(["{:.0f}%".format(yi * 100) for yi in yticks])
    xticks = axes[1].get_xticks()
    axes[1].set_xticklabels(["{:.0f}%".format(xi * 100) for xi in xticks])
    axes[1].grid()
    axes[1].legend(title='Simulation')
    axes[1].set_xlabel("RECALL (% HiCCUP loops\nrecovered by simulation)")
    axes[1].set_ylabel("PRECISION (% simulated\nloops confirmed by HiCCUP)")

    fig.savefig(figures_path / "simulation_stats.pdf",
                bbox_inches='tight', transparent=True)
def plot_patterns_by_window_and_class(pattern_enrichment_by_distance,
                                      path=figures_path /
                                      "CTCF_patterns_vs_size.pdf",
                                      vmin=-0.06,
                                      vmax=0.06):
    """Draw one log10-enrichment heatmap per pattern class.

    The index of ``pattern_enrichment_by_distance`` is converted with
    ``window_string_converter``; the table is transposed and, for each entry
    of ``patternclass_to_pattern``, the rows named by that entry are shown as
    a heatmap of their base-10 logarithm. Panel heights are proportional to
    the number of patterns in each class, and a single colour bar is drawn
    for the first panel. The figure is saved to ``path``.

    Parameters
    ----------
    pattern_enrichment_by_distance : pandas.DataFrame
        Values with one row per window and one column per pattern name.
    path : path-like, optional
        Output file.
    vmin, vmax : float, optional
        Colour scale limits passed to ``sns.heatmap``.
    """
    figsize = get_default_figsize()

    # Heights follow the same iteration order as the panels drawn below, so
    # each panel gets the ratio of its own class.
    class_items = list(patternclass_to_pattern.items())
    n_classes = len(class_items)
    height_ratios = [len(pnames) for _, pnames in class_items]

    fig, axes = plt.subplots(n_classes, 1,
                             sharex=True,
                             figsize=(figsize[0] * 1.6,
                                      figsize[1] * n_classes * 0.5),
                             gridspec_kw={'height_ratios': height_ratios},
                             tight_layout=True)
    cbar_ax = fig.add_axes([1, .3, .03, .4])

    pebd_names = pattern_enrichment_by_distance\
        .assign(index=lambda x: x.index.map(window_string_converter))\
        .set_index('index')
    by_pattern = pebd_names.T

    for i, (pclass, pnames) in enumerate(class_items):
        is_first = i == 0
        # Vectorised log10 replaces the element-wise (deprecated) applymap.
        sns.heatmap(np.log10(by_pattern.loc[pnames]),
                    cbar=is_first,
                    cmap='bwr',
                    linewidth=0.01,
                    vmin=vmin,
                    vmax=vmax,
                    ax=axes[i],
                    linecolor='black',
                    cbar_ax=cbar_ax if is_first else None,
                    cbar_kws={'label': "$log_{10}(enrichment)$"})
        for tick in axes[i].get_yticklabels():
            tick.set_rotation(0)
        if i != n_classes - 1:
            axes[i].set_xlabel("")
            axes[i].tick_params(axis='x', bottom=False)
        else:
            axes[i].set_xlabel("Clustering window")
        # Class name written vertically to the left of the panel.
        axes[i].text(x=-0.12, y=0.5, s=pclass,
                     horizontalalignment='center',
                     verticalalignment='center',
                     rotation=90,
                     transform=axes[i].transAxes)

    fig.savefig(path, bbox_inches='tight', transparent=True)
pos_triangles_vs_ctcfs, pos_triangles, neg_triangles_vs_ctcfs, neg_triangles, figures_path / "triangle_enrichment_CTCF_complete_square.pdf", cmap="seismic_r") plot_triangle_enrichment_halves(pos_triangles_vs_ctcfs, pos_triangles, neg_triangles_vs_ctcfs, neg_triangles, figures_path / "triangle_enrichment_CTCF_square.pdf", cmap="seismic_r") figsize = get_default_figsize() fig, ax = plt.subplots(1, 2, figsize=(figsize[0] * 2, figsize[1])) tbyleft = tads.sort_values("left_size", ascending=False) ax[0].barh(np.arange(tbyleft.shape[0]), -tbyleft.left_size, 0.5, label='Positive DI region') ax[0].barh(np.arange(tbyleft.shape[0]), tbyleft.right_size, 0.5, label='Negative DI region') ax[0].invert_yaxis() ax[0].set_xlim(-1e6, +1e6) ax[0].set_xticks([-1e6, -5e5, 0, 5e5, 1e6]) ax[0].set_xticklabels(['-1 Mb', '-500 Kb', '0', '+500 Kb', '1 Mb']) ax[0].legend(bbox_to_anchor=(0.5, 1.21), loc='upper center')
def plot_aggregations_by_conservation(aggregations_by_bound,
                                      windows_with_ctcf):
    """Plot mean CTCF profiles around boundaries, one column per level.

    For every distinct ``n_cell_types`` value a column of three panels is
    drawn: all sites (``ctcf_id``), the ``forward``/``reverse`` split and the
    ``S``/``C``/``D``/``CD`` classes. Legends and y labels are shown only on
    the column of level 1. The figure is saved to
    ``figures_path / "aggregations_by_conservation.pdf"``.

    Parameters
    ----------
    aggregations_by_bound : mapping
        Maps ``(level, key)`` to an object whose ``mean(0)`` gives the
        curve to plot, for each key listed above.
    windows_with_ctcf : pandas.DataFrame
        Table exposing the ``n_cell_types`` column. Level ``k`` is drawn on
        column ``k - 1``, so the levels are expected to be 1..n.

    Notes
    -----
    NOTE(review): ``extended`` and ``window_size`` are not parameters; they
    are read from the enclosing module scope and must be defined there.
    """
    # One entry per row: (data key, legend label, ctcf_colors key).
    rows = (
        (('ctcf_id', 'both', 'all'),),
        (('forward', 'Forward', 'forward'),
         ('reverse', 'Reverse', 'reverse')),
        (('S', 'Same', 'S'),
         ('C', 'Convergent', 'C'),
         ('D', 'Divergent', 'D'),
         ('CD', 'Convergent-Divergent', 'CD')),
    )

    figsize = get_default_figsize()
    levels = sorted(windows_with_ctcf.n_cell_types.unique())
    n_figs = len(levels)
    # squeeze=False keeps ``axes`` two-dimensional even for a single level,
    # so the [row, column] indexing below always works.
    fig, axes = plt.subplots(3, n_figs,
                             sharex='col', sharey='row',
                             tight_layout=False, squeeze=False,
                             figsize=(figsize[0] * n_figs, figsize[1] * 3))

    for level in levels:
        for row, curves in enumerate(rows):
            ax = axes[row, level - 1]
            for data_key, legend_label, color_key in curves:
                ax.plot(aggregations_by_bound[(level, data_key)].mean(0),
                        label=legend_label, color=ctcf_colors[color_key])
            if level == 1:
                ax.legend(loc='upper left')
                ax.set_ylabel("Avg. CTCF sites per 5kb")
            else:
                ax.legend().set_visible(False)
            ax.grid()
            ax.tick_params(axis='y')

        axes[0, level - 1].set_title("$s = {}$".format(level),
                                     fontweight="bold",
                                     fontsize='xx-large')

        bottom = axes[2, level - 1]
        bottom.set_xticks(
            [0, extended / window_size, extended * 2 / window_size])
        bottom.set_xticklabels(['-250kb', '0', '+250kb'])
        bottom.set_xlabel("Distance from\nboundary center (kb)")

    fig.savefig(figures_path / "aggregations_by_conservation.pdf",
                bbox_inches='tight', transparent=True)