コード例 #1
0
def plot_consensus_boundary_properties(consensus_bounds):
    """Plot boundary counts and boundary lengths per conservation score.

    Draws two stacked panels sharing the x axis: a count plot of
    ``consensus_bounds.n_cell_types`` on top and a box plot of ``length``
    grouped by ``n_cell_types`` below, then saves the figure as
    ``consensus_boundaries_stats.pdf`` under ``figures_path``.

    Args:
        consensus_bounds: DataFrame providing the ``n_cell_types`` and
            ``length`` columns.
    """
    figsize = get_default_figsize()
    fig, axes = plt.subplots(2,
                             1,
                             sharex=True,
                             tight_layout=True,
                             figsize=(figsize[0] * 1.5, figsize[1] * 2))
    min_n = consensus_bounds.n_cell_types.min()
    max_n = consensus_bounds.n_cell_types.max()
    # Every score between the observed extremes gets a slot, even if empty.
    scores = range(min_n, max_n + 1)

    # The column is passed as ``x=`` explicitly: recent seaborn releases
    # interpret the first positional argument as ``data``, not as ``x``.
    sns.countplot(x=consensus_bounds.n_cell_types,
                  ax=axes[0],
                  order=scores,
                  palette="Reds")
    axes[0].set_xlabel("")
    axes[0].set_ylabel("Boundaries")
    axes[0].grid(axis='y')
    axes[0].ticklabel_format(axis='y', scilimits=(0, 0), useMathText=True)

    sns.boxplot(data=consensus_bounds,
                x='n_cell_types',
                y="length",
                order=scores,
                ax=axes[1],
                palette="Reds")
    axes[1].set_xlabel("Conservation score (s)")
    axes[1].set_ylabel("Boundary size")
    axes[1].grid(axis='y')
    axes[1].ticklabel_format(axis='y', scilimits=(0, 0), useMathText=True)

    fig.savefig(figures_path / "consensus_boundaries_stats.pdf",
                bbox_inches='tight',
                transparent=True)
コード例 #2
0
def plot_cons_bounds_vs_grbs(grb_vs_windows, length_vs_windows, grbs):
    """Draw two side-by-side heatmaps with rows ordered by GRB length.

    Both input tables are reindexed by ``grbs.grb_uid`` sorted by decreasing
    ``length``; missing rows are filled with zeros. The left panel shows
    where the value is at least ``min_cons`` (2), the right panel shows where
    ``length_vs_windows`` is positive (all-zero rows are dropped). The figure
    is saved as ``cons_bounds_vs_GRBs.pdf`` under ``figures_path``.

    Args:
        grb_vs_windows: DataFrame indexed by GRB uid, one column per window.
        length_vs_windows: DataFrame indexed by GRB uid, one column per window.
        grbs: DataFrame with ``grb_uid`` and ``length`` columns.
    """
    # Sort once and reuse the order for both matrices.
    grb_order = grbs.sort_values('length',
                                 ascending=False).grb_uid.tolist()

    min_cons = 2
    X = grb_vs_windows.reindex(index=grb_order).fillna(0).values
    X[X < min_cons] = 0
    X[X >= min_cons] = 1

    L = length_vs_windows.reindex(index=grb_order).fillna(0).values
    L[L > 0] = 1
    L = L[L.sum(1) != 0]

    figsize = get_default_figsize()
    fig, axes = plt.subplots(1,
                             2,
                             sharey=True,
                             figsize=(figsize[0] * 2, figsize[1]))

    axes[0].imshow(X, aspect="auto", interpolation='bilinear', cmap='hot')
    axes[0].set_yticks([])
    axes[0].set_xticks([0, int(X.shape[1] / 2), X.shape[1] - 1])
    axes[0].set_xticklabels(['-2.5Mb', '0', "+2.5Mb"])
    axes[0].set_xlabel("Distance from GRB center")
    # Raw string: "\g" is not a valid escape sequence in a normal literal.
    axes[0].set_title(r"Conserved boundaries ($s \geq {}$)".format(min_cons),
                      fontsize='large')

    axes[1].imshow(L, aspect="auto", interpolation='bilinear', cmap='Blues')
    axes[1].set_yticks([])
    axes[1].set_xticks([0, int(L.shape[1] / 2), L.shape[1] - 1])
    axes[1].set_xticklabels(['-2.5Mb', '0', "+2.5Mb"])
    axes[1].set_xlabel("Distance from GRB center")
    axes[1].set_title("GRB length", fontsize='large')

    fig.savefig(figures_path / "cons_bounds_vs_GRBs.pdf",
                bbox_inches='tight',
                transparent=True)
コード例 #3
0
def plot_n_ctcf_sites_in_boundaries(b):
    """Plot, per conservation score, how many boundaries hold how many CTCF sites.

    ``b.n_ctcfs`` is binned into the categories 0, 1, 2, 3-4, 5-8 and 9-11,
    the boundaries are counted per ``n_cell_types`` and category, and one
    stacked bar panel is drawn for each score from 1 to 7. In every panel the
    first bar holds the "0" category and the second bar stacks the others.
    The figure is saved as
    ``n_ctcf_sites_in_boundaries_by_conservation.pdf`` under ``figures_path``.

    Args:
        b: DataFrame with ``n_ctcfs`` and ``n_cell_types`` columns.
    """
    bin_labels = ['0', '1', '2', '3-4', '5-8', '9-11']

    def _to_bin(x):
        # Values outside 0..11 fall through and map to None (no category).
        if x in [0, 1, 2]:
            return str(x)
        elif x in [3, 4]:
            return "3-4"
        elif x in range(5, 9):
            return "5-8"
        elif x in range(9, 12):
            return "9-11"

    bounds_with_ctcf_count = b.copy()
    bounds_with_ctcf_count['cat'] = bounds_with_ctcf_count.n_ctcfs.map(
        _to_bin)
    bounds_with_ctcf_count = pd.concat(
        (bounds_with_ctcf_count, pd.get_dummies(bounds_with_ctcf_count.cat)),
        axis=1)
    groups = bounds_with_ctcf_count.groupby('n_cell_types')[bin_labels].sum()

    # ``plt.cm.get_cmap`` was removed in matplotlib 3.9; ``plt.get_cmap``
    # accepts the same (name, lut) arguments.
    cmap = plt.get_cmap('Blues', 8)
    newcolors = cmap(np.linspace(0, 1, 8))
    white = np.array([1, 1, 1, 1])
    newcolors[0, :] = white  # lightest entry replaced by pure white
    newcmp = ListedColormap(newcolors)

    figsize = get_default_figsize()
    fig, axes = plt.subplots(1, 7, figsize=(figsize[0] * 7, figsize[1] * 2))
    for ax, g in zip(axes, range(1, 8)):
        xg = groups.loc[[g], bin_labels[1:]]
        # Extra row '0' carries only the "0" category so it gets its own bar.
        xg.loc['0', '0'] = groups.loc[g, '0']
        xg = xg.fillna(0)
        xg = xg.loc[['0', g], bin_labels]
        xg.plot.bar(stacked=True, cmap=newcmp, ax=ax, edgecolor='black')
        ax.legend().set_visible(False)
        ax.set_xticklabels([])
        ax.grid(axis="y")
        ax.tick_params(axis='y')
        ax.set_xlabel("")
        ax.set_title("$s = {}$".format(g),
                     fontweight="bold",
                     fontsize='xx-large')
    axes[0].set_ylabel("Boundaries", fontsize='xx-large')

    # All panels share the same categories; take the legend from the last one.
    handles, labels = axes[-1].get_legend_handles_labels()
    fig.legend(handles,
               labels,
               loc='lower center',
               ncol=8,
               bbox_to_anchor=(0.43, 1.05),
               title='CTCF binding sites')
    fig.text(x=0.53,
             y=-0.01,
             horizontalalignment='center',
             fontsize='xx-large',
             s="Boundary conservation score ($s$)")
    fig.savefig(figures_path /
                "n_ctcf_sites_in_boundaries_by_conservation.pdf",
                bbox_inches='tight',
                transparent=True)
コード例 #4
0
def plot_conservation_CTCF_sites(n_overlaps):
    """Histogram of the values in ``n_overlaps`` with one unit-wide bin each.

    Bins run from 0 to ``max(n_overlaps)`` and tick labels are centred on the
    bars. The figure is saved as
    ``CTCF_sites_number_overlaps_with_chipseqs.pdf`` under ``figures_path``.

    Args:
        n_overlaps: sequence of non-negative counts.
    """
    figsize = get_default_figsize()
    fig = plt.figure(figsize=(figsize[0]*2.5, figsize[1]*0.5))

    highest = max(n_overlaps)
    bin_edges = np.arange(highest + 2)
    bin_values = np.arange(highest + 1)

    plt.hist(n_overlaps, bins=bin_edges, rwidth=0.9)
    # Shift by half a bin so each label sits under the middle of its bar.
    plt.xticks(bin_values + 0.5, labels=bin_values)
    plt.grid(axis="y")
    plt.xlabel("# of samples in which the CTCF site is conserved")
    plt.ylabel("# CTCF sites")

    fig.savefig(figures_path / "CTCF_sites_number_overlaps_with_chipseqs.pdf",
                bbox_inches='tight',
                transparent=True)
コード例 #5
0
def plot_triangle_enrichment_horizontal(pos_triangles_vs_ctcfs,
                                        pos_triangles,
                                        neg_triangles_vs_ctcfs,
                                        neg_triangles,
                                        path=figures_path /
                                        "triangle_enrichment.pdf",
                                        cmap='Greens'):
    """Draw the positive and negative triangle matrices side by side.

    Each matrix is reindexed by its triangles sorted by decreasing
    ``length``, rows with missing values are dropped and values are reduced
    to their sign (-1, 0, 1) before being shown with ``matshow``.

    Args:
        pos_triangles_vs_ctcfs: DataFrame indexed by triangle uid (left panel).
        pos_triangles: DataFrame with ``triangle_uid`` and ``length`` columns.
        neg_triangles_vs_ctcfs: DataFrame indexed by triangle uid (right panel).
        neg_triangles: DataFrame with ``triangle_uid`` and ``length`` columns.
        path: destination of the saved figure.
        cmap: colormap passed to ``matshow``.
    """
    tick_labels = ['-1 Mb', '-500 Kb', '0', '+500 Kb', '+1 Mb']

    def _draw_panel(ax, values_by_triangle, triangles):
        # Rows follow the triangles from longest to shortest.
        order = triangles.sort_values('length',
                                      ascending=False).triangle_uid.tolist()
        X = values_by_triangle.reindex(index=order).dropna().values
        X[X < 0] = -1
        X[X > 0] = 1

        ax.matshow(X, aspect='auto', cmap=cmap, interpolation='bilinear')
        # Ticks come from this panel's own width. The last tick is placed on
        # the last column (width - 1): a tick at ``width`` lies outside the
        # image and makes matplotlib widen the x-limits.
        n_cols = X.shape[1]
        ax.set_xticks([
            0, n_cols // 4, n_cols // 2, n_cols * 3 // 4, n_cols - 1
        ])
        ax.set_xticklabels(tick_labels)
        ax.xaxis.set_ticks_position('bottom')
        ax.set_xlabel("Distance from\nnegative inversion point")
        ax.set_yticks([])

    figsize = get_default_figsize()
    fig, axes = plt.subplots(1, 2, figsize=(figsize[0] * 2, figsize[1] * 1))

    _draw_panel(axes[0], pos_triangles_vs_ctcfs, pos_triangles)
    axes[0].set_ylabel(
        "GM12878 TADs ordered by\ntheir Positive DI region length")

    _draw_panel(axes[1], neg_triangles_vs_ctcfs, neg_triangles)
    # The right panel carries its label on the outer (right) side.
    axes[1].set_ylabel(
        "GM12878 TADs ordered by\ntheir Negative DI region length",
        rotation=270,
        labelpad=27)
    axes[1].yaxis.set_label_position("right")

    fig.savefig(path, bbox_inches='tight', transparent=True)
コード例 #6
0
def plot_aggregations_by_tad(aggregations_by_tad,
                             tad_start_window,
                             tad_end_window,
                             path=figures_path / "aggregations_by_tad.pdf"):
    """Plot column-wise means of the aggregated CTCF tracks in three panels.

    Panel 1 shows ``ctcf_id``, panel 2 ``forward`` and ``reverse``, panel 3
    the ``S``, ``C``, ``D`` and ``CD`` tracks. Dotted vertical lines mark
    ``tad_start_window`` and ``tad_end_window`` in every panel.

    Args:
        aggregations_by_tad: mapping from track name to an object whose
            ``mean(0)`` gives the curve to plot.
        tad_start_window: x position of the TAD start.
        tad_end_window: x position of the TAD end.
        path: destination of the saved figure.
    """
    # One entry per panel; each tuple is (track key, legend label, colour key).
    panel_tracks = (
        (('ctcf_id', 'both', 'all'), ),
        (('forward', 'Forward', 'forward'),
         ('reverse', 'Reverse', 'reverse')),
        (('S', 'Same', 'S'),
         ('C', 'Convergent', 'C'),
         ('D', 'Divergent', 'D'),
         ('CD', 'Convergent-Divergent', 'CD')),
    )

    figsize = get_default_figsize()
    fig, axes = plt.subplots(3,
                             1,
                             sharex=True,
                             figsize=(figsize[0] * 1.5, figsize[1] * 3))

    for ax, tracks in zip(axes, panel_tracks):
        for track, legend_label, color_key in tracks:
            ax.plot(aggregations_by_tad[track].mean(0),
                    label=legend_label,
                    color=ctcf_colors[color_key])
        ax.set_ylabel("Avg. CTCF sites\nper % of TAD")
        for edge in (tad_start_window, tad_end_window):
            ax.axvline(edge, color='black', linestyle=':')
        ax.grid()
        ax.legend(loc='upper center')

    # Only the bottom panel carries the shared x-axis annotation.
    bottom = axes[2]
    midpoint = int(tad_start_window + tad_end_window) / 2
    bottom.set_xticks([tad_start_window, midpoint, tad_end_window])
    bottom.set_xticklabels(
        ['TAD start\n(0%)', 'TAD center\n(50%)', 'TAD end\n(100%)'])
    bottom.set_xlabel("Position on TAD")

    fig.savefig(path, bbox_inches='tight', transparent=True)
コード例 #7
0
def plot_aggregations_by_boundary(aggregations_by_bound_tot, extended,
                                  window_size):
    """Plot column-wise means of the aggregated CTCF tracks around boundaries.

    Three stacked panels show ``ctcf_id``; ``forward`` and ``reverse``; and
    the ``S``, ``C``, ``D``, ``CD`` tracks. A vertical line at
    ``extended / window_size`` marks the centre in each panel. The figure is
    saved as ``aggregations_by_boundary.pdf`` under ``figures_path``.

    Args:
        aggregations_by_bound_tot: mapping from track name to an object whose
            ``mean(0)`` gives the curve to plot.
        extended: extension on each side of the centre, in the same unit as
            ``window_size``.
        window_size: size of one aggregation window.
    """
    # One entry per panel; each tuple is (track key, legend label, colour key).
    panel_tracks = (
        (('ctcf_id', 'both', 'all'), ),
        (('forward', 'Forward', 'forward'),
         ('reverse', 'Reverse', 'reverse')),
        (('S', 'Same', 'S'),
         ('C', 'Convergent', 'C'),
         ('D', 'Divergent', 'D'),
         ('CD', 'Convergent-\nDivergent', 'CD')),
    )

    figsize = get_default_figsize()
    fig, axes = plt.subplots(3,
                             1,
                             sharex=True,
                             figsize=(figsize[0] * 1.5, figsize[1] * 3))

    for ax, tracks in zip(axes, panel_tracks):
        for track, legend_label, color_key in tracks:
            ax.plot(aggregations_by_bound_tot[track].mean(0),
                    label=legend_label,
                    color=ctcf_colors[color_key])
        ax.set_ylabel("Avg. CTCF sites per 5kb")
        ax.axvline(extended / window_size, color='black')
        ax.grid()
        ax.legend(loc='upper left')

    bottom = axes[2]
    bottom.set_xticks([0, extended / window_size, extended * 2 / window_size])
    bottom.set_xticklabels(['-250kb', '0', '+250kb'])
    bottom.set_xlabel("Distance from boundary center")

    fig.savefig(figures_path / "aggregations_by_boundary.pdf",
                bbox_inches='tight',
                transparent=True)
コード例 #8
0
def plot_triangle_enrichment_halves(pos_triangles_vs_ctcfs,
                                    pos_triangles,
                                    neg_triangles_vs_ctcfs,
                                    neg_triangles,
                                    path=figures_path /
                                    "triangle_enrichment.pdf",
                                    cmap='Greens'):
    """Show the left part of the positive matrix next to the right part of the negative one.

    Each matrix is reindexed by its triangles sorted by decreasing
    ``length``, rows with missing values are dropped and values are reduced
    to their sign. The positive matrix keeps the columns before
    ``width // 2 - 1``; the negative matrix keeps the columns from
    ``width // 2 - 2`` onwards.

    Args:
        pos_triangles_vs_ctcfs: DataFrame indexed by triangle uid (left panel).
        pos_triangles: DataFrame with ``triangle_uid`` and ``length`` columns.
        neg_triangles_vs_ctcfs: DataFrame indexed by triangle uid (right panel).
        neg_triangles: DataFrame with ``triangle_uid`` and ``length`` columns.
        path: destination of the saved figure.
        cmap: colormap passed to ``matshow``.
    """
    figsize = get_default_figsize()
    fig, axes = plt.subplots(1, 2, figsize=(figsize[0] * 2, figsize[1] * 1))
    left_ax, right_ax = axes[0], axes[1]

    # --- left panel: positive triangles, columns left of the centre ---
    pos_order = pos_triangles.sort_values(
        'length', ascending=False).triangle_uid.tolist()
    X_pos = pos_triangles_vs_ctcfs.reindex(index=pos_order).dropna().values
    X_pos[X_pos < 0] = -1
    X_pos[X_pos > 0] = 1
    X_pos = X_pos[:, :X_pos.shape[1] // 2 - 1]

    left_ax.matshow(X_pos,
                    aspect='auto',
                    cmap=cmap,
                    interpolation='bilinear')
    left_ax.set_yticklabels([])
    left_ax.set_xticks([0, X_pos.shape[1] // 2, X_pos.shape[1] - 1])
    left_ax.set_xticklabels(['-1Mb', '-500Kb', '0'])
    left_ax.xaxis.set_ticks_position('bottom')
    left_ax.set_xlabel("Distance from\nnegative inversion point")
    left_ax.set_title("Positive DI regions")

    # --- right panel: negative triangles, columns right of the centre ---
    neg_order = neg_triangles.sort_values(
        'length', ascending=False).triangle_uid.tolist()
    X_neg = neg_triangles_vs_ctcfs.reindex(index=neg_order).dropna().values
    X_neg[X_neg < 0] = -1
    X_neg[X_neg > 0] = 1
    X_neg = X_neg[:, X_neg.shape[1] // 2 - 2:]

    right_ax.matshow(X_neg,
                     aspect='auto',
                     cmap=cmap,
                     interpolation='bilinear')
    right_ax.set_yticklabels([])
    right_ax.set_yticks([])
    right_ax.set_xticks([0, X_neg.shape[1] // 2, X_neg.shape[1] - 1])
    right_ax.set_xticklabels(['0', '+500Kb', '+1Mb'])
    right_ax.set_xlabel("Distance from\nnegative inversion point")
    right_ax.set_title("Negative DI regions")
    right_ax.xaxis.set_ticks_position('bottom')

    fig.savefig(path, bbox_inches='tight', transparent=True)
コード例 #9
0
def plot_ctcf_patterns_vs_size(all_patterns):
    """Box plot of pattern ``size`` by ``n_ctcf_sites``, split by pattern class.

    Only patterns with 2, 3 or 4 sites are shown; outliers are hidden and the
    median line is drawn in red. The figure is saved as
    ``CTCF_patterns_vs_size.pdf`` under ``figures_path``.

    Args:
        all_patterns: DataFrame with ``n_ctcf_sites``, ``size`` and
            ``pattern_class`` columns.
    """
    pattern_classes = [
        'Same',
        'Convergent',
        'Divergent',
        "Convergent-Divergent",
    ]
    site_counts = [2, 3, 4]

    figsize = get_default_figsize()
    fig = plt.figure(figsize=(figsize[0]*2, figsize[1]*1.5))
    sns.boxplot(data=all_patterns,
                x='n_ctcf_sites',
                y='size',
                hue='pattern_class',
                order=site_counts,
                hue_order=pattern_classes,
                showfliers=False,
                palette=ctcf_colors,
                medianprops={'color': 'red'})
    # NOTE: pairwise t-test annotations (add_stat_annotation) used to be drawn
    # here but were already disabled in the original code.
    plt.legend(title="Pattern class")
    plt.xlabel("N. CTCF sites composing the pattern")
    plt.ylabel("Pattern size (bp)")
    plt.ticklabel_format(axis='y',
                         style='sci',
                         scilimits=(0, 0),
                         useMathText=True)
    fig.savefig(figures_path / "CTCF_patterns_vs_size.pdf",
                bbox_inches='tight',
                transparent=True)
コード例 #10
0
def plot_perc_regions_bearing_CTCF_site_cat(
        pois_ext_ctcf,
        catname='point_of_interest',
        path=figures_path / "perc_regions_bearing_CTCF_site_cat.pdf"):
    """Grouped bar plot of the per-category mean of the CTCF indicator columns.

    The ``all``, ``S``, ``CD``, ``D`` and ``C`` columns are averaged within
    each value of ``catname`` and shown as percentages on a 0-100% axis.

    Args:
        pois_ext_ctcf: DataFrame holding ``catname`` and the five columns.
        catname: name of the grouping column.
        path: destination of the saved figure.
    """
    site_columns = ['all', 'S', 'CD', 'D', 'C']
    category_means = pois_ext_ctcf.groupby(catname)[site_columns].mean()
    # Long format: one row per (category, column) pair for seaborn's hue.
    long_form = category_means.reset_index().melt(id_vars=catname)

    figsize = get_default_figsize()
    fig = plt.figure(figsize=(figsize[0] * 1.5, figsize[1]))
    sns.barplot(data=long_form,
                x=catname,
                y='value',
                hue='variable',
                palette=ctcf_colors)
    plt.grid(axis='y')
    plt.ylim(0, 1)

    # Relabel the existing tick positions as percentages.
    yticks, _ = plt.yticks()
    percent_labels = ["{:.0f}%".format(yi * 100) for yi in yticks]
    plt.yticks(yticks, percent_labels, rotation=0)

    plt.xlabel("TAD position")
    plt.ylabel("Percentage of regions\nbearing a CTCF site")
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    fig.savefig(path, bbox_inches='tight', transparent=True)
コード例 #11
0
def plot_cons_bounds_vs_gm12878_DI(
        aggregations_by_bound,
        extended,
        window_size,
        measure,
        ylabel="Average Directionality\nIndex on GM12878"):
    """Plot one mean profile per conservation level, side by side.

    For every key of ``aggregations_by_bound`` (in sorted order) the
    column-wise mean is drawn as a black line; the area below zero is filled
    in red and the area at or above zero in blue. The figure is saved as
    ``cons_bounds_vs_gm12878_<measure>.pdf`` under ``figures_path``.

    Args:
        aggregations_by_bound: mapping from level to an object whose
            ``mean(0)`` gives the profile to plot.
        extended: extension on each side of the centre; also used (divided by
            1000) for the "kb" tick labels.
        window_size: size of one aggregation window.
        measure: suffix used in the output file name.
        ylabel: y-axis label, shown on the first panel only.
    """
    levels = sorted(aggregations_by_bound.keys())

    figsize = get_default_figsize()
    # squeeze=False keeps ``axes`` two-dimensional even with a single level.
    fig, axes = plt.subplots(1,
                             len(levels),
                             sharex='col',
                             sharey=True,
                             tight_layout=True,
                             squeeze=False,
                             figsize=(figsize[0] * 7, figsize[1]))

    # Loop invariants: tick positions and their labels.
    xticks = [0, extended / window_size, extended * 2 / window_size]
    length_n = "{}".format(extended // 1000)
    xticklabels = ['-{}kb'.format(length_n), '0', '+{}kb'.format(length_n)]

    # Panels are addressed by position, so levels need not be exactly 1..n.
    for idx, level in enumerate(levels):
        ax = axes[0, idx]
        y = aggregations_by_bound[level].mean(0)
        xs = np.arange(y.shape[0])
        ax.plot(y, linewidth=1, color='black')
        ax.fill_between(xs, y, where=y < 0, facecolor='red')
        ax.fill_between(xs, y, where=y >= 0, facecolor='blue')
        if idx == 0:
            ax.set_ylabel(ylabel, fontsize="xx-large")
            ax.tick_params(axis='y', which='major')
        ax.grid()
        ax.set_title("$s = {}$".format(level), fontweight="bold")
        ax.set_xticks(xticks)
        ax.set_xticklabels(xticklabels)
        ax.set_xlabel("Distance from\nboundary center", fontsize='xx-large')

    fig.savefig(figures_path / "cons_bounds_vs_gm12878_{}.pdf".format(measure),
                bbox_inches='tight',
                transparent=True)
コード例 #12
0
def plot_triangle_silouette(pos_triangles_vs_ctcfs, pos_triangles,
                            neg_triangles_vs_ctcfs, neg_triangles, path):
    """Draw the positive matrix above the negative one with a diverging colormap.

    Each matrix is reindexed by its triangles sorted by decreasing
    ``length``, rows with missing values are dropped and values are reduced
    to their sign before being shown with ``matshow`` (``seismic_r``).

    Args:
        pos_triangles_vs_ctcfs: DataFrame indexed by triangle uid (top panel).
        pos_triangles: DataFrame with ``triangle_uid`` and ``length`` columns.
        neg_triangles_vs_ctcfs: DataFrame indexed by triangle uid (bottom panel).
        neg_triangles: DataFrame with ``triangle_uid`` and ``length`` columns.
        path: destination of the saved figure. ``None`` falls back to
            ``figures_path / "triangle_enrichment.pdf"``, the location that
            was previously always used.
    """
    figsize = get_default_figsize()
    fig, axes = plt.subplots(2, 1, figsize=(figsize[0] * 1.5, figsize[1] * 2))

    pos_order = pos_triangles.sort_values(
        'length', ascending=False).triangle_uid.tolist()
    X_pos = pos_triangles_vs_ctcfs.reindex(index=pos_order).dropna().values
    X_pos[X_pos < 0] = -1
    X_pos[X_pos > 0] = 1

    axes[0].matshow(X_pos,
                    aspect='auto',
                    cmap='seismic_r',
                    interpolation='bilinear')
    # The top panel carries no ticks at all.
    axes[0].set_xticks([])
    axes[0].set_yticks([])

    neg_order = neg_triangles.sort_values(
        'length', ascending=False).triangle_uid.tolist()
    X_neg = neg_triangles_vs_ctcfs.reindex(index=neg_order).dropna().values
    X_neg[X_neg < 0] = -1
    X_neg[X_neg > 0] = 1

    axes[1].matshow(X_neg,
                    aspect='auto',
                    cmap='seismic_r',
                    interpolation='bilinear')
    axes[1].set_yticks([])
    # Ticks are based on the matrix actually drawn in this panel.
    axes[1].set_xticks([0, X_neg.shape[1] / 2, X_neg.shape[1]])
    axes[1].set_xticklabels(['-1Mb', '0', '+1Mb'])
    axes[1].set_xlabel("Distance from (+) to (-) inversion point")
    axes[1].xaxis.set_ticks_position('bottom')

    # Honour the caller-supplied destination instead of a hard-coded file.
    if path is None:
        path = figures_path / "triangle_enrichment.pdf"
    fig.savefig(path, bbox_inches='tight', transparent=True)
コード例 #13
0
def plot_conservation_shift_test(regions_n_cross):
    """Line plot of ``count`` against ``shift``, one line per conservation value.

    The ``conservation`` column is turned into labels of the form
    "<value> times"; lines are drawn for 1 to 7 times with the "Reds"
    palette. The figure is saved as ``cons_tads_vs_PC-HIC.pdf`` under
    ``figures_path``.

    Args:
        regions_n_cross: DataFrame with ``shift``, ``count`` and
            ``conservation`` columns.
    """
    # Text labels make seaborn treat the hue as categorical.
    labelled = regions_n_cross.assign(
        conservation=lambda x: x.conservation.astype(str) + " times")
    hue_levels = ["{} times".format(x) for x in range(1, 8)]

    figsize = get_default_figsize()
    fig = plt.figure(figsize=(figsize[0] * 2, figsize[1] * 1.2))
    sns.lineplot(data=labelled,
                 x='shift',
                 y='count',
                 hue='conservation',
                 hue_order=hue_levels,
                 palette='Reds',
                 linewidth=2,
                 legend='full')
    plt.xlabel("Shift")
    plt.ylabel("Average n. overlapping\nPC-HiC interactions")
    plt.ticklabel_format(axis='x',
                         style='sci',
                         scilimits=(0, 0),
                         useMathText=True)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)
    fig.savefig(figures_path / "cons_tads_vs_PC-HIC.pdf",
                bbox_inches='tight',
                transparent=True)
コード例 #14
0
def plot_consensus_boundaries_intersection_with_GM12878(x):
    """Plot how many boundaries intersect a GM12878 boundary, per conservation score.

    The top panel counts boundaries per ``n_cell_types``, split by whether
    ``n_gm12878_bounds`` is positive. The bottom panel shows, per
    ``n_cell_types``, the fraction of boundaries with a positive
    ``n_gm12878_bounds``. The figure is saved as
    ``cons_bounds_vs_gm12878_bounds.pdf`` under ``figures_path``.

    Args:
        x: DataFrame with ``n_cell_types`` and ``n_gm12878_bounds`` columns.
    """
    hit_label = 'Intersects GM12878 boundary'
    miss_label = 'Does not intersect GM12878 boundary'

    def _label(n_bounds):
        if n_bounds > 0:
            return hit_label
        return miss_label

    annotated = x.assign(
        has_gm12878_bound=lambda df: df.n_gm12878_bounds.map(_label))

    figsize = get_default_figsize()
    fig, axes = plt.subplots(2,
                             1,
                             sharex=True,
                             figsize=(figsize[0] * 1.5, figsize[1] * 2))
    top_ax, bottom_ax = axes[0], axes[1]

    sns.countplot(data=annotated,
                  x='n_cell_types',
                  hue='has_gm12878_bound',
                  ax=top_ax,
                  hue_order=[hit_label, miss_label])
    top_ax.grid(axis='y')
    top_ax.set_ylabel("# boundaries")
    # Rebuild the legend so it carries no title.
    top_ax.legend()

    # Row-normalise the counts to get the fraction per conservation score.
    counts = annotated.groupby(['n_cell_types',
                                'has_gm12878_bound']).size().unstack()
    fractions = counts.div(counts.sum(1), axis=0)
    fractions[hit_label].plot.bar(ax=bottom_ax)

    # The pyplot calls below act on the current axes (the bottom panel).
    plt.ylim(0, 0.8)
    yticks, _ = plt.yticks()
    percent_labels = ["{:.0f}%".format(yi * 100) for yi in yticks]
    plt.yticks(yticks, percent_labels, rotation=0)
    plt.xticks(rotation=0)
    plt.xlabel("Boundary conservation ($s$)")
    plt.ylabel("% boundaries overlapping\nwith a GM12878 boundary")
    plt.grid(axis='y')
    fig.savefig(figures_path / "cons_bounds_vs_gm12878_bounds.pdf",
                bbox_inches='tight',
                transparent=True)
コード例 #15
0
def plot_simulation_stats(simulation_metrics, name_to_full_name, name_to_color,
                          name_to_style):
    """Plot one curve per simulation in two panels.

    The left panel plots ``stats[1]`` against ``stats[0]``; the right panel
    plots whatever ``order_for_ROC(stats[1], stats[2])`` returns. Both panels
    show the value axes as percentages. The figure is saved as
    ``simulation_stats.pdf`` under ``figures_path``.

    Args:
        simulation_metrics: mapping from simulation name to an indexable
            ``stats`` object with at least three entries.
        name_to_full_name: mapping from simulation name to legend label.
        name_to_color: mapping from simulation name to line colour.
        name_to_style: mapping from simulation name to line style
            (presumably matplotlib linestyle specs such as '-' or '--';
            verify against the caller).
    """
    from matplotlib.ticker import PercentFormatter

    figsize = get_default_figsize()
    fig, axes = plt.subplots(1,
                             2,
                             figsize=(figsize[0] * 2, figsize[1]),
                             tight_layout=True)
    for name, stats in simulation_metrics.items():
        # ``linestyle`` is the Line2D property; ``style`` is not accepted by
        # ``Axes.plot`` and raises.
        line_kwargs = dict(label=name_to_full_name[name],
                           color=name_to_color[name],
                           linestyle=name_to_style[name])
        axes[0].plot(stats[0], stats[1], **line_kwargs)
        axes[1].plot(*order_for_ROC(stats[1], stats[2]), **line_kwargs)

    # A formatter keeps labels in sync with the ticks actually drawn, unlike
    # fixed label lists applied to an automatic locator. Each axis gets its
    # own formatter instance.
    axes[0].yaxis.set_major_formatter(PercentFormatter(xmax=1, decimals=0))
    axes[0].legend(title='Simulation')
    axes[0].set_xlabel(
        "Minimum threshold\n(Loop occurences in the simulation)")
    axes[0].set_ylabel("RECALL (% HiCCUP loops\nrecovered by simulation)")
    axes[0].grid()

    axes[1].yaxis.set_major_formatter(PercentFormatter(xmax=1, decimals=0))
    axes[1].xaxis.set_major_formatter(PercentFormatter(xmax=1, decimals=0))
    axes[1].grid()
    axes[1].legend(title='Simulation')
    axes[1].set_xlabel("RECALL (% HiCCUP loops\nrecovered by simulation)")
    axes[1].set_ylabel("PRECISION (% simulated\nloops confirmed by HiCCUP)")

    fig.savefig(figures_path / "simulation_stats.pdf",
                bbox_inches='tight',
                transparent=True)
コード例 #16
0
def plot_patterns_by_window_and_class(pattern_enrichment_by_distance,
                                      path=figures_path / "CTCF_patterns_vs_size.pdf",
                                      vmin=-0.06, vmax=0.06):
    """Draw one log10-enrichment heatmap per pattern class, stacked vertically.

    The index of ``pattern_enrichment_by_distance`` is relabelled with
    ``window_string_converter``. For every entry of
    ``patternclass_to_pattern`` the rows of the transposed table named by
    that class are log10-transformed and drawn with ``sns.heatmap``. Panel
    heights are proportional to the number of patterns in each class and a
    single colour bar is shared by all panels.

    Args:
        pattern_enrichment_by_distance: DataFrame with one row per window and
            one column per pattern.
        path: destination of the saved figure.
        vmin: lower limit of the colour scale.
        vmax: upper limit of the colour scale.
    """
    figsize = get_default_figsize()
    n_classes = len(patternclass_to_pattern)
    # Derive the ratios from the same mapping (and order) the loop below
    # iterates, so panels and heights cannot get out of step.
    height_ratios = [
        len(pnames) for pnames in patternclass_to_pattern.values()
    ]
    # squeeze=False keeps ``axes`` indexable even with a single class.
    fig, axes = plt.subplots(n_classes,
                             1,
                             sharex=True,
                             squeeze=False,
                             figsize=(figsize[0]*1.6,
                                      figsize[1]*n_classes*0.5),
                             gridspec_kw={'height_ratios': height_ratios},
                             tight_layout=True)
    axes = axes[:, 0]
    cbar_ax = fig.add_axes([1, .3, .03, .4])
    pebd_names = pattern_enrichment_by_distance\
        .assign(index=lambda x: x.index.map(window_string_converter))\
        .set_index('index')

    for i, (pclass, pnames) in enumerate(patternclass_to_pattern.items()):
        first_panel = i == 0
        last_panel = i == n_classes - 1
        # Vectorised log10 replaces the deprecated ``DataFrame.applymap``.
        log_enrichment = np.log10(pebd_names.T.loc[pnames].astype(float))
        # Only the first panel draws the (shared) colour bar.
        sns.heatmap(log_enrichment,
                    cbar=first_panel,
                    cmap='bwr', linewidth=0.01, vmin=vmin, vmax=vmax,
                    ax=axes[i], linecolor='black',
                    cbar_ax=cbar_ax if first_panel else None,
                    cbar_kws={'label': "$log_{10}(enrichment)$"})
        for tick in axes[i].get_yticklabels():
            tick.set_rotation(0)
        if last_panel:
            axes[i].set_xlabel("Clustering window")
        else:
            axes[i].set_xlabel("")
            axes[i].tick_params(axis='x', bottom=False)
        # Class name written vertically to the left of its panel.
        axes[i].text(x=-0.12, y=0.5, s=pclass, horizontalalignment='center',
                     verticalalignment='center',
                     rotation=90, transform=axes[i].transAxes)
    fig.savefig(path, bbox_inches='tight', transparent=True)
コード例 #17
0
    pos_triangles_vs_ctcfs,
    pos_triangles,
    neg_triangles_vs_ctcfs,
    neg_triangles,
    figures_path / "triangle_enrichment_CTCF_complete_square.pdf",
    cmap="seismic_r")

# Render the "halves" variant of the triangle enrichment figure with a
# diverging colormap and write it to its own PDF.
plot_triangle_enrichment_halves(pos_triangles_vs_ctcfs,
                                pos_triangles,
                                neg_triangles_vs_ctcfs,
                                neg_triangles,
                                figures_path /
                                "triangle_enrichment_CTCF_square.pdf",
                                cmap="seismic_r")

# Two-panel figure; only ax[0] is populated in the visible part of this
# snippet. NOTE(review): ax[1] and the save call are presumably handled by
# code beyond this excerpt - confirm.
figsize = get_default_figsize()
fig, ax = plt.subplots(1, 2, figsize=(figsize[0] * 2, figsize[1]))
# One row per TAD, ordered by decreasing left_size.
tbyleft = tads.sort_values("left_size", ascending=False)
# left_size is negated so these bars extend to the left of zero...
ax[0].barh(np.arange(tbyleft.shape[0]),
           -tbyleft.left_size,
           0.5,
           label='Positive DI region')
# ...while right_size bars extend to the right.
ax[0].barh(np.arange(tbyleft.shape[0]),
           tbyleft.right_size,
           0.5,
           label='Negative DI region')
# Flip the y axis so the first row (largest left_size) is drawn at the top.
ax[0].invert_yaxis()
ax[0].set_xlim(-1e6, +1e6)
ax[0].set_xticks([-1e6, -5e5, 0, 5e5, 1e6])
ax[0].set_xticklabels(['-1 Mb', '-500 Kb', '0', '+500 Kb', '1 Mb'])
# Legend anchored above the axes area.
ax[0].legend(bbox_to_anchor=(0.5, 1.21), loc='upper center')
コード例 #18
0
def plot_aggregations_by_conservation(aggregations_by_bound,
                                      windows_with_ctcf):
    """Plot the aggregated CTCF tracks in a 3 x (number of levels) grid.

    Each column corresponds to one distinct value of
    ``windows_with_ctcf.n_cell_types`` (in sorted order). Row 1 shows the
    ``ctcf_id`` track, row 2 ``forward`` and ``reverse``, row 3 ``S``, ``C``,
    ``D`` and ``CD``; every curve is the column-wise mean of
    ``aggregations_by_bound[(level, track)]``. Legends and y labels are shown
    in the first column only. The figure is saved as
    ``aggregations_by_conservation.pdf`` under ``figures_path``.

    Note: ``extended`` and ``window_size`` are not parameters; they are read
    from the enclosing (module) scope.

    Args:
        aggregations_by_bound: mapping keyed by ``(level, track)`` tuples.
        windows_with_ctcf: DataFrame with an ``n_cell_types`` column.
    """
    # One entry per row; each tuple is (track key, legend label, colour key).
    row_tracks = (
        (('ctcf_id', 'both', 'all'), ),
        (('forward', 'Forward', 'forward'),
         ('reverse', 'Reverse', 'reverse')),
        (('S', 'Same', 'S'),
         ('C', 'Convergent', 'C'),
         ('D', 'Divergent', 'D'),
         ('CD', 'Convergent-Divergent', 'CD')),
    )

    levels = sorted(windows_with_ctcf.n_cell_types.unique())
    n_figs = len(levels)

    figsize = get_default_figsize()
    # squeeze=False keeps ``axes`` two-dimensional even with a single level.
    fig, axes = plt.subplots(3,
                             n_figs,
                             sharex='col',
                             sharey='row',
                             tight_layout=False,
                             squeeze=False,
                             figsize=(figsize[0] * n_figs, figsize[1] * 3))

    # Columns are addressed by position, so levels need not be exactly 1..n.
    for col, level in enumerate(levels):
        for row, tracks in enumerate(row_tracks):
            ax = axes[row, col]
            for track, legend_label, color_key in tracks:
                ax.plot(aggregations_by_bound[(level, track)].mean(0),
                        label=legend_label,
                        color=ctcf_colors[color_key])
            if col == 0:
                ax.legend(loc='upper left')
                ax.set_ylabel("Avg. CTCF sites per 5kb")
            else:
                ax.legend().set_visible(False)
            ax.grid()
            ax.tick_params(axis='y')

        axes[0, col].set_title("$s = {}$".format(level),
                               fontweight="bold",
                               fontsize='xx-large')

        bottom = axes[2, col]
        bottom.set_xticks(
            [0, extended / window_size, extended * 2 / window_size])
        bottom.set_xticklabels(['-250kb', '0', '+250kb'])
        bottom.set_xlabel("Distance from\nboundary center (kb)")

    fig.savefig(figures_path / "aggregations_by_conservation.pdf",
                bbox_inches='tight',
                transparent=True)