def plot_revigo(
        rev,
        outline=2,
        expand_points=(1.05, 1.2),
        figure_size=(8, 8),
        font_size=8,
        point_size=3,
        point_alpha=0.7,
        palette='RdPu',
        dispensability_cutoff=1.,
        show_all_labels=False,
        text_column='name',
        term_size_limit=None,
):
    """Build a labelled scatter plot from a REVIGO-style result table.

    Every row of ``rev`` is drawn as a point at (``plot_X``, ``plot_Y``),
    filled by the ``neglog10`` column and sized by the ``frequency`` column.
    A subset of rows is additionally annotated with text labels.

    Parameters
    ----------
    rev
        Table with the columns ``plot_X``, ``plot_Y``, ``neglog10``,
        ``frequency``, ``eliminated``, ``dispensability``, the column named
        by ``text_column`` and, when ``term_size_limit`` is given,
        ``term_size``. Presumably a pandas DataFrame (it is filtered with
        boolean masks) -- confirm against callers.
    outline
        Line width of the white stroke drawn around each text label.
    expand_points
        Forwarded to the text-adjustment step as ``expand_points``.
    figure_size
        Figure size passed to the plot theme.
    font_size
        Size of the label text.
    point_size
        Currently unused: point size is mapped to the ``frequency`` column.
    point_alpha
        Transparency of the points.
    palette
        Name of the sequential palette used for the fill scale.
    dispensability_cutoff
        Only rows with ``dispensability`` strictly below this value are
        labelled (ignored when ``show_all_labels`` is true).
    show_all_labels
        When true, label every row instead of the filtered subset.
    text_column
        Name of the column holding the label text.
    term_size_limit
        When not None, only rows with ``term_size`` strictly below this
        value are labelled (ignored when ``show_all_labels`` is true).

    Returns
    -------
    The assembled plotnine ``ggplot`` object (not drawn or saved here).
    """
    import plotnine as p9
    import matplotlib.patheffects as path_effects

    # White halo behind the label text; its width is the ``outline`` argument.
    pe = [
        path_effects.Stroke(linewidth=outline, foreground='white'),
        path_effects.Normal(),
    ]

    if show_all_labels:
        lbl_df = rev
    else:
        labelled = (rev.eliminated == 0) & (rev.dispensability < dispensability_cutoff)
        lbl_df = rev[labelled]
        if term_size_limit is not None:
            lbl_df = lbl_df[lbl_df.term_size < term_size_limit]

    # The coordinates of *all* points (not just labelled ones) are handed to
    # the text-adjustment step -- presumably so labels also avoid unlabelled
    # points; confirm against the adjustText documentation.
    adjust_text = {
        'expand_points': expand_points,
        'arrowprops': {
            'arrowstyle': '-'
        },
        'x': rev.plot_X.values,
        'y': rev.plot_Y.values
    }

    g = (
        p9.ggplot(p9.aes(x='plot_X', y='plot_Y'), data=rev)
        + p9.geom_point(
            p9.aes(fill='neglog10', size='frequency'),
            color='black',
            alpha=point_alpha)
        + p9.geom_text(
            p9.aes(label=text_column),
            data=lbl_df,
            size=font_size,
            adjust_text=adjust_text,
            path_effects=pe)
        + p9.theme_bw()
        + p9.scale_fill_distiller(type='seq', palette=palette, direction=1)
        + p9.labs(
            x='Semantic similarity space',
            y='',
            fill='-log10(adj. p-value)',
            size='Term frequency')
        + p9.scale_size_continuous(range=(2, 7), trans='log10')
        + p9.theme(
            figure_size=figure_size,
            axis_text_x=p9.element_blank(),
            axis_text_y=p9.element_blank(),
            axis_ticks=p9.element_blank())
    )
    return g
def test_text_aesthetics():
    """Stack several text layers, each exercising a different aesthetic.

    The layers are offset along x ('x', 'x+1', 'x+2', 'x+3', 'x+5') and map,
    in turn, the label, angle, alpha, color and size aesthetics.
    """
    base = ggplot(df, aes(y='y', label='label'))
    components = [
        geom_text(aes('x', label='label'), size=15, ha='left'),
        geom_text(
            aes('x+1', angle='angle'),
            size=15, va='top', show_legend=False),
        geom_text(
            aes('x+2', label='label', alpha='z'),
            size=15, show_legend=False),
        geom_text(aes('x+3', color='factor(z)'), size=15, show_legend=False),
        geom_text(aes('x+5', size='z'), ha='right', show_legend=False),
        scale_size_continuous(range=(12, 30)),
        scale_y_continuous(limits=(-0.5, n - 0.5)),
    ]

    # Fold the components onto the base plot in the order listed.
    p = base
    for component in components:
        p = p + component

    # NOTE(review): comparing the plot with a string presumably goes through
    # a test-suite hook that checks against a stored baseline -- confirm.
    assert p == 'text_aesthetics'
def ggpca(x, y=None, center='col', scale='none', rlab=False, clab=False,
          cshow=None, rsize=4, csize=2, lsize=10, lnudge=0.03, ralpha=0.6,
          calpha=1.0, clightalpha=0, rname='sample', cname='variable',
          lname='', grid=True, printit=False, xsvd=None, invert1=False,
          invert2=False, colscale=None, **kwargs):
    """Draw the first two principal components of ``x`` as a biplot.

    Rows of ``x`` are plotted from ``xsvd[0]`` and, when ``cshow > 0``,
    columns are plotted from ``xsvd[2]``; each set of coordinates is divided
    by the range of its first component.

    Parameters
    ----------
    x
        Data table; columns containing any null value are dropped before
        anything else is derived from it.
    y
        Optional per-row class labels, looked up with ``y.loc[x.index]``.
        When None, every row gets the class ``rname``.
    center, scale
        Passed to ``svdForPca`` when ``xsvd`` is not supplied.
    rlab, clab
        Row / column labels. ``True`` uses ``x.index`` / ``x.columns``,
        ``False`` uses empty labels, any other value is used as given.
    cshow
        Number of columns to show (default: all). When smaller than the
        number of columns, those with the largest PC1^2 + PC2^2 are kept.
    rsize, csize
        Point sizes for rows and columns.
    lsize, lnudge
        Label text size and vertical nudge.
    ralpha, calpha
        Alpha for rows and columns.
    clightalpha
        When > 0, columns outside the ``cshow`` selection are kept with this
        alpha and an empty label instead of being removed.
    rname, cname
        Class names used for rows (when ``y`` is None) and for columns.
    lname
        Title of the colour legend.
    grid
        When false, grid lines and panel background are removed.
    printit
        When true, ``print`` the plot before returning it.
    xsvd
        Precomputed decomposition, indexed as ``xsvd[0]`` (row coordinates),
        ``xsvd[1]`` (values whose squares give the explained variance) and
        ``xsvd[2]`` (column coordinates, one component per row).
    invert1, invert2
        Flip the sign of PC1 / PC2.
    colscale
        Colours for the classes. When None, a built-in palette is used if
        there are fewer than 8 classes; otherwise the default colour scale
        is left in place.
    **kwargs
        Accepted but not used.

    Returns
    -------
    The assembled plot object.
    """
    # Drop incomplete columns first so that the defaults derived from ``x``
    # below (``cshow``, ``clab``) describe the same columns as the
    # decomposition; deriving them from the unfiltered frame can leave the
    # column labels longer than the loadings.
    x = x.loc[:, x.isnull().sum(axis=0) == 0]

    if cshow is None:
        cshow = x.shape[1]
    if isinstance(rlab, bool):
        rlab = x.index if rlab else ''
    if isinstance(clab, bool):
        clab = x.columns if clab else ''
    if xsvd is None:
        xsvd = svdForPca(x, center, scale)

    # Scale factors: range of the first component for rows and for columns.
    rsf = np.max(xsvd[0].iloc[:, 0]) - np.min(xsvd[0].iloc[:, 0])
    csf = np.max(xsvd[2].iloc[0, :]) - np.min(xsvd[2].iloc[0, :])
    size_range = sorted([csize, rsize])
    alpha_range = sorted([calpha, ralpha])

    ggd = pd.DataFrame({
        'PC1': xsvd[0].iloc[:, 0] / rsf,
        'PC2': xsvd[0].iloc[:, 1] / rsf,
        'label': rlab,
        'size': rsize,
        'alpha': ralpha
    })

    cclass = []
    if cshow > 0:
        cdata = pd.DataFrame({
            'PC1': xsvd[2].iloc[0, :] / csf,
            'PC2': xsvd[2].iloc[1, :] / csf,
            'label': clab,
            'size': csize,
            'alpha': calpha
        })
        if cshow < x.shape[1]:
            # Rank columns by squared distance from the origin in PC1/PC2.
            cscores = cdata['PC1']**2 + cdata['PC2']**2
            keep = cscores.sort_values(ascending=False).head(cshow).index
            if clightalpha > 0:
                # Keep the remaining columns, but faded and unlabelled.
                cdata.loc[~cdata.index.isin(keep), 'label'] = ''
                cdata.loc[~cdata.index.isin(keep), 'alpha'] = clightalpha
                alpha_range = [
                    np.min([alpha_range[0], clightalpha]),
                    np.max([alpha_range[1], clightalpha])
                ]
            else:
                cdata = cdata.loc[cdata.index.isin(keep)]
        # Columns go first so that ``cclass`` lines up with the row order.
        ggd = pd.concat([cdata, ggd])
        cclass = [cname] * cdata.shape[0]

    if invert1:
        ggd['PC1'] = -ggd['PC1']
    if invert2:
        ggd['PC2'] = -ggd['PC2']

    if y is not None:
        ggd['class'] = cclass + list(y.loc[x.index])
    else:
        ggd['class'] = cclass + ([rname] * x.shape[0])

    ggo = gg.ggplot(
        ggd,
        gg.aes(x='PC1', y='PC2', color='class', size='size', alpha='alpha',
               label='label'))
    ggo += gg.geom_hline(yintercept=0, color='lightgray')
    ggo += gg.geom_vline(xintercept=0, color='lightgray')
    ggo += gg.geom_point()
    ggo += gg.theme_bw()
    ggo += gg.geom_text(nudge_y=lnudge, size=lsize, show_legend=False)

    n_classes = len(ggd['class'].unique())
    if colscale is None and n_classes < 8:
        default_colors = [
            'darkslategray', 'goldenrod', 'lightseagreen', 'orangered',
            'dodgerblue', 'darkorchid'
        ]
        colscale = default_colors[0:(n_classes - 1)] + ['gray']
        # Special-case the small default palettes.
        if len(colscale) == 2 and cshow > 0:
            colscale = ['black', 'darkgray']
        if len(colscale) == 2 and cshow == 0:
            colscale = ['black', 'red']
        if len(colscale) == 3:
            colscale = ['black', 'red', 'darkgray']

    if colscale is not None:
        ggo += gg.scale_color_manual(values=colscale, name=lname)
    else:
        # No palette available (8+ classes and none supplied): keep the
        # default colour scale and only set the legend title.
        ggo += gg.labs(color=lname)

    ggo += gg.scale_size_continuous(guide=False, range=size_range)
    ggo += gg.scale_alpha_continuous(guide=False, range=alpha_range)

    # Share of the summed squares of xsvd[1] carried by each component.
    total_var = (xsvd[1]**2).sum()
    ggo += gg.xlab('PC1 (' +
                   str(np.round(100 * xsvd[1][0]**2 / total_var, 1)) +
                   '% explained var.)')
    ggo += gg.ylab('PC2 (' +
                   str(np.round(100 * xsvd[1][1]**2 / total_var, 1)) +
                   '% explained var.)')

    if not grid:
        ggo += gg.theme(panel_grid_minor=gg.element_blank(),
                        panel_grid_major=gg.element_blank(),
                        panel_background=gg.element_blank())
    ggo += gg.theme(axis_ticks=gg.element_blank(),
                    axis_text_x=gg.element_blank(),
                    axis_text_y=gg.element_blank())

    if printit:
        print(ggo)
    return ggo
dataset = "dmso_treated"

# Scatter plot of the embedding: fill encodes clone type, shape encodes batch
# and point size encodes cell count.
umap_resistant_type_gg = (
    gg.ggplot(embedding_df, gg.aes(x="x", y="y"))
    + gg.geom_point(
        gg.aes(fill="Metadata_clone_type", shape="Metadata_batch", size="Metadata_cell_count"),
        color='black', alpha=0.6)
    + gg.theme_bw()
    + gg.xlab("UMAP (X)")
    + gg.ylab("UMAP (Y)")
    + gg.ggtitle("DMSO treated samples")
    + gg.scale_shape_manual(name="Batch", values=[".", "+", "x"])
    + gg.scale_fill_manual(name="Clone type", values=["#1F8AA5", "#E98831"])
    + gg.scale_size_continuous(name="Cell count")
    + gg.theme(
        strip_text=gg.element_text(size=6, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )
)

# Write the figure once per configured extension. Each extension is appended
# directly to the base path, so it presumably includes its leading dot --
# confirm where save_file_extensions is defined.
file = os.path.join("figures", "umap", f"{dataset}_umap_resistant_type")
for extension in save_file_extensions:
    umap_resistant_type_gg.save(
        filename='{}{}'.format(file, extension), height=3, width=3.5, dpi=400)

# Bare expression so the notebook cell displays the plot.
umap_resistant_type_gg


# In[10]: