def plot_frequency(n=200):
    """
    Save a histogram showing how n tweets are spread over time.

    Parameters
    ----------
    n: int
        Number of tweets requested from ``gather_tweets``.

    Returns
    -------
    None. The histogram is written to ``static/test.png``.

    Notes
    -----
    The data returned by ``gather_tweets`` is assumed to provide ``Date``
    and ``Author`` fields (they are the mapped aesthetics) -- TODO confirm.
    """
    import pandas
    from mizani.breaks import date_breaks
    from mizani.formatters import date_format
    from plotnine import (
        ggplot,
        aes,
        geom_histogram,
        scale_x_datetime,
        labs,
        theme_minimal,
        ggsave,
    )
    from Mod_1_API import gather_tweets

    tweets = pandas.DataFrame(gather_tweets(n))

    # One axis break per week; bars are filled by tweet author.
    weekly_axis = scale_x_datetime(breaks=date_breaks('1 week'))
    axis_titles = labs(x="Time in weeks", y="Number of tweets by source")

    histogram = ggplot(tweets, aes(x='Date', fill='Author'))
    histogram = histogram + geom_histogram()
    histogram = histogram + weekly_axis
    histogram = histogram + axis_titles
    histogram = histogram + theme_minimal()

    ggsave(plot=histogram, filename="test.png", path="static/")
def plot(i):
    """Build the scatter plot of x against y colored by ``colors[i]``.

    The color scale is fixed to the limits (1, 7) and the module-level
    ``_theme`` is applied on top of ``theme_minimal``.
    """
    scatter = qplot(x, y, color=colors[i], xlab='x', ylab='y')
    scatter = scatter + lims(color=(1, 7))
    scatter = scatter + labs(color='color')
    scatter = scatter + theme_minimal()
    return scatter + _theme
def _multiplot_load_hist(entry, smooth=100, loss_padd=None):
    """Load one loss history and turn it into a long-format data frame.

    Kept at module level (not nested in ``multiplot``) so that it can be
    pickled and shipped to pool workers.

    Args:
        entry: ``(name, file)`` pair; ``file`` is handed to ``np.loadtxt``.
        smooth: Sigma of the Gaussian smoothing, or ``False`` to skip it.
        loss_padd: If given and the smallest loss is negative, the history
            is shifted so that its minimum equals this value.

    Returns:
        tuple: ``(name, frame)``; ``frame`` is ``None`` when the file could
        not be read or holds no finite value.
    """
    name, file = entry
    try:
        hist = np.atleast_1d(np.loadtxt(file))
    except OSError:
        warn = "{} could not be loaded with np.loadtxt({})."
        warnings.warn(warn.format(name, file), UserWarning)
        return name, None

    is_fine = np.isfinite(hist)
    if not is_fine.any():
        return name, None

    # Keep only finite entries but remember their original positions.
    iters = np.where(is_fine)[0]
    hist = hist[is_fine]
    lb = hist.min()
    if loss_padd is not None and lb < 0:
        hist += loss_padd - lb
        lb = loss_padd

    ldf = pd.DataFrame({
        "loss": hist,
        "iteration": iters,
        "model": [name] * len(hist),
    })
    if smooth is not False:
        if lb > 0:
            # Strictly positive losses are smoothed in log space.
            ldf["sloss"] = np.exp(
                gaussian_filter1d(np.log(hist), sigma=smooth))
        else:
            ldf["sloss"] = gaussian_filter1d(hist, sigma=smooth)
    return name, ldf


def multiplot(files, smooth=100, alpha=0.6, loss_padd=None):
    """Plot the loss histories of several models on a log-scaled y axis.

    Args:
        files: Mapping ``{model name: history file}``. A single path, or a
            list/tuple of paths, is also accepted; the path text is then
            used as the model name.
        smooth: Sigma for the Gaussian-smoothed overlay line; ``False``
            disables the overlay.
        alpha: Alpha of the drawn lines.
        loss_padd: Forwarded to the loader, see ``_multiplot_load_hist``.

    Returns:
        tuple: ``(plot, df)`` where ``df`` concatenates the histories that
        could be loaded (columns ``loss``, ``iteration``, ``model`` and,
        when smoothing is on, ``sloss``). ``df`` is empty when nothing
        could be loaded.
    """
    from functools import partial

    # The original wrapped non-dict input in a list and then called
    # ``.items()`` on it, which always raised AttributeError.
    if isinstance(files, (list, tuple)):
        files = {str(file): file for file in files}
    elif not isinstance(files, dict):
        files = {str(files): files}

    tasks = list(files.items())
    loader = partial(_multiplot_load_hist, smooth=smooth, loss_padd=loss_padd)

    frames = []
    with mp.Pool() as pool:
        for name, ldf in tqdm(pool.imap(loader, tasks),
                              total=len(tasks),
                              desc="models"):
            if ldf is not None:
                frames.append(ldf)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(frames) if frames else pd.DataFrame()

    pl = (pn.ggplot(pn.aes("iteration", "loss", color="model"), df) +
          pn.geom_line(alpha=alpha) + pn.scale_y_log10() + pn.theme_minimal())
    if smooth is not False:
        pl += pn.geom_line(pn.aes(y="sloss"), size=1, alpha=alpha)
    return pl, df
def plot(i):
    """Build the i-th scatter plot of x against y.

    Panel 2 is drawn without a color aesthetic; every other panel maps
    ``colors[i]`` to color with the scale limits fixed to (1, 7).
    """
    if i != 2:
        colored = qplot(x, y, color=colors[i], xlab='x', ylab='y')
        colored = colored + lims(color=(1, 7))
        colored = colored + labs(color='color')
        return colored + theme_minimal()

    return qplot(x, y, xlab='x', ylab='y') + theme_minimal()
def plot(i):
    """Build the i-th scatter plot of x against y.

    ``colors[i]`` is mapped to the color aesthetic for panel 2 and to the
    stroke aesthetic for every other panel; the matching scale is limited
    to (1, 7) and its legend is titled after the aesthetic.
    """
    values = colors[i]
    aesthetic = 'color' if i == 2 else 'stroke'

    scatter = qplot(x, y, **{aesthetic: values}, xlab='x', ylab='y')
    scatter = scatter + lims(**{aesthetic: (1, 7)})
    scatter = scatter + labs(**{aesthetic: aesthetic})
    return scatter + theme_minimal()
def plot(i):
    """Build the scatter plot of x against y colored by ``colors[i]``.

    Panel 2 uses the color limits (3, 7); all other panels use (1, 7).
    The module-level ``_theme`` is applied on top of ``theme_minimal``.
    """
    lower = 3 if i == 2 else 1
    color_limits = lims(color=(lower, 7))

    scatter = qplot(x, y, color=colors[i], xlab='x', ylab='y')
    scatter = scatter + color_limits
    scatter = scatter + labs(color='color')
    scatter = scatter + theme_minimal()
    return scatter + _theme
def plot_hists(df, out=None, **kwargs):
    r"""Construct histograms

    Pivot the selected columns into long format and draw one 30-bin
    histogram per column, each in its own free-scaled facet. Often used to
    visualize the results of random sampling for multiple outputs. Usually
    called as a dispatch from plot_auto().

    Args:
        df (Pandas DataFrame): Data holding the columns to plot
        out (list of strings): Variables to plot
        **kwargs: Accepted but not used by this function

    Returns:
        Result of piping the pivoted data into the ggplot histogram object

    Raises:
        ValueError: If out is not provided

    Examples:

        >>> import grama as gr
        >>> import matplotlib.pyplot as plt
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> ## Dispatch from autoplotter
        >>> (
        >>>     md
        >>>     >> gr.ev_sample(n=100, df_det="nom")
        >>>     >> gr.pt_auto()
        >>> )
        >>> ## Re-create without metadata
        >>> (
        >>>     md
        >>>     >> gr.ev_sample(n=100, df_det="nom")
        >>>     >> gr.pt_hists(out=md.out)
        >>> )

    """
    if out is None:
        raise ValueError("Must provide input columns list as keyword out")

    # One row per (variable, value) pair
    df_long = df >> tf_pivot_longer(
        columns=out,
        names_to="var",
        values_to="value",
    )

    histograms = (
        ggplot(aes("value"))
        + geom_histogram(bins=30)
        + facet_wrap("var", scales="free")
        + theme_minimal()
        + labs(x="Output Value", y="Count")
    )

    return df_long >> histograms
def gene_log_HR_plot(inFile, pcaFile=None, model=None):
    """Map per-component log hazard ratios back to gene space and plot them.

    The ``logHR`` parameter array is split in half along its first axis:
    the first half is treated as the tumor components, the second half as
    the non-tumor components.

    Args:
        inFile: Passed to ``get_params``. When ``pcaFile`` is not given,
            the PCA pickle path is derived from it by replacing
            ``"_params.hdf5"`` with ``"_pca.pkl"``.
        pcaFile: Path of a pickled object exposing ``components_`` and
            ``inverse_transform``.
        model: Optional object whose ``counts.index`` labels the rows of
            the result; without it rows are labelled "1", "2", ...

    Returns:
        tuple: ``(plot, logHR_df)`` -- a scatter of tumor against non-tumor
        logHR with an identity line, and the frame with logHR, sd, Z and
        two-sided p-value columns for both tissue types.
    """
    # get logHRs
    par = get_params(inFile)
    pca_components = par["means"]["logHR"].shape[0] >> 1
    components = range(pca_components)
    tf_components = slice(pca_components, 2 * pca_components)
    t_logHR = par["means"]["logHR"][components, 0]
    tf_logHR = par["means"]["logHR"][tf_components, 0]
    t_logHR_sd = par["stds"]["logHR"][components, 0]
    tf_logHR_sd = par["stds"]["logHR"][tf_components, 0]

    # get pca
    if pcaFile is None:
        pcaFile = inFile.replace("_params.hdf5", "_pca.pkl")
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use PCA files from a trusted source.
    with open(pcaFile, "rb") as buff:
        pca = pickle.load(buff)

    def gene_sd(component_sd):
        # Combine the per-component sds through the PCA loadings
        # (root of the summed squares over components).
        return np.sqrt(
            np.sum((pca.components_ * component_sd[:, None])**2, axis=0))

    # prep dataframe
    n_genes = pca.components_.shape[1]
    if model is None:
        logHR_df = pd.DataFrame(index=[f"{i+1}" for i in range(n_genes)])
    else:
        logHR_df = pd.DataFrame(index=model.counts.index)
    logHR_df["tumor logHR"] = pca.inverse_transform(t_logHR)
    logHR_df["non-tumor logHR"] = pca.inverse_transform(tf_logHR)
    logHR_df["tumor logHR sd"] = gene_sd(t_logHR_sd)
    logHR_df["non-tumor logHR sd"] = gene_sd(tf_logHR_sd)
    logHR_df["tumor Z"] = logHR_df["tumor logHR"] / logHR_df["tumor logHR sd"]
    # Fixed: the non-tumor Z score was divided by the *tumor* sd.
    logHR_df["non-tumor Z"] = (logHR_df["non-tumor logHR"] /
                               logHR_df["non-tumor logHR sd"])
    logHR_df["tumor p-value"] = norm.sf(abs(logHR_df["tumor Z"])) * 2
    logHR_df["non-tumor p-value"] = norm.sf(abs(logHR_df["non-tumor Z"])) * 2

    # make plot (same limits on both axes so the identity line is a diagonal)
    lb = min(logHR_df["non-tumor logHR"].min(), logHR_df["tumor logHR"].min())
    ub = max(logHR_df["non-tumor logHR"].max(), logHR_df["tumor logHR"].max())
    pl = (pn.ggplot(pn.aes("non-tumor logHR", "tumor logHR"), logHR_df) +
          pn.xlim(lb, ub) + pn.ylim(lb, ub) + pn.theme_minimal() +
          pn.geom_point(alpha=0.3, color="red") + pn.geom_abline())
    return pl, logHR_df
def kernel_stats(inFile, log_scale=True):
    """Plot mixture weight against covariance determinant for every kernel.

    Args:
        inFile: Passed to ``get_params``.
        log_scale: When true, both axes use a log10 scale.

    Returns:
        tuple: ``(plot, df)``; ``df`` has one row per kernel and tissue
        type with the columns ``tissue``, ``weight`` and ``volume``
        (the determinant of the kernel covariance).
    """
    par = get_params(inFile)
    means = par["means"]

    # The number of kernels equals the number of "mus_f*" entries.
    n_kernel = sum("mus_f" in name for name in sorted(means))
    kernel_labels = [f"kernel {i}" for i in range(n_kernel)]

    tf = pm.distributions.transforms.StickBreaking()
    tissue_names = {"t": "tumor", "f": "non-tumor"}

    frames = []
    for tissue_type, tissue_name in tissue_names.items():
        weights = tf.backward(
            means[f"w_{tissue_type}_stickbreaking__"]).eval()
        n_dim = means[f"x_{tissue_type}"].shape[1]

        volumes = []
        for kernel in range(n_kernel):
            # rebuild the covariance from its packed Cholesky factor
            packed_cov = means[
                f"packed_L_{tissue_type}_{kernel}_cholesky-cov-packed__"]
            lower = pm.expand_packed_triangular(n_dim, packed_cov,
                                                lower=True).eval()
            volumes.append(np.linalg.det(np.dot(lower, lower.T)))

        frames.append(
            pd.DataFrame(
                {
                    "tissue": tissue_name,
                    "weight": weights,
                    "volume": volumes,
                },
                index=kernel_labels,
            ))

    df = pd.concat(frames)
    pl = (pn.ggplot(pn.aes("volume", "weight", color="tissue"), df) +
          pn.geom_point())
    if log_scale:
        pl += pn.scale_y_log10()
        pl += pn.scale_x_log10()
    pl += pn.theme_minimal()
    return pl, df
def lollipop(data):
    """Draw a horizontal lollipop chart of label probabilities.

    Args:
        data: DataFrame with the columns ``label`` and ``probability``.

    Returns:
        tuple: ``(plot, png_b64)`` -- the ggplot object and the rendered
        PNG as a base64-encoded string.
    """
    data = data.sort_values(by=['probability']).reset_index(drop=True)
    # Freeze the sorted order in a categorical so the axis follows the
    # probability ranking instead of alphabetical order.
    custom_order = pd.Categorical(data['label'], categories=data.label)
    data = data.assign(label_custom=custom_order)

    p = ggplot(data, aes('label_custom', 'probability')) + \
        geom_point(color="#88aa88", size=4) + \
        geom_segment(aes(x='label_custom', y=0,
                         xend='label_custom', yend='probability'),
                     color="#88aa88") + \
        coord_flip(expand=True) + \
        theme_minimal() + \
        labs(x="", y="probability", title="Most Likely Object") + \
        guides(title_position="left") + \
        theme(plot_title=element_text(size=20, face="bold", ha="right"))

    fig = p.draw()
    figfile = BytesIO()
    try:
        # Save the figure that was just drawn instead of relying on
        # pyplot's global "current figure".
        fig.savefig(figfile, format='png', bbox_inches='tight')
    finally:
        # Release the figure; otherwise one figure per call stays
        # registered with pyplot.
        plt.close(fig)

    figdata_png = base64.b64encode(figfile.getvalue()).decode()
    return p, figdata_png
def plot_windows(self, query=""):
    """Plot selection windows with 'plotnine'.

    Each window is drawn as a rectangle spanning mz_left..mz_right on the
    x axis and IM_min..IM_max on the y axis, filled by its window group.

    Install plotnine separately.

    Args:
        query (str): a query used for subselection in "self.windows".
            When empty, all rows of "self.windows" except the first
            are plotted.

    Returns:
        ggplot: a plot with selection windows
    """
    from plotnine import ggplot, aes, geom_rect, theme_minimal, xlab, ylab, labs

    if query:
        D = self.windows.reset_index().query(query)
    else:
        D = self.windows[1:].reset_index()

    rectangles = geom_rect(
        aes(xmin='mz_left',
            xmax='mz_right',
            ymin='IM_min',
            ymax='IM_max',
            fill='pd.Categorical(window_gr)'),
        alpha=.5,
        color='black',
    )

    plot = ggplot(aes(), data=D) + rectangles
    plot = plot + theme_minimal()
    plot = plot + xlab('mass/charge')
    plot = plot + ylab('1/K0')
    plot = plot + labs(fill='Window Group')
    return plot
def wraplot(adata=None,
            filename='wraplot',
            embedding='tsne',
            feature='sample_type_tech',
            size=(12, 12),
            color=None,
            save=False,
            draw=False,
            psize=1):
    """Facet an embedding by ``feature``, with all cells in grey behind.

    Every facet shows the cells of one ``feature`` level on top of a light
    grey layer holding all cells (the grey layer's data has the ``feature``
    column dropped, so it is repeated in every facet).

    Args:
        adata: Object whose ``obs`` frame holds the columns
            ``<embedding>0``, ``<embedding>1``, ``feature`` and ``color``.
            Required despite the ``None`` default.
        filename: Prefix of the output file name.
        embedding: Prefix of the two coordinate columns.
        feature: Column to facet by.
        size: Figure size; written to the global ``p.options.figure_size``.
        color: Column mapped to point color; defaults to ``feature``.
        save: When true, the plot is saved as PNG at 200 dpi.
        draw: Not used by this function.
        psize: Point size of the colored layer.

    Returns:
        None. Progress and timing are printed.
    """
    start = datetime.datetime.now()
    p.options.figure_size = size

    # ``is None`` instead of ``== None``: identity is the idiomatic test.
    if color is None:
        color = feature
        savename = filename + '.' + embedding + '.' + feature + '.wraplot.png'
    else:
        savename = filename + '.' + embedding + '.' + feature + '.' + str(
            color) + '.png'

    print(
        start.strftime("%H:%M:%S"),
        'Starting ... \t',
        savename,
    )

    pt = (p.ggplot(p.aes(x=embedding + '0', y=embedding + '1', color=color),
                   adata.obs) +
          p.geom_point(color='lightgrey',
                       shape='.',
                       data=adata.obs.drop(feature, axis=1)) +
          p.geom_point(shape='.', size=psize, alpha=1, stroke=0) +
          p.theme_minimal() + p.facet_wrap('~' + feature) +
          p.guides(color=p.guide_legend(override_aes={'size': 10})))

    if save:
        pt.save(savename, format='png', dpi=200)

    end = datetime.datetime.now()
    delta = end - start
    # NOTE(review): the start time (not the end time) is printed again
    # here, as in the original -- confirm this is intended.
    print(start.strftime("%H:%M:%S"), str(int(delta.total_seconds())),
          's to make: \t', savename)
def plot_sinew_inputs(df, var=None, sweep_ind="sweep_ind"):
    r"""Inspect a sinew design

    Draw a len(var) x len(var) grid of panels on one figure: off-diagonal
    panels are scatterplots of one variable against another, colored by
    sweep index; diagonal panels carry the variable name. Often used to
    visualize a sinew design before evaluating the model functions.
    Usually called as a dispatch from plot_auto().

    Args:
        df (Pandas DataFrame): Input design data
        var (list of strings): Variables to plot
        sweep_ind (string): Sweep index column in df

    Returns:
        None; the assembled figure is displayed with fig.show()

    Raises:
        ValueError: If var is not provided

    Examples:

        >>> import grama as gr
        >>> import matplotlib.pyplot as plt
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> ## Dispatch from autoplotter
        >>> (
        >>>     md
        >>>     >> gr.ev_sinews(df_det="swp", skip=True)
        >>>     >> gr.pt_auto()
        >>> )
        >>> ## Re-create without metadata
        >>> (
        >>>     md
        >>>     >> gr.ev_sinews(df_det="swp")
        >>>     >> gr.pt_sinew_inputs(var=md.var)
        >>> )

    """
    if var is None:
        raise ValueError("Must provide input columns list as keyword var")

    ## Define helpers
    def labels_blank(v):
        # One empty label per break
        return [""] * len(v)

    def breaks_min(limits):
        # Breaks at the lower limit, the midpoint and the upper limit
        return (limits[0], 0.5 * (limits[0] + limits[1]), limits[1])

    ## Make blank figure
    fig = (
        df
        >> ggplot()
        + geom_blank()
        + theme_void()
    ).draw(show=False)

    n_var = len(var)
    gs = gridspec.GridSpec(n_var, n_var)
    color_expr = "factor(" + sweep_ind + ")"

    for i, v1 in enumerate(var):
        for j, v2 in enumerate(var):
            ax = fig.add_subplot(gs[i, j])

            ## Tick labels only on the first column and the last row
            labels_y = _sci_format if j == 0 else labels_blank
            labels_x = _sci_format if i == n_var - 1 else labels_blank

            if i == j:
                ## Label
                p = (
                    df
                    >> ggplot()
                    + annotate("label", x=0, y=0, label=v1)
                    + theme_void()
                    + guides(color=None)
                )
            else:
                ## Scatterplot
                p = (
                    df
                    >> ggplot(aes(v2, v1, color=color_expr))
                    + geom_point(size=0.1)
                    + scale_x_continuous(breaks=breaks_min, labels=labels_x)
                    + scale_y_continuous(breaks=breaks_min, labels=labels_y)
                    + guides(color=None)
                    + theme_minimal()
                    + theme(axis_title=element_text(va="top", size=12))
                )

            _ = p._draw_using_figure(fig, [ax])

    ## Plot
    # NB Returning the figure causes a "double plot" in Jupyter....
    fig.show()
def plot_sinew_outputs(
    df, var=None, out=None, sweep_ind="sweep_ind", sweep_var="sweep_var"
):
    r"""Construct sinew plot

    Melt inputs and outputs into long format, keep only the rows where the
    melted input is the swept variable, and draw one line per sweep in a
    grid of output-by-input facets. Often used to visualize the outputs of
    a sinew design. Usually called as a dispatch from plot_auto().

    Args:
        df (Pandas DataFrame): Input design data with output results
        var (list of strings): Variables to plot
        out (list of strings): Outputs to plot
        sweep_ind (string): Sweep index column in df
        sweep_var (string): Swept variable column in df

    Returns:
        Result of piping the prepared data into the ggplot line plot

    Raises:
        ValueError: If var or out is not provided

    Examples:

        >>> import grama as gr
        >>> import matplotlib.pyplot as plt
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> ## Dispatch from autoplotter
        >>> (
        >>>     md
        >>>     >> gr.ev_sinews(df_det="swp")
        >>>     >> gr.pt_auto()
        >>> )
        >>> ## Re-create without metadata
        >>> (
        >>>     md
        >>>     >> gr.ev_sinews(df_det="swp")
        >>>     >> gr.pt_sinew_outputs(var=md.var, out=md.out)
        >>> )

    """
    if var is None:
        raise ValueError("Must provide input columns list as keyword arg var")
    if out is None:
        raise ValueError("Must provide output columns list as keyword arg out")

    ## Prepare data
    # Gather inputs
    keep_for_inputs = [col for col in df.columns if col not in var]
    df_inputs = melt(df, keep_for_inputs, var, "_var", "_x")
    # Gather outputs
    keep_for_outputs = [col for col in df_inputs.columns if col not in out]
    df_long = melt(df_inputs, keep_for_outputs, out, "_out", "_y")
    # Filter off-sweep values
    on_sweep = df_long[sweep_var] == df_long["_var"]
    df_plot = df_long[on_sweep]

    def breaks_min(limits):
        # Breaks at the lower limit, the midpoint and the upper limit
        return (limits[0], 0.5 * (limits[0] + limits[1]), limits[1])

    sweep_factor = "factor(" + sweep_ind + ")"

    lines = (
        ggplot(aes("_x", "_y", color=sweep_factor, group=sweep_factor))
        + geom_line()
        + facet_grid("_out~_var", scales="free")
        + scale_x_continuous(breaks=breaks_min, labels=_sci_format)
        + scale_y_continuous(breaks=breaks_min, labels=_sci_format)
        + guides(color=None)
        + theme_minimal()
        + theme(
            strip_text_y=element_text(angle=0),
            panel_border=element_rect(color="black", size=0.5),
        )
        + labs(x="Input Value", y="Output Value")
    )

    return df_plot >> lines
metric=arguments['--metric']) pd.DataFrame(Z).to_csv(output + DOCSTR + '.graphdr', sep='\t', index_label=False) if arguments['--plot']: try: from plotnine import ggplot, aes, geom_point, theme_minimal if arguments['--anno_file']: df = pd.DataFrame({ 'x': Z[:, 0], 'y': Z[:, 1], 'c': anno[arguments['--anno_column']].map(str) }) p = ggplot(df, aes('x', 'y', color='c')) + geom_point( size=0.1) + theme_minimal() else: df = pd.DataFrame({'x': Z[:, 0], 'y': Z[:, 1]}) p = ggplot(df, aes( 'x', 'y')) + geom_point(size=0.1) + theme_minimal() p.save(output + DOCSTR + '.pdf') except ImportError: warnings.warn( 'plotnine needs to be installed for the plotting function.')
def test_theme_minimal(self):
    """Check the base plot rendered with theme_minimal.

    The titled plot, with the module-level ``_theme`` added, is compared
    against the reference name 'theme_minimal' (presumably an image
    comparison performed by the plot's ``==`` -- confirm in the test
    helpers).
    """
    titled = self.g + labs(title='Theme Minimal')
    themed = titled + theme_minimal() + _theme
    assert themed == 'theme_minimal'
def expression_plot(
    inFile,
    cp1=1,
    cp2=2,
    model=None,
    draw_distribution=True,
    draw_points=True,
    max_kernel_alpha=0.5,
    color="expression",
):
    """Plot samples and mixture kernels in the plane of two components.

    Args:
        inFile: Passed to ``get_params``.
        cp1: 1-based index of the component shown on the x axis.
        cp2: 1-based index of the component shown on the y axis.
        model: Optional object providing ``counts.columns`` (sample labels)
            and ``pheno`` (left-merged onto the samples by index).
        draw_distribution: Draw one ellipse per kernel and tissue type.
        draw_points: Draw the sample positions.
        max_kernel_alpha: Alpha given to the kernel with the highest
            density; the other kernels are scaled relative to it.
        color: Column mapped to color; anything other than "expression"
            needs ``model`` when points are drawn.

    Returns:
        tuple: ``(plot, df, kdf)`` -- ``df`` holds the sample points and
        ``kdf`` the ellipse outlines; each is ``None`` when the
        corresponding layer was not requested.

    Raises:
        ValueError: If points are drawn without ``model`` and ``color`` is
            not "expression".
    """
    par = get_params(inFile)
    means = par["means"]
    # zero-based positions of the two displayed components
    axes = [cp1 - 1, cp2 - 1]

    pl = (pn.ggplot(pn.aes(f"CP {cp1}", f"CP {cp2}", color=color)) +
          pn.theme_minimal())
    df = None
    kdf = None

    if draw_points:
        if model is None:
            if color != "expression":
                # ValueError is still caught by ``except Exception``.
                raise ValueError(
                    "A model must be passed to color other than by expression."
                )
            index = [f"sample {i+1}" for i in range(means["x_t"].shape[0])]
        else:
            index = model.counts.columns
        columns = [f"CP {i+1}" for i in range(means["x_t"].shape[1])]
        df_t = pd.DataFrame(means["x_t"], index=index, columns=columns)
        df_t["expression"] = "tumor"
        df_tf = pd.DataFrame(means["x_f"], index=index, columns=columns)
        df_tf["expression"] = "non-tumor"
        df = pd.concat([df_t, df_tf])
        if model is not None:
            df = df.merge(model.pheno,
                          "left",
                          left_index=True,
                          right_index=True)
        pl += pn.geom_point(data=df, alpha=0.3)

    if draw_distribution:
        # The number of kernels equals the number of "mus_f*" entries.
        n_kernel = sum("mus_f" in name for name in means)
        if "altStick" in par["note"] and not par["note"]["altStick"]:
            tf = StickBreaking_legacy()
        else:
            tf = StickBreaking2()

        elipses = []
        elipse_t = np.linspace(0, 2 * np.pi, 100)
        for tissue_type in ["t", "f"]:
            tissue_name = "tumor" if tissue_type == "t" else "non-tumor"
            weights = tf.backward(
                means[f"w_{tissue_type}_stickbreaking__"]).eval()
            n_dim = means[f"x_{tissue_type}"].shape[1]
            for kernel in range(n_kernel):
                # get covariance elipse parameters
                packed_cov = means[
                    f"packed_L_{tissue_type}_{kernel}_cholesky-cov-packed__"]
                lower = pm.expand_packed_triangular(n_dim,
                                                    packed_cov,
                                                    lower=True).eval()
                cov = np.dot(lower, lower.T)[np.ix_(axes, axes)]
                eigvals, U = np.linalg.eig(cov)
                # Fixed: arccos(|U[0, 0]|) dropped the sign of the tilt, so
                # negatively correlated kernels were drawn mirrored. The
                # angle of the first eigenvector keeps the orientation.
                theta = np.arctan2(U[1, 0], U[0, 0])

                # parametrize elipse
                # 5.991 is presumably the 95% chi-square quantile for two
                # degrees of freedom -- TODO confirm.
                # NOTE(review): width/hight are full axis lengths but are
                # used as radii below, which doubles the ellipse -- confirm.
                width = 2 * np.sqrt(5.991 * eigvals[0])
                hight = 2 * np.sqrt(5.991 * eigvals[1])
                # Fixed: was ``weights / width * hight``, i.e. multiplied
                # by the height instead of divided by the area.
                density = weights[kernel] / (width * hight)
                x = width * np.cos(elipse_t)
                y = hight * np.sin(elipse_t)

                # rotation
                c, s = np.cos(theta), np.sin(theta)
                R = np.array(((c, -s), (s, c)))
                path = np.dot(R, np.array([x, y]))

                # position
                pos = means[f"mus_{tissue_type}_{kernel}"]
                path += pos[axes][:, None]

                # make data frame
                path_df = pd.DataFrame({
                    f"CP {cp1}": path[0, :],
                    f"CP {cp2}": path[1, :]
                })
                path_df["kernel"] = kernel
                path_df["density"] = density
                path_df["expression"] = tissue_name
                path_df["expression-kernel"] = f"{tissue_name} {kernel}"
                elipses.append(path_df)

        kdf = pd.concat(elipses)
        # Rescale so that the densest kernel gets max_kernel_alpha.
        density_scale = max_kernel_alpha / kdf["density"].max()
        kdf["density"] *= density_scale
        pl += pn.geom_polygon(
            pn.aes(fill="expression",
                   group="expression-kernel",
                   alpha="density"),
            data=kdf,
        )
        pl += pn.scale_alpha_continuous(range=(0, max_kernel_alpha))

    return pl, df, kdf
# Per-cell expression summaries and count histograms for the gene with
# SYMBOL "T". The fragment writes new columns into adata.obs and saves two
# PDF plots into the working directory.
g = 'T'
# First ENSEMBL id among the rows whose SYMBOL is "T".
# NOTE(review): ``[0]`` on a Series with a non-integer index relies on
# positional fallback -- confirm against the pandas version in use.
ens = adata.var[adata.var.SYMBOL == "T"].ENSEMBL[0]
# Raw matrix, "logcounts" layer and "scvi_normalised" layer for that gene,
# each densified and reduced to a 1-D per-cell vector.
adata.obs['T_counts'] = adata[:, ens].X.toarray()[:, 0]
adata.obs['T_logcounts'] = adata[:, ens].layers.get("logcounts").toarray()[:, 0]
adata.obs['T_smoothed'] = adata[:, ens].layers.get("scvi_normalised").toarray()[:, 0]
adata.obs['T_smoothed_lc'] = np.log2(adata.obs['T_smoothed'] + 1)

# Frequency table of the raw counts.
# NOTE(review): the rename assumes reset_index() calls the value column
# 'index'; pandas >= 2.0 names it after the Series instead -- confirm.
count_hist = adata.obs[f'{g}_counts'].value_counts().reset_index().rename(
    columns={'index': 'counts'})

# Global plotnine option: applies to every plot created afterwards.
p.options.figure_size = 6, 2

# Bar chart of the number of cells per count value, restricted to count
# values strictly between 0 and 25, on a log10 x axis.
plot_ = (p.ggplot(p.aes(x='counts', y=f'{g}_counts'),
                  count_hist.query('0 < counts < 25')) +
         p.geom_bar(stat='identity') + p.scale_x_log10() +
         p.theme_minimal() +
         p.labs(x=f'{g} UMI counts', y='Number cells'))
plot_.save('mgast_T_counts.pdf', verbose=False)

# Same table for the log-transformed counts.
count_hist = adata.obs[f'{g}_logcounts'].value_counts().reset_index().rename(
    columns={'index': 'logcounts'})

p.options.figure_size = 6, 2

# NOTE(review): this histogram bins the rows of the frequency table (one
# row per distinct value), not the cells themselves -- confirm that the
# 'Number cells' axis label is what is meant.
plot_ = (
    p.ggplot(p.aes(x='logcounts'), count_hist.query('0 < logcounts < 25')) +
    p.geom_histogram(bins=128, color="k", fill="w") + p.scale_x_log10() +
    p.theme_minimal() +
    p.labs(x=f'{g} UMI logcounts', y='Number cells'))
plot_.save('mgast_T_logcounts.pdf', verbose=False)

# Frequency table of the log2-transformed smoothed values; it is not used
# within the visible part of the file.
count_hist = adata.obs[f'{g}_smoothed_lc'].value_counts().reset_index().rename(
    columns={'index': 'smoothed_lc'})

p.options.figure_size = 6, 2
def log_HR_plot(inFile, label_unit=10, log_scale_color=True):
    """Plot tumor against non-tumor log hazard ratios per component.

    The ``logHR`` parameter array is split in half along its first axis:
    the first half is treated as the tumor components, the second half as
    the non-tumor components.

    Args:
        inFile: Passed to ``get_params``.
        label_unit: Components with a zero-based position up to and
            including this value get a text label; the others get none.
        log_scale_color: Use log-transformed color and fill scales in the
            logHR plot.

    Returns:
        tuple: ``(pl, pl_p, logHR_df)`` -- the logHR scatter (color and
        fill mapped to the non-tumor and tumor p-values), the
        -log10(p-value) scatter (colored by component), and the
        underlying frame.
    """
    par = get_params(inFile)
    pca_components = par["means"]["logHR"].shape[0] >> 1
    components = range(pca_components)
    tf_components = slice(pca_components, 2 * pca_components)

    logHR_df = pd.DataFrame(index=[f"{i+1}" for i in components])
    logHR_df["tumor logHR"] = par["means"]["logHR"][components, 0]
    logHR_df["non-tumor logHR"] = par["means"]["logHR"][tf_components, 0]
    logHR_df["component"] = components
    logHR_df["label"] = [
        logHR_df.index[i] if i <= label_unit else "" for i in components
    ]
    logHR_df["tumor logHR sd"] = par["stds"]["logHR"][components, 0]
    logHR_df["non-tumor logHR sd"] = par["stds"]["logHR"][tf_components, 0]
    logHR_df["tumor Z"] = logHR_df["tumor logHR"] / logHR_df["tumor logHR sd"]
    # Fixed: the non-tumor Z score was divided by the *tumor* sd.
    logHR_df["non-tumor Z"] = (logHR_df["non-tumor logHR"] /
                               logHR_df["non-tumor logHR sd"])
    # two-sided p-values from the normal survival function
    logHR_df["tumor p-value"] = norm.sf(abs(logHR_df["tumor Z"])) * 2
    logHR_df["non-tumor p-value"] = norm.sf(abs(logHR_df["non-tumor Z"])) * 2
    logHR_df["tumor -log10(p-value)"] = -np.log10(logHR_df["tumor p-value"])
    logHR_df["non-tumor -log10(p-value)"] = -np.log10(
        logHR_df["non-tumor p-value"])

    # logHR plot: same limits on both axes so the identity line is diagonal
    lb = min(logHR_df["non-tumor logHR"].min(), logHR_df["tumor logHR"].min())
    ub = max(logHR_df["non-tumor logHR"].max(), logHR_df["tumor logHR"].max())
    pl = (pn.ggplot(
        pn.aes(
            "non-tumor logHR",
            "tumor logHR",
            color="non-tumor p-value",
            fill="tumor p-value",
            label="label",
        ),
        logHR_df,
    ) + pn.xlim(lb, ub) + pn.ylim(lb, ub) + pn.geom_abline() +
          pn.geom_point() + pn.theme_minimal() +
          pn.geom_text(ha="left", va="bottom", color="black"))
    if log_scale_color:
        pl += pn.scale_color_cmap(trans="log")
        pl += pn.scale_fill_cmap(trans="log")

    # p-value plot
    lb = min(
        logHR_df["non-tumor -log10(p-value)"].min(),
        logHR_df["tumor -log10(p-value)"].min(),
    )
    ub = max(
        logHR_df["non-tumor -log10(p-value)"].max(),
        logHR_df["tumor -log10(p-value)"].max(),
    )
    pl_p = (pn.ggplot(
        pn.aes(
            "non-tumor -log10(p-value)",
            "tumor -log10(p-value)",
            color="component",
            label="label",
        ),
        logHR_df,
    ) + pn.geom_point() + pn.xlim(lb, ub) + pn.ylim(lb, ub) +
            pn.theme_minimal() +
            pn.geom_text(ha="left", va="bottom", color="black"))
    return pl, pl_p, logHR_df
np.sqrt(np.diag(event_study_formula.cov_params().loc[lags][lags])) ]), 'mean': np.concatenate([ event_study_formula.params[leads], np.array([0]), event_study_formula.params[lags] ]), 'label': np.arange(-9, 6) }) leadslags_plot['lb'] = leadslags_plot['mean'] - leadslags_plot['sd'] * 1.96 leadslags_plot['ub'] = leadslags_plot['mean'] + leadslags_plot['sd'] * 1.96 # This version has a point-range at each # estimated lead or lag # comes down to stylistic preference at the # end of the day! p.ggplot(leadslags_plot, p.aes(x = 'label', y = 'mean', ymin = 'lb', ymax = 'ub')) +\ p.geom_hline(yintercept = 0.035169444, color = "red") +\ p.geom_pointrange() +\ p.theme_minimal() +\ p.xlab("Years before and after castle doctrine expansion") +\ p.ylab("log(Homicide Rate)") +\ p.geom_hline(yintercept = 0, linetype = "dashed") +\ p.geom_vline(xintercept = 0, linetype = "dashed")
def plot_scattermat(df, var=None):
    r"""Create a scatterplot matrix

    Draw a len(var) x len(var) grid of panels on one figure: off-diagonal
    panels are scatterplots of one variable against another; diagonal
    panels show the density of the variable together with its name. Often
    used to visualize a design (set of inputs points) before evaluating
    the functions. Usually called as a dispatch from plot_auto().

    Args:
        df (Pandas DataFrame): Data holding the columns to plot
        var (list of strings): Variables to plot

    Returns:
        None; the assembled figure is displayed with fig.show()

    Raises:
        ValueError: If var is not provided

    Examples:

        >>> import grama as gr
        >>> import matplotlib.pyplot as plt
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> ## Dispatch from autoplotter
        >>> (
        >>>     md
        >>>     >> gr.ev_sample(n=100, df_det="nom", skip=True)
        >>>     >> gr.pt_auto()
        >>> )
        >>> ## Re-create plot without metadata
        >>> (
        >>>     md
        >>>     >> gr.ev_sample(n=100, df_det="nom")
        >>>     >> gr.pt_scattermat(var=md.var)
        >>> )

    """
    if var is None:
        raise ValueError("Must provide input columns list as keyword var")

    ## Define helpers
    def labels_blank(v):
        # One empty label per break
        return [""] * len(v)

    def breaks_min(limits):
        # Breaks at the lower limit, the midpoint and the upper limit
        return (limits[0], 0.5 * (limits[0] + limits[1]), limits[1])

    ## Make blank figure
    fig = (
        df
        >> ggplot()
        + geom_blank()
        + theme_void()
    ).draw(show=False)

    n_var = len(var)
    gs = gridspec.GridSpec(n_var, n_var)

    for i, v1 in enumerate(var):
        for j, v2 in enumerate(var):
            ax = fig.add_subplot(gs[i, j])

            ## Tick labels only on the first column and the last row
            labels_y = _sci_format if j == 0 else labels_blank
            labels_x = _sci_format if i == n_var - 1 else labels_blank

            if i == j:
                ## Density
                xmid = 0.5 * (df[v1].min() + df[v1].max())
                p = (
                    df
                    >> ggplot(aes(v1))
                    + geom_density()
                    + scale_x_continuous(breaks=breaks_min, labels=labels_x)
                    + scale_y_continuous(breaks=breaks_min, labels=labels_y)
                    + annotate(
                        "label",
                        x=xmid,
                        y=0,
                        label=v1,
                        va="bottom",
                    )
                    + theme_minimal()
                    + labs(title=v1)
                )
            else:
                ## Scatterplot
                p = (
                    df
                    >> ggplot(aes(v2, v1))
                    + geom_point()
                    + scale_x_continuous(breaks=breaks_min, labels=labels_x)
                    + scale_y_continuous(breaks=breaks_min, labels=labels_y)
                    + theme_minimal()
                    + theme(axis_title=element_text(va="top", size=12))
                )

            _ = p._draw_using_figure(fig, [ax])

    ## Plot
    # NB Returning the figure causes a "double plot" in Jupyter....
    fig.show()