def plot_replicate_groups(self):
    """Plot current against time for both experiments, one line per channel.

    Reads ``self.data1df`` and ``self.data2df``; the plot uses their
    ``Time``, ``Current`` and ``Channel`` columns. Both frames and the
    resulting plot are printed, and the plot is returned.

    Each frame is tagged with an ``Experiment`` column on a *copy*.
    Inserting the column directly into the stored frames made a second call
    fail with ``ValueError`` because the column already existed.
    """
    from plotnine import ggplot, aes, ylab, xlab, geom_line

    def _tagged(frame, label):
        # Work on a copy so the frames held on ``self`` are never modified.
        tagged = frame.copy()
        if 'Experiment' in tagged.columns:
            tagged['Experiment'] = label
        else:
            tagged.insert(0, 'Experiment', label)
        return tagged

    df1 = _tagged(self.data1df, '1')
    df2 = _tagged(self.data2df, '2')

    print(df1)
    print(df2)

    plot = (ggplot()
            + ylab(u'Current (μA)')
            + xlab('Time (seconds)')
            + geom_line(df1, aes('Time', 'Current', color='Channel'))
            + geom_line(df2, aes('Time', 'Current', color='Channel')))
    print(plot)
    return plot
def plot_predict(forecast):
    """Plot observed values against the forecast and save the chart as PNG.

    ``forecast`` must provide the columns ``ds``, ``y``, ``yhat``,
    ``yhat_lower`` and ``yhat_upper``. Observations are drawn as blue points
    and a blue line, the prediction as a red line, and the prediction
    interval as a translucent ribbon.

    The chart is written to ``png/predict_pressure_chart.png`` next to this
    module. The ``png`` directory is created when missing, because saving
    into a non-existent directory fails.

    Returns:
        The plot object.
    """
    bold_black = dict(face='bold')
    p = (ggplot(data=forecast, mapping=aes(x='ds', y='y'))
         + geom_point(colour='blue', alpha=0.3, na_rm=True)
         + geom_line(colour='blue', na_rm=True)
         + geom_line(data=forecast, mapping=aes(x='ds', y='yhat'),
                     colour='red')
         + geom_ribbon(data=forecast,
                       mapping=aes(ymin='yhat_lower', ymax='yhat_upper'),
                       fill='blue', alpha=0.1)
         + scale_x_datetime(breaks='1 days', date_labels='%y-%m-%d %H:%M')
         + xlab('Time')
         + ylab('Pressure')
         + theme_bw()
         + theme(axis_text_x=element_text(angle=45, hjust=1, color='black',
                                          **bold_black),
                 axis_text_y=element_text(colour='black', **bold_black)))

    out_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'png')
    os.makedirs(out_dir, exist_ok=True)
    p.save(filename='predict_pressure_chart.png', path=out_dir,
           width=8, height=6, units='in', dpi=326, verbose=False)
    return p
def plot_individual_returns(
        df_in: pd.DataFrame,
        max_episode: int,
        return_column: str = 'episode_return',
        colour_var: Optional[str] = None,
        yintercept: Optional[float] = None,
        sweep_vars: Optional[Sequence[str]] = None) -> gg.ggplot:
    """Plot individual learning curves: one curve per sweep setting.

    Args:
        df_in: results frame; it is copied, never modified.
        max_episode: upper limit of the x axis (episodes).
        return_column: column plotted on the y axis.
        colour_var: optional column used to colour the curves. With at most
            five distinct values it is cast to categorical and coloured with
            ``FIVE_COLOURS``.
        yintercept: optional height of a dashed horizontal reference line.
            ``None`` means no line; ``0`` is a valid height.
        sweep_vars: sweep columns used for grouping and faceting.

    Returns:
        The faceted plot produced by ``facet_sweep_plot``.
    """
    df = df_in.copy()
    df['unique_group'] = _make_unique_group_col(df, sweep_vars)
    p = (gg.ggplot(df)
         + gg.aes(x='episode', y=return_column, group='unique_group')
         + gg.coord_cartesian(xlim=(0, max_episode)))
    if colour_var:
        p += gg.geom_line(gg.aes(colour=colour_var), size=1.1, alpha=0.75)
        if len(df[colour_var].unique()) <= 5:
            df[colour_var] = df[colour_var].astype('category')
            p += gg.scale_colour_manual(values=FIVE_COLOURS)
    else:
        p += gg.geom_line(size=1.1, alpha=0.75, colour='#313695')
    # Compare against None: a truthiness test would drop a line at y == 0.
    if yintercept is not None:
        p += gg.geom_hline(
            yintercept=yintercept, alpha=0.5, size=2, linetype='dashed')
    return facet_sweep_plot(p, sweep_vars, tall_plot=True)
def plot_train_test(ags):
    """Plot test-time against train-time compute, one curve per Elo level.

    The frontier data comes from ``data.train_test`` and is passed through
    ``data.train_test_model``, which returns the frame together with a
    fitted model. Observed curves are solid, the ``test_flops_hat`` column
    is dashed, and each curve is labelled with its Elo at its
    lowest-``train_flops`` row. The fitted relation is annotated as text.
    """
    frontiers, model = data.train_test_model(data.train_test(ags))

    # One label row per Elo level: the row with the least train-time compute.
    by_train = frontiers.sort_values('train_flops')
    labs = by_train.groupby('elo').first().reset_index()

    intercept = model.params[0]
    train_coef = model.params[1]
    elo_coef = model.params[2]
    desc = f'log₁₀(test) = {train_coef:.1f} · log₁₀(train) + {elo_coef:.1g} · elo + {intercept:.0f}'

    base = pn.ggplot(
        frontiers,
        pn.aes(x='train_flops', y='test_flops', color='elo', group='elo'))
    observed = pn.geom_line(size=.5, show_legend=False)
    fitted = pn.geom_line(pn.aes(y='test_flops_hat'), size=.25,
                          show_legend=False, linetype='dashed')
    elo_labels = pn.geom_text(pn.aes(label='elo.astype(int)'), labs,
                              show_legend=False, size=6, nudge_y=+.2)
    formula = pn.annotate('text', 1.5e13, 5e9, label=desc, ha='left',
                          size=6, family='serif')
    axis_titles = pn.labs(x='Train-time compute (FLOPS-seconds)',
                          y='Test-time compute (FLOPS-seconds)')

    return (base
            + observed
            + fitted
            + elo_labels
            + pn.scale_color_cmap(limits=(-1500, 0))
            + pn.scale_x_continuous(trans='log10')
            + pn.scale_y_continuous(trans='log10')
            + formula
            + axis_titles
            + plot.IEEE())
def _load_loss_history(entry, smooth, loss_padd):
    """Load one loss history and return ``(name, frame)``.

    ``frame`` is ``None`` when the file cannot be read or holds no finite
    value. Defined at module level so that it can be pickled and sent to
    worker processes.
    """
    name, file = entry
    try:
        hist = np.atleast_1d(np.loadtxt(file))
    except OSError:
        warn = "{} could not be loaded with np.loadtxt({})."
        warnings.warn(warn.format(name, file), UserWarning)
        return name, None

    is_fine = np.isfinite(hist)
    if not is_fine.any():
        return name, None

    # Keep the original iteration numbers of the finite entries.
    iters = np.where(is_fine)[0]
    hist = hist[is_fine]
    lb = hist.min()
    if loss_padd is not None and lb < 0:
        # Shift the whole curve so that its minimum sits at ``loss_padd``.
        hist += loss_padd - lb
        lb = loss_padd

    ldf = pd.DataFrame({
        "loss": hist,
        "iteration": iters,
        "model": [name] * len(hist),
    })
    if smooth is not False:
        if lb > 0:
            # Strictly positive losses are smoothed in log space.
            ldf["sloss"] = np.exp(
                gaussian_filter1d(np.log(hist), sigma=smooth))
        else:
            ldf["sloss"] = gaussian_filter1d(hist, sigma=smooth)
    return name, ldf


def multiplot(files, smooth=100, alpha=0.6, loss_padd=None):
    """Plot the loss histories of several models on a log-scaled y axis.

    Args:
        files: mapping ``{model name: path}``. A single path or an iterable
            of paths is also accepted; the path itself is then used as the
            model name. (Non-dict input used to crash on ``.items()``.)
        smooth: sigma of the Gaussian filter used for the smoothed curve, or
            ``False`` to skip smoothing.
        alpha: line transparency.
        loss_padd: when set and a history has negative values, that history
            is shifted so its minimum equals this value.

    Returns:
        ``(plot, df)`` where ``df`` concatenates all loaded histories with
        columns ``loss``, ``iteration``, ``model`` and, when smoothing,
        ``sloss``. Files that fail to load are skipped with a warning.
    """
    import os
    from functools import partial

    if isinstance(files, dict):
        named_files = files
    elif isinstance(files, (str, bytes, os.PathLike)):
        named_files = {str(files): files}
    else:
        named_files = {str(entry): entry for entry in files}

    tasks = list(named_files.items())
    loader = partial(_load_loss_history, smooth=smooth, loss_padd=loss_padd)

    # Collect frames and concatenate once: DataFrame.append was removed in
    # pandas 2 and re-copies the accumulated frame on every call.
    frames = []
    with mp.Pool() as pool:
        for _, ldf in tqdm(pool.imap(loader, tasks), total=len(tasks),
                           desc="models"):
            if ldf is not None:
                frames.append(ldf)
    df = pd.concat(frames) if frames else pd.DataFrame()

    pl = (pn.ggplot(df, pn.aes("iteration", "loss", color="model"))
          + pn.geom_line(alpha=alpha)
          + pn.scale_y_log10()
          + pn.theme_minimal())
    if smooth is not False:
        pl += pn.geom_line(pn.aes(y="sloss"), size=1, alpha=alpha)
    return pl, df
def __call__(self, graph, *args, **kwargs):
    """Draw the data held in ``self._data`` onto ``graph``.

    The data is first copied into two ``h.Vector`` objects. The backend is
    chosen by inspecting ``graph``, in this order:

    * a ``hoc.HocObject``: ``yvec.line(graph, xvec, *args)`` (keyword
      arguments are not forwarded);
    * a plotly figure: a ``Scatter`` trace is added (default mode "lines");
    * a plotnine ``ggplot``: a ``geom_line`` layer is added;
    * the plotly module: a new figure with the trace is returned;
    * the plotnine module: the bare ``geom_line`` layer is returned;
    * anything with ``.plot`` (e.g. pyplot or a matplotlib axis), then
      anything with ``.line`` (e.g. bokeh).

    Raises:
        Exception: for a matplotlib figure, or any unsupported graph type.
    """
    yvec = h.Vector()
    xvec = h.Vector()
    self._data.to_vector(yvec, xvec)

    if isinstance(graph, hoc.HocObject):
        return yvec.line(graph, xvec, *args)

    def _add_plotly_trace(figure=None):
        # plotly is imported lazily so it is only required when used.
        import plotly.graph_objects as go
        if figure is None:
            figure = go.Figure()
        kwargs.setdefault("mode", "lines")
        return figure.add_trace(go.Scatter(x=xvec, y=yvec, *args, **kwargs))

    def _ggplot_layer():
        import plotnine as p9
        import pandas as pd
        frame = pd.DataFrame({"x": xvec, "y": yvec})
        return p9.geom_line(*args, data=frame,
                            mapping=p9.aes(x="x", y="y"), **kwargs)

    # Backends are recognised by their string form so that none of them has
    # to be imported just to perform the check.
    type_repr = str(type(graph))
    if type_repr == "<class 'plotly.graph_objs._figure.Figure'>":
        return _add_plotly_trace(graph)
    if type_repr == "<class 'plotnine.ggplot.ggplot'>":
        return graph + _ggplot_layer()

    graph_repr = str(graph)
    if graph_repr.startswith("<module 'plotly' from "):
        return _add_plotly_trace()
    if graph_repr.startswith("<module 'plotnine' from "):
        return _ggplot_layer()

    if hasattr(graph, "plot"):
        # works with e.g. pyplot or a matplotlib axis
        return graph.plot(xvec, yvec, *args, **kwargs)
    if hasattr(graph, "line"):
        # works with e.g. bokeh
        return graph.line(xvec, yvec, *args, **kwargs)

    if type_repr == "<class 'matplotlib.figure.Figure'>":
        raise Exception(
            "plot to a matplotlib axis not a matplotlib figure")
    raise Exception("Unable to plot to graphs of type {}".format(
        type(graph)))
def plot_optimal_model_size(ags):
    """Plot the best-scoring model size against train-time compute.

    For every board size, eleven log-spaced compute budgets are laid over
    the observed ``train_flops`` range (the lowest is skipped). At each
    budget the parameter count (``width**2 * depth``) of the highest-Elo
    agent within budget is recorded. A log-log OLS fit over all recorded
    points is drawn as a dashed line and annotated as text.

    Args:
        ags: frame with ``boardsize``, ``elo``, ``width``, ``depth`` and
            ``train_flops`` columns.

    Returns:
        The plot object.
    """
    from statsmodels.formula import api as smf

    results = {}
    for b, g in ags.groupby('boardsize'):
        ordered = g.sort_values('elo').copy()
        ordered['params'] = g.width**2 * g.depth

        left = np.log10(g.train_flops.min())
        right = np.log10(g.train_flops.max())
        for f in np.linspace(left, right, 11)[1:]:
            # ``ordered`` is sorted by Elo, so the last row within budget is
            # the strongest agent affordable at this budget.
            subset = ordered[ordered.train_flops <= 10**f]
            results[b, 10**f] = subset.params.iloc[-1]

    df = pd.Series(results).reset_index()
    df.columns = ['boardsize', 'approx_flops', 'params']

    model = smf.ols(
        'np.log10(params) ~ np.log10(approx_flops) + 1', df).fit()

    left = np.log10(df.approx_flops.min())
    right = np.log10(df.approx_flops.max())
    preds = pd.DataFrame({'approx_flops': 10**np.linspace(left, right, 21)})
    preds['params'] = 10**model.predict(preds)

    # Largest-budget row per board size; used for the end points and, after
    # the height adjustment below, for the text labels.
    points = df.sort_values('approx_flops').groupby(
        'boardsize').last().reset_index()
    labs = points.copy()
    labs['params'] = labs.apply(
        lambda r: df[df.approx_flops <= r.approx_flops].params.max(),
        axis=1)

    # ``model.params`` is indexed by term name, so read it by position with
    # ``.iloc``; plain ``[i]`` on such a Series is deprecated in pandas.
    intercept = model.params.iloc[0]
    slope = model.params.iloc[1]
    desc = f'log₁₀(params) = {slope:.2f} · log₁₀(compute) − {-intercept:.1f}'

    return (
        pn.ggplot(df, pn.aes(x='approx_flops', y='params'))
        + pn.geom_line(pn.aes(color='factor(boardsize)', group='boardsize'),
                       show_legend=False)
        + pn.geom_line(data=preds, linetype='dashed', size=.25)
        + pn.geom_point(pn.aes(color='factor(boardsize)', group='boardsize'),
                        data=points, size=.5, show_legend=False)
        + pn.geom_text(pn.aes(color='factor(boardsize)', group='boardsize',
                              label='boardsize'),
                       data=labs, nudge_y=+.5, show_legend=False, size=6)
        + pn.annotate('text', 1e9, 2e7, label=desc, ha='left', size=6,
                      family='serif')
        + pn.scale_x_continuous(trans='log10')
        + pn.scale_y_continuous(trans='log10')
        + pn.scale_color_hue(l=.4)
        + pn.labs(x='Train-time compute (FLOPS-seconds)',
                  y='Optimal model size (params)')
        + plot.IEEE())
def ranges_graphics(target, signatures_long, ranges, hydro_state, year,
                    algorithm):
    """Plot the signatures with the selected wavelength ranges shaded.

    Does nothing when ``ranges`` is ``None``. Otherwise each range is drawn
    as a translucent rectangle from 0 to the largest ``value``, the
    signatures are drawn as lines on top, and the figure is printed and
    saved under ``PLOT_DIR``.

    Args:
        target: target name, used in the title and the file name.
        signatures_long: long-format frame with ``wavelength``, ``value``
            and ``variable`` columns. Its ``wavelength`` column is converted
            to numeric in place.
        ranges: sequence of ranges; elements 0 and 1 of each entry are used
            as the lower and upper wavelength bound.
        hydro_state: data-set label, used in titles and the file name.
        year: year, used in the title and the file name.
        algorithm: selection-algorithm key (``boruta``, ``lasso``,
            ``kbestcorr``, ``kbestmi`` or ``ga``); unknown keys give an
            empty display name.
    """
    if ranges is None:
        return

    display_names = {
        "boruta": "Boruta",
        "lasso": "LASSO",
        "kbestcorr": "SelectKBest (correlation)",
        "kbestmi": "SelectKBest (mutual information)",
        "ga": "Genetic Algorithm",
    }
    alg = display_names.get(algorithm, '')

    signatures_long["wavelength"] = pandas.to_numeric(
        signatures_long["wavelength"])
    y_max = signatures_long["value"].max()

    graph_signatures = (
        ggplot(signatures_long)
        + theme(legend_position="none")
        + aes(x="wavelength", y="value", color="variable")
        + labs(
            x="Wavelength (nm)",
            y="Reflectance (%)",
            title=f"Ranges in signature: {target} {hydro_state} {alg}, {year}.",
            subtitle=f"{alg}, {hydro_state} set."))

    # ``ranges`` cannot be None here (see the guard above), so the former
    # ``else`` branch was unreachable and has been removed.
    for bounds in ranges:
        graph_signatures = graph_signatures + geom_rect(
            aes(xmin=bounds[0], xmax=bounds[1], ymin=0.0, ymax=y_max),
            fill="steelblue", alpha=0.1, color=None)
    # NOTE(review): the line layer is added once, after all rectangles —
    # confirm against the original indentation.
    graph_signatures = graph_signatures + geom_line()

    print(graph_signatures)
    graph_signatures.save(filename=os.path.join(
        PLOT_DIR, f"{hydro_state}-{year}-{algorithm}-ranges-{target}"))
    return
def plot_convergence(pile):
    """Plot sample mean and bounds against sample size.

    Sample sizes start at 100, advance in steps of
    ``utils.bills_per_pound`` and stop before a tenth of ``len(pile)``.
    ``get_sample_dist`` supplies one row per size; its ``size``, ``mean``,
    ``lower`` and ``upper`` fields are plotted.
    """
    stops = range(100, int(len(pile) / 10), utils.bills_per_pound)
    rows = [get_sample_dist(pile, size) for size in stops]
    dist_stats = pd.DataFrame(rows)

    def _bound(column):
        # Both bounds share the same dotted orange styling.
        return pn.geom_line(pn.aes(x='size', y=column),
                            color='#FF5500', linetype='dotted')

    figure = pn.ggplot(dist_stats)
    figure = figure + pn.geom_line(pn.aes(x='size', y='mean'))
    figure = figure + _bound('lower') + _bound('upper')
    figure = figure + pn.scale_x_continuous(breaks=stops)
    figure = figure + pn.theme(
        axis_text_x=pn.element_text(angle=270, hjust=1))
    return figure
def plot_results(results):
    """Print two per-agent charts built from ``results[0]``.

    The first chart shows ``cum_regret`` against ``t``, the second ``time``
    against ``t``; both are coloured and grouped by ``agent_id``.
    """
    df = pd.DataFrame(results[0])
    # Treat agent ids as discrete labels rather than a numeric scale, see
    # https://stackoverflow.com/questions/39092067/pandas-dataframe-convert-column-type-to-string-or-categorical
    df['agent_id'] = df.agent_id.astype('category')

    for y_column in ('cum_regret', 'time'):
        chart = (gg.ggplot(df)
                 + gg.aes('t', y_column, color='agent_id', group='agent_id')
                 + gg.geom_point()
                 + gg.geom_line())
        print(chart)
def estimate_cutoffs_plot(output_file,
                          df_plt,
                          df_cell_estimate_cutoff,
                          df_fit=None,
                          scale_x_log10=False,
                          save_plot=True):
    """Plot UMI counts by sorted cell barcodes.

    Args:
        output_file: output path without extension; ``.png`` is appended.
        df_plt: frame with ``barcode`` and ``umi_counts`` columns. It is not
            modified; a shifted copy is plotted when needed.
        df_cell_estimate_cutoff: frame with ``n_cells`` and ``method``
            columns; each row becomes a vertical cutoff line.
        df_fit: optional frame with ``x`` and ``y`` columns, drawn as a
            yellow line.
        scale_x_log10: use a log10 x axis.
        save_plot: write the figure to ``<output_file>.png``.

    Returns:
        The plot object.
    """
    # The y axis is log scaled, so every count must be strictly positive.
    # Shift so that the smallest count becomes 1; the former ``min + 1``
    # offset only worked when the minimum was exactly 0.
    min_count = df_plt['umi_counts'].min()
    if min_count <= 0:
        df_plt = df_plt.copy()
        df_plt['umi_counts'] = df_plt['umi_counts'] + (1 - min_count)

    gplt = plt9.ggplot()
    gplt = gplt + plt9.theme_bw()
    if len(df_plt) <= 50000:
        gplt = gplt + plt9.geom_point(
            mapping=plt9.aes(x='barcode', y='umi_counts'),
            data=df_plt, alpha=0.05, size=0.1)
    else:
        gplt = gplt + plt9.geom_line(
            mapping=plt9.aes(x='barcode', y='umi_counts'),
            data=df_plt, alpha=0.25, size=0.75, color='black')
    gplt = gplt + plt9.geom_vline(
        mapping=plt9.aes(xintercept='n_cells', color='method'),
        data=df_cell_estimate_cutoff, alpha=0.75, linetype='dashdot')
    gplt = gplt + plt9.scale_color_brewer(palette='Dark2', type='qual')

    if scale_x_log10:
        gplt = gplt + plt9.scale_x_continuous(
            trans='log10', labels=comma_labels, minor_breaks=0)
    else:
        gplt = gplt + plt9.scale_x_continuous(
            labels=comma_labels, minor_breaks=0)
    gplt = gplt + plt9.scale_y_continuous(
        trans='log10', labels=comma_labels, minor_breaks=0)
    gplt = gplt + plt9.labs(
        title='',
        y='UMI counts',
        x='Barcode index, sorted by UMI count',
        color='Cutoff')

    # Add the fit of the droplet utils model. Test against None: the truth
    # value of a DataFrame is ambiguous and raises ValueError.
    if df_fit is not None:
        gplt = gplt + plt9.geom_line(
            mapping=plt9.aes(x='x', y='y'),
            data=df_fit, alpha=1, color='yellow')

    if save_plot:
        gplt.save('{}.png'.format(output_file), dpi=300, width=5, height=4)
    return gplt
def plot_means(self):
    """Plot the means of both experiments against time.

    Draws the ``Mean 1`` and ``Mean 2`` columns of ``self.t_test_df`` over
    ``Time``, prints the plot and returns it.
    """
    from plotnine import ggplot, aes, ylab, xlab, geom_line

    first_mean = geom_line()
    second_mean = geom_line(aes('Time', 'Mean 2'))

    figure = ggplot(self.t_test_df, aes('Time', 'Mean 1'))
    figure = figure + ylab(u'Average Current (μA)')
    figure = figure + xlab('Time (seconds)')
    figure = figure + first_mean + second_mean

    print(figure)
    return figure
def plot_anova(self):
    """Plot the F-value and the P-value against time.

    Draws ``F-Value`` (default colour) and ``P-Value`` (red) from
    ``self.anova_df`` over ``Time``, prints the plot and returns it.
    """
    from plotnine import ggplot, aes, ylab, xlab, geom_line

    f_value_line = geom_line()
    p_value_line = geom_line(aes('Time', 'P-Value'), color='red')

    figure = ggplot(self.anova_df, aes('Time', 'F-Value'))
    figure = figure + ylab('F-Value')
    figure = figure + xlab('Time (seconds)')
    figure = figure + f_value_line + p_value_line

    print(figure)
    return figure
def plot_standard_deviations(self):
    """Plot the standard deviation of both experiments against time.

    Draws ``Standard Deviation 1`` and ``Standard Deviation 2`` from
    ``self.t_test_df`` over ``Time``, prints the plot and returns it.
    """
    from plotnine import ggplot, aes, ylab, xlab, geom_line

    first_sd = geom_line()
    second_sd = geom_line(aes('Time', 'Standard Deviation 2'))

    figure = ggplot(self.t_test_df, aes('Time', 'Standard Deviation 1'))
    figure = figure + ylab('Standard Deviation')
    figure = figure + xlab('Time (seconds)')
    figure = figure + first_sd + second_sd

    print(figure)
    return figure
def plot_t_test(self):
    """Plot the p-value against time together with the significance column.

    Draws ``P Value`` (default colour) and ``Significance`` (red) from
    ``self.t_test_df`` over ``Time`` with 21 evenly spaced y-axis breaks
    between 0 and 5e-7, prints the plot and returns it.
    """
    from plotnine import ggplot, aes, ylab, xlab, geom_line, scale_y_continuous

    y_breaks = np.linspace(0, 0.0000005, 21)
    p_value_line = geom_line()
    significance_line = geom_line(aes('Time', 'Significance'), color='red')

    figure = ggplot(self.t_test_df, aes('Time', 'P Value'))
    figure = figure + ylab('P Value')
    figure = figure + xlab('Time (seconds)')
    figure = figure + p_value_line
    figure = figure + scale_y_continuous(breaks=y_breaks)
    figure = figure + significance_line

    print(figure)
    return figure
def plot_time_curve_with_threshold(self):
    """Plot bacteria and actin object counts over time.

    Melts ``number_bacteria`` and ``number_actin`` from ``self.aggregated``
    into long format and draws one coloured curve per object type against
    ``hour``. The figure is stored in ``self.time_curve_fig`` and displayed
    in ``self.out_plot2`` after clearing it.
    """
    toplot = self.aggregated.melt(
        id_vars='hour',
        value_vars=['number_bacteria', 'number_actin'],
        value_name='counts',
        var_name='Object')
    colors = self.create_color_list()

    colour_scale = pn.scale_colour_manual(
        values=colors,
        labels=list(self.sel_channel_time.value),
        name="")
    # One tick per measured hour, labelled with the hour itself.
    hours = np.sort(self.result.hour.unique())
    hour_axis = pn.scale_x_continuous(
        breaks=hours, labels=list(hours.astype(str)))

    myfig = (ggplot(toplot, aes("hour", "counts", color="Object"))
             + geom_point()
             + geom_line()
             + labels.xlab("Time [hours]")
             + labels.ylab("Average number of objects/nuclei")
             + colour_scale
             + pn.labs(colour="")
             + hour_axis)

    self.time_curve_fig = myfig
    self.out_plot2.clear_output()
    with self.out_plot2:
        display(myfig)
def plot_time_curve_by_channel(self, b=None):
    """Callback to plot the time curve of bacteria/nuclei per channel.

    Called by plot_time_curve_button. Aggregates the data first when
    ``self.aggregated`` is still ``None``. If no channel is selected, a
    hint is printed and nothing is plotted. Otherwise the ``normalized``
    column is drawn against ``hour`` for the selected channels, and the
    figure is stored in ``self.time_curve_fig`` and displayed in
    ``self.out_plot2``.

    Args:
        b: unused; accepted so the method can serve as a button callback.
    """
    if self.aggregated is None:
        self.data_aggregation()

    selected = self.sel_channel_time.value
    if len(selected) == 0:
        print("Select at least one channel")
        return

    subset = self.aggregated[
        self.aggregated.channel.isin(selected)].copy(deep=True)
    # Replace the whole column. ``subset.loc[:, "channel"] = ...`` writes
    # into the existing column in recent pandas and would discard the
    # ordered categorical dtype that fixes the channel order.
    subset["channel"] = subset.channel.astype(
        pd.CategoricalDtype(selected, ordered=True))

    colors = self.create_color_list()
    hours = np.sort(self.result.hour.unique())

    myfig = (ggplot(subset, aes("hour", "normalized", color="channel"))
             + geom_point()
             + geom_line()
             + labels.xlab("Time [hours]")
             + labels.ylab("Average number of bacteria/nuclei")
             + pn.scale_colour_manual(
                 values=colors, labels=list(selected), name="")
             + pn.labs(colour="")
             + pn.scale_x_continuous(
                 breaks=hours, labels=list(hours.astype(str))))

    self.time_curve_fig = myfig
    self.out_plot2.clear_output()
    with self.out_plot2:
        display(myfig)
def plot_seismograms(device_id):
    """Print one seismogram per axis (x, y, z) for a single device.

    Uses the module-level ``records_df`` and ``eq``. Each plot shows the
    axis reading against ``sample_dt`` for the given device, with a crimson
    vertical line at the earthquake time parsed from ``eq['date_utc']``.
    """
    # Get earthquake date as datetime.datetime object
    eq_dt = AwsDataClient._get_dt_from_str(eq['date_utc'])
    print(eq_dt)

    # The device filter does not depend on the axis, so apply it once.
    device_records = records_df[records_df['device_id'] == device_id]

    plots = [
        pn.ggplot(device_records, pn.aes('sample_dt', axis))
        + pn.geom_line(color='blue')
        + pn.scales.scale_x_datetime(
            date_breaks='1 minute', date_labels='%H:%M:%S')
        + pn.geoms.geom_vline(xintercept=eq_dt, color='crimson')
        + pn.labels.ggtitle(
            'device {}, axis {}'.format(device_id, axis))
        for axis in ('x', 'y', 'z')
    ]

    # Now output the plots
    for p in plots:
        print(p)
def plot_fitting(x, y, resonance_frequency, parameter):
    """
    Plots the phase response and the corresponding fit of the harmonic
    damped oscillator.

    The ``name`` attributes of ``x`` and ``y`` are overwritten with the
    axis labels; they become the column names of the plotted frame.

    Args:
        x (`float array`): X coordinates (frequency in kHz)
        y (`float array`): Y coordinates (phase in radians)
        resonance_frequency (`float array`): Resonance frequency given by
            the fit of x and y
        parameter (`float array`): Other parameters of the fit function;
            the first three entries are passed on to ``fit_function``

    Returns:
        p (`ggplot object`): Returns a ggplot object
    """
    y_fit = fit_function(x, resonance_frequency,
                         parameter[0], parameter[1], parameter[2])

    y_fit.name = 'Phase fit'
    x.name = 'Frequency (kHz)'
    y.name = 'Phase (rad)'

    data = concat([x, y, y_fit], axis=1)
    columns = list(data)
    frequency_col = columns[0]
    phase_col = columns[1]
    fit_col = columns[2]

    fit_line = geom_line(aes(x=frequency_col, y=fit_col),
                         color='red', size=0.5)
    styling = theme(figure_size=(15, 7),
                    strip_background=element_rect(fill='white'),
                    axis_line_x=element_line(color='black'),
                    axis_line_y=element_line(color='black'),
                    legend_key=element_rect(fill='white', color='white'))

    # Measured phase as points, fitted phase as a thin red line.
    p = (ggplot(aes(x=frequency_col, y=phase_col), data=data)
         + geom_point()
         + fit_line
         + theme_seaborn(style='ticks', context='talk', font_scale=0.75)
         + styling)
    return p
def plot_mass(calculated_cell_mass, plot_every_nth_point):
    """
    Plots the resulting mass.

    The first column of the passed frame is renamed to ``Time (h)`` in
    place before plotting.

    Args:
        calculated_cell_mass (`pandas data frame`): Pandas data frame [Nx3]
            with time, calculated cell mass and rolling mean averaged cell
            mass
        plot_every_nth_point (`int`): If 1 all data points are plotted.
            Otherwise every nth data point is used for plotting.

    Returns:
        p (`ggplot object`): Returns a ggplot plot object
    """
    col_names = list(calculated_cell_mass)
    col_names[0] = 'Time (h)'
    calculated_cell_mass.columns = col_names

    time_col, mass_col, averaged_col = (
        col_names[0], col_names[1], col_names[2])
    thinned = calculated_cell_mass.iloc[::plot_every_nth_point, :]

    # Raw mass as faint points, rolling mean as a red line.
    raw_points = geom_point(alpha=0.1)
    averaged_line = geom_line(aes(y=averaged_col), color='red')
    p = (ggplot(aes(x=time_col, y=mass_col), data=thinned)
         + raw_points
         + averaged_line
         + theme_bw())
    return p
def plot_it(t, concentration):
    """Build a line layer for one concentration profile.

    Args:
        t: value shown in the ``t`` column as the label ``t=<t>``.
        concentration: sequence of concentration values; the ``x`` column
            holds their positions ``0 .. len(concentration) - 1``.

    Returns:
        A ``geom_line`` layer carrying its own data frame (columns ``x``,
        ``t`` and ``concentration``). No mapping is set here, so the
        aesthetics come from the plot the layer is added to.
    """
    # Size the x column from the argument itself. The previous code used
    # ``len(y)`` with ``y`` not defined in this function.
    data = pd.DataFrame({
        "x": range(len(concentration)),
        "t": f"t={t}",
        "concentration": concentration,
    })
    return p9.geom_line(data=data, size=1)
def plot_key_stock_indicators(df, stock):
    """Plot key indicators of a stock over time, one facet per indicator.

    Args:
        df: frame indexed by fetch date with ``pe``, ``eps``,
            ``annual_dividend_yield``, ``volume`` and ``last_price``
            columns. It is not modified.
        stock: unused by this function.

    Returns:
        The result of ``plot_as_inline_html_data`` for the plot.

    Raises:
        AssertionError: if ``df`` is not a DataFrame or lacks ``eps``,
            ``pe`` or ``annual_dividend_yield``.
    """
    assert isinstance(df, pd.DataFrame)
    assert all(column in df.columns
               for column in ('eps', 'pe', 'annual_dividend_yield'))

    # Work on a copy: rescaling ``volume`` on the caller's frame would
    # rescale it again on every further call with the same frame.
    df = df.copy()
    df['volume'] = (df['last_price'] * df['volume']
                    / 1000000)  # again, express as $(M)
    df['fetch_date'] = df.index

    plot_df = pd.melt(
        df,
        id_vars='fetch_date',
        value_vars=['pe', 'eps', 'annual_dividend_yield', 'volume',
                    'last_price'],
        var_name='indicator',
        value_name='value')
    plot_df['value'] = pd.to_numeric(plot_df['value'])
    plot_df['fetch_date'] = pd.to_datetime(plot_df['fetch_date'])

    plot = (
        p9.ggplot(plot_df, p9.aes('fetch_date', 'value', color='indicator'))
        + p9.geom_line(size=1.5, show_legend=False)
        + p9.facet_wrap('~ indicator', nrow=6, ncol=1, scales='free_y')
        + p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                   figure_size=(8, 7))
        + p9.xlab("")
        + p9.ylab(""))
    return plot_as_inline_html_data(plot)
def plot_ROC(label_list, pred_list, names=None, **args):
    """
    Plot several ROC curves in one figure.

    :param label_list: sequence of true-label lists, given as
        [(y1, y2, ...), (y1, y2, ...)]; entries correspond to pred_list.
    :param pred_list: sequence of predicted-probability lists, with the
        same length as label_list.
    :param names: model names; None or a sequence of the same length.
        When None, two label sets are named ('train', 'test'), three are
        named ('train', 'valid', 'test'), and any other count gets running
        numbers.
    :param args: parameters passed on to sklearn.metrics.roc_curve
    :return: plotnine object
    """
    if names is None:
        default_names = {2: ('train', 'test'),
                         3: ('train', 'valid', 'test')}
        n_sets = len(label_list)
        names = default_names.get(n_sets, list(range(n_sets)))

    roc = [roc_curve(labels, preds, **args)
           for labels, preds in zip(label_list, pred_list)]

    # roc_curve returns (fpr, tpr, thresholds) per model; regroup by kind
    # and flatten so every curve point becomes one row.
    fpr_parts, tpr_parts, _ = zip(*roc)
    fpr = [value for part in fpr_parts for value in part]
    tpr = [value for part in tpr_parts for value in part]
    models = [name
              for name, part in zip(names, fpr_parts)
              for _ in range(len(part))]

    d_roc = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'model': models})

    diagonal = geom_segment(x=0, y=0, xend=1, yend=1,
                            linetype=':', color='grey')
    return (ggplot(d_roc,
                   aes(x='fpr', y='tpr', group='model', color='model'))
            + diagonal
            + geom_line()
            + scale_color_discrete(breaks=names)
            + labs(x='false positive rate', y='true positive rate')
            + coord_equal(ratio=1, xlim=[0, 1], ylim=[0, 1])
            + theme_classic()
            + theme(figure_size=(4, 4)))
def customized_algorithm_plot(experiment_name='finite_simple_sanity',
                              data_path=_DEFAULT_DATA_PATH):
    """Simple plot of average instantaneous regret by agent, per timestep.

    The loaded results are averaged over ``instant_regret`` for every
    (timestep, agent) pair, and each agent is relabelled through
    ``rename_agent`` before plotting.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      p: ggplot plot
    """
    df = load_data(experiment_name, data_path)

    grouped = df.groupby(['t', 'agent'])
    plt_df = grouped.agg({'instant_regret': np.mean}).reset_index()
    plt_df['agent_new_name'] = plt_df.agent.apply(rename_agent)

    custom_labels = ['Laplace TS', 'Langevin TS', 'TS', 'bootstrap TS']
    custom_colors = ["#E41A1C", "#377EB8", "#4DAF4A", "#984EA3"]
    colour_scale = gg.scale_color_manual(
        name='agent', labels=custom_labels, values=custom_colors)

    p = gg.ggplot(plt_df)
    p = p + gg.aes('t', 'instant_regret', colour='agent_new_name')
    p = p + gg.geom_line(size=1.25, alpha=0.75)
    p = p + gg.xlab('time period (t)') + gg.ylab('per-period regret')
    p = p + colour_scale
    return p
def plot_series(
        df,
        x=None,
        y=None,
        tick_text_size=6,
        line_size=1.5,
        y_axis_label="Point score",
        x_axis_label="",
        color="stock",
        use_smooth_line=False
):
    """Render ``y`` against ``x`` from ``df`` as a line or a smoothed line.

    Args:
        df: non-empty data frame.
        x: name of the x column (required despite the ``None`` default).
        y: name of the y column (required despite the ``None`` default).
        tick_text_size: positive integer font size of the tick labels.
        line_size: positive line width.
        y_axis_label: y axis title, must not be ``None``.
        x_axis_label: x axis title, must not be ``None``.
        color: column mapped to colour; a falsy value disables the mapping.
        use_smooth_line: draw ``geom_smooth`` instead of ``geom_line``.

    Returns:
        The result of ``plot_as_inline_html_data`` for the plot.
    """
    assert len(df) > 0
    assert len(x) > 0 and len(y) > 0
    assert line_size > 0.0
    assert isinstance(tick_text_size, int) and tick_text_size > 0
    assert y_axis_label is not None
    assert x_axis_label is not None

    if color:
        mapping = p9.aes(x=x, y=y, color=color)
    else:
        mapping = p9.aes(x=x, y=y)

    tick_theme = p9.theme(
        axis_text_x=p9.element_text(angle=30, size=tick_text_size),
        axis_text_y=p9.element_text(size=tick_text_size),
        legend_position="none",
    )
    if use_smooth_line:
        line_layer = p9.geom_smooth(size=line_size)
    else:
        line_layer = p9.geom_line(size=line_size)

    plot = (p9.ggplot(df, mapping)
            + p9.labs(x=x_axis_label, y=y_axis_label)
            + tick_theme
            + line_layer)
    return plot_as_inline_html_data(plot)
def go_to_time_plot3(large_go_to_time_probs_new: list,
                     large_go_to_time_probs_old: list,
                     average_minutes_per_game_values: list):
    """
    Plot go-to-time probability, old vs. new rules, no blowouts,
    300 matches/round.

    The figure is written to
    ``figures/go_to_time_300_matches_prob_plot.png``; nothing is returned.
    """
    n_values = len(average_minutes_per_game_values)
    # (rule label, probabilities) pairs, stacked into one long-format frame.
    series = [('New', large_go_to_time_probs_new),
              ('Old', large_go_to_time_probs_old)]

    large_time_prob_data = pd.DataFrame({
        'Average minutes per game':
            np.concatenate([average_minutes_per_game_values] * len(series)),
        'P(Go to time)':
            np.concatenate([probs for _, probs in series]),
        'Rules':
            np.concatenate([np.repeat(label, n_values)
                            for label, _ in series]),
    })

    figure = (plt.ggplot(large_time_prob_data,
                         plt.aes(x='Average minutes per game',
                                 y='P(Go to time)',
                                 color='Rules'))
              + plt.geom_line()
              + plt.geom_point()
              + plt.ylim([0, 1])
              + plt.theme_classic())
    figure.save(filename='figures/go_to_time_300_matches_prob_plot.png')
def plot_ci_eval(df):
    """Plot the ``bootstrap``, ``ztest`` and ``ttest`` columns against size.

    The three columns are melted into long format and drawn as one coloured
    line each over ``sample_size`` (log10 x axis, y axis fixed to [0, 1]).
    """
    methods = ['bootstrap', 'ztest', 'ttest']
    molten = pd.melt(df, id_vars=['sample_size'], value_vars=methods)

    figure = ggplot(molten,
                    aes(x='sample_size', y='value', color='variable'))
    figure = figure + geom_line()
    figure = figure + scale_x_log10()
    figure = figure + ylim(0, 1)
    return figure
def _base_scaling(plt_df: pd.DataFrame,
                  sweep_vars: Optional[Sequence[str]] = None,
                  with_baseline: bool = True) -> gg.ggplot:
    """Base underlying piece of the scaling plots for deep sea.

    Plots ``episode`` against ``size`` as points coloured by ``solved``.
    When some rows are unfinished, the point shape also encodes
    ``finished``. With ``with_baseline`` a dashed black line built by
    ``_make_baseline`` is added.
    """
    all_finished = np.all(plt_df.finished)
    if all_finished:
        point_mapping = gg.aes(colour='solved')
    else:
        point_mapping = gg.aes(shape='finished', colour='solved')

    p = gg.ggplot(plt_df) + gg.aes(x='size', y='episode')
    p += gg.geom_point(point_mapping, size=3, alpha=0.75)
    if not all_finished:
        p += gg.scale_shape_manual(values=['x', 'o'])

    # A single colour is enough when every row is solved.
    if np.all(plt_df.solved):
        palette = ['#313695']  # blue
    else:
        palette = ['#d73027', '#313695']  # [red, blue]
    p += gg.scale_colour_manual(values=palette)

    if with_baseline:
        baseline_df = _make_baseline(plt_df, sweep_vars)
        p += gg.geom_line(data=baseline_df, colour='black',
                          linetype='dashed', alpha=0.4, size=1.5)
    return p
def plot_fundamentals(df, stock) -> str:
    """Plot the fundamentals of a stock over time, one facet per indicator.

    Args:
        df: frame indexed by fetch date holding every column listed in
            ``columns_to_report`` below. It is not modified.
        stock: unused by this function.

    Returns:
        The result of ``plot_as_inline_html_data`` for the plot.

    Raises:
        AssertionError: if ``df`` is not a DataFrame or a reported column
            is missing.
    """
    assert isinstance(df, pd.DataFrame)
    columns_to_report = ["pe", "eps", "annual_dividend_yield", "volume",
                         "last_price", "change_in_percent_cumulative",
                         "change_price", "market_cap", "number_of_shares"]
    colnames = df.columns
    for column in columns_to_report:
        assert column in colnames

    # Work on a copy: the unit conversions below would otherwise be applied
    # again on every further call with the same frame.
    df = df.copy()
    df["volume"] = (df["last_price"] * df["volume"]
                    / 1000000)  # again, express as $(M)
    df["market_cap"] = df["market_cap"] / (1000 * 1000)
    df["number_of_shares"] = df["number_of_shares"] / (1000 * 1000)
    df["fetch_date"] = df.index

    plot_df = pd.melt(
        df,
        id_vars="fetch_date",
        value_vars=columns_to_report,
        var_name="indicator",
        value_name="value",
    )
    plot_df["value"] = pd.to_numeric(plot_df["value"])
    plot_df["fetch_date"] = pd.to_datetime(plot_df["fetch_date"])

    plot = (
        p9.ggplot(plot_df, p9.aes("fetch_date", "value", color="indicator"))
        + p9.geom_line(size=1.5, show_legend=False)
        + p9.facet_wrap("~ indicator", nrow=len(columns_to_report), ncol=1,
                        scales="free_y")
        + p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                   axis_text_y=p9.element_text(size=7),
                   figure_size=(8, len(columns_to_report)))
        + p9.xlab("")
        + p9.ylab("")
    )
    return plot_as_inline_html_data(plot)
def go_to_time_plot2(go_to_time_probs_new: list,
                     go_to_time_probs_old: list,
                     go_to_time_blowout_probs_new: list,
                     go_to_time_blowout_probs_old: list,
                     average_minutes_per_game_values: list):
    """
    Plot go-to-time probability, new vs. old rules, blowouts vs. no
    blowouts, 85 matches/round.

    The figure is written to
    ``figures/go_to_time_prob_with_blowouts_plot.png``; nothing is returned.
    """
    n_values = len(average_minutes_per_game_values)
    # (rule label, probabilities) pairs, stacked into one long-format frame.
    series = [
        ('New, no blowouts', go_to_time_probs_new),
        ('Old, no blowouts', go_to_time_probs_old),
        ('New, blowouts', go_to_time_blowout_probs_new),
        ('Old, blowouts', go_to_time_blowout_probs_old),
    ]

    time_prob_blowout_data = pd.DataFrame({
        'Average minutes per game':
            np.concatenate([average_minutes_per_game_values] * len(series)),
        'P(Go to time)':
            np.concatenate([probs for _, probs in series]),
        'Rules':
            np.concatenate([np.repeat(label, n_values)
                            for label, _ in series]),
    })

    figure = (plt.ggplot(time_prob_blowout_data,
                         plt.aes(x='Average minutes per game',
                                 y='P(Go to time)',
                                 color='Rules'))
              + plt.geom_line()
              + plt.geom_point()
              + plt.ylim([0, 1])
              + plt.theme_classic())
    figure.save(filename='figures/go_to_time_prob_with_blowouts_plot.png')
def test_line():
    """Compare geom_path, geom_line and geom_step on data with reversed x.

    geom_path plots in the given order, while geom_line and geom_step sort
    by x before plotting.
    """
    reversed_x = df['x'].values[::-1]
    df2 = df.copy()
    df2['x'] = reversed_x

    layers = (
        geom_path(aes(y='y'), size=4),
        geom_line(aes(y='y+2'), color='blue', size=4),
        geom_step(aes(y='y+4'), color='red', size=4),
    )
    p = ggplot(df2, aes('x'))
    for layer in layers:
        p = p + layer

    assert p == 'path_line_step'
def yoy_growth():
    """
    This creates figures showing the number of questions versus year in
    dataset.

    For every year from the first year in the data up to 2017 (or the last
    year in the data, if earlier), the cumulative number of questions and
    the cumulative number of distinct answer pages are computed and plotted.
    The figure is saved as ``question_answer_counts.pdf`` under
    ``output_path``.
    """
    year_pages = defaultdict(set)
    year_questions = Counter()
    with open('data/external/datasets/qanta.mapped.2018.04.18.json',
              encoding='utf-8') as f:
        questions = json.load(f)['questions']
    for q in questions:
        # NOTE(review): both counters are updated only for questions that
        # have a page — confirm the second one belongs under this ``if``.
        if q['page'] is not None:
            year_pages[q['year']].add(q['page'])
            year_questions[q['year']] += 1

    start_year = min(year_pages)
    # 2017 is the earlier year we have a full year's worth of data,
    # including partial 2018 isn't accurate
    end_year = min(2017, max(year_pages))

    # Running totals give the "up to and including this year" values in a
    # single pass instead of re-summing all earlier years for every year.
    year_rows = []
    seen_pages = set()
    total_questions = 0
    for year in range(start_year, end_year + 1):
        seen_pages |= year_pages.get(year, set())
        total_questions += year_questions[year]
        year_rows.append({'year': year, 'value': len(seen_pages),
                          'Quantity': 'Distinct Answers'})
        year_rows.append({'year': year, 'Quantity': 'Total Questions',
                          'value': total_questions})

    year_df = pd.DataFrame(year_rows)
    count_cat = CategoricalDtype(
        categories=['Total Questions', 'Distinct Answers'], ordered=True)
    year_df['Quantity'] = year_df['Quantity'].astype(count_cat)

    eprint(year_df[year_df.Quantity == 'Total Questions'])
    p = (
        ggplot(year_df)
        + aes(x='year', y='value', color='Quantity')
        + geom_line()
        + geom_point()
        + xlab('Year')
        + ylab('Count up to Year (inclusive)')
        + theme_fs()
        + scale_x_continuous(breaks=list(range(start_year, end_year + 1, 2)))
    )
    p.save(path.join(output_path, 'question_answer_counts.pdf'))
def accPlot(accsByNFeats):
    """Print a plot of accuracy against the number of features.

    ``accsByNFeats`` maps a set name to a mapping ``{p: accuracy}``. Each
    set is drawn as a translucent line with points, coloured by set name,
    on a log10 x axis.
    """
    # One single-row frame per (set, p) pair, stacked per set and overall.
    per_set_frames = [
        pd.concat(
            [DataFrame({"p": p,
                        "acc": accsByNFeats[set_name][p],
                        "set": set_name},
                       index=[str(p)])
             for p in accsByNFeats[set_name]],
            axis=0)
        for set_name in accsByNFeats
    ]
    ggd = pd.concat(per_set_frames)
    ggd['acc'] = ggd['acc'].astype(float)

    palette = ['darkgray', 'black', 'red', 'dodgerblue']
    ggo = (gg.ggplot(ggd, gg.aes(x='p', y='acc', color='set'))
           + gg.geom_line(alpha=0.5)
           + gg.geom_point()
           + gg.theme_bw()
           + gg.scale_x_log10(breaks=[10, 100, 1000, 10000])
           + gg.scale_color_manual(values=palette)
           + gg.ylab('Accuracy (5-fold CV)'))
    print(ggo)
def analyze_nir_intensity(gray_img, mask, bins=256, histplot=False):
    """Compute the intensity histogram of the masked pixels of a grayscale
    image and record it as an observation.

    The histogram counts and bin labels are stored through
    ``outputs.add_observation``. When ``histplot`` is True a line plot of
    the per-bin percentage of mask pixels is also built.

    Inputs:
    gray_img     = 8- or 16-bit grayscale image data
    mask         = Binary mask made from selected contours
    bins         = number of classes to divide spectrum into
    histplot     = if True plots histogram of intensity values

    Returns:
    analysis_images = list holding the histogram figure (empty unless
                      histplot is True)

    :param gray_img: numpy array
    :param mask: numpy array
    :param bins: int
    :param histplot: bool
    :return analysis_images: list
    """
    params.device += 1

    # Scale the thresholded mask to 0/1 and use it to zero out background.
    unit_mask = binary_threshold(mask, 0, 255, 'light')
    unit_mask = (unit_mask / 255)
    masked = np.multiply(gray_img, unit_mask)

    # Upper histogram limit depends on the bit depth of the input.
    maxval = 65536 if gray_img.dtype == 'uint16' else 256

    # Make a pseudo-RGB image
    rgbimg = cv2.cvtColor(gray_img, cv2.COLOR_GRAY2BGR)

    # The range starts at 1, so zero-valued pixels are not counted.
    hist_nir, hist_bins = np.histogram(masked, bins, (1, maxval))
    bin_starts = hist_bins[:-1]
    hist_bins2 = [float(round(edge, 2)) for edge in bin_starts]
    hist_nir1 = [float(count) for count in hist_nir]

    # make hist percentage for plotting
    pixels = cv2.countNonZero(unit_mask)
    hist_percent = (hist_nir / float(pixels)) * 100

    masked1 = cv2.bitwise_and(rgbimg, rgbimg, mask=mask)

    if params.debug == "print":
        print_image(masked1,
                    os.path.join(params.debug_outdir,
                                 str(params.device) + "_masked_nir_plant.jpg"))
    elif params.debug == "plot":
        plot_image(masked1)

    analysis_images = []

    if histplot is True:
        dataset = pd.DataFrame({
            'Grayscale pixel intensity': np.arange(0, bins),
            'Proportion of pixels (%)': hist_percent,
        })
        fig_hist = (ggplot(data=dataset,
                           mapping=aes(x='Grayscale pixel intensity',
                                       y='Proportion of pixels (%)'))
                    + geom_line(color='red')
                    + scale_x_continuous(breaks=list(range(0, bins, 25))))
        analysis_images.append(fig_hist)

        if params.debug == "print":
            fig_hist.save(os.path.join(params.debug_outdir,
                                       str(params.device) + '_nir_hist.png'))
        elif params.debug == "plot":
            print(fig_hist)

    outputs.add_observation(variable='nir_frequencies',
                            trait='near-infrared frequencies',
                            method='plantcv.plantcv.analyze_nir_intensity',
                            scale='frequency',
                            datatype=list,
                            value=hist_nir1,
                            label=hist_bins2)

    # Store images
    outputs.images.append(analysis_images)

    return analysis_images
def analyze_color(rgb_img, mask, hist_plot_type=None):
    """Analyze the color properties of an image object

    The masked image is split into blue/green/red, lightness/green-magenta/blue-yellow
    (LAB) and hue/saturation/value (HSV) channels. A 256-bin histogram of every channel
    and three hue statistics (circular mean, circular standard deviation, median) are
    stored as observations.

    Inputs:
    rgb_img          = RGB image data
    mask             = Binary mask made from selected contours
    hist_plot_type   = None, 'all', 'rgb', 'lab' or 'hsv' (case-insensitive)

    Returns:
    analysis_images  = list holding the histogram figure (empty when hist_plot_type is None)

    :param rgb_img: numpy.ndarray
    :param mask: numpy.ndarray
    :param hist_plot_type: str
    :return analysis_images: list
    """
    params.device += 1

    if len(np.shape(rgb_img)) < 3:
        fatal_error("rgb_img must be an RGB image")

    # Mask the input image
    masked = cv2.bitwise_and(rgb_img, rgb_img, mask=mask)
    # Extract the blue, green, and red channels
    b, g, r = cv2.split(masked)
    # Convert the BGR image to LAB and extract lightness, green-magenta, and blue-yellow
    lab = cv2.cvtColor(masked, cv2.COLOR_BGR2LAB)
    l, m, y = cv2.split(lab)
    # Convert the BGR image to HSV and extract hue, saturation, and value
    hsv = cv2.cvtColor(masked, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    # Per plot type: the dataset columns to draw and the manual colour values.
    # The colour lists are written in the alphabetical order of the channel labels,
    # as in the original per-branch code.
    plot_specs = {
        "ALL": (['blue', 'green', 'red', 'lightness', 'green-magenta', 'blue-yellow',
                 'hue', 'saturation', 'value'],
                ['blue', 'yellow', 'green', 'magenta', 'blueviolet', 'dimgray',
                 'red', 'cyan', 'orange']),
        "RGB": (['blue', 'green', 'red'], ['blue', 'green', 'red']),
        "LAB": (['lightness', 'green-magenta', 'blue-yellow'], ['yellow', 'magenta', 'dimgray']),
        "HSV": (['hue', 'saturation', 'value'], ['blueviolet', 'cyan', 'orange']),
    }

    if hist_plot_type is not None and hist_plot_type.upper() not in plot_specs:
        fatal_error("The histogram plot type was " + str(hist_plot_type) +
                    ', but can only be one of the following: None, "all", "rgb", "lab", or "hsv"!')

    # Labels attached to the stored histograms
    # RGB signal values are in an unsigned 8-bit scale of 0-255
    rgb_values = list(range(0, 256))
    # Hue values are in a 0-359 degree scale, every 2 degrees at the midpoint of the interval
    hue_values = [i * 2 + 1 for i in range(0, 180)]
    # Percentage values on a 0-100 scale (lightness, saturation, and value)
    percent_values = [round((i / 255) * 100, 2) for i in range(0, 256)]
    # Diverging values on a -128 to 127 scale (green-magenta and blue-yellow)
    diverging_values = list(range(-128, 128))

    # (channel label, channel data, labels for the stored histogram), in output order
    channel_table = (
        ("blue", b, rgb_values),
        ("green", g, rgb_values),
        ("red", r, rgb_values),
        ("lightness", l, percent_values),
        ("green-magenta", m, diverging_values),
        ("blue-yellow", y, diverging_values),
        ("hue", h, hue_values),
        ("saturation", s, percent_values),
        ("value", v, percent_values),
    )

    # NOTE(review): OpenCV documents the upper histogram range bound as exclusive, so
    # [0, 255] appears to leave pixels with value 255 uncounted. Left unchanged because
    # fixing it alters every stored histogram -- confirm before changing to [0, 256].
    histograms = {}
    for label, channel, _ in channel_table:
        counts = cv2.calcHist([channel], [0], mask, [256], [0, 255])
        histograms[label] = [float(count[0]) for count in counts]

    analysis_images = []

    # Dataframe of bin labels (8-bit data) and one column of counts per channel
    column_order = ['bins'] + [label for label, _, _ in channel_table]
    table = dict(histograms)
    table['bins'] = np.arange(0, 256)
    dataset = pd.DataFrame(table, columns=column_order)

    # Make the histogram figure using plotnine
    hist_fig = None
    if hist_plot_type is not None:
        value_vars, line_colors = plot_specs[hist_plot_type.upper()]
        melted = pd.melt(dataset, id_vars=['bins'], value_vars=value_vars,
                         var_name='Color Channel', value_name='Pixels')
        hist_fig = (ggplot(melted, aes(x='bins', y='Pixels', color='Color Channel'))
                    + geom_line()
                    + scale_x_continuous(breaks=list(range(0, 256, 25)))
                    + scale_color_manual(line_colors)
                    )
        analysis_images.append(hist_fig)

    # Hue values of zero are red but are also the value for pixels where hue is undefined
    # The hue value of a pixel will be undefined when the color values are saturated
    # Therefore, hue values of zero are excluded from the calculations below
    defined_hue = h[np.where(h > 0)]

    # The statistics are rescaled from the encoded 0-179 range to the 0-359 degree range
    hue_median = np.median(defined_hue) * 2
    hue_circular_mean = stats.circmean(defined_hue, high=179, low=0) * 2
    hue_circular_std = stats.circstd(defined_hue, high=179, low=0) * 2

    # Plot or print the histogram
    if hist_fig is not None:
        if params.debug == 'print':
            hist_fig.save(os.path.join(params.debug_outdir,
                                       str(params.device) + '_analyze_color_hist.png'))
        elif params.debug == 'plot':
            print(hist_fig)

    # Store the channel histograms
    for label, _, signal_values in channel_table:
        outputs.add_observation(variable=label + '_frequencies', trait=label + ' frequencies',
                                method='plantcv.plantcv.analyze_color', scale='frequency',
                                datatype=list, value=histograms[label], label=signal_values)

    # Store the hue statistics
    outputs.add_observation(variable='hue_circular_mean', trait='hue circular mean',
                            method='plantcv.plantcv.analyze_color', scale='degrees', datatype=float,
                            value=hue_circular_mean, label='degrees')
    # Fixed: this observation previously stored hue_median under the std name
    outputs.add_observation(variable='hue_circular_std', trait='hue circular standard deviation',
                            method='plantcv.plantcv.analyze_color', scale='degrees', datatype=float,
                            value=hue_circular_std, label='degrees')
    outputs.add_observation(variable='hue_median', trait='hue median',
                            method='plantcv.plantcv.analyze_color', scale='degrees', datatype=float,
                            value=hue_median, label='degrees')

    # Store images
    outputs.images.append(analysis_images)

    return analysis_images
# Colour stops shared by the colour and fill gradients: three literal RGB
# tuples followed by bcolor (twice) and bcolor_darker (twice)
gradient = (
    (0.99, 0.88, 0.87),
    (0.98, 0.62, 0.71),
    (0.86, 0.20, 0.59),
    bcolor,
    bcolor,
    bcolor_darker,
    bcolor_darker,
)

# Split df into thirds: every 9th row of the first third, the whole middle
# third, and every 12th row of the last third
df1 = df[:n//3:9]
df2 = df[n//3:2*n//3]
df3 = df[2*n//3::12]

# The label annotation must stay the first layer; it is stripped below
name_label = annotate(geom='label', x=0.295, y=0.495, label='pl tnine',
                      label_size=1.5, label_padding=.1, size=24,
                      fill=bcolor_lighter, color=bcolor)
point_layer = geom_point(df1, size=8, stroke=0, show_legend=False)
line_layer = geom_line(df2, size=2, color=bcolor_darker, show_legend=False)
bar_layer = geom_bar(df3, aes('x+.06'), stat='identity', size=0,
                     show_legend=False)

p = (ggplot(aes('x', 'y', color='y', fill='y'))
     + name_label
     + point_layer
     + line_layer
     + bar_layer
     + scale_color_gradientn(colors=gradient)
     + scale_fill_gradientn(colors=gradient)
     + theme_void()
     + theme(figure_size=(3.6, 3.6)))

p.save('logo.pdf', pad_inches=-0.04)

# Remove the project name (drop the first layer) and save the small variant
p.layers = p.layers.__class__(p.layers[1:])
p.save('logo-small.pdf', pad_inches=-0.04)
def fluor_fvfm(fdark, fmin, fmax, mask, bins=256):
    """Analyze PSII camera images.

    Computes Fv = Fmax - Fmin inside the mask, the ratio Fv/Fm, and a histogram of
    the non-zero Fv/Fm values. The histogram, its peak bin, the median Fv/Fm and the
    Fdark quality-control flag are stored as observations.

    Inputs:
    fdark       = grayscale fdark image
    fmin        = grayscale fmin image
    fmax        = grayscale fmax image
    mask        = mask of plant (binary, single channel)
    bins        = number of bins (1 to 256 for 8-bit; 1 to 65,536 for 16-bit; default is 256)

    Returns:
    analysis_images = list of images (fv image and fvfm histogram figure)

    :param fdark: numpy.ndarray
    :param fmin: numpy.ndarray
    :param fmax: numpy.ndarray
    :param mask: numpy.ndarray
    :param bins: int
    :return analysis_images: list
    """
    # Auto-increment the device counter
    params.device += 1

    # All three inputs must be single-channel (two-dimensional) images
    if not all(len(np.shape(frame)) == 2 for frame in [fdark, fmin, fmax]):
        fatal_error("The fdark, fmin, and fmax images must be grayscale images.")

    # QC Fdark image: passes unless the masked maximum exceeds 2000
    fdark_mask = cv2.bitwise_and(fdark, fdark, mask=mask)
    qc_fdark = not (np.amax(fdark_mask) > 2000)

    # Mask Fmin and Fmax images
    fmin_mask = cv2.bitwise_and(fmin, fmin, mask=mask)
    fmax_mask = cv2.bitwise_and(fmax, fmax, mask=mask)

    # Fvariable, where Fv = Fmax - Fmin (masked).
    # With unsigned integer data a negative difference rolls over to a nonsensical
    # value, so Fv is set to zero wherever Fmin is greater than Fmax.
    fv = np.subtract(fmax_mask, fmin_mask)
    fv[np.where(fmax_mask < fmin_mask)] = 0
    analysis_images = [fv]

    # Fv/Fm (Fvariable / Fmax), computed in float64 and only where Fmax is greater
    # than zero; wherever Fmax is zero, Fv is zero as well.
    fvfm = fv.astype(np.float64)
    fmax_flt = fmax_mask.astype(np.float64)
    lit = np.where(fmax_mask > 0)
    fvfm[lit] /= fmax_flt[lit]

    # Median and histogram of the non-zero Fv/Fm values
    nonzero_fvfm = fvfm[np.where(fvfm > 0)]
    fvfm_median = np.median(nonzero_fvfm)
    fvfm_hist, fvfm_bins = np.histogram(nonzero_fvfm, bins, range=(0, 1))

    # fvfm_bins holds bins + 1 edges; add half the bin width to each lower edge to
    # get one midpoint (x) per frequency (y)
    midpoints = fvfm_bins[:-1] + 0.5 * np.diff(fvfm_bins)
    # Midpoint of the bin with the highest frequency
    max_bin = midpoints[np.argmax(fvfm_hist)]

    # Histogram figure (plotnine); the peak label is drawn at a fixed position
    dataset = pd.DataFrame({'Plant Pixels': fvfm_hist, 'Fv/Fm': midpoints})
    peak_text = 'Peak Bin Value: ' + str(max_bin)
    fvfm_hist_fig = ggplot(data=dataset, mapping=aes(x='Fv/Fm', y='Plant Pixels'))
    fvfm_hist_fig = fvfm_hist_fig + geom_line(color='green', show_legend=True)
    fvfm_hist_fig = fvfm_hist_fig + geom_label(label=peak_text, x=.15, y=205, size=8,
                                               color='green')
    analysis_images.append(fvfm_hist_fig)

    # The Fv/Fm image is no longer pseudocolored here; it can be pseudocolored by pcv.pseudocolor

    if params.debug == 'print':
        for image, suffix in ((fmin_mask, '_fmin_mask.png'),
                              (fmax_mask, '_fmax_mask.png'),
                              (fv, '_fv_convert.png')):
            print_image(image, os.path.join(params.debug_outdir, str(params.device) + suffix))
        fvfm_hist_fig.save(os.path.join(params.debug_outdir, str(params.device) + '_fv_hist.png'))
    elif params.debug == 'plot':
        for image in (fmin_mask, fmax_mask, fv):
            plot_image(image, cmap='gray')
        print(fvfm_hist_fig)

    # (variable, trait, datatype, value, label) for every stored observation, in order
    observations = (
        ('fvfm_hist', 'Fv/Fm frequencies', list, fvfm_hist.tolist(),
         np.around(midpoints, decimals=len(str(bins))).tolist()),
        ('fvfm_hist_peak', 'peak Fv/Fm value', float, float(max_bin), 'none'),
        ('fvfm_median', 'Fv/Fm median', float,
         float(np.around(fvfm_median, decimals=4)), 'none'),
        ('fdark_passed_qc', 'Fdark passed QC', bool, qc_fdark, 'none'),
    )
    for variable, trait, datatype, value, label in observations:
        outputs.add_observation(variable=variable, trait=trait,
                                method='plantcv.plantcv.fluor_fvfm', scale='none',
                                datatype=datatype, value=value, label=label)

    # Store images
    outputs.images.append(analysis_images)

    return analysis_images
def test_no_missing_values():
    """Draw column 'y2' of df_missing as a line and compare with 'no_missing_values'.

    NOTE(review): the string comparison presumably goes through the test suite's
    plot-equality hook (baseline image lookup) -- confirm in the test harness.
    """
    line_layer = geom_line(aes(y='y2'), size=2)
    p = ggplot(df_missing, aes(x='x')) + line_layer

    assert p == 'no_missing_values'
def test_missing_values():
    """Draw column 'y1' of df_missing as a line; the comparison must emit a UserWarning.

    NOTE(review): the string comparison presumably goes through the test suite's
    plot-equality hook (baseline image lookup) -- confirm in the test harness.
    """
    line_layer = geom_line(aes(y='y1'), size=2)
    p = ggplot(df_missing, aes(x='x')) + line_layer

    # The warning is expected while the plot is compared, not while it is built
    with pytest.warns(UserWarning):
        assert p == 'missing_values'
def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
    """Plot accuracy against the percentage of the question revealed.

    With ``expo=False`` a single smoothed curve per guessing model is drawn from
    ``self.char_plot_df``. With ``expo=True`` the plot is faceted by guessing model
    and coloured by dataset, and human gameplay curves are added when
    'data/external/all_human_gameplay.json' exists and ``self.no_humans`` is false.

    :param expo: build the faceted plot (with optional human data) instead of the simple one
    :param no_models: in the faceted plot, draw only the human data as points
    :param columns: facet into a single column instead of a single row
    :return: the plotnine plot
    :raises ValueError: if ``no_models`` is set but no human data is available
    """
    human_path = 'data/external/all_human_gameplay.json'

    def accuracy_curve(correct_positions, wrong_positions, dataset, name):
        # Cumulative count of correct answers (ordered by position) divided by the
        # total number of answers, tagged with its dataset and guesser name
        positions = np.array(correct_positions + wrong_positions)
        outcomes = np.array(len(correct_positions) * [1] + len(wrong_positions) * [0])
        order = np.argsort(positions)
        sorted_outcomes = outcomes[order]
        curve = pd.DataFrame({
            'correct': sorted_outcomes.cumsum() / sorted_outcomes.shape[0],
            'char_percent': positions[order],
        })
        curve['Dataset'] = dataset
        curve['Guessing_Model'] = f' {name}'
        return curve

    if self.y_max is not None:
        limits = [0, float(self.y_max)]
        eprint(f'Setting limits to: {limits}')
    else:
        limits = [0, 1]

    if not expo:
        if self.save_df is not None:
            eprint(f'Saving df to: {self.save_df}')
            # Fixed: this branch used an undefined local `df`
            self.char_plot_df.to_json(self.save_df)

        return (
            ggplot(self.char_plot_df)
            + aes(x='char_percent', y='correct', color='Guessing_Model')
            + stat_smooth(method='mavg', se=False, method_args={'window': 500})
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + coord_cartesian(ylim=limits)
        )

    use_humans = os.path.exists(human_path) and not self.no_humans
    human_df = None
    if use_humans:
        with open(human_path) as f:
            all_gameplay = json.load(f)

        frames = []
        for event, name in [('parents', 'Intermediate'), ('maryland', 'Expert'), ('live', 'National')]:
            if self.merge_humans:
                name = 'Human'
            gameplay = all_gameplay[event]

            # The 'live' event contributes no control (regular test) curve
            if event != 'live':
                frames.append(accuracy_curve(
                    gameplay['control_correct_positions'],
                    gameplay['control_wrong_positions'],
                    'Regular Test', name))

            frames.append(accuracy_curve(
                gameplay['adv_correct_positions'],
                gameplay['adv_wrong_positions'],
                'IR Adversarial', name))

            if len(gameplay['advneural_correct_positions']) > 0:
                frames.append(accuracy_curve(
                    gameplay['advneural_correct_positions'],
                    gameplay['advneural_wrong_positions'],
                    'RNN Adversarial', name))

        human_df = pd.concat(frames)
        human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
        human_dtype = CategoricalDtype(human_vals, ordered=True)
        human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(human_dtype)
        dataset_dtype = CategoricalDtype(['Regular Test', 'IR Adversarial', 'RNN Adversarial'],
                                         ordered=True)
        human_df['Dataset'] = human_df['Dataset'].astype(dataset_dtype)

    if no_models:
        if human_df is None:
            # Previously failed with an unbound-variable error on human_df
            raise ValueError(
                'no_models=True requires human gameplay data, but it is missing or disabled')
        p = ggplot(human_df) + geom_point(shape='.')
    else:
        df = self.char_plot_df
        if 1 not in self.rounds:
            df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
        if 2 not in self.rounds:
            df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
            df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
        p = ggplot(df)
        if self.save_df is not None:
            eprint(f'Saving df to: {self.save_df}')
            df.to_json(self.save_df)

        if use_humans:
            eprint('Loading human data')
            p = p + geom_line(data=human_df)

    if columns:
        facet_conf = facet_wrap('Guessing_Model', ncol=1)
    else:
        facet_conf = facet_wrap('Guessing_Model', nrow=1)

    # Model curves: moving average or binned means; none when only humans are shown
    if no_models:
        chart = None
    elif self.mvg_avg_char:
        chart = stat_smooth(method='mavg', se=False, method_args={'window': 400})
    else:
        chart = stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)

    p = (
        p + facet_conf
        + aes(x='char_percent', y='correct', color='Dataset')
    )
    if chart is not None:
        p += chart

    p = (
        p
        + scale_y_continuous(breaks=np.linspace(0, 1, 6))
        + scale_x_continuous(breaks=[0, .5, 1])
        + coord_cartesian(ylim=limits)
        + xlab('Percent of Question Revealed')
        + ylab('Accuracy')
        + theme(
            #legend_position='top',
            legend_box_margin=0, legend_title=element_blank(),
            strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5})
        )
        + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'], name='Questions')
    )
    if self.title != '':
        p += ggtitle(self.title)

    return p
accsByNFeats = OrderedDict([(s, OrderedDict([(n, fitKnnWithNFeat(n, s)) for n in nFeatures])) for s in xnorms]) plotData = pd.concat([DataFrame({"set" : s, "p" : p, "acc" : accsByNFeats[s][p]}, index = [s + "_" + str(p)]) for s in accsByNFeats for p in accsByNFeats[s]], axis = 0) plotData['acc'] = plotData['acc'].astype(float) plt.close() ggo = gg.ggplot(plotData, gg.aes(x='p', y='acc', color='set')) ggo += gg.geom_line() ggo += gg.scale_x_log10() ggo += gg.theme_bw() print(ggo) # plotData.to_csv("KnnRealAccuracyByNFeat.tsv", # sep = "\t", # index = False, # header = True) ## ----------------------------------------------------------------- ## use PCA feature extraction ## ----------------------------------------------------------------- feKnnFitter = pipeline.Pipeline([ ('featextr', pcaextractor.PcaExtractor(k=3)),
'feature_set': [model], 'auc': metrics['auroc'].round(3) })) roc_df = metrics['roc_df'] roc_output = roc_output.append(pd.DataFrame({ 'false_positive_rate': roc_df.fpr, 'true_positive_rate': roc_df.tpr, 'partition': partition, 'feature_set': model })) (gg.ggplot(roc_output, gg.aes(x='false_positive_rate', y='true_positive_rate', color='feature_set', linetype='partition')) + gg.geom_line(size=1.1, alpha=0.7) + gg.labs(x='false positive rate', y='true positive rate') + theme_cognoma() ) # ### AUROC # In[20]: pd.pivot_table(auc_output, values='auc', index='feature_set', columns='partition')
def syntactic_diversity_plots():
    """Render three syntactic-diversity plots from the parse table.

    Reads 'data/external/syntactic_diversity_table.json', derives
    parse_ratio = unique_parses / parses, and saves under output_path:
    'syn_div_plot.pdf' (both metrics, faceted), 'n_unique_parses.pdf'
    and 'parse_ratio.pdf'.
    """
    with open('data/external/syntactic_diversity_table.json') as table_file:
        parse_df = pd.DataFrame(json.load(table_file))
    parse_df['parse_ratio'] = parse_df['unique_parses'] / parse_df['parses']

    # Long format: one row per (dataset, depth, metric)
    melt_df = pd.melt(
        parse_df,
        id_vars=['dataset', 'depth', 'overlap', 'parses'],
        value_vars=['parse_ratio', 'unique_parses'],
        var_name='metric',
        value_name='y'
    )

    facet_titles = {
        'parse_ratio': 'Average Unique Parses per Instance',
        'unique_parses': 'Count of Unique Parses',
    }

    def label_facet(name):
        # Unknown metric names map to None
        return facet_titles.get(name)

    def label_y(ys):
        # Tick labels ending in '000.0' are abbreviated with a 'K' suffix
        texts = [str(y) for y in ys]
        return [text[:-5] + 'K' if text.endswith('000.0') else text for text in texts]

    def depth_axis():
        # Fresh x scale per plot: integer ticks for depths 1 through 10
        ticks = list(range(1, 11))
        return scale_x_continuous(breaks=ticks, minor_breaks=list(range(1, 11)), limits=[1, 10])

    # Both metrics stacked in two facets with independent y axes
    combined = (
        ggplot(melt_df)
        + aes(x='depth', y='y', color='dataset')
        + facet_wrap('metric', scales='free_y', nrow=2, labeller=label_facet)
        + geom_line() + geom_point()
        + xlab('Parse Truncation Depth') + ylab('')
        + scale_color_discrete(name='Dataset')
        + scale_y_continuous(labels=label_y)
        + depth_axis()
        + theme_fs()
    )
    combined.save(path.join(output_path, 'syn_div_plot.pdf'))

    # Count of unique parses only
    counts = (
        ggplot(parse_df)
        + aes(x='depth', y='unique_parses', color='dataset')
        + geom_line() + geom_point()
        + xlab('Parse Truncation Depth')
        + ylab('Count of Unique Parses')
        + scale_color_discrete(name='Dataset')
        + depth_axis()
        + theme_fs()
    )
    counts.save(path.join(output_path, 'n_unique_parses.pdf'))

    # Ratio only, with the y axis fixed to [0, 1]
    ratios = (
        ggplot(parse_df)
        + aes(x='depth', y='parse_ratio', color='dataset')
        + geom_line() + geom_point()
        + xlab('Parse Truncation Depth')
        + ylab('Average Unique Parses per Instance')
        + scale_color_discrete(name='Dataset')
        + depth_axis()
        + scale_y_continuous(limits=[0, 1])
        + theme_fs()
    )
    ratios.save(path.join(output_path, 'parse_ratio.pdf'))
def main():
    """Generate and save every figure of the series via save_both.

    Each section builds a data frame (mostly with concat_map over a range of
    levels), composes a plot from my_plot plus titles/limits/layers, and hands it
    to save_both together with the output name.
    """
    mpl.rc('mathtext', fontset='cm')

    # Silence the routine warnings emitted while drawing and saving
    warnings.filterwarnings('ignore', r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    def diagonal():
        # Dashed grey reference line y = x
        return gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')

    # --- P_f -------------------------------------------------------------
    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_f')
              + titles('P_f(O_b, O_l)')
              + limits((1, 10))
              + diagonal()
              + gg.geom_line()
              , 'Pf_Ob_Ol')

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_f')
              + titles('P_f(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pf_Ob_σ')

    # --- P_q -------------------------------------------------------------
    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_q')
              + titles('P_q(O_b, O_l)')
              + limits((1, 10))
              + diagonal()
              + gg.geom_line()
              , 'Pq_Ob_Ol')

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_q')
              + titles('P_q(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pq_Ob_σ')

    # --- O' --------------------------------------------------------------
    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'O_l', 'Opr')
              + titles("O'(O_b, O_l)")
              + limits((1, 10), (1, 10))
              + gg.geom_line()
              + diagonal()
              , 'Opr_Ob_Ol')

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'σ', 'Opr')
              + titles("O'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Opr_Ob_σ')

    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
          .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(my_plot(df, 'Opr', 'Pf')
              + titles("P_f(O')")
              + labs("O'", 'P_f')
              + limits((1, 20), (0, 1),
                       xbreaks=np.linspace(2, 20, 10), ybreaks=np.linspace(0, 1, 11))
              + gg.geom_line()
              + gg.geom_hline(yintercept=C, linetype='dashed', color='grey')
              , 'Pf_Opr')

    # --- σ' --------------------------------------------------------------
    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'σpr')
              + titles("σ'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'σpr_Ob_σ')

    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
          .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(my_plot(df, 'σpr', 'Pq')
              + titles("P_q(σ')")
              + labs("σ'", 'P_q')
              + limits((0, 20), (-1, 0),
                       xbreaks=np.linspace(0, 20, 11), ybreaks=np.linspace(-1, 0, 11))
              + gg.geom_line()
              , 'Pq_σpr')

    # --- Liability, free bet ---------------------------------------------
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + diagonal()
              , 'liab_Ob_Ol_free')

    # NOTE(review): this σ plot is built from the O_l generator (liab_Ob_Ol_free);
    # presumably a σ-based generator was intended -- confirm before changing.
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_free')

    # --- Liability, qualifying bet ---------------------------------------
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + diagonal()
              , 'liab_Ob_Ol_qual')

    # NOTE(review): same reuse of the O_l generator as in the free-bet σ plot -- confirm.
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_qual')

    # --- Shape of the more profitable space ------------------------------
    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    # Positions of the dashed guide lines (row labelled 0 of each filtered frame)
    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8],
        'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })

    lab_aes = gg.aes('x', 'y', label='label')

    # DataFrame.ix was removed in pandas 1.0; on this default integer index the
    # label-based .loc slices select the same single rows.
    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')
        # text alignment can't be specified in an aes
        + gg.geom_text(lab_aes, data=labels.loc[:0], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.loc[1:1], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.loc[2:], ha='right', va='bottom')
        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'), mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ')
        , 'Px_shapes')