def run_item_clicked(self, item):
    """Redraw the canvas with a step plot of the clicked 'Variables' entry.

    Only items whose parent node is labelled ``'Variables'`` are plotted.
    Top-level items and children of any other node are logged and ignored.

    Parameters
    ----------
    item
        The clicked tree item. It must provide ``text(0)`` (used as the
        component name and as the y column) and ``parent()``.
    """
    name = item.text(0)
    # Pass the argument separately so logging formats it lazily.
    logging.info('Run item %s clicked', name)

    parent = item.parent()
    if parent is None or parent.text(0) != 'Variables':
        return

    cpt = self._concrete_model.find_component(name)
    df = mo.get_entity(cpt)

    # The three variants differ only in the colour column and the faceting.
    if name == 'S':
        colour, facet = 'States', pn.facet_wrap('States')
    elif name == 'Q':
        colour, facet = 'J', pn.facet_grid('J~')
    else:
        colour, facet = 'J', pn.facet_grid('I~')

    ff = (pn.ggplot(df, pn.aes('T', name))
          + pn.ggtitle(cpt.doc)
          + pn.geom_step(pn.aes(color=colour), direction='hv')
          + facet)

    # NOTE(review): the division by 100 presumably converts canvas pixels
    # to inches at 100 dpi -- confirm against the canvas configuration.
    size = self.canvas.size()
    ff += pn.theme(figure_size=(size.width() / 100, size.height() / 100))

    # Swap the freshly drawn figure into the canvas and repaint.
    fig = ff.draw()
    self.canvas.figure = fig
    self.canvas.draw()
def test_facet_grid_drop_false():
    """Check ``facet_grid(..., drop=False)`` against its named baseline."""
    data = mpg.copy()
    # 'Q' is an extra category on top of '4', 'f' and 'r'.
    levels = ['4', 'f', 'r', 'Q']
    data['drv'] = pd.Categorical(data['drv'], levels)

    plot = ggplot(data, aes(x='displ', y='hwy'))
    plot = plot + geom_point()
    plot = plot + facet_grid('drv ~ .', drop=False)

    assert plot == 'facet_grid_drop_false'
def test_facet_grid_space_ratios():
    """Check ``facet_grid`` with explicit ``space`` ratios for both axes."""
    ratios = {'y': [1, 2], 'x': [1, 2]}
    grid = facet_grid('am ~ vs', space=ratios)

    plot = ggplot(mtcars, aes('wt', 'mpg'))
    plot = plot + geom_point()
    plot = plot + grid

    assert plot == 'facet_grid_space_ratios'
def test_arrow_facets():
    """Check arrow-headed paths drawn in panels split by column ``z``."""
    data = pd.DataFrame({
        'x': [1, 3, 2, 4],
        'y': [10, 9, 10, 9],
        'z': ['a', 'a', 'b', 'b']
    })
    path = geom_path(size=2, arrow=arrow(length=.25))

    plot = ggplot(data, aes('x', 'y'))
    plot = plot + path
    plot = plot + facet_grid('~ z')

    assert plot == 'arrow_facets'
def plot_trace(data_in, figure_size=(15, 5)):
    """
    Print trace and density plots of MCMC samples from data_in.

    Note: the columns of data_in must be exactly 'chain', 'sample_i',
    'parameter' and 'value'; any missing or extra column is rejected.

    Parameters
    ----------
    data_in : pd.DataFrame
        DataFrame containing samples from the sampler with columns:
        chain, sample_i, parameter, and value
    figure_size : tuple, default = (15,5)
        Optional input for figure size. It is written to
        ``pn.options.figure_size``, so it stays in effect after the call.

    Returns
    -------
    None: Prints out the trace and density plot for mcmc chain(s)

    Raises
    ------
    MyValidationError
        If the column names of data_in differ from the required set.
    """
    # Column validation
    required_columns = {'chain', 'sample_i', 'parameter', 'value'}
    if set(data_in.columns) != required_columns:
        raise MyValidationError(
            "Incorrect column names in data_in please check")

    # Set figure size (module-wide plotnine option)
    pn.options.figure_size = figure_size

    # Trace plot: one panel per parameter, one line per chain
    plot_out_trace = (
        pn.ggplot(pn.aes(x='sample_i', y='value', color='chain'),
                  data=data_in)
        + pn.geom_line()
        + pn.facet_grid('parameter ~ .')
        + pn.labs(x='Sample', y='Parameter Value')
    )

    # Distribution plot: one panel per parameter, one density per chain
    plot_out_distribution = (
        pn.ggplot(pn.aes(x='value', color='chain'), data=data_in)
        + pn.geom_density()
        + pn.facet_grid('parameter ~ .')
        + pn.labs(x='Parameter Value', y='Density')
    )

    print(plot_out_trace)
    print(plot_out_distribution)

    return None
def scatterplot(cls, df):
    """Save faceted scatterplots of mean values with error bars.

    Rows whose ``index`` is ``'Overall'`` or ``'No ROI'`` are dropped, the
    rest are sorted by ``Mean`` inside each (table_cols, table_rows) group,
    and one PNG per requested ordering is written to
    ``Figures/Scatterplots``.

    ``config.table_row_order`` selects the orderings: ``'roi'`` keeps only
    the ROI-ordered plot, ``'statorder'`` keeps only the stat-ordered plot,
    and any other value produces both.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``index``, ``Mean``, ``Conf_Int_95``, ``MB``, ``SENSE``
        and the columns named by ``config.table_cols`` and
        ``config.table_rows``.
    """
    Utils.check_and_make_dir("Figures/Scatterplots")

    # Remove No ROI and Overall rows
    df = df[(df['index'] != 'Overall') & (df['index'] != 'No ROI')]
    # Group by parameters and sort
    df = df.groupby([config.table_cols, config.table_rows]).apply(
        lambda x: x.sort_values(['Mean']))
    # Reset index to remove grouping
    df = df.reset_index(drop=True)

    scatterplots = ['roi_ordered', 'stat_ordered']
    if config.table_row_order == 'roi':
        # The list holds 'stat_ordered'; removing 'stat' raised ValueError.
        scatterplots.remove('stat_ordered')
    elif config.table_row_order == 'statorder':
        scatterplots.remove('roi_ordered')

    facet_formula = '{rows}~{cols}'.format(rows=config.table_rows,
                                           cols=config.table_cols)

    for plot_name in scatterplots:
        if config.verbose:
            print(f"Saving {plot_name} scatterplot!")

        if plot_name == 'roi_ordered':
            # Order rows based on first facet
            roi_ord = pd.Categorical(df['index'],
                                     categories=df['index'].unique())
        else:
            # Order each facet individually
            # NOTE(review): grouping is hard-coded to MB/SENSE rather than
            # config.table_cols/table_rows -- confirm this is intended.
            roi_ord = pd.Categorical(df.groupby(['MB', 'SENSE']).cumcount())

        figure_table = (
            pltn.ggplot(df, pltn.aes(x="Mean", y=roi_ord))
            + pltn.geom_point(na_rm=True, size=1)
            + pltn.geom_errorbarh(
                pltn.aes(xmin="Mean-Conf_Int_95", xmax="Mean+Conf_Int_95"),
                na_rm=True, height=None)
            + pltn.xlim(0, None)
            + pltn.scale_y_discrete(labels=[])
            + pltn.ylab(config.table_y_label)
            + pltn.xlab(config.table_x_label)
            + pltn.facet_grid(facet_formula, drop=True,
                              labeller="label_both")
            + pltn.theme_538()  # Set theme
            + pltn.theme(
                panel_grid_major_y=pltn.themes.element_line(alpha=0),
                panel_grid_major_x=pltn.themes.element_line(alpha=1),
                panel_background=pltn.element_rect(fill="gray", alpha=0.1),
                dpi=config.plot_dpi))

        figure_table.save(
            f"Figures/Scatterplots/{plot_name}_scatterplot.png",
            height=config.plot_scale,
            width=config.plot_scale * 3,
            verbose=False,
            limitsize=False)
def plot_metrics_comparison_lineplot_grid(dataframe,
                                          models_labels,
                                          metrics_labels,
                                          figure_size=(14, 4)):
    """
    Build a grid of line plots: 'value' against 'threshold', one line per
    'variable', with a panel for every 'iterations' x 'model' pair.

    models_labels and metrics_labels are lookups from the raw 'model' and
    'variable' values to the text shown in panel headers and in the legend.
    The assembled plot is returned, not drawn.
    """
    def two_decimals(values):
        # Tick labels with 2 decimal digits.
        return ['{:.2f}'.format(x) for x in values]

    def metric_names(keys):
        # Legend entries: raw metric key -> display label.
        return [metrics_labels[x] for x in keys]

    def row_title(x):
        return f'iters = {x}'

    def column_title(x):
        return f'{models_labels[x]}'

    mapping = p9.aes(x='threshold',
                     y='value',
                     group='variable',
                     color='variable',
                     shape='variable')

    # x axis: renamed, with some space to the left and right.
    x_scale = p9.scale_x_discrete(name='Threshold', expand=(0, 0.2))
    # y axis: renamed, with some space on top and bottom.
    y_scale = p9.scale_y_continuous(name='Value',
                                    expand=(0, 0.05),
                                    labels=two_decimals)
    # Shape and colour share name and labels so they use the same legend text.
    shape_scale = p9.scale_shape_discrete(name='Metric', labels=metric_names)
    # Qualitative 'Set2' palette, chosen for colour-blind readers.
    color_scale = p9.scale_color_brewer(name='Metric',
                                        labels=metric_names,
                                        type='qual',
                                        palette='Set2')
    grid = p9.facet_grid('iterations ~ model',
                         labeller=p9.labeller(rows=row_title,
                                              cols=column_title))
    look = p9.theme(
        # No y axis name.
        axis_title_y=p9.element_blank(),
        # Small tick labels on both axes.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, untitled, with a reduced margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    plot = p9.ggplot(dataframe, mapping)
    plot = plot + p9.geom_point() + p9.geom_line()
    plot = plot + x_scale + y_scale + shape_scale + color_scale
    plot = plot + grid + look
    return plot
def all_stack(fold=BUZZER_DEV_FOLD):
    """Save a faceted area chart of buzzing frequency for all three buzzers.

    The RNN, MLP and Threshold frames returned by ``stack`` are combined,
    faceted by ``Model`` (panel order Threshold, MLP, RNN) and written to
    ``output/buzzer/<fold>_stack.pdf``.
    """
    # Function-scope import: pd.concat replaces DataFrame.append, which was
    # removed in pandas 2.0.
    import pandas as pd

    frames = [
        stack('output/buzzer/RNNBuzzer', 'RNN', fold),
        stack('output/buzzer/MLPBuzzer', 'MLP', fold),
        stack('output/buzzer/ThresholdBuzzer', 'Threshold', fold),
    ]
    df = pd.concat(frames, ignore_index=True)

    # An explicit category list fixes the left-to-right panel order.
    model_type = CategoricalDtype(categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (ggplot(df)
         + geom_area(aes(x='Position', y='Frequency', fill='Buzzing'))
         + facet_grid('~ Model')
         + theme_fs()
         + theme(aspect_ratio=1)
         + scale_fill_brewer(type='div', palette=7))
    p.save('output/buzzer/{}_stack.pdf'.format(fold))
def create(self, file_path: str) -> None:
    """Plot histograms of ``loc`` per ``category`` and save to ``file_path``.

    The data comes from ``self._data``. Each category gets its own row with
    an independent y scale, and the x axis uses the asinh transform.
    """
    histogram = geom_histogram(bins=100, fill="#1e4f79")
    panels = facet_grid(facets="category ~ .", scales='free_y')
    x_axis = scale_x_continuous(trans=asinh_trans(), labels=asinh_labels)
    y_axis = scale_y_continuous(labels=comma_format())
    # Alternative y axis kept from the original, showing shares of all rows:
    # scale_y_continuous(labels=lambda l: ["%.2f%%" % (v * 100 / len(self._data)) for v in l])
    base_theme = theme_classic(base_size=32, base_family="Helvetica")
    overrides = theme(text=element_text(size=32),
                      subplots_adjust={"hspace": 0.1})

    chart = ggplot(self._data, aes("loc"))
    chart = chart + histogram + panels + x_axis + y_axis
    chart = chart + ggtitle("Class Sizes")
    chart = chart + xlab("Lines of Code") + ylab("Number of Classes")
    chart = chart + base_theme + overrides

    chart.save(file_path, width=8, height=18)
def plot_preprocessing_boxplot_bymodel(dataframe,
                                       models_labels,
                                       metrics_labels,
                                       groups_labels,
                                       figure_size=(14, 4)):
    """
    Build dodged boxplots of 'value' per 'variable', filled by 'group',
    with one row of panels per 'model'.

    models_labels, metrics_labels and groups_labels are lookups from the raw
    'model', 'variable' and 'group' values to their display text. The
    assembled plot is returned, not drawn.
    """
    def metric_names(keys):
        return [metrics_labels[x] for x in keys]

    def group_names(keys):
        return [groups_labels[x] for x in keys]

    def two_decimals(values):
        return ['{:.2f}'.format(x) for x in values]

    def row_title(x):
        return f'{models_labels[x]}'

    mapping = p9.aes(x='variable', y='value', fill='group')
    boxes = p9.geom_boxplot(position='dodge')

    x_scale = p9.scale_x_discrete(name='Metric', labels=metric_names)
    y_scale = p9.scale_y_continuous(
        name='Value',
        expand=(0, 0.05),
        # breaks=[-0.25, 0, 0.25, 0.5, 0.75, 1],
        limits=[-0.25, 1],
        labels=two_decimals)
    # Qualitative 'Set2' palette, chosen for colour-blind readers.
    fill_scale = p9.scale_fill_brewer(name='Group',
                                      labels=group_names,
                                      type='qual',
                                      palette='Set2')
    grid = p9.facet_grid('model ~ .',
                         scales='free_y',
                         labeller=p9.labeller(rows=row_title))
    look = p9.theme(
        # No axis names on either axis.
        axis_title_x=p9.element_blank(),
        axis_title_y=p9.element_blank(),
        # Small tick labels on both axes.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, untitled, with a reduced margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    plot = p9.ggplot(dataframe, mapping) + boxes
    plot = plot + x_scale + y_scale + fill_scale
    plot = plot + grid + look
    return plot
def all_stack(fold=BUZZER_DEV_FOLD):
    """Save a faceted area chart of buzzing frequency for all three buzzers.

    The RNN, MLP and Threshold frames returned by ``stack`` are combined,
    faceted by ``Model`` (panel order Threshold, MLP, RNN) and written to
    ``output/buzzer/<fold>_stack.pdf``.
    """
    # Function-scope import: pd.concat replaces DataFrame.append, which was
    # removed in pandas 2.0.
    import pandas as pd

    frames = [
        stack("output/buzzer/RNNBuzzer", "RNN", fold),
        stack("output/buzzer/MLPBuzzer", "MLP", fold),
        stack("output/buzzer/ThresholdBuzzer", "Threshold", fold),
    ]
    df = pd.concat(frames, ignore_index=True)

    # An explicit category list fixes the left-to-right panel order.
    model_type = CategoricalDtype(categories=["Threshold", "MLP", "RNN"])
    df["Model"] = df["Model"].astype(model_type)

    p = (
        ggplot(df)
        + geom_area(aes(x="Position", y="Frequency", fill="Buzzing"))
        + facet_grid("~ Model")
        + theme_fs()
        + theme(aspect_ratio=1)
        + scale_fill_brewer(type="div", palette=7)
    )
    p.save("output/buzzer/{}_stack.pdf".format(fold))
def plot_distributions_bar_plot_grid(dataframe, figure_size=(14, 4)):
    """
    Build dodged bar charts counting rows per 'threshold', filled by
    'value', with one column of panels per 'iterations'.

    Each bar is annotated with its count. The assembled plot is returned,
    not drawn.
    """
    legend_text = {0: 'Stable', 1: 'Unstable'}

    def value_names(keys):
        return [legend_text[x] for x in keys]

    def column_title(x):
        return f'iters = {x}'

    bars = p9.geom_bar(position='dodge')
    # Count labels sit on top of the bars, dodged to line up with them.
    counts = p9.geom_text(p9.aes(label='stat(count)'),
                          stat='count',
                          position=p9.position_dodge(0.9),
                          size=7,
                          va='bottom')

    x_scale = p9.scale_x_discrete(name='Threshold')
    # expand is (mul_bottom, add_bottom, mul_top, add_top): headroom on top only.
    y_scale = p9.scale_y_continuous(name='Count', expand=(0, 0, 0, 500))
    fill_scale = p9.scale_fill_manual(values={0: '#009e73', 1: '#d55e00'},
                                      labels=value_names)
    grid = p9.facet_grid('. ~ iterations',
                         labeller=p9.labeller(cols=column_title))
    look = p9.theme(
        # No y axis name.
        axis_title_y=p9.element_blank(),
        # Small tick labels on both axes.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, untitled, with a reduced margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    plot = p9.ggplot(dataframe, p9.aes(x='threshold', fill='value'))
    plot = plot + bars + counts
    plot = plot + x_scale + y_scale + fill_scale
    plot = plot + grid + look
    return plot
def _make_plots(df_plt, out_file_base, y='AUC', facet_grid='', h_line=''):
    """Save a box-and-jitter plot of ``y`` against ``resolution`` as a PNG.

    If ``df_plt`` has a ``sparsity_l1`` column it is copied into a new
    ``Sparsity`` column on ``df_plt`` itself (the caller's frame is
    modified). When that column has more than one distinct value it drives
    the fill and jitter colour.

    Parameters
    ----------
    df_plt : DataFrame with a ``resolution`` column and the ``y`` column.
    out_file_base : prefix of the output file name.
    y : column plotted on the y axis; also used in the file name.
    facet_grid : column to facet rows by; ``''`` disables faceting.
    h_line : y intercept of a dash-dotted reference line; ``''`` disables it.
    """
    n_resolutions = len(np.unique(df_plt['resolution']))

    n_sparsities = 0
    if 'sparsity_l1' in df_plt.columns:
        df_plt['Sparsity'] = df_plt['sparsity_l1']
        n_sparsities = len(np.unique(df_plt['Sparsity']))

    # Outliers are hidden on the boxes because the jitter layer shows all points.
    boxes = plt9.geom_boxplot(alpha=0.8, outlier_alpha=0)
    if n_sparsities > 1:
        mapping = plt9.aes(
            fill='Sparsity',
            x='resolution',
            y=y,
        )
        points = plt9.geom_jitter(
            plt9.aes(color='Sparsity'), alpha=0.25, width=0.2)
    else:
        mapping = plt9.aes(x='resolution', y=y)
        points = plt9.geom_jitter(alpha=0.25, width=0.2)

    gplt = plt9.ggplot(df_plt, mapping) + boxes + points
    gplt = gplt + plt9.theme_bw(base_size=12)

    if facet_grid != '':
        gplt = gplt + plt9.facet_grid('{} ~ .'.format(facet_grid))

    # Axis title for y: special-cased names, otherwise a prettified column name.
    if y == 'f1-score':
        y_title = 'F1 score'
    elif y in ('AUC', 'MCC'):
        y_title = y
    else:
        y_title = y.capitalize().replace('_', ' ')
    gplt = gplt + plt9.labs(x='Resolution', y=y_title, title='')

    gplt = gplt + plt9.theme(
        # legend_position='none',
        axis_text_x=plt9.element_text(angle=-45, hjust=0))

    # The palette is only applied for 1 to 8 distinct sparsity values.
    if n_sparsities != 0 and n_sparsities < 9:
        gplt = gplt + plt9.scale_fill_brewer(palette='Dark2', type='qual')

    if h_line != '':
        gplt = gplt + plt9.geom_hline(plt9.aes(yintercept=h_line),
                                      linetype='dashdot')

    out_path = '{}-resolution__{}.png'.format(out_file_base,
                                              y.replace('-', '_'))
    gplt.save(out_path,
              dpi=300,
              width=4 * ((n_resolutions + n_sparsities) / 4),
              height=5,
              limitsize=False)
class pass_energy_use_mode(Plot):
    """Passenger energy use by mode.

    This reproduces a figure from the (private) item2-scripts repository.
    """

    # Quantity to plot.
    variable = "energy"

    # Data subset: global passenger modes, all technologies and fuels,
    # for three years.
    selectors = {
        "region": "Global",
        "mode": PAX,
        "tech": ALL,
        "fuel": ALL,
        "year": [2015, 2030, 2050],
    }

    # Plot components: bars of value / 1000 per year, filled by mode,
    # faceted on scenario and model.
    terms = [
        aes("year", "value / 1000", fill="mode"),
        geom_bar(stat="identity"),
        facet_grid(["scenario", "model"]),
        labs(x="Year", y="EJ/year"),
    ]
def protobowl(fold=BUZZER_DEV_FOLD):
    """Save stacked bars of outcome counts per possibility for each buzzer.

    For the RNN, MLP and Threshold buzzers the pickled protobowl frame for
    ``fold`` is loaded and reduced to counts per (Possibility, Outcome).
    The combined result is drawn with one row of panels per model and
    written to ``output/buzzer/<fold>_protobowl.pdf``.
    """
    buzzers = (
        ("RNNBuzzer", "RNN"),
        ("MLPBuzzer", "MLP"),
        ("ThresholdBuzzer", "Threshold"),
    )

    frames = []
    for directory, label in buzzers:
        path = "output/buzzer/{}/{}_protobowl.pkl".format(directory, fold)
        # The context manager closes the file; the bare open() leaked it.
        # NOTE: pickle is only safe for files this project wrote itself.
        with open(path, "rb") as f:
            records = pickle.load(f)
        counts = (
            records.groupby(["Possibility", "Outcome"])
            .size()
            .reset_index()
            .rename(columns={0: "Count"})
        )
        counts["Model"] = label
        frames.append(counts)

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df = pd.concat(frames, ignore_index=True)

    # Explicit category lists fix the stacking order and the panel order.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df["Outcome"] = df["Outcome"].astype(outcome_type)
    model_type = CategoricalDtype(categories=["Threshold", "MLP", "RNN"])
    df["Model"] = df["Model"].astype(model_type)

    p = (
        ggplot(df)
        + geom_col(aes(x="Possibility", y="Count", fill="Outcome"), width=0.7)
        + facet_grid("Model ~")
        + coord_flip()
        + theme_fs()
        + theme(aspect_ratio=0.17)
        + scale_fill_brewer(type="div", palette=7)
    )
    p.save("output/buzzer/{}_protobowl.pdf".format(fold))
def all_stack(fold=BUZZER_DEV_FOLD):
    """Save a faceted area chart of buzzing frequency for all three buzzers.

    The RNN, MLP and Threshold frames returned by ``stack`` are combined,
    faceted by ``Model`` (panel order Threshold, MLP, RNN) and written to
    ``output/buzzer/<fold>_stack.pdf``.
    """
    # Function-scope import: pd.concat replaces DataFrame.append, which was
    # removed in pandas 2.0.
    import pandas as pd

    frames = [
        stack('output/buzzer/RNNBuzzer', 'RNN', fold),
        stack('output/buzzer/MLPBuzzer', 'MLP', fold),
        stack('output/buzzer/ThresholdBuzzer', 'Threshold', fold),
    ]
    df = pd.concat(frames, ignore_index=True)

    # An explicit category list fixes the left-to-right panel order.
    model_type = CategoricalDtype(
        categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (
        ggplot(df)
        + geom_area(aes(x='Position', y='Frequency', fill='Buzzing'))
        + facet_grid('~ Model')
        + theme_fs()
        + theme(
            aspect_ratio=1,
        )
        + scale_fill_brewer(type='div', palette=7)
    )
    p.save('output/buzzer/{}_stack.pdf'.format(fold))
def protobowl(fold=BUZZER_DEV_FOLD):
    """Save stacked bars of outcome counts per possibility for each buzzer.

    For the RNN, MLP and Threshold buzzers the pickled protobowl frame for
    ``fold`` is loaded and reduced to counts per (Possibility, Outcome).
    The combined result is drawn with one row of panels per model and
    written to ``output/buzzer/<fold>_protobowl.pdf``.
    """
    buzzers = (
        ('RNNBuzzer', 'RNN'),
        ('MLPBuzzer', 'MLP'),
        ('ThresholdBuzzer', 'Threshold'),
    )

    frames = []
    for directory, label in buzzers:
        path = 'output/buzzer/{}/{}_protobowl.pkl'.format(directory, fold)
        # The context manager closes the file; the bare open() leaked it.
        # NOTE: pickle is only safe for files this project wrote itself.
        with open(path, 'rb') as f:
            records = pickle.load(f)
        counts = (
            records.groupby(['Possibility', 'Outcome'])
            .size()
            .reset_index()
            .rename(columns={0: 'Count'})
        )
        counts['Model'] = label
        frames.append(counts)

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df = pd.concat(frames, ignore_index=True)

    # Explicit category lists fix the stacking order and the panel order.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df['Outcome'] = df['Outcome'].astype(outcome_type)
    model_type = CategoricalDtype(
        categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (
        ggplot(df)
        + geom_col(aes(x='Possibility', y='Count', fill='Outcome'),
                   width=0.7)
        + facet_grid('Model ~')
        + coord_flip()
        + theme_fs()
        + theme(aspect_ratio=0.17)
        + scale_fill_brewer(type='div', palette=7)
    )
    p.save('output/buzzer/{}_protobowl.pdf'.format(fold))
def plot_portfolio_contributors(
        ld: LazyDictionary, figure_size=(11, 5)) -> p9.ggplot:
    """Plot each stock's latest profit, split into winners and losers.

    Parameters
    ----------
    ld : LazyDictionary
        Must provide ``ld["df"]``, which is handed to
        ``make_portfolio_dataframe(..., melt=True)``. The melted frame is
        expected to have ``date``, ``field``, ``stock`` and ``value``
        columns.
    figure_size : tuple
        Forwarded to ``user_theme``.

    Returns
    -------
    The result of ``user_theme`` applied to a bar chart with one bar per
    stock, faceted into ``positive`` and ``negative`` rows.
    """
    melted_df = make_portfolio_dataframe(ld["df"], melt=True)

    # 1. keep only the stock_profit rows of the most recent date
    all_dates = sorted(melted_df["date"].unique())
    is_latest_profit = ((melted_df["date"] == all_dates[-1])
                        & (melted_df["field"] == "stock_profit"))
    # Copy so the column added below is written to an independent frame,
    # not to a slice of melted_df (avoids SettingWithCopyWarning).
    df = melted_df.loc[is_latest_profit].copy()
    df["contribution"] = [
        "positive" if profit >= 0.0 else "negative" for profit in df["value"]
    ]

    # 2. plot contributors ie. winners and losers
    plot = (
        p9.ggplot(df, p9.aes("stock", "value", group="stock", fill="stock"))
        + p9.geom_bar(stat="identity")
        + p9.facet_grid("contribution ~ field", scales="free_y"))
    return user_theme(plot,
                      y_axis_label="$ AUD",
                      figure_size=figure_size,
                      asxtrade_want_fill_d=True)
def protobowl(fold=BUZZER_DEV_FOLD):
    """Save stacked bars of outcome counts per possibility for each buzzer.

    For the RNN, MLP and Threshold buzzers the pickled protobowl frame for
    ``fold`` is loaded and reduced to counts per (Possibility, Outcome).
    The combined result is drawn with one row of panels per model and
    written to ``output/buzzer/<fold>_protobowl.pdf``.
    """
    buzzers = (
        ('RNNBuzzer', 'RNN'),
        ('MLPBuzzer', 'MLP'),
        ('ThresholdBuzzer', 'Threshold'),
    )

    frames = []
    for directory, label in buzzers:
        path = 'output/buzzer/{}/{}_protobowl.pkl'.format(directory, fold)
        # The context manager closes the file; the bare open() leaked it.
        # NOTE: pickle is only safe for files this project wrote itself.
        with open(path, 'rb') as f:
            records = pickle.load(f)
        counts = (records.groupby(['Possibility', 'Outcome'])
                  .size()
                  .reset_index()
                  .rename(columns={0: 'Count'}))
        counts['Model'] = label
        frames.append(counts)

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df = pd.concat(frames, ignore_index=True)

    # Explicit category lists fix the stacking order and the panel order.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df['Outcome'] = df['Outcome'].astype(outcome_type)
    model_type = CategoricalDtype(categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (ggplot(df)
         + geom_col(aes(x='Possibility', y='Count', fill='Outcome'),
                    width=0.7)
         + facet_grid('Model ~')
         + coord_flip()
         + theme_fs()
         + theme(aspect_ratio=0.17)
         + scale_fill_brewer(type='div', palette=7))
    p.save('output/buzzer/{}_protobowl.pdf'.format(fold))
def test_labeller_cols_both_grid():
    """Check ``facet_grid`` with the ``labeller_cols_both`` labeller."""
    grid = facet_grid('gear ~ am', labeller=labeller_cols_both)
    plot = g + grid
    assert plot == 'labeller_cols_both_grid'
def test_facet_grid_scales_free_x():
    """Check ``scales='free_x'`` when faceting on the expression ``var1>2``."""
    grid = facet_grid(['var1>2', '.'], scales='free_x')
    spacing = theme(panel_spacing_y=0.3)
    plot = g + grid + spacing
    assert plot == 'facet_grid_scales_free_x'
def error_comparison():
    """Plot how often the guessers agree at question start and end.

    Every guesser report matching
    ``output/guesser/best/qanta.guesser*/guesser_report_guesstest.pickle``
    is loaded. For each question and position (' Start' / 'End') the
    top-scoring guess of the DAN, RNN and ElasticSearch guessers is
    classified into one row:

    * all three correct: model ``'All'``, result ``'Correct'``
    * none correct: model ``'All'``, result ``'Wrong'``
    * exactly one correct: that model, result ``'Correct'``
    * otherwise (exactly one wrong): that model, result ``'Wrong'``

    The fractions are drawn as bars faceted by result and position and
    saved to ``output/plots/guesser_error_comparison.pdf``.
    """
    first_frames = {}
    full_frames = {}
    # The pattern has no '**', so a recursive glob would add nothing.
    report_paths = glob.glob(
        'output/guesser/best/qanta.guesser*/guesser_report_guesstest.pickle')
    for path in report_paths:
        with open(path, 'rb') as f:
            report = pickle.load(f)
        # Keyed by guesser name: a later report for the same guesser
        # replaces an earlier one.
        name = report['guesser_name']
        first_frames[name] = report['first_df']
        full_frames[name] = report['full_df']

    def top_guesses(frames, position):
        # After sorting by descending score, the first row of each
        # (guesser, question) group is its best guess.
        best = (pd.concat(list(frames.values()))
                .sort_values('score', ascending=False)
                .groupby(['guesser', 'qanta_id'])
                .first()
                .reset_index())
        best['position'] = position
        return best

    # NOTE(review): the leading space in ' Start' presumably makes it sort
    # before 'End' in the facets -- confirm before normalising it.
    compare_df = pd.concat([top_guesses(first_frames, ' Start'),
                            top_guesses(full_frames, 'End')])
    compare_df = compare_df[compare_df.guesser != 'qanta.guesser.vw.VWGuesser']

    comparisons = ['qanta.guesser.dan.DanGuesser',
                   'qanta.guesser.rnn.RnnGuesser',
                   'qanta.guesser.elasticsearch.ElasticSearchGuesser']

    cr_rows = []
    for (qnum, position), group in compare_df.groupby(['qanta_id', 'position']):
        group = group.set_index('guesser')
        correct_guessers = [name for name in comparisons
                            if group.loc[name].correct == 1]
        wrong_guessers = [name for name in comparisons
                          if name not in correct_guessers]

        if len(correct_guessers) == len(comparisons):
            model, result = 'All', 'Correct'
        elif not correct_guessers:
            model, result = 'All', 'Wrong'
        elif len(correct_guessers) == 1:
            model, result = to_shortname(correct_guessers[0]), 'Correct'
        else:
            # Exactly one guesser missed the answer.
            model, result = to_shortname(wrong_guessers[0]), 'Wrong'
        cr_rows.append({'qnum': qnum, 'Position': position,
                        'model': model, 'Result': result})

    cr_df = pd.DataFrame(cr_rows)

    p = (
        ggplot(cr_df)
        + aes(x='model', fill='Result')
        + facet_grid(['Result', 'Position'])
        + geom_bar(aes(y='(..count..) / sum(..count..)'), position='dodge')
        + labs(x='Models', y='Fraction with Corresponding Result')
        + coord_flip()
        + theme_fs()
        + theme(aspect_ratio=.6)
    )
    p.save('output/plots/guesser_error_comparison.pdf')
def test_facet_grid_scales_free_x():
    """Check ``scales='free_x'`` with expressions on both sides of the formula."""
    grid = facet_grid('var1>2 ~ x%2', scales='free_x')
    plot = g + grid
    assert plot == 'facet_grid_scales_free_x'
class TestThemes(object):
    """One comparison per built-in theme, all sharing the same base plot."""

    # Base plot shared by every test: a faceted scatter of mtcars.
    g = (ggplot(mtcars, aes(x='wt', y='mpg', color='factor(gear)'))
         + geom_point()
         + facet_grid('vs ~ am'))

    def _themed(self, title, complete_theme):
        # Base plot + title + the theme under test + the module's _theme.
        return self.g + labs(title=title) + complete_theme + _theme

    def test_theme_538(self):
        assert self._themed('Theme 538', theme_538()) == 'theme_538'

    def test_theme_bw(self):
        assert self._themed('Theme BW', theme_bw()) == 'theme_bw'

    def test_theme_classic(self):
        assert self._themed('Theme Classic',
                            theme_classic()) == 'theme_classic'

    def test_theme_dark(self):
        assert self._themed('Theme Dark', theme_dark()) == 'theme_dark'

    def test_theme_gray(self):
        assert self._themed('Theme Gray', theme_gray()) == 'theme_gray'

    def test_theme_light(self):
        assert self._themed('Theme Light', theme_light()) == 'theme_light'

    def test_theme_linedraw(self):
        plot = self._themed('Theme Linedraw', theme_linedraw())
        if not six.PY2:
            assert plot == 'theme_linedraw'
        else:
            # Small displacement in title
            assert plot == ('theme_linedraw', {'tol': 8})

    def test_theme_matplotlib(self):
        assert self._themed('Theme Matplotlib',
                            theme_matplotlib()) == 'theme_matplotlib'

    def test_theme_minimal(self):
        assert self._themed('Theme Minimal',
                            theme_minimal()) == 'theme_minimal'

    def test_theme_seaborn(self):
        assert self._themed('Theme Seaborn',
                            theme_seaborn()) == 'theme_seaborn'

    def test_theme_void(self):
        assert self._themed('Theme Void', theme_void()) == 'theme_void'

    def test_theme_xkcd(self):
        plot = self._themed('Theme Xkcd', theme_xkcd())
        if not os.environ.get('TRAVIS'):
            assert plot == 'theme_xkcd'
        else:
            # Travis does not have the fonts, we still check
            # to catch any other errors
            assert plot != 'theme_gray'
# print(aci.loc[aci["site"] == "Igloolik"]) # print(aci) res # res.to_feather("data_glm.feather") def label_x(dates): res = [(datetime.datetime(2018, 1, 1) + datetime.timedelta(x)).strftime("%d-%m") for x in dates] print(res) return res (ggplot(data=res, mapping=aes(x='julian', y='value', colour='type')) + xlab("Day") + ylab("Mean number of detected songs") + facet_grid("type~", scales="free") # + geom_line() # + facet_wrap("type", nrow=2, ncol=1) + geom_point() # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std")) + geom_smooth(method="mavg", se=False, method_args={"window": 4, "center": True, "min_periods": 1}) + scale_colour_manual(values=cbbPalette, guide=False) + scale_x_continuous(labels=label_x)).save("figs/song_events_aci_BARROW_mean_smoothed.png", height=10, width=16, dpi=150) (ggplot(data=res, mapping=aes(x='julian', y='n_events_sum', colour='site')) + xlab("Day") + ylab("Total number of detected songs") # + facet_grid("site~", scales="free") # + facet_wrap("site", nrow=2, ncol=3) + geom_point() # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
def test_facet_grid_margins():
    """Check ``facet_grid`` with ``margins=True``."""
    grid = facet_grid('var1~var2', margins=True)
    plot = g + grid
    assert plot == 'facet_grid_margins'
def test_facet_grid_scales_free_x():
    """Check ``scales='free_x'`` when faceting on the expression ``var1>2``."""
    grid = facet_grid(['var1>2', '.'], scales='free_x')
    spacing = theme(panel_spacing_y=0.3)
    plot = g + grid + spacing
    assert plot == 'facet_grid_scales_free_x'
def test_facet_grid_one_by_one_var():
    """Check a one-variable-by-one-variable grid, given two ways.

    The second formula uses ``class``, a Python keyword, as a column name;
    both plots are compared against the same baseline name.
    """
    expected = 'facet_grid_one_by_one_var'
    plain = g + facet_grid('var1~var2')
    keyword = g + facet_grid('class~var2')  # python keyword in formula
    assert plain == expected
    assert keyword == expected
def test_facet_grid_one_by_one_var():
    """Check a one-variable-by-one-variable grid, given two ways.

    The second formula uses ``class``, a Python keyword, as a column name;
    both plots are compared against the same baseline name.
    """
    expected = 'facet_grid_one_by_one_var'
    plain = g + facet_grid('var1~var2')
    keyword = g + facet_grid('class~var2')  # python keyword in formula
    assert plain == expected
    assert keyword == expected
def test_facet_grid_expression():
    """Check ``facet_grid`` when one facet is a ``pd.cut`` expression."""
    binned = 'pd.cut(var1, (0, 2, 4), include_lowest=True)'
    grid = facet_grid(['var2', binned])
    plot = g + grid
    assert plot == 'facet_grid_expression'
def test_facet_grid_scales_free_y():
    """Check ``scales='free_y'`` when faceting on the expression ``var1>2``."""
    grid = facet_grid(['.', 'var1>2'], scales='free_y')
    spacing = theme(panel_spacing_x=0.3)
    plot = g + grid + spacing
    assert plot == 'facet_grid_scales_free_y'
def test_facet_grid_margins():
    """Check ``facet_grid`` with ``margins=True``."""
    grid = facet_grid('var1~var2', margins=True)
    plot = g + grid
    assert plot == 'facet_grid_margins'
def test_facet_grid_expression():
    """Check ``facet_grid`` when one facet is a ``pd.cut`` expression."""
    binned = 'pd.cut(var1, (0, 2, 4), include_lowest=True)'
    grid = facet_grid(['var2', binned])
    plot = g + grid
    assert plot == 'facet_grid_expression'
def test_labeller_towords():
    """Check ``facet_grid`` with the ``labeller_towords`` labeller."""
    grid = facet_grid('gear ~ am', labeller=labeller_towords)
    plot = g + grid
    assert plot == 'labeller_towords'
def test_facet_grid_scales_free_y():
    """Check ``scales='free_y'`` when faceting on the expression ``var1>2``."""
    grid = facet_grid(['.', 'var1>2'], scales='free_y')
    spacing = theme(panel_spacing_x=0.3)
    plot = g + grid + spacing
    assert plot == 'facet_grid_scales_free_y'
def quick_color_check(target_matrix, source_matrix, num_chips):
    """
    Quickly plot target matrix values against source matrix values to determine
    over saturated color chips or other issues.

    Inputs:
    target_matrix      = a 22x4 matrix containing the average red value, average green value, and
                         average blue value for each color chip of the target image
    source_matrix      = a 22x4 matrix containing the average red value, average green value, and
                         average blue value for each color chip of the source image
    num_chips          = number of color card chips included in the matrices (integer)

    Only the first num_chips rows are used. Red, green and blue are read from
    columns 1, 2 and 3; column 0 is not read here (presumably the chip
    identifier -- confirm against the code that builds the matrices).

    Nothing is returned. The plot is saved to params.debug_outdir when
    params.debug is 'print', printed when it is 'plot', and otherwise
    built but not output.

    :param target_matrix: numpy.ndarray
    :param source_matrix: numpy.ndarray
    :param num_chips: int
    """
    # Imports
    from plotnine import ggplot, geom_point, geom_smooth, theme_seaborn, facet_grid, geom_label, scale_x_continuous, \
        scale_y_continuous, scale_color_manual, aes
    import pandas as pd

    # Extract and organize matrix info (slices keep each channel as a column)
    tr = target_matrix[:num_chips, 1:2]
    tg = target_matrix[:num_chips, 2:3]
    tb = target_matrix[:num_chips, 3:4]
    sr = source_matrix[:num_chips, 1:2]
    sg = source_matrix[:num_chips, 2:3]
    sb = source_matrix[:num_chips, 3:4]

    # Create columns of color labels
    red = ['red'] * num_chips
    blue = ['blue'] * num_chips
    green = ['green'] * num_chips

    # Chip numbers repeated once per color block, as a flat 1-D column
    chips = np.tile(np.arange(0, num_chips), 3)

    # Combine info; blocks are stacked in the order blue, green, red
    color_data_r = np.column_stack((sr, tr, red))
    color_data_g = np.column_stack((sg, tg, green))
    color_data_b = np.column_stack((sb, tb, blue))
    # np.vstack replaces np.row_stack, which is deprecated in NumPy 2.0
    all_color_data = np.vstack((color_data_b, color_data_g, color_data_r))

    # Create a dataframe with headers
    dataset = pd.DataFrame({'source': all_color_data[:, 0],
                            'target': all_color_data[:, 1],
                            'color': all_color_data[:, 2]})

    # Add chip numbers to the dataframe
    dataset['chip'] = chips
    # Stacking with the labels made every value a string; restore the types
    dataset = dataset.astype({'color': str, 'chip': str, 'target': float, 'source': float})

    # Make the plot
    p1 = ggplot(dataset, aes(x='target', y='source', color='color', label='chip')) + \
        geom_point(show_legend=False, size=2) + \
        geom_smooth(method='lm', size=.5, show_legend=False) + \
        theme_seaborn() + facet_grid('.~color') + \
        geom_label(angle=15, size=7, nudge_y=-.25, nudge_x=.5, show_legend=False) + \
        scale_x_continuous(limits=(-5, 270)) + scale_y_continuous(limits=(-5, 275)) + \
        scale_color_manual(values=['blue', 'green', 'red'])

    # Output the plot according to the debug mode
    if params.debug is not None:
        if params.debug == 'print':
            p1.save(os.path.join(params.debug_outdir, 'color_quick_check.png'))
        elif params.debug == 'plot':
            print(p1)
def plot_alt_benefit(plot_df,
                     title='Benefit by Alternative',
                     which='both',
                     sensitivity=False,
                     legend=True):
    '''Builds a stacked bar chart of the alternative benefits

    @ param plot_df: The df containing benefits for each alt by the criteria
        and total benefit. It is read through the columns 'Alternative',
        'Benefit', 'Criterion', 'type' and 'print_value', plus
        'Criterion Weight' when sensitivity is True.
    @ param title: The title for the graph
    @ param which: which parts to plot. Acceptable values are
        'total' for just total value.
        'criteria' for just criteria level stacked bars
        'both' for total and criteria. The graphs will be faceted in this case
    @ param sensitivity: when True, the graph is also faceted by
        'Criterion Weight'
    @ param legend: when False, the fill legend is hidden

    Returns the ggplot graph to be displayed elsewhere, or None (after
    printing a message) when which is not one of the accepted values'''
    _facet = which == 'both'

    if which == 'total':
        plot_df = plot_df.loc[plot_df['type'] == 'Total Value']
    elif which == 'criteria':
        plot_df = plot_df.loc[plot_df['type'] == 'Weighted Criterion Value']
    elif which != 'both':
        print(
            which,
            'is not an approved value for which.\n Enter "total", "criteria", or "both"'
        )
        return None

    g = p9.ggplot(plot_df,
                  p9.aes(x='Alternative', y='Benefit', fill='Criterion'))
    if legend:
        g = (
            g
            # makes stacked bar plot
            + p9.geom_col(stat='identity',
                          position=p9.position_stack(vjust=.5))
            # changes the color palette to one for qualitative scales
            + p9.scale_fill_brewer(type='qual', palette='Paired'))
    else:
        g = (
            g
            # show_legend is a layer parameter; inside aes() it was treated
            # as an aesthetic mapping rather than as the legend switch.
            + p9.geom_col(stat='identity',
                          position=p9.position_stack(vjust=.5),
                          show_legend=False)
            + p9.scale_fill_brewer(type='qual', palette='Paired',
                                   guide=False)
            # 'none' is the documented value that removes the legend.
            + p9.theme(legend_position='none'))

    g = (
        g
        # adds weighted value to bars
        + p9.geom_text(p9.aes(label='print_value'),
                       position=p9.position_stack(vjust=.5),
                       size=6,
                       hjust='center')
        # makes the title
        + p9.ggtitle(title)
        # rotates x axis labels
        + p9.theme(axis_text_x=p9.element_text(rotation=45, hjust=1)))

    # Adds the facet if required
    if sensitivity:
        if _facet:
            return g + p9.facet_grid('type~Criterion Weight')
        return g + p9.facet_grid('Criterion Weight~')
    if _facet:
        return g + p9.facet_grid('~type')
    return g
def plot_xbs(df, group, var, n_side=9, n_delta=6):
    r"""Construct Xbar and S chart

    Construct an Xbar and S chart to assess the state of statistical control of
    a dataset. Each group's mean (X) and standard deviation (S) are plotted
    against the group variable, together with lower / center / upper guidelines,
    and points are marked when they fall outside the limits or belong to a run.

    Args:
        df (DataFrame): Data to analyze
        group (str): Variable for grouping
        var (str): Variable to study

    Keyword args:
        n_side (int): Number of consecutive runs above/below centerline to flag
        n_delta (int): Number of consecutive runs increasing/decreasing to flag

    Returns:
        plotnine object: Xbar and S chart

    Examples::

        import grama as gr
        DF = gr.Intention()

        from grama.data import df_shewhart
        (
            df_shewhart
            >> gr.tf_mutate(idx=DF.index // 10)
            >> gr.pt_xbs("idx", "tensile_strength")
        )

    """
    ## Prepare the data
    DF = Intention()
    # One row per group: mean (X) and standard deviation (S) of `var`, plus `n`
    # (presumably the group size -- confirm against the definition of nfcn)
    df_batched = (df >> tf_group_by(group) >> tf_summarize(
        X=mean(DF[var]),
        S=sd(DF[var]),
        n=nfcn(DF.index),
    ) >> tf_ungroup())
    # Summary across groups: averages of the per-group X, S and n
    df_stats = (df_batched >> tf_summarize(
        X_center=mean(DF.X),
        S_biased=mean(DF.S),
        n=mean(DF.n),
    ))
    # NOTE(review): n is the *mean* of the per-group n values, so it may be
    # non-integer when groups differ in size -- confirm c_sd/B3/B4 accept that
    n = df_stats.n[0]
    # c_sd, B3 and B4 are defined elsewhere; presumably the usual control-chart
    # constants (bias correction and S-chart limit factors) -- TODO confirm
    df_stats["S_center"] = df_stats.S_biased / c_sd(n)
    # Xbar limits: center -/+ 3 * S_center / sqrt(n)
    df_stats["X_LCL"] = df_stats.X_center - 3 * df_stats.S_center / sqrt(n)
    df_stats["X_UCL"] = df_stats.X_center + 3 * df_stats.S_center / sqrt(n)
    # S limits: S_center scaled by B3(n) / B4(n)
    df_stats["S_LCL"] = B3(n) * df_stats.S_center
    df_stats["S_UCL"] = B4(n) * df_stats.S_center

    ## Reshape for plotting
    # Column names are split at "_" into `_var` (X / S) and `_stat`
    # (LCL / center / UCL); the values go to `_value`. Used for the guidelines.
    df_stats_long = (df_stats >> tf_pivot_longer(
        columns=["X_LCL", "X_center", "X_UCL", "S_LCL", "S_center", "S_UCL"],
        names_to=["_var", "_stat"],
        names_sep="_",
        values_to="_value",
    ))
    # Fake group value to avoid issue with discrete group variable
    # (every guideline row gets the first group's value)
    df_stats_long[group] = [df_batched[group].values[0]
                            ] * df_stats_long.shape[0]

    # Long form of the per-group statistics: one row per (group, _var)
    df_batched_long = (
        df_batched >> tf_pivot_longer(
            columns=["X", "S"],
            names_to="_var",
            values_to="_value",
        )
        ## Flag patterns
        # Attach the limits for each `_var`; the ".value" name presumably
        # yields LCL / center / UCL columns (they are referenced below)
        >> tf_left_join(
            df_stats >> tf_pivot_longer(
                columns=[
                    "X_LCL", "X_center", "X_UCL", "S_LCL", "S_center", "S_UCL"
                ],
                names_to=["_var", ".value"],
                names_sep="_",
            ),
            by="_var",
        ) >> tf_group_by("_var") >> tf_mutate(
            outlier_below=(DF._value < DF.LCL),  # Outside control limits
            outlier_above=(DF.UCL < DF._value),
            # consec is defined elsewhere; presumably flags membership in a
            # run of `i` consecutive True values -- TODO confirm
            below=consec(DF._value < DF.center, i=n_side),  # Below mean
            above=consec(DF.center < DF._value, i=n_side),  # Above mean
        ) >> tf_mutate(
            # Runs are detected on differences between neighbouring points,
            # looking both forward (lead) and backward (lag), with i=n_delta - 1
            decreasing=consec((lead(DF._value) - DF._value) < 0, i=n_delta - 1)
            |  # Decreasing
            consec((DF._value - lag(DF._value)) < 0, i=n_delta - 1),
            increasing=consec(0 < (lead(DF._value) - DF._value), i=n_delta - 1)
            |  # Increasing
            consec(0 < (DF._value - lag(DF._value)), i=n_delta - 1),
        ) >> tf_mutate(
            # `sign` drives the point color and `glyph` the point shape; cases
            # are listed with limit violations first (presumably the first
            # matching case wins -- confirm case_when semantics)
            sign=case_when([DF.outlier_below, "-2"], [DF.outlier_above, "+2"],
                           [DF.below | DF.decreasing, "-1"],
                           [DF.above | DF.increasing, "+1"], [True, "0"]),
            glyph=case_when(
                [DF.outlier_below, "Below Limit"],
                [DF.outlier_above, "Above Limit"],
                [DF.below, "Low Run"],
                [DF.above, "High Run"],
                [DF.increasing, "Increasing Run"],
                [DF.decreasing, "Decreasing Run"],
                [True, "None"],
            )) >> tf_ungroup())

    ## Visualize
    # `+` binds tighter than `>>`, so the whole ggplot expression is assembled
    # first and the data frame is then piped into it.
    return (df_batched_long >> ggplot(aes(x=group)) + geom_hline(
        data=df_stats_long,
        mapping=aes(yintercept="_value", linetype="_stat"),
    ) + geom_line(aes(y="_value", group="_var"), size=0.2) + geom_point(
        aes(y="_value", color="sign", shape="glyph"),
        size=3,
    ) + scale_color_manual(values={
        "-2": "blue",
        "-1": "darkturquoise",
        "0": "black",
        "+1": "salmon",
        "+2": "red"
    }, ) + scale_shape_manual(
        name="Patterns",
        values={
            "Below Limit": "s",
            "Above Limit": "s",
            "Low Run": "X",
            "High Run": "X",
            "Increasing Run": "^",
            "Decreasing Run": "v",
            "None": "."
        },
    ) + scale_linetype_manual(
        name="Guideline",
        values=dict(LCL="dashed", UCL="dashed", center="solid"),
    ) + guides(color=None) + facet_grid(
        # One panel row per statistic, each with its own y axis
        "_var~.",
        scales="free_y",
        labeller=labeller(dict(X="Mean", S="Variability")),
    ) + labs(
        x="Group variable ({})".format(group),
        y="Value ({})".format(var),
    ))
# Summary statistics of MiBs for each (Op, ApiName) pair; unstack(1) moves the
# second grouping level (ApiName) into the columns.
# NOTE(review): `subset` is defined outside this fragment -- presumably a
# pandas DataFrame; confirm.
print("Throughput Summary")
print(subset.groupby(["Op", "ApiName"])["MiBs"].describe().unstack(1))

# %%
# Same summary layout for the ElapsedSeconds column.
print("")
print("Latency Summary")
print(subset.groupby(["Op", "ApiName"])["ElapsedSeconds"].describe().unstack(1))

# %%
# Runs with small uploads/downloads look better with log scale.
# The log scale is enabled only when the largest MiB value is at most 8.0.
use_y_log10 = max(data["MiB"]) <= 8.0

# %%
# A common facet for all plots
facet = p9.facet_grid(
    "Op ~ Crc32cEnabled + MD5Enabled", labeller="label_both", scales="free_y"
)

# %%
# Elapsed time against transfer size, colored by ApiName.
plot = (
    p9.ggplot(data=data, mapping=p9.aes(x="MiB", y="ElapsedSeconds", color="ApiName"))
    + p9.geom_point()
    + facet
)
# The conditional expression has the lowest precedence, so this reads as
# `(plot + scale_y_log10()) if use_y_log10 else plot`.
(plot + p9.scale_y_log10() if use_y_log10 else plot).save(
    args.output_prefix + ".elapsed-vs-size.png"
)

# %%
# CpuNanosPerByte against transfer size (this statement continues beyond the
# visible fragment).
plot = (
    p9.ggplot(data=data, mapping=p9.aes(x="MiB", y="CpuNanosPerByte", color="ApiName"))
def test_facet_grid_formula_without_dot():
    """Facet with a formula that has nothing on the left of ``~``.

    The result is compared against the ``facet_grid_formula_with_dot``
    baseline.
    """
    facet = facet_grid('~var1>2')
    plot = g + facet

    assert plot == 'facet_grid_formula_with_dot'
def quick_color_check(target_matrix, source_matrix, num_chips):
    """ Quickly plot target matrix values against source matrix values to determine over saturated color chips or
    other issues.

    Only the first num_chips rows of each matrix are used. Columns 1, 2 and 3 are read as the red, green and blue
    values respectively; column 0 is not used by this function. One scatter panel per color channel is drawn, with
    each point labeled by its chip (row) number and a linear fit overlaid. The device counter is incremented, and
    the plot is saved or printed depending on params.debug; nothing is returned.

    Inputs:
    source_matrix      = a 22x4 matrix containing the average red value, average green value, and average blue value
                            for each color chip of the source image
    target_matrix      = a 22x4 matrix containing the average red value, average green value, and average blue value
                            for each color chip of the target image
    num_chips          = number of color card chips included in the matrices (integer)

    :param source_matrix: numpy.ndarray
    :param target_matrix: numpy.ndarray
    :param num_chips: int
    """
    # Imports
    from plotnine import ggplot, geom_point, geom_smooth, theme_seaborn, facet_grid, geom_label, scale_x_continuous, \
        scale_y_continuous, scale_color_manual, aes
    import pandas as pd

    # Extract and organize matrix info (the 1-wide slices keep each channel as a 2-D column)
    tr = target_matrix[:num_chips, 1:2]
    tg = target_matrix[:num_chips, 2:3]
    tb = target_matrix[:num_chips, 3:4]
    sr = source_matrix[:num_chips, 1:2]
    sg = source_matrix[:num_chips, 2:3]
    sb = source_matrix[:num_chips, 3:4]

    # Create columns of color labels
    red = ['red'] * num_chips
    blue = ['blue'] * num_chips
    green = ['green'] * num_chips

    # Make a column of chip numbers, repeated once per color channel
    chip = np.arange(0, num_chips).reshape((num_chips, 1))
    # np.vstack is used instead of np.row_stack, which is a deprecated alias of it
    chips = np.vstack((chip, chip, chip))

    # Combine info (mixing numbers and labels yields string arrays; types are restored below)
    color_data_r = np.column_stack((sr, tr, red))
    color_data_g = np.column_stack((sg, tg, green))
    color_data_b = np.column_stack((sb, tb, blue))
    all_color_data = np.vstack((color_data_b, color_data_g, color_data_r))

    # Create a dataframe with headers
    dataset = pd.DataFrame({'source': all_color_data[:, 0],
                            'target': all_color_data[:, 1],
                            'color': all_color_data[:, 2]})

    # Add chip numbers to the dataframe
    dataset['chip'] = chips
    dataset = dataset.astype({'color': str, 'chip': str, 'target': float, 'source': float})

    # Make the plot
    p1 = ggplot(dataset, aes(x='target', y='source', color='color', label='chip')) + \
        geom_point(show_legend=False, size=2) + \
        geom_smooth(method='lm', size=.5, show_legend=False) + \
        theme_seaborn() + facet_grid('.~color') + \
        geom_label(angle=15, size=7, nudge_y=-.25, nudge_x=.5, show_legend=False) + \
        scale_x_continuous(limits=(-5, 270)) + scale_y_continuous(limits=(-5, 275)) + \
        scale_color_manual(values=['blue', 'green', 'red'])

    # Autoincrement the device counter
    params.device += 1

    # Emit the debug plot: save it to the debug directory or print it
    if params.debug == 'print':
        p1.save(os.path.join(params.debug_outdir, 'color_quick_check.png'))
    elif params.debug == 'plot':
        print(p1)
def test_facet_grid_scales_free_y_formula_dot_notation():
    """Facet with a ``. ~ var1>2`` formula and free y scales.

    The result is compared against the ``facet_grid_scales_free_y`` baseline.
    """
    facet = facet_grid('. ~ var1>2', scales='free_y')
    spacing = theme(panel_spacing_x=0.3)
    plot = g + facet + spacing

    assert plot == 'facet_grid_scales_free_y'