def run_item_clicked(self, item):
    """Redraw the canvas with a step plot of the clicked model variable.

    Only items whose parent node is labelled ``'Variables'`` produce a plot;
    every other item (including items without a parent) is logged and ignored.

    Parameters
    ----------
    item
        Tree item exposing ``text(column)`` and ``parent()``.
    """
    name = item.text(0)
    logging.info('Run item %s clicked', name)

    parent = item.parent()
    if parent is None or parent.text(0) != 'Variables':
        return

    cpt = self._concrete_model.find_component(name)
    df = mo.get_entity(cpt)

    # The variable name selects the colour column and the facet layout.
    if name == 'S':
        color, facet = 'States', pn.facet_wrap('States')
    elif name == 'Q':
        color, facet = 'J', pn.facet_grid('J~')
    else:
        color, facet = 'J', pn.facet_grid('I~')

    ff = (pn.ggplot(df, pn.aes('T', name))
          + pn.ggtitle(cpt.doc)
          + pn.geom_step(pn.aes(color=color), direction='hv')
          + facet)

    # Size the figure from the canvas size divided by 100
    # (presumably pixels -> inches at 100 dpi; TODO confirm).
    size = self.canvas.size()
    ff += pn.theme(figure_size=(size.width() / 100, size.height() / 100))

    # Swap the new figure into the canvas and repaint.
    fig = ff.draw()
    self.canvas.figure = fig
    self.canvas.draw()
def test_labels():
    """
    Test label components, including their invalid arguments
    """
    gg = (ggplot(df, aes(x='x', y='y'))
          + geom_point()
          + xlab('xlab')
          + ylab('ylab')
          + ggtitle('title'))
    for key, expected in (('x', 'xlab'), ('y', 'ylab'), ('title', 'title')):
        assert gg.labels[key] == expected

    # labs() overrides the labels that were set one at a time above.
    gg = gg + labs(x='xlab2', y='ylab2', title='title2')
    for key, expected in (('x', 'xlab2'), ('y', 'ylab2'), ('title', 'title2')):
        assert gg.labels[key] == expected

    # A None label must be rejected by each single-label component.
    for make_label in (xlab, ylab, ggtitle):
        with pytest.raises(PlotnineError):
            gg = gg + make_label(None)

    # Positional arguments to labs() must be rejected as well.
    with pytest.raises(PlotnineError):
        gg = gg + labs('x', 'y')
def plot_hypothesis(hypothesis, file_name):
    """Plot per-bin means and significance-test results and save them as a PDF.

    Parameters
    ----------
    hypothesis
        Nested mapping ``{bin_type: {score: [{bin_name: result}, ...]}}``.
        Each ``result`` is read through its ``p1``, ``p2``, ``mean1``,
        ``mean2``, ``t`` and ``p`` attributes.  The score names of the first
        bin type are used for every bin type.
    file_name
        Output path handed to ``save_as_pdf_pages``.

    Two pages are produced per (bin type, score) pair: a dodged column chart
    of the two dataset means per bin, and a column chart of the p-values
    coloured by whether ``p <= 0.05``.
    """
    bin_types = list(hypothesis)
    scores = list(hypothesis[bin_types[0]])
    plots = []
    for bin_type, score in product(bin_types, scores):
        mean_name = "Mean: " + score
        # Collect rows first and build each frame once: DataFrame.append was
        # removed in pandas 2.0 and copied the whole frame on every call.
        mean_rows = []
        test_rows = []
        for bin_ in hypothesis[bin_type][score]:
            bin_name, h = list(bin_.items())[0]
            for dataset, mean in ((h.p1, h.mean1), (h.p2, h.mean2)):
                mean_rows.append({
                    "Bin": bin_name,
                    'Dataset': dataset,
                    mean_name: str(round(float(mean), 3)),
                })
            p_value = h.p
            test_rows.append({
                "Bin": bin_name,
                't-statistic': str(round(h.t, 3)),
                'p-value': str(p_value),
                '95% Confidence':
                    "Significant" if p_value <= 0.05 else "Not Significant",
            })
        df = pd.DataFrame(mean_rows, columns=["Bin", "Dataset", mean_name])
        df2 = pd.DataFrame(
            test_rows,
            columns=["Bin", "t-statistic", 'p-value', '95% Confidence'])
        plots.append(
            (ggplot(df, aes(x='Bin', y=mean_name, fill='Dataset')) +
             geom_col(stat='identity', position='dodge') +
             ggtitle("{0} bin distribution| {1}\nBin's Average Scores".format(
                 bin_type, score))))
        plots.append(
            (ggplot(df2, aes(x='Bin', y='p-value', fill='95% Confidence')) +
             geom_col(stat='identity', width=0.2) +
             ggtitle(
                 "{0} bin distribution| {1}\nBin's 95% Confidence Level Test".
                 format(bin_type, score)) +
             scale_fill_manual(values={
                 'Significant': "#214517",
                 'Not Significant': '#c62f2d'
             })))
    save_as_pdf_pages(plots, file_name)
def round_2_plot():
    """Render the round-2 accuracy figure and save it as a PDF.

    The JSON data frame is downloaded to ``round_2_df_path`` when it is not
    already on disk, and its checksum is verified before it is read.
    """
    if not os.path.exists(round_2_df_path):
        eprint(f'Downloading {round_2_df_url} to {round_2_df_path}')
        urlretrieve(round_2_df_url, round_2_df_path)
    verify_checksum(round_2_df_checksum, round_2_df_path)
    df = pd.read_json(round_2_df_path)

    strip_margin = {'t': 6, 'b': 6, 'l': 1, 'r': 5}
    figure_theme = theme(
        legend_box_margin=0,
        legend_title=element_blank(),
        strip_text_x=element_text(margin=strip_margin))
    palette = scale_color_manual(
        values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
        name='Questions')

    # One panel per guessing model, binned mean accuracy per dataset.
    p = (ggplot(df)
         + aes(x='char_percent', y='correct', color='Dataset')
         + facet_wrap('Guessing_Model', nrow=1)
         + stat_summary_bin(
             fun_data=mean_no_se, bins=20, shape='.', linetype='None',
             size=0.5))
    p = (p
         + scale_y_continuous(breaks=np.linspace(0, 1, 6))
         + scale_x_continuous(breaks=[0, .5, 1])
         + coord_cartesian(ylim=[0, 0.7]))
    p = (p
         + ggtitle('Round 2 Attacks and Models')
         + xlab('Percent of Question Revealed')
         + ylab('Accuracy'))
    p = p + figure_theme + palette
    p.save('2019_tacl_trick/auto_fig/round_2_json.pdf', width=7.0, height=1.7)
def round_1_plot():
    """Render the round-1 accuracy figure from the CSV data and save it as a PDF."""
    df = pd.read_csv('2019_tacl_trick/data/round_1.csv')
    df['Model'] = df['Model'].astype(
        CategoricalDtype(['DAN', 'RNN', 'IR'], ordered=True))

    # Hack: pad this one dataset name so the legend widths are the same
    # across plots.
    df['Dataset'] = df['Dataset'].map(
        lambda x: 'Round 1 - IR Adversarial '
        if x == 'Round 1 - IR Adversarial' else x)

    strip_theme = theme(strip_text_x=element_text(margin={
        't': 6,
        'b': 6,
        'l': 1,
        'r': 5
    }))
    palette = scale_color_manual(
        values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
        name='Questions')

    p = (ggplot(df)
         + aes(x='x', y='y', color='Dataset')
         + facet_wrap('Model', nrow=1)
         + geom_point(size=1.0, shape='o'))
    p = (p
         + scale_y_continuous(breaks=np.linspace(0, 1, 6), limits=[0, 0.6])
         + scale_x_continuous(breaks=[0, .5, 1]))
    p = (p
         + xlab('Percent of Question Revealed')
         + ylab('Accuracy')
         + ggtitle('Round 1 Attacks and Models'))
    p = p + strip_theme + palette
    p.save('2019_tacl_trick/auto_fig/round_1_csv.pdf', width=7.0, height=1.7)
def plot_replicate_density(
    df,
    batch,
    plate,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
):
    """Build a density plot of pairwise correlations split by replicate status.

    Parameters
    ----------
    df
        Data with ``pairwise_correlation`` and ``replicate_info`` columns.
    batch, plate
        Used only to build the plot title ``"<batch>: <plate>"``.
    output_file_base
        When truthy, the plot is also passed to ``save_figure`` with this base.
    output_file_extensions
        Extensions forwarded to ``save_figure``; defaults to
        ``[".png", ".pdf", ".svg"]``.
    dpi, height, width
        Forwarded to ``save_figure``.

    Returns
    -------
    The ggplot object.
    """
    # A fresh list per call avoids sharing one mutable default across calls.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="pairwise_correlation", fill="replicate_info"))
        + gg.geom_density(alpha=0.3)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            title=gg.element_text(size=9),
            axis_text=gg.element_text(size=5),
            axis_title=gg.element_text(size=8),
            legend_text=gg.element_text(size=6),
            legend_title=gg.element_text(size=7),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if output_file_base:
        save_figure(density_gg, output_file_base, output_file_extensions, dpi,
                    height, width)
    return density_gg
def event_counts_date(request_disc=None):
    '''
    Plot counts of admission status by calendar day for a filtered subset of
    the module-level ``df``.

    request_disc is a dictionary mapping column names to required values.
    The "institution" column is matched with ``str.contains``; every other
    column is matched by equality.  ``None`` (the default) applies no filter.

    Returns the ggplot object.  Raises IndexError when no row matches.
    '''
    # The declared default is None; treat it as "no filter" instead of
    # failing on None.keys().
    if request_disc is None:
        request_disc = {}

    request = np.ones(df.shape[0], dtype=bool)
    for key, wanted in request_disc.items():
        if key == "institution":
            request = request & (df[key].str.contains(wanted))
        else:
            request = request & (df[key] == wanted)

    # Work on an explicit copy so the derived columns below are not written
    # onto a slice of the shared frame.
    df_selected = df[request].copy()
    # Map every admission date into 1980 so different years share one axis.
    df_selected["date_md"] = df_selected["admission_date"].apply(
        lambda dt: dt.replace(year=1980))
    df_selected["year"] = df_selected["admission_date"].apply(
        lambda dt: dt.year)

    # The title is taken from the first matching row, one value per filter
    # key, each followed by a space.
    samp = df_selected.iloc[0]
    title = ""
    for key in request_disc:
        title += samp[key]
        title += " "

    gg = p9.ggplot(df_selected)
    gg += p9.aes(x="date_md", y="admission_status")
    gg += p9.scale_x_datetime(date_breaks='10 days',
                              date_labels="%m-%d",
                              limits=np.array([
                                  np.min(df_selected["date_md"]),
                                  pd.to_datetime("1980-4-20")
                              ]))
    gg += p9.geom_count()
    gg += p9.ggtitle(title)
    return gg
def density(X, y, sreg, treg):
    """
    Plot the 2d-density of the size vs correlation data.

    Parameters:
    - - - - -
    X: float, array
        independent variable (plotted as 'Size')
    y: float, array
        dependent variable (plotted as 'Correlation')
    sreg, treg:
        values inserted into the plot title as '<sreg> --> <treg>'

    Returns:
    - - - -
    g: figure
        density plot
    """
    frame = pd.DataFrame({'Size': X, 'Correlation': y})
    heading = 'Dispersion Correlations\n{:} --> {:}'.format(sreg, treg)

    # Scatter of the raw points with the density contours drawn on top.
    scatter = geom_point(alpha=0.5, size=0.25)
    contours = geom_density_2d(size=1, color='r')
    g = ggplot(frame, aes('Size', 'Correlation')) + scatter + contours
    return g + plotnine.ggtitle(heading)
def plot_overlap_duration(self, data, options):
    """Scatter tag overlap against tag duration for matched tags.

    Only rows of ``data["matches"]`` with ``tag_overlap > 0`` are plotted.
    The title is built from ``options["scenario_info"]`` (model, database,
    class) and the full ``options`` object.
    """
    all_matches = data["matches"]
    matches = all_matches.loc[all_matches.tag_overlap > 0]

    scenario = options["scenario_info"]
    title_template = (
        "Proportion of tag overlapping with matching event depending on duration "
        + "size for model {}, database {}, class {}\n"
        + "with detector options {}"
    )
    title_text = title_template.format(
        scenario["model"], scenario["database"], scenario["class"], options,
    )

    styling = theme(
        axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}),
        plot_title=element_text(
            weight="bold", size=14, margin={"t": 10, "b": 10}
        ),
        figure_size=(10, 10),
        text=element_text(size=12, weight="bold"),
    )

    plt = ggplot(data=matches, mapping=aes(x="tag_duration", y="tag_overlap"))
    plt = plt + geom_point()
    plt = (
        plt
        + xlab("Tag duration")
        + ylab("Proportion tag overlapping with matching event")
    )
    plt = plt + theme_classic() + styling
    return plt + ggtitle(title_text)
def predictionContour(fit, data, y, title, density=51):
    """Tile plot of ``fit``'s predicted probability over two features.

    ``data`` is copied and gains a ``"class"`` column holding ``y`` as
    strings.  The plotted ranges are the min/max of the first two columns,
    rounded outward to multiples of 0.5.  ``density`` is forwarded to
    ``ggfuntile``.
    """
    data = data.copy()
    y = y.copy().astype(str)

    def predictor(g, h):
        # Score one (g, h) point; take column 1 of predict_proba.
        point = pd.DataFrame({data.columns[0]: [g]})
        point[data.columns[1]] = [h]
        return fit.predict_proba(point)[0, 1]

    def half_unit_range(values):
        # Round the extremes outward to the nearest multiple of 0.5.
        low = 0.5 * np.floor(2.0 * min(values))
        high = 0.5 * np.ceil(2.0 * max(values))
        return (low, high)

    data["class"] = y
    xrng = half_unit_range(data.iloc[:, 0])
    yrng = half_unit_range(data.iloc[:, 1])
    contour = ggfuntile(
        predictor,
        data,
        xrng=xrng,
        yrng=yrng,
        density=density,
        xlab=data.columns[0],
        ylab=data.columns[1],
        zlab="P(Y=1)",
        breaks=[-np.inf, 0.5, np.inf],
    )
    contour += ggtitle(title)
    return contour
def p(N=3):
    """Return *N* distinct plot objects."""
    base = ggplot(aes(x='wt', y='mpg', label='name'), data=mtcars)
    base = base + geom_text()
    # Each yielded plot differs only in its "<i> of <N>" title.
    captions = ('%d of %d' % (number, N) for number in range(1, N + 1))
    for caption in captions:
        yield base + ggtitle(caption)
def plot_scores(df, title=None, xlab=None, ylab=None):
    """Line plot of the configured score columns, with optional labels.

    ``title``, ``xlab`` and ``ylab`` are each added only when not ``None``.
    Returns the ggplot object.
    """
    g = gg.ggplot(df, gg.aes(x=cfg.SCORE_COLNAME_X, y=cfg.SCORE_COLNAME_Y))
    g = g + gg.geom_line()

    optional_labels = (
        (title, gg.ggtitle),
        (xlab, gg.xlab),
        (ylab, gg.ylab),
    )
    for text, make_label in optional_labels:
        if text is not None:
            g += make_label(text)
    return g
def comparison_plot(self,
                    df: pd.DataFrame,
                    xmin=None,
                    xmax=None,
                    bw="normal_reference",
                    **kwargs):
    """Overlaid density plot of ``df``'s second column, filled by its first.

    ``bw`` is passed to ``geom_density``; ``xmin``/``xmax`` are passed to
    ``self._scale_x``.  Extra keyword arguments are accepted and ignored.
    """
    group_column = df.columns[0]
    value_column = df.columns[1]

    palette = scale_fill_brewer(type="qual", palette="Pastel1")
    densities = geom_density(bw=bw, alpha=0.8)

    plot = ggplot(df, aes(value_column, fill=group_column))
    plot = plot + palette + densities
    plot = plot + ggtitle(self.plot_title) + self._scale_x(xmin, xmax)
    return plot + ergo_theme
def create(self, file_path: str) -> None:
    """Save the "Design Pattern Counts" bar chart to *file_path*."""
    bars = (ggplot(self._data, aes(x="pattern", y="count", label="fraction"))
            + geom_bar(stat="identity", fill="#1e4f79")
            + geom_text(va='bottom', size=24, format_string='{:.1%}'))

    # Keep the x axis in the order the patterns appear in the data.
    scaled = (bars
              + scale_x_discrete(limits=self._data["pattern"])
              + scale_y_continuous(labels=comma_format(), expand=[0.1, 0]))

    labelled = (scaled
                + ggtitle("Design Pattern Counts")
                + xlab("Design Pattern")
                + ylab("Count"))

    themed = (labelled
              + theme_classic(base_size=32, base_family="Helvetica")
              + theme(text=element_text(size=32),
                      axis_text_x=element_text(rotation=45, ha="right")))

    themed.save(file_path, width=24, height=8)
def create(self, file_path: str) -> None:
    """Save the "Case Study Sizes" bar chart to *file_path*."""
    size_order = [
        "1", "2", "3", "5", "26", "52", "97", "100", "300", "537"
    ]

    bars = (ggplot(self._data, aes(x="count", label="..count.."))
            + geom_bar(fill="#1e4f79")
            + geom_text(stat="count", va='bottom', size=24))

    scaled = (bars
              + scale_x_discrete(limits=size_order)
              + scale_y_continuous(breaks=[0, 5, 10], limits=[0, 10]))

    labelled = (scaled
                + ggtitle("Case Study Sizes")
                + xlab("Number of Projects")
                + ylab("Number of Case Studies"))

    themed = (labelled
              + theme_classic(base_size=28, base_family="Helvetica")
              + theme(text=element_text(size=28)))

    themed.save(file_path, width=14, height=7)
def density_plot(
    self,
    df: pd.DataFrame,
    xmin=None,
    xmax=None,
    fill: str = "#fbb4ae",
    bw="normal_reference",
    **kwargs,
):
    """Density plot of ``df``'s first column.

    ``fill`` colours the density; ``xmin``/``xmax`` are passed to
    ``self._scale_x``.  Extra keyword arguments are accepted and ignored.
    """
    # NOTE(review): `bw` is accepted but not forwarded to geom_density here
    # - confirm whether that is intended.
    value_column = df.columns[0]
    curve = geom_density(fill=fill, alpha=0.8)

    plot = ggplot(df, aes(value_column)) + curve
    plot = plot + ggtitle(self.plot_title) + self._scale_x(xmin, xmax)
    return plot + ergo_theme
def plot_replicate_correlation(
    df,
    batch,
    plate,
    facet_string=None,
    split_samples=False,
    output_file_base=None,
    output_file_extensions=None,
    dpi=500,
    height=4,
    width=5,
    return_plot=False,
):
    """Box + jitter plot of the similarity metric by replicate group.

    Parameters
    ----------
    df
        Data with ``group_replicate`` and ``similarity_metric`` columns.
    batch, plate
        Used only to build the plot title ``"<batch>: <plate>"``.
    facet_string
        Facet specification; required when ``split_samples`` is true.
    split_samples
        When true, facet the plot with ``facet_wrap(facet_string)``.
    output_file_base
        When truthy, the plot is passed to ``save_figure`` with this base.
    output_file_extensions
        Extensions forwarded to ``save_figure``; defaults to
        ``[".png", ".pdf", ".svg"]``.
    dpi, height, width
        Forwarded to ``save_figure``.
    return_plot
        When true the ggplot object is returned, otherwise ``None``.

    Raises
    ------
    AssertionError
        If ``split_samples`` is true and ``facet_string`` is falsy.
    """
    # A fresh list per call avoids sharing one mutable default across calls.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    correlation_gg = (
        gg.ggplot(
            df,
            gg.aes(x="group_replicate", y="similarity_metric", fill="group_replicate"),
        )
        + gg.geom_boxplot(
            alpha=0.3, outlier_alpha=0, width=0.8, notchwidth=0.25, fatten=1.5
        )
        + gg.geom_jitter(shape=".", size=0.001, alpha=0.3, width=0.3, height=0)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Replicates")
        + gg.ylab("Pearson Correlation")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            subplots_adjust={"wspace": 0.2},
            title=gg.element_text(size=5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=5),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=5),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if split_samples:
        # Raised explicitly so the check still runs under `python -O`; the
        # exception type is kept for callers that catch AssertionError.
        if not facet_string:
            raise AssertionError("To split samples, specify a facet_string")
        correlation_gg += gg.facet_wrap(facet_string)

    if output_file_base:
        save_figure(
            correlation_gg, output_file_base, output_file_extensions, dpi, height, width
        )

    if return_plot:
        return correlation_gg
def create(self, file_path: str) -> None:
    """Save the "Classes per Category" bar chart to *file_path*."""
    bars = (ggplot(self._data, aes(x="category", y="count", label="percent"))
            + geom_bar(stat="identity", fill="#1e4f79")
            + geom_text(va='bottom', size=24))

    # Keep the x axis in the order the categories appear in the data.
    scaled = (bars
              + scale_x_discrete(limits=self._data["category"])
              + scale_y_continuous(labels=comma_format(), expand=[0.1, 0]))

    labelled = (scaled
                + ggtitle("Classes per Category")
                + xlab("Category")
                + ylab("Number of Classes"))

    themed = (labelled
              + theme_classic(base_size=32, base_family="Helvetica")
              + theme(text=element_text(size=32),
                      axis_text_x=element_text(rotation=45, ha="right")))

    themed.save(file_path, width=7, height=7)
def plot(self,
         plotDat,
         tag=None,
         log=True,
         by='cell_type',
         data_set=None,
         title=None,
         alpha=.4):
    """Scatter measured vs. predicted values with per-group linear fits.

    Pearson correlations are computed on the full input, overall and per
    ``by`` group, before any ``data_set`` filtering.  The legend labels carry
    the per-group correlation.  The point shape maps to ``sample`` when there
    are fewer than 10 distinct samples, otherwise to ``dataset_name``.  The
    title is ``title`` when given, else it reports the overall correlation
    (prefixed by ``tag`` when given).
    """
    frame = plotDat.copy()
    overall_r = pearsonr(frame.measured, frame.prediction)[0]
    group_r = frame.groupby(frame[by]).apply(
        lambda grp: pearsonr(grp.measured, grp.prediction)[0])
    frame['corr'] = group_r[frame[by]].values

    legend_column = '{}_pearson'.format(by)
    frame[legend_column] = frame.apply(
        lambda row: '{} {:.2f}'.format(row[by], group_r[row[by]]), axis=1)

    if data_set:
        frame = frame.loc[frame['dataset_name'] == data_set]

    # The fit layer gets its own aes object and does not inherit the plot's
    # mapping, so the shape mapping added below applies to the points only.
    fit_lines = pn.stat_smooth(
        mapping=pn.aes('measured', 'prediction', color=legend_column),
        method='lm',
        geom='line',
        alpha=0.5,
        se=False,
        inherit_aes=False)
    pl = (pn.ggplot(pn.aes('measured', 'prediction', color=legend_column),
                    frame)
          + pn.geom_point(alpha=alpha)
          + fit_lines)

    sample_count = len(frame['sample'].unique())
    shape_column = 'sample' if sample_count < 10 else 'dataset_name'
    pl = pl + pn.aes(shape=shape_column)

    if log is True:
        pl = pl + pn.scale_x_log10() + pn.scale_y_log10()

    if title is not None:
        heading = title
    elif tag is not None:
        heading = '{} pearson={}'.format(tag, overall_r)
    else:
        heading = 'pearson={}'.format(overall_r)
    pl = pl + pn.ggtitle(heading)
    return pl
def create(self, file_path: str) -> None:
    """Save faceted histograms of the QMOOD quality attributes to *file_path*."""
    histograms = (ggplot(self._data, aes("value"))
                  + geom_histogram(bins=100, fill="#1e4f79")
                  + facet_wrap(facets="variable", scales="free", ncol=3))

    scaled = (histograms
              + scale_x_continuous(trans=asinh_trans(), labels=asinh_labels)
              + scale_y_continuous(labels=comma_format()))

    labelled = (scaled
                + ggtitle("Distributions of QMOOD Quality Attributes")
                + xlab("Quality Attribute Value")
                + ylab("Number of Projects"))

    panel_spacing = {"wspace": 0.35, "hspace": 0.35}
    themed = (labelled
              + theme_classic(base_size=32, base_family="Helvetica")
              + theme(text=element_text(size=32),
                      subplots_adjust=panel_spacing))

    themed.save(file_path, width=24, height=12)
def export_graph(graphname, df, columns, plot):
    """Write per-column "not provided" counts and save the plot as a PNG.

    For each column a ``.tex`` snippet is written under
    ``../report/src/numbers/`` and echoed to stdout.  The plot is then saved
    with an empty title under ``../report/src/images/`` and printed.
    """
    print('Not provided:')
    for c in columns:
        not_provided = df[filter_notprovided(df[c], keep=True)]
        outstr = "{} events, {}".format(*len_and_pct(not_provided, df))
        tex_path = ("../report/src/numbers/" + "np-" + graphname + "-" + c +
                    ".tex")
        with open(tex_path, "w") as f:
            f.write(outstr)
        print(f'- {c}: {outstr}')

    # The saved image carries an empty title; the printed plot keeps its own.
    untitled = plot + p9.ggtitle("")
    untitled.save("../report/src/images/" + graphname + ".png", dpi=300)
    print(plot)
def create(self, file_path: str) -> None:
    """Save per-category histograms of class sizes (LOC) to *file_path*."""
    histograms = (ggplot(self._data, aes("loc"))
                  + geom_histogram(bins=100, fill="#1e4f79")
                  + facet_grid(facets="category ~ .", scales='free_y'))

    scaled = (histograms
              + scale_x_continuous(trans=asinh_trans(), labels=asinh_labels)
              + scale_y_continuous(labels=comma_format()))

    labelled = (scaled
                + ggtitle("Class Sizes")
                + xlab("Lines of Code")
                + ylab("Number of Classes"))

    themed = (labelled
              + theme_classic(base_size=32, base_family="Helvetica")
              + theme(text=element_text(size=32),
                      subplots_adjust={"hspace": 0.1}))

    themed.save(file_path, width=8, height=18)
def plot_result_stats(results, title):
    """Dodged column chart of ``results.describe()`` statistics per metric.

    The ``count``, ``min`` and ``max`` rows of ``describe()`` are left out;
    each remaining bar is labelled with its value rounded to two decimals.
    """
    column_names = {"level_0": "metric", "level_1": "group", 0: "value"}
    stats = results.describe().unstack().reset_index()
    stats = stats.rename(columns=column_names)

    hidden_groups = ["count", "min", "max"]
    stats = stats[~stats["group"].isin(hidden_groups)]
    stats["value_presentation"] = round(stats["value"], 2)

    value_labels = p9.geom_text(p9.aes(label="value_presentation"),
                                position=p9.position_dodge(width=0.9),
                                va="bottom")

    plot = p9.ggplot(stats) + p9.aes("metric", "value", fill="group")
    plot = plot + p9.geom_col(position="dodge") + p9.theme_bw()
    plot = plot + p9.coord_cartesian(ylim=[0, 1.0]) + p9.ggtitle(title)
    plot = plot + value_labels
    return plot
def create(self, file_path: str) -> None:
    """Save faceted histograms of design-pattern use intensity to *file_path*."""
    histograms = (ggplot(self._data, aes("value"))
                  + geom_histogram(bins=100, fill="#1e4f79")
                  + facet_wrap(facets="variable", scales="free", ncol=3))

    scaled = (histograms
              + xlim(0, 1)
              + scale_y_continuous(labels=comma_format()))

    labelled = (scaled
                + ggtitle("Intensity of Design Pattern Use")
                + xlab("Percentage of Classes Participating in Design Pattern")
                + ylab("Number of Projects"))

    panel_spacing = {"wspace": 0.3, "hspace": 0.5}
    themed = (labelled
              + theme_classic(base_size=32, base_family="Helvetica")
              + theme(text=element_text(size=32),
                      axis_title_y=element_text(margin={"r": 40}),
                      subplots_adjust=panel_spacing))

    themed.save(file_path, width=24, height=24)
def plot_overlap_duration_bar(self, data, options):
    """Bar chart of matched tags per duration bin, filled by overlap bin.

    Only rows of ``data["matches"]`` with ``tag_overlap > 0`` are used.
    Overlap is binned at 0.25 steps up to 1; duration is binned at
    0/0.25/0.5/0.75/1/1.5/2/inf.  The title is built from
    ``options["scenario_info"]`` (model, database, class) and the full
    ``options`` object.

    Side effect: rows with ``tag_overlap < 0.3`` are written to
    ``small_overlap.csv`` in the current working directory.
    """
    matches = data["matches"]
    # Copy the filtered rows so the bin columns below are added to an
    # independent frame rather than to a slice of the caller's data.
    matches = matches.loc[matches.tag_overlap > 0].copy()
    matches["tag_overlap_bin"] = pd.cut(
        matches.tag_overlap, [0, 0.25, 0.5, 0.75, 1]
    )
    matches["tag_duration_bin"] = pd.cut(
        matches.tag_duration, [0, 0.25, 0.5, 0.75, 1, 1.5, 2, float("inf")]
    )
    matches.loc[matches.tag_overlap < 0.3].to_csv("small_overlap.csv")

    plt = ggplot(
        data=matches,
        mapping=aes(x="tag_duration_bin", fill="tag_overlap_bin",),
    )
    plt = (
        plt
        + geom_bar()
        + xlab("Tag duration")
        + ylab("Proportion tag overlapping with matching event")
        + theme_classic()
        + theme(
            axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}),
            plot_title=element_text(
                weight="bold", size=14, margin={"t": 10, "b": 10}
            ),
            figure_size=(10, 10),
            text=element_text(size=12, weight="bold"),
        )
        + ggtitle(
            (
                "Proportion of tag overlapping with matching event depending on duration "
                + "size for model {}, database {}, class {}\n"
                + "with detector options {}"
            ).format(
                options["scenario_info"]["model"],
                options["scenario_info"]["database"],
                options["scenario_info"]["class"],
                options,
            )
        )
    )
    return plt
def plot_dist_with_ci(dist):
    """Histogram of ``dist.estimates`` with dotted lines at the 2.5% and
    97.5% quantiles.

    The title shows the mean and the two quantiles, formatted as a
    dollar amount with thousands separators.
    """
    lower_q, upper_q = 0.025, 0.975

    estimates = dist.estimates
    heading = "${0:,.0f} ({1:,.0f}, {2:,.0f})".format(
        np.mean(estimates),
        estimates.quantile(lower_q),
        estimates.quantile(upper_q),
    )

    plot = pn.ggplot(dist, pn.aes(x='estimates')) + pn.geom_histogram(bins=25)
    for q in (lower_q, upper_q):
        plot = plot + pn.geom_vline(
            xintercept=dist.quantile(q),
            color="#FF5500",
            size=2,
            linetype='dotted',
        )
    return plot + pn.ggtitle(heading)
def create(self, file_path: str) -> None:
    """Save one box plot per metric to ``<file_path>.<metric>.pdf``.

    Each plot shows ``value`` by ``category`` for a single metric, with the
    y axis limited from the 98th and 2nd percentiles of that metric's values.
    """
    for metric in self._data["metric"].unique():
        data = self._data[self._data["metric"] == metric]

        # NOTE(review): these are the 98th and 2nd percentiles (not quartiles),
        # and the limits below run from the scaled upper percentile to the
        # scaled lower one - confirm this ordering is intended.
        q75, q25 = np.percentile(data["value"], [98, 2])
        y_limits = (q75 * 0.8, q25 * 1.2)

        boxes = (ggplot(data, aes(x="category", y="value"))
                 + geom_boxplot(outlier_shape="")
                 + coord_cartesian(ylim=y_limits))
        labelled = boxes + ggtitle(metric) + xlab("Category") + ylab("Value")
        themed = labelled + theme_classic(base_size=28,
                                          base_family="Helvetica")
        themed.save(f"{file_path}.{metric}.pdf", width=24, height=24)
def plot_replicate_density(
    df,
    batch,
    plate,
    cutoff,
    percent_strong,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
    return_plot=False,
):
    """Density plot of the similarity metric split by replicate group.

    Parameters
    ----------
    df
        Data with ``similarity_metric`` and ``group_replicate`` columns.
    batch, plate
        Used in the plot title.
    cutoff
        x position of the dashed red vertical line.
    percent_strong
        Fraction shown in the title as a percentage
        (``percent_strong * 100`` rounded to two decimals).
    output_file_base
        When truthy, the plot is passed to ``save_figure`` with this base.
    output_file_extensions
        Extensions forwarded to ``save_figure``; defaults to
        ``[".png", ".pdf", ".svg"]``.
    dpi, height, width
        Forwarded to ``save_figure``.
    return_plot
        When true the ggplot object is returned, otherwise ``None``.
    """
    # A fresh list per call avoids sharing one mutable default across calls.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="similarity_metric", fill="group_replicate"))
        + gg.geom_density(alpha=0.3)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.geom_vline(xintercept=cutoff, color="red", linetype="dashed")
        + gg.ggtitle(
            f"{batch}; Plate: {plate}\n\nPercent Replicating: {np.round(percent_strong * 100, 2)}%"
        )
        + gg.theme_bw()
        + gg.theme(
            title=gg.element_text(size=3.5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=4),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=4),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if output_file_base:
        save_figure(
            density_gg, output_file_base, output_file_extensions, dpi, height, width
        )

    if return_plot:
        return density_gg
def image_histogram():
    """Show an image, its grayscale and equalized versions, and their histograms.

    Reads ``image.jpg`` from the current working directory, displays three
    OpenCV windows, plots both grayscale histograms overlaid, and then waits
    until ``q`` is pressed before closing the OpenCV windows.

    Raises
    ------
    FileNotFoundError
        If ``image.jpg`` cannot be read.
    """
    # create windows
    cv2.namedWindow('image', cv2.WINDOW_NORMAL)
    cv2.namedWindow('image_bw', cv2.WINDOW_NORMAL)
    cv2.namedWindow('image_bw_eq', cv2.WINDOW_NORMAL)

    # read and work with image
    image = cv2.imread(r"image.jpg")
    if image is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an opaque error inside cvtColor.
        cv2.destroyAllWindows()
        raise FileNotFoundError("could not read 'image.jpg'")
    image_bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_bw_eq = cv2.equalizeHist(image_bw)

    # display images
    cv2.imshow('image', image)
    cv2.imshow('image_bw', image_bw)
    cv2.imshow('image_bw_eq', image_bw_eq)

    # calculate histogram
    # The upper bound of the range is exclusive, so it must be 256 for
    # pixels of value 255 to be counted and for each bin to hold one value.
    hist_bw = cv2.calcHist([image_bw], [0], None, [256], [0, 256])
    hist_bw_eq = cv2.calcHist([image_bw_eq], [0], None, [256], [0, 256])
    np_hist_x = np.arange(len(hist_bw))
    d = {
        'np_hist_x': np_hist_x,
        'hist_bw': hist_bw.flatten(),
        'hist_bw_eq': hist_bw_eq.flatten()
    }
    df = pd.DataFrame(data=d)

    # plot histogram
    pn_handle = (
        pn.ggplot(df)
        + pn.geom_col(pn.aes(x='np_hist_x', y='hist_bw'),
                      color=None, fill='red', alpha=0.5)
        + pn.ylab('occurences')
        + pn.geom_col(pn.aes(x='np_hist_x', y='hist_bw_eq'),
                      color=None, fill='green', alpha=0.5)
        + pn.ggtitle('Histograms of bw images')
    )
    pn_handle.draw()
    plt.show()

    # Poll for keys every 16 ms until 'q' is pressed.
    while True:
        pressed_key = cv2.waitKey(16)
        if pressed_key == ord('q'):
            break

    # cleanup opencv
    cv2.destroyAllWindows()
def main(argv: List[str]) -> None:
    """Simulate ability-score rolls, print summary statistics and save a plot.

    Parameters
    ----------
    argv
        Command-line arguments: a positional ``roll_rule`` plus the optional
        ``--num_iterations``, ``--seed`` and ``--plot_file``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("roll_rule", type=RollRule, choices=list(RollRule))
    parser.add_argument("--num_iterations", type=int, default=10000)
    parser.add_argument("--seed", type=int, default=None)
    parser.add_argument("--plot_file", default="ability_roll_distribution.png")
    args = parser.parse_args(argv)

    if args.seed is not None:
        random.seed(args.seed)

    # Run the simulation and process the data
    roll_counts = simulate(args.roll_rule, args.num_iterations)
    data = process_data(roll_counts)

    # Calculate statistics ("percent" is on a 0-100 scale, hence / 100.0)
    mean = sum(data["value"] * data["percent"] / 100.0)
    # idxmax() returns an index *label*, so look it up with .loc; .iloc would
    # pick the wrong row whenever the index is not 0..n-1.
    mode = data.loc[data["count"].idxmax()]["value"]
    stddev = math.sqrt(
        sum(data["percent"] / 100.0 * (data["value"] - mean)**2.0))
    skewness = pearson_first_skewness(mean, mode, stddev)

    # Print out result information
    print(data)
    print()
    print("Mean:", mean)
    print("Mode:", mode)
    print("Standard deviation:", stddev)
    print("Skewness:", skewness)

    # Plot the data
    plot = (plt9.ggplot(data, plt9.aes("value", "percent")) +
            plt9.geom_bar(stat="identity") +
            plt9.geom_vline(xintercept=mean, color="black") +
            plt9.xlim(0, 21) +
            plt9.ylab("Chance (%)") +
            plt9.xlab("Ability Score") +
            plt9.ggtitle("Ability Score Distribution ({} iterations)".format(
                args.num_iterations)))
    plot.save(args.plot_file, dpi=300)
    print("Wrote plot image to:", args.plot_file)
def main():
    """Generate and save every figure.

    Each section builds a data frame, composes a plot from it, and hands the
    plot and an output name to ``save_both``.
    """
    mpl.rc('mathtext', fontset='cm')
    # Silence the warnings matching these message patterns.
    warnings.filterwarnings('ignore', r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_f')
              + titles('P_f(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pf_Ob_Ol')

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_f')
              + titles('P_f(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pf_Ob_σ')

    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_q')
              + titles('P_q(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pq_Ob_Ol')

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_q')
              + titles('P_q(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pq_Ob_σ')

    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'O_l', 'Opr')
              + titles("O'(O_b, O_l)")
              + limits((1, 10), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              , 'Opr_Ob_Ol')

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'σ', 'Opr')
              + titles("O'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Opr_Ob_σ')

    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
          .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(my_plot(df, 'Opr', 'Pf')
              + titles("P_f(O')")
              + labs("O'", 'P_f')
              + limits((1, 20), (0, 1),
                       xbreaks=np.linspace(2, 20, 10),
                       ybreaks=np.linspace(0, 1, 11))
              + gg.geom_line()
              + gg.geom_hline(yintercept=C, linetype='dashed', color='grey')
              , 'Pf_Opr')

    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'σpr')
              + titles("σ'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'σpr_Ob_σ')

    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
          .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(my_plot(df, 'σpr', 'Pq')
              + titles("P_q(σ')")
              + labs("σ'", 'P_q')
              + limits((0, 20), (-1, 0),
                       xbreaks=np.linspace(0, 20, 11),
                       ybreaks=np.linspace(-1, 0, 11))
              + gg.geom_line()
              , 'Pq_σpr')

    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              , 'liab_Ob_Ol_free')

    # NOTE(review): this σ plot is built from liab_Ob_Ol_free, the same source
    # as the O_l plot above - confirm that is the intended data.
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_free')

    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              , 'liab_Ob_Ol_qual')

    # NOTE(review): as above, this σ plot reuses liab_Ob_Ol_qual - confirm.
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_qual')

    # Final figure: shaded free/qualifying regions with annotated thresholds.
    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8],
        'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })
    lab_aes = gg.aes('x', 'y', label='label')

    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')
        # text alignment can't be specified in an aes, so each label gets its
        # own geom_text layer.  DataFrame.ix was removed in pandas 1.0; on
        # this default integer index .loc selects the same label ranges.
        + gg.geom_text(lab_aes, data=labels.loc[:0], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.loc[1:1], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.loc[2:], ha='right', va='bottom')
        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'),
                                     mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ')
        , 'Px_shapes')
def titles(t, *s):
    """Build a ggtitle from a math title and hacky math subtitles.

    ``t`` is wrapped as ``$t$``; each subtitle is wrapped as ``${}_{sub}$``
    on its own line.  When no subtitle is given, ``commission_string`` is
    used as the single subtitle.
    """
    subtitles = s if s else [commission_string]
    lines = ['$%s$' % (t,)]
    for subtitle in subtitles:
        lines.append('${}_{%s}$' % (subtitle,))
    return gg.ggtitle('\n'.join(lines))
def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
    """Plot accuracy against the fraction of the question revealed.

    Parameters
    ----------
    expo
        When true, build the faceted figure, optionally including human
        gameplay curves; otherwise return a plain smoothed plot of
        ``self.char_plot_df`` coloured by guessing model.
    no_models
        (expo only) Plot only the human data as points, without model curves.
    columns
        (expo only) Stack the facets in one column instead of one row.

    The y limits are ``[0, self.y_max]`` when ``self.y_max`` is set, else
    ``[0, 1]``.  When ``self.save_df`` is set, the plotted model data frame
    is written there as JSON.  Human data is loaded only when the gameplay
    JSON file exists and ``self.no_humans`` is false.

    Returns the ggplot object.
    """
    human_gameplay_path = 'data/external/all_human_gameplay.json'

    def cumulative_accuracy(correct_positions, wrong_positions, dataset, name):
        # Sort all answer positions and accumulate the correct ones,
        # normalised by the total number of answers.
        positions = np.array(correct_positions + wrong_positions)
        result = np.array(len(correct_positions) * [1] + len(wrong_positions) * [0])
        order = np.argsort(positions)
        sorted_result = result[order]
        frame = pd.DataFrame({
            'correct': sorted_result.cumsum() / sorted_result.shape[0],
            'char_percent': positions[order],
        })
        frame['Dataset'] = dataset
        frame['Guessing_Model'] = f' {name}'
        return frame

    if self.y_max is not None:
        limits = [0, float(self.y_max)]
        eprint(f'Setting limits to: {limits}')
    else:
        limits = [0, 1]

    if not expo:
        if self.save_df is not None:
            eprint(f'Saving df to: {self.save_df}')
            # The frame plotted in this branch is self.char_plot_df; the old
            # code referenced a local `df` that is never assigned here.
            self.char_plot_df.to_json(self.save_df)
        return (
            ggplot(self.char_plot_df)
            + aes(x='char_percent', y='correct', color='Guessing_Model')
            + stat_smooth(method='mavg', se=False, method_args={'window': 500})
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + coord_cartesian(ylim=limits)
        )

    use_humans = os.path.exists(human_gameplay_path) and not self.no_humans
    if use_humans:
        with open(human_gameplay_path) as f:
            all_gameplay = json.load(f)

        frames = []
        for event, name in [('parents', 'Intermediate'), ('maryland', 'Expert'), ('live', 'National')]:
            if self.merge_humans:
                name = 'Human'
            gameplay = all_gameplay[event]
            # The 'live' event contributes no control ("Regular Test") curve.
            if event != 'live':
                frames.append(cumulative_accuracy(
                    gameplay['control_correct_positions'],
                    gameplay['control_wrong_positions'],
                    'Regular Test', name))
            frames.append(cumulative_accuracy(
                gameplay['adv_correct_positions'],
                gameplay['adv_wrong_positions'],
                'IR Adversarial', name))
            if len(gameplay['advneural_correct_positions']) > 0:
                frames.append(cumulative_accuracy(
                    gameplay['advneural_correct_positions'],
                    gameplay['advneural_wrong_positions'],
                    'RNN Adversarial', name))

        human_df = pd.concat(frames)
        human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
        human_dtype = CategoricalDtype(human_vals, ordered=True)
        human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(human_dtype)
        dataset_dtype = CategoricalDtype(['Regular Test', 'IR Adversarial', 'RNN Adversarial'], ordered=True)
        human_df['Dataset'] = human_df['Dataset'].astype(dataset_dtype)

    if no_models:
        # NOTE(review): human_df only exists when the human data was loaded
        # above - confirm callers never combine no_models with missing data.
        p = ggplot(human_df) + geom_point(shape='.')
    else:
        df = self.char_plot_df
        if 1 not in self.rounds:
            df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
        if 2 not in self.rounds:
            df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
            df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
        p = ggplot(df)
        if self.save_df is not None:
            eprint(f'Saving df to: {self.save_df}')
            df.to_json(self.save_df)

        if use_humans:
            eprint('Loading human data')
            p = p + geom_line(data=human_df)

    if columns:
        facet_conf = facet_wrap('Guessing_Model', ncol=1)
    else:
        facet_conf = facet_wrap('Guessing_Model', nrow=1)

    if not no_models:
        if self.mvg_avg_char:
            chart = stat_smooth(method='mavg', se=False, method_args={'window': 400})
        else:
            chart = stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)
    else:
        chart = None

    p = (
        p + facet_conf
        + aes(x='char_percent', y='correct', color='Dataset')
    )
    if chart is not None:
        p += chart
    p = (
        p
        + scale_y_continuous(breaks=np.linspace(0, 1, 6))
        + scale_x_continuous(breaks=[0, .5, 1])
        + coord_cartesian(ylim=limits)
        + xlab('Percent of Question Revealed')
        + ylab('Accuracy')
        + theme(
            legend_box_margin=0,
            legend_title=element_blank(),
            strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5})
        )
        + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'], name='Questions')
    )
    if self.title != '':
        p += ggtitle(self.title)

    return p