def plot_individual_returns(
        df_in: pd.DataFrame,
        max_episode: int,
        return_column: str = 'episode_return',
        colour_var: Optional[str] = None,
        yintercept: Optional[float] = None,
        sweep_vars: Optional[Sequence[str]] = None) -> gg.ggplot:
    """Plot individual learning curves: one curve per sweep setting."""
    df = df_in.copy()
    df['unique_group'] = _make_unique_group_col(df, sweep_vars)
    p = (gg.ggplot(df)
         + gg.aes(x='episode', y=return_column, group='unique_group')
         + gg.coord_cartesian(xlim=(0, max_episode)))
    if colour_var:
        p += gg.geom_line(gg.aes(colour=colour_var), size=1.1, alpha=0.75)
        if len(df[colour_var].unique()) <= 5:
            df[colour_var] = df[colour_var].astype('category')
            p += gg.scale_colour_manual(values=FIVE_COLOURS)
    else:
        p += gg.geom_line(size=1.1, alpha=0.75, colour='#313695')
    if yintercept:
        p += gg.geom_hline(yintercept=yintercept, alpha=0.5, size=2,
                           linetype='dashed')
    return facet_sweep_plot(p, sweep_vars, tall_plot=True)

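# Aside (not part of the snippet above): every example in this collection relies
# on the same property of coord_cartesian -- it zooms the viewport without
# discarding data, whereas scale limits drop out-of-range rows before any stats
# are computed. A minimal self-contained sketch, with made-up data:
import pandas as pd
import plotnine as gg

demo = pd.DataFrame({'x': range(100), 'y': [v % 17 for v in range(100)]})

# Zooms the view to y in [0, 10]; out-of-range points are merely hidden, so
# summaries and smoothers still see every row.
zoomed = (gg.ggplot(demo, gg.aes('x', 'y')) + gg.geom_line()
          + gg.coord_cartesian(ylim=(0, 10)))

# By contrast, scale limits *remove* rows outside [0, 10] before plotting.
clipped = (gg.ggplot(demo, gg.aes('x', 'y')) + gg.geom_line()
           + gg.scale_y_continuous(limits=(0, 10)))
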
def plot_downstream(clwe, table, output, ylim):
    df = pd.read_csv(data_file(table))
    df = df[df.clwe == clwe]
    df = df.assign(
        refine=pd.Categorical(df['refine'],
                              ['Original', '+retrofit', '+synthetic']),
        language=pd.Categorical(df['language'],
                                ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG'])
    )
    g = p9.ggplot(df, p9.aes(x='language', y='accuracy', fill='refine'))
    g += p9.geom_bar(position='dodge', stat='identity', width=.8)
    g += p9.coord_cartesian(ylim=ylim)
    g += p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
    g += p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
    g += p9.theme(
        plot_background=p9.element_rect(fill='white'),
        panel_grid_major_y=p9.element_line(),
        axis_text_x=p9.element_text(margin={'t': 10}),
        axis_text_y=p9.element_text(margin={'r': 8}),
        legend_position=(.7, .9),
        legend_direction='horizontal',
        legend_title=p9.element_blank(),
        legend_text=p9.element_text(size=FONT_SIZE),
        legend_box_margin=0,
        figure_size=(12, 3)
    )
    g.save(filename=output_file(output))

def round_2_plot():
    if not os.path.exists(round_2_df_path):
        eprint(f'Downloading {round_2_df_url} to {round_2_df_path}')
        urlretrieve(round_2_df_url, round_2_df_path)
    verify_checksum(round_2_df_checksum, round_2_df_path)
    df = pd.read_json(round_2_df_path)
    p = (
        ggplot(df)
        + aes(x='char_percent', y='correct', color='Dataset')
        + facet_wrap('Guessing_Model', nrow=1)
        + stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.',
                           linetype='None', size=0.5)
        + scale_y_continuous(breaks=np.linspace(0, 1, 6))
        + scale_x_continuous(breaks=[0, .5, 1])
        + coord_cartesian(ylim=[0, 0.7])
        + ggtitle('Round 2 Attacks and Models')
        + xlab('Percent of Question Revealed')
        + ylab('Accuracy')
        + theme(
            # legend_position='top',
            legend_box_margin=0,
            legend_title=element_blank(),
            strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5}))
        + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
                             name='Questions'))
    p.save('2019_tacl_trick/auto_fig/round_2_json.pdf', width=7.0, height=1.7)

def misspecified_plot(experiment_name='finite_misspecified',
                      data_path=_DEFAULT_DATA_PATH):
    """Specialized plotting script for TS tutorial paper misspecified TS."""
    df = load_data(experiment_name, data_path)

    def _parse_np_array(np_string):
        return np.array(np_string.replace('[', '')
                        .replace(']', '')
                        .strip()
                        .split())

    df['posterior_mean'] = df.posterior_mean.apply(_parse_np_array)

    # Action means
    new_col_list = ['mean_0', 'mean_1', 'mean_2']
    for n, col in enumerate(new_col_list):
        df[col] = df['posterior_mean'].apply(lambda x: float(x[n]))

    plt_df = (df.groupby(['agent', 't'])
              .agg({'instant_regret': np.mean,
                    'mean_0': np.mean,
                    'mean_1': np.mean,
                    'mean_2': np.mean})
              .reset_index())

    regret_plot = (gg.ggplot(plt_df)
                   + gg.aes('t', 'instant_regret', colour='agent')
                   + gg.geom_line(size=1.25, alpha=0.75)
                   + gg.xlab('Timestep (t)')
                   + gg.ylab('Average instantaneous regret')
                   + gg.scale_colour_brewer(name='Agent', type='qual', palette='Set1')
                   + gg.coord_cartesian(ylim=(0, 0.02)))

    melt_df = pd.melt(plt_df, id_vars=['agent', 't'], value_vars=new_col_list)
    melt_df['group_id'] = melt_df.agent + melt_df.variable
    action_plot = (gg.ggplot(melt_df)
                   + gg.aes('t', 'value', colour='agent', group='group_id')
                   + gg.geom_line(size=1.25, alpha=0.75)
                   + gg.coord_cartesian(ylim=(0, 0.05))
                   + gg.xlab('Timestep (t)')
                   + gg.ylab('Expected mean reward')
                   + gg.scale_colour_brewer(name='Agent', type='qual', palette='Set1'))

    plot_dict = {'misspecified_regret': regret_plot,
                 'misspecified_action': action_plot}
    return plot_dict

def plot(solu, k):
    # Generates one frame of the four-bar mechanism animation.
    print("Frame: ", k)
    sol = solu[k:k + 1]
    p = (
        ggplot(sol) +
        # MAIN LINKAGE
        geom_segment(aes(x=0, y=0, xend=sol.Ro4[k].real, yend=sol.Ro4[k].imag)) +
        geom_point(aes(x=0, y=0), shape='o', size=3) +
        geom_point(aes(x=sol.Ro4[k].real, y=sol.Ro4[k].imag), shape='o', size=3) +
        # 2ND LINKAGE
        geom_segment(aes(x=0, y=0, xend=sol.Ra[k].real, yend=sol.Ra[k].imag)) +
        geom_point(aes(x=sol.Ra[k].real, y=sol.Ra[k].imag), shape='o', size=3) +
        # AP LINKAGE
        geom_segment(aes(x=sol.Ra[k].real, y=sol.Ra[k].imag,
                         xend=sol.Rpa[k].real, yend=sol.Rpa[k].imag)) +
        geom_point(aes(x=sol.Rpa[k].real, y=sol.Rpa[k].imag), shape='o', size=3) +
        # 3RD LINKAGE
        geom_segment(aes(x=sol.Ra[k].real, y=sol.Ra[k].imag,
                         xend=sol.Rba[k].real, yend=sol.Rba[k].imag)) +
        geom_point(aes(x=sol.Rba[k].real, y=sol.Rba[k].imag), shape='o', size=3) +
        # 4TH LINKAGE
        geom_segment(aes(x=sol.Rba[k].real, y=sol.Rba[k].imag,
                         xend=sol.Ro4[k].real, yend=sol.Ro4[k].imag)) +
        geom_point(aes(x=sol.Rba[k].real, y=sol.Rba[k].imag), shape='o', size=3) +
        # NODE IDENTIFICATION
        annotate("text", x=0, y=-20, label="$O_1$") +
        annotate("text", x=sol.Ro4[k].real, y=sol.Ro4[k].imag - 20, label="$O_4$") +
        annotate("text", x=sol.Ra[k].real + 10, y=sol.Ra[k].imag, label="$A$") +
        annotate("text", x=sol.Rba[k].real + 20, y=sol.Rba[k].imag - 10, label="$B$") +
        annotate("text", x=sol.Rpa[k].real, y=sol.Rpa[k].imag - 40, label="$P$") +
        # ACCELERATION ARROWS (remove these if you want the plot without
        # acceleration information)
        geom_segment(aes(x=sol.Rba[k].real, y=sol.Rba[k].imag,
                         xend=sol.Rba[k].real + sol.Aba[k].real * ACC_SCALE,
                         yend=sol.Rba[k].imag + sol.Aba[k].imag * ACC_SCALE),
                     colour='red', arrow=arrow()) +  # Point B
        geom_segment(aes(x=sol.Ra[k].real, y=sol.Ra[k].imag,
                         xend=sol.Ra[k].real + sol.Aa[k].real * ACC_SCALE,
                         yend=sol.Ra[k].imag + sol.Aa[k].imag * ACC_SCALE),
                     colour='red', arrow=arrow()) +  # Point A
        geom_segment(aes(x=sol.Rpa[k].real, y=sol.Rpa[k].imag,
                         xend=sol.Rpa[k].real + sol.Apaa[k].real * ACC_SCALE,
                         yend=sol.Rpa[k].imag + sol.Apaa[k].imag * ACC_SCALE),
                     colour='red', arrow=arrow()) +  # Point C
        # ACCELERATION TEXTS (comment these out if you want the plot without
        # acceleration information). Wrapping text in '$ $' makes plotnine
        # render it as LaTeX.
        annotate("text", x=sol.Rba[k].real - 30, y=sol.Rba[k].imag + 10,
                 label=f'${np.absolute(sol.Aba[k]) / 1000:.2f}~m/s^2$', colour='red') +
        annotate("text", x=sol.Ra[k].real + 20, y=sol.Ra[k].imag - 20,
                 label=f'${np.absolute(sol.Aa[k]) / 1000:.2f}~m/s^2$', colour='red') +
        annotate("text", x=sol.Rpa[k].real + 10, y=sol.Rpa[k].imag + 20,
                 label=f'${np.absolute(sol.Apaa[k]) / 1000:.2f}~m/s^2$', colour='red') +
        # TIME IDENTIFICATION
        annotate("label", x=120, y=-80, label=f'Time: ${sol.time[k]:.2f}~s$', alpha=1) +
        # labs(x='$x~[mm]$', y='$y~[mm]$') +
        # Fixes the plot limits so they don't grow larger than necessary;
        # comment this out if you prefer auto-scaling.
        coord_cartesian(xlim=SCALE_X, ylim=SCALE_Y) +
        theme_bw()  # Prettier than the default theme.
    )
    return p

def plot_scaling(plt_df: pd.DataFrame,
                 sweep_vars: Sequence[Text] = None,
                 with_baseline: bool = True) -> gg.ggplot:
    """Plot scaling of learning time against exponential baseline."""
    p = _base_scaling(plt_df, sweep_vars, with_baseline)
    p += gg.xlab('deep sea problem size')
    p += gg.ylab('#episodes until < 90% bad episodes')
    if with_baseline:
        max_steps = np.minimum(NUM_EPISODES, plt_df.episode.max())
        p += gg.coord_cartesian(ylim=(0, max_steps))
    return plotting.facet_sweep_plot(p, sweep_vars)

def ensemble_plot(experiment_name='ensemble_nn', data_path=_DEFAULT_DATA_PATH):
    """Specialized plotting script for TS tutorial paper ensemble NN."""
    df = load_data(experiment_name, data_path)
    plt_df = (df.groupby(['agent', 't'])
              .agg({'instant_regret': np.mean})
              .reset_index())

    def _get_agent_family(agent_name):
        if 'dropout' in agent_name.lower():
            return 'Dropout'
        elif 'ensemble' in agent_name.lower():
            return 'Ensemble'
        elif '/' in agent_name.lower():
            return 'Annealing epsilon'
        else:
            return 'Fixed epsilon'

    def _rename_ensemble(agent_name):
        if 'ensemble' in agent_name:
            n_ensemble = agent_name.split('-')[0]
            return 'ensemble=' + n_ensemble.zfill(3)
        else:
            return agent_name

    plt_df['agent_name'] = plt_df.agent.apply(_rename_ensemble)
    plt_df['agent_family'] = plt_df.agent.apply(_get_agent_family)

    custom_colors = ['#d53e4f', '#fdae61', '#a6d96a', '#66c2a5', '#5e4fa2']
    plot_dict = {}
    # Group by the column name (not a one-element list) so the key is a string,
    # which the equality check below relies on.
    for agent_family, df_family in plt_df.groupby('agent_family'):
        if agent_family == 'Ensemble':
            custom_labels = ['Ensemble 3', 'Ensemble 10', 'Ensemble 30',
                             'Ensemble 100', 'Ensemble 300']
            gg_legend = gg.scale_colour_manual(values=custom_colors,
                                               labels=custom_labels,
                                               name='Agent')
        else:
            gg_legend = gg.scale_colour_manual(custom_colors, name='Agent')
        p = (gg.ggplot(df_family)
             + gg.aes('t', 'instant_regret', colour='agent_name')
             + gg.geom_line(size=1.25, alpha=0.75)
             + gg.facet_wrap('~ agent_family')
             + gg_legend
             + gg.coord_cartesian(ylim=(0, 60))
             + gg.xlab('Timestep (t)')
             + gg.ylab('Average instantaneous regret')
             + gg.theme(figure_size=(6, 6)))
        plot_dict[experiment_name + '_' + agent_family] = p
    return plot_dict

def bandit_learning_format(plot: gg.ggplot) -> gg.ggplot:
    """Add nice bandit formatting to ggplot."""
    plot += gg.scale_y_continuous(breaks=np.arange(0, 1.1, 0.1).tolist())
    plot += gg.theme(panel_grid_major_y=gg.element_line(size=2.5),
                     panel_grid_minor_y=gg.element_line(size=0))
    plot += gg.geom_hline(gg.aes(yintercept=BASE_REGRET),
                          linetype='dashed', alpha=0.4, size=1.75)
    plot += gg.coord_cartesian(ylim=(0, 1))
    return plot

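# Aside: since bandit_learning_format returns the formatted plot, it slots in at
# the end of a plot-building pipeline. A hypothetical call (column names assumed,
# not taken from the original module):
# p = gg.ggplot(df) + gg.aes(x='episode', y='average_regret') + gg.geom_line()
# p = bandit_learning_format(p)
# p.save('bandit_regret.png')
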
def plot_elos():
    diffs = np.linspace(-1000, +1000)
    rates = 1 / (1 + 10**(-diffs / 400))
    df = pd.DataFrame({'elo': diffs, 'winrate': rates})
    return (pn.ggplot(df)
            + pn.geom_line(pn.aes(x='elo', y='winrate'))
            + pn.geom_vline(xintercept=0, alpha=.1)
            + pn.geom_hline(yintercept=.5, alpha=.1)
            + pn.labs(x='Own Elo relative to opponent\'s Elo',
                      y='Win rate v. opponent')
            + pn.scale_y_continuous(labels=percent_format())
            + pn.coord_cartesian(expand=False)
            + plot.IEEE())

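# Aside: the curve above is the standard Elo expected-score formula,
# E(diff) = 1 / (1 + 10 ** (-diff / 400)). Two quick sanity checks:
assert 1 / (1 + 10 ** (-0 / 400)) == 0.5                     # equal ratings -> 50%
assert abs(1 / (1 + 10 ** (-400 / 400)) - 10 / 11) < 1e-12   # +400 Elo -> ~90.9%
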
def plot_learning(df: pd.DataFrame,
                  sweep_vars: Sequence[str] = None) -> gg.ggplot:
    """Plots the average regret through time by optimal_horizon."""
    df = dc_preprocess(df_in=df)
    p = plotting.plot_regret_learning(
        df_in=df,
        group_col='optimal_horizon',
        sweep_vars=sweep_vars,
        max_episode=sweep.NUM_EPISODES
    )
    p += gg.geom_hline(gg.aes(yintercept=BASE_REGRET),
                       linetype='dashed', alpha=0.4, size=1.75)
    p += gg.coord_cartesian(ylim=(0, 0.1))
    return p

def plot_result_stats(results, title):
    stats = results.describe().unstack().reset_index().rename(columns={
        "level_0": "metric",
        "level_1": "group",
        0: "value"
    })
    stats = stats[~stats["group"].isin(["count", "min", "max"])]
    stats["value_presentation"] = round(stats["value"], 2)
    plot = (p9.ggplot(stats)
            + p9.aes("metric", "value", fill="group")
            + p9.geom_col(position="dodge")
            + p9.theme_bw()
            + p9.coord_cartesian(ylim=[0, 1.0])
            + p9.ggtitle(title)
            + p9.geom_text(p9.aes(label="value_presentation"),
                           position=p9.position_dodge(width=0.9),
                           va="bottom"))
    return plot

def create(self, file_path: str) -> None:
    metrics = self._data["metric"].unique()
    for metric in metrics:
        data = self._data[self._data["metric"] == metric]
        # Trim the y-axis to the 2nd..98th percentile range (with some margin)
        # so extreme outliers don't dominate the boxplot. Note np.percentile
        # returns values in the order requested: high first, then low.
        q_high, q_low = np.percentile(data["value"], [98, 2])
        (ggplot(data, aes(x="category", y="value"))
         + geom_boxplot(outlier_shape="")
         + coord_cartesian(ylim=(q_low * 0.8, q_high * 1.2))
         # + facet_wrap(facets="metric", scales="free", ncol=3)
         + ggtitle(metric)
         # + ggtitle("QMOOD Quality Attributes")
         + xlab("Category")
         + ylab("Value")
         + theme_classic(base_size=28, base_family="Helvetica")
         # + theme(subplots_adjust={"wspace": 0.25, "hspace": 0.2})
         ).save(f"{file_path}.{metric}.pdf", width=24, height=24)

def plot_calibrations():
    params = data.sample_calibrations()
    return (pn.ggplot(params,
                      pn.aes(xmin='boardsize-.25', xmax='boardsize+.25',
                             group='boardsize', fill='factor(boardsize)'))
            + pn.geom_hline(yintercept=.5, alpha=.2)
            + pn.geom_rect(pn.aes(ymin='lower', ymax='upper'),
                           show_legend=False, color='k')
            + pn.geom_rect(pn.aes(ymin='mid', ymax='mid'),
                           show_legend=False, color='k', size=2)
            + pn.scale_y_continuous(labels=percent_format())
            + pn.scale_fill_hue(l=.4)
            + pn.coord_cartesian(ylim=(.4, .6))
            + pn.labs(y='Win rate v. perfect play', x='Board size')
            + plot.IEEE())

def plot_average(df: pd.DataFrame,
                 sweep_vars: Sequence[Text] = None,
                 group_col: Text = 'noise_scale') -> gg.ggplot:
    """Plots the average regret through time by noise_scale."""
    p = plotting.plot_regret_average(
        df_in=df,
        group_col=group_col,
        episode=sweep.NUM_EPISODES,
        sweep_vars=sweep_vars
    )
    p += gg.scale_y_continuous(breaks=np.arange(0, 1.1, 0.1).tolist())
    p += gg.theme(panel_grid_major_y=gg.element_line(size=2.5),
                  panel_grid_minor_y=gg.element_line(size=0))
    p += gg.geom_hline(gg.aes(yintercept=bandit_analysis.BASE_REGRET),
                       linetype='dashed', alpha=0.4, size=1.75)
    p += gg.coord_cartesian(ylim=(0, 1))
    return p

def plot_regret_learning(df_in: pd.DataFrame,
                         group_col: Optional[str] = None,
                         sweep_vars: Optional[Sequence[str]] = None,
                         regret_col: str = 'total_regret',
                         max_episode: Optional[int] = None) -> gg.ggplot:
    """Plots the average regret through time, grouped by group_var."""
    df = df_in.copy()
    df['average_regret'] = df[regret_col] / df.episode
    df = df[df.episode <= (max_episode or np.inf)]
    if group_col is None:
        p = _plot_regret_single(df)
    else:
        p = _plot_regret_group(df, group_col)
    p += gg.geom_hline(gg.aes(yintercept=0.0), alpha=0)  # axis hack
    p += gg.ylab('average regret per timestep')
    p += gg.coord_cartesian(xlim=(0, max_episode))
    return facet_sweep_plot(p, sweep_vars, tall_plot=True)

def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals, image_file_type):
    samp_vector = ["set1" for i in range(len(samp_set1_vals))]
    samp_vector.extend(["set2" for i in range(len(samp_set2_vals))])
    data_vector = samp_set1_vals + samp_set2_vals
    dframe = pd.DataFrame(list(zip(samp_vector, data_vector)),
                          columns=["sample", "value"])
    gg = (pn.ggplot(dframe, pn.aes(x="sample", y="value"))
          + pn.geom_jitter(position="jitter", width=0.2, height=0.01)
          + pn.coord_cartesian(ylim=(0, 100))
          + pn.theme_bw())
    # TODO Just infer format from plotName
    gg.save(filename=plotName, format=image_file_type)

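# Aside: one way to resolve the TODO above is to derive the format from the
# filename suffix instead of threading image_file_type through. A sketch
# (infer_format is a hypothetical helper, not part of the original code);
# plotnine/matplotlib can usually infer the format from the extension anyway
# when format is left unset.
from pathlib import Path

def infer_format(plot_name: str, default: str = 'png') -> str:
    """'results/plot.pdf' -> 'pdf'; falls back to `default` if no suffix."""
    suffix = Path(plot_name).suffix.lstrip('.')
    return suffix or default

# gg.save(filename=plotName, format=infer_format(plotName))
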
def limits(x, y=None, xbreaks=None, ybreaks=None):
    if y is None:
        y = x
    x0, x1 = x
    y0, y1 = y
    if xbreaks is None:
        xbreaks = np.linspace(x0, x1, x1 - x0 + 1)
    if ybreaks is None:
        ybreaks = np.linspace(y0, y1, y1 - y0 + 1)
    # We want these plots to continue to the top and left, so zoom with
    # coord_cartesian and pin only the lower scale limits.
    return [
        gg.coord_cartesian(xlim=x, ylim=y),
        gg.scale_x_continuous(limits=(x0, None), breaks=xbreaks),
        gg.scale_y_continuous(limits=(y0, None), breaks=ybreaks)
    ]

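# Aside: `limits` returns a list of plotnine components; plotnine (like ggplot2)
# accepts adding a list to a plot, and adding the components one by one also
# works. A hypothetical usage for a 0..8 grid:
# p = gg.ggplot(df, gg.aes('x', 'y')) + gg.geom_point()
# p = p + limits((0, 8))   # zoom to 0..8 with integer breaks on both axes
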
def plot_regret_group_nosmooth(df_in: pd.DataFrame,
                               group_col: str,
                               sweep_vars: Sequence[str] = None,
                               regret_col: str = 'total_regret',
                               max_episode: int = None) -> gg.ggplot:
    """Plots the average regret through time without smoothing."""
    df = df_in.copy()
    df['average_regret'] = df[regret_col] / df.episode
    df = df[df.episode <= max_episode]
    group_name = group_col.replace('_', ' ')
    df[group_name] = df[group_col]
    p = (gg.ggplot(df)
         + gg.aes(x='episode', y='average_regret',
                  group=group_name, colour=group_name)
         + gg.geom_line(size=2, alpha=0.75)
         + gg.geom_hline(gg.aes(yintercept=0.0), alpha=0)  # axis hack
         )
    p += gg.coord_cartesian(xlim=(0, max_episode))
    return facet_sweep_plot(p, sweep_vars, tall_plot=True)

def plot_flops_frontier(ags):
    df = data.modelled_elos(ags)
    return (pn.ggplot(df, pn.aes(x='train_flops',
                                 color='factor(boardsize)',
                                 group='boardsize'))
            + pn.geom_line(pn.aes(y='400/np.log(10)*elo'), size=2)
            + pn.geom_line(pn.aes(y='400/np.log(10)*elohat'),
                           size=1, linetype='dashed')
            + pn.labs(x='Training FLOPS',
                      y='Elo v. perfect play',
                      title='Performance is a sigmoid of compute, '
                            'linearly scaled by board size')
            + pn.scale_x_continuous(trans='log10')
            + pn.scale_color_discrete(name='Boardsize')
            + pn.coord_cartesian(None, (None, 0))
            + plot.mpl_theme()
            + plot.poster_sizes())

def plot_test(ags):
    df = ags.query('boardsize == 9').groupby('run').apply(
        lambda df: df[df.idx == df.idx.max()]).copy()
    df['test_flops'] = df.test_nodes * (df.train_flops / df.samples)

    subset = df.query('test_nodes == 64').sort_values('test_flops')
    selection = [subset.loc[ELO * subset.elo > e].iloc[0].run
                 for e in np.linspace(-2000, -500, 4)]

    df = df[df.run.isin(selection)].copy()
    df['params'] = df.width**2 * df.depth
    df['arch'] = df.apply(lambda r: '{depth}×{width}'.format(**r), axis=1)
    labels = (df.sort_values('test_flops').reset_index(drop=True)
              .groupby('run').first().reset_index())

    return (pn.ggplot(df, pn.aes(x='test_flops', y='ELO*elo',
                                 color='params', group='run'))
            + pn.geom_point(size=.25, show_legend=False)
            + pn.geom_line(size=.5, show_legend=False)
            + pn.geom_text(pn.aes(label='test_nodes'), nudge_y=-50,
                           show_legend=False, size=4, va='top')
            + pn.geom_text(pn.aes(label='arch'), data=labels,
                           show_legend=False, size=6, nudge_x=-.1, ha='right')
            + pn.scale_x_continuous(trans='log10')
            + pn.scale_color_cmap('plasma', trans='log10',
                                  limits=(df.params.min(), 10 * df.params.max()))
            + pn.coord_cartesian((3.5, None))
            + pn.labs(x='Test-time compute (FLOPS-seconds)',
                      y='Elo v. perfect play')
            + plot.IEEE())

def plot_frontiers(ags):
    df, model = data.modelled_elos(ags)
    labels = df.sort_values('train_flops').groupby('boardsize').first().reset_index()

    return (pn.ggplot(df, pn.aes(x='train_flops',
                                 color='factor(boardsize)',
                                 group='boardsize'))
            + pn.geom_line(pn.aes(y='ELO*elo'), size=.5, show_legend=False)
            + pn.geom_line(pn.aes(y='ELO*elohat'), size=.25,
                           linetype='dashed', show_legend=False)
            + pn.geom_text(pn.aes(y='ELO*elohat', label='boardsize'),
                           data=labels, show_legend=False, size=6,
                           nudge_x=-.25, nudge_y=-15)
            + pn.labs(x='Training compute (FLOPS-seconds)',
                      y='Elo v. perfect play')
            + pn.scale_color_discrete(l=.4)
            + pn.scale_x_continuous(trans='log10')
            + pn.coord_cartesian(None, (None, 0))
            + plot.IEEE())

def pattern_research_plot(data):
    from colour import Color

    def colors_gradient_generator(low_color, high_color, color_steps):
        low_color_obj = Color(low_color)
        high_color_obj = Color(high_color)
        return map(lambda x: x.hex_l,
                   low_color_obj.range_to(high_color_obj, color_steps))

    blue = list(colors_gradient_generator("#004996", "#018ace", 3))[::-1]

    data = data.melt(id_vars=['hour_category'], value_vars=['D', 'W', 'MS'],
                     var_name='series', value_name='count')
    time_unit_categories = pd.Categorical(data['series'],
                                          categories=['D', 'W', 'MS'])
    data = data.assign(series=time_unit_categories)

    plot = (p9.ggplot(data=data,
                      mapping=p9.aes(x='hour_category', y='count', fill='series'))
            + p9.geom_bar(stat='identity', position='dodge')
            + p9.scale_fill_manual(blue, labels=['D', 'W', 'MS'])
            + p9.theme_classic()
            + p9.theme(axis_text=p9.element_text(size=8),
                       axis_title=p9.element_text(size=8, face='bold'))
            + p9.coord_cartesian(ylim=(0, 100))
            + p9.scale_y_continuous(labels=lambda l: ["%d%%" % v for v in l])
            + p9.labs(x='hour_category', y='Ratio of attacks'))
    return plot

def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
    if self.y_max is not None:
        limits = [0, float(self.y_max)]
        eprint(f"Setting limits to: {limits}")
    else:
        limits = [0, 1]
    if expo:
        if os.path.exists("data/external/all_human_gameplay.json") and not self.no_humans:
            with open("data/external/all_human_gameplay.json") as f:
                all_gameplay = json.load(f)
                frames = []
                for event, name in [
                    ("parents", "Intermediate"),
                    ("maryland", "Expert"),
                    ("live", "National"),
                ]:
                    if self.merge_humans:
                        name = "Human"
                    gameplay = all_gameplay[event]
                    if event != "live":
                        control_correct_positions = gameplay["control_correct_positions"]
                        control_wrong_positions = gameplay["control_wrong_positions"]
                        control_positions = control_correct_positions + control_wrong_positions
                        control_positions = np.array(control_positions)
                        control_result = np.array(
                            len(control_correct_positions) * [1]
                            + len(control_wrong_positions) * [0]
                        )
                        argsort_control = np.argsort(control_positions)
                        control_x = control_positions[argsort_control]
                        control_sorted_result = control_result[argsort_control]
                        control_y = (
                            control_sorted_result.cumsum() / control_sorted_result.shape[0]
                        )
                        control_df = pd.DataFrame(
                            {"correct": control_y, "char_percent": control_x}
                        )
                        control_df["Dataset"] = "Regular Test"
                        control_df["Guessing_Model"] = f" {name}"
                        frames.append(control_df)

                    adv_correct_positions = gameplay["adv_correct_positions"]
                    adv_wrong_positions = gameplay["adv_wrong_positions"]
                    adv_positions = adv_correct_positions + adv_wrong_positions
                    adv_positions = np.array(adv_positions)
                    adv_result = np.array(
                        len(adv_correct_positions) * [1] + len(adv_wrong_positions) * [0]
                    )
                    argsort_adv = np.argsort(adv_positions)
                    adv_x = adv_positions[argsort_adv]
                    adv_sorted_result = adv_result[argsort_adv]
                    adv_y = adv_sorted_result.cumsum() / adv_sorted_result.shape[0]
                    adv_df = pd.DataFrame({"correct": adv_y, "char_percent": adv_x})
                    adv_df["Dataset"] = "IR Adversarial"
                    adv_df["Guessing_Model"] = f" {name}"
                    frames.append(adv_df)

                    if len(gameplay["advneural_correct_positions"]) > 0:
                        adv_correct_positions = gameplay["advneural_correct_positions"]
                        adv_wrong_positions = gameplay["advneural_wrong_positions"]
                        adv_positions = adv_correct_positions + adv_wrong_positions
                        adv_positions = np.array(adv_positions)
                        adv_result = np.array(
                            len(adv_correct_positions) * [1]
                            + len(adv_wrong_positions) * [0]
                        )
                        argsort_adv = np.argsort(adv_positions)
                        adv_x = adv_positions[argsort_adv]
                        adv_sorted_result = adv_result[argsort_adv]
                        adv_y = adv_sorted_result.cumsum() / adv_sorted_result.shape[0]
                        adv_df = pd.DataFrame({"correct": adv_y, "char_percent": adv_x})
                        adv_df["Dataset"] = "RNN Adversarial"
                        adv_df["Guessing_Model"] = f" {name}"
                        frames.append(adv_df)

                human_df = pd.concat(frames)
                human_vals = sort_humans(list(human_df["Guessing_Model"].unique()))
                human_dtype = CategoricalDtype(human_vals, ordered=True)
                human_df["Guessing_Model"] = human_df["Guessing_Model"].astype(human_dtype)
                dataset_dtype = CategoricalDtype(
                    ["Regular Test", "IR Adversarial", "RNN Adversarial"], ordered=True
                )
                human_df["Dataset"] = human_df["Dataset"].astype(dataset_dtype)

        if no_models:
            p = ggplot(human_df) + geom_point(shape=".")
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df["Dataset"] != "Round 1 - IR Adversarial"]
            if 2 not in self.rounds:
                df = df[df["Dataset"] != "Round 2 - IR Adversarial"]
                df = df[df["Dataset"] != "Round 2 - RNN Adversarial"]
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f"Saving df to: {self.save_df}")
                df.to_json(self.save_df)

            if os.path.exists("data/external/all_human_gameplay.json") and not self.no_humans:
                eprint("Loading human data")
                p = p + geom_line(data=human_df)

        if columns:
            facet_conf = facet_wrap("Guessing_Model", ncol=1)
        else:
            facet_conf = facet_wrap("Guessing_Model", nrow=1)

        if not no_models:
            if self.mvg_avg_char:
                chart = stat_smooth(method="mavg", se=False, method_args={"window": 400})
            else:
                chart = stat_summary_bin(
                    fun_data=mean_no_se, bins=20, shape=".", linetype="None", size=0.5
                )
        else:
            chart = None

        p = p + facet_conf + aes(x="char_percent", y="correct", color="Dataset")
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, 0.5, 1])
            + coord_cartesian(ylim=limits)
            + xlab("Percent of Question Revealed")
            + ylab("Accuracy")
            + theme(
                # legend_position='top',
                legend_box_margin=0,
                legend_title=element_blank(),
                strip_text_x=element_text(margin={"t": 6, "b": 6, "l": 1, "r": 5}),
            )
            + scale_color_manual(
                values=["#FF3333", "#66CC00", "#3333FF", "#FFFF33"], name="Questions"
            )
        )
        if self.title != "":
            p += ggtitle(self.title)
        return p
    else:
        if self.save_df is not None:
            eprint(f"Saving df to: {self.save_df}")
            # Save the dataframe actually plotted in this branch.
            self.char_plot_df.to_json(self.save_df)
        return (
            ggplot(self.char_plot_df)
            + aes(x="char_percent", y="correct", color="Guessing_Model")
            + stat_smooth(method="mavg", se=False, method_args={"window": 500})
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + coord_cartesian(ylim=limits)
        )

def day_night_attacks(Data, Data_m):
    print('======= Creating day_night_attacks =======')
    # Filter symptoms reported monthly and ever
    freq_all = Data[(Data.Group == 'sy')]
    freq_m = Data_m[(Data_m.Group == 'sy')]

    test = freq_all[(pd.isna(freq_all.year) == 0) & (pd.isna(freq_all.month) == 0)]
    Test_3 = pd.DataFrame(test.groupby("hour", as_index=False).count())
    Test_3 = Test_3.iloc[:, 0:2]
    Test_3 = Test_3.rename(columns={"Unnamed: 0": "n"})

    test_m = freq_m[(pd.isna(freq_m.year) == 0) & (pd.isna(freq_m.month) == 0)]
    Test_3_m = pd.DataFrame(test_m.groupby("hour", as_index=False).count())
    Test_3_m = Test_3_m.iloc[:, 0:2]
    Test_3_m = Test_3_m.rename(columns={"Unnamed: 0": "n"})

    plot = (p9.ggplot(data=Test_3, mapping=p9.aes(x='hour', y='n'))
            + p9.geom_point(color='red', size=10)
            + p9.geom_line(color='red', size=1)
            + p9.theme_classic()
            + p9.theme(axis_text=p9.element_text(size=40),
                       axis_title=p9.element_text(size=40, face='bold'))
            + p9.coord_cartesian(xlim=(1, 25))
            + p9.labs(x='Hours', y='No. of attacks')
            + p9.scale_x_discrete(limits=(range(1, 25))))

    plot_month = (p9.ggplot(data=Test_3_m, mapping=p9.aes(x='hour', y='n'))
                  + p9.geom_point(color='red', size=10)
                  + p9.theme_classic()
                  + p9.theme(axis_text=p9.element_text(size=40),
                             axis_title=p9.element_text(size=40, face='bold'))
                  + p9.coord_cartesian(xlim=(1, 25))
                  + p9.labs(x='Hours', y='No. of attacks')
                  + p9.scale_x_discrete(limits=(range(1, 25))))

    # Creating and saving MONTHLY Graph_3
    if len(Test_3_m) > 0:
        plot_month.save(filename='Graph_3.jpeg', plot=plot_month,
                        path="pdf/iteration/", width=25, height=5, dpi=320)
    else:
        print('Plot not created; no data found.')

    # Creating and saving EVER Graph_3
    if len(freq_all) > 0:
        plot.save(filename='Graph_ALL_3.jpeg', plot=plot,
                  path="pdf/iteration/", width=25, height=5, dpi=320)
    else:
        print('Plot not created; no data found.')

    print('================================= day_night_attacks DONE =============================')

def intensity_graph(Data, Data_m):
    print('======= Creating intensity_graph =======')
    x = Data.Intensity[pd.isna(Data.Intensity) == True]
    if len(x) == len(Data):
        print("WARNING: All values for Intensity are NA's")
    else:
        # Filter ever and monthly symptoms with a valid Intensity
        Data_m_int = Data_m[(Data_m.Group == "sy") & (pd.isna(Data_m.Intensity) == 0)]
        Data_all_int = Data[(Data.Group == "sy") & (pd.isna(Data.Intensity) == 0)]

        Test_3_m = Data_m_int.groupby("Intensity", sort=True, as_index=False).count()
        Test_3_m = Test_3_m.iloc[:, 0:2]
        Test_3_m = Test_3_m.rename(columns={"Unnamed: 0": "n"})

        Test_3 = Data_all_int.groupby("Intensity", sort=True, as_index=False).count()
        Test_3 = Test_3.iloc[:, 0:2]
        Test_3 = Test_3.rename(columns={"Unnamed: 0": "n"})

        plot = (p9.ggplot(data=Test_3, mapping=p9.aes(x='Intensity', y='n'))
                + p9.geom_col(fill='red')
                + p9.theme_classic()
                + p9.theme(axis_text=p9.element_text(size=40),
                           axis_title=p9.element_text(size=40, face='bold'))
                + p9.coord_cartesian(xlim=(1, 10))
                + p9.scale_x_continuous(labels=list(range(1, 11)),
                                        breaks=list(range(1, 11)))
                + p9.labs(x='', y='No. of attacks'))

        plot_month = (p9.ggplot(data=Test_3_m, mapping=p9.aes(x='Intensity', y='n'))
                      + p9.geom_col(fill='red')
                      + p9.theme_classic()
                      + p9.theme(axis_text=p9.element_text(size=40),
                                 axis_title=p9.element_text(size=40, face='bold'))
                      + p9.coord_cartesian(xlim=(1, 10))
                      + p9.scale_x_continuous(labels=list(range(1, 11)),
                                              breaks=list(range(1, 11)))
                      + p9.labs(x='', y='No. of attacks'))

        # Creating and saving MONTHLY Graph_1
        if len(Data_m_int) > 0:
            plot_month.save(filename='Graph_1.jpeg', plot=plot_month,
                            path="pdf/iteration/", width=25, height=5, dpi=320)
        else:
            print('Plot not created; no data found.')

        # Creating and saving EVER Graph_1
        if len(Data_all_int) > 0:
            plot.save(filename='Graph_ALL_1.jpeg', plot=plot,
                      path="pdf/iteration/", width=25, height=5, dpi=320)
        else:
            print('Plot not created; no data found.')

    print('================================= intensity_graph DONE =============================')

# Note: in this script, plotnine is imported as `plt`.
output_exp1 = expt.RunExp1(lambdas, alphas, [0.1], trainingSets, z)
bestAlpha = output_exp1.groupby('l').agg(rmse=('rmse', min)).reset_index('l')

# Experiment 2
lambdas = [0, 0.3, 0.8, 1]
alphas = np.arange(0.0, 1.0, 0.05)
output_exp2 = expt.RunExp2(lambdas, alphas, trainingSets, z)

# Rerun experiment 2 with different lambda values. Python is new to me,
# so this was easier than filtering -__-
lambdas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
output_exp21 = expt.RunExp2(lambdas, alphas, trainingSets, z)
bestAlpha2 = output_exp21.groupby('l').agg(rmse=('rmse', min)).reset_index('l')

# Plots
# Figure 3
(plt.ggplot(bestAlpha, plt.aes(x='l', y='rmse'))
 + plt.geom_line()
 + plt.geom_point())

# Figure 4
(plt.ggplot(output_exp2, plt.aes(x='a', y='rmse', color='l'))
 + plt.geom_line(plt.aes(group='l'))
 + plt.geom_point()
 + plt.coord_cartesian(xlim=(0, 0.62), ylim=(0, 0.7)))

# Figure 5
(plt.ggplot(bestAlpha2, plt.aes(x='l', y='rmse'))
 + plt.geom_line(plt.aes(group=1))
 + plt.geom_point())

def cli():
    parser = argparse.ArgumentParser(description='GAP - Git Activity Predictor')
    parser.add_argument('paths', metavar='PATH', type=str, nargs='*',
                        default=['.'],
                        help='Paths to one or more git repositories')
    parser.add_argument('--date',
                        type=lambda d: dateutil.parser.parse(d).date(),
                        required=False, default=datetime.date.today(),
                        help='Date used for predictions (default to current date)')
    parser.add_argument('--obs', type=int, required=False, default=20,
                        help='Number of observations to consider')
    parser.add_argument('--probs', metavar='PROB', type=float, nargs='*',
                        required=False, default=[0.5, 0.6, 0.7, 0.8, 0.9],
                        help='Probabilities to output, strictly in [0,1].')
    parser.add_argument('--limit', type=int, required=False, default=30,
                        help='Limit contributors to the ones that were active '
                             'at least once during the last x days (default 30)')
    parser.add_argument('--mapping', type=str, nargs='?',
                        help='Mapping file to merge identities. This file must be '
                             'a csv file where each line contains two values: the '
                             'name to be merged, and the corresponding identity. '
                             'Use "IGNORE" as identity to ignore specific names.')
    parser.add_argument('--branches', metavar='BRANCH', type=str, nargs='*',
                        default=list(),
                        help='Git branches to analyse (default to all).')
    parser.add_argument('--as-dates', dest='as_dates', action='store_true',
                        help='Express predictions using dates instead of '
                             'time differences in days')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('--text', action='store_true', help='Print results as text.')
    group.add_argument('--csv', action='store_true', help='Print results as csv.')
    group.add_argument('--json', action='store_true', help='Print results as json.')
    group.add_argument('--plot', nargs='?', const=True,
                       help='Export results to a plot. Filepath can be '
                            'optionally specified.')

    args = parser.parse_args()

    # Default plot location
    if args.plot is True:
        args.plot = str(args.date) + '.pdf'

    # Default to text if no other option is provided
    if not args.csv and not args.json and not args.plot:
        args.text = True

    # Identity mapping
    if args.mapping:
        d = pandas.read_csv(args.mapping, names=['source', 'target'])
        mapping = {r.source: r.target for r in d.itertuples()}
    else:
        mapping = {}

    raw_data = dict()  # author -> dates of activity

    # Get data from git
    for path in args.paths:
        try:
            repo = git.Repo(path)
        except Exception as e:  # Must be refined
            print('Unable to access repository {} ({}:{})'.format(
                path, e.__class__.__name__, e))
            sys.exit()

        # Default branches
        if len(args.branches) == 0:
            commits = repo.iter_commits('--all')
        else:
            commits = repo.iter_commits(' '.join(args.branches))

        for commit in commits:
            try:
                author = commit.author.name
                identity = mapping.get(author, author)
                if author.lower() != 'ignore' and identity.lower() == 'ignore':
                    continue
                date = datetime.date.fromtimestamp(commit.authored_date)
                raw_data.setdefault(identity, []).append(date)
            except Exception as e:
                print('Unable to read commit ({}: {}): {}'.format(
                    e.__class__.__name__, e, commit))

    # Compute durations and apply model
    data = []  # (author, past activities, predicted durations)
    for author, commits in raw_data.items():
        commits = sorted([e for e in commits if e <= args.date])
        durations = dates_to_duration(commits, window_size=args.obs)
        if len(durations) >= args.obs:
            # Currently implemented with no censor
            surv = SurvfuncRight(durations, [1] * len(durations))
            predictions = [surv.quantile(p) for p in args.probs]
            last_day = commits[-1]
            if last_day >= args.date - datetime.timedelta(args.limit):
                data.append((author, commits, predictions))

    # Prepare dataframe
    df = pandas.DataFrame(index=set([a for a, c, p in data]),
                          columns=['last'] + args.probs)
    if len(df) == 0:
        print('No author has {} observations and was active at least once '
              'during the last {} days'.format(args.obs, args.limit))
        sys.exit()
    df.index.name = 'author'

    if not args.plot:
        for author, commits, predictions in data:
            last = commits[-1]
            if args.as_dates:
                df.at[author, 'last'] = last
            else:
                df.at[author, 'last'] = (last - args.date).days
            for prob, p in zip(args.probs, predictions):
                if args.as_dates:
                    df.at[author, prob] = last + datetime.timedelta(days=int(p))
                else:
                    df.at[author, prob] = (last + datetime.timedelta(days=int(p))
                                           - args.date).days

        df = df.sort_values(['last'] + args.probs,
                            ascending=[False] + [True] * len(args.probs))
        df = df.astype(str)

        if args.text:
            pandas.set_option('expand_frame_repr', False)
            pandas.set_option('display.max_columns', 999)
            print(df)
        elif args.csv:
            print(df.to_csv())
        elif args.json:
            print(df.to_json(orient='index'))
    else:
        # Because of plotnine's way of initializing matplotlib
        import warnings
        warnings.filterwarnings("ignore")

        VIEW_LIMIT = 28
        # List of (author, day) where day is a delta w.r.t. given date
        activities = []
        # List of (author, from_day, to_day, p) where probability p applies
        # between from_day and to_day (deltas w.r.t. given date)
        forecasts = []
        for author, commits, predictions in data:
            last = (commits[-1] - args.date).days
            for e in commits:
                activities.append((author, (e - args.date).days))

            previous = previous_previous = 0
            for d, p in zip(predictions, args.probs):
                if d > previous:
                    forecasts.append((author, last + previous, last + d, p))
                    previous_previous = previous
                    previous = d
                else:
                    forecasts.append((author, last + previous_previous, last + d, p))

        activities = pandas.DataFrame(columns=['author', 'day'], data=activities)
        forecasts = pandas.DataFrame(columns=['author', 'fromd', 'tod', 'p'],
                                     data=forecasts)

        plot = (p9.ggplot(p9.aes(y='author'))
                + p9.geom_segment(
                    p9.aes('day - 0.5', 'author', xend='day + 0.5', yend='author'),
                    data=activities, size=4, color='orange')
                + p9.geom_segment(
                    p9.aes('fromd + 0.5', 'author', xend='tod + 0.5',
                           yend='author', alpha='factor(p)'),
                    data=forecasts.sort_values('p').drop_duplicates(
                        ['author', 'fromd', 'tod'], keep='last'),
                    size=4, color='steelblue')
                + p9.geom_vline(xintercept=0, color='r', alpha=0.5, linetype='dashed')
                + p9.scale_x_continuous(
                    name=' << past days {:^20} future days >>'.format(str(args.date)),
                    breaks=range(-VIEW_LIMIT // 7 * 7, (VIEW_LIMIT // 7 * 7) + 1, 7),
                    minor_breaks=6)
                + p9.scale_y_discrete(
                    name='',
                    limits=activities.sort_values('day',
                                                  ascending=False)['author'].unique())
                + p9.scale_alpha_discrete(range=(0.2, 1), name=' ')
                + p9.coord_cartesian(xlim=(-VIEW_LIMIT, VIEW_LIMIT))
                + p9.theme_matplotlib()
                + p9.theme(figure_size=(6, 4 * activities['author'].nunique() / 15)))

        fig = plot.draw()
        fig.savefig(args.plot, bbox_inches='tight')
        print('Plot exported to {}'.format(args.plot))

# Fragment: tail of the frame-plot expression (cf. plot() above), here assigned
# to `plot` and saved to disk; the leading linkage geoms, node annotations and
# the point-B acceleration arrow are the same as in plot().
plot = (
    ggplot(sol) +
    # ... main linkage, node annotations and point-B arrow, as in plot() above ...
    geom_segment(aes(x=sol.Ra[k].real, y=sol.Ra[k].imag,
                     xend=sol.Ra[k].real + sol.Aa[k].real * ACC_SCALE,
                     yend=sol.Ra[k].imag + sol.Aa[k].imag * ACC_SCALE),
                 colour='red', arrow=arrow()) +  # Point A
    geom_segment(aes(x=sol.Rpa[k].real, y=sol.Rpa[k].imag,
                     xend=sol.Rpa[k].real + sol.Apaa[k].real * ACC_SCALE,
                     yend=sol.Rpa[k].imag + sol.Apaa[k].imag * ACC_SCALE),
                 colour='red', arrow=arrow()) +  # Point C
    # ACCELERATION TEXTS (comment these out if you want the plot without
    # acceleration information). The text positions may be adjusted if the
    # plot gets hard to read.
    annotate("text", x=sol.Rba[k].real, y=sol.Rba[k].imag + 10,
             label=f'${np.absolute(sol.Aba[k]) / 1000:.2f}~m/s^2$', colour='red') +
    annotate("text", x=sol.Ra[k].real, y=sol.Ra[k].imag - 20,
             label=f'${np.absolute(sol.Aa[k]) / 1000:.2f}~m/s^2$', colour='red') +
    annotate("text", x=sol.Rpa[k].real + 10, y=sol.Rpa[k].imag - 20,
             label=f'${np.absolute(sol.Apaa[k]) / 1000:.2f}~m/s^2$', colour='red') +
    # MECHANISM KINEMATIC PROPERTIES (angles converted from radians to degrees
    # with 180/pi). Braces are doubled so Python doesn't interpret 3a or 4a as
    # variables.
    annotate("label", x=-50, y=-100,
             label=f'$\\theta_2={sol.theta2[k] * 180 / pi:.2f}^\\circ$') +
    annotate("label", x=-10, y=-100,
             label=f'$\\theta_{{3a}}={sol.theta3a[k] * 180 / pi:.2f}^\\circ$, '
                   f'$\\theta_{{3c}}={sol.theta3c[k] * 180 / pi:.2f}^\\circ$') +
    annotate("label", x=45, y=-100,
             label=f'$\\theta_{{4a}}={sol.theta4a[k] * 180 / pi:.2f}^\\circ$, '
                   f'$\\theta_{{4c}}={sol.theta4c[k] * 180 / pi:.2f}^\\circ$') +
    annotate("label", x=-50, y=-150, label=f'$\\omega_2={sol.omega2[k]:.2f}~rad/s$') +
    annotate("label", x=0, y=-150,
             label=f'$\\omega_{{3a}}={sol.omega3a[k]:.2f}~rad/s$, '
                   f'$\\omega_{{3c}}={sol.omega3c[k]:.2f}~rad/s$') +
    annotate("label", x=70, y=-150,
             label=f'$\\omega_{{4a}}={sol.omega4a[k]:.2f}~rad/s$, '
                   f'$\\omega_{{4c}}={sol.omega4c[k]:.2f}~rad/s$') +
    annotate("label", x=-50, y=-200,
             label=f'$\\alpha_2={sol.alpha2[k]:.2f}~rad/s^2$') +
    annotate("label", x=0, y=-200,
             label=f'$\\alpha_{{3a}}={sol.alpha3a[k]:.2f}~rad/s^2$, '
                   f'$\\alpha_{{3c}}={sol.alpha3c[k]:.2f}~rad/s^2$') +
    annotate("label", x=70, y=-200,
             label=f'$\\alpha_{{4a}}={sol.alpha4a[k]:.2f}~rad/s^2$, '
                   f'$\\alpha_{{4c}}={sol.alpha4c[k]:.2f}~rad/s^2$') +
    # labs(x='$x~[mm]$', y='$y~[mm]$') +
    # Fixes the plot limits so they don't grow larger than necessary;
    # comment this out if you prefer auto-scaling.
    coord_cartesian(xlim=SCALE_X, ylim=SCALE_Y) +
    theme_bw()  # Prettier than the default theme.
)
plot.save('SolutionPlot.pdf', dpi=330, width=50, height=30, units='cm')