Esempio n. 1
0
def plot_individual_returns(
        df_in: pd.DataFrame,
        max_episode: int,
        return_column: str = 'episode_return',
        colour_var: Optional[str] = None,
        yintercept: Optional[float] = None,
        sweep_vars: Optional[Sequence[str]] = None) -> gg.ggplot:
    """Plot individual learning curves: one curve per sweep setting.

    Args:
      df_in: source frame; it is copied, so the caller's frame is not modified.
        Must contain an 'episode' column and `return_column`.
      max_episode: upper x-axis limit (the axis is shown from 0 to this value).
      return_column: column plotted on the y-axis.
      colour_var: optional column used to colour the lines. When it has at
        most five distinct values it is treated as categorical and coloured
        with FIVE_COLOURS.
      yintercept: optional y value for a dashed horizontal reference line.
        Any number, including 0, draws the line; None disables it.
      sweep_vars: optional sweep columns, used to build the per-curve group
        and passed on to `facet_sweep_plot`.

    Returns:
      The plot returned by `facet_sweep_plot(..., tall_plot=True)`.
    """
    df = df_in.copy()
    df['unique_group'] = _make_unique_group_col(df, sweep_vars)
    p = (gg.ggplot(df) +
         gg.aes(x='episode', y=return_column, group='unique_group') +
         gg.coord_cartesian(xlim=(0, max_episode)))
    if colour_var:
        p += gg.geom_line(gg.aes(colour=colour_var), size=1.1, alpha=0.75)
        if len(df[colour_var].unique()) <= 5:
            df[colour_var] = df[colour_var].astype('category')
            p += gg.scale_colour_manual(values=FIVE_COLOURS)
    else:
        p += gg.geom_line(size=1.1, alpha=0.75, colour='#313695')
    # Compare against None explicitly: a truthiness test would silently drop
    # a perfectly valid reference line at y == 0.
    if yintercept is not None:
        p += gg.geom_hline(yintercept=yintercept,
                           alpha=0.5,
                           size=2,
                           linetype='dashed')
    return facet_sweep_plot(p, sweep_vars, tall_plot=True)
Esempio n. 2
0
def plot_downstream(clwe, table, output, ylim):
    """Save a dodged bar chart of accuracy per language for one `clwe` value.

    Reads the CSV located by `data_file(table)`, keeps the rows whose `clwe`
    column equals `clwe`, and writes the figure to `output_file(output)`.
    `ylim` is forwarded to `coord_cartesian` as the visible y-range.
    """
    # Fixed category orders so bars and legend entries appear in this sequence.
    refine_levels = ['Original', '+retrofit', '+synthetic']
    language_levels = ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG']

    scores = pd.read_csv(data_file(table))
    scores = scores[scores.clwe == clwe]
    scores = scores.assign(
        refine=pd.Categorical(scores['refine'], refine_levels),
        language=pd.Categorical(scores['language'], language_levels),
    )

    chart = (
        p9.ggplot(scores, p9.aes(x='language', y='accuracy', fill='refine'))
        + p9.geom_bar(position='dodge', stat='identity', width=.8)
        + p9.coord_cartesian(ylim=ylim)
        + p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
        + p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
        + p9.theme(
            plot_background=p9.element_rect(fill='white'),
            panel_grid_major_y=p9.element_line(),
            axis_text_x=p9.element_text(margin={'t': 10}),
            axis_text_y=p9.element_text(margin={'r': 8}),
            legend_position=(.7, .9),
            legend_direction='horizontal',
            legend_title=p9.element_blank(),
            legend_text=p9.element_text(size=FONT_SIZE),
            legend_box_margin=0,
            figure_size=(12, 3),
        )
    )
    chart.save(filename=output_file(output))
Esempio n. 3
0
def round_2_plot():
    """Download (if missing) the round-2 data frame, verify it, and save the plot.

    The JSON file is fetched to `round_2_df_path` only when it does not exist
    yet; the checksum is verified on every call. The figure is written to
    '2019_tacl_trick/auto_fig/round_2_json.pdf'.
    """
    already_cached = os.path.exists(round_2_df_path)
    if not already_cached:
        eprint(f'Downloading {round_2_df_url} to {round_2_df_path}')
        urlretrieve(round_2_df_url, round_2_df_path)
    verify_checksum(round_2_df_checksum, round_2_df_path)

    round_2_df = pd.read_json(round_2_df_path)

    figure = ggplot(round_2_df)
    figure = figure + aes(x='char_percent', y='correct', color='Dataset')
    figure = figure + facet_wrap('Guessing_Model', nrow=1)
    # Binned means only: no error bars and no connecting line.
    figure = figure + stat_summary_bin(
        fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)
    figure = figure + scale_y_continuous(breaks=np.linspace(0, 1, 6))
    figure = figure + scale_x_continuous(breaks=[0, .5, 1])
    figure = figure + coord_cartesian(ylim=[0, 0.7])
    figure = figure + ggtitle('Round 2 Attacks and Models')
    figure = figure + xlab('Percent of Question Revealed')
    figure = figure + ylab('Accuracy')
    figure = figure + theme(
        strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5}))
    figure = figure + scale_color_manual(
        values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
        name='Questions')

    figure.save('2019_tacl_trick/auto_fig/round_2_json.pdf',
                width=7.0, height=1.7)
Esempio n. 4
0
def misspecified_plot(experiment_name='finite_misspecified',
                      data_path=_DEFAULT_DATA_PATH):
  """Specialized plotting script for TS tutorial paper misspecified TS.

  Args:
    experiment_name: experiment identifier passed to `load_data`.
    data_path: data location passed to `load_data`.

  Returns:
    Dict with two plots: 'misspecified_regret' (average instantaneous regret
    over time per agent) and 'misspecified_action' (average of the first three
    posterior-mean entries over time per agent).
  """
  df = load_data(experiment_name, data_path)

  def _parse_np_array(np_string):
    """Split a printed array such as '[0.1 0.2 0.3]' into an array of tokens."""
    return np.array(np_string.replace('[', '')
                    .replace(']', '')
                    .strip()
                    .split())
  df['posterior_mean'] = df.posterior_mean.apply(_parse_np_array)

  # Action means: one float column per entry of the parsed posterior mean.
  new_col_list = ['mean_0', 'mean_1', 'mean_2']
  for n, col in enumerate(new_col_list):
    # The lambda is consumed immediately by apply, so capturing `n` is safe.
    df[col] = df['posterior_mean'].apply(lambda x: float(x[n]))

  plt_df = (df.groupby(['agent', 't'])
            .agg({'instant_regret': np.mean,
                  'mean_0': np.mean,
                  'mean_1': np.mean,
                  'mean_2': np.mean})
            .reset_index())

  regret_plot = (gg.ggplot(plt_df)
                 + gg.aes('t', 'instant_regret', colour='agent')
                 + gg.geom_line(size=1.25, alpha=0.75)
                 + gg.xlab('Timestep (t)')
                 + gg.ylab('Average instantaneous regret')
                 + gg.scale_colour_brewer(name='Agent', type='qual', palette='Set1')
                 + gg.coord_cartesian(ylim=(0, 0.02)))

  # Long format: one row per (agent, t, mean_i) so each mean is its own line.
  melt_df = pd.melt(plt_df, id_vars=['agent', 't'], value_vars=new_col_list)
  melt_df['group_id'] = melt_df.agent + melt_df.variable
  action_plot = (gg.ggplot(melt_df)
                 + gg.aes('t', 'value', colour='agent', group='group_id')
                 + gg.geom_line(size=1.25, alpha=0.75)
                 + gg.coord_cartesian(ylim=(0, 0.05))
                 + gg.xlab('Timestep (t)')
                 + gg.ylab('Expected mean reward')
                 + gg.scale_colour_brewer(name='Agent', type='qual', palette='Set1'))

  plot_dict = {}
  plot_dict['misspecified_regret'] = regret_plot
  plot_dict['misspecified_action'] = action_plot
  return plot_dict
Esempio n. 5
0
def plot(solu, k):
    """Build the plot for frame `k` of the four-bar mechanism animation.

    Row `k` of `solu` is sliced out and its position columns (Ro4, Ra, Rpa,
    Rba) and acceleration columns (Aba, Aa, Apaa) are read through their
    `.real` / `.imag` parts as x / y coordinates. The frame number is printed
    as a progress message. Returns the assembled plot.
    """
    print("Frame: ", k)

    sol = solu[k:k + 1]

    # Positions of the joints and accelerations of points B, A and P.
    ro4, ra, rpa, rba = sol.Ro4[k], sol.Ra[k], sol.Rpa[k], sol.Rba[k]
    acc_b, acc_a, acc_p = sol.Aba[k], sol.Aa[k], sol.Apaa[k]

    def link(x0, y0, x1, y1):
        # Straight bar between two joints.
        return geom_segment(aes(x=x0, y=y0, xend=x1, yend=y1))

    def joint(x0, y0):
        # Circular marker at a joint.
        return geom_point(aes(x=x0, y=y0), shape='o', size=3)

    def acc_arrow(pos, acc):
        # Red arrow from a point along its acceleration, scaled by ACC_SCALE.
        return geom_segment(aes(x=pos.real, y=pos.imag,
                                xend=pos.real + acc.real * ACC_SCALE,
                                yend=pos.imag + acc.imag * ACC_SCALE),
                            colour='red', arrow=arrow())

    def acc_text(x0, y0, acc):
        # Text wrapped in '$ $' is rendered as LaTeX by plotnine.
        return annotate("text", x=x0, y=y0,
                        label=f'${np.absolute(acc)/1000:.2f}~m/s^2$',
                        colour='red')

    layers = [
        # MAIN LINKAGE
        link(0, 0, ro4.real, ro4.imag),
        joint(0, 0),
        joint(ro4.real, ro4.imag),
        # 2ND LINKAGE
        link(0, 0, ra.real, ra.imag),
        joint(ra.real, ra.imag),
        # AP LINKAGE
        link(ra.real, ra.imag, rpa.real, rpa.imag),
        joint(rpa.real, rpa.imag),
        # 3RD LINKAGE
        link(ra.real, ra.imag, rba.real, rba.imag),
        joint(rba.real, rba.imag),
        # 4TH LINKAGE
        link(rba.real, rba.imag, ro4.real, ro4.imag),
        joint(rba.real, rba.imag),
        # NODES IDENTIFICATION
        annotate("text", x=0, y=-20, label="$O_1$"),
        annotate("text", x=ro4.real, y=ro4.imag - 20, label="$O_4$"),
        annotate("text", x=ra.real + 10, y=ra.imag, label="$A$"),
        annotate("text", x=rba.real + 20, y=rba.imag - 10, label="$B$"),
        annotate("text", x=rpa.real, y=rpa.imag - 40, label="$P$"),
        # ACCELERATION ARROWS (drop these entries to hide acceleration info)
        acc_arrow(rba, acc_b),  # Point B
        acc_arrow(ra, acc_a),   # Point A
        acc_arrow(rpa, acc_p),  # Point P
        # ACCELERATION TEXTS (drop these entries to hide acceleration info)
        acc_text(rba.real - 30, rba.imag + 10, acc_b),
        acc_text(ra.real + 20, ra.imag - 20, acc_a),
        acc_text(rpa.real + 10, rpa.imag + 20, acc_p),
        # TIME IDENTIFICATION
        annotate("label", x=120, y=-80,
                 label=f'Time: ${sol.time[k]:.2f}~s$', alpha=1),
        labs(x='$x~[mm]$', y='$y~[mm]$'),
        # Fixed limits keep every frame on the same canvas.
        coord_cartesian(xlim=SCALE_X, ylim=SCALE_Y),
        theme_bw(),
    ]

    p = ggplot(sol)
    for layer in layers:
        p = p + layer

    return p
Esempio n. 6
0
def plot_scaling(plt_df: pd.DataFrame,
                 sweep_vars: Sequence[Text] = None,
                 with_baseline: bool = True) -> gg.ggplot:
    """Plot how learning time scales with problem size, faceted by sweep.

    Starts from `_base_scaling`, adds axis labels and, when `with_baseline`
    is set, limits the visible y-range to the smaller of NUM_EPISODES and the
    largest episode present in `plt_df`.
    """
    scaling_plot = _base_scaling(plt_df, sweep_vars, with_baseline)

    additions = [
        gg.xlab('deep sea problem size'),
        gg.ylab('#episodes until < 90% bad episodes'),
    ]
    if with_baseline:
        y_cap = np.minimum(NUM_EPISODES, plt_df.episode.max())
        additions.append(gg.coord_cartesian(ylim=(0, y_cap)))

    for addition in additions:
        scaling_plot += addition
    return plotting.facet_sweep_plot(scaling_plot, sweep_vars)
Esempio n. 7
0
def ensemble_plot(experiment_name='ensemble_nn', data_path=_DEFAULT_DATA_PATH):
    """Specialized plotting script for TS tutorial paper ensemble NN.

    Args:
      experiment_name: experiment identifier passed to `load_data`; also used
        as the prefix of the keys in the returned dict.
      data_path: data location passed to `load_data`.

    Returns:
      Dict mapping '<experiment_name>_<agent family>' to a plot of average
      instantaneous regret over time, one plot per agent family.
    """
    df = load_data(experiment_name, data_path)
    plt_df = (df.groupby(['agent', 't']).agg({
        'instant_regret': np.mean
    }).reset_index())

    def _get_agent_family(agent_name):
        """Classify an agent name into one of four families."""
        if 'dropout' in agent_name.lower():
            return 'Dropout'
        elif 'ensemble' in agent_name.lower():
            return 'Ensemble'
        elif '/' in agent_name.lower():
            return 'Annealing epsilon'
        else:
            return 'Fixed epsilon'

    def _rename_ensemble(agent_name):
        """Zero-pad the leading size of ensemble agents so names sort by size."""
        if 'ensemble' in agent_name:
            n_ensemble = agent_name.split('-')[0]
            new_name = 'ensemble=' + n_ensemble.zfill(3)

            return new_name
        else:
            return agent_name

    plt_df['agent_name'] = plt_df.agent.apply(_rename_ensemble)
    plt_df['agent_family'] = plt_df.agent.apply(_get_agent_family)

    custom_colors = ['#d53e4f', '#fdae61', '#a6d96a', '#66c2a5', '#5e4fa2']

    plot_dict = {}
    # Group by the scalar column name, not a one-element list: with a list
    # key pandas yields 1-tuples such as ('Ensemble',), which breaks both the
    # string comparison and the dict-key concatenation below.
    for agent_family, df_family in plt_df.groupby('agent_family'):
        if agent_family == 'Ensemble':
            custom_labels = [
                'Ensemble 3', 'Ensemble 10', 'Ensemble 30', 'Ensemble 100',
                'Ensemble 300'
            ]
            gg_legend = gg.scale_colour_manual(values=custom_colors,
                                               labels=custom_labels,
                                               name='Agent')
        else:
            gg_legend = gg.scale_colour_manual(custom_colors, name='Agent')

        p = (gg.ggplot(df_family) +
             gg.aes('t', 'instant_regret', colour='agent_name') +
             gg.geom_line(size=1.25, alpha=0.75) +
             gg.facet_wrap('~ agent_family') + gg_legend +
             gg.coord_cartesian(ylim=(0, 60)) + gg.xlab('Timestep (t)') +
             gg.ylab('Average instantaneous regret') +
             gg.theme(figure_size=(6, 6)))
        plot_dict[experiment_name + '_' + agent_family] = p

    return plot_dict
Esempio n. 8
0
def bandit_learning_format(plot: gg.ggplot) -> gg.ggplot:
    """Apply the shared bandit styling to `plot` and return it.

    Adds y breaks every 0.1 from 0 to 1, thick major and zero-width minor
    horizontal grid lines, a dashed line at BASE_REGRET, and a visible
    y-range of 0 to 1. The components are added with `+=` on the argument.
    """
    y_breaks = np.arange(0, 1.1, 0.1).tolist()
    components = (
        gg.scale_y_continuous(breaks=y_breaks),
        gg.theme(panel_grid_major_y=gg.element_line(size=2.5),
                 panel_grid_minor_y=gg.element_line(size=0)),
        gg.geom_hline(gg.aes(yintercept=BASE_REGRET),
                      linetype='dashed',
                      alpha=0.4,
                      size=1.75),
        gg.coord_cartesian(ylim=(0, 1)),
    )
    for component in components:
        plot += component
    return plot
Esempio n. 9
0
def plot_elos():
    """Plot win rate as a function of Elo difference from -1000 to +1000.

    The curve is 1 / (1 + 10 ** (-diff / 400)), with faint guide lines at
    diff = 0 and win rate = 0.5.
    """
    elo_gap = np.linspace(-1000, +1000)
    curve = pd.DataFrame({
        'elo': elo_gap,
        'winrate': 1 / (1 + 10**(-elo_gap / 400)),
    })

    chart = pn.ggplot(curve)
    chart = chart + pn.geom_line(pn.aes(x='elo', y='winrate'))
    chart = chart + pn.geom_vline(xintercept=0, alpha=.1)
    chart = chart + pn.geom_hline(yintercept=.5, alpha=.1)
    chart = chart + pn.labs(x='Own Elo relative to opponent\'s Elo',
                            y='Win rate v. opponent')
    chart = chart + pn.scale_y_continuous(labels=percent_format())
    # expand=False: no padding around the data range.
    chart = chart + pn.coord_cartesian(expand=False)
    return chart + plot.IEEE()
Esempio n. 10
0
def plot_learning(df: pd.DataFrame,
                  sweep_vars: Sequence[str] = None) -> gg.ggplot:
  """Plot average regret through time, grouped by optimal_horizon.

  The frame is run through `dc_preprocess` first. A dashed line at
  BASE_REGRET is added and the visible y-range is limited to 0-0.1.
  """
  processed = dc_preprocess(df_in=df)
  regret_plot = plotting.plot_regret_learning(
      df_in=processed,
      group_col='optimal_horizon',
      sweep_vars=sweep_vars,
      max_episode=sweep.NUM_EPISODES,
  )
  baseline = gg.geom_hline(gg.aes(yintercept=BASE_REGRET),
                           linetype='dashed', alpha=0.4, size=1.75)
  for component in (baseline, gg.coord_cartesian(ylim=(0, 0.1))):
    regret_plot += component
  return regret_plot
Esempio n. 11
0
def plot_result_stats(results, title):
    """Plot the `describe()` statistics of `results` as dodged, labelled bars.

    Args:
      results: object whose `describe()` output is unstacked into one row
        per (metric, statistic) pair.
      title: plot title.

    Returns:
      A plot with one bar group per metric and one bar per remaining
      statistic ('count', 'min' and 'max' are dropped), each labelled with
      its value rounded to two decimals. The visible y-range is 0 to 1.
    """
    stats = results.describe().unstack().reset_index().rename(columns={
        "level_0": "metric",
        "level_1": "group",
        0: "value"
    })
    # Copy after filtering so the column assignment below writes to an
    # independent frame rather than to a slice of the unfiltered one.
    stats = stats[~stats["group"].isin(["count", "min", "max"])].copy()
    stats["value_presentation"] = stats["value"].round(2)
    plot = (p9.ggplot(stats) + p9.aes("metric", "value", fill="group") +
            p9.geom_col(position="dodge") + p9.theme_bw() +
            p9.coord_cartesian(ylim=[0, 1.0]) + p9.ggtitle(title) +
            p9.geom_text(p9.aes(label="value_presentation"),
                         position=p9.position_dodge(width=0.9),
                         va="bottom"))
    return plot
Esempio n. 12
0
    def create(self, file_path: str) -> None:
        """Save one box plot per metric in `self._data`.

        For each distinct value of the "metric" column a PDF named
        '<file_path>.<metric>.pdf' is written, showing "value" by "category".
        Outlier markers are hidden and the visible y-range is derived from
        the 2nd and 98th percentiles of the metric's values.
        """
        for metric in self._data["metric"].unique():
            data = self._data[self._data["metric"] == metric]
            lower, upper = np.percentile(data["value"], [2, 98])

            # Pad each bound 20% outward and pass them as (low, high). The
            # previous code passed (upper * 0.8, lower * 1.2), which inverts
            # or shrinks the axis. min/max keep the padding outward for
            # negative bounds as well.
            y_min = min(lower * 0.8, lower * 1.2)
            y_max = max(upper * 0.8, upper * 1.2)

            (ggplot(data, aes(x="category", y="value")) +
             geom_boxplot(outlier_shape="") +
             coord_cartesian(ylim=(y_min, y_max))
             + ggtitle(metric)
             + xlab("Category") + ylab("Value") +
             theme_classic(base_size=28, base_family="Helvetica")
             ).save(f"{file_path}.{metric}.pdf", width=24, height=24)
Esempio n. 13
0
def plot_calibrations():
    """Plot the sampled calibration intervals per board size.

    Each board size gets a rectangle from 'lower' to 'upper' and a
    zero-height rectangle at 'mid', both half a unit wide, over a faint
    guide line at 0.5. The visible y-range is 0.4 to 0.6.
    """
    params = data.sample_calibrations()

    canvas = pn.ggplot(
        params,
        pn.aes(xmin='boardsize-.25',
               xmax='boardsize+.25',
               group='boardsize',
               fill='factor(boardsize)'))
    guide = pn.geom_hline(yintercept=.5, alpha=.2)
    interval = pn.geom_rect(pn.aes(ymin='lower', ymax='upper'),
                            show_legend=False,
                            color='k')
    # ymin == ymax: the rectangle collapses to a line, thickened via `size`.
    midline = pn.geom_rect(pn.aes(ymin='mid', ymax='mid'),
                           show_legend=False,
                           color='k',
                           size=2)
    y_scale = pn.scale_y_continuous(labels=percent_format())
    fill_scale = pn.scale_fill_hue(l=.4)
    window = pn.coord_cartesian(ylim=(.4, .6))
    axis_labels = pn.labs(y='Win rate v. perfect play', x='Board size')

    return (canvas + guide + interval + midline + y_scale + fill_scale +
            window + axis_labels + plot.IEEE())
Esempio n. 14
0
def plot_average(df: pd.DataFrame,
                 sweep_vars: Sequence[Text] = None,
                 group_col: Text = 'noise_scale') -> gg.ggplot:
  """Plot the average regret at sweep.NUM_EPISODES, grouped by `group_col`.

  On top of `plotting.plot_regret_average` this adds y breaks every 0.1,
  thick major / zero-width minor horizontal grid lines, a dashed line at
  bandit_analysis.BASE_REGRET, and a visible y-range of 0 to 1.
  """
  average_plot = plotting.plot_regret_average(
      df_in=df,
      group_col=group_col,
      episode=sweep.NUM_EPISODES,
      sweep_vars=sweep_vars,
  )
  y_breaks = np.arange(0, 1.1, 0.1).tolist()
  decorations = (
      gg.scale_y_continuous(breaks=y_breaks),
      gg.theme(panel_grid_major_y=gg.element_line(size=2.5),
               panel_grid_minor_y=gg.element_line(size=0)),
      gg.geom_hline(gg.aes(yintercept=bandit_analysis.BASE_REGRET),
                    linetype='dashed', alpha=0.4, size=1.75),
      gg.coord_cartesian(ylim=(0, 1)),
  )
  for decoration in decorations:
    average_plot += decoration
  return average_plot
Esempio n. 15
0
def plot_regret_learning(df_in: pd.DataFrame,
                         group_col: Optional[str] = None,
                         sweep_vars: Optional[Sequence[str]] = None,
                         regret_col: str = 'total_regret',
                         max_episode: Optional[int] = None) -> gg.ggplot:
    """Plot average regret (regret_col / episode) over episodes.

    Works on a copy of `df_in`. Rows beyond `max_episode` are dropped (no
    rows are dropped when it is None or 0). With `group_col` the curves are
    drawn by `_plot_regret_group`, otherwise by `_plot_regret_single`. The
    result is faceted by `sweep_vars`.
    """
    frame = df_in.copy()
    frame['average_regret'] = frame[regret_col] / frame.episode
    episode_cap = max_episode or np.inf
    frame = frame[frame.episode <= episode_cap]

    if group_col is not None:
        regret_plot = _plot_regret_group(frame, group_col)
    else:
        regret_plot = _plot_regret_single(frame)

    # Invisible line at y = 0 so the axis always includes zero (axis hack).
    regret_plot += gg.geom_hline(gg.aes(yintercept=0.0), alpha=0)
    regret_plot += gg.ylab('average regret per timestep')
    # NOTE(review): with max_episode=None the upper x-limit passed here is
    # None - confirm that coord_cartesian accepts an open upper bound.
    regret_plot += gg.coord_cartesian(xlim=(0, max_episode))
    return facet_sweep_plot(regret_plot, sweep_vars, tall_plot=True)
Esempio n. 16
0
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals,
             image_file_type):
    """Save a jittered scatter plot comparing two sets of values.

    Args:
      grdevices: unused; kept so existing callers keep working.
      plotName: output file name passed to `save`.
      samp_set1_vals: values plotted under the "set1" label.
      samp_set2_vals: values plotted under the "set2" label.
      image_file_type: output format passed to `save`.
    """
    # Coerce to lists so `+` below is always concatenation; on array-like
    # inputs it could otherwise mean element-wise addition.
    set1_vals = list(samp_set1_vals)
    set2_vals = list(samp_set2_vals)

    samp_vector = ["set1"] * len(set1_vals) + ["set2"] * len(set2_vals)
    data_vector = set1_vals + set2_vals

    dframe = pd.DataFrame(list(zip(samp_vector, data_vector)),
                          columns=["sample", "value"])

    # geom_jitter builds its jitter position from width/height; passing
    # position="jitter" as well is redundant and rejected by plotnine.
    jitter_plot = (pn.ggplot(dframe, pn.aes(x="sample", y="value")) +
                   pn.geom_jitter(width=0.2, height=0.01) +
                   pn.coord_cartesian(ylim=(0, 100)) + pn.theme_bw())

    # TODO Just infer format from plotName
    jitter_plot.save(filename=plotName, format=image_file_type)
Esempio n. 17
0
def limits(x, y=None, xbreaks=None, ybreaks=None):
    """Return plot components that fix the visible range and axis breaks.

    Args:
      x: (low, high) pair for the x-axis.
      y: (low, high) pair for the y-axis; defaults to `x`.
      xbreaks: x-axis breaks; defaults to unit-spaced breaks from low to high.
      ybreaks: y-axis breaks; defaults to unit-spaced breaks from low to high.

    Returns:
      A list of three components: a `coord_cartesian` with both ranges, and
      x and y continuous scales whose limits start at the low value and have
      an open (None) upper end.
    """
    if y is None:
        y = x

    x0, x1 = x
    y0, y1 = y

    # np.linspace needs an integer sample count; cast so float limits such as
    # (0.0, 5.0) work as well as integer ones.
    if xbreaks is None:
        xbreaks = np.linspace(x0, x1, int(x1 - x0) + 1)
    if ybreaks is None:
        ybreaks = np.linspace(y0, y1, int(y1 - y0) + 1)

    # We want these plots to continue to the top and left.
    return [ gg.coord_cartesian(xlim=x, ylim=y),
             gg.scale_x_continuous(limits=(x0, None), breaks=xbreaks),
             gg.scale_y_continuous(limits=(y0, None), breaks=ybreaks) ]
Esempio n. 18
0
def plot_regret_group_nosmooth(df_in: pd.DataFrame,
                               group_col: str,
                               sweep_vars: Sequence[str] = None,
                               regret_col: str = 'total_regret',
                               max_episode: int = None) -> gg.ggplot:
  """Plots the average regret through time without smoothing.

  Args:
    df_in: source frame; it is copied, so the caller's frame is not modified.
      Must contain 'episode', `regret_col` and `group_col` columns.
    group_col: column used to group and colour the lines. A copy of it, with
      underscores replaced by spaces in the name, is used for the legend.
    sweep_vars: optional sweep columns passed to `facet_sweep_plot`.
    regret_col: cumulative regret column, divided by episode to get the
      average.
    max_episode: optional episode cap. When given, later rows are dropped and
      the x-axis is shown from 0 to this value; None keeps all rows.

  Returns:
    The plot returned by `facet_sweep_plot(..., tall_plot=True)`.
  """
  df = df_in.copy()
  df['average_regret'] = df[regret_col] / df.episode
  # Only filter when a cap is given: comparing the column against None
  # matches no rows, so the default used to produce an empty plot.
  if max_episode is not None:
    df = df[df.episode <= max_episode]
  group_name = group_col.replace('_', ' ')
  df[group_name] = df[group_col]
  p = (gg.ggplot(df)
       + gg.aes(x='episode', y='average_regret',
                group=group_name, colour=group_name)
       + gg.geom_line(size=2, alpha=0.75)
       + gg.geom_hline(gg.aes(yintercept=0.0), alpha=0)  # axis hack
      )
  if max_episode is not None:
    p += gg.coord_cartesian(xlim=(0, max_episode))
  return facet_sweep_plot(p, sweep_vars, tall_plot=True)
Esempio n. 19
0
def plot_flops_frontier(ags):
    """Plot observed (solid) and fitted (dashed) Elo against training FLOPS.

    One pair of lines per board size, on a log10 x-axis, with the upper
    y-limit fixed at 0. Data comes from `data.modelled_elos(ags)`.
    """
    elos = data.modelled_elos(ags)

    canvas = pn.ggplot(
        elos,
        pn.aes(x='train_flops', color='factor(boardsize)', group='boardsize'))
    observed = pn.geom_line(pn.aes(y='400/np.log(10)*elo'), size=2)
    fitted = pn.geom_line(pn.aes(y='400/np.log(10)*elohat'),
                          size=1,
                          linetype='dashed')
    captions = pn.labs(
        x='Training FLOPS',
        y='Elo v. perfect play',
        title=
        'Performance is a sigmoid of compute, linearly scaled by board size')
    x_scale = pn.scale_x_continuous(trans='log10')
    colour_scale = pn.scale_color_discrete(name='Boardsize')
    # x-range unrestricted; y-range open below and capped at 0.
    window = pn.coord_cartesian(None, (None, 0))

    return (canvas + observed + fitted + captions + x_scale + colour_scale +
            window + plot.mpl_theme() + plot.poster_sizes())
Esempio n. 20
0
def plot_test(ags):
    """Plot Elo against test-time compute for four selected 9x9 runs.

    For board size 9, the rows with the highest `idx` of each run are kept.
    Among the rows with test_nodes == 64, sorted by test FLOPS, the first run
    exceeding each of four Elo thresholds (evenly spaced from -2000 to -500)
    is selected. Each selected run is drawn as a line with points labelled by
    `test_nodes`, plus one architecture label ('depth×width') per run.
    """
    df = ags.query('boardsize == 9').groupby('run').apply(
        lambda df: df[df.idx == df.idx.max()]).copy()
    df['test_flops'] = df.test_nodes * (df.train_flops / df.samples)

    subset = df.query('test_nodes == 64').sort_values('test_flops')
    selection = [
        subset.loc[ELO * subset.elo > e].iloc[0].run
        for e in np.linspace(-2000, -500, 4)
    ]

    df = df[df.run.isin(selection)].copy()

    df['params'] = df.width**2 * df.depth
    df['arch'] = df.apply(lambda r: '{depth}×{width}'.format(**r), axis=1)
    # One row per run (its lowest test_flops row) to anchor the arch label.
    labels = df.sort_values('test_flops').reset_index(
        drop=True).groupby('run').first().reset_index()
    # The test_nodes text layer is added once; it used to be added twice
    # verbatim, drawing every label on top of itself.
    return (pn.ggplot(
        df, pn.aes(x='test_flops', y='ELO*elo', color='params', group='run')) +
            pn.geom_point(size=.25, show_legend=False) +
            pn.geom_line(size=.5, show_legend=False) +
            pn.geom_text(pn.aes(label='test_nodes'),
                         nudge_y=-50,
                         show_legend=False,
                         size=4,
                         va='top') +
            pn.geom_text(pn.aes(label='arch'),
                         data=labels,
                         show_legend=False,
                         size=6,
                         nudge_x=-.1,
                         ha='right') + pn.scale_x_continuous(trans='log10') +
            pn.scale_color_cmap('plasma',
                                trans='log10',
                                limits=(df.params.min(), 10 * df.params.max()))
            + pn.coord_cartesian(
                (3.5, None)) + pn.labs(x='Test-time compute (FLOPS-seconds)',
                                       y='Elo v. perfect play') + plot.IEEE())
Esempio n. 21
0
def plot_frontiers(ags):
    """Plot observed (solid) and fitted (dashed) Elo against training compute.

    One pair of lines per board size on a log10 x-axis, each tagged with its
    board size at the row with the lowest `train_flops`. The upper y-limit is
    fixed at 0.
    """
    elos, _model = data.modelled_elos(ags)
    # Lowest-compute row of every board size: anchor for the text tags.
    tag_rows = (elos.sort_values('train_flops')
                .groupby('boardsize')
                .first()
                .reset_index())

    canvas = pn.ggplot(
        elos,
        pn.aes(x='train_flops', color='factor(boardsize)', group='boardsize'))
    observed = pn.geom_line(pn.aes(y='ELO*elo'), size=.5, show_legend=False)
    fitted = pn.geom_line(pn.aes(y='ELO*elohat'),
                          size=.25,
                          linetype='dashed',
                          show_legend=False)
    tags = pn.geom_text(pn.aes(y='ELO*elohat', label='boardsize'),
                        data=tag_rows,
                        show_legend=False,
                        size=6,
                        nudge_x=-.25,
                        nudge_y=-15)
    captions = pn.labs(x='Training compute (FLOPS-seconds)',
                       y='Elo v. perfect play')
    colour_scale = pn.scale_color_discrete(l=.4)
    x_scale = pn.scale_x_continuous(trans='log10')
    window = pn.coord_cartesian(None, (None, 0))

    return (canvas + observed + fitted + tags + captions + colour_scale +
            x_scale + window + plot.IEEE())
Esempio n. 22
0
def pattern_research_plot(data):
    """Plot the 'D', 'W' and 'MS' columns of `data` as dodged bars per hour category.

    `data` is melted to long form on 'hour_category'. The bars are filled
    with a three-step blue gradient and the y-axis (0-100) is labelled in
    percent.
    """
    from colour import Color

    series_order = ['D', 'W', 'MS']

    # Three-step gradient between the two blues, reversed.
    gradient = Color("#004996").range_to(Color("#018ace"), 3)
    blue = [shade.hex_l for shade in gradient][::-1]

    long_form = data.melt(id_vars=['hour_category'],
                          value_vars=series_order,
                          var_name='series',
                          value_name='count')
    # Fix the series order so bars and legend follow D, W, MS.
    long_form = long_form.assign(
        series=pd.Categorical(long_form['series'], categories=series_order))

    def percent_labels(breaks):
        return ["%d%%" % (v) for v in breaks]

    chart = p9.ggplot(data=long_form,
                      mapping=p9.aes(x='hour_category', y='count',
                                     fill='series'))
    chart = chart + p9.geom_bar(stat='identity', position='dodge')
    chart = chart + p9.scale_fill_manual(blue, labels=series_order)
    chart = chart + p9.theme_classic()
    chart = chart + p9.theme(
        axis_text=p9.element_text(size=8),
        axis_title=p9.element_text(size=8, face='bold'))
    chart = chart + p9.coord_cartesian(ylim=(0, 100))
    chart = chart + p9.scale_y_continuous(labels=percent_labels)
    chart = chart + p9.labs(x='hour_category', y='Ratio of attacks')

    return chart
Esempio n. 23
0
    def plot_char_percent_vs_accuracy_smooth(
        self, expo=False, no_models=False, columns=False
    ):
        """Plot accuracy against the fraction of the question revealed.

        Args:
            expo: when False, return a moving-average plot of
                `self.char_plot_df` coloured by guessing model. When True,
                build the faceted plot that can also include human gameplay
                read from "data/external/all_human_gameplay.json".
            no_models: (expo only) plot only the human data as points.
            columns: (expo only) stack the facets in one column instead of
                one row.

        Returns:
            The assembled plot.

        Raises:
            ValueError: if `no_models` is set but no human data was loaded
                (file missing or `self.no_humans` set).
        """
        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f"Setting limits to: {limits}")
        else:
            limits = [0, 1]

        if not expo:
            # This branch used to call `df.to_json` on an unbound `df`; the
            # frame being plotted is the one to save.
            df = self.char_plot_df
            if self.save_df is not None:
                eprint(f"Saving df to: {self.save_df}")
                df.to_json(self.save_df)
            return (
                ggplot(df)
                + aes(x="char_percent", y="correct", color="Guessing_Model")
                + stat_smooth(method="mavg", se=False, method_args={"window": 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )

        def _cumulative_frame(correct_positions, wrong_positions, dataset, name):
            # Sort all buzz positions, then accumulate the share of correct
            # answers seen up to each position.
            positions = np.array(correct_positions + wrong_positions)
            results = np.array(
                len(correct_positions) * [1] + len(wrong_positions) * [0]
            )
            order = np.argsort(positions)
            sorted_results = results[order]
            frame = pd.DataFrame(
                {
                    "correct": sorted_results.cumsum() / sorted_results.shape[0],
                    "char_percent": positions[order],
                }
            )
            frame["Dataset"] = dataset
            frame["Guessing_Model"] = f" {name}"
            return frame

        human_df = None
        humans_path = "data/external/all_human_gameplay.json"
        if os.path.exists(humans_path) and not self.no_humans:
            with open(humans_path) as f:
                all_gameplay = json.load(f)
            frames = []
            for event, name in [
                ("parents", "Intermediate"),
                ("maryland", "Expert"),
                ("live", "National"),
            ]:
                if self.merge_humans:
                    name = "Human"
                gameplay = all_gameplay[event]
                # The "live" event is not read for control positions.
                if event != "live":
                    frames.append(
                        _cumulative_frame(
                            gameplay["control_correct_positions"],
                            gameplay["control_wrong_positions"],
                            "Regular Test",
                            name,
                        )
                    )
                frames.append(
                    _cumulative_frame(
                        gameplay["adv_correct_positions"],
                        gameplay["adv_wrong_positions"],
                        "IR Adversarial",
                        name,
                    )
                )
                if len(gameplay["advneural_correct_positions"]) > 0:
                    frames.append(
                        _cumulative_frame(
                            gameplay["advneural_correct_positions"],
                            gameplay["advneural_wrong_positions"],
                            "RNN Adversarial",
                            name,
                        )
                    )

            human_df = pd.concat(frames)
            human_vals = sort_humans(list(human_df["Guessing_Model"].unique()))
            human_dtype = CategoricalDtype(human_vals, ordered=True)
            human_df["Guessing_Model"] = human_df["Guessing_Model"].astype(
                human_dtype
            )
            dataset_dtype = CategoricalDtype(
                ["Regular Test", "IR Adversarial", "RNN Adversarial"],
                ordered=True,
            )
            human_df["Dataset"] = human_df["Dataset"].astype(dataset_dtype)

        if no_models:
            if human_df is None:
                raise ValueError(
                    "no_models=True requires human gameplay data, but none "
                    "was loaded"
                )
            p = ggplot(human_df) + geom_point(shape=".")
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df["Dataset"] != "Round 1 - IR Adversarial"]
            if 2 not in self.rounds:
                df = df[df["Dataset"] != "Round 2 - IR Adversarial"]
                df = df[df["Dataset"] != "Round 2 - RNN Adversarial"]
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f"Saving df to: {self.save_df}")
                df.to_json(self.save_df)

            if human_df is not None:
                eprint("Loading human data")
                p = p + geom_line(data=human_df)

        if columns:
            facet_conf = facet_wrap("Guessing_Model", ncol=1)
        else:
            facet_conf = facet_wrap("Guessing_Model", nrow=1)

        if not no_models:
            if self.mvg_avg_char:
                chart = stat_smooth(
                    method="mavg", se=False, method_args={"window": 400}
                )
            else:
                chart = stat_summary_bin(
                    fun_data=mean_no_se,
                    bins=20,
                    shape=".",
                    linetype="None",
                    size=0.5,
                )
        else:
            chart = None

        p = p + facet_conf + aes(x="char_percent", y="correct", color="Dataset")
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, 0.5, 1])
            + coord_cartesian(ylim=limits)
            + xlab("Percent of Question Revealed")
            + ylab("Accuracy")
            + theme(
                strip_text_x=element_text(margin={"t": 6, "b": 6, "l": 1, "r": 5})
            )
            + scale_color_manual(
                values=["#FF3333", "#66CC00", "#3333FF", "#FFFF33"],
                name="Questions",
            )
        )
        if self.title != "":
            p += ggtitle(self.title)

        return p
Esempio n. 24
0
def day_night_attacks(Data, Data_m):
    """Plot the number of symptom records per hour and save both charts.

    Rows with ``Group == 'sy'`` that have both ``year`` and ``month`` set
    are counted per ``hour``. The monthly chart (from ``Data_m``) is saved
    as ``Graph_3.jpeg`` and the all-time chart (from ``Data``) as
    ``Graph_ALL_3.jpeg``, both under ``pdf/iteration/``.

    Args:
        Data: DataFrame with all records; needs the columns ``Group``,
            ``year``, ``month`` and ``hour``.
        Data_m: DataFrame with the monthly records, same columns.

    Returns:
        None. Progress messages are printed to stdout.
    """
    print('======= Creating day_night_attacks =======')

    def _count_by_hour(frame):
        # Keep symptom rows that carry both a year and a month.
        symptoms = frame[frame.Group == 'sy']
        dated = symptoms[pd.notna(symptoms.year) & pd.notna(symptoms.month)]
        counts = pd.DataFrame(dated.groupby("hour", as_index=False).count())
        # Column 0 is the group key, column 1 the non-null count of the
        # first remaining column.
        # NOTE(review): the rename assumes that column is "Unnamed: 0"
        # (e.g. a CSV index column) - confirm against the loader.
        counts = counts.iloc[:, 0:2]
        return counts.rename(columns={"Unnamed: 0": "n"})

    def _hour_plot(counts, with_line):
        chart = (p9.ggplot(data=counts, mapping=p9.aes(x='hour', y='n'))
                 + p9.geom_point(color='red', size=10))
        if with_line:
            chart = chart + p9.geom_line(color='red', size=1)
        return (chart
                + p9.theme_classic()
                + p9.theme(axis_text=p9.element_text(size=40),
                           axis_title=p9.element_text(size=40, face='bold'))
                + p9.coord_cartesian(xlim=(1, 25))
                + p9.labs(x='Hours', y='No. of attacks')
                + p9.scale_x_discrete(limits=(range(1, 25))))

    def _save_or_report(counts, filename, with_line):
        # Guard on the table that is actually plotted, so an empty table is
        # never handed to plotnine.
        if len(counts) == 0:
            print('Plot not created; no data found.')
            return
        chart = _hour_plot(counts, with_line)
        # NOTE(review): `plot=` is forwarded through ggplot.save's **kwargs;
        # kept from the original call - confirm the installed plotnine /
        # matplotlib versions accept it.
        chart.save(filename=filename,
                   plot=chart,
                   path="pdf/iteration/",
                   width=25, height=5,
                   dpi=320)

    # Compute both tables first so bad input fails before anything is saved.
    counts_all = _count_by_hour(Data)
    counts_month = _count_by_hour(Data_m)

    # Monthly chart: points only.
    _save_or_report(counts_month, 'Graph_3.jpeg', with_line=False)
    # All-time chart: points joined by a line.
    _save_or_report(counts_all, 'Graph_ALL_3.jpeg', with_line=True)

    print('=================================day_night_attacks DONE =============================')
    return None
Esempio n. 25
0
    def plot_char_percent_vs_accuracy_smooth(self,
                                             expo=False,
                                             no_models=False,
                                             columns=False):
        """Build the accuracy versus fraction-of-question-revealed plot.

        Args:
            expo: If true, build the faceted chart (one panel per
                ``Guessing_Model``, coloured by ``Dataset``) and overlay
                the human gameplay curves when they are available.
                Otherwise return a single moving-average chart of
                ``self.char_plot_df`` coloured by ``Guessing_Model``.
            no_models: Only used with ``expo``. If true, plot the human
                curves alone and leave out ``self.char_plot_df``.
            columns: Only used with ``expo``. If true, stack the facets in
                one column instead of one row.

        Returns:
            The assembled ggplot object.

        Raises:
            ValueError: If ``no_models`` is set but no human gameplay data
                was loaded (file missing or ``self.no_humans`` set).
        """
        human_path = 'data/external/all_human_gameplay.json'

        def cumulative_frame(correct, wrong, dataset, name):
            # Order all positions and pair each with the running count of
            # correct entries divided by the total number of entries.
            positions = np.array(correct + wrong)
            outcomes = np.array(len(correct) * [1] + len(wrong) * [0])
            order = np.argsort(positions)
            ranked = outcomes[order]
            frame = pd.DataFrame({
                'correct': ranked.cumsum() / ranked.shape[0],
                'char_percent': positions[order]
            })
            frame['Dataset'] = dataset
            frame['Guessing_Model'] = f' {name}'
            return frame

        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f'Setting limits to: {limits}')
        else:
            limits = [0, 1]

        if not expo:
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                # The plotted frame is the one to persist; no local `df`
                # exists on this path.
                self.char_plot_df.to_json(self.save_df)
            return (ggplot(self.char_plot_df) +
                    aes(x='char_percent', y='correct',
                        color='Guessing_Model') +
                    stat_smooth(method='mavg',
                                se=False,
                                method_args={'window': 500}) +
                    scale_y_continuous(breaks=np.linspace(0, 1, 6)) +
                    coord_cartesian(ylim=limits))

        # Stays None when the gameplay file is absent or humans are disabled.
        human_df = None
        if os.path.exists(human_path) and not self.no_humans:
            with open(human_path) as f:
                all_gameplay = json.load(f)
            frames = []
            for event, name in [('parents', 'Intermediate'),
                                ('maryland', 'Expert'),
                                ('live', 'National')]:
                if self.merge_humans:
                    name = 'Human'
                gameplay = all_gameplay[event]
                # The 'live' event is the only one read without control
                # positions.
                if event != 'live':
                    frames.append(
                        cumulative_frame(
                            gameplay['control_correct_positions'],
                            gameplay['control_wrong_positions'],
                            'Regular Test', name))

                frames.append(
                    cumulative_frame(gameplay['adv_correct_positions'],
                                     gameplay['adv_wrong_positions'],
                                     'IR Adversarial', name))

                if len(gameplay['advneural_correct_positions']) > 0:
                    frames.append(
                        cumulative_frame(
                            gameplay['advneural_correct_positions'],
                            gameplay['advneural_wrong_positions'],
                            'RNN Adversarial', name))

            human_df = pd.concat(frames)
            # Ordered categoricals fix the facet and legend ordering.
            human_vals = sort_humans(
                list(human_df['Guessing_Model'].unique()))
            human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(
                CategoricalDtype(human_vals, ordered=True))
            human_df['Dataset'] = human_df['Dataset'].astype(
                CategoricalDtype(
                    ['Regular Test', 'IR Adversarial', 'RNN Adversarial'],
                    ordered=True))

        if no_models:
            if human_df is None:
                raise ValueError(
                    'no_models=True requires human gameplay data, '
                    'but none was loaded')
            p = ggplot(human_df) + geom_point(shape='.')
            chart = None
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
            if 2 not in self.rounds:
                df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
                df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                df.to_json(self.save_df)

            if human_df is not None:
                eprint('Loading human data')
                p = p + geom_line(data=human_df)

            if self.mvg_avg_char:
                chart = stat_smooth(method='mavg',
                                    se=False,
                                    method_args={'window': 400})
            else:
                chart = stat_summary_bin(fun_data=mean_no_se,
                                         bins=20,
                                         shape='.',
                                         linetype='None',
                                         size=0.5)

        if columns:
            facet_conf = facet_wrap('Guessing_Model', ncol=1)
        else:
            facet_conf = facet_wrap('Guessing_Model', nrow=1)

        p = (p + facet_conf +
             aes(x='char_percent', y='correct', color='Dataset'))
        if chart is not None:
            p += chart
        p = (p +
             scale_y_continuous(breaks=np.linspace(0, 1, 6)) +
             scale_x_continuous(breaks=[0, .5, 1]) +
             coord_cartesian(ylim=limits) +
             xlab('Percent of Question Revealed') +
             ylab('Accuracy') +
             theme(strip_text_x=element_text(
                 margin={'t': 6, 'b': 6, 'l': 1, 'r': 5})) +
             scale_color_manual(
                 values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
                 name='Questions'))
        if self.title != '':
            p += ggtitle(self.title)

        return p
Esempio n. 26
0
def intensity_graph(Data, Data_m):
    """Plot the number of symptom records per intensity and save both charts.

    Rows with ``Group == "sy"`` and a non-missing ``Intensity`` are counted
    per intensity value. The monthly chart (from ``Data_m``) is saved as
    ``Graph_1.jpeg`` and the all-time chart (from ``Data``) as
    ``Graph_ALL_1.jpeg``, both under ``pdf/iteration/``. If every
    ``Intensity`` value in ``Data`` is missing, a warning is printed and no
    chart is produced.

    Args:
        Data: DataFrame with all records; needs the columns ``Group`` and
            ``Intensity``.
        Data_m: DataFrame with the monthly records, same columns.

    Returns:
        None. Progress messages are printed to stdout.
    """
    print('======= Creating intensity_graph =======')
    done_banner = '=================================intensity_graph DONE ============================='

    # Nothing can be plotted without any intensity value: warn and stop here
    # rather than reaching the save step with no tables defined.
    if pd.isna(Data.Intensity).all():
        print("WARNING: All values for Intensity are NA's")
        print(done_banner)
        return None

    def _count_by_intensity(frame):
        # Symptom rows with a usable intensity.
        rated = frame[(frame.Group == "sy") & pd.notna(frame.Intensity)]
        counts = rated.groupby("Intensity", sort=True, as_index=False).count()
        # Column 0 is the group key, column 1 the non-null count of the
        # first remaining column.
        # NOTE(review): the rename assumes that column is "Unnamed: 0"
        # (e.g. a CSV index column) - confirm against the loader.
        counts = counts.iloc[:, 0:2].rename(columns={"Unnamed: 0": "n"})
        return rated, counts

    def _save_or_report(rated, counts, filename):
        if len(rated) == 0:
            print('Plot not created; no data found.')
            return
        ticks = list(range(1, 11))
        chart = (p9.ggplot(data=counts,
                           mapping=p9.aes(x='Intensity', y='n'))
                 + p9.geom_col(fill='red')
                 + p9.theme_classic()
                 + p9.theme(axis_text=p9.element_text(size=40),
                            axis_title=p9.element_text(size=40, face='bold'))
                 + p9.coord_cartesian(xlim=(1, 10))
                 + p9.scale_x_continuous(labels=ticks, breaks=list(ticks))
                 + p9.labs(x='', y='No. of attacks'))
        # NOTE(review): `plot=` is forwarded through ggplot.save's **kwargs;
        # kept from the original call - confirm the installed plotnine /
        # matplotlib versions accept it.
        chart.save(filename=filename,
                   plot=chart,
                   path="pdf/iteration/",
                   width=25, height=5,
                   dpi=320)

    # Compute both tables first so bad input fails before anything is saved.
    rated_month, counts_month = _count_by_intensity(Data_m)
    rated_all, counts_all = _count_by_intensity(Data)

    # Monthly chart.
    _save_or_report(rated_month, counts_month, 'Graph_1.jpeg')
    # All-time chart.
    _save_or_report(rated_all, counts_all, 'Graph_ALL_1.jpeg')

    print(done_banner)
    return None
Esempio n. 27
0
    def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
        """Build the accuracy versus fraction-of-question-revealed plot.

        Args:
            expo: If true, build the faceted chart (one panel per
                ``Guessing_Model``, coloured by ``Dataset``) and overlay
                the human gameplay curves when they are available.
                Otherwise return a single moving-average chart of
                ``self.char_plot_df`` coloured by ``Guessing_Model``.
            no_models: Only used with ``expo``. If true, plot the human
                curves alone and leave out ``self.char_plot_df``.
            columns: Only used with ``expo``. If true, stack the facets in
                one column instead of one row.

        Returns:
            The assembled ggplot object.

        Raises:
            ValueError: If ``no_models`` is set but no human gameplay data
                was loaded (file missing or ``self.no_humans`` set).
        """
        human_path = 'data/external/all_human_gameplay.json'

        def cumulative_frame(correct, wrong, dataset, name):
            # Order all positions and pair each with the running count of
            # correct entries divided by the total number of entries.
            positions = np.array(correct + wrong)
            outcomes = np.array(len(correct) * [1] + len(wrong) * [0])
            order = np.argsort(positions)
            ranked = outcomes[order]
            frame = pd.DataFrame({'correct': ranked.cumsum() / ranked.shape[0], 'char_percent': positions[order]})
            frame['Dataset'] = dataset
            frame['Guessing_Model'] = f' {name}'
            return frame

        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f'Setting limits to: {limits}')
        else:
            limits = [0, 1]

        if not expo:
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                # The plotted frame is the one to persist; no local `df`
                # exists on this path.
                self.char_plot_df.to_json(self.save_df)
            return (
                ggplot(self.char_plot_df)
                + aes(x='char_percent', y='correct', color='Guessing_Model')
                + stat_smooth(method='mavg', se=False, method_args={'window': 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )

        # Stays None when the gameplay file is absent or humans are disabled.
        human_df = None
        if os.path.exists(human_path) and not self.no_humans:
            with open(human_path) as f:
                all_gameplay = json.load(f)
            frames = []
            for event, name in [('parents', 'Intermediate'), ('maryland', 'Expert'), ('live', 'National')]:
                if self.merge_humans:
                    name = 'Human'
                gameplay = all_gameplay[event]
                # The 'live' event is the only one read without control
                # positions.
                if event != 'live':
                    frames.append(cumulative_frame(
                        gameplay['control_correct_positions'],
                        gameplay['control_wrong_positions'],
                        'Regular Test', name,
                    ))

                frames.append(cumulative_frame(
                    gameplay['adv_correct_positions'],
                    gameplay['adv_wrong_positions'],
                    'IR Adversarial', name,
                ))

                if len(gameplay['advneural_correct_positions']) > 0:
                    frames.append(cumulative_frame(
                        gameplay['advneural_correct_positions'],
                        gameplay['advneural_wrong_positions'],
                        'RNN Adversarial', name,
                    ))

            human_df = pd.concat(frames)
            # Ordered categoricals fix the facet and legend ordering.
            human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
            human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(
                CategoricalDtype(human_vals, ordered=True)
            )
            human_df['Dataset'] = human_df['Dataset'].astype(
                CategoricalDtype(['Regular Test', 'IR Adversarial', 'RNN Adversarial'], ordered=True)
            )

        if no_models:
            if human_df is None:
                raise ValueError('no_models=True requires human gameplay data, but none was loaded')
            p = ggplot(human_df) + geom_point(shape='.')
            chart = None
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
            if 2 not in self.rounds:
                df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
                df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                df.to_json(self.save_df)

            if human_df is not None:
                eprint('Loading human data')
                p = p + geom_line(data=human_df)

            if self.mvg_avg_char:
                chart = stat_smooth(method='mavg', se=False, method_args={'window': 400})
            else:
                chart = stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)

        if columns:
            facet_conf = facet_wrap('Guessing_Model', ncol=1)
        else:
            facet_conf = facet_wrap('Guessing_Model', nrow=1)

        p = (
            p + facet_conf
            + aes(x='char_percent', y='correct', color='Dataset')
        )
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, .5, 1])
            + coord_cartesian(ylim=limits)
            + xlab('Percent of Question Revealed')
            + ylab('Accuracy')
            + theme(strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5}))
            + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'], name='Questions')
        )
        if self.title != '':
            p += ggtitle(self.title)

        return p
Esempio n. 28
0
# Experiment 1
output_exp1 = expt.RunExp1(lambdas, alphas, [0.1], trainingSets, z)

# Lowest rmse reached for each value of 'l'. The string alias 'min' is used
# instead of the builtin so pandas applies its own groupby reduction without
# a deprecation warning.
bestAlpha = (
    output_exp1.groupby('l')
    .agg(rmse=('rmse', 'min'))
    .reset_index('l')
)

# Experiment 2
lambdas = [0, 0.3, 0.8, 1]
alphas = np.arange(0.0, 1.0, 0.05)
output_exp2 = expt.RunExp2(lambdas, alphas, trainingSets, z)

# Experiment 2 is run a second time on a finer grid of lambda values rather
# than filtering one combined result.
lambdas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

output_exp21 = expt.RunExp2(lambdas, alphas, trainingSets, z)
bestAlpha2 = (
    output_exp21.groupby('l')
    .agg(rmse=('rmse', 'min'))
    .reset_index('l')
)

# Plots
# Figure 3: lowest rmse per 'l' from experiment 1
(
    plt.ggplot(bestAlpha, plt.aes(x='l', y='rmse'))
    + plt.geom_line()
    + plt.geom_point()
)

# Figure 4: rmse against 'a', one line per 'l', from experiment 2
(
    plt.ggplot(output_exp2, plt.aes(x='a', y='rmse', color='l'))
    + plt.geom_line(plt.aes(group='l'))
    + plt.geom_point()
    + plt.coord_cartesian(xlim=(0, 0.62), ylim=(0, 0.7))
)

# Figure 5: lowest rmse per 'l' on the finer lambda grid
(
    plt.ggplot(bestAlpha2, plt.aes(x='l', y='rmse'))
    + plt.geom_line(plt.aes(group=1))
    + plt.geom_point()
)
Esempio n. 29
0
File: gap.py Progetto: tommens/gap
def _build_cli_parser():
    """Create the argument parser for the GAP command line."""
    parser = argparse.ArgumentParser(
        description='GAP - Git Activity Predictor')
    parser.add_argument('paths',
                        metavar='PATH',
                        type=str,
                        nargs='*',
                        default=['.'],
                        help='Paths to one or more git repositories')
    parser.add_argument(
        '--date',
        type=lambda d: dateutil.parser.parse(d).date(),
        required=False,
        default=datetime.date.today(),
        help='Date used for predictions (default to current date)')
    parser.add_argument('--obs',
                        type=int,
                        required=False,
                        default=20,
                        help='Number of observations to consider')
    parser.add_argument('--probs',
                        metavar='PROB',
                        type=float,
                        nargs='*',
                        required=False,
                        default=[0.5, 0.6, 0.7, 0.8, 0.9],
                        help='Probabilities to output, strictly in [0,1].')
    parser.add_argument(
        '--limit',
        type=int,
        required=False,
        default=30,
        help=
        'Limit contributors to the one that were active at least once during the last x days (default 30)'
    )
    parser.add_argument(
        '--mapping',
        type=str,
        nargs='?',
        help=
        'Mapping file to merge identities. This file must be a csv file where each line contains two values: the name to be merged, and the corresponding identity. Use "IGNORE" as identity to ignore specific names.'
    )
    parser.add_argument('--branches',
                        metavar='BRANCH',
                        type=str,
                        nargs='*',
                        default=list(),
                        help='Git branches to analyse (default to all).')
    parser.add_argument(
        '--as-dates',
        dest='as_dates',
        action='store_true',
        help=
        'Express predictions using dates instead of time differences in days')

    # At most one output format may be selected.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--text',
                       action='store_true',
                       help='Print results as text.')
    group.add_argument('--csv',
                       action='store_true',
                       help='Print results as csv.')
    group.add_argument('--json',
                       action='store_true',
                       help='Print results as json.')
    group.add_argument(
        '--plot',
        nargs='?',
        const=True,
        help='Export results to a plot. Filepath can be optionaly specified.')
    return parser


def cli():
    """Run the GAP command line: predict each author's next activity.

    Reads the commit dates of every author from the given repositories,
    applies ``SurvfuncRight`` to the durations returned by
    ``dates_to_duration`` and, for each requested probability, reports the
    corresponding quantile. The result is printed as text, csv or json, or
    exported as a plot.

    The process exits with status 1 when a repository cannot be opened, and
    exits without an error status when no author qualifies.
    """
    args = _build_cli_parser().parse_args()

    # Default plot location
    if args.plot is True:
        args.plot = str(args.date) + '.pdf'

    # Default to text if not other option is provided
    if not args.csv and not args.json and not args.plot:
        args.text = True

    # Identity mapping: source name -> identity to report
    if args.mapping:
        d = pandas.read_csv(args.mapping, names=['source', 'target'])
        mapping = dict(zip(d['source'], d['target']))
    else:
        mapping = {}

    raw_data = dict()  # author -> dates of activity

    # Get data from git
    for path in args.paths:
        try:
            repo = git.Repo(path)
        except Exception as e:  # Must be refined
            print('Unable to access repository {} ({}:{})'.format(
                path, e.__class__.__name__, e))
            # Signal the failure to the calling shell.
            sys.exit(1)

        # Default branches
        if len(args.branches) == 0:
            commits = repo.iter_commits('--all')
        else:
            # Each branch must reach git as its own revision argument; a
            # space-joined string would be taken as one (invalid) name.
            # NOTE(review): relies on GitPython flattening list arguments
            # into separate command-line arguments - confirm with the
            # installed version.
            commits = repo.iter_commits(list(args.branches))

        for commit in commits:
            try:
                author = commit.author.name
                identity = mapping.get(author, author)
                # Skip names mapped to "ignore", but keep an author who is
                # literally called "ignore".
                if author.lower() != 'ignore' and identity.lower() == 'ignore':
                    continue

                date = datetime.date.fromtimestamp(commit.authored_date)
                raw_data.setdefault(identity, []).append(date)
            except Exception as e:
                print('Unable to read commit ({}: {}): {}'.format(
                    e.__class__.__name__, e, commit))

    # Compute durations and apply model
    data = []  # (author, past activities, predicted durations)

    for author, commits in raw_data.items():
        commits = sorted([e for e in commits if e <= args.date])
        durations = dates_to_duration(commits, window_size=args.obs)

        if len(durations) >= args.obs:
            # Currently implemented with no censor
            surv = SurvfuncRight(durations, [1] * len(durations))
            predictions = [surv.quantile(p) for p in args.probs]
            last_day = commits[-1]

            # Keep only authors active within the last `limit` days.
            if last_day >= args.date - datetime.timedelta(args.limit):
                data.append((
                    author,
                    commits,
                    predictions,
                ))

    # Prepare dataframe. Authors are unique already (they are the keys of
    # raw_data), so a list keeps a deterministic row order.
    df = pandas.DataFrame(index=[author for author, _, _ in data],
                          columns=['last'] + args.probs)
    if len(df) == 0:
        print(
            'No author has {} observations and was active at least once during the last {} days'
            .format(args.obs, args.limit))
        sys.exit()

    df.index.name = 'author'

    if not args.plot:
        for author, commits, predictions in data:
            last = commits[-1]
            if args.as_dates:
                df.at[author, 'last'] = last
            else:
                df.at[author, 'last'] = (last - args.date).days

            for prob, p in zip(args.probs, predictions):
                predicted = last + datetime.timedelta(days=int(p))
                if args.as_dates:
                    df.at[author, prob] = predicted
                else:
                    df.at[author, prob] = (predicted - args.date).days

        df = df.sort_values(['last'] + args.probs,
                            ascending=[False] + [True] * len(args.probs))
        df = df.astype(str)

        if args.text:
            pandas.set_option('expand_frame_repr', False)
            pandas.set_option('display.max_columns', 999)
            print(df)
        elif args.csv:
            print(df.to_csv())
        elif args.json:
            print(df.to_json(orient='index'))
    else:
        # Because of plotnine's way of initializing matplotlib
        import warnings
        warnings.filterwarnings("ignore")

        VIEW_LIMIT = 28

        # List of (author, day) where day is a delta w.r.t. given date
        activities = []
        # List of (author, from_day, to_day, p) where probability p
        # applies between from_day and to_day (delta w.r.t. given date)
        forecasts = []

        for author, commits, predictions in data:
            last = (commits[-1] - args.date).days
            for e in commits:
                activities.append((author, (e - args.date).days))

            previous = previous_previous = 0
            for d, p in zip(predictions, args.probs):
                if d > previous:
                    forecasts.append((author, last + previous, last + d, p))
                    previous_previous = previous
                    previous = d
                else:
                    # Same end day as the previous probability: reuse the
                    # earlier start so both rows describe the same segment.
                    forecasts.append(
                        (author, last + previous_previous, last + d, p))

        activities = pandas.DataFrame(columns=['author', 'day'],
                                      data=activities)
        forecasts = pandas.DataFrame(columns=['author', 'fromd', 'tod', 'p'],
                                     data=forecasts)

        # For identical segments keep only the row with the highest p.
        segments = forecasts.sort_values('p').drop_duplicates(
            ['author', 'fromd', 'tod'], keep='last')
        # y axis order follows the authors' activity days, latest first.
        author_order = activities.sort_values(
            'day', ascending=False)['author'].unique()

        plot = (
            p9.ggplot(p9.aes(y='author'))
            + p9.geom_segment(
                p9.aes('day - 0.5', 'author', xend='day + 0.5',
                       yend='author'),
                data=activities,
                size=4,
                color='orange',
            )
            + p9.geom_segment(
                p9.aes('fromd + 0.5',
                       'author',
                       xend='tod + 0.5',
                       yend='author',
                       alpha='factor(p)'),
                data=segments,
                size=4,
                color='steelblue',
            )
            + p9.geom_vline(
                xintercept=0, color='r', alpha=0.5, linetype='dashed')
            + p9.scale_x_continuous(
                name='  <<  past days {:^20} future days  >>'.format(
                    str(args.date)),
                breaks=range(-VIEW_LIMIT // 7 * 7,
                             (VIEW_LIMIT // 7 * 7) + 1, 7),
                minor_breaks=6)
            + p9.scale_y_discrete(name='', limits=author_order)
            + p9.scale_alpha_discrete(range=(0.2, 1), name=' ')
            + p9.coord_cartesian(xlim=(-VIEW_LIMIT, VIEW_LIMIT))
            + p9.theme_matplotlib()
            + p9.theme(
                figure_size=(6, 4 * activities['author'].nunique() / 15))
        )

        fig = plot.draw()
        fig.savefig(args.plot, bbox_inches='tight')
        print('Plot exported to {}'.format(args.plot))
Esempio n. 30
0
                          yend = sol.Ra[k].imag + sol.Aa[k].imag * ACC_SCALE),\
                      colour='red', arrow=arrow()) + # Point A
         geom_segment(aes(x = sol.Rpa[k].real, y = sol.Rpa[k].imag, \
                          xend = sol.Rpa[k].real + sol.Apaa[k].real * ACC_SCALE, \
                          yend = sol.Rpa[k].imag + sol.Apaa[k].imag * ACC_SCALE),\
                      colour='red', arrow=arrow()) + # Point C
          # ACCELERATION TEXTS (comment these out if you wish to remove the acceleration information)
          # The positions of the acceleration texts may be adjusted if the plot gets hard to read
          annotate("text", x = sol.Rba[k].real, y = sol.Rba[k].imag+10, label = f'${np.absolute(sol.Aba[k])/1000:.2f}~m/s^2$', colour='red') +
          annotate("text", x = sol.Ra[k].real, y = sol.Ra[k].imag-20, label = f'${np.absolute(sol.Aa[k])/1000:.2f}~m/s^2$', colour='red') +
          annotate("text", x = sol.Rpa[k].real+10, y = sol.Rpa[k].imag-20, label = f'${np.absolute(sol.Apaa[k])/1000:.2f}~m/s^2$', colour='red') +
         # MECHANISM KINEMATIC PROPERTIES
           annotate("label", x = -50, y = -100, label = f'$\\theta_2={sol.theta2[k] * 180/(2*pi):.2f}^\\circ$') +
                     # Braces must be doubled inside the f-string so Python emits literal {3a}/{4a} for LaTeX instead of evaluating them as expressions
           annotate("label", x = -10, y = -100, label = f'$\\theta_{{3a}}={sol.theta3a[k] * 180/(2*pi):.2f}^\\circ$, $\\theta_{{3c}}={sol.theta3c[k] * 180/(2*pi):.2f}^\\circ$') + 
           annotate("label", x = 45, y = -100, label = f'$\\theta_{{4a}}={sol.theta4a[k] * 180/(2*pi):.2f}^\\circ$, $\\theta_{{4c}}={sol.theta4c[k] * 180/(2*pi):.2f}^\\circ$') +
           
           annotate("label", x = -50, y = -150, label = f'$\\omega_2={sol.omega2[k]:.2f}~rad/s$') +
           annotate("label", x = 0, y = -150, label = f'$\\omega_{{3a}}={sol.omega3a[k]:.2f}~rad/s$, $\\omega_{{3c}}={sol.omega3c[k]:.2f}~rad/s$') +
           annotate("label", x = 70, y = -150, label = f'$\\omega_{{4a}}={sol.omega4a[k]:.2f}~rad/s$, $\\omega_{{4c}}={sol.omega4c[k]:.2f}~rad/s$') +
           
           annotate("label", x = -50, y = -200, label = f'$\\alpha_2={sol.omega2[k]:.2f}~rad/s^2$') +
           annotate("label", x = 0, y = -200, label = f'$\\alpha_{{3a}}={sol.alpha3a[k]:.2f}~rad/s^2$, $\\alpha_{{3c}}={sol.alpha3c[k]:.2f}~rad/s^2$') +
           annotate("label", x = 70, y = -200, label = f'$\\alpha_{{4a}}={sol.alpha4a[k]:.2f}~rad/s^2$, $\\alpha_{{4c}}={sol.alpha4c[k]:.2f}~rad/s^2$') +
         #
         labs(x='$x~[mm]$', y='$y~[mm]$') +
         coord_cartesian(xlim=SCALE_X, ylim=SCALE_Y) + # Scales plot limits, avoiding it to be bigger than necessary. You may comment this out if you wish to do so.
         theme_bw() # Plot is prettier with this theme compared to the default.
         ) 
    
# Write `plot` to SolutionPlot.pdf: 50 x 30 (units given in cm) at 330 dpi.
plot.save(
    'SolutionPlot.pdf',
    width=50,
    height=30,
    units='cm',
    dpi=330,
)