Exemple #1
0
def test_labels():
    """
    Check that label components set the plot labels and that
    invalid arguments to them raise PlotnineError
    """
    plot = ggplot(df, aes(x='x', y='y')) + geom_point()
    plot = plot + xlab('xlab') + ylab('ylab') + ggtitle('title')

    # Labels set through the individual helpers
    first_round = {'x': 'xlab', 'y': 'ylab', 'title': 'title'}
    for key, text in first_round.items():
        assert plot.labels[key] == text

    # labs() overrides all of them at once
    plot = plot + labs(x='xlab2', y='ylab2', title='title2')
    second_round = {'x': 'xlab2', 'y': 'ylab2', 'title': 'title2'}
    for key, text in second_round.items():
        assert plot.labels[key] == text

    # None is expected to be rejected by every single-label helper
    for make_label in (xlab, ylab, ggtitle):
        with pytest.raises(PlotnineError):
            plot = plot + make_label(None)

    # Positional arguments to labs() are expected to be rejected
    with pytest.raises(PlotnineError):
        plot = plot + labs('x', 'y')
Exemple #2
0
    def plot_overlap_duration(self, data, options):
        """Scatter plot of tag overlap against tag duration.

        Only rows of ``data["matches"]`` with ``tag_overlap > 0`` are
        plotted. The title is built from ``options["scenario_info"]``
        (model, database, class) followed by the whole ``options`` object.

        Returns the ggplot object.
        """
        all_matches = data["matches"]
        overlapping = all_matches.loc[all_matches.tag_overlap > 0]

        scatter = (
            ggplot(
                data=overlapping,
                mapping=aes(x="tag_duration", y="tag_overlap"),
            )
            + geom_point()
            + xlab("Tag duration")
            + ylab("Proportion tag overlapping with matching event")
            + theme_classic()
        )

        styling = theme(
            axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}),
            plot_title=element_text(weight="bold", size=14, margin={"t": 10, "b": 10}),
            figure_size=(10, 10),
            text=element_text(size=12, weight="bold"),
        )

        scenario = options["scenario_info"]
        title_template = (
            "Proportion of tag overlapping with matching event depending on duration "
            + "size for model {}, database {}, class {}\n"
            + "with detector options {}"
        )
        title_text = title_template.format(
            scenario["model"],
            scenario["database"],
            scenario["class"],
            options,
        )

        return scatter + styling + ggtitle(title_text)
Exemple #3
0
    def plot_replicate_groups(self):
        """Plot the 'Current' over 'Time' traces of both replicate data sets.

        ``self.data1df`` and ``self.data2df`` are tagged in place with an
        ``Experiment`` column ('1' and '2' respectively), printed, and then
        drawn as two line layers coloured by 'Channel'.

        Returns:
            The plotnine ggplot object (it is also printed).
        """
        from plotnine import ggplot, aes, ylab, xlab, geom_line

        df1 = self.data1df
        df2 = self.data2df

        # DataFrame.insert raises ValueError when the column already exists,
        # which made every call after the first one fail. Tag each frame
        # only once so the method can be called repeatedly.
        for frame, experiment_id in ((df1, '1'), (df2, '2')):
            if 'Experiment' not in frame.columns:
                frame.insert(0, 'Experiment', experiment_id)

        print(df1)
        print(df2)

        plot = (ggplot() + ylab(u'Current (μA)') + xlab('Time (seconds)') +
                geom_line(df1, aes('Time', 'Current', color='Channel')) +
                geom_line(df2, aes('Time', 'Current', color='Channel')))

        print(plot)
        return plot
Exemple #4
0
def accPlot(accsByNFeats):
    """Print an accuracy-vs-p line plot and return the plotted data.

    ``accsByNFeats`` is indexed as ``accsByNFeats[set_name][p]``; every
    (set, p) pair becomes one row with columns 'p', 'acc' and 'set',
    indexed by ``str(p)``.

    Returns the long-format DataFrame (not the plot object).
    """
    per_set_frames = [
        pd.concat(
            [
                pd.DataFrame(
                    {"p": p, "acc": accsByNFeats[s][p], "set": s},
                    index=[str(p)],
                )
                for p in accsByNFeats[s]
            ],
            axis=0,
        )
        for s in accsByNFeats
    ]
    ggd = pd.concat(per_set_frames)
    ggd['acc'] = ggd['acc'].astype(float)

    ggo = gg.ggplot(ggd, gg.aes(x='p', y='acc', color='set'))
    components = (
        gg.geom_line(alpha=0.5),
        gg.geom_point(),
        gg.theme_bw(),
        gg.scale_x_log10(breaks=[10, 100, 1000, 10000]),
        gg.scale_color_manual(
            values=['darkgray', 'black', 'red', 'dodgerblue']),
        gg.ylab('Accuracy (5-fold CV)'),
    )
    for component in components:
        ggo += component
    print(ggo)
    return ggd
Exemple #5
0
def plot_key_stock_indicators(df, stock):
    """Plot key indicators of a stock over time, one facet per indicator.

    Args:
        df: DataFrame indexed by fetch date; must contain 'eps', 'pe' and
            'annual_dividend_yield' (asserted) and is also read for
            'volume' and 'last_price'.
        stock: not used by this function.

    Returns:
        Whatever ``plot_as_inline_html_data`` returns for the plot.
    """
    assert isinstance(df, pd.DataFrame)
    assert all(
        column in df.columns
        for column in ('eps', 'pe', 'annual_dividend_yield')
    )

    indicators = ['pe', 'eps', 'annual_dividend_yield', 'volume', 'last_price']

    # Work on a copy: the original code rescaled 'volume' and added
    # 'fetch_date' on the caller's frame, so calling the function twice
    # on the same data scaled the volume twice.
    df = df.copy()
    df['volume'] = (df['last_price'] * df['volume']
                    / 1000000)  # again, express as $(M)
    df['fetch_date'] = df.index
    plot_df = pd.melt(df,
                      id_vars='fetch_date',
                      value_vars=indicators,
                      var_name='indicator',
                      value_name='value')
    plot_df['value'] = pd.to_numeric(plot_df['value'])
    plot_df['fetch_date'] = pd.to_datetime(plot_df['fetch_date'])

    plot = (
        p9.ggplot(plot_df, p9.aes('fetch_date', 'value', color='indicator')) +
        p9.geom_line(size=1.5, show_legend=False) +
        # One row per indicator; a hard-coded nrow=6 left an empty row.
        p9.facet_wrap('~ indicator', nrow=len(indicators), ncol=1,
                      scales='free_y') +
        p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                 figure_size=(8, 7)) +
        p9.xlab("") + p9.ylab(""))
    return plot_as_inline_html_data(plot)
Exemple #6
0
def plot_fundamentals(df, stock) -> str:
    """Plot the fundamentals of a stock over time, one facet per indicator.

    Args:
        df: DataFrame indexed by fetch date containing every column listed
            in ``columns_to_report`` (asserted).
        stock: not used by this function.

    Returns:
        The result of ``plot_as_inline_html_data`` for the plot.
    """
    assert isinstance(df, pd.DataFrame)
    columns_to_report = [
        "pe", "eps", "annual_dividend_yield", "volume",
        "last_price", "change_in_percent_cumulative",
        "change_price", "market_cap", "number_of_shares",
    ]
    colnames = df.columns
    for column in columns_to_report:
        assert column in colnames

    # Work on a copy: the rescaling below used to be applied to the
    # caller's frame, so a second call on the same data rescaled
    # volume, market_cap and number_of_shares again.
    df = df.copy()
    df["volume"] = df["last_price"] * df["volume"] / 1000000  # again, express as $(M)
    df["market_cap"] = df["market_cap"] / (1000 * 1000)
    df["number_of_shares"] = df["number_of_shares"] / (1000 * 1000)
    df["fetch_date"] = df.index
    plot_df = pd.melt(
        df,
        id_vars="fetch_date",
        value_vars=columns_to_report,
        var_name="indicator",
        value_name="value",
    )
    plot_df["value"] = pd.to_numeric(plot_df["value"])
    plot_df["fetch_date"] = pd.to_datetime(plot_df["fetch_date"])

    n_indicators = len(columns_to_report)
    plot = (
        p9.ggplot(plot_df, p9.aes("fetch_date", "value", color="indicator"))
        + p9.geom_line(size=1.5, show_legend=False)
        + p9.facet_wrap("~ indicator", nrow=n_indicators, ncol=1, scales="free_y")
        + p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                   axis_text_y=p9.element_text(size=7),
                   # figure height grows with the number of facets
                   figure_size=(8, n_indicators))
        + p9.xlab("")
        + p9.ylab("")
    )
    return plot_as_inline_html_data(plot)
Exemple #7
0
def round_2_plot():
    """Build and save the 'Round 2 Attacks and Models' figure.

    Downloads the round 2 JSON data when the local file is missing,
    verifies its checksum, then plots binned summaries of 'correct'
    against 'char_percent' per guessing model and writes the PDF.
    """
    if not os.path.exists(round_2_df_path):
        eprint(f'Downloading {round_2_df_url} to {round_2_df_path}')
        urlretrieve(round_2_df_url, round_2_df_path)
    verify_checksum(round_2_df_checksum, round_2_df_path)
    df = pd.read_json(round_2_df_path)

    strip_margin = {'t': 6, 'b': 6, 'l': 1, 'r': 5}

    p = ggplot(df) + aes(x='char_percent', y='correct', color='Dataset')
    p = p + facet_wrap('Guessing_Model', nrow=1)
    p = p + stat_summary_bin(
        fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)
    p = p + scale_y_continuous(breaks=np.linspace(0, 1, 6))
    p = p + scale_x_continuous(breaks=[0, .5, 1])
    p = p + coord_cartesian(ylim=[0, 0.7])
    p = p + ggtitle('Round 2 Attacks and Models')
    p = p + xlab('Percent of Question Revealed') + ylab('Accuracy')
    p = p + theme(strip_text_x=element_text(margin=strip_margin))
    p = p + scale_color_manual(
        values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
        name='Questions')

    p.save('2019_tacl_trick/auto_fig/round_2_json.pdf', width=7.0, height=1.7)
Exemple #8
0
def plot_predict(forecast):
    """Plot observations with the forecast line and its interval band.

    Observed 'y' over 'ds' is drawn as blue points and line, 'yhat' as a
    red line and 'yhat_lower'..'yhat_upper' as a ribbon. The figure is
    written to 'png/predict_pressure_chart.png' next to this module and
    the plot object is returned.
    """
    p = ggplot(data=forecast, mapping=aes(x='ds', y='y'))

    # observed values
    p = p + geom_point(colour='blue', alpha=0.3, na_rm=True)
    p = p + geom_line(colour='blue', na_rm=True)

    # forecast and its interval
    p = p + geom_line(
        data=forecast, mapping=aes(x='ds', y='yhat'), colour='red')
    p = p + geom_ribbon(
        data=forecast,
        mapping=aes(ymin='yhat_lower', ymax='yhat_upper'),
        fill='blue',
        alpha=0.1)

    # axes and theme
    p = p + scale_x_datetime(breaks='1 days', date_labels='%y-%m-%d %H:%M')
    p = p + xlab('Time') + ylab('Pressure') + theme_bw()
    p = p + theme(
        axis_text_x=element_text(
            angle=45, hjust=1, face='bold', color='black'),
        axis_text_y=element_text(face='bold', colour='black'))

    module_dir = os.path.abspath(os.path.dirname(__file__))
    p.save(filename='predict_pressure_chart.png',
           path=os.path.join(module_dir, 'png'),
           width=8,
           height=6,
           units='in',
           dpi=326,
           verbose=False)
    return p
Exemple #9
0
def make_sentiment_plot(sentiment_df, exclude_zero_bin=True, plot_text_labels=True):
    """Tile plot of how many entries fall in each change bin per date.

    Every column named ``bin_<YYYY-MM-DD>`` is counted per bin value.
    With ``exclude_zero_bin`` the "0.0" bin and any non-string bin value
    are skipped. With ``plot_text_labels`` the counts are drawn on the
    tiles. Returns the result of ``plot_as_inline_html_data``.
    """
    print(
        "Sentiment plot: exclude zero bins? {} show text? {}".format(
            exclude_zero_bin, plot_text_labels
        )
    )

    records = []
    bin_columns = (name for name in sentiment_df.columns if name.startswith("bin_"))
    for column in bin_columns:
        day = column[4:]  # strip the "bin_" prefix
        for label, count in Counter(sentiment_df[column]).items():
            keep = not exclude_zero_bin or (
                isinstance(label, str) and label != "0.0"
            )
            if not keep:
                continue
            label = str(label)
            count = int(count)
            records.append(
                {
                    "date": datetime.strptime(day, "%Y-%m-%d"),
                    "bin": label,
                    "value": count,
                }
            )

    df = pd.DataFrame.from_records(records)
    # HACK TODO FIXME: should get from price_change_bins()...
    negative_bins = ["-1000.0", "-100.0", "-10.0", "-5.0", "-3.0", "-2.0", "-1.0", "-1e-06"]
    positive_bins = ["1e-06", "1.0", "2.0", "3.0", "5.0", "10.0", "25.0", "100.0", "1000.0"]
    df["bin_ordered"] = pd.Categorical(
        df["bin"], categories=negative_bins + positive_bins
    )

    heatmap = p9.ggplot(df, p9.aes("date", "bin_ordered", fill="value"))
    heatmap = heatmap + p9.geom_tile(show_legend=False)
    heatmap = heatmap + p9.theme_bw()
    heatmap = heatmap + p9.xlab("")
    heatmap = heatmap + p9.ylab("Percentage daily change")
    heatmap = heatmap + p9.theme(
        axis_text_x=p9.element_text(angle=30, size=7), figure_size=(10, 5)
    )
    if plot_text_labels:
        heatmap = heatmap + p9.geom_text(p9.aes(label="value"), size=8, color="white")
    return plot_as_inline_html_data(heatmap)
def plot_replicate_density(
    df,
    batch,
    plate,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
):
    """Density plot of pairwise correlations, filled by replicate status.

    Args:
        df: data with 'pairwise_correlation' and 'replicate_info' columns.
        batch, plate: used for the title, formatted as "<batch>: <plate>".
        output_file_base: when truthy, the figure is passed to
            ``save_figure`` together with the arguments below.
        output_file_extensions: extensions handed to ``save_figure``;
            defaults to [".png", ".pdf", ".svg"].
        dpi, height, width: forwarded to ``save_figure``.

    Returns:
        The ggplot object.
    """
    # A list literal as the default is shared between calls; build a
    # fresh one per call so a callee can never alter the default.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="pairwise_correlation", fill="replicate_info"))
        + gg.geom_density(alpha=0.3)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={
                "True": "True",
                "False": "False"
            },
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            title=gg.element_text(size=9),
            axis_text=gg.element_text(size=5),
            axis_title=gg.element_text(size=8),
            legend_text=gg.element_text(size=6),
            legend_title=gg.element_text(size=7),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        ))

    if output_file_base:
        save_figure(density_gg, output_file_base, output_file_extensions, dpi,
                    height, width)

    return density_gg
Exemple #11
0
def customized_algorithm_plot(experiment_name='finite_simple_sanity',
                              data_path=_DEFAULT_DATA_PATH):
    """Simple plot of average instantaneous regret by agent, per timestep.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      p: ggplot plot
    """
    df = load_data(experiment_name, data_path)
    # Use the string alias: passing np.mean to groupby.agg is deprecated
    # in recent pandas and emits a FutureWarning; "mean" gives the same
    # per-group average.
    plt_df = (df.groupby(['t', 'agent']).agg({
        'instant_regret': 'mean'
    }).reset_index())
    plt_df['agent_new_name'] = plt_df.agent.apply(rename_agent)

    custom_labels = ['Laplace TS', 'Langevin TS', 'TS', 'bootstrap TS']
    custom_colors = ["#E41A1C", "#377EB8", "#4DAF4A", "#984EA3"]

    p = (gg.ggplot(plt_df) +
         gg.aes('t', 'instant_regret', colour='agent_new_name') +
         gg.geom_line(size=1.25, alpha=0.75) + gg.xlab('time period (t)') +
         gg.ylab('per-period regret') + gg.scale_color_manual(
             name='agent', labels=custom_labels, values=custom_colors))
    return p
Exemple #12
0
def plot_mem(df):
    """Bar chart of 'pcnt' per column name with 'size' text labels.

    Works on a copy of ``df``. Bars taller than 30% of the maximum get a
    white label placed just below the bar top; the remaining bars get a
    gray label placed just above it. Returns the ggplot object.
    """
    x = df.copy()
    # helper columns used only for plotting
    x['new_cols'] = [str(i) for i in x['col_name']]
    x['new_cols'] = pd.Categorical(x['new_cols'],
                                   categories=x['new_cols'],
                                   ordered=True)
    label_offset = np.max(x.pcnt.values) / 70
    x['cnt_print_loc_pos'] = x.pcnt.values + label_offset
    x['cnt_print_loc_neg'] = x.pcnt.values - label_offset

    # basic bar plot
    ggplt = (
        p9.ggplot(x, p9.aes(x='new_cols', y='pcnt', fill='new_cols'))
        + p9.geom_bar(stat='identity')
        + p9.guides(fill=False)
        + p9.ylab('% of total size')
        + p9.xlab('')
        + p9.theme(axis_text_x=p9.element_text(rotation=45, hjust=1))
    )

    tall_cutoff = 0.3 * np.max(x.pcnt)

    # text labels inside the highest bars
    tall_bars = x.copy()[x.pcnt > tall_cutoff]
    ggplt = ggplt + p9.geom_text(
        p9.aes(x='new_cols', y='cnt_print_loc_neg', label='size',
               fill='col_name'),
        inherit_aes=False, data=tall_bars, color='white',
        angle=90, vjust='top')

    # text labels above the lower bars
    short_bars = x.copy()[x.pcnt <= tall_cutoff]
    ggplt = ggplt + p9.geom_text(
        p9.aes(x='new_cols', y='cnt_print_loc_pos', label='size',
               fill='col_name'),
        inherit_aes=False, data=short_bars, color='gray',
        angle=90, vjust='bottom')
    return ggplt
Exemple #13
0
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Histogram of 'x' per method, faceted by task, with mean markers.

    The per (Task, Method) means are drawn as a vertical segment plus a
    text label. ``legend_position`` and ``legend_box`` are forwarded to
    the theme. Returns the ggplot object.
    """
    group_means = len_df.groupby(['Task', 'Method']).mean()
    mean_len_df = group_means.reset_index()
    # The blank column name gives the linetype legend an empty title.
    mean_len_df[' '] = 'Mean Length'

    histogram = geom_histogram(binwidth=2, position='identity', alpha=.6)
    mean_labels = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        mean_len_df,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False)
    mean_lines = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        mean_len_df,
        inherit_aes=False,
        color='black')
    legend_theme = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    plt = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + histogram
        + mean_labels
        + mean_lines
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + legend_theme
    )

    return plt
Exemple #14
0
def cum_regret_plot(experiment_name, data_path=_DEFAULT_DATA_PATH):
    """Plot of average cumulative regret by agent, per timestep.

    The mean is drawn as a line and the range between the
    `lower_interval` and `upper_interval` aggregates as a ribbon.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      dict mapping '<experiment_name>_cum_regret' to the ggplot plot.
    """
    df = load_data(experiment_name, data_path)
    # "mean" instead of np.mean: numpy callables in groupby.agg are
    # deprecated in recent pandas; the resulting column is still 'mean'.
    plt_df = (df.groupby(['t', 'agent']).agg({
        'cum_regret': ['mean', lower_interval, upper_interval]
    }).reset_index())
    # Flatten the two-level columns, e.g. ('cum_regret', 'mean') becomes
    # 'cum_regret_mean' and ('t', '') becomes 't_'.
    plt_df.columns = ['_'.join(i) for i in plt_df.columns.values]
    p = (gg.ggplot(plt_df) + gg.aes('t_', 'cum_regret_mean', colour='agent_') +
         gg.geom_line(size=1.25, alpha=0.75) +
         gg.geom_ribbon(gg.aes(ymin='cum_regret_lower_interval',
                               ymax='cum_regret_upper_interval',
                               fill='agent_'),
                        alpha=0.1) + gg.xlab('time period (t)') +
         gg.ylab('cumulative regret') +
         gg.scale_colour_brewer(name='agent_', type='qual', palette='Set1'))

    plot_dict = {experiment_name + '_cum_regret': p}
    return plot_dict
Exemple #15
0
def round_1_plot():
    """Build and save the 'Round 1 Attacks and Models' figure.

    Reads the round 1 CSV, orders the models as DAN, RNN, IR, draws one
    scatter panel per model and writes the PDF.
    """
    df = pd.read_csv('2019_tacl_trick/data/round_1.csv')
    df['Model'] = df['Model'].astype(
        CategoricalDtype(['DAN', 'RNN', 'IR'], ordered=True))

    # Hack: pad one dataset name so that the legend widths are the same
    # across plots.
    df['Dataset'] = df['Dataset'].map(
        lambda name: 'Round 1 - IR Adversarial    '
        if name == 'Round 1 - IR Adversarial' else name)

    strip_margin = {'t': 6, 'b': 6, 'l': 1, 'r': 5}

    p = ggplot(df) + aes(x='x', y='y', color='Dataset')
    p = p + facet_wrap('Model', nrow=1)
    p = p + geom_point(size=1.0, shape='o')
    p = p + scale_y_continuous(breaks=np.linspace(0, 1, 6), limits=[0, 0.6])
    p = p + scale_x_continuous(breaks=[0, .5, 1])
    p = p + xlab('Percent of Question Revealed') + ylab('Accuracy')
    p = p + ggtitle('Round 1 Attacks and Models')
    p = p + theme(strip_text_x=element_text(margin=strip_margin))
    p = p + scale_color_manual(
        values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
        name='Questions')

    p.save('2019_tacl_trick/auto_fig/round_1_csv.pdf', width=7.0, height=1.7)
Exemple #16
0
    def plot_zmw_stats(self, **kwargs):
        """Plot of ZMW stats for all runs.

        Note
        ----
        Raises an error if :meth:`Summaries.has_zmw_stats` is not `True`.

        Parameters
        ----------
        ``**kwargs`` : dict
            Keyword arguments passed to :meth:`Summaries.zmw_stats`.

        Returns
        -------
        plotnine.ggplot.ggplot
            Stacked bar graph of ZMW stats for each run.

        """
        df = self.zmw_stats(**kwargs)

        n_runs = len(df['name'].unique())
        n_statuses = len(df['status'].unique())

        bars = p9.geom_col(position=p9.position_stack(reverse=True),
                           width=0.8)
        # Figure width scales with the number of runs on the x axis.
        layout = p9.theme(
            axis_text_x=p9.element_text(angle=90, vjust=1, hjust=0.5),
            figure_size=(0.4 * n_runs, 2.5),
        )

        p = p9.ggplot(df, p9.aes(x='name', y='number', fill='status'))
        p = p + bars + layout + p9.ylab('number of ZMWs') + p9.xlab('')

        # Use the palette (minus its first entry) only when it has
        # enough colours for every status.
        if n_statuses < len(CBPALETTE):
            p = p + p9.scale_fill_manual(CBPALETTE[1:])

        return p
Exemple #17
0
def summary(tags, opts=None):
    """Plot and save the number of annotations per tag and background.

    ``tags`` is grouped by 'tag' and 'background' and the group sizes are
    drawn as dodged bars labelled with their counts. The figure is saved
    to 'tag_species_bg.png'. The input and the summary table are printed.

    Args:
        tags: DataFrame with 'tag' and 'background' columns.
        opts: not used by this function.

    Returns:
        The ggplot object. The previous code bound the result of
        ``.save()`` (``None``) to the plot variable, so the plot was lost
        and ``None`` was printed.
    """
    print(tags)
    tags_summary = (
        tags.groupby(["tag", "background"])
        .agg({"tag": "count"})
        .rename(columns={"tag": "n_tags"})
        .reset_index()
        .astype({"background": "category", "tag": "category"})
    )
    print(tags_summary)

    plot = (
        ggplot(
            data=tags_summary,
            mapping=aes(
                x="tag",
                y="n_tags",
                fill="background",
            ),
        )
        + geom_bar(stat="identity", show_legend=True, position=position_dodge())
        + xlab("Species")
        + ylab("Number of annotations")
        + geom_text(mapping=aes(label="n_tags"), nudge_y=15)
        + theme_classic()
        + theme(axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}))
    )
    # Save as a separate statement so the plot object stays available.
    plot.save("tag_species_bg.png", width=10, height=8)

    return plot
Exemple #18
0
def misspecified_plot(experiment_name='finite_misspecified',
                      data_path=_DEFAULT_DATA_PATH):
  """Specialized plotting script for TS tutorial paper misspecified TS.

  Args:
    experiment_name: string = name of experiment config.
    data_path: string = where to look for the files.

  Returns:
    dict with the plots 'misspecified_regret' (average instantaneous
    regret per agent) and 'misspecified_action' (average of the first
    three posterior mean entries per agent).
  """
  df = load_data(experiment_name, data_path)

  def _parse_np_array(np_string):
    # posterior_mean is stored as text such as '[0.1 0.2 0.3]'
    return np.array(np_string.replace('[', '')
                    .replace(']', '')
                    .strip()
                    .split())
  df['posterior_mean'] = df.posterior_mean.apply(_parse_np_array)

  # Action means
  new_col_list = ['mean_0', 'mean_1', 'mean_2']
  for n, col in enumerate(new_col_list):
    df[col] = df['posterior_mean'].apply(lambda x: float(x[n]))

  # "mean" instead of np.mean: numpy callables in groupby.agg are
  # deprecated in recent pandas and emit a FutureWarning.
  plt_df = (df.groupby(['agent', 't'])
            .agg({'instant_regret': 'mean',
                  'mean_0': 'mean',
                  'mean_1': 'mean',
                  'mean_2': 'mean'})
            .reset_index())

  regret_plot = (gg.ggplot(plt_df)
                 + gg.aes('t', 'instant_regret', colour='agent')
                 + gg.geom_line(size=1.25, alpha=0.75)
                 + gg.xlab('Timestep (t)')
                 + gg.ylab('Average instantaneous regret')
                 + gg.scale_colour_brewer(name='Agent', type='qual', palette='Set1')
                 + gg.coord_cartesian(ylim=(0, 0.02)))

  melt_df = pd.melt(plt_df, id_vars=['agent', 't'], value_vars=new_col_list)
  # One line per (agent, action) pair.
  melt_df['group_id'] = melt_df.agent + melt_df.variable
  action_plot = (gg.ggplot(melt_df)
                 + gg.aes('t', 'value', colour='agent', group='group_id')
                 + gg.geom_line(size=1.25, alpha=0.75)
                 + gg.coord_cartesian(ylim=(0, 0.05))
                 + gg.xlab('Timestep (t)')
                 + gg.ylab('Expected mean reward')
                 + gg.scale_colour_brewer(name='Agent', type='qual', palette='Set1'))

  plot_dict = {}
  plot_dict['misspecified_regret'] = regret_plot
  plot_dict['misspecified_action'] = action_plot
  return plot_dict
Exemple #19
0
def plot_seeds(df: pd.DataFrame,
               sweep_vars: Sequence[str] = None) -> gg.ggplot:
  """Plot per-work-unit performance, coloured by 'noise_scale'."""
  base_plot = mnist_analysis.plot_seeds(
      df_in=df, sweep_vars=sweep_vars, colour_var='noise_scale')
  # Replace the y label of the shared plot with one specific to this task.
  y_label = gg.ylab('average accuracy (removing noise)')
  return base_plot + y_label
Exemple #20
0
def plot_seeds(df: pd.DataFrame,
               sweep_vars: Optional[Sequence[str]] = None) -> gg.ggplot:
  """Plot per-work-unit performance, coloured by 'reward_scale'."""
  base_plot = bandit_analysis.plot_seeds(
      df_in=df, sweep_vars=sweep_vars, colour_var='reward_scale')
  # Replace the y label of the shared plot with one specific to this task.
  y_label = gg.ylab('average episodic return (after rescaling)')
  return base_plot + y_label
Exemple #21
0
def plot_seeds(df: pd.DataFrame,
               sweep_vars: Optional[Sequence[str]] = None) -> gg.ggplot:
  """Plot per-work-unit performance, coloured by 'noise_scale'."""
  base_plot = catch_analysis.plot_seeds(
      df_in=df, sweep_vars=sweep_vars, colour_var='noise_scale')
  # Replace the y label of the shared plot with one specific to this task.
  y_label = gg.ylab('average episodic return (removing noise)')
  return base_plot + y_label
Exemple #22
0
    def __plot(
        self,
        plot_data,
        x,
        y,
        colour,
        lbl_x,
        lbl_y,
        facet,
        facet_scales,
        facet_by,
        smoothed,
        points,
        error_bars,
        save,
    ):
        """Build a coloured line plot, optionally faceted, smoothed and saved.

        ``x``, ``y`` and ``colour`` are the aesthetic mappings; ``lbl_x``
        and ``lbl_y`` are the axis titles. With ``facet`` the plot is
        wrapped by ``facet_by`` using the layout from
        ``self.get_facet_rows``. ``smoothed`` selects a moving-average
        smoother instead of a plain line, ``points`` adds markers, and a
        truthy ``save`` is expanded as keyword arguments to ``save()``.
        ``error_bars`` is currently ignored. Returns the ggplot object.
        """
        palette = [
            "#000000",
            "#E69F00",
            "#56B4E9",
            "#009E73",
            "#0072B2",
            "#D55E00",
            "#CC79A7",
        ]
        figure = ggplot(data=plot_data, mapping=aes(x=x, y=y, colour=colour))
        figure += xlab(lbl_x)
        figure += ylab(lbl_y)

        if facet:
            # TODO: use facet as save
            n_rows, n_cols = self.get_facet_rows(plot_data, facet_by)
            figure += facet_wrap(
                facet_by, nrow=n_rows, ncol=n_cols, scales=facet_scales
            )

        if points:
            figure += geom_point()

        # TODO: error bars are not implemented; use a generic way to
        # compute them before honouring `error_bars`.

        # TODO: use smooth as save
        if smoothed:
            trend = geom_smooth(
                method="mavg",
                se=False,
                method_args={"window": 4, "center": True, "min_periods": 1},
            )
        else:
            trend = geom_line()
        figure += trend

        figure += scale_colour_manual(values=palette, guide=False)
        # NOTE(review): `label_x` is not a parameter of this method (the
        # axis title is `lbl_x`); presumably a module-level tick
        # formatter - confirm it exists.
        figure += scale_x_continuous(labels=label_x)

        figure += theme(figure_size=(15, 18), dpi=150)

        if save:
            figure.save(**save)
        return figure
Exemple #23
0
def plot_market_wide_sector_performance(all_stocks_cip: pd.DataFrame):
    """
    Plot the average cumulative change of each sector over the dates in
    all_stocks_cip. Each company is assumed to start at zero at the
    beginning of the observation period.

    all_stocks_cip is merged on its index with the 'asx_code' column of
    stocks_by_sector(). The plot is returned via plot_as_inline_html_data().
    """
    n_stocks = len(all_stocks_cip)
    # merge in sector information for each company
    code_and_sector = stocks_by_sector()
    n_unique_sectors = len(code_and_sector["sector_name"].unique())
    print("Found {} unique sectors".format(n_unique_sectors))

    df = all_stocks_cip.merge(code_and_sector, left_index=True, right_on="asx_code")
    print(
        "Found {} stocks, {} sectors and merged total: {}".format(
            n_stocks, len(code_and_sector), len(df)
        )
    )
    # running sum across the columns, i.e. cumulative change per company
    cumulative_pct_change = df.expanding(axis="columns").sum()
    # merge date-wise into df
    for date in cumulative_pct_change.columns:
        df[date] = cumulative_pct_change[date]
    # average the cumulative change over the companies of each sector
    grouped_df = df.groupby("sector_name").mean()

    # ready the dataframe for plotting
    grouped_df = pd.melt(
        grouped_df,
        ignore_index=False,
        var_name="date",
        value_name="cumulative_change_percent",
    )
    grouped_df["sector"] = grouped_df.index
    grouped_df["date"] = pd.to_datetime(grouped_df["date"])
    n_col = 3
    # Ceiling division: `n // n_col + 1` reserved an extra, empty row of
    # panels whenever the sector count was an exact multiple of n_col.
    n_row = max(1, (n_unique_sectors + n_col - 1) // n_col)
    plot = (
        p9.ggplot(
            grouped_df, p9.aes("date", "cumulative_change_percent", color="sector")
        )
        + p9.geom_line(size=1.0)
        + p9.facet_wrap("~sector", nrow=n_row, ncol=n_col, scales="free_y")
        + p9.xlab("")
        + p9.ylab("Average sector change (%)")
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=6),
            axis_text_y=p9.element_text(size=6),
            figure_size=(12, 6),
            panel_spacing=0.3,
            legend_position="none",
        )
    )
    return plot_as_inline_html_data(plot)
Exemple #24
0
 def plot_compare_accuracy(self, expo=False):
     """Bar chart of accuracy per guesser, faceted by position.

     With ``expo`` the bars are filled by 'Dataset', dodged, and the
     axes get descriptive titles; otherwise a plain bar chart is returned.
     """
     faceted = ggplot(self.acc_df) + facet_wrap('position')
     if not expo:
         plain_bars = geom_bar(stat='identity')
         return faceted + aes(x='guesser', y='accuracy') + plain_bars

     dodged_bars = geom_bar(stat='identity', position='dodge')
     return (faceted +
             aes(x='guesser', y='accuracy', fill='Dataset') +
             dodged_bars +
             xlab('Guessing Model') + ylab('Accuracy'))
Exemple #25
0
    def plot_ccs_stats(self, variable, *,
                       trim_frac=0.005, bins=25, histogram_stat='count',
                       maxcol=None, panelsize=1.75):
        """Plot histograms of CCS stats for all runs.

        Parameters
        ----------
        variable : {'length', 'passes', 'accuracy'}
            Variable for which we plot stats. You will get an error
            if :meth:`Summaries.has_stat` is not true for `variable`.
        trim_frac : float
            Trim this amount of the bottom and top fraction from the
            data before plotting. Useful if outliers greatly extend scale.
        bins : int
            Number of histogram bins.
        histogram_stat : {'count', 'density'}
            Plot the count of CCSs or their density normalized for each run.
        maxcol : None or int
            Max number of columns in faceted plot.
        panelsize : float
            Size of each plot panel.

        Returns
        -------
        plotnine.ggplot.ggplot
            A panel of histograms.

        """
        # Mark and drop rows outside the [trim_frac, 1 - trim_frac]
        # quantile range of `variable`; the quantiles are computed over
        # the whole frame returned by `ccs_stats`.
        df = (self.ccs_stats(variable)
              .assign(lower=lambda x: x[variable].quantile(trim_frac),
                      upper=lambda x: x[variable].quantile(1 - trim_frac),
                      trim=lambda x: ((x[variable] > x['upper']) |
                                      (x[variable] < x['lower']))
                      )
              .query('not trim')
              )

        npanels = len(df['name'].unique())
        if maxcol is None:
            ncol = npanels
        else:
            ncol = min(maxcol, npanels)
        nrow = math.ceil(npanels / ncol)

        # The y axis shows whatever `histogram_stat` selects, so only call
        # it a number when counts are plotted (the label used to read
        # 'number of CCSs' even for densities).
        if histogram_stat == 'count':
            y_label = 'number of CCSs'
        else:
            y_label = f"{histogram_stat} of CCSs"

        p = (p9.ggplot(df, p9.aes(variable, y=f"..{histogram_stat}..")) +
             p9.geom_histogram(bins=bins) +
             p9.facet_wrap('~ name', ncol=ncol) +
             p9.theme(figure_size=(panelsize * ncol, panelsize * nrow),
                      axis_text_x=p9.element_text(angle=90,
                                                  vjust=1,
                                                  hjust=0.5)
                      ) +
             p9.ylab(y_label)
             )

        return p
Exemple #26
0
def plot_seeds(df: pd.DataFrame,
               sweep_vars: Sequence[str] = None,
               num_episodes: int = NUM_EPISODES) -> gg.ggplot:
    """Plot the returns through time individually by run."""
    base_plot = deep_sea_analysis.plot_seeds(
        df_in=df,
        sweep_vars=sweep_vars,
        yintercept=np.exp(-1),
        num_episodes=num_episodes,
    )
    # Replace the y label of the shared plot with one specific to this task.
    y_label = gg.ylab('average episodic return (excluding additive noise)')
    return base_plot + y_label
Exemple #27
0
def plot_scores(df, title=None, xlab=None, ylab=None):
    """Line plot of the configured score columns.

    ``title``, ``xlab`` and ``ylab`` are applied only when they are not
    None. Returns the ggplot object.
    """
    g = gg.ggplot(df, gg.aes(x=cfg.SCORE_COLNAME_X, y=cfg.SCORE_COLNAME_Y))
    g = g + gg.geom_line()

    optional_parts = (
        (title, gg.ggtitle),
        (xlab, gg.xlab),
        (ylab, gg.ylab),
    )
    for text, make_component in optional_parts:
        if text is not None:
            g += make_component(text)
    return g
Exemple #28
0
def plot_scaling_log(plt_df: pd.DataFrame,
                     sweep_vars: Optional[Sequence[str]] = None,
                     with_baseline=True) -> gg.ggplot:
    """Plot scaling of learning time against exponential baseline.

    Starts from ``_base_scaling`` and puts both axes on a log10 scale with
    fixed breaks, labels the axes, then hands the plot to
    ``plotting.facet_sweep_plot`` together with ``sweep_vars``.
    """
    log_plot = _base_scaling(plt_df, sweep_vars, with_baseline)

    decorations = (
        gg.scale_x_log10(breaks=[5, 10, 20, 50]),
        gg.scale_y_log10(breaks=[100, 300, 1000, 3000, 10000, 30000]),
        gg.xlab('deep sea problem size (log scale)'),
        gg.ylab('#episodes until < 90% bad episodes (log scale)'),
    )
    for decoration in decorations:
        log_plot += decoration

    return plotting.facet_sweep_plot(log_plot, sweep_vars)
Exemple #29
0
def ensemble_plot(experiment_name='ensemble_nn', data_path=_DEFAULT_DATA_PATH):
    """Specialized plotting script for TS tutorial paper ensemble NN.

    Loads the experiment data, averages ``instant_regret`` per agent and
    timestep, and builds one line plot per agent family.

    Args:
      experiment_name: name passed to ``load_data``; also used as the prefix
        of every key in the returned dict.
      data_path: location passed to ``load_data``.

    Returns:
      Dict mapping ``'<experiment_name>_<agent_family>'`` to the plot for
      that family.
    """
    df = load_data(experiment_name, data_path)
    # The string 'mean' selects the same pandas aggregation as np.mean did,
    # without the deprecation warning newer pandas emits for NumPy callables.
    plt_df = (df.groupby(['agent', 't'])
              .agg({'instant_regret': 'mean'})
              .reset_index())

    def _get_agent_family(agent_name):
        """Classify an agent by substrings of its (lower-cased) name."""
        if 'dropout' in agent_name.lower():
            return 'Dropout'
        elif 'ensemble' in agent_name.lower():
            return 'Ensemble'
        elif '/' in agent_name.lower():
            return 'Annealing epsilon'
        else:
            return 'Fixed epsilon'

    def _rename_ensemble(agent_name):
        """Zero-pad the ensemble size so names sort in numeric order."""
        if 'ensemble' in agent_name:
            n_ensemble = agent_name.split('-')[0]
            new_name = 'ensemble=' + n_ensemble.zfill(3)

            return new_name
        else:
            return agent_name

    plt_df['agent_name'] = plt_df.agent.apply(_rename_ensemble)
    plt_df['agent_family'] = plt_df.agent.apply(_get_agent_family)

    custom_colors = ['#d53e4f', '#fdae61', '#a6d96a', '#66c2a5', '#5e4fa2']

    plot_dict = {}
    # Group by the scalar column name, not a one-element list: with a list,
    # pandas >= 2 yields 1-tuples as group keys, which would never equal
    # 'Ensemble' and would break the string concatenation of the dict key.
    for agent_family, df_family in plt_df.groupby('agent_family'):
        if agent_family == 'Ensemble':
            custom_labels = [
                'Ensemble 3', 'Ensemble 10', 'Ensemble 30', 'Ensemble 100',
                'Ensemble 300'
            ]
            gg_legend = gg.scale_colour_manual(values=custom_colors,
                                               labels=custom_labels,
                                               name='Agent')
        else:
            gg_legend = gg.scale_colour_manual(custom_colors, name='Agent')

        p = (gg.ggplot(df_family) +
             gg.aes('t', 'instant_regret', colour='agent_name') +
             gg.geom_line(size=1.25, alpha=0.75) +
             gg.facet_wrap('~ agent_family') + gg_legend +
             gg.coord_cartesian(ylim=(0, 60)) + gg.xlab('Timestep (t)') +
             gg.ylab('Average instantaneous regret') +
             gg.theme(figure_size=(6, 6)))
        plot_dict[experiment_name + '_' + agent_family] = p

    return plot_dict
 def create(self, file_path: str) -> None:
     """Render the design-pattern count bar chart and save it to ``file_path``.

     Bars show the ``count`` column per ``pattern``; each bar is annotated
     with the ``fraction`` column formatted as a percentage.
     """
     chart = ggplot(self._data, aes(x="pattern", y="count", label="fraction"))

     # Bars plus the percentage annotation sitting on top of each bar.
     chart = (chart
              + geom_bar(stat="identity", fill="#1e4f79")
              + geom_text(va='bottom', size=24, format_string='{:.1%}'))

     # Axis scales: x limits come straight from the data's pattern column.
     chart = (chart
              + scale_x_discrete(limits=self._data["pattern"])
              + scale_y_continuous(labels=comma_format(), expand=[0.1, 0]))

     # Title and axis labels.
     chart = (chart
              + ggtitle("Design Pattern Counts")
              + xlab("Design Pattern")
              + ylab("Count"))

     # Theme: large fonts, slanted x tick labels.
     chart = (chart
              + theme_classic(base_size=32, base_family="Helvetica")
              + theme(text=element_text(size=32),
                      axis_text_x=element_text(rotation=45, ha="right")))

     chart.save(file_path, width=24, height=8)
Exemple #31
0
 def plot_compare_accuracy(self, expo=False):
     """Build a bar chart of accuracy per guesser, faceted by position.

     With ``expo`` set, bars are additionally filled by ``Dataset``, placed
     side by side, and the axes get explicit labels. Otherwise a plain bar
     chart is returned.
     """
     faceted = ggplot(self.acc_df) + facet_wrap('position')

     if not expo:
         return (
             faceted
             + aes(x='guesser', y='accuracy')
             + geom_bar(stat='identity')
         )

     return (
         faceted
         + aes(x='guesser', y='accuracy', fill='Dataset')
         + geom_bar(stat='identity', position='dodge')
         + xlab('Guessing Model')
         + ylab('Accuracy')
     )
Exemple #32
0
def yoy_growth():
    """
    This creates figures showing the number of questions versus year in dataset

    For every year in the covered range, two cumulative quantities are
    plotted: the number of distinct answer pages seen up to and including
    that year, and the total number of questions up to and including that
    year. Questions whose ``page`` is ``None`` are ignored. The figure is
    saved as ``question_answer_counts.pdf`` under ``output_path``.
    """
    year_pages = defaultdict(set)
    year_questions = Counter()
    with open('data/external/datasets/qanta.mapped.2018.04.18.json') as f:
        questions = json.load(f)['questions']
    for q in questions:
        if q['page'] is not None:
            year_pages[q['year']].add(q['page'])
            year_questions[q['year']] += 1

    start_year = min(year_pages)
    # 2017 is the latest year with a full year's worth of data; including
    # the partial 2018 data would make the last point misleading.
    end_year = min(2017, max(year_pages))

    # Walk the years once, carrying the running totals forward, instead of
    # re-summing every earlier year for each year.
    year_rows = []
    seen_pages = set()
    total_questions = 0
    for year in range(start_year, end_year + 1):
        # .get avoids inserting empty entries for years without questions.
        seen_pages |= year_pages.get(year, set())
        total_questions += year_questions[year]
        year_rows.append({'year': year, 'value': len(seen_pages), 'Quantity': 'Distinct Answers'})
        year_rows.append({'year': year, 'value': total_questions, 'Quantity': 'Total Questions'})

    year_df = pd.DataFrame(year_rows)
    # Ordered categorical so 'Total Questions' comes first among the categories.
    count_cat = CategoricalDtype(categories=['Total Questions', 'Distinct Answers'], ordered=True)
    year_df['Quantity'] = year_df['Quantity'].astype(count_cat)
    eprint(year_df[year_df.Quantity == 'Total Questions'])
    p = (
        ggplot(year_df)
        + aes(x='year', y='value', color='Quantity')
        + geom_line() + geom_point()
        + xlab('Year')
        + ylab('Count up to Year (inclusive)')
        + theme_fs()
        + scale_x_continuous(breaks=list(range(start_year, end_year + 1, 2)))
    )
    p.save(path.join(output_path, 'question_answer_counts.pdf'))
Exemple #33
0
def accPlot(accsByNFeats):
    """Print a plot of accuracy against ``p`` with one coloured line per set.

    ``accsByNFeats`` is indexed first by set name and then by ``p``; each
    ``(set, p)`` pair contributes one row with its accuracy. The x axis is
    shown on a log10 scale. Nothing is returned; the plot is printed.
    """
    def _frame_for_set(set_name):
        # One single-row frame per p, indexed by str(p), stacked row-wise.
        accs = accsByNFeats[set_name]
        point_frames = [
            DataFrame({"p": p, "acc": accs[p], "set": set_name},
                      index=[str(p)])
            for p in accs
        ]
        return pd.concat(point_frames, axis=0)

    ggd = pd.concat([_frame_for_set(s) for s in accsByNFeats])
    ggd['acc'] = ggd['acc'].astype(float)

    ggo = gg.ggplot(ggd, gg.aes(x='p', y='acc', color='set'))
    layers = (
        gg.geom_line(alpha=0.5),
        gg.geom_point(),
        gg.theme_bw(),
        gg.scale_x_log10(breaks=[10, 100, 1000, 10000]),
        gg.scale_color_manual(values=['darkgray', 'black',
                                      'red', 'dodgerblue']),
        gg.ylab('Accuracy (5-fold CV)'),
    )
    for layer in layers:
        ggo += layer
    print(ggo)
Exemple #34
0
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Build per-task density histograms of ``x`` with mean markers.

    Each facet (one per ``Task``) overlays the histograms of every
    ``Method``; the per ``(Task, Method)`` mean of the data is drawn as a
    dashed vertical segment with its value printed above it.

    ``legend_position`` and ``legend_box`` are passed through to the theme.
    Returns the assembled plot object.
    """
    mean_len_df = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # The single-space column name gives the linetype legend a blank title
    # while its only entry reads 'Mean Length'.
    mean_len_df[' '] = 'Mean Length'

    histogram = geom_histogram(binwidth=2, position='identity', alpha=.6)
    mean_value_text = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        mean_len_df,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False
    )
    mean_marker = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        mean_len_df,
        inherit_aes=False, color='black'
    )
    legend_theme = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    length_plot = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + histogram
        + mean_value_text
        + mean_marker
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + legend_theme
    )
    return length_plot
Exemple #35
0
def syntactic_diversity_plots():
    """Create and save the three syntactic-diversity figures.

    Reads ``data/external/syntactic_diversity_table.json``, derives
    ``parse_ratio = unique_parses / parses`` and writes, under
    ``output_path``:

    * ``syn_div_plot.pdf`` - both metrics, one facet each, against depth;
    * ``n_unique_parses.pdf`` - unique parse count against depth;
    * ``parse_ratio.pdf`` - parse ratio against depth, y limited to [0, 1].
    """
    with open('data/external/syntactic_diversity_table.json') as f:
        rows = json.load(f)
    parse_df = pd.DataFrame(rows)
    parse_df['parse_ratio'] = parse_df['unique_parses'] / parse_df['parses']
    melt_df = pd.melt(
        parse_df,
        id_vars=['dataset', 'depth', 'overlap', 'parses'],
        value_vars=['parse_ratio', 'unique_parses'],
        var_name='metric',
        value_name='y'
    )

    facet_titles = {
        'parse_ratio': 'Average Unique Parses per Instance',
        'unique_parses': 'Count of Unique Parses',
    }

    def label_facet(name):
        # Unknown metric names map to None.
        return facet_titles.get(name)

    def label_y(ys):
        # Abbreviate whole thousands: a tick whose text ends in '000.0'
        # has that suffix replaced by 'K'.
        texts = [str(y) for y in ys]
        return [t[:-5] + 'K' if t.endswith('000.0') else t for t in texts]

    def depth_axis():
        # Build a fresh scale per plot rather than sharing one instance.
        return scale_x_continuous(
            breaks=list(range(1, 11)),
            minor_breaks=list(range(1, 11)),
            limits=[1, 10])

    faceted_plot = (
        ggplot(melt_df)
        + aes(x='depth', y='y', color='dataset')
        + facet_wrap('metric', scales='free_y', nrow=2, labeller=label_facet)
        + geom_line()
        + geom_point()
        + xlab('Parse Truncation Depth')
        + ylab('')
        + scale_color_discrete(name='Dataset')
        + scale_y_continuous(labels=label_y)
        + depth_axis()
        + theme_fs()
    )
    faceted_plot.save(path.join(output_path, 'syn_div_plot.pdf'))

    count_plot = (
        ggplot(parse_df)
        + aes(x='depth', y='unique_parses', color='dataset')
        + geom_line()
        + geom_point()
        + xlab('Parse Truncation Depth')
        + ylab('Count of Unique Parses')
        + scale_color_discrete(name='Dataset')
        + depth_axis()
        + theme_fs()
    )
    count_plot.save(path.join(output_path, 'n_unique_parses.pdf'))

    ratio_plot = (
        ggplot(parse_df)
        + aes(x='depth', y='parse_ratio', color='dataset')
        + geom_line()
        + geom_point()
        + xlab('Parse Truncation Depth')
        + ylab('Average Unique Parses per Instance')
        + scale_color_discrete(name='Dataset')
        + depth_axis()
        + scale_y_continuous(limits=[0, 1])
        + theme_fs()
    )
    ratio_plot.save(path.join(output_path, 'parse_ratio.pdf'))
Exemple #36
0
# Script: embed the 'shen' expression matrix in two dimensions with t-SNE
# and print a labelled scatter plot of the samples coloured by 'System'.

# NOTE(review): plt is presumably matplotlib.pyplot (imported outside this
# view); if so this switches on interactive mode.
plt.ion()


# Project-local data module; xnorms and annots are indexed by dataset name
# below ('shen').
import RestrictedData
xnorms = RestrictedData.xnorms
annots = RestrictedData.annots


# Fixed random_state so repeated runs use the same seed.
# NOTE(review): TSNE looks like scikit-learn's sklearn.manifold.TSNE; if so,
# recent releases rename n_iter to max_iter - confirm against the installed
# version.
tsne = TSNE(n_components=2, verbose=1,
            perplexity=10, method='barnes_hut', angle=0.5,
            init='pca', early_exaggeration=12, learning_rate=200,
            n_iter=1000, random_state=123)
tsneResults = tsne.fit_transform(xnorms['shen'].values)


# One row per sample: its label, its 'System' annotation (annotations are
# reindexed to the row order of the expression matrix) and the two embedding
# coordinates.
ggd = pd.DataFrame({'sample' : xnorms['shen'].index,
                    'system' : annots['shen'].reindex(xnorms['shen'].index)['System'],
                    'coord1' : tsneResults[:, 0],
                    'coord2' : tsneResults[:, 1]})
plt.close()
ggo = gg.ggplot(ggd, gg.aes(x='coord1', y='coord2', color='system', label='sample'))
ggo += gg.geom_point()
# Sample names are nudged upwards so they do not sit on top of the points.
ggo += gg.geom_text(nudge_y=9, show_legend=False)
# Six manual colours are supplied for the 'system' colour scale.
ggo += gg.scale_color_manual(values=['firebrick', 'goldenrod', 'lightseagreen',
                                     'darkorchid', 'darkslategray', 'dodgerblue'])
ggo += gg.theme_bw()
ggo += gg.xlab('tSNE coordinate 1')
ggo += gg.ylab('tSNE coordinate 2')
print(ggo)
Exemple #37
0
    def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
        """Plot answer accuracy against the fraction of the question revealed.

        Parameters
        ----------
        expo : bool
            When true, build the plot faceted by ``Guessing_Model`` and
            coloured by ``Dataset``, overlaying human gameplay read from
            ``data/external/all_human_gameplay.json`` when that file exists
            and ``self.no_humans`` is false. When false, return a single
            moving-average plot of ``self.char_plot_df`` coloured by
            ``Guessing_Model``.
        no_models : bool
            Only used when ``expo`` is true: plot the human gameplay alone,
            as points, without any model curves.
        columns : bool
            Only used when ``expo`` is true: stack the facets in a single
            column instead of laying them out in a single row.

        Returns
        -------
        The assembled plot object.

        Raises
        ------
        ValueError
            If ``expo`` and ``no_models`` are both set but no human gameplay
            data is available to plot.
        """
        human_data_path = 'data/external/all_human_gameplay.json'

        def _cumulative_accuracy_frame(correct_positions, wrong_positions, dataset, player):
            # Sort all answers by buzz position; 'correct' is the running
            # number of correct answers divided by the total number of
            # answers (correct and wrong).
            positions = np.array(correct_positions + wrong_positions)
            outcomes = np.array(len(correct_positions) * [1] + len(wrong_positions) * [0])
            order = np.argsort(positions)
            sorted_outcomes = outcomes[order]
            frame = pd.DataFrame({
                'correct': sorted_outcomes.cumsum() / sorted_outcomes.shape[0],
                'char_percent': positions[order],
            })
            frame['Dataset'] = dataset
            # The leading space is part of the label and must be preserved.
            frame['Guessing_Model'] = f' {player}'
            return frame

        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f'Setting limits to: {limits}')
        else:
            limits = [0, 1]

        if not expo:
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                # Save the frame that is plotted; no local ``df`` exists in
                # this branch, so referring to one raised UnboundLocalError.
                self.char_plot_df.to_json(self.save_df)
            return (
                ggplot(self.char_plot_df)
                + aes(x='char_percent', y='correct', color='Guessing_Model')
                + stat_smooth(method='mavg', se=False, method_args={'window': 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )

        # Human gameplay is optional: it needs the data file and must not be
        # disabled on the instance.
        human_df = None
        if os.path.exists(human_data_path) and not self.no_humans:
            with open(human_data_path) as f:
                all_gameplay = json.load(f)
            frames = []
            for event, name in [('parents', 'Intermediate'), ('maryland', 'Expert'), ('live', 'National')]:
                if self.merge_humans:
                    name = 'Human'
                gameplay = all_gameplay[event]
                # The 'live' event contributes no control (regular test) frame.
                if event != 'live':
                    frames.append(_cumulative_accuracy_frame(
                        gameplay['control_correct_positions'],
                        gameplay['control_wrong_positions'],
                        'Regular Test', name))

                frames.append(_cumulative_accuracy_frame(
                    gameplay['adv_correct_positions'],
                    gameplay['adv_wrong_positions'],
                    'IR Adversarial', name))

                if len(gameplay['advneural_correct_positions']) > 0:
                    frames.append(_cumulative_accuracy_frame(
                        gameplay['advneural_correct_positions'],
                        gameplay['advneural_wrong_positions'],
                        'RNN Adversarial', name))

            human_df = pd.concat(frames)
            human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
            human_dtype = CategoricalDtype(human_vals, ordered=True)
            human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(human_dtype)
            dataset_dtype = CategoricalDtype(['Regular Test', 'IR Adversarial', 'RNN Adversarial'], ordered=True)
            human_df['Dataset'] = human_df['Dataset'].astype(dataset_dtype)

        if no_models:
            if human_df is None:
                raise ValueError(
                    'no_models=True requires human gameplay data, but '
                    f'{human_data_path} is missing or humans are disabled'
                )
            p = ggplot(human_df) + geom_point(shape='.')
        else:
            df = self.char_plot_df
            # Drop the adversarial datasets of rounds that were not selected.
            if 1 not in self.rounds:
                df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
            if 2 not in self.rounds:
                df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
                df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                df.to_json(self.save_df)

            if human_df is not None:
                eprint('Loading human data')
                p = p + geom_line(data=human_df)

        if columns:
            facet_conf = facet_wrap('Guessing_Model', ncol=1)
        else:
            facet_conf = facet_wrap('Guessing_Model', nrow=1)

        # Model curves: either a moving average or binned means.
        if not no_models:
            if self.mvg_avg_char:
                chart = stat_smooth(method='mavg', se=False, method_args={'window': 400})
            else:
                chart = stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)
        else:
            chart = None

        p = (
            p + facet_conf
            + aes(x='char_percent', y='correct', color='Dataset')
        )
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, .5, 1])
            + coord_cartesian(ylim=limits)
            + xlab('Percent of Question Revealed')
            + ylab('Accuracy')
            + theme(
                strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5})
            )
            + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'], name='Questions')
        )
        if self.title != '':
            p += ggtitle(self.title)

        return p