Ejemplo n.º 1
0
def graph(df):
    
    graph = (ggplot(data=df,
           mapping=aes(x='Time', y='NDVI'))
         + geom_line(size =2, color = 'green')
         +geom_point()
         +theme_linedraw()
         + theme(axis_text_x= element_text(rotation=45, hjust=1))
         +scales.ylim(0,1)
         + geom_area(fill = "green", alpha = .4)
    )
    return graph
Ejemplo n.º 2
0
def all_stack(fold=BUZZER_DEV_FOLD):
    df_rnn = stack('output/buzzer/RNNBuzzer', 'RNN', fold)
    df_mlp = stack('output/buzzer/MLPBuzzer', 'MLP', fold)
    df_thr = stack('output/buzzer/ThresholdBuzzer', 'Threshold', fold)
    df = df_rnn.append(df_mlp, ignore_index=True)
    df = df.append(df_thr, ignore_index=True)
    model_type = CategoricalDtype(categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)
    p = (ggplot(df) +
         geom_area(aes(x='Position', y='Frequency', fill='Buzzing')) +
         facet_grid('~ Model') + theme_fs() + theme(aspect_ratio=1) +
         scale_fill_brewer(type='div', palette=7))
    p.save('output/buzzer/{}_stack.pdf'.format(fold))
Ejemplo n.º 3
0
Archivo: plot.py Proyecto: NPSDC/qb
def all_stack(fold=BUZZER_DEV_FOLD):
    df_rnn = stack("output/buzzer/RNNBuzzer", "RNN", fold)
    df_mlp = stack("output/buzzer/MLPBuzzer", "MLP", fold)
    df_thr = stack("output/buzzer/ThresholdBuzzer", "Threshold", fold)
    df = df_rnn.append(df_mlp, ignore_index=True)
    df = df.append(df_thr, ignore_index=True)
    model_type = CategoricalDtype(categories=["Threshold", "MLP", "RNN"])
    df["Model"] = df["Model"].astype(model_type)
    p = (
        ggplot(df)
        + geom_area(aes(x="Position", y="Frequency", fill="Buzzing"))
        + facet_grid("~ Model")
        + theme_fs()
        + theme(aspect_ratio=1)
        + scale_fill_brewer(type="div", palette=7)
    )
    p.save("output/buzzer/{}_stack.pdf".format(fold))
Ejemplo n.º 4
0
Archivo: plot.py Proyecto: Pinafore/qb
def all_stack(fold=BUZZER_DEV_FOLD):
    df_rnn = stack('output/buzzer/RNNBuzzer', 'RNN', fold)
    df_mlp = stack('output/buzzer/MLPBuzzer', 'MLP', fold)
    df_thr = stack('output/buzzer/ThresholdBuzzer', 'Threshold', fold)
    df = df_rnn.append(df_mlp, ignore_index=True)
    df = df.append(df_thr, ignore_index=True)
    model_type = CategoricalDtype(
        categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)
    p = (
        ggplot(df)
        + geom_area(aes(x='Position', y='Frequency', fill='Buzzing'))
        + facet_grid('~ Model')
        + theme_fs()
        + theme(
            aspect_ratio=1,
        )
        + scale_fill_brewer(type='div', palette=7)
    )
    p.save('output/buzzer/{}_stack.pdf'.format(fold))
Ejemplo n.º 5
0
def test_area_aesthetics():
    p = (ggplot(df, aes('x', 'ymax+2', group='factor(z)')) + geom_area() +
         geom_area(aes('x+width', alpha='z')) +
         geom_area(aes('x+2*width', linetype='factor(z)'),
                   color='black',
                   fill=None,
                   size=2) +
         geom_area(aes('x+3*width', color='z'), fill=None, size=2) +
         geom_area(aes('x+4*width', fill='factor(z)')) +
         geom_area(aes('x+5*width', size='z'), color='black', fill=None) +
         scale_x_continuous(
             breaks=[i * 2 * np.pi
                     for i in range(7)],
             labels=['0'] + [r'${}\pi$'.format(2 * i) for i in range(1, 7)]))

    assert p + _theme == 'area_aesthetics'
Ejemplo n.º 6
0
def test_area_aesthetics():
    p = (ggplot(df, aes('x', 'ymax+2', group='factor(z)')) +
         geom_area() +
         geom_area(aes('x+width', alpha='z')) +
         geom_area(aes('x+2*width', linetype='factor(z)'),
                   color='black', fill=None, size=2) +
         geom_area(aes('x+3*width', color='z'),
                   fill=None, size=2) +
         geom_area(aes('x+4*width', fill='factor(z)')) +
         geom_area(aes('x+5*width', size='z'),
                   color='black', fill=None) +
         scale_x_continuous(
             breaks=[i*2*np.pi for i in range(7)],
             labels=['0'] + [r'${}\pi$'.format(2*i) for i in range(1, 7)])
         )

    assert p + _theme == 'area_aesthetics'
Ejemplo n.º 7
0
def eval(fold=BUZZER_DEV_FOLD):
    if not os.path.isdir(report_dir):
        os.mkdir(report_dir)

    valid = read_data(fold)
    print('# {} data: {}'.format(fold, len(valid)))
    valid_iter = chainer.iterators.SerialIterator(valid,
                                                  args.batch_size,
                                                  repeat=False,
                                                  shuffle=False)

    args.n_input = valid[0][1][0].shape[0]
    model = RNNBuzzer(args.n_input, args.n_layers, args.n_hidden,
                      args.n_output, args.dropout)
    chainer.serializers.load_npz(args.model_path, model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    predictions = []
    buzzes = dict()
    for batch in tqdm(valid_iter):
        qids, vectors, labels, positions = list(map(list, zip(*batch)))
        batch = convert_seq(batch, device=args.gpu)
        preds = model.predict(batch['xs'], softmax=True)
        preds = [p.tolist() for p in preds]
        predictions.extend(preds)
        for i in range(len(qids)):
            buzzes[qids[i]] = []
            for pos, pred in zip(positions[i], preds[i]):
                buzzes[qids[i]].append((pos, pred))
            buzzes[qids[i]] = list(map(list, zip(*buzzes[qids[i]])))

    buzz_dir = os.path.join(buzzes_dir.format(fold))
    with open(buzz_dir, 'wb') as f:
        pickle.dump(buzzes, f)

    results = dict()
    for example_idx in range(len(valid)):
        qid, vectors, labels, positions = valid[example_idx]
        preds = predictions[example_idx]
        q_len = positions[-1]
        for i, pos in enumerate(positions):
            rel_pos = int(100 * pos / q_len)
            if rel_pos not in results:
                results[rel_pos] = []
            results[rel_pos].append((labels[i], preds[i][1]))

    freq = {'x': [], 'y': [], 'type': []}
    for k, rs in results.items():
        rs, scores = list(map(list, zip(*rs)))
        freq['x'].append(k / 100)
        freq['y'].append(sum(rs) / len(rs))
        freq['type'].append('acc')

        freq['x'].append(k / 100)
        freq['y'].append(sum(x > 0.5 for x in scores) / len(scores))
        freq['type'].append('0.5')

        freq['x'].append(k / 100)
        freq['y'].append(sum(x > 0.3 for x in scores) / len(scores))
        freq['type'].append('0.3')

        freq['x'].append(k / 100)
        freq['y'].append(sum(x > 0.7 for x in scores) / len(scores))
        freq['type'].append('0.7')
    freq_df = pd.DataFrame(freq)

    p0 = ggplot(freq_df) + geom_smooth(aes(x='x', y='y', color='type'))
    p0.save(os.path.join(report_dir, '{}_acc_buzz.pdf'.format(fold)))

    stack_freq = {'x': [], 'y': [], 'type': []}
    threshold = 0.5
    for k, rs in results.items():
        num = len(rs)
        only_oracle = sum((c == 1 and b <= threshold) for c, b in rs)
        only_buzzer = sum((c == 0 and b > threshold) for c, b in rs)
        both = sum((c == 1 and b > threshold) for c, b in rs)
        neither = sum((c == 0 and b <= threshold) for c, b in rs)

        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(only_oracle / num)
        stack_freq['type'].append('only_oracle')

        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(only_buzzer / num)
        stack_freq['type'].append('only_buzzer')

        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(both / num)
        stack_freq['type'].append('both')

        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(neither / num)
        stack_freq['type'].append('neither')

    stack_freq_df = pd.DataFrame(stack_freq)

    p1 = ggplot(stack_freq_df) + geom_area(aes(x='x', y='y', fill='type'))
    p1.save(os.path.join(report_dir, '{}_stack_area.pdf'.format(fold)))
sensitivities.append(0)
especifities_1.append(0)  #para que al plotearlo acabe en la diagonal
#pintamos ahora la curva
import matplotlib.pyplot as plt
"""%matplotlib inline
plt.plot(especifities_1,sensitivities, marker="o", linestyle="--", color="r")
x=[i*0.01 for i in range(100)]
y=[i*0.01 for i in range(100)]
plt.plot(x,y) #pinto la diagonal (el peor modelo que existe)
plt.xlabel("1-Especificidad")
plt.ylabel("Sensibilidad")
plt.title("Curva ROC")
#recordemos que mi seleccion de variables era una mierda absoluta
"""
#cuanto mayor sea el área entre la curva y la diagonal, mejor es el modelo predictivo
from sklearn import metrics
from plotnine import ggplot, aes, geom_line, geom_area, ggtitle, xlim, ylim  #si quiero importar todo pongo solo *

espec_1, sensit, _ = metrics.roc_curve(Y_test, prob)
df = pd.DataFrame({"x": espec_1, "y": sensit})

auc = metrics.auc(espec_1, sensit)  #área bajo la curva

print(df.head())
print(
    ggplot(df, aes(x="x", y="y")) + geom_line() +
    geom_line(linetype="dashed") + xlim(-0.01, 1.01) + ylim(-0.01, 1.01))
print(
    ggplot(df, aes(x="x", y="y")) + geom_area(alpha=0.25) +
    geom_line(aes(y="y")) + ggtitle("Curva ROC y AUC=%s " % str(auc)))
Ejemplo n.º 9
0
def area_plot(df,
              x,
              y,
              group=None,
              facet_x=None,
              facet_y=None,
              aggfun='sum',
              fill=False,
              sort_groups=True,
              base_size=10,
              figure_size=(6, 3)):
    '''
    Aggregates data in df and plots as a stacked area chart.

    Parameters
    ----------
    df : pd.DataFrame
      input dataframe
    x : str
      quoted expression to be plotted on the x axis
    y : str
      quoted expression to be plotted on the y axis
    group : str
      quoted expression to be used as group (ie color)
    facet_x : str
      quoted expression to be used as facet
    facet_y : str
      quoted expression to be used as facet
    aggfun : str or fun
      function to be used for aggregating (eg sum, mean, median ...)
    fill : bool
      plot shares for each group instead of absolute values
    sort_groups : bool
      sort groups by the sum of their value (otherwise alphabetical order is used)
    base_size : int
      base size for theme_ez
    figure_size :tuple of int
      figure size

    Returns
    -------
    g : EZPlot
      EZplot object

    '''

    # create a copy of the data
    dataframe = df.copy()

    # define groups and variables; remove and store (eventual) names
    names = {}
    groups = {}
    variables = {}

    for label, var in zip(['x', 'group', 'facet_x', 'facet_y'],
                          [x, group, facet_x, facet_y]):
        names[label], groups[label] = unname(var)
    names['y'], variables['y'] = unname(y)

    # fix special cases
    if x == '.index':
        groups['x'] = '.index'
        names[
            'x'] = dataframe.index.name if dataframe.index.name is not None else ''

    # aggregate data and reorder columns
    gdata = agg_data(dataframe, variables, groups, aggfun, fill_groups=True)
    gdata['y'].fillna(0, inplace=True)
    gdata = gdata[[
        c for c in ['x', 'y', 'group', 'facet_x', 'facet_y']
        if c in gdata.columns
    ]]

    if fill:
        groups_to_normalize = [
            c for c in ['x', 'facet_x', 'facet_y'] if c in gdata.columns
        ]
        total_values = gdata \
            .groupby(groups_to_normalize)['y'] \
            .sum() \
            .reset_index() \
            .rename(columns = {'y':'tot_y'})
        gdata = pd.merge(gdata, total_values, on=groups_to_normalize)
        gdata['y'] = gdata['y'] / (gdata['tot_y'] + EPSILON)
        gdata.drop('tot_y', axis=1, inplace=True)
        ylabeller = percent_labels
    else:
        ylabeller = ez_labels

    # get plot object
    g = EZPlot(gdata)

    # determine order and create a categorical type
    if sort_groups:
        sort_data_groups(g)

    # get colors
    colors = np.flip(ez_colors(g.n_groups('group')))

    # set groups
    if group is None:
        g += p9.geom_area(p9.aes(x="x", y="y"),
                          colour=None,
                          fill=ez_colors(1)[0],
                          na_rm=True)
    else:
        g += p9.geom_area(p9.aes(x="x",
                                 y="y",
                                 group="factor(group)",
                                 fill="factor(group)"),
                          colour=None,
                          na_rm=True)
        g += p9.scale_fill_manual(values=colors)

    # set facets
    if facet_x is not None and facet_y is None:
        g += p9.facet_wrap('~facet_x')
    if facet_x is not None and facet_y is not None:
        g += p9.facet_grid('facet_y~facet_x')

    # set x scale
    if g.column_is_timestamp('x'):
        g += p9.scale_x_datetime()
    elif g.column_is_categorical('x'):
        g += p9.scale_x_discrete()
    else:
        g += p9.scale_x_continuous(labels=ez_labels)

    # set y scale
    g += p9.scale_y_continuous(labels=ylabeller,
                               expand=[0, 0, 0.1 * (not fill) + 0.03, 0])

    # set axis labels
    g += \
        p9.xlab(names['x']) + \
        p9.ylab(names['y'])

    # set theme
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=p9.element_text(text=names['group'],
                                               size=base_size))

    if sort_groups:
        g += p9.guides(fill=p9.guide_legend(reverse=True),
                       color=p9.guide_legend(reverse=True))

    return g
Ejemplo n.º 10
0
        + plt9.geom_line()
        + plt9.geom_hline(yintercept=len(pokemon))
        + plt9.ylim(0, len(pokemon))
    )

    num_unique_pokemon_plot.save(args.num_unique_pokemon_plot, dpi=300)
    print("Output:", args.num_unique_pokemon_plot)

    data_2 = simulation_data.to_num_missing_data_frame()

    num_missing_pokemon_plot = (
        plt9.ggplot(
            data_2[data_2["case_id"] == 0],
            plt9.aes("roll_num", "num_missing", fill="rarity"),
        )
        + plt9.geom_area()
        + plt9.geom_hline(yintercept=len(pokemon))
        + plt9.ylim(0, len(pokemon))
        + plt9.scale_fill_hue(
            name="Rarity",
            labels=[
                "Common",
                "Uncommon",
                "Rare",
                "Very rare",
                "Legendary",
                "Ultra beast",
            ],
        )
        + plt9.xlab("Num rolls (exculding extra rolls)")
        + plt9.ylab("Num Pokémon missing")
Ejemplo n.º 11
0
def main():
    mpl.rc('mathtext', fontset='cm')

    warnings.filterwarnings('ignore',
                            r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_f')
              + titles('P_f(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pf_Ob_Ol')

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_f')
              + titles('P_f(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pf_Ob_σ')

    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_q')
              + titles('P_q(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pq_Ob_Ol')

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_q')
              + titles('P_q(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pq_Ob_σ')

    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'O_l', 'Opr')
              + titles("O'(O_b, O_l)")
              + limits((1, 10), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'Opr_Ob_Ol')

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'σ', 'Opr')
              + titles("O'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Opr_Ob_σ')

    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
            .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(my_plot(df, 'Opr', 'Pf')
              + titles("P_f(O')")
              + labs("O'", 'P_f')
              + limits((1, 20), (0, 1),
                       xbreaks=np.linspace(2, 20, 10),
                       ybreaks=np.linspace(0, 1, 11))
              + gg.geom_line()
              + gg.geom_hline(yintercept=C, linetype='dashed', color='grey')
              , 'Pf_Opr')

    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'σpr')
              + titles("σ'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'σpr_Ob_σ')

    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
            .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(my_plot(df, 'σpr', 'Pq')
              + titles("P_q(σ')")
              + labs("σ'", 'P_q')
              + limits((0, 20), (-1, 0),
                       xbreaks=np.linspace(0, 20, 11),
                       ybreaks=np.linspace(-1, 0, 11))
              + gg.geom_line()
              , 'Pq_σpr')

    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'liab_Ob_Ol_free')

    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_free')

    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'liab_Ob_Ol_qual')

    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_qual')

    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8], 'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })
    lab_aes = gg.aes('x', 'y', label='label')

    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')

        # text alignment can't be specified in an aes
        + gg.geom_text(lab_aes, data=labels.ix[:0], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.ix[1:1], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.ix[2:], ha='right', va='bottom')

        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'),
                                     mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ')
        , 'Px_shapes')