Ejemplo n.º 1
0
def find_winning_algo(df, algo_pattern, ds_ids, args=None):
    """Rank the algorithms matching a pattern and print the best ones.

    For every dataset in ``ds_ids`` each pair of matching algorithms is
    compared with the external ``significance`` helper; when the returned
    value is below ``args.alpha`` the algorithm with the lower ``rawloss``
    records a win over the other.  Algorithms are then eliminated one per
    round: on each dataset the survivors with the largest
    (wins - losses) share one point equally, and the survivor with the
    lowest total is dropped (ties broken at random).  The last algorithm
    eliminated is reported as the best.

    Args:
        df: DataFrame with at least the columns ``algo``, ``ds``, ``sz``
            and ``rawloss``.
        algo_pattern: Regular expression; only rows whose ``algo`` matches
            it (``re.match``, i.e. anchored at the start) are considered.
        ds_ids: Dataset ids to evaluate on.  Iterated more than once, so it
            must be a re-iterable collection.
        args: Object exposing ``alpha``, the significance threshold.  It is
            only read when at least one pair of algorithms is compared, so
            the ``None`` default works only if no comparison is made.

    Returns:
        None.  Results are printed: the number of matching algorithms, one
        line with the number of tied worst survivors per elimination round,
        the last three eliminated algorithms, and the best algorithm.
        Nothing beyond the count is printed when no algorithm matches.
    """
    rgx = re.compile(algo_pattern)
    df = df.loc[df.algo.map(lambda s: rgx.match(s) is not None)]
    algos = list(np.sort(df.algo.unique()))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('%d algos' % len(algos))
    if not algos:
        # No algorithm matched, so there is nothing to rank or report.
        return

    # stats[ds_id] = (wins, losses): per algorithm index, the set of
    # algorithm indices it significantly beats / is significantly beaten by.
    stats = {}
    for ds_id in ds_ids:
        rows = df.loc[df.ds == ds_id, ['algo', 'sz', 'rawloss']]
        n = rows.sz.max()
        # Look losses up by algorithm name rather than by row position, so
        # that a dataset missing some algorithm (or holding several rows
        # for one) cannot shift the indices relative to `algos`.
        loss = rows.groupby('algo').rawloss.mean().to_dict()
        wins = defaultdict(set)
        losses = defaultdict(set)

        for i in range(len(algos)):
            if algos[i] not in loss:
                continue
            for j in range(i + 1, len(algos)):
                if algos[j] not in loss:
                    continue
                loss_i, loss_j = loss[algos[i]], loss[algos[j]]
                pval = significance(loss_i, loss_j, n)
                if pval < args.alpha:
                    if loss_i < loss_j:
                        winner, loser = i, j
                    else:
                        winner, loser = j, i
                    wins[winner].add(loser)
                    losses[loser].add(winner)

        stats[ds_id] = (wins, losses)

    # A real list is required because survivors are removed in place.
    survivors = list(range(len(algos)))
    ranked = []
    tie_counts = []
    while survivors:
        scores = np.zeros(len(survivors))

        for ds in ds_ids:
            wins, losses = stats[ds]
            win_loss_diff = np.array(
                [len(wins[alg]) - len(losses[alg]) for alg in survivors])
            # Float dtype forces true division, so k tied leaders each get
            # 1/k instead of being floored to 0 by integer division.
            best = (win_loss_diff == win_loss_diff.max()).astype(float)
            scores += best / best.sum()

        worst = np.where(scores == scores.min())[0]
        tie_counts.append(len(worst))
        loser = survivors[np.random.choice(worst)]
        ranked.append(loser)
        survivors.remove(loser)
        # Forget the eliminated algorithm so later rounds only count
        # wins and losses among the remaining survivors.
        for ds in ds_ids:
            wins, losses = stats[ds]
            for alg in survivors:
                wins[alg].discard(loser)
                losses[alg].discard(loser)

    print(' '.join(str(count) for count in tie_counts))
    print([algos[i] for i in ranked[-3:]])
    print('best: %s' % algos[ranked[-1]])
Ejemplo n.º 2
0
def wins_losses(df, xname, yname, alpha=0.01):
    """Count the datasets on which each of two algorithms significantly wins.

    Args:
        df: DataFrame with columns ``algo``, ``ds``, ``rawloss`` and ``sz``.
        xname: Value of ``algo`` selecting the first algorithm.
        yname: Value of ``algo`` selecting the second algorithm.
        alpha: Threshold below which the value returned by the external
            ``significance`` helper counts as significant.

    Returns:
        A pair ``(x_wins, y_wins)``: the number of datasets where the mean
        ``rawloss`` of ``xname`` is significantly lower than that of
        ``yname``, and the number where it is significantly higher.
    """
    x_by_ds = df.loc[df.algo == xname].groupby('ds')
    y_by_ds = df.loc[df.algo == yname].groupby('ds')

    rawx = x_by_ds.rawloss.mean()
    rawy = y_by_ds.rawloss.mean()
    # The sample size per dataset is taken from the first algorithm's rows.
    sz = x_by_ds.sz.max()

    significant = significance(rawx, rawy, sz) < alpha
    x_wins = np.sum((rawx < rawy) & significant)
    y_wins = np.sum((rawx > rawy) & significant)
    return (x_wins, y_wins)
Ejemplo n.º 3
0
def scatterplot(df,
                alg_names,
                labels=None,
                lim_min=-0.25,
                lim_max=1.,
                args=None,
                fname=None):
    """Save a per-dataset scatter plot comparing the losses of two algorithms.

    Each point is one dataset.  Datasets whose difference is significant
    (value from ``significance`` / ``significance_cs01`` below
    ``args.alpha``) are drawn as larger red markers, the others as smaller
    black ones, and the diagonal is drawn for reference.  The figure is
    written as a PDF under ``FIGDIR``.

    Args:
        df: DataFrame with columns ``algo``, ``ds``, ``rawloss``, ``loss``
            and ``sz``.
        alg_names: Exactly two values of ``algo``; the first is plotted on
            the x axis, the second on the y axis.
        labels: Axis labels; defaults to ``alg_names``.
        lim_min: Lower limit of both axes.
        lim_max: Upper limit of both axes.
        args: Object exposing ``rawloss``, ``use_cs``, ``alpha`` and
            ``min_actions``.  Despite the ``None`` default it is required.
        fname: Base name of the output file; when omitted the name is
            derived from ``alg_names``.
    """
    assert len(alg_names) == 2
    labels = alg_names if labels is None else labels

    x_by_ds = df.loc[df.algo == alg_names[0]].groupby('ds')
    y_by_ds = df.loc[df.algo == alg_names[1]].groupby('ds')

    # Significance is always computed on the raw losses, whichever loss
    # column ends up being plotted.
    rawx = x_by_ds.rawloss.mean()
    rawy = y_by_ds.rawloss.mean()
    if args.rawloss:
        x, y = rawx, rawy
    else:
        x = x_by_ds.loss.mean()
        y = y_by_ds.loss.mean()
    sz = x_by_ds.sz.max()

    pvalue_fn = significance_cs01 if args.use_cs else significance
    pvals = pvalue_fn(rawx, rawy, sz)

    plt.figure(figsize=(2.5, 2.5))
    significant = (pvals < args.alpha)
    not_significant = np.logical_not(significant)
    marker_area = plt.rcParams['lines.markersize']**2
    # Non-significant points are drawn first so the significant ones end
    # up on top of them.
    for mask, scale, color in ((not_significant, 0.2, 'k'),
                               (significant, 0.7, 'r')):
        plt.scatter(x[mask], y[mask], s=marker_area * scale, c=color)

    plt.xlim(lim_min, lim_max)
    plt.ylim(lim_min, lim_max)
    plt.plot([lim_min, lim_max], [lim_min, lim_max], color='k')

    xlabel, ylabel = labels[0], labels[1]
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    if fname is None:
        figname = '_vs_'.join(alg_names).replace(':', '_').replace('.', '')
    else:
        figname = fname + ('_cs' if args.use_cs else '')
    if args.min_actions is not None:
        figname += '_{}a'.format(args.min_actions)
    figname += '.pdf'

    out_path = os.path.join(FIGDIR, figname)
    plt.savefig(out_path, bbox_inches='tight', pad_inches=0)
Ejemplo n.º 4
0
def wins_losses(df, xname, yname, args=None):
    """Count the datasets on which each of two algorithms significantly wins.

    Args:
        df: DataFrame with columns ``algo``, ``ds``, ``rawloss`` and ``sz``.
        xname: Value of ``algo`` selecting the first algorithm.
        yname: Value of ``algo`` selecting the second algorithm.
        args: Object exposing ``use_hoeffding`` (selects
            ``significance_cs01`` instead of ``significance``) and
            ``alpha`` (the significance threshold).  Despite the ``None``
            default it is required.

    Returns:
        A pair ``(x_wins, y_wins)``: the number of datasets where the mean
        ``rawloss`` of ``xname`` is significantly lower than that of
        ``yname``, and the number where it is significantly higher.
    """
    x_by_ds = df.loc[df.algo == xname].groupby('ds')
    rawx = x_by_ds.rawloss.mean()
    rawy = df.loc[df.algo == yname].groupby('ds').rawloss.mean()
    # The sample size per dataset is taken from the first algorithm's rows.
    sz = x_by_ds.sz.max()

    pvalue_fn = significance_cs01 if args.use_hoeffding else significance
    pvals = pvalue_fn(rawx, rawy, sz)

    significant = pvals < args.alpha
    x_wins = np.sum((rawx < rawy) & significant)
    y_wins = np.sum((rawx > rawy) & significant)
    return (x_wins, y_wins)
Ejemplo n.º 5
0
def scatterplot(df,
                alg_names,
                labels=None,
                raw=False,
                lim_min=-0.25,
                lim_max=1.,
                args=None,
                fname=None):
    """Save a per-dataset scatter plot comparing the losses of two algorithms.

    Each point is one dataset.  Datasets whose difference is significant
    (value from the external ``significance`` helper below ``args.alpha``)
    are drawn as larger red markers, the others as smaller black ones, and
    the diagonal is drawn for reference.  The figure is written as a PDF
    under ``FIGDIR``.

    Args:
        df: DataFrame with columns ``algo``, ``ds``, ``rawloss``, ``loss``
            and ``sz``.
        alg_names: Exactly two values of ``algo``; the first is plotted on
            the x axis, the second on the y axis.
        labels: Axis labels; defaults to ``alg_names``.
        raw: Plot the mean ``rawloss`` when true, the mean ``loss``
            otherwise.
        lim_min: Lower limit of both axes.
        lim_max: Upper limit of both axes.
        args: Object exposing ``alpha``.  Despite the ``None`` default it
            is required.
        fname: Base name of the output file; when omitted the name is
            derived from ``alg_names``.
    """
    assert len(alg_names) == 2
    labels = alg_names if labels is None else labels

    x_by_ds = df.loc[df.algo == alg_names[0]].groupby('ds')
    y_by_ds = df.loc[df.algo == alg_names[1]].groupby('ds')

    # Significance is always computed on the raw losses, whichever loss
    # column ends up being plotted.
    rawx = x_by_ds.rawloss.mean()
    rawy = y_by_ds.rawloss.mean()
    if raw:
        x, y = rawx, rawy
    else:
        x = x_by_ds.loss.mean()
        y = y_by_ds.loss.mean()
    sz = x_by_ds.sz.max()
    pvals = significance(rawx, rawy, sz)

    plt.figure(figsize=(2.5, 2.5))
    significant = pvals < args.alpha
    marker_area = plt.rcParams['lines.markersize']**2
    sizes = marker_area * significant.map(lambda flag: 0.7 if flag else 0.2)
    colors = significant.map(lambda flag: 'r' if flag else 'k')
    plt.scatter(x, y, s=sizes, c=colors)

    plt.xlim(lim_min, lim_max)
    plt.ylim(lim_min, lim_max)
    plt.plot([lim_min, lim_max], [lim_min, lim_max], color='k')

    xlabel, ylabel = labels[0], labels[1]
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    if fname is None:
        stem = '_vs_'.join(alg_names).replace(':', '_').replace('.', '')
    else:
        stem = fname
    figname = stem + '.pdf'

    out_path = os.path.join(FIGDIR, figname)
    plt.savefig(out_path, bbox_inches='tight', pad_inches=0)