Example #1
    def test_delete(self):
        d = {'a': '3', 'b': 3, 'c': 3., 'd': 3 * np.ones(1), 'e': 'data3'}

        with Database(self.tmp_path) as db:
            db.write('test', d)

        with Database(self.tmp_path) as db:
            assert len(db.read('test', ['b'], {'a': '3'})) == 1

        with Database(self.tmp_path) as db:
            db.delete('test', {'a': '3'})

        with Database(self.tmp_path) as db:
            assert len(db.read('test', ['b'], {'a': '3'})) == 0
Example #2
def run(ARGS, data=None, model=None, is_test=False):
    data = data or get_regression_data(ARGS.dataset, split=ARGS.split)
    model = model or get_regression_model(ARGS.model)(is_test=is_test, seed=ARGS.seed)

    model.fit(data.X_train, data.Y_train)

    res = {}

    samples = model.sample(data.X_test, ARGS.num_samples)
    data_tiled = np.tile(data.X_test[None, :, :], [ARGS.num_samples, 1, 1])
    shape = [ARGS.num_samples * data.X_test.shape[0], data.X_test.shape[1] + data.Y_test.shape[1]]
    A = np.reshape(np.concatenate([data_tiled, samples], -1), shape)
    B = np.concatenate([data.X_test, data.Y_test], -1)


    if ARGS.pca_dim > 0:
        AB = np.concatenate([A, B], 0)
        pca = PCA(n_components=ARGS.pca_dim).fit(AB)
        A = pca.transform(A)
        B = pca.transform(B)

    # import matplotlib.pyplot as plt
    # plt.scatter(A[:, 0], A[:, 1], color='b')
    # plt.scatter(B[:, 0], B[:, 1], color='r')
    # plt.show()

    kernel = gpflow.kernels.RBF(A.shape[-1])
    res['mmd'] = mmd(A, B, kernel)

    print(res)

    res.update(ARGS.__dict__)
    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('mmd', res)
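The `mmd` helper and the gpflow kernel are assumed by the snippet above. As a point of reference, here is a minimal standalone sketch of the biased MMD^2 estimator with a hard-coded RBF kernel; the names `rbf_kernel` and `mmd2_biased` are illustrative, not from the source.

import numpy as np

def rbf_kernel(X, Y, lengthscale=1.0):
    # pairwise squared distances, then the RBF kernel matrix
    d2 = np.sum(X**2, 1)[:, None] + np.sum(Y**2, 1)[None, :] - 2.0 * X @ Y.T
    return np.exp(-0.5 * d2 / lengthscale**2)

def mmd2_biased(A, B, lengthscale=1.0):
    # illustrative sketch, not the benchmark's own `mmd` (which takes a gpflow kernel)
    # biased estimate of MMD^2: E[k(a,a')] + E[k(b,b')] - 2 E[k(a,b)]
    return (rbf_kernel(A, A, lengthscale).mean()
            + rbf_kernel(B, B, lengthscale).mean()
            - 2.0 * rbf_kernel(A, B, lengthscale).mean())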
Example #3
    def setUp(self):
        self.data1 = {'a': '1', 'b': 1, 'c': 1., 'd': np.ones(1), 'e': 'data1'}
        self.data2 = {
            'a': '2',
            'b': 2,
            'c': 2.,
            'd': 2 * np.ones(1),
            'e': 'data2'
        }

        self.tmp_path = 'test.db'
        with Database(self.tmp_path) as db:
            db.write('test', self.data2)

        with Database(self.tmp_path) as db:
            db.write('test', self.data1)
Example #4
def run(ARGS, is_test=False):
    data = get_regression_data(ARGS.dataset, split=ARGS.split)

    Model = get_regression_model(ARGS.model)
    model = Model(is_test=is_test, seed=ARGS.seed)
    model.fit(data.X_train, data.Y_train)
    m, v = model.predict(data.X_test)

    res = {}

    l = norm.logpdf(data.Y_test, loc=m, scale=v**0.5)
    res['test_loglik'] = np.average(l)

    lu = norm.logpdf(data.Y_test * data.Y_std, loc=m * data.Y_std, scale=(v**0.5) * data.Y_std)
    res['test_loglik_unnormalized'] = np.average(lu)

    d = data.Y_test - m
    du = d * data.Y_std

    res['test_mae'] = np.average(np.abs(d))
    res['test_mae_unnormalized'] = np.average(np.abs(du))

    res['test_rmse'] = np.average(d**2)**0.5
    res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('regression', res)
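Note that for a scalar Y_std the unnormalized log-likelihood above differs from the normalized one only by the constant -log(Y_std), since norm.logpdf(s*y, loc=s*m, scale=s*sigma) == norm.logpdf(y, loc=m, scale=sigma) - log(s). A quick sanity check, with illustrative values:

import numpy as np
from scipy.stats import norm

y, m, sigma, s = 0.3, 0.1, 0.5, 2.7  # arbitrary point, mean, scale, Y_std
lhs = norm.logpdf(y * s, loc=m * s, scale=sigma * s)
rhs = norm.logpdf(y, loc=m, scale=sigma) - np.log(s)
assert np.isclose(lhs, rhs)  # rescaling shifts the log density by -log(s)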
Example #5
    def test_read(self):
        fields = ['b', 'c', 'd', 'e']
        with Database(self.tmp_path) as db:
            results1 = db.read('test', fields, {'a': '1'})
            results2 = db.read('test', fields, {'a': '2'})

        for k, r1, r2 in zip(fields, results1[0], results2[0]):
            assert r1 == self.data1[k]
            assert r2 == self.data2[k]
Example #6
def run(ARGS, data=None, model=None, is_test=False):

    data = data or get_classification_data(ARGS.dataset, split=ARGS.split)
    model = model or get_classification_model(ARGS.model)(data.K, is_test=is_test, seed=ARGS.seed)

    def onehot(Y, K):
        return np.eye(K)[Y.flatten().astype(int)].reshape(Y.shape[:-1] + (K,))

    Y_oh = onehot(data.Y_test, data.K)[None, :, :]  # [1 x N_test x K]

    model.fit(data.X_train, data.Y_train)
    p = model.predict(data.X_test)  # [N_test x K] or [samples x N_test x K]

    assert p.ndim in {2, 3}  # 3-dim in case of approximate predictions (multiple samples per each X)

    # clip very large and small probs
    eps = 1e-12
    p = np.clip(p, eps, 1 - eps)
    p = p / np.expand_dims(np.sum(p, -1), -1)

    assert np.all(p >= 0.0) and np.all(p <= 1.0)

    # evaluation metrics
    res = {}

    if p.ndim == 2:  # keep analysis as in the original code in the case of 2-dim predictions

        logp = multinomial.logpmf(Y_oh, n=1, p=p)  # [N_test]

        res['test_loglik'] = np.average(logp)

        pred = np.argmax(p, axis=-1)

    else:  # compute metrics in case of 3-dim predictions

        res['test_loglik'] = []

        for n in range(p.shape[0]):  # iterate through samples
            logp = multinomial.logpmf(Y_oh, n=1, p=p[n])  # [N_test]
            res['test_loglik'].append(logp)

        # Mixture test likelihood (mean over per data point evaluations)
        res['test_loglik'] = meanlogsumexp(res['test_loglik'])

        p = np.mean(p, axis=0)
        pred = np.argmax(p, axis=-1)

    res['test_acc'] = np.average(np.array(pred == data.Y_test.flatten()).astype(float))

    if not is_test:
        res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('classification', res)

    return res 
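`meanlogsumexp` is an assumed helper here. Judging from the comment, it computes the mixture log-likelihood: logsumexp over the samples axis minus the log of the number of samples, then a mean over test points. A plausible sketch:

import numpy as np
from scipy.special import logsumexp

def meanlogsumexp(logps):
    # plausible reconstruction of the assumed helper, under the mixture interpretation
    logps = np.asarray(logps)  # [samples x N_test]
    mixture = logsumexp(logps, axis=0) - np.log(logps.shape[0])  # [N_test]
    return np.mean(mixture)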
Example #7
def run(ARGS, is_test):
    data = get_regression_data(ARGS.dataset, split=ARGS.split, prop=1.)

    ind = np.zeros(data.X_train.shape[0]).astype(bool)
    ind[:ARGS.num_initial_points] = True

    X, Y = data.X_train, data.Y_train

    Model = (non_bayesian_model(ARGS.model, 'regression') or
             import_module('bayesian_benchmarks.models.{}.models'.format(ARGS.model)).RegressionModel)
    model = Model(is_test=is_test, seed=ARGS.seed)

    test_ll = []
    train_ll = []
    all_ll = []
    test_rmse = []
    train_rmse = []
    all_rmse = []

    for _ in range(min(ARGS.iterations, X.shape[0] - ARGS.num_initial_points)):
        model.fit(X[ind], Y[ind])

        m, v = model.predict(X)  # ND

        variances = v.copy()

        # set the seen ones to -inf so we don't choose them
        variances[ind] = -np.inf

        # choose the highest variance point
        i = np.argmax(variances)
        ind[i] = True

        logp = norm.logpdf(Y, loc=m, scale=v**0.5)  # N
        d2 = (Y - m)**2

        test_ll.append(np.average(logp[np.invert(ind)]))
        train_ll.append(np.average(logp[ind]))
        all_ll.append(np.average(logp))
        test_rmse.append(np.average(d2[np.invert(ind)])**0.5)
        train_rmse.append(np.average(d2[ind])**0.5)
        all_rmse.append(np.average(d2)**0.5)

    # save
    res = {
        'test_loglik': np.array(test_ll),
        'train_loglik': np.array(train_ll),
        'total_loglik': np.array(all_ll),
        'test_rmse': np.array(test_rmse),
        'train_rmse': np.array(train_rmse),
        'total_rmse': np.array(all_rmse),
    }
    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database() as db:
            db.write('active_learning_continuous', res)
Example #8
def remove_already_run_experiments(table, experiments):
    res = []

    with Database() as db:
        for e in experiments:
            if len(db.read(table, ['test_loglik'], e)) == 0:
                res.append(e)

    s = 'originally {} experiments, but {} have already been run, so running {} experiments'
    print(s.format(len(experiments), len(experiments) - len(res), len(res)))
    return res
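Illustrative usage of this filter, with hypothetical model and dataset names; each experiment is a dict of column/value pairs matching what `db.write` stored:

experiments = [{'model': m, 'dataset': d, 'split': s}
               for m in ['linear', 'variationally_sparse_gp']  # hypothetical names
               for d in ['boston', 'energy']
               for s in range(5)]
to_run = remove_already_run_experiments('regression', experiments)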
Example #9
def run(ARGS, data=None, model=None, is_test=False):

    data = data or get_regression_data(ARGS.dataset, split=ARGS.split)
    model = model or get_regression_model(ARGS.model)(is_test=is_test,
                                                      seed=ARGS.seed)
    res = {}

    print('data standard deviation is: ', data.Y_std)
    start = time.time()
    model.fit(data.X_train, data.Y_train)
    fit_time = time.time() - start
    res['fit_time'] = fit_time

    start = time.time()
    m, v = model.predict(data.X_test)
    infer_time = time.time() - start
    res['infer_time'] = infer_time

    l = norm.logpdf(data.Y_test, loc=m, scale=v**0.5)
    res['test_loglik'] = np.average(l)

    lu = norm.logpdf(data.Y_test * data.Y_std,
                     loc=m * data.Y_std,
                     scale=(v**0.5) * data.Y_std)
    res['test_loglik_unnormalized'] = np.average(lu)

    d = data.Y_test - m
    std = v**0.5
    cal = (d < 1.96 * std) * (d > -1.96 * std)

    du = d * data.Y_std

    res['test_mae'] = np.average(np.abs(d))
    res['test_mae_unnormalized'] = np.average(np.abs(du))

    res['test_rmse'] = np.average(d**2)**0.5
    res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    res['test_calibration'] = np.average(cal)

    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        print("HERE!!!!! DB IS {}".format(ARGS.database_path))
        with Database(ARGS.database_path) as db:
            db.write('regression', res)

    return res
Example #10
def run(ARGS, data=None, model=None, is_test=False):

    data = data or get_classification_data(ARGS.dataset, split=ARGS.split)
    model = model or get_classification_model(ARGS.model)(
        data.K, is_test=is_test, seed=ARGS.seed)

    def onehot(Y, K):
        return np.eye(K)[Y.flatten().astype(int)].reshape(Y.shape[:-1] + (K, ))

    Y_oh = onehot(data.Y_test, data.K)[None, :, :]  # 1, N_test, K

    model.fit(data.X_train, data.Y_train)
    p = model.predict(data.X_test)  # N_test, K

    # clip very large and small probs
    eps = 1e-12
    p = np.clip(p, eps, 1 - eps)
    p = p / np.expand_dims(np.sum(p, -1), -1)

    # evaluation metrics
    res = {}

    logp = multinomial.logpmf(Y_oh, n=1, p=p)

    res['test_loglik'] = np.average(logp)

    pred = np.argmax(p, axis=-1)

    res['test_acc'] = np.average(
        np.array(pred == data.Y_test.flatten()).astype(float))

    res['Y_test'] = data.Y_test
    res['p_test'] = p

    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('classification', res)

    return res
Example #11
def update_score_database(m,
                          v,
                          data,
                          ARGS,
                          is_test,
                          power=None,
                          weighting=None,
                          model_name=None):
    res = {}

    l = norm.logpdf(data.Y_test, loc=m, scale=v**0.5)
    res['test_loglik'] = np.average(l)

    lu = norm.logpdf(data.Y_test * data.Y_std,
                     loc=m * data.Y_std,
                     scale=(v**0.5) * data.Y_std)
    res['test_loglik_unnormalized'] = np.average(lu)

    d = data.Y_test - m
    du = d * data.Y_std

    res['test_mae'] = np.average(np.abs(d))
    res['test_mae_unnormalized'] = np.average(np.abs(du))

    res['test_rmse'] = np.average(d**2)**0.5
    res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    res.update(ARGS.__dict__)

    if 'expert' in ARGS.model:
        parts = ARGS.model.split('_')
        res['model'] = '_'.join([model_name, str(power), parts[1], parts[2], weighting])

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('regression', res)
    print('end', res)

    return res
Example #12
def run(ARGS, data=None, model=None, is_test=False):

    data = data or get_regression_data(ARGS.dataset, split=ARGS.split)
    model = model or get_regression_model(ARGS.model)(
        is_test=is_test, seed=ARGS.seed, lr=ARGS.lr, iters=ARGS.iters)

    model.fit(data.X_train, data.Y_train)
    m, v = model.predict(data.X_test)

    res = {}

    l = norm.logpdf(data.Y_test, loc=m, scale=v**0.5)
    res['test_loglik'] = np.average(l)

    lu = norm.logpdf(data.Y_test * data.Y_std,
                     loc=m * data.Y_std,
                     scale=(v**0.5) * data.Y_std)
    res['test_loglik_unnormalized'] = np.average(lu)

    d = data.Y_test - m
    du = d * data.Y_std

    res['test_mae'] = np.average(np.abs(d))
    res['test_mae_unnormalized'] = np.average(np.abs(du))

    res['test_rmse'] = np.average(d**2)**0.5
    res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    res.update(ARGS.__dict__)
    res['model'] = '{}_{}'.format(res['model'], res['num_gpus'])

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('regression', res)

    return res
Example #13
def run(ARGS, data=None, model=None, is_test=False):

    data = data or get_regression_data(ARGS.dataset, split=ARGS.split)
    model = model or get_regression_model(ARGS.model)(is_test=is_test,
                                                      seed=ARGS.seed)

    model.fit(data.X_train, data.Y_train)
    m, v = model.predict(
        data.X_test
    )  # both [data points x output dim] or [samples x data points x output dim]

    assert m.ndim == v.ndim
    assert m.ndim in {2, 3}  # 3-dim in case of approximate predictions (multiple samples per X)
    assert np.all(v >= 0.0)

    res = {}
    log_eps = np.log(1e-12)  # lower clipping threshold for the log densities
    log_1_minus_eps = np.log(1.0 - 1e-12)  # upper clipping threshold

    if m.ndim == 2:  # keep analysis as in the original code in case of 2-dim predictions

        l = norm.logpdf(data.Y_test, loc=m, scale=v**0.5)  # [N_test x D_out]
        l = np.clip(l, log_eps, log_1_minus_eps)  # clip
        res['test_loglik'] = np.average(l)

        lu = norm.logpdf(data.Y_test * data.Y_std,
                         loc=m * data.Y_std,
                         scale=(v**0.5) * data.Y_std)
        lu = np.clip(lu, log_eps, log_1_minus_eps)  # clip
        res['test_loglik_unnormalized'] = np.average(lu)

        d = data.Y_test - m
        du = d * data.Y_std

        res['test_mae'] = np.average(np.abs(d))
        res['test_mae_unnormalized'] = np.average(np.abs(du))

        res['test_rmse'] = np.average(d**2)**0.5
        res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    else:  # compute metrics in case of 3-dim predictions

        res['test_loglik'] = []
        res['test_loglik_unnormalized'] = []

        for n in range(m.shape[0]):  # iterate through samples
            l = norm.logpdf(data.Y_test, loc=m[n], scale=v[n]**0.5)
            l = np.clip(l, log_eps, log_1_minus_eps)  # clip
            res['test_loglik'].append(l)

            lu = norm.logpdf(data.Y_test * data.Y_std,
                             loc=m[n] * data.Y_std,
                             scale=(v[n]**0.5) * data.Y_std)
            lu = np.clip(lu, log_eps, log_1_minus_eps)  # clip
            res['test_loglik_unnormalized'].append(lu)

        # Mixture test likelihood (mean over per data point evaluations)
        res['test_loglik'] = meanlogsumexp(res['test_loglik'])

        # Mixture test likelihood, unnormalized version
        res['test_loglik_unnormalized'] = meanlogsumexp(
            res['test_loglik_unnormalized'])

        d = data.Y_test - np.mean(m, axis=0)
        du = d * data.Y_std

        res['test_mae'] = np.average(np.abs(d))
        res['test_mae_unnormalized'] = np.average(np.abs(du))

        res['test_rmse'] = np.average(d**2)**0.5
        res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    if not is_test:
        res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('regression', res)

    return res
Example #14
def read_regression_classification(db_loc, fs, models_names, datasets, task):
    fields = ['dataset', 'N', 'D'] + [m[1] for m in models_names]

    results = {}
    for f in fs:
        results[f] = {'table': {field: [] for field in fields}, 'vals': []}

    with Database(db_loc) as db:

        for dataset in datasets:
            for f in fs:
                results[f]['table']['dataset'].append(dataset[:10])
                results[f]['table']['N'].append(ALL_DATATSETS[dataset].N)
                results[f]['table']['D'].append(ALL_DATATSETS[dataset].D)

            row = {f: [] for f in fs}
            for model, name in models_names:
                res = db.read(task, fs, {'model': model, 'dataset': dataset})

                if len(res) == 0:
                    for f in fs:
                        results[f]['table'][name].append('')
                        row[f].append(np.nan)
                else:
                    print('{} {} {}'.format(model, dataset, len(res)))
                    for i, f in enumerate(fs):
                        L = [
                            np.nan if l[i] is None else float(l[i])
                            for l in res
                        ]
                        m = np.nanmean(L)
                        std = np.nanstd(L) if len(L) > 1 else np.nan
                        if m < 1000 and m > -1000:
                            r = '{:.3f}({:.3f})'.format(m, std)
                            row[f].append(m)
                        else:
                            r = 'nan'
                            row[f].append(np.nan)

                        results[f]['table'][name].append(r)

            for f in fs:
                results[f]['vals'].append(row[f])

    for f in fs:
        if 'unnormalized' not in f:
            vals = np.array(results[f]['vals'])

            avgs = np.nanmean(vals, 0)
            meds = np.nanmedian(vals, 0)
            rks = np.nanmean(rankarray(vals), 0)

            for s, n in [[avgs, 'avg'], [meds, 'median'], [rks, 'avg rank']]:
                results[f]['table']['dataset'].append(n)
                results[f]['table']['N'].append('')
                results[f]['table']['D'].append('')
                if task == 'classification':
                    results[f]['table']['K'].append('')
                for ss, name in zip(s, [m[1] for m in models_names]):
                    results[f]['table'][name].append('{:.3f}'.format(ss))

    return results, fields
Example #15
                                                 1,
                                                 feed_dict=s)[0]

    else:
        samples = model.predict_y_samples(x.reshape(1, -1),
                                          ARGS.num_predict_samples)

    Ss = samples[:, :, 0]
    bandwidth = 1.06 * np.std(Ss) * len(Ss)**(
        -1. / 5)  # Silverman's (1986) rule of thumb.
    kde = KernelDensity(bandwidth=float(bandwidth))

    l = kde.fit(Ss).score(y.reshape(-1, 1))
    logp[i] = float(l)
    shapiro_W[i] = float(shapiro((Ss - np.average(Ss)) / np.std(Ss))[0])
    rmse[i] = (np.average(Ss) - float(y))**2

res['test_loglik'] = np.average(logp)
res['test_shapiro_W_median'] = np.median(shapiro_W)
res['test_rmse'] = np.average(rmse)**0.5

res.update(ARGS.__dict__)
print(res)

#################################### save

from bayesian_benchmarks.database_utils import Database

with Database(results_path) as db:
    db.write('conditional_density_estimation', res)
Example #16
def check_needs_run(table, d):
    with Database() as db:
        try:
            return len(db.read(table, ['test_loglik'], d.__dict__)) == 0
        except Exception:  # e.g. the table does not exist yet
            return True
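Illustrative usage, with an argparse-style namespace and hypothetical field values; `check_needs_run` matches rows on the namespace's full `__dict__`:

from argparse import Namespace

d = Namespace(model='linear', dataset='boston', split=0)  # hypothetical values
if check_needs_run('regression', d):
    print('not in the database yet, running', d)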
Example #17
def read(datasets,
         models,
         splits,
         table,
         field,
         extra_text='',
         highlight_max=True,
         highlight_non_gaussian=True,
         use_error_bars=True):
    results = []
    results_test_shapiro_W_median = []

    with Database(database_path) as db:
        for dataset in datasets:
            for dd in models:
                for split in splits:
                    d = {'dataset': dataset, 'split': split}
                    d.update({'iterations': 100000})
                    d.update({k: dd[k] for k in ['configuration', 'mode']})

                    if True:  # _ALL_REGRESSION_DATATSETS[dataset].N < 1000:
                        res = db.read(table, [field, 'test_shapiro_W_median'],
                                      d)
                    else:
                        res = []

                    if len(res) > 0:
                        try:
                            results.append(float(res[0][0]))
                            results_test_shapiro_W_median.append(
                                float(res[0][1]))

                        except:
                            print(res, d, dataset)
                            # results.append(np.nan)
                            # results_test_shapiro_W_median.append(np.nan)
                    else:
                        results.append(np.nan)
                        results_test_shapiro_W_median.append(np.nan)

    results = np.array(results).reshape(len(datasets), len(models),
                                        len(splits))
    results_test_shapiro_W_median = np.array(
        results_test_shapiro_W_median).reshape(len(datasets), len(models),
                                               len(splits))
    results_test_shapiro_W_median = np.average(results_test_shapiro_W_median,
                                               -1)

    results_mean = np.nanmean(results, -1)
    results_std_err = np.nanstd(results, -1) / float(len(splits))**0.5

    argmax = np.argmax(results_mean, 1)
    lower_pts = [
        m[a] - e[a] for m, e, a in zip(results_mean, results_std_err, argmax)
    ]
    high_pts = results_mean + results_std_err
    argmaxes = [np.where(h > l)[0] for h, l in zip(high_pts, lower_pts)]

    rs = rank_array(np.transpose(results, [0, 2, 1]))

    rs_flat = rs.reshape(len(datasets) * len(splits), len(models))
    avg_ranks = np.average(rs_flat, 0)
    std_ranks = np.std(rs_flat, 0) / float(len(datasets) * len(splits))**0.5
    r = ['{:.2f} ({:.2f})'.format(m, s) for m, s in zip(avg_ranks, std_ranks)]

    res_combined = []
    for i, (ms, es, Ws) in enumerate(
            zip(results_mean, results_std_err, results_test_shapiro_W_median)):
        for j, (m, e, W) in enumerate(zip(ms, es, Ws)):
            if field == 'test_shapiro_W_median':
                if m < 0.999:
                    res_combined.append('{:.4f}'.format(m))
                else:
                    res_combined.append(r' ')

            else:
                if m > -1000:
                    if use_error_bars:
                        if m > -10:
                            t = '{:.2f} ({:.2f})'.format(m, e)
                        else:
                            t = '{:.0f} ({:.0f})'.format(m, e)
                    else:
                        if m > -10:
                            t = '{:.2f}'.format(m)
                        else:
                            t = '{:.0f}'.format(m)

                    if highlight_max and (j in argmaxes[i]):
                        t = r'\textbf{' + t + '}'
                    if highlight_non_gaussian and (W < 0.99):
                        t = r'\textit{' + t + '}'
                    res_combined.append(t)
                else:
                    res_combined.append(r'$-\infty$')

    results_pandas = np.array(res_combined).reshape(results_mean.shape)

    extra_fields = []
    extra_fields.append('Avg ranks')
    results_pandas = np.concatenate(
        [results_pandas, np.array(r).reshape(1, -1)], 0)

    extra_fields.append('Median diff from gp')
    ind = np.where(np.array([mm['nice_name'] for mm in models]) == 'G')[0][0]

    median = np.nanmedian(
        np.transpose(results - results[:, ind, :][:, None, :],
                     [0, 2, 1]).reshape(
                         len(datasets) * len(splits), len(models)), 0)
    median = ['{:.2f}'.format(m) for m in median]
    results_pandas = np.concatenate(
        [results_pandas, np.array(median).reshape(1, -1)], 0)

    _datasets = []
    for d in datasets:
        if 'wilson' in d:
            nd = d[len('wilson_'):]
        else:
            nd = d

        if (dataset_colors[nd][0] == 0) and (dataset_colors[nd][1] == 0):
            _d = nd

        elif (dataset_colors[nd][0] == 1) and (dataset_colors[nd][1] == 0):
            _d = r'{\color{myAcolor} \textbf{' + nd + r'}\myAcolormarker}'

        elif (dataset_colors[nd][0] == 0) and (dataset_colors[nd][1] == 1):
            _d = r'{\color{myBcolor} \textbf{' + nd + r'}\myBcolormarker}'

        elif (dataset_colors[nd][0] == 1) and (dataset_colors[nd][1] == 1):
            _d = r'{\color{myCcolor} \textbf{' + nd + r'}\myCcolormarker}'

        _datasets.append(_d)

    res = pandas.DataFrame(data=results_pandas,
                           index=_datasets + extra_fields,
                           columns=[m['nice_name'] for m in models])
    res.insert(0, 'N', [_ALL_DATASETS[dataset].N for dataset in datasets] + [
        ' ',
    ] * len(extra_fields))
    res.insert(1, 'D', [_ALL_DATASETS[dataset].D for dataset in datasets] + [
        ' ',
    ] * len(extra_fields))

    if hasattr(_ALL_DATASETS[datasets[0]], 'K'):
        res.insert(2, 'K',
                   [_ALL_DATASETS[dataset].K for dataset in datasets] + [
                       ' ',
                   ] * len(extra_fields))

    res.to_csv('results_{}_{}{}.csv'.format(table, field, extra_text))  # float_format='%.6f'
    with pandas.option_context("max_colwidth", 1000):
        latex = res.to_latex(escape=False)

    with open('results_{}_{}{}.tex'.format(table, field, extra_text),
              'w') as f:
        f.writelines(latex)

    return results
Example #18
def run(ARGS, is_test=False):
    data = get_classification_data(ARGS.dataset, split=ARGS.split, prop=1.)

    ind = np.zeros(data.X_train.shape[0]).astype(bool)
    ind[:ARGS.num_initial_points] = True

    X, Y = data.X_train, data.Y_train

    def onehot(Y, K):
        return np.eye(K)[Y.flatten().astype(int)].reshape(Y.shape[:-1] + (K, ))

    Y_oh = onehot(Y, data.K)

    Model = get_classification_model(ARGS.model)
    model = Model(data.K, is_test=is_test, seed=ARGS.seed)

    test_ll = []
    train_ll = []
    all_ll = []
    test_acc = []
    train_acc = []
    all_acc = []

    for _ in range(min(ARGS.iterations, X.shape[0] - ARGS.num_initial_points)):
        model.fit(X[ind], Y[ind])

        p = model.predict(X)  # NK
        # clip very large and small probs
        eps = 1e-12
        p = np.clip(p, eps, 1 - eps)
        p = p / np.expand_dims(np.sum(p, -1), -1)

        # entropy of predictions at all points
        ent = multinomial.entropy(n=1, p=p)

        # set the seen ones to -inf so we don't choose them
        ent[ind] = -np.inf

        # choose the highest entropy point to see next
        i = np.argmax(ent)
        ind[i] = True

        logp = multinomial.logpmf(Y_oh, n=1, p=p)  # N
        is_correct = (np.argmax(p, 1) == Y.flatten())  # N

        test_ll.append(np.average(logp[np.invert(ind)]))
        train_ll.append(np.average(logp[ind]))
        all_ll.append(np.average(logp))
        test_acc.append(np.average(is_correct[np.invert(ind)]))
        train_acc.append(np.average(is_correct[ind]))
        all_acc.append(np.average(is_correct))

    res = {
        'test_loglik': np.array(test_ll),
        'train_loglik': np.array(train_ll),
        'total_loglik': np.array(all_ll),
        'test_acc': np.array(test_acc),
        'train_acc': np.array(train_acc),
        'total_acc': np.array(all_acc),
    }
    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('active_learning_discrete', res)
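A side note on the entropy line above: for n=1 the multinomial entropy reduces to the categorical entropy -sum_k p_k log p_k, which is what makes it a sensible acquisition score here. A quick check on an illustrative distribution:

import numpy as np
from scipy.stats import multinomial

p = np.array([0.2, 0.5, 0.3])  # illustrative class probabilities
# with a single draw, the multinomial entropy is the categorical entropy
assert np.isclose(multinomial.entropy(n=1, p=p), -np.sum(p * np.log(p)))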