def test_delete(self):
    d = {'a': '3', 'b': 3, 'c': 3., 'd': 3 * np.ones(1), 'e': 'data3'}

    with Database(self.tmp_path) as db:
        db.write('test', d)

    with Database(self.tmp_path) as db:
        assert len(db.read('test', ['b'], {'a': 3})) == 1

    with Database(self.tmp_path) as db:
        db.delete('test', {'a': '3'})

    with Database(self.tmp_path) as db:
        assert len(db.read('test', ['b'], {'a': 3})) == 0
def run(ARGS, data=None, model=None, is_test=False):
    data = data or get_regression_data(ARGS.dataset, split=ARGS.split)
    model = model or get_regression_model(ARGS.model)(is_test=is_test, seed=ARGS.seed)

    model.fit(data.X_train, data.Y_train)

    res = {}

    samples = model.sample(data.X_test, ARGS.num_samples)
    data_tiled = np.tile(data.X_test[None, :, :], [ARGS.num_samples, 1, 1])
    shape = [ARGS.num_samples * data.X_test.shape[0],
             data.X_test.shape[1] + data.Y_test.shape[1]]
    A = np.reshape(np.concatenate([data_tiled, samples], -1), shape)
    B = np.concatenate([data.X_test, data.Y_test], -1)

    if ARGS.pca_dim > 0:
        AB = np.concatenate([A, B], 0)
        pca = PCA(n_components=ARGS.pca_dim).fit(AB)
        A = pca.transform(A)
        B = pca.transform(B)

    # import matplotlib.pyplot as plt
    # plt.scatter(A[:, 0], A[:, 1], color='b')
    # plt.scatter(B[:, 0], B[:, 1], color='r')
    # plt.show()

    kernel = gpflow.kernels.RBF(A.shape[-1])
    res['mmd'] = mmd(A, B, kernel)

    print(res)

    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('mmd', res)
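# The `mmd` and `gpflow` names above come from the surrounding repository and are not
# defined in this snippet. As a hedged illustration only, the following minimal NumPy
# sketch shows the (biased) kernel two-sample MMD estimate that `mmd(A, B, kernel)` is
# assumed to compute, using a plain RBF kernel and a hypothetical `rbf_mmd` name.
import numpy as np

def rbf_mmd(A, B, lengthscale=1.0):
    def k(X, Z):
        # pairwise squared Euclidean distances, then an RBF kernel
        d2 = np.sum(X**2, 1)[:, None] + np.sum(Z**2, 1)[None, :] - 2.0 * X @ Z.T
        return np.exp(-0.5 * d2 / lengthscale**2)
    # MMD^2 = E[k(a, a')] + E[k(b, b')] - 2 E[k(a, b)]
    mmd2 = np.mean(k(A, A)) + np.mean(k(B, B)) - 2.0 * np.mean(k(A, B))
    return np.sqrt(max(mmd2, 0.0))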
def setUp(self):
    self.data1 = {'a': '1', 'b': 1, 'c': 1., 'd': np.ones(1), 'e': 'data1'}
    self.data2 = {'a': '2', 'b': 2, 'c': 2., 'd': 2 * np.ones(1), 'e': 'data2'}
    self.tmp_path = 'test.db'

    with Database(self.tmp_path) as db:
        db.write('test', self.data2)

    with Database(self.tmp_path) as db:
        db.write('test', self.data1)
def run(ARGS, is_test=False):
    data = get_regression_data(ARGS.dataset, split=ARGS.split)
    Model = get_regression_model(ARGS.model)
    model = Model(is_test=is_test, seed=ARGS.seed)

    model.fit(data.X_train, data.Y_train)
    m, v = model.predict(data.X_test)

    res = {}

    l = norm.logpdf(data.Y_test, loc=m, scale=v**0.5)
    res['test_loglik'] = np.average(l)

    lu = norm.logpdf(data.Y_test * data.Y_std, loc=m * data.Y_std, scale=(v**0.5) * data.Y_std)
    res['test_loglik_unnormalized'] = np.average(lu)

    d = data.Y_test - m
    du = d * data.Y_std

    res['test_mae'] = np.average(np.abs(d))
    res['test_mae_unnormalized'] = np.average(np.abs(du))

    res['test_rmse'] = np.average(d**2)**0.5
    res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('regression', res)
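# Note on the unnormalized metrics above: for a Gaussian density,
# norm.logpdf(y * s, loc=m * s, scale=sigma * s) == norm.logpdf(y, loc=m, scale=sigma) - log(s)
# for any s > 0, so `test_loglik_unnormalized` is the normalized log-likelihood shifted by
# -log(Y_std). A small self-contained check of the identity (values are arbitrary):
import numpy as np
from scipy.stats import norm

y, m, sigma, s = 0.3, 0.1, 0.7, 2.5
assert np.isclose(norm.logpdf(y * s, loc=m * s, scale=sigma * s),
                  norm.logpdf(y, loc=m, scale=sigma) - np.log(s))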
def test_read(self):
    fields = ['b', 'c', 'd', 'e']

    with Database(self.tmp_path) as db:
        results1 = db.read('test', fields, {'a': '1'})
        results2 = db.read('test', fields, {'a': '2'})

    for k, r1, r2 in zip(fields, results1[0], results2[0]):
        assert r1 == self.data1[k]
        assert r2 == self.data2[k]
def run(ARGS, data=None, model=None, is_test=False):
    data = data or get_classification_data(ARGS.dataset, split=ARGS.split)
    model = model or get_classification_model(ARGS.model)(data.K, is_test=is_test, seed=ARGS.seed)

    def onehot(Y, K):
        return np.eye(K)[Y.flatten().astype(int)].reshape(Y.shape[:-1] + (K,))

    Y_oh = onehot(data.Y_test, data.K)[None, :, :]  # [1, N_test, K]

    model.fit(data.X_train, data.Y_train)

    p = model.predict(data.X_test)  # [N_test, K] or [samples, N_test, K]
    # 3-dim in case of approximate predictions (multiple samples per each X)
    assert p.ndim in {2, 3}

    # clip very large and small probs and renormalize
    eps = 1e-12
    p = np.clip(p, eps, 1 - eps)
    p = p / np.expand_dims(np.sum(p, -1), -1)

    assert np.all(p >= 0.0) and np.all(p <= 1.0)

    # evaluation metrics
    res = {}

    if p.ndim == 2:  # keep analysis as in the original code in case of 2-dim predictions
        logp = multinomial.logpmf(Y_oh, n=1, p=p)  # [N_test]
        res['test_loglik'] = np.average(logp)
        pred = np.argmax(p, axis=-1)
    else:  # compute metrics in case of 3-dim predictions
        res['test_loglik'] = []
        for n in range(p.shape[0]):  # iterate through samples
            logp = multinomial.logpmf(Y_oh, n=1, p=p[n])  # [N_test]
            res['test_loglik'].append(logp)

        # mixture test likelihood (mean over per data point evaluations)
        res['test_loglik'] = meanlogsumexp(res['test_loglik'])

        p = np.mean(p, axis=0)
        pred = np.argmax(p, axis=-1)

    res['test_acc'] = np.average(np.array(pred == data.Y_test.flatten()).astype(float))

    if not is_test:
        res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('classification', res)

    return res
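# `meanlogsumexp` is imported from the benchmark suite and is not defined here. As a hedged
# sketch only, it is assumed to turn a list of per-sample log-likelihood arrays into the
# log-likelihood of the equally weighted mixture over samples, averaged over test points:
import numpy as np
from scipy.special import logsumexp

def meanlogsumexp_sketch(logps):
    logps = np.asarray(logps)                       # [samples, ...] per-sample log-likelihoods
    S = logps.shape[0]
    mixture = logsumexp(logps, axis=0) - np.log(S)  # log (1/S sum_s exp(logp_s)) per test point
    return np.mean(mixture)                         # average over test points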
def run(ARGS, is_test):
    data = get_regression_data(ARGS.dataset, split=ARGS.split, prop=1.)

    ind = np.zeros(data.X_train.shape[0]).astype(bool)
    ind[:ARGS.num_initial_points] = True

    X, Y = data.X_train, data.Y_train

    Model = non_bayesian_model(ARGS.model, 'regression') or \
        import_module('bayesian_benchmarks.models.{}.models'.format(ARGS.model)).RegressionModel
    model = Model(is_test=is_test, seed=ARGS.seed)

    test_ll = []
    train_ll = []
    all_ll = []
    test_rmse = []
    train_rmse = []
    all_rmse = []

    for _ in range(min(ARGS.iterations, X.shape[0] - ARGS.num_initial_points)):
        model.fit(X[ind], Y[ind])

        m, v = model.predict(X)  # [N, D]

        variances = v.copy()
        # set the seen points to -inf so we don't choose them again
        variances[ind] = -np.inf

        # choose the highest-variance point
        i = np.argmax(variances)
        ind[i] = True

        logp = norm.logpdf(Y, loc=m, scale=v**0.5)  # [N]
        d2 = (Y - m)**2

        test_ll.append(np.average(logp[np.invert(ind)]))
        train_ll.append(np.average(logp[ind]))
        all_ll.append(np.average(logp))

        test_rmse.append(np.average(d2[np.invert(ind)])**0.5)
        train_rmse.append(np.average(d2[ind])**0.5)
        all_rmse.append(np.average(d2)**0.5)

    # save
    res = {
        'test_loglik': np.array(test_ll),
        'train_loglik': np.array(train_ll),
        'total_loglik': np.array(all_ll),
        'test_rmse': np.array(test_rmse),
        'train_rmse': np.array(train_rmse),
        'total_rmse': np.array(all_rmse),
    }
    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database() as db:
            db.write('active_learning_continuous', res)
def remove_already_run_experiments(table, experiments):
    res = []
    with Database() as db:
        for e in experiments:
            if len(db.read(table, ['test_loglik'], e)) == 0:
                res.append(e)

    s = 'originally {} experiments, but {} have already been run, so running {} experiments'
    print(s.format(len(experiments), len(experiments) - len(res), len(res)))
    return res
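# Hypothetical usage: build a grid of experiment dicts and filter out those already in the
# results database before dispatching. The dataset and model names below are illustrative only.
experiments = [{'dataset': d, 'model': m, 'split': s}
               for d in ['boston', 'concrete']
               for m in ['linear', 'variationally_sparse_gp']
               for s in range(5)]
experiments_to_run = remove_already_run_experiments('regression', experiments)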
def run(ARGS, data=None, model=None, is_test=False):
    data = data or get_regression_data(ARGS.dataset, split=ARGS.split)
    model = model or get_regression_model(ARGS.model)(is_test=is_test, seed=ARGS.seed)

    res = {}

    print('data standard deviation is: ', data.Y_std)

    start = time.time()
    model.fit(data.X_train, data.Y_train)
    fit_time = time.time() - start
    res['fit_time'] = fit_time

    start = time.time()
    m, v = model.predict(data.X_test)
    infer_time = time.time() - start
    res['infer_time'] = infer_time

    l = norm.logpdf(data.Y_test, loc=m, scale=v**0.5)
    res['test_loglik'] = np.average(l)

    lu = norm.logpdf(data.Y_test * data.Y_std, loc=m * data.Y_std, scale=(v**0.5) * data.Y_std)
    res['test_loglik_unnormalized'] = np.average(lu)

    d = data.Y_test - m
    std = v**0.5
    # fraction of test points inside the central 95% predictive interval
    cal = (d < 1.96 * std) * (d > -1.96 * std)
    du = d * data.Y_std

    res['test_mae'] = np.average(np.abs(d))
    res['test_mae_unnormalized'] = np.average(np.abs(du))

    res['test_rmse'] = np.average(d**2)**0.5
    res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    res['test_calibration'] = np.average(cal)

    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        print('writing results to {}'.format(ARGS.database_path))
        with Database(ARGS.database_path) as db:
            db.write('regression', res)

    return res
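# Hypothetical invocation, listing the ARGS fields this run function reads. In the repo these
# would normally come from an argparse parser; the values and database path are illustrative.
from argparse import Namespace

ARGS = Namespace(dataset='boston', model='linear', split=0, seed=0,
                 database_path='results.db')
res = run(ARGS, is_test=True)  # is_test=True skips the database write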
def run(ARGS, data=None, model=None, is_test=False):
    data = data or get_classification_data(ARGS.dataset, split=ARGS.split)
    model = model or get_classification_model(ARGS.model)(data.K, is_test=is_test, seed=ARGS.seed)

    def onehot(Y, K):
        return np.eye(K)[Y.flatten().astype(int)].reshape(Y.shape[:-1] + (K,))

    Y_oh = onehot(data.Y_test, data.K)[None, :, :]  # [1, N_test, K]

    model.fit(data.X_train, data.Y_train)
    p = model.predict(data.X_test)  # [N_test, K]

    # clip very large and small probs and renormalize
    eps = 1e-12
    p = np.clip(p, eps, 1 - eps)
    p = p / np.expand_dims(np.sum(p, -1), -1)

    # evaluation metrics
    res = {}

    logp = multinomial.logpmf(Y_oh, n=1, p=p)
    res['test_loglik'] = np.average(logp)

    pred = np.argmax(p, axis=-1)
    res['test_acc'] = np.average(np.array(pred == data.Y_test.flatten()).astype(float))

    res['Y_test'] = data.Y_test
    res['p_test'] = p

    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('classification', res)

    return res
def update_score_database(m, v, data, ARGS, is_test, power=None, weighting=None, model_name=None):
    res = {}

    l = norm.logpdf(data.Y_test, loc=m, scale=v**0.5)
    res['test_loglik'] = np.average(l)

    lu = norm.logpdf(data.Y_test * data.Y_std, loc=m * data.Y_std, scale=(v**0.5) * data.Y_std)
    res['test_loglik_unnormalized'] = np.average(lu)

    d = data.Y_test - m
    du = d * data.Y_std

    res['test_mae'] = np.average(np.abs(d))
    res['test_mae_unnormalized'] = np.average(np.abs(du))

    res['test_rmse'] = np.average(d**2)**0.5
    res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    res.update(ARGS.__dict__)

    if 'expert' in ARGS.model:
        # encode the expert configuration in the stored model name
        parts = ARGS.model.split('_')
        res['model'] = '_'.join([model_name, str(power), parts[1], parts[2], weighting])

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('regression', res)

    print('end', res)
    return res
def run(ARGS, data=None, model=None, is_test=False):
    data = data or get_regression_data(ARGS.dataset, split=ARGS.split)
    model = model or get_regression_model(ARGS.model)(
        is_test=is_test, seed=ARGS.seed, lr=ARGS.lr, iters=ARGS.iters)

    model.fit(data.X_train, data.Y_train)
    m, v = model.predict(data.X_test)

    res = {}

    l = norm.logpdf(data.Y_test, loc=m, scale=v**0.5)
    res['test_loglik'] = np.average(l)

    lu = norm.logpdf(data.Y_test * data.Y_std, loc=m * data.Y_std, scale=(v**0.5) * data.Y_std)
    res['test_loglik_unnormalized'] = np.average(lu)

    d = data.Y_test - m
    du = d * data.Y_std

    res['test_mae'] = np.average(np.abs(d))
    res['test_mae_unnormalized'] = np.average(np.abs(du))

    res['test_rmse'] = np.average(d**2)**0.5
    res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    res.update(ARGS.__dict__)
    # record the number of GPUs used as part of the stored model name
    res['model'] = '{}_{}'.format(res['model'], res['num_gpus'])

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('regression', res)

    return res
def run(ARGS, data=None, model=None, is_test=False):
    data = data or get_regression_data(ARGS.dataset, split=ARGS.split)
    model = model or get_regression_model(ARGS.model)(is_test=is_test, seed=ARGS.seed)

    model.fit(data.X_train, data.Y_train)

    # both [data points, output dim] or [samples, data points, output dim]
    m, v = model.predict(data.X_test)

    assert m.ndim == v.ndim
    # 3-dim in case of approximate predictions (multiple samples per each X)
    assert m.ndim in {2, 3}
    assert np.all(v >= 0.0)

    res = {}

    log_eps = np.log(1e-12)  # log probability threshold
    log_1_minus_eps = np.log(1.0 - 1e-12)

    if m.ndim == 2:  # keep analysis as in the original code in case of 2-dim predictions
        l = norm.logpdf(data.Y_test, loc=m, scale=v**0.5)
        l = np.clip(l, log_eps, log_1_minus_eps)  # clip
        res['test_loglik'] = np.average(l)

        lu = norm.logpdf(data.Y_test * data.Y_std, loc=m * data.Y_std, scale=(v**0.5) * data.Y_std)
        lu = np.clip(lu, log_eps, log_1_minus_eps)  # clip
        res['test_loglik_unnormalized'] = np.average(lu)

        d = data.Y_test - m
        du = d * data.Y_std

        res['test_mae'] = np.average(np.abs(d))
        res['test_mae_unnormalized'] = np.average(np.abs(du))

        res['test_rmse'] = np.average(d**2)**0.5
        res['test_rmse_unnormalized'] = np.average(du**2)**0.5
    else:  # compute metrics in case of 3-dim predictions
        res['test_loglik'] = []
        res['test_loglik_unnormalized'] = []

        for n in range(m.shape[0]):  # iterate through samples
            l = norm.logpdf(data.Y_test, loc=m[n], scale=v[n]**0.5)
            l = np.clip(l, log_eps, log_1_minus_eps)  # clip
            res['test_loglik'].append(l)

            lu = norm.logpdf(data.Y_test * data.Y_std, loc=m[n] * data.Y_std,
                             scale=(v[n]**0.5) * data.Y_std)
            lu = np.clip(lu, log_eps, log_1_minus_eps)  # clip
            res['test_loglik_unnormalized'].append(lu)

        # mixture test likelihood (mean over per data point evaluations)
        res['test_loglik'] = meanlogsumexp(res['test_loglik'])
        res['test_loglik_unnormalized'] = meanlogsumexp(res['test_loglik_unnormalized'])

        d = data.Y_test - np.mean(m, axis=0)
        du = d * data.Y_std

        res['test_mae'] = np.average(np.abs(d))
        res['test_mae_unnormalized'] = np.average(np.abs(du))

        res['test_rmse'] = np.average(d**2)**0.5
        res['test_rmse_unnormalized'] = np.average(du**2)**0.5

    if not is_test:
        res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('regression', res)

    return res
def read_regression_classification(db_loc, fs, models_names, datasets, task):
    fields = ['dataset', 'N', 'D'] + [m[1] for m in models_names]

    results = {}
    for f in fs:
        results[f] = {'table': {field: [] for field in fields}, 'vals': []}

    with Database(db_loc) as db:
        for dataset in datasets:
            for f in fs:
                results[f]['table']['dataset'].append(dataset[:10])
                results[f]['table']['N'].append(ALL_DATATSETS[dataset].N)
                results[f]['table']['D'].append(ALL_DATATSETS[dataset].D)

            row = {f: [] for f in fs}

            for model, name in models_names:
                res = db.read(task, fs, {'model': model, 'dataset': dataset})

                if len(res) == 0:
                    for f in fs:
                        results[f]['table'][name].append('')
                        row[f].append(np.nan)
                else:
                    print('{} {} {}'.format(model, dataset, len(res)))
                    for i, f in enumerate(fs):
                        L = [np.nan if l[i] is None else float(l[i]) for l in res]
                        m = np.nanmean(L)
                        std = np.nanstd(L) if len(L) > 1 else np.nan

                        if -1000 < m < 1000:
                            r = '{:.3f}({:.3f})'.format(m, std)
                            row[f].append(m)
                        else:
                            r = 'nan'
                            row[f].append(np.nan)

                        results[f]['table'][name].append(r)

            for f in fs:
                results[f]['vals'].append(row[f])

    for f in fs:
        if 'unnormalized' not in f:
            vals = np.array(results[f]['vals'])
            avgs = np.nanmean(vals, 0)
            meds = np.nanmedian(vals, 0)
            rks = np.nanmean(rankarray(vals), 0)

            for s, n in [[avgs, 'avg'], [meds, 'median'], [rks, 'avg rank']]:
                results[f]['table']['dataset'].append(n)
                results[f]['table']['N'].append('')
                results[f]['table']['D'].append('')
                if task == 'classification':
                    results[f]['table']['K'].append('')
                for ss, name in zip(s, [m[1] for m in models_names]):
                    results[f]['table'][name].append('{:.3f}'.format(ss))

    return results, fields
            1, feed_dict=s)[0]
        else:
            samples = model.predict_y_samples(x.reshape(1, -1), ARGS.num_predict_samples)

        Ss = samples[:, :, 0]
        bandwidth = 1.06 * np.std(Ss) * len(Ss)**(-1. / 5)  # Silverman's (1986) rule of thumb
        kde = KernelDensity(bandwidth=float(bandwidth))

        l = kde.fit(Ss).score(y.reshape(-1, 1))
        logp[i] = float(l)
        shapiro_W[i] = float(shapiro((Ss - np.average(Ss)) / np.std(Ss))[0])
        rmse[i] = (np.average(Ss) - float(y))**2

    res['test_loglik'] = np.average(logp)
    res['test_shapiro_W_median'] = np.median(shapiro_W)
    res['test_rmse'] = np.average(rmse)**0.5

    res.update(ARGS.__dict__)
    print(res)

    #################################### save

    from bayesian_benchmarks.database_utils import Database

    with Database(results_path) as db:
        db.write('conditional_density_estimation', res)
def check_needs_run(table, d):
    with Database() as db:
        try:
            return len(db.read(table, ['test_loglik'], d.__dict__)) == 0
        except:  # e.g. the table does not exist yet, so the experiment still needs to run
            return True
def read(datasets, models, splits, table, field, extra_text='', highlight_max=True,
         highlight_non_gaussian=True, use_error_bars=True):
    results = []
    results_test_shapiro_W_median = []

    with Database(database_path) as db:
        for dataset in datasets:
            for dd in models:
                for split in splits:
                    d = {'dataset': dataset, 'split': split}
                    d.update({'iterations': 100000})
                    d.update({k: dd[k] for k in ['configuration', 'mode']})

                    if True:  # _ALL_REGRESSION_DATATSETS[dataset].N < 1000:
                        res = db.read(table, [field, 'test_shapiro_W_median'], d)
                    else:
                        res = []

                    if len(res) > 0:
                        try:
                            results.append(float(res[0][0]))
                            results_test_shapiro_W_median.append(float(res[0][1]))
                        except:
                            print(res, d, dataset)
                            # results.append(np.nan)
                            # results_test_shapiro_W_median.append(np.nan)
                    else:
                        results.append(np.nan)
                        results_test_shapiro_W_median.append(np.nan)

    results = np.array(results).reshape(len(datasets), len(models), len(splits))
    results_test_shapiro_W_median = np.array(results_test_shapiro_W_median).reshape(
        len(datasets), len(models), len(splits))
    results_test_shapiro_W_median = np.average(results_test_shapiro_W_median, -1)

    results_mean = np.nanmean(results, -1)
    results_std_err = np.nanstd(results, -1) / float(len(splits))**0.5

    argmax = np.argmax(results_mean, 1)
    lower_pts = [m[a] - e[a] for m, e, a in zip(results_mean, results_std_err, argmax)]
    high_pts = results_mean + results_std_err
    argmaxes = [np.where(h > l)[0] for h, l in zip(high_pts, lower_pts)]

    rs = rank_array(np.transpose(results, [0, 2, 1]))
    rs_flat = rs.reshape(len(datasets) * len(splits), len(models))
    avg_ranks = np.average(rs_flat, 0)
    std_ranks = np.std(rs_flat, 0) / float(len(datasets) * len(splits))**0.5
    r = ['{:.2f} ({:.2f})'.format(m, s) for m, s in zip(avg_ranks, std_ranks)]

    res_combined = []
    for i, (ms, es, Ws) in enumerate(zip(results_mean, results_std_err,
                                         results_test_shapiro_W_median)):
        for j, (m, e, W) in enumerate(zip(ms, es, Ws)):
            if field == 'test_shapiro_W_median':
                if m < 0.999:
                    res_combined.append('{:.4f}'.format(m))
                else:
                    res_combined.append(' ')
            else:
                if m > -1000:
                    if use_error_bars:
                        if m > -10:
                            t = '{:.2f} ({:.2f})'.format(m, e)
                        else:
                            t = '{:.0f} ({:.0f})'.format(m, e)
                    else:
                        if m > -10:
                            t = '{:.2f}'.format(m)
                        else:
                            t = '{:.0f}'.format(m)

                    if highlight_max and (j in argmaxes[i]):
                        t = r'\textbf{' + t + '}'
                    if highlight_non_gaussian and (W < 0.99):
                        t = r'\textit{' + t + '}'

                    res_combined.append(t)
                else:
                    res_combined.append(r'$-\infty$')

    results_pandas = np.array(res_combined).reshape(results_mean.shape)

    extra_fields = []

    extra_fields.append('Avg ranks')
    results_pandas = np.concatenate([results_pandas, np.array(r).reshape(1, -1)], 0)

    extra_fields.append('Median diff from gp')
    ind = np.where(np.array([mm['nice_name'] for mm in models]) == 'G')[0][0]
    median = np.nanmedian(
        np.transpose(results - results[:, ind, :][:, None, :], [0, 2, 1]).reshape(
            len(datasets) * len(splits), len(models)), 0)
    median = ['{:.2f}'.format(m) for m in median]
    results_pandas = np.concatenate([results_pandas, np.array(median).reshape(1, -1)], 0)

    _datasets = []
    for d in datasets:
        if 'wilson' in d:
            nd = d[len('wilson_'):]
        else:
            nd = d

        if (dataset_colors[nd][0] == 0) and (dataset_colors[nd][1] == 0):
            _d = nd
        elif (dataset_colors[nd][0] == 1) and (dataset_colors[nd][1] == 0):
            _d = r'{\color{myAcolor} \textbf{' + nd + r'}\myAcolormarker}'
        elif (dataset_colors[nd][0] == 0) and (dataset_colors[nd][1] == 1):
            _d = r'{\color{myBcolor} \textbf{' + nd + r'}\myBcolormarker}'
        elif (dataset_colors[nd][0] == 1) and (dataset_colors[nd][1] == 1):
            _d = r'{\color{myCcolor} \textbf{' + nd + r'}\myCcolormarker}'
        _datasets.append(_d)

    res = pandas.DataFrame(data=results_pandas,
                           index=_datasets + extra_fields,
                           columns=[m['nice_name'] for m in models])

    res.insert(0, 'N', [_ALL_DATASETS[dataset].N for dataset in datasets] + [' '] * len(extra_fields))
    res.insert(1, 'D', [_ALL_DATASETS[dataset].D for dataset in datasets] + [' '] * len(extra_fields))
    if hasattr(_ALL_DATASETS[datasets[0]], 'K'):
        res.insert(2, 'K', [_ALL_DATASETS[dataset].K for dataset in datasets] + [' '] * len(extra_fields))

    pandas.DataFrame.to_csv(res, 'results_{}_{}{}.csv'.format(table, field, extra_text))  # , float_format='%.6f')

    with pandas.option_context("max_colwidth", 1000):
        latex = pandas.DataFrame.to_latex(res, escape=False)

    with open('results_{}_{}{}.tex'.format(table, field, extra_text), 'w') as f:
        f.writelines(latex)

    return results
def run(ARGS, is_test=False):
    data = get_classification_data(ARGS.dataset, split=ARGS.split, prop=1.)

    ind = np.zeros(data.X_train.shape[0]).astype(bool)
    ind[:ARGS.num_initial_points] = True

    X, Y = data.X_train, data.Y_train

    def onehot(Y, K):
        return np.eye(K)[Y.flatten().astype(int)].reshape(Y.shape[:-1] + (K,))

    Y_oh = onehot(Y, data.K)

    Model = get_classification_model(ARGS.model)
    model = Model(data.K, is_test=is_test, seed=ARGS.seed)

    test_ll = []
    train_ll = []
    all_ll = []
    test_acc = []
    train_acc = []
    all_acc = []

    for _ in range(min(ARGS.iterations, X.shape[0] - ARGS.num_initial_points)):
        model.fit(X[ind], Y[ind])

        p = model.predict(X)  # [N, K]

        # clip very large and small probs and renormalize
        eps = 1e-12
        p = np.clip(p, eps, 1 - eps)
        p = p / np.expand_dims(np.sum(p, -1), -1)

        # entropy of predictions at all points
        ent = multinomial.entropy(n=1, p=p)

        # set the seen points to -inf so we don't choose them again
        ent[ind] = -np.inf

        # choose the highest-entropy point to label next
        i = np.argmax(ent)
        ind[i] = True

        logp = multinomial.logpmf(Y_oh, n=1, p=p)  # [N]
        is_correct = (np.argmax(p, 1) == Y.flatten())  # [N]

        test_ll.append(np.average(logp[np.invert(ind)]))
        train_ll.append(np.average(logp[ind]))
        all_ll.append(np.average(logp))

        test_acc.append(np.average(is_correct[np.invert(ind)]))
        train_acc.append(np.average(is_correct[ind]))
        all_acc.append(np.average(is_correct))

    res = {
        'test_loglik': np.array(test_ll),
        'train_loglik': np.array(train_ll),
        'total_loglik': np.array(all_ll),
        'test_acc': np.array(test_acc),
        'train_acc': np.array(train_acc),
        'total_acc': np.array(all_acc),
    }
    res.update(ARGS.__dict__)

    if not is_test:  # pragma: no cover
        with Database(ARGS.database_path) as db:
            db.write('active_learning_discrete', res)
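# Note on the acquisition rule above: with n=1 the multinomial reduces to a categorical
# distribution, so multinomial.entropy(n=1, p=p) is the usual -sum_k p_k log p_k per row of p.
# A small self-contained check (the probabilities are arbitrary):
import numpy as np
from scipy.stats import multinomial

p = np.array([0.2, 0.3, 0.5])
assert np.isclose(multinomial.entropy(n=1, p=p), -np.sum(p * np.log(p)))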