# Third-party imports used by the functions below; project-specific helpers
# (load_train, load_test, rng_seed, evaluate, evaluate_fraction, optimize,
# create_policy, the *Policy classes, etc.) are assumed to be importable from
# the surrounding package.
import json
import logging

import numpy as np
from sklearn.datasets import load_svmlight_file


async def evaluate_baseline(data, lr, fraction, epochs, seed):
    """Ranking experiment: train a supervised baseline and report its test nDCG."""
    test = load_test(data, seed)
    baseline = train_baseline(data, lr, fraction, epochs, seed)
    test, baseline = await test, await baseline
    rng_seed(seed)
    ndcg_score, _ = evaluate_fraction(test, baseline, fraction)
    logging.info(f"[{seed}, {lr}] evaluation baseline: {ndcg_score:.4f}")
    return ndcg_score
async def evaluate_baseline(data, lr, fraction, epochs, eps, tau, seed):
    """Classification experiment: train a supervised baseline and report its test accuracy."""
    test = load_test(data, seed)
    baseline = train_baseline(data, lr, fraction, epochs, eps, tau, seed)
    test, baseline = await test, await baseline
    baseline = baseline.__deepcopy__()
    rng_seed(seed)
    acc_policy, acc_best = evaluate(test, baseline)
    logging.info(f"[{seed}, {lr}, {eps}] evaluation baseline: "
                 f"{acc_policy:.4f} (stochastic) {acc_best:.4f} (deterministic)")
    return {'policy': acc_policy, 'best': acc_best}
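# Usage sketch (not part of the original code): because load_test and train_baseline
# are started before either is awaited, each evaluate_baseline call overlaps its own
# data loading and training. The sweep below runs several learning rates concurrently;
# the dataset name and hyperparameter values are illustrative placeholders.
import asyncio


async def sweep_baseline_lrs(data="example_dataset", fraction=0.1, epochs=10,
                             eps=0.05, tau=1.0, seed=0):
    lrs = [0.01, 0.03, 0.1]
    results = await asyncio.gather(*[
        evaluate_baseline(data, lr, fraction, epochs, eps, tau, seed)
        for lr in lrs
    ])
    # Each result is a dict with 'policy' (stochastic) and 'best' (deterministic) accuracy.
    return dict(zip(lrs, results))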
async def train_baseline(data, lr, fraction, epochs, seed):
    """Ranking experiment: fit a baseline ranker on a random fraction of the training set."""
    train = await load_train(data, seed)
    policy = OnlinePolicy(train.d, lr)
    baseline_size = int(fraction * train.size)
    prng = rng_seed(seed)
    indices = prng.permutation(train.size)[0:baseline_size]
    logging.info(f"[{seed}, {lr}] training baseline (size: {baseline_size})")
    optimize_supervised(train, indices, policy, lr, epochs)
    return policy
async def train_baseline(data, lr, l2, fraction, epochs, eps, tau, seed):
    """Classification experiment: fit a baseline policy on a random fraction of the training set."""
    train = await load_train(data)
    policy = EpsgreedyPolicy(train.k, train.d, lr=lr, eps=eps)
    # policy = BoltzmannPolicy(train.k, train.d, lr=lr, tau=tau, l2=l2)
    baseline_size = int(fraction * train.n)
    prng = rng_seed(seed)
    indices = prng.permutation(train.n)[0:baseline_size]
    logging.info(f"[{seed}, {lr}, {eps}] training baseline "
                 f"(size: {baseline_size}, weights: {train.d * train.k})")
    optimize_supervised_hinge(train, indices, policy, lr, epochs)
    return policy
async def ranking_run(config, data, behavior, points, seed):
    # Load train, test and policy
    train = load_train(data, seed)
    test = load_test(data, seed)
    baseline = best_baseline(data, seed)
    train, test, baseline = await train, await test, await baseline

    # Data structure to hold output results
    out = {
        'deploy': np.zeros(len(points)),
        'learned': np.zeros(len(points)),
        'regret': np.zeros(len(points))
    }

    # Seed randomness
    prng = rng_seed(seed)

    # Build policy
    args = {'d': train.d, 'pairs': train.pairs, 'baseline': baseline.__deepcopy__()}
    args.update(vars(config))
    if behavior in ['perfect']:
        args['eta'] = 0.0
    else:
        args['eta'] = 1.0
    if not config.cold:
        args['w'] = np.copy(baseline.w)
    policy = create_policy(**args)
    if hasattr(policy, 'ucb_baseline') and hasattr(policy, 'lcb_w'):
        out['ucb_b'] = np.zeros(len(points))
        out['lcb_w'] = np.zeros(len(points))

    # Build behavior model
    click_model = build_click_model(behavior)

    # Generate training indices and seed randomness
    indices = prng.randint(0, train.size, np.max(points))

    # Evaluate on point 0
    out['deploy'][0], out['learned'][0] = evaluate(test, policy)
    log_progress(0, points, seed, data, behavior, config, out, policy)

    # Train and evaluate at specified points
    for i in range(1, len(points)):
        start = points[i - 1]
        end = points[i]
        out['regret'][i] = out['regret'][i - 1] + optimize(train, indices[start:end],
                                                           policy, click_model)
        out['deploy'][i], out['learned'][i] = evaluate(test, policy)
        if hasattr(policy, 'ucb_baseline') and hasattr(policy, 'lcb_w'):
            out['ucb_b'], out['lcb_w'] = policy.ucb_baseline, policy.lcb_w
        log_progress(i, points, seed, data, behavior, config, out, policy)

    return out
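# Usage sketch (not part of the original code): ranking_run expects `points` to be an
# increasing list of interaction counts (point 0 is evaluated before any training) and
# `config` to be a namespace whose fields are forwarded to create_policy via vars(config).
# The config below is a placeholder except `cold`, which the function reads directly to
# decide between a cold start and warm-starting from the baseline weights.
import asyncio
from types import SimpleNamespace


def run_ranking_example():
    config = SimpleNamespace(cold=False)  # real configs carry additional policy kwargs
    points = [0, 100, 1_000, 10_000]
    out = asyncio.run(ranking_run(config, "example_dataset", "perfect", points, seed=0))
    # out['deploy'] / out['learned'] hold test performance after each point,
    # out['regret'] the cumulative training regret.
    return out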
async def statistical_baseline(data, l2, seed, strategy):
    """Classification experiment: fit a ridge-regression baseline with a UCB or Thompson draw type."""
    with open("conf/classification/baselines.json", "rt") as f:
        baselines = json.load(f)
    fraction = baselines[data]['fraction']
    train = await load_train(data, seed)
    draw_type = {
        'ucb': TYPE_UCB,
        'thompson': TYPE_THOMPSON
    }[strategy]
    policy = StatisticalPolicy(train.k, train.d, l2=l2, draw_type=draw_type)
    baseline_size = int(fraction * train.n)
    prng = rng_seed(seed)
    indices = prng.permutation(train.n)[0:baseline_size]
    logging.info(f"[{seed}] training ridge regression baseline "
                 f"(size: {baseline_size}, weights: {train.d * train.k})")
    optimize_supervised_ridge(train, indices, policy)
    return policy
async def load_from_path(file_path, min_d=0, sample=1.0, seed=0, sample_inverse=False):
    """Load an svmlight-formatted classification dataset, optionally subsampling it."""
    xs, ys = load_svmlight_file(file_path)
    ys = ys.astype(np.int32)
    ys -= np.min(ys)  # shift labels so the smallest class is 0
    if sample < 1.0:
        prng = rng_seed(seed)
        indices = prng.permutation(xs.shape[0])
        if not sample_inverse:
            indices = indices[0:int(sample * xs.shape[0])]
        else:
            indices = indices[int(sample * xs.shape[0]):]
        xs = xs[indices, :]
        ys = ys[indices]
    k = np.unique(ys).shape[0]
    n = xs.shape[0]
    d = xs.shape[1]
    xs = from_scipy(xs, min_d=min_d)  # xs.todense().A
    ys.setflags(write=False)
    return ClassificationDataset(xs, ys, n, d, k)
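# Usage sketch (not part of the original code): with the same seed, `sample` and
# `sample_inverse` select complementary slices of the same permutation, which yields a
# train/test split from a single file. The path is a placeholder.
import asyncio


async def load_split_example(path="data/example.svmlight", seed=0):
    train = await load_from_path(path, sample=0.8, seed=seed, sample_inverse=False)
    test = await load_from_path(path, sample=0.8, seed=seed, sample_inverse=True)
    return train, test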
async def classification_run(config, data, points, seed, vali=0.0):
    # Load train, test and policy
    train = load_train(data, seed)
    test = load_test(data, seed)
    policy = build_policy(config, data, points, seed)
    train, test, policy = await train, await test, await policy
    policy = policy.__deepcopy__()

    # Data structure to hold output results
    out = {
        'deploy': np.zeros(len(points)),
        'learned': np.zeros(len(points)),
        'regret': np.zeros(len(points)),
        'test_regret': np.zeros(len(points)),
    }

    # Generate training indices and seed randomness
    prng = rng_seed(seed)
    indices_shuffle = prng.permutation(train.n)
    train_indices = prng.randint(0, int((1.0 - vali) * train.n), np.max(points))
    train_indices = indices_shuffle[train_indices]
    if vali != 0.0:
        vali_indices = prng.randint(int((1.0 - vali) * train.n), train.n, np.max(points))
        vali_indices = indices_shuffle[vali_indices]
    else:
        vali_indices = train_indices

    # Evaluate on point 0
    if vali == 0.0:
        out['deploy'][0], out['learned'][0] = evaluate(test, policy, np.arange(0, test.n))
    else:
        out['deploy'][0], out['learned'][0] = evaluate(
            train, policy, indices_shuffle[np.arange(int(vali * train.n), train.n)])
    out['regret'][0] = 0.0
    out['test_regret'][0] = 0.0
    log_progress(0, points, data, out, policy, config, seed)

    # Train and evaluate at specified points
    for i in range(1, len(points)):
        start = points[i - 1]
        end = points[i]
        train_regret, test_regret = optimize(train, np.copy(train_indices[start:end]),
                                             np.copy(vali_indices[start:end]), policy)
        out['regret'][i] = out['regret'][i - 1] + train_regret
        out['test_regret'][i] = out['test_regret'][i - 1] + test_regret
        if vali == 0.0:
            out['deploy'][i], out['learned'][i] = evaluate(
                test, policy, np.arange(0, test.n))
        else:
            out['deploy'][i], out['learned'][i] = evaluate(
                train, policy, indices_shuffle[np.arange(int(vali * train.n), train.n)])
        log_progress(i, points, data, out, policy, config, seed)

    return out
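# Usage sketch (not part of the original code): a multi-seed driver that runs
# classification_run concurrently and averages the deployed-performance curves.
# The dataset name and points list are placeholders; `config` is assumed to be
# whatever namespace build_policy expects.
import asyncio

import numpy as np


async def average_over_seeds(config, data="example_dataset",
                             points=(0, 100, 1_000, 10_000), seeds=range(5)):
    runs = await asyncio.gather(*[
        classification_run(config, data, list(points), seed)
        for seed in seeds
    ])
    # Average the deployed-policy curve across seeds, point by point.
    return np.mean([run['deploy'] for run in runs], axis=0)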