def get_candidate(model, acq, full_train_Y, q, bounds, dim):
    """Propose the next ``q`` point(s) with Expected Improvement or Thompson sampling.

    Args:
        model: Surrogate model handed to the acquisition function / sampler.
        acq: ``'EI'`` or ``'TS'``.
        full_train_Y: Observed objective values; their maximum is the EI incumbent.
        q: Number of candidates requested.
        bounds: Indexable pair ``(lower, upper)``; the same two values are used
            for every dimension.
        dim: Number of input dimensions.

    Returns:
        ``(candidate, acq_function)`` where ``acq_function`` is the EI object
        for ``acq == 'EI'`` and ``None`` for ``acq == 'TS'``.

    Raises:
        NotImplementedError: If ``acq`` is neither ``'EI'`` nor ``'TS'``.
    """
    if acq == 'EI':
        # Analytic EI for a single point, Monte-Carlo qEI for a batch.
        acq_cls = ExpectedImprovement if q == 1 else qExpectedImprovement
        EI = acq_cls(model, full_train_Y.max().item())
        lower_row = [bounds[0]] * dim
        upper_row = [bounds[1]] * dim
        bounds_t = torch.FloatTensor([lower_row, upper_row])
        candidate, _ = optimize_acqf(
            EI,
            bounds=bounds_t,
            q=q,
            num_restarts=15,
            raw_samples=5000,
        )
        return candidate, EI

    if acq == 'TS':
        sobol = SobolEngine(dim, scramble=True)
        # NOTE(review): max(20000, ...) is never below 20000, so this is always 5000.
        n_candidates = min(5000, max(20000, 2000 * dim))
        unit_samples = sobol.draw(n_candidates)
        # Rescale the unit-cube Sobol points to [bounds[0], bounds[1]].
        X_cand = (bounds[1] - bounds[0]) * unit_samples + bounds[0]
        sampler = MaxPosteriorSampling(model=model, replacement=False)
        return sampler(X_cand, num_samples=q), None

    raise NotImplementedError('Only TS and EI are implemented')
def optimize_EI(gp, best_f, n_dim):
    """
    Fit the GP hyperparameters, then optimize Expected Improvement on [0, 1]^n_dim.

    Reference: https://botorch.org/api/optim.html

    Arguments forwarded to ``joint_optimize``:
        bounds: 2d-ndarray (2, D)
            The values of lower and upper bound of each parameter.
        q: int
            The number of candidates to sample
        num_restarts: int
            The number of starting points for multistart optimization.
        raw_samples: int
            The number of initial points.

    Returns for joint_optimize is (num_restarts, q, D); only its first entry
    is returned, converted to a numpy array.
    """
    # Fit the GP through its exact marginal log likelihood.
    fit_gpytorch_model(ExactMarginalLogLikelihood(gp.likelihood, gp))

    # EI is built with maximize=False.
    acquisition = ExpectedImprovement(gp, best_f=best_f, maximize=False)

    unit_cube = np.vstack((np.zeros(n_dim), np.ones(n_dim)))
    candidates = joint_optimize(
        acquisition,
        bounds=torch.from_numpy(unit_cube),
        q=1,
        num_restarts=3,
        raw_samples=15,
    )
    return np.array(candidates[0])
def test(dim=1):
    """Smoke test: build a SingleTaskGP on seven hard-coded 1-D points and maximize EI once.

    Only ``dim == 1`` is supported (asserted). The resulting candidate and its
    acquisition value are printed; nothing is returned.
    """
    assert dim == 1

    train_x = torch.Tensor([
        [0.93452506],
        [0.18872502],
        [0.89790337],
        [0.95841797],
        [0.82335255],
        [0.45000000],
        [0.50000000],
    ])
    observations = torch.Tensor(
        [-0.4532849, -0.66614552, -0.92803395, 0.08880341, -0.27683621, 1.000000, 1.500000]
    )
    # The GP expects a trailing output dimension.
    train_y = observations[:, None]

    model = SingleTaskGP(train_x, train_y)
    EI = ExpectedImprovement(model, best_f=0.2)

    print("[get_next_point()] Computing next candidate by maximizing the acquisition function ...")
    Nrestarts = 10
    options = {
        "batch_limit": 50,
        "maxiter": 200,
        "ftol": 1e-9,
        "method": "L-BFGS-B",
        "iprint": 2,
        "maxls": 30,
        "disp": True,
    }
    unit_bounds = torch.Tensor([[0.0] * dim, [1.0] * dim], device=device)
    x_next, alpha_next = optimize_acqf(
        acq_function=EI,
        bounds=unit_bounds,
        q=1,
        num_restarts=Nrestarts,
        raw_samples=500,
        return_best_only=True,
        options=options,
    )

    print("x_next:", x_next)
    print("alpha_next:", alpha_next)
def test_EI(self):
    """Optimized EI candidates must stay within [0, 1] (up to EPS) for both models and dtypes."""
    for double in (True, False):
        self._setUp(double=double)
        # Same check for both fixture models, in the original order.
        for model_attr in ("model_st", "model_fn"):
            EI = ExpectedImprovement(getattr(self, model_attr), best_f=0.0)
            candidates, _ = optimize_acqf(
                acq_function=EI,
                bounds=self.bounds,
                q=1,
                num_restarts=10,
                raw_samples=20,
                options={"maxiter": 5},
            )
            self.assertTrue(-EPS <= candidates <= 1 + EPS)
def calc_int_r_and_add_points(self, z: object, value_ext: object, applied: bool):
    '''
    Add the points to the buffer and compute the reward for the agent.

    When ``applied`` is true, the reward of every vector in ``z`` is its Expected
    Improvement under ``self.model`` with ``value_ext.max()`` as incumbent; otherwise
    the reward is an all-zero array of shape ``(self.num_workers, 1)``.
    In both cases ``(z, value_ext)`` is handed to ``self.add_points`` afterwards.
    '''
    if not applied:
        int_r = np.zeros((self.num_workers, 1))
    else:
        ei = ExpectedImprovement(model=self.model, best_f=value_ext.max())
        with torch.no_grad():
            # Evaluate EI one vector at a time (each gets a leading batch dim).
            int_r = np.stack([ei(vector.unsqueeze(0)).numpy() for vector in z])

    self.add_points(z, value_ext)
    return int_r
def step(self, snapshot_mode: str = 'latest', meta_info: dict = None):
    """
    Run one iteration, walking through the stages selected by `self.curr_checkpoint`.

    The stages are checked in the order -2, -1, 0, 1, 2 and each one finishes by calling
    `self.reached_checkpoint()`.

    :param snapshot_mode: not read inside this method
    :param meta_info: forwarded to `pyrado.save()` when storing candidates, values and argmax
    """
    # Save snapshot to save the correct iteration count
    self.save_snapshot()

    if self.curr_checkpoint == -2:
        # Train the initial policies in the source domain
        self.train_init_policies()
        self.reached_checkpoint()  # setting counter to -1

    if self.curr_checkpoint == -1:
        # Evaluate the initial policies in the target domain
        self.eval_init_policies()
        self.reached_checkpoint()  # setting counter to 0

    if self.curr_checkpoint == 0:
        # Normalize the input data and standardize the output data
        inputs_norm = self.ddp_projector.project_to(self.cands)
        targets_std = standardize(self.cands_values).unsqueeze(1)

        # Create and fit the GP model
        gp = SingleTaskGP(inputs_norm, targets_std)
        gp.likelihood.noise_covar.register_constraint('raw_noise', GreaterThan(1e-5))
        fit_gpytorch_model(ExactMarginalLogLikelihood(gp.likelihood, gp))
        print_cbt('Fitted the GP.', 'g')

        # Acquisition functions
        if self.acq_fcn_type == 'UCB':
            acq_fcn = UpperConfidenceBound(gp, beta=self.acq_param.get('beta', 0.1), maximize=True)
        elif self.acq_fcn_type == 'EI':
            acq_fcn = ExpectedImprovement(gp, best_f=targets_std.max().item(), maximize=True)
        elif self.acq_fcn_type == 'PI':
            acq_fcn = ProbabilityOfImprovement(gp, best_f=targets_std.max().item(), maximize=True)
        else:
            raise pyrado.ValueErr(given=self.acq_fcn_type, eq_constraint="'UCB', 'EI', 'PI'")

        # Optimize acquisition function (on the unit cube) and get new candidate point
        unit_cube = to.stack([to.zeros(self.ddp_space.flat_dim), to.ones(self.ddp_space.flat_dim)])
        cand_norm, _ = optimize_acqf(
            acq_function=acq_fcn,
            bounds=unit_cube,
            q=1,
            num_restarts=self.acq_restarts,
            raw_samples=self.acq_samples
        )
        next_cand = self.ddp_projector.project_back(cand_norm)
        print_cbt(f'Found the next candidate: {next_cand.numpy()}', 'g')
        self.cands = to.cat([self.cands, next_cand], dim=0)
        pyrado.save(self.cands, 'candidates', 'pt', self.save_dir, meta_info)
        self.reached_checkpoint()  # setting counter to 1

    if self.curr_checkpoint == 1:
        # Train and evaluate a new policy, repeat if the resulting policy did not exceed the success threshold
        retry_until_success = until_thold_exceeded(self.thold_succ_subrtn.item(), self.max_subrtn_rep)
        train_with_retries = retry_until_success(self.train_policy_sim)
        train_with_retries(self.cands[-1, :], prefix=f'iter_{self._curr_iter}')
        self.reached_checkpoint()  # setting counter to 2

    if self.curr_checkpoint == 2:
        # Evaluate the current policy in the target domain
        policy = pyrado.load(
            self.policy, 'policy', 'pt', self.save_dir, meta_info=dict(prefix=f'iter_{self._curr_iter}')
        )
        self.curr_cand_value = self.eval_policy(
            self.save_dir,
            self._env_real,
            policy,
            self.mc_estimator,
            f'iter_{self._curr_iter}',
            self.num_eval_rollouts_real,
        )
        self.cands_values = to.cat([self.cands_values, self.curr_cand_value.view(1)], dim=0)
        pyrado.save(self.cands_values, 'candidates_values', 'pt', self.save_dir, meta_info)

        # Store the argmax after training and evaluating
        latest_argmax = BayRn.argmax_posterior_mean(
            self.cands, self.cands_values.unsqueeze(1), self.ddp_space, self.acq_restarts, self.acq_samples
        )
        self.argmax_cand = to.cat([self.argmax_cand, latest_argmax], dim=0)
        pyrado.save(self.argmax_cand, 'candidates_argmax', 'pt', self.save_dir, meta_info)
        self.reached_checkpoint()  # setting counter to 0
# Record the best values found by the initial random batch.
best_observed_ei.append(best_observed_value_ei)
best_observed_nei.append(best_observed_value_nei)
# The random baseline starts from the EI run's initial best value.
best_random.append(best_observed_value_ei)

# run N_BATCH rounds of BayesOpt after the initial random batch
for iteration in range(1, N_BATCH + 1):
    t0 = time.time()

    # fit the models
    fit_gpytorch_model(mll_ei)
    fit_gpytorch_model(mll_nei)

    # for best_f, we use the best observed noisy values as an approximation
    EI = ExpectedImprovement(
        model=model_ei,
        best_f=(train_obj_ei).max()
    )
    # Noisy EI takes the observed inputs instead of an explicit incumbent.
    NEI = NoisyExpectedImprovement(
        model=model_nei,
        X_observed=train_x_nei
    )

    # optimize and get new observation
    new_x_ei, new_obj_ei = optimize_acqf_and_get_observation(EI)
    new_x_nei, new_obj_nei = optimize_acqf_and_get_observation(NEI)

    # update training points
    train_x_ei = torch.cat([train_x_ei, new_x_ei])
    train_obj_ei = torch.cat([train_obj_ei, new_obj_ei])
def step(self, snapshot_mode: str, meta_info: dict = None):
    """
    Run one Bayesian-optimization iteration: fit the GP, pick a candidate, train and evaluate a policy.

    On the first call the initial policies are trained and evaluated. Afterwards a GP is fitted
    to the normalized candidates and standardized values, the selected acquisition function is
    optimized on the unit cube, and the resulting candidate is projected back, stored, trained on,
    and evaluated on the real environment.

    :param snapshot_mode: forwarded to `self.make_snapshot()`
    :param meta_info: forwarded to `self.make_snapshot()`
    :raises pyrado.ValueErr: if `self.acq_fcn_type` is not one of 'UCB', 'EI', 'PI'
    """
    if not self.initialized:
        # Start initialization phase
        self.train_init_policies()
        self.eval_init_policies()
        self.initialized = True

    # Normalize the input data and standardize the output data
    cands_norm = self.uc_normalizer.project_to(self.cands)
    cands_values_stdized = standardize(self.cands_values).unsqueeze(1)

    # Create and fit the GP model
    gp = SingleTaskGP(cands_norm, cands_values_stdized)
    gp.likelihood.noise_covar.register_constraint('raw_noise', GreaterThan(1e-5))
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_model(mll)
    print_cbt('Fitted the GP.', 'g')

    # Acquisition functions
    if self.acq_fcn_type == 'UCB':
        acq_fcn = UpperConfidenceBound(gp, beta=self.acq_param.get('beta', 0.1), maximize=True)
    elif self.acq_fcn_type == 'EI':
        acq_fcn = ExpectedImprovement(gp, best_f=cands_values_stdized.max().item(), maximize=True)
    elif self.acq_fcn_type == 'PI':
        acq_fcn = ProbabilityOfImprovement(gp, best_f=cands_values_stdized.max().item(), maximize=True)
    else:
        raise pyrado.ValueErr(given=self.acq_fcn_type, eq_constraint="'UCB', 'EI', 'PI'")

    # Optimize acquisition function and get new candidate point.
    # `cand` lives in the normalized [0, 1] space the GP was fitted on.
    cand, acq_value = optimize_acqf(
        acq_function=acq_fcn,
        bounds=to.stack([to.zeros(self.cand_dim), to.ones(self.cand_dim)]),
        q=1,
        num_restarts=self.acq_restarts,
        raw_samples=self.acq_samples
    )
    next_cand = self.uc_normalizer.project_back(cand)
    print_cbt(f'Found the next candidate: {next_cand.numpy()}', 'g')
    self.cands = to.cat([self.cands, next_cand], dim=0)
    to.save(self.cands, osp.join(self._save_dir, 'candidates.pt'))

    # Train and evaluate the new candidate (saves to iter_{self._curr_iter}_policy.pt)
    prefix = f'iter_{self._curr_iter}'
    wrapped_trn_fcn = until_thold_exceeded(
        self.thold_succ_subroutine.item(), max_iter=self.max_subroutine_rep
    )(self.train_policy_sim)
    # Train on the projected-back candidate (the one stored in self.cands and evaluated
    # below), not on the normalized acquisition-space point.
    wrapped_trn_fcn(next_cand, prefix)

    # Evaluate the current policy on the target domain
    policy = to.load(osp.join(self._save_dir, f'{prefix}_policy.pt'))
    self.curr_cand_value = self.eval_policy(
        self._save_dir, self._env_real, policy, self.montecarlo_estimator, prefix, self.num_eval_rollouts_real
    )
    self.cands_values = to.cat([self.cands_values, self.curr_cand_value.view(1)], dim=0)
    to.save(self.cands_values, osp.join(self._save_dir, 'candidates_values.pt'))

    # Store the argmax after training and evaluating
    curr_argmax_cand = BayRn.argmax_posterior_mean(
        self.cands, self.cands_values.unsqueeze(1), self.uc_normalizer, self.acq_restarts, self.acq_samples
    )
    self.argmax_cand = to.cat([self.argmax_cand, curr_argmax_cand], dim=0)
    to.save(self.argmax_cand, osp.join(self._save_dir, 'candidates_argmax.pt'))

    self.make_snapshot(snapshot_mode, float(to.mean(self.cands_values)), meta_info)
def main(args, res_dir):
    """Run the Bayesian-optimization experiment for every seed and save the metrics.

    For each seed an initial dataset is sampled and evaluated, then ``task.num_iter``
    rounds of "fit model -> maximize Expected Improvement -> evaluate candidate" are run.
    Progress is written to ``<res_dir>/RESULTS_LOGGING.txt`` and the logger, optional plots
    are stored in ``res_dir``, and ``save_metrics`` is called once at the end.

    Args:
        args: Parsed command-line namespace; reads ``experiment``, ``model``, ``n_seeds``
            and ``plot``, and forwards all entries to the model constructor.
        res_dir: Directory (as string) that receives logs, plots and metrics.
    """
    print("HAAALLLOOO")
    modify_and_push_json()
    number_rollouts_per_param = NUM_ROLLOUTS_PER_SAMPLE

    # Sample potential goal and target positions.
    # WRITE DIFFICULTY LEVEL TO FILE
    with open(('./content/diff.txt'), 'w') as f:
        f.write(str(DIFFICULTY_LEVEL))
    define_sample_fct(SAMPLE_FCT, number_rollouts_per_param, ("./content" + '/'), path2=(res_dir + '/'))
    push_github("push_position_file")

    logging_txt_file = (res_dir + '/' + "RESULTS_LOGGING.txt")
    reference_path = (res_dir + '/')

    logger.info("Args:")
    for k, v in vars(args).items():
        logger.info("%s: %s", k, v)

    task = EXPERIMENTS[args.experiment]()

    # NOTE(review): disabled, so X and Y are never defined here; the
    # `task.plot_model and args.plot` branch below still reads them.
    if False:
        if task.plot_model:
            x = task.x_domain(1000)
            X = to_tensor(x)
            y = task(x)
            Y = to_tensor(y)

    y_opt = task.y_opt
    y_history = np.zeros((args.n_seeds, task.num_iter + 1))
    y_opt_est = np.zeros((args.n_seeds, task.num_iter))

    bounds = torch.stack([to_tensor(task.x_min), to_tensor(task.x_max)]).to(TORCH_DEVICE)

    # TODO: find out what this means -> I think not needed
    if (False):
        # construct 'cubature points' from bounds for whitening
        pts = np.vstack([task.x_min, task.x_max])
        x_vert = np.hstack([
            m.reshape((-1, 1))
            for m in np.meshgrid(*(pts[:, i] for i in range(task.d_x)))
        ])
        y_vert = task(x_vert, reference_path)
        x_bounds = to_tensor(x_vert)
        y_bounds = to_tensor(y_vert)

    # Evaluations with globcount <= globcount_thres run with run_eval=False; -1 disables that.
    globcount_thres = -1

    for s in range(args.n_seeds):
        # NOTE(review): mode "w" truncates the log, so only the last seed's entries survive.
        with open(logging_txt_file, "w") as f:
            f.write("SEED: " + str(s) + '\n')

        globcount = 0
        model = MODELS[args.model](task.d_x, task.param_normalizer, **vars(args))

        # Generate initial data
        _X_train_raw = task.x_sample(task.num_init_samples)
        _pt = task.x_sample(1)
        _X_train_raw = np.concatenate([_X_train_raw] + [_pt])
        dry_run = globcount <= globcount_thres
        if dry_run:
            print(_X_train_raw)
        _y_train_raw = task(_X_train_raw, reference_path, globcount, run_eval=not dry_run)
        globcount += 1

        X_train, _y_train = map(to_tensor, [_X_train_raw, _y_train_raw])
        X_train = X_train.to(TORCH_DEVICE)
        _y_train = _y_train.to(TORCH_DEVICE)
        y_train = _y_train

        # Get best observed value from dataset
        with open(logging_txt_file, "a") as f:
            f.write("iteration: " + str(globcount) + '\n')
            f.write("max val: " + str(y_train.max().item()) + '\n')
            f.write("params: " + str(X_train[y_train.argmax().item(), :].tolist()) + '\n')
        y_history[s, 0] = y_train.max().item()

        logger.info("seed iter best opt | y_chosen | y_optimal")

        # Bayesian Optimization Loop
        for i in tqdm(range(task.num_iter), total=task.num_iter):
            _model = model.fit(X_train, y_train)

            # The incumbent is expressed in the model's standardized output space.
            acq = ExpectedImprovement(
                _model,
                best_f=model.data_normalizer.standardize_wo_calculation(y_train).max().item(),
                maximize=True)

            # Optimize acquisition function
            # q represents additional points to sample
            # SEE DOCS: Expected improvement only supports q=1
            n_params = np.shape(task.param_normalizer.bound_lo)[0]
            candidate, acq_value = optimize_acqf(
                acq_function=acq,
                bounds=torch.stack([
                    torch.zeros(n_params),
                    torch.ones(n_params)
                ]).to(dtype=torch.float32).to(TORCH_DEVICE),
                q=1,
                num_restarts=task.num_acq_restarts,
                raw_samples=task.num_acq_samples)
            # Map the unit-cube candidate back to the original parameter range.
            candidate = model.param_normalizer.project_back(candidate)

            with torch.no_grad():
                x_new = candidate
                dry_run = globcount <= globcount_thres
                if dry_run:
                    print(x_new.cpu().numpy())
                y_new = to_tensor(
                    task(x_new.cpu().numpy(), reference_path, globcount, run_eval=not dry_run)
                ).to(TORCH_DEVICE)
                globcount += 1

                y_est_m, y_est_v = model.predict(x_new.view(1, -1).to(TORCH_DEVICE))
                y_opt_m, y_opt_v = model.predict(to_tensor(task.x_opt).view(1, -1).to(TORCH_DEVICE))
                y_opt_est[s, i] = y_opt_m.cpu().numpy()

            # Update dataset
            X_train = torch.cat([X_train, x_new])
            _y_train = torch.cat([_y_train, y_new])
            y_train = _y_train

            # Update best observed value list
            y_history[s, i + 1] = y_train.max().item()

            # `.item()` replaces np.asscalar, which no longer exists in current NumPy.
            est_mean = y_est_m.item()
            est_std = torch.sqrt(y_est_v).item()
            opt_mean = y_opt_m.item()
            opt_std = torch.sqrt(y_opt_v).item()
            logger.info(
                f"{s:2} {i+1:3} {y_history[s, i+1]:.4f} {y_opt:.4f} | {est_mean:.4f}+/-{est_std:.4f} | {opt_mean:.4f}+/-{opt_std:.4f}"
            )

            with open(logging_txt_file, "a") as f:
                f.write("iteration: " + str(globcount) + '\n')
                f.write("max val: " + str(y_train.max().item()) + '\n')
                f.write("params: " + str(X_train[y_train.argmax().item(), :].tolist()) + '\n')

            if task.plot_model and args.plot:
                # The acquisition function expects inputs shaped batch x q x d.
                utility = acq(X.unsqueeze(1))
                plot_model(_model, X, Y, utility, X_train, _y_train, x_new,
                           y_new, acq_value, y_history, y_opt, f"{s}_{i}")

            if args.plot and args.experiment != "toy":
                # One subplot per parameter: sweep it over [0, 1], others fixed at task.x_opt.
                f, ax = plt.subplots(task.d_x)
                ax = [ax] if task.d_x == 1 else ax
                for d, a in enumerate(ax):
                    x0 = np.copy(task.x_opt)
                    xN = np.copy(task.x_opt)
                    x0[d] = 0.
                    xN[d] = 1.
                    _x = np.linspace(x0, xN, 1000)
                    x = _x[:, d]
                    _y_m, _y_v = model.predict(to_tensor(_x).view(1000, -1).to(TORCH_DEVICE))
                    _y_m = _y_m.cpu()
                    # Fixed: this used to copy the mean into the variance.
                    _y_v = _y_v.cpu()
                    _y = task(_x, reference_path)
                    _ytrain = task(X_train.cpu().numpy(), reference_path)
                    a.plot(x, _y_m, 'b')
                    plot_uncertainty(a, x, _y_m, _y_v, stds=[3, 2, 1])
                    a.plot(x, _y, 'r')
                    a.plot(X_train.cpu().numpy()[:, d], _ytrain, 'm*')
                plt.savefig(os.path.join(res_dir, f"slice_{i}.png"),
                            bbox_inches='tight',
                            format='png')
                plt.close(f)

    save_metrics(y_history, y_opt, y_opt_est, res_dir)
def EI_run(seed, alpha, rho, x0=5, n0=100, iter_count=1000, mu_1=2, mu_2=5, sigma_1=1, sigma_2=1, SAA_seed=None):
    """
    Does a single run of the Expected Improvement algorithm for the simple normal problem,
    without derivatives

    :param seed: random seed (applied to numpy)
    :param alpha: risk level
    :param rho: risk measure
    :param x0: Ignored! Just to keep the same arglist as others
    :param n0: outer sample starting size
    :param iter_count: number of iterations (the first 4 are Sobol initial points)
    :param mu_1: passed to the estimator
    :param mu_2: passed to the estimator
    :param sigma_1: passed to the estimator
    :param sigma_2: passed to the estimator
    :param SAA_seed: if given, an SAA version is run with this seed.
    :return: tensor of shape (iter_count, 1); row k is the point with the lowest
        estimated value among the first k + 1 evaluations
    """
    np.random.seed(seed)
    begin = datetime.datetime.now()
    args = (n0, alpha, rho, mu_1, mu_2, sigma_1, sigma_2, SAA_seed)

    # Pre-allocated history; rows >= i are uninitialized until iteration i writes them.
    points = torch.empty(iter_count, 1)
    values = torch.empty(points.shape)

    # Four Sobol points on [-5, 5] start the run.
    points[:4] = draw_sobol_samples(torch.tensor([[-5.], [5.]]), n=4, q=1).reshape(-1, 1)
    for i in range(4):
        values[i] = estimate_no_grad(points[i], *args)

    for i in range(4, iter_count):
        # fit gp
        # this transforms the GP to unit domain - botorch priors work best there
        transformed_points = points[:i] / 10. + 0.5
        model = SingleTaskGP(transformed_points, values[:i], outcome_transform=Standardize(m=1))
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_model(mll)

        # optimize EI to get the candidate
        # The incumbent must only consider the values observed so far: the tail of
        # `values` is uninitialized memory and would corrupt the minimum.
        acqf = ExpectedImprovement(model, best_f=torch.min(values[:i]), maximize=False)
        best_p, _ = optimize_acqf(acqf, bounds=torch.tensor([[0.], [1.]]), q=1, num_restarts=10, raw_samples=50)

        # transform it back to original domain
        best_p = best_p.detach() * 10. - 5.
        points[i] = best_p
        values[i] = estimate_no_grad(points[i], *args)

    best_list = torch.empty(points.shape)
    for i in range(1, iter_count + 1):
        # pick the arg min of the history to return
        best_ind = torch.argmin(values[:i], dim=0)
        best_list[i - 1] = points[best_ind]
    x_list = best_list

    now = datetime.datetime.now()
    print('done time: %s' % (now - begin))
    print('call count: %d' % call_count)
    # np.save("sa_out/normal/EI_" + rho + "_" + str(alpha) + "_iter_" + str(iter_count) + "_x.npy", x_list)
    return x_list
def bayes_opt(x0, y0):
    """
    Main Bayesian optimization loop. Starts by initializing the model; each
    iteration then fits the GP to the data, asks the acquisition function for
    a new point, appends it to the dataset, and returns early as soon as the
    best point found so far is a successful attack.

    Returns the tuple (query_count, success) where success is 1 or 0.
    """
    best_observed = []
    query_count = 0
    success = 0

    # call helper function to initialize model
    (train_x, train_obj, mll, model,
     best_value, mean, std) = initialize_model(x0, y0, n=args.initial_samples)
    if args.standardize_every_iter:
        train_obj = (train_obj - train_obj.mean()) / train_obj.std()
    best_observed.append(best_value)
    query_count += args.initial_samples

    # run args.iter rounds of BayesOpt after the initial random batch
    for _ in range(args.iter):
        # fit the model
        fit_gpytorch_model(mll)

        # Batch case: Monte-Carlo qEI with a QMC sampler. Single-point case:
        # the analytic acquisition selected by args.acqf.
        # NOTE(review): an args.acqf outside EI/PM/POI/UCB leaves acq_fn unassigned.
        if args.q != 1:
            qmc_sampler = SobolQMCNormalSampler(num_samples=2000, seed=seed)
            acq_fn = qExpectedImprovement(model=model, sampler=qmc_sampler,
                                          best_f=best_value)
        elif args.acqf == 'EI':
            acq_fn = ExpectedImprovement(model=model, best_f=best_value)
        elif args.acqf == 'PM':
            acq_fn = PosteriorMean(model)
        elif args.acqf == 'POI':
            acq_fn = ProbabilityOfImprovement(model, best_f=best_value)
        elif args.acqf == 'UCB':
            acq_fn = UpperConfidenceBound(model, beta=args.beta)

        # optimize and get new observation
        new_x, new_obj = optimize_acqf_and_get_observation(acq_fn, x0, y0)
        if args.standardize:
            new_obj = (new_obj - mean) / std

        # update training points
        train_x = torch.cat((train_x, new_x))
        train_obj = torch.cat((train_obj, new_obj))
        if args.standardize_every_iter:
            train_obj = (train_obj - train_obj.mean()) / train_obj.std()

        # update progress
        best_value, best_index = train_obj.max(0)
        best_observed.append(best_value.item())
        incumbent = train_x[best_index]

        # reinitialize the model so it is ready for fitting on next iteration
        torch.cuda.empty_cache()
        model.set_train_data(train_x, train_obj, strict=False)

        # get objective value of best candidate; if we found an adversary, exit
        perturbation = transform(incumbent.view(1, -1), args.dset, args.arch,
                                 args.cos, args.sin).to(device)
        perturbation = proj(perturbation, args.eps, args.inf_norm, args.discrete)
        with torch.no_grad():
            adv_label = torch.argmax(cnn_model.predict_scores(perturbation + x0))

        if adv_label != y0:
            success = 1
            if args.inf_norm:
                norm_value = perturbation.abs().max().item()
            else:
                norm_value = perturbation.norm().item()
            print('Adversarial Label', adv_label.item(), 'Norm:', norm_value)
            return query_count, success

        query_count += args.q

    # not successful (ran out of query budget)
    return query_count, success
mll = ExactMarginalLogLikelihood(gp.likelihood, gp) # Bayesian Optimization Loop for i in tqdm(range(num_iter), total=num_iter): print("Iteration:", i + 1) # Fit the model # mll.train() fit_gpytorch_model(mll) # mll.eval() # gp.eval() # Acquisition functions ucb = UpperConfidenceBound(gp, beta=ucb_beta, maximize=True) ei = ExpectedImprovement(gp, best_f=y_train.max().item(), maximize=True) pi = ProbabilityOfImprovement(gp, best_f=y_train.max().item(), maximize=True) acq_dict = {"UCB": ucb, "EI": ei, "PI": pi} # Optimize acquisition function candidate, acq_value = optimize_acqf( acq_function=acq_dict[acq_fcn], bounds=bounds, q=1, num_restarts=num_acq_restarts, raw_samples=num_acq_samples, ) x_new = candidate.detach()
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations,
         # acquisition_name,
         # acquisition_optimizer_name,
         gamma, num_random_init, num_restarts, raw_samples,
         noise_variance_init,
         # use_ard,
         # use_input_warping,
         standardize_targets, input_dir, output_dir):
    """Run GP-based Bayesian optimization on a benchmark and write one CSV per run.

    The first ``num_random_init`` evaluations of every run are uniform random
    points; afterwards a ``FixedNoiseGP`` is fitted and Expected Improvement is
    maximized with the ``1 - gamma`` quantile of the targets as threshold.
    The options are dumped to ``options.yaml`` in the output directory.

    Returns:
        0
    """
    # TODO(LT): Turn into options
    # device = "cpu"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.double

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir) / name / method_name
    output_path.mkdir(parents=True, exist_ok=True)

    options = {
        "gamma": gamma,
        "num_random_init": num_random_init,
        "num_restarts": num_restarts,
        "raw_samples": raw_samples,
        "noise_variance_init": noise_variance_init,
        "standardize_targets": standardize_targets,
    }
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    config_space = DenseConfigurationSpace(benchmark.get_config_space())
    bounds = create_bounds(config_space.get_bounds(), device=device, dtype=dtype)
    input_dim = config_space.get_dimensions()

    def func(tensor, *args, **kwargs):
        """
        Wrapper that receives and returns torch.Tensor
        """
        config = dict_from_tensor(tensor, cs=config_space)
        # turn into maximization problem
        negated = - benchmark.evaluate(config).value
        return torch.tensor(negated, device=device, dtype=dtype)

    for run_id in trange(run_start, num_runs, unit="run"):

        t_start = datetime.now()

        records = []
        features = []
        targets = []

        noise_variance = torch.tensor(noise_variance_init, device=device, dtype=dtype)
        # Hyperparameters of the previous fit, reused to initialize the next model.
        state_dict = None

        with trange(num_iterations) as iterations:

            for i in iterations:

                if len(targets) < num_random_init:
                    # click.echo(f"Completed {i}/{num_random_init} initial runs. "
                    #            "Suggesting random candidate...")
                    # TODO(LT): support random seed
                    x_new = torch.rand(size=(input_dim,), device=device, dtype=dtype)
                else:
                    # construct dataset
                    X = torch.vstack(features)
                    y = torch.hstack(targets).unsqueeze(axis=-1)
                    if standardize_targets:
                        y = standardize(y)

                    # construct model
                    model = FixedNoiseGP(X, y, noise_variance.expand_as(y),
                                         input_transform=None).to(X)
                    mll = ExactMarginalLogLikelihood(model.likelihood, model)

                    if state_dict is not None:
                        model.load_state_dict(state_dict)

                    # update model
                    fit_gpytorch_model(mll)

                    # construct acquisition function
                    tau = torch.quantile(y, q=1-gamma)
                    iterations.set_postfix(tau=tau.item())

                    ei = ExpectedImprovement(model=model, best_f=tau)

                    # optimize acquisition function
                    X_batch, _ = optimize_acqf(
                        acq_function=ei,
                        bounds=bounds,
                        q=1,
                        num_restarts=num_restarts,
                        raw_samples=raw_samples,
                        options={"batch_limit": 5, "maxiter": 200},
                    )
                    x_new = X_batch.squeeze(axis=0)

                    state_dict = model.state_dict()

                # evaluate blackbox objective
                y_new = func(x_new)
                elapsed = datetime.now() - t_start

                # update dataset
                features.append(x_new)
                targets.append(y_new)

                record = dict_from_tensor(x_new, cs=config_space)
                # Undo the sign flip applied in func().
                record["loss"] = - y_new.item()
                record["finished"] = elapsed.total_seconds()
                records.append(record)

        frame = pd.DataFrame(data=records)
        frame.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
def expected_improvement(self, model, best_f):
    """Return an ``ExpectedImprovement`` acquisition for ``model`` built with ``maximize=False``."""
    ei_kwargs = {"model": model, "best_f": best_f, "maximize": False}
    return ExpectedImprovement(**ei_kwargs)
def generate_batch(
    state,
    model,  # GP model
    X,  # Evaluated points on the domain [0, 1]^d
    Y,  # Function values
    batch_size,
    n_candidates=None,  # Number of candidates for Thompson sampling
    num_restarts=10,
    raw_samples=512,
    acqf="ts",  # "ei" or "ts"
    deup=False,
    turbo=True,
):
    """Generate the next batch of points inside the current trust region.

    Args:
        state: Object whose ``length`` attribute is the trust-region side length.
        model: GP model; its kernel lengthscales scale the trust region
            (read from ``model.f_predictor`` when ``deup`` is true).
        X: Evaluated points, all within [0, 1]^d.
        Y: Finite function values for ``X``; the best one centers the trust region.
        batch_size: Number of points to return.
        n_candidates: Thompson-sampling candidate count; defaults to
            ``min(5000, max(2000, 200 * d))``.
        num_restarts: Restarts for the EI optimizer.
        raw_samples: Raw samples for the EI optimizer.
        acqf: ``"ts"`` (Thompson sampling) or ``"ei"`` (expected improvement).
        deup: Selects where the lengthscales are read from (see ``model``).
        turbo: If false, the trust region is replaced by the full unit cube.

    Returns:
        The proposed points ``X_next``.
    """
    dim = X.shape[-1]
    assert acqf in ("ts", "ei")
    assert X.min() >= 0.0 and X.max() <= 1.0 and torch.all(torch.isfinite(Y))
    if n_candidates is None:
        n_candidates = min(5000, max(2000, 200 * dim))

    # Scale the TR to be proportional to the lengthscales
    x_center = X[Y.argmax(), :].clone()
    if not deup:
        kernel = model.covar_module.base_kernel
    else:
        kernel = model.f_predictor.covar_module.base_kernel
    weights = kernel.lengthscale.squeeze().detach()
    weights = weights / weights.mean()
    # Normalize by the geometric mean.
    weights = weights / torch.prod(weights.pow(1.0 / len(weights)))
    tr_lb = torch.clamp(x_center - weights * state.length / 2.0, 0.0, 1.0)
    tr_ub = torch.clamp(x_center + weights * state.length / 2.0, 0.0, 1.0)

    if not turbo:
        # Full unit cube, created with X's dtype/device so it matches the
        # trust-region bounds computed above.
        tr_lb = torch.zeros(dim, dtype=X.dtype, device=X.device)
        tr_ub = torch.ones(dim, dtype=X.dtype, device=X.device)

    if acqf == "ts":
        sobol = SobolEngine(dim, scramble=True)
        pert = sobol.draw(n_candidates).to(dtype=dtype, device=device)
        pert = tr_lb + (tr_ub - tr_lb) * pert

        # Create a perturbation mask
        prob_perturb = min(20.0 / dim, 1.0)
        mask = (torch.rand(n_candidates, dim, dtype=dtype, device=device) <= prob_perturb)
        # Rows with nothing selected get one random dimension forced on.
        # torch.randint's upper bound is exclusive, so it must be `dim` for the
        # last dimension to be eligible (and for dim == 1 to be a valid range).
        ind = torch.where(mask.sum(dim=1) == 0)[0]
        mask[ind, torch.randint(0, dim, size=(len(ind), ), device=device)] = 1

        # Create candidate points from the perturbations and the mask
        X_cand = x_center.expand(n_candidates, dim).clone()
        X_cand[mask] = pert[mask]

        # Sample on the candidate points
        thompson_sampling = MaxPosteriorSampling(model=model, replacement=False)
        X_next = thompson_sampling(X_cand, num_samples=batch_size)

    elif acqf == "ei":
        if batch_size > 1:
            ei = qExpectedImprovement(model, Y.max(), maximize=True)
        else:
            ei = ExpectedImprovement(model, Y.max(), maximize=True)
        try:
            X_next, acq_value = optimize_acqf(
                ei,
                bounds=torch.stack([tr_lb, tr_ub]),
                q=batch_size,
                num_restarts=num_restarts,
                raw_samples=raw_samples,
            )
        except NotPSDError:
            # Deliberate best-effort fallback: Sobol points inside the region.
            sobol = SobolEngine(dim, scramble=True)
            pert = sobol.draw(batch_size).to(dtype=dtype, device=device)
            pert = tr_lb + (tr_ub - tr_lb) * pert
            X_next = pert
            print(
                'Warning: NotPSDError, using {} purely random candidates for this step'
                .format(batch_size))

    return X_next