def test_normalize_unnormalize(self):
    for dtype in (torch.float, torch.double):
        X = torch.tensor([0.0, 0.25, 0.5], device=self.device, dtype=dtype).view(
            -1, 1
        )
        expected_X_normalized = torch.tensor(
            [0.0, 0.5, 1.0], device=self.device, dtype=dtype
        ).view(-1, 1)
        bounds = torch.tensor([0.0, 0.5], device=self.device, dtype=dtype).view(
            -1, 1
        )
        X_normalized = normalize(X, bounds=bounds)
        self.assertTrue(torch.equal(expected_X_normalized, X_normalized))
        self.assertTrue(torch.equal(X, unnormalize(X_normalized, bounds=bounds)))
        X2 = torch.tensor(
            [[0.25, 0.125, 0.0], [0.25, 0.0, 0.5]], device=self.device, dtype=dtype
        ).transpose(1, 0)
        expected_X2_normalized = torch.tensor(
            [[1.0, 0.5, 0.0], [0.5, 0.0, 1.0]], device=self.device, dtype=dtype
        ).transpose(1, 0)
        bounds2 = torch.tensor(
            [[0.0, 0.0], [0.25, 0.5]], device=self.device, dtype=dtype
        )
        X2_normalized = normalize(X2, bounds=bounds2)
        self.assertTrue(torch.equal(X2_normalized, expected_X2_normalized))
        self.assertTrue(torch.equal(X2, unnormalize(X2_normalized, bounds=bounds2)))
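
# The sketch below restates, in plain arithmetic, what the test above verifies:
# normalize/unnormalize are assumed to be the min-max transforms from
# botorch.utils.transforms, mapping a box [lower, upper] to the unit cube and back.
# The helper names _normalize/_unnormalize are hypothetical, used only for illustration.
import torch

def _normalize(X, bounds):
    # bounds is 2 x d: row 0 holds the lower bounds, row 1 the upper bounds
    return (X - bounds[0]) / (bounds[1] - bounds[0])

def _unnormalize(X, bounds):
    return X * (bounds[1] - bounds[0]) + bounds[0]

_bounds = torch.tensor([[0.0], [0.5]])
_X = torch.tensor([[0.0], [0.25], [0.5]])
assert torch.equal(_normalize(_X, _bounds), torch.tensor([[0.0], [0.5], [1.0]]))
assert torch.equal(_unnormalize(_normalize(_X, _bounds), _bounds), _X)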
def optimize_qehvi_and_get_observation(model, train_obj, sampler):
    """Optimizes the qEHVI acquisition function, and returns a new candidate and observation."""
    # partition non-dominated space into disjoint rectangles
    partitioning = NondominatedPartitioning(ref_point=problem.ref_point, Y=train_obj)
    acq_func = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=problem.ref_point.tolist(),  # use known reference point
        partitioning=partitioning,
        sampler=sampler,
    )
    # optimize
    candidates, _ = optimize_acqf(
        acq_function=acq_func,
        bounds=standard_bounds,
        q=BATCH_SIZE,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options={"batch_limit": 5, "maxiter": 200, "nonnegative": True},
        sequential=True,
    )
    # observe new values
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    new_obj = problem(new_x)
    return new_x, new_obj
def _torch_optimize_qehvi_and_get_observation(self):
    torch_anti_ideal_point = torch.tensor(
        self._transformed_anti_ideal_point, dtype=torch.double)
    qehvi_partitioning = NondominatedPartitioning(
        ref_point=torch_anti_ideal_point,
        Y=torch.stack(self._torch_model.train_targets, dim=1))
    qehvi_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

    self._acquisition = qExpectedHypervolumeImprovement(
        model=self._torch_model,
        ref_point=self._transformed_anti_ideal_point,
        partitioning=qehvi_partitioning,
        sampler=qehvi_sampler)

    # these options all come from the tutorial
    # and likely need a serious review
    candidates, _ = optimize_acqf(
        acq_function=self._acquisition,
        bounds=self._botorch_domain,
        q=BATCH_SIZE,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options={"batch_limit": 5, "maxiter": 200, "nonnegative": True},
        sequential=True,
    )

    # is unnormalize necessary here?
    # we are providing the same bounds here and in optimizer
    new_x = unnormalize(candidates.detach(), bounds=self._botorch_domain)
    transformed_eps, transformed_err = self._optimization_handler(new_x)
    return new_x, transformed_eps, transformed_err
def get_bayes_pop(self, n_suggestions):
    """
    Parameters
    ----------
    n_suggestions: Number of new suggestions/trial solutions to generate using BO

    Returns
    -------
    The new set of trial solutions obtained by optimizing the acquisition function
    """
    try:
        candidates, _ = optimize_acqf(
            acq_function=self.acquisition,
            bounds=self.min_max_bounds,
            q=n_suggestions,
            num_restarts=10,
            raw_samples=512,  # used for initialization heuristic
            sequential=True)

        bayes_pop = unnormalize(candidates, self.torch_bounds).numpy()
    except Exception as e:
        print('Error in get_bayes_pop(): {}'.format(e))

    population = self.search_space.unwarp(
        bayes_pop)  # Translate the solution back to the original space

    return population
def optimize_qparego_and_get_observation(model, train_obj, sampler):
    """Samples a set of random weights for each candidate in the batch, performs sequential greedy
    optimization of the qParEGO acquisition function, and returns a new candidate and observation."""
    acq_func_list = []
    for _ in range(BATCH_SIZE):
        weights = sample_simplex(problem.num_objectives, **tkwargs).squeeze()
        objective = GenericMCObjective(
            get_chebyshev_scalarization(weights=weights, Y=train_obj))
        acq_func = qExpectedImprovement(  # pyre-ignore: [28]
            model=model,
            objective=objective,
            best_f=objective(train_obj).max(),
            sampler=sampler,
        )
        acq_func_list.append(acq_func)
    # optimize
    candidates, _ = optimize_acqf_list(
        acq_function_list=acq_func_list,
        bounds=standard_bounds,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options={"batch_limit": 5, "maxiter": 200},
    )
    # observe new values
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    new_obj = problem(new_x)
    return new_x, new_obj
def get_calibrated_params(*, country, area, multi_beta_calibration, maxiters=None):
    """
    Returns calibrated parameters for a `country` and an `area`
    """
    if maxiters:
        param_dict = get_calibrated_params_limited_iters(
            country, area,
            multi_beta_calibration=multi_beta_calibration,
            maxiters=maxiters,
        )
        return param_dict

    state = load_state(calibration_states[country][area])
    theta = state['train_theta']
    best_observed_idx = state['best_observed_idx']
    norm_params = theta[best_observed_idx]

    param_bounds = (
        calibration_model_param_bounds_multi
        if multi_beta_calibration else
        calibration_model_param_bounds_single)
    sim_bounds = pdict_to_parr(
        pdict=param_bounds,
        multi_beta_calibration=multi_beta_calibration).T
    params = transforms.unnormalize(norm_params, sim_bounds)

    param_dict = parr_to_pdict(parr=params, multi_beta_calibration=multi_beta_calibration)
    return param_dict
def test_evaluate_slack_true(self):
    for dtype in (torch.float, torch.double):
        for f in self.functions:
            f.to(device=self.device, dtype=dtype)
            X = unnormalize(torch.rand(1, f.dim), bounds=f.bounds)
            slack = f.evaluate_slack_true(X)
            self.assertEqual(slack.shape, torch.Size([1, f.num_constraints]))
def objective_function(x: torch.Tensor):
    # Caution: unnormalize and maximize
    x = unnormalize(x, bounds=problem_bounds)
    x = x.cpu().numpy().astype(np.float64)  # caution
    res = problem.evaluate(x)
    res['objs'] = [-y for y in res['objs']]
    return res  # Caution: negative values imply feasibility in botorch
def get_calibrated_params_from_path(path, estimate_mobility_reduction=False, multi_beta_calibration=False):
    """
    Returns calibrated parameters from the calibration state file at `path`
    """
    state = load_state(path)
    theta = state['train_theta']
    best_observed_idx = state['best_observed_idx']
    norm_params = theta[best_observed_idx]

    param_bounds = (
        calibration_model_param_bounds_multi
        if multi_beta_calibration else
        calibration_model_param_bounds_single)

    if estimate_mobility_reduction:
        param_bounds['p_stay_home'] = [0.0, 1.0]

    sim_bounds = pdict_to_parr(
        pdict=param_bounds,
        multi_beta_calibration=multi_beta_calibration,
        estimate_mobility_reduction=estimate_mobility_reduction).T
    params = transforms.unnormalize(norm_params, sim_bounds)

    param_dict = parr_to_pdict(
        parr=params,
        multi_beta_calibration=False,
        estimate_mobility_reduction=estimate_mobility_reduction)
    return param_dict
def objective_function(x: torch.Tensor):
    # Caution: unnormalize and maximize
    x = unnormalize(x, bounds=problem_bounds)
    x = x.cpu().numpy().astype(np.float64)  # caution
    res = problem.evaluate(x)
    objs = [-y for y in res['objs']]
    return objs
def eval_objective(x, dim=10):
    """This is a helper function we use to unnormalize and evaluate a point"""
    fun = Ackley(dim=dim, negate=True).to(dtype=dtype, device=device)
    fun.bounds[0, :].fill_(low)
    fun.bounds[1, :].fill_(up)
    dim = fun.dim
    lb, ub = fun.bounds
    return fun(unnormalize(x, fun.bounds))
def test_normalize_unnormalize(self, cuda=False):
    tkwargs = {"device": torch.device("cuda" if cuda else "cpu")}
    for dtype in (torch.float, torch.double):
        tkwargs["dtype"] = dtype
        X = torch.tensor([0.0, 0.25, 0.5], **tkwargs).view(-1, 1)
        expected_X_normalized = torch.tensor([0.0, 0.5, 1.0], **tkwargs).view(-1, 1)
        bounds = torch.tensor([0.0, 0.5], **tkwargs).view(-1, 1)
        X_normalized = normalize(X, bounds=bounds)
        self.assertTrue(torch.equal(expected_X_normalized, X_normalized))
        self.assertTrue(torch.equal(X, unnormalize(X_normalized, bounds=bounds)))
        X2 = torch.tensor(
            [[0.25, 0.125, 0.0], [0.25, 0.0, 0.5]], **tkwargs
        ).transpose(1, 0)
        expected_X2_normalized = torch.tensor(
            [[1.0, 0.5, 0.0], [0.5, 0.0, 1.0]], **tkwargs
        ).transpose(1, 0)
        bounds2 = torch.tensor([[0.0, 0.0], [0.25, 0.5]], **tkwargs)
        X2_normalized = normalize(X2, bounds=bounds2)
        self.assertTrue(torch.equal(X2_normalized, expected_X2_normalized))
        self.assertTrue(torch.equal(X2, unnormalize(X2_normalized, bounds=bounds2)))
def f_2(self, X: Tensor) -> Tensor:
    X = torch.cat(
        [X, torch.zeros_like(X)],
        dim=-1,
    )
    # Cut out the first part of the function.
    X = X * 0.95 + 0.03
    X = unnormalize(X, self.levy.bounds.to(X))
    Y = self.levy(X).unsqueeze(-1)
    Y -= X[..., :1].pow(2) * 0.75
    return Y
def test_normalize_unnormalize(self, cuda=False):
    tkwargs = {"device": torch.device("cuda" if cuda else "cpu")}
    for dtype in (torch.float, torch.double):
        tkwargs["dtype"] = dtype
        X = torch.tensor([0.0, 0.25, 0.5], **tkwargs).view(-1, 1)
        expected_X_normalized = torch.tensor([0.0, 0.5, 1.0], **tkwargs).view(-1, 1)
        bounds = torch.tensor([0.0, 0.5], **tkwargs).view(-1, 1)
        X_normalized = normalize(X, bounds=bounds)
        self.assertTrue(torch.equal(expected_X_normalized, X_normalized))
        self.assertTrue(
            torch.equal(X, unnormalize(X_normalized, bounds=bounds)))
        X2 = torch.tensor([[0.25, 0.125, 0.0], [0.25, 0.0, 0.5]],
                          **tkwargs).transpose(1, 0)
        expected_X2_normalized = torch.tensor(
            [[1.0, 0.5, 0.0], [0.5, 0.0, 1.0]], **tkwargs).transpose(1, 0)
        bounds2 = torch.tensor([[0.0, 0.0], [0.25, 0.5]], **tkwargs)
        X2_normalized = normalize(X2, bounds=bounds2)
        self.assertTrue(torch.equal(X2_normalized, expected_X2_normalized))
        self.assertTrue(
            torch.equal(X2, unnormalize(X2_normalized, bounds=bounds2)))
def get_calibrated_params_limited_iters(country, area, multi_beta_calibration, maxiters):
    """
    Returns calibrated parameters using only the first `maxiters` iterations of BO.
    """
    state = load_state(calibration_states[country][area])
    train_G = state['train_G']
    train_G = train_G[:min(maxiters, len(train_G))]
    train_theta = state['train_theta']

    mob_settings = calibration_mob_paths[country][area][0]
    with open(mob_settings, 'rb') as fp:
        mob_kwargs = pickle.load(fp)
    mob = MobilitySimulator(**mob_kwargs)

    data_start_date = calibration_start_dates[country][area]
    data_end_date = calibration_lockdown_dates[country]['end']

    unscaled_area_cases = collect_data_from_df(
        country=country, area=area, datatype='new',
        start_date_string=data_start_date, end_date_string=data_end_date)
    assert (len(unscaled_area_cases.shape) == 2)

    # Scale down cases based on number of people in town and region
    sim_cases = downsample_cases(unscaled_area_cases, mob_kwargs)

    n_days, n_age = sim_cases.shape

    G_obs = torch.tensor(sim_cases).reshape(1, n_days * n_age)
    G_obs_aggregate = torch.tensor(sim_cases).sum(dim=-1)

    def objective(G):
        return -(G - G_obs_aggregate).pow(2).sum(dim=-1) / n_days

    train_G_objectives = objective(train_G)
    best_observed_idx = train_G_objectives.argmax()
    best_observed_obj = train_G_objectives[best_observed_idx].item()

    param_bounds = (
        calibration_model_param_bounds_multi
        if multi_beta_calibration else
        calibration_model_param_bounds_single)
    sim_bounds = pdict_to_parr(
        pdict=param_bounds,
        multi_beta_calibration=multi_beta_calibration).T

    normalized_calibrated_params = train_theta[best_observed_idx]
    calibrated_params = transforms.unnormalize(normalized_calibrated_params, sim_bounds)
    calibrated_params = parr_to_pdict(
        parr=calibrated_params,
        multi_beta_calibration=multi_beta_calibration)
    return calibrated_params
def sample_truncated_normal_perturbations(
    X: Tensor,
    n_discrete_points: int,
    sigma: float,
    bounds: Tensor,
    qmc: bool = True,
) -> Tensor:
    r"""Sample points around `X`.

    Sample perturbed points around `X` such that the added perturbations
    are sampled from N(0, sigma^2 I) and truncated to be within [0,1]^d.

    Args:
        X: A `n x d`-dim tensor starting points.
        n_discrete_points: The number of points to sample.
        sigma: The standard deviation of the additive gaussian noise for
            perturbing the points.
        bounds: A `2 x d`-dim tensor containing the bounds.
        qmc: A boolean indicating whether to use qmc.

    Returns:
        A `n_discrete_points x d`-dim tensor containing the sampled points.
    """
    X = normalize(X, bounds=bounds)
    d = X.shape[1]
    # sample points from N(X_center, sigma^2 I), truncated to be within
    # [0, 1]^d.
    if X.shape[0] > 1:
        rand_indices = torch.randint(X.shape[0], (n_discrete_points,), device=X.device)
        X = X[rand_indices]
    if qmc:
        std_bounds = torch.zeros(2, d, dtype=X.dtype, device=X.device)
        std_bounds[1] = 1
        u = draw_sobol_samples(bounds=std_bounds, n=n_discrete_points, q=1).squeeze(1)
    else:
        u = torch.rand((n_discrete_points, d), dtype=X.dtype, device=X.device)
    # compute bounds to sample from
    a = -X
    b = 1 - X
    # compute z-score of bounds
    alpha = a / sigma
    beta = b / sigma
    normal = Normal(0, 1)
    cdf_alpha = normal.cdf(alpha)
    # use inverse transform
    perturbation = normal.icdf(cdf_alpha + u * (normal.cdf(beta) - cdf_alpha)) * sigma
    # add perturbation and clip points that are still outside
    perturbed_X = (X + perturbation).clamp(0.0, 1.0)
    return unnormalize(perturbed_X, bounds=bounds)
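
# Hypothetical usage sketch of the helper above. The bounds, starting points, and noise
# level are made up for illustration, and the function (with the normalize/unnormalize,
# draw_sobol_samples, and Normal imports it relies on) is assumed to be in scope.
import torch

example_bounds = torch.tensor([[-5.0, 0.0], [5.0, 10.0]])  # 2 x d box constraints
example_X = torch.tensor([[0.0, 5.0], [2.5, 7.5]])          # n x d starting points
perturbed = sample_truncated_normal_perturbations(
    example_X, n_discrete_points=16, sigma=0.05, bounds=example_bounds, qmc=True,
)
assert perturbed.shape == (16, 2)  # n_discrete_points x d
assert ((perturbed >= example_bounds[0]) & (perturbed <= example_bounds[1])).all()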
def optimize_acqf_and_get_observation(acq_func):
    """Optimizes the acquisition function, and returns a new candidate and a noisy observation"""
    candidates, _ = optimize_acqf(
        acq_function=acq_func,
        bounds=torch.stack([
            torch.zeros(d, dtype=dt),
            torch.ones(d, dtype=dt),
        ]),
        q=1,
        num_restarts=10,
        raw_samples=200,
    )

    x = unnormalize(candidates.detach(), bounds=bounds)
    print('Hyper-parameter: ' + str(x))
    obj = self.train(x.view(-1)).unsqueeze(-1)
    print('Error: ' + str(obj))
    return x, obj
def qparego_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is larger than three.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    weights = sample_simplex(n_objectives).squeeze()
    scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj)

    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        constraints = []
        n_constraints = train_con.size(1)
        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])

        objective = ConstrainedMCObjective(
            objective=lambda Z: scalarization(Z[..., :n_objectives]),
            constraints=constraints,
        )
    else:
        train_y = train_obj

        objective = GenericMCObjective(scalarization)

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=objective(train_y).max(),
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
def qehvi_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is three or less.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        constraints = []
        n_constraints = train_con.size(1)
        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])
        additional_qehvi_kwargs = {
            "objective": IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))),
            "constraints": constraints,
        }
    else:
        train_y = train_obj

        train_obj_feas = train_obj

        additional_qehvi_kwargs = {}

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1]))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    # Approximate box decomposition similar to Ax when the number of objectives is large.
    # https://github.com/facebook/Ax/blob/master/ax/models/torch/botorch_moo_defaults
    if n_objectives > 2:
        alpha = 10 ** (-8 + n_objectives)
    else:
        alpha = 0.0
    partitioning = NondominatedPartitioning(num_outcomes=n_objectives, Y=train_obj_feas, alpha=alpha)

    ref_point = train_obj.min(dim=0).values - 1e-8
    ref_point_list = ref_point.tolist()

    acqf = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=ref_point_list,
        partitioning=partitioning,
        sampler=SobolQMCNormalSampler(num_samples=256),
        **additional_qehvi_kwargs,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200, "nonnegative": True},
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
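
# Quick arithmetic on the approximate-partitioning rule used above, for reference:
# with alpha = 10 ** (-8 + n_objectives), 3 objectives give alpha = 1e-5, 4 give 1e-4,
# and 5 give 1e-3, while 2 or fewer objectives use an exact partitioning (alpha = 0).
print([10 ** (-8 + m) for m in (3, 4, 5)])  # [1e-05, 0.0001, 0.001]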
def generate_initial_observations(n, logger):
    """
    Takes an integer `n` and generates `n` initial observations
    from the black box function using Sobol random parameter settings
    in the unit cube. Returns parameter setting and black box function outputs
    """

    if n <= 0:
        raise ValueError(
            'qKnowledgeGradient and GP needs at least one observation to be defined properly.')

    # sobol sequence
    # new_thetas: [n, n_params]
    new_thetas = torch.tensor(
        sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

    # simulator observations
    # new_G, new_G_sem: [n, n_days * n_age] (flattened outputs)
    new_G = torch.zeros((n, n_days * n_age), dtype=torch.float)
    new_G_sem = torch.zeros((n, n_days * n_age), dtype=torch.float)

    for i in range(n):
        t0 = time.time()

        # get mean and standard error of mean (sem) of every simulation output
        G, G_sem = composite_simulation(new_thetas[i, :])
        new_G[i, :] = G
        new_G_sem[i, :] = G_sem

        # log
        G_objectives = objective(new_G[:i+1])
        best_idx = G_objectives.argmax()
        best = G_objectives[best_idx].item()
        current = objective(G).item()
        case_diff = (
            G.reshape(n_days, n_age)[-1].sum()
            - G_obs.reshape(n_days, n_age)[-1].sum())

        t1 = time.time()
        logger.log(
            i=i - n,
            time=t1 - t0,
            best=best,
            objective=current,
            case_diff=case_diff,
            theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
        )

        # save state
        state = {
            'train_theta': new_thetas[:i+1],
            'train_G': new_G[:i+1],
            'train_G_sem': new_G_sem[:i+1],
            'best_observed_obj': best,
            'best_observed_idx': best_idx,
        }
        save_state(state, logger.filename + '_init')

    # compute best objective from simulations
    f = objective(new_G)
    best_f_idx = f.argmax()
    best_f = f[best_f_idx].item()

    return new_thetas, new_G, new_G_sem, best_f, best_f_idx
def composite_simulation(norm_params):
    """
    Takes a set of normalized (unit cube) BO parameters
    and returns simulator output means and standard errors based on multiple
    random restarts. This corresponds to the black-box function.
    """

    # un-normalize normalized params to obtain simulation parameters
    params = transforms.unnormalize(norm_params, sim_bounds)

    # finalize settings based on which parameters are calibrated
    kwargs = copy.deepcopy(launch_kwargs)
    if args.measures_optimized:

        '''
        Measures are calibrated
        '''
        measure_params = parr_to_pdict(params, measures_optimized=args.measures_optimized)

        # social distancing measures: calibration is only done for `SocialDistancingForAllMeasure` for now
        measure_list_ = [
            SocialDistancingForPositiveMeasure(
                t_window=Interval(0.0, max_time), p_stay_home=1.0),
            SocialDistancingForPositiveMeasureHousehold(
                t_window=Interval(0.0, max_time), p_isolate=1.0),
            SocialDistancingForAllMeasure(
                t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                  TO_HOURS * days_until_lockdown_end),
                p_stay_home=measure_params['p_stay_home']),
        ]

        # close sites if specified
        if args.measures_close:
            beta_multipliers = {'education': 1.0, 'social': 1.0, 'bus_stop': 1.0,
                                'office': 1.0, 'supermarket': 1.0}
            for category in args.measures_close:
                if category in beta_multipliers.keys():
                    beta_multipliers[category] = 0.0
                else:
                    raise ValueError(f'Site type `{category}` passed in `--measures_close` is invalid.\n'
                                     f'Available are {str(list(beta_multipliers.keys()))}')

            measure_list_.append(BetaMultiplierMeasureByType(
                t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                  TO_HOURS * days_until_lockdown_end),
                beta_multiplier=beta_multipliers
            ))

        kwargs['measure_list'] = MeasureList(measure_list_)

        # get optimized model parameters for this country and area
        calibrated_model_params = settings_optimized_town_params[args.country][args.area]
        if calibrated_model_params is None:
            raise ValueError(f'Cannot optimize measures for {args.country}-{args.area} because model parameters '
                             'have not been fitted yet. Set values in `calibration_settings.py`')
        kwargs['params'] = calibrated_model_params

    else:

        '''
        Model parameters calibrated
        '''
        kwargs['measure_list'] = MeasureList([
            SocialDistancingForPositiveMeasure(
                t_window=Interval(0.0, max_time), p_stay_home=1.0),
            SocialDistancingForPositiveMeasureHousehold(
                t_window=Interval(0.0, max_time), p_isolate=1.0),
        ])

        kwargs['params'] = parr_to_pdict(params, measures_optimized=args.measures_optimized)

    # run simulation in parallel
    summary = launch_parallel_simulations(**kwargs)

    # (random_repeats, n_people)
    posi_started = torch.tensor(summary.state_started_at['posi'])
    posi_started -= test_lag_days * TO_HOURS  # account for test lag

    # (random_repeats, n_days)
    age_groups = torch.tensor(summary.people_age)
    posi_cumulative = convert_timings_to_cumulative_daily(
        timings=posi_started, age_groups=age_groups,
        time_horizon=n_days * TO_HOURS)

    if posi_cumulative.shape[0] <= 1:
        raise ValueError('Must run at least 2 random restarts per setting to get estimate of noise in observation.')

    # compute mean and standard error of means
    G = torch.mean(posi_cumulative, dim=0)
    G_sem = torch.std(posi_cumulative, dim=0) / math.sqrt(posi_cumulative.shape[0])

    # make sure noise is not zero for non-degenerateness
    G_sem = torch.max(G_sem, MIN_NOISE)

    # flatten
    G = G.reshape(1, n_days * n_age)
    G_sem = G_sem.reshape(1, n_days * n_age)

    return G, G_sem
def unnormalize_theta(theta):
    '''
    Computes unnormalized parameters
    '''
    return transforms.unnormalize(theta, sim_bounds)
def run_RGPE(test_task: int, objective, bounds, base_model_list):
    input_dim = bounds.shape[1]
    best_rgpe_all = []
    best_argmax_rgpe_all = []

    # Average over multiple trials
    for trial in range(N_TRIALS):
        print(f"Trial {trial + 1} of {N_TRIALS}")
        best_BMs = []
        best_rgpe = []

        # Initial random observations
        raw_x = draw_sobol_samples(bounds=bounds, n=RANDOM_INITIALIZATION_SIZE, q=1,
                                   seed=trial).squeeze(1)
        train_x = normalize(raw_x, bounds=bounds)
        train_y_noiseless = objective(raw_x, shift=test_task)
        train_y = train_y_noiseless + noise_std * torch.randn_like(train_y_noiseless)
        train_yvar = torch.full_like(train_y, noise_std ** 2)

        # keep track of the best observed point at each iteration
        best_value = train_y.max().item()
        best_rgpe.append(best_value)

        # Run N_BATCH rounds of BayesOpt after the initial random batch
        for iteration in range(N_BATCH):
            target_model = get_fitted_model(train_x, train_y, train_yvar)
            model_list = base_model_list + [target_model]
            rank_weights = compute_rank_weights(
                train_x,
                train_y,
                base_model_list,
                target_model,
                NUM_POSTERIOR_SAMPLES,
            )

            # create model and acquisition function
            rgpe_model = RGPE(model_list, rank_weights)
            sampler_qnei = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            qNEI = qNoisyExpectedImprovement(
                model=rgpe_model,
                X_baseline=train_x,
                sampler=sampler_qnei,
            )

            # optimize
            candidate, _ = optimize_acqf(
                acq_function=qNEI,
                bounds=bounds,
                q=Q_BATCH_SIZE,
                num_restarts=N_RESTARTS,
                raw_samples=N_RESTART_CANDIDATES,
            )

            # fetch the new values
            new_x = candidate.detach()
            new_y_noiseless = objective(unnormalize(new_x, bounds=bounds), shift=test_task)
            new_y = new_y_noiseless + noise_std * torch.randn_like(new_y_noiseless)
            new_yvar = torch.full_like(new_y, noise_std ** 2)

            # update training points
            train_x = torch.cat((train_x, new_x))
            train_y = torch.cat((train_y, new_y))
            train_yvar = torch.cat((train_yvar, new_yvar))

            # get the new best observed value
            best_value = train_y.max().item()
            best_idx = torch.argmax(train_y).item()
            best_candidate = train_x[best_idx].view(1, -1)
            _, best_BM = objective(unnormalize(best_candidate, bounds=bounds),
                                   shift=test_task, include_BMs=True)
            best_rgpe.append(best_value)
            best_BMs.append(best_BM)

        best_rgpe_all.append(best_rgpe)
        best_argmax_rgpe_all.append(best_BMs)

    BM_winner_idx = np.argmax(np.array(best_rgpe_all)[:, -1], axis=0)
    BM_winner = np.reshape(np.array(best_argmax_rgpe_all[BM_winner_idx][-1]), (2, input_dim))
    return BM_winner
def mixin_tree(T: Tensor, bounds: Tensor, alpha: float) -> Tensor:
    return (1 - alpha) * T + alpha * unnormalize(torch.rand_like(T), bounds)
def mixin_layer(X: Tensor, bounds: Tensor, eta: float) -> Tensor:
    perturbations = unnormalize(B.sample(X.shape).squeeze(-1), bounds)
    return (1 - eta) * X + eta * perturbations
def evaluate_slack_true(self, X: Tensor) -> Tensor:
    X_tf = unnormalize(X, self.con_bounds)
    return 50 - (X_tf[..., 0:1] - 2.5).pow(2) - (X_tf[..., 1:2] - 7.5).pow(2)
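
# Note on the constraint above (assuming the usual convention that a non-negative slack
# means "feasible"): the slack is >= 0 exactly when
# (x1 - 2.5)^2 + (x2 - 7.5)^2 <= 50, i.e. when the unnormalized point lies inside a disk
# of radius sqrt(50) (about 7.07) centred at (2.5, 7.5).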
def generate_initial_observations(n, logger, loaded_init_theta=None, loaded_init_G=None, loaded_init_G_sem=None):
    """
    Takes an integer `n` and generates `n` initial observations
    from the black box function using Sobol random parameter settings
    in the unit cube. Returns parameter setting and black box function outputs.
    If `loaded_init_theta/G/G_sem` are specified, initialization is loaded (possibly partially, in which
    case the initialization using the Sobol random sequence is continued where left off).
    """

    if n <= 0:
        raise ValueError(
            'qKnowledgeGradient and GP needs at least one observation to be defined properly.')

    # sobol sequence proposal points
    # new_thetas: [n, n_params]
    new_thetas = torch.tensor(
        sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

    # check whether initial observations are loaded
    loaded = (loaded_init_theta is not None
              and loaded_init_G is not None
              and loaded_init_G_sem is not None)
    if loaded:
        n_loaded = loaded_init_theta.shape[0]  # loaded no. of observations total
        n_loaded_init = min(n_loaded, n)       # loaded no. of quasi-random initialization observations
        n_init = max(n_loaded, n)              # final no. of observations returned, at least quasi-random initializations

        # check whether loaded proposal points are same as without loading observations
        try:
            assert(np.allclose(loaded_init_theta[:n_loaded_init], new_thetas[:n_loaded_init]))
        except AssertionError:
            print(
                '\n\n\n===> Warning: parameters of loaded initial observations '
                'do not coincide with initialization that would have been done. '
                'Double check simulation, ninit, and parameter bounds, which could change '
                'the initial random Sobol sequence. \nThe loaded parameter settings are used. \n\n\n'
            )

        if n_init > n:
            new_thetas = loaded_init_theta  # size of tensor increased to `n_init`, as more than Sobol init points loaded
    else:
        n_loaded = 0       # loaded no. of observations total
        n_loaded_init = 0  # loaded no. of quasi-random initialization observations
        n_init = n         # final no. of observations returned, at least quasi-random initializations

    # instantiate simulator observation tensors
    if per_age_group_objective:
        # new_G, new_G_sem: [n_init, n_days * n_age] (flattened outputs)
        new_G = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
        new_G_sem = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
    else:
        # new_G, new_G_sem: [n_init, n_days]
        new_G = torch.zeros((n_init, n_days), dtype=torch.float)
        new_G_sem = torch.zeros((n_init, n_days), dtype=torch.float)

    # generate `n` initial evaluations at quasi random settings; if applicable, skip and load expensive evaluation result
    for i in range(n_init):

        # if loaded, use initial observation for this parameter settings
        if loaded and i <= n_loaded - 1:
            new_thetas[i] = loaded_init_theta[i]
            G, G_sem = loaded_init_G[i], loaded_init_G_sem[i]
            walltime = 0.0

        # if not loaded, evaluate as usual
        else:
            t0 = time.time()
            G, G_sem = composite_simulation(new_thetas[i])
            walltime = time.time() - t0

        new_G[i] = G
        new_G_sem[i] = G_sem

        # log
        G_objectives = objective(new_G[:i+1])
        best_idx = G_objectives.argmax()
        best = G_objectives[best_idx].item()
        current = objective(G).item()

        if per_age_group_objective:
            case_diff = G.reshape(n_days, n_age)[-1].sum() - G_obs_aggregate[-1]
        else:
            case_diff = G[-1] - G_obs_aggregate[-1]

        logger.log(
            i=i - n,
            time=walltime,
            best=best,
            objective=current,
            case_diff=case_diff,
            theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
        )

        # save state
        state = {
            'train_theta': new_thetas[:i+1],
            'train_G': new_G[:i+1],
            'train_G_sem': new_G_sem[:i+1],
            'best_observed_obj': best,
            'best_observed_idx': best_idx,
        }
        save_state(state, logger.filename)

    # compute best objective from simulations
    f = objective(new_G)
    best_f_idx = f.argmax()
    best_f = f[best_f_idx].item()

    return new_thetas, new_G, new_G_sem, best_f, best_f_idx
def qei_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Improvement (qEI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with single-objective optimization.

    Args:
        train_x:
            Previous parameter configurations. A ``torch.Tensor`` of shape
            ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials
            and ``n_params`` is the number of parameters. ``n_params`` may be larger than the
            actual number of parameters if categorical parameters are included in the search
            space, since these parameters are one-hot encoded.
            Values are not normalized.
        train_obj:
            Previously observed objectives. A ``torch.Tensor`` of shape
            ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``.
            ``n_objectives`` is the number of objectives. Observations are not normalized.
        train_con:
            Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``.
            ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number
            of constraints. A constraint is violated if strictly larger than 0. If no
            constraints are involved in the optimization, this argument will be :obj:`None`.
        bounds:
            Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params``
            is identical to that of ``train_x``. The first and the second rows correspond to
            the lower and upper bounds for each parameter respectively.

    Returns:
        Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.
    """

    if train_obj.size(-1) != 1:
        raise ValueError("Objective may only contain single values with qEI.")
    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        if train_obj_feas.numel() == 0:
            # TODO(hvy): Do not use 0 as the best observation.
            _logger.warning(
                "No objective values are feasible. Using 0 as the best objective in qEI."
            )
            best_f = torch.zeros(())
        else:
            best_f = train_obj_feas.max()

        constraints = []
        n_constraints = train_con.size(1)
        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])
        objective = ConstrainedMCObjective(
            objective=lambda Z: Z[..., 0],
            constraints=constraints,
        )
    else:
        train_y = train_obj

        best_f = train_obj.max()

        objective = None  # Using the default identity objective.

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=best_f,
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=10,
        raw_samples=512,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
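
# Hypothetical usage sketch of qei_candidates_func for an unconstrained single-objective
# problem. The tensor contents and the 3-parameter search space are made up, and the
# function (together with the BoTorch/GPyTorch imports it relies on) is assumed to be in scope.
import torch

example_train_x = torch.rand(20, 3, dtype=torch.double)    # 20 observed trials, 3 params
example_train_obj = torch.rand(20, 1, dtype=torch.double)  # single objective per trial
example_bounds = torch.tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]],
                              dtype=torch.double)           # 2 x n_params (lower, upper rows)
example_candidates = qei_candidates_func(
    example_train_x, example_train_obj, None, example_bounds)
print(example_candidates.shape)  # expected: torch.Size([1, 3])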
def evaluate(mth, run_i, seed):
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(x: torch.Tensor):
        # Caution: unnormalize and maximize
        x = unnormalize(x, bounds=problem_bounds)
        x = x.cpu().numpy().astype(np.float64)  # caution
        res = problem.evaluate(x)
        res['objs'] = [-y for y in res['objs']]
        return res  # Caution: negative values imply feasibility in botorch

    hv_diffs = []
    time_list = []
    global_start_time = time.time()

    # random seed
    np.random.seed(seed)
    torch.manual_seed(seed)

    # call helper functions to generate initial training data and initialize model
    train_x, train_obj, train_con = generate_initial_data(
        initial_runs, objective_function, time_list, global_start_time)

    # fix bug: find feasible
    real_initial_runs = initial_runs
    while real_initial_runs < max_runs:
        # compute feasible observations
        is_feas = (train_con <= 0).all(dim=-1)
        # compute points that are better than the known reference point
        better_than_ref = (train_obj > problem.ref_point).all(dim=-1)
        if (is_feas & better_than_ref).any():
            break
        train_x, train_obj, train_con = expand_initial_data(
            train_x, train_obj, train_con, objective_function, time_list, global_start_time)
        real_initial_runs += 1
        print('=== Expand initial data to find feasible. Iter =', real_initial_runs)

    mll, model = initialize_model(train_x, train_obj, train_con)

    # for plot
    X_init = train_x.cpu().numpy().astype(np.float64)
    Y_init = -1 * train_obj.cpu().numpy().astype(np.float64)

    # calculate hypervolume of init data
    for i in range(real_initial_runs):
        train_obj_i = train_obj[:i + 1]
        train_con_i = train_con[:i + 1]
        # compute pareto front
        is_feas_i = (train_con_i <= 0).all(dim=-1)
        feas_train_obj_i = train_obj_i[is_feas_i]
        pareto_mask = is_non_dominated(feas_train_obj_i)
        pareto_y = feas_train_obj_i[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

    # run (max_runs - real_initial_runs) rounds of BayesOpt after the initial random batch
    for iteration in range(real_initial_runs + 1, max_runs + 1):
        t0 = time.time()
        try:
            # fit the models
            fit_gpytorch_model(mll)

            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # compute feasible observations
            is_feas = (train_con <= 0).all(dim=-1)
            # compute points that are better than the known reference point
            better_than_ref = (train_obj > problem.ref_point).all(dim=-1)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs,
                # use observations that are better than the specified reference point and feasible
                Y=train_obj[better_than_ref & is_feas],
            )
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
                # define an objective that specifies which outcomes are the objectives
                objective=IdentityMCMultiOutputObjective(
                    outcomes=list(range(problem.num_objs))),
                # specify that the constraint is on the last outcome
                constraints=constraint_callable_list(
                    problem.num_constraints, num_objs=problem.num_objs),
            )
            # optimize and get new observation
            new_x, new_obj, new_con = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
        except Exception as e:  # handle numeric problem
            step = 2
            print('===== Exception in optimization loop, restart with 1/%d of training data: %s'
                  % (step, str(e)))
            if refit == 1:
                mll, model = initialize_model(train_x[::step], train_obj[::step], train_con[::step])
            else:
                mll, model = initialize_model(
                    train_x[::step],
                    train_obj[::step],
                    train_con[::step],
                    model.state_dict(),
                )
            # fit the models
            fit_gpytorch_model(mll)

            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # compute feasible observations
            is_feas = (train_con[::step] <= 0).all(dim=-1)
            # compute points that are better than the known reference point
            better_than_ref = (train_obj[::step] > problem.ref_point).all(dim=-1)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs,
                # use observations that are better than the specified reference point and feasible
                Y=train_obj[::step][better_than_ref & is_feas],
            )
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
                # define an objective that specifies which outcomes are the objectives
                objective=IdentityMCMultiOutputObjective(
                    outcomes=list(range(problem.num_objs))),
                # specify that the constraint is on the last outcome
                constraints=constraint_callable_list(
                    problem.num_constraints, num_objs=problem.num_objs),
            )
            # optimize and get new observation
            new_x, new_obj, new_con = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)

        assert len(time_list) == iteration

        # update training points
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])
        train_con = torch.cat([train_con, new_con])

        # update progress
        # compute pareto front
        is_feas = (train_con <= 0).all(dim=-1)
        feas_train_obj = train_obj[is_feas]
        pareto_mask = is_non_dominated(feas_train_obj)
        pareto_y = feas_train_obj[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        # Note: they find improved performance from not warm starting the model hyperparameters
        # using the hyperparameters from the previous iteration
        if refit == 1:
            mll, model = initialize_model(train_x, train_obj, train_con)
        else:
            mll, model = initialize_model(
                train_x,
                train_obj,
                train_con,
                model.state_dict(),
            )

        t1 = time.time()
        print("Iter %d: x=%s, perf=%s, con=%s, hv_diff=%f, time=%.2f, global_time=%.2f"
              % (iteration, unnormalize(new_x, bounds=problem_bounds), -new_obj, new_con,
                 hv_diff, t1 - t0, time_list[-1]), flush=True)

    # compute pareto front
    is_feas = (train_con <= 0).all(dim=-1)
    feas_train_obj = train_obj[is_feas]
    pareto_mask = is_non_dominated(feas_train_obj)
    pareto_y = feas_train_obj[pareto_mask]
    pf = -1 * pareto_y.cpu().numpy().astype(np.float64)

    # Save result
    X = unnormalize(train_x, bounds=problem_bounds).cpu().numpy().astype(np.float64)  # caution
    train_obj[~is_feas] = -INFEASIBLE_OBJ_VALUE  # set infeasible
    Y = -1 * train_obj.cpu().numpy().astype(np.float64)

    # plot for debugging
    if plot_mode == 1:
        plot_pf(problem, problem_str, mth, pf, Y_init)

    return hv_diffs, pf, X, Y, time_list
# ========================================================================
with torch.no_grad():
    pred_r_train_mean = gp_recon_model.posterior(x_train).mean
    pred_r_train_std = gp_recon_model.posterior(x_train).variance.sqrt()
    pred_r_test_mean = gp_recon_model.posterior(x_val).mean
    pred_r_test_std = gp_recon_model.posterior(x_val).variance.sqrt()

    pred_r_train_mean = pred_r_train_mean.clamp_min(1e-4).log() if transfo == "exp" else pred_r_train_mean
    pred_r_train_std = pred_r_train_std.clamp_min(1e-4).log() if transfo == "exp" else pred_r_train_std
    pred_r_test_mean = pred_r_test_mean.clamp_min(1e-4).log() if transfo == "exp" else pred_r_test_mean
    pred_r_test_std = pred_r_test_std.clamp_min(1e-4).log() if transfo == "exp" else pred_r_test_std

    pred_r_train_mean = unnormalize(pred_r_train_mean, rbounds) if transfo == "normalize" else pred_r_train_mean
    pred_r_train_std = unnormalize(pred_r_train_std, rbounds) if transfo == "normalize" else pred_r_train_std
    pred_r_test_mean = unnormalize(pred_r_test_mean, rbounds) if transfo == "normalize" else pred_r_test_mean
    pred_r_test_std = unnormalize(pred_r_test_std, rbounds) if transfo == "normalize" else pred_r_test_std

    gp_recon_model_fit_train = (pred_r_train_mean - r_train).pow(2).div(len(r_train))
    gp_recon_model_fit_test = (pred_r_test_mean - r_val).pow(2).div(len(r_train))
    torch.save(gp_recon_model_fit_train, os.path.join(save_folder, "gp_recon_model_fit_train.pt"))
    torch.save(gp_recon_model_fit_test, os.path.join(save_folder, "gp_recon_model_fit_test.pt"))
    print(f'\tMSE on r train set     : {gp_recon_model_fit_train.sum().item():.3f}')
    print(f'\tMSE on r validation set: {gp_recon_model_fit_test.sum().item():.3f}')

df_ = pd.DataFrame(
def evaluate(mth, run_i, seed):
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(x: torch.Tensor):
        # Caution: unnormalize and maximize
        x = unnormalize(x, bounds=problem_bounds)
        x = x.cpu().numpy().astype(np.float64)  # caution
        res = problem.evaluate(x)
        objs = [-y for y in res['objs']]
        return objs

    hv_diffs = []
    time_list = []
    global_start_time = time.time()

    # random seed
    np.random.seed(seed)
    torch.manual_seed(seed)

    # call helper functions to generate initial training data and initialize model
    train_x, train_obj = generate_initial_data(initial_runs, objective_function,
                                               time_list, global_start_time)
    mll, model = initialize_model(train_x, train_obj)

    # for plot
    X_init = train_x.cpu().numpy().astype(np.float64)
    Y_init = -1 * train_obj.cpu().numpy().astype(np.float64)

    # calculate hypervolume of init data
    for i in range(initial_runs):
        train_obj_i = train_obj[:i + 1]
        # compute pareto front
        pareto_mask = is_non_dominated(train_obj_i)
        pareto_y = train_obj_i[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

    # run (max_runs - initial_runs) rounds of BayesOpt after the initial random batch
    for iteration in range(initial_runs + 1, max_runs + 1):
        t0 = time.time()
        try:
            # fit the models
            fit_gpytorch_model(mll)
            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs, Y=train_obj)
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
            )
            # optimize and get new observation
            new_x, new_obj = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
        except Exception as e:
            step = 2
            print('===== Exception in optimization loop, restart with 1/%d of training data: %s'
                  % (step, str(e)))
            if refit == 1:
                mll, model = initialize_model(train_x[::step], train_obj[::step])
            else:
                mll, model = initialize_model(
                    train_x[::step],
                    train_obj[::step],
                    model.state_dict(),
                )
            # fit the models
            fit_gpytorch_model(mll)
            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs, Y=train_obj[::step])
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
            )
            # optimize and get new observation
            new_x, new_obj = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)

        assert len(time_list) == iteration

        # update training points
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])

        # update progress
        # compute pareto front
        pareto_mask = is_non_dominated(train_obj)
        pareto_y = train_obj[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        # Note: they find improved performance from not warm starting the model hyperparameters
        # using the hyperparameters from the previous iteration
        if refit == 1:
            mll, model = initialize_model(train_x, train_obj)
        else:
            mll, model = initialize_model(
                train_x,
                train_obj,
                model.state_dict(),
            )

        t1 = time.time()
        print("Iter %d: x=%s, perf=%s, hv_diff=%f, time=%.2f, global_time=%.2f"
              % (iteration, unnormalize(new_x, bounds=problem_bounds), -new_obj,
                 hv_diff, t1 - t0, time_list[-1]), flush=True)

    # Save result
    X = unnormalize(train_x, bounds=problem_bounds).cpu().numpy().astype(np.float64)  # caution
    Y = -1 * train_obj.cpu().numpy().astype(np.float64)
    # compute pareto front
    pareto_mask = is_non_dominated(train_obj)
    pareto_y = train_obj[pareto_mask]
    pf = -1 * pareto_y.cpu().numpy().astype(np.float64)

    # plot for debugging
    if plot_mode == 1:
        plot_pf(problem, problem_str, mth, pf, Y_init)

    return hv_diffs, pf, X, Y, time_list