def test_column_wise_clamp_scalar_tensors(self):
    X, X_expected = self.X, self.X_expected
    with self.assertRaises(ValueError):
        X_clmp = columnwise_clamp(X, torch.tensor(1), torch.tensor(-1))
    X_clmp = columnwise_clamp(X, torch.tensor(-1), torch.tensor(0.5))
    self.assertTrue(torch.equal(X_clmp, X_expected))
    X_clmp = columnwise_clamp(X, torch.tensor(-3), torch.tensor(3))
    self.assertTrue(torch.equal(X_clmp, X))
def test_column_wise_clamp_scalars(self):
    X, X_expected = self.X, self.X_expected
    with self.assertRaises(ValueError):
        X_clmp = columnwise_clamp(X, 1, -1)
    X_clmp = columnwise_clamp(X, -1, 0.5)
    self.assertTrue(torch.equal(X_clmp, X_expected))
    X_clmp = columnwise_clamp(X, -3, 3)
    self.assertTrue(torch.equal(X_clmp, X))
def test_column_wise_clamp_scalar_tensors(self, cuda=False):
    X = self.X.cuda() if cuda else self.X
    X_expected = self.X_expected.cuda() if cuda else self.X_expected
    with self.assertRaises(ValueError):
        X_clmp = columnwise_clamp(X, torch.tensor(1), torch.tensor(-1))
    X_clmp = columnwise_clamp(X, torch.tensor(-1), torch.tensor(0.5))
    self.assertTrue(torch.equal(X_clmp, X_expected))
    X_clmp = columnwise_clamp(X, torch.tensor(-3), torch.tensor(3))
    self.assertTrue(torch.equal(X_clmp, X))
def test_column_wise_clamp_scalars(self, cuda=False):
    X = self.X.cuda() if cuda else self.X
    X_expected = self.X_expected.cuda() if cuda else self.X_expected
    with self.assertRaises(ValueError):
        X_clmp = columnwise_clamp(X, 1, -1)
    X_clmp = columnwise_clamp(X, -1, 0.5)
    self.assertTrue(torch.equal(X_clmp, X_expected))
    X_clmp = columnwise_clamp(X, -3, 3)
    self.assertTrue(torch.equal(X_clmp, X))
def test_column_wise_clamp_tensors(self):
    X, X_expected = self.X, self.X_expected
    with self.assertRaises(ValueError):
        X_clmp = columnwise_clamp(X, torch.ones(2), torch.zeros(2))
    with self.assertRaises(RuntimeError):
        X_clmp = columnwise_clamp(X, torch.zeros(3), torch.ones(3))
    X_clmp = columnwise_clamp(X, torch.tensor([-1, -1]), torch.tensor([0.5, 0.5]))
    self.assertTrue(torch.equal(X_clmp, X_expected))
    X_clmp = columnwise_clamp(X, torch.tensor([-3, -3]), torch.tensor([3, 3]))
    self.assertTrue(torch.equal(X_clmp, X))
def test_column_wise_clamp_raise_on_violation(self):
    X = self.X
    with self.assertRaises(BotorchError):
        X_clmp = columnwise_clamp(
            X, torch.zeros(2), torch.ones(2), raise_on_violation=True
        )
    X_clmp = columnwise_clamp(
        X, torch.tensor([-3, -3]), torch.tensor([3, 3]), raise_on_violation=True
    )
    self.assertTrue(torch.equal(X_clmp, X))
def test_column_wise_clamp_full_dim_tensors(self):
    X = torch.tensor([[[-1, 2, 0.5], [0.5, 3, 1.5]], [[0.5, 1, 0], [2, -2, 3]]])
    lower = torch.tensor([[[0, 0.5, 1], [0, 2, 2]], [[0, 2, 0], [1, -1, 0]]])
    upper = torch.tensor([[[1, 1.5, 1], [1, 4, 3]], [[1, 3, 0.5], [3, 1, 2.5]]])
    X_expected = torch.tensor(
        [[[0, 1.5, 1], [0.5, 3, 2]], [[0.5, 2, 0], [2, -1, 2.5]]]
    )
    X_clmp = columnwise_clamp(X, lower, upper)
    self.assertTrue(torch.equal(X_clmp, X_expected))
    X_clmp = columnwise_clamp(X, lower - 5, upper + 5)
    self.assertTrue(torch.equal(X_clmp, X))
    with self.assertRaises(ValueError):
        X_clmp = columnwise_clamp(X, torch.ones_like(X), torch.zeros_like(X))
    with self.assertRaises(RuntimeError):
        X_clmp = columnwise_clamp(X, lower.unsqueeze(-3), upper.unsqueeze(-3))
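# The tests above pin down the expected behavior of columnwise_clamp: a lower
# bound greater than the upper bound raises a ValueError, bounds that cannot be
# expanded to X's shape raise a RuntimeError, and raise_on_violation=True raises
# a BotorchError whenever clamping actually changes a value. Below is a minimal
# sketch consistent with that behavior; it is an illustration, not the exact
# BoTorch implementation (the real helper may validate shapes and tolerances
# differently), and the BotorchError import path is assumed.
from typing import Optional, Union

import torch
from torch import Tensor
from botorch.exceptions.errors import BotorchError  # assumed import path


def columnwise_clamp_sketch(
    X: Tensor,
    lower: Optional[Union[float, Tensor]] = None,
    upper: Optional[Union[float, Tensor]] = None,
    raise_on_violation: bool = False,
) -> Tensor:
    if lower is None and upper is None:
        return X
    if lower is not None:
        # expand_as raises a RuntimeError if the bound cannot be broadcast to X
        lower = torch.as_tensor(lower).expand_as(X).to(X)
    if upper is not None:
        upper = torch.as_tensor(upper).expand_as(X).to(X)
    if lower is not None and upper is not None and torch.any(lower > upper):
        raise ValueError("Minimum values must be <= maximum values")
    out = X
    if lower is not None:
        out = torch.max(out, lower)
    if upper is not None:
        out = torch.min(out, upper)
    if raise_on_violation and not torch.equal(out, X):
        raise BotorchError("Original value(s) are out of the clamping range.")
    return out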
def gen(
    self,
    num_points: int,  # Current implementation only generates 1 point at a time
    model: MonotonicRejectionGP,
):
    """Query next point(s) to run by optimizing the acquisition function.

    Args:
        num_points (int, optional): Number of points to query.
        model (AEPsychMixin): Fitted model of the data.

    Returns:
        np.ndarray: Next set of point(s) to evaluate, [num_points x dim].
    """
    options = self.model_gen_options or {}
    num_restarts = options.get("num_restarts", 10)
    raw_samples = options.get("raw_samples", 1000)
    verbosity_freq = options.get("verbosity_freq", -1)
    lr = options.get("lr", 0.01)
    momentum = options.get("momentum", 0.9)
    nesterov = options.get("nesterov", True)
    epochs = options.get("epochs", 50)
    milestones = options.get("milestones", [25, 40])
    gamma = options.get("gamma", 0.1)
    loss_constraint_fun = options.get(
        "loss_constraint_fun", default_loss_constraint_fun
    )

    # Augment bounds with deriv indicator
    bounds = torch.cat((model.bounds_, torch.zeros(2, 1)), dim=1)
    # Fix deriv indicator to 0 during optimization
    fixed_features = {(bounds.shape[1] - 1): 0.0}
    # Fix explore features to random values
    if self.explore_features is not None:
        for idx in self.explore_features:
            val = (
                bounds[0, idx]
                + torch.rand(1, dtype=bounds.dtype)
                * (bounds[1, idx] - bounds[0, idx])
            ).item()
            fixed_features[idx] = val
            bounds[0, idx] = val
            bounds[1, idx] = val

    acqf = self._instantiate_acquisition_fn(model)

    # Initialize
    batch_initial_conditions = gen_batch_initial_conditions(
        acq_function=acqf,
        bounds=bounds,
        q=1,
        num_restarts=num_restarts,
        raw_samples=raw_samples,
    )
    clamped_candidates = columnwise_clamp(
        X=batch_initial_conditions, lower=bounds[0], upper=bounds[1]
    ).requires_grad_(True)
    candidates = fix_features(clamped_candidates, fixed_features)
    optimizer = torch.optim.SGD(
        params=[clamped_candidates], lr=lr, momentum=momentum, nesterov=nesterov
    )
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=gamma
    )

    # Optimize
    for epoch in range(epochs):
        loss = -acqf(candidates).sum()

        # adjust loss based on constraints on candidates
        loss = loss_constraint_fun(loss, candidates)

        if verbosity_freq > 0 and epoch % verbosity_freq == 0:
            logger.info("Iter: {} - Value: {:.3f}".format(epoch, -(loss.item())))

        def closure():
            optimizer.zero_grad()
            loss.backward(
                retain_graph=True
            )  # Variational model requires retain_graph
            return loss

        optimizer.step(closure)
        clamped_candidates.data = columnwise_clamp(
            X=clamped_candidates, lower=bounds[0], upper=bounds[1]
        )
        candidates = fix_features(clamped_candidates, fixed_features)
        lr_scheduler.step()

    # Extract best point
    with torch.no_grad():
        batch_acquisition = acqf(candidates)

    best = torch.argmax(batch_acquisition.view(-1), dim=0)
    Xopt = candidates[best][:, :-1].detach()
    return Xopt
def gen_candidates_scipy(
    initial_conditions: Tensor,
    acquisition_function: Module,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    constraints=(),
    options: Optional[Dict[str, Any]] = None,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    """
    This function generates a set of candidates using `scipy.optimize.minimize`

    Parameters
    ----------
    :param initial_conditions: starting points for optimization
    :param acquisition_function: acquisition function to be optimized

    Optional parameters
    -------------------
    :param lower_bounds: minimum values for each column of initial_conditions
    :param upper_bounds: maximum values for each column of initial_conditions
    :param constraints: constraints in scipy format
    :param options: options for candidate generation
    :param fixed_features: A map {feature_index: value} for features that should
        be fixed to a particular value during generation.

    Returns
    -------
    :return: 2-element tuple containing the set of generated candidates and the
        acquisition value for each t-batch.
    """
    options = options or {}
    x0 = columnwise_clamp(
        initial_conditions, lower_bounds, upper_bounds
    ).requires_grad_(True)
    bounds = Bounds(lb=lower_bounds, ub=upper_bounds, keep_feasible=True)

    def f(x):
        X = (
            torch.from_numpy(x)
            .to(initial_conditions)
            .contiguous()
            .requires_grad_(True)
        )
        X_fix = fix_features(X=X, fixed_features=fixed_features)
        loss = -acquisition_function(X_fix[None]).sum()
        # compute gradient w.r.t. the inputs (does not accumulate in leaves)
        gradf = _arrayify(torch.autograd.grad(loss, X)[0].contiguous().view(-1))
        fval = loss.item()
        return fval, gradf

    candidates = torch.zeros(x0.shape, dtype=torch.float64)
    # TODO this does not handle the case where q != 1
    for i in range(x0.shape[0]):
        res = minimize(
            f,
            x0[i, 0].detach().numpy(),
            method="SLSQP",
            jac=True,
            bounds=bounds,
            constraints=constraints,
            options={k: v for k, v in options.items() if k != "method"},
        )
        candidates[i] = fix_features(
            X=torch.from_numpy(res.x).to(initial_conditions).contiguous(),
            fixed_features=fixed_features,
        )
    batch_acquisition = acquisition_function(candidates)
    return candidates, batch_acquisition
def gen_candidates_torch(
    initial_conditions: Tensor,
    acquisition_function: Callable,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    optimizer: Type[Optimizer] = torch.optim.Adam,
    options: Optional[Dict[str, Union[float, str]]] = None,
    verbose: bool = True,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Iterable[Any]:  # -> Tuple[Tensor, Any, Optional[Tensor]]:
    r"""Generate a set of candidates using a `torch.optim` optimizer.

    Optimizes an acquisition function starting from a set of initial candidates
    using an optimizer from `torch.optim`.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        optimizer (Optimizer): The pytorch optimizer to use to perform
            candidate search.
        options: Options used to control the optimization. Includes
            maxiter: Maximum number of iterations
        verbose: If True, provide verbose output.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.
    """
    options = options or {}
    _jitter = options.get("jitter", 0.0)
    clamped_candidates = columnwise_clamp(
        X=initial_conditions, lower=lower_bounds, upper=upper_bounds
    ).requires_grad_(True)
    candidates = fix_features(clamped_candidates, fixed_features)
    bayes_optimizer = optimizer(
        params=[clamped_candidates], lr=options.get("lr", 0.025)
    )
    i = 0
    stop = False
    stopping_criterion = ExpMAStoppingCriterion(
        **_filter_kwargs(ExpMAStoppingCriterion, **options)
    )
    while not stop:
        i += 1
        batch_loss = acquisition_function(candidates)
        loss = -batch_loss.sum()

        if verbose:
            print("Iter: {} - Value: {:.3f}".format(i, -(loss.item())))

        if torch.isnan(loss):
            print("loss is nan, exiting optimization of the acquisition function.")
            break

        bayes_optimizer.zero_grad()
        loss.backward()
        if options.get("clip_gradient", False):
            torch.nn.utils.clip_grad_value_(
                clamped_candidates, clip_value=options.get("clip_value", 10.0)
            )
        bayes_optimizer.step()
        clamped_candidates.data = columnwise_clamp(
            clamped_candidates, lower_bounds + _jitter, upper_bounds - _jitter
        )
        candidates = fix_features(clamped_candidates, fixed_features)
        stop = stopping_criterion.evaluate(fvals=loss.detach())

    # clamped_candidates = columnwise_clamp(
    #     X=candidates, lower=lower_bounds, upper=upper_bounds, raise_on_violation=True
    # )

    with torch.no_grad():
        batch_acquisition = acquisition_function(candidates)

    return candidates, batch_acquisition
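# A hedged usage sketch for the variant above: a toy quadratic stand-in for an
# acquisition function (not a real BoTorch acquisition function) plus an options
# dict that exercises the jitter, gradient-clipping and learning-rate settings;
# the remaining keys are consumed by ExpMAStoppingCriterion. Assumes the
# gen_candidates_torch defined above is in scope.
import torch


def toy_acqf(X: torch.Tensor) -> torch.Tensor:
    # Peaked at X = 0.5 in every dimension; maps a `b x q x d` batch to one
    # value per t-batch.
    return -((X - 0.5) ** 2).sum(dim=(-2, -1))


Xinit = torch.rand(5, 1, 2)  # 5 restarts, q=1, d=2
candidates, acq_values = gen_candidates_torch(
    initial_conditions=Xinit,
    acquisition_function=toy_acqf,
    lower_bounds=torch.zeros(2),
    upper_bounds=torch.ones(2),
    options={
        "lr": 0.05,             # optimizer learning rate
        "jitter": 1e-3,         # keep candidates strictly inside the bounds
        "clip_gradient": True,  # clip gradients element-wise
        "clip_value": 5.0,
        "maxiter": 100,         # forwarded to ExpMAStoppingCriterion
    },
    verbose=False,
)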
def gen_candidates_scipy(
    initial_conditions: Tensor,
    acquisition_function: AcquisitionFunction,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    equality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    nonlinear_inequality_constraints: Optional[List[Callable]] = None,
    options: Optional[Dict[str, Any]] = None,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Generate a set of candidates using `scipy.optimize.minimize`.

    Optimizes an acquisition function starting from a set of initial candidates
    using `scipy.optimize.minimize` via a numpy converter.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        inequality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an inequality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) >= rhs`.
        equality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an equality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) = rhs`.
        nonlinear_inequality_constraints: A list of callables that represent
            non-linear inequality constraints of the form `callable(x) >= 0`.
            Each callable is expected to take a `(num_restarts) x q x d`-dim
            tensor as an input and return a `(num_restarts) x q`-dim tensor
            with the constraint values. The constraints will later be passed
            to SLSQP.
        options: Options used to control the optimization including "method"
            and "maxiter". Select method for `scipy.minimize` using the
            "method" key. By default uses L-BFGS-B for box-constrained problems
            and SLSQP if inequality or equality constraints are present.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.

    Example:
        >>> qEI = qExpectedImprovement(model, best_f=0.2)
        >>> bounds = torch.tensor([[0., 0.], [1., 2.]])
        >>> Xinit = gen_batch_initial_conditions(
        >>>     qEI, bounds, q=3, num_restarts=25, raw_samples=500
        >>> )
        >>> batch_candidates, batch_acq_values = gen_candidates_scipy(
        >>>     initial_conditions=Xinit,
        >>>     acquisition_function=qEI,
        >>>     lower_bounds=bounds[0],
        >>>     upper_bounds=bounds[1],
        >>> )
    """
    options = options or {}

    # if there are fixed features we may optimize over a domain of lower dimension
    reduced_domain = False
    if fixed_features:
        # TODO: We can support fixed features, see Max's comment on D33551393. We
        # can consider adding this at a later point.
        if nonlinear_inequality_constraints:
            raise NotImplementedError(
                "Fixed features are not supported when non-linear inequality "
                "constraints are given."
            )
        # if there are no constraints things are straightforward
        if not (inequality_constraints or equality_constraints):
            reduced_domain = True
        # if there are constraints, we need to make sure features are fixed to
        # specific values
        else:
            reduced_domain = None not in fixed_features.values()

    if reduced_domain:
        _no_fixed_features = _remove_fixed_features_from_optimization(
            fixed_features=fixed_features,
            acquisition_function=acquisition_function,
            initial_conditions=initial_conditions,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            inequality_constraints=inequality_constraints,
            equality_constraints=equality_constraints,
        )
        # call the routine with no fixed_features
        clamped_candidates, batch_acquisition = gen_candidates_scipy(
            initial_conditions=_no_fixed_features.initial_conditions,
            acquisition_function=_no_fixed_features.acquisition_function,
            lower_bounds=_no_fixed_features.lower_bounds,
            upper_bounds=_no_fixed_features.upper_bounds,
            inequality_constraints=_no_fixed_features.inequality_constraints,
            equality_constraints=_no_fixed_features.equality_constraints,
            options=options,
            fixed_features=None,
        )
        clamped_candidates = _no_fixed_features.acquisition_function._construct_X_full(
            clamped_candidates
        )
        return clamped_candidates, batch_acquisition

    clamped_candidates = columnwise_clamp(
        X=initial_conditions, lower=lower_bounds, upper=upper_bounds
    )

    shapeX = clamped_candidates.shape
    x0 = clamped_candidates.view(-1)
    bounds = make_scipy_bounds(
        X=initial_conditions, lower_bounds=lower_bounds, upper_bounds=upper_bounds
    )
    constraints = make_scipy_linear_constraints(
        shapeX=clamped_candidates.shape,
        inequality_constraints=inequality_constraints,
        equality_constraints=equality_constraints,
    )

    def f_np_wrapper(x: np.ndarray, f: Callable):
        """Given a torch callable, compute value + grad given a numpy array."""
        if np.isnan(x).any():
            raise RuntimeError(
                f"{np.isnan(x).sum()} elements of the {x.size} element array "
                f"`x` are NaN."
            )
        X = (
            torch.from_numpy(x)
            .to(initial_conditions)
            .view(shapeX)
            .contiguous()
            .requires_grad_(True)
        )
        X_fix = fix_features(X, fixed_features=fixed_features)
        loss = f(X_fix).sum()
        # compute gradient w.r.t. the inputs (does not accumulate in leaves)
        gradf = _arrayify(torch.autograd.grad(loss, X)[0].contiguous().view(-1))
        if np.isnan(gradf).any():
            msg = (
                f"{np.isnan(gradf).sum()} elements of the {x.size} element "
                "gradient array `gradf` are NaN. "
                "This often indicates numerical issues."
            )
            if initial_conditions.dtype != torch.double:
                msg += " Consider using `dtype=torch.double`."
            raise RuntimeError(msg)
        fval = loss.item()
        return fval, gradf

    if nonlinear_inequality_constraints:
        # Make sure `batch_limit` is 1 for now.
        if not (len(shapeX) == 3 and shapeX[:2] == torch.Size([1, 1])):
            raise ValueError(
                "`batch_limit` must be 1 when non-linear inequality constraints "
                "are given."
            )
        constraints += make_scipy_nonlinear_inequality_constraints(
            nonlinear_inequality_constraints=nonlinear_inequality_constraints,
            f_np_wrapper=f_np_wrapper,
            x0=x0,
        )
    x0 = _arrayify(x0)

    def f(x):
        return -acquisition_function(x)

    res = minimize(
        fun=f_np_wrapper,
        args=(f,),
        x0=x0,
        method=options.get("method", "SLSQP" if constraints else "L-BFGS-B"),
        jac=True,
        bounds=bounds,
        constraints=constraints,
        callback=options.get("callback", None),
        options={k: v for k, v in options.items() if k not in ["method", "callback"]},
    )
    candidates = fix_features(
        X=torch.from_numpy(res.x).to(initial_conditions).reshape(shapeX),
        fixed_features=fixed_features,
    )

    # SLSQP sometimes fails in the line search or may just fail to find a feasible
    # candidate in which case we just return the starting point. This happens rarely,
    # so it shouldn't be an issue given enough restarts.
    if nonlinear_inequality_constraints and any(
        nlc(candidates.view(-1)) < NLC_TOL
        for nlc in nonlinear_inequality_constraints
    ):
        candidates = torch.from_numpy(x0).to(candidates).reshape(shapeX)
        warnings.warn(
            "SLSQP failed to converge to a solution that satisfies the non-linear "
            "constraints. Returning the feasible starting point."
        )

    clamped_candidates = columnwise_clamp(
        X=candidates, lower=lower_bounds, upper=upper_bounds, raise_on_violation=True
    )
    with torch.no_grad():
        batch_acquisition = acquisition_function(clamped_candidates)

    return clamped_candidates, batch_acquisition
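# A hedged usage sketch for the non-linear constraint path above. As the
# validation in the function requires, the batch of initial conditions must be
# `1 x 1 x d` (effectively batch_limit=1 and q=1), and each constraint callable
# must return non-negative values on feasible points. The acquisition function
# `acqf` is assumed to exist already (any fitted BoTorch acquisition function);
# `gen_candidates_scipy` refers to the definition above.
import torch


def unit_ball_constraint(X: torch.Tensor) -> torch.Tensor:
    # Feasible iff 1 - ||x||^2 >= 0; maps a `1 x 1 x d` tensor to a `1 x 1`
    # tensor of constraint values.
    return 1.0 - (X ** 2).sum(dim=-1)


Xinit = torch.full((1, 1, 2), 0.25, dtype=torch.double)  # feasible start, q=1
batch_candidates, batch_acq_values = gen_candidates_scipy(
    initial_conditions=Xinit,
    acquisition_function=acqf,  # assumed to be defined elsewhere
    lower_bounds=torch.zeros(2, dtype=torch.double),
    upper_bounds=torch.ones(2, dtype=torch.double),
    nonlinear_inequality_constraints=[unit_ball_constraint],
    options={"maxiter": 200},
)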
def gen_candidates_scipy(
    initial_conditions: Tensor,
    acquisition_function: Module,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    equality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    options: Optional[Dict[str, Any]] = None,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Generate a set of candidates using `scipy.optimize.minimize`.

    Optimizes an acquisition function starting from a set of initial candidates
    using `scipy.optimize.minimize` via a numpy converter.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        inequality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an inequality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) >= rhs`.
        equality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an equality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) = rhs`.
        options: Options used to control the optimization including "method"
            and "maxiter". Select method for `scipy.minimize` using the
            "method" key. By default uses L-BFGS-B for box-constrained problems
            and SLSQP if inequality or equality constraints are present.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.

    Example:
        >>> qEI = qExpectedImprovement(model, best_f=0.2)
        >>> bounds = torch.tensor([[0., 0.], [1., 2.]])
        >>> Xinit = gen_batch_initial_conditions(
        >>>     qEI, bounds, q=3, num_restarts=25, raw_samples=500
        >>> )
        >>> batch_candidates, batch_acq_values = gen_candidates_scipy(
        >>>     initial_conditions=Xinit,
        >>>     acquisition_function=qEI,
        >>>     lower_bounds=bounds[0],
        >>>     upper_bounds=bounds[1],
        >>> )
    """
    options = options or {}
    clamped_candidates = columnwise_clamp(
        X=initial_conditions, lower=lower_bounds, upper=upper_bounds
    ).requires_grad_(True)

    shapeX = clamped_candidates.shape
    x0 = _arrayify(clamped_candidates.view(-1))
    bounds = make_scipy_bounds(
        X=initial_conditions, lower_bounds=lower_bounds, upper_bounds=upper_bounds
    )
    constraints = make_scipy_linear_constraints(
        shapeX=clamped_candidates.shape,
        inequality_constraints=inequality_constraints,
        equality_constraints=equality_constraints,
    )

    def f(x):
        if np.isnan(x).any():
            raise RuntimeError(
                f"{np.isnan(x).sum()} elements of the {x.size} element array "
                f"`x` are NaN."
            )
        X = (
            torch.from_numpy(x)
            .to(initial_conditions)
            .view(shapeX)
            .contiguous()
            .requires_grad_(True)
        )
        X_fix = fix_features(X=X, fixed_features=fixed_features)
        loss = -acquisition_function(X_fix).sum()
        # compute gradient w.r.t. the inputs (does not accumulate in leaves)
        gradf = _arrayify(torch.autograd.grad(loss, X)[0].contiguous().view(-1))
        if np.isnan(gradf).any():
            msg = (
                f"{np.isnan(gradf).sum()} elements of the {x.size} element "
                "gradient array `gradf` are NaN. "
                "This often indicates numerical issues."
            )
            if initial_conditions.dtype != torch.double:
                msg += " Consider using `dtype=torch.double`."
            raise RuntimeError(msg)
        fval = loss.item()
        return fval, gradf

    res = minimize(
        f,
        x0,
        method=options.get("method", "SLSQP" if constraints else "L-BFGS-B"),
        jac=True,
        bounds=bounds,
        constraints=constraints,
        callback=options.get("callback", None),
        options={k: v for k, v in options.items() if k not in ["method", "callback"]},
    )
    candidates = fix_features(
        X=torch.from_numpy(res.x).to(initial_conditions).view(shapeX).contiguous(),
        fixed_features=fixed_features,
    )
    clamped_candidates = columnwise_clamp(
        X=candidates, lower=lower_bounds, upper=upper_bounds, raise_on_violation=True
    )
    with torch.no_grad():
        batch_acquisition = acquisition_function(clamped_candidates)
    return clamped_candidates, batch_acquisition
def gen_candidates_torch(
    initial_conditions: Tensor,
    acquisition_function: Callable,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    optimizer: Type[Optimizer] = torch.optim.Adam,
    options: Optional[Dict[str, Union[float, str]]] = None,
    verbose: bool = True,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Generate a set of candidates using a `torch.optim` optimizer.

    Optimizes an acquisition function starting from a set of initial candidates
    using an optimizer from `torch.optim`.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        optimizer (Optimizer): The pytorch optimizer to use to perform
            candidate search.
        options: Options used to control the optimization. Includes
            maxiter: Maximum number of iterations
        verbose: If True, provide verbose output.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.

    Example:
        >>> qEI = qExpectedImprovement(model, best_f=0.2)
        >>> bounds = torch.tensor([[0., 0.], [1., 2.]])
        >>> Xinit = gen_batch_initial_conditions(
        >>>     qEI, bounds, q=3, num_restarts=25, raw_samples=500
        >>> )
        >>> batch_candidates, batch_acq_values = gen_candidates_torch(
        >>>     initial_conditions=Xinit,
        >>>     acquisition_function=qEI,
        >>>     lower_bounds=bounds[0],
        >>>     upper_bounds=bounds[1],
        >>> )
    """
    options = options or {}
    clamped_candidates = columnwise_clamp(
        X=initial_conditions, lower=lower_bounds, upper=upper_bounds
    ).requires_grad_(True)
    candidates = fix_features(clamped_candidates, fixed_features)
    bayes_optimizer = optimizer(
        params=[clamped_candidates], lr=options.get("lr", 0.025)
    )
    param_trajectory: Dict[str, List[Tensor]] = {"candidates": []}
    loss_trajectory: List[float] = []
    i = 0
    stop = False
    stopping_criterion = ExpMAStoppingCriterion(
        **_filter_kwargs(ExpMAStoppingCriterion, **options)
    )
    while not stop:
        i += 1
        loss = -acquisition_function(candidates).sum()
        if verbose:
            print("Iter: {} - Value: {:.3f}".format(i, -(loss.item())))
        loss_trajectory.append(loss.item())
        param_trajectory["candidates"].append(candidates.clone())

        def closure():
            bayes_optimizer.zero_grad()
            loss.backward()
            return loss

        bayes_optimizer.step(closure)
        clamped_candidates.data = columnwise_clamp(
            clamped_candidates, lower_bounds, upper_bounds
        )
        candidates = fix_features(clamped_candidates, fixed_features)
        stop = stopping_criterion.evaluate(fvals=loss.detach())

    clamped_candidates = columnwise_clamp(
        X=candidates, lower=lower_bounds, upper=upper_bounds, raise_on_violation=True
    )
    with torch.no_grad():
        batch_acquisition = acquisition_function(candidates)
    return candidates, batch_acquisition
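# Both torch-based generators above rely on fix_features to overwrite the fixed
# columns on every iteration while gradients keep flowing to the free columns.
# Below is a hedged sketch of such a helper, following the semantics described
# in the docstrings above (a value of None keeps the current, clamped value and
# stops it from being optimized); the actual BoTorch helper may differ in detail.
from typing import Dict, Optional

import torch
from torch import Tensor


def fix_features_sketch(
    X: Tensor, fixed_features: Optional[Dict[int, Optional[float]]] = None
) -> Tensor:
    if not fixed_features:
        return X
    columns = []
    for i in range(X.shape[-1]):
        if i not in fixed_features:
            columns.append(X[..., i])
        elif fixed_features[i] is None:
            # fix to the current value; detach so it is not optimized
            columns.append(X[..., i].detach())
        else:
            columns.append(torch.full_like(X[..., i], fixed_features[i]))
    return torch.stack(columns, dim=-1)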
def gen(
    self,
    model_gen_options: Optional[Dict[str, Any]] = None,
    explore_features: Optional[List[int]] = None,
) -> Tuple[Tensor, Optional[List[Dict[str, Any]]]]:
    """Generate candidate by optimizing acquisition function.

    Args:
        model_gen_options: Dictionary with options for generating candidate,
            such as SGD parameters. See code for all options and their defaults.
        explore_features: List of features that will be selected randomly and
            then fixed for acquisition fn optimization.

    Returns:
        Xopt: (1 x d) tensor of the generated candidate
        candidate_metadata: List of dict of metadata for each candidate.
            Contains acquisition value for the candidate.
    """
    # Default optimization settings
    # TODO are these sufficiently robust? Can they be tuned better?
    options = model_gen_options or {}
    num_restarts = options.get("num_restarts", 10)
    raw_samples = options.get("raw_samples", 1000)
    verbosity_freq = options.get("verbosity_freq", -1)
    lr = options.get("lr", 0.01)
    momentum = options.get("momentum", 0.9)
    nesterov = options.get("nesterov", True)
    epochs = options.get("epochs", 50)
    milestones = options.get("milestones", [25, 40])
    gamma = options.get("gamma", 0.1)
    loss_constraint_fun = options.get(
        "loss_constraint_fun", default_loss_constraint_fun
    )

    acq_function = self._get_acquisition_fn()
    # Augment bounds with deriv indicator
    bounds = torch.cat((self.bounds_, torch.zeros(2, 1, dtype=self.dtype)), dim=1)
    # Fix deriv indicator to 0 during optimization
    fixed_features = {(bounds.shape[1] - 1): 0.0}
    # Fix explore features to random values
    if explore_features is not None:
        for idx in explore_features:
            val = (
                bounds[0, idx]
                + torch.rand(1, dtype=self.dtype)
                * (bounds[1, idx] - bounds[0, idx])
            ).item()
            fixed_features[idx] = val
            bounds[0, idx] = val
            bounds[1, idx] = val

    # Initialize
    batch_initial_conditions = gen_batch_initial_conditions(
        acq_function=acq_function,
        bounds=bounds,
        q=1,
        num_restarts=num_restarts,
        raw_samples=raw_samples,
    )
    clamped_candidates = columnwise_clamp(
        X=batch_initial_conditions, lower=bounds[0], upper=bounds[1]
    ).requires_grad_(True)
    candidates = fix_features(clamped_candidates, fixed_features)
    optimizer = torch.optim.SGD(
        params=[clamped_candidates], lr=lr, momentum=momentum, nesterov=nesterov
    )
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=gamma
    )

    # Optimize
    for epoch in range(epochs):
        loss = -acq_function(candidates).sum()

        # adjust loss based on constraints on candidates
        loss = loss_constraint_fun(loss, candidates)

        if verbosity_freq > 0 and epoch % verbosity_freq == 0:
            logger.info("Iter: {} - Value: {:.3f}".format(epoch, -(loss.item())))

        def closure():
            optimizer.zero_grad()
            loss.backward(
                retain_graph=True
            )  # Variational model requires retain_graph
            return loss

        optimizer.step(closure)
        clamped_candidates.data = columnwise_clamp(
            X=clamped_candidates, lower=bounds[0], upper=bounds[1]
        )
        candidates = fix_features(clamped_candidates, fixed_features)
        lr_scheduler.step()

    # Extract best point
    with torch.no_grad():
        batch_acquisition = acq_function(candidates)

    best = torch.argmax(batch_acquisition.view(-1), dim=0)
    Xopt = candidates[best][:, :-1].detach()
    candidate_metadata = [{"acquisition_value": batch_acquisition[best].item()}]
    return Xopt, candidate_metadata