def query_acq_func(
    acq_func_id: str,
    acq_func_kwargs: Dict[str, Any],
    gp_model: SingleTaskGP,
    q: int,
    num_MC_samples_acq: int,
):
    r"""Instantiate an acquisition function from its BoTorch class name.

    If no analytic version of the acquisition function is found, fall back to
    the MC version (the `q`-prefixed class name) and attach a QMC sampler.

    Args:
        acq_func_id: Name of the acquisition function class.
        acq_func_kwargs: Keyword arguments for the acquisition function;
            irrelevant entries are filtered out via `_filter_kwargs`.
        gp_model: The fitted GP model the acquisition function operates on.
        q: Number of candidates per batch.
        num_MC_samples_acq: Number of QMC samples used by the MC version.
    """
    if not hasattr(AnalyticAcquisitionFunction, acq_func_id):
        # Use the MC version of the acquisition function, equipped with a
        # resampling QMC sampler matching the model's device/dtype.
        acq_func_id = f"q{acq_func_id}"
        resampler = SobolQMCNormalSampler(
            num_samples=num_MC_samples_acq, resample=True
        ).to(gp_model.train_inputs[0])
        acq_func_kwargs["sampler"] = resampler
    acq_func_class = getattr(acquisition, acq_func_id)
    acq_func = acq_func_class(
        gp_model, **_filter_kwargs(acq_func_class, **acq_func_kwargs)
    )
    return acq_func
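# Hedged usage sketch for `query_acq_func` (not part of the original module).
# It assumes the older BoTorch API in which `SobolQMCNormalSampler` accepts
# `num_samples`/`resample`. Depending on how the analytic check resolves, the
# id may be rewritten to its 'q'-prefixed MC variant; `best_f` is accepted by
# both ExpectedImprovement and qExpectedImprovement, so the call works either way.
def _example_query_acq_func():
    import torch
    from botorch.models import SingleTaskGP
    from botorch.fit import fit_gpytorch_model
    from gpytorch.mlls import ExactMarginalLogLikelihood

    train_X = torch.rand(20, 2)
    train_Y = train_X.sum(dim=-1, keepdim=True).sin()
    gp = SingleTaskGP(train_X, train_Y)
    fit_gpytorch_model(ExactMarginalLogLikelihood(gp.likelihood, gp))

    acq = query_acq_func(
        acq_func_id="ExpectedImprovement",
        acq_func_kwargs={"best_f": train_Y.max().item()},
        gp_model=gp,
        q=1,
        num_MC_samples_acq=256,
    )
    # Evaluate at a `1 x 1 x 2` batch of candidate points.
    return acq(train_X[:1].unsqueeze(0))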
def batch_cross_validation(
    model_cls: Type[GPyTorchModel],
    mll_cls: Type[MarginalLogLikelihood],
    cv_folds: CVFolds,
    fit_args: Optional[Dict[str, Any]] = None,
    observation_noise: bool = False,
) -> CVResults:
    r"""Perform cross validation by using gpytorch batch mode.

    Args:
        model_cls: A GPyTorchModel class. This class must initialize the likelihood
            internally. Note: Multi-task GPs are not currently supported.
        mll_cls: A MarginalLogLikelihood class.
        cv_folds: A CVFolds tuple.
        fit_args: Arguments passed along to fit_gpytorch_model.
        observation_noise: If True, include the observation noise in the
            hold-out posterior.

    Returns:
        A CVResults tuple with the following fields

        - model: GPyTorchModel for batched cross validation
        - posterior: GPyTorchPosterior where the mean has shape `n x 1 x m` or
          `batch_shape x n x 1 x m`
        - observed_Y: A `n x 1 x m` or `batch_shape x n x 1 x m` tensor of
          observations.
        - observed_Yvar: A `n x 1 x m` or `batch_shape x n x 1 x m` tensor of
          observed measurement noise.

    Example:
        >>> train_X = torch.rand(10, 1)
        >>> train_Y = torch.sin(6 * train_X) + 0.2 * torch.rand_like(train_X)
        >>> cv_folds = gen_loo_cv_folds(train_X, train_Y)
        >>> cv_results = batch_cross_validation(
        >>>     SingleTaskGP,
        >>>     ExactMarginalLogLikelihood,
        >>>     cv_folds,
        >>> )

    WARNING: This function is currently very memory inefficient; use it only
    for problems of small size.
    """
    fit_args = fit_args or {}
    kwargs = {
        "train_X": cv_folds.train_X,
        "train_Y": cv_folds.train_Y,
        "train_Yvar": cv_folds.train_Yvar,
    }
    model_cv = model_cls(**_filter_kwargs(model_cls, **kwargs))
    mll_cv = mll_cls(model_cv.likelihood, model_cv)
    mll_cv.to(cv_folds.train_X)
    mll_cv = fit_gpytorch_model(mll_cv, **fit_args)

    # Evaluate on the hold-out set in batch mode.
    with torch.no_grad():
        posterior = model_cv.posterior(
            cv_folds.test_X, observation_noise=observation_noise
        )

    return CVResults(
        model=model_cv,
        posterior=posterior,
        observed_Y=cv_folds.test_Y,
        observed_Yvar=cv_folds.test_Yvar,
    )
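# A minimal follow-up sketch (not from the original source): once
# `batch_cross_validation` has produced a CVResults, the batched hold-out
# posterior can be scored against the held-out observations, e.g. with a
# mean-squared error over the leave-one-out folds.
def _example_cv_error():
    import torch
    from botorch.models import SingleTaskGP
    from gpytorch.mlls import ExactMarginalLogLikelihood

    train_X = torch.rand(10, 1)
    train_Y = torch.sin(6 * train_X) + 0.2 * torch.rand_like(train_X)
    cv_folds = gen_loo_cv_folds(train_X, train_Y)
    cv_results = batch_cross_validation(
        SingleTaskGP, ExactMarginalLogLikelihood, cv_folds
    )
    # The posterior mean has shape `n x 1 x m`, matching `observed_Y`.
    mse = ((cv_results.posterior.mean - cv_results.observed_Y) ** 2).mean()
    return mse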
def fit_gpytorch_torch(
    mll: MarginalLogLikelihood,
    bounds: Optional[ParameterBounds] = None,
    optimizer_cls: Optimizer = Adam,
    options: Optional[Dict[str, Any]] = None,
    track_iterations: bool = True,
    approx_mll: bool = True,
) -> Tuple[MarginalLogLikelihood, Dict[str, Union[float, List[OptimizationIteration]]]]:
    r"""Fit a gpytorch model by maximizing MLL with a torch optimizer.

    The model and likelihood in mll must already be in train mode.
    Note: this method requires that the model has `train_inputs` and
    `train_targets`.

    Args:
        mll: MarginalLogLikelihood to be maximized.
        bounds: A ParameterBounds dictionary mapping parameter names to tuples
            of lower and upper bounds. Bounds specified here take precedence
            over bounds on the same parameters specified in the constraints
            registered with the module.
        optimizer_cls: Torch optimizer to use. Must not require a closure.
        options: Options for model fitting. Relevant options will be passed to
            the `optimizer_cls`. Additionally, options can include: "disp"
            to specify whether to display model fitting diagnostics and "maxiter"
            to specify the maximum number of iterations.
        track_iterations: Track the function values and wall time for each
            iteration.
        approx_mll: If True, use gpytorch's approximate MLL computation
            (according to the gpytorch defaults, based on the training data
            size). Unlike for the deterministic algorithms used in
            fit_gpytorch_scipy, this is not an issue for stochastic optimizers.

    Returns:
        2-element tuple containing

        - mll with parameters optimized in-place.
        - Dictionary with the following key/values:
          "fopt": Best mll value.
          "wall_time": Wall time of fitting.
          "iterations": List of OptimizationIteration objects with information
          on each iteration. If track_iterations is False, will be empty.

    Example:
        >>> gp = SingleTaskGP(train_X, train_Y)
        >>> mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        >>> mll.train()
        >>> fit_gpytorch_torch(mll)
        >>> mll.eval()
    """
    optim_options = {"maxiter": 100, "disp": True, "lr": 0.05}
    optim_options.update(options or {})
    exclude = optim_options.pop("exclude", None)
    if exclude is not None:
        mll_params = [
            t for p_name, t in mll.named_parameters() if p_name not in exclude
        ]
    else:
        mll_params = list(mll.parameters())
    optimizer = optimizer_cls(
        params=[{"params": mll_params}],
        **_filter_kwargs(optimizer_cls, **optim_options),
    )

    # Get bounds specified in the model (if any).
    bounds_: ParameterBounds = {}
    if hasattr(mll, "named_parameters_and_constraints"):
        for param_name, _, constraint in mll.named_parameters_and_constraints():
            if constraint is not None and not constraint.enforced:
                bounds_[param_name] = constraint.lower_bound, constraint.upper_bound

    # Update with user-supplied bounds (overwrites if already present).
    if bounds is not None:
        bounds_.update(bounds)

    iterations = []
    t1 = time.time()
    param_trajectory: Dict[str, List[Tensor]] = {
        name: [] for name, param in mll.named_parameters()
    }
    loss_trajectory: List[float] = []
    i = 0
    stop = False
    stopping_criterion = ExpMAStoppingCriterion(
        **_filter_kwargs(ExpMAStoppingCriterion, **optim_options)
    )
    train_inputs, train_targets = mll.model.train_inputs, mll.model.train_targets
    while not stop:
        optimizer.zero_grad()
        with gpt_settings.fast_computations(log_prob=approx_mll):
            output = mll.model(*train_inputs)
            # We sum here to support batch mode.
            args = [output, train_targets] + _get_extra_mll_args(mll)
            loss = -mll(*args).sum()
            loss.backward()
        loss_trajectory.append(loss.item())
        for name, param in mll.named_parameters():
            param_trajectory[name].append(param.detach().clone())
        if optim_options["disp"] and (
            (i + 1) % 10 == 0 or i == (optim_options["maxiter"] - 1)
        ):
            print(f"Iter {i + 1}/{optim_options['maxiter']}: {loss.item()}")
        if track_iterations:
            iterations.append(OptimizationIteration(i, loss.item(), time.time() - t1))
        optimizer.step()
        # Project onto bounds:
        if bounds_:
            for pname, param in mll.named_parameters():
                if pname in bounds_:
                    param.data = param.data.clamp(*bounds_[pname])
        i += 1
        stop = stopping_criterion.evaluate(fvals=loss.detach())
    info_dict = {
        "fopt": loss_trajectory[-1],
        "wall_time": time.time() - t1,
        "iterations": iterations,
    }
    return mll, info_dict
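# Hedged usage sketch for `fit_gpytorch_torch` (not part of the original
# module). The bound name "likelihood.noise_covar.raw_noise" follows the
# standard GPyTorch GaussianLikelihood parameter naming, but is an assumption
# here; adjust it to the parameters of the model actually being fit.
def _example_fit_gpytorch_torch():
    import torch
    from botorch.models import SingleTaskGP
    from gpytorch.mlls import ExactMarginalLogLikelihood

    train_X = torch.rand(20, 2)
    train_Y = train_X.sum(dim=-1, keepdim=True).sin()
    gp = SingleTaskGP(train_X, train_Y)
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    mll.train()
    mll, info = fit_gpytorch_torch(
        mll,
        bounds={"likelihood.noise_covar.raw_noise": (-5.0, 5.0)},
        options={"maxiter": 50, "lr": 0.1, "disp": False},
        track_iterations=False,
    )
    mll.eval()
    return info["fopt"], info["wall_time"]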
def gen_candidates_torch(
    initial_conditions: Tensor,
    acquisition_function: Callable,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    optimizer: Type[Optimizer] = torch.optim.Adam,
    options: Optional[Dict[str, Union[float, str]]] = None,
    verbose: bool = True,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Generate a set of candidates using a `torch.optim` optimizer.

    Optimizes an acquisition function starting from a set of initial candidates
    using an optimizer from `torch.optim`.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        optimizer (Optimizer): The pytorch optimizer to use to perform
            candidate search.
        options: Options used to control the optimization. Includes
            maxiter: Maximum number of iterations
        verbose: If True, provide verbose output.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.
    """
    options = options or {}
    _jitter = options.get("jitter", 0.0)
    clamped_candidates = columnwise_clamp(
        X=initial_conditions, lower=lower_bounds, upper=upper_bounds
    ).requires_grad_(True)
    candidates = fix_features(clamped_candidates, fixed_features)
    bayes_optimizer = optimizer(
        params=[clamped_candidates], lr=options.get("lr", 0.025)
    )
    i = 0
    stop = False
    stopping_criterion = ExpMAStoppingCriterion(
        **_filter_kwargs(ExpMAStoppingCriterion, **options)
    )
    while not stop:
        i += 1
        batch_loss = acquisition_function(candidates)
        loss = -batch_loss.sum()
        if verbose:
            print("Iter: {} - Value: {:.3f}".format(i, -(loss.item())))
        if torch.isnan(loss):
            print("loss is nan, exiting optimization of the acquisition function.")
            break
        bayes_optimizer.zero_grad()
        loss.backward()
        if options.get("clip_gradient", False):
            torch.nn.utils.clip_grad_value_(
                clamped_candidates, clip_value=options.get("clip_value", 10.0)
            )
        bayes_optimizer.step()
        # Project back onto the (optionally jitter-shrunken) bounds; guard
        # against unspecified bounds, which are None.
        clamped_candidates.data = columnwise_clamp(
            clamped_candidates,
            lower_bounds + _jitter if lower_bounds is not None else None,
            upper_bounds - _jitter if upper_bounds is not None else None,
        )
        candidates = fix_features(clamped_candidates, fixed_features)
        stop = stopping_criterion.evaluate(fvals=loss.detach())
    with torch.no_grad():
        batch_acquisition = acquisition_function(candidates)
    return candidates, batch_acquisition
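# Hedged usage sketch for the variant above (not from the original source).
# The `jitter`, `clip_gradient`, and `clip_value` options shown are the ones
# this particular implementation reads out of `options`; the model setup is
# an illustrative assumption.
def _example_gen_candidates_torch():
    import torch
    from botorch.acquisition import qExpectedImprovement
    from botorch.fit import fit_gpytorch_model
    from botorch.models import SingleTaskGP
    from gpytorch.mlls import ExactMarginalLogLikelihood

    train_X = torch.rand(10, 2)
    train_Y = train_X.sum(dim=-1, keepdim=True).sin()
    model = SingleTaskGP(train_X, train_Y)
    fit_gpytorch_model(ExactMarginalLogLikelihood(model.likelihood, model))
    qEI = qExpectedImprovement(model, best_f=train_Y.max().item())

    Xinit = torch.rand(5, 3, 2)  # 5 restarts of a q=3 batch in d=2
    candidates, acq_values = gen_candidates_torch(
        initial_conditions=Xinit,
        acquisition_function=qEI,
        lower_bounds=torch.zeros(2),
        upper_bounds=torch.ones(2),
        options={"maxiter": 50, "jitter": 1e-3, "clip_gradient": True},
        verbose=False,
    )
    return candidates, acq_values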
def gen_candidates_torch(
    initial_conditions: Tensor,
    acquisition_function: AcquisitionFunction,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    optimizer: Type[Optimizer] = torch.optim.Adam,
    options: Optional[Dict[str, Union[float, str]]] = None,
    callback: Optional[Callable[[int, Tensor, Tensor], NoReturn]] = None,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Generate a set of candidates using a `torch.optim` optimizer.

    Optimizes an acquisition function starting from a set of initial candidates
    using an optimizer from `torch.optim`.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        optimizer (Optimizer): The pytorch optimizer to use to perform
            candidate search.
        options: Options used to control the optimization. Includes
            maxiter: Maximum number of iterations
        callback: A callback function accepting the current iteration, loss,
            and gradients as arguments. This function is executed after
            computing the loss and gradients, but before calling the optimizer.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.

    Example:
        >>> qEI = qExpectedImprovement(model, best_f=0.2)
        >>> bounds = torch.tensor([[0., 0.], [1., 2.]])
        >>> Xinit = gen_batch_initial_conditions(
        >>>     qEI, bounds, q=3, num_restarts=25, raw_samples=500
        >>> )
        >>> batch_candidates, batch_acq_values = gen_candidates_torch(
        >>>     initial_conditions=Xinit,
        >>>     acquisition_function=qEI,
        >>>     lower_bounds=bounds[0],
        >>>     upper_bounds=bounds[1],
        >>> )
    """
    options = options or {}

    # If there are fixed features, we may optimize over a domain of lower
    # dimension.
    if fixed_features:
        subproblem = _remove_fixed_features_from_optimization(
            fixed_features=fixed_features,
            acquisition_function=acquisition_function,
            initial_conditions=initial_conditions,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            inequality_constraints=None,
            equality_constraints=None,
        )

        # Call the routine with no fixed_features.
        clamped_candidates, batch_acquisition = gen_candidates_torch(
            initial_conditions=subproblem.initial_conditions,
            acquisition_function=subproblem.acquisition_function,
            lower_bounds=subproblem.lower_bounds,
            upper_bounds=subproblem.upper_bounds,
            optimizer=optimizer,
            options=options,
            callback=callback,
            fixed_features=None,
        )
        clamped_candidates = subproblem.acquisition_function._construct_X_full(
            clamped_candidates
        )
        return clamped_candidates, batch_acquisition

    _clamp = partial(columnwise_clamp, lower=lower_bounds, upper=upper_bounds)
    clamped_candidates = _clamp(initial_conditions).requires_grad_(True)
    _optimizer = optimizer(params=[clamped_candidates], lr=options.get("lr", 0.025))

    i = 0
    stop = False
    stopping_criterion = ExpMAStoppingCriterion(
        **_filter_kwargs(ExpMAStoppingCriterion, **options)
    )
    while not stop:
        i += 1
        with torch.no_grad():
            X = _clamp(clamped_candidates).requires_grad_(True)

        loss = -acquisition_function(X).sum()
        grad = torch.autograd.grad(loss, X)[0]
        if callback:
            callback(i, loss, grad)

        def assign_grad():
            _optimizer.zero_grad()
            clamped_candidates.grad = grad
            return loss

        _optimizer.step(assign_grad)
        stop = stopping_criterion.evaluate(fvals=loss.detach())

    clamped_candidates = _clamp(clamped_candidates)
    with torch.no_grad():
        batch_acquisition = acquisition_function(clamped_candidates)

    return clamped_candidates, batch_acquisition
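# A minimal sketch (an assumption, not from the original source) of a
# `callback` for the version above: it logs the loss and gradient norm at each
# iteration, which is handy for diagnosing stalled acquisition optimization.
# Per the docstring, it runs after the loss/gradient computation and before
# the optimizer step.
def _example_logging_callback(i, loss, grad):
    print(f"iter {i}: loss={loss.item():.4f}, grad_norm={grad.norm().item():.4e}")

# It would be passed as:
#     gen_candidates_torch(..., callback=_example_logging_callback)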
def gen_candidates_torch(
    initial_conditions: Tensor,
    acquisition_function: Callable,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    optimizer: Type[Optimizer] = torch.optim.Adam,
    options: Optional[Dict[str, Union[float, str]]] = None,
    verbose: bool = True,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Generate a set of candidates using a `torch.optim` optimizer.

    Optimizes an acquisition function starting from a set of initial candidates
    using an optimizer from `torch.optim`.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        optimizer (Optimizer): The pytorch optimizer to use to perform
            candidate search.
        options: Options used to control the optimization. Includes
            maxiter: Maximum number of iterations
        verbose: If True, provide verbose output.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.

    Example:
        >>> qEI = qExpectedImprovement(model, best_f=0.2)
        >>> bounds = torch.tensor([[0., 0.], [1., 2.]])
        >>> Xinit = gen_batch_initial_conditions(
        >>>     qEI, bounds, q=3, num_restarts=25, raw_samples=500
        >>> )
        >>> batch_candidates, batch_acq_values = gen_candidates_torch(
        >>>     initial_conditions=Xinit,
        >>>     acquisition_function=qEI,
        >>>     lower_bounds=bounds[0],
        >>>     upper_bounds=bounds[1],
        >>> )
    """
    options = options or {}
    clamped_candidates = columnwise_clamp(
        X=initial_conditions, lower=lower_bounds, upper=upper_bounds
    ).requires_grad_(True)
    candidates = fix_features(clamped_candidates, fixed_features)
    bayes_optimizer = optimizer(
        params=[clamped_candidates], lr=options.get("lr", 0.025)
    )
    # Trajectories are recorded for inspection; they are not returned.
    param_trajectory: Dict[str, List[Tensor]] = {"candidates": []}
    loss_trajectory: List[float] = []
    i = 0
    stop = False
    stopping_criterion = ExpMAStoppingCriterion(
        **_filter_kwargs(ExpMAStoppingCriterion, **options)
    )
    while not stop:
        i += 1
        loss = -acquisition_function(candidates).sum()
        if verbose:
            print("Iter: {} - Value: {:.3f}".format(i, -(loss.item())))
        loss_trajectory.append(loss.item())
        param_trajectory["candidates"].append(candidates.clone())

        def closure():
            bayes_optimizer.zero_grad()
            loss.backward()
            return loss

        bayes_optimizer.step(closure)
        clamped_candidates.data = columnwise_clamp(
            clamped_candidates, lower_bounds, upper_bounds
        )
        candidates = fix_features(clamped_candidates, fixed_features)
        stop = stopping_criterion.evaluate(fvals=loss.detach())
    clamped_candidates = columnwise_clamp(
        X=candidates, lower=lower_bounds, upper=upper_bounds, raise_on_violation=True
    )
    with torch.no_grad():
        batch_acquisition = acquisition_function(candidates)
    return candidates, batch_acquisition