def _sample_max_value_Thompson(
    model: Model, candidate_set: Tensor, num_samples: int, maximize: bool = True
) -> Tensor:
    """Draw max-value samples using discrete Thompson sampling.

    Joint posterior samples are drawn at the candidate points and the largest
    (sign-adjusted) value of each sample is returned.

    Should generally be called within a `with torch.no_grad()` context.

    Args:
        model: A fitted single-outcome model.
        candidate_set: A `n x d` Tensor including `n` candidate points to
            discretize the design space.
        num_samples: Number of max value samples.
        maximize: If True, consider the problem a maximization problem;
            otherwise the samples are negated before taking the max.

    Returns:
        A `num_samples x num_fantasies` Tensor of max value samples.
    """
    sign = 1.0 if maximize else -1.0
    posterior = model.posterior(candidate_set)
    draws = posterior.rsample(torch.Size([num_samples])).squeeze(-1)
    # signed_draws is num_samples x (num_fantasies) x n
    signed_draws = sign * draws
    max_values = signed_draws.max(dim=-1).values
    if signed_draws.dim() == 2:
        # no fantasy dimension present: append a singleton one so the result
        # is always num_samples x num_fantasies
        return max_values.unsqueeze(-1)
    return max_values
def _get_induced_fantasy_model(
    model: Model, Xs: List[Tensor], samplers: List[Optional[MCSampler]]
) -> Model:
    r"""Compute the fantasy model induced by an input tree.

    Args:
        model: A Model of appropriate batch size. Specifically, it must be
            possible to evaluate the model's posterior at `Xs[0]`.
        Xs: A list `[X_j, ..., X_k]` of tensors, where `X_i` has shape
            `f_i x .... x f_1 x batch_shape x q_i x d`.
        samplers: A list of `k - j` samplers, such that the number of samples
            of sampler `i` is `f_i`. The last element of this list is
            considered the "inner sampler", which is used for evaluating the
            objective in case it is an MCAcquisitionObjective.

    Returns:
        A Model obtained by iteratively fantasizing over the input tree `Xs`.
        The last element of `Xs` is never fantasized over; if `Xs` has a
        single element, `model` itself is returned.
    """
    fantasy_model = model
    pending_Xs, pending_samplers = Xs, samplers
    # Peel one level off the input tree per iteration until only the final
    # element of `Xs` is left.
    while len(pending_Xs) != 1:
        fantasy_model = fantasy_model.fantasize(
            X=pending_Xs[0],
            sampler=pending_samplers[0],
            observation_noise=True,
        )
        pending_Xs, pending_samplers = pending_Xs[1:], pending_samplers[1:]
    return fantasy_model
def get_infeasible_cost(
    X: Tensor, model: Model, objective: Callable[[Tensor], Tensor] = squeeze_last_dim
) -> float:
    r"""Get infeasible cost for a model and objective.

    Computes an infeasible cost `M` such that `-M < min_x f(x)` almost always,
    so that feasible points are preferred. The estimate is the negated minimum
    over `X` of the objective applied to `mean - 6 * std` of the posterior,
    floored at zero.

    Args:
        X: A `n x d` Tensor of `n` design points to use in evaluating the
            minimum. These points should cover the design space well. The more
            points the better the estimate, at the expense of added computation.
        model: A fitted botorch model.
        objective: The objective with which to evaluate the model output.

    Returns:
        The infeasible cost `M` value.

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> objective = lambda Y: Y[..., -1] ** 2
        >>> M = get_infeasible_cost(train_X, model, objective)
    """
    posterior = model.posterior(X)
    mean = posterior.mean
    # clamp guards against slightly negative variances before the sqrt
    std = posterior.variance.clamp_min(0).sqrt()
    lower_bound = objective(mean - 6 * std).min()
    # a non-negative lower bound yields zero cost
    return (-lower_bound.clamp_max(0.0)).item()
def predict_from_model_mcmc(model: Model, X: Tensor) -> Tuple[Tensor, Tensor]:
    r"""Predict outcomes, integrating over the hyperparameter posterior.

    The per-sample posterior means and variances are combined into a single
    mean and variance by averaging over dimension 0 of the posterior output
    (`E[var] + E[mean^2] - E[mean]^2`).

    Args:
        model: A batched botorch Model where the batch dimension corresponds
            to sampled hyperparameters.
        X: A `n x d` tensor of input parameters.

    Returns:
        Tensor: The predicted posterior mean as an `n x o`-dim tensor.
        Tensor: The predicted posterior covariance as a `n x o x o`-dim tensor.
    """
    with torch.no_grad():
        # compute the batch (independent posterior over the inputs)
        posterior = model.posterior(X.unsqueeze(-3))
        sample_means = posterior.mean.cpu().detach()
        # TODO: Allow Posterior to (optionally) return the full covariance matrix
        sample_vars = posterior.variance.cpu().detach().clamp_min(0)  # pyre-ignore
    # marginalize over samples (dimension 0)
    num_mcmc = sample_vars.shape[0]
    expected_var = sample_vars.sum(dim=0) / num_mcmc
    second_moment = sample_means.pow(2).sum(dim=0) / num_mcmc
    squared_first_moment = (sample_means.sum(dim=0) / num_mcmc).pow(2)
    variance = expected_var + second_moment - squared_first_moment
    mean = sample_means.mean(dim=0)
    return mean, torch.diag_embed(variance)
def get_PosteriorMean(
    model: Model,
    objective_weights: Tensor,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    X_observed: Optional[Tensor] = None,
    X_pending: Optional[Tensor] = None,
    **kwargs: Any,
) -> AcquisitionFunction:
    r"""Instantiates a posterior-mean style acquisition function.

    Note: The returned object is always a `qSimpleRegret` built on an MC
    objective (constrained if outcome constraints are given), so that
    arbitrary objective functions can be handled.

    Args:
        model: The model whose posterior is evaluated.
        objective_weights: The objective is to maximize a weighted sum of
            the columns of f(x). These are the weights.
        outcome_constraints: A tuple of (A, b). For k outcome constraints
            and m outputs at f(x), A is (k x m) and b is (k x 1) such that
            A f(x) <= b. (Not used by single task models)
        X_observed: A tensor containing points observed for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any). Must not be None.
        X_pending: A tensor containing points whose evaluation is pending
            (i.e. that have been submitted for evaluation) present for all
            objective outcomes and outcomes that appear in the outcome
            constraints (if there are any). Not used by this function.
        **kwargs: If `chebyshev_scalarization` is truthy, a Chebyshev
            scalarization of the objective weights is used.

    Returns:
        The instantiated acquisition function.

    Raises:
        ValueError: If `X_observed` is None.
    """
    if X_observed is None:
        raise ValueError("There are no feasible observed points.")

    # construct Objective module
    if not kwargs.get("chebyshev_scalarization", False):
        obj_tf = get_objective_weights_transform(objective_weights)
    else:
        with torch.no_grad():
            Y = model.posterior(X_observed).mean
        obj_tf = get_chebyshev_scalarization(weights=objective_weights, Y=Y)

    def obj_fn(samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
        return obj_tf(samples)

    if outcome_constraints is not None:
        con_tfs = get_outcome_constraint_transforms(outcome_constraints)
        inf_cost = get_infeasible_cost(X=X_observed, model=model, objective=obj_fn)
        objective = ConstrainedMCObjective(
            objective=obj_fn, constraints=con_tfs or [], infeasible_cost=inf_cost
        )
    else:
        objective = GenericMCObjective(objective=obj_fn)

    # Use qSimpleRegret, not analytic posterior, to handle arbitrary objective fns.
    return qSimpleRegret(model, objective=objective)
def _sample_max_value_Gumbel(
    model: Model, candidate_set: Tensor, num_samples: int, maximize: bool = True
) -> Tensor:
    """Samples the max values by Gumbel approximation.

    The CDF of the max value over the candidate set is approximated by the
    product of the marginal normal CDFs (independence assumption). Its 25%,
    50% and 75% quantiles are found by root-finding, a Gumbel distribution is
    fitted to them, and samples are drawn by inverting the fitted CDF.

    Should generally be called within a `with torch.no_grad()` context.

    Args:
        model: A fitted single-outcome model.
        candidate_set: A `n x d` Tensor including `n` candidate points to
            discretize the design space.
        num_samples: Number of max value samples.
        maximize: If True, consider the problem a maximization problem.

    Returns:
        A `num_samples x num_fantasies` Tensor of max value samples
    """
    # define the approximate CDF for the max value under the independence assumption
    posterior = model.posterior(candidate_set)
    weight = 1.0 if maximize else -1.0
    mu = weight * posterior.mean
    sigma = posterior.variance.clamp_min(1e-8).sqrt()
    # mu, sigma is (num_fantasies) X n X 1
    if len(mu.shape) == 3 and mu.shape[-1] == 1:
        mu = mu.squeeze(-1).T
        sigma = sigma.squeeze(-1).T
    # mu, sigma is now n X num_fantasies or n X 1

    def _cdf_gap(y, normal, p):
        # brentq calls this with a Python float; the distribution methods
        # expect a Tensor (and reject anything else when argument validation
        # is enabled), so wrap it and hand a plain float back.
        y_t = torch.tensor(y, dtype=mu.dtype, device=mu.device)
        return (normal.cdf(y_t).log().sum().exp() - p).item()

    # bisect search to find the quantiles 25, 50, 75
    lo = (mu - 3 * sigma).min(dim=0).values
    hi = (mu + 5 * sigma).max(dim=0).values
    num_fantasies = mu.shape[1]
    device = candidate_set.device
    dtype = candidate_set.dtype
    quantiles = torch.zeros(num_fantasies, 3, device=device, dtype=dtype)
    for i in range(num_fantasies):
        lo_, hi_ = lo[i].item(), hi[i].item()
        normal = torch.distributions.normal.Normal(mu[:, i], sigma[:, i])
        quantiles[i, :] = torch.tensor(
            [
                brentq(_cdf_gap, lo_, hi_, args=(normal, p))
                for p in (0.25, 0.50, 0.75)
            ],
            device=device,
            dtype=dtype,
        )
    q25, q50, q75 = quantiles[:, 0], quantiles[:, 1], quantiles[:, 2]
    # q25, q50, q75 are 1 dimensional tensor with size of either 1 or num_fantasies

    # parameter fitting based on matching percentiles for the Gumbel distribution
    b = (q25 - q75) / (log(log(4.0 / 3.0)) - log(log(4.0)))
    a = q50 + b * log(log(2.0))

    # inverse sampling from the fitted Gumbel CDF distribution
    sample_shape = (num_samples, num_fantasies)
    eps = torch.rand(*sample_shape, device=device, dtype=dtype)
    max_values = a - b * eps.log().mul(-1.0).log()
    return max_values  # num_samples x num_fantasies
def construct_inputs_qEHVI(
    model: Model,
    training_data: TrainingData,
    objective_thresholds: Tensor,
    objective: Optional[AcquisitionObjective] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    r"""Construct kwargs for `qExpectedHypervolumeImprovement` constructor.

    Builds on `construct_inputs_EHVI` and adds the sampler, pending points,
    constraint transforms and `eta`. Recognized `kwargs` entries read here are
    `outcome_constraints`, `sampler`, `mc_samples` (default 128), `qmc`
    (default True), `X_pending` and `eta` (default 1e-3); the remaining
    kwargs are forwarded to `construct_inputs_EHVI`.
    """
    X_observed = training_data.X

    # compute posterior mean (for ref point computation ref pareto frontier)
    with torch.no_grad():
        Y_pmean = model.posterior(X_observed).mean

    # `outcome_constraints` is removed from kwargs so that it is not forwarded
    # to `construct_inputs_EHVI`; for HV-based acquisition functions the
    # constraint transforms are passed directly instead.
    outcome_constraints = kwargs.pop("outcome_constraints", None)
    cons_tfs = None
    if outcome_constraints is not None:
        cons_tfs = get_outcome_constraint_transforms(outcome_constraints)
        # Adjust `Y_pmean` to contain feasible points only.
        satisfied = [c(Y_pmean) <= 0 for c in cons_tfs]
        feas = torch.stack(satisfied, dim=-1).all(dim=-1)
        Y_pmean = Y_pmean[feas]

    if objective is None:
        objective = IdentityMCMultiOutputObjective()

    ehvi_kwargs = construct_inputs_EHVI(
        model=model,
        training_data=training_data,
        objective_thresholds=objective_thresholds,
        objective=objective,
        # Pass `Y_pmean` that accounts for constraints to `construct_inputs_EHVI`
        # to ensure that correct non-dominated partitioning is produced.
        Y_pmean=Y_pmean,
        **kwargs,
    )

    sampler = kwargs.get("sampler")
    if sampler is None:
        sampler = _get_sampler(
            mc_samples=kwargs.get("mc_samples", 128), qmc=kwargs.get("qmc", True)
        )

    return {
        **ehvi_kwargs,
        "sampler": sampler,
        "X_pending": kwargs.get("X_pending"),
        "constraints": cons_tfs,
        "eta": kwargs.get("eta", 1e-3),
    }
def subset_model(
    model: Model,
    objective_weights: Tensor,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    objective_thresholds: Optional[Tensor] = None,
) -> Tuple[Model, Tensor, Optional[Tuple[Tensor, Tensor]], Optional[Tensor]]:
    """Subset a botorch model to the outputs used in the optimization.

    Args:
        model: A BoTorch Model. If the model's `subset_output` method raises
            `NotImplementedError`, this function is a null-op and returns the
            input arguments.
        objective_weights: The objective is to maximize a weighted sum of
            the columns of f(x). These are the weights.
        outcome_constraints: A tuple of (A, b). For k outcome constraints
            and m outputs at f(x), A is (k x m) and b is (k x 1) such that
            A f(x) <= b. (Not used by single task models)
        objective_thresholds: The `m`-dim tensor of objective thresholds. There
            is one for each modeled metric.

    Returns:
        A four-tuple of model, objective_weights, outcome_constraints, and
        objective thresholds all subset to only those outputs that appear in
        either the objective weights or the outcome constraints.

    Raises:
        RuntimeError: If more outputs are selected than the model has.
    """
    nonzero = objective_weights != 0
    if outcome_constraints is not None:
        A, _ = outcome_constraints
        nonzero = nonzero | torch.any(A != 0, dim=0)
    # Build the index range on the mask's device; indexing a CPU range with a
    # mask that lives on another device fails.
    idcs = torch.arange(nonzero.size(0), device=nonzero.device)[nonzero].tolist()
    if len(idcs) == model.num_outputs:
        # if we use all model outputs, just return the inputs
        return model, objective_weights, outcome_constraints, objective_thresholds
    elif len(idcs) > model.num_outputs:
        raise RuntimeError(
            "Model size inconsistency. Tryting to subset a model with "
            f"{model.num_outputs} outputs to {len(idcs)} outputs"
        )
    try:
        subset = model.subset_output(idcs=idcs)
    except NotImplementedError:
        # model cannot be subset: leave every input untouched
        return model, objective_weights, outcome_constraints, objective_thresholds
    objective_weights = objective_weights[nonzero]
    if outcome_constraints is not None:
        A, b = outcome_constraints
        outcome_constraints = A[:, nonzero], b
    if objective_thresholds is not None:
        objective_thresholds = objective_thresholds[nonzero]
    return subset, objective_weights, outcome_constraints, objective_thresholds
def __init__(
    self,
    pref_model: Model,
    outcome_model: Optional[DeterministicModel] = None,
    previous_winner: Optional[Tensor] = None,
) -> None:
    r"""Analytic implementation of Expected Utility of the Best Option under the
    Laplace model (assumes a PairwiseGP is used as the preference model) as
    proposed in [Lin2020preference]_.

    Args:
        pref_model: The preference model that maps the outcomes (i.e., Y) to
            scalar-valued utility. It is switched to eval mode here, and its
            `datapoints` attribute determines the dtype and device of the
            standard normal stored on `self.std_norm`.
        outcome_model: A deterministic model that maps parameters (i.e., X) to
            outcomes (i.e., Y). The outcome model f defines the search space of
            Y = f(X). If it is None, we are directly calculating EUBO on
            the parameter space. When used with `OneSamplePosteriorDrawModel`,
            we are obtaining EUBO-zeta as described in [Lin2020preference].
        previous_winner: Tensor representing the previous winner in the Y space.
            Defaults to None.
    """
    # ensure the model is in eval mode
    pref_model.eval()
    super().__init__(model=pref_model)
    self.add_module("outcome_model", outcome_model)
    self.register_buffer("previous_winner", previous_winner)

    # standard normal matching the dtype/device of the preference model's data
    datapoints = pref_model.datapoints
    tkwargs = {"dtype": datapoints.dtype, "device": datapoints.device}
    loc = torch.zeros(1, **tkwargs)
    scale = torch.ones(1, **tkwargs)
    self.std_norm = torch.distributions.normal.Normal(loc, scale)
def construct_inputs_EHVI(
    model: Model,
    training_data: TrainingData,
    objective_thresholds: Tensor,
    objective: Optional[AnalyticMultiOutputObjective] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    r"""Construct kwargs for `ExpectedHypervolumeImprovement` constructor.

    Recognized `kwargs` entries are `alpha` (partitioning approximation level,
    defaulting to `get_default_partitioning_alpha`), `Y_pmean` (a precomputed
    posterior mean at the observed points) and `outcome_constraints`, which
    must be absent or None.

    Raises:
        NotImplementedError: If `outcome_constraints` is given.
    """
    num_objectives = objective_thresholds.shape[0]
    if kwargs.get("outcome_constraints") is not None:
        raise NotImplementedError("EHVI does not yet support outcome constraints.")

    X_observed = training_data.X
    alpha = kwargs.get(
        "alpha",
        get_default_partitioning_alpha(num_objectives=num_objectives),
    )

    # This selects the objectives (a subset of the outcomes) and sets each
    # objective threshold to have the proper optimization direction.
    if objective is None:
        objective = IdentityAnalyticMultiOutputObjective()
    ref_point = objective(objective_thresholds)

    # Compute posterior mean (for ref point computation ref pareto frontier)
    # if one is not provided among arguments.
    Y_pmean = kwargs.get("Y_pmean")
    if Y_pmean is None:
        with torch.no_grad():
            Y_pmean = model.posterior(X_observed).mean

    # a positive alpha selects the approximate partitioning
    use_approximate = alpha > 0
    Y = objective(Y_pmean)
    if use_approximate:
        partitioning = NondominatedPartitioning(ref_point=ref_point, Y=Y, alpha=alpha)
    else:
        partitioning = FastNondominatedPartitioning(ref_point=ref_point, Y=Y)

    return {
        "model": model,
        "ref_point": ref_point,
        "partitioning": partitioning,
        "objective": objective,
    }
def predict_from_model(model: Model, X: Tensor) -> Tuple[Tensor, Tensor]:
    r"""Predicts outcomes given a model and input tensor.

    Only the marginal variances are used; the returned covariance is diagonal.

    Args:
        model: A botorch Model.
        X: A `n x d` tensor of input parameters.

    Returns:
        Tensor: The predicted posterior mean as an `n x o`-dim tensor.
        Tensor: The predicted posterior covariance as a `n x o x o`-dim tensor.
    """
    with torch.no_grad():
        posterior = model.posterior(X)
    mean = posterior.mean.cpu().detach()
    # TODO: Allow Posterior to (optionally) return the full covariance matrix
    # Clamp so that slightly negative variances (numerical noise) cannot
    # produce an invalid covariance matrix.
    variance = posterior.variance.cpu().detach().clamp_min(0)
    # diag_embed writes exact zeros off the diagonal; multiplying by an
    # identity matrix would turn infinite variances into NaNs there.
    cov = torch.diag_embed(variance)
    return mean, cov
def predict_from_model(model: Model, X: Tensor) -> Tuple[Tensor, Tensor]:
    r"""Predict outcomes for the inputs `X` under `model`.

    The covariance is built from the marginal variances only (clamped at
    zero), so it is diagonal.

    Args:
        model: A botorch Model.
        X: A `n x d` tensor of input parameters.

    Returns:
        Tensor: The predicted posterior mean as an `n x o`-dim tensor.
        Tensor: The predicted posterior covariance as a `n x o x o`-dim tensor.
    """
    with torch.no_grad():
        posterior = model.posterior(X)
    mean = posterior.mean.cpu().detach()
    # TODO: Allow Posterior to (optionally) return the full covariance matrix
    marginal_var = posterior.variance.cpu().detach()
    marginal_var = marginal_var.clamp_min(0)  # pyre-ignore
    return mean, torch.diag_embed(marginal_var)
def get_acquisition_function(
    acquisition_function_name: str,
    model: Model,
    objective: MCAcquisitionObjective,
    X_observed: Tensor,
    X_pending: Optional[Tensor] = None,
    mc_samples: int = 500,
    qmc: bool = True,
    seed: Optional[int] = None,
    **kwargs,
) -> monte_carlo.MCAcquisitionFunction:
    r"""Convenience function for initializing botorch acquisition functions.

    Args:
        acquisition_function_name: Name of the acquisition function. One of
            "qEI", "qPI", "qNEI", "qSR" or "qUCB".
        model: A fitted model.
        objective: A MCAcquisitionObjective.
        X_observed: A `m1 x d`-dim Tensor of `m1` design points that have
            already been observed.
        X_pending: A `m2 x d`-dim Tensor of `m2` design points whose evaluation
            is pending.
        mc_samples: The number of samples to use for (q)MC evaluation of the
            acquisition function.
        qmc: If True, use quasi-Monte-Carlo sampling (instead of iid).
        seed: If provided, perform deterministic optimization (i.e. the
            function to optimize is fixed and not stochastic).
        **kwargs: Acquisition-function specific options: `tau` for qPI
            (default 1e-3), `prune_baseline` for qNEI (default False) and
            `beta` for qUCB (required).

    Returns:
        The requested acquisition function.

    Raises:
        ValueError: If "qUCB" is requested without `beta` in kwargs.
        NotImplementedError: If the acquisition function name is unknown.

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> obj = LinearMCObjective(weights=torch.tensor([1.0, 2.0]))
        >>> acqf = get_acquisition_function("qEI", model, obj, train_X)
    """
    # initialize the sampler
    sampler_cls = SobolQMCNormalSampler if qmc else IIDNormalSampler
    sampler = sampler_cls(num_samples=mc_samples, seed=seed)

    # instantiate and return the requested acquisition function
    name = acquisition_function_name
    if name in ("qEI", "qPI"):
        # both improvement-based functions use the best posterior-mean
        # objective value over the observed points as incumbent
        best_f = objective(model.posterior(X_observed).mean).max().item()
        if name == "qEI":
            return monte_carlo.qExpectedImprovement(
                model=model,
                best_f=best_f,
                sampler=sampler,
                objective=objective,
                X_pending=X_pending,
            )
        return monte_carlo.qProbabilityOfImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            tau=kwargs.get("tau", 1e-3),
        )
    if name == "qNEI":
        return monte_carlo.qNoisyExpectedImprovement(
            model=model,
            X_baseline=X_observed,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            prune_baseline=kwargs.get("prune_baseline", False),
        )
    if name == "qSR":
        return monte_carlo.qSimpleRegret(
            model=model, sampler=sampler, objective=objective, X_pending=X_pending
        )
    if name == "qUCB":
        if "beta" not in kwargs:
            raise ValueError("`beta` must be specified in kwargs for qUCB.")
        return monte_carlo.qUpperConfidenceBound(
            model=model,
            beta=kwargs["beta"],
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    raise NotImplementedError(
        f"Unknown acquisition function {acquisition_function_name}"
    )
def prune_inferior_points(
    model: Model,
    X: Tensor,
    objective: Optional[MCAcquisitionObjective] = None,
    num_samples: int = 2048,
    max_frac: float = 1.0,
) -> Tensor:
    r"""Prune points from an input tensor that are unlikely to be the best point.

    Given a model, an objective, and an input tensor `X`, this function returns
    the subset of points in `X` that have some probability of being the best
    point under the objective. This function uses sampling to estimate the
    probabilities, the higher the number of points `n` in `X` the higher the
    number of samples `num_samples` should be to obtain accurate estimates.

    Args:
        model: A fitted model. Batched models are currently not supported.
        X: An input tensor of shape `n x d`. Batched inputs are currently not
            supported.
        objective: The objective under which to evaluate the posterior.
            Defaults to `IdentityMCObjective`.
        num_samples: The number of samples used to compute empirical
            probabilities of being the best point.
        max_frac: The maximum fraction of points to retain. Must satisfy
            `0 < max_frac <= 1`. Ensures that the number of elements in the
            returned tensor does not exceed `ceil(max_frac * n)`.

    Returns:
        A `n' x d` with subset of points in `X`, where

            n' = min(N_nz, ceil(max_frac * n))

        with `N_nz` the number of points in `X` that have non-zero (empirical,
        under `num_samples` samples) probability of being the best point. When
        truncation is needed, the points that are most often the best are kept.

    Raises:
        UnsupportedError: If `X` or the objective values are batched.
        ValueError: If `max_frac` is outside `(0, 1]`.
    """
    if X.ndim > 2:
        # TODO: support batched inputs (req. dealing with ragged tensors)
        raise UnsupportedError(
            "Batched inputs `X` are currently unsupported by prune_inferior_points"
        )
    max_points = math.ceil(max_frac * X.size(-2))
    if max_points < 1 or max_points > X.size(-2):
        raise ValueError(f"max_frac must take values in (0, 1], is {max_frac}")
    with torch.no_grad():
        posterior = model.posterior(X=X)
    if posterior.event_shape.numel() > SobolEngine.MAXDIM:
        if settings.debug.on():
            warnings.warn(
                f"Sample dimension q*m={posterior.event_shape.numel()} exceeding Sobol "
                f"max dimension ({SobolEngine.MAXDIM}). Using iid samples instead.",
                SamplingWarning,
            )
        sampler = IIDNormalSampler(num_samples=num_samples)
    else:
        sampler = SobolQMCNormalSampler(num_samples=num_samples)
    samples = sampler(posterior)
    if objective is None:
        objective = IdentityMCObjective()
    obj_vals = objective(samples)
    if obj_vals.ndim > 2:
        # TODO: support batched inputs (req. dealing with ragged tensors)
        raise UnsupportedError(
            "Batched models are currently unsupported by prune_inferior_points"
        )
    # index of the best point in each sample, and how often each point wins
    is_best = torch.argmax(obj_vals, dim=-1)
    idcs, counts = torch.unique(is_best, return_counts=True)
    if len(idcs) > max_points:
        # `order_idcs` are positions within `idcs`/`counts`, not rows of `X`,
        # so they have to be mapped back through `idcs` before indexing `X`.
        _, order_idcs = torch.sort(counts, descending=True)
        idcs = idcs[order_idcs[:max_points]]
    return X[idcs]
def _get_acquisition_func(
    model: Model,
    acquisition_function_name: str,
    objective_weights: Tensor,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    X_observed: Optional[Tensor] = None,
    X_pending: Optional[Tensor] = None,
    mc_objective: Type[GenericMCObjective] = GenericMCObjective,
    constrained_mc_objective: Optional[
        Type[ConstrainedMCObjective]
    ] = ConstrainedMCObjective,
    mc_objective_kwargs: Optional[Dict] = None,
    **kwargs: Any,
) -> AcquisitionFunction:
    r"""Instantiates an acquisition function.

    Args:
        model: The underlying model which the acquisition function uses
            to estimate acquisition values of candidates.
        acquisition_function_name: Name of the acquisition function.
        objective_weights: The objective is to maximize a weighted sum of
            the columns of f(x). These are the weights.
        outcome_constraints: A tuple of (A, b). For k outcome constraints
            and m outputs at f(x), A is (k x m) and b is (k x 1) such that
            A f(x) <= b. (Not used by single task models)
        X_observed: A tensor containing points observed for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any). Must not be None.
        X_pending: A tensor containing points whose evaluation is pending (i.e.
            that have been submitted for evaluation) present for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any).
        mc_objective: GenericMCObjective class, used for constructing a
            MC-objective. If constructing a penalized MC-objective, pass in
            PenalizedMCObjective together with mc_objective_kwargs .
        constrained_mc_objective: ConstrainedMCObjective class, used when
            applying constraints on the outcomes.
        mc_objective_kwargs: kwargs for constructing MC-objective.
            For GenericMCObjective, leave it as None. For PenalizedMCObjective,
            it needs to be specified in the format of kwargs.
        mc_samples: The number of MC samples to use (default: 512).
        qmc: If True, use qMC instead of MC (default: True).
        prune_baseline: If True, prune the baseline points for NEI (default: True).
        chebyshev_scalarization: Use augmented Chebyshev scalarization.
        marginalize_dim: Forwarded unchanged to `get_acquisition_function`.

    Returns:
        The instantiated acquisition function.

    Raises:
        ValueError: If `X_observed` is None, or if outcome constraints are
            given while `constrained_mc_objective` is None.
        RuntimeError: If outcome constraints are combined with a
            `PenalizedMCObjective`.
    """
    if X_observed is None:
        raise ValueError("There are no feasible observed points.")

    # construct Objective module
    if not kwargs.get("chebyshev_scalarization", False):
        obj_tf = get_objective_weights_transform(objective_weights)
    else:
        with torch.no_grad():
            Y = model.posterior(X_observed).mean
        obj_tf = get_chebyshev_scalarization(weights=objective_weights, Y=Y)

    def weighted_objective(samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
        return obj_tf(samples)

    if outcome_constraints is not None:
        if constrained_mc_objective is None:
            raise ValueError(
                "constrained_mc_objective cannot be set to None "
                "when applying outcome constraints."
            )
        if issubclass(mc_objective, PenalizedMCObjective):
            raise RuntimeError(
                "Outcome constraints are not supported for PenalizedMCObjective."
            )
        con_tfs = get_outcome_constraint_transforms(outcome_constraints)
        inf_cost = get_infeasible_cost(
            X=X_observed, model=model, objective=weighted_objective
        )
        acqf_objective = constrained_mc_objective(
            objective=weighted_objective,
            constraints=con_tfs or [],
            infeasible_cost=inf_cost,
        )
    else:
        if mc_objective_kwargs is None:
            mc_objective_kwargs = {}
        acqf_objective = mc_objective(
            objective=weighted_objective, **mc_objective_kwargs
        )

    return get_acquisition_function(
        acquisition_function_name=acquisition_function_name,
        model=model,
        objective=acqf_objective,
        X_observed=X_observed,
        X_pending=X_pending,
        prune_baseline=kwargs.get("prune_baseline", True),
        mc_samples=kwargs.get("mc_samples", 512),
        qmc=kwargs.get("qmc", True),
        # pyre-fixme[6]: Expected `Optional[int]` for 9th param but got
        #  `Union[float, int]`.
        seed=torch.randint(1, 10000, (1,)).item(),
        marginalize_dim=kwargs.get("marginalize_dim"),
    )
def test_abstract_base_model(self):
    """Check that calling `Model()` with no arguments raises `TypeError`."""
    self.assertRaises(TypeError, Model)
def get_EHVI(
    model: Model,
    objective_weights: Tensor,
    objective_thresholds: Tensor,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    X_observed: Optional[Tensor] = None,
    X_pending: Optional[Tensor] = None,
    **kwargs: Any,
) -> AcquisitionFunction:
    r"""Instantiates a qExpectedHypervolumeImprovement acquisition function.

    Args:
        model: The underlying model which the acquisition function uses
            to estimate acquisition values of candidates.
        objective_weights: The objective is to maximize a weighted sum of
            the columns of f(x). These are the weights.
        objective_thresholds: A tensor containing thresholds forming a reference
            point from which to calculate pareto frontier hypervolume. Points
            that do not dominate the objective_thresholds contribute nothing
            to hypervolume.
        outcome_constraints: A tuple of (A, b). For k outcome constraints
            and m outputs at f(x), A is (k x m) and b is (k x 1) such that
            A f(x) <= b. (Not used by single task models)
        X_observed: A tensor containing points observed for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any). Must not be None.
        X_pending: A tensor containing points whose evaluation is pending (i.e.
            that have been submitted for evaluation) present for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any).
        mc_samples: The number of MC samples to use
            (default: `DEFAULT_EHVI_MC_SAMPLES`).
        qmc: If True, use qMC instead of MC (default: True).
        alpha: Partitioning approximation level (default:
            `get_default_partitioning_alpha` for the number of objectives).

    Returns:
        qExpectedHypervolumeImprovement: The instantiated acquisition function.

    Raises:
        ValueError: If `X_observed` is None.
    """
    if X_observed is None:
        raise ValueError("There are no feasible observed points.")

    # construct Objective module
    objective, objective_thresholds = (
        get_weighted_mc_objective_and_objective_thresholds(
            objective_weights=objective_weights,
            objective_thresholds=objective_thresholds,
        )
    )
    with torch.no_grad():
        Y = model.posterior(X_observed).mean

    # For EHVI acquisition functions we pass the constraint transform directly.
    cons_tfs = (
        None
        if outcome_constraints is None
        else get_outcome_constraint_transforms(outcome_constraints)
    )
    num_objectives = objective_thresholds.shape[0]
    default_alpha = get_default_partitioning_alpha(num_objectives=num_objectives)

    return get_acquisition_function(
        acquisition_function_name="qEHVI",
        model=model,
        # TODO (jej): Fix pyre error below by restructuring class hierarchy.
        # pyre-fixme[6]: Expected `botorch.acquisition.objective.
        #  MCAcquisitionObjective` for 3rd parameter `objective` to call
        #  `get_acquisition_function` but got `IdentityMCMultiOutputObjective`.
        objective=objective,
        X_observed=X_observed,
        X_pending=X_pending,
        constraints=cons_tfs,
        mc_samples=kwargs.get("mc_samples", DEFAULT_EHVI_MC_SAMPLES),
        qmc=kwargs.get("qmc", True),
        alpha=kwargs.get("alpha", default_alpha),
        seed=torch.randint(1, 10000, (1,)).item(),
        ref_point=objective_thresholds.tolist(),
        Y=Y,
    )
def prune_inferior_points_multi_objective(
    model: Model,
    X: Tensor,
    ref_point: Tensor,
    objective: Optional[MCMultiOutputObjective] = None,
    constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
    num_samples: int = 2048,
    max_frac: float = 1.0,
    marginalize_dim: Optional[int] = None,
) -> Tensor:
    r"""Prune points from an input tensor that are unlikely to be pareto optimal.

    Given a model, an objective, and an input tensor `X`, this function returns
    the subset of points in `X` that have some probability of being pareto
    optimal, better than the reference point, and feasible. This function uses
    sampling to estimate the probabilities, the higher the number of points `n`
    in `X` the higher the number of samples `num_samples` should be to obtain
    accurate estimates.

    Args:
        model: A fitted model. Batched models are currently not supported.
        X: An input tensor of shape `n x d`. Batched inputs are currently not
            supported.
        ref_point: The reference point.
        objective: The objective under which to evaluate the posterior.
            Defaults to `IdentityMCMultiOutputObjective`.
        constraints: A list of callables, each mapping a Tensor of dimension
            `sample_shape x batch-shape x q x m` to a Tensor of dimension
            `sample_shape x batch-shape x q`, where negative values imply
            feasibility.
        num_samples: The number of samples used to compute empirical
            probabilities of being the best point.
        max_frac: The maximum fraction of points to retain. Must satisfy
            `0 < max_frac <= 1`. Ensures that the number of elements in the
            returned tensor does not exceed `ceil(max_frac * n)`.
        marginalize_dim: A batch dimension that should be marginalized.
            For example, this is useful when using a batched fully Bayesian
            model.

    Returns:
        A `n' x d` with subset of points in `X`, where

            n' = min(N_nz, ceil(max_frac * n))

        with `N_nz` the number of points in `X` that have non-zero (empirical,
        under `num_samples` samples) probability of being pareto optimal.

    Raises:
        UnsupportedError: If `X` is batched, or if the objective values have
            extra batch dimensions that cannot be marginalized.
        ValueError: If `max_frac` is outside `(0, 1]`.
    """
    if X.ndim > 2:
        # TODO: support batched inputs (req. dealing with ragged tensors)
        raise UnsupportedError(
            "Batched inputs `X` are currently unsupported by "
            "prune_inferior_points_multi_objective"
        )
    num_points = X.size(-2)
    max_points = math.ceil(max_frac * num_points)
    if not 1 <= max_points <= num_points:
        raise ValueError(f"max_frac must take values in (0, 1], is {max_frac}")

    with torch.no_grad():
        posterior = model.posterior(X=X)
    if posterior.event_shape.numel() <= SobolEngine.MAXDIM:
        sampler = SobolQMCNormalSampler(num_samples=num_samples)
    else:
        if settings.debug.on():
            warnings.warn(
                f"Sample dimension q*m={posterior.event_shape.numel()} exceeding Sobol "
                f"max dimension ({SobolEngine.MAXDIM}). Using iid samples instead.",
                SamplingWarning,
            )
        sampler = IIDNormalSampler(num_samples=num_samples)
    samples = sampler(posterior)

    if objective is None:
        objective = IdentityMCMultiOutputObjective()
    obj_vals = objective(samples, X=X)
    if obj_vals.ndim > 3:
        if obj_vals.ndim != 4 or marginalize_dim is None:
            # TODO: support batched inputs (req. dealing with ragged tensors)
            raise UnsupportedError(
                "Models with multiple batch dims are currently unsupported by"
                " prune_inferior_points_multi_objective."
            )
        obj_vals = obj_vals.mean(dim=marginalize_dim)

    if constraints is not None:
        violated = [c(samples) > 0 for c in constraints]
        infeas = torch.stack(violated, dim=0).any(dim=0)
        if infeas.ndim == 3 and marginalize_dim is not None:
            # make sure marginalize_dim is not negative
            if marginalize_dim < 0:
                # add 1 to the normalize marginalize_dim since we have already
                # removed the output dim
                marginalize_dim = (
                    1 + normalize_indices([marginalize_dim], d=infeas.ndim)[0]
                )
            # majority vote over the marginalized dimension
            infeas = infeas.float().mean(dim=marginalize_dim).round().bool()
        # set infeasible points to be the ref point
        obj_vals[infeas] = ref_point

    non_dominated = is_non_dominated(obj_vals, deduplicate=False)
    beats_ref_point = (obj_vals > ref_point).all(dim=-1)
    pareto_mask = non_dominated & beats_ref_point
    # empirical probability (over samples) of each point being pareto optimal
    probs = pareto_mask.to(dtype=X.dtype).mean(dim=0)
    idcs = probs.nonzero().view(-1)
    if idcs.shape[0] > max_points:
        # `probs` has one entry per row of `X`, so the sort order indexes `X`
        _, order_idcs = torch.sort(probs, descending=True)
        idcs = order_idcs[:max_points]
    return X[idcs]
def _step(
    model: Model,
    Xs: List[Tensor],
    samplers: List[Optional[MCSampler]],
    valfunc_cls: List[Optional[Type[AcquisitionFunction]]],
    valfunc_argfacs: List[Optional[TAcqfArgConstructor]],
    inner_samplers: List[Optional[MCSampler]],
    objective: MCAcquisitionObjective,
    posterior_transform: PosteriorTransform,
    running_val: Optional[Tensor] = None,
    sample_weights: Optional[Tensor] = None,
    step_index: int = 0,
) -> Tensor:
    r"""Recursively accumulate the "value-to-go" of a multi-step look-ahead.

    Each call processes one stage of the look-ahead tree: it evaluates the stage
    value at `Xs[0]`, adds it to the running value, and then either returns the
    weighted sum over all leaf nodes (when `Xs[0]` is the last stage) or
    fantasizes the model at `Xs[0]` and recurses on the remaining stages.

    Args:
        model: A Model of appropriate batch size. Specifically, it must be
            possible to evaluate the model's posterior at `Xs[0]`.
        Xs: A list `[X_j, ..., X_k]` of tensors, where `X_i` has shape
            `f_i x .... x f_1 x batch_shape x q_i x d`.
        samplers: A list of `k - j` samplers, such that the number of samples of
            sampler `i` is `f_i`. The last element of this list is considered
            the "inner sampler", which is used for evaluating the objective in
            case it is an MCAcquisitionObjective.
        valfunc_cls: A list of acquisition function classes to be used as the
            (stage + terminal) value functions. Each element (except for the
            last one) can be `None`, in which case a zero stage value is
            assumed for the respective stage.
        valfunc_argfacs: A list of callables that map a `Model` and input tensor
            `X` to a dictionary of kwargs for the respective stage value
            function constructor. If `None`, only the standard `model`,
            `sampler` and `objective` kwargs will be used.
        inner_samplers: A list of `MCSampler` objects, each to be used in the
            stage value function at the corresponding index.
        objective: The MCAcquisitionObjective under which the model output is
            evaluated.
        posterior_transform: A PosteriorTransform. Used to transform the
            posterior before sampling / evaluating the model output.
        running_val: A `batch_shape`-dim tensor containing the current running
            value.
        sample_weights: A tensor of shape `f_i x .... x f_1 x batch_shape` when
            called in the `i`-th step, by which to weight the stage value
            samples. Used in conjunction with Gauss-Hermite integration or
            importance sampling. Assumed to be `None` in the initial step
            (when `step_index=0`).
        step_index: The index of the look-ahead step. `step_index=0` indicates
            the initial step.

    Returns:
        A `b`-dim tensor containing the multi-step value of the design `X`.
    """
    X = Xs[0]

    # Weights are only absent on the initial call; start from unit weights
    # over the leading (batch) dimensions of X.
    if sample_weights is None:
        sample_weights = torch.ones(*X.shape[:-2], device=X.device, dtype=X.dtype)

    # Value contributed by the current stage (None if this stage has no
    # value function).
    stage_value = _compute_stage_value(
        model=model,
        valfunc_cls=valfunc_cls[0],
        X=X,
        objective=objective,
        posterior_transform=posterior_transform,
        inner_sampler=inner_samplers[0],
        arg_fac=valfunc_argfacs[0],
    )
    if stage_value is not None:
        if running_val is None:
            running_val = stage_value
        else:
            # running_val has shape f_{i-1} x ... x f_1 x batch_shape while
            # stage_value has shape f_i x ... x f_1 x batch_shape; the
            # broadcasted sum therefore gains the leading f_i dimension.
            running_val = running_val + stage_value

    if len(Xs) > 1:
        # More stages to go: condition the model on fantasies at X. The
        # resulting model has batch shape f_{j+1} x ... x f_1 x batch_shape.
        # Gradients only need to be propagated through fantasies after the
        # initial step.
        stage_sampler = samplers[0]
        fantasized = model.fantasize(
            X=X,
            sampler=stage_sampler,
            observation_noise=True,
            propagate_grads=step_index > 0,
        )
        # Extend the weights to cover the newly added fantasy dimension.
        next_weights = _construct_sample_weights(
            prev_weights=sample_weights, sampler=stage_sampler
        )
        return _step(
            model=fantasized,
            Xs=Xs[1:],
            samplers=samplers[1:],
            valfunc_cls=valfunc_cls[1:],
            valfunc_argfacs=valfunc_argfacs[1:],
            inner_samplers=inner_samplers[1:],
            objective=objective,
            posterior_transform=posterior_transform,
            running_val=running_val,
            sample_weights=next_weights,
            step_index=step_index + 1,
        )

    # Base case: no more fantasizing. Collapse all fantasy dimensions (the
    # first `step_index` dims) via a weighted sum over the leaf nodes.
    batch_shape = running_val.shape[step_index:]
    # The weights must match running_val's shape exactly, because the
    # reduction below sums the element-wise product over the flattened
    # fantasy dimensions.
    leaf_weights = sample_weights.expand(running_val.shape)
    weighted = running_val * leaf_weights
    return weighted.view(-1, *batch_shape).sum(dim=0)
def get_acquisition_function(
    acquisition_function_name: str,
    model: Model,
    objective: MCAcquisitionObjective,
    X_observed: Tensor,
    X_pending: Optional[Tensor] = None,
    constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
    mc_samples: int = 500,
    qmc: bool = True,
    seed: Optional[int] = None,
    **kwargs,
) -> monte_carlo.MCAcquisitionFunction:
    r"""Convenience function for initializing botorch acquisition functions.

    Args:
        acquisition_function_name: Name of the acquisition function. One of
            "qEI", "qPI", "qNEI", "qSR", "qUCB", or "qEHVI".
        model: A fitted model.
        objective: A MCAcquisitionObjective.
        X_observed: A `m1 x d`-dim Tensor of `m1` design points that have
            already been observed.
        X_pending: A `m2 x d`-dim Tensor of `m2` design points whose evaluation
            is pending.
        constraints: A list of callables, each mapping a Tensor of dimension
            `sample_shape x batch-shape x q x m` to a Tensor of dimension
            `sample_shape x batch-shape x q`, where negative values imply
            feasibility. Used when constraint_transforms are not passed
            as part of the objective. Within this function, constraints are
            only consumed by the "qEHVI" branch.
        mc_samples: The number of samples to use for (q)MC evaluation of the
            acquisition function.
        qmc: If True, use quasi-Monte-Carlo sampling (instead of iid).
        seed: If provided, perform deterministic optimization (i.e. the
            function to optimize is fixed and not stochastic).
        **kwargs: Acquisition-function-specific options:

            - `tau` ("qPI"): optional, defaults to `1e-3`.
            - `prune_baseline` ("qNEI"): optional, defaults to `False`.
            - `beta` ("qUCB"): required.
            - `ref_point` ("qEHVI"): required; the reference point.
            - `Y` ("qEHVI"): required; outcomes used (after feasibility
              filtering and applying `objective`) to build the partitioning.
            - `alpha` ("qEHVI"): optional, defaults to `0.0`; passed to
              `NondominatedPartitioning`.

    Returns:
        The requested acquisition function.

    Raises:
        ValueError: If a required entry of `kwargs` is missing for the
            requested acquisition function.
        NotImplementedError: If `acquisition_function_name` is not recognized.

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> obj = LinearMCObjective(weights=torch.tensor([1.0, 2.0]))
        >>> acqf = get_acquisition_function("qEI", model, obj, train_X)
    """
    # initialize the sampler
    if qmc:
        sampler = SobolQMCNormalSampler(num_samples=mc_samples, seed=seed)
    else:
        sampler = IIDNormalSampler(num_samples=mc_samples, seed=seed)

    # instantiate and return the requested acquisition function
    if acquisition_function_name == "qEI":
        # incumbent value: best objective of the posterior mean at X_observed
        best_f = objective(model.posterior(X_observed).mean).max().item()
        return monte_carlo.qExpectedImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    elif acquisition_function_name == "qPI":
        best_f = objective(model.posterior(X_observed).mean).max().item()
        return monte_carlo.qProbabilityOfImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            tau=kwargs.get("tau", 1e-3),
        )
    elif acquisition_function_name == "qNEI":
        return monte_carlo.qNoisyExpectedImprovement(
            model=model,
            X_baseline=X_observed,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            prune_baseline=kwargs.get("prune_baseline", False),
        )
    elif acquisition_function_name == "qSR":
        return monte_carlo.qSimpleRegret(
            model=model, sampler=sampler, objective=objective, X_pending=X_pending
        )
    elif acquisition_function_name == "qUCB":
        if "beta" not in kwargs:
            raise ValueError("`beta` must be specified in kwargs for qUCB.")
        return monte_carlo.qUpperConfidenceBound(
            model=model,
            beta=kwargs["beta"],
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    elif acquisition_function_name == "qEHVI":
        # Validate required kwargs up front with the same membership-check
        # style as the qUCB branch, so the ValueError is raised directly
        # rather than from inside a KeyError handler.
        if "ref_point" not in kwargs:
            raise ValueError("`ref_point` must be specified in kwargs for qEHVI")
        if "Y" not in kwargs:
            raise ValueError("`Y` must be specified in kwargs for qEHVI")
        ref_point = kwargs["ref_point"]
        Y = kwargs["Y"]
        # get feasible points: keep rows of Y satisfying every constraint
        # (a constraint value <= 0 means feasible)
        if constraints is not None:
            feas = torch.stack([c(Y) <= 0 for c in constraints], dim=-1).all(dim=-1)
            Y = Y[feas]
        obj = objective(Y)
        partitioning = NondominatedPartitioning(
            ref_point=torch.as_tensor(ref_point, dtype=Y.dtype, device=Y.device),
            Y=obj,
            alpha=kwargs.get("alpha", 0.0),
        )
        return moo_monte_carlo.qExpectedHypervolumeImprovement(
            model=model,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            objective=objective,
            constraints=constraints,
            X_pending=X_pending,
        )
    raise NotImplementedError(
        f"Unknown acquisition function {acquisition_function_name}"
    )
def _instantiate_acqf(
    self,
    model: Model,
    objective: AcquisitionObjective,
    model_dependent_kwargs: Dict[str, Any],
    objective_thresholds: Optional[Tensor] = None,
    X_pending: Optional[Tensor] = None,
    X_baseline: Optional[Tensor] = None,
) -> None:
    """Build the EHVI-style acquisition function and store it on `self.acqf`.

    Replicates the `get_EHVI` transformation code: posterior means at the
    baseline points are filtered for feasibility, mapped through `objective`,
    and used to build a `NondominatedPartitioning` relative to the reference
    point derived from `objective_thresholds`.

    Args:
        model: The model whose posterior mean is evaluated at `X_baseline`.
        objective: Callable applied both to `objective_thresholds` (to obtain
            the reference point) and to the feasible posterior means.
        model_dependent_kwargs: Must contain the key "outcome_constraints";
            that entry is popped (removed) from the dict.
        objective_thresholds: Thresholds from which the reference point is
            derived. Required.
        X_pending: Pending points, forwarded to the acquisition function.
        X_baseline: Observed points at which the posterior mean is evaluated.
            Required.

    Raises:
        ValueError: If `X_baseline` or `objective_thresholds` is `None`, or
            if the derived reference point is empty.
    """
    # Pull out the model-dependent pieces (note: this removes the key from
    # the caller's dict).
    outcome_constraints = model_dependent_kwargs.pop("outcome_constraints")

    # Mirror the `get_EHVI` transformation code.
    if X_baseline is None:
        raise ValueError("There are no feasible observed points.")
    if objective_thresholds is None:
        raise ValueError("Objective Thresholds required")

    with torch.no_grad():
        Y = model.posterior(X_baseline).mean

    # EHVI acquisition functions receive the constraint transforms directly.
    constraint_tfs = (
        None
        if outcome_constraints is None
        else get_outcome_constraint_transforms(outcome_constraints)
    )

    options = self.options
    n_objectives = objective_thresholds.shape[0]
    n_mc_samples = options.get("mc_samples", DEFAULT_EHVI_MC_SAMPLES)
    use_qmc = options.get("qmc", True)
    partitioning_alpha = options.get(
        "alpha",
        get_default_partitioning_alpha(num_objectives=n_objectives),
    )

    # Applying the objective selects the objectives (a subset of the outcomes)
    # and gives each objective threshold the proper optimization direction.
    ref_point = objective(objective_thresholds).tolist()

    # Set up the sampler with a randomly drawn seed.
    seed = int(torch.randint(1, 10000, (1,)).item())
    if qmc := use_qmc:
        sampler = SobolQMCNormalSampler(num_samples=n_mc_samples, seed=seed)
    else:
        sampler = IIDNormalSampler(
            num_samples=n_mc_samples, seed=seed
        )  # pragma: nocover

    if not ref_point:
        raise ValueError(
            "`ref_point` must be specified in kwargs for qEHVI"
        )  # pragma: nocover

    # Restrict to feasible points: a row is kept only if every constraint
    # transform evaluates to <= 0 on it.
    if constraint_tfs is not None:
        # pyre-ignore [16]: `Tensor` has no attribute `all`.
        feasible = torch.stack([tf(Y) <= 0 for tf in constraint_tfs], dim=-1).all(
            dim=-1
        )
        Y = Y[feasible]

    objective_values = objective(Y)
    ref_point_tensor = torch.as_tensor(ref_point, dtype=Y.dtype, device=Y.device)
    partitioning = NondominatedPartitioning(
        ref_point=ref_point_tensor,
        Y=objective_values,
        alpha=partitioning_alpha,
    )

    self.acqf = self._botorch_acqf_class(  # pyre-ignore[28]: Some kwargs are
        # not expected in base `AcquisitionFunction` but are expected in
        # its subclasses.
        model=model,
        ref_point=ref_point,
        partitioning=partitioning,
        sampler=sampler,
        objective=objective,
        constraints=constraint_tfs,
        X_pending=X_pending,
    )