def untransform(
    self, Y: Tensor, Yvar: Optional[Tensor] = None
) -> Tuple[Tensor, Optional[Tensor]]:
    r"""Un-transform log-transformed outcomes.

    Args:
        Y: A `batch_shape x n x m`-dim tensor of log-transformed targets.
        Yvar: A `batch_shape x n x m`-dim tensor of log-transformed
            observation noises associated with the training targets
            (if applicable).

    Returns:
        A two-tuple with the un-transformed outcomes:

        - The exponentiated outcome observations.
        - The exponentiated observation noise (if applicable).
    """
    Y_utf = torch.exp(Y)
    outputs = normalize_indices(self._outputs, d=Y.size(-1))
    if outputs is not None:
        Y_utf = torch.stack(
            [
                Y_utf[..., i] if i in outputs else Y[..., i]
                for i in range(Y.size(-1))
            ],
            dim=-1,
        )
    if Yvar is not None:
        # TODO: Delta method, possibly issue warning
        raise NotImplementedError(
            "Log does not yet support transforming observation noise"
        )
    return Y_utf, Yvar
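# Illustrative usage sketch (not from the source): round-tripping targets
# through a `Log` outcome transform. Assumes the standard BoTorch `Log`
# transform, whose `untransform` simply exponentiates; `torch` is assumed
# imported as in the rest of this module.
#
# >>> from botorch.models.transforms.outcome import Log
# >>> tf = Log()
# >>> Y = torch.rand(5, 1) + 0.1         # strictly positive targets
# >>> Y_tf, _ = tf(Y)                    # log-transform
# >>> Y_utf, _ = tf.untransform(Y_tf)    # exponentiate back
# >>> torch.allclose(Y, Y_utf)
# True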
def __init__(
    self,
    m: int,
    outputs: Optional[List[int]] = None,
    batch_shape: torch.Size = torch.Size(),  # noqa: B008
    min_stdv: float = 1e-8,
) -> None:
    r"""Standardize outcomes (zero mean, unit variance).

    Args:
        m: The output dimension.
        outputs: Which of the outputs to standardize. If omitted, all
            outputs will be standardized.
        batch_shape: The batch_shape of the training targets.
        min_stdv: The minimum standard deviation for which to perform
            standardization (if lower, only de-mean the data).
    """
    super().__init__()
    self.register_buffer("means", torch.zeros(*batch_shape, 1, m))
    self.register_buffer("stdvs", torch.zeros(*batch_shape, 1, m))
    self.register_buffer("_stdvs_sq", torch.zeros(*batch_shape, 1, m))
    self._outputs = normalize_indices(outputs, d=m)
    self._m = m
    self._batch_shape = batch_shape
    self._min_stdv = min_stdv
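# Illustrative usage sketch (not from the source): standardizing only a
# subset of outputs via the `outputs` argument. Assumes the standard
# BoTorch `Standardize` transform.
#
# >>> from botorch.models.transforms.outcome import Standardize
# >>> tf = Standardize(m=2, outputs=[0])   # standardize output 0 only
# >>> Y = torch.randn(10, 2)
# >>> Y_tf, _ = tf(Y)                      # fits means/stdvs in train mode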
def subset_output(self, idcs: List[int]) -> OutcomeTransform:
    r"""Subset the transform along the output dimension.

    Args:
        idcs: The output indices to subset the transform to.

    Returns:
        The current outcome transform, subset to the specified output indices.
    """
    new_m = len(idcs)
    if new_m > self._m:
        raise RuntimeError(
            "Trying to subset a transform to have more outputs than "
            "the original transform."
        )
    nlzd_idcs = normalize_indices(idcs, d=self._m)
    new_outputs = None
    if self._outputs is not None:
        new_outputs = [i for i in self._outputs if i in nlzd_idcs]
    new_tf = self.__class__(
        m=new_m,
        outputs=new_outputs,
        batch_shape=self._batch_shape,
        min_stdv=self._min_stdv,
    )
    new_tf.means = self.means[..., nlzd_idcs]
    new_tf.stdvs = self.stdvs[..., nlzd_idcs]
    new_tf._stdvs_sq = self._stdvs_sq[..., nlzd_idcs]
    if not self.training:
        new_tf.eval()
    return new_tf
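# Illustrative usage sketch (not from the source): subsetting a fitted
# `Standardize` transform to two of three outputs, e.g. to match a model
# that was subset via `subset_output`. The expected buffer shape below
# follows from the `batch_shape x 1 x m` buffers registered in `__init__`.
#
# >>> tf = Standardize(m=3)
# >>> Y_tf, _ = tf(torch.randn(10, 3))     # fit the transform
# >>> tf_sub = tf.subset_output(idcs=[0, 2])
# >>> tf_sub.means.shape                   # buffers are subset accordingly
# torch.Size([1, 2])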
def test_normalize_indices(self):
    self.assertIsNone(normalize_indices(None, 3))
    indices = [0, 2]
    nlzd_indices = normalize_indices(indices, 3)
    self.assertEqual(nlzd_indices, indices)
    nlzd_indices = normalize_indices(indices, 4)
    self.assertEqual(nlzd_indices, indices)
    indices = [0, -1]
    nlzd_indices = normalize_indices(indices, 3)
    self.assertEqual(nlzd_indices, [0, 2])
    with self.assertRaises(ValueError):
        nlzd_indices = normalize_indices([3], 3)
    with self.assertRaises(ValueError):
        nlzd_indices = normalize_indices([-4], 3)
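# For reference, a minimal sketch of the semantics exercised by the test
# above: `normalize_indices` maps possibly-negative indices to their
# non-negative equivalents for a given dimension `d`, and raises a
# ValueError for any index outside `[-d, d-1]`.
#
# >>> normalize_indices([0, -1], d=3)
# [0, 2]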
def __init__(
    self,
    Y_mean: Tensor,
    Y_std: Tensor,
    outcomes: Optional[List[int]] = None,
) -> None:
    r"""Initialize objective.

    Args:
        Y_mean: `m`-dim tensor of outcome means.
        Y_std: `m`-dim tensor of outcome standard deviations.
        outcomes: A list of `m' <= m` indices that specifies which of the `m`
            model outputs should be considered as the outcomes for MOO. If
            omitted, use all model outcomes. Typically used for constrained
            optimization.
    """
    if Y_mean.ndim > 1 or Y_std.ndim > 1:
        raise BotorchTensorDimensionError(
            "Y_mean and Y_std must both be 1-dimensional, but got "
            f"{Y_mean.ndim} and {Y_std.ndim}."
        )
    super().__init__()
    if outcomes is not None:
        if len(outcomes) < 2:
            raise BotorchTensorDimensionError(
                "Must specify at least two outcomes for MOO."
            )
        elif len(outcomes) > Y_mean.shape[-1]:
            raise BotorchTensorDimensionError(
                f"Cannot specify more ({len(outcomes)}) outcomes than are "
                f"present in the normalization inputs ({Y_mean.shape[-1]})."
            )
        nlzd_idcs = normalize_indices(outcomes, Y_mean.shape[-1])
        self.register_buffer(
            "outcomes",
            torch.tensor(nlzd_idcs, dtype=torch.long).to(device=Y_mean.device),
        )
        Y_mean = Y_mean.index_select(-1, self.outcomes)
        Y_std = Y_std.index_select(-1, self.outcomes)
    self.register_buffer("Y_mean", Y_mean)
    self.register_buffer("Y_std", Y_std)
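# Illustrative usage sketch (not from the source): constructing the
# objective for two of three standardized model outputs. The class name
# `UnstandardizeMCMultiOutputObjective` is an assumption about which
# BoTorch objective this initializer belongs to.
#
# >>> Y_mean, Y_std = torch.zeros(3), torch.ones(3)
# >>> obj = UnstandardizeMCMultiOutputObjective(
# ...     Y_mean=Y_mean, Y_std=Y_std, outcomes=[0, 2]
# ... )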
def __init__(
    self,
    outcomes: Optional[List[int]] = None,
    num_outcomes: Optional[int] = None,
) -> None:
    r"""Initialize Objective.

    Args:
        outcomes: A list of the `m'` output indices to select from the `m`
            model outputs. If omitted, use all model outcomes.
        num_outcomes: The total number of outcomes `m`. Required if any of
            the indices in `outcomes` are negative.
    """
    super().__init__()
    if outcomes is not None:
        if len(outcomes) < 2:
            raise BotorchTensorDimensionError(
                "Must specify at least two outcomes for MOO."
            )
        if any(i < 0 for i in outcomes):
            if num_outcomes is None:
                raise BotorchError(
                    "num_outcomes is required if any outcomes are less than 0."
                )
            outcomes = normalize_indices(outcomes, num_outcomes)
        self.register_buffer(
            "outcomes", torch.tensor(outcomes, dtype=torch.long)
        )
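# Illustrative usage sketch (not from the source): selecting outcomes by
# negative index, which requires `num_outcomes` so the indices can be
# normalized. Assumes this initializer belongs to BoTorch's
# `IdentityMCMultiOutputObjective`.
#
# >>> obj = IdentityMCMultiOutputObjective(outcomes=[0, -1], num_outcomes=3)
# >>> obj.outcomes
# tensor([0, 2])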
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    cat_dims: List[int],
    cont_kernel_factory: Optional[
        Callable[[torch.Size, int, List[int]], Kernel]
    ] = None,
    likelihood: Optional[Likelihood] = None,
    outcome_transform: Optional[OutcomeTransform] = None,  # TODO
    input_transform: Optional[InputTransform] = None,  # TODO
) -> None:
    r"""A single-task exact GP model supporting categorical parameters.

    Args:
        train_X: A `batch_shape x n x d` tensor of training features.
        train_Y: A `batch_shape x n x m` tensor of training observations.
        cat_dims: A list of indices corresponding to the columns of
            the input `X` that should be considered categorical features.
        cont_kernel_factory: A method that accepts `batch_shape`,
            `ard_num_dims`, and `active_dims` arguments and returns an
            instantiated GPyTorch `Kernel` object to be used as the base
            kernel for the continuous dimensions. If omitted, this model
            uses a Matern-2.5 kernel as the kernel for the ordinal
            parameters.
        likelihood: A likelihood. If omitted, use a standard
            GaussianLikelihood with inferred noise level.
        # outcome_transform: An outcome transform that is applied to the
        #     training data during instantiation and to the posterior during
        #     inference (that is, the `Posterior` obtained by calling
        #     `.posterior` on the model will be on the original scale).
        # input_transform: An input transform that is applied in the model's
        #     forward pass.

    Example:
        >>> train_X = torch.cat(
        ...     [torch.rand(20, 2), torch.randint(3, (20, 1))], dim=-1
        ... )
        >>> train_Y = (
        ...     torch.sin(train_X[..., :-1]).sum(dim=1, keepdim=True)
        ...     + train_X[..., -1:]
        ... )
        >>> model = MixedSingleTaskGP(train_X, train_Y, cat_dims=[-1])
    """
    if outcome_transform is not None:
        raise UnsupportedError("outcome transforms not yet supported")
    if input_transform is not None:
        raise UnsupportedError("input transforms not yet supported")
    if len(cat_dims) == 0:
        raise ValueError(
            "Must specify categorical dimensions for MixedSingleTaskGP"
        )
    input_batch_shape, aug_batch_shape = self.get_batch_dimensions(
        train_X=train_X, train_Y=train_Y
    )

    if cont_kernel_factory is None:

        def cont_kernel_factory(
            batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int]
        ) -> MaternKernel:
            return MaternKernel(
                nu=2.5,
                batch_shape=batch_shape,
                ard_num_dims=ard_num_dims,
                active_dims=active_dims,
            )

    if likelihood is None:
        # This Gamma prior is quite close to the Horseshoe prior
        min_noise = 1e-5 if train_X.dtype == torch.float else 1e-6
        likelihood = GaussianLikelihood(
            batch_shape=aug_batch_shape,
            noise_constraint=GreaterThan(
                min_noise, transform=None, initial_value=1e-3
            ),
            noise_prior=GammaPrior(0.9, 10.0),
        )

    d = train_X.shape[-1]
    cat_dims = normalize_indices(indices=cat_dims, d=d)
    ord_dims = sorted(set(range(d)) - set(cat_dims))
    if len(ord_dims) == 0:
        covar_module = ScaleKernel(
            CategoricalKernel(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(cat_dims),
            )
        )
    else:
        sum_kernel = ScaleKernel(
            cont_kernel_factory(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(ord_dims),
                active_dims=ord_dims,
            )
            + ScaleKernel(
                CategoricalKernel(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(cat_dims),
                    active_dims=cat_dims,
                )
            )
        )
        prod_kernel = ScaleKernel(
            cont_kernel_factory(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(ord_dims),
                active_dims=ord_dims,
            )
            * CategoricalKernel(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(cat_dims),
                active_dims=cat_dims,
            )
        )
        covar_module = sum_kernel + prod_kernel
    super().__init__(
        train_X=train_X,
        train_Y=train_Y,
        likelihood=likelihood,
        covar_module=covar_module,
        outcome_transform=outcome_transform,
        input_transform=input_transform,
    )
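# Illustrative usage sketch (not from the source): swapping the default
# Matern-2.5 continuous kernel for an RBF kernel via `cont_kernel_factory`.
# The factory name `rbf_factory` is hypothetical; it simply matches the
# `(batch_shape, ard_num_dims, active_dims)` signature expected above.
#
# >>> from gpytorch.kernels import RBFKernel
# >>> def rbf_factory(batch_shape, ard_num_dims, active_dims):
# ...     return RBFKernel(
# ...         batch_shape=batch_shape,
# ...         ard_num_dims=ard_num_dims,
# ...         active_dims=active_dims,
# ...     )
# >>> model = MixedSingleTaskGP(
# ...     train_X, train_Y, cat_dims=[-1], cont_kernel_factory=rbf_factory
# ... )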
def prune_inferior_points_multi_objective(
    model: Model,
    X: Tensor,
    ref_point: Tensor,
    objective: Optional[MCMultiOutputObjective] = None,
    constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
    num_samples: int = 2048,
    max_frac: float = 1.0,
    marginalize_dim: Optional[int] = None,
) -> Tensor:
    r"""Prune points from an input tensor that are unlikely to be pareto optimal.

    Given a model, an objective, and an input tensor `X`, this function returns
    the subset of points in `X` that have some probability of being pareto
    optimal, better than the reference point, and feasible. This function uses
    sampling to estimate the probabilities; the higher the number of points `n`
    in `X`, the higher the number of samples `num_samples` should be to obtain
    accurate estimates.

    Args:
        model: A fitted model. Batched models are currently not supported.
        X: An input tensor of shape `n x d`. Batched inputs are currently not
            supported.
        ref_point: The reference point.
        objective: The objective under which to evaluate the posterior.
        constraints: A list of callables, each mapping a Tensor of dimension
            `sample_shape x batch-shape x q x m` to a Tensor of dimension
            `sample_shape x batch-shape x q`, where negative values imply
            feasibility.
        num_samples: The number of samples used to compute empirical
            probabilities of being the best point.
        max_frac: The maximum fraction of points to retain. Must satisfy
            `0 < max_frac <= 1`. Ensures that the number of elements in the
            returned tensor does not exceed `ceil(max_frac * n)`.
        marginalize_dim: A batch dimension that should be marginalized. For
            example, this is useful when using a batched fully Bayesian model.

    Returns:
        A `n' x d`-dim tensor containing the subset of points in `X`, where
        `n' = min(N_nz, ceil(max_frac * n))` with `N_nz` the number of points
        in `X` that have non-zero (empirical, under `num_samples` samples)
        probability of being pareto optimal.
    """
    if X.ndim > 2:
        # TODO: support batched inputs (req. dealing with ragged tensors)
        raise UnsupportedError(
            "Batched inputs `X` are currently unsupported by "
            "prune_inferior_points_multi_objective"
        )
    max_points = math.ceil(max_frac * X.size(-2))
    if max_points < 1 or max_points > X.size(-2):
        raise ValueError(f"max_frac must take values in (0, 1], is {max_frac}")
    with torch.no_grad():
        posterior = model.posterior(X=X)
    if posterior.event_shape.numel() > SobolEngine.MAXDIM:
        if settings.debug.on():
            warnings.warn(
                f"Sample dimension q*m={posterior.event_shape.numel()} exceeds "
                f"the maximum Sobol dimension ({SobolEngine.MAXDIM}). "
                "Using iid samples instead.",
                SamplingWarning,
            )
        sampler = IIDNormalSampler(num_samples=num_samples)
    else:
        sampler = SobolQMCNormalSampler(num_samples=num_samples)
    samples = sampler(posterior)
    if objective is None:
        objective = IdentityMCMultiOutputObjective()
    obj_vals = objective(samples, X=X)
    if obj_vals.ndim > 3:
        if obj_vals.ndim == 4 and marginalize_dim is not None:
            obj_vals = obj_vals.mean(dim=marginalize_dim)
        else:
            # TODO: support batched inputs (req. dealing with ragged tensors)
            raise UnsupportedError(
                "Models with multiple batch dims are currently unsupported by"
                " prune_inferior_points_multi_objective."
            )
    if constraints is not None:
        infeas = torch.stack([c(samples) > 0 for c in constraints], dim=0).any(
            dim=0
        )
        if infeas.ndim == 3 and marginalize_dim is not None:
            # make sure marginalize_dim is not negative
            if marginalize_dim < 0:
                # add 1 to the normalized marginalize_dim since we have already
                # removed the output dim
                marginalize_dim = (
                    1 + normalize_indices([marginalize_dim], d=infeas.ndim)[0]
                )
            infeas = infeas.float().mean(dim=marginalize_dim).round().bool()
        # set infeasible points to be the ref point
        obj_vals[infeas] = ref_point
    pareto_mask = is_non_dominated(obj_vals, deduplicate=False) & (
        obj_vals > ref_point
    ).all(dim=-1)
    probs = pareto_mask.to(dtype=X.dtype).mean(dim=0)
    idcs = probs.nonzero().view(-1)
    if idcs.shape[0] > max_points:
        _, order_idcs = torch.sort(probs, descending=True)
        idcs = order_idcs[:max_points]
    return X[idcs]
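# Illustrative usage sketch (not from the source): pruning a candidate set
# before optimizing an expensive multi-objective acquisition function.
# Assumes `model` is a fitted two-outcome BoTorch model on a 4-dim input.
#
# >>> X = torch.rand(128, 4)
# >>> ref_point = torch.tensor([0.0, 0.0])
# >>> X_pruned = prune_inferior_points_multi_objective(
# ...     model=model, X=X, ref_point=ref_point, max_frac=0.25
# ... )
# >>> X_pruned.shape[0] <= math.ceil(0.25 * 128)
# True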