Beispiel #1
    def test_match_batch_shape(self):
        """Exercise `match_batch_shape` on inputs with at most one batch dimension."""
        # Case 1: an unbatched source is expected to gain a leading singleton dim.
        source = torch.rand(3, 2)
        target = torch.rand(1, 3, 2)
        matched = match_batch_shape(source, target)
        is_equal = torch.equal(matched, source.unsqueeze(0))
        self.assertTrue(is_equal)

        # Case 2: a singleton batch is expected to be tiled to the target's batch.
        source = torch.rand(1, 3, 2)
        target = torch.rand(2, 3, 2)
        matched = match_batch_shape(source, target)
        is_equal = torch.equal(matched, source.repeat(2, 1, 1))
        self.assertTrue(is_equal)

        # Case 3: a size-2 batch matched against a size-1 batch must raise.
        source = torch.rand(2, 3, 2)
        target = torch.rand(1, 3, 2)
        self.assertRaises(RuntimeError, match_batch_shape, source, target)
Beispiel #2
    def test_match_batch_shape(self):
        """Exercise `match_batch_shape` on inputs with at most one batch dimension."""
        # Case 1: an unbatched source is expected to gain a leading singleton dim.
        source = torch.rand(3, 2)
        target = torch.rand(1, 3, 2)
        matched = match_batch_shape(source, target)
        is_equal = torch.equal(matched, source.unsqueeze(0))
        self.assertTrue(is_equal)

        # Case 2: a singleton batch is expected to be tiled to the target's batch.
        source = torch.rand(1, 3, 2)
        target = torch.rand(2, 3, 2)
        matched = match_batch_shape(source, target)
        is_equal = torch.equal(matched, source.repeat(2, 1, 1))
        self.assertTrue(is_equal)

        # Case 3: a size-2 batch matched against a size-1 batch must raise.
        source = torch.rand(2, 3, 2)
        target = torch.rand(1, 3, 2)
        self.assertRaises(RuntimeError, match_batch_shape, source, target)
Beispiel #3
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate qNoisyExpectedImprovement on the candidate set `X`.

        Args:
            X: A `batch_shape x q x d`-dim Tensor of t-batches with `q` `d`-dim design
                points each.

        Returns:
            A `batch_shape'`-dim Tensor of Noisy Expected Improvement values at the
            given design points `X`, where `batch_shape'` is the broadcasted batch shape
            of model and input `X`.
        """
        q = X.shape[-2]
        # Baseline points come first along the q-dimension, the `q` candidates last.
        X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2)
        # TODO: Implement more efficient way to compute posterior over both training and
        # test points in GPyTorch.
        posterior = self.model.posterior(
            X_full, posterior_transform=self.posterior_transform
        )
        if self._cache_root:
            # NOTE(review): only `posterior=posterior` is certain for this call; the
            # `X_full` and `q` keyword arguments are assumed -- confirm against the
            # signature of `_forward_cached`.
            diffs = self._forward_cached(posterior=posterior, X_full=X_full, q=q)
        else:
            samples = self.sampler(posterior)
            obj = self.objective(samples, X=X_full)
            # Best value among the candidates minus best value among the baseline.
            best_new = obj[..., -q:].max(dim=-1).values
            best_baseline = obj[..., :-q].max(dim=-1).values
            diffs = best_new - best_baseline

        # Keep only improvements and average over the leading (sample) dimension.
        return diffs.clamp_min(0).mean(dim=0)
    def _sample_max_values(self):
        r"""Sample max values for MC approximation of the expectation in MES.

        The samples are stored in `self.posterior_max_values`; nothing is returned.
        Runs under `torch.no_grad()`.
        """
        with torch.no_grad():
            # Append X_pending to candidate set
            if self.X_pending is None:
                # Empty placeholder with the candidate set's dtype and device.
                X_pending = torch.tensor(
                    [], dtype=self.candidate_set.dtype, device=self.candidate_set.device
                )
            else:
                X_pending = self.X_pending
            X_pending = match_batch_shape(X_pending, self.candidate_set)
            candidate_set = torch.cat([self.candidate_set, X_pending], dim=0)

            # project the candidate_set to the highest fidelity,
            # which is needed for the multi-fidelity MES
            try:
                candidate_set = self.project(candidate_set)
            except AttributeError:
                # No `project` attribute available: use the candidate set as is.
                pass

            # sample max values
            if self.use_gumbel:
                self.posterior_max_values = _sample_max_value_Gumbel(
                    self.model, candidate_set, self.num_mv_samples, self.maximize
                )
            else:
                self.posterior_max_values = _sample_max_value_Thompson(
                    self.model, candidate_set, self.num_mv_samples, self.maximize
                )
    def get_multi_step_tree_input_representation(self,
                                                 X: Tensor) -> List[Tensor]:
        r"""Get the multi-step tree representation of X.

        Args:
            X: A `batch_shape x q' x d`-dim Tensor with `q'` design points for each
                batch, where `q' = q_0 + f_1 q_1 + f_2 f_1 q_2 + ...`. Here `q_i`
                is the number of candidates jointly considered in look-ahead step
                `i`, and `f_i` is respective number of fantasies.

        Returns:
            A list `[X_j, ..., X_k]` of tensors, where `X_i` has shape
            `f_i x .... x f_1 x batch_shape x q_i x d`.
        """
        batch_shape, shapes, sizes = self.get_split_shapes(X=X)
        # Each X_i in Xsplit has shape batch_shape x qtilde x d with
        # qtilde = f_i * ... * f_1 * q_i
        Xsplit = torch.split(X, sizes, dim=-2)
        # now reshape (need to permute batch_shape and qtilde dimensions for i > 0)
        # `perm` has one entry per dimension of a split tensor, so the permutation is
        # applied before reshaping: it moves the qtilde dimension to the front.
        perm = [-2] + list(range(len(batch_shape))) + [-1]
        X0 = Xsplit[0].reshape(shapes[0])
        Xother = [
            X_i.permute(*perm).reshape(shape)
            for X_i, shape in zip(Xsplit[1:], shapes[1:])
        ]
        # concatenate in pending points
        if self.X_pending is not None:
            X0 = torch.cat([X0, match_batch_shape(self.X_pending, X0)], dim=-2)

        return [X0] + Xother
    def __init__(
        model: Model,
        candidate_set: Tensor,
        num_fantasies: int = 16,
        num_mv_samples: int = 10,
        num_y_samples: int = 128,
        use_gumbel: bool = True,
        maximize: bool = True,
        X_pending: Optional[Tensor] = None,
        train_inputs: Tensor = None,
        **kwargs: Any,
    ) -> None:
        # NOTE(review): this snippet is incomplete as shown and is not valid Python:
        #   * the body uses `self`, but no `self` parameter appears in the signature;
        #   * the docstring below is never closed;
        #   * `self.candidate_set =[candidate_set, train_inputs], dim=0)` is missing
        #     the call that takes the list and `dim` (presumably `torch.cat(`);
        #   * the final `if X_pending is None:` has no body.
        # Restore the missing pieces from the upstream source before relying on it.
        r"""Single-outcome max-value entropy search acquisition function.

            model: A fitted single-outcome model.
            candidate_set: A `n x d` Tensor including `n` candidate points to
                discretize the design space. Max values are sampled from the
                (joint) model posterior over these points.
            num_fantasies: Number of fantasies to generate. The higher this
                number the more accurate the model (at the expense of model
                complexity, wall time and memory). Ignored if `X_pending` is `None`.
            num_mv_samples: Number of max value samples.
            num_y_samples: Number of posterior samples at specific design point `X`.
            use_gumbel: If True, use Gumbel approximation to sample the max values.
            X_pending: A `m x d`-dim Tensor of `m` design points that have been
                submitted for function evaluation but have not yet been evaluated.
            maximize: If True, consider the problem a maximization problem.
            train_inputs: A `n_train x d` Tensor that the model has been fitted on,
                optional if model is an exact GP model.
        sampler = SobolQMCNormalSampler(num_y_samples)
        super().__init__(model=model, sampler=sampler)

        # Batch GP models (e.g. fantasized models) are not currently supported
        if train_inputs is None:
            train_inputs = self.model.train_inputs[0]
        if train_inputs.ndim > 2:
            raise NotImplementedError(
                "Batch GP models (e.g. fantasized models) "
                "are not yet supported by qMaxValueEntropy")

        self._init_model = model  # only used for the `fantasize()` in `set_X_pending()`
        train_inputs = match_batch_shape(train_inputs, candidate_set)
        self.candidate_set =[candidate_set, train_inputs], dim=0)
        self.fantasies_sampler = SobolQMCNormalSampler(num_fantasies)
        self.num_fantasies = num_fantasies
        self.use_gumbel = use_gumbel
        self.num_mv_samples = num_mv_samples
        self.maximize = maximize
        self.weight = 1.0 if maximize else -1.0

        # If we put the `self._sample_max_values()` to `set_X_pending()`,
        # it will throw errors when the initial `super().__init__()` is called,
        # since some members required by `_sample_max_values()` are not yet initialized.
        if X_pending is None:
Beispiel #7
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate qKnowledgeGradient on the candidate set `X`.

        Args:
            X: A `b x (q + num_fantasies) x d` Tensor with `b` t-batches of
                `q + num_fantasies` design points each. We split this X tensor
                into two parts in the `q` dimension (`dim=-2`). The first `q`
                are the q-batch of design points and the last num_fantasies are
                the current solutions of the inner optimization problem.

                `X_fantasies = X[..., -num_fantasies:, :]`
                `X_fantasies.shape = b x num_fantasies x d`

                `X_actual = X[..., :-num_fantasies, :]`
                `X_actual.shape = b x q x d`

        Returns:
            A Tensor of shape `b`. For t-batch b, the q-KG value of the design
                `X_actual[b]` is averaged across the fantasy models, where
                `X_fantasies[b, i]` is chosen as the final selection for the
                `i`-th fantasy model.
                NOTE: If `current_value` is not provided, then this is not the
                true KG value of `X_actual[b]`, and `X_fantasies[b, : ]` must be
                maximized at fixed `X_actual[b]`.
        """
        X_actual, X_fantasies = _split_fantasy_points(X=X, n_f=self.num_fantasies)

        # We only concatenate X_pending into the X part after splitting
        if self.X_pending is not None:
            X_actual = torch.cat(
                [X_actual, match_batch_shape(self.X_pending, X_actual)], dim=-2
            )

        # construct the fantasy model of shape `num_fantasies x b`
        # NOTE(review): only `X=X_actual` is certain for this call; `sampler` and
        # `observation_noise` are assumed -- confirm against `Model.fantasize`.
        fantasy_model = self.model.fantasize(
            X=X_actual, sampler=self.sampler, observation_noise=True
        )

        # get the value function
        # NOTE(review): the arguments below are assumed -- confirm against the
        # signature of `_get_value_function`.
        value_function = _get_value_function(
            model=fantasy_model, objective=self.objective, sampler=self.inner_sampler
        )

        # make sure to propagate gradients to the fantasy model train inputs
        with settings.propagate_grads(True):
            values = value_function(X=X_fantasies)  # num_fantasies x b

        if self.current_value is not None:
            values = values - self.current_value

        # return average over the fantasy samples
        return values.mean(dim=0)
Beispiel #8
    def test_match_batch_shape_multi_dim(self):
        """Exercise `match_batch_shape` against targets with two batch dimensions."""
        # A single singleton batch dim is expected to expand to the full batch shape.
        source = torch.rand(1, 3, 2)
        target = torch.rand(5, 4, 3, 2)
        matched = match_batch_shape(source, target)
        is_equal = torch.equal(matched, source.expand(5, 4, 3, 2))
        self.assertTrue(is_equal)

        # A trailing batch dim that already matches is expected to be tiled in front.
        source = torch.rand(4, 3, 2)
        target = torch.rand(5, 4, 3, 2)
        matched = match_batch_shape(source, target)
        is_equal = torch.equal(matched, source.repeat(5, 1, 1, 1))
        self.assertTrue(is_equal)

        # Only the singleton batch dim is expected to be tiled.
        source = torch.rand(2, 1, 3, 2)
        target = torch.rand(2, 4, 3, 2)
        matched = match_batch_shape(source, target)
        is_equal = torch.equal(matched, source.repeat(1, 4, 1, 1))
        self.assertTrue(is_equal)

        # Batch dims 2 vs. 3 are incompatible and must raise.
        source = torch.rand(4, 2, 3, 2)
        target = torch.rand(4, 3, 3, 2)
        self.assertRaises(RuntimeError, match_batch_shape, source, target)
Beispiel #9
    def test_match_batch_shape_multi_dim(self):
        """Exercise `match_batch_shape` against targets with two batch dimensions."""
        # A single singleton batch dim is expected to expand to the full batch shape.
        source = torch.rand(1, 3, 2)
        target = torch.rand(5, 4, 3, 2)
        matched = match_batch_shape(source, target)
        is_equal = torch.equal(matched, source.expand(5, 4, 3, 2))
        self.assertTrue(is_equal)

        # A trailing batch dim that already matches is expected to be tiled in front.
        source = torch.rand(4, 3, 2)
        target = torch.rand(5, 4, 3, 2)
        matched = match_batch_shape(source, target)
        is_equal = torch.equal(matched, source.repeat(5, 1, 1, 1))
        self.assertTrue(is_equal)

        # Only the singleton batch dim is expected to be tiled.
        source = torch.rand(2, 1, 3, 2)
        target = torch.rand(2, 4, 3, 2)
        matched = match_batch_shape(source, target)
        is_equal = torch.equal(matched, source.repeat(1, 4, 1, 1))
        self.assertTrue(is_equal)

        # Batch dims 2 vs. 3 are incompatible and must raise.
        source = torch.rand(4, 2, 3, 2)
        target = torch.rand(4, 3, 3, 2)
        self.assertRaises(RuntimeError, match_batch_shape, source, target)
Beispiel #10
    def __init__(
        model: Model,
        candidate_set: Tensor,
        num_mv_samples: int = 10,
        posterior_transform: Optional[PosteriorTransform] = None,
        use_gumbel: bool = True,
        maximize: bool = True,
        X_pending: Optional[Tensor] = None,
        train_inputs: Optional[Tensor] = None,
    ) -> None:
        # NOTE(review): this snippet is incomplete as shown and is not valid Python:
        #   * the body uses `self`, but no `self` parameter appears in the signature;
        #   * the docstring below is never closed;
        #   * the `raise NotImplementedError(` call is missing its closing parenthesis;
        #   * `candidate_set =[candidate_set, train_inputs], dim=0)` is missing the
        #     call that takes the list and `dim` (presumably `torch.cat(`);
        #   * `num_mv_samples`, `posterior_transform`, `maximize` and `X_pending` are
        #     accepted but never used in the visible lines.
        # Restore the missing pieces from the upstream source before relying on it.
        r"""Single-outcome MES-like acquisition functions based on discrete MV sampling.

            model: A fitted single-outcome model.
            candidate_set: A `n x d` Tensor including `n` candidate points to
                discretize the design space. Max values are sampled from the
                (joint) model posterior over these points.
            num_mv_samples: Number of max value samples.
            posterior_transform: A PosteriorTransform. If using a multi-output model,
                a PosteriorTransform that transforms the multi-output posterior into a
                single-output posterior is required.
            use_gumbel: If True, use Gumbel approximation to sample the max values.
            maximize: If True, consider the problem a maximization problem.
            X_pending: A `m x d`-dim Tensor of `m` design points that have been
                submitted for function evaluation but have not yet been evaluated.
            train_inputs: A `n_train x d` Tensor that the model has been fitted on.
                Not required if the model is an instance of a GPyTorch ExactGP model.
        self.use_gumbel = use_gumbel

        if train_inputs is None and hasattr(model, "train_inputs"):
            train_inputs = model.train_inputs[0]
        if train_inputs is not None:
            if train_inputs.ndim > 2:
                raise NotImplementedError(
                    "Batch GP models (e.g. fantasized models) "
                    "are not yet supported by `MaxValueBase`"
            train_inputs = match_batch_shape(train_inputs, candidate_set)
            candidate_set =[candidate_set, train_inputs], dim=0)

        self.candidate_set = candidate_set

Beispiel #11
 def forward(self, X: Tensor) -> Tensor:
     r"""Evaluate the acquisition function on the candidate set `X`.

     Args:
         X: The candidate points; `X.shape[-2]` is used as the number of
             candidates `q`.

     Returns:
         The value of `self._compute_qehvi` on the joint posterior samples plus
         `self._prev_nehvi`.
     """
     # Baseline points come first along the q-dimension, the candidates last.
     X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2)
     # Note: it is important to compute the full posterior over `(X_baseline, X)`
     # to ensure that we properly sample `f(X)` from the joint distribution
     # `f(X_baseline, X) ~ P(f | D)` given that we have already fixed the sampled
     # function values for `f(X_baseline)`.
     # TODO: improve efficiency by not recomputing baseline-baseline
     # covariance matrix
     posterior = self.model.posterior(X_full)
     q = X.shape[-2]
     self._set_sampler(q=q, posterior=posterior)
     samples = self._get_f_X_samples(posterior=posterior, q=q)
     # add previous nehvi from pending points
     return self._compute_qehvi(samples=samples) + self._prev_nehvi
Beispiel #12
 def forward(self, X: Tensor) -> Tensor:
     r"""Evaluate the acquisition function on the candidate set `X`.

     Args:
         X: The candidate points; `X.shape[-2]` is the number of candidates before
             any one-to-many input transform is accounted for.

     Returns:
         The value of `self._compute_qehvi` on the joint posterior samples plus
         `self._prev_nehvi`.
     """
     # Baseline points come first along the q-dimension, the candidates last.
     X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2)
     # Note: it is important to compute the full posterior over `(X_baseline, X)`
     # to ensure that we properly sample `f(X)` from the joint distribution
     # `f(X_baseline, X) ~ P(f | D)` given that we have already fixed the sampled
     # function values for `f(X_baseline)`.
     # TODO: improve efficiency by not recomputing baseline-baseline
     # covariance matrix.
     posterior = self.model.posterior(X_full)
     # Account for possible one-to-many transform: `n_w` is the ratio between the
     # number of points in the posterior and the number of points in `X_full`.
     n_w = posterior.event_shape[X_full.dim() - 2] // X_full.shape[-2]
     q_in = X.shape[-2] * n_w
     self._set_sampler(q_in=q_in, posterior=posterior)
     samples = self._get_f_X_samples(posterior=posterior, q_in=q_in)
     # Add previous nehvi from pending points.
     return self._compute_qehvi(samples=samples, X=X) + self._prev_nehvi
Beispiel #13
    def _sample_max_values(self,
                           num_samples: int,
                           X_pending: Optional[Tensor] = None) -> Tensor:
        # NOTE(review): this snippet is incomplete as shown and is not valid Python:
        #   * the docstring below is never closed;
        #   * the `if self.use_gumbel:` branch is immediately overwritten (an `else:`
        #     appears to be missing before the Thompson assignment);
        #   * `candidate_set =[self.candidate_set, X_pending],` is missing the call
        #     that takes the list (presumably `torch.cat(`) and its remaining arguments;
        #   * `except AttributeError:` has neither a matching `try:` nor a body;
        #   * the final `sample_max_values(` call is cut off before its arguments, and
        #     no `return` is visible despite the `-> Tensor` annotation.
        # Restore the missing pieces from the upstream source before relying on it.
        r"""Draw samples from the posterior over maximum values on a discrete set.

        These samples are used to compute Monte Carlo approximations of expecations
        over the posterior over the function maximum.

            num_samples: The number of samples to draw.
            X_pending: A `m x d`-dim Tensor of `m` design points that have been
                submitted for function evaluation but have not yet been evaluated.

            A `num_samples x num_fantasies` Tensor of posterior max value samples
            (`num_fantasies=1` for non-fantasized models).
        if self.use_gumbel:
            sample_max_values = _sample_max_value_Gumbel
            sample_max_values = _sample_max_value_Thompson
        candidate_set = self.candidate_set

        with torch.no_grad():
            if X_pending is not None:
                # Append X_pending to candidate set
                X_pending = match_batch_shape(X_pending, self.candidate_set)
                candidate_set =[self.candidate_set, X_pending],

            # project the candidate_set to the highest fidelity,
            # which is needed for the multi-fidelity MES
                candidate_set = self.project(candidate_set)
            except AttributeError:

            self.posterior_max_values = sample_max_values(
Beispiel #14
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate qNoisyExpectedImprovement on the candidate set `X`.

        Args:
            X: A `batch_shape x q x d`-dim Tensor of t-batches with `q` `d`-dim design
                points each.

        Returns:
            A `batch_shape'`-dim Tensor of Noisy Expected Improvement values at the
            given design points `X`, where `batch_shape'` is the broadcasted batch shape
            of model and input `X`.
        """
        q = X.shape[-2]
        # Candidates come first along the q-dimension here, baseline points last.
        X_full = torch.cat([X, match_batch_shape(self.X_baseline, X)], dim=-2)
        # TODO (T41248036): Implement more efficient way to compute posterior
        # over both training and test points in GPyTorch
        posterior = self.model.posterior(X_full)
        samples = self.sampler(posterior)
        obj = self.objective(samples)
        # Best value among the first `q` points (candidates) minus best value among
        # the remaining points (baseline); `[0]` selects the values of `max`.
        diffs = obj[:, :, :q].max(dim=-1)[0] - obj[:, :, q:].max(dim=-1)[0]
        # Keep only improvements and average over the leading (sample) dimension.
        return diffs.clamp_min(0).mean(dim=0)
Beispiel #15
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate analytical EUBO on the candidate set X.

        Args:
            X: A `batch_shape x q x d`-dim Tensor, where `q = 2` if `previous_winner`
                is `None`; otherwise `q = 1` or `q = 2` is accepted by the check
                below.

        Returns:
            The acquisition value for each batch as a tensor of shape `batch_shape`.

        Raises:
            UnsupportedError: If `q` is neither 2, nor 1 with a previous winner set.
        """
        q = X.shape[-2]
        has_winner = self.previous_winner is not None
        if not (q == 2 or (q == 1 and has_winner)):
            # The two literals are concatenated, so the separating space is needed.
            raise UnsupportedError(
                f"{self.__class__.__name__} only support q=2 or q=1 "
                "with a previous winner specified")

        Y = X if self.outcome_model is None else self.outcome_model(X)

        if has_winner:
            # Append the previous winner as an additional point along the q-dimension.
            Y = torch.cat([Y, match_batch_shape(self.previous_winner, Y)], dim=-2)

        # Calling forward directly instead of posterior here to
        # obtain the full covariance matrix
        pref_posterior = self.model(Y)
        pref_mean = pref_posterior.mean
        pref_cov = pref_posterior.covariance_matrix
        # Mean and standard deviation of the difference between the first two points.
        delta = pref_mean[..., 0] - pref_mean[..., 1]
        sigma = torch.sqrt(pref_cov[..., 0, 0] + pref_cov[..., 1, 1] -
                           pref_cov[..., 0, 1] - pref_cov[..., 1, 0])
        u = delta / sigma

        # NOTE(review): assumes `self.std_norm` is a standard normal distribution.
        ucdf = self.std_norm.cdf(u)
        updf = torch.exp(self.std_norm.log_prob(u))
        acqf_val = sigma * (updf + u * ucdf)
        if not has_winner:
            acqf_val = acqf_val + pref_mean[..., 1]
        return acqf_val
Beispiel #16
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate qMultiFidelityKnowledgeGradient on the candidate set `X`.

        Args:
            X: A `b x (q + num_fantasies) x d` Tensor with `b` t-batches of
                `q + num_fantasies` design points each. We split this X tensor
                into two parts in the `q` dimension (`dim=-2`). The first `q`
                are the q-batch of design points and the last num_fantasies are
                the current solutions of the inner optimization problem.

                `X_fantasies = X[..., -num_fantasies:, :]`
                `X_fantasies.shape = b x num_fantasies x d`

                `X_actual = X[..., :-num_fantasies, :]`
                `X_actual.shape = b x q x d`

                In addition, `X` may be augmented with fidelity parameters as
                part of the `d`-dimension. Projecting fidelities to the target
                fidelity is handled by `project`.

        Returns:
            A Tensor of shape `b`. For t-batch b, the q-KG value of the design
                `X_actual[b]` is averaged across the fantasy models, where
                `X_fantasies[b, i]` is chosen as the final selection for the
                `i`-th fantasy model.
                NOTE: If `current_value` is not provided, then this is not the
                true KG value of `X_actual[b]`, and `X_fantasies[b, : ]` must be
                maximized at fixed `X_actual[b]`.
        """
        X_actual, X_fantasies = _split_fantasy_points(X=X, n_f=self.num_fantasies)

        # We only concatenate X_pending into the X part after splitting
        if self.X_pending is not None:
            X_eval = torch.cat(
                [X_actual, match_batch_shape(self.X_pending, X_actual)], dim=-2
            )
        else:
            X_eval = X_actual

        # construct the fantasy model of shape `num_fantasies x b`
        # expand X (to potentially add trace observations)
        fantasy_model = self.model.fantasize(
            X=self.expand(X_eval), sampler=self.sampler, observation_noise=True
        )

        # get the value function
        value_function = _get_value_function(
            model=fantasy_model, objective=self.objective, sampler=self.inner_sampler
        )

        # make sure to propagate gradients to the fantasy model train inputs
        # project the fantasy points
        with settings.propagate_grads(True):
            values = value_function(X=self.project(X_fantasies))  # num_fantasies x b

        if self.current_value is not None:
            values = values - self.current_value

        # The cost-aware utility is evaluated on `X_actual`, i.e. without the
        # pending points that were appended to `X_eval`.
        if self.cost_aware_utility is not None:
            values = self.cost_aware_utility(
                X=X_actual, deltas=values, sampler=self.cost_sampler
            )

        # return average over the fantasy samples
        return values.mean(dim=0)