Code example #1
from typing import Tuple

import torch
from gpytorch.utils.broadcasting import _mul_broadcast_shape
from torch import Tensor


def add_output_dim(X: Tensor, original_batch_shape: torch.Size) -> Tuple[Tensor, int]:
    r"""Insert the output dimension at the correct location.

    The trailing batch dimensions of X must match the original batch dimensions
    of the training inputs, but can also include extra batch dimensions.

    Args:
        X: A `(new_batch_shape) x (original_batch_shape) x n x d` tensor of
            features.
        original_batch_shape: the batch shape of the model's training inputs.

    Returns:
        2-element tuple containing

        - A `(new_batch_shape) x (original_batch_shape) x m x n x d` tensor of
            features.
        - The index corresponding to the output dimension.
    """
    X_batch_shape = X.shape[:-2]
    if len(X_batch_shape) > 0 and len(original_batch_shape) > 0:
        # check that X_batch_shape supports broadcasting or augments
        # original_batch_shape with extra batch dims
        error_msg = (
            "The trailing batch dimensions of X must match the trailing "
            "batch dimensions of the training inputs."
        )
        _mul_broadcast_shape(X_batch_shape, original_batch_shape, error_msg=error_msg)
    # insert `m` dimension
    X = X.unsqueeze(-3)
    output_dim_idx = max(len(original_batch_shape), len(X_batch_shape))
    return X, output_dim_idx
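
A minimal usage sketch (hypothetical shapes; assumes the imports above):

# Training inputs had batch shape (2,); X adds a new leading batch dim of size 3.
X = torch.rand(3, 2, 5, 4)  # (new_batch_shape) x (original_batch_shape) x n x d
X_aug, output_dim_idx = add_output_dim(X, original_batch_shape=torch.Size([2]))
print(X_aug.shape)     # torch.Size([3, 2, 1, 5, 4]): output dim inserted at -3
print(output_dim_idx)  # 2 == max(len(original_batch_shape), len(X.shape[:-2]))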
Code example #2
File: higher_order_gp.py  Project: wjmaddox/botorch
    def _get_joint_covariance(self, inputs):
        """
        Internal method to expose the joint test train covariance.
        """

        from gpytorch.models import ExactGP
        from gpytorch.utils.broadcasting import _mul_broadcast_shape

        train_inputs = self.train_inputs
        # Concatenate the input to the training input
        full_inputs = []
        batch_shape = train_inputs[0].shape[:-2]
        for train_input, input in zip(train_inputs, inputs):
            # Make sure the batch shapes agree for training/test data
            # This seems to be deprecated
            # if batch_shape != train_input.shape[:-2]:
            #     batch_shape = _mul_broadcast_shape(
            #         batch_shape, train_input.shape[:-2]
            #     )
            #     train_input = train_input.expand(
            #         *batch_shape, *train_input.shape[-2:]
            #     )
            if batch_shape != input.shape[:-2]:
                batch_shape = _mul_broadcast_shape(batch_shape, input.shape[:-2])
                train_input = train_input.expand(*batch_shape, *train_input.shape[-2:])
                input = input.expand(*batch_shape, *input.shape[-2:])
            full_inputs.append(torch.cat([train_input, input], dim=-2))

        # Get the joint distribution for training/test data
        full_output = super(ExactGP, self).__call__(*full_inputs)
        return full_output.lazy_covariance_matrix
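
For reference, a small sketch of what `_mul_broadcast_shape` itself does (it ships with older gpytorch releases under gpytorch.utils.broadcasting; newer releases use torch.broadcast_shapes instead): it right-aligns its argument shapes and broadcasts them the way an elementwise multiply would, raising a RuntimeError (or the supplied error_msg) when the shapes are incompatible.

import torch
from gpytorch.utils.broadcasting import _mul_broadcast_shape

print(_mul_broadcast_shape(torch.Size([3, 1]), torch.Size([5])))  # torch.Size([3, 5])
try:
    _mul_broadcast_shape(torch.Size([2]), torch.Size([3]))
except RuntimeError as err:
    print(err)  # not broadcastable: sizes 2 and 3 conflict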
Code example #3
    def forward(self, input_set):
        # de-standardize from the GP and send that to our NN model
        output = self.model((input_set * self.x_std) + self.x_mean)
        # re-standardize the output for the GP
        output = (output - self.y_mean) / self.y_std
        if output.shape[:-2] == self.batch_shape:
            return output.squeeze()
        else:
            return output.expand(
                _mul_broadcast_shape(input_set.shape[:-1], output.shape))
Code example #4
    def forward(self, inputs):
        if inputs.shape[:-2] == self.batch_shape:
            if self.ndim == 1:
                a_x = (self.slope * inputs**2).view(-1)
            else:
                a_x = torch.matmul(inputs**2, self.slope)
            return self.constant.expand(inputs.shape[:-1]) + a_x.expand(
                inputs.shape[:-1])
        else:
            return self.slope.expand(
                _mul_broadcast_shape(inputs.shape[:-1], self.constant.shape))
Code example #5
    def forward(self, i1, i2, **params):
        covar_matrix = self.covar_matrix()
        batch_shape = _mul_broadcast_shape(i1.shape[:-2], self.batch_shape)
        index_shape = batch_shape + i1.shape[-2:]

        res = InterpolatedLazyTensor(
            base_lazy_tensor=covar_matrix,
            left_interp_indices=i1.expand(index_shape),
            right_interp_indices=i2.expand(index_shape),
        )
        return res
Code example #6
File: test_utils.py  Project: saitcakmak/botorch
    def test_expand_and_copy_tensor(self):
        for input_batch_shape, batch_shape in product(
            (torch.Size([4, 1]), torch.Size([2, 3, 1])),
            (torch.Size([5]), torch.Size([])),
        ):
            if len(batch_shape) == 0:
                input_batch_shape = input_batch_shape[:-1]
            X = torch.rand(input_batch_shape + torch.Size([2, 1]))
            expand_shape = (
                _mul_broadcast_shape(input_batch_shape, batch_shape) + X.shape[-2:]
            )
            X_tf = expand_and_copy_tensor(X, batch_shape=batch_shape)
            self.assertEqual(X_tf.shape, expand_shape)
            self.assertFalse(X_tf is X.expand(expand_shape))
Code example #7
File: utils.py  Project: saitcakmak/botorch
def expand_and_copy_tensor(X: Tensor, batch_shape: torch.Size) -> Tensor:
    r"""Expand and copy X according to batch_shape.

    Args:
        X: A `input_batch_shape x n x d`-dim tensor of inputs
        batch_shape: The new batch shape

    Returns:
        A `new_batch_shape x n x d`-dim tensor of inputs, where `new_batch_shape`
        is the broadcast of `input_batch_shape` and `batch_shape`.
    """
    err_msg = (f"Provided batch shape ({batch_shape}) and input batch shape "
               f"({X.shape[:-2]}) are not broadcastable.")
    batch_shape = _mul_broadcast_shape(X.shape[:-2],
                                       batch_shape,
                                       error_msg=err_msg)
    expand_shape = batch_shape + X.shape[-2:]
    return X.expand(expand_shape).clone()
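
A quick usage sketch (hypothetical shapes): the result carries the broadcast batch shape, and the trailing .clone() makes it a real copy rather than a view aliasing X.

import torch

X = torch.rand(3, 2)  # n=3, d=2, no batch dimensions
X_tf = expand_and_copy_tensor(X, batch_shape=torch.Size([5]))
print(X_tf.shape)  # torch.Size([5, 3, 2])
X_tf[0, 0, 0] = 0.0  # safe: writes do not propagate back to X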
Code example #8
    @property
    def batch_shape(self) -> torch.Size:
        r"""The batch shape of the model.

        This is a batch shape from an I/O perspective, independent of the internal
        representation of the model (as e.g. in BatchedMultiOutputGPyTorchModel).
        For a model with `m` outputs, a `test_batch_shape x q x d`-shaped input `X`
        to the `posterior` method returns a Posterior object over an output of
        shape `broadcast(test_batch_shape, model.batch_shape) x q x m`.
        """
        batch_shapes = {ti[0].shape[:-2] for ti in self.train_inputs}
        if len(batch_shapes) > 1:
            msg = (
                f"Component models of {self.__class__.__name__} have different "
                "batch shapes")
            try:
                broadcast_shape = _mul_broadcast_shape(*batch_shapes)
                warnings.warn(msg + ". Broadcasting batch shapes.")
                return broadcast_shape
            except RuntimeError:
                raise NotImplementedError(msg + " that are not broadcastable.")
        return next(iter(batch_shapes))
Code example #9
    def forward(self, inputs):
        if inputs.shape[:-2] == self.batch_shape:
            return self.constant.expand(inputs.shape[:-1])
        else:
            return self.constant.expand(
                _mul_broadcast_shape(inputs.shape[:-1], self.constant.shape))
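
For context, a self-contained sketch of the constant-mean pattern above, assuming an older gpytorch that still ships _mul_broadcast_shape (the class name is illustrative):

import torch
from gpytorch.means import Mean
from gpytorch.utils.broadcasting import _mul_broadcast_shape


class SketchConstantMean(Mean):
    def __init__(self, batch_shape=torch.Size()):
        super().__init__()
        self.batch_shape = batch_shape
        # One constant per batch; the trailing size-1 dim broadcasts over the n points.
        self.constant = torch.nn.Parameter(torch.zeros(*batch_shape, 1))

    def forward(self, inputs):
        if inputs.shape[:-2] == self.batch_shape:
            return self.constant.expand(inputs.shape[:-1])
        return self.constant.expand(
            _mul_broadcast_shape(inputs.shape[:-1], self.constant.shape))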
Code example #10
    def forward(self, x):
        """Forward propagate the module.

        This method determines how to marginalize out the inducing function values.
        Specifically, forward defines how to transform a variational distribution over
        the inducing point values, q(u), into a variational distribution over
        the function values at specified locations x, q(f|x), by integrating
        ∫ p(f|x, u) q(u) du.

        Parameters
        ----------
        x (torch.Tensor):
            Locations x at which to evaluate the variational posterior over the
            function values.

        Returns
        -------
            The distribution q(f|x)
        """
        variational_dist = self.variational_distribution.approx_variational_distribution
        inducing_points = self.inducing_points
        inducing_batch_shape = inducing_points.shape[:-2]
        if inducing_batch_shape < x.shape[:-2] or len(
                inducing_batch_shape) < len(x.shape[:-2]):
            batch_shape = _mul_broadcast_shape(inducing_points.shape[:-2],
                                               x.shape[:-2])
            inducing_points = inducing_points.expand(
                *batch_shape, *inducing_points.shape[-2:])
            x = x.expand(*batch_shape, *x.shape[-2:])
            variational_dist = variational_dist.expand(batch_shape)

        # If our points equal the inducing points, we're done
        if torch.equal(x, inducing_points):
            return variational_dist

        # Otherwise, we have to marginalize
        else:
            num_induc = inducing_points.size(-2)
            full_inputs = torch.cat([inducing_points, x], dim=-2)
            full_output = self.model.forward(full_inputs)
            full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

            # Mean terms
            test_mean = full_mean[..., num_induc:]
            induc_mean = full_mean[..., :num_induc]
            mean_diff = (variational_dist.mean - induc_mean).unsqueeze(-1)

            # Covariance terms
            induc_induc_covar = full_covar[..., :num_induc, :num_induc].add_jitter()
            induc_data_covar = full_covar[..., :num_induc, num_induc:].evaluate()
            data_data_covar = full_covar[..., num_induc:, num_induc:]
            aux = variational_dist.lazy_covariance_matrix.root_decomposition()
            root_variational_covar = aux.root.evaluate()

            # If we had to expand the inducing points,
            # shrink the inducing mean and induc_induc_covar dimension
            # This makes everything more computationally efficient
            if len(inducing_batch_shape) < len(induc_induc_covar.batch_shape):
                index = tuple(0 for _ in range(
                    len(induc_induc_covar.batch_shape) -
                    len(inducing_batch_shape)))
                repeat_size = torch.Size(
                    tuple(induc_induc_covar.batch_shape[:len(index)])
                    + tuple(1 for _ in induc_induc_covar.batch_shape[len(index):]))
                induc_induc_covar = BatchRepeatLazyTensor(
                    induc_induc_covar.__getitem__(index), repeat_size)

            # If we're less than a certain size, we'll compute the Cholesky
            # decomposition of induc_induc_covar
            cholesky = False
            if settings.fast_computations.log_prob.off() or (
                    num_induc <= settings.max_cholesky_size.value()):
                induc_induc_covar = CholLazyTensor(
                    induc_induc_covar.cholesky())
                cholesky = True

            # If we are making predictions and don't need variances, we can do things
            # very quickly.
            if not self.training and settings.skip_posterior_variances.on():
                if not hasattr(self, "_mean_cache"):
                    self._mean_cache = induc_induc_covar.inv_matmul(
                        mean_diff).detach()

                predictive_mean = torch.add(
                    test_mean,
                    induc_data_covar.transpose(-2, -1).matmul(
                        self._mean_cache).squeeze(-1))

                predictive_covar = ZeroLazyTensor(test_mean.size(-1),
                                                  test_mean.size(-1))

                return MultivariateNormal(predictive_mean, predictive_covar)

            # Cache the CG results
            # For now: run variational inference without a preconditioner
            # The preconditioner screws things up for some reason
            with settings.max_preconditioner_size(0):
                # Cache the CG results
                left_tensors = torch.cat([mean_diff, root_variational_covar],
                                         -1)
                with torch.no_grad():
                    eager_rhs = torch.cat([left_tensors, induc_data_covar], -1)
                    solve, probe_vecs, probe_vec_norms, probe_vec_solves, tmats = \
                        CachedCGLazyTensor.precompute_terms(
                            induc_induc_covar, eager_rhs.detach(),
                            logdet_terms=(not cholesky),
                            include_tmats=(not settings.skip_logdet_forward.on() and
                                           not cholesky)
                        )
                    eager_rhss = [
                        eager_rhs.detach(),
                        eager_rhs[..., left_tensors.size(-1):].detach(),
                        eager_rhs[..., :left_tensors.size(-1)].detach(),
                    ]
                    solves = [
                        solve.detach(),
                        solve[..., left_tensors.size(-1):].detach(),
                        solve[..., :left_tensors.size(-1)].detach(),
                    ]
                    if settings.skip_logdet_forward.on():
                        eager_rhss.append(
                            torch.cat([probe_vecs, left_tensors], -1))
                        solves.append(
                            torch.cat([
                                probe_vec_solves,
                                solve[..., :left_tensors.size(-1)]
                            ], -1))
                induc_induc_covar = CachedCGLazyTensor(
                    induc_induc_covar,
                    eager_rhss=eager_rhss,
                    solves=solves,
                    probe_vectors=probe_vecs,
                    probe_vector_norms=probe_vec_norms,
                    probe_vector_solves=probe_vec_solves,
                    probe_vector_tmats=tmats,
                )

            if self.training:
                self._memoize_cache["prior_distribution_memo"] = MultivariateNormal(
                    induc_mean, induc_induc_covar)

            # Compute predictive mean/covariance
            inv_products = induc_induc_covar.inv_matmul(
                induc_data_covar, left_tensors.transpose(-1, -2))
            predictive_mean = torch.add(test_mean, inv_products[..., 0, :])
            predictive_covar = RootLazyTensor(
                inv_products[..., 1:, :].transpose(-1, -2))
            if self.training:
                interp_data_data_var, _ = induc_induc_covar.inv_quad_logdet(
                    induc_data_covar, logdet=False, reduce_inv_quad=False)
                data_covariance = DiagLazyTensor(
                    (data_data_covar.diag() - interp_data_data_var).clamp(
                        0, math.inf))
            else:
                neg_induc_data_data_covar = torch.matmul(
                    induc_data_covar.transpose(-1, -2).mul(-1),
                    induc_induc_covar.inv_matmul(induc_data_covar))
                data_covariance = data_data_covar + neg_induc_data_data_covar
            predictive_covar = PsdSumLazyTensor(predictive_covar,
                                                data_covariance)

            return MultivariateNormal(predictive_mean, predictive_covar)
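
One subtlety in the batch-shape check near the top of forward: torch.Size is a tuple subclass, so `inducing_batch_shape < x.shape[:-2]` is a lexicographic tuple comparison rather than an elementwise broadcast test, which is why the explicit length comparison is also needed. A quick illustration:

import torch

print(torch.Size([2, 3]) < torch.Size([2, 4]))  # True: compared like tuples
print(torch.Size([2]) < torch.Size([2, 4]))     # True: a shorter prefix sorts first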
Code example #11
    def __call__(self, *args, **kwargs):
        train_inputs = list(self.train_inputs) if self.train_inputs is not None else []
        inputs = [i.unsqueeze(-1) if i.ndimension() == 1 else i for i in args]

        # Training mode: optimizing
        if self.training:
            if self.train_inputs is None:
                raise RuntimeError(
                    "train_inputs, train_targets cannot be None in training mode. "
                    "Call .eval() for prior predictions, or call .set_train_data() to add training data."
                )
            if settings.debug.on():
                if not all(
                    torch.equal(train_input, input)
                    for train_input, input in zip(train_inputs, inputs)
                ):
                    raise RuntimeError("You must train on the training inputs!")
            res = super().__call__(*inputs, **kwargs)
            return res

        # Prior mode
        elif settings.prior_mode.on() or self.train_inputs is None or self.train_targets is None:
            full_inputs = args
            full_output = super(ExactTP, self).__call__(*full_inputs, **kwargs)
            if settings.debug.on():
                if not isinstance(full_output, MultivariateStudentT):
                    raise RuntimeError("ExactTP.forward must return a MultivariateStudentT")
            return full_output

        # Posterior mode
        else:
            if settings.debug.on():
                if all(
                    torch.equal(train_input, input)
                    for train_input, input in zip(train_inputs, inputs)
                ):
                    warnings.warn(
                        "The input matches the stored training data. Did you forget to call model.train()?",
                        GPInputWarning,
                    )

            # Get the terms that only depend on training data
            if self.prediction_strategy is None:
                train_output = super().__call__(*train_inputs, **kwargs)

                # Create the prediction strategy
                self.prediction_strategy = prediction_strategy(
                    train_inputs=train_inputs,
                    train_prior_dist=train_output,
                    train_labels=self.train_targets,
                    likelihood=self.likelihood,
                )

            # Concatenate the input to the training input
            full_inputs = []
            batch_shape = train_inputs[0].shape[:-2]
            for train_input, input in zip(train_inputs, inputs):
                # Make sure the batch shapes agree for training/test data
                if batch_shape != train_input.shape[:-2]:
                    batch_shape = _mul_broadcast_shape(batch_shape, train_input.shape[:-2])
                    train_input = train_input.expand(*batch_shape, *train_input.shape[-2:])
                if batch_shape != input.shape[:-2]:
                    batch_shape = _mul_broadcast_shape(batch_shape, input.shape[:-2])
                    train_input = train_input.expand(*batch_shape, *train_input.shape[-2:])
                    input = input.expand(*batch_shape, *input.shape[-2:])
                full_inputs.append(torch.cat([train_input, input], dim=-2))

            # Get the joint distribution for training/test data
            full_output = super(ExactTP, self).__call__(*full_inputs, **kwargs)
            if settings.debug.on():
                if not isinstance(full_output, MultivariateStudentT):
                    raise RuntimeError("ExactTP.forward must return a MultivariateStudentT")
            full_mean, full_covar = full_output.loc, full_output.lazy_covariance_matrix

            # Determine the shape of the joint distribution
            batch_shape = full_output.batch_shape
            joint_shape = full_output.event_shape
            tasks_shape = joint_shape[1:]  # For multitask learning
            test_shape = torch.Size(
                [joint_shape[0] - self.prediction_strategy.train_shape[0], *tasks_shape]
            )

            # Make the prediction
            with settings._use_eval_tolerance():
                predictive_mean, predictive_covar = self.prediction_strategy.exact_prediction(
                    full_mean, full_covar
                )

            # Reshape predictive mean to match the appropriate event shape
            predictive_mean = predictive_mean.view(*batch_shape, *test_shape).contiguous()
            return full_output.__class__(
                predictive_mean, predictive_covar, full_output.nu, full_output.data_num
            )
Code example #12
    def get_fantasy_model(self, inputs, targets, **kwargs):
        """
        Returns a new TP model that incorporates the specified inputs and targets as new training data.

        Using this method is more efficient than updating with `set_train_data` when the number of inputs is relatively
        small, because any computed test-time caches will be updated in linear time rather than computed from scratch.

        .. note::
            If `targets` is a batch (e.g. `b x m`), then the TP returned from this method will be a batch mode TP.
            If `inputs` is of the same (or lesser) dimension as `targets`, then it is assumed that the fantasy points
            are the same for each target batch.

        :param torch.Tensor inputs: (`b1 x ... x bk x m x d` or `f x b1 x ... x bk x m x d`) Locations of fantasy
            observations.
        :param torch.Tensor targets: (`b1 x ... x bk x m` or `f x b1 x ... x bk x m`) Labels of fantasy observations.
        :return: An `ExactTP` model with `n + m` training examples, where the `m` fantasy examples have been added
            and all test-time caches have been updated.
        :rtype: ~gpytorch.models.ExactTP
        """
        if self.prediction_strategy is None:
            raise RuntimeError(
                "Fantasy observations can only be added after making predictions with a model so that "
                "all test independent caches exist. Call the model on some data first!"
            )

        model_batch_shape = self.train_inputs[0].shape[:-2]

        if self.train_targets.dim() > len(model_batch_shape) + 1:
            raise RuntimeError(
                "Cannot yet add fantasy observations to multitask GPs, but this is coming soon!"
            )

        if not isinstance(inputs, list):
            inputs = [inputs]

        inputs = [i.unsqueeze(-1) if i.ndimension() == 1 else i for i in inputs]

        target_batch_shape = targets.shape[:-1]
        input_batch_shape = inputs[0].shape[:-2]
        tbdim, ibdim = len(target_batch_shape), len(input_batch_shape)

        if not (tbdim == ibdim + 1 or tbdim == ibdim):
            raise RuntimeError(
                f"Unsupported batch shapes: The target batch shape ({target_batch_shape}) must have either the "
                f"same dimension as or one more dimension than the input batch shape ({input_batch_shape})"
            )

        # Check whether we can properly broadcast batch dimensions
        err_msg = (
            f"Model batch shape ({model_batch_shape}) and target batch shape "
            f"({target_batch_shape}) are not broadcastable."
        )
        _mul_broadcast_shape(model_batch_shape, target_batch_shape, error_msg=err_msg)

        if len(model_batch_shape) > len(input_batch_shape):
            input_batch_shape = model_batch_shape
        if len(model_batch_shape) > len(target_batch_shape):
            target_batch_shape = model_batch_shape

        # If input has no fantasy batch dimension but target does, we can save memory and computation by not
        # computing the covariance for each element of the batch. Therefore we don't expand the inputs to the
        # size of the fantasy model here - this is done below, after the evaluation and fast fantasy update
        train_inputs = [
            tin.expand(input_batch_shape + tin.shape[-2:]) for tin in self.train_inputs
        ]
        train_targets = self.train_targets.expand(
            target_batch_shape + self.train_targets.shape[-1:]
        )

        full_inputs = [
            torch.cat([train_input, input.expand(input_batch_shape + input.shape[-2:])], dim=-2)
            for train_input, input in zip(train_inputs, inputs)
        ]
        full_targets = torch.cat(
            [train_targets, targets.expand(target_batch_shape + targets.shape[-1:])], dim=-1
        )

        try:
            fantasy_kwargs = {"noise": kwargs.pop("noise")}
        except KeyError:
            fantasy_kwargs = {}

        full_output = super(ExactTP, self).__call__(*full_inputs, **kwargs)

        # Copy model without copying training data or prediction strategy (since we'll overwrite those)
        old_pred_strat = self.prediction_strategy
        old_train_inputs = self.train_inputs
        old_train_targets = self.train_targets
        old_likelihood = self.likelihood
        self.prediction_strategy = None
        self.train_inputs = None
        self.train_targets = None
        self.likelihood = None
        new_model = deepcopy(self)
        self.prediction_strategy = old_pred_strat
        self.train_inputs = old_train_inputs
        self.train_targets = old_train_targets
        self.likelihood = old_likelihood

        new_model.likelihood = old_likelihood.get_fantasy_likelihood(**fantasy_kwargs)
        new_model.prediction_strategy = old_pred_strat.get_fantasy_strategy(
            inputs, targets, full_inputs, full_targets, full_output, **fantasy_kwargs
        )

        # if the fantasies are at the same points, we need to expand the inputs for the new model
        if tbdim == ibdim + 1:
            new_model.train_inputs = [
                fi.expand(target_batch_shape + fi.shape[-2:]) for fi in full_inputs
            ]
        else:
            new_model.train_inputs = full_inputs
        new_model.train_targets = full_targets

        return new_model
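
A hypothetical end-to-end sketch of the intended call order (model, test_X, new_X, new_Y are placeholder names): the model must be evaluated once so the prediction strategy and its caches exist, after which get_fantasy_model can fold in the new observations.

model.eval()
with torch.no_grad():
    _ = model(test_X)  # populates self.prediction_strategy and its caches

# Returns a new ExactTP with n + m training points and updated caches.
fantasy_model = model.get_fantasy_model(new_X, new_Y)
posterior = fantasy_model(test_X)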