def add_output_dim(X: Tensor, original_batch_shape: torch.Size) -> Tuple[Tensor, int]:
    r"""Insert the output dimension at the correct location.

    The trailing batch dimensions of X must match the original batch dimensions
    of the training inputs, but can also include extra batch dimensions.

    Args:
        X: A `(new_batch_shape) x (original_batch_shape) x n x d` tensor of
            features.
        original_batch_shape: the batch shape of the model's training inputs.

    Returns:
        2-element tuple containing

        - A `(new_batch_shape) x (original_batch_shape) x m x n x d` tensor of
            features.
        - The index corresponding to the output dimension.
    """
    X_batch_shape = X.shape[:-2]
    if len(X_batch_shape) > 0 and len(original_batch_shape) > 0:
        # check that X_batch_shape supports broadcasting or augments
        # original_batch_shape with extra batch dims
        error_msg = (
            "The trailing batch dimensions of X must match the trailing "
            "batch dimensions of the training inputs."
        )
        _mul_broadcast_shape(X_batch_shape, original_batch_shape, error_msg=error_msg)
    # insert `m` dimension
    X = X.unsqueeze(-3)
    output_dim_idx = max(len(original_batch_shape), len(X_batch_shape))
    return X, output_dim_idx
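# Usage sketch (illustrative, not part of the original module; assumes `torch` and
# `add_output_dim` are importable and the model was trained with batch shape
# `torch.Size([2])`). It shows where the singleton output dimension is inserted:
#
#     X = torch.rand(3, 2, 5, 4)                            # new_batch x orig_batch x n x d
#     X_aug, out_idx = add_output_dim(X, torch.Size([2]))
#     assert X_aug.shape == torch.Size([3, 2, 1, 5, 4])
#     assert out_idx == 2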
def _get_joint_covariance(self, inputs):
    """
    Internal method to expose the joint test-train covariance.
    """
    from gpytorch.models import ExactGP
    from gpytorch.utils.broadcasting import _mul_broadcast_shape

    train_inputs = self.train_inputs
    # Concatenate the input to the training input
    full_inputs = []
    batch_shape = train_inputs[0].shape[:-2]
    for train_input, input in zip(train_inputs, inputs):
        # Make sure the batch shapes agree for training/test data
        # This block seems to be deprecated:
        # if batch_shape != train_input.shape[:-2]:
        #     batch_shape = _mul_broadcast_shape(batch_shape, train_input.shape[:-2])
        #     train_input = train_input.expand(*batch_shape, *train_input.shape[-2:])
        if batch_shape != input.shape[:-2]:
            batch_shape = _mul_broadcast_shape(batch_shape, input.shape[:-2])
            train_input = train_input.expand(*batch_shape, *train_input.shape[-2:])
            input = input.expand(*batch_shape, *input.shape[-2:])
        full_inputs.append(torch.cat([train_input, input], dim=-2))

    # Get the joint distribution for training/test data
    full_output = super(ExactGP, self).__call__(*full_inputs)
    return full_output.lazy_covariance_matrix
def forward(self, input_set):
    # de-standardize from the GP and send that to our NN model
    output = self.model((input_set * self.x_std) + self.x_mean)
    # re-standardize the output for the GP
    output = (output - self.y_mean) / self.y_std
    if output.shape[:-2] == self.batch_shape:
        return output.squeeze()
    else:
        return output.expand(
            _mul_broadcast_shape(input_set.shape[:-1], output.shape)
        )
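# Context sketch (assumed, not from the original file): the standardization buffers
# used above would typically be computed once from the training data, e.g.
#
#     x_mean = train_x.mean(dim=-2, keepdim=True)
#     x_std = train_x.std(dim=-2, keepdim=True)
#     y_mean, y_std = train_y.mean(), train_y.std()
#
# so the wrapped NN sees inputs on their original scale while the GP works with
# standardized targets.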
def forward(self, inputs):
    if inputs.shape[:-2] == self.batch_shape:
        if self.ndim == 1:
            a_x = (self.slope * inputs ** 2).view(-1)
        else:
            a_x = torch.matmul(inputs ** 2, self.slope)
        return self.constant.expand(inputs.shape[:-1]) + a_x.expand(inputs.shape[:-1])
    else:
        return self.slope.expand(
            _mul_broadcast_shape(inputs.shape[:-1], self.constant.shape)
        )
def forward(self, i1, i2, **params):
    covar_matrix = self.covar_matrix()
    batch_shape = _mul_broadcast_shape(i1.shape[:-2], self.batch_shape)
    index_shape = batch_shape + i1.shape[-2:]
    res = InterpolatedLazyTensor(
        base_lazy_tensor=covar_matrix,
        left_interp_indices=i1.expand(index_shape),
        right_interp_indices=i2.expand(index_shape),
    )
    return res
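# Usage sketch (illustrative; this forward follows the pattern of an index/task
# kernel, where `i1` and `i2` are integer task indices of shape `... x n x 1` and
# `covar_matrix` is the `num_tasks x num_tasks` task covariance):
#
#     i = torch.zeros(10, 1, dtype=torch.long)   # all ten points belong to task 0
#     K = kernel(i, i).evaluate()                # 10 x 10, every entry covar_matrix[0, 0]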
def test_expand_and_copy_tensor(self):
    for input_batch_shape, batch_shape in product(
        (torch.Size([4, 1]), torch.Size([2, 3, 1])),
        (torch.Size([5]), torch.Size([])),
    ):
        if len(batch_shape) == 0:
            input_batch_shape = input_batch_shape[:-1]
        X = torch.rand(input_batch_shape + torch.Size([2, 1]))
        expand_shape = (
            _mul_broadcast_shape(input_batch_shape, batch_shape) + X.shape[-2:]
        )
        X_tf = expand_and_copy_tensor(X, batch_shape=batch_shape)
        self.assertEqual(X_tf.shape, expand_shape)
        self.assertFalse(X_tf is X.expand(expand_shape))
def expand_and_copy_tensor(X: Tensor, batch_shape: torch.Size) -> Tensor:
    r"""Expand and copy X according to batch_shape.

    Args:
        X: A `input_batch_shape x n x d`-dim tensor of inputs.
        batch_shape: The new batch shape.

    Returns:
        A `new_batch_shape x n x d`-dim tensor of inputs, where `new_batch_shape`
        is `input_batch_shape` broadcast with `batch_shape`.
    """
    err_msg = (
        f"Provided batch shape ({batch_shape}) and input batch shape "
        f"({X.shape[:-2]}) are not broadcastable."
    )
    batch_shape = _mul_broadcast_shape(X.shape[:-2], batch_shape, error_msg=err_msg)
    expand_shape = batch_shape + X.shape[-2:]
    return X.expand(expand_shape).clone()
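# Usage sketch (illustrative; mirrors the shapes exercised by the test above):
#
#     X = torch.rand(4, 1, 2, 1)                          # input_batch_shape x n x d
#     X_tf = expand_and_copy_tensor(X, batch_shape=torch.Size([5]))
#     assert X_tf.shape == torch.Size([4, 5, 2, 1])       # broadcast of (4, 1) and (5,)
#     assert X_tf.data_ptr() != X.data_ptr()              # `.clone()` copies the storage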
def batch_shape(self) -> torch.Size:
    r"""The batch shape of the model.

    This is a batch shape from an I/O perspective, independent of the internal
    representation of the model (as e.g. in BatchedMultiOutputGPyTorchModel).
    For a model with `m` outputs, a `test_batch_shape x q x d`-shaped input `X`
    to the `posterior` method returns a Posterior object over an output of
    shape `broadcast(test_batch_shape, model.batch_shape) x q x m`.
    """
    batch_shapes = {ti[0].shape[:-2] for ti in self.train_inputs}
    if len(batch_shapes) > 1:
        msg = (
            f"Component models of {self.__class__.__name__} have different "
            "batch shapes"
        )
        try:
            broadcast_shape = _mul_broadcast_shape(*batch_shapes)
            warnings.warn(msg + ". Broadcasting batch shapes.")
            return broadcast_shape
        except RuntimeError:
            raise NotImplementedError(msg + " that are not broadcastable.")
    return next(iter(batch_shapes))
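# Behavior sketch (illustrative; `model` is assumed to be a multi-output container
# whose `train_inputs` collects the training inputs of its component models):
#
#     # component batch shapes (2,) and (1,) are broadcastable:
#     model.batch_shape    # warns "Broadcasting batch shapes.", returns torch.Size([2])
#     # component batch shapes (2,) and (3,) are not broadcastable:
#     model.batch_shape    # raises NotImplementedError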
def forward(self, inputs):
    if inputs.shape[:-2] == self.batch_shape:
        return self.constant.expand(inputs.shape[:-1])
    else:
        return self.constant.expand(
            _mul_broadcast_shape(inputs.shape[:-1], self.constant.shape)
        )
def forward(self, x):
    """Forward propagate the module.

    This method determines how to marginalize out the inducing function values.
    Specifically, forward defines how to transform a variational distribution
    over the inducing point values, q(u), into a variational distribution over
    the function values at specified locations x, q(f|x), by integrating
    p(f|x, u)q(u)du

    Parameters
    ----------
    x (torch.tensor): Locations x to get the variational posterior of the
        function values at.

    Returns
    -------
    The distribution q(f|x)
    """
    variational_dist = self.variational_distribution.approx_variational_distribution
    inducing_points = self.inducing_points
    inducing_batch_shape = inducing_points.shape[:-2]
    if inducing_batch_shape < x.shape[:-2] or len(inducing_batch_shape) < len(x.shape[:-2]):
        batch_shape = _mul_broadcast_shape(inducing_points.shape[:-2], x.shape[:-2])
        inducing_points = inducing_points.expand(*batch_shape, *inducing_points.shape[-2:])
        x = x.expand(*batch_shape, *x.shape[-2:])
        variational_dist = variational_dist.expand(batch_shape)

    # If our points equal the inducing points, we're done
    if torch.equal(x, inducing_points):
        return variational_dist

    # Otherwise, we have to marginalize
    else:
        num_induc = inducing_points.size(-2)
        full_inputs = torch.cat([inducing_points, x], dim=-2)
        full_output = self.model.forward(full_inputs)
        full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

        # Mean terms
        test_mean = full_mean[..., num_induc:]
        induc_mean = full_mean[..., :num_induc]
        mean_diff = (variational_dist.mean - induc_mean).unsqueeze(-1)

        # Covariance terms
        induc_induc_covar = full_covar[..., :num_induc, :num_induc].add_jitter()
        induc_data_covar = full_covar[..., :num_induc, num_induc:].evaluate()
        data_data_covar = full_covar[..., num_induc:, num_induc:]
        aux = variational_dist.lazy_covariance_matrix.root_decomposition()
        root_variational_covar = aux.root.evaluate()

        # If we had to expand the inducing points, shrink the inducing mean and
        # induc_induc_covar dimension.
        # This makes everything more computationally efficient.
        if len(inducing_batch_shape) < len(induc_induc_covar.batch_shape):
            index = tuple(
                0 for _ in range(len(induc_induc_covar.batch_shape) - len(inducing_batch_shape))
            )
            repeat_size = torch.Size(
                tuple(induc_induc_covar.batch_shape[: len(index)])
                + tuple(1 for _ in induc_induc_covar.batch_shape[len(index):])
            )
            induc_induc_covar = BatchRepeatLazyTensor(
                induc_induc_covar.__getitem__(index), repeat_size
            )

        # If we're less than a certain size, we'll compute the Cholesky
        # decomposition of induc_induc_covar
        cholesky = False
        if settings.fast_computations.log_prob.off() or (
            num_induc <= settings.max_cholesky_size.value()
        ):
            induc_induc_covar = CholLazyTensor(induc_induc_covar.cholesky())
            cholesky = True

        # If we are making predictions and don't need variances, we can do things
        # very quickly.
        if not self.training and settings.skip_posterior_variances.on():
            if not hasattr(self, "_mean_cache"):
                self._mean_cache = induc_induc_covar.inv_matmul(mean_diff).detach()
            predictive_mean = torch.add(
                test_mean,
                induc_data_covar.transpose(-2, -1).matmul(self._mean_cache).squeeze(-1),
            )
            predictive_covar = ZeroLazyTensor(test_mean.size(-1), test_mean.size(-1))
            return MultivariateNormal(predictive_mean, predictive_covar)

        # Cache the CG results
        # For now: run variational inference without a preconditioner
        # The preconditioner screws things up for some reason
        with settings.max_preconditioner_size(0):
            # Cache the CG results
            left_tensors = torch.cat([mean_diff, root_variational_covar], -1)
            with torch.no_grad():
                eager_rhs = torch.cat([left_tensors, induc_data_covar], -1)
                solve, probe_vecs, probe_vec_norms, probe_vec_solves, tmats = (
                    CachedCGLazyTensor.precompute_terms(
                        induc_induc_covar,
                        eager_rhs.detach(),
                        logdet_terms=(not cholesky),
                        include_tmats=(not settings.skip_logdet_forward.on() and not cholesky),
                    )
                )
                eager_rhss = [
                    eager_rhs.detach(),
                    eager_rhs[..., left_tensors.size(-1):].detach(),
                    eager_rhs[..., : left_tensors.size(-1)].detach(),
                ]
                solves = [
                    solve.detach(),
                    solve[..., left_tensors.size(-1):].detach(),
                    solve[..., : left_tensors.size(-1)].detach(),
                ]
                if settings.skip_logdet_forward.on():
                    eager_rhss.append(torch.cat([probe_vecs, left_tensors], -1))
                    solves.append(
                        torch.cat([probe_vec_solves, solve[..., : left_tensors.size(-1)]], -1)
                    )

            induc_induc_covar = CachedCGLazyTensor(
                induc_induc_covar,
                eager_rhss=eager_rhss,
                solves=solves,
                probe_vectors=probe_vecs,
                probe_vector_norms=probe_vec_norms,
                probe_vector_solves=probe_vec_solves,
                probe_vector_tmats=tmats,
            )

        if self.training:
            self._memoize_cache["prior_distribution_memo"] = MultivariateNormal(
                induc_mean, induc_induc_covar
            )

        # Compute predictive mean/covariance
        inv_products = induc_induc_covar.inv_matmul(
            induc_data_covar, left_tensors.transpose(-1, -2)
        )
        predictive_mean = torch.add(test_mean, inv_products[..., 0, :])
        predictive_covar = RootLazyTensor(inv_products[..., 1:, :].transpose(-1, -2))

        if self.training:
            interp_data_data_var, _ = induc_induc_covar.inv_quad_logdet(
                induc_data_covar, logdet=False, reduce_inv_quad=False
            )
            data_covariance = DiagLazyTensor(
                (data_data_covar.diag() - interp_data_data_var).clamp(0, math.inf)
            )
        else:
            neg_induc_data_data_covar = torch.matmul(
                induc_data_covar.transpose(-1, -2).mul(-1),
                induc_induc_covar.inv_matmul(induc_data_covar),
            )
            data_covariance = data_data_covar + neg_induc_data_data_covar
        predictive_covar = PsdSumLazyTensor(predictive_covar, data_covariance)

        return MultivariateNormal(predictive_mean, predictive_covar)
def __call__(self, *args, **kwargs):
    train_inputs = list(self.train_inputs) if self.train_inputs is not None else []
    inputs = [i.unsqueeze(-1) if i.ndimension() == 1 else i for i in args]

    # Training mode: optimizing
    if self.training:
        if self.train_inputs is None:
            raise RuntimeError(
                "train_inputs, train_targets cannot be None in training mode. "
                "Call .eval() for prior predictions, or call .set_train_data() to add training data."
            )
        if settings.debug.on():
            if not all(
                torch.equal(train_input, input)
                for train_input, input in zip(train_inputs, inputs)
            ):
                raise RuntimeError("You must train on the training inputs!")
        res = super().__call__(*inputs, **kwargs)
        return res

    # Prior mode
    elif settings.prior_mode.on() or self.train_inputs is None or self.train_targets is None:
        full_inputs = args
        full_output = super(ExactTP, self).__call__(*full_inputs, **kwargs)
        if settings.debug.on():
            if not isinstance(full_output, MultivariateStudentT):
                raise RuntimeError("ExactTP.forward must return a MultivariateStudentT")
        return full_output

    # Posterior mode
    else:
        if settings.debug.on():
            if all(
                torch.equal(train_input, input)
                for train_input, input in zip(train_inputs, inputs)
            ):
                warnings.warn(
                    "The input matches the stored training data. Did you forget to call model.train()?",
                    GPInputWarning,
                )

        # Get the terms that only depend on training data
        if self.prediction_strategy is None:
            train_output = super().__call__(*train_inputs, **kwargs)

            # Create the prediction strategy
            self.prediction_strategy = prediction_strategy(
                train_inputs=train_inputs,
                train_prior_dist=train_output,
                train_labels=self.train_targets,
                likelihood=self.likelihood,
            )

        # Concatenate the input to the training input
        full_inputs = []
        batch_shape = train_inputs[0].shape[:-2]
        for train_input, input in zip(train_inputs, inputs):
            # Make sure the batch shapes agree for training/test data
            if batch_shape != train_input.shape[:-2]:
                batch_shape = _mul_broadcast_shape(batch_shape, train_input.shape[:-2])
                train_input = train_input.expand(*batch_shape, *train_input.shape[-2:])
            if batch_shape != input.shape[:-2]:
                batch_shape = _mul_broadcast_shape(batch_shape, input.shape[:-2])
                train_input = train_input.expand(*batch_shape, *train_input.shape[-2:])
                input = input.expand(*batch_shape, *input.shape[-2:])
            full_inputs.append(torch.cat([train_input, input], dim=-2))

        # Get the joint distribution for training/test data
        full_output = super(ExactTP, self).__call__(*full_inputs, **kwargs)
        if settings.debug.on():
            if not isinstance(full_output, MultivariateStudentT):
                raise RuntimeError("ExactTP.forward must return a MultivariateStudentT")
        full_mean, full_covar = full_output.loc, full_output.lazy_covariance_matrix

        # Determine the shape of the joint distribution
        batch_shape = full_output.batch_shape
        joint_shape = full_output.event_shape
        tasks_shape = joint_shape[1:]  # For multitask learning
        test_shape = torch.Size(
            [joint_shape[0] - self.prediction_strategy.train_shape[0], *tasks_shape]
        )

        # Make the prediction
        with settings._use_eval_tolerance():
            predictive_mean, predictive_covar = self.prediction_strategy.exact_prediction(
                full_mean, full_covar
            )

        # Reshape predictive mean to match the appropriate event shape
        predictive_mean = predictive_mean.view(*batch_shape, *test_shape).contiguous()
        return full_output.__class__(
            predictive_mean, predictive_covar, full_output.nu, full_output.data_num
        )
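# Usage sketch (illustrative; `model` is assumed to be an ExactTP subclass constructed
# with training data and a likelihood, following the three modes handled above):
#
#     model.train()
#     output = model(train_x)      # prior at the training inputs, e.g. for the marginal log likelihood
#     model.eval()
#     with torch.no_grad():
#         pred = model(test_x)     # Student-T posterior at the test inputs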
def get_fantasy_model(self, inputs, targets, **kwargs):
    """
    Returns a new TP model that incorporates the specified inputs and targets as new training data.

    Using this method is more efficient than updating with `set_train_data` when the number
    of inputs is relatively small, because any computed test-time caches will be updated in
    linear time rather than computed from scratch.

    .. note::
        If `targets` is a batch (e.g. `b x m`), then the TP returned from this method will be
        a batch mode TP. If `inputs` is of the same (or lesser) dimension as `targets`, then
        it is assumed that the fantasy points are the same for each target batch.

    :param torch.Tensor inputs: (`b1 x ... x bk x m x d` or `f x b1 x ... x bk x m x d`)
        Locations of fantasy observations.
    :param torch.Tensor targets: (`b1 x ... x bk x m` or `f x b1 x ... x bk x m`)
        Labels of fantasy observations.
    :return: An `ExactTP` model with `n + m` training examples, where the `m` fantasy examples
        have been added and all test-time caches have been updated.
    :rtype: ~gpytorch.models.ExactTP
    """
    if self.prediction_strategy is None:
        raise RuntimeError(
            "Fantasy observations can only be added after making predictions with a model so that "
            "all test independent caches exist. Call the model on some data first!"
        )

    model_batch_shape = self.train_inputs[0].shape[:-2]

    if self.train_targets.dim() > len(model_batch_shape) + 1:
        raise RuntimeError("Cannot yet add fantasy observations to multitask GPs, but this is coming soon!")

    if not isinstance(inputs, list):
        inputs = [inputs]

    inputs = [i.unsqueeze(-1) if i.ndimension() == 1 else i for i in inputs]

    target_batch_shape = targets.shape[:-1]
    input_batch_shape = inputs[0].shape[:-2]
    tbdim, ibdim = len(target_batch_shape), len(input_batch_shape)

    if not (tbdim == ibdim + 1 or tbdim == ibdim):
        raise RuntimeError(
            f"Unsupported batch shapes: The target batch shape ({target_batch_shape}) must have either the "
            f"same dimension as or one more dimension than the input batch shape ({input_batch_shape})"
        )

    # Check whether we can properly broadcast batch dimensions
    err_msg = (
        f"Model batch shape ({model_batch_shape}) and target batch shape "
        f"({target_batch_shape}) are not broadcastable."
    )
    _mul_broadcast_shape(model_batch_shape, target_batch_shape, error_msg=err_msg)

    if len(model_batch_shape) > len(input_batch_shape):
        input_batch_shape = model_batch_shape
    if len(model_batch_shape) > len(target_batch_shape):
        target_batch_shape = model_batch_shape

    # If input has no fantasy batch dimension but target does, we can save memory and computation
    # by not computing the covariance for each element of the batch. Therefore we don't expand the
    # inputs to the size of the fantasy model here - this is done below, after the evaluation and
    # fast fantasy update.
    train_inputs = [tin.expand(input_batch_shape + tin.shape[-2:]) for tin in self.train_inputs]
    train_targets = self.train_targets.expand(target_batch_shape + self.train_targets.shape[-1:])

    full_inputs = [
        torch.cat([train_input, input.expand(input_batch_shape + input.shape[-2:])], dim=-2)
        for train_input, input in zip(train_inputs, inputs)
    ]
    full_targets = torch.cat(
        [train_targets, targets.expand(target_batch_shape + targets.shape[-1:])], dim=-1
    )

    try:
        fantasy_kwargs = {"noise": kwargs.pop("noise")}
    except KeyError:
        fantasy_kwargs = {}

    full_output = super(ExactTP, self).__call__(*full_inputs, **kwargs)

    # Copy model without copying training data or prediction strategy (since we'll overwrite those)
    old_pred_strat = self.prediction_strategy
    old_train_inputs = self.train_inputs
    old_train_targets = self.train_targets
    old_likelihood = self.likelihood
    self.prediction_strategy = None
    self.train_inputs = None
    self.train_targets = None
    self.likelihood = None
    new_model = deepcopy(self)
    self.prediction_strategy = old_pred_strat
    self.train_inputs = old_train_inputs
    self.train_targets = old_train_targets
    self.likelihood = old_likelihood

    new_model.likelihood = old_likelihood.get_fantasy_likelihood(**fantasy_kwargs)
    new_model.prediction_strategy = old_pred_strat.get_fantasy_strategy(
        inputs, targets, full_inputs, full_targets, full_output, **fantasy_kwargs
    )

    # If the fantasies are at the same points, we need to expand the inputs for the new model
    if tbdim == ibdim + 1:
        new_model.train_inputs = [fi.expand(target_batch_shape + fi.shape[-2:]) for fi in full_inputs]
    else:
        new_model.train_inputs = full_inputs
    new_model.train_targets = full_targets

    return new_model
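# Usage sketch (illustrative; `model` is assumed to be a trained ExactTP model that has
# already been called on some test data, so its prediction caches exist):
#
#     new_x = torch.rand(5, train_x.size(-1))    # 5 fantasy locations
#     new_y = torch.randn(5)                     # fantasy observations
#     fantasy_model = model.get_fantasy_model(new_x, new_y)
#     # fantasy_model has n + 5 training examples and updated test-time caches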