def test_batch_sample(self):
    block_tensor = self.blocks.clone().requires_grad_(True)
    res = BlockDiagLazyTensor(NonLazyTensor(block_tensor), num_blocks=4)
    actual = res.evaluate()
    with gpytorch.settings.max_root_decomposition_size(1000):
        samples = res.zero_mean_mvn_samples(10000)
        sample_covar = samples.unsqueeze(-1).matmul(samples.unsqueeze(-2)).mean(0)
    self.assertLess(
        ((sample_covar - actual).abs() / actual.abs().clamp(1, 1e5)).max().item(),
        2e-1,
    )
def mask_dependent_covar(self, M1s, U1, M2s, U2, covar_xx):
    # Assume M1s and M2s are sorted in descending order, so all mask-1 entries
    # come before all mask-0 entries.
    B = M1s.shape[:-1]
    M1s = M1s[..., 0]
    idxs1 = torch.nonzero(M1s - torch.ones_like(M1s))
    idxend1 = torch.min(idxs1).item() if idxs1.numel() else M1s.size(-1)
    assert (M1s[..., idxend1:] == 0).all()
    U1s = U1[..., :idxend1, :]

    M2s = M2s[..., 0]
    idxs2 = torch.nonzero(M2s - torch.ones_like(M2s))
    idxend2 = torch.min(idxs2).item() if idxs2.numel() else M2s.size(-1)
    assert (M2s[..., idxend2:] == 0).all()
    U2s = U2[..., :idxend2, :]

    V = ensurelazy(self.task_covar_module.V.covar_matrix)
    U = ensurelazy(self.task_covar_module.U.covar_matrix)
    Kxx = ensurelazy(covar_xx)

    # Split Kxx into the four blocks induced by the mask boundaries.
    k_xx_22 = Kxx[idxend1:, idxend2:]
    if k_xx_22.numel():
        Kij_xx_22 = self.kernel2(k_xx_22, V, U)
    k_xx_11 = Kxx[:idxend1, :idxend2]
    if k_xx_11.numel():
        H1 = BlockDiagLazyTensor(NonLazyTensor(U1s.unsqueeze(1)))
        H2 = BlockDiagLazyTensor(NonLazyTensor(U2s.unsqueeze(1)))
        Kij_xx_11 = self.kernel1(k_xx_11, H1, H2, V, U)

    if k_xx_11.numel() and k_xx_22.numel():
        # Both diagonal blocks are non-empty, so compute the off-diagonal
        # (cross-correlation) blocks and stitch all four blocks together.
        k_xx_12 = Kxx[:idxend1, idxend2:]
        assert k_xx_12.numel()
        Kij_xx_12 = self.correlation_kernel_12(k_xx_12, H1, V, U)
        k_xx_21 = Kxx[idxend1:, :idxend2]
        assert k_xx_21.numel()
        Kij_xx_21 = self.correlation_kernel_12(k_xx_21.t(), H2, V, U).t()
        Kij_xx = lazycat([
            lazycat([Kij_xx_11, Kij_xx_12], dim=1),
            lazycat([Kij_xx_21, Kij_xx_22], dim=1),
        ], dim=0)
        return Kij_xx
    elif k_xx_22.numel():
        return Kij_xx_22
    else:
        assert k_xx_11.numel()
        return Kij_xx_11
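# Partition sketch for the block assembly in mask_dependent_covar above
# (illustrative only, using plain torch; as the code reads, indices before
# idxend carry mask value 1 and the remaining indices carry mask value 0):
#
#     import torch
#     Kxx = torch.arange(16.0).view(4, 4)
#     idxend1, idxend2 = 2, 3
#     k_xx_11 = Kxx[:idxend1, :idxend2]   # mask-1 rows vs mask-1 columns
#     k_xx_12 = Kxx[:idxend1, idxend2:]   # mask-1 rows vs mask-0 columns
#     k_xx_21 = Kxx[idxend1:, :idxend2]   # mask-0 rows vs mask-1 columns
#     k_xx_22 = Kxx[idxend1:, idxend2:]   # mask-0 rows vs mask-0 columns
#     # Concatenating [[k_xx_11, k_xx_12], [k_xx_21, k_xx_22]] recovers Kxx,
#     # which mirrors the lazycat assembly of the Kij_xx blocks above.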
def forward(self, x1, x2, diag=False, batch_dims=None, **params):
    if batch_dims == (0, 2):
        raise RuntimeError("MultitaskRBFKernel does not accept the batch_dims argument.")
    # Both task blocks are produced by the same within-task covariance module
    # and stacked to form a block-diagonal multitask covariance.
    covar_x1 = self.within_covar_module(x1, x2, **params)
    covar_x2 = self.within_covar_module(x1, x2, **params)
    for_diag = torch.stack((
        covar_x1.evaluate_kernel()[0].evaluate(),
        covar_x2.evaluate_kernel()[0].evaluate(),
    ))
    res = BlockDiagLazyTensor(NonLazyTensor(for_diag))
    if diag:
        return res.diag()
    else:
        return res
def __call__(self, inputs, are_samples=False, **kwargs):
    """
    Forward data through this hidden GP layer. The output is a
    MultitaskMultivariateNormal distribution (or a MultivariateNormal
    distribution if output_dims=None).

    If the input is a >=2 dimensional Tensor (e.g. `n x d`), we pass the input
    through each hidden GP, resulting in an `n x h` multitask Gaussian
    distribution (where each of the `h` tasks represents an output dimension
    and is independent of the others). We then draw `s` samples from these
    Gaussians, resulting in an `s x n x h` MultitaskMultivariateNormal
    distribution.

    If the input is a >=3 dimensional Tensor and the `are_samples=True` kwarg
    is set, then we assume that the outermost batch dimension is a samples
    dimension. The output will have the same number of samples. For example,
    an `s x b x n x d` input will result in an `s x b x n x h`
    MultitaskMultivariateNormal distribution.

    The goal of these last two points is that if you have a tensor `x` that is
    `n x d`, then

        >>> hidden_gp2(hidden_gp(x))

    will just work, and return a tensor of size `s x n x h2`, where `h2` is the
    output dimensionality of hidden_gp2. In this way, hidden GP layers are
    easily composable.
    """
    deterministic_inputs = not are_samples
    if isinstance(inputs, MultitaskMultivariateNormal):
        inputs = torch.distributions.Normal(loc=inputs.mean, scale=inputs.variance.sqrt()).rsample()
        deterministic_inputs = False

    if settings.debug.on():
        if not torch.is_tensor(inputs):
            raise ValueError(
                "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                f"{inputs.__class__.__name__}"
            )
        if inputs.size(-1) != self.input_dims:
            raise RuntimeError(
                f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                f" expected [{self.input_dims}]"
            )

    # Repeat the input for all possible outputs
    if self.output_dims is not None:
        inputs = inputs.unsqueeze(-3)
        inputs = inputs.expand(*inputs.shape[:-3], self.output_dims, *inputs.shape[-2:])

    # Now run samples through the GP
    output = ApproximateGP.__call__(self, inputs)
    if self.output_dims is not None:
        mean = output.loc.transpose(-1, -2)
        covar = BlockDiagLazyTensor(output.lazy_covariance_matrix, block_dim=-3)
        output = MultitaskMultivariateNormal(mean, covar, interleaved=False)

    # Maybe expand inputs?
    if deterministic_inputs:
        output = output.expand(torch.Size([settings.num_likelihood_samples.value()]) + output.batch_shape)

    return output
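# A minimal usage sketch of the composability described in the docstring above.
# The layer class name and the dims d, h, h2, n are hypothetical placeholders,
# not objects defined in this module:
#
#     hidden_gp = SomeHiddenGPLayer(input_dims=d, output_dims=h)     # hypothetical
#     hidden_gp2 = SomeHiddenGPLayer(input_dims=h, output_dims=h2)   # hypothetical
#     x = torch.randn(n, d)
#     out = hidden_gp2(hidden_gp(x))
#     # `out` is a MultitaskMultivariateNormal whose samples are roughly
#     # `s x n x h2` (likelihood samples x data points x output dims).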
def __call__(self, inputs, are_samples=False, expand_for_quadgrid=True, **kwargs):
    if isinstance(inputs, MultitaskMultivariateNormal):
        # `inputs` comes from a previous layer; its mean is n x t
        mus, sigmas = inputs.mean, inputs.variance.sqrt()
        if expand_for_quadgrid:
            xi_mus = mus.unsqueeze(0)  # 1 x n x t
            xi_sigmas = sigmas.unsqueeze(0)  # 1 x n x t
        else:
            xi_mus = mus
            xi_sigmas = sigmas
        # Unsqueeze sigmas to 1 x n x t and reshape the quadrature locations
        # from [q] to q^t x 1 x t; the broadcasted result will be q^t x n x t.
        qg = self.quad_sites.view([self.num_quad_sites] + [1] * (xi_mus.dim() - 2) + [self.input_dims])
        xi_sigmas = xi_sigmas * qg
        inputs = xi_mus + xi_sigmas  # q^t x n x t

    if settings.debug.on():
        if not torch.is_tensor(inputs):
            raise ValueError(
                "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                f"{inputs.__class__.__name__}"
            )
        if inputs.size(-1) != self.input_dims:
            raise RuntimeError(
                f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                f" expected [{self.input_dims}]"
            )

    # Repeat the input for all possible outputs
    if self.output_dims is not None:
        inputs = inputs.unsqueeze(-3)
        inputs = inputs.expand(*inputs.shape[:-3], self.output_dims, *inputs.shape[-2:])

    # Now run samples through the GP
    output = ApproximateGP.__call__(self, inputs, **kwargs)
    if self.num_quad_sites > 0:
        if self.output_dims is not None and not isinstance(output, MultitaskMultivariateNormal):
            mean = output.loc.transpose(-1, -2)
            covar = BlockDiagLazyTensor(output.lazy_covariance_matrix, block_dim=-3)
            output = MultitaskMultivariateNormal(mean, covar, interleaved=False)
    else:
        # With no quadrature sites, this layer provides noiseless kernel interpolation
        output = output.loc.transpose(-1, -2)
    return output
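# Shape sketch for the quadrature-site broadcast performed above (standalone
# torch only; q, n, t are placeholder sizes, with q standing in for the number
# of quadrature sites, not values taken from this module):
#
#     import torch
#     q, n, t = 3, 5, 2
#     quad_sites = torch.randn(q, 1, t)   # q x 1 x t quadrature locations
#     sigmas = torch.randn(1, n, t)       # 1 x n x t standard deviations
#     mus = torch.randn(1, n, t)          # 1 x n x t means
#     xi = mus + sigmas * quad_sites      # broadcasts to q x n x t
#     assert xi.shape == (q, n, t)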
def test_diag(self):
    block_tensor = self.blocks.clone().requires_grad_(True)
    actual_block_diag = torch.zeros(32, 32)
    for i in range(8):
        actual_block_diag[i * 4:(i + 1) * 4, i * 4:(i + 1) * 4] = block_tensor[i]
    res = BlockDiagLazyTensor(NonLazyTensor(block_tensor)).diag()
    actual = actual_block_diag.diag()
    self.assertTrue(approx_equal(actual, res))
def __call__(self, inputs, **kwargs):
    if isinstance(inputs, MultitaskMultivariateNormal):
        # This is a subsequent layer, so we apply quadrature here.
        # Mean and stdv are q x ... x n x t
        mus, sigmas = inputs.mean, inputs.variance.sqrt()
        qg = self.quad_sites.view([self.num_quad_sites] + [1] * (mus.dim() - 2) + [self.input_dims])
        sigmas = sigmas * qg
        inputs = mus + sigmas  # q^t x n x t
        deterministic_inputs = False
    else:
        deterministic_inputs = True

    if settings.debug.on():
        if not torch.is_tensor(inputs):
            raise ValueError(
                "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                f"{inputs.__class__.__name__}"
            )
        if inputs.size(-1) != self.input_dims:
            raise RuntimeError(
                f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                f" expected [{self.input_dims}]"
            )

    # Repeat the input for all possible outputs
    if self.output_dims is not None:
        inputs = inputs.unsqueeze(-3)
        inputs = inputs.expand(*inputs.shape[:-3], self.output_dims, *inputs.shape[-2:])

    # Now run samples through the GP
    output = ApproximateGP.__call__(self, inputs, **kwargs)

    # If this is the first layer (deterministic inputs), expand the output.
    # This allows quadrature to be applied to future layers.
    if deterministic_inputs:
        output = output.expand(torch.Size([self.num_quad_sites]) + output.batch_shape)

    if self.num_quad_sites > 0:
        if self.output_dims is not None and not isinstance(output, MultitaskMultivariateNormal):
            mean = output.loc.transpose(-1, -2)
            covar = BlockDiagLazyTensor(output.lazy_covariance_matrix, block_dim=-3)
            output = MultitaskMultivariateNormal(mean, covar, interleaved=False)
    else:
        # With no quadrature sites, this layer provides noiseless kernel interpolation
        output = output.loc.transpose(-1, -2)
    return output
def test_getitem_batch(self):
    block_tensor = self.blocks.clone().requires_grad_(True)
    actual_block_diag = torch.zeros(2, 16, 16)
    for i in range(2):
        for j in range(4):
            actual_block_diag[i, j * 4:(j + 1) * 4, j * 4:(j + 1) * 4] = block_tensor[i * 4 + j]

    res = BlockDiagLazyTensor(NonLazyTensor(block_tensor), num_blocks=4)[0].evaluate()
    actual = actual_block_diag[0]
    self.assertTrue(approx_equal(actual, res))

    res = BlockDiagLazyTensor(NonLazyTensor(block_tensor), num_blocks=4)[0, :5].evaluate()
    actual = actual_block_diag[0, :5]
    self.assertTrue(approx_equal(actual, res))

    res = BlockDiagLazyTensor(NonLazyTensor(block_tensor), num_blocks=4)[1:, :5, 2]
    actual = actual_block_diag[1:, :5, 2]
    self.assertTrue(approx_equal(actual, res))
def test_matmul(self):
    rhs_tensor = torch.randn(4 * 8, 4, requires_grad=True)
    rhs_tensor_copy = rhs_tensor.clone().detach().requires_grad_(True)
    block_tensor = self.blocks.clone().requires_grad_(True)
    block_tensor_copy = self.blocks.clone().requires_grad_(True)

    actual_block_diag = torch.zeros(32, 32)
    for i in range(8):
        actual_block_diag[i * 4:(i + 1) * 4, i * 4:(i + 1) * 4] = block_tensor_copy[i]

    res = BlockDiagLazyTensor(NonLazyTensor(block_tensor)).matmul(rhs_tensor)
    actual = actual_block_diag.matmul(rhs_tensor_copy)
    self.assertTrue(approx_equal(res, actual))

    actual.sum().backward()
    res.sum().backward()
    self.assertTrue(approx_equal(rhs_tensor.grad, rhs_tensor_copy.grad))
    self.assertTrue(approx_equal(block_tensor.grad, block_tensor_copy.grad))
def test_batch_diag(self):
    block_tensor = self.blocks.clone().requires_grad_(True)
    actual_block_diag = torch.zeros(2, 16, 16)
    for i in range(2):
        for j in range(4):
            actual_block_diag[i, j * 4:(j + 1) * 4, j * 4:(j + 1) * 4] = block_tensor[i * 4 + j]

    res = BlockDiagLazyTensor(NonLazyTensor(block_tensor), num_blocks=4).diag()
    actual = torch.cat([
        actual_block_diag[0].diag().unsqueeze(0),
        actual_block_diag[1].diag().unsqueeze(0),
    ])
    self.assertTrue(approx_equal(actual, res))
def test_batch_matmul(self):
    rhs_tensor = torch.randn(2, 4 * 4, 4, requires_grad=True)
    rhs_tensor_copy = rhs_tensor.clone().detach().requires_grad_(True)
    block_tensor = self.blocks.clone().requires_grad_(True)
    block_tensor_copy = self.blocks.clone().requires_grad_(True)

    actual_block_diag = torch.zeros(2, 16, 16)
    for i in range(2):
        for j in range(4):
            actual_block_diag[i, j * 4:(j + 1) * 4, j * 4:(j + 1) * 4] = block_tensor_copy[i * 4 + j]

    res = BlockDiagLazyTensor(NonLazyTensor(block_tensor), num_blocks=4).matmul(rhs_tensor)
    actual = actual_block_diag.matmul(rhs_tensor_copy)
    self.assertTrue(approx_equal(res, actual))

    actual.sum().backward()
    res.sum().backward()
    self.assertTrue(approx_equal(rhs_tensor.grad, rhs_tensor_copy.grad))
    self.assertTrue(approx_equal(block_tensor.grad, block_tensor_copy.grad))
def __call__(self, inputs, are_samples=False, **kwargs):
    deterministic_inputs = not are_samples
    if isinstance(inputs, MultitaskMultivariateNormal):
        inputs = torch.distributions.Normal(loc=inputs.mean, scale=inputs.variance.sqrt()).rsample()
        deterministic_inputs = False

    if settings.debug.on():
        if not torch.is_tensor(inputs):
            raise ValueError(
                "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                f"{inputs.__class__.__name__}"
            )
        if inputs.size(-1) != self.input_dims:
            raise RuntimeError(
                f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                f" expected [{self.input_dims}]"
            )

    # Repeat the input for all possible outputs
    if self.output_dims is not None:
        inputs = inputs.unsqueeze(-3)
        inputs = inputs.expand(*inputs.shape[:-3], self.output_dims, *inputs.shape[-2:])

    # Now run samples through the GP
    output = ApproximateGP.__call__(self, inputs)
    if self.output_dims is not None:
        mean = output.loc.transpose(-1, -2)
        covar = BlockDiagLazyTensor(output.lazy_covariance_matrix, block_dim=-3)
        output = MultitaskMultivariateNormal(mean, covar, interleaved=False)

    # Maybe expand inputs?
    if deterministic_inputs:
        output = output.expand(torch.Size([settings.num_likelihood_samples.value()]) + output.batch_shape)

    return output
def create_lazy_tensor(self):
    blocks = torch.randn(2, 6, 5, 4, 4)
    blocks = blocks.matmul(blocks.transpose(-1, -2))
    blocks.add_(torch.eye(4, 4))
    blocks.detach_()
    return BlockDiagLazyTensor(NonLazyTensor(blocks), block_dim=1)
def create_lazy_tensor(self):
    blocks = torch.randn(8, 4, 4)
    blocks = blocks.matmul(blocks.transpose(-1, -2))
    blocks.add_(torch.eye(4, 4).unsqueeze_(0))
    return BlockDiagLazyTensor(NonLazyTensor(blocks))