def test_matmul_vec(self):
    avar = Variable(a, requires_grad=True)
    bvar = Variable(b, requires_grad=True)
    cvar = Variable(c, requires_grad=True)
    vec = Variable(torch.randn(24), requires_grad=True)
    kp_lazy_var = KroneckerProductLazyVariable(
        NonLazyVariable(avar),
        NonLazyVariable(bvar),
        NonLazyVariable(cvar),
    )
    res = kp_lazy_var.matmul(vec)

    avar_copy = Variable(a, requires_grad=True)
    bvar_copy = Variable(b, requires_grad=True)
    cvar_copy = Variable(c, requires_grad=True)
    vec_copy = Variable(vec.data.clone(), requires_grad=True)
    actual = kron(kron(avar_copy, bvar_copy), cvar_copy).matmul(vec_copy)
    self.assertTrue(approx_equal(res.data, actual.data))

    actual.sum().backward()
    res.sum().backward()
    self.assertTrue(approx_equal(avar_copy.grad.data, avar.grad.data))
    self.assertTrue(approx_equal(bvar_copy.grad.data, bvar.grad.data))
    self.assertTrue(approx_equal(cvar_copy.grad.data, cvar.grad.data))
    self.assertTrue(approx_equal(vec_copy.grad.data, vec.grad.data))
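# These tests compare against a dense `kron` helper defined elsewhere in the
# test module. A minimal sketch of what such a helper could look like, assuming
# it must support both unbatched (2D) and batched (3D) factors as the tests
# below require; `kron_sketch` is a hypothetical name, not the module's helper:
def kron_sketch(mat1, mat2):
    """Dense Kronecker product of two matrices, batched over dim 0 if present."""
    if mat1.dim() == 3:
        # Batched: (b, m, n) x (b, p, q) -> (b, m*p, n*q)
        batch, m, n = mat1.size()
        _, p, q = mat2.size()
        # prod[t, i, k, j, l] = mat1[t, i, j] * mat2[t, k, l]
        prod = mat1.unsqueeze(2).unsqueeze(4) * mat2.unsqueeze(1).unsqueeze(3)
        return prod.contiguous().view(batch, m * p, n * q)
    # Unbatched: (m, n) x (p, q) -> (m*p, n*q)
    m, n = mat1.size()
    p, q = mat2.size()
    # prod[i, k, j, l] = mat1[i, j] * mat2[k, l], so collapsing (i, k) and
    # (j, l) gives (A ⊗ B)[i*p + k, j*q + l] = A[i, j] * B[k, l]
    prod = mat1.unsqueeze(1).unsqueeze(3) * mat2.unsqueeze(0).unsqueeze(2)
    return prod.contiguous().view(m * p, n * q)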
def test_matmul_batch_mat(self):
    avar = Variable(a.repeat(3, 1, 1), requires_grad=True)
    bvar = Variable(b.repeat(3, 1, 1), requires_grad=True)
    cvar = Variable(c.repeat(3, 1, 1), requires_grad=True)
    mat = Variable(torch.randn(3, 24, 5), requires_grad=True)
    kp_lazy_var = KroneckerProductLazyVariable(
        NonLazyVariable(avar),
        NonLazyVariable(bvar),
        NonLazyVariable(cvar),
    )
    res = kp_lazy_var.matmul(mat)

    avar_copy = Variable(a.repeat(3, 1, 1), requires_grad=True)
    bvar_copy = Variable(b.repeat(3, 1, 1), requires_grad=True)
    cvar_copy = Variable(c.repeat(3, 1, 1), requires_grad=True)
    mat_copy = Variable(mat.data.clone(), requires_grad=True)
    actual = kron(kron(avar_copy, bvar_copy), cvar_copy).matmul(mat_copy)
    self.assertTrue(approx_equal(res.data, actual.data))

    actual.sum().backward()
    res.sum().backward()
    self.assertTrue(approx_equal(avar_copy.grad.data, avar.grad.data))
    self.assertTrue(approx_equal(bvar_copy.grad.data, bvar.grad.data))
    self.assertTrue(approx_equal(cvar_copy.grad.data, cvar.grad.data))
    self.assertTrue(approx_equal(mat_copy.grad.data, mat.grad.data))
def test_matmul_mat_random_rectangular(self):
    a = torch.randn(4, 2, 3)
    b = torch.randn(4, 5, 2)
    c = torch.randn(4, 6, 4)
    rhs = torch.randn(4, 3 * 2 * 4, 2)
    a_copy = a.clone()
    b_copy = b.clone()
    c_copy = c.clone()
    rhs_copy = rhs.clone()
    a.requires_grad = True
    b.requires_grad = True
    c.requires_grad = True
    a_copy.requires_grad = True
    b_copy.requires_grad = True
    c_copy.requires_grad = True
    rhs.requires_grad = True
    rhs_copy.requires_grad = True

    actual = kron(kron(a_copy, b_copy), c_copy).matmul(rhs_copy)
    kp_lazy_var = KroneckerProductLazyVariable(NonLazyVariable(a),
                                               NonLazyVariable(b),
                                               NonLazyVariable(c))
    res = kp_lazy_var.matmul(rhs)
    self.assertTrue(approx_equal(res.data, actual.data))

    actual.sum().backward()
    res.sum().backward()
    self.assertTrue(approx_equal(a_copy.grad.data, a.grad.data))
    self.assertTrue(approx_equal(b_copy.grad.data, b.grad.data))
    self.assertTrue(approx_equal(c_copy.grad.data, c.grad.data))
    self.assertTrue(approx_equal(rhs_copy.grad.data, rhs.grad.data))
def test_get_item_square_on_variable():
    kronecker_product_var = KroneckerProductLazyVariable(
        Variable(torch.Tensor([[1, 2, 3, 4], [5, 6, 7, 8]])),
        added_diag=Variable(torch.ones(16) * 3),
    )
    evaluated = kronecker_product_var.evaluate().data

    assert utils.approx_equal(kronecker_product_var[2:4, 2:4].evaluate().data,
                              evaluated[2:4, 2:4])
def test_get_item_on_interpolated_variable_no_diagonal():
    no_diag_kronecker_product = KroneckerProductLazyVariable(
        lazy_kronecker_product_var.columns,
        lazy_kronecker_product_var.J_lefts,
        lazy_kronecker_product_var.C_lefts,
        lazy_kronecker_product_var.J_rights,
        lazy_kronecker_product_var.C_rights,
    )
    evaluated = no_diag_kronecker_product.evaluate().data

    assert utils.approx_equal(no_diag_kronecker_product[4:6].evaluate().data,
                              evaluated[4:6])
    assert utils.approx_equal(no_diag_kronecker_product[4:6, 2:6].evaluate().data,
                              evaluated[4:6, 2:6])
def forward(self, input):
    r"""
    Adds the log task noises to the diagonal of the covariance matrix of the
    supplied :obj:`gpytorch.random_variables.GaussianRandomVariable` or
    :obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`.

    To accomplish this, we form a new
    :obj:`gpytorch.lazy.KroneckerProductLazyVariable` between :math:`I_{n}`,
    an identity matrix with size equal to the amount of data, and
    :math:`D_{t}`, a diagonal matrix containing the task noises. We also
    incorporate a shared `log_noise` parameter from the base
    :class:`gpytorch.likelihoods.GaussianLikelihood` that we extend.

    The final covariance matrix after this method is then
    :math:`K + D_{t} \otimes I_{n} + \sigma^{2}I_{nt}`.

    Args:
        input (:obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`):
            Random variable whose covariance matrix is a
            :obj:`gpytorch.lazy.LazyVariable` we intend to augment.

    Returns:
        :obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`:
            A new random variable whose covariance matrix is a
            :obj:`gpytorch.lazy.LazyVariable` with :math:`D_{t} \otimes I_{n}`
            and :math:`\sigma^{2}I_{nt}` added.
    """
    mean, covar = input.representation()
    eye_lv = DiagLazyVariable(
        torch.ones(covar.size(-1) // self.n_tasks, device=self.log_noise.device))
    task_var_lv = DiagLazyVariable(self.log_task_noises.exp())
    diag_kron_lv = KroneckerProductLazyVariable(task_var_lv, eye_lv)
    noise = covar + diag_kron_lv
    noise = add_diag(noise, self.log_noise.exp())
    return input.__class__(mean, noise)
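# A minimal numeric sketch of the D_t ⊗ I_n term described in the docstring
# above (plain torch; the sizes and noise values here are hypothetical, not
# from the module): its diagonal repeats each task noise once per data point,
# which is what gives every task its own noise level.
def _task_noise_diag_sketch():
    n = 3                                       # data points (assumed)
    task_noises = torch.Tensor([0.1, 0.5])      # exp(log_task_noises), assumed
    # diag(D_t ⊗ I_n) is the outer product of the task noises with a vector
    # of ones, flattened: each noise repeated n times.
    diag_kron = torch.ger(task_noises, torch.ones(n)).view(-1)
    return diag_kron                            # -> [0.1, 0.1, 0.1, 0.5, 0.5, 0.5]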
def forward(self, x1, x2, **kwargs):
    # Both inputs must be the inducing points (note: x2, not x1 twice).
    if not torch.equal(x1.data, self._inducing_points) or \
            not torch.equal(x2.data, self._inducing_points):
        raise RuntimeError(
            'The kernel should only receive the inducing points as input')

    # In eval mode, reuse the cached kernel matrix if we have one.
    if not self.training and hasattr(self, '_cached_kernel_mat'):
        return self._cached_kernel_mat
    else:
        d = x1.size(1)
        grid_var = Variable(self.grid)
        if d > 1:
            # One Toeplitz column per dimension; combined as a Kronecker product.
            k_UUs = Variable(x1.data.new(d, self.grid_size).zero_())
            for i in range(d):
                k_UUs[i] = self.base_kernel_module(grid_var[i, 0], grid_var[i],
                                                   **kwargs).squeeze()
            K_XX = KroneckerProductLazyVariable(k_UUs)
        else:
            if gpytorch.functions.use_toeplitz:
                # Toeplitz structure only needs the first column of K_UU.
                k_UU = self.base_kernel_module(grid_var[0, 0], grid_var[0],
                                               **kwargs).squeeze()
                K_XX = ToeplitzLazyVariable(k_UU)
            else:
                k_UU = self.base_kernel_module(grid_var[0], grid_var[0],
                                               **kwargs).squeeze()
                K_XX = NonLazyVariable(k_UU)

        if not self.training:
            self._cached_kernel_mat = K_XX
        return K_XX
def forward(self, x1, x2, **kwargs):
    n, d = x1.size()
    grid_size = self.grid_size

    if self.conditioning:
        J1, C1, J2, C2 = self._compute_grid(x1, x2)
        self.train_J1 = J1
        self.train_C1 = C1
        self.train_J2 = J2
        self.train_C2 = C2
    else:
        train_data = self.train_inputs[0].data if hasattr(self, 'train_inputs') else None
        if train_data is not None and torch.equal(x1.data, train_data) and \
                torch.equal(x2.data, train_data):
            J1 = self.train_J1
            C1 = self.train_C1
            J2 = self.train_J2
            C2 = self.train_C2
        else:
            J1, C1, J2, C2 = self._compute_grid(x1, x2)

    grid_var = Variable(self.grid)
    if d > 1:
        k_UUs = Variable(x1.data.new(d, grid_size).zero_())
        for i in range(d):
            k_UUs[i] = self.base_kernel_module(grid_var[i, 0], grid_var[i],
                                               **kwargs).squeeze()
        K_XX = KroneckerProductLazyVariable(k_UUs, J1, C1, J2, C2)
    else:
        k_UU = self.base_kernel_module(grid_var[0, 0], grid_var[0], **kwargs).squeeze()
        K_XX = ToeplitzLazyVariable(k_UU, J1, C1, J2, C2)

    return K_XX
def make_mul_lazy_var():
    diag = Variable(torch.Tensor([1]), requires_grad=True)
    c1 = Variable(torch.Tensor([5, 1, 2, 0]), requires_grad=True)
    t1 = ToeplitzLazyVariable(c1)
    c2 = Variable(torch.Tensor([[6, 0], [1, -1]]), requires_grad=True)
    t2 = KroneckerProductLazyVariable(c2)
    c3 = Variable(torch.Tensor([7, 2, 1, 0]), requires_grad=True)
    t3 = ToeplitzLazyVariable(c3)
    return (t1 * t2 * t3).add_diag(diag), diag
def test_diag(self):
    avar = Variable(a)
    bvar = Variable(b)
    cvar = Variable(c)
    kp_lazy_var = KroneckerProductLazyVariable(
        NonLazyVariable(avar),
        NonLazyVariable(bvar),
        NonLazyVariable(cvar),
    )
    res = kp_lazy_var.diag()
    actual = kron(kron(avar, bvar), cvar).diag()
    self.assertTrue(approx_equal(res.data, actual.data))

    avar = Variable(a.repeat(3, 1, 1))
    bvar = Variable(b.repeat(3, 1, 1))
    cvar = Variable(c.repeat(3, 1, 1))
    kp_lazy_var = KroneckerProductLazyVariable(
        NonLazyVariable(avar),
        NonLazyVariable(bvar),
        NonLazyVariable(cvar),
    )
    res = kp_lazy_var.diag()
    actual_mat = kron(kron(avar, bvar), cvar)
    actual = torch.stack([actual_mat[0].diag(),
                          actual_mat[1].diag(),
                          actual_mat[2].diag()])
    self.assertTrue(approx_equal(res.data, actual.data))
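# The lazy `diag` above can presumably avoid materializing the dense product,
# since diag(A ⊗ B) is the outer product of diag(A) and diag(B), flattened.
# A minimal check of that identity (plain torch, square factors; `kron` and
# `approx_equal` are the test module's helpers, here assumed to accept raw
# tensors):
def _kron_diag_identity_sketch():
    A = torch.randn(3, 3)
    B = torch.randn(4, 4)
    full = kron(A, B).diag()                       # dense route: O(n^2) memory
    fast = torch.ger(A.diag(), B.diag()).view(-1)  # outer product of diagonals
    return approx_equal(full, fast)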
def test_matmul_mat():
    avar = Variable(a, requires_grad=True)
    bvar = Variable(b, requires_grad=True)
    cvar = Variable(c, requires_grad=True)
    mat = Variable(torch.randn(24, 5), requires_grad=True)
    kp_lazy_var = KroneckerProductLazyVariable(NonLazyVariable(avar),
                                               NonLazyVariable(bvar),
                                               NonLazyVariable(cvar))
    res = kp_lazy_var.matmul(mat)

    avar_copy = Variable(a, requires_grad=True)
    bvar_copy = Variable(b, requires_grad=True)
    cvar_copy = Variable(c, requires_grad=True)
    mat_copy = Variable(mat.data.clone(), requires_grad=True)
    actual = kron(kron(avar_copy, bvar_copy), cvar_copy).matmul(mat_copy)
    assert approx_equal(res.data, actual.data)

    actual.sum().backward()
    res.sum().backward()
    assert approx_equal(avar_copy.grad.data, avar.grad.data)
    assert approx_equal(bvar_copy.grad.data, bvar.grad.data)
    assert approx_equal(cvar_copy.grad.data, cvar.grad.data)
    assert approx_equal(mat_copy.grad.data, mat.grad.data)
def test_matmul_vec_random_rectangular(self):
    ax = torch.randn(4, 2, 3)
    bx = torch.randn(4, 5, 2)
    cx = torch.randn(4, 6, 4)
    rhsx = torch.randn(4, 3 * 2 * 4, 1)
    rhsx = rhsx / torch.norm(rhsx)
    ax_copy = ax.clone()
    bx_copy = bx.clone()
    cx_copy = cx.clone()
    rhsx_copy = rhsx.clone()
    ax.requires_grad = True
    bx.requires_grad = True
    cx.requires_grad = True
    ax_copy.requires_grad = True
    bx_copy.requires_grad = True
    cx_copy.requires_grad = True
    rhsx.requires_grad = True
    rhsx_copy.requires_grad = True

    kp_lazy_var = KroneckerProductLazyVariable(NonLazyVariable(ax),
                                               NonLazyVariable(bx),
                                               NonLazyVariable(cx))
    res = kp_lazy_var.matmul(rhsx)
    actual_mat = kron(kron(ax_copy, bx_copy), cx_copy)
    actual = actual_mat.matmul(rhsx_copy)
    self.assertTrue(approx_equal(res.data, actual.data))

    actual.sum().backward()
    res.sum().backward()
    self.assertTrue(approx_equal(ax_copy.grad.data, ax.grad.data))
    self.assertTrue(approx_equal(bx_copy.grad.data, bx.grad.data))
    self.assertTrue(approx_equal(cx_copy.grad.data, cx.grad.data))
    self.assertTrue(approx_equal(rhsx_copy.grad.data, rhsx.grad.data))
def test_exact_posterior():
    train_mean = Variable(torch.randn(4))
    train_y = Variable(torch.randn(4))
    test_mean = Variable(torch.randn(4))

    # Test case
    c1_var = Variable(torch.Tensor([5, 1, 2, 0]), requires_grad=True)
    c2_var = Variable(torch.Tensor([[6, 0], [1, -1]]), requires_grad=True)
    c3_var = Variable(torch.Tensor([7, 2, 1, 0]), requires_grad=True)
    indices_1 = torch.arange(0, 4).long().view(4, 1)
    values_1 = torch.ones(4).view(4, 1)
    indices_2 = torch.arange(0, 2).expand(4, 2).long().view(2, 4, 1)
    values_2 = torch.ones(8).view(2, 4, 1)
    indices_3 = torch.arange(0, 4).long().view(4, 1)
    values_3 = torch.ones(4).view(4, 1)
    toeplitz_1 = InterpolatedLazyVariable(ToeplitzLazyVariable(c1_var),
                                          Variable(indices_1), Variable(values_1),
                                          Variable(indices_1), Variable(values_1))
    kronecker_product = KroneckerProductLazyVariable(c2_var, indices_2, values_2,
                                                     indices_2, values_2)
    toeplitz_2 = InterpolatedLazyVariable(ToeplitzLazyVariable(c3_var),
                                          Variable(indices_3), Variable(values_3),
                                          Variable(indices_3), Variable(values_3))
    mul_lv = toeplitz_1 * kronecker_product * toeplitz_2

    # Actual case
    actual = mul_lv.evaluate()

    # Test forward
    actual_alpha = gpytorch.posterior_strategy(actual).exact_posterior_alpha(
        train_mean, train_y)
    actual_mean = gpytorch.posterior_strategy(actual).exact_posterior_mean(
        test_mean, actual_alpha)
    mul_lv_alpha = mul_lv.posterior_strategy().exact_posterior_alpha(
        train_mean, train_y)
    mul_lv_mean = mul_lv.posterior_strategy().exact_posterior_mean(
        test_mean, mul_lv_alpha)
    assert torch.norm(actual_mean.data - mul_lv_mean.data) < 1e-3
def test_evaluate(self):
    avar = Variable(a)
    bvar = Variable(b)
    cvar = Variable(c)
    kp_lazy_var = KroneckerProductLazyVariable(NonLazyVariable(avar),
                                               NonLazyVariable(bvar),
                                               NonLazyVariable(cvar))
    res = kp_lazy_var.evaluate()
    actual = kron(kron(avar, bvar), cvar)
    self.assertTrue(approx_equal(res.data, actual.data))

    avar = Variable(a.repeat(3, 1, 1))
    bvar = Variable(b.repeat(3, 1, 1))
    cvar = Variable(c.repeat(3, 1, 1))
    kp_lazy_var = KroneckerProductLazyVariable(NonLazyVariable(avar),
                                               NonLazyVariable(bvar),
                                               NonLazyVariable(cvar))
    res = kp_lazy_var.evaluate()
    actual = kron(kron(avar, bvar), cvar)
    self.assertTrue(approx_equal(res.data, actual.data))
def test_trace_log_det_quad_form():
    mu_diffs_var = Variable(torch.arange(1, 5, 1))
    chol_covar_1_var = Variable(torch.eye(4))

    # Test case
    c1_var = Variable(torch.Tensor([5, 1, 2, 0]), requires_grad=True)
    c2_var = Variable(torch.Tensor([[6, 0], [1, -1]]), requires_grad=True)
    c3_var = Variable(torch.Tensor([7, 2, 1, 0]), requires_grad=True)
    diag_var = Variable(torch.Tensor([1]), requires_grad=True)
    diag_var_expand = diag_var.expand(4)
    toeplitz_1 = ToeplitzLazyVariable(c1_var).evaluate()
    kronecker_product = KroneckerProductLazyVariable(c2_var).evaluate()
    toeplitz_2 = ToeplitzLazyVariable(c3_var).evaluate()
    actual = toeplitz_1 * kronecker_product * toeplitz_2 + diag_var_expand.diag()

    # Actual case
    mul_lv, diag = make_mul_lazy_var()
    t1, t2, t3 = mul_lv.lazy_vars

    # Test forward
    tldqf_res = mul_lv.trace_log_det_quad_form(mu_diffs_var, chol_covar_1_var)
    tldqf_actual = gpytorch._trace_logdet_quad_form_factory_class()(
        mu_diffs_var, chol_covar_1_var, actual)
    assert math.fabs(tldqf_res.data.squeeze()[0] -
                     tldqf_actual.data.squeeze()[0]) < 1.5

    # Test backwards
    tldqf_res.backward()
    tldqf_actual.backward()
    assert ((c1_var.grad.data - t1.column.grad.data).abs().norm() /
            c1_var.grad.data.abs().norm() < 1e-1)
    assert ((c2_var.grad.data - t2.columns.grad.data).abs().norm() /
            c2_var.grad.data.abs().norm() < 1e-1)
    assert ((c3_var.grad.data - t3.column.grad.data).abs().norm() /
            c3_var.grad.data.abs().norm() < 1e-1)
    assert ((diag_var.grad.data - diag.grad.data).abs().norm() /
            diag_var.grad.data.abs().norm() < 1e-1)
def test_exact_gp_mll():
    labels_var = Variable(torch.arange(1, 5, 1))

    # Test case
    c1_var = Variable(torch.Tensor([5, 1, 2, 0]), requires_grad=True)
    c2_var = Variable(torch.Tensor([[6, 0], [1, -1]]), requires_grad=True)
    c3_var = Variable(torch.Tensor([7, 2, 1, 0]), requires_grad=True)
    diag_var = Variable(torch.Tensor([1]), requires_grad=True)
    diag_var_expand = diag_var.expand(4)
    toeplitz_1 = ToeplitzLazyVariable(c1_var).evaluate()
    kronecker_product = KroneckerProductLazyVariable(c2_var).evaluate()
    toeplitz_2 = ToeplitzLazyVariable(c3_var).evaluate()
    actual = toeplitz_1 * kronecker_product * toeplitz_2 + diag_var_expand.diag()

    # Actual case
    mul_lv, diag = make_mul_lazy_var()
    t1, t2, t3 = mul_lv.lazy_vars

    # Test forward
    mll_res = mul_lv.exact_gp_marginal_log_likelihood(labels_var)
    mll_actual = gpytorch.exact_gp_marginal_log_likelihood(actual, labels_var)
    assert math.fabs(mll_res.data.squeeze()[0] -
                     mll_actual.data.squeeze()[0]) < 1

    # Test backwards
    mll_res.backward()
    mll_actual.backward()
    assert ((c1_var.grad.data - t1.column.grad.data).abs().norm() /
            c1_var.grad.data.abs().norm() < 1e-1)
    assert ((c2_var.grad.data - t2.columns.grad.data).abs().norm() /
            c2_var.grad.data.abs().norm() < 1e-1)
    assert ((c3_var.grad.data - t3.column.grad.data).abs().norm() /
            c3_var.grad.data.abs().norm() < 1e-1)
    assert ((diag_var.grad.data - diag.grad.data).abs().norm() /
            diag_var.grad.data.abs().norm() < 1e-1)