def test_matmul_vec(self):
    """KP matmul with a vector matches dense kron matmul, in values and gradients."""
    factors = [Variable(t, requires_grad=True) for t in (a, b, c)]
    vec = Variable(torch.randn(24), requires_grad=True)
    kp_lazy_var = KroneckerProductLazyVariable(*[NonLazyVariable(f) for f in factors])
    res = kp_lazy_var.matmul(vec)

    factor_copies = [Variable(t, requires_grad=True) for t in (a, b, c)]
    vec_copy = Variable(vec.data.clone(), requires_grad=True)
    actual = kron(kron(factor_copies[0], factor_copies[1]), factor_copies[2]).matmul(vec_copy)
    self.assertTrue(approx_equal(res.data, actual.data))

    actual.sum().backward()
    res.sum().backward()
    for copy_var, orig_var in zip(factor_copies, factors):
        self.assertTrue(approx_equal(copy_var.grad.data, orig_var.grad.data))
    self.assertTrue(approx_equal(vec_copy.grad.data, vec.grad.data))
def test_matmul_mat_random_rectangular(self):
    """Batched KP matmul with rectangular factors vs dense kron: values and grads.

    Fix: ``a_copy`` was built with ``torch.tensor(a)``, which is inconsistent
    with the ``.clone()`` copies below and triggers PyTorch's
    copy-construct warning; use ``a.clone()`` like the others.
    """
    a = torch.randn(4, 2, 3)
    b = torch.randn(4, 5, 2)
    c = torch.randn(4, 6, 4)
    rhs = torch.randn(4, 3 * 2 * 4, 2)
    a_copy = a.clone()
    b_copy = b.clone()
    c_copy = c.clone()
    rhs_copy = rhs.clone()
    a.requires_grad = True
    b.requires_grad = True
    c.requires_grad = True
    a_copy.requires_grad = True
    b_copy.requires_grad = True
    c_copy.requires_grad = True
    rhs.requires_grad = True
    rhs_copy.requires_grad = True

    actual = kron(kron(a_copy, b_copy), c_copy).matmul(rhs_copy)
    kp_lazy_var = KroneckerProductLazyVariable(
        NonLazyVariable(a), NonLazyVariable(b), NonLazyVariable(c))
    res = kp_lazy_var.matmul(rhs)
    self.assertTrue(approx_equal(res.data, actual.data))

    actual.sum().backward()
    res.sum().backward()
    self.assertTrue(approx_equal(a_copy.grad.data, a.grad.data))
    self.assertTrue(approx_equal(b_copy.grad.data, b.grad.data))
    self.assertTrue(approx_equal(c_copy.grad.data, c.grad.data))
    self.assertTrue(approx_equal(rhs_copy.grad.data, rhs.grad.data))
def test_matmul_batch_mat(self):
    """Batched KP matmul against dense kron: forward values and all gradients."""
    batch_factors = [Variable(t.repeat(3, 1, 1), requires_grad=True) for t in (a, b, c)]
    mat = Variable(torch.randn(3, 24, 5), requires_grad=True)
    kp_lazy_var = KroneckerProductLazyVariable(*[NonLazyVariable(f) for f in batch_factors])
    res = kp_lazy_var.matmul(mat)

    batch_copies = [Variable(t.repeat(3, 1, 1), requires_grad=True) for t in (a, b, c)]
    mat_copy = Variable(mat.data.clone(), requires_grad=True)
    actual = kron(kron(batch_copies[0], batch_copies[1]), batch_copies[2]).matmul(mat_copy)
    self.assertTrue(approx_equal(res.data, actual.data))

    actual.sum().backward()
    res.sum().backward()
    for copy_var, orig_var in zip(batch_copies, batch_factors):
        self.assertTrue(approx_equal(copy_var.grad.data, orig_var.grad.data))
    self.assertTrue(approx_equal(mat_copy.grad.data, mat.grad.data))
def test_getitem_batch(self):
    """Indexing a batched BlockDiagonalLazyVariable matches the dense block-diagonal."""
    block_var = Variable(blocks, requires_grad=True)
    dense = Variable(torch.zeros(2, 16, 16))
    for i in range(2):
        for j in range(4):
            span = slice(j * 4, (j + 1) * 4)
            dense[i, span, span] = block_var[i * 4 + j]

    def lazy():
        # Fresh lazy variable per indexing check, mirroring the dense reference.
        return BlockDiagonalLazyVariable(NonLazyVariable(block_var), n_blocks=4)

    self.assertTrue(approx_equal(dense[0].data, lazy()[0].evaluate().data))
    self.assertTrue(approx_equal(dense[0, :5].data, lazy()[0, :5].evaluate().data))
    # Three-index getitem returns a tensor directly (no .evaluate()).
    self.assertTrue(approx_equal(dense[1:, :5, 2].data, lazy()[1:, :5, 2].data))
def pending_test_inv_matmul():
    """(pending) inv_matmul of an InterpolatedLazyVariable vs explicit W K W^T."""
    def make_indices():
        return Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]))

    def make_values():
        return Variable(torch.Tensor([[1, 2], [0.5, 1], [1, 3]]))

    base_mat = torch.randn(6, 6)
    base_mat = base_mat.t().matmul(base_mat)  # make symmetric PSD
    base_lazy_variable = NonLazyVariable(Variable(base_mat))
    test_matrix = torch.randn(3, 4)
    interp_lazy_var = InterpolatedLazyVariable(
        base_lazy_variable, make_indices(), make_values(), make_indices(), make_values())
    res = interp_lazy_var.inv_matmul(Variable(test_matrix)).data

    # The dense interpolation matrix corresponding to the indices/values above.
    dense_interp = torch.Tensor([
        [0, 0, 1, 2, 0, 0],
        [0, 0, 0, 0.5, 1, 0],
        [0, 0, 0, 0, 1, 3],
    ])
    actual_mat = Variable(dense_interp.matmul(base_mat).matmul(dense_interp.t()))
    actual = gpytorch.inv_matmul(actual_mat, Variable(test_matrix)).data
    assert approx_equal(res, actual)
def test_inv_quad_log_det_many_vectors(self):
    """Batched inv_quad + log_det vs explicit inverse/det, forward and backward."""
    # Forward: v^T A_i^{-1} v summed over vectors, and log|A_i|, per batch entry.
    inverses = torch.cat([self.mats_var_clone[i].inverse().unsqueeze(0) for i in range(2)])
    actual_inv_quad = inverses.matmul(self.vecs_var_clone).mul(self.vecs_var_clone).sum(2).sum(1)
    actual_log_det = torch.cat([self.mats_var_clone[i].det().log().unsqueeze(0) for i in range(2)])

    with gpytorch.settings.num_trace_samples(1000):
        nlv = NonLazyVariable(self.mats_var)
        res_inv_quad, res_log_det = nlv.inv_quad_log_det(inv_quad_rhs=self.vecs_var, log_det=True)
    # Stochastic trace estimation -> loose tolerance.
    self.assertTrue(approx_equal(res_inv_quad.data, actual_inv_quad.data, epsilon=1e-1))
    self.assertTrue(approx_equal(res_log_det.data, actual_log_det.data, epsilon=1e-1))

    # Backward with distinct upstream gradients for each output.
    inv_quad_grad_output = torch.Tensor([3, 4])
    log_det_grad_output = torch.Tensor([4, 2])
    actual_inv_quad.backward(gradient=inv_quad_grad_output)
    actual_log_det.backward(gradient=log_det_grad_output)
    res_inv_quad.backward(gradient=inv_quad_grad_output, retain_graph=True)
    res_log_det.backward(gradient=log_det_grad_output)

    self.assertTrue(approx_equal(self.mats_var_clone.grad.data, self.mats_var.grad.data, epsilon=1e-1))
    self.assertTrue(approx_equal(self.vecs_var_clone.grad.data, self.vecs_var.grad.data))
def test_evaluate(self):
    """evaluate() equals the explicit kron product, unbatched and batched."""
    # Unbatched factors.
    avar, bvar, cvar = Variable(a), Variable(b), Variable(c)
    kp_lazy_var = KroneckerProductLazyVariable(
        NonLazyVariable(avar), NonLazyVariable(bvar), NonLazyVariable(cvar))
    self.assertTrue(approx_equal(kp_lazy_var.evaluate().data, kron(kron(avar, bvar), cvar).data))

    # Batched factors (batch size 3).
    avar = Variable(a.repeat(3, 1, 1))
    bvar = Variable(b.repeat(3, 1, 1))
    cvar = Variable(c.repeat(3, 1, 1))
    kp_lazy_var = KroneckerProductLazyVariable(
        NonLazyVariable(avar), NonLazyVariable(bvar), NonLazyVariable(cvar))
    self.assertTrue(approx_equal(kp_lazy_var.evaluate().data, kron(kron(avar, bvar), cvar).data))
def test_inv_quad_log_det_many_vectors(self):
    """inv_quad_log_det on a batch of two matrices: forward values and gradients.

    The log-det gradient for the reference clone is injected manually via the
    closed form d log|A| / dA = A^{-1}, so the statement order below (backward,
    then in-place grad accumulation, then the lazy-variable backwards) matters.
    """
    # Forward pass: actual_inv_quad[i] = sum_j v_j^T mats[i]^{-1} v_j.
    actual_inv_quad = torch.cat([
        self.mats_var_clone[0].inverse().unsqueeze(0),
        self.mats_var_clone[1].inverse().unsqueeze(0),
    ]).matmul(self.vecs_var_clone).mul(self.vecs_var_clone).sum(2).sum(1)
    with gpytorch.settings.num_trace_samples(1000):
        nlv = NonLazyVariable(self.mats_var)
        res_inv_quad, res_log_det = nlv.inv_quad_log_det(inv_quad_rhs=self.vecs_var, log_det=True)
    for i in range(self.mats_var.size(0)):
        # Stochastic trace estimation -> loose tolerance (1 decimal place).
        self.assert_scalar_almost_equal(res_inv_quad.data[i], actual_inv_quad.data[i], places=1)
        # NOTE(review): self.log_dets presumably holds precomputed reference
        # log-determinants set up by the fixture -- confirm against setUp.
        self.assert_scalar_almost_equal(res_log_det.data[i], self.log_dets[i], places=1)

    # Backward
    inv_quad_grad_output = torch.Tensor([3, 4])
    log_det_grad_output = torch.Tensor([4, 2])
    actual_inv_quad.backward(gradient=inv_quad_grad_output)
    # Closed-form log-det gradient: log_det_grad_output[i] * mats[i]^{-1}.
    mat_log_det_grad = torch.cat([
        self.mats_var_clone[0].data.inverse().mul(log_det_grad_output[0]).unsqueeze(0),
        self.mats_var_clone[1].data.inverse().mul(log_det_grad_output[1]).unsqueeze(0),
    ])
    # Accumulate on top of the inv-quad gradient already stored in .grad.
    self.mats_var_clone.grad.data.add_(mat_log_det_grad)
    res_inv_quad.backward(gradient=inv_quad_grad_output, retain_graph=True)
    res_log_det.backward(gradient=log_det_grad_output)
    self.assertTrue(approx_equal(self.mats_var_clone.grad.data, self.mats_var.grad.data, epsilon=1e-1))
    self.assertTrue(approx_equal(self.vecs_var_clone.grad.data, self.vecs_var.grad.data))
def test_batch_diag(self):
    """Batched diag() of an InterpolatedLazyVariable matches the evaluated diagonals."""
    left_idx = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]).repeat(5, 1, 1))
    left_val = Variable(torch.Tensor([[1, 1], [1, 1], [1, 1]]).repeat(5, 1, 1))
    right_idx = Variable(torch.LongTensor([[0, 1], [1, 2], [2, 3]]).repeat(5, 1, 1))
    right_val = Variable(torch.Tensor([[1, 1], [1, 1], [1, 1]]).repeat(5, 1, 1))

    base_mat = torch.randn(5, 6, 6)
    base_mat = base_mat.transpose(1, 2).matmul(base_mat)  # each batch entry PSD
    base_lazy_variable = NonLazyVariable(Variable(base_mat, requires_grad=True))
    interp_lazy_var = InterpolatedLazyVariable(
        base_lazy_variable, left_idx, left_val, right_idx, right_val)

    evaluated = interp_lazy_var.evaluate()
    expected_diag = torch.stack([evaluated[i].diag() for i in range(5)])
    self.assertTrue(approx_equal(expected_diag.data, interp_lazy_var.diag().data))
def test_matmul(self):
    """Block-diagonal matmul vs dense block-diagonal: values and gradients."""
    rhs = torch.randn(4 * 8, 4)
    rhs_var = Variable(rhs, requires_grad=True)
    rhs_var_copy = Variable(rhs, requires_grad=True)
    block_var = Variable(blocks, requires_grad=True)
    block_var_copy = Variable(blocks, requires_grad=True)

    dense = Variable(torch.zeros(32, 32))
    for idx in range(8):
        span = slice(idx * 4, (idx + 1) * 4)
        dense[span, span] = block_var_copy[idx]

    res = BlockDiagonalLazyVariable(NonLazyVariable(block_var)).matmul(rhs_var)
    actual = dense.matmul(rhs_var_copy)
    self.assertTrue(approx_equal(res.data, actual.data))

    actual.sum().backward()
    res.sum().backward()
    self.assertTrue(approx_equal(rhs_var.grad.data, rhs_var_copy.grad.data))
    self.assertTrue(approx_equal(block_var.grad.data, block_var_copy.grad.data))
def test_diag(self):
    """diag() of an InterpolatedLazyVariable equals the evaluated matrix diagonal."""
    left_idx = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]))
    left_val = Variable(torch.Tensor([[1, 1], [1, 1], [1, 1]]))
    right_idx = Variable(torch.LongTensor([[0, 1], [1, 2], [2, 3]]))
    right_val = Variable(torch.Tensor([[1, 1], [1, 1], [1, 1]]))

    base_mat = torch.randn(6, 6)
    base_mat = base_mat.t().matmul(base_mat)  # symmetric PSD
    base_lazy_variable = NonLazyVariable(Variable(base_mat, requires_grad=True))
    interp_lazy_var = InterpolatedLazyVariable(
        base_lazy_variable, left_idx, left_val, right_idx, right_val)

    evaluated = interp_lazy_var.evaluate()
    self.assertTrue(approx_equal(evaluated.diag().data, interp_lazy_var.diag().data))
def test_derivatives():
    """Gradients through InterpolatedLazyVariable.matmul match the dense W K W^T form."""
    index_block = torch.LongTensor([[2, 3], [3, 4], [4, 5]])
    value_block = torch.Tensor([[1, 2], [0.5, 1], [1, 3]])
    left_interp_indices = Variable(index_block).repeat(5, 3, 1)
    left_interp_values = Variable(value_block).repeat(5, 3, 1)
    right_interp_indices = Variable(index_block).repeat(5, 3, 1)
    right_interp_values = Variable(value_block).repeat(5, 3, 1)

    base_mat = torch.randn(5, 6, 6)
    base_mat = base_mat.transpose(1, 2).matmul(base_mat)
    test_matrix = Variable(torch.randn(1, 9, 4))
    base_lazy_variable = NonLazyVariable(Variable(base_mat, requires_grad=True))
    interp_lazy_var = InterpolatedLazyVariable(
        base_lazy_variable, left_interp_indices, left_interp_values,
        right_interp_indices, right_interp_values)
    interp_lazy_var.matmul(test_matrix).sum().backward()

    # Dense reference: W K W^T with the interpolation matrix written out.
    base_variable2 = Variable(base_mat, requires_grad=True)
    dense_interp = Variable(torch.Tensor([
        [0, 0, 1, 2, 0, 0],
        [0, 0, 0, 0.5, 1, 0],
        [0, 0, 0, 0, 1, 3],
    ]).repeat(5, 3, 1))  # (5, 9, 6): three stacked copies per batch entry
    actual = dense_interp.matmul(base_variable2).matmul(dense_interp.transpose(-1, -2))
    actual = actual.matmul(test_matrix)
    actual.sum().backward()
    assert approx_equal(base_lazy_variable.var.grad.data, base_variable2.grad.data)
def test_getitem_batch():
    """Slicing a SumInterpolatedLazyVariable matches the summed base slices over the batch."""
    left_idx = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]).repeat(5, 1, 1))
    left_val = Variable(torch.Tensor([[1, 1], [1, 1], [1, 1]]).repeat(5, 1, 1))
    right_idx = Variable(torch.LongTensor([[0, 1], [1, 2], [2, 3]]).repeat(5, 1, 1))
    right_val = Variable(torch.Tensor([[1, 1], [1, 1], [1, 1]]).repeat(5, 1, 1))

    base_mat = torch.randn(5, 6, 6)
    base_mat = base_mat.transpose(1, 2).matmul(base_mat)
    base_lazy_variable = NonLazyVariable(Variable(base_mat, requires_grad=True))
    interp_lazy_var = SumInterpolatedLazyVariable(
        base_lazy_variable, left_idx, left_val, right_idx, right_val)

    # With unit interpolation weights, the result is a sum of four base slices.
    combined = (base_lazy_variable[:, 2:5, 0:3]
                + base_lazy_variable[:, 2:5, 1:4]
                + base_lazy_variable[:, 3:6, 0:3]
                + base_lazy_variable[:, 3:6, 1:4])
    actual = combined.evaluate()
    assert approx_equal(interp_lazy_var[:1, :2].evaluate().data, actual[:, :1, :2].data.sum(0))
    assert approx_equal(interp_lazy_var[:1, 2].data, actual[:, :1, 2].data.sum(0))
def forward(self, x1, x2, **kwargs):
    """Return the grid kernel matrix K_UU as a lazy variable.

    Fixes: (1) the input check compared ``x1`` against the inducing points
    twice and never validated ``x2``; (2) the non-Toeplitz branch recomputed
    the identical kernel matrix 100 times in a loop -- it is computed once.
    """
    # Both inputs must be exactly the stored inducing points.
    if not torch.equal(x1.data, self._inducing_points) or \
            not torch.equal(x2.data, self._inducing_points):
        raise RuntimeError(
            'The kernel should only receive the inducing points as input')
    # In eval mode, reuse the cached kernel matrix when available.
    if not self.training and hasattr(self, '_cached_kernel_mat'):
        return self._cached_kernel_mat

    d = x1.size(1)
    grid_var = Variable(self.grid)
    if d > 1:
        # One kernel row per dimension, combined as a Kronecker product.
        k_UUs = Variable(x1.data.new(d, self.grid_size).zero_())
        for i in range(d):
            k_UUs[i] = self.base_kernel_module(grid_var[i, 0], grid_var[i], **kwargs).squeeze()
        K_XX = KroneckerProductLazyVariable(k_UUs)
    elif gpytorch.functions.use_toeplitz:
        # 1-D grid: only the first Toeplitz column is needed.
        k_UU = self.base_kernel_module(grid_var[0, 0], grid_var[0], **kwargs).squeeze()
        K_XX = ToeplitzLazyVariable(k_UU)
    else:
        # 1-D grid without Toeplitz structure: evaluate the full matrix once.
        k_UU = self.base_kernel_module(grid_var[0], grid_var[0], **kwargs).squeeze()
        K_XX = NonLazyVariable(k_UU)

    if not self.training:
        self._cached_kernel_mat = K_XX
    return K_XX
def test_batch_matmul():
    """Batched block-diagonal matmul vs dense construction: values and gradients."""
    rhs = torch.randn(2, 4 * 4, 4)
    rhs_var = Variable(rhs, requires_grad=True)
    rhs_var_copy = Variable(rhs, requires_grad=True)
    block_var = Variable(blocks, requires_grad=True)
    block_var_copy = Variable(blocks, requires_grad=True)

    dense = Variable(torch.zeros(2, 16, 16))
    for i in range(2):
        for j in range(4):
            span = slice(j * 4, (j + 1) * 4)
            dense[i, span, span] = block_var_copy[i * 4 + j]

    res = BlockDiagonalLazyVariable(NonLazyVariable(block_var), n_blocks=4).matmul(rhs_var)
    actual = dense.matmul(rhs_var_copy)
    assert approx_equal(res.data, actual.data)

    actual.sum().backward()
    res.sum().backward()
    assert approx_equal(rhs_var.grad.data, rhs_var_copy.grad.data)
    assert approx_equal(block_var.grad.data, block_var_copy.grad.data)
def test_inv_quad_log_det_many_vectors(self):
    """Single-matrix inv_quad + log_det vs explicit inverse/det, forward and backward."""
    # Forward pass
    solves = self.mat_var_clone.inverse().matmul(self.vecs_var_clone)
    actual_inv_quad = solves.mul(self.vecs_var_clone).sum()
    actual_log_det = self.mat_var_clone.det().log()
    with gpytorch.settings.num_trace_samples(1000):
        nlv = NonLazyVariable(self.mat_var)
        res_inv_quad, res_log_det = nlv.inv_quad_log_det(inv_quad_rhs=self.vecs_var, log_det=True)
    # Stochastic estimation -> 1 decimal place tolerance.
    self.assertAlmostEqual(res_inv_quad.item(), actual_inv_quad.item(), places=1)
    self.assertAlmostEqual(res_log_det.item(), actual_log_det.item(), places=1)

    # Backward
    inv_quad_grad_output = torch.Tensor([3])
    log_det_grad_output = torch.Tensor([4])
    actual_inv_quad.backward(gradient=inv_quad_grad_output)
    actual_log_det.backward(log_det_grad_output)
    res_inv_quad.backward(gradient=inv_quad_grad_output, retain_graph=True)
    res_log_det.backward(gradient=log_det_grad_output)
    self.assertTrue(approx_equal(self.mat_var_clone.grad.data, self.mat_var.grad.data, epsilon=1e-1))
    self.assertTrue(approx_equal(self.vecs_var_clone.grad.data, self.vecs_var.grad.data))
def test_inv_matmul(self):
    """inv_matmul of an InterpolatedLazyVariable vs gpytorch.inv_matmul on the
    explicitly-assembled W K W^T matrix, including gradients.

    Fix: the interpolation *index* Variables no longer request gradients --
    integer tensors cannot require grad, and indices are not differentiable.
    """
    base_lazy_variable_mat = torch.randn(6, 6)
    base_lazy_variable_mat = base_lazy_variable_mat.t().matmul(base_lazy_variable_mat)
    test_matrix = torch.randn(3, 4)

    left_interp_indices = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]))
    left_interp_values = Variable(torch.Tensor([[1, 2], [0.5, 1], [1, 3]]), requires_grad=True)
    right_interp_indices = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]))
    right_interp_values = Variable(torch.Tensor([[1, 2], [0.5, 1], [1, 3]]), requires_grad=True)
    left_interp_values_copy = Variable(left_interp_values.data, requires_grad=True)
    right_interp_values_copy = Variable(right_interp_values.data, requires_grad=True)
    base_lazy_variable = Variable(base_lazy_variable_mat, requires_grad=True)
    base_lazy_variable_copy = Variable(base_lazy_variable_mat, requires_grad=True)
    test_matrix_var = Variable(test_matrix, requires_grad=True)
    test_matrix_var_copy = Variable(test_matrix, requires_grad=True)

    interp_lazy_var = InterpolatedLazyVariable(
        NonLazyVariable(base_lazy_variable),
        left_interp_indices,
        left_interp_values,
        right_interp_indices,
        right_interp_values,
    )
    res = interp_lazy_var.inv_matmul(test_matrix_var)

    # Assemble the dense interpolation matrices W from (indices, values).
    left_matrix = Variable(torch.zeros(3, 6))
    right_matrix = Variable(torch.zeros(3, 6))
    left_matrix.scatter_(1, left_interp_indices, left_interp_values_copy)
    right_matrix.scatter_(1, right_interp_indices, right_interp_values_copy)
    actual_mat = left_matrix.matmul(base_lazy_variable_copy).matmul(right_matrix.transpose(-1, -2))
    actual = gpytorch.inv_matmul(actual_mat, test_matrix_var_copy)
    self.assertTrue(approx_equal(res.data, actual.data))

    # Backward pass
    res.sum().backward()
    actual.sum().backward()
    self.assertTrue(approx_equal(base_lazy_variable.grad.data, base_lazy_variable_copy.grad.data))
    self.assertTrue(approx_equal(left_interp_values.grad.data, left_interp_values_copy.grad.data))
def test_diag(self):
    """KP diag() equals the diagonal of the explicit kron product, unbatched and batched."""
    avar, bvar, cvar = Variable(a), Variable(b), Variable(c)
    kp_lazy_var = KroneckerProductLazyVariable(
        NonLazyVariable(avar), NonLazyVariable(bvar), NonLazyVariable(cvar))
    self.assertTrue(approx_equal(kp_lazy_var.diag().data, kron(kron(avar, bvar), cvar).diag().data))

    # Batched factors (batch size 3): diagonal of each batch entry.
    avar = Variable(a.repeat(3, 1, 1))
    bvar = Variable(b.repeat(3, 1, 1))
    cvar = Variable(c.repeat(3, 1, 1))
    kp_lazy_var = KroneckerProductLazyVariable(
        NonLazyVariable(avar), NonLazyVariable(bvar), NonLazyVariable(cvar))
    dense = kron(kron(avar, bvar), cvar)
    expected = torch.stack([dense[i].diag() for i in range(3)])
    self.assertTrue(approx_equal(kp_lazy_var.diag().data, expected.data))
def test_getitem(self):
    """2-D slicing of a BlockDiagonalLazyVariable matches the dense block-diagonal."""
    block_var = Variable(blocks, requires_grad=True)
    dense = Variable(torch.zeros(32, 32))
    for idx in range(8):
        span = slice(idx * 4, (idx + 1) * 4)
        dense[span, span] = block_var[idx]
    res = BlockDiagonalLazyVariable(NonLazyVariable(block_var))[:5, 2]
    self.assertTrue(approx_equal(dense[:5, 2].data, res.data))
def test_root_decomposition_forward():
    """root_decomposition(): R R^T reconstructs A to within 1e-2."""
    mat = torch.randn(5, 5)
    mat = torch.matmul(mat, mat.t())  # symmetric PSD
    lazy_mat = NonLazyVariable(Variable(mat, requires_grad=True))
    root = lazy_mat.root_decomposition()
    reconstruction_error = (root.matmul(root.transpose(-1, -2)).data - mat).abs()
    assert torch.max(reconstruction_error) < 1e-2
def test_batch_matmul():
    """Batched InterpolatedLazyVariable.matmul vs explicit W K W^T: values and grads.

    Fix: the final assertion was split so the statement ended at a bare
    ``assert`` with its expression orphaned on the next line; the comparison of
    the left interpolation-value gradients is now part of the assert.
    """
    left_interp_indices = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]).repeat(5, 3, 1))
    left_interp_values = Variable(
        torch.Tensor([[1, 2], [0.5, 1], [1, 3]]).repeat(5, 3, 1), requires_grad=True)
    left_interp_values_copy = Variable(left_interp_values.data, requires_grad=True)
    right_interp_indices = Variable(torch.LongTensor([[0, 1], [1, 2], [2, 3]]).repeat(5, 3, 1))
    right_interp_values = Variable(
        torch.Tensor([[1, 2], [2, 0.5], [1, 3]]).repeat(5, 3, 1), requires_grad=True)
    right_interp_values_copy = Variable(right_interp_values.data, requires_grad=True)

    base_lazy_variable_mat = torch.randn(5, 6, 6)
    base_lazy_variable_mat = base_lazy_variable_mat.transpose(-1, -2).matmul(base_lazy_variable_mat)
    base_variable = Variable(base_lazy_variable_mat, requires_grad=True)
    base_variable_copy = Variable(base_lazy_variable_mat, requires_grad=True)
    base_lazy_variable = NonLazyVariable(base_variable)
    test_matrix = torch.randn(5, 9, 4)

    interp_lazy_var = InterpolatedLazyVariable(
        base_lazy_variable, left_interp_indices, left_interp_values,
        right_interp_indices, right_interp_values)
    res = interp_lazy_var.matmul(Variable(test_matrix))

    # Assemble the dense interpolation matrices batch entry by batch entry.
    left_matrix_comps = []
    right_matrix_comps = []
    for i in range(5):
        left_matrix_comp = Variable(torch.zeros(9, 6))
        right_matrix_comp = Variable(torch.zeros(9, 6))
        left_matrix_comp.scatter_(1, left_interp_indices[i], left_interp_values_copy[i])
        right_matrix_comp.scatter_(1, right_interp_indices[i], right_interp_values_copy[i])
        left_matrix_comps.append(left_matrix_comp.unsqueeze(0))
        right_matrix_comps.append(right_matrix_comp.unsqueeze(0))
    left_matrix = torch.cat(left_matrix_comps)
    right_matrix = torch.cat(right_matrix_comps)

    actual = left_matrix.matmul(base_variable_copy).matmul(right_matrix.transpose(-1, -2))
    actual = actual.matmul(Variable(test_matrix))
    assert approx_equal(res.data, actual.data)

    res.sum().backward()
    actual.sum().backward()
    assert approx_equal(base_variable.grad.data, base_variable_copy.grad.data)
    assert approx_equal(left_interp_values.grad.data, left_interp_values_copy.grad.data)
def test_diag():
    """diag() of a BlockDiagonalLazyVariable equals the dense diagonal."""
    block_var = Variable(blocks, requires_grad=True)
    dense = Variable(torch.zeros(32, 32))
    for idx in range(8):
        span = slice(idx * 4, (idx + 1) * 4)
        dense[span, span] = block_var[idx]
    res = BlockDiagonalLazyVariable(NonLazyVariable(block_var)).diag()
    assert approx_equal(dense.diag().data, res.data)
def test_root_decomposition(self):
    """root_decomposition(): forward reconstruction and gradient through the trace."""
    # Forward
    root = NonLazyVariable(self.mat_var).root_decomposition()
    reconstruction = root.matmul(root.transpose(-1, -2))
    self.assertTrue(approx_equal(reconstruction.data, self.mat_var.data))

    # Backward
    reconstruction.trace().backward()
    self.mat_var_clone.trace().backward()
    self.assertTrue(approx_equal(self.mat_var.grad.data, self.mat_var_clone.grad.data))
def test_root_decomposition_inv_forward(self):
    """root_inv_decomposition(): R R^T approximates A^{-1} elementwise.

    Fix: the relative error was computed as ``diff / actual`` with the signed
    values -- entries where ``actual`` is negative produced negative ratios
    that passed ``assertLess`` trivially. Divide by ``actual.abs()`` instead.
    """
    a = torch.randn(5, 5)
    a = torch.matmul(a, a.t())  # symmetric PSD
    a_lv = NonLazyVariable(Variable(a, requires_grad=True))
    a_root = a_lv.root_inv_decomposition()
    actual = a.inverse()
    diff = (a_root.matmul(a_root.transpose(-1, -2)).data - actual).abs()
    self.assertLess(torch.max(diff / actual.abs()), 1e-2)
def test_batch_diag(self):
    """Batched diag() of a BlockDiagonalLazyVariable matches the dense diagonals."""
    block_var = Variable(blocks, requires_grad=True)
    dense = Variable(torch.zeros(2, 16, 16))
    for i in range(2):
        for j in range(4):
            span = slice(j * 4, (j + 1) * 4)
            dense[i, span, span] = block_var[i * 4 + j]
    res = BlockDiagonalLazyVariable(NonLazyVariable(block_var), n_blocks=4).diag()
    expected = torch.cat([dense[i].diag().unsqueeze(0) for i in range(2)])
    self.assertTrue(approx_equal(expected.data, res.data))
def test_log_det_only(self):
    """Stochastic log_det vs the fixture's reference value, forward and gradient."""
    # Forward pass
    with gpytorch.settings.num_trace_samples(1000):
        res = NonLazyVariable(self.mat_var).log_det()
    self.assert_scalar_almost_equal(res, self.log_det, places=1)

    # Backward: d log|A| / dA = A^{-1}, scaled by the incoming gradient.
    grad_output = torch.Tensor([3])
    expected_grad = self.mat_var_clone.data.inverse().mul(grad_output)
    res.backward(gradient=grad_output)
    self.assertTrue(approx_equal(expected_grad, self.mat_var.grad.data, epsilon=1e-1))
def test_matmul(self):
    """InterpolatedLazyVariable.matmul vs explicit W K W^T: values and gradients."""
    left_idx = Variable(torch.LongTensor([[2, 3], [3, 4], [4, 5]]).repeat(3, 1))
    left_val = Variable(torch.Tensor([[1, 2], [0.5, 1], [1, 3]]).repeat(3, 1), requires_grad=True)
    left_val_copy = Variable(left_val.data, requires_grad=True)
    right_idx = Variable(torch.LongTensor([[0, 1], [1, 2], [2, 3]]).repeat(3, 1))
    right_val = Variable(torch.Tensor([[1, 2], [2, 0.5], [1, 3]]).repeat(3, 1), requires_grad=True)
    right_val_copy = Variable(right_val.data, requires_grad=True)

    base_mat = torch.randn(6, 6)
    base_mat = base_mat.t().matmul(base_mat)  # symmetric PSD
    base_variable = Variable(base_mat, requires_grad=True)
    base_variable_copy = Variable(base_mat, requires_grad=True)
    test_matrix = torch.randn(9, 4)

    interp_lazy_var = InterpolatedLazyVariable(
        NonLazyVariable(base_variable), left_idx, left_val, right_idx, right_val)
    res = interp_lazy_var.matmul(Variable(test_matrix))

    # Dense interpolation matrices assembled from (indices, values).
    left_matrix = Variable(torch.zeros(9, 6))
    right_matrix = Variable(torch.zeros(9, 6))
    left_matrix.scatter_(1, left_idx, left_val_copy)
    right_matrix.scatter_(1, right_idx, right_val_copy)
    actual = left_matrix.matmul(base_variable_copy).matmul(
        right_matrix.t()).matmul(Variable(test_matrix))
    self.assertTrue(approx_equal(res.data, actual.data))

    res.sum().backward()
    actual.sum().backward()
    self.assertTrue(approx_equal(base_variable.grad.data, base_variable_copy.grad.data))
    self.assertTrue(approx_equal(left_val.grad.data, left_val_copy.grad.data))
def test_log_det_only(self):
    """Stochastic log_det vs exact det().log(), forward and gradient."""
    # Forward pass
    with gpytorch.settings.num_trace_samples(1000):
        res = NonLazyVariable(self.mat_var).log_det()
    actual = self.mat_var_clone.det().log()
    self.assertAlmostEqual(res.item(), actual.item(), places=1)

    # Backward
    actual.backward()
    res.backward()
    self.assertTrue(approx_equal(self.mat_var_clone.grad.data, self.mat_var.grad.data, epsilon=1e-1))
def test_inv_quad_only_many_vectors(self):
    """inv_quad with multiple vectors vs explicit inverse solve, forward and backward."""
    # Forward pass
    res = NonLazyVariable(self.mat_var).inv_quad(self.vecs_var)
    solves = self.mat_var_clone.inverse().matmul(self.vecs_var_clone)
    actual = solves.mul(self.vecs_var_clone).sum()
    self.assertAlmostEqual(res.item(), actual.item(), places=1)

    # Backward
    actual.backward()
    res.backward()
    self.assertTrue(approx_equal(self.mat_var_clone.grad.data, self.mat_var.grad.data, epsilon=1e-1))
    self.assertTrue(approx_equal(self.vecs_var_clone.grad.data, self.vecs_var.grad.data))
def test_matmul_multiple_vecs(self):
    """matmul with a matrix of vectors vs dense matmul, forward and backward.

    Fix: ``torch.Tensor(3, 4)`` allocates *uninitialized* memory, so the
    backward check drove gradients with garbage values and was
    nondeterministic; use a well-defined random gradient instead.
    """
    # Forward
    res = NonLazyVariable(self.mat_var).matmul(self.vecs_var)
    actual = self.mat_var_clone.matmul(self.vecs_var_clone)
    self.assertTrue(approx_equal(res, actual))

    # Backward
    grad_output = torch.randn(3, 4)
    res.backward(gradient=grad_output)
    actual.backward(gradient=grad_output)
    self.assertTrue(approx_equal(self.mat_var_clone.grad.data, self.mat_var.grad.data))
    self.assertTrue(approx_equal(self.vecs_var_clone.grad.data, self.vecs_var.grad.data))