def test_mvn_kl_divergence_backward():
    """Gradients of the sampled KL divergence should agree with the analytic KL.

    Computes KL(N(mu1, U'U) || N(mu2, K)) in closed form, backprops through it,
    then does the same through ``gpytorch.mvn_kl_divergence`` and compares the
    gradients w.r.t. K, mu1, mu2, and U under relative-L1 tolerances.
    """
    grid = Variable(torch.linspace(0, 1, 4))
    kernel = RBFKernel()
    kernel.initialize(log_lengthscale=-4)
    K = Variable(kernel.forward(grid.unsqueeze(1), grid.unsqueeze(1)).data,
                 requires_grad=True)
    mu1 = Variable(torch.randn(4), requires_grad=True)
    mu2 = Variable(torch.randn(4), requires_grad=True)

    # Random upper-triangular factor forced to have a positive diagonal,
    # so U'U is a valid covariance and log-diag terms are well defined.
    factor = torch.randn(4, 4).triu()
    U = Variable(factor.mul(factor.diag().sign().unsqueeze(1).expand_as(factor).triu()),
                 requires_grad=True)

    diff = mu2 - mu1
    # Closed-form KL divergence between the two Gaussians.
    actual = 0.5 * (
        _det(K).log()
        + diff.dot(K.inverse().mv(diff))
        + K.inverse().mm(U.t().mm(U)).trace()
        - U.diag().log().sum(0) * 2
        - len(diff)
    )
    actual.backward()

    actual_K_grad = K.grad.data.clone()
    actual_mu1_grad = mu1.grad.data.clone()
    actual_mu2_grad = mu2.grad.data.clone()
    actual_U_grad = U.grad.data.clone()

    # Zero the gradients before the second backward pass.
    for var in (K, mu1, mu2, U):
        var.grad.data.fill_(0)

    res = gpytorch.mvn_kl_divergence(mu1, U, mu2, K, num_samples=10000)
    res.backward()

    res_K_grad = K.grad.data
    res_mu1_grad = mu1.grad.data
    res_mu2_grad = mu2.grad.data
    res_U_grad = U.grad.data

    # Tolerances are looser where Monte-Carlo noise dominates (K and U grads).
    assert (res_K_grad - actual_K_grad).abs().sum() / actual_K_grad.abs().sum() < 1e-1
    assert (res_mu1_grad - actual_mu1_grad).abs().sum() / actual_mu1_grad.abs().sum() < 1e-5
    assert (res_mu2_grad - actual_mu2_grad).abs().sum() / actual_mu2_grad.abs().sum() < 1e-5
    assert (res_U_grad - actual_U_grad).abs().sum() / actual_U_grad.abs().sum() < 1e-2
def test_mvn_kl_divergence_forward():
    """Sampled KL divergence value should be close to the analytic KL.

    Builds KL(N(mu1, U'U) || N(mu2, K)) in closed form and checks that the
    Monte-Carlo estimate from ``gpytorch.mvn_kl_divergence`` lands within 15%
    relative error.
    """
    grid = Variable(torch.linspace(0, 1, 4))
    kernel = RBFKernel()
    kernel.initialize(log_lengthscale=-4)
    K = kernel.forward(grid.unsqueeze(1), grid.unsqueeze(1))
    mu1 = Variable(torch.randn(4), requires_grad=True)
    mu2 = Variable(torch.randn(4), requires_grad=True)

    # Random upper-triangular factor forced to have a positive diagonal,
    # so U'U is a valid covariance and log-diag terms are well defined.
    factor = torch.randn(4, 4).triu()
    U = Variable(factor.mul(factor.diag().sign().unsqueeze(1).expand_as(factor).triu()),
                 requires_grad=True)

    diff = mu2 - mu1
    # Closed-form KL divergence between the two Gaussians.
    actual = 0.5 * (
        _det(K).log()
        + diff.dot(K.inverse().mv(diff))
        + K.inverse().mm(U.t().mm(U)).trace()
        - U.diag().log().sum(0) * 2
        - len(diff)
    )

    res = gpytorch.mvn_kl_divergence(mu1, U, mu2, K, num_samples=1000)
    # 15% relative tolerance for the Monte-Carlo estimate.
    assert all(torch.abs((res.data - actual.data) / actual.data) < 0.15)
import torch
import matplotlib.pyplot as plt
import math
from gpytorch.kernels import RBFKernel
from torch.autograd import Variable
import sys
sys.path.append('../kernels')
from spectral_gp_kernel import SpectralGPKernel

# Input grid: 201 evenly spaced points on [-100, 100], shaped (201, 1).
x = Variable(torch.linspace(-100, 100, 201)).unsqueeze(1)

# Generate kernel values for an RBF kernel with the lengthscale pinned to 20
# (identical lower/upper bounds), evaluated against the single point 0.
kernel = RBFKernel(log_lengthscale_bounds=(math.log(20), math.log(20)))
k = kernel.forward(x, Variable(torch.Tensor([0])).unsqueeze(1))

# Extract an approximate spectral density: project the kernel values onto
# cosines at 500 midpoint frequencies covering (0, pi).
width = math.pi / 500
omega = torch.linspace(width / 2, math.pi - width / 2, 500)
s = torch.zeros(500)
for ii in range(500):
    s[ii] = torch.dot(k.data.squeeze(), torch.cos(x.data.squeeze() * omega[ii]))

# Reconstruct the kernel from the estimated spectral density.
kernel_rec = SpectralGPKernel(omega.unsqueeze(1), s.unsqueeze(1))
k_rec = kernel_rec.forward(x, Variable(torch.Tensor([0])).unsqueeze(1))

# Report the reconstruction error.
# Fixed: original used a Python 2 print statement ("print torch.norm(...)"),
# which is a SyntaxError under Python 3.
print(torch.norm(k_rec - k))