# Imports assumed from the original test module (module paths are assumptions,
# not shown in the snippet itself).
import torch
from torch.autograd import Variable

import gpytorch
from gpytorch import utils
from gpytorch.kernels import RBFKernel, GridInterpolationKernel


def test_toeplitz_mvn_kl_divergence_forward():
    x = Variable(torch.linspace(0, 1, 5))
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    covar_module = GridInterpolationKernel(rbf_covar)
    covar_module.initialize_interpolation_grid(10, grid_bounds=(0, 1))
    covar_x = covar_module.forward(x.unsqueeze(1), x.unsqueeze(1))

    c = Variable(covar_x.c.data, requires_grad=True)
    mu1 = Variable(torch.randn(10), requires_grad=True)
    mu2 = Variable(torch.randn(10), requires_grad=True)

    # Dense reference: T[i, j] = c[|i - j|] (symmetric Toeplitz, first column c)
    T = Variable(torch.zeros(len(c), len(c)))
    for i in range(len(c)):
        for j in range(len(c)):
            T[i, j] = utils.toeplitz.toeplitz_getitem(c, c, i, j)

    # Build a random upper-triangular matrix and flip the sign of any row whose
    # diagonal entry is negative, so U is a valid Cholesky factor
    # (upper triangular with a positive diagonal).
    U = torch.randn(10, 10).triu()
    U = Variable(U.mul(U.diag().sign().unsqueeze(1).expand_as(U).triu()),
                 requires_grad=True)

    # KL computed from the explicit dense matrix T ...
    actual = gpytorch.mvn_kl_divergence(mu1, U, mu2, T, num_samples=1000)

    # ... and from the lazy Toeplitz representation returned by the grid
    # interpolation kernel; the two paths should agree up to sampling error.
    res = gpytorch.mvn_kl_divergence(mu1, U, mu2, covar_x, num_samples=1000)

    assert all(torch.abs((res.data - actual.data) / actual.data) < 0.15)
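
# A vectorized equivalent of the dense reconstruction loop above (a sketch;
# it assumes c is the first column of a symmetric Toeplitz matrix, so that
# T[i, j] = c[|i - j|]):
def dense_symmetric_toeplitz(c):
    n = len(c)
    idx = torch.arange(0, n).long()
    # |i - j| for every (i, j) pair, flattened row-major.
    diffs = (idx.unsqueeze(1).expand(n, n) -
             idx.unsqueeze(0).expand(n, n)).abs().view(-1)
    return c.index_select(0, Variable(diffs)).view(n, n)
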
    def marginal_log_likelihood(self, output, train_y, num_samples=10):
        chol_var_covar = self.chol_variational_covar.triu()

        # Negate each row whose diagonal entry is negative (a valid Cholesky
        # factor must have strictly positive diagonal elements).
        chol_var_covar = chol_var_covar.mul(
            chol_var_covar.diag().sign().unsqueeze(1).expand_as(
                chol_var_covar).triu())

        _, train_covar = output.representation()
        inducing_output = self.forward(*self.inducing_points)
        inducing_mean = inducing_output.mean()

        train_covar = gpytorch.add_jitter(train_covar)

        log_likelihood = gpytorch.monte_carlo_log_likelihood(
            self.prior_model.likelihood.log_probability, train_y,
            self.variational_mean, chol_var_covar, train_covar, num_samples)

        kl_divergence = gpytorch.mvn_kl_divergence(self.variational_mean,
                                                   chol_var_covar,
                                                   inducing_mean, train_covar,
                                                   num_samples)

        return log_likelihood.squeeze() - kl_divergence
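
# The reference computations in the tests below call a _det helper that is not
# included in this snippet. A minimal sketch, assuming it simply computes the
# determinant of a small square Variable by cofactor expansion (older PyTorch
# Variables had no differentiable .det()):
def _det(A):
    """Determinant of a square matrix by cofactor expansion along row 0."""
    n = A.size(0)
    if n == 1:
        return A[0, 0]
    det = 0
    for j in range(n):
        # Minor of entry (0, j): drop row 0 and column j.
        if j == 0:
            minor = A[1:, 1:]
        elif j == n - 1:
            minor = A[1:, :j]
        else:
            minor = torch.cat([A[1:, :j], A[1:, j + 1:]], 1)
        det = det + (-1) ** j * A[0, j] * _det(minor)
    return det
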
def test_toeplitz_mvn_kl_divergence_backward():
    x = Variable(torch.linspace(0, 1, 5))
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    covar_module = GridInterpolationKernel(rbf_covar)
    covar_module.initialize_interpolation_grid(4, grid_bounds=(0, 1))
    covar_x = covar_module.forward(x.unsqueeze(1), x.unsqueeze(1))
    covar_x.c = Variable(covar_x.c.data, requires_grad=True)

    c = covar_x.c
    mu1 = Variable(torch.randn(4), requires_grad=True)
    mu2 = Variable(torch.randn(4), requires_grad=True)

    mu_diff = mu2 - mu1

    T = Variable(torch.zeros(len(c), len(c)))
    for i in range(len(c)):
        for j in range(len(c)):
            T[i, j] = utils.toeplitz.toeplitz_getitem(c, c, i, j)

    U = torch.randn(4, 4).triu()
    U = Variable(U.mul(U.diag().sign().unsqueeze(1).expand_as(U).triu()),
                 requires_grad=True)

    # Closed-form KL(N(mu1, U^T U) || N(mu2, T)):
    #   0.5 * (log det T - 2 * sum(log diag U) + tr(T^{-1} U^T U)
    #          + (mu2 - mu1)^T T^{-1} (mu2 - mu1) - n)
    actual = 0.5 * (_det(T).log() + mu_diff.dot(T.inverse().mv(mu_diff)) +
                    T.inverse().mm(U.t().mm(U)).trace() -
                    U.diag().log().sum(0) * 2 - len(mu_diff))
    actual.backward()

    actual_c_grad = c.grad.data.clone()
    actual_mu1_grad = mu1.grad.data.clone()
    actual_mu2_grad = mu2.grad.data.clone()
    actual_U_grad = U.grad.data.clone()

    c.grad.data.fill_(0)
    mu1.grad.data.fill_(0)
    mu2.grad.data.fill_(0)
    U.grad.data.fill_(0)

    res = gpytorch.mvn_kl_divergence(mu1, U, mu2, covar_x, num_samples=1000)
    res.backward()

    res_c_grad = c.grad.data
    res_mu1_grad = mu1.grad.data
    res_mu2_grad = mu2.grad.data
    res_U_grad = U.grad.data

    assert torch.abs(
        (res_c_grad - actual_c_grad)).sum() / actual_c_grad.abs().sum() < 1e-1
    assert torch.abs(
        (res_mu1_grad -
         actual_mu1_grad)).sum() / actual_mu1_grad.abs().sum() < 1e-5
    assert torch.abs(
        (res_mu2_grad -
         actual_mu2_grad)).sum() / actual_mu2_grad.abs().sum() < 1e-5
    assert torch.abs(
        (res_U_grad - actual_U_grad)).sum() / actual_U_grad.abs().sum() < 1e-2
    def marginal_log_likelihood(self, output, target):
        """
        Returns the marginal log likelihood of the data

        Args:
        - output: (GaussianRandomVariable) - the output of the model
        - target: (Variable) - target
        """
        mean, covar = output.representation()

        # Exact inference
        if self.exact_inference:
            return gpytorch.exact_gp_marginal_log_likelihood(
                covar, target - mean)

        # Approximate inference
        else:
            # Get inducing points
            if not hasattr(self, 'train_inputs'):
                raise RuntimeError('Must condition on data.')

            train_x = self.train_inputs[0]
            if hasattr(self, 'inducing_points'):
                inducing_points = Variable(self.inducing_points)
            else:
                inducing_points = train_x

            chol_var_covar = self.chol_variational_covar.triu()
            # Negate each row whose diagonal entry is negative (a valid Cholesky
            # factor must have strictly positive diagonal elements).
            inside = chol_var_covar.diag().sign().unsqueeze(1).expand_as(
                chol_var_covar).triu()
            chol_var_covar = chol_var_covar.mul(inside)

            _, train_covar = output.representation()
            inducing_output = super(GPModel, self).__call__(inducing_points)
            inducing_mean, inducing_covar = inducing_output.representation()

            train_covar = gpytorch.add_jitter(train_covar)
            log_likelihood = gpytorch.monte_carlo_log_likelihood(
                self.likelihood.log_probability, target, self.variational_mean,
                chol_var_covar, train_covar)

            inducing_covar = gpytorch.add_jitter(inducing_covar)
            kl_divergence = gpytorch.mvn_kl_divergence(self.variational_mean,
                                                       chol_var_covar,
                                                       inducing_mean,
                                                       inducing_covar)

            res = log_likelihood.squeeze() - kl_divergence
            return res
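
# A minimal sketch of how this marginal log likelihood might be driven from a
# training loop. The setup here is hypothetical (it is not part of the snippet
# above); it only assumes `model` is an instance of the GP model class defining
# marginal_log_likelihood and that `train_x`, `train_y` are Variables.
def train_sketch(model, train_x, train_y, num_steps=50):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    for _ in range(num_steps):
        optimizer.zero_grad()
        output = model(train_x)
        # Maximize the marginal log likelihood by minimizing its negative.
        loss = -model.marginal_log_likelihood(output, train_y)
        loss.backward()
        optimizer.step()
    return model
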
def test_mvn_kl_divergence_backward():
    x = Variable(torch.linspace(0, 1, 4))
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    K = Variable(rbf_covar.forward(x.unsqueeze(1), x.unsqueeze(1)).data,
                 requires_grad=True)

    mu1 = Variable(torch.randn(4), requires_grad=True)
    mu2 = Variable(torch.randn(4), requires_grad=True)

    U = torch.randn(4, 4).triu()
    U = Variable(U.mul(U.diag().sign().unsqueeze(1).expand_as(U).triu()),
                 requires_grad=True)

    mu_diff = mu2 - mu1
    actual = 0.5 * (_det(K).log() + mu_diff.dot(K.inverse().mv(mu_diff)) +
                    K.inverse().mm(U.t().mm(U)).trace() -
                    U.diag().log().sum(0) * 2 - len(mu_diff))
    actual.backward()

    actual_K_grad = K.grad.data.clone()
    actual_mu1_grad = mu1.grad.data.clone()
    actual_mu2_grad = mu2.grad.data.clone()
    actual_U_grad = U.grad.data.clone()

    K.grad.data.fill_(0)
    mu1.grad.data.fill_(0)
    mu2.grad.data.fill_(0)
    U.grad.data.fill_(0)

    res = gpytorch.mvn_kl_divergence(mu1, U, mu2, K, num_samples=10000)
    res.backward()

    res_K_grad = K.grad.data
    res_mu1_grad = mu1.grad.data
    res_mu2_grad = mu2.grad.data
    res_U_grad = U.grad.data

    assert torch.abs(
        (res_K_grad - actual_K_grad)).sum() / actual_K_grad.abs().sum() < 1e-1
    assert torch.abs(
        (res_mu1_grad -
         actual_mu1_grad)).sum() / actual_mu1_grad.abs().sum() < 1e-5
    assert torch.abs(
        (res_mu2_grad -
         actual_mu2_grad)).sum() / actual_mu2_grad.abs().sum() < 1e-5
    assert torch.abs(
        (res_U_grad - actual_U_grad)).sum() / actual_U_grad.abs().sum() < 1e-2
def test_mvn_kl_divergence_forward():
    x = Variable(torch.linspace(0, 1, 4))
    rbf_covar = RBFKernel()
    rbf_covar.initialize(log_lengthscale=-4)
    K = rbf_covar.forward(x.unsqueeze(1), x.unsqueeze(1))

    mu1 = Variable(torch.randn(4), requires_grad=True)
    mu2 = Variable(torch.randn(4), requires_grad=True)

    U = torch.randn(4, 4).triu()
    U = Variable(U.mul(U.diag().sign().unsqueeze(1).expand_as(U).triu()),
                 requires_grad=True)

    mu_diff = mu2 - mu1
    actual = 0.5 * (_det(K).log() + mu_diff.dot(K.inverse().mv(mu_diff)) +
                    K.inverse().mm(U.t().mm(U)).trace() -
                    U.diag().log().sum(0) * 2 - len(mu_diff))

    res = gpytorch.mvn_kl_divergence(mu1, U, mu2, K, num_samples=1000)
    assert all(torch.abs((res.data - actual.data) / actual.data) < 0.15)
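
# The same closed-form reference KL appears inline in several of the tests
# above; a small helper (a sketch, reusing the _det sketch from earlier) that
# mirrors that expression:
def reference_mvn_kl(mu1, U, mu2, K):
    """KL(N(mu1, U^T U) || N(mu2, K)) in closed form, for small dense K."""
    mu_diff = mu2 - mu1
    return 0.5 * (_det(K).log() + mu_diff.dot(K.inverse().mv(mu_diff)) +
                  K.inverse().mm(U.t().mm(U)).trace() -
                  U.diag().log().sum(0) * 2 - len(mu_diff))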