Example #1
    def test_vb_reparametrize_noise(self):
        N = 4
        M = 3
        K = 2
        batch = np.array([[i for _ in range(M)]
                          for i in range(N)]).astype(float)
        noise = np.ones((N, K)).astype(float)
        beta_q = np.array([[1, 1], [1, 1], [1, 1]]).astype(float)
        sigma_q = np.array([2]).astype(float)

        vb_params = linear_regression_lvm.VB_PARAMS(
            beta=None,
            sigma=None,
            beta_q=utils.make_torch_variable(beta_q, True),
            sigma_q=utils.make_torch_variable(sigma_q, True),
        )

        batch_var = utils.make_torch_variable(batch, False)
        noise_var = utils.make_torch_variable(noise, False)
        reparam_noise = linear_regression_lvm._reparametrize_noise(
            batch_var, noise_var, vb_params)

        # Check values
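        # Each batch row is [i, i, i]; with beta_q all ones, mu_q = 3 * i in each latent
        # dimension, and the unit noise is scaled by sigma_q ** 2 = 4, giving 4 + 3 * i.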
        truth = np.array([[4 + 3 * i] * K for i in range(N)]).astype(float)
        assert_array_almost_equal(truth, reparam_noise.data.numpy())

        # Check gradients
        reparam_noise.sum().sum().backward()
        self.assertIsNotNone(vb_params.beta_q.grad)
        self.assertIsNotNone(vb_params.sigma_q.grad)
Example #2
def vb_estimate_lower_bound(batch, noise, vb_params):
    if not isinstance(vb_params, VB_PARAMS):
        raise ValueError('parameter tuple must be of type VB_PARAMS')

    B, M = batch.size()
    B_1, K = noise.size()

    if B != B_1:
        raise ValueError('Batch size is inconsistent between batch and noise')

    # Compute components
    mu_x = torch.mm(noise, vb_params.beta)
    identity_x = utils.make_torch_variable(np.identity(M), False)
    sigma_x = torch.mul(vb_params.sigma**2, identity_x)

    mu_q = torch.mm(batch, vb_params.beta_q)
    identity_q = utils.make_torch_variable(np.identity(K), False)
    sigma_q = torch.mul(vb_params.sigma_q**2, identity_q)

    mu_prior = utils.make_torch_variable(np.zeros(K), False)
    sigma_prior = utils.make_torch_variable(np.identity(K), False)

    # Compute log likelihoods
    log_posterior = mvn.torch_mvn_density(noise, mu_q, sigma_q, log=True)
    log_likelihood = mvn.torch_mvn_density(batch, mu_x, sigma_x, log=True)
    log_prior = mvn.torch_mvn_density(noise, mu_prior, sigma_prior, log=True)

    lower_bound = log_posterior - log_likelihood - log_prior
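    # NB: this is the *negative* of the evidence lower bound for the batch; the caller
    # (e.g. vb_forward_step_w_optim) minimizes the returned value directly as neg_lower_bound.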

    return lower_bound.sum()
Example #3
    def test_make_cov(self):
        '''Make sure the covariance function is working'''
        # Check value
        x1 = np.array([0.0, 0.0])
        x2 = np.array([1.0, 1.0])
        alpha = 2.0
        sigma = 2.0
        log_l = np.log(2.0)

        sq_dist = ((x2 - x1)**2).sum()
        rbf = np.exp(-1.0 / np.exp(log_l) * sq_dist)
        expected_cov = (alpha**2) * rbf + sigma**2

        x1_var = make_torch_variable([x1], requires_grad=False)
        x2_var = make_torch_variable([x2], requires_grad=False)
        alpha_var = make_torch_variable([alpha], requires_grad=True)
        sigma_var = make_torch_variable([sigma], requires_grad=True)
        log_l_var = make_torch_variable([log_l], requires_grad=True)

        test_cov = _make_cov(x1_var, x2_var, alpha_var, sigma_var, log_l_var)

        assert_array_almost_equal(expected_cov,
                                  test_cov.data.numpy()[0, 0],
                                  decimal=5)

        # Make sure the gradient gets through

        test_cov.sum().backward()
        self.assertIsNotNone(alpha_var.grad)
        self.assertIsNotNone(sigma_var.grad)
        self.assertIsNotNone(log_l_var.grad)
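For reference, the covariance entry the test above expects from `_make_cov` can be written out as follows (a restatement of the test's own arithmetic, not taken from the library's docs):

\[ k(x_1, x_2) = \alpha^2 \exp\!\left(-\frac{\lVert x_1 - x_2 \rVert^2}{e^{\log l}}\right) + \sigma^2 \]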
Example #4
def mle_estimate_batch_likelihood(batch, mle_params, sub_B, test_noise=None):
    '''Compute batch likelihood under naive method

    Args:
        batch: (torch.autograd.Variable) the batch of inputs X
        mle_params: (MLE_PARAMS namedtuple of torch.autograd.Variable) the parameters needed to
                    compute the likelihood
        sub_B: (int) the number of Monte Carlo noise samples per batch point used to estimate
               E_z[lik(x)]
        test_noise: (torch.autograd.Variable) optional; supply a deterministic noise variable
                    instead of sampling it randomly inside the function

    Returns:
        (torch.autograd.Variable) the marginal likelihood of the batch; shape (1, )
    '''
    # ### Validation

    # Check that params are of right type
    if not isinstance(mle_params, MLE_PARAMS):
        raise ValueError('parameter tuple must be of type MLE_PARAMS')

    # Check parameter sizes against batch size
    B, M = batch.size()
    K, M_1 = mle_params.beta.size()

    if M != M_1:
        raise AssertionError(
            'batch and beta do not agree on M (batch {} vs beta {})'.format(
                (B, M), (K, M_1)))

    utils.check_autograd_variable_size(mle_params.sigma, [(1, )])

    # ### Computation

    # Sample noise required to compute monte carlo estimate of likelihood
    noise = utils.make_torch_variable(np.random.randn(sub_B * B, K), False)
    if test_noise is not None:  # For debugging, allow insertion of a deterministic noise variable
        utils.check_autograd_variable_size(test_noise, [(sub_B * B, K)])
        noise = test_noise

    # Expand minibatch to match shape of noise
    batch = _mle_expand_batch(batch, sub_B)

    # Construct mu and sigma & compute density
    mu = torch.mm(noise, mle_params.beta)

    identity = utils.make_torch_variable(np.identity(M), False)
    sigma = torch.mul(mle_params.sigma**2, identity)

    likelihood = mvn.torch_mvn_density(batch, mu, sigma)

    # Reshape density to (sub_B, B) and sum across first dimension
    utils.check_autograd_variable_size(likelihood, [(sub_B * B, )])
    likelihood = _mle_unpack_likelihood(likelihood, sub_B, B)

    # Compute approx expected likelihood of each batch sample
    approx_expected_likelihood_each_iter = likelihood.mean(dim=0)
    approx_marginal_log_likelihood = torch.log(
        approx_expected_likelihood_each_iter).sum()

    return approx_marginal_log_likelihood
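Restated compactly, with sub_B noise draws z_s ~ N(0, I_K) per batch point, the value returned above is the Monte Carlo estimate

\[ \log \hat{p}(X) = \sum_{b=1}^{B} \log\left( \frac{1}{\mathrm{sub\_B}} \sum_{s=1}^{\mathrm{sub\_B}} \mathcal{N}\big(x_b \mid z_s \beta,\; \sigma^2 I_M\big) \right) \]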
Example #5
    def test_em_full_data_log_likelihood(self):
        # Implied dimensions: B = 3, M = 2, K = 2
        B = 3
        M = 2
        K = 2

        batch = np.array([
            [0, 0],
            [1, 1],
            [2, 2],
        ]).astype(float)

        beta = np.array([[1, 0.1], [1, 0.1]]).astype(float)
        sigma = 1.0

        var_inv = np.linalg.pinv(
            np.dot(beta, beta.T) + (sigma**2) * np.identity(K))
        truth_e_z = np.dot(var_inv, np.dot(beta, batch.T)).T
        truth_e_z2 = np.empty((B, K, K))
        # Compute the posterior covariance of the latent variable for each batch point
        for i in range(B):
            dot_prod = np.dot(truth_e_z[[i], :].T, truth_e_z[[i], :])
            truth_e_z2[i, :, :] = (sigma**2) * var_inv + dot_prod

        truth_log_lik = np.empty((B, ))
        for i in range(B):
            a_1 = (M / 2.0) * np.log(sigma**2)
            a_2 = 0.5 * np.sum(np.diag(truth_e_z2[i, :, :]))
            a_3 = 0.5 * np.dot(batch[[i], :], batch[[i], :].T)
            a_4 = -1 * np.dot(truth_e_z[[i], :], np.dot(beta, batch[[i], :].T))
            a_5 = 0.5 * np.sum(
                np.diag(np.dot(beta, np.dot(beta.T, truth_e_z2[i, :, :]))))

            truth_log_lik[i] = -1 * (a_1 + a_2 + a_3 + a_4 + a_5)

        truth_log_lik = truth_log_lik.sum()

        em_params = linear_regression_lvm.EM_PARAMS(
            beta=utils.make_torch_variable(beta, True),
            sigma=utils.make_torch_variable([sigma], True))
        batch_var = utils.make_torch_variable(batch, False)
        test_e_z, test_e_z2 = linear_regression_lvm.em_compute_posterior(
            batch_var, em_params)
        test_log_lik = linear_regression_lvm.em_compute_full_data_log_likelihood(
            batch_var, em_params, test_e_z, test_e_z2)

        # Compare computation
        assert_array_almost_equal(truth_log_lik, test_log_lik.data.numpy())
        self.assertLess(test_log_lik.data.numpy()[0], 0)

        # Check for grads
        test_log_lik.backward()
        self.assertIsNotNone(em_params.beta.grad)
        self.assertIsNotNone(em_params.sigma.grad)
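For reference, the "truth" computation in this test encodes the standard E-step posterior moments for the linear-Gaussian latent variable model: with M = \beta \beta^T + \sigma^2 I_K,

\[ \mathbb{E}[z_b] = M^{-1} \beta x_b, \qquad \mathbb{E}[z_b z_b^T] = \sigma^2 M^{-1} + \mathbb{E}[z_b]\,\mathbb{E}[z_b]^T \]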
Example #6
    def test_mle_log_likelihood(self):
        '''Check validity of computation'''
        # Check values
        x = np.array([[0.0, 0.0], [1.0, 1.0]])
        z = np.array([[0.0], [1.0]])
        alpha = 2.0
        sigma = 2.0
        log_l = np.log(2.0)

        sq_dist_matrix = np.array([[0.0, 1.0], [1.0, 0.0]])
        rbf_kernel = np.exp(-1.0 / np.exp(log_l) * sq_dist_matrix)
        cov = (alpha**2) * rbf_kernel + (sigma**2) * np.identity(2)

        mu = np.array([0.0, 0.0])

        expected = np.sum(
            np.log(ss.multivariate_normal(mean=mu, cov=cov).pdf(x.T)))

        x_var = make_torch_variable(x, requires_grad=False)
        z_var = make_torch_variable(z, requires_grad=False)
        alpha_var = make_torch_variable([alpha], requires_grad=True)
        sigma_var = make_torch_variable([sigma], requires_grad=True)
        log_l_var = make_torch_variable([log_l], requires_grad=True)

        test = _mle_log_likelihood(x_var, z_var, alpha_var, sigma_var,
                                   log_l_var)

        assert_array_almost_equal(expected, test.data.numpy()[0], decimal=5)

        # Check gradients
        test.backward()
        self.assertIsNotNone(alpha_var.grad)
        self.assertIsNotNone(sigma_var.grad)
        self.assertIsNotNone(log_l_var.grad)

        # Check that wrapper function also preserves gradients
        x_var = make_torch_variable(x, requires_grad=False)
        mle_params = MLE_PARAMS(
            z=make_torch_variable(z, requires_grad=False),
            alpha=make_torch_variable([alpha], requires_grad=True),
            sigma=make_torch_variable([sigma], requires_grad=True),
            log_l=make_torch_variable([log_l], requires_grad=True),
        )
        batch_ix = np.array([0, 1])

        test = mle_batch_log_likelihood(x_var, mle_params, batch_ix)

        assert_array_almost_equal(expected, test.data.numpy()[0], decimal=5)

        test.backward()
        self.assertIsNotNone(mle_params.alpha.grad)
        self.assertIsNotNone(mle_params.sigma.grad)
        self.assertIsNotNone(mle_params.log_l.grad)
Example #7
    def test_batch_mvn_computation(self):
        # Make sure diagonal util works as expected
        M = 5
        diags = np.array([1.0, 2.0, 3.0])
        expected_matrices = np.concatenate(
            [d * np.identity(M).reshape(1, M, M) for d in diags], axis=0)

        diags_var = Variable(torch.Tensor(diags), requires_grad=True)
        test_matrices = mvn._make_batch_matrices_w_diagonal(diags_var, M)

        assert_array_almost_equal(expected_matrices,
                                  test_matrices.data.numpy())

        # Make sure likelihood computation is correct
        B = 2
        M = 3

        x = np.array([  # shape (B, M)
            [0, 1, 2], [2, 3, 4]
        ]).astype(float)

        mu = np.array([  # shape (B, M)
            [0, 0, 0], [1, 1, 1]
        ]).astype(float)

        var = np.array([2, 3]).astype(float)  # shape (B, )

        cov = np.concatenate([  # shape(B, M, M)
            np.identity(M).reshape(1, M, M) * x for x in var
        ])

        # Expected
        expected = np.array([  # shape (B, 1)
            ss.multivariate_normal.logpdf(x[i, :], mu[i, :], cov[i, :, :])
            for i in range(B)
        ])

        # Test
        x_var = make_torch_variable(x, requires_grad=True)
        mu_var = make_torch_variable(mu, requires_grad=True)
        var_var = make_torch_variable(var, requires_grad=True)
        test = mvn.torch_diagonal_mvn_density_batch(x_var,
                                                    mu_var,
                                                    var_var,
                                                    log=True)

        assert_array_almost_equal(expected, test.data.numpy())

        # Check gradients
        test.sum().backward()
        self.assertIsNotNone(x_var.grad)
        self.assertIsNotNone(mu_var.grad)
        self.assertIsNotNone(var_var.grad)
Example #8
def compute_var(beta, sigma):
    '''Computes M = t(W) * W + sigma^2 * I, which is a commonly used quantity'''
    _, M = beta.size()
    identity = utils.make_torch_variable(np.identity(M), False)
    a1 = torch.mm(beta.t(), beta)
    a2 = torch.mul(sigma**2, identity)
    return torch.add(a1, a2)
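A minimal, self-contained NumPy sanity check (not part of the library) of why this quantity appears as a marginal covariance: for x = z beta + eps with z ~ N(0, I_K) and eps ~ N(0, sigma^2 I_M), the covariance of x is beta^T beta + sigma^2 I_M, which is how `mle_estimate_batch_likelihood_v2` uses `compute_var` later in these examples.

import numpy as np

rng = np.random.RandomState(0)
K, M, sigma = 2, 3, 0.5
beta = rng.randn(K, M)

# Simulate the linear-Gaussian model and compare the empirical covariance of x
# against beta^T beta + sigma^2 I
z = rng.randn(200000, K)
eps = sigma * rng.randn(200000, M)
x = z.dot(beta) + eps

empirical_cov = np.cov(x, rowvar=False)
analytic_cov = beta.T.dot(beta) + (sigma ** 2) * np.identity(M)
print(np.round(empirical_cov - analytic_cov, 2))  # entries should be approximately 0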
Example #9
    def test_compute_var(self):
        '''Make sure computing variance of posterior is autograd-able'''
        # Compute quantity
        beta = utils.make_torch_variable(
            np.array([[1, 1]]).astype(float), True)
        sigma = utils.make_torch_variable([1.0], True)
        var = compute_var(beta, sigma)

        # Check shape
        I, _ = var.size()
        self.assertEqual(2, I)

        # Check grad
        var.sum().sum().backward()
        self.assertIsNotNone(beta.grad)
        self.assertIsNotNone(sigma.grad)
Example #10
def vb_forward_step_w_optim(x, vb_params, B, optimizer):
    # Create minibatch
    batch = utils.select_minibatch(x, B)

    # Sample noise
    K, _ = vb_params.beta.size()
    noise = utils.make_torch_variable(np.random.randn(B, K), False)
    noise = _reparametrize_noise(batch, noise, vb_params)

    # Estimate the negative variational lower bound for the batch
    neg_lower_bound = vb_estimate_lower_bound(batch, noise, vb_params)

    # Do a backward step
    neg_lower_bound.backward()

    # Update step
    optimizer.step()

    # Constrain sigma
    vb_params.sigma.data[0] = max(1e-10, vb_params.sigma.data[0])
    vb_params.sigma_q.data[0] = max(1e-10, vb_params.sigma_q.data[0])

    # Clear gradients
    optimizer.zero_grad()

    return vb_params, neg_lower_bound
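An illustrative training-loop sketch (not from the library): the sizes, learning rate, and synthetic data below are made up, and it assumes the same module aliases used throughout these examples (`np`, `utils`, plus `vb_initialize_parameters`, which appears in a later example).

import numpy as np
import torch.optim as optim

M, K, B = 2, 1, 16
x_train = utils.make_torch_variable(np.random.randn(100, M), False)  # synthetic data

# Initialize variational parameters and optimize them with repeated forward steps
vb_params = vb_initialize_parameters(M, K)
optimizer = optim.Adam(
    [vb_params.beta, vb_params.sigma, vb_params.beta_q, vb_params.sigma_q], lr=1e-2)

for _ in range(500):
    vb_params, neg_lower_bound = vb_forward_step_w_optim(x_train, vb_params, B, optimizer)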
Example #11
def _reparametrize_noise(batch, noise, vb_params):
    mu = torch.mm(batch, vb_params.beta_q)

    _, K = noise.size()
    identity = utils.make_torch_variable(np.identity(K), False)
    sigma = torch.mul(vb_params.sigma_q**2, identity)

    return mu + torch.mm(noise, sigma)
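For comparison, the textbook reparameterization of a draw from N(mu, sigma^2 I) scales standard-normal noise by sigma rather than by sigma ** 2; the helper above multiplies the noise by sigma_q ** 2 * I, which is exactly what the unit test in Example #1 expects. A minimal, self-contained NumPy illustration of the usual trick (not part of the library):

import numpy as np

rng = np.random.RandomState(0)
mu, sigma = 1.5, 0.5
eps = rng.randn(100000)   # eps ~ N(0, 1)
z = mu + sigma * eps      # z ~ N(mu, sigma^2), differentiable w.r.t. mu and sigma
print(z.mean(), z.std())  # roughly 1.5 and 0.5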
Example #12
    def test_rbf_kernel_forward(self):
        '''Make sure the RBF kernel is working'''
        # Test basic functionality
        x1 = np.array([0.0, 0.0])
        x2 = np.array([1.0, 1.0])
        log_l = np.log(2.0)
        eps = 1e-5

        sq_dist = ((x2 - x1)**2).sum()
        expected = np.exp(-1.0 / np.exp(log_l) * sq_dist)

        x1_var = make_torch_variable([x1], requires_grad=False)
        x2_var = make_torch_variable([x2], requires_grad=False)
        log_l_var = make_torch_variable([log_l], requires_grad=True)

        test = rbf_kernel_forward(x1_var, x2_var, log_l_var, eps=eps)

        assert_array_almost_equal(expected, test.data.numpy()[0, 0], decimal=5)

        # Make sure the gradient gets through

        test.sum().backward()
        self.assertIsNotNone(log_l_var.grad)

        # Test safety valve

        bad_log_l = -1e6

        expected_bad = np.exp(-1.0 / eps * sq_dist)

        bad_log_l_var = make_torch_variable([bad_log_l], requires_grad=True)

        test_bad = rbf_kernel_forward(x1_var, x2_var, bad_log_l_var, eps=eps)

        assert_array_almost_equal(expected_bad,
                                  test_bad.data.numpy()[0, 0],
                                  decimal=5)

        # Make sure the gradient gets through

        test_bad.sum().backward()
        self.assertIsNotNone(bad_log_l_var.grad)
def vae_lower_bound_w_sampling(x, z, vae_model):
    '''Compute variational lower bound, as specified by variational autoencoder

    This is less efficient than the main method `vae_lower_bound`, as it uses sampling rather than
    exact computation.

    Args:
        x: (Variable) observations; shape n x m1
        z: (Variable) latent variables; shape n x m2
        vae_model: (VAE) the variational autoencoder model; implemented like a PyTorch module

    Returns:
        (Variable) lower bound; dim (1, )
    '''
    # ### Get parameters

    # Some initial parameter setting
    n, m1 = x.size()
    _, m2 = z.size()

    # Parameters of the likelihood of x given the model & z
    x_mu, _ = vae_model.decode(z)
    x_sigma = make_torch_variable(torch.ones(n), requires_grad=False)

    # Parameters of the variational approximation of the posterior of z given model & x
    z_mu, z_logvar = vae_model.encode(x)
    z_sigma = torch.exp(torch.mul(z_logvar, 0.5))

    # Parameters of the (actual) prior distribution on z
    prior_mu = make_torch_variable(np.zeros((n, m2)), requires_grad=False)
    prior_sigma = make_torch_variable(np.ones(n), requires_grad=False)

    # ### Compute components (e.g., expected log ___ under posterior approximation)
    log_posterior = torch_diagonal_mvn_density_batch(z, z_mu, z_sigma, log=True)  # (B, )
    log_likelihood = torch_diagonal_mvn_density_batch(x, x_mu, x_sigma, log=True)  # (B, )
    log_prior = torch_diagonal_mvn_density_batch(z, prior_mu, prior_sigma, log=True)  # (B, )

    # ### Put it all together
    lower_bound = -1 * (log_posterior - log_likelihood - log_prior).sum()

    return lower_bound
    def forward(self, x, noise=None):
        '''Estimate variational lower bound of parameters given observations x'''
        n, _ = x.size()

        if noise is None:
            noise = make_torch_variable(np.random.randn(n, self.m2), requires_grad=False)

        sample_z = reparametrize_noise(x, noise, self)

        if self.use_sampling:
            return vae_lower_bound_w_sampling(x, sample_z, self)
        else:
            return vae_lower_bound(x, sample_z, self)
Example #15
    def test_estimate_batch_log_likelihood_lawrence(self):
        '''Test computation of the marginal likelihood with the latent variables marginalized out'''
        # Implied dimensions: n = 3, m1 = 2, m2 = 1
        n = 3
        m1 = 2
        m2 = 1

        x = np.array([
            [0, 0],
            [1, 1],
            [2, 2],
        ]).astype(float)
        z = np.array([[0], [1], [2]]).astype(float)
        sigma = 2.0
        alpha = 2.0

        var = (alpha**2) * np.dot(z, z.T) + (sigma**2) * np.identity(n)

        a_0 = n * m1 * np.log(2 * np.pi)
        a_1 = m1 * np.log(np.linalg.det(var))
        a_2 = np.diag(np.dot(np.linalg.pinv(var), np.dot(x, x.T))).sum()
        truth_marginal_log_lik = -0.5 * (a_0 + a_1 + a_2)

        x_var = utils.make_torch_variable(x, False)
        z_var = utils.make_torch_variable(z, True)
        sigma_var = utils.make_torch_variable([sigma], True)
        alpha_var = utils.make_torch_variable([alpha], True)

        test_marginal_log_lik = linear_regression_lvm._log_likelihood_lawrence(
            x_var, z_var, sigma_var, alpha_var)

        assert_array_almost_equal(truth_marginal_log_lik,
                                  test_marginal_log_lik.data.numpy())

        # Check gradients
        test_marginal_log_lik.backward()
        self.assertIsNotNone(z_var.grad)
        self.assertIsNotNone(sigma_var.grad)
        self.assertIsNotNone(alpha_var.grad)
def vae_lower_bound(x, z, vae_model):
    '''Compute variational lower bound, as specified by variational autoencoder

    The VAE model specifies
      * likelihood x | z ~ MVN(decoder_mu(z), exp(decoder_logvar(z)) * I)
      * posterior z | x ~ MVN(encoder_mu(x), exp(encoder_logvar(x)) * I)
      * prior z ~ MVN(0, I)
    where the posterior is not the true posterior, but rather the variational approximation.

    This comes out to
      lower bound = E[log p(x, z) - log q(z | x)]
                  = E[log p(x | z) + log p(z) - log q(z | x)]
    where the expectation is over z ~ q(z | x).

    Args:
        x: (Variable) observations; shape n x m1
        z: (Variable) latent variables; shape n x m2
        vae_model: (VAE) the variational autoencoder model; implemented like a PyTorch module

    Returns:
        (Variable) lower bound; dim (1, )
    '''
    # TODO: Reimplement sampling based approach for comparison
    # ### Get parameters

    # Some initial parameter setting
    n, m1 = x.size()
    _, m2 = z.size()

    # Parameters of the likelihood of x given the model & z
    x_mu, _ = vae_model.decode(z)
    x_sigma = make_torch_variable(torch.ones(n), requires_grad=False)

    # Parameters of the variational approximation of the posterior of z given model & x
    z_mu, z_logvar = vae_model.encode(x)

    # ### Compute components (e.g., expected log ___ under posterior approximation)

    # E[log posterior]: analytically, is -0.5 * sum_j [log(2 pi) + 1 + log(sigma_j ** 2)]
    log_posterior = -0.5 * (z_logvar + 1 + np.log(2 * np.pi)).sum(dim=1)  # (B, )

    # E[log likelihood]: can't get analytically, so we use the sample estimate...
    log_likelihood = torch_diagonal_mvn_density_batch(x, x_mu, x_sigma, log=True)  # (B, )

    # E[log prior]: analytically, is -0.5 * sum_j [log(2 pi) + mu_j ** 2 + sigma_j ** 2]
    log_prior = -0.5 * ((z_mu ** 2) + torch.exp(z_logvar) + np.log(2 * np.pi)).sum(dim=1)  # (B, )

    # ### Put it all together
    lower_bound = -1 * (log_posterior - log_likelihood - log_prior).sum()

    return lower_bound
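The two analytic expectations in the code combine into the familiar closed-form KL term between the diagonal-Gaussian posterior and the standard-normal prior (a restatement of the comments above, not an addition to the API):

\[ \mathbb{E}_q[\log q(z \mid x)] - \mathbb{E}_q[\log p(z)] = \mathrm{KL}\big(q(z \mid x) \,\|\, \mathcal{N}(0, I)\big) = \tfrac{1}{2} \sum_j \big(\mu_j^2 + \sigma_j^2 - 1 - \log \sigma_j^2\big) \]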
Example #17
    def test_estimate_batch_likelihood_v2(self):
        '''Test computation of the marginal likelihood with the latent variables marginalized out'''
        # Implied dimensions: B = 3, M = 2, K = 1, sub_B = 2
        B = 3
        M = 2
        K = 1
        sub_B = 2

        batch = np.array([
            [0, 0],
            [1, 1],
            [2, 2],
        ]).astype(float)

        beta = np.array([[1, 0.1]]).astype(float)
        sigma = 1.0
        var = np.dot(beta.T, beta) + (sigma**2) * np.identity(M)

        truth_marginal_lik = ss.multivariate_normal.pdf(
            batch, np.zeros(M), var)
        truth_marginal_log_lik = np.log(truth_marginal_lik).sum()

        mle_params = linear_regression_lvm.MLE_PARAMS(
            beta=utils.make_torch_variable(beta, True),
            sigma=utils.make_torch_variable([sigma], True))
        batch_var = utils.make_torch_variable(batch, False)
        test_marginal_log_lik = linear_regression_lvm.mle_estimate_batch_likelihood_v2(
            batch_var, mle_params)

        assert_array_almost_equal(truth_marginal_log_lik,
                                  test_marginal_log_lik.data.numpy())

        # Check gradients
        test_marginal_log_lik.backward()
        self.assertIsNotNone(mle_params.beta.grad)
        self.assertIsNotNone(mle_params.sigma.grad)
Example #18
def mle_estimate_batch_likelihood_v2(batch, mle_params):
    if not isinstance(mle_params, MLE_PARAMS):
        raise ValueError('Input params must be of type MLE_PARAMS')

    B, M = batch.size()

    mu = utils.make_torch_variable(np.zeros(M), False)
    sigma = compute_var(mle_params.beta, mle_params.sigma)

    approx_marginal_log_likelihood = mvn.torch_mvn_density(batch,
                                                           mu,
                                                           sigma,
                                                           log=True)

    return approx_marginal_log_likelihood.sum()
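Unlike the Monte Carlo estimator above, this version uses the exact marginal obtained by integrating the latent variable out of the linear-Gaussian model:

\[ \log p(X) = \sum_{b=1}^{B} \log \mathcal{N}\big(x_b \mid 0,\; \beta^T \beta + \sigma^2 I_M\big) \]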
Example #19
def _log_likelihood_lawrence(x, z, sigma, alpha):
    '''Compute the log likelihood log P(X | Z, sigma, alpha) using the formula in the Lawrence paper, i.e.,

    P(X | Z, sigma, alpha) = ((2 pi)^N |K|)^{-0.5 * M1} exp{-0.5 * trace(K^-1 X t(X))}

    We compute K through the `build_marginal_covariance` function as

    K = (alpha ** 2) * Z * t(Z) + (sigma ** 2) * I

    e.g., of dimension (N, N) where alpha is the stdev of the weight prior and sigma is the
    stdev of the observation noise

    This is equivalent to computing the likelihood as
        P(X | Z, sigma, alpha) = prod_{j \in [1, M1]} MVN(X_{-, j}; 0, K)
    but more efficient, as it scales better with the dimension M1 (which is crucial when the
    whole goal is dimension reduction :P).

    Params:
        x: the observations (N, M1)
        z: the latent variables (N, M2)
        sigma: the standard deviation of the observation noise (1, )
        alpha: the standard deviation of the weight prior (1, )

    Returns:
        (1, ) vector holding the log likelihood
    '''
    n, m1 = x.size()
    _, m2 = z.size()

    k = build_marginal_covariance(z, sigma, alpha)
    k_inv = torch.inverse(k)

    x_inner_prod = torch.mm(x, x.t())

    a1 = utils.make_torch_variable([-0.5 * n * m1 * np.log(2 * np.pi)], False)
    a2 = -0.5 * m1 * mvn.torch_log_determinant(k)
    a3 = -0.5 * torch.sum(torch.diag(torch.mm(k_inv, x_inner_prod)))

    loglik = a1 + a2 + a3

    return loglik
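A self-contained numerical check (not part of the library) of the equivalence claimed in the docstring, i.e., that the trace form equals the sum over the columns of X of an N-dimensional MVN log density:

import numpy as np
import scipy.stats as ss

rng = np.random.RandomState(0)
n, m1, m2 = 4, 3, 2
x = rng.randn(n, m1)
z = rng.randn(n, m2)
sigma, alpha = 0.7, 1.3

k = (alpha ** 2) * z.dot(z.T) + (sigma ** 2) * np.identity(n)

# Column-by-column form: each column of X is an independent N(0, K) draw
per_column = sum(ss.multivariate_normal.logpdf(x[:, j], mean=np.zeros(n), cov=k)
                 for j in range(m1))

# Trace form used by _log_likelihood_lawrence
trace_form = -0.5 * (n * m1 * np.log(2 * np.pi)
                     + m1 * np.log(np.linalg.det(k))
                     + np.trace(np.linalg.pinv(k).dot(x.dot(x.T))))

print(np.allclose(per_column, trace_form))  # True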
Example #20
    def test_mle_expand_and_unpack(self):
        '''Check that expanding the batch and then unpacking the computation works correctly'''
        N = 5
        x = np.arange(N).astype(float).reshape(
            -1, 1)  # e.g., N instances with 1 element
        x_var = utils.make_torch_variable(x, False)

        reps = 3
        test_expand = linear_regression_lvm._mle_expand_batch(
            x_var, reps)  # e.g., (3 * N, 1)
        test_unpack = linear_regression_lvm._mle_unpack_likelihood(
            test_expand.squeeze(), reps, N)

        truth_expand = np.array([[i for _ in range(reps)
                                  for i in range(N)]]).astype(float).T
        truth_unpack = np.array([range(N) for _ in range(reps)]).astype(float)

        assert_array_almost_equal(np.array([reps * N, 1]), test_expand.size())
        assert_array_almost_equal(np.array([reps, N]), test_unpack.size())

        assert_array_almost_equal(truth_expand, test_expand.data.numpy())
        assert_array_almost_equal(truth_unpack, test_unpack.data.numpy())
Example #21
    def test_vb_lower_bound(self):
        '''Check computations'''
        # Establish parameters
        x = np.array([
            [0.0, 0.0],
            [1.0, 1.0],
            [2.0, 2.0],
        ])
        z = np.array([
            [0.0],
            [1.0],
            [2.0],
        ])

        alpha = 2.0
        sigma = 2.0
        log_l = np.log(2.0)

        alpha_q = 2.0
        sigma_q = 2.0
        log_l_q = np.log(2.0)

        # Compute covariances
        sq_dist_x = np.array([
            [0.0, 2.0, 8.0],
            [2.0, 0.0, 2.0],
            [8.0, 2.0, 0.0],
        ])
        rbf_x = np.exp(-1.0 / np.exp(log_l_q) * sq_dist_x)
        cov_z = (alpha_q**2) * rbf_x + (sigma_q**2) * np.identity(3)

        sq_dist_z = np.array([
            [0.0, 1.0, 4.0],
            [1.0, 0.0, 1.0],
            [4.0, 1.0, 0.0],
        ])
        rbf_z = np.exp(-1.0 / np.exp(log_l) * sq_dist_z)
        cov_x = (alpha**2) * rbf_z + (sigma**2) * np.identity(3)

        # Compute components of bound
        log_posterior = ss.multivariate_normal.logpdf(z.T,
                                                      mean=np.zeros(3),
                                                      cov=cov_z).sum()
        log_likelihood = ss.multivariate_normal.logpdf(x.T,
                                                       mean=np.zeros(3),
                                                       cov=cov_x).sum()
        log_prior = ss.multivariate_normal.logpdf(z,
                                                  mean=np.zeros(1),
                                                  cov=np.identity(1)).sum()

        expected_lower_bound = log_posterior - log_likelihood - log_prior

        # Compute test value
        x_var = make_torch_variable(x, requires_grad=False)
        z_var = make_torch_variable(z, requires_grad=False)
        alpha_var = make_torch_variable([alpha], requires_grad=True)
        sigma_var = make_torch_variable([sigma], requires_grad=True)
        log_l_var = make_torch_variable([log_l], requires_grad=True)
        alpha_q_var = make_torch_variable([alpha_q], requires_grad=True)
        sigma_q_var = make_torch_variable([sigma_q], requires_grad=True)
        log_l_q_var = make_torch_variable([log_l_q], requires_grad=True)
        test_lower_bound = _vb_lower_bound(x_var, z_var, alpha_var, sigma_var,
                                           log_l_var, alpha_q_var, sigma_q_var,
                                           log_l_q_var)

        assert_array_almost_equal(expected_lower_bound,
                                  test_lower_bound.data.numpy(),
                                  decimal=5)

        # ### Check gradients
        test_lower_bound.backward()
        self.assertIsNotNone(alpha_var.grad)
        self.assertIsNotNone(sigma_var.grad)
        self.assertIsNotNone(log_l_var.grad)
        self.assertIsNotNone(alpha_q_var.grad)
        self.assertIsNotNone(sigma_q_var.grad)
        self.assertIsNotNone(log_l_q_var.grad)
Example #22
    def test_em_compute_posterior_and_extract_diagonals(self):
        '''Ensure the EM code passes muster'''
        # Implied dimensions: B = 3, M = 2, K = 2, sub_B = 2
        B = 3
        M = 2
        K = 2
        sub_B = 2

        batch = np.array([
            [0, 0],
            [1, 1],
            [2, 2],
        ]).astype(float)

        beta = np.array([[1, 0.1], [1, 0.1]]).astype(float)
        sigma = 1.0

        var_inv = np.linalg.pinv(
            np.dot(beta, beta.T) + (sigma**2) * np.identity(K))
        truth_e_z = np.dot(var_inv, np.dot(beta, batch.T)).T
        truth_e_z2 = np.empty((B, K, K))
        # Compute the posterior covariance of the latent variable for each batch point
        for i in range(B):
            dot_prod = np.dot(truth_e_z[[i], :].T, truth_e_z[[i], :])
            truth_e_z2[i, :, :] = (sigma**2) * var_inv + dot_prod

        em_params = linear_regression_lvm.EM_PARAMS(
            beta=utils.make_torch_variable(beta, True),
            sigma=utils.make_torch_variable([sigma], True))
        batch_var = utils.make_torch_variable(batch, False)
        test_e_z, test_e_z2 = linear_regression_lvm.em_compute_posterior(
            batch_var, em_params)

        # Compare posteriors
        assert_array_almost_equal(truth_e_z, test_e_z.data.numpy())
        assert_array_almost_equal(truth_e_z2, test_e_z2.data.numpy())

        # Check for gradients
        test_e_z.sum().backward(retain_graph=True)
        self.assertIsNotNone(em_params.beta.grad)
        self.assertIsNotNone(em_params.sigma.grad)

        em_params.beta.grad = None
        em_params.sigma.grad = None

        test_e_z2.sum().backward(retain_graph=True)
        self.assertIsNotNone(em_params.beta.grad)
        self.assertIsNotNone(em_params.sigma.grad)

        # Additionally, check that diagonal extraction step preserves gradients
        em_params.beta.grad = None
        em_params.sigma.grad = None

        truth_diagonals = np.empty((B, K))
        for i in range(B):
            truth_diagonals[i, :] = np.diag(truth_e_z2[i, :, :])

        test_diagonals = linear_regression_lvm.extract_diagonals(test_e_z2)

        assert_array_almost_equal(truth_diagonals, test_diagonals.data.numpy())

        test_diagonals.sum().backward(retain_graph=True)
        self.assertIsNotNone(em_params.beta.grad)
        self.assertIsNotNone(em_params.sigma.grad)
Example #23
    def test_vb_estimate_lower_bound(self):
        # Implied dimensions: B = 3, M = 2, K = 1
        B = 3
        M = 2
        K = 1

        batch = np.array([
            [0, 0],
            [1, 1],
            [2, 2],
        ]).astype(float)

        noise = np.array([[1], [1], [1]]).astype(float)

        beta = np.array([[1, 1]]).astype(float)
        sigma = 1.0
        beta_q = np.array([[1], [1]]).astype(float)
        sigma_q = 1.0

        # e.g., noise * beta
        mu_x = np.array([
            [1, 1],
            [1, 1],
            [1, 1],
        ]).astype(float)

        # e.g., batch * beta_q
        mu_q = np.array([
            [0],
            [2],
            [4],
        ]).astype(float)

        diff_x = batch - mu_x
        diff_q = noise - mu_q

        likelihood = ss.multivariate_normal.pdf(diff_x, np.zeros(M),
                                                sigma * np.identity(M))
        posterior = ss.multivariate_normal.pdf(diff_q, np.zeros(K),
                                               sigma * np.identity(K))
        prior = ss.multivariate_normal.pdf(noise, np.zeros(K), np.identity(K))

        truth_lower_bound = (np.log(posterior) - np.log(likelihood) -
                             np.log(prior)).sum()

        vb_params = linear_regression_lvm.VB_PARAMS(
            beta=utils.make_torch_variable(beta, True),
            sigma=utils.make_torch_variable([sigma], True),
            beta_q=utils.make_torch_variable(beta_q, True),
            sigma_q=utils.make_torch_variable([sigma_q], True))
        batch_var = utils.make_torch_variable(batch, False)
        noise_var = utils.make_torch_variable(noise, False)
        test_lower_bound = linear_regression_lvm.vb_estimate_lower_bound(
            batch_var, noise_var, vb_params)

        assert_array_almost_equal(truth_lower_bound,
                                  test_lower_bound.data.numpy())

        # Check gradients
        test_lower_bound.backward()
        self.assertIsNotNone(vb_params.beta.grad)
        self.assertIsNotNone(vb_params.sigma.grad)
        self.assertIsNotNone(vb_params.beta_q.grad)
        self.assertIsNotNone(vb_params.sigma_q.grad)
Example #24
    def test_mle_estimate_batch_likelihood(self):
        '''Check that the batch likelihood computation is correct and produces gradients'''
        # Implied dimensions: B = 3, M = 2, K = 1, sub_B = 2
        B = 3
        M = 2
        K = 1
        sub_B = 2

        batch = np.array([
            [0, 0],
            [1, 1],
            [2, 2],
        ]).astype(float)

        noise = np.array([[-1], [-1], [-1], [1], [1], [1]]).astype(float)

        beta = np.array([[1, 1]]).astype(float)
        sigma = 1.0

        expanded_batch = np.array([
            [0, 0],
            [1, 1],
            [2, 2],
            [0, 0],
            [1, 1],
            [2, 2],
        ]).astype(float)

        iter_mu = np.array([
            [-1, -1],
            [-1, -1],
            [-1, -1],
            [1, 1],
            [1, 1],
            [1, 1],
        ]).astype(float)

        diff = expanded_batch - iter_mu
        likelihoods = ss.multivariate_normal.pdf(diff, np.zeros(M),
                                                 sigma * np.identity(M))
        expected_each_iter = np.array([
            (likelihoods[0] + likelihoods[3]) / 2,
            (likelihoods[1] + likelihoods[4]) / 2,
            (likelihoods[2] + likelihoods[5]) / 2,
        ])
        truth_marginal_log_lik = np.log(expected_each_iter).sum()

        mle_params = linear_regression_lvm.MLE_PARAMS(
            beta=utils.make_torch_variable(beta, True),
            sigma=utils.make_torch_variable([sigma], True))
        batch_var = utils.make_torch_variable(batch, False)
        noise_var = utils.make_torch_variable(noise, False)
        test_marginal_log_lik = linear_regression_lvm.mle_estimate_batch_likelihood(
            batch_var, mle_params, sub_B, test_noise=noise_var)

        assert_array_almost_equal(truth_marginal_log_lik,
                                  test_marginal_log_lik.data.numpy())

        # Check gradients
        test_marginal_log_lik.backward()
        self.assertIsNotNone(mle_params.beta.grad)
        self.assertIsNotNone(mle_params.sigma.grad)
Example #25
def vb_initialize_parameters(M, K):
    beta = utils.make_torch_variable(np.random.randn(K, M), True)
    sigma = utils.make_torch_variable(np.random.rand(1) * 10 + 1e-10, True)
    beta_q = utils.make_torch_variable(np.random.randn(M, K), True)
    sigma_q = utils.make_torch_variable(np.random.rand(1) * 10 + 1e-10, True)
    return VB_PARAMS(beta=beta, sigma=sigma, beta_q=beta_q, sigma_q=sigma_q)
Example #26
def em_initialize_parameters(M, K):
    '''Initialize the parameters before fitting'''
    beta = utils.make_torch_variable(np.random.randn(K, M), True)
    sigma = utils.make_torch_variable(np.random.rand(1) * 10 + 1e-10, True)
    return EM_PARAMS(beta=beta, sigma=sigma)
Example #27
def build_marginal_covariance(z, sigma, alpha):
    n, _ = z.size()

    inner_prod = torch.mm(z, z.t())
    identity = utils.make_torch_variable(np.identity(n), False)
    return (alpha**2) * inner_prod + (sigma**2) * identity
Example #28
def mle_initialize_parameters_v3(n, m1, m2):
    '''Initialize the parameters before fitting'''
    z = utils.make_torch_variable(np.random.randn(n, m2), True)
    sigma = utils.make_torch_variable(np.random.rand(1) * 10 + 1e-10, True)
    alpha = utils.make_torch_variable(np.random.rand(1) * 10 + 1e-10, True)
    return MLE_PARAMS_2(z=z, sigma=sigma, alpha=alpha)
Example #29
    def test_inactive_active_likelihood(self):
        # ### Check computation of E[x | z, active set] and Var(x | z, active set)

        # Compute the active set's covariance
        active_x = np.array([
            [0.0, 0.0],
            [1.0, 1.0],
            [2.0, 2.0],
        ])
        active_z = np.array([
            [0.0],
            [1.0],
            [2.0],
        ])
        alpha = 2.0
        var = 2.0
        log_l = np.log(2.0)

        active_sq_dist_matrix = cdist(active_z, active_z, 'sqeuclidean')
        active_rbf_kernel = np.exp(-1.0 * active_sq_dist_matrix /
                                   np.exp(log_l))
        active_cov = (alpha**2) * active_rbf_kernel + var * np.identity(3)
        inv_active_cov = np.linalg.pinv(active_cov)

        # Compute values relative to inactive set
        inactive_x = np.array([[0.0, 0.0], [0.0, 0.0]])

        inactive_z = np.array([[0.5], [1.5]])

        inactive_sq_dist_matrix = cdist(active_z, inactive_z, 'sqeuclidean')
        inactive_rbf_kernel = np.exp(-1.0 * inactive_sq_dist_matrix /
                                     np.exp(log_l))
        cross_cov = (alpha**2) * inactive_rbf_kernel
        inactive_var = (alpha**2)

        expected_loglik = 0
        for i in range(2):
            expected_mu = np.dot(active_x.T,
                                 np.dot(inv_active_cov, cross_cov[:, [i]]))
            expected_var = inactive_var - np.dot(
                cross_cov[:, [i]].T, np.dot(inv_active_cov, cross_cov[:, [i]]))
            expected_cov = expected_var * np.identity(2)

            expected_loglik += ss.multivariate_normal.logpdf(
                inactive_x[i, :], mean=expected_mu[:, 0], cov=expected_cov)

        # Compute the test values
        active_x_var = make_torch_variable(active_x, requires_grad=False)
        active_z_var = make_torch_variable(active_z, requires_grad=False)
        alpha_var = make_torch_variable([alpha], requires_grad=False)
        sigma_var = make_torch_variable([np.sqrt(var)], requires_grad=False)
        log_l_var = make_torch_variable([log_l], requires_grad=False)
        inactive_x_var = make_torch_variable(inactive_x, requires_grad=False)
        inactive_z_var = make_torch_variable(inactive_z, requires_grad=True)

        # ### Next, check computation of likelihood
        test_loglik = _inactive_point_likelihood(active_x_var, active_z_var,
                                                 inactive_x_var,
                                                 inactive_z_var, alpha_var,
                                                 sigma_var, log_l_var)

        assert_array_almost_equal(expected_loglik,
                                  test_loglik.data.numpy(),
                                  decimal=5)

        # Check grad
        test_loglik.sum().backward()
        self.assertIsNotNone(inactive_z_var.grad)
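The expected values in this test are the usual Gaussian conditioning formulas, with K the active-set covariance, k_* the active-to-inactive cross covariance, and k_{**} = \alpha^2 the prior variance at the inactive point:

\[ \mathbb{E}[x_* \mid X] = X^T K^{-1} k_*, \qquad \mathrm{Var}(x_* \mid X) = k_{**} - k_*^T K^{-1} k_* \]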
Example #30
    def test_vb_sample_noise(self):
        '''Check computations'''
        active_x = np.array([
            [0.0, 0.0],
            [1.0, 1.0],
            [2.0, 2.0],
        ])
        active_z = np.array([
            [0.0],
            [1.0],
            [2.0],
        ])
        alpha_q = 2.0
        sigma_q = 2.0
        log_l_q = np.log(2.0)

        active_sq_dist_matrix = np.array([
            [0.0, 2.0, 8.0],
            [2.0, 0.0, 2.0],
            [8.0, 2.0, 0.0],
        ])
        active_rbf_kernel = np.exp(-1.0 / np.exp(log_l_q) *
                                   active_sq_dist_matrix)
        active_cov = (alpha_q**2) * active_rbf_kernel + (sigma_q**2) * np.identity(3)
        inv_active_cov = np.linalg.pinv(active_cov)

        # Compute values relative to inactive set
        inactive_x = np.array([
            [0.5, 0.5],
            [0.5, 0.5],
        ])

        inactive_sq_dist_matrix = np.array([
            [2 * (0.5**2)],
            [2 * (0.5**2)],
            [2 * (1.5**2)],
        ])
        inactive_rbf_kernel = np.exp(-1.0 / np.exp(log_l_q) *
                                     inactive_sq_dist_matrix)
        cross_cov = (alpha_q**2) * inactive_rbf_kernel
        inactive_var = (alpha_q**2)

        # Compute the true values
        expected_mu = np.dot(active_z.T, np.dot(inv_active_cov, cross_cov)).T
        expected_var = inactive_var - np.dot(cross_cov.T,
                                             np.dot(inv_active_cov, cross_cov))

        inactive_noise = np.array([[0.0], [1.0]])
        expected_reparam = inactive_noise * expected_var + expected_mu

        # Compute the test values
        active_x_var = make_torch_variable(active_x, requires_grad=False)
        active_z_var = make_torch_variable(active_z, requires_grad=False)
        alpha_q_var = make_torch_variable([alpha_q], requires_grad=True)
        sigma_q_var = make_torch_variable([sigma_q], requires_grad=True)
        log_l_q_var = make_torch_variable([log_l_q], requires_grad=True)
        inactive_x_var = make_torch_variable(inactive_x, requires_grad=False)
        inactive_noise_var = make_torch_variable(inactive_noise,
                                                 requires_grad=False)
        test_reparam = _reparametrize_noise(inactive_x_var, inactive_noise_var,
                                            active_x_var, active_z_var,
                                            alpha_q_var, sigma_q_var,
                                            log_l_q_var)

        assert_array_almost_equal(expected_reparam,
                                  test_reparam.data.numpy(),
                                  decimal=5)

        # Check gradient
        test_reparam.sum().backward()
        self.assertIsNotNone(alpha_q_var.grad)
        self.assertIsNotNone(sigma_q_var.grad)
        self.assertIsNotNone(log_l_q_var.grad)