def test_vb_reparametrize_noise(self):
    N = 4
    M = 3
    K = 2

    batch = np.array([[i for _ in range(M)] for i in range(N)]).astype(float)
    noise = np.ones((N, K)).astype(float)
    beta_q = np.array([[1, 1], [1, 1], [1, 1]]).astype(float)
    sigma_q = np.array([2]).astype(float)

    vb_params = linear_regression_lvm.VB_PARAMS(
        beta=None,
        sigma=None,
        beta_q=utils.make_torch_variable(beta_q, True),
        sigma_q=utils.make_torch_variable(sigma_q, True),
    )

    batch_var = utils.make_torch_variable(batch, False)
    noise_var = utils.make_torch_variable(noise, False)

    reparam_noise = linear_regression_lvm._reparametrize_noise(
        batch_var, noise_var, vb_params)

    # Check values
    truth = np.array([[4 + 3 * i] * K for i in range(N)]).astype(float)
    assert_array_almost_equal(truth, reparam_noise.data.numpy())

    # Check gradients
    reparam_noise.sum().sum().backward()
    self.assertIsNotNone(vb_params.beta_q.grad)
    self.assertIsNotNone(vb_params.sigma_q.grad)
def vb_estimate_lower_bound(batch, noise, vb_params):
    '''Estimate the negative variational lower bound for a batch

    Returns log q(z | x) - log p(x | z) - log p(z), summed over the batch; this is the
    quantity that the optimization step minimizes.
    '''
    if not isinstance(vb_params, VB_PARAMS):
        raise ValueError('parameter tuple must be of type VB_PARAMS')

    B, M = batch.size()
    B_1, K = noise.size()

    if B != B_1:
        raise ValueError('Batch size is inconsistent between batch and noise')

    # Compute components
    mu_x = torch.mm(noise, vb_params.beta)
    identity_x = utils.make_torch_variable(np.identity(M), False)
    sigma_x = torch.mul(vb_params.sigma**2, identity_x)

    mu_q = torch.mm(batch, vb_params.beta_q)
    identity_q = utils.make_torch_variable(np.identity(K), False)
    sigma_q = torch.mul(vb_params.sigma_q**2, identity_q)

    mu_prior = utils.make_torch_variable(np.zeros(K), False)
    sigma_prior = utils.make_torch_variable(np.identity(K), False)

    # Compute log likelihoods
    log_posterior = mvn.torch_mvn_density(noise, mu_q, sigma_q, log=True)
    log_likelihood = mvn.torch_mvn_density(batch, mu_x, sigma_x, log=True)
    log_prior = mvn.torch_mvn_density(noise, mu_prior, sigma_prior, log=True)

    lower_bound = log_posterior - log_likelihood - log_prior

    return lower_bound.sum()
def test_make_cov(self):
    '''Make sure the covariance function is working'''
    # Check value
    x1 = np.array([0.0, 0.0])
    x2 = np.array([1.0, 1.0])
    alpha = 2.0
    sigma = 2.0
    log_l = np.log(2.0)

    sq_dist = ((x2 - x1)**2).sum()
    rbf = np.exp(-1.0 / np.exp(log_l) * sq_dist)
    expected_cov = (alpha**2) * rbf + sigma**2

    x1_var = make_torch_variable([x1], requires_grad=False)
    x2_var = make_torch_variable([x2], requires_grad=False)
    alpha_var = make_torch_variable([alpha], requires_grad=True)
    sigma_var = make_torch_variable([sigma], requires_grad=True)
    log_l_var = make_torch_variable([log_l], requires_grad=True)

    test_cov = _make_cov(x1_var, x2_var, alpha_var, sigma_var, log_l_var)
    assert_array_almost_equal(expected_cov, test_cov.data.numpy()[0, 0], decimal=5)

    # Make sure the gradient gets through
    test_cov.sum().backward()
    self.assertIsNotNone(alpha_var.grad)
    self.assertIsNotNone(sigma_var.grad)
    self.assertIsNotNone(log_l_var.grad)
def mle_estimate_batch_likelihood(batch, mle_params, sub_B, test_noise=None):
    '''Compute batch likelihood under the naive (Monte Carlo) method

    Args:
        batch: (torch.autograd.Variable) the batch of inputs X
        mle_params: (MLE_PARAMS torch.autograd.Variable tuple) the variables needed to
            compute the likelihood
        sub_B: (int) the size of the batches used for monte carlo estimates of E_z[log lik(x)]
        test_noise: (torch.autograd.Variable) optional; introduce noise via function input
            instead of randomly generating it within the function

    Returns:
        (torch.autograd.Variable) the marginal likelihood of the batch; shape (1, )
    '''
    # ### Validation

    # Check that params are of right type
    if not isinstance(mle_params, MLE_PARAMS):
        raise ValueError('parameter tuple must be of type MLE_PARAMS')

    # Check parameter sizes against batch size
    B, M = batch.size()
    K, M_1 = mle_params.beta.size()

    if M != M_1:
        raise AssertionError(
            'batch and beta do not agree on M ({} vs {})'.format((B, M), (K, M_1)))

    utils.check_autograd_variable_size(mle_params.sigma, [(1, )])

    # ### Computation

    # Sample noise required to compute monte carlo estimate of likelihood
    noise = utils.make_torch_variable(np.random.randn(sub_B * B, K), False)
    if test_noise is not None:
        # For debugging, allow insertion of a deterministic noise variable
        utils.check_autograd_variable_size(test_noise, [(sub_B * B, K)])
        noise = test_noise

    # Expand minibatch to match shape of noise
    batch = _mle_expand_batch(batch, sub_B)

    # Construct mu and sigma & compute density
    mu = torch.mm(noise, mle_params.beta)
    identity = utils.make_torch_variable(np.identity(M), False)
    sigma = torch.mul(mle_params.sigma**2, identity)
    likelihood = mvn.torch_mvn_density(batch, mu, sigma)

    # Reshape density to (sub_B, B) and sum across first dimension
    utils.check_autograd_variable_size(likelihood, [(sub_B * B, )])
    likelihood = _mle_unpack_likelihood(likelihood, sub_B, B)

    # Compute approx expected likelihood of each batch sample
    approx_expected_likelihood_each_iter = likelihood.mean(dim=0)
    approx_marginal_log_likelihood = torch.log(approx_expected_likelihood_each_iter).sum()

    return approx_marginal_log_likelihood
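# A minimal usage sketch, not code from this repo: one gradient step that maximizes the Monte
# Carlo estimate returned by mle_estimate_batch_likelihood. It reuses utils.select_minibatch
# and MLE_PARAMS(beta, sigma) as they appear in the surrounding code; the function name, the
# optimizer setup, and the hyperparameters (B, sub_B) are illustrative assumptions only.
def mle_forward_step_sketch(x, mle_params, B, sub_B, optimizer):
    batch = utils.select_minibatch(x, B)

    # Negate: the estimator is a log likelihood to be maximized, while optimizers minimize
    neg_log_lik = -1 * mle_estimate_batch_likelihood(batch, mle_params, sub_B)

    neg_log_lik.backward()
    optimizer.step()

    # Keep sigma strictly positive, mirroring the constraint used in vb_forward_step_w_optim
    mle_params.sigma.data[0] = max(1e-10, mle_params.sigma.data[0])

    optimizer.zero_grad()

    return mle_params, neg_log_lik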
def test_em_full_data_log_likelihood(self):
    # Implied dimensions: B = 3, M = 2, K = 2
    B = 3
    M = 2
    K = 2

    batch = np.array([
        [0, 0],
        [1, 1],
        [2, 2],
    ]).astype(float)
    beta = np.array([[1, 0.1], [1, 0.1]]).astype(float)
    sigma = 1.0

    var_inv = np.linalg.pinv(np.dot(beta, beta.T) + (sigma**2) * np.identity(K))
    truth_e_z = np.dot(var_inv, np.dot(beta, batch.T)).T

    truth_e_z2 = np.empty((B, K, K))
    for i in range(B):  # i.e., compute covariance matrix for the latent of each batch point
        dot_prod = np.dot(truth_e_z[[i], :].T, truth_e_z[[i], :])
        truth_e_z2[i, :, :] = (sigma**2) * var_inv + dot_prod

    truth_log_lik = np.empty((B, ))
    for i in range(B):
        a_1 = (M / 2.0) * np.log(sigma**2)
        a_2 = 0.5 * np.sum(np.diag(truth_e_z2[i, :, :]))
        a_3 = 0.5 * np.dot(batch[[i], :], batch[[i], :].T)
        a_4 = -1 * np.dot(truth_e_z[[i], :], np.dot(beta, batch[[i], :].T))
        a_5 = 0.5 * np.sum(np.diag(np.dot(beta, np.dot(beta.T, truth_e_z2[i, :, :]))))
        truth_log_lik[i] = -1 * (a_1 + a_2 + a_3 + a_4 + a_5)
    truth_log_lik = truth_log_lik.sum()

    em_params = linear_regression_lvm.EM_PARAMS(
        beta=utils.make_torch_variable(beta, True),
        sigma=utils.make_torch_variable([sigma], True))
    batch_var = utils.make_torch_variable(batch, False)

    test_e_z, test_e_z2 = linear_regression_lvm.em_compute_posterior(batch_var, em_params)
    test_log_lik = linear_regression_lvm.em_compute_full_data_log_likelihood(
        batch_var, em_params, test_e_z, test_e_z2)

    # Compare computation
    assert_array_almost_equal(truth_log_lik, test_log_lik.data.numpy())
    self.assertLess(test_log_lik.data.numpy()[0], 0)

    # Check for grads
    test_log_lik.backward()
    self.assertIsNotNone(em_params.beta.grad)
    self.assertIsNotNone(em_params.sigma.grad)
def test_mle_log_likelihood(self):
    '''Check validity of computation'''
    # Check values
    x = np.array([[0.0, 0.0], [1.0, 1.0]])
    z = np.array([[0.0], [1.0]])
    alpha = 2.0
    sigma = 2.0
    log_l = np.log(2.0)

    sq_dist_matrix = np.array([[0.0, 1.0], [1.0, 0.0]])
    rbf_kernel = np.exp(-1.0 / np.exp(log_l) * sq_dist_matrix)
    cov = (alpha**2) * rbf_kernel + (sigma**2) * np.identity(2)
    mu = np.array([0.0, 0.0])

    expected = np.sum(np.log(ss.multivariate_normal(mean=mu, cov=cov).pdf(x.T)))

    x_var = make_torch_variable(x, requires_grad=False)
    z_var = make_torch_variable(z, requires_grad=False)
    alpha_var = make_torch_variable([alpha], requires_grad=True)
    sigma_var = make_torch_variable([sigma], requires_grad=True)
    log_l_var = make_torch_variable([log_l], requires_grad=True)

    test = _mle_log_likelihood(x_var, z_var, alpha_var, sigma_var, log_l_var)
    assert_array_almost_equal(expected, test.data.numpy()[0], decimal=5)

    # Check gradients
    test.backward()
    self.assertIsNotNone(alpha_var.grad)
    self.assertIsNotNone(sigma_var.grad)
    self.assertIsNotNone(log_l_var.grad)

    # Check that wrapper function also preserves gradients
    x_var = make_torch_variable(x, requires_grad=False)
    mle_params = MLE_PARAMS(
        z=make_torch_variable(z, requires_grad=False),
        alpha=make_torch_variable([alpha], requires_grad=True),
        sigma=make_torch_variable([sigma], requires_grad=True),
        log_l=make_torch_variable([log_l], requires_grad=True),
    )
    batch_ix = np.array([0, 1])

    test = mle_batch_log_likelihood(x_var, mle_params, batch_ix)
    assert_array_almost_equal(expected, test.data.numpy()[0], decimal=5)

    test.backward()
    self.assertIsNotNone(mle_params.alpha.grad)
    self.assertIsNotNone(mle_params.sigma.grad)
    self.assertIsNotNone(mle_params.log_l.grad)
def test_batch_mvn_computation(self):
    # Make sure diagonal util works as expected
    M = 5
    diags = np.array([1.0, 2.0, 3.0])
    expected_matrices = np.concatenate(
        [d * np.identity(M).reshape(1, M, M) for d in diags], axis=0)

    diags_var = Variable(torch.Tensor(diags), requires_grad=True)
    test_matrices = mvn._make_batch_matrices_w_diagonal(diags_var, M)
    assert_array_almost_equal(expected_matrices, test_matrices.data.numpy())

    # Make sure likelihood computation is correct
    B = 2
    M = 3

    x = np.array([  # shape (B, M)
        [0, 1, 2],
        [2, 3, 4]
    ]).astype(float)
    mu = np.array([  # shape (B, M)
        [0, 0, 0],
        [1, 1, 1]
    ]).astype(float)
    var = np.array([2, 3]).astype(float)  # shape (B, )
    cov = np.concatenate([  # shape (B, M, M)
        np.identity(M).reshape(1, M, M) * v for v in var
    ])

    # Expected
    expected = np.array([  # shape (B, 1)
        ss.multivariate_normal.logpdf(x[i, :], mu[i, :], cov[i, :, :])
        for i in range(B)
    ])

    # Test
    x_var = make_torch_variable(x, requires_grad=True)
    mu_var = make_torch_variable(mu, requires_grad=True)
    var_var = make_torch_variable(var, requires_grad=True)
    test = mvn.torch_diagonal_mvn_density_batch(x_var, mu_var, var_var, log=True)
    assert_array_almost_equal(expected, test.data.numpy())

    # Check gradients
    test.sum().backward()
    self.assertIsNotNone(x_var.grad)
    self.assertIsNotNone(mu_var.grad)
    self.assertIsNotNone(var_var.grad)
def compute_var(beta, sigma):
    '''Compute M = t(beta) * beta + sigma^2 * I, a commonly used quantity'''
    _, M = beta.size()
    identity = utils.make_torch_variable(np.identity(M), False)
    a1 = torch.mm(beta.t(), beta)
    a2 = torch.mul(sigma**2, identity)
    return torch.add(a1, a2)
def test_compute_var(self):
    '''Make sure computing variance of posterior is autograd-able'''
    # Compute quantity
    beta = utils.make_torch_variable(np.array([[1, 1]]).astype(float), True)
    sigma = utils.make_torch_variable([1.0], True)
    var = compute_var(beta, sigma)

    # Check shape
    I, _ = var.size()
    self.assertEqual(2, I)

    # Check grad
    var.sum().sum().backward()
    self.assertIsNotNone(beta.grad)
    self.assertIsNotNone(sigma.grad)
def vb_forward_step_w_optim(x, vb_params, B, optimizer):
    # Create minibatch
    batch = utils.select_minibatch(x, B)

    # Sample noise
    K, _ = vb_params.beta.size()
    noise = utils.make_torch_variable(np.random.randn(B, K), False)
    noise = _reparametrize_noise(batch, noise, vb_params)

    # Estimate the negative variational lower bound of the batch
    neg_lower_bound = vb_estimate_lower_bound(batch, noise, vb_params)

    # Do a backward step
    neg_lower_bound.backward()

    # Update step
    optimizer.step()

    # Constrain sigma
    vb_params.sigma.data[0] = max(1e-10, vb_params.sigma.data[0])
    vb_params.sigma_q.data[0] = max(1e-10, vb_params.sigma_q.data[0])

    # Clear gradients
    optimizer.zero_grad()

    return vb_params, neg_lower_bound
def _reparametrize_noise(batch, noise, vb_params):
    mu = torch.mm(batch, vb_params.beta_q)

    _, K = noise.size()
    identity = utils.make_torch_variable(np.identity(K), False)
    sigma = torch.mul(vb_params.sigma_q**2, identity)

    return mu + torch.mm(noise, sigma)
def test_rbf_kernel_forward(self):
    '''Make sure we know the RBF kernel is working'''
    # Test basic functionality
    x1 = np.array([0.0, 0.0])
    x2 = np.array([1.0, 1.0])
    log_l = np.log(2.0)
    eps = 1e-5

    sq_dist = ((x2 - x1)**2).sum()
    expected = np.exp(-1.0 / np.exp(log_l) * sq_dist)

    x1_var = make_torch_variable([x1], requires_grad=False)
    x2_var = make_torch_variable([x2], requires_grad=False)
    log_l_var = make_torch_variable([log_l], requires_grad=True)

    test = rbf_kernel_forward(x1_var, x2_var, log_l_var, eps=eps)
    assert_array_almost_equal(expected, test.data.numpy()[0, 0], decimal=5)

    # Make sure the gradient gets through
    test.sum().backward()
    self.assertIsNotNone(log_l_var.grad)

    # Test safety valve
    bad_log_l = -1e6
    expected_bad = np.exp(-1.0 / eps * sq_dist)

    bad_log_l_var = make_torch_variable([bad_log_l], requires_grad=True)

    test_bad = rbf_kernel_forward(x1_var, x2_var, bad_log_l_var, eps=eps)
    assert_array_almost_equal(expected_bad, test_bad.data.numpy()[0, 0], decimal=5)

    # Make sure the gradient gets through
    test_bad.sum().backward()
    self.assertIsNotNone(bad_log_l_var.grad)
def vae_lower_bound_w_sampling(x, z, vae_model):
    '''Compute variational lower bound, as specified by the variational autoencoder

    This is less efficient than the main method `vae_lower_bound`, as it uses sampling rather
    than exact computation.

    Args:
        x: (Variable) observations; shape n x m1
        z: (Variable) latent variables; shape n x m2
        vae_model: (VAE) the variational autoencoder model; implemented like a pytorch module

    Returns:
        (Variable) lower bound; dim (1, )
    '''
    # ### Get parameters

    # Some initial parameter setting
    n, m1 = x.size()
    _, m2 = z.size()

    # Parameters of the likelihood of x given the model & z
    x_mu, _ = vae_model.decode(z)
    x_sigma = make_torch_variable(torch.ones(n), requires_grad=False)

    # Parameters of the variational approximation of the posterior of z given model & x
    z_mu, z_logvar = vae_model.encode(x)
    z_sigma = torch.exp(torch.mul(z_logvar, 0.5))

    # Parameters of the (actual) prior distribution on z
    prior_mu = make_torch_variable(np.zeros((n, m2)), requires_grad=False)
    prior_sigma = make_torch_variable(np.ones(n), requires_grad=False)

    # ### Compute components (i.e., expected log ___ under posterior approximation)

    log_posterior = torch_diagonal_mvn_density_batch(z, z_mu, z_sigma, log=True)  # (B, )
    log_likelihood = torch_diagonal_mvn_density_batch(x, x_mu, x_sigma, log=True)  # (B, )
    log_prior = torch_diagonal_mvn_density_batch(z, prior_mu, prior_sigma, log=True)  # (B, )

    # ### Put it all together
    lower_bound = -1 * (log_posterior - log_likelihood - log_prior).sum()

    return lower_bound
def forward(self, x, noise=None):
    '''Estimate variational lower bound of parameters given observations x'''
    n, _ = x.size()

    if noise is None:
        noise = make_torch_variable(np.random.randn(n, self.m2), requires_grad=False)

    sample_z = reparametrize_noise(x, noise, self)

    if self.use_sampling:
        return vae_lower_bound_w_sampling(x, sample_z, self)
    else:
        return vae_lower_bound(x, sample_z, self)
def test_estimate_batch_log_likelihood_lawrence(self):
    '''Test computation of marginal likelihood with latent var's marginalized out'''
    # Implied dimensions: n = 3, m1 = 2, m2 = 1
    n = 3
    m1 = 2
    m2 = 1

    x = np.array([
        [0, 0],
        [1, 1],
        [2, 2],
    ]).astype(float)
    z = np.array([[0], [1], [2]]).astype(float)
    sigma = 2.0
    alpha = 2.0

    var = (alpha**2) * np.dot(z, z.T) + (sigma**2) * np.identity(n)

    a_0 = n * m1 * np.log(2 * np.pi)
    a_1 = m1 * np.log(np.linalg.det(var))
    a_2 = np.diag(np.dot(np.linalg.pinv(var), np.dot(x, x.T))).sum()
    truth_marginal_log_lik = -0.5 * (a_0 + a_1 + a_2)

    x_var = utils.make_torch_variable(x, False)
    z_var = utils.make_torch_variable(z, True)
    sigma_var = utils.make_torch_variable([sigma], True)
    alpha_var = utils.make_torch_variable([alpha], True)

    test_marginal_log_lik = linear_regression_lvm._log_likelihood_lawrence(
        x_var, z_var, sigma_var, alpha_var)

    assert_array_almost_equal(truth_marginal_log_lik, test_marginal_log_lik.data.numpy())

    # Check gradients
    test_marginal_log_lik.backward()
    self.assertIsNotNone(z_var.grad)
    self.assertIsNotNone(sigma_var.grad)
    self.assertIsNotNone(alpha_var.grad)
def vae_lower_bound(x, z, vae_model):
    '''Compute variational lower bound, as specified by the variational autoencoder

    The VAE model specifies
      * likelihood x | z ~ MVN(decoder_mu(z), I) (the decoder's variance output is unused here)
      * approximate posterior z | x ~ MVN(encoder_mu(x), diag(exp(encoder_logvar(x))))
      * prior z ~ MVN(0, I)
    where the posterior is not the true posterior, but rather the variational approximation.

    This comes out to

        lower bound = E[log p(x, z) - log q(z | x)]
                    = E[log p(x | z) + log p(z) - log q(z | x)]

    where the expectation is over z ~ q(z | x).

    Args:
        x: (Variable) observations; shape n x m1
        z: (Variable) latent variables; shape n x m2
        vae_model: (VAE) the variational autoencoder model; implemented like a pytorch module

    Returns:
        (Variable) lower bound; dim (1, )
    '''
    # TODO: Reimplement sampling based approach for comparison

    # ### Get parameters

    # Some initial parameter setting
    n, m1 = x.size()
    _, m2 = z.size()

    # Parameters of the likelihood of x given the model & z
    x_mu, _ = vae_model.decode(z)
    x_sigma = make_torch_variable(torch.ones(n), requires_grad=False)

    # Parameters of the variational approximation of the posterior of z given model & x
    z_mu, z_logvar = vae_model.encode(x)

    # ### Compute components (i.e., expected log ___ under posterior approximation)

    # E[log posterior]: analytically, is -0.5 * sum_j [log(2 pi) + 1 + log(sigma_j ** 2)]
    log_posterior = -0.5 * (z_logvar + 1 + np.log(2 * np.pi)).sum(dim=1)  # (B, )

    # E[log likelihood]: can't get analytically, so we use the sample estimate...
    log_likelihood = torch_diagonal_mvn_density_batch(x, x_mu, x_sigma, log=True)  # (B, )

    # E[log prior]: analytically, is -0.5 * sum_j [log(2 pi) + mu_j ** 2 + sigma_j ** 2]
    log_prior = -0.5 * ((z_mu ** 2) + torch.exp(z_logvar) + np.log(2 * np.pi)).sum(dim=1)  # (B, )

    # ### Put it all together
    lower_bound = -1 * (log_posterior - log_likelihood - log_prior).sum()

    return lower_bound
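# A small sanity-check sketch (illustrative only, not part of this repo): the analytic
# E[log prior] and E[log posterior] terms used above combine to the familiar closed-form
# Gaussian KL,
#   E[log p(z)] - E[log q(z | x)] = -KL(q || N(0, I))
#                                 = 0.5 * sum_j (1 + logvar_j - mu_j ** 2 - exp(logvar_j)),
# since the log(2 pi) terms cancel. The mu / logvar values below are arbitrary.
import numpy as np

z_mu_check = np.array([[0.5, -1.0]])
z_logvar_check = np.array([[0.2, -0.3]])

e_log_prior = -0.5 * (z_mu_check ** 2 + np.exp(z_logvar_check) + np.log(2 * np.pi)).sum(axis=1)
e_log_posterior = -0.5 * (z_logvar_check + 1 + np.log(2 * np.pi)).sum(axis=1)
neg_kl = 0.5 * (1 + z_logvar_check - z_mu_check ** 2 - np.exp(z_logvar_check)).sum(axis=1)

assert np.allclose(e_log_prior - e_log_posterior, neg_kl)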
def test_estimate_batch_likelihood_v2(self):
    '''Test computation of marginal likelihood with latent var's marginalized out'''
    # Implied dimensions: B = 3, M = 2, K = 1, sub_B = 2
    B = 3
    M = 2
    K = 1
    sub_B = 2

    batch = np.array([
        [0, 0],
        [1, 1],
        [2, 2],
    ]).astype(float)
    beta = np.array([[1, 0.1]]).astype(float)
    sigma = 1.0

    var = np.dot(beta.T, beta) + (sigma**2) * np.identity(M)
    truth_marginal_lik = ss.multivariate_normal.pdf(batch, np.zeros(M), var)
    truth_marginal_log_lik = np.log(truth_marginal_lik).sum()

    mle_params = linear_regression_lvm.MLE_PARAMS(
        beta=utils.make_torch_variable(beta, True),
        sigma=utils.make_torch_variable([sigma], True))
    batch_var = utils.make_torch_variable(batch, False)

    test_marginal_log_lik = linear_regression_lvm.mle_estimate_batch_likelihood_v2(
        batch_var, mle_params)

    assert_array_almost_equal(truth_marginal_log_lik, test_marginal_log_lik.data.numpy())

    # Check gradients
    test_marginal_log_lik.backward()
    self.assertIsNotNone(mle_params.beta.grad)
    self.assertIsNotNone(mle_params.sigma.grad)
def mle_estimate_batch_likelihood_v2(batch, mle_params):
    '''Compute the exact marginal log likelihood of the batch, with the latent variables
    integrated out analytically: x ~ MVN(0, t(beta) * beta + sigma^2 * I)'''
    if not isinstance(mle_params, MLE_PARAMS):
        raise ValueError('Input params must be of type MLE_PARAMS')

    B, M = batch.size()

    mu = utils.make_torch_variable(np.zeros(M), False)
    sigma = compute_var(mle_params.beta, mle_params.sigma)
    approx_marginal_log_likelihood = mvn.torch_mvn_density(batch, mu, sigma, log=True)

    return approx_marginal_log_likelihood.sum()
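# A quick numerical check (illustrative only, not repo code) of the marginal used above:
# if z ~ N(0, I_K) and x = z * beta + eps with eps ~ N(0, sigma^2 * I_M), then
# Cov(x) = t(beta) * beta + sigma^2 * I_M, which is exactly what compute_var builds.
# The seed, sample size, and tolerance are arbitrary choices for the sketch.
import numpy as np

rng = np.random.RandomState(0)
K_check, M_check, sigma_check = 1, 2, 1.0
beta_check = np.array([[1.0, 0.1]])  # shape (K, M), same values as the test above

z_check = rng.randn(200000, K_check)
x_check = z_check.dot(beta_check) + sigma_check * rng.randn(200000, M_check)

empirical_cov = np.cov(x_check, rowvar=False)
analytic_cov = beta_check.T.dot(beta_check) + (sigma_check ** 2) * np.identity(M_check)

assert np.allclose(empirical_cov, analytic_cov, atol=0.05)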
def _log_likelihood_lawrence(x, z, sigma, alpha):
    '''Compute the log likelihood of P(X | Z, sigma, alpha) using the formula in the Lawrence
    paper, i.e.,

        P(X | Z, sigma, alpha) = ((2 pi)^N |K|)^{-0.5 * M1} exp{-0.5 * trace(K^-1 X t(X))}

    We compute K through the `build_marginal_covariance` function as

        K = (alpha ** 2) * Z * t(Z) + (sigma ** 2) * I

    i.e., of dimension (N, N), where alpha is the stdev of the weight prior and sigma is the
    stdev of the observation noise.

    This is equivalent to computing the likelihood as

        P(X | Z, sigma, alpha) = prod_{j in [1, M1]} MVN(X_{-, j}; 0, K)

    but more efficient, as it scales better with the dimension M1 (which is crucial when the
    whole goal is dimension reduction :P).

    Params:
        x: the observations (N, M1)
        z: the latent variables (N, M2)
        sigma: the standard deviation of the observation noise (1, )
        alpha: the standard deviation of the weight prior (1, )

    Returns:
        (1, ) vector holding the log likelihood
    '''
    n, m1 = x.size()
    _, m2 = z.size()

    k = build_marginal_covariance(z, sigma, alpha)
    k_inv = torch.inverse(k)

    x_inner_prod = torch.mm(x, x.t())

    a1 = utils.make_torch_variable([-0.5 * n * m1 * np.log(2 * np.pi)], False)
    a2 = -0.5 * m1 * mvn.torch_log_determinant(k)
    a3 = -0.5 * torch.sum(torch.diag(torch.mm(k_inv, x_inner_prod)))

    loglik = a1 + a2 + a3

    return loglik
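# A short verification sketch (illustrative, not repo code) of the equivalence claimed in the
# docstring above: the trace form equals the sum over the M1 columns of X of the
# MVN(X[:, j]; 0, K) log densities. The shapes, seed, and parameter values are arbitrary.
import numpy as np
from scipy import stats as ss

rng_check = np.random.RandomState(1)
n_check, m1_check, m2_check = 4, 3, 2
sigma_check, alpha_check = 1.5, 0.7

z_check = rng_check.randn(n_check, m2_check)
x_check = rng_check.randn(n_check, m1_check)
k_check = (alpha_check ** 2) * z_check.dot(z_check.T) + (sigma_check ** 2) * np.identity(n_check)

# Trace form (as implemented above)
a1_check = -0.5 * n_check * m1_check * np.log(2 * np.pi)
a2_check = -0.5 * m1_check * np.log(np.linalg.det(k_check))
a3_check = -0.5 * np.trace(np.linalg.inv(k_check).dot(x_check.dot(x_check.T)))
trace_form = a1_check + a2_check + a3_check

# Column-wise MVN form
column_form = sum(
    ss.multivariate_normal.logpdf(x_check[:, j], mean=np.zeros(n_check), cov=k_check)
    for j in range(m1_check))

assert np.isclose(trace_form, column_form)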
def test_mle_expand_and_unpack(self):
    '''Check that the batch is expanded, and the computation unpacked, correctly'''
    N = 5
    x = np.arange(N).astype(float).reshape(-1, 1)  # i.e., N instances with 1 element
    x_var = utils.make_torch_variable(x, False)

    reps = 3
    test_expand = linear_regression_lvm._mle_expand_batch(x_var, reps)  # i.e., (3 * N, 1)
    test_unpack = linear_regression_lvm._mle_unpack_likelihood(
        test_expand.squeeze(), reps, N)

    truth_expand = np.array([[i for _ in range(reps) for i in range(N)]]).astype(float).T
    truth_unpack = np.array([range(N) for _ in range(reps)]).astype(float)

    assert_array_almost_equal(np.array([reps * N, 1]), test_expand.size())
    assert_array_almost_equal(np.array([reps, N]), test_unpack.size())
    assert_array_almost_equal(truth_expand, test_expand.data.numpy())
    assert_array_almost_equal(truth_unpack, test_unpack.data.numpy())
def test_vb_lower_bound(self):
    '''Check computations'''
    # Establish parameters
    x = np.array([
        [0.0, 0.0],
        [1.0, 1.0],
        [2.0, 2.0],
    ])
    z = np.array([
        [0.0],
        [1.0],
        [2.0],
    ])
    alpha = 2.0
    sigma = 2.0
    log_l = np.log(2.0)
    alpha_q = 2.0
    sigma_q = 2.0
    log_l_q = np.log(2.0)

    # Compute covariances
    sq_dist_x = np.array([
        [0.0, 2.0, 8.0],
        [2.0, 0.0, 2.0],
        [8.0, 2.0, 0.0],
    ])
    rbf_x = np.exp(-1.0 / np.exp(log_l_q) * sq_dist_x)
    cov_z = (alpha_q**2) * rbf_x + (sigma_q**2) * np.identity(3)

    sq_dist_z = np.array([
        [0.0, 1.0, 4.0],
        [1.0, 0.0, 1.0],
        [4.0, 1.0, 0.0],
    ])
    rbf_z = np.exp(-1.0 / np.exp(log_l) * sq_dist_z)
    cov_x = (alpha**2) * rbf_z + (sigma**2) * np.identity(3)

    # Compute components of bound
    log_posterior = ss.multivariate_normal.logpdf(z.T, mean=np.zeros(3), cov=cov_z).sum()
    log_likelihood = ss.multivariate_normal.logpdf(x.T, mean=np.zeros(3), cov=cov_x).sum()
    log_prior = ss.multivariate_normal.logpdf(z, mean=np.zeros(1), cov=np.identity(1)).sum()

    expected_lower_bound = log_posterior - log_likelihood - log_prior

    # Compute test value
    x_var = make_torch_variable(x, requires_grad=False)
    z_var = make_torch_variable(z, requires_grad=False)
    alpha_var = make_torch_variable([alpha], requires_grad=True)
    sigma_var = make_torch_variable([sigma], requires_grad=True)
    log_l_var = make_torch_variable([log_l], requires_grad=True)
    alpha_q_var = make_torch_variable([alpha_q], requires_grad=True)
    sigma_q_var = make_torch_variable([sigma_q], requires_grad=True)
    log_l_q_var = make_torch_variable([log_l_q], requires_grad=True)

    test_lower_bound = _vb_lower_bound(x_var, z_var, alpha_var, sigma_var, log_l_var,
                                       alpha_q_var, sigma_q_var, log_l_q_var)

    assert_array_almost_equal(expected_lower_bound, test_lower_bound.data.numpy(), decimal=5)

    # ### Check gradients
    test_lower_bound.backward()
    self.assertIsNotNone(alpha_var.grad)
    self.assertIsNotNone(sigma_var.grad)
    self.assertIsNotNone(log_l_var.grad)
    self.assertIsNotNone(alpha_q_var.grad)
    self.assertIsNotNone(sigma_q_var.grad)
    self.assertIsNotNone(log_l_q_var.grad)
def test_em_compute_posterior_and_extract_diagonals(self):
    '''Ensure the EM code is up to snuff'''
    # Implied dimensions: B = 3, M = 2, K = 2, sub_B = 2
    B = 3
    M = 2
    K = 2
    sub_B = 2

    batch = np.array([
        [0, 0],
        [1, 1],
        [2, 2],
    ]).astype(float)
    beta = np.array([[1, 0.1], [1, 0.1]]).astype(float)
    sigma = 1.0

    var_inv = np.linalg.pinv(np.dot(beta, beta.T) + (sigma**2) * np.identity(K))
    truth_e_z = np.dot(var_inv, np.dot(beta, batch.T)).T

    truth_e_z2 = np.empty((B, K, K))
    for i in range(B):  # i.e., compute covariance matrix for the latent of each batch point
        dot_prod = np.dot(truth_e_z[[i], :].T, truth_e_z[[i], :])
        truth_e_z2[i, :, :] = (sigma**2) * var_inv + dot_prod

    em_params = linear_regression_lvm.EM_PARAMS(
        beta=utils.make_torch_variable(beta, True),
        sigma=utils.make_torch_variable([sigma], True))
    batch_var = utils.make_torch_variable(batch, False)

    test_e_z, test_e_z2 = linear_regression_lvm.em_compute_posterior(batch_var, em_params)

    # Compare posteriors
    assert_array_almost_equal(truth_e_z, test_e_z.data.numpy())
    assert_array_almost_equal(truth_e_z2, test_e_z2.data.numpy())

    # Check for gradients
    test_e_z.sum().backward(retain_graph=True)
    self.assertIsNotNone(em_params.beta.grad)
    self.assertIsNotNone(em_params.sigma.grad)

    em_params.beta.grad = None
    em_params.sigma.grad = None

    test_e_z2.sum().backward(retain_graph=True)
    self.assertIsNotNone(em_params.beta.grad)
    self.assertIsNotNone(em_params.sigma.grad)

    # Additionally, check that diagonal extraction step preserves gradients
    em_params.beta.grad = None
    em_params.sigma.grad = None

    truth_diagonals = np.empty((B, K))
    for i in range(B):
        truth_diagonals[i, :] = np.diag(truth_e_z2[i, :, :])

    test_diagonals = linear_regression_lvm.extract_diagonals(test_e_z2)

    assert_array_almost_equal(truth_diagonals, test_diagonals.data.numpy())

    test_diagonals.sum().backward(retain_graph=True)
    self.assertIsNotNone(em_params.beta.grad)
    self.assertIsNotNone(em_params.sigma.grad)
def test_vb_estimate_lower_bound(self):
    # Implied dimensions: B = 3, M = 2, K = 1
    B = 3
    M = 2
    K = 1

    batch = np.array([
        [0, 0],
        [1, 1],
        [2, 2],
    ]).astype(float)
    noise = np.array([[1], [1], [1]]).astype(float)
    beta = np.array([[1, 1]]).astype(float)
    sigma = 1.0
    beta_q = np.array([[1], [1]]).astype(float)
    sigma_q = 1.0

    # i.e., noise * beta
    mu_x = np.array([
        [1, 1],
        [1, 1],
        [1, 1],
    ]).astype(float)

    # i.e., batch * beta_q
    mu_q = np.array([
        [0],
        [2],
        [4],
    ]).astype(float)

    diff_x = batch - mu_x
    diff_q = noise - mu_q

    likelihood = ss.multivariate_normal.pdf(diff_x, np.zeros(M), sigma * np.identity(M))
    posterior = ss.multivariate_normal.pdf(diff_q, np.zeros(K), sigma * np.identity(K))
    prior = ss.multivariate_normal.pdf(noise, np.zeros(K), np.identity(K))

    truth_lower_bound = (np.log(posterior) - np.log(likelihood) - np.log(prior)).sum()

    vb_params = linear_regression_lvm.VB_PARAMS(
        beta=utils.make_torch_variable(beta, True),
        sigma=utils.make_torch_variable([sigma], True),
        beta_q=utils.make_torch_variable(beta_q, True),
        sigma_q=utils.make_torch_variable([sigma_q], True))
    batch_var = utils.make_torch_variable(batch, False)
    noise_var = utils.make_torch_variable(noise, False)

    test_lower_bound = linear_regression_lvm.vb_estimate_lower_bound(
        batch_var, noise_var, vb_params)

    assert_array_almost_equal(truth_lower_bound, test_lower_bound.data.numpy())

    # Check gradients
    test_lower_bound.backward()
    self.assertIsNotNone(vb_params.beta.grad)
    self.assertIsNotNone(vb_params.sigma.grad)
    self.assertIsNotNone(vb_params.beta_q.grad)
    self.assertIsNotNone(vb_params.sigma_q.grad)
def test_mle_estimate_batch_likelihood(self):
    '''Check that the batch likelihood is computed correctly & leads to gradients'''
    # Implied dimensions: B = 3, M = 2, K = 1, sub_B = 2
    B = 3
    M = 2
    K = 1
    sub_B = 2

    batch = np.array([
        [0, 0],
        [1, 1],
        [2, 2],
    ]).astype(float)
    noise = np.array([[-1], [-1], [-1], [1], [1], [1]]).astype(float)
    beta = np.array([[1, 1]]).astype(float)
    sigma = 1.0

    expanded_batch = np.array([
        [0, 0],
        [1, 1],
        [2, 2],
        [0, 0],
        [1, 1],
        [2, 2],
    ]).astype(float)
    iter_mu = np.array([
        [-1, -1],
        [-1, -1],
        [-1, -1],
        [1, 1],
        [1, 1],
        [1, 1],
    ]).astype(float)
    diff = expanded_batch - iter_mu

    likelihoods = ss.multivariate_normal.pdf(diff, np.zeros(M), sigma * np.identity(M))
    expected_each_iter = np.array([
        (likelihoods[0] + likelihoods[3]) / 2,
        (likelihoods[1] + likelihoods[4]) / 2,
        (likelihoods[2] + likelihoods[5]) / 2,
    ])
    truth_marginal_log_lik = np.log(expected_each_iter).sum()

    mle_params = linear_regression_lvm.MLE_PARAMS(
        beta=utils.make_torch_variable(beta, True),
        sigma=utils.make_torch_variable([sigma], True))
    batch_var = utils.make_torch_variable(batch, False)
    noise_var = utils.make_torch_variable(noise, False)

    test_marginal_log_lik = linear_regression_lvm.mle_estimate_batch_likelihood(
        batch_var, mle_params, sub_B, test_noise=noise_var)

    assert_array_almost_equal(truth_marginal_log_lik, test_marginal_log_lik.data.numpy())

    # Check gradients
    test_marginal_log_lik.backward()
    self.assertIsNotNone(mle_params.beta.grad)
    self.assertIsNotNone(mle_params.sigma.grad)
def vb_initialize_parameters(M, K):
    beta = utils.make_torch_variable(np.random.randn(K, M), True)
    sigma = utils.make_torch_variable(np.random.rand(1) * 10 + 1e-10, True)
    beta_q = utils.make_torch_variable(np.random.randn(M, K), True)
    sigma_q = utils.make_torch_variable(np.random.rand(1) * 10 + 1e-10, True)
    return VB_PARAMS(beta=beta, sigma=sigma, beta_q=beta_q, sigma_q=sigma_q)
def em_initialize_parameters(M, K):
    '''Initialize the parameters before fitting'''
    beta = utils.make_torch_variable(np.random.randn(K, M), True)
    sigma = utils.make_torch_variable(np.random.rand(1) * 10 + 1e-10, True)
    return EM_PARAMS(beta=beta, sigma=sigma)
def build_marginal_covariance(z, sigma, alpha):
    n, _ = z.size()
    inner_prod = torch.mm(z, z.t())
    identity = utils.make_torch_variable(np.identity(n), False)
    return (alpha**2) * inner_prod + (sigma**2) * identity
def mle_initialize_parameters_v3(n, m1, m2):
    '''Initialize the parameters before fitting'''
    z = utils.make_torch_variable(np.random.randn(n, m2), True)
    sigma = utils.make_torch_variable(np.random.rand(1) * 10 + 1e-10, True)
    alpha = utils.make_torch_variable(np.random.rand(1) * 10 + 1e-10, True)
    return MLE_PARAMS_2(z=z, sigma=sigma, alpha=alpha)
def test_inactive_active_likelihood(self):
    # ### Check computation of E[x | z, active set] and Var(x | z, active set)

    # Compute the active set's covariance
    active_x = np.array([
        [0.0, 0.0],
        [1.0, 1.0],
        [2.0, 2.0],
    ])
    active_z = np.array([
        [0.0],
        [1.0],
        [2.0],
    ])
    alpha = 2.0
    var = 2.0
    log_l = np.log(2.0)

    active_sq_dist_matrix = cdist(active_z, active_z, 'sqeuclidean')
    active_rbf_kernel = np.exp(-1.0 * active_sq_dist_matrix / np.exp(log_l))
    active_cov = (alpha**2) * active_rbf_kernel + var * np.identity(3)
    inv_active_cov = np.linalg.pinv(active_cov)

    # Compute values relative to inactive set
    inactive_x = np.array([[0.0, 0.0], [0.0, 0.0]])
    inactive_z = np.array([[0.5], [1.5]])

    inactive_sq_dist_matrix = cdist(active_z, inactive_z, 'sqeuclidean')
    inactive_rbf_kernel = np.exp(-1.0 * inactive_sq_dist_matrix / np.exp(log_l))
    cross_cov = (alpha**2) * inactive_rbf_kernel
    inactive_var = (alpha**2)

    expected_loglik = 0
    for i in range(2):
        expected_mu = np.dot(active_x.T, np.dot(inv_active_cov, cross_cov[:, [i]]))
        expected_var = inactive_var - np.dot(
            cross_cov[:, [i]].T, np.dot(inv_active_cov, cross_cov[:, [i]]))
        expected_cov = expected_var * np.identity(2)
        expected_loglik += ss.multivariate_normal.logpdf(
            inactive_x[i, :], mean=expected_mu[:, 0], cov=expected_cov)

    # Compute the test values
    active_x_var = make_torch_variable(active_x, requires_grad=False)
    active_z_var = make_torch_variable(active_z, requires_grad=False)
    alpha_var = make_torch_variable([alpha], requires_grad=False)
    sigma_var = make_torch_variable([np.sqrt(var)], requires_grad=False)
    log_l_var = make_torch_variable([log_l], requires_grad=False)
    inactive_x_var = make_torch_variable(inactive_x, requires_grad=False)
    inactive_z_var = make_torch_variable(inactive_z, requires_grad=True)

    # ### Next, check computation of likelihood
    test_loglik = _inactive_point_likelihood(active_x_var, active_z_var, inactive_x_var,
                                             inactive_z_var, alpha_var, sigma_var, log_l_var)

    assert_array_almost_equal(expected_loglik, test_loglik.data.numpy(), decimal=5)

    # Check grad
    test_loglik.sum().backward()
    self.assertIsNotNone(inactive_z_var.grad)
def test_vb_sample_noise(self):
    '''Check computations'''
    active_x = np.array([
        [0.0, 0.0],
        [1.0, 1.0],
        [2.0, 2.0],
    ])
    active_z = np.array([
        [0.0],
        [1.0],
        [2.0],
    ])
    alpha_q = 2.0
    sigma_q = 2.0
    log_l_q = np.log(2.0)

    active_sq_dist_matrix = np.array([
        [0.0, 2.0, 8.0],
        [2.0, 0.0, 2.0],
        [8.0, 2.0, 0.0],
    ])
    active_rbf_kernel = np.exp(-1.0 / np.exp(log_l_q) * active_sq_dist_matrix)
    active_cov = (alpha_q**2) * active_rbf_kernel + (sigma_q**2) * np.identity(3)
    inv_active_cov = np.linalg.pinv(active_cov)

    # Compute values relative to inactive set
    inactive_x = np.array([
        [0.5, 0.5],
        [0.5, 0.5],
    ])
    inactive_sq_dist_matrix = np.array([
        [2 * (0.5**2)],
        [2 * (0.5**2)],
        [2 * (1.5**2)],
    ])
    inactive_rbf_kernel = np.exp(-1.0 / np.exp(log_l_q) * inactive_sq_dist_matrix)
    cross_cov = (alpha_q**2) * inactive_rbf_kernel
    inactive_var = (alpha_q**2)

    # Compute the true values
    expected_mu = np.dot(active_z.T, np.dot(inv_active_cov, cross_cov)).T
    expected_var = inactive_var - np.dot(cross_cov.T, np.dot(inv_active_cov, cross_cov))

    inactive_noise = np.array([[0.0], [1.0]])
    expected_reparam = inactive_noise * expected_var + expected_mu

    # Compute the test values
    active_x_var = make_torch_variable(active_x, requires_grad=False)
    active_z_var = make_torch_variable(active_z, requires_grad=False)
    alpha_q_var = make_torch_variable([alpha_q], requires_grad=True)
    sigma_q_var = make_torch_variable([sigma_q], requires_grad=True)
    log_l_q_var = make_torch_variable([log_l_q], requires_grad=True)
    inactive_x_var = make_torch_variable(inactive_x, requires_grad=False)
    inactive_noise_var = make_torch_variable(inactive_noise, requires_grad=False)

    test_reparam = _reparametrize_noise(inactive_x_var, inactive_noise_var, active_x_var,
                                        active_z_var, alpha_q_var, sigma_q_var, log_l_q_var)

    assert_array_almost_equal(expected_reparam, test_reparam.data.numpy(), decimal=5)

    # Check gradient
    test_reparam.sum().backward()
    self.assertIsNotNone(alpha_q_var.grad)
    self.assertIsNotNone(sigma_q_var.grad)
    self.assertIsNotNone(log_l_q_var.grad)