def gradient(samples, params, Q, c_bar, mu_bar, Sigma_bar, operator, n_samples, phi, psi, n_weights,
             lambda_, max_iter_ukl, C, K, precision=None, t_step=0, ukl_tight_freq=1):
    """Computes the objective function gradient"""
    c, mu, L = unpack(params, C, K)
    # The mixture weights c are not updated here: their gradient stays zero and
    # the KL gradient w.r.t. c returned below is discarded.
    grad_c = np.zeros(c.shape)
    # vs are the underlying standard-normal draws (reparameterization trick);
    # they are reused across all C components to build ws = mu + L v.
    _, vs = utils.sample_mvn(n_weights * C, mu[0, :], L[0, :, :])
    ws = np.matmul(vs.reshape(C, n_weights, K), np.transpose(L, (0, 2, 1))) + mu[:, np.newaxis]
    be_grad = operator.gradient_be(Q, samples, ws.reshape(C * n_weights, K)).reshape(C, n_weights, K)
    # Gradient of the expected Bellman error wrt mu
    ebe_grad_mu = np.average(be_grad, axis=1)
    # Gradient of the expected Bellman error wrt L
    ebe_grad_L = np.average(np.matmul(be_grad[:, :, :, np.newaxis],
                                      vs.reshape(C, n_weights, K)[:, :, np.newaxis]), axis=1)
    ebe_grad_mu = c[:, np.newaxis] * ebe_grad_mu
    ebe_grad_L = c[:, np.newaxis, np.newaxis] * ebe_grad_L
    kl_grad_c, kl_grad_mu, kl_grad_L, phi, psi = gradient_KL(c, mu, L, c_bar, mu_bar, Sigma_bar, phi, psi,
                                                             max_iter_ukl, C, K, precision=precision,
                                                             tight_bound=(t_step % ukl_tight_freq == 0))
    grad_mu = ebe_grad_mu + lambda_ * kl_grad_mu / n_samples
    grad_L = ebe_grad_L + lambda_ * kl_grad_L / n_samples
    return pack(grad_c, grad_mu, grad_L)
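# Illustrative only: a minimal sketch of the flat parameter layout that the
# unpack(params, C, K) / pack(c, mu, L) calls above appear to assume -- the
# mixture weights c (C,), the component means mu (C, K) and the Cholesky
# factors L (C, K, K) concatenated into a single vector. The repo's own
# pack/unpack are authoritative (e.g. they may store only the lower triangle
# of L); the helpers below are hypothetical and named so as not to clash.
def _example_pack(c, mu, L):
    return np.concatenate([c.ravel(), mu.ravel(), L.ravel()])


def _example_unpack(params, C, K):
    c = params[:C]
    mu = params[C:C + C * K].reshape(C, K)
    L = params[C + C * K:].reshape(C, K, K)
    return c, mu, L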
def objective(samples, params, Q, mu_bar, Sigma_bar_inv, operator, n_samples, lambda_, n_weights):
    """Computes the negative ELBO"""
    mu, L = unpack(params, Q._w.size)
    # Sigma = L L^T is positive semi-definite by construction (L is the Cholesky factor)
    Sigma = np.dot(L, L.T)
    weights, _ = utils.sample_mvn(n_weights, mu, L)
    likelihood = operator.expected_bellman_error(Q, samples, weights)
    assert likelihood >= 0
    kl = utils.KL(mu, Sigma, mu_bar, Sigma_bar_inv)
    assert kl >= 0
    return likelihood + lambda_ * kl / n_samples
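# Illustrative only: the standard closed-form KL divergence between two
# multivariate Gaussians, matching the call signature of utils.KL above under
# the assumption that it computes KL(N(mu, Sigma) || N(mu_bar, Sigma_bar)) with
# the prior covariance passed as its inverse. The repo's utils.KL is
# authoritative; _example_gaussian_kl is a hypothetical name.
def _example_gaussian_kl(mu, Sigma, mu_bar, Sigma_bar_inv):
    k = mu.size
    diff = mu_bar - mu
    _, logdet_Sigma = np.linalg.slogdet(Sigma)
    _, logdet_Sigma_bar_inv = np.linalg.slogdet(Sigma_bar_inv)
    return 0.5 * (np.trace(Sigma_bar_inv @ Sigma)
                  + diff @ Sigma_bar_inv @ diff
                  - k
                  - logdet_Sigma_bar_inv
                  - logdet_Sigma)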
def gradient(samples, params, Q, mu_bar, Sigma_bar_inv, operator, n_samples, lambda_, n_weights):
    """Computes the objective function gradient"""
    mu, L = unpack(params, Q._w.size)
    # Reparameterized samples: ws = mu + L vs, with vs ~ N(0, I)
    ws, vs = utils.sample_mvn(n_weights, mu, L)
    be_grad = operator.gradient_be(Q, samples, ws)
    # Gradient of the expected Bellman error wrt mu
    ebe_grad_mu = np.average(be_grad, axis=0)
    # Gradient of the expected Bellman error wrt L
    ebe_grad_L = np.average(be_grad[:, :, np.newaxis] * vs[:, np.newaxis, :], axis=0)
    kl_grad_mu, kl_grad_L = utils.gradient_KL(mu, L, mu_bar, Sigma_bar_inv)
    grad_mu = ebe_grad_mu + lambda_ * kl_grad_mu / n_samples
    grad_L = ebe_grad_L + lambda_ * kl_grad_L / n_samples
    return pack(grad_mu, grad_L)
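# Illustrative only: a self-contained sanity check of the reparameterization
# estimator used in both gradient() functions above. With w = mu + L v and
# v ~ N(0, I), the gradient of E[f(w)] w.r.t. mu is E[grad f(w)] and w.r.t. L
# is E[grad f(w) v^T]. For the toy objective f(w) = 0.5 ||w||^2 these
# expectations are exactly mu and L, so the Monte Carlo averages should
# approach them. The helper name, sample size, and tolerances are arbitrary.
def _example_reparam_gradient_check(K=3, n_weights=200000, seed=0):
    rng = np.random.default_rng(seed)
    mu = rng.normal(size=K)
    L = np.tril(rng.normal(size=(K, K)))
    vs = rng.normal(size=(n_weights, K))             # v ~ N(0, I)
    ws = mu + vs @ L.T                               # w = mu + L v
    f_grad = ws                                      # grad of 0.5 ||w||^2 is w
    grad_mu = np.average(f_grad, axis=0)             # estimates mu
    grad_L = np.average(f_grad[:, :, np.newaxis] * vs[:, np.newaxis, :], axis=0)  # estimates L
    return np.allclose(grad_mu, mu, atol=0.05) and np.allclose(grad_L, L, atol=0.05)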