def test_entropy(self):
    '''Compare MultiNormal.entropy() against scipy's multivariate normal.

    The scipy reference uses the covariance matrix ``np.diag(sigma)**2``
    built from the same scale vector handed to MultiNormal.
    '''
    loc = np.asarray([1.0, 0.0, -1.0])
    stddev = np.asarray([1.0, 2.0, 3.0])
    mvn = prob.MultiNormal(mean=loc, scale=stddev)
    reference = sp_stats.multivariate_normal(
        mean=loc, cov=np.diag(stddev)**2)

    expected_entropy = reference.entropy()
    actual_entropy = mvn.entropy()
    # both the shape and the value have to agree with the reference
    self.assertArrayEqual(expected_entropy.shape, actual_entropy.shape)
    self.assertAllClose(expected_entropy, actual_entropy)
def test_sample(self):
    '''Check shape, empirical mean and empirical variance of drawn samples.

    100000 draws are taken after seeding; mean and variance along the
    sample axis must match the parameters within an absolute
    tolerance of 0.1.
    '''
    num_draws = 100000
    loc = np.asarray([1.0, -1.0])
    stddev = np.asarray([1.0, 5.0])
    mvn = prob.MultiNormal(mean=loc, scale=stddev)

    # seed right before sampling so the draws are reproducible
    set_test_seed()
    samples = np.asarray(mvn.sample(num_draws))

    self.assertArrayEqual(samples.shape, (num_draws, 2))
    empirical_mean = samples.mean(axis=0)
    self.assertAllClose(empirical_mean, loc, atol=1e-1)
    empirical_var = samples.var(axis=0)
    self.assertAllClose(empirical_var, stddev**2, atol=1e-1)
def test_kl(self):
    '''Check MultiNormal.kl() against the closed form and a sample estimate.

    The expected value is the sum over dimensions of the univariate
    Gaussian KL divergence KL(a || b). The same value is then estimated
    from 100000 draws of ``a`` as mean(log p_a(x) - log p_b(x)) and
    compared with a 1% relative tolerance.
    '''
    mu_a, sigma_a = np.array([3.0, -1.0]), np.array([1.0, 2.5])
    mu_b, sigma_b = np.array([-3.0, 1.5]), np.array([0.5, 1.0])
    dist_a = prob.MultiNormal(mean=mu_a, scale=sigma_a)
    dist_b = prob.MultiNormal(mean=mu_b, scale=sigma_b)
    analytic_kl = dist_a.kl(dist_b)

    # closed form, split into the mean-shift part and the variance part
    mean_term = (mu_a - mu_b)**2 / (2 * sigma_b**2)
    var_term = 0.5 * (
        (sigma_a**2 / sigma_b**2) - 1 - 2 * np.log(sigma_a / sigma_b))
    expected_kl = (mean_term + var_term).sum()

    # the divergence is a scalar
    self.assertArrayEqual(analytic_kl.shape, [])
    self.assertAllClose(expected_kl, analytic_kl)

    # Monte-Carlo estimate of the same quantity
    set_test_seed()
    samples = dist_a.sample(100000)
    log_ratio = dist_a.log_prob(samples) - dist_b.log_prob(samples)
    estimated_kl = tf.reduce_mean(log_ratio, axis=0)
    self.assertAllClose(expected_kl, estimated_kl, atol=0.0, rtol=1e-2)
def test_prob(self):
    '''Check the mean/scale attributes and prob/log_prob of MultiNormal.

    Densities are compared against scipy's multivariate normal with
    covariance ``np.diag(sigma)**2`` at a single float32 point.
    '''
    loc = np.asarray([1.0, -1.0], dtype=np.float32)
    stddev = np.asarray([3.0, 2.0], dtype=np.float32)
    point = np.array([2.5, 0.5], dtype=np.float32)
    mvn = prob.MultiNormal(mean=loc, scale=stddev)

    # the parameters must be exposed unchanged
    self.assertArrayEqual(loc, mvn.mean)
    self.assertArrayEqual(stddev, mvn.scale)

    # log-density first, then the density as exp(log-density)
    reference = sp_stats.multivariate_normal(loc, np.diag(stddev)**2)
    expected_log_prob = reference.logpdf(point)
    self.assertArrayClose(expected_log_prob, mvn.log_prob(point))
    self.assertArrayClose(np.exp(expected_log_prob), mvn.prob(point))
def make_multinormal(mean_shape, scale_shape, dtype=tf.float32, seed=get_test_seed()):
    '''Build a MultiNormal with uniformly random parameters.

    Args:
        mean_shape: Iterable of ints, shape of the mean tensor.
        scale_shape: Iterable of ints, shape of the scale tensor.
        dtype: dtype of the sampled tensors, also forwarded to
            MultiNormal. Defaults to tf.float32.
        seed: Op-level seed passed to both tf.random.uniform calls.
            The default is evaluated once, when this function is
            defined.

    Returns:
        MultiNormal: Distribution whose mean and scale are both sampled
            with minval=-10 and maxval=10 (so the scale can be negative).
    '''
    # options shared by the two sampling calls
    uniform_kwargs = dict(minval=-10, maxval=10, dtype=dtype, seed=seed)
    random_mean = tf.random.uniform(list(mean_shape), **uniform_kwargs)
    random_scale = tf.random.uniform(list(scale_shape), **uniform_kwargs)
    return prob.MultiNormal(random_mean, random_scale, dtype)
def call(self, inputs, training=True):
    '''Forward pass producing an action distribution.

    Args:
        inputs (tf.Tensor): Latent input; the original documentation
            states shape (b, latent), tf.float32.
        training (bool, optional): Forwarded to both sub-models.
            Defaults to True.

    Returns:
        MultiNormal: A multivariate Gaussian distribution, wrapped in
            a Tanh distribution when ``self.squash`` is truthy.
    '''
    # run both heads on the same input
    raw_mean = self._mean_model(inputs, training=training)
    raw_scale = self._logstd_model(inputs, training=training)
    # softplus keeps the scale positive; 1e-5 keeps it away from zero
    positive_scale = tf.math.softplus(raw_scale) + 1e-5

    # -1 lets the leading (batch) dimension be inferred
    target_shape = [-1, *self.action_shape]
    base_dist = ub_prob.MultiNormal(
        tf.reshape(raw_mean, target_shape),
        tf.reshape(positive_scale, target_shape))

    # optionally squash the Gaussian with tanh
    return ub_prob.Tanh(base_dist) if self.squash else base_dist
def test_shape_no_exception(self):
    '''Constructing MultiNormal from one-element lists must not raise.'''
    # the test passes as long as construction completes
    prob.MultiNormal(mean=[1.], scale=[-5.])
def test_shape_exception(self):
    '''Constructing MultiNormal from scalar parameters must raise RuntimeError.'''
    scalar_mean, scalar_scale = 1., -5.
    with self.assertRaises(RuntimeError):
        prob.MultiNormal(mean=scalar_mean, scale=scalar_scale)