コード例 #1
0
 def test_entropy(self):
     '''Entropy of a MultiNormal should agree with SciPy's reference value.

     The reference is a `multivariate_normal` whose covariance is the
     diagonal matrix of squared scales.
     '''
     mean = np.asarray([1.0, 0.0, -1.0])
     std = np.asarray([1.0, 2.0, 3.0])
     # reference distribution (diagonal covariance = std squared)
     reference = sp_stats.multivariate_normal(mean=mean, cov=np.diag(std**2))
     expected = reference.entropy()
     # distribution under test
     actual = prob.MultiNormal(mean=mean, scale=std).entropy()
     # same shape first, then same value
     self.assertArrayEqual(expected.shape, actual.shape)
     self.assertAllClose(expected, actual)
コード例 #2
0
 def test_sample(self):
     '''Empirical mean/variance of many draws should match the parameters.

     Draws 100000 samples and compares the per-dimension sample mean and
     variance with `mean` and `scale**2` (absolute tolerance 0.1).
     '''
     num_draws = 100000
     mean = np.asarray([1.0, -1.0])
     std = np.asarray([1.0, 5.0])
     dist = prob.MultiNormal(mean=mean, scale=std)
     # call the test-seed helper right before sampling
     set_test_seed()
     samples = np.asarray(dist.sample(num_draws))
     # one row per draw, one column per dimension
     self.assertArrayEqual(samples.shape, (num_draws, 2))
     # first and second moments, estimated over the draw axis
     self.assertAllClose(np.mean(samples, axis=0), mean, atol=1e-1)
     self.assertAllClose(np.var(samples, axis=0), np.square(std), atol=1e-1)
コード例 #3
0
 def test_kl(self):
     '''KL(a || b) should match the closed form for diagonal Gaussians.

     Two checks are made against the same closed-form value:
     the value returned by `kl()`, and a Monte-Carlo estimate built from
     `log_prob` differences on samples drawn from the first distribution.
     '''
     mean_p = np.array([3.0, -1.0])
     std_p = np.array([1.0, 2.5])
     mean_q = np.array([-3.0, 1.5])
     std_q = np.array([0.5, 1.0])
     p = prob.MultiNormal(mean=mean_p, scale=std_p)
     q = prob.MultiNormal(mean=mean_q, scale=std_q)
     kl = p.kl(q)
     # closed form, per dimension: mean-shift term + variance-ratio term
     mean_term = (mean_p - mean_q)**2 / (2 * std_q**2)
     var_term = 0.5 * (
         (std_p**2 / std_q**2) - 1 - 2 * np.log(std_p / std_q))
     expected_kl = (mean_term + var_term).sum()
     # the analytic KL is a scalar
     self.assertArrayEqual(kl.shape, [])
     self.assertAllClose(expected_kl, kl)
     # Monte-Carlo estimate: E_p[log p(x) - log q(x)]
     set_test_seed()
     samples = p.sample(100000)
     log_ratio = p.log_prob(samples) - q.log_prob(samples)
     estimated_kl = tf.reduce_mean(log_ratio, axis=0)
     self.assertAllClose(expected_kl, estimated_kl, atol=0.0, rtol=1e-2)
コード例 #4
0
 def test_prob(self):
     '''`mean`/`scale` attributes and (log-)density should match references.

     The density reference is SciPy's `multivariate_normal` with a diagonal
     covariance of squared scales, evaluated at a single float32 point.
     '''
     f32 = np.float32
     mean = np.asarray([1.0, -1.0], dtype=f32)
     std = np.asarray([3.0, 2.0], dtype=f32)
     point = np.array([2.5, 0.5], dtype=f32)
     dist = prob.MultiNormal(mean=mean, scale=std)
     # the distribution exposes the parameters it was built from
     self.assertArrayEqual(mean, dist.mean)
     self.assertArrayEqual(std, dist.scale)
     # log-density and density against the SciPy reference
     reference = sp_stats.multivariate_normal(mean=mean, cov=np.diag(std)**2)
     expected_log_prob = reference.logpdf(point)
     self.assertArrayClose(expected_log_prob, dist.log_prob(point))
     self.assertArrayClose(np.exp(expected_log_prob), dist.prob(point))
コード例 #5
0
def make_multinormal(mean_shape,
                     scale_shape,
                     dtype=tf.float32,
                     seed=get_test_seed()):
    '''Build a `prob.MultiNormal` with randomly drawn parameters.

    Args:
        mean_shape: Iterable of ints, shape of the randomly drawn mean.
        scale_shape: Iterable of ints, shape of the randomly drawn scale.
        dtype: dtype used for both random tensors and forwarded to
            `prob.MultiNormal`. Defaults to tf.float32.
        seed: Seed forwarded to both `tf.random.uniform` calls. The default
            expression `get_test_seed()` is evaluated once, when this
            function is defined, not on every call.

    Returns:
        prob.MultiNormal: Distribution with mean drawn from [-10, 10) and
            scale drawn from [1, 10).
    '''
    mean = tf.random.uniform(list(mean_shape),
                             minval=-10,
                             maxval=10,
                             dtype=dtype,
                             seed=seed)
    # The scale acts as a standard deviation, so it is drawn from a strictly
    # positive range. Sampling it from [-10, 10) produced negative (or
    # near-zero) scales about half of the time, for which log-based
    # quantities such as log_prob / entropy / kl are not well defined.
    scale = tf.random.uniform(list(scale_shape),
                              minval=1,
                              maxval=10,
                              dtype=dtype,
                              seed=seed)
    return prob.MultiNormal(mean, scale, dtype)
コード例 #6
0
    def call(self, inputs, training=True):
        '''Map a latent vector to an action distribution.

        Args:
            inputs (tf.Tensor): Expecting a latent vector in shape
                (b, latent), tf.float32
            training (bool, optional): Training mode, forwarded to both
                sub-models. Defaults to True.

        Returns:
            A multivariate gaussian distribution (`ub_prob.MultiNormal`),
            wrapped in `ub_prob.Tanh` when `self.squash` is truthy.
        '''
        # run both heads on the same latent input
        raw_mean = self._mean_model(inputs, training=training)
        raw_std = self._logstd_model(inputs, training=training)
        # softplus keeps the std positive; 1e-5 keeps it away from zero
        positive_std = tf.math.softplus(raw_std) + 1e-5
        # leading -1 lets the batch dimension be inferred
        batched_shape = [-1, *self.action_shape]
        mean = tf.reshape(raw_mean, batched_shape)
        std = tf.reshape(positive_std, batched_shape)
        gaussian = ub_prob.MultiNormal(mean, std)
        # optionally squash the distribution with tanh
        return ub_prob.Tanh(gaussian) if self.squash else gaussian
コード例 #7
0
 def test_shape_no_exception(self):
     '''Constructing from one-element list parameters should not raise.'''
     params = dict(mean=[1.], scale=[-5.])
     # the test passes as long as the constructor returns normally
     prob.MultiNormal(**params)
コード例 #8
0
 def test_shape_exception(self):
     '''Constructing from bare scalar parameters should raise RuntimeError.'''
     scalar_mean, scalar_scale = 1., -5.
     with self.assertRaises(RuntimeError):
         prob.MultiNormal(mean=scalar_mean, scale=scalar_scale)