def __call__(self, x):
  """Computes regularization given an ed.Normal random variable as input."""
  if not isinstance(x, ed.RandomVariable):
    raise ValueError('Input must be an ed.RandomVariable.')
  prior = ed.Independent(
      ed.Normal(loc=x.distribution.mean(), scale=self.stddev).distribution,
      reinterpreted_batch_ndims=len(x.distribution.event_shape))
  regularization = x.distribution.kl_divergence(prior.distribution)
  return self.scale_factor * regularization
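# The sketch below is not library code; it is a minimal check, written with
# plain TensorFlow Probability (assuming `tfd = tfp.distributions`), of the
# quantity __call__ computes. Because the prior shares the posterior mean, the
# KL reduces to the scale terms, sum_d [log(s / sigma_d) +
# sigma_d**2 / (2 s**2) - 1/2]. All names and values here are illustrative.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

posterior_scale = 0.5
prior_scale = 1.0
loc = tf.zeros((3,))
posterior = tfd.Independent(
    tfd.Normal(loc=loc, scale=posterior_scale), reinterpreted_batch_ndims=1)
prior = tfd.Independent(
    tfd.Normal(loc=loc, scale=prior_scale), reinterpreted_batch_ndims=1)
kl = tfd.kl_divergence(posterior, prior)
expected = 3. * (tf.math.log(prior_scale / posterior_scale) +
                 posterior_scale**2 / (2. * prior_scale**2) - 0.5)
tf.debugging.assert_near(kl, expected)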
def testHalfCauchyKLDivergence(self):
  shape = (3,)
  regularizer = ed.regularizers.get('half_cauchy_kl_divergence')
  variational_posterior = ed.Independent(
      ed.LogNormal(loc=tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=1)
  kl = regularizer(variational_posterior)
  # KL uses a single-sample estimate, which is not necessarily >= 0. We only
  # check the shape.
  self.assertEqual(kl.shape, ())
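# Illustrative sketch (not library code) of the "single-sample estimate"
# mentioned in the comment above: with no closed-form KL between a LogNormal
# posterior and a half-Cauchy prior, one unbiased estimator is
# log q(z) - log p(z) for a single draw z ~ q, which can come out negative.
# `tfd = tfp.distributions` and the half-Cauchy scale are assumptions here.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

posterior = tfd.Independent(
    tfd.LogNormal(loc=tf.zeros((3,)), scale=1.), reinterpreted_batch_ndims=1)
prior = tfd.Independent(
    tfd.HalfCauchy(loc=tf.zeros((3,)), scale=1.), reinterpreted_batch_ndims=1)
z = posterior.sample()
kl_estimate = posterior.log_prob(z) - prior.log_prob(z)  # scalar, may be < 0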
def testNormalEmpiricalBayesKLDivergenceTFFunction(self):
  """Checks that KL evaluates properly multiple times when compiled."""
  shape = (3,)
  regularizer = ed.regularizers.get('normal_empirical_bayes_kl_divergence')
  regularizer_compiled = tf.function(regularizer)
  weights_one = ed.Independent(
      ed.Normal(loc=tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=len(shape))
  kl_one = regularizer(weights_one).numpy()
  kl_one_c = regularizer_compiled(weights_one).numpy()

  weights_two = ed.Independent(
      ed.Normal(loc=5. + tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=len(shape))
  kl_two = regularizer(weights_two).numpy()
  kl_two_c = regularizer_compiled(weights_two).numpy()

  self.assertAllClose(kl_one, kl_one_c)
  self.assertAllClose(kl_two, kl_two_c)
  self.assertNotAlmostEqual(kl_one_c, kl_two_c)
def testTrainableNormalKLDivergenceStddev(self):
  tf.random.set_seed(83271)
  shape = (3,)
  regularizer = ed.regularizers.get('trainable_normal_kl_divergence_stddev')
  variational_posterior = ed.Independent(
      ed.Normal(loc=tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=1)
  kl = regularizer(variational_posterior)
  self.assertGreaterEqual(kl, 0.)

  prior_stddev = regularizer.stddev_constraint(regularizer.stddev)
  self.assertAllClose(prior_stddev, np.ones(prior_stddev.shape), atol=0.1)
def testNormalKLDivergence(self):
  shape = (3,)
  regularizer = ed.regularizers.get('normal_kl_divergence')
  variational_posterior = ed.Independent(
      ed.Normal(loc=tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=1)
  kl = regularizer(variational_posterior)
  self.assertGreaterEqual(kl, 0.)

  dataset_size = 100
  scale_factor = 1. / dataset_size
  regularizer = ed.regularizers.NormalKLDivergence(scale_factor=scale_factor)
  scaled_kl = regularizer(variational_posterior)
  self.assertEqual(scale_factor * kl, scaled_kl)
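# Hedged usage sketch for the scale_factor above: with minibatch training the
# single KL penalty is shared across all N examples, so it is scaled by 1/N.
# The layer name and kernel_regularizer argument below follow the usual
# Keras-style convention and are an assumption, not taken from this file.
import edward2 as ed
import tensorflow as tf

dataset_size = 100
layer = ed.layers.DenseReparameterization(
    units=16,
    kernel_regularizer=ed.regularizers.NormalKLDivergence(
        scale_factor=1. / dataset_size))
_ = layer(tf.zeros((2, 8)))
kl_penalty = tf.add_n(layer.losses)  # the scaled KL, ready to add to the NLL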
def testNormalEmpiricalBayesKLDivergence(self, gen_stddev, eb_prior_stddev):
  """Tests ed.regularizers.NormalEmpiricalBayesKLDivergence.

  Checks that the EB KL estimate is always smaller than, but close to, the
  true generating Normal-InverseGamma KL, since the EB prior variance is
  explicitly optimized.

  Args:
    gen_stddev: Standard deviation of the generating normal distribution.
    eb_prior_stddev: Standard deviation of the EB hyperprior.
  """
  tf.random.set_seed(89323)
  shape = (99, 101)
  gen_mean = 0.
  eb_prior_mean = eb_prior_stddev**2
  cvar = (eb_prior_mean / eb_prior_stddev)**2
  variance_concentration = cvar + 2.
  variance_scale = eb_prior_mean * (cvar + 1.)
  weight = ed.Independent(
      ed.Normal(gen_mean + tf.zeros(shape), gen_stddev).distribution,
      reinterpreted_batch_ndims=len(shape))

  # Compute KL(q(w) || N(w | gen_mean, gen_stddev)) - log IG(gen_stddev**2)
  # under a fixed setting of the prior stddev.
  normal_regularizer = ed.regularizers.NormalKLDivergence(
      mean=gen_mean, stddev=gen_stddev)
  kl = normal_regularizer(weight)
  kl -= tf.reduce_sum(
      ed.InverseGamma(variance_concentration,
                      variance_scale).distribution.log_prob(gen_stddev**2))

  eb_regularizer = ed.regularizers.NormalEmpiricalBayesKLDivergence(
      mean=gen_mean,
      variance_concentration=variance_concentration,
      variance_scale=variance_scale)
  eb_kl = eb_regularizer(weight)
  # Normalize the comparison by the total number of weights. (Note this also
  # scales the IG log prob.)
  kl /= float(np.prod(shape))
  eb_kl /= float(np.prod(shape))
  kl_value, eb_kl_value = self.evaluate([kl, eb_kl])
  self.assertGreaterEqual(kl_value, eb_kl_value)
  self.assertAlmostEqual(
      kl_value, eb_kl_value, delta=0.05,
      msg='Parameters score KL=%.6f on generating Normal-IG KL and KL=%.6f '
          'on EB-fitted KL, too much difference.' % (kl_value, eb_kl_value))
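# Worked sketch of the "explicitly optimized" prior variance behind the test
# above. For q(w) = prod_i N(mu_i, s_i^2) with k weights, the EB objective in
# v = sigma^2 is
#   f(v) = (k/2 + alpha + 1) * log(v) + (T/2 + beta) / v + const,
# where T = sum_i (s_i^2 + mu_i^2), so f'(v) = 0 gives
#   v* = (T + 2 * beta) / (k + 2 * alpha + 2).
# This is derived here from the stated objective, not quoted from the library;
# the numeric check below uses illustrative values only.
import numpy as np

k = 99 * 101
mu = np.zeros(k)
s = 0.1 * np.ones(k)
alpha, beta = 2.0, 0.03  # illustrative inverse-gamma hyperparameters
T = np.sum(s**2 + mu**2)

def objective(v):
  # KL(q || N(0, v)) - log IG(v | alpha, beta), dropping v-independent terms.
  return (k / 2. + alpha + 1.) * np.log(v) + (T / 2. + beta) / v

v_star = (T + 2. * beta) / (k + 2. * alpha + 2.)
grid = np.linspace(0.5 * v_star, 2. * v_star, 101)
assert objective(v_star) <= objective(grid).min() + 1e-9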
def testUniformKLDivergence(self):
  shape = (3,)
  regularizer = ed.regularizers.get('uniform_kl_divergence')
  variational_posterior = ed.Independent(
      ed.Normal(loc=tf.zeros(shape), scale=1.).distribution,
      reinterpreted_batch_ndims=1)
  kl = regularizer(variational_posterior)
  kl_value = self.evaluate(kl)
  self.assertNotEqual(kl_value, 0.)

  dataset_size = 100
  scale_factor = 1. / dataset_size
  regularizer = ed.regularizers.UniformKLDivergence(scale_factor=scale_factor)
  kl = regularizer(variational_posterior)
  scaled_kl_value = self.evaluate(kl)
  self.assertAlmostEqual(scale_factor * kl_value, scaled_kl_value)
def next_state(self, previous_state, user_response, slate_docs):
  """The state value after the initial value."""
  user_interests = previous_state.get('user_interests')
  chosen_docs = user_response.get('choice')
  chosen_doc_features = selectors.get_chosen(slate_docs, chosen_docs)
  doc_features = chosen_doc_features.get('doc_features')
  # Define similarities to be affinities(user_interests, doc_features) + 2.
  similarities = self._utility_model.affinities(
      user_interests, doc_features, False).get('affinities') + 2.0
  return Value(
      utilities=ed.Normal(
          loc=similarities, scale=self._utility_stddev, validate_args=True),
      user_interests=ed.Independent(
          tfd.Deterministic(
              user_interests + self._interest_step_size *
              (user_interests - doc_features)),
          reinterpreted_batch_ndims=1))
def _deterministic_with_correct_batch_shape(self, field):
  return ed.Independent(
      tfd.Deterministic(loc=field),
      reinterpreted_batch_ndims=len(field.shape) - self._batch_ndims)
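# Illustrative sketch (not from the source) of the shape bookkeeping above:
# with a batch_ndims of 1, a field of shape (batch, d1, d2) keeps (batch,) as
# the batch shape and folds the trailing (d1, d2) into the event shape.
# `tfd = tfp.distributions` and the shapes below are assumptions.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

field = tf.zeros((4, 5, 6))
batch_ndims = 1
dist = tfd.Independent(
    tfd.Deterministic(loc=field),
    reinterpreted_batch_ndims=len(field.shape) - batch_ndims)
print(dist.batch_shape)  # (4,)
print(dist.event_shape)  # (5, 6)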