def testDenseReparameterizationKL(self):
  inputs = tf.to_float(np.random.rand(5, 12))
  layer = bayes.DenseReparameterization(10)

  # Imagine this is the 1st epoch.
  with tf.GradientTape() as tape:
    layer(inputs)  # first call forces a build, here inside this tape
    layer(inputs)  # ensure robustness after multiple calls
    loss = tf.reduce_sum([tf.reduce_sum(l) for l in layer.losses])

  variables = [layer.kernel_initializer.mean, layer.kernel_initializer.stddev]
  for v in variables:
    self.assertIn(v, layer.variables)

  # This will be fine, since the layer was built inside this tape, and thus
  # the distribution init ops were inside this tape.
  grads = tape.gradient(loss, variables)
  for grad in grads:
    self.assertIsNotNone(grad)

  # Imagine this is the 2nd epoch.
  with tf.GradientTape() as tape:
    layer(inputs)  # build won't be called again
    loss = tf.reduce_sum([tf.reduce_sum(l) for l in layer.losses])

  variables = [layer.kernel_initializer.mean, layer.kernel_initializer.stddev]
  for v in variables:
    self.assertIn(v, layer.variables)

  # This would fail, since the layer was built inside the tape from the 1st
  # epoch, and thus the distribution init ops were inside that tape instead
  # of this tape. By using a callable for the variable, this will no longer
  # fail.
  grads = tape.gradient(loss, variables)
  for grad in grads:
    self.assertIsNotNone(grad)
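
# Illustrative sketch, not part of the original suite: the "callable for the
# variable" fix referenced in the comment above. A callable defers reading
# the variable until it is invoked, so the read is recorded by whichever
# tape is active at call time rather than the tape that was active at build
# time. All names here are hypothetical.
def _sketchCallableVariableFix(self):
  var = tf.Variable(1.)
  scale = lambda: tf.nn.softplus(var)  # deferred; rebuilt under any tape
  with tf.GradientTape() as tape:
    loss = scale() ** 2.  # the variable read happens inside this tape
  self.assertIsNotNone(tape.gradient(loss, [var])[0])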

def testTrainableNormalStddevConstraint(self):
  layer = bayes.DenseReparameterization(
      100, kernel_initializer="trainable_normal")
  inputs = tf.random_normal([1, 1])
  out = layer(inputs)
  stddev = layer.kernel.distribution.stddev()
  self.evaluate(tf.global_variables_initializer())
  res, _ = self.evaluate([stddev, out])
  self.assertAllGreater(res, 0.)
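
# Illustrative sketch (an assumption about the general technique, not the
# library's actual implementation): a "trainable_normal"-style initializer
# can keep the stddev strictly positive by passing an unconstrained variable
# through a softplus-like constraint.
def _sketchPositiveStddevParameterization(self):
  unconstrained = tf.Variable(tf.random_normal([3]))
  stddev = tf.nn.softplus(unconstrained) + 1e-5  # strictly positive
  self.evaluate(tf.global_variables_initializer())
  self.assertAllGreater(self.evaluate(stddev), 0.)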

def testDenseReparameterization(self):
  inputs = tf.to_float(np.random.rand(5, 3, 12))
  layer = bayes.DenseReparameterization(4, activation=tf.nn.relu)
  outputs1 = layer(inputs)
  outputs2 = layer(inputs)
  self.evaluate(tf.global_variables_initializer())
  res1, _ = self.evaluate([outputs1, outputs2])
  self.assertEqual(res1.shape, (5, 3, 4))
  self.assertAllGreaterEqual(res1, 0.)

def testDenseReparameterizationModel(self):
  inputs = tf.to_float(np.random.rand(3, 4, 4, 1))
  model = tf.keras.Sequential([
      tf.keras.layers.Conv2D(3,
                             kernel_size=2,
                             padding="SAME",
                             activation=tf.nn.relu),
      tf.keras.layers.Flatten(),
      bayes.DenseReparameterization(2, activation=None),
  ])
  outputs = model(inputs)
  self.evaluate(tf.global_variables_initializer())
  res = self.evaluate(outputs)
  self.assertEqual(res.shape, (3, 2))
  self.assertLen(model.losses, 1)
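
# Illustrative sketch (hypothetical helper, not part of the original suite):
# the single entry in model.losses above is the Bayesian layer's KL penalty.
# A common convention is to scale it by the number of training examples
# before adding it to the data loss, so the two terms are on the same
# per-example footing.
def _sketchModelObjective(self, model, features, labels, num_examples):
  predictions = model(features)
  nll = tf.losses.mean_squared_error(labels, predictions)
  kl = sum(model.losses) / float(num_examples)
  return nll + kl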

def testDenseReparameterizationLoss(self):
  features = tf.to_float(np.random.rand(5, 12))
  labels = tf.to_float(np.random.rand(5, 10))
  layer = bayes.DenseReparameterization(10)

  # Imagine this is the 1st epoch.
  with tf.GradientTape(persistent=True) as tape:
    predictions = layer(features)  # first call forces build
    layer(features)  # ensure robustness after multiple calls
    nll = tf.losses.mean_squared_error(labels, predictions)
    kl = sum(layer.losses)

  variables = [
      layer.kernel_initializer.mean, layer.kernel_initializer.stddev]
  for v in variables:
    self.assertIn(v, layer.variables)

  # This will be fine, since the layer was built inside this tape, and thus
  # the distribution init ops were inside this tape.
  grads = tape.gradient(nll, variables)
  for grad in grads:
    self.assertIsNotNone(grad)
  grads = tape.gradient(kl, variables)
  for grad in grads:
    self.assertIsNotNone(grad)

  # Imagine this is the 2nd epoch.
  with tf.GradientTape(persistent=True) as tape:
    predictions = layer(features)  # build is not called
    nll = tf.losses.mean_squared_error(labels, predictions)
    kl = sum(layer.losses)

  variables = [
      layer.kernel_initializer.mean, layer.kernel_initializer.stddev]
  for v in variables:
    self.assertIn(v, layer.variables)

  # This would fail, since the layer was built inside the tape from the 1st
  # epoch, and thus the distribution init ops were inside that tape instead
  # of this tape. By using a callable for the variable, this will no longer
  # fail.
  grads = tape.gradient(nll, variables)
  for grad in grads:
    self.assertIsNotNone(grad)
  grads = tape.gradient(kl, variables)
  for grad in grads:
    self.assertIsNotNone(grad)
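
# Illustrative sketch (hypothetical, eager-style training step): how the nll
# and kl terms exercised above would drive an actual update. The forward
# pass is rebuilt under a fresh tape each step, which is exactly why the
# gradients in the "2nd epoch" block remain well-defined.
def _sketchTrainStep(self, layer, features, labels, optimizer):
  with tf.GradientTape() as tape:
    predictions = layer(features)
    nll = tf.losses.mean_squared_error(labels, predictions)
    loss = nll + sum(layer.losses)
  grads = tape.gradient(loss, layer.trainable_variables)
  optimizer.apply_gradients(zip(grads, layer.trainable_variables))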

def testDenseReparameterizationKernel(
    self, kernel_initializer, bias_initializer, all_close):
  # NOTE: this test takes extra arguments, so it is expected to run under a
  # parameterized-test decorator supplying kernel_initializer,
  # bias_initializer, and all_close.
  inputs = tf.to_float(np.random.rand(5, 3, 12))
  layer = bayes.DenseReparameterization(
      4,
      kernel_initializer=kernel_initializer,
      bias_initializer=bias_initializer,
      activation=tf.nn.relu)
  outputs1 = layer(inputs)
  outputs2 = layer(inputs)
  self.evaluate(tf.global_variables_initializer())
  res1, res2 = self.evaluate([outputs1, outputs2])
  self.assertEqual(res1.shape, (5, 3, 4))
  self.assertAllGreaterEqual(res1, 0.)
  if all_close:
    self.assertAllClose(res1, res2)
  else:
    self.assertNotAllClose(res1, res2)
  layer.get_config()  # smoke-test config serialization

def testDenseReparameterizationMean(self):
  """Tests that forward pass can use other values, e.g., posterior mean."""
  def take_mean(f, *args, **kwargs):
    """Sets random variable value to its mean."""
    rv = f(*args, **kwargs)
    rv._value = rv.distribution.mean()
    return rv

  inputs = tf.to_float(np.random.rand(5, 3, 7))
  layer = bayes.DenseReparameterization(4,
                                        activation=tf.nn.relu,
                                        use_bias=False)
  outputs1 = layer(inputs)
  with ed.interception(take_mean):
    outputs2 = layer(inputs)
  self.evaluate(tf.global_variables_initializer())
  res1, res2 = self.evaluate([outputs1, outputs2])
  self.assertEqual(res1.shape, (5, 3, 4))
  self.assertNotAllClose(res1, res2)
  self.assertAllClose(res2, np.zeros((5, 3, 4)), atol=1e-4)