def testLSTMCellReparameterizationModel(self):
  batch_size, timesteps, dim = 5, 3, 12
  hidden_size = 10
  inputs = tf.to_float(np.random.rand(batch_size, timesteps, dim))
  cell = bayes.LSTMCellReparameterization(hidden_size)
  model = tf.keras.Sequential([
      tf.keras.layers.RNN(cell, return_sequences=True)
  ])
  outputs1 = model(inputs)
  outputs2 = model(inputs)
  state = (tf.zeros([1, hidden_size]), tf.zeros([1, hidden_size]))
  outputs3 = []
  for t in range(timesteps):
    out, state = cell(inputs[:, t, :], state)
    outputs3.append(out)
  outputs3 = tf.stack(outputs3, axis=1)
  self.evaluate(tf.global_variables_initializer())
  res1, res2, res3 = self.evaluate([outputs1, outputs2, outputs3])
  self.assertEqual(res1.shape, (batch_size, timesteps, hidden_size))
  self.assertEqual(res3.shape, (batch_size, timesteps, hidden_size))
  # NOTE: `cell.sample_weights` should have been called at the beginning of
  # each call, so these should be different.
  self.assertNotAllClose(res1, res2)
  # NOTE: We didn't call `cell.sample_weights` again before computing
  # `outputs3`, so the cell should have had the same weights as it did during
  # the computation of `outputs2`, and thus yields the same output tensor.
  self.assertAllClose(res2, res3)
  self.assertLen(model.losses, 2)
def testLSTMCellReparameterization(
    self, kernel_initializer, recurrent_initializer, bias_initializer,
    all_close):
  batch_size, timesteps, dim = 5, 3, 12
  hidden_size = 10
  inputs = tf.to_float(np.random.rand(batch_size, timesteps, dim))
  cell = bayes.LSTMCellReparameterization(
      hidden_size,
      kernel_initializer=kernel_initializer,
      recurrent_initializer=recurrent_initializer,
      bias_initializer=bias_initializer)
  noise = tf.to_float(np.random.rand(1, hidden_size))
  h0, c0 = cell.get_initial_state(inputs)
  state = (h0 + noise, c0)
  outputs1, _ = cell(inputs[:, 0, :], state)
  outputs2, _ = cell(inputs[:, 0, :], state)
  cell.sample_weights()
  outputs3, _ = cell(inputs[:, 0, :], state)
  self.evaluate(tf.global_variables_initializer())
  res1, res2, res3 = self.evaluate([outputs1, outputs2, outputs3])
  self.assertEqual(res1.shape, (batch_size, hidden_size))
  # Without resampling, repeated calls reuse the same sampled weights.
  self.assertAllClose(res1, res2)
  # After `sample_weights`, the outputs change only if the weights are truly
  # random; with deterministic initializers the resample is a no-op, which is
  # what the `all_close` parameter encodes.
  if all_close:
    self.assertAllClose(res1, res3)
  else:
    self.assertNotAllClose(res1, res3)
  cell.get_config()
def testLSTMCellReparameterizationLoss(self):
  features = tf.to_float(np.random.rand(5, 1, 12))
  labels = tf.to_float(np.random.rand(5, 10))
  cell = bayes.LSTMCellReparameterization(10)
  state = (tf.zeros([1, 10]), tf.zeros([1, 10]))
  # Imagine this is the 1st epoch.
  with tf.GradientTape(persistent=True) as tape:
    predictions, _ = cell(features[:, 0, :], state)  # first call forces build
    cell(features[:, 0, :], state)  # ensure robustness after multiple calls
    cell.get_initial_state(features[:, 0, :])
    cell(features[:, 0, :], state)  # ensure robustness after multiple calls
    nll = tf.losses.mean_squared_error(labels, predictions)
    kl = sum(cell.losses)

  variables = [
      cell.kernel_initializer.mean, cell.kernel_initializer.stddev,
      cell.recurrent_initializer.mean, cell.recurrent_initializer.stddev,
  ]
  for v in variables:
    self.assertIn(v, cell.variables)

  # This will be fine, since the layer was built inside this tape, and thus
  # the distribution init ops were inside this tape.
  grads = tape.gradient(nll, variables)
  for grad in grads:
    self.assertIsNotNone(grad)
  grads = tape.gradient(kl, variables)
  for grad in grads:
    self.assertIsNotNone(grad)

  # Imagine this is the 2nd epoch.
  with tf.GradientTape(persistent=True) as tape:
    cell.get_initial_state(features[:, 0, :])
    predictions, _ = cell(features[:, 0, :], state)  # build is not called
    nll = tf.losses.mean_squared_error(labels, predictions)
    kl = sum(cell.losses)

  variables = [
      cell.kernel_initializer.mean, cell.kernel_initializer.stddev,
      cell.recurrent_initializer.mean, cell.recurrent_initializer.stddev,
  ]
  for v in variables:
    self.assertIn(v, cell.variables)

  # This would fail, since the layer was built inside the tape from the 1st
  # epoch, and thus the distribution init ops were inside that tape instead
  # of this tape. By using a callable for the variable, this will no longer
  # fail.
  grads = tape.gradient(nll, variables)
  for grad in grads:
    self.assertIsNotNone(grad)
  grads = tape.gradient(kl, variables)
  for grad in grads:
    self.assertIsNotNone(grad)
def testLSTMCellReparameterizationKL(self):
  inputs = tf.to_float(np.random.rand(5, 1, 12))
  cell = bayes.LSTMCellReparameterization(10)
  state = (tf.zeros([1, 10]), tf.zeros([1, 10]))
  # Imagine this is the 1st epoch.
  with tf.GradientTape() as tape:
    cell(inputs[:, 0, :], state)  # first call forces a build, inside the tape
    cell(inputs[:, 0, :], state)  # ensure robustness after multiple calls
    cell.get_initial_state(inputs[:, 0, :])
    cell(inputs[:, 0, :], state)  # ensure robustness after multiple calls
    loss = sum(cell.losses)

  variables = [
      cell.kernel_initializer.mean, cell.kernel_initializer.stddev,
      cell.recurrent_initializer.mean, cell.recurrent_initializer.stddev,
  ]
  for v in variables:
    self.assertIn(v, cell.variables)

  # This will be fine, since the layer was built inside this tape, and thus
  # the distribution init ops were inside this tape.
  grads = tape.gradient(loss, variables)
  for grad in grads:
    self.assertIsNotNone(grad)

  # Imagine this is the 2nd epoch.
  with tf.GradientTape() as tape:
    cell(inputs[:, 0, :], state)  # build won't be called again
    loss = sum(cell.losses)

  variables = [
      cell.kernel_initializer.mean, cell.kernel_initializer.stddev,
      cell.recurrent_initializer.mean, cell.recurrent_initializer.stddev,
  ]
  for v in variables:
    self.assertIn(v, cell.variables)

  # This would fail, since the layer was built inside the tape from the 1st
  # epoch, and thus the distribution init ops were inside that tape instead
  # of this tape. By using a callable for the variable, this will no longer
  # fail.
  grads = tape.gradient(loss, variables)
  for grad in grads:
    self.assertIsNotNone(grad)
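# A minimal usage sketch (commented out; not part of the test suite), showing
# how the pieces exercised above would typically be combined during training.
# `optimizer`, `features`, and `labels` are hypothetical names; as in the
# tests, the data term and the KL terms collected in `cell.losses` form a
# single loss whose gradients reach the variational parameters:
#
#   with tf.GradientTape() as tape:
#     predictions, _ = cell(features[:, 0, :], state)
#     nll = tf.losses.mean_squared_error(labels, predictions)
#     loss = nll + sum(cell.losses)
#   grads = tape.gradient(loss, cell.trainable_variables)
#   optimizer.apply_gradients(zip(grads, cell.trainable_variables))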