def test_explicit_device_with_go_backward_and_mask(self):
  batch_size = 8
  timestep = 7
  masksteps = 5
  units = 4

  inputs = np.random.randn(batch_size, timestep, units).astype(np.float32)
  mask = np.ones((batch_size, timestep)).astype(bool)
  mask[:, masksteps:] = 0

  # Test for V1 behavior.
  lstm_v1 = rnn_v1.LSTM(units, return_sequences=True, go_backwards=True)
  with testing_utils.device(should_use_gpu=True):
    outputs_masked_v1 = lstm_v1(inputs, mask=tf.constant(mask))
    outputs_trimmed_v1 = lstm_v1(inputs[:, :masksteps])
  self.assertAllClose(outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1)

  # Test for V2 behavior.
  lstm = rnn.LSTM(units, return_sequences=True, go_backwards=True)
  with testing_utils.device(should_use_gpu=True):
    outputs_masked = lstm(inputs, mask=tf.constant(mask))
    outputs_trimmed = lstm(inputs[:, :masksteps])
  self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
def test_explicit_device_with_go_backward_and_mask(self):
  if tf.test.is_built_with_rocm():
    self.skipTest('Skipping the test as ROCm MIOpen does not '
                  'support padded input yet.')
  batch_size = 8
  timestep = 7
  masksteps = 5
  units = 4

  inputs = np.random.randn(batch_size, timestep, units).astype(np.float32)
  mask = np.ones((batch_size, timestep)).astype(bool)
  mask[:, masksteps:] = 0

  # Test for V1 behavior.
  gru_v1 = rnn_v1.GRU(units, return_sequences=True, go_backwards=True)
  with testing_utils.device(should_use_gpu=True):
    outputs_masked_v1 = gru_v1(inputs, mask=tf.constant(mask))
    outputs_trimmed_v1 = gru_v1(inputs[:, :masksteps])
  self.assertAllClose(outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1)

  # Test for V2 behavior.
  gru = rnn.GRU(units, return_sequences=True, go_backwards=True)
  with testing_utils.device(should_use_gpu=True):
    outputs_masked = gru(inputs, mask=tf.constant(mask))
    outputs_trimmed = gru(inputs[:, :masksteps])
  self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
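# NOTE: Every test in this file runs under `testing_utils.device(...)`. The
# real helper lives in the shared testing utilities; a minimal sketch of the
# behavior these tests assume (a device scope pinned to the GPU when requested
# and available, otherwise the CPU) might look like the hypothetical function
# below.
def _device_scope_sketch(should_use_gpu):
  """Hypothetical stand-in for `testing_utils.device`."""
  if should_use_gpu and tf.test.is_gpu_available():
    return tf.device('/device:GPU:0')
  return tf.device('/device:CPU:0')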
def testAgnosticUsage(self):
  """Graph/eager agnostic usage."""
  # Does create garbage when executing eagerly due to ops.Graph() creation.
  with self.test_session():
    num_training_steps = 10
    checkpoint_directory = self.get_temp_dir()
    for training_continuation in range(3):
      with testing_utils.device(should_use_gpu=True):
        model = MyModel()
        optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
        root = tf.train.Checkpoint(
            optimizer=optimizer, model=model,
            global_step=tf.compat.v1.train.get_or_create_global_step())
        manager = tf.train.CheckpointManager(
            root, checkpoint_directory, max_to_keep=1)
        status = root.restore(save_path=manager.latest_checkpoint)
        input_value = tf.constant([[3.]])
        train_fn = functools.partial(
            optimizer.minimize,
            functools.partial(model, input_value),
            global_step=root.global_step)
        if not tf.executing_eagerly():
          train_fn = functools.partial(self.evaluate, train_fn())
        status.initialize_or_restore()
        for _ in range(num_training_steps):
          train_fn()
        manager.save()
        self.assertEqual((training_continuation + 1) * num_training_steps,
                         self.evaluate(root.global_step))
        self.assertEqual(training_continuation + 1,
                         self.evaluate(root.save_counter))
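# NOTE: `MyModel` is defined elsewhere in this test file. A minimal sketch
# consistent with how the checkpoint tests here use it (callable on a [1, 1]
# float input, exposing trainable variables to optimize) might look like the
# hypothetical class below.
class _MyModelSketch(tf.keras.Model):
  """Hypothetical stand-in for `MyModel`."""

  def __init__(self):
    super().__init__()
    self._named_dense = tf.keras.layers.Dense(1, use_bias=True)

  def call(self, values):
    return self._named_dense(values)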
def test_device_placement(self, layer):
  if not tf.test.is_gpu_available():
    self.skipTest('Need GPU for testing.')
  vocab_size = 20
  embedding_dim = 10
  batch_size = 8
  timestep = 12
  units = 5
  x = np.random.randint(0, vocab_size, size=(batch_size, timestep))
  y = np.random.randint(0, vocab_size, size=(batch_size, timestep))

  # When a GPU is available but not used, the graph should still be properly
  # created with CPU ops.
  with testing_utils.device(should_use_gpu=False):
    model = keras.Sequential([
        keras.layers.Embedding(vocab_size, embedding_dim,
                               batch_input_shape=[batch_size, timestep]),
        layer(units, return_sequences=True, stateful=True),
        keras.layers.Dense(vocab_size)
    ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        run_eagerly=testing_utils.should_run_eagerly())
    model.fit(x, y, epochs=1, shuffle=False)
def testAgnosticUsage(self):
  """Graph/eager agnostic usage."""
  # Does create garbage when executing eagerly due to ops.Graph() creation.
  with self.test_session():
    num_training_steps = 10
    checkpoint_directory = self.get_temp_dir()
    optimizer = adam.Adam(0.001)

    def _train_fn(model, input_value):
      with tf.GradientTape() as tape:
        loss = model(input_value)
      variables = model.trainable_variables
      gradients = tape.gradient(loss, variables)
      return optimizer.apply_gradients(zip(gradients, variables))

    for training_continuation in range(3):
      with testing_utils.device(should_use_gpu=True):
        model = MyModel()
        root = tf.train.Checkpoint(optimizer=optimizer, model=model)
        manager = tf.train.CheckpointManager(
            root, checkpoint_directory, max_to_keep=1)
        status = root.restore(save_path=manager.latest_checkpoint)
        input_value = tf.constant([[3.]])
        train_fn = functools.partial(_train_fn, model, input_value)
        if not tf.executing_eagerly():
          train_fn = functools.partial(self.evaluate, train_fn())
        status.initialize_or_restore()
        for _ in range(num_training_steps):
          train_fn()
        manager.save()
        self.assertEqual((training_continuation + 1) * num_training_steps,
                         self.evaluate(root.optimizer.iterations))
        self.assertEqual(training_continuation + 1,
                         self.evaluate(root.save_counter))
def test_compare_ragged_with_masks(self, layer):
  vocab_size = 100
  timestep = 20
  units = 32
  embedder = embeddings.Embedding(input_dim=vocab_size, output_dim=units)
  layer = layer(units, return_sequences=True)
  data = tf.constant(
      np.random.RandomState(0).randint(0, vocab_size, [timestep, timestep]))
  mask = tf.sequence_mask(tf.range(1, timestep + 1))
  data_ragged = tf.ragged.boolean_mask(data, mask)

  outputs = []
  devices = [testing_utils.device(should_use_gpu=False)]
  if tf.test.is_gpu_available():
    devices.append(testing_utils.device(should_use_gpu=True))
  for device in devices:
    with device:
      outputs.append(tf.boolean_mask(layer(embedder(data), mask=mask), mask))
      outputs.append(layer(embedder(data_ragged)).values)

  for i in range(len(outputs) - 1):
    self.assertAllClose(outputs[i], outputs[i + 1], atol=1e-4)
def test_gru_v2_output_on_multiple_kernel(self):
  input_shape = 10
  rnn_state_size = 8
  timestep = 4
  batch = 100

  x_train = np.random.random((batch, timestep, input_shape))

  inputs = keras.layers.Input(
      shape=[timestep, input_shape], dtype=tf.float32)
  with testing_utils.device(should_use_gpu=False):
    layer = rnn.GRU(rnn_state_size)
    output = layer(inputs)
    cpu_model = keras.models.Model(inputs, output)
    weights = cpu_model.get_weights()
    y_1 = cpu_model.predict(x_train)

  with testing_utils.device(should_use_gpu=True):
    layer = rnn.GRU(rnn_state_size)
    output = layer(inputs)
    gpu_model = keras.models.Model(inputs, output)
    gpu_model.set_weights(weights)
    y_2 = gpu_model.predict(x_train)

  # Note that CuDNN uses 'sigmoid' as the recurrent activation, so GRU V2
  # defaults to 'sigmoid' as well. Construct the canonical GRU with sigmoid
  # recurrent activation to get the same output.
  with testing_utils.device(should_use_gpu=True):
    layer = rnn_v1.GRU(rnn_state_size,
                       recurrent_activation='sigmoid',
                       reset_after=True)
    output = layer(inputs)
    canonical_model = keras.models.Model(inputs, output)
    canonical_model.set_weights(weights)
    y_3 = canonical_model.predict(x_train)

  self.assertAllClose(y_1, y_2, rtol=1e-5, atol=1e-5)
  self.assertAllClose(y_2, y_3, rtol=1e-5, atol=1e-5)
def test_gru_v2_feature_parity_with_canonical_gru(self):
  if tf.test.is_built_with_rocm():
    self.skipTest('Skipping the test as ROCm MIOpen does not '
                  'support padded input yet.')
  input_shape = 10
  rnn_state_size = 8
  timestep = 4
  batch = 20

  (x_train, y_train), _ = testing_utils.get_test_data(
      train_samples=batch,
      test_samples=0,
      input_shape=(timestep, input_shape),
      num_classes=rnn_state_size,
      random_seed=87654321)
  y_train = np_utils.to_categorical(y_train, rnn_state_size)
  # For the last two batch items of the test data, zero out the last timestep
  # to simulate variable-length sequences and exercise masking.
  x_train[-2:, -1, :] = 0.0
  y_train[-2:] = 0

  inputs = keras.layers.Input(
      shape=[timestep, input_shape], dtype=tf.float32)
  masked_input = keras.layers.Masking()(inputs)
  gru_layer = rnn_v1.GRU(rnn_state_size,
                         recurrent_activation='sigmoid',
                         reset_after=True)
  output = gru_layer(masked_input)
  gru_model = keras.models.Model(inputs, output)
  weights = gru_model.get_weights()
  y_1 = gru_model.predict(x_train)
  gru_model.compile('rmsprop', 'mse')
  gru_model.fit(x_train, y_train)
  y_2 = gru_model.predict(x_train)

  with testing_utils.device(should_use_gpu=True):
    cudnn_layer = rnn.GRU(rnn_state_size,
                          recurrent_activation='sigmoid',
                          reset_after=True)
    cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input))
  cudnn_model.set_weights(weights)
  y_3 = cudnn_model.predict(x_train)
  cudnn_model.compile('rmsprop', 'mse')
  cudnn_model.fit(x_train, y_train)
  y_4 = cudnn_model.predict(x_train)

  self.assertAllClose(y_1, y_3, rtol=2e-5, atol=2e-5)
  self.assertAllClose(y_2, y_4, rtol=2e-5, atol=2e-5)
def test_lstm_output_on_multiple_kernel(self):
  input_shape = 10
  rnn_state_size = 8
  timestep = 4
  batch = 100

  x_train = np.random.random((batch, timestep, input_shape))

  inputs = keras.layers.Input(
      shape=[timestep, input_shape], dtype=tf.float32)
  with testing_utils.device(should_use_gpu=False):
    layer = rnn.LSTM(rnn_state_size)
    output = layer(inputs)
    cpu_model = keras.models.Model(inputs, output)
    weights = cpu_model.get_weights()
    y_1 = cpu_model.predict(x_train)

  with testing_utils.device(should_use_gpu=True):
    layer = rnn.LSTM(rnn_state_size)
    output = layer(inputs)
    gpu_model = keras.models.Model(inputs, output)
    gpu_model.set_weights(weights)
    y_2 = gpu_model.predict(x_train)

  # Note that CuDNN uses 'sigmoid' as the recurrent activation, so LSTM V2
  # defaults to 'sigmoid' as well. Construct the canonical LSTM with sigmoid
  # recurrent activation to get the same output.
  with testing_utils.device(should_use_gpu=True):
    layer = rnn_v1.LSTM(rnn_state_size, recurrent_activation='sigmoid')
    output = layer(inputs)
    canonical_model = keras.models.Model(inputs, output)
    # Remove the extra CuDNN bias since the canonical LSTM will not use it.
    canonical_model.set_weights(weights[:3])
    y_3 = canonical_model.predict(x_train)

  self.assertAllClose(y_1, y_2)
  self.assertAllClose(y_2, y_3)
def testWithDefun(self):
  with self.test_session():
    num_training_steps = 2
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    for training_continuation in range(3):
      with testing_utils.device(should_use_gpu=True):
        model = MyModel()
        # Don't actually train so we can test variable values.
        optimizer = tf.compat.v1.train.AdamOptimizer(0.)
        root = tf.train.Checkpoint(
            optimizer=optimizer, model=model,
            global_step=tf.compat.v1.train.get_or_create_global_step())
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_directory)
        status = root.restore(save_path=checkpoint_path)

        def train_fn():

          @tf.function
          def _call_model(x):
            return model(x)

          with tf.GradientTape() as tape:
            loss = _call_model(tf.constant([[3.]]))
          gradients = tape.gradient(loss, model.variables)
          return optimizer.apply_gradients(
              zip(gradients, model.variables),
              global_step=root.global_step)

        if not tf.executing_eagerly():
          train_fn = functools.partial(self.evaluate, train_fn())
        status.initialize_or_restore()
        for _ in range(num_training_steps):
          train_fn()
        if training_continuation > 0:
          status.assert_consumed()
          self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
        else:
          self.evaluate(model.variables[0].assign([[42.]]))
        root.save(file_prefix=checkpoint_prefix)
        self.assertEqual((training_continuation + 1) * num_training_steps,
                         self.evaluate(root.global_step))
        self.assertEqual(training_continuation + 1,
                         self.evaluate(root.save_counter))
def testLoadFromNameBasedSaver(self):
  """Save a name-based checkpoint, load it using the object-based API."""
  with testing_utils.device(should_use_gpu=True):
    with self.test_session():
      save_path = self._write_name_based_checkpoint()
      root = self._initialized_model()
      self._set_sentinels(root)
      with self.assertRaises(AssertionError):
        self._check_sentinels(root)
      object_saver = trackable_utils.TrackableSaver(
          graph_view.ObjectGraphView(root))
      self._set_sentinels(root)
      status = object_saver.restore(save_path)
      if tf.executing_eagerly():
        self._check_sentinels(root)
      if tf.executing_eagerly():
        status.assert_consumed()
        status.assert_existing_objects_matched()
        status.assert_nontrivial_match()
      else:
        # When graph building, we haven't read any keys, so we don't know
        # whether the restore will be complete.
        with self.assertRaisesRegex(AssertionError, "not restored"):
          status.assert_consumed()
        with self.assertRaisesRegex(AssertionError, "not restored"):
          status.assert_existing_objects_matched()
        with self.assertRaisesRegex(AssertionError, "not restored"):
          status.assert_nontrivial_match()
      status.run_restore_ops()
      self._check_sentinels(root)
      self._set_sentinels(root)
      status = object_saver.restore(save_path)
      status.initialize_or_restore()
      status.assert_nontrivial_match()
      self._check_sentinels(root)
      # Check that there is no error when keys are missing from the
      # name-based checkpoint.
      root.not_in_name_checkpoint = tf.Variable([1.])
      status = object_saver.restore(save_path)
      with self.assertRaises(AssertionError):
        status.assert_existing_objects_matched()
def test_initialize_if_not_restoring(self):
  with self.test_session():
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
    with testing_utils.device(should_use_gpu=True):
      model = MyModel()
      optimizer = adam.Adam(0.001)
      root = tf.train.Checkpoint(
          model=model)  # Do not save the optimizer with the checkpoint.
      optimizer_checkpoint = tf.train.Checkpoint(optimizer=optimizer)

      checkpoint_path = tf.train.latest_checkpoint(checkpoint_directory)
      status = root.restore(save_path=checkpoint_path)
      input_value = tf.constant([[3.]])

      def train_fn():
        with tf.GradientTape() as tape:
          loss = model(input_value)
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        return optimizer.apply_gradients(zip(gradients, variables))

      if not tf.executing_eagerly():
        train_fn = functools.partial(self.evaluate, train_fn())
      status.initialize_or_restore()
      # TODO(tanzheny): Add hyper variables to .variables(), and set them with
      # set_weights etc.
      variables_not_in_the_variables_property = [
          obj for obj in optimizer._hyper.values()
          if isinstance(obj, tf.Variable)
      ]
      self.evaluate([
          v.initializer for v in optimizer.variables() +
          variables_not_in_the_variables_property
      ])
      train_fn()
      model_save_path = root.save(file_prefix=checkpoint_prefix)
      self.evaluate(optimizer.beta_1.assign(42.))
      optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix)
      del train_fn

    # Restore into a graph with the optimizer.
    with testing_utils.device(should_use_gpu=True):
      model = MyModel()
      optimizer = adam.Adam(0.001)
      root = tf.train.Checkpoint(optimizer=optimizer, model=model)
      status = root.restore(save_path=model_save_path)
      input_value = tf.constant([[3.]])

      def train_fn1():
        with tf.GradientTape() as tape:
          loss = model(input_value)
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        return optimizer.apply_gradients(zip(gradients, variables))

      if not tf.executing_eagerly():
        train_fn1 = functools.partial(self.evaluate, train_fn1())
      status.initialize_or_restore()
      train_fn1()
      with self.assertRaises(AssertionError):
        status.assert_existing_objects_matched()
      with self.assertRaises(AssertionError):
        status.assert_consumed()
      del train_fn1

    # Make sure initialization doesn't clobber later restores.
    with testing_utils.device(should_use_gpu=True):
      model = MyModel()
      optimizer = adam.Adam(0.001, beta_1=1.0)
      root = tf.train.Checkpoint(optimizer=optimizer, model=model)
      opt_root = tf.train.Checkpoint(optimizer=optimizer)
      status = root.restore(save_path=model_save_path)
      init_only_optimizer_status = opt_root.restore(save_path=None)
      optimizer_status = opt_root.restore(save_path=optimizer_save_path)
      input_value = tf.constant([[3.]])

      def train_fn2():
        with tf.GradientTape() as tape:
          loss = model(input_value)
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        return optimizer.apply_gradients(zip(gradients, variables))

      if not tf.executing_eagerly():
        train_fn2 = functools.partial(self.evaluate, train_fn2())
      optimizer_status.run_restore_ops()
      status.initialize_or_restore()
      init_only_optimizer_status.initialize_or_restore()
      train_fn2()
      self.assertEqual(42., self.evaluate(optimizer.beta_1))
def test_initialize_if_not_restoring(self):
  with self.test_session():
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
    with testing_utils.device(should_use_gpu=True):
      model = MyModel()
      optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
      root = tf.train.Checkpoint(
          model=model,  # Do not save the optimizer with the checkpoint.
          global_step=tf.compat.v1.train.get_or_create_global_step())
      optimizer_checkpoint = tf.train.Checkpoint(optimizer=optimizer)

      checkpoint_path = tf.train.latest_checkpoint(checkpoint_directory)
      status = root.restore(save_path=checkpoint_path)
      input_value = tf.constant([[3.]])
      train_fn = functools.partial(
          optimizer.minimize,
          functools.partial(model, input_value),
          global_step=root.global_step)
      if not tf.executing_eagerly():
        train_fn = functools.partial(self.evaluate, train_fn())
      status.initialize_or_restore()
      self.evaluate([v.initializer for v in optimizer.variables()])
      train_fn()
      model_save_path = root.save(file_prefix=checkpoint_prefix)
      self.evaluate(optimizer.variables()[0].assign(42.))
      optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix)

    # Restore into a graph with the optimizer.
    with testing_utils.device(should_use_gpu=True):
      model = MyModel()
      optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
      root = tf.train.Checkpoint(
          optimizer=optimizer, model=model,
          global_step=tf.compat.v1.train.get_or_create_global_step())
      status = root.restore(save_path=model_save_path)
      input_value = tf.constant([[3.]])
      train_fn = functools.partial(
          optimizer.minimize,
          functools.partial(model, input_value),
          global_step=root.global_step)
      if not tf.executing_eagerly():
        train_fn = functools.partial(self.evaluate, train_fn())
      status.initialize_or_restore()
      train_fn()
      with self.assertRaises(AssertionError):
        status.assert_existing_objects_matched()
      with self.assertRaises(AssertionError):
        status.assert_consumed()

    # Make sure initialization doesn't clobber later restores.
    with testing_utils.device(should_use_gpu=True):
      model = MyModel()
      optimizer = tf.compat.v1.train.AdamOptimizer(0.001, beta1=1.0)
      root = tf.train.Checkpoint(
          optimizer=optimizer, model=model,
          global_step=tf.compat.v1.train.get_or_create_global_step())
      opt_root = tf.train.Checkpoint(optimizer=optimizer)
      status = root.restore(save_path=model_save_path)
      init_only_optimizer_status = opt_root.restore(save_path=None)
      optimizer_status = opt_root.restore(save_path=optimizer_save_path)
      input_value = tf.constant([[3.]])
      train_fn = functools.partial(
          optimizer.minimize,
          functools.partial(model, input_value),
          global_step=root.global_step)
      if not tf.executing_eagerly():
        train_fn = functools.partial(self.evaluate, train_fn())
      optimizer_status.run_restore_ops()
      status.initialize_or_restore()
      init_only_optimizer_status.initialize_or_restore()
      train_fn()
      self.assertEqual(42., self.evaluate(optimizer.variables()[0]))