def opt_combinations_only():
    """Build the test combinations covering the two base optimizer classes.

    The experimental optimizer is combined with eager mode only; the
    OptimizerV2 combination does not pin a mode.

    Returns:
        The experimental-optimizer combinations followed by the OptimizerV2
        combinations.
    """
    experimental = test_combinations.combine(
        mode='eager', opt_cls=optimizer_experimental.Optimizer)
    legacy = test_combinations.combine(opt_cls=optimizer_v2.OptimizerV2)
    return experimental + legacy
class InterfaceTests(test_combinations.TestCase):
    """Checks dependency tracking of attributes, dicts and lists on models."""

    def testNoDependency(self):
        """A `NoDependency`-wrapped attribute is stored but not tracked."""
        parent = tf.Module()
        tracked_child = tf.Module()
        parent.hasdep = tracked_child
        untracked_child = tf.Module()
        parent.nodep = data_structures.NoDependency(untracked_child)

        # Only the plainly assigned module is reported as a trackable child.
        self.assertLen(parent._trackable_children(), 1)
        self.assertIs(parent._trackable_children()["hasdep"], parent.hasdep)
        # Both attributes still resolve to the exact objects assigned.
        self.assertIs(parent.hasdep, tracked_child)
        self.assertIs(parent.nodep, untracked_child)

        # Model whose constructor runs under the no-automatic-tracking
        # decorator, so the attributes it sets should not be listed.
        class NoDependencyModel(training.Model):

            @tf.__internal__.tracking.no_automatic_dependency_tracking
            def __init__(self):
                super().__init__()
                self.a = []
                self.b = tf.Module()

        untracking_model = NoDependencyModel()
        self.assertEqual([untracking_model],
                         util.list_objects(untracking_model))

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testDictionariesBasic(self):
        """Models nested in a dict attribute are tracked and checkpointable."""
        outer = training.Model()
        direct_child = training.Model()
        outer.attribute = {"b": direct_child}
        listed_child = training.Model()
        # Add a list under a new key after the dict was attached, then grow
        # that list in place.
        outer.attribute["c"] = []
        outer.attribute["c"].append(listed_child)

        tracked_objects = util.list_objects(outer)
        self.assertIn(direct_child, tracked_objects)
        self.assertIn(listed_child, tracked_objects)
        self.assertIs(direct_child, outer.attribute["b"])
        self.assertEqual({"b", "c"},
                         outer.attribute._trackable_children().keys())

        both_children = [direct_child, listed_child]
        self.assertEqual(both_children, outer.layers)
        self.assertEqual(both_children, outer.attribute.layers)
        self.assertEqual([listed_child], outer.attribute["c"].layers)

        # Round-trip through a checkpoint and require a full match.
        checkpoint = tf.train.Checkpoint(a=outer)
        ckpt_prefix = os.path.join(self.get_temp_dir(), "ckpt")
        save_path = checkpoint.save(ckpt_prefix)
        with self.cached_session():
            status = checkpoint.restore(save_path)
            status.assert_consumed().initialize_or_restore()

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testNoDepList(self):
        """`insert` is fine on a NoDependency list but not on a tracked one."""
        model = training.Model()
        model.l1 = data_structures.NoDependency([])
        model.l1.insert(1, 0)
        # The wrapped value comes back as a plain list.
        self.assertIsInstance(model.l1, list)

        checkpoint = tf.train.Checkpoint(a=model)
        checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))

        # The same mutation on a regularly assigned list makes saving fail.
        model.l2 = []
        model.l2.insert(1, tf.Module())
        with self.assertRaisesRegex(ValueError,
                                    "A list element was replaced"):
            checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
class MixedPrecisionTest(test_combinations.TestCase):
    """Tests for the V1 `enable_mixed_precision_graph_rewrite` entry point."""

    IGNORE_PERF_VAR = 'TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_IGNORE_PERFORMANCE'

    def setUp(self):
        super().setUp()
        # Remember whatever value the variable had, then force it to '1' so
        # the grappler pass ignores performance and always transforms the
        # graph; this lets the tests run on pre-Volta GPUs.
        self._original_ignore_perf_value = os.getenv(self.IGNORE_PERF_VAR)
        os.environ[self.IGNORE_PERF_VAR] = '1'

    def tearDown(self):
        # Put IGNORE_PERF_VAR back to its original state: remove it if it was
        # unset before the test, otherwise restore the saved value.
        previous = self._original_ignore_perf_value
        if previous is None:
            del os.environ[self.IGNORE_PERF_VAR]
        else:
            os.environ[self.IGNORE_PERF_VAR] = previous

        tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite()
        super().tearDown()

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_wrap_optimizer(self):
        """Enabling the rewrite wraps the optimizer with the given scale."""
        sgd = gradient_descent_v2.SGD(1.0)
        wrapped = (
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                sgd, 123.))
        self.assertIsInstance(
            wrapped, loss_scale_optimizer_v2.LossScaleOptimizerV1)
        self.assertEqual(self.evaluate(wrapped.loss_scale), 123.)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_optimizer_errors(self):
        """An already loss-scaled optimizer is rejected with a ValueError."""
        already_scaled = loss_scale_optimizer_v2.LossScaleOptimizerV1(
            gradient_descent_v2.SGD(1.0), 'dynamic')
        expected_error = ('"opt" must not already be an instance of a '
                          'LossScaleOptimizer.')
        with self.assertRaisesRegex(ValueError, expected_error):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                already_scaled)
        # The failed call must not have switched the rewrite on.
        options = tf.config.optimizer.get_experimental_options()
        self.assertFalse(options.get('auto_mixed_precision', False))

    @test_utils.enable_v2_dtype_behavior
    def test_error_if_policy_is_set(self):
        """The rewrite errors under a mixed policy and works otherwise."""
        with policy.policy_scope('mixed_float16'), self.assertRaisesRegex(
                ValueError, 'the global Keras dtype Policy has been set'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                gradient_descent_v2.SGD(1.0))

        # No error is expected while the policy is the default one.
        tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            gradient_descent_v2.SGD(1.0))

        # No error is expected under a non-mixed policy either.
        with policy.policy_scope('float64'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                gradient_descent_v2.SGD(1.0))
class GRULayerGradientTapeTest(test_combinations.TestCase):
    """Checks that a GRU layer can be differentiated with `GradientTape`."""

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def test_in_tape(self):
        """Run a GRU forward pass under a tape and request its gradients."""
        num_steps, embed_dim, units = 10, 11, 12

        with self.test_session(config=_config):
            layer = keras.layers.GRU(
                units,
                return_sequences=True,
                return_state=True,
                recurrent_activation="sigmoid",
                recurrent_initializer="glorot_uniform",
            )

            inputs = tf.random.uniform([1, num_steps, embed_dim])
            targets = tf.random.uniform([1, units])

            with tf.GradientTape() as tape:
                initial_state = tf.zeros([1, units], dtype=tf.float32)
                # With return_state=True the call yields (sequence, state);
                # only the final state feeds the loss.
                _, final_state = layer(inputs, initial_state=initial_state)
                loss = tf.reduce_mean(tf.square(final_state - targets))

            # The test only requires that this call completes.
            tape.gradient(loss, layer.variables)
def opt_and_strategy_and_mode_combinations():
    """Build combinations across optimizers, strategies and execution modes.

    The experimental optimizer is never combined with graph mode directly
    because that is unsupported; it is run in eager mode both without and
    with a `tf.function` so that eager and graph execution are both covered.
    OptimizerV2 is combined with graph and eager mode and never with a
    `tf.function`. Both sets iterate over `STRATEGY_FNS`.

    Returns:
        The experimental-optimizer combinations followed by the OptimizerV2
        combinations.
    """
    experimental = test_combinations.combine(
        opt_cls=optimizer_experimental.Optimizer,
        strategy_fn=STRATEGY_FNS,
        mode='eager',
        use_tf_function=[False, True])
    legacy = test_combinations.combine(
        opt_cls=optimizer_v2.OptimizerV2,
        strategy_fn=STRATEGY_FNS,
        mode=['graph', 'eager'],
        use_tf_function=False)
    return experimental + legacy
class BatchNormalizationV1Test(test_combinations.TestCase):
    """Checks how the V1 BatchNormalization layer reports `fused`."""

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_v1_fused_attribute(self):
        """`fused` before/after building for three constructor variants."""
        # Default arguments: fused once called on a (4, 4, 4) input.
        default_layer = batch_normalization_v1.BatchNormalization()
        default_layer(keras.layers.Input((4, 4, 4)))
        self.assertEqual(default_layer.fused, True)

        # Explicit fused=False stays False before and after the call.
        unfused_layer = batch_normalization_v1.BatchNormalization(fused=False)
        self.assertEqual(unfused_layer.fused, False)
        unfused_layer(keras.layers.Input(shape=(4, 4, 4)))
        self.assertEqual(unfused_layer.fused, False)

        # virtual_batch_size=2 reports True up front but False after the
        # layer has been called on a (2, 2, 2) input.
        virtual_layer = batch_normalization_v1.BatchNormalization(
            virtual_batch_size=2)
        self.assertEqual(virtual_layer.fused, True)
        virtual_layer(keras.layers.Input(shape=(2, 2, 2)))
        self.assertEqual(virtual_layer.fused, False)
class SequenceFeaturesSavingTest(tf.test.TestCase, parameterized.TestCase):
    """JSON round-trip test for a model built on `SequenceFeatures`."""

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def test_saving_with_sequence_features(self):
        """A model reloaded from `to_json()` can predict on sparse inputs."""
        cols = [
            tf.feature_column.sequence_numeric_column("a"),
            tf.feature_column.indicator_column(
                tf.feature_column
                .sequence_categorical_column_with_vocabulary_list(
                    "b", ["one", "two"])),
        ]
        input_layers = {
            "a": keras.layers.Input(shape=(None, 1), sparse=True, name="a"),
            "b": keras.layers.Input(
                shape=(None, 1), sparse=True, name="b", dtype="string"),
        }

        # SequenceFeatures returns two values; only the first is used here.
        fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
        # TODO(tibell): Figure out the right dtype and apply masking.
        # sequence_length_mask = array_ops.sequence_mask(sequence_length)
        # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)
        model.compile(
            loss=keras.losses.MSE,
            optimizer="rmsprop",
            metrics=[keras.metrics.categorical_accuracy],
        )

        # Serialize to JSON and rebuild; predictions use the rebuilt model.
        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        batch_size = 10
        timesteps = 1
        dense_shape = (batch_size, timesteps, 1)

        # One entry per batch row, each at index (row, 0, 0).
        values_a = np.arange(batch_size, dtype=np.float32)
        indices_a = np.zeros((batch_size, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(batch_size)
        inputs_a = tf.SparseTensor(indices_a, values_a, dense_shape)

        # `np.str` was only an alias of the builtin `str`; it was deprecated
        # in NumPy 1.20 and removed in 1.24, so use `str` directly.
        values_b = np.zeros(batch_size, dtype=str)
        indices_b = np.zeros((batch_size, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(batch_size)
        inputs_b = tf.SparseTensor(indices_b, values_b, dense_shape)

        with self.cached_session():
            # Initialize tables for V1 lookup.
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.tables_initializer())

            predictions = loaded_model.predict(
                {"a": inputs_a, "b": inputs_b}, steps=1)
            self.assertLen(predictions, batch_size)
class TestSaveModel(tf.test.TestCase, parameterized.TestCase):
    """Tests for `save_model`/`load_model` and weight saving across formats."""

    def setUp(self):
        super().setUp()
        self.model = test_utils.get_small_sequential_mlp(1, 2, 3)
        self.subclassed_model = test_utils.get_small_subclass_mlp(1, 2)

    def _temp_path(self, name):
        """Return `name` inside this test's temp directory as a `str`."""
        return os.path.join(self.get_temp_dir(), name)

    def _temp_pathlib(self, name):
        """Return `name` inside this test's temp directory as a `Path`."""
        return pathlib.Path(self.get_temp_dir()) / name

    def assert_h5_format(self, path):
        """Assert `path` is an HDF5 file; a no-op when h5py is unavailable."""
        if h5py is None:
            return
        message = 'Model saved at path {} is not a valid hdf5 file.'.format(
            path)
        self.assertTrue(h5py.is_hdf5(path), message)

    def assert_saved_model(self, path):
        """Parse `path` as a SavedModel; any parse error fails the test."""
        tf.__internal__.saved_model.parse_saved_model(path)

    @test_utils.run_v2_only
    def test_load_file_not_found(self):
        """Loading a path that does not exist raises IOError."""
        missing = self._temp_pathlib('does_not_exist')
        with self.assertRaisesRegex(IOError, 'No file or directory found at'):
            save.load_model(missing)

    @test_utils.run_v2_only
    def test_save_format_defaults(self):
        """Without `save_format`, a str path yields a SavedModel."""
        target = self._temp_path('model_path')
        save.save_model(self.model, target)
        self.assert_saved_model(target)

    @test_utils.run_v2_only
    def test_save_format_defaults_pathlib(self):
        """Without `save_format`, a `Path` target yields a SavedModel."""
        target = self._temp_pathlib('model_path')
        save.save_model(self.model, target)
        self.assert_saved_model(target)

    @test_utils.run_v2_only
    def test_save_hdf5(self):
        """H5 saving works for Sequential and rejects the subclassed model."""
        target = self._temp_path('model')
        save.save_model(self.model, target, save_format='h5')
        self.assert_h5_format(target)

        expected_error = (
            'requires the model to be a Functional model or a Sequential '
            'model.')
        with self.assertRaisesRegex(NotImplementedError, expected_error):
            save.save_model(self.subclassed_model, target, save_format='h5')

    @test_utils.run_v2_only
    def test_save_load_hdf5_pathlib(self):
        """H5 save followed by load accepts a `Path`."""
        target = self._temp_pathlib('model')
        save.save_model(self.model, target, save_format='h5')
        save.load_model(target)

    @test_utils.run_v2_only
    def test_save_tf(self):
        """TF-format saving needs the subclassed model to have been run."""
        target = self._temp_path('model')
        save.save_model(self.model, target, save_format='tf')
        self.assert_saved_model(target)

        # Saving the subclassed model before it has been called must fail.
        with self.assertRaisesRegex(
                ValueError,
                r'Model.*cannot be saved.*as opposed to `model.call\(\).*'):
            save.save_model(self.subclassed_model, target, save_format='tf')

        # After one predict call the same save succeeds.
        self.subclassed_model.predict(np.random.random((3, 5)))
        save.save_model(self.subclassed_model, target, save_format='tf')
        self.assert_saved_model(target)

    @test_utils.run_v2_only
    def test_save_load_tf_string(self):
        """TF-format save followed by load accepts a str path."""
        target = self._temp_path('model')
        save.save_model(self.model, target, save_format='tf')
        save.load_model(target)

    @test_utils.run_v2_only
    def test_save_load_tf_pathlib(self):
        """TF-format save followed by load accepts a `Path`."""
        target = self._temp_pathlib('model')
        save.save_model(self.model, target, save_format='tf')
        save.load_model(target)

    @test_utils.run_v2_only
    def test_save_load_weights_tf_pathlib(self):
        """TF-format weight save/load accepts a `Path`."""
        target = self._temp_pathlib('model')
        self.model.save_weights(target, save_format='tf')
        self.model.load_weights(target)

    @test_utils.run_v2_only
    def test_save_load_weights_hdf5_pathlib(self):
        """H5 weight save/load accepts a `Path`."""
        target = self._temp_pathlib('model')
        self.model.save_weights(target, save_format='h5')
        self.model.load_weights(target)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_saving_h5_for_rnn_layers(self):
        """Stacked LSTM cells keep distinct kernel names when saved to H5."""
        # See https://github.com/tensorflow/tensorflow/issues/35731 for
        # details.
        inputs = keras.Input([10, 91], name='train_input')
        cell_sizes = [512, 512]
        cells = [
            keras.layers.LSTMCell(
                units, recurrent_dropout=0, name='rnn_cell%d' % index)
            for index, units in enumerate(cell_sizes)
        ]
        rnn = keras.layers.RNN(
            cells, return_sequences=True, name='rnn_layer')
        rnn_output = rnn(inputs)
        pred_feat = keras.layers.Dense(
            91, name='prediction_features')(rnn_output)
        pred = keras.layers.Softmax()(pred_feat)
        model = keras.Model(inputs=[inputs], outputs=[pred, pred_feat])
        model.save(self._temp_path('model_path.h5'))

        # Make sure the variable name is unique.
        first_kernel, second_kernel = cells[0].kernel, cells[1].kernel
        self.assertNotEqual(first_kernel.name, second_kernel.name)
        self.assertIn('rnn_cell1', second_kernel.name)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_saving_optimizer_weights(self):
        """Loaded TF-format weights reproduce the next training loss."""

        class MyModel(keras.Model):

            def __init__(self):
                super().__init__()
                self.layer = keras.layers.Dense(1)

            def call(self, x):
                return self.layer(x)

        x, y = np.ones((10, 10)), np.ones((10, 1))
        weights_path = self._temp_path('weights_path')

        def compiled_and_stepped():
            # Fresh model taken through one training step, metrics cleared.
            fresh = MyModel()
            fresh.compile('rmsprop', loss='bce')
            fresh.train_on_batch(x, y)
            fresh.reset_metrics()
            return fresh

        model = compiled_and_stepped()
        model.save_weights(weights_path, save_format='tf')
        batch_loss = model.train_on_batch(x, y)

        new_model = compiled_and_stepped()
        new_model.load_weights(weights_path)
        new_batch_loss = new_model.train_on_batch(x, y)

        self.assertAllClose(batch_loss, new_batch_loss)

    @test_combinations.generate(
        test_combinations.combine(mode=['eager', 'graph']))
    def test_save_include_optimizer_false(self):
        """`include_optimizer=False` leaves no optimizer keys on disk."""

        def get_variables(file_name):
            # Sorted variable keys of the checkpoint under `file_name`.
            checkpoint_prefix = os.path.join(file_name, 'variables/variables')
            reader = tf.train.load_checkpoint(checkpoint_prefix)
            return sorted(reader.get_variable_to_shape_map().keys())

        target = self._temp_path('no_optimizer')
        x, y = np.ones((10, 10)), np.ones((10, 1))

        model = keras.models.Sequential()
        model.add(keras.layers.Dense(1))
        model.compile('adam', loss='mse')
        model.train_on_batch(x, y)
        model.save(target, save_format='tf', include_optimizer=False)

        for variable_key in get_variables(target):
            self.assertNotIn('optimizer', variable_key)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_saving_model_with_custom_object(self):
        """A model compiled with a registered custom loss can be reloaded."""
        with generic_utils.custom_object_scope(), self.cached_session():

            @generic_utils.register_keras_serializable()
            class CustomLoss(losses.MeanSquaredError):
                pass

            model = sequential.Sequential(
                [core.Dense(units=1, input_shape=(1,))])
            model.compile(optimizer='sgd', loss=CustomLoss())
            model.fit(np.zeros([10, 1]), np.zeros([10, 1]))

            filepath = self._temp_path('saving')
            model.save(filepath)

            # Make sure the model can be correctly loaded back.
            save.load_model(filepath, compile=True)

    def test_saving_model_with_name_conflict(self):
        """Saving a user class named `Sequential` logs a name-clash warning."""

        # The class name is the point of the test; do not rename it.
        class Sequential(keras.Model):

            def __init__(self):
                super().__init__()
                self.layer = keras.layers.Dense(1)

            def call(self, x):
                return self.layer(x)

        model = Sequential()
        model(tf.ones((10, 10)))
        filepath = self._temp_path('Sequential')

        with self.assertLogs() as logs:
            model.save(filepath, save_format='tf')

        expected_substring = (
            'has the same name \'Sequential\' as a built-in Keras')
        matched = [
            record for record in logs.output if expected_substring in record
        ]
        self.assertNotEmpty(matched)

    def test_saving_built_in_model(self):
        """Saving the premade `LinearModel` logs no name-clash warning."""
        model = LinearModel()
        model(tf.constant([[5.]]))
        filepath = self._temp_path('LinearModel')

        with self.assertLogs() as logs:
            model.save(filepath, save_format='tf')

        expected_substring = (
            'has the same name \'LinearModel\' as a built-in Keras')
        matched = [
            record for record in logs.output if expected_substring in record
        ]
        # Check that a warning is *not* logged for a premade model.
        self.assertEmpty(matched)
class TestJson(test_combinations.TestCase):
    """Tests to_json()/from_json()."""

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_saving_with_dense_features(self):
        """A DenseFeatures model rebuilt from JSON can run `predict`."""
        cols = [
            tf.feature_column.numeric_column('a'),
            tf.feature_column.indicator_column(
                tf.feature_column.categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a': keras.layers.Input(shape=(1,), name='a'),
            'b': keras.layers.Input(shape=(1,), name='b', dtype='string')
        }

        fc_layer = dense_features.DenseFeatures(cols)(input_layers)
        output = keras.layers.Dense(10)(fc_layer)

        model = keras.models.Model(input_layers, output)
        model.compile(
            loss=keras.losses.MSE,
            optimizer='rmsprop',
            metrics=[keras.metrics.categorical_accuracy])

        # Serialize to JSON and rebuild; predictions use the rebuilt model.
        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        inputs_a = np.arange(10).reshape(10, 1)
        inputs_b = np.arange(10).reshape(10, 1).astype('str')

        with self.cached_session():
            # Initialize tables for V1 lookup.
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.tables_initializer())

            self.assertLen(
                loaded_model.predict({'a': inputs_a, 'b': inputs_b}), 10)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_saving_with_sequence_features(self):
        """A SequenceFeatures model rebuilt from JSON predicts on sparse data."""
        cols = [
            tf.feature_column.sequence_numeric_column('a'),
            tf.feature_column.indicator_column(
                tf.feature_column
                .sequence_categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a': keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
            'b': keras.layers.Input(
                shape=(None, 1), sparse=True, name='b', dtype='string')
        }

        # SequenceFeatures returns two values; only the first is used here.
        fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
        # TODO(tibell): Figure out the right dtype and apply masking.
        # sequence_length_mask = array_ops.sequence_mask(sequence_length)
        # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)
        model.compile(
            loss=keras.losses.MSE,
            optimizer='rmsprop',
            metrics=[keras.metrics.categorical_accuracy])

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        batch_size = 10
        timesteps = 1
        dense_shape = (batch_size, timesteps, 1)

        # One entry per batch row, each at index (row, 0, 0).
        values_a = np.arange(batch_size, dtype=np.float32)
        indices_a = np.zeros((batch_size, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(batch_size)
        inputs_a = tf.SparseTensor(indices_a, values_a, dense_shape)

        # `np.str` was only an alias of the builtin `str`; it was deprecated
        # in NumPy 1.20 and removed in 1.24, so use `str` directly.
        values_b = np.zeros(batch_size, dtype=str)
        indices_b = np.zeros((batch_size, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(batch_size)
        inputs_b = tf.SparseTensor(indices_b, values_b, dense_shape)

        with self.cached_session():
            # Initialize tables for V1 lookup.
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.tables_initializer())

            predictions = loaded_model.predict(
                {'a': inputs_a, 'b': inputs_b}, steps=1)
            self.assertLen(predictions, batch_size)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_nested_layers(self):
        """Layers stored in another layer's config survive a JSON round trip."""

        class MyLayer(keras.layers.Layer):
            # Custom layer that places its sublayer objects directly in its
            # config, so serialization has to handle nested layers.

            def __init__(self, sublayers, **kwargs):
                super().__init__(**kwargs)
                self.sublayers = sublayers

            def get_config(self):
                config = super().get_config()
                config['sublayers'] = self.sublayers
                return config

        layer = MyLayer([
            keras.layers.Dense(2, name='MyDense'),
            RegisteredSubLayer(name='MySubLayer'),
        ])
        model = keras.Sequential([keras.Input([None]), layer])
        model_json = model.to_json()

        # The sublayer must appear in the JSON under this qualified name.
        self.assertIn('Foo>RegisteredSubLayer', model_json)

        # Only MyLayer is passed explicitly as a custom object.
        loaded_model = model_config.model_from_json(
            model_json, custom_objects={'MyLayer': MyLayer})
        loaded_layer = loaded_model.layers[0]
        self.assertIsInstance(loaded_layer.sublayers[0], keras.layers.Dense)
        self.assertEqual(loaded_layer.sublayers[0].name, 'MyDense')
        self.assertIsInstance(loaded_layer.sublayers[1], RegisteredSubLayer)
        self.assertEqual(loaded_layer.sublayers[1].name, 'MySubLayer')
class MultiHeadAttentionTest(test_combinations.TestCase):
    """Shape, masking, dropout and ragged-input tests for MultiHeadAttention."""

    @parameterized.named_parameters(
        ("key_value_same_proj", None, None, [40, 80]),
        ("key_value_different_proj", 32, 60, [40, 60]),
    )
    def test_non_masked_attention(self, value_dim, output_shape, output_dims):
        """The layer can be built and called without a mask tensor."""
        layer = keras.layers.MultiHeadAttention(
            num_heads=12,
            key_dim=64,
            value_dim=value_dim,
            output_shape=output_shape,
        )
        # Symbolic 3-D inputs; the leading batch dimension is implicit.
        query = keras.Input(shape=(40, 80))
        value = keras.Input(shape=(20, 80))
        result = layer(query=query, value=value)
        self.assertEqual(result.shape.as_list(), [None] + output_dims)

    def test_non_masked_self_attention(self):
        """Self-attention (query used as value) without a mask tensor."""
        layer = keras.layers.MultiHeadAttention(num_heads=12, key_dim=64)
        # Symbolic 3-D input; the leading batch dimension is implicit.
        query = keras.Input(shape=(40, 80))
        result = layer(query, query)
        self.assertEqual(result.shape.as_list(), [None, 40, 80])

    def test_attention_scores(self):
        """Self-attention also returns scores of the expected shape."""
        layer = keras.layers.MultiHeadAttention(num_heads=12, key_dim=64)
        # Symbolic 3-D input; the leading batch dimension is implicit.
        query = keras.Input(shape=(40, 80))
        result, scores = layer(query, query, return_attention_scores=True)
        self.assertEqual(result.shape.as_list(), [None, 40, 80])
        self.assertEqual(scores.shape.as_list(), [None, 12, 40, 40])

    def test_attention_scores_with_values(self):
        """Scores shape when query and value have different lengths."""
        layer = keras.layers.MultiHeadAttention(num_heads=12, key_dim=64)
        # Symbolic 3-D inputs; the leading batch dimension is implicit.
        query = keras.Input(shape=(40, 80))
        value = keras.Input(shape=(60, 80))
        result, scores = layer(query, value, return_attention_scores=True)
        self.assertEqual(result.shape.as_list(), [None, 40, 80])
        self.assertEqual(scores.shape.as_list(), [None, 12, 40, 60])

    @parameterized.named_parameters(("with_bias", True), ("no_bias", False))
    def test_masked_attention(self, use_bias):
        """A random mask changes the output compared with an all-ones mask."""
        layer = keras.layers.MultiHeadAttention(
            num_heads=2, key_dim=2, use_bias=use_bias)
        # Symbolic 3-D inputs; the leading batch dimension is implicit.
        batch_size = 3
        query = keras.Input(shape=(4, 8))
        value = keras.Input(shape=(2, 8))
        mask_tensor = keras.Input(shape=(4, 2))
        attended = layer(query=query, value=value, attention_mask=mask_tensor)

        # Wrap the layer in a model so it can be fed concrete arrays.
        qv_model = keras.Model([query, value, mask_tensor], attended)

        # Concrete data for the (non-mask) query and value inputs.
        from_data = 10 * np.random.random_sample((batch_size, 4, 8))
        to_data = 10 * np.random.random_sample((batch_size, 2, 8))

        # Random 0/1 mask; it is expected to mask at least one element.
        mask_data = np.random.randint(2, size=(batch_size, 4, 2))
        masked_result = qv_model.predict([from_data, to_data, mask_data])

        # Same data with a null mask (nothing masked).
        null_mask_data = np.ones((batch_size, 4, 2))
        unmasked_result = qv_model.predict(
            [from_data, to_data, null_mask_data])

        # One run is masked and one is not, so the outputs should differ.
        self.assertNotAllClose(masked_result, unmasked_result)

        # Repeat with three distinct inputs: Q, K, V.
        key = keras.Input(shape=(2, 8))
        attended_qkv = layer(
            query, value=value, key=key, attention_mask=mask_tensor)
        qkv_model = keras.Model([query, value, key, mask_tensor], attended_qkv)

        masked_result = qkv_model.predict(
            [from_data, to_data, to_data, mask_data])
        unmasked_result = qkv_model.predict(
            [from_data, to_data, to_data, null_mask_data])
        # One run is masked and one is not, so the outputs should differ.
        self.assertNotAllClose(masked_result, unmasked_result)

        # Two trainable variables per projection with a bias, one without.
        expected_variable_count = 2 if use_bias else 1
        self.assertLen(layer._query_dense.trainable_variables,
                       expected_variable_count)
        self.assertLen(layer._output_dense.trainable_variables,
                       expected_variable_count)

    def test_initializer(self):
        """A single initializer argument still yields distinct kernels."""
        layer = keras.layers.MultiHeadAttention(
            num_heads=12,
            key_dim=64,
            kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02),
        )
        # Symbolic 3-D input; the leading batch dimension is implicit.
        query = keras.Input(shape=(40, 80))
        result = layer(query, query)
        self.assertEqual(result.shape.as_list(), [None, 40, 80])

        # Make sure the sub layers have different kernel init values and are
        # not reusing the initializer.
        query_kernel = keras.backend.eval(layer._query_dense.kernel)
        for other_dense in (layer._key_dense, layer._value_dense,
                            layer._output_dense):
            self.assertNotAllClose(
                query_kernel, keras.backend.eval(other_dense.kernel))

    def test_masked_attention_with_scores(self):
        """Requesting scores leaves the output unchanged; the mask alters both."""
        layer = keras.layers.MultiHeadAttention(num_heads=2, key_dim=2)
        # Symbolic 3-D inputs; the leading batch dimension is implicit.
        batch_size = 3
        query = keras.Input(shape=(4, 8))
        value = keras.Input(shape=(2, 8))
        mask_tensor = keras.Input(shape=(4, 2))
        attended = layer(query=query, value=value, attention_mask=mask_tensor)

        # Wrap the layer in a model so it can be fed concrete arrays.
        output_model = keras.Model([query, value, mask_tensor], attended)

        # Concrete data for the (non-mask) query and value inputs.
        from_data = 10 * np.random.random_sample((batch_size, 4, 8))
        to_data = 10 * np.random.random_sample((batch_size, 2, 8))

        # Random 0/1 mask; it is expected to mask at least one element.
        mask_data = np.random.randint(2, size=(batch_size, 4, 2))
        masked_output_data = output_model.predict(
            [from_data, to_data, mask_data])

        # Same data with a null mask (nothing masked).
        null_mask_data = np.ones((batch_size, 4, 2))
        unmasked_output_data = output_model.predict(
            [from_data, to_data, null_mask_data])

        # One run is masked and one is not, so the outputs should differ.
        self.assertNotAllClose(masked_output_data, unmasked_output_data)

        # Second model that additionally exposes the attention scores.
        attended, scores = layer(
            query=query,
            value=value,
            attention_mask=mask_tensor,
            return_attention_scores=True,
        )
        scores_model = keras.Model(
            [query, value, mask_tensor], [attended, scores])
        masked_output_data_score, masked_score = scores_model.predict(
            [from_data, to_data, mask_data])
        unmasked_output_data_score, unmasked_score = scores_model.predict(
            [from_data, to_data, null_mask_data])

        self.assertNotAllClose(masked_output_data_score,
                               unmasked_output_data_score)
        # Asking for scores must not change the attention output itself.
        self.assertAllClose(masked_output_data, masked_output_data_score)
        self.assertAllClose(unmasked_output_data, unmasked_output_data_score)
        self.assertNotAllClose(masked_score, unmasked_score)

    @parameterized.named_parameters(
        ("4d_inputs_1freebatch_mask2", [3, 4], [3, 2], [4, 2], (2, )),
        ("4d_inputs_1freebatch_mask3", [3, 4], [3, 2], [3, 4, 2], (2, )),
        ("4d_inputs_1freebatch_mask4", [3, 4], [3, 2], [3, 2, 4, 2], (2, )),
        ("4D_inputs_2D_attention", [3, 4], [3, 2], [3, 4, 3, 2], (1, 2)),
        ("5D_inputs_2D_attention", [5, 3, 4], [5, 3, 2], [3, 4, 3, 2], (2, 3)),
        (
            "5D_inputs_2D_attention_fullmask",
            [5, 3, 4],
            [5, 3, 2],
            [5, 3, 4, 3, 2],
            (2, 3),
        ),
    )
    def test_high_dim_attention(self, q_dims, v_dims, mask_dims,
                                attention_axes):
        """Masking changes the output for higher-rank inputs and masks."""
        layer = keras.layers.MultiHeadAttention(
            num_heads=2, key_dim=2, attention_axes=attention_axes)
        batch_size, hidden_size = 3, 8

        # Full shapes: batch first, hidden size last for query and value.
        query_shape = [batch_size] + q_dims + [hidden_size]
        value_shape = [batch_size] + v_dims + [hidden_size]
        mask_shape = [batch_size] + mask_dims

        # Concrete data for the (non-mask) query and value inputs.
        query_data = 10 * np.random.random_sample(query_shape)
        value_data = 10 * np.random.random_sample(value_shape)

        # Random boolean mask; it is expected to mask at least one element.
        mask_data = np.random.randint(2, size=mask_shape).astype("bool")
        # Null mask (nothing masked) of the same shape.
        null_mask_data = np.ones(mask_shape)

        query_tensor = keras.Input(query_shape[1:], name="query")
        value_tensor = keras.Input(value_shape[1:], name="value")
        mask_tensor = keras.Input(mask_shape[1:], name="mask")
        attended = layer(
            query=query_tensor,
            value=value_tensor,
            attention_mask=mask_tensor,
        )
        model = keras.Model(
            [query_tensor, value_tensor, mask_tensor], attended)

        # One run is masked and one is not, so the outputs should differ.
        self.assertNotAllClose(
            model.predict([query_data, value_data, mask_data]),
            model.predict([query_data, value_data, null_mask_data]),
        )

    def test_dropout(self):
        """With dropout=0.5, training and inference outputs differ."""
        layer = keras.layers.MultiHeadAttention(
            num_heads=2, key_dim=2, dropout=0.5)

        # All-ones data for the query and value inputs.
        from_data = keras.backend.ones(shape=(32, 4, 8))
        to_data = keras.backend.ones(shape=(32, 2, 8))

        # The last positional argument is True for the first call and False
        # for the second; it is presumably the `training` flag, so only the
        # first call should apply dropout.
        train_out = layer(from_data, to_data, None, None, None, True)
        test_out = layer(from_data, to_data, None, None, None, False)

        self.assertNotAllClose(
            keras.backend.eval(train_out), keras.backend.eval(test_out))

    @test_combinations.generate(
        test_combinations.combine(
            ragged_query=[True, False],
            ragged_value=[True, False],
            ragged_key=[True, False],
        ))
    def test_ragged_tensor(self, ragged_query, ragged_value, ragged_key):
        """Output shape follows the query for any ragged/dense input mix."""
        if ragged_query:
            query = tf.ragged.constant(
                [
                    [[3.0, 1.0], [4.0, 1.0]],
                    [[5.0, 9.0], [2.0, 6.0], [3.0, 1.0]],
                    [[1.0, 2.0]],
                ],
                inner_shape=(2, ),
            )
        else:
            query = keras.backend.ones(shape=(3, 2, 2))

        if ragged_value:
            value = tf.ragged.constant(
                [[[3.0, 1.0], [4.0, 1.0]], [[5.0, 9.0]], [[1.0, 2.0]]],
                inner_shape=(2, ),
            )
        else:
            value = keras.backend.ones(shape=(3, 4, 2))

        if ragged_key:
            key = tf.ragged.constant(
                [
                    [[3.0, 1.0], [4.0, 1.0]],
                    [[5.0, 9.0], [2.0, 6.0], [3.0, 1.0], [1.0, 5.0]],
                    [[1.0, 2.0]],
                ],
                inner_shape=(2, ),
            )
        else:
            key = keras.backend.ones(shape=(3, 4, 2))

        layer = keras.layers.MultiHeadAttention(num_heads=5, key_dim=2)
        attended = layer(query, value, key)
        self.assertAllEqual(attended.shape.as_list(), query.shape.as_list())
class DenseTest(tf.test.TestCase, parameterized.TestCase): @test_combinations.generate( test_combinations.combine(mode=['graph', 'eager'])) def testDenseProperties(self): dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense') self.assertEqual(dense.units, 2) self.assertEqual(dense.activation, tf.nn.relu) self.assertEqual(dense.kernel_regularizer, None) self.assertEqual(dense.bias_regularizer, None) self.assertEqual(dense.activity_regularizer, None) self.assertEqual(dense.use_bias, True) # Test auto-naming dense = core_layers.Dense(2, activation=tf.nn.relu) dense(tf.random.uniform((5, 2))) self.assertEqual(dense.name, 'dense_1') dense = core_layers.Dense(2, activation=tf.nn.relu) dense(tf.random.uniform((5, 2))) self.assertEqual(dense.name, 'dense_2') @tf_test_utils.run_deprecated_v1 def testVariableInput(self): with self.cached_session(): v = tf.compat.v1.get_variable( 'X', initializer=tf.compat.v1.zeros_initializer(), shape=(1, 1)) x = core_layers.Dense(1)(v) self.evaluate(tf.compat.v1.global_variables_initializer()) self.assertAllEqual(x, [[0.0]]) @test_combinations.generate( test_combinations.combine(mode=['graph', 'eager'])) def testCall(self): dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense') inputs = tf.random.uniform((5, 4), seed=1) outputs = dense(inputs) self.assertListEqual([5, 2], outputs.get_shape().as_list()) self.assertListEqual(dense.variables, [dense.kernel, dense.bias]) self.assertListEqual(dense.trainable_variables, [dense.kernel, dense.bias]) self.assertListEqual(dense.non_trainable_variables, []) if not tf.executing_eagerly(): self.assertEqual( len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 2) self.assertEqual(dense.kernel.name, 'my_dense/kernel:0') self.assertEqual(dense.bias.name, 'my_dense/bias:0') @tf_test_utils.assert_no_new_pyobjects_executing_eagerly def testNoEagerLeak(self): # Tests that repeatedly constructing and building a Layer does not leak # Python objects. 
inputs = tf.random.uniform((5, 4), seed=1) core_layers.Dense(5)(inputs) core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')(inputs) @test_combinations.generate( test_combinations.combine(mode=['graph', 'eager'])) def testCallTensorDot(self): dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense') inputs = tf.random.uniform((5, 4, 3), seed=1) outputs = dense(inputs) self.assertListEqual([5, 4, 2], outputs.get_shape().as_list()) @test_combinations.generate( test_combinations.combine(mode=['graph', 'eager'])) def testNoBias(self): dense = core_layers.Dense(2, use_bias=False, name='my_dense') inputs = tf.random.uniform((5, 2), seed=1) _ = dense(inputs) self.assertListEqual(dense.variables, [dense.kernel]) self.assertListEqual(dense.trainable_variables, [dense.kernel]) self.assertListEqual(dense.non_trainable_variables, []) if not tf.executing_eagerly(): self.assertEqual( len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 1) self.assertEqual(dense.kernel.name, 'my_dense/kernel:0') self.assertEqual(dense.bias, None) @test_combinations.generate( test_combinations.combine(mode=['graph', 'eager'])) def testNonTrainable(self): dense = core_layers.Dense(2, trainable=False, name='my_dense') inputs = tf.random.uniform((5, 2), seed=1) _ = dense(inputs) self.assertListEqual(dense.variables, [dense.kernel, dense.bias]) self.assertListEqual(dense.non_trainable_variables, [dense.kernel, dense.bias]) self.assertListEqual(dense.trainable_variables, []) if not tf.executing_eagerly(): self.assertEqual( len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 0) @test_combinations.generate( test_combinations.combine(mode=['graph', 'eager'])) def testOutputShape(self): dense = core_layers.Dense(7, activation=tf.nn.relu, name='my_dense') inputs = tf.random.uniform((5, 3), seed=1) outputs = dense(inputs) self.assertEqual(outputs.get_shape().as_list(), [5, 7]) inputs = tf.random.uniform((5, 2, 3), seed=1) outputs = 
dense(inputs) self.assertEqual(outputs.get_shape().as_list(), [5, 2, 7]) inputs = tf.random.uniform((1, 2, 4, 3), seed=1) outputs = dense(inputs) self.assertEqual(outputs.get_shape().as_list(), [1, 2, 4, 7]) @tf_test_utils.run_deprecated_v1 def testCallOnPlaceHolder(self): inputs = tf.compat.v1.placeholder(dtype=tf.float32) dense = core_layers.Dense(4, name='my_dense') with self.assertRaises(ValueError): dense(inputs) inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None]) dense = core_layers.Dense(4, name='my_dense') with self.assertRaises(ValueError): dense(inputs) inputs = tf.compat.v1.placeholder( dtype=tf.float32, shape=[None, None, None]) dense = core_layers.Dense(4, name='my_dense') with self.assertRaises(ValueError): dense(inputs) inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, 3]) dense = core_layers.Dense(4, name='my_dense') dense(inputs) inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None, 3]) dense = core_layers.Dense(4, name='my_dense') dense(inputs) @test_combinations.generate( test_combinations.combine(mode=['graph', 'eager'])) def testActivation(self): dense = core_layers.Dense(2, activation=tf.nn.relu, name='dense1') inputs = tf.random.uniform((5, 3), seed=1) outputs = dense(inputs) if not tf.executing_eagerly(): self.assertEqual(outputs.op.name, 'dense1/Relu') dense = core_layers.Dense(2, name='dense2') inputs = tf.random.uniform((5, 3), seed=1) outputs = dense(inputs) if not tf.executing_eagerly(): self.assertEqual(outputs.op.name, 'dense2/BiasAdd') @tf_test_utils.run_deprecated_v1 def testActivityRegularizer(self): regularizer = lambda x: tf.reduce_sum(x) * 1e-3 dense = core_layers.Dense( 2, name='my_dense', activity_regularizer=regularizer) inputs = tf.random.uniform((5, 3), seed=1) _ = dense(inputs) loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) self.assertEqual(len(loss_keys), 1) self.assertListEqual(dense.losses, loss_keys) 
@tf_test_utils.run_deprecated_v1
def testKernelRegularizer(self):
    """A kernel regularizer yields one collected loss equal to `losses`."""

    def scaled_sum(weights):
        return tf.reduce_sum(weights) * 1e-3

    layer = core_layers.Dense(
        2, name='my_dense', kernel_regularizer=scaled_sum)
    layer(tf.random.uniform((5, 3), seed=1))
    collected = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(collected), 1)
    # Variables must be initialized before the loss tensors are evaluated.
    self.evaluate([var.initializer for var in layer.variables])
    self.assertAllEqual(
        self.evaluate(layer.losses), self.evaluate(collected))


@tf_test_utils.run_deprecated_v1
def testKernelRegularizerWithReuse(self):
    """Re-applying a reused functional dense layer adds no second loss."""
    losses_key = tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES

    def scaled_sum(weights):
        return tf.reduce_sum(weights) * 1e-3

    features = tf.random.uniform((5, 3), seed=1)
    core_layers.dense(
        features, 2, name='my_dense', kernel_regularizer=scaled_sum)
    self.assertEqual(len(tf.compat.v1.get_collection(losses_key)), 1)
    core_layers.dense(
        features,
        2,
        name='my_dense',
        kernel_regularizer=scaled_sum,
        reuse=True)
    self.assertEqual(len(tf.compat.v1.get_collection(losses_key)), 1)


@tf_test_utils.run_deprecated_v1
def testBiasRegularizer(self):
    """A bias regularizer yields one collected loss equal to `losses`."""

    def scaled_sum(weights):
        return tf.reduce_sum(weights) * 1e-3

    layer = core_layers.Dense(
        2, name='my_dense', bias_regularizer=scaled_sum)
    layer(tf.random.uniform((5, 3), seed=1))
    collected = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(collected), 1)
    self.evaluate([var.initializer for var in layer.variables])
    self.assertAllEqual(
        self.evaluate(layer.losses), self.evaluate(collected))


@tf_test_utils.run_deprecated_v1
def testFunctionalDense(self):
    """Functional `dense` creates two trainable variables and a Relu op."""
    with self.cached_session():
        features = tf.random.uniform((5, 3), seed=1)
        result = core_layers.dense(
            features, 2, activation=tf.nn.relu, name='my_dense')
        trainable = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
        self.assertEqual(len(trainable), 2)
        self.assertEqual(result.op.name, 'my_dense/Relu')


@tf_test_utils.run_deprecated_v1
def testFunctionalDenseTwice(self):
    """Two unnamed functional `dense` calls create separate variables."""
    features = tf.random.uniform((5, 3), seed=1)
    core_layers.dense(features, 2)
    after_first = _get_variable_dict_from_varstore().values()
    core_layers.dense(features, 2)
    after_second = _get_variable_dict_from_varstore().values()
    self.assertEqual(len(after_first), 2)
    self.assertEqual(len(after_second), 4)


# TODO(alive): get this to work in eager mode.
def testFunctionalDenseTwiceReuse(self):
    """`reuse=True` with the same name leaves trainable variables as-is."""
    with self.cached_session():
        features = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(features, 2, name='my_dense')
        before_reuse = tf.compat.v1.trainable_variables()
        core_layers.dense(features, 2, name='my_dense', reuse=True)
        after_reuse = tf.compat.v1.trainable_variables()
        self.assertEqual(before_reuse, after_reuse)


# TODO(alive): get this to work in eager mode.
def testFunctionalDenseTwiceReuseFromScope(self):
    """Re-entering a scope with `reuse=True` creates no new variables."""
    with self.cached_session():
        with tf.compat.v1.variable_scope('scope'):
            features = tf.random.uniform((5, 3), seed=1)
            core_layers.dense(features, 2, name='my_dense')
            before_reuse = tf.compat.v1.trainable_variables()
        with tf.compat.v1.variable_scope('scope', reuse=True):
            core_layers.dense(features, 2, name='my_dense')
            after_reuse = tf.compat.v1.trainable_variables()
        self.assertEqual(before_reuse, after_reuse)


@tf_test_utils.run_deprecated_v1
def testFunctionalDenseInitializerFromScope(self):
    """The scope initializer applies to the kernel but not to the bias."""
    ones_scope = tf.compat.v1.variable_scope(
        'scope', initializer=tf.compat.v1.ones_initializer())
    with ones_scope:
        with self.cached_session():
            features = tf.random.uniform((5, 3), seed=1)
            core_layers.dense(features, 2)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            store = _get_variable_dict_from_varstore()
            self.assertEqual(len(store), 2)
            # Check that the matrix weights got initialized to ones (from
            # scope).
            kernel = store['scope/dense/kernel']
            self.assertAllClose(kernel.read_value(), np.ones((3, 2)))
            # Check that the bias still got initialized to zeros.
            bias = store['scope/dense/bias']
            self.assertAllClose(bias.read_value(), np.zeros(2))


def testFunctionalDenseWithCustomGetter(self):
    """The scope's custom getter is invoked once per created variable."""
    call_count = [0]

    def custom_getter(getter, *args, **kwargs):
        call_count[0] += 1
        return getter(*args, **kwargs)

    with tf.compat.v1.variable_scope('test', custom_getter=custom_getter):
        features = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(features, 2)
    self.assertEqual(call_count[0], 2)


@tf_test_utils.run_deprecated_v1
def testFunctionalDenseInScope(self):
    """Kernel names follow the enclosing scope and the `name` argument."""

    def check_kernel_name(var_key):
        # The variable store is re-read on every check.
        store = _get_variable_dict_from_varstore()
        self.assertEqual(store[var_key].name, '%s:0' % var_key)

    with self.cached_session():
        with tf.compat.v1.variable_scope('test'):
            features = tf.random.uniform((5, 3), seed=1)
            core_layers.dense(features, 2, name='my_dense')
            check_kernel_name('test/my_dense/kernel')
        with tf.compat.v1.variable_scope('test1') as scope:
            features = tf.random.uniform((5, 3), seed=1)
            # Passing the scope object itself as the layer name.
            core_layers.dense(features, 2, name=scope)
            check_kernel_name('test1/kernel')
        with tf.compat.v1.variable_scope('test2'):
            features = tf.random.uniform((5, 3), seed=1)
            core_layers.dense(features, 2)
            check_kernel_name('test2/dense/kernel')


@test_combinations.generate(
    test_combinations.combine(mode=['graph', 'eager']))
def testComputeOutputShape(self):
    """`compute_output_shape` rejects bad shapes and maps the last dim."""
    layer = core_layers.Dense(2, activation=tf.nn.relu, name='dense1')
    as_shape = tf.TensorShape
    # pylint: disable=protected-access
    for rejected in (None, [], [1]):
        with self.assertRaises(ValueError):
            layer.compute_output_shape(as_shape(rejected))
    accepted = (
        ((None, 3), [None, 2]),
        (as_shape([None, 3]), [None, 2]),
        (as_shape([None, 4, 3]), [None, 4, 2]),
    )
    for input_shape, expected in accepted:
        self.assertEqual(
            expected, layer.compute_output_shape(input_shape).as_list())
    # pylint: enable=protected-access


@test_combinations.generate(
    test_combinations.combine(mode=['graph', 'eager']))
def testConstraints(self):
    """Constraint callables passed to `Dense` are exposed unchanged."""

    def normalize_by_sum(x):
        return x / tf.reduce_sum(x)

    def normalize_by_max(x):
        return x / tf.reduce_max(x)

    layer = core_layers.Dense(
        2,
        kernel_constraint=normalize_by_sum,
        bias_constraint=normalize_by_max)
    layer(tf.random.uniform((5, 3), seed=1))
    self.assertEqual(layer.kernel_constraint, normalize_by_sum)
    self.assertEqual(layer.bias_constraint, normalize_by_max)
class CheckpointingTests(test_combinations.TestCase):
    """Checkpoint save/restore tests driven by the v1 Adam optimizer."""

    @tf_test_utils.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
    def testNamingWithOptimizer(self):
        """Checks checkpoint keys and full names in the object graph."""
        input_value = tf.constant([[3.]])
        model = MyModel()
        # A nuisance Model using the same optimizer. Its slot variables should
        # not go in the checkpoint, since it is never depended on.
        other_model = MyModel()
        optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
        optimizer_step = tf.compat.v1.train.get_or_create_global_step()
        root_trackable = tf.train.Checkpoint(
            optimizer=optimizer, model=model, optimizer_step=optimizer_step)
        if tf.executing_eagerly():
            optimizer.minimize(
                lambda: model(input_value), global_step=optimizer_step)
            optimizer.minimize(
                lambda: other_model(input_value), global_step=optimizer_step)
        else:
            train_op = optimizer.minimize(
                model(input_value), global_step=optimizer_step)
            optimizer.minimize(
                other_model(input_value), global_step=optimizer_step)
            self.evaluate(trackable_utils.gather_initializers(root_trackable))
            self.evaluate(train_op)
        named_variables, serialized_graph, _ = (
            tf.__internal__.tracking.ObjectGraphView(
                root_trackable).serialize_object_graph())
        expected_checkpoint_names = (
            # Created in the root node, so no prefix.
            "optimizer_step",
            "model/_second/kernel",
            "model/_named_dense/kernel",
            "model/_named_dense/bias",
            # non-Layer dependency of the model
            "model/_non_layer/a_variable",
            # The optimizer creates two non-slot variables
            "optimizer/beta1_power",
            "optimizer/beta2_power",
            # Slot variables
            "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
            "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
            "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
            "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
            "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
            "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
        )
        suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
        expected_checkpoint_names = [
            name + suffix for name in expected_checkpoint_names
        ]
        named_variables = {v.name: v for v in named_variables}
        self.assertEqual(
            len(expected_checkpoint_names), len(named_variables))
        # Check that we've created the right full_names of objects (not
        # exhaustive)
        expected_names = {
            "optimizer_step" + suffix: "global_step",
            "model/_second/kernel" + suffix: "my_model/dense_1/kernel",
            "model/_named_dense/kernel" + suffix: "my_model/dense/kernel",
            "optimizer/beta1_power" + suffix: "beta1_power",
            "optimizer/beta2_power" + suffix: "beta2_power",
        }
        for node in serialized_graph.nodes:
            for attribute in node.attributes:
                # Popping lets the final assertEmpty prove every expected key
                # was actually seen in the graph.
                expected_name = expected_names.pop(
                    attribute.checkpoint_key, None)
                if expected_name is not None:
                    self.assertEqual(expected_name, attribute.full_name)
        self.assertEmpty(expected_names)
        # Spot check the generated protocol buffers.
        self.assertEqual(
            "optimizer", serialized_graph.nodes[0].children[1].local_name)
        optimizer_node = serialized_graph.nodes[
            serialized_graph.nodes[0].children[1].node_id]
        self.assertEqual("beta1_power", optimizer_node.children[0].local_name)
        self.assertEqual(
            "beta1_power",
            serialized_graph.nodes[
                optimizer_node.children[0].node_id].attributes[0].full_name)
        first_slot = optimizer_node.slot_variables[0]
        self.assertEqual(
            "my_model/dense/kernel",
            serialized_graph.nodes[
                first_slot.original_variable_node_id].attributes[0].full_name)
        # We strip off the :0 suffix, as variable.name-based saving does.
        self.assertEqual(
            "my_model/dense/kernel/Adam",
            serialized_graph.nodes[
                first_slot.slot_variable_node_id].attributes[0].full_name)
        self.assertEqual(
            "my_model/dense/kernel/Adam:0",
            optimizer.get_slot(var=model._named_dense.kernel, name="m").name)
        self.assertEqual(
            "model/_named_dense/kernel" + suffix,
            serialized_graph.nodes[
                first_slot.original_variable_node_id
            ].attributes[0].checkpoint_key)
        self.assertEqual("m", first_slot.slot_name)
        self.assertEqual(
            "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix,
            serialized_graph.nodes[
                first_slot.slot_variable_node_id
            ].attributes[0].checkpoint_key)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testSaveRestore(self):
        """Saves, perturbs, then restores immediately and on variable create."""
        with self.test_session():
            model = MyModel()
            optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
            root_trackable = tf.train.Checkpoint(
                optimizer=optimizer, model=model)
            input_value = tf.constant([[3.]])
            if tf.executing_eagerly():
                optimizer.minimize(lambda: model(input_value))
            else:
                train_op = optimizer.minimize(model(input_value))
                # TODO(allenl): Make initialization more pleasant when graph
                # building.
                root_trackable.save_counter  # pylint: disable=pointless-statement
                self.evaluate(
                    trackable_utils.gather_initializers(root_trackable))
                self.evaluate(train_op)
            prefix = os.path.join(self.get_temp_dir(), "ckpt")
            self.evaluate(
                tf.compat.v1.assign(model._named_dense.variables[1], [42.]))
            m_bias_slot = optimizer.get_slot(
                model._named_dense.variables[1], "m")
            self.evaluate(tf.compat.v1.assign(m_bias_slot, [1.5]))
            save_path = root_trackable.save(file_prefix=prefix)
            # Overwrite the saved values so a successful restore is observable.
            self.evaluate(
                tf.compat.v1.assign(model._named_dense.variables[1], [43.]))
            self.evaluate(
                tf.compat.v1.assign(root_trackable.save_counter, 3))
            optimizer_variables = self.evaluate(optimizer.variables())
            self.evaluate(tf.compat.v1.assign(m_bias_slot, [-2.]))
            # Immediate restoration
            status = root_trackable.restore(
                save_path=save_path).assert_consumed()
            status.run_restore_ops()
            self.assertAllEqual(
                [42.], self.evaluate(model._named_dense.variables[1]))
            self.assertAllEqual(
                1, self.evaluate(root_trackable.save_counter))
            self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
            if not tf.executing_eagerly():
                # Restore-on-create is only supported when executing eagerly
                return
            on_create_model = MyModel()
            on_create_optimizer = tf.compat.v1.train.AdamOptimizer(
                0.001,
                # Preserve beta1_power and beta2_power when applying gradients
                # so we can test that they've been restored correctly.
                beta1=1.0,
                beta2=1.0)
            on_create_root = tf.train.Checkpoint(
                optimizer=on_create_optimizer, model=on_create_model)
            # Deferred restoration
            status = on_create_root.restore(save_path=save_path)
            status.assert_nontrivial_match()
            status.assert_existing_objects_matched()
            with self.assertRaises(AssertionError):
                status.assert_consumed()
            on_create_model(tf.constant([[3.]]))  # create variables
            self.assertAllEqual(
                1, self.evaluate(on_create_root.save_counter))
            self.assertAllEqual(
                [42.],
                self.evaluate(on_create_model._named_dense.variables[1]))
            on_create_m_bias_slot = on_create_optimizer.get_slot(
                on_create_model._named_dense.variables[1], "m")
            status.assert_existing_objects_matched()
            with self.assertRaises(AssertionError):
                status.assert_consumed()
            # Optimizer slot variables are created when the original variable
            # is restored.
            self.assertAllEqual(
                [1.5], self.evaluate(on_create_m_bias_slot))
            self.assertAllEqual(
                optimizer_variables[2:],
                self.evaluate(on_create_optimizer.variables()))
            dummy_var = tf.Variable([1.])
            on_create_optimizer.minimize(loss=dummy_var.read_value)
            status.assert_existing_objects_matched()
            status.assert_consumed()
            beta1_power, beta2_power = (
                on_create_optimizer._get_beta_accumulators())
            self.assertAllEqual(
                optimizer_variables[0], self.evaluate(beta1_power))
            self.assertAllEqual(
                optimizer_variables[1], self.evaluate(beta2_power))

    # TODO(allenl): Debug garbage created by this test in python3.
    def testDeferredRestorationUsageEager(self):
        """An idiomatic eager execution example."""
        num_training_steps = 10
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        for training_continuation in range(3):
            model = MyModel()
            optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
            root = tf.train.Checkpoint(
                optimizer=optimizer,
                model=model,
                optimizer_step=tf.compat.v1.train.get_or_create_global_step())
            root.restore(tf.train.latest_checkpoint(checkpoint_directory))
            for _ in range(num_training_steps):
                # TODO(allenl): Use a Dataset and serialize/checkpoint it.
                input_value = tf.constant([[3.]])
                optimizer.minimize(
                    lambda: model(input_value),  # pylint: disable=cell-var-from-loop
                    global_step=root.optimizer_step)
            root.save(file_prefix=checkpoint_prefix)
            self.assertEqual(
                (training_continuation + 1) * num_training_steps,
                root.optimizer_step.numpy())

    def testEagerDistributionStrategy(self):
        """Step count accumulates across restores under MirroredStrategy."""
        num_training_steps = 10
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")

        def _train_fn(optimizer, model, root):
            input_value = tf.constant([[3.]])
            optimizer.minimize(
                functools.partial(model, input_value),
                global_step=root.optimizer_step)

        strategy = tf.distribute.MirroredStrategy()
        with strategy.scope():
            for training_continuation in range(3):
                model = MyModel()
                optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                root = tf.train.Checkpoint(
                    optimizer=optimizer,
                    model=model,
                    optimizer_step=(
                        tf.compat.v1.train.get_or_create_global_step()))
                root.restore(
                    tf.train.latest_checkpoint(checkpoint_directory))

                for _ in range(num_training_steps):
                    strategy.extended.call_for_each_replica(
                        functools.partial(_train_fn, optimizer, model, root))
                root.save(file_prefix=checkpoint_prefix)
                self.assertEqual(
                    (training_continuation + 1) * num_training_steps,
                    root.optimizer_step.numpy())

    def testGraphDistributionStrategy(self):
        """Graph-mode variant of the strategy test; currently skipped."""
        self.skipTest("b/121381184")
        num_training_steps = 10
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")

        def _train_fn(optimizer, model, root):
            input_value = tf.constant([[3.]])
            return optimizer.minimize(
                functools.partial(model, input_value),
                global_step=root.optimizer_step)

        for training_continuation in range(3):
            with tf.Graph().as_default():
                strategy = tf.distribute.MirroredStrategy()
                with strategy.scope():
                    model = MyModel()
                    optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                    root = tf.train.Checkpoint(
                        optimizer=optimizer,
                        model=model,
                        optimizer_step=(
                            tf.compat.v1.train.get_or_create_global_step()))
                    status = root.restore(
                        tf.train.latest_checkpoint(checkpoint_directory))
                    train_op = strategy.extended.call_for_each_replica(
                        functools.partial(_train_fn, optimizer, model, root))
                    with self.session() as session:
                        if training_continuation > 0:
                            status.assert_consumed()
                        status.initialize_or_restore()
                        for _ in range(num_training_steps):
                            session.run(train_op)
                        root.save(file_prefix=checkpoint_prefix)
                        # A graph-mode variable has no eager value, so read
                        # the step through the session (as testUsageGraph
                        # does) instead of calling `.numpy()`.
                        self.assertEqual(
                            (training_continuation + 1) * num_training_steps,
                            session.run(root.optimizer_step))

    def testUsageGraph(self):
        """Expected usage when graph building."""
        with context.graph_mode():
            num_training_steps = 10
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            for training_continuation in range(3):
                with tf.Graph().as_default():
                    model = MyModel()
                    optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                    root = tf.compat.v1.train.Checkpoint(
                        optimizer=optimizer,
                        model=model,
                        global_step=(
                            tf.compat.v1.train.get_or_create_global_step()))
                    input_value = tf.constant([[3.]])
                    train_op = optimizer.minimize(
                        model(input_value), global_step=root.global_step)
                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_directory)
                    with self.session(
                            graph=tf.compat.v1.get_default_graph()
                    ) as session:
                        status = root.restore(save_path=checkpoint_path)
                        status.initialize_or_restore(session=session)
                        if checkpoint_path is None:
                            # Only the first continuation starts without a
                            # checkpoint on disk.
                            self.assertEqual(0, training_continuation)
                            with self.assertRaises(AssertionError):
                                status.assert_consumed()
                            with self.assertRaises(AssertionError):
                                status.assert_existing_objects_matched()
                        else:
                            status.assert_consumed()
                            status.assert_existing_objects_matched()
                        for _ in range(num_training_steps):
                            session.run(train_op)
                        root.save(
                            file_prefix=checkpoint_prefix, session=session)
                        self.assertEqual(
                            (training_continuation + 1) * num_training_steps,
                            session.run(root.global_step))
                        self.assertEqual(
                            training_continuation + 1,
                            session.run(root.save_counter))

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testAgnosticUsage(self):
        """Graph/eager agnostic usage."""
        # Does create garbage when executing eagerly due to ops.Graph()
        # creation.
        with self.test_session():
            num_training_steps = 10
            checkpoint_directory = self.get_temp_dir()
            for training_continuation in range(3):
                with test_utils.device(should_use_gpu=True):
                    model = MyModel()
                    optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                    root = tf.train.Checkpoint(
                        optimizer=optimizer,
                        model=model,
                        global_step=(
                            tf.compat.v1.train.get_or_create_global_step()))
                    manager = tf.train.CheckpointManager(
                        root, checkpoint_directory, max_to_keep=1)
                    status = root.restore(
                        save_path=manager.latest_checkpoint)
                    input_value = tf.constant([[3.]])
                    train_fn = functools.partial(
                        optimizer.minimize,
                        functools.partial(model, input_value),
                        global_step=root.global_step)
                    if not tf.executing_eagerly():
                        # Build the train op once, then re-run it per step.
                        train_fn = functools.partial(
                            self.evaluate, train_fn())
                    status.initialize_or_restore()
                    for _ in range(num_training_steps):
                        train_fn()
                    manager.save()
                    self.assertEqual(
                        (training_continuation + 1) * num_training_steps,
                        self.evaluate(root.global_step))
                    self.assertEqual(
                        training_continuation + 1,
                        self.evaluate(root.save_counter))

    # pylint: disable=cell-var-from-loop
    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testWithDefun(self):
        """Restores correctly when the model is called via `tf.function`."""
        with self.test_session():
            num_training_steps = 2
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            for training_continuation in range(3):
                with test_utils.device(should_use_gpu=True):
                    model = MyModel()
                    # Don't actually train so we can test variable values
                    optimizer = tf.compat.v1.train.AdamOptimizer(0.)
                    root = tf.train.Checkpoint(
                        optimizer=optimizer,
                        model=model,
                        global_step=(
                            tf.compat.v1.train.get_or_create_global_step()))
                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_directory)
                    status = root.restore(save_path=checkpoint_path)

                    def train_fn():
                        @tf.function
                        def _call_model(x):
                            return model(x)

                        with tf.GradientTape() as tape:
                            loss = _call_model(tf.constant([[3.]]))
                        gradients = tape.gradient(loss, model.variables)
                        return optimizer.apply_gradients(
                            zip(gradients, model.variables),
                            global_step=root.global_step)

                    if not tf.executing_eagerly():
                        train_fn = functools.partial(
                            self.evaluate, train_fn())
                    status.initialize_or_restore()
                    for _ in range(num_training_steps):
                        train_fn()
                    if training_continuation > 0:
                        status.assert_consumed()
                        self.assertAllClose(
                            [[42.]], self.evaluate(model.variables[0]))
                    else:
                        # First pass: plant the value later passes expect to
                        # find after restoring.
                        self.evaluate(model.variables[0].assign([[42.]]))
                    root.save(file_prefix=checkpoint_prefix)
                    self.assertEqual(
                        (training_continuation + 1) * num_training_steps,
                        self.evaluate(root.global_step))
                    self.assertEqual(
                        training_continuation + 1,
                        self.evaluate(root.save_counter))
    # pylint: enable=cell-var-from-loop

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def testAnonymousVarsInInit(self):
        """Saving works for variables created unnamed in `__init__`."""

        class Model(training.Model):

            def __init__(self):
                super().__init__()
                self.w = tf.Variable(0.0)
                self.b = tf.Variable(0.0)
                self.vars = [self.w, self.b]

            def call(self, x):
                return x * self.w + self.b

        model = Model()
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.05)
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
        for _ in range(2):
            checkpoint.save(checkpoint_prefix)
            with tf.GradientTape() as tape:
                loss = (tf.constant(1.) - model(tf.constant(1.)))**2
            grad = tape.gradient(loss, model.vars)
            optimizer.apply_gradients(list(zip(grad, model.vars)))

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def test_initialize_if_not_restoring(self):
        """Objects absent from a checkpoint are initialized, not clobbered."""
        with self.test_session():
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
            with test_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                root = tf.train.Checkpoint(
                    # Do not save the optimizer with the checkpoint.
                    model=model,
                    global_step=(
                        tf.compat.v1.train.get_or_create_global_step()))
                optimizer_checkpoint = tf.train.Checkpoint(
                    optimizer=optimizer)

                checkpoint_path = tf.train.latest_checkpoint(
                    checkpoint_directory)
                status = root.restore(save_path=checkpoint_path)
                input_value = tf.constant([[3.]])
                train_fn = functools.partial(
                    optimizer.minimize,
                    functools.partial(model, input_value),
                    global_step=root.global_step)
                if not tf.executing_eagerly():
                    train_fn = functools.partial(self.evaluate, train_fn())
                status.initialize_or_restore()
                self.evaluate([v.initializer for v in optimizer.variables()])
                train_fn()
                model_save_path = root.save(file_prefix=checkpoint_prefix)
                self.evaluate(optimizer.variables()[0].assign(42.))
                optimizer_save_path = optimizer_checkpoint.save(
                    optimizer_only_prefix)

            # Restore into a graph with the optimizer
            with test_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                root = tf.train.Checkpoint(
                    optimizer=optimizer,
                    model=model,
                    global_step=(
                        tf.compat.v1.train.get_or_create_global_step()))
                status = root.restore(save_path=model_save_path)
                input_value = tf.constant([[3.]])
                train_fn = functools.partial(
                    optimizer.minimize,
                    functools.partial(model, input_value),
                    global_step=root.global_step)
                if not tf.executing_eagerly():
                    train_fn = functools.partial(self.evaluate, train_fn())
                status.initialize_or_restore()
                train_fn()
                with self.assertRaises(AssertionError):
                    status.assert_existing_objects_matched()
                with self.assertRaises(AssertionError):
                    status.assert_consumed()

            # Make sure initialization doesn't clobber later restores
            with test_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = tf.compat.v1.train.AdamOptimizer(
                    0.001, beta1=1.0)
                root = tf.train.Checkpoint(
                    optimizer=optimizer,
                    model=model,
                    global_step=(
                        tf.compat.v1.train.get_or_create_global_step()))
                opt_root = tf.train.Checkpoint(optimizer=optimizer)
                status = root.restore(save_path=model_save_path)
                init_only_optimizer_status = opt_root.restore(save_path=None)
                optimizer_status = opt_root.restore(
                    save_path=optimizer_save_path)
                input_value = tf.constant([[3.]])
                train_fn = functools.partial(
                    optimizer.minimize,
                    functools.partial(model, input_value),
                    global_step=root.global_step)
                if not tf.executing_eagerly():
                    train_fn = functools.partial(self.evaluate, train_fn())
                optimizer_status.run_restore_ops()
                status.initialize_or_restore()
                init_only_optimizer_status.initialize_or_restore()
                train_fn()
                self.assertEqual(
                    42., self.evaluate(optimizer.variables()[0]))
class TimeDistributedTest(test_combinations.TestCase):
    """Tests for the `keras.layers.TimeDistributed` wrapper."""

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_timedistributed_dense(self):
        """Fits a wrapped Dense layer and checks its variables are tracked."""
        model = keras.models.Sequential()
        model.add(
            keras.layers.TimeDistributed(
                keras.layers.Dense(2), input_shape=(3, 4)
            )
        )
        model.compile(optimizer="rmsprop", loss="mse")
        model.fit(
            np.random.random((10, 3, 4)),
            np.random.random((10, 3, 2)),
            epochs=1,
            batch_size=10,
        )

        # test config
        model.get_config()

        # check whether the model variables are present in the
        # trackable list of objects
        checkpointed_object_ids = {
            id(o) for o in trackable_util.list_objects(model)
        }
        for v in model.variables:
            self.assertIn(id(v), checkpointed_object_ids)

    def test_timedistributed_static_batch_size(self):
        """Fits a wrapped Dense layer built with a fixed `batch_size`."""
        model = keras.models.Sequential()
        model.add(
            keras.layers.TimeDistributed(
                keras.layers.Dense(2), input_shape=(3, 4), batch_size=10
            )
        )
        model.compile(optimizer="rmsprop", loss="mse")
        model.fit(
            np.random.random((10, 3, 4)),
            np.random.random((10, 3, 2)),
            epochs=1,
            batch_size=10,
        )

    def test_timedistributed_invalid_init(self):
        """Wrapping a tensor instead of a layer raises `ValueError`."""
        x = tf.constant(np.zeros((1, 1)).astype("float32"))
        with self.assertRaisesRegex(
            ValueError,
            "Please initialize `TimeDistributed` layer with a "
            "`tf.keras.layers.Layer` instance.",
        ):
            keras.layers.TimeDistributed(x)

    def test_timedistributed_conv2d(self):
        """Trains a wrapped Conv2D and round-trips the model through JSON."""
        with self.cached_session():
            model = keras.models.Sequential()
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.Conv2D(5, (2, 2), padding="same"),
                    input_shape=(2, 4, 4, 3),
                )
            )
            model.add(keras.layers.Activation("relu"))
            model.compile(optimizer="rmsprop", loss="mse")
            model.train_on_batch(
                np.random.random((1, 2, 4, 4, 3)),
                np.random.random((1, 2, 4, 4, 5)),
            )

            model = keras.models.model_from_json(model.to_json())
            model.summary()

    def test_timedistributed_stacked(self):
        """Fits two stacked TimeDistributed Dense layers."""
        with self.cached_session():
            model = keras.models.Sequential()
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.Dense(2), input_shape=(3, 4)
                )
            )
            model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
            model.add(keras.layers.Activation("relu"))
            model.compile(optimizer="rmsprop", loss="mse")

            model.fit(
                np.random.random((10, 3, 4)),
                np.random.random((10, 3, 3)),
                epochs=1,
                batch_size=10,
            )

    def test_regularizers(self):
        """Kernel and activity regularizers each contribute one loss."""
        with self.cached_session():
            model = keras.models.Sequential()
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.Dense(
                        2, kernel_regularizer="l1", activity_regularizer="l1"
                    ),
                    input_shape=(3, 4),
                )
            )
            model.add(keras.layers.Activation("relu"))
            model.compile(optimizer="rmsprop", loss="mse")
            self.assertEqual(len(model.losses), 2)

    def test_TimeDistributed_learning_phase(self):
        """`training=True` reaches the wrapped Dropout during predict."""
        with self.cached_session():
            keras.utils.set_random_seed(0)
            x = keras.layers.Input(shape=(3, 2))
            y = keras.layers.TimeDistributed(keras.layers.Dropout(0.999))(
                x, training=True
            )
            model = keras.models.Model(x, y)
            y = model.predict(np.random.random((10, 3, 2)))
            self.assertAllClose(np.mean(y), 0.0, atol=1e-1, rtol=1e-1)

    def test_TimeDistributed_batchnorm(self):
        """Wrapped BatchNormalization statistics change after one batch."""
        with self.cached_session():
            # test that wrapped BN updates still work.
            model = keras.models.Sequential()
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.BatchNormalization(center=True, scale=True),
                    name="bn",
                    input_shape=(10, 2),
                )
            )
            model.compile(optimizer="rmsprop", loss="mse")
            # Assert that mean and variance are 0 and 1.
            td = model.layers[0]
            self.assertAllClose(td.get_weights()[2], np.array([0, 0]))
            # unittest assertions are used instead of bare `assert` so the
            # checks still run under `python -O`.
            self.assertTrue(
                np.array_equal(td.get_weights()[3], np.array([1, 1]))
            )
            # Train
            model.train_on_batch(
                np.random.normal(loc=2, scale=2, size=(1, 10, 2)),
                np.broadcast_to(np.array([0, 1]), (1, 10, 2)),
            )
            # Assert that mean and variance changed.
            self.assertFalse(
                np.array_equal(td.get_weights()[2], np.array([0, 0]))
            )
            self.assertFalse(
                np.array_equal(td.get_weights()[3], np.array([1, 1]))
            )

    def test_TimeDistributed_trainable(self):
        """Toggling `trainable` hides and restores the wrapped weights."""
        # test layers that need learning_phase to be set
        x = keras.layers.Input(shape=(3, 2))
        layer = keras.layers.TimeDistributed(
            keras.layers.BatchNormalization()
        )
        _ = layer(x)
        self.assertEqual(len(layer.trainable_weights), 2)
        layer.trainable = False
        self.assertEmpty(layer.trainable_weights)
        layer.trainable = True
        self.assertLen(layer.trainable_weights, 2)

    def test_TimeDistributed_with_masked_embedding_and_unspecified_shape(
        self,
    ):
        """Masks from a `mask_zero` Embedding propagate through the stack."""
        with self.cached_session():
            # test with unspecified shape and Embeddings with mask_zero
            model = keras.models.Sequential()
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.Embedding(5, 6, mask_zero=True),
                    input_shape=(None, None),
                )
            )  # N by t_1 by t_2 by 6
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.SimpleRNN(7, return_sequences=True)
                )
            )
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.SimpleRNN(8, return_sequences=False)
                )
            )
            model.add(keras.layers.SimpleRNN(1, return_sequences=False))
            model.compile(optimizer="rmsprop", loss="mse")
            model_input = np.random.randint(
                low=1, high=5, size=(10, 3, 4), dtype="int32"
            )
            # Zero out trailing entries so the first samples carry masks.
            for i in range(4):
                model_input[i, i:, i:] = 0
            model.fit(
                model_input, np.random.random((10, 1)), epochs=1, batch_size=10
            )
            mask_outputs = [model.layers[0].compute_mask(model.input)]
            for layer in model.layers[1:]:
                mask_outputs.append(
                    layer.compute_mask(layer.input, mask_outputs[-1])
                )
            # The last mask is expected to be None, so it is left out of the
            # backend function's outputs and checked separately below.
            func = keras.backend.function([model.input], mask_outputs[:-1])
            mask_outputs_val = func([model_input])
            ref_mask_val_0 = model_input > 0  # embedding layer
            ref_mask_val_1 = ref_mask_val_0  # first RNN layer
            ref_mask_val_2 = np.any(ref_mask_val_1, axis=-1)  # second RNN layer
            ref_mask_val = [ref_mask_val_0, ref_mask_val_1, ref_mask_val_2]
            for i in range(3):
                self.assertAllEqual(mask_outputs_val[i], ref_mask_val[i])
            self.assertIsNone(mask_outputs[-1])  # final layer
@test_combinations.generate(
    test_combinations.combine(mode=["graph", "eager"])
)
def test_TimeDistributed_with_masking_layer(self):
    """Fit a `TimeDistributed(Masking)` stack and inspect computed masks."""
    model = keras.models.Sequential()
    model.add(
        keras.layers.TimeDistributed(
            keras.layers.Masking(
                mask_value=0.0,
            ),
            input_shape=(None, 4),
        )
    )
    model.add(keras.layers.TimeDistributed(keras.layers.Dense(5)))

    model_input = np.random.randint(low=1, high=5, size=(10, 3, 4))
    # Zero out a growing suffix of time steps in the first four samples.
    for i in range(4):
        model_input[i, i:, :] = 0.0

    # Compile exactly once; nothing changes the model before `fit`.
    model.compile(optimizer="rmsprop", loss="mse")
    model.fit(
        model_input, np.random.random((10, 3, 5)), epochs=1, batch_size=6
    )

    mask_outputs = [model.layers[0].compute_mask(model.input)]
    mask_outputs += [
        model.layers[1].compute_mask(
            model.layers[1].input, mask_outputs[-1]
        )
    ]
    func = keras.backend.function([model.input], mask_outputs)
    mask_outputs_val = func([model_input])
    self.assertEqual((mask_outputs_val[0]).all(), model_input.all())
    self.assertEqual((mask_outputs_val[1]).all(), model_input.all())


def test_TimeDistributed_with_different_time_shapes(self):
    """One wrapper accepts new time lengths but not a new feature size."""
    time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5))

    ph_1 = keras.backend.placeholder(shape=(None, 10, 13))
    out_1 = time_dist(ph_1)
    self.assertEqual(out_1.shape.as_list(), [None, 10, 5])

    ph_2 = keras.backend.placeholder(shape=(None, 1, 13))
    out_2 = time_dist(ph_2)
    self.assertEqual(out_2.shape.as_list(), [None, 1, 5])

    # The last dimension differs from the first call (18 vs 13).
    ph_3 = keras.backend.placeholder(shape=(None, 1, 18))
    with self.assertRaisesRegex(ValueError, "is incompatible with"):
        time_dist(ph_3)


def test_TimeDistributed_with_invalid_dimensions(self):
    """A rank-2 placeholder is rejected with a `ValueError`."""
    time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5))
    ph = keras.backend.placeholder(shape=(None, 10))
    with self.assertRaisesRegex(
        ValueError,
        "`TimeDistributed` Layer should be passed an `input_shape `",
    ):
        time_dist(ph)


@test_combinations.generate(
    test_combinations.combine(mode=["graph", "eager"])
)
def test_TimeDistributed_reshape(self):
    """Check which wrapped layers set `_always_use_reshape`."""

    class NoReshapeLayer(keras.layers.Layer):
        def call(self, inputs):
            return inputs

    # Built-in layers that aren't stateful use the reshape implementation.
    td1 = keras.layers.TimeDistributed(keras.layers.Dense(5))
    self.assertTrue(td1._always_use_reshape)

    # Built-in layers that are stateful don't use the reshape
    # implementation.
    td2 = keras.layers.TimeDistributed(
        keras.layers.RNN(keras.layers.SimpleRNNCell(10), stateful=True)
    )
    self.assertFalse(td2._always_use_reshape)

    # Custom layers are not allowlisted for the fast reshape implementation.
    td3 = keras.layers.TimeDistributed(NoReshapeLayer())
    self.assertFalse(td3._always_use_reshape)


@test_combinations.generate(
    test_combinations.combine(mode=["graph", "eager"])
)
def test_TimeDistributed_output_shape_return_types(self):
    """Wrapped layers may report shapes as TensorShape, list or tuple."""

    class TestLayer(keras.layers.Layer):
        def call(self, inputs):
            return tf.concat([inputs, inputs], axis=-1)

        def compute_output_shape(self, input_shape):
            output_shape = tf.TensorShape(input_shape).as_list()
            output_shape[-1] = output_shape[-1] * 2
            output_shape = tf.TensorShape(output_shape)
            return output_shape

    class TestListLayer(TestLayer):
        def compute_output_shape(self, input_shape):
            shape = super().compute_output_shape(input_shape)
            return shape.as_list()

    class TestTupleLayer(TestLayer):
        def compute_output_shape(self, input_shape):
            shape = super().compute_output_shape(input_shape)
            return tuple(shape.as_list())

    # Layers can specify output shape as list/tuple/TensorShape
    test_layers = [TestLayer, TestListLayer, TestTupleLayer]
    for layer_cls in test_layers:
        input_layer = keras.layers.TimeDistributed(layer_cls())
        inputs = keras.backend.placeholder(shape=(None, 2, 4))
        output = input_layer(inputs)
        self.assertEqual(output.shape.as_list(), [None, 2, 8])
        self.assertEqual(
            input_layer.compute_output_shape([None, 2, 4]).as_list(),
            [None, 2, 8],
        )


@test_combinations.run_all_keras_modes(always_skip_v1=True)
# TODO(scottzhu): check why v1 session failed.
def test_TimeDistributed_with_mask_first_implementation(self):
    """Predictions with and without a `Masking` layer must differ."""
    np.random.seed(100)
    rnn_layer = keras.layers.LSTM(4, return_sequences=True, stateful=True)

    data = np.array(
        [
            [[[1.0], [1.0]], [[0.0], [1.0]]],
            [[[1.0], [0.0]], [[1.0], [1.0]]],
            [[[1.0], [0.0]], [[1.0], [1.0]]],
        ]
    )
    x = keras.layers.Input(shape=(2, 2, 1), batch_size=3)

    # Model 1: the same LSTM, fed through a Masking layer.
    x_masking = keras.layers.Masking()(x)
    y = keras.layers.TimeDistributed(rnn_layer)(x_masking)
    model_1 = keras.models.Model(x, y)
    model_1.compile(
        "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly()
    )
    output_with_mask = model_1.predict(data, steps=1)

    # Model 2: the same LSTM, fed the raw input.
    y = keras.layers.TimeDistributed(rnn_layer)(x)
    model_2 = keras.models.Model(x, y)
    model_2.compile(
        "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly()
    )
    output = model_2.predict(data, steps=1)

    self.assertNotAllClose(output_with_mask, output, atol=1e-7)


@test_combinations.run_all_keras_modes
@parameterized.named_parameters(
    *test_utils.generate_combinations_with_testcase_name(
        layer=[keras.layers.LSTM, keras.layers.Dense]
    )
)
def test_TimeDistributed_with_ragged_input(self, layer):
    """Ragged input and its masked dense equivalent give equal output.

    Args:
        layer: Layer class to wrap; it is instantiated with 4 units.
    """
    if tf.executing_eagerly():
        self.skipTest("b/143103634")
    np.random.seed(100)
    # Keep the parameter (a class) distinct from the instance built from it.
    wrapped = layer(4)
    ragged_data = tf.ragged.constant(
        [
            [[[1.0], [1.0]], [[2.0], [2.0]]],
            [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]],
            [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]],
        ],
        ragged_rank=1,
    )

    x_ragged = keras.Input(shape=(None, 2, 1), dtype="float32", ragged=True)
    y_ragged = keras.layers.TimeDistributed(wrapped)(x_ragged)
    model_1 = keras.models.Model(x_ragged, y_ragged)
    model_1._run_eagerly = test_utils.should_run_eagerly()
    output_ragged = model_1.predict(ragged_data, steps=1)

    x_dense = keras.Input(shape=(None, 2, 1), dtype="float32")
    masking = keras.layers.Masking()(x_dense)
    y_dense = keras.layers.TimeDistributed(wrapped)(masking)
    model_2 = keras.models.Model(x_dense, y_dense)
    dense_data = ragged_data.to_tensor()
    model_2._run_eagerly = test_utils.should_run_eagerly()
    output_dense = model_2.predict(dense_data, steps=1)

    output_ragged = convert_ragged_tensor_value(output_ragged)
    self.assertAllEqual(output_ragged.to_tensor(), output_dense)


@test_combinations.run_all_keras_modes
def test_TimeDistributed_with_ragged_input_with_batch_size(self):
    """Same ragged-vs-dense comparison with a fixed batch size of 3."""
    np.random.seed(100)
    layer = keras.layers.Dense(16)

    ragged_data = tf.ragged.constant(
        [
            [[[1.0], [1.0]], [[2.0], [2.0]]],
            [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]],
            [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]],
        ],
        ragged_rank=1,
    )

    # Use the first implementation by specifying batch_size
    x_ragged = keras.Input(
        shape=(None, 2, 1), batch_size=3, dtype="float32", ragged=True
    )
    y_ragged = keras.layers.TimeDistributed(layer)(x_ragged)
    model_1 = keras.models.Model(x_ragged, y_ragged)
    output_ragged = model_1.predict(ragged_data, steps=1)

    x_dense = keras.Input(shape=(None, 2, 1), batch_size=3, dtype="float32")
    masking = keras.layers.Masking()(x_dense)
    y_dense = keras.layers.TimeDistributed(layer)(masking)
    model_2 = keras.models.Model(x_dense, y_dense)
    dense_data = ragged_data.to_tensor()
    output_dense = model_2.predict(dense_data, steps=1)

    output_ragged = convert_ragged_tensor_value(output_ragged)
    self.assertAllEqual(output_ragged.to_tensor(), output_dense)


def test_TimeDistributed_set_static_shape(self):
    """The static batch dimension survives wrapping a `Conv2D`."""
    layer = keras.layers.TimeDistributed(keras.layers.Conv2D(16, (3, 3)))
    inputs = keras.Input(batch_shape=(1, None, 32, 32, 1))
    outputs = layer(inputs)
    # Make sure the batch dim is not lost after array_ops.reshape.
    self.assertListEqual(outputs.shape.as_list(), [1, None, 30, 30, 16])


@test_combinations.run_all_keras_modes
def test_TimeDistributed_with_mimo(self):
    """Wrap a two-input/two-output layer and compare with direct calls."""
    dense_1 = keras.layers.Dense(8)
    dense_2 = keras.layers.Dense(16)

    class TestLayer(keras.layers.Layer):
        def __init__(self):
            super().__init__()
            self.dense_1 = dense_1
            self.dense_2 = dense_2

        def call(self, inputs):
            return self.dense_1(inputs[0]), self.dense_2(inputs[1])

        def compute_output_shape(self, input_shape):
            output_shape_1 = self.dense_1.compute_output_shape(
                input_shape[0]
            )
            output_shape_2 = self.dense_2.compute_output_shape(
                input_shape[1]
            )
            return output_shape_1, output_shape_2

    np.random.seed(100)
    layer = TestLayer()

    data_1 = tf.constant(
        [
            [[[1.0], [1.0]], [[2.0], [2.0]]],
            [[[4.0], [4.0]], [[5.0], [5.0]]],
            [[[7.0], [7.0]], [[8.0], [8.0]]],
        ]
    )
    data_2 = tf.constant(
        [
            [[[1.0], [1.0]], [[2.0], [2.0]]],
            [[[4.0], [4.0]], [[5.0], [5.0]]],
            [[[7.0], [7.0]], [[8.0], [8.0]]],
        ]
    )

    x1 = keras.Input(shape=(None, 2, 1), dtype="float32")
    x2 = keras.Input(shape=(None, 2, 1), dtype="float32")
    y1, y2 = keras.layers.TimeDistributed(layer)([x1, x2])
    model_1 = keras.models.Model([x1, x2], [y1, y2])
    model_1.compile(
        optimizer="rmsprop",
        loss="mse",
        run_eagerly=test_utils.should_run_eagerly(),
    )
    output_1 = model_1.predict((data_1, data_2), steps=1)

    # Reference model: call the same Dense layers directly.
    y1 = dense_1(x1)
    y2 = dense_2(x2)
    model_2 = keras.models.Model([x1, x2], [y1, y2])
    output_2 = model_2.predict((data_1, data_2), steps=1)

    self.assertAllClose(output_1, output_2)

    model_1.fit(
        x=[
            np.random.random((10, 2, 2, 1)),
            np.random.random((10, 2, 2, 1)),
        ],
        y=[
            np.random.random((10, 2, 2, 8)),
            np.random.random((10, 2, 2, 16)),
        ],
        epochs=1,
        batch_size=3,
    )


def test_TimeDistributed_Attention(self):
    """Fit and JSON round-trip a model wrapping `Attention`."""
    query_input = keras.layers.Input(shape=(None, 1, 10), dtype="float32")
    value_input = keras.layers.Input(shape=(None, 4, 10), dtype="float32")

    # Query-value attention of shape [batch_size, Tq, filters].
    query_value_attention_seq = keras.layers.TimeDistributed(
        keras.layers.Attention()
    )([query_input, value_input])
    model = keras.models.Model(
        [query_input, value_input], query_value_attention_seq
    )
    model.compile(optimizer="rmsprop", loss="mse")
    model.fit(
        [
            np.random.random((10, 8, 1, 10)),
            np.random.random((10, 8, 4, 10)),
        ],
        np.random.random((10, 8, 1, 10)),
        epochs=1,
        batch_size=10,
    )

    # test config and serialization/deserialization
    model.get_config()
    model = keras.models.model_from_json(model.to_json())
    model.summary()
class ListTests(test_combinations.TestCase):
    """Tests for Python lists assigned as attributes of Keras models."""

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testTracking(self):
        """Exercise layers, trackable children and weights of `HasList`."""
        with self.test_session():
            model = HasList()
            output = model(tf.ones([32, 2]))
            self.assertAllEqual([32, 12], output.shape)
            self.assertEqual(11, len(model.layers))
            self.assertEqual(10, len(model.layer_list.layers))
            combined = model.layer_list.layers + model.layers_with_updates
            self.assertEqual(len(model.layers), len(combined))
            for position in range(10):
                units = model.layer_list.layers[position].units
                self.assertEqual(3 + position, units)

            children = model._trackable_children()
            self.assertLen(children, 2)
            self.assertIs(model.layer_list, children["layer_list"])
            self.assertIs(model.layers_with_updates,
                          children["layers_with_updates"])
            self.assertLen(
                children["layer_list"]._trackable_children(), 10)

            # Save a known value, overwrite it, then restore it.
            self.evaluate([var.initializer for var in model.variables])
            self.evaluate(
                model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]]))
            save_path = os.path.join(self.get_temp_dir(), "ckpt")
            model.save_weights(save_path)
            self.evaluate(model.variables[0].assign(tf.zeros([2, 3])))
            model.load_weights(save_path)
            self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]],
                                self.evaluate(model.variables[0]))

            # A variable placed in a list attribute is reported as trainable.
            v = tf.Variable(1.)
            model.var_list = [v]
            self.assertTrue(any(v is t for t in model.variables))
            self.assertTrue(any(v is t for t in model.trainable_variables))
            self.assertFalse(
                any(v is t for t in model.non_trainable_variables))
            self.assertTrue(
                any(model.layer_list[0].trainable_weights[0] is t
                    for t in model.trainable_weights))

    def testSubModelTracking(self):
        """A model stored in a list exposes its weights to the outer model."""
        inner = training.Model()
        inner.v = tf.Variable(1.)
        self.assertIn(inner.v, inner.trainable_weights)
        outer = training.Model()
        outer.m = [inner]
        self.assertIn(inner.v, outer.trainable_weights)

    def testSubSequentialTracking(self):
        """A Sequential referenced twice still exposes its layer kernel."""

        class _Subclassed(training.Model):

            def __init__(self, wrapped):
                super().__init__()
                self._wrapped = wrapped

            def call(self, x):
                return self._wrapped(x)

        seq = sequential.Sequential()
        dense = core.Dense(1)
        seq.add(dense)
        wrapper = _Subclassed(seq)
        wrapper(tf.ones([1, 2]))
        wrapper.m = [seq]
        self.assertIn(dense.kernel, wrapper.trainable_weights)

    def testLayerTrackedThroughSequential(self):
        """Variables of a Sequential held in a list attribute are counted."""

        class AttrDict(dict):

            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)
                # Expose the dict's keys as attributes.
                self.__dict__ = self

        def ffnet(layer_sizes, name):
            net = sequential.Sequential(name=name)
            last = len(layer_sizes) - 1
            for i, width in enumerate(layer_sizes):
                activation = "relu" if i < last else None
                net.add(core.Dense(width, activation=activation))
            return net

        class MyModel2(training.Model):

            def __init__(self, config, name="my_model_2"):
                super().__init__(name=name)
                self._num_tokens = config.num_tokens
                # list of sub-models
                self._ffnet = [
                    ffnet(config.module_layers + (self._num_tokens, ), "ff")
                ]

            def null_input(self):
                return tf.zeros([1, self._num_tokens], dtype=tf.float32)

            def call(self, input_, module_index=None):
                return self._ffnet[0](input_)

        m2 = MyModel2(AttrDict(num_tokens=5, module_layers=(50, 30)))
        # Construct
        m2(m2.null_input())
        self.assertLen(m2.trainable_variables, 6)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testUpdatesForwarded(self):
        """Graph mode forwards layer updates; eager mode reports none."""
        model = HasList()
        model_input = tf.ones([32, 2])
        model(model_input)
        if not tf.executing_eagerly():
            layer_updates = model.layers_with_updates[0].updates
            self.assertGreater(len(layer_updates), 0)
            self.assertEqual(set(model.layers_with_updates[0].updates),
                             set(model.updates))
        else:
            self.assertEqual(0, len(model.updates))

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testLossesForwarded(self):
        """`HasList` reports two losses after being called."""
        model = HasList()
        model_input = tf.ones([32, 2])
        model(model_input)
        self.assertEqual(2, len(model.losses))

    def testModelContainersCompareEqual(self):
        """Two initially-empty list attributes are tracked separately."""

        class HasEqualContainers(training.Model):

            def __init__(self):
                super().__init__()
                self.l1 = []
                self.l2 = []

        model = HasEqualContainers()
        first_layer = HasEqualContainers()
        model.l1.append(first_layer)
        second_layer = HasEqualContainers()
        model.l2.append(second_layer)
        self.assertEqual([first_layer, second_layer], model.layers)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testTensorConversion(self):
        """A list attribute converts through `tf.constant` and `Pack`."""

        class ListToTensor(training.Model):

            def __init__(self):
                super().__init__()
                self.l = [1., 2., 3.]

        as_constant = tf.constant(ListToTensor().l)
        self.assertAllEqual([1., 2., 3.], self.evaluate(as_constant))
        as_packed = tf.raw_ops.Pack(values=ListToTensor().l)
        self.assertAllEqual([1., 2., 3.], self.evaluate(as_packed))
class TupleTests(test_combinations.TestCase):
    """Tests for Python tuples assigned as attributes of Keras models."""

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testTracking(self):
        """Exercise layers, trackable children and weights of `HasTuple`."""
        with self.test_session():
            model = HasTuple()
            output = model(tf.ones([32, 2]))
            self.assertAllEqual([32, 5], output.shape.as_list())
            self.assertLen(model.layers, 4)
            self.assertLen(model.layer_list.layers, 3)
            combined = (
                tuple(model.layer_list.layers) + model.layers_with_updates)
            self.assertEqual(len(model.layers), len(combined))
            self.assertEqual(3, model.layer_list.layers[0].units)
            self.assertEqual(4, model.layer_list.layers[1].units)
            self.assertEqual(5, model.layer_list.layers[2].units)

            self.assertLen(model._trackable_children(), 2)
            self.assertIs(model.layer_list,
                          model._trackable_children()["layer_list"])
            self.assertIs(
                model.layers_with_updates,
                model._trackable_children()["layers_with_updates"])
            self.assertLen(model.layer_list._trackable_children(), 3)

            # Save a known value, overwrite it, then restore it.
            self.evaluate([var.initializer for var in model.variables])
            self.evaluate(
                model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]]))
            save_path = os.path.join(self.get_temp_dir(), "ckpt")
            model.save_weights(save_path)
            self.evaluate(model.variables[0].assign(tf.zeros([2, 3])))
            model.load_weights(save_path)
            self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]],
                                self.evaluate(model.variables[0]))

            # Membership is compared by identity through `id()`.
            v = tf.Variable(1.)
            model.var_list = (v, )
            self.assertIn(id(v), [id(obj) for obj in model.variables])
            self.assertIn(
                id(v), [id(obj) for obj in model.trainable_variables])
            self.assertNotIn(
                id(v), [id(obj) for obj in model.non_trainable_variables])
            self.assertIn(
                id(model.layer_list[0].trainable_weights[0]),
                [id(obj) for obj in model.trainable_weights])

    @parameterized.named_parameters(
        ("Module", tf.Module),
        ("Model", training.Model),
    )
    def testSubModelTracking(self, module_subclass):
        """A module stored in a tuple exposes its variable to the parent.

        Args:
            module_subclass: Class instantiated for both inner and outer
                objects (`tf.Module` or `training.Model`).
        """
        inner = module_subclass()
        inner.v = tf.Variable(1.)
        self.assertIn(inner.v, inner.trainable_variables)
        outer = module_subclass()
        outer.m = (inner, )
        self.assertIn(inner.v, outer.trainable_variables)

    def testSubSequentialTracking(self):
        """A Sequential referenced twice still exposes its layer kernel."""

        class _Subclassed(training.Model):

            def __init__(self, wrapped):
                super().__init__()
                self._wrapped = wrapped

            def call(self, x):
                return self._wrapped(x)

        seq = sequential.Sequential()
        dense = core.Dense(1)
        seq.add(dense)
        wrapper = _Subclassed(seq)
        wrapper(tf.ones([1, 2]))
        wrapper.m = (seq, )
        self.assertIn(dense.kernel, wrapper.trainable_weights)

    def testUpdatesForwarded(self):
        """Updates are forwarded inside a graph and empty outside one."""
        with tf.Graph().as_default():
            model = HasTuple()
            model_input = tf.ones([32, 2])
            model(model_input)
            self.assertNotEmpty(model.layers_with_updates[0].updates)
            self.assertEqual(set(model.layers_with_updates[0].updates),
                             set(model.updates))

        # Same steps again, this time outside the explicit graph.
        model = HasTuple()
        model_input = tf.ones([32, 2])
        model(model_input)
        self.assertEmpty(model.updates)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testLossesForwarded(self):
        """`HasTuple` reports one loss after being called."""
        model = HasTuple()
        model_input = tf.ones([32, 2])
        model(model_input)
        self.assertLen(model.losses, 1)

    def testModelContainersCompareEqual(self):
        """Tuple attributes compare and hash like the plain tuples."""

        class HasEqualContainers(training.Model):

            def __init__(self):
                super().__init__()
                self.l1 = ()
                self.l2 = ()

        model = HasEqualContainers()
        first_layer = HasEqualContainers()
        model.l1 = (first_layer, )
        second_layer = HasEqualContainers()
        model.l2 = (second_layer, )
        self.assertEqual((first_layer, ), model.l1)

        # Look entries up by both the attribute and an equal plain tuple.
        lookup = {model.l1: 1, model.l2: 2}
        self.assertEqual(1, lookup[model.l1])
        self.assertEqual(1, lookup[(first_layer, )])
        self.assertEqual(2, lookup[model.l2])
        self.assertEqual(2, lookup[(second_layer, )])
        self.assertEqual([first_layer, second_layer], model.layers)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testTensorConversion(self):
        """A tuple attribute converts through `tf.constant` and `Pack`."""

        class TupleToTensor(training.Model):

            def __init__(self):
                super().__init__()
                self.l = (1., 2., 3.)

        as_constant = tf.constant(TupleToTensor().l)
        self.assertAllEqual((1., 2., 3.), self.evaluate(as_constant))
        as_packed = tf.raw_ops.Pack(values=TupleToTensor().l)
        self.assertAllEqual((1., 2., 3.), self.evaluate(as_packed))
class RMSpropOptimizerTest(tf.test.TestCase, parameterized.TestCase):
    """Compares `rmsprop.RMSprop` updates with NumPy reference updates."""

    def _rmsprop_update_numpy(
        self, var, g, mg, rms, mom, lr, rho, momentum, epsilon, centered
    ):
        """Apply one dense reference step.

        Args:
            var: Current variable value.
            g: Gradient.
            mg: Running mean of gradients (updated only when `centered`).
            rms: Running mean of squared gradients.
            mom: Momentum accumulator (updated only when `momentum > 0`).
            lr: Learning rate for this step.
            rho: Decay factor of the running means.
            momentum: Momentum coefficient.
            epsilon: Stabilising constant.
            centered: Whether to subtract the squared gradient mean.

        Returns:
            Tuple `(var_t, mg_t, rms_t, mom_t)` of updated values.
        """
        rms_t = rms * rho + (1 - rho) * g * g
        if centered:
            mg_t = mg * rho + (1 - rho) * g
            denom_t = rms_t - mg_t * mg_t
        else:
            mg_t = mg
            denom_t = rms_t
        if momentum > 0.0:
            # With momentum, epsilon is added inside the square root.
            mom_t = momentum * mom + lr * g / (np.sqrt(denom_t + epsilon))
            var_t = var - mom_t
        else:
            # Without momentum, epsilon is added outside the square root.
            mom_t = mom
            var_t = var - lr * g / (np.sqrt(denom_t) + epsilon)
        return var_t, mg_t, rms_t, mom_t

    def _sparse_rmsprop_update_numpy(
        self,
        var,
        gindexs,
        gvalues,
        mg,
        rms,
        mom,
        lr,
        rho,
        momentum,
        epsilon,
        centered,
    ):
        """Apply one sparse reference step without mutating the inputs.

        Args:
            var: Current variable value.
            gindexs: Indices of the rows that receive a gradient.
            gvalues: Gradient values, one per entry of `gindexs`.
            mg: Running mean of gradients.
            rms: Running mean of squared gradients.
            mom: Momentum accumulator.
            lr: Learning rate for this step.
            rho: Decay factor of the running means.
            momentum: Momentum coefficient.
            epsilon: Stabilising constant.
            centered: Whether to subtract the squared gradient mean.

        Returns:
            Tuple `(var_t, mg_t, rms_t, mom_t)` of updated copies.
        """
        mg_t = copy.deepcopy(mg)
        rms_t = copy.deepcopy(rms)
        mom_t = copy.deepcopy(mom)
        var_t = copy.deepcopy(var)
        for gindex, gvalue in zip(gindexs, gvalues):
            rms_t[gindex] = rms[gindex] * rho + (1 - rho) * gvalue * gvalue
            if centered:
                mg_t[gindex] = mg_t[gindex] * rho + (1 - rho) * gvalue
                denom_t = rms_t[gindex] - mg_t[gindex] * mg_t[gindex]
            else:
                denom_t = rms_t[gindex]
            if momentum > 0.0:
                mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt(
                    denom_t + epsilon
                )
                var_t[gindex] = var[gindex] - mom_t[gindex]
            else:
                mom_t[gindex] = mom[gindex]
                var_t[gindex] = var[gindex] - lr * gvalue / (
                    np.sqrt(denom_t) + epsilon
                )
        return var_t, mg_t, rms_t, mom_t

    def testDense(self):
        """Three dense steps match the reference for every parameter set."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for (
            dtype,
            learning_rate,
            rho,
            momentum,
            epsilon,
            centered,
        ) in _TESTPARAMS:
            with tf.compat.v1.get_default_graph().as_default(), test_utils.use_gpu():  # noqa: E501
                np_dtype = dtype.as_numpy_dtype
                # Initialize variables for numpy implementation.
                var0_np = np.array([1.0, 2.0], dtype=np_dtype)
                grads0_np = np.array([0.1, 0.2], dtype=np_dtype)
                var1_np = np.array([3.0, 4.0], dtype=np_dtype)
                grads1_np = np.array([0.01, 0.2], dtype=np_dtype)

                var0 = tf.Variable(var0_np, dtype=dtype)
                var1 = tf.Variable(var1_np, dtype=dtype)
                grads0 = tf.constant(grads0_np, dtype=dtype)
                grads1 = tf.constant(grads1_np, dtype=dtype)
                opt = rmsprop.RMSprop(
                    learning_rate=learning_rate,
                    rho=rho,
                    momentum=momentum,
                    epsilon=epsilon,
                    centered=centered,
                )

                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1])
                )
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Slots are requested only for the features that are on.
                if centered:
                    mg0 = opt.get_slot(var0, "mg")
                    mg1 = opt.get_slot(var1, "mg")
                else:
                    mg0 = None
                    mg1 = None

                if momentum > 0.0:
                    mom0 = opt.get_slot(var0, "momentum")
                    mom1 = opt.get_slot(var1, "momentum")
                else:
                    mom0 = None
                    mom1 = None

                rms0 = opt.get_slot(var0, "rms")
                self.assertIsNotNone(rms0)
                rms1 = opt.get_slot(var1, "rms")
                self.assertIsNotNone(rms1)

                mg0_np = np.array([0.0, 0.0], dtype=np_dtype)
                mg1_np = np.array([0.0, 0.0], dtype=np_dtype)
                rms0_np = np.array([0.0, 0.0], dtype=np_dtype)
                rms1_np = np.array([0.0, 0.0], dtype=np_dtype)
                mom0_np = np.array([0.0, 0.0], dtype=np_dtype)
                mom1_np = np.array([0.0, 0.0], dtype=np_dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of RMSprop
                for _ in range(1, 4):
                    self.evaluate(update)

                    (
                        var0_np,
                        mg0_np,
                        rms0_np,
                        mom0_np,
                    ) = self._rmsprop_update_numpy(
                        var0_np,
                        grads0_np,
                        mg0_np,
                        rms0_np,
                        mom0_np,
                        learning_rate,
                        rho,
                        momentum,
                        epsilon,
                        centered,
                    )
                    (
                        var1_np,
                        mg1_np,
                        rms1_np,
                        mom1_np,
                    ) = self._rmsprop_update_numpy(
                        var1_np,
                        grads1_np,
                        mg1_np,
                        rms1_np,
                        mom1_np,
                        learning_rate,
                        rho,
                        momentum,
                        epsilon,
                        centered,
                    )

                    # Validate updated params
                    if centered:
                        self.assertAllCloseAccordingToType(
                            mg0_np, self.evaluate(mg0)
                        )
                        self.assertAllCloseAccordingToType(
                            mg1_np, self.evaluate(mg1)
                        )
                    if momentum > 0.0:
                        self.assertAllCloseAccordingToType(
                            mom0_np, self.evaluate(mom0)
                        )
                        self.assertAllCloseAccordingToType(
                            mom1_np, self.evaluate(mom1)
                        )
                    self.assertAllCloseAccordingToType(
                        rms0_np, self.evaluate(rms0)
                    )
                    self.assertAllCloseAccordingToType(
                        rms1_np, self.evaluate(rms1)
                    )
                    self.assertAllCloseAccordingToType(
                        var0_np, self.evaluate(var0)
                    )
                    self.assertAllCloseAccordingToType(
                        var1_np, self.evaluate(var1)
                    )

    def _check_dense_with_decay(self, make_optimizer):
        """Run two dense steps with a decaying learning rate and compare.

        The reference learning rate at step `t` is
        `learning_rate / (1 + decay * t)`.

        Args:
            make_optimizer: Callable invoked inside the test graph with the
                keyword arguments `learning_rate`, `decay`, `rho`,
                `momentum`, `epsilon` and `centered`; returns the optimizer
                under test.
        """
        with tf.Graph().as_default():
            var0_np = np.array([1.0, 2.0])
            grads0_np = np.array([0.1, 0.2])
            var1_np = np.array([3.0, 4.0])
            grads1_np = np.array([0.01, 0.2])

            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)
            learning_rate = 0.01
            rho = 0.9
            momentum = 0.0
            epsilon = 1e-7
            centered = False
            decay = 0.5
            opt = make_optimizer(
                learning_rate=learning_rate,
                decay=decay,
                rho=rho,
                momentum=momentum,
                epsilon=epsilon,
                centered=centered,
            )

            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

            rms0 = opt.get_slot(var0, "rms")
            self.assertIsNotNone(rms0)
            rms1 = opt.get_slot(var1, "rms")
            self.assertIsNotNone(rms1)
            if momentum > 0.0:
                mom0 = opt.get_slot(var0, "momentum")
                mom1 = opt.get_slot(var1, "momentum")
            else:
                mom0 = None
                mom1 = None

            mg0_np = np.array([0.0, 0.0])
            mg1_np = np.array([0.0, 0.0])
            rms0_np = np.array([0.0, 0.0])
            rms1_np = np.array([0.0, 0.0])
            mom0_np = np.array([0.0, 0.0])
            mom1_np = np.array([0.0, 0.0])

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 2 steps of RMSprop
            for t in range(2):
                self.evaluate(update)

                lr = learning_rate / (1 + decay * t)
                var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
                    var0_np,
                    grads0_np,
                    mg0_np,
                    rms0_np,
                    mom0_np,
                    lr,
                    rho,
                    momentum,
                    epsilon,
                    centered,
                )
                var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
                    var1_np,
                    grads1_np,
                    mg1_np,
                    rms1_np,
                    mom1_np,
                    lr,
                    rho,
                    momentum,
                    epsilon,
                    centered,
                )

                # Validate updated params
                self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0))
                self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1))
                if momentum > 0.0:
                    self.assertAllCloseAccordingToType(
                        mom0_np, self.evaluate(mom0)
                    )
                    self.assertAllCloseAccordingToType(
                        mom1_np, self.evaluate(mom1)
                    )
                self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

    def testDenseWithLearningRateDecay(self):
        """Decay supplied through the optimizer's `decay` argument."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.

        def make_optimizer(learning_rate, decay, **kwargs):
            return rmsprop.RMSprop(
                learning_rate=learning_rate, decay=decay, **kwargs
            )

        self._check_dense_with_decay(make_optimizer)

    def testDenseWithLearningRateInverseTimeDecay(self):
        """Decay supplied through an `InverseTimeDecay` schedule."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.

        def make_optimizer(learning_rate, decay, **kwargs):
            lr_schedule = learning_rate_schedule.InverseTimeDecay(
                learning_rate, decay_steps=1.0, decay_rate=decay
            )
            return rmsprop.RMSprop(learning_rate=lr_schedule, **kwargs)

        self._check_dense_with_decay(make_optimizer)

    def testMinimizeSparseResourceVariable(self):
        """One `minimize` step on an embedding-lookup loss, not centered."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
                x = tf.constant([[4.0], [5.0]], dtype=dtype)

                def loss():
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x
                    )
                    return pred * pred

                update_op = rmsprop.RMSprop(
                    learning_rate=1.0,
                    rho=0.0,
                    momentum=0.0,
                    epsilon=0.0,
                    centered=False,
                ).minimize(loss, var_list=[var0])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllCloseAccordingToType(
                    [[1.0, 2.0]], self.evaluate(var0)
                )
                # Run 1 step of RMSprop
                self.evaluate(update_op)
                # Validate updated params
                self.assertAllCloseAccordingToType(
                    [[0.0, 1.0]], self.evaluate(var0), atol=0.01
                )

    def testMinimizeSparseResourceVariableCentered(self):
        """One `minimize` step on an embedding-lookup loss, centered."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
                x = tf.constant([[4.0], [5.0]], dtype=dtype)

                def loss():
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x
                    )
                    return pred * pred

                update_op = rmsprop.RMSprop(
                    learning_rate=1.0,
                    rho=0.0,
                    momentum=0.0,
                    epsilon=1.0,
                    centered=True,
                ).minimize(loss, var_list=[var0])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllCloseAccordingToType(
                    [[1.0, 2.0]], self.evaluate(var0)
                )
                # Run 1 step of RMSprop
                self.evaluate(update_op)
                # Validate updated params
                self.assertAllCloseAccordingToType(
                    [[-111, -138]], self.evaluate(var0), atol=0.01
                )

    def testSparse(self):
        """Three sparse steps match the reference for every parameter set."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for (
            dtype,
            learning_rate,
            rho,
            momentum,
            epsilon,
            centered,
        ) in _TESTPARAMS:
            with tf.compat.v1.get_default_graph().as_default(), test_utils.use_gpu():  # noqa: E501
                np_dtype = dtype.as_numpy_dtype
                # Initialize variables for numpy implementation.
                var0_np = np.array([1.0, 2.0], dtype=np_dtype)
                grads0_np = np.array([0.1], dtype=np_dtype)
                var1_np = np.array([3.0, 4.0], dtype=np_dtype)
                grads1_np = np.array([0.01], dtype=np_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                # Each variable receives a gradient for a single index.
                grads0_np_indices = np.array([0], dtype=np.int32)
                grads0 = tf.IndexedSlices(
                    tf.constant(grads0_np),
                    tf.constant(grads0_np_indices),
                    tf.constant([1]),
                )
                grads1_np_indices = np.array([1], dtype=np.int32)
                grads1 = tf.IndexedSlices(
                    tf.constant(grads1_np),
                    tf.constant(grads1_np_indices),
                    tf.constant([1]),
                )
                opt = rmsprop.RMSprop(
                    learning_rate=learning_rate,
                    rho=rho,
                    momentum=momentum,
                    epsilon=epsilon,
                    centered=centered,
                )
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1])
                )
                self.evaluate(tf.compat.v1.global_variables_initializer())

                if centered:
                    mg0 = opt.get_slot(var0, "mg")
                    self.assertEqual(mg0 is not None, centered)
                    mg1 = opt.get_slot(var1, "mg")
                    self.assertEqual(mg1 is not None, centered)
                else:
                    mg0 = None
                    mg1 = None
                rms0 = opt.get_slot(var0, "rms")
                self.assertIsNotNone(rms0)
                rms1 = opt.get_slot(var1, "rms")
                self.assertIsNotNone(rms1)
                if momentum > 0.0:
                    mom0 = opt.get_slot(var0, "momentum")
                    mom1 = opt.get_slot(var1, "momentum")
                else:
                    mom0 = None
                    mom1 = None

                mg0_np = np.array([0.0, 0.0], dtype=np_dtype)
                mg1_np = np.array([0.0, 0.0], dtype=np_dtype)
                rms0_np = np.array([0.0, 0.0], dtype=np_dtype)
                rms1_np = np.array([0.0, 0.0], dtype=np_dtype)
                mom0_np = np.array([0.0, 0.0], dtype=np_dtype)
                mom1_np = np.array([0.0, 0.0], dtype=np_dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of RMSprop
                for _ in range(1, 4):
                    self.evaluate(update)

                    (
                        var0_np,
                        mg0_np,
                        rms0_np,
                        mom0_np,
                    ) = self._sparse_rmsprop_update_numpy(
                        var0_np,
                        grads0_np_indices,
                        grads0_np,
                        mg0_np,
                        rms0_np,
                        mom0_np,
                        learning_rate,
                        rho,
                        momentum,
                        epsilon,
                        centered,
                    )
                    (
                        var1_np,
                        mg1_np,
                        rms1_np,
                        mom1_np,
                    ) = self._sparse_rmsprop_update_numpy(
                        var1_np,
                        grads1_np_indices,
                        grads1_np,
                        mg1_np,
                        rms1_np,
                        mom1_np,
                        learning_rate,
                        rho,
                        momentum,
                        epsilon,
                        centered,
                    )

                    # Validate updated params
                    if centered:
                        self.assertAllCloseAccordingToType(
                            mg0_np, self.evaluate(mg0)
                        )
                        self.assertAllCloseAccordingToType(
                            mg1_np, self.evaluate(mg1)
                        )
                    self.assertAllCloseAccordingToType(
                        rms0_np, self.evaluate(rms0)
                    )
                    self.assertAllCloseAccordingToType(
                        rms1_np, self.evaluate(rms1)
                    )
                    if momentum > 0.0:
                        self.assertAllCloseAccordingToType(
                            mom0_np, self.evaluate(mom0)
                        )
                        self.assertAllCloseAccordingToType(
                            mom1_np, self.evaluate(mom1)
                        )
                    self.assertAllCloseAccordingToType(
                        var0_np, self.evaluate(var0)
                    )
                    self.assertAllCloseAccordingToType(
                        var1_np, self.evaluate(var1)
                    )

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def testCallableParams(self):
        """Callable `learning_rate`, `rho` and `momentum` are accepted."""
        for dtype in _DATA_TYPES:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)

            learning_rate = lambda: 2.0
            rho = lambda: 0.9
            momentum = lambda: 0.0
            epsilon = 1.0
            opt = rmsprop.RMSprop(learning_rate, rho, momentum, epsilon)

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Step 1: the rms accumulators were 1. So we should see a normal
            # update: v -= grad * learning_rate
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            # Check the parameters.
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        1.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)),
                        2.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)),
                    ]
                ),
                self.evaluate(var0),
            )
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        3.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)),
                        4.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)),
                    ]
                ),
                self.evaluate(var1),
            )

            # Step 2: the root mean square accumulators contain the previous
            # update.
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            # Check the parameters.
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        1.0
                        - (0.1 * 2.0 / math.sqrt(0.001 + 1.0))
                        - (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0)),
                        2.0
                        - (0.1 * 2.0 / math.sqrt(0.001 + 1.0))
                        - (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0)),
                    ]
                ),
                self.evaluate(var0),
            )
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        3.0
                        - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0))
                        - (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0)),
                        4.0
                        - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0))
                        - (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0)),
                    ]
                ),
                self.evaluate(var1),
            )

    def testConstructRMSpropWithLR(self):
        """`lr` is accepted, and overrides `learning_rate` when both given."""
        opt = rmsprop.RMSprop(lr=1.0)
        opt_2 = rmsprop.RMSprop(learning_rate=0.1, lr=1.0)
        opt_3 = rmsprop.RMSprop(learning_rate=0.1)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def testSlotsUniqueEager(self):
        """Count distinct optimizer variables for three configurations."""
        v1 = tf.Variable(1.0)
        v2 = tf.Variable(1.0)

        opt = rmsprop.RMSprop(1.0, momentum=0.0, centered=False)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be iteration, and one unique slot variable for v1 and
        # v2.
        self.assertLen({id(v) for v in opt.variables()}, 3)
        self.assertEqual(
            self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)
        )

        opt = rmsprop.RMSprop(learning_rate=1.0, momentum=0.2, centered=False)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be iteration, and two unique slot variables for v1 and
        # v2.
        self.assertLen({id(v) for v in opt.variables()}, 5)
        self.assertEqual(
            self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)
        )

        opt = rmsprop.RMSprop(learning_rate=1.0, momentum=0.2, centered=True)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be iteration, and three unique slot variables for v1
        # and v2.
        self.assertLen({id(v) for v in opt.variables()}, 7)
        self.assertEqual(
            self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)
        )

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def testMomentumProperValue(self):
        """A momentum of 2.5 raises `ValueError`."""
        with self.assertRaisesRegex(
            ValueError,
            r"`momentum` must be between \[0, 1\]. "
            r"Received: momentum=2.5 \(of type <class "
            r"\'float\'>\).",
        ):
            rmsprop.RMSprop(1.0, momentum=2.5, centered=False)
class MixedPrecisionTest(test_combinations.TestCase):
    """Tests `enable_mixed_precision_graph_rewrite` with Keras optimizers.

    Each test passes an SGD optimizer (optionally with a loss scale) to
    `tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite` and
    checks either the returned `LossScaleOptimizer` or the error raised.
    """

    IGNORE_PERF_VAR = (
        'TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_IGNORE_PERFORMANCE')

    def setUp(self):
        """Remember the current IGNORE_PERF_VAR value and force it to '1'."""
        super().setUp()
        # Enable the tests to be run on pre-Volta GPUs by telling the grappler
        # pass to ignore performance and always transform the graph.
        self._original_ignore_perf_value = os.getenv(self.IGNORE_PERF_VAR)
        os.environ[self.IGNORE_PERF_VAR] = '1'

    def tearDown(self):
        """Restore IGNORE_PERF_VAR and disable the graph rewrite."""
        # Set the IGNORE_PERF_VAR variable back to its original value.
        if self._original_ignore_perf_value is not None:
            os.environ[self.IGNORE_PERF_VAR] = (
                self._original_ignore_perf_value)
        else:
            # pop() rather than del: do not fail the teardown if the variable
            # has already been removed from the environment.
            os.environ.pop(self.IGNORE_PERF_VAR, None)
        tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite()
        super().tearDown()

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_wrap_optimizer_fixed_loss_scale(self):
        """A fixed loss scale given as a number or as a `FixedLossScale`."""
        opt = gradient_descent_v2.SGD(1.0)
        opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            opt, 123)
        self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(self.evaluate(opt.loss_scale), 123.)
        self.assertFalse(opt.dynamic)
        # Compare the value itself: `assertTrue(x, 123.)` would only check
        # that `x` is truthy and use 123. as the failure message.
        self.assertAllEqual(opt.initial_scale, 123.)

        opt = gradient_descent_v2.SGD(1.0)
        opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            opt, tf.compat.v1.mixed_precision.FixedLossScale(123))
        self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(self.evaluate(opt.loss_scale), 123.)
        self.assertFalse(opt.dynamic)
        self.assertAllEqual(opt.initial_scale, 123.)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_wrap_optimizer_dynamic_loss_scale(self):
        """A dynamic loss scale given as 'dynamic' or as `DynamicLossScale`."""
        opt = gradient_descent_v2.SGD(1.0)
        opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            opt, 'dynamic')
        self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(self.evaluate(opt.loss_scale), 2.**15)
        self.assertTrue(opt.dynamic)
        # Equality checks, not `assertTrue(value, expected)` (see the note in
        # test_wrap_optimizer_fixed_loss_scale's sibling assertions above the
        # second optimizer: the second argument of assertTrue is a message).
        self.assertAllEqual(opt.initial_scale, 2.**15)
        self.assertEqual(opt.dynamic_growth_steps, 2000)

        opt = gradient_descent_v2.SGD(1.0)
        opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            opt,
            tf.compat.v1.mixed_precision.DynamicLossScale(
                initial_loss_scale=4, increment_period=1000))
        self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(self.evaluate(opt.loss_scale), 4.)
        self.assertTrue(opt.dynamic)
        self.assertAllEqual(opt.initial_scale, 4.)
        self.assertEqual(opt.dynamic_growth_steps, 1000)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_wrap_optimizer_dynamic_loss_scale_errors(self):
        """Unsupported loss scales are rejected with a descriptive error."""
        opt = gradient_descent_v2.SGD(1.0)
        with self.assertRaisesRegex(
                ValueError,
                'When passing a DynamicLossScale to "loss_scale", '
                'DynamicLossScale.multiplier must be 2. Got: '
                'DynamicLossScale'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                opt,
                tf.compat.v1.mixed_precision.DynamicLossScale(multiplier=4.))

        class MyLossScale(tf.compat.v1.mixed_precision.LossScale):
            """Minimal custom LossScale subclass used to trigger TypeError."""

            def __call__(self):
                return 1.

            def update(self, grads):
                return None, True

            def get_config(self):
                return {}

        with self.assertRaisesRegex(
                TypeError,
                'Passing a LossScale that is not a FixedLossScale or a '
                'DynamicLossScale is not supported. Got:'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                opt, MyLossScale())

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_optimizer_errors(self):
        """An optimizer that is already a LossScaleOptimizer is rejected."""
        opt = gradient_descent_v2.SGD(1.0)
        opt = loss_scale_optimizer_v2.LossScaleOptimizer(opt)
        with self.assertRaisesRegex(
                ValueError,
                '"opt" must not already be an instance of a '
                'LossScaleOptimizer.'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                opt)
        # The failed call must not have switched the rewrite option on.
        self.assertFalse(tf.config.optimizer.get_experimental_options().get(
            'auto_mixed_precision', False))

    @test_utils.enable_v2_dtype_behavior
    def test_error_if_policy_is_set(self):
        """The rewrite errors under a 'mixed_float16' policy scope only."""
        with policy.policy_scope('mixed_float16'):
            with self.assertRaisesRegex(
                    ValueError, 'the global Keras dtype Policy has been set'):
                tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                    gradient_descent_v2.SGD(1.0))
        # Test no error is thrown when the policy is currently the default.
        tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            gradient_descent_v2.SGD(1.0))
        # Test no error is thrown when the policy is a non-mixed policy.
        with policy.policy_scope('float64'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                gradient_descent_v2.SGD(1.0))
class CheckpointCompatibilityTests(test_combinations.TestCase):
    """Round-trips checkpoints between name-based/object-based and graph/eager.

    The helpers build a small trained model whose bias, Adam "m" slot and
    beta1 accumulator hold known values (1, 2, 3); tests overwrite them with
    sentinels (101, 102, 103), restore, and check the known values are back.
    """

    def _initialized_model(self):
        """Build, initialize and train one step; return the root Checkpoint."""
        features = tf.constant([[3.]])
        model = MyModel()
        optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
        optimizer_step = tf.compat.v1.train.get_or_create_global_step()
        root_trackable = tf.train.Checkpoint(
            optimizer=optimizer, model=model, optimizer_step=optimizer_step)
        forward_pass = functools.partial(model, features)
        train_op = optimizer.minimize(forward_pass, global_step=optimizer_step)
        self.evaluate(trackable_utils.gather_initializers(root_trackable))
        self.evaluate(train_op)
        # A regular variable, a slot variable, and a non-slot Optimizer
        # variable with known values to check when loading.
        self.evaluate(model._named_dense.bias.assign([1.]))
        m_slot = optimizer.get_slot(var=model._named_dense.bias, name="m")
        self.evaluate(m_slot.assign([2.]))
        beta1_power, _ = optimizer._get_beta_accumulators()
        self.evaluate(beta1_power.assign(3.))
        return root_trackable

    def _set_sentinels(self, root_trackable):
        """Overwrite the three tracked values with 101, 102 and 103."""
        bias = root_trackable.model._named_dense.bias
        self.evaluate(bias.assign([101.]))
        m_slot = root_trackable.optimizer.get_slot(
            var=root_trackable.model._named_dense.bias, name="m")
        self.evaluate(m_slot.assign([102.]))
        beta1_power, _ = root_trackable.optimizer._get_beta_accumulators()
        self.evaluate(beta1_power.assign(103.))

    def _check_sentinels(self, root_trackable):
        """Assert the three tracked values are 1, 2 and 3 again."""
        bias_value = self.evaluate(root_trackable.model._named_dense.bias)
        self.assertAllEqual([1.], bias_value)
        m_slot_value = self.evaluate(
            root_trackable.optimizer.get_slot(
                var=root_trackable.model._named_dense.bias, name="m"))
        self.assertAllEqual([2.], m_slot_value)
        beta1_power, _ = root_trackable.optimizer._get_beta_accumulators()
        self.assertAllEqual(3., self.evaluate(beta1_power))

    def _write_name_based_checkpoint(self):
        """Save the initialized model with a V1 `Saver`; return the path."""
        prefix = os.path.join(self.get_temp_dir(), "ckpt")
        with context.graph_mode():
            save_graph = tf.Graph()
            with save_graph.as_default(), self.session(
                    graph=save_graph) as session:
                root = self._initialized_model()
                name_saver = tf.compat.v1.train.Saver()
                return name_saver.save(
                    sess=session,
                    save_path=prefix,
                    global_step=root.optimizer_step)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testLoadFromNameBasedSaver(self):
        """Save a name-based checkpoint, load it using the object-based API."""
        with test_utils.device(should_use_gpu=True), self.test_session():
            save_path = self._write_name_based_checkpoint()
            root = self._initialized_model()
            self._set_sentinels(root)
            with self.assertRaises(AssertionError):
                self._check_sentinels(root)
            object_saver = tf.train.Checkpoint(root=root)
            self._set_sentinels(root)
            status = object_saver.read(save_path)
            if tf.executing_eagerly():
                self._check_sentinels(root)
                status.assert_consumed()
                status.assert_existing_objects_matched()
                status.assert_nontrivial_match()
            else:
                # When graph building, we haven't read any keys, so we don't
                # know whether the restore will be complete.
                for pending_assertion in (
                        status.assert_consumed,
                        status.assert_existing_objects_matched,
                        status.assert_nontrivial_match):
                    with self.assertRaisesRegex(
                            AssertionError, "not restored"):
                        pending_assertion()
            status.run_restore_ops()
            self._check_sentinels(root)
            self._set_sentinels(root)
            status = object_saver.read(save_path)
            status.initialize_or_restore()
            self._check_sentinels(root)
            # Add an object that is absent from the name-based checkpoint:
            # reading still works, while the existing-objects-matched
            # assertion raises AssertionError.
            root.not_in_name_checkpoint = tf.Variable([1.])
            status = object_saver.read(save_path)
            with self.assertRaises(AssertionError):
                status.assert_existing_objects_matched()

    def testSaveGraphLoadEager(self):
        """Save while graph building, then restore in eager mode."""
        prefix = os.path.join(self.get_temp_dir(), "ckpt")
        with context.graph_mode():
            save_graph = tf.Graph()
            with save_graph.as_default(), self.session(graph=save_graph):
                graph_root = self._initialized_model()
                save_path = graph_root.save(file_prefix=prefix)
        with tf.__internal__.eager_context.eager_mode():
            eager_root = self._initialized_model()
            self._set_sentinels(eager_root)
            eager_root.restore(save_path).assert_consumed()
            self._check_sentinels(eager_root)

    def testSaveEagerLoadGraph(self):
        """Save in eager mode, then restore while graph building."""
        prefix = os.path.join(self.get_temp_dir(), "ckpt")
        with tf.__internal__.eager_context.eager_mode():
            eager_root = self._initialized_model()
            save_path = eager_root.save(file_prefix=prefix)
        with context.graph_mode():
            save_graph = tf.Graph()
            with save_graph.as_default(), self.session(graph=save_graph):
                graph_root = self._initialized_model()
                self._set_sentinels(graph_root)
                load_status = graph_root.restore(save_path)
                load_status.assert_consumed().run_restore_ops()
                self._check_sentinels(graph_root)
class DropoutTest(tf.test.TestCase, parameterized.TestCase):
    """Tests for the `core_layers.Dropout` layer and `dropout` function.

    With an all-ones input, the tests check that training mode produces at
    least one zero and that inference mode returns the input unchanged.
    """

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def testDropoutProperties(self):
        """Constructor arguments are exposed as attributes."""
        layer = core_layers.Dropout(0.5, name='dropout')
        self.assertEqual(layer.rate, 0.5)
        self.assertEqual(layer.noise_shape, None)
        layer(tf.ones(()))
        self.assertEqual(layer.name, 'dropout')

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def testBooleanLearningPhase(self):
        """`training` given as a Python bool."""
        layer = core_layers.Dropout(0.5)
        ones = tf.ones((5, 3))

        train_out = layer(ones, training=True)
        if not tf.executing_eagerly():
            self.evaluate(tf.compat.v1.global_variables_initializer())
        train_values = self.evaluate(train_out)
        self.assertAlmostEqual(0., train_values.min())

        infer_out = layer(ones, training=False)
        infer_values = self.evaluate(infer_out)
        self.assertAllClose(np.ones((5, 3)), infer_values)

    @tf_test_utils.run_deprecated_v1
    def testDynamicLearningPhase(self):
        """`training` fed through a boolean placeholder."""
        with self.cached_session() as sess:
            layer = core_layers.Dropout(0.5, seed=1)
            ones = tf.ones((5, 5))
            training = tf.compat.v1.placeholder(dtype='bool')
            dropped = layer(ones, training=training)
            self.evaluate(tf.compat.v1.global_variables_initializer())

            train_values = sess.run(dropped, feed_dict={training: True})
            self.assertAlmostEqual(0., train_values.min())

            infer_values = sess.run(dropped, feed_dict={training: False})
            self.assertAllClose(np.ones((5, 5)), infer_values)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def testDynamicNoiseShape(self):
        """`noise_shape` containing `None` entries."""
        ones = tf.ones((5, 3, 2))
        layer = core_layers.Dropout(
            0.5, noise_shape=[None, 1, None], seed=1)
        dropped = layer(ones, training=True)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        values = self.evaluate(dropped)
        self.assertAlmostEqual(0., values.min())
        # Slices 0 and 1 along axis 1 must carry the same values.
        self.assertAllClose(values[:, 0, :], values[:, 1, :])

    def testCustomNoiseShape(self):
        """Fully specified `noise_shape`."""
        ones = tf.ones((5, 3, 2))
        layer = core_layers.Dropout(0.5, noise_shape=[5, 1, 2], seed=1)
        dropped = layer(ones, training=True)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        values = self.evaluate(dropped)
        self.assertAlmostEqual(0., values.min())
        # Slices 0 and 1 along axis 1 must carry the same values.
        self.assertAllClose(values[:, 0, :], values[:, 1, :])

    @tf_test_utils.run_deprecated_v1
    def testFunctionalDropout(self):
        """The functional `core_layers.dropout` entry point."""
        with self.cached_session():
            ones = tf.ones((5, 5))

            train_out = core_layers.dropout(ones, 0.5, training=True, seed=1)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            train_values = self.evaluate(train_out)
            self.assertAlmostEqual(0., train_values.min())

            infer_out = core_layers.dropout(ones, 0.5, training=False, seed=1)
            infer_values = self.evaluate(infer_out)
            self.assertAllClose(np.ones((5, 5)), infer_values)

    @tf_test_utils.run_deprecated_v1
    def testDynamicRate(self):
        """Dropout rate fed through a float32 placeholder."""
        with self.cached_session() as sess:
            rate = tf.compat.v1.placeholder(dtype='float32', name='rate')
            layer = core_layers.Dropout(rate, name='dropout')
            ones = tf.ones((5, 5))
            dropped = layer(ones, training=True)
            self.evaluate(tf.compat.v1.global_variables_initializer())

            half_rate_values = sess.run(dropped, feed_dict={rate: 0.5})
            self.assertAlmostEqual(0., half_rate_values.min())

            zero_rate_values = sess.run(dropped, feed_dict={rate: 0.0})
            self.assertAllClose(np.ones((5, 5)), zero_rate_values)
from keras.optimizers.optimizer_v2 import gradient_descent
from keras.optimizers.schedules import learning_rate_schedule
from keras.testing_infra import test_combinations


def _maybe_serialized(lr_decay, serialize_and_deserialize):
    """Optionally round-trip a schedule through serialize/deserialize.

    Args:
      lr_decay: the learning rate schedule object under test.
      serialize_and_deserialize: if truthy, `lr_decay` is passed through
        `learning_rate_schedule.serialize` and then
        `learning_rate_schedule.deserialize`.

    Returns:
      The deserialized copy when `serialize_and_deserialize` is truthy,
      otherwise `lr_decay` itself.
    """
    if serialize_and_deserialize:
        serialized = learning_rate_schedule.serialize(lr_decay)
        return learning_rate_schedule.deserialize(serialized)
    else:
        return lr_decay


# Every test method runs for each combination of `serialize` in
# (False, True) and `mode` in ("graph", "eager").
@test_combinations.generate(
    test_combinations.combine(serialize=[False, True], mode=["graph", "eager"]))
class LRDecayTestV2(tf.test.TestCase, parameterized.TestCase):
    # Tests for learning_rate_schedule.ExponentialDecay.

    def testContinuous(self, serialize):
        # Non-staircase decay at step 5 with initial rate 0.05, decay_steps 10
        # and decay_rate 0.96 must equal 0.05 * 0.96 ** (5 / 10).
        self.evaluate(tf.compat.v1.global_variables_initializer())
        step = 5
        decayed_lr = learning_rate_schedule.ExponentialDecay(0.05, 10, 0.96)
        decayed_lr = _maybe_serialized(decayed_lr, serialize)
        expected = 0.05 * 0.96**(5.0 / 10.0)
        self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6)

    def testStaircase(self, serialize):
        # NOTE(review): this method looks truncated in this copy of the file:
        # `step` and `decayed_lr` are never used and nothing is asserted.
        # The nesting under the `if` is presumed - confirm against upstream.
        if tf.executing_eagerly():
            step = tf.Variable(0)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            decayed_lr = learning_rate_schedule.ExponentialDecay(
                0.1, 3, 0.96, staircase=True)
class TraceModelCallTest(test_combinations.TestCase):
    """Tests for `saving_utils.trace_model_call`.

    Most tests trace a model, call the traced function, and compare its
    dict output with the output of calling the model directly.
    """

    def _assert_all_close(self, expected, actual):
        """assertAllClose that first initializes variables in graph mode."""
        if tf.executing_eagerly():
            self.assertAllClose(expected, actual)
            return
        with self.cached_session() as session:
            backend._initialize_variables(session)
            self.assertAllClose(expected, actual)

    def _expected_single_output(self, model, inputs):
        """Map the first output name (or "output_1") to `model(inputs)`."""
        if model.output_names:
            return {model.output_names[0]: model(inputs)}
        return {"output_1": model(inputs)}

    @test_combinations.run_with_all_model_types
    @test_combinations.run_all_keras_modes
    def test_trace_model_outputs(self):
        """Tracing fails without a known input shape, then works once set."""
        is_functional = test_utils.get_model_type() == "functional"
        input_dim = 5 if is_functional else None
        model = test_utils.get_small_mlp(10, 3, input_dim)
        inputs = tf.ones((8, 5))

        if input_dim is None:
            with self.assertRaisesRegex(
                ValueError, ".*input shape is not availabl*"
            ):
                saving_utils.trace_model_call(model)
            model._set_inputs(inputs)

        traced = saving_utils.trace_model_call(model)
        signature_outputs = traced(inputs)
        expected_outputs = self._expected_single_output(model, inputs)
        self._assert_all_close(expected_outputs, signature_outputs)

    @test_combinations.run_with_all_model_types
    @test_combinations.run_all_keras_modes
    def test_trace_model_outputs_after_fitting(self):
        """A fitted model can be traced for every model type."""
        is_functional = test_utils.get_model_type() == "functional"
        input_dim = 5 if is_functional else None
        model = test_utils.get_small_mlp(10, 3, input_dim)
        model.compile(
            optimizer="sgd",
            loss="mse",
            run_eagerly=test_utils.should_run_eagerly(),
        )
        train_x = np.random.random((8, 5)).astype(np.float32)
        train_y = np.random.random((8, 3)).astype(np.float32)
        model.fit(x=train_x, y=train_y, epochs=2)

        inputs = tf.ones((8, 5))
        traced = saving_utils.trace_model_call(model)
        signature_outputs = traced(inputs)
        expected_outputs = self._expected_single_output(model, inputs)
        self._assert_all_close(expected_outputs, signature_outputs)

    @test_combinations.run_with_all_model_types(exclude_models="sequential")
    @test_combinations.run_all_keras_modes
    def test_trace_multi_io_model_outputs(self):
        """Two-input, two-output model traced after fitting."""
        input_dim = 5
        num_classes = 3
        num_classes_b = 4

        input_a = keras.layers.Input(shape=(input_dim,), name="input_a")
        input_b = keras.layers.Input(shape=(input_dim,), name="input_b")
        dense = keras.layers.Dense(num_classes, name="dense")
        dense2 = keras.layers.Dense(num_classes_b, name="dense2")
        dropout = keras.layers.Dropout(0.5, name="dropout")
        model = test_utils.get_multi_io_model(
            [input_a, dense], [input_b, dense, dense2, dropout]
        )

        input_a_ts = tf.constant(
            np.random.random((10, input_dim)).astype(np.float32)
        )
        input_b_ts = tf.constant(
            np.random.random((10, input_dim)).astype(np.float32)
        )

        if test_utils.get_model_type() == "subclass":
            with self.assertRaisesRegex(
                ValueError, ".*input shape is not availabl*"
            ):
                saving_utils.trace_model_call(model)

        model.compile(
            optimizer="sgd",
            loss="mse",
            run_eagerly=test_utils.should_run_eagerly(),
        )
        train_x = [
            np.random.random((8, input_dim)).astype(np.float32),
            np.random.random((8, input_dim)).astype(np.float32),
        ]
        train_y = [
            np.random.random((8, num_classes)).astype(np.float32),
            np.random.random((8, num_classes_b)).astype(np.float32),
        ]
        model.fit(x=train_x, y=train_y, epochs=2)

        traced = saving_utils.trace_model_call(model)
        # tf.function requires that the input structures match when calling
        # a ConcreteFunction. For some reason V1 models define the inputs as
        # a list, while V2 models set the inputs as a tuple.
        wants_list = (
            not tf.executing_eagerly()
            and test_utils.get_model_type() != "functional"
        )
        if wants_list:
            packed_inputs = [input_a_ts, input_b_ts]
        else:
            packed_inputs = (input_a_ts, input_b_ts)
        signature_outputs = traced(packed_inputs)

        outputs = model([input_a_ts, input_b_ts])
        output_keys = model.output_names or ["output_1", "output_2"]
        expected_outputs = {
            output_keys[0]: outputs[0],
            output_keys[1]: outputs[1],
        }
        self._assert_all_close(expected_outputs, signature_outputs)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_trace_features_layer(self):
        """Sequential models wrapping `DenseFeatures` with 1 and 2 columns."""
        one_column = [tf.feature_column.numeric_column("x")]
        model = sequential.Sequential(
            [dense_features.DenseFeatures(one_column)]
        )
        model_input = {"x": tf.constant([[1.0]])}
        model.predict(model_input, steps=1)
        traced = saving_utils.trace_model_call(model)
        self.assertAllClose({"output_1": [[1.0]]}, traced(model_input))

        two_columns = [
            tf.feature_column.numeric_column("x"),
            tf.feature_column.numeric_column("y"),
        ]
        model = sequential.Sequential(
            [dense_features.DenseFeatures(two_columns)]
        )
        model_input = {"x": tf.constant([[1.0]]), "y": tf.constant([[2.0]])}
        model.predict(model_input, steps=1)
        traced = saving_utils.trace_model_call(model)
        self.assertAllClose({"output_1": [[1.0, 2.0]]}, traced(model_input))

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_specify_input_signature(self):
        """An explicit input signature allows tracing an unbuilt model."""
        model = test_utils.get_small_sequential_mlp(10, 3, None)
        inputs = tf.ones((8, 5))

        with self.assertRaisesRegex(
            ValueError, ".*input shape is not availabl*"
        ):
            saving_utils.trace_model_call(model)

        signature = [tf.TensorSpec(shape=[None, 5], dtype=tf.float32)]
        traced = saving_utils.trace_model_call(model, signature)
        signature_outputs = traced(inputs)
        expected_outputs = self._expected_single_output(model, inputs)
        self._assert_all_close(expected_outputs, signature_outputs)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_subclassed_model_with_input_signature(self):
        """A subclassed model whose `call` already carries a signature."""

        class Model(keras.Model):
            def __init__(self):
                super().__init__()
                self.dense = keras.layers.Dense(3, name="dense")

            @tf.function(
                input_signature=[
                    [
                        tf.TensorSpec([None, 5], tf.float32),
                        tf.TensorSpec([None], tf.float32),
                    ]
                ],
            )
            def call(self, inputs, *args):
                x, y = inputs
                return self.dense(x) + y

        model = Model()
        traced = saving_utils.trace_model_call(model)
        x = tf.ones((8, 5), dtype=tf.float32)
        y = tf.ones((3,), dtype=tf.float32)
        expected_outputs = {"output_1": model([x, y])}
        signature_outputs = traced([x, y])
        self._assert_all_close(expected_outputs, signature_outputs)

    @test_combinations.run_with_all_model_types
    @test_combinations.run_all_keras_modes
    def test_model_with_fixed_input_dim(self):
        """Ensure that the batch_dim is removed when saving.

        When serving or retraining, it is important to reset the batch dim.
        This can be an issue inside of tf.function. See b/132783590 for
        context.
        """
        model = test_utils.get_small_mlp(10, 3, 5)
        loss_object = keras.losses.MeanSquaredError()
        optimizer = gradient_descent.SGD()

        @tf.function
        def train_step(data, labels):
            with tf.GradientTape() as tape:
                predictions = model(data)
                loss = loss_object(labels, predictions)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables)
            )

        x = np.random.random((8, 5))
        y = np.random.random((8, 3))
        train_step(x, y)

        traced = saving_utils.trace_model_call(model)
        traced_shape = traced.structured_input_signature[0][0].shape
        self.assertEqual(
            traced_shape.as_list(),
            tf.TensorShape([None, 5]).as_list(),
        )
class EmbeddingTest(test_combinations.TestCase):
    """Tests for `keras.layers.Embedding`."""

    @test_combinations.run_all_keras_modes
    def test_embedding(self):
        """Run `layer_test` over several kwargs / input-shape combinations."""
        if tf.test.is_gpu_available():
            self.skipTest("Only test embedding on CPU.")

        # (layer kwargs, input shape) pairs, exercised in this order.
        cases = (
            ({"output_dim": 4, "input_dim": 10, "input_length": 2}, (3, 2)),
            ({"output_dim": 4, "input_dim": 10, "mask_zero": True}, (3, 2)),
            (
                {"output_dim": 4, "input_dim": 10, "mask_zero": True},
                (3, 4, 2),
            ),
            (
                {
                    "output_dim": 4,
                    "input_dim": 10,
                    "mask_zero": True,
                    "input_length": (None, 2),
                },
                (3, 4, 2),
            ),
        )
        for layer_kwargs, shape in cases:
            test_utils.layer_test(
                keras.layers.Embedding,
                kwargs=layer_kwargs,
                input_shape=shape,
                input_dtype="int32",
                expected_output_dtype="float32",
            )

    @test_combinations.run_all_keras_modes
    def test_embedding_correctness(self):
        """Indices select the matching rows of the weight matrix."""
        embedding = keras.layers.Embedding(output_dim=2, input_dim=2)
        model = keras.models.Sequential([embedding])
        embedding.set_weights([np.array([[1, 1], [2, 2]])])
        model.run_eagerly = test_utils.should_run_eagerly()

        indices = np.array([[0, 1, 0]], dtype="int32")
        outputs = model.predict(indices)
        self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]])

    def test_embedding_incorrect_dimension(self):
        """A zero `input_dim` or `output_dim` raises ValueError."""
        for bad_kwargs in (
            {"input_dim": 0, "output_dim": 1},
            {"input_dim": 1, "output_dim": 0},
        ):
            with self.assertRaises(ValueError):
                keras.layers.Embedding(**bad_kwargs)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_eager_gpu_cpu(self):
        """Gradients of the layer can be applied with a V1 optimizer."""
        embedding = keras.layers.Embedding(output_dim=2, input_dim=2)
        embedding.build((None, 2))
        inputs = keras.backend.constant([[0, 1, 0]], dtype="int32")
        with tf.GradientTape() as tape:
            output = embedding(inputs)
        grads = tape.gradient(output, embedding.weights)
        optimizer = tf.compat.v1.train.AdagradOptimizer(0.1)
        optimizer.apply_gradients(zip(grads, embedding.weights))
        self.assertAllEqual(len(grads), 1)

    @test_combinations.run_all_keras_modes
    def test_embedding_with_ragged_input(self):
        """Ragged inputs produce ragged embedded outputs."""
        table = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
        embedding = keras.layers.Embedding(
            input_dim=3,
            output_dim=2,
            weights=[table],
        )
        inputs = keras.layers.Input(
            shape=(None,), dtype=tf.float32, ragged=True
        )
        passthrough = keras.layers.Lambda(
            lambda args: keras.backend.identity(args)
        )(inputs)
        embedded = embedding(passthrough)
        model = keras.Model(inputs, embedded)
        model.run_eagerly = test_utils.should_run_eagerly()

        ragged_indices = tf.ragged.constant(
            [[1.0, 2.0, 2.0], [0.0], [1.0, 2.0]], ragged_rank=1
        )
        outputs = model.predict(ragged_indices)

        expected = tf.ragged.constant(
            [
                [[1.0, 1.0], [2.0, 2.0], [2.0, 2.0]],
                [[0.0, 0.0]],
                [[1.0, 1.0], [2.0, 2.0]],
            ],
            ragged_rank=1,
        )
        self.assertAllClose(outputs, expected)

    @test_utils.enable_v2_dtype_behavior
    def test_mixed_precision_embedding(self):
        """Under the "mixed_float16" global policy, outputs are float16."""
        try:
            policy.set_global_policy("mixed_float16")
            embedding = keras.layers.Embedding(input_dim=5, output_dim=2)
            self.assertEqual(embedding._dtype_policy.name, "mixed_float16")
            outputs = embedding(np.array([0, 1, 2]))
            self.assertEqual(outputs.dtype, "float16")
        finally:
            # Always put the global policy back to "float32".
            policy.set_global_policy("float32")
class TestWholeModelSaving(test_combinations.TestCase): def _save_model_dir(self, dirname='saved_model'): temp_dir = self.get_temp_dir() self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) return os.path.join(temp_dir, dirname) def _assert_same_weights_and_metrics(self, model, loaded_model): """Checks that the loaded weights and metrics are the same as the original. Args: model: original model loaded_model: loaded model """ self.assertAllClose(model.weights, loaded_model.weights) if loaded_model.optimizer: if test_utils.get_save_format() == 'tf': # TODO(b/153110928): Keras TF format doesn't restore optimizer weights # currently. return self.assertAllClose(model.optimizer.weights, loaded_model.optimizer.weights) # In V1/Graph mode, the model isn't built, so the metrics are not loaded # immediately (requires model to be called on some data before building # metrics). check_metrics = tf.__internal__.tf2.enabled() and tf.executing_eagerly() if check_metrics: self.assertAllEqual([m.name for m in model.metrics], [m.name for m in loaded_model.metrics]) @test_combinations.run_with_all_model_types @test_combinations.run_all_keras_modes def test_save_and_load(self): saved_model_dir = self._save_model_dir() save_format = test_utils.get_save_format() save_kwargs = test_utils.get_save_kwargs() if ((save_format == 'h5' or not save_kwargs.get('save_traces', True)) and test_utils.get_model_type() == 'subclass'): # HDF5 format currently does not allow saving subclassed models. # When saving with `save_traces=False`, the subclassed model must have a # get_config/from_config, which the autogenerated model does not have. 
return with self.cached_session(): model = test_utils.get_model_from_layers( [keras.layers.Dense(2), keras.layers.RepeatVector(3), keras.layers.TimeDistributed(keras.layers.Dense(3))], input_shape=(3,)) model.compile( loss=keras.losses.MSE, optimizer=keras.optimizers.optimizer_v2.rmsprop.RMSprop(lr=0.0001), metrics=[ keras.metrics.categorical_accuracy, keras.metrics.CategoricalCrossentropy( name='cce', label_smoothing=tf.constant(0.2)), ], weighted_metrics=[ keras.metrics.categorical_crossentropy, keras.metrics.CategoricalCrossentropy( name='cce', label_smoothing=tf.constant(0.2)), ], sample_weight_mode='temporal') x = np.random.random((1, 3)) y = np.random.random((1, 3, 3)) model.train_on_batch(x, y) out = model.predict(x) keras.models.save_model( model, saved_model_dir, save_format=save_format, **save_kwargs) loaded_model = keras.models.load_model(saved_model_dir) self._assert_same_weights_and_metrics(model, loaded_model) out2 = loaded_model.predict(x) self.assertAllClose(out, out2, atol=1e-05) eval_out = model.evaluate(x, y) eval_out2 = loaded_model.evaluate(x, y) self.assertArrayNear(eval_out, eval_out2, 0.001) @test_combinations.generate( test_combinations.combine(mode=['graph', 'eager'])) def test_sequential_model_saving_without_input_shape(self): saved_model_dir = self._save_model_dir() save_format = test_utils.get_save_format() with self.cached_session(): model = keras.models.Sequential() model.add(keras.layers.Dense(2)) model.add(keras.layers.RepeatVector(3)) model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) model.compile( loss=keras.losses.MSE, optimizer='rmsprop', metrics=[ keras.metrics.categorical_accuracy, keras.metrics.CategoricalAccuracy(name='cat_acc') ], weighted_metrics=[ keras.metrics.categorical_accuracy, keras.metrics.CategoricalAccuracy(name='cat_acc2') ], sample_weight_mode='temporal') x = np.random.random((1, 3)) y = np.random.random((1, 3, 3)) model.train_on_batch(x, y) out = model.predict(x) model.save(saved_model_dir, 
save_format=save_format) new_model = keras.models.load_model(saved_model_dir) self._assert_same_weights_and_metrics(model, new_model) out2 = new_model.predict(x) self.assertAllClose(out, out2, atol=1e-05) @test_combinations.generate( test_combinations.combine(mode=['graph', 'eager'])) def test_sequential_model_saving_without_compile(self): saved_model_dir = self._save_model_dir() save_format = test_utils.get_save_format() with self.cached_session(): model = keras.models.Sequential() model.add(keras.layers.Dense(2, input_shape=(3,))) model.add(keras.layers.RepeatVector(3)) model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) x = np.random.random((1, 3)) out = model.predict(x) # Save the model without any compilation or training. keras.models.save_model(model, saved_model_dir, save_format=save_format) new_model = keras.models.load_model(saved_model_dir) self._assert_same_weights_and_metrics(model, new_model) out2 = new_model.predict(x) self.assertAllClose(out, out2, atol=1e-05) def test_sequential_model_saving_2(self): saved_model_dir = self._save_model_dir() save_format = test_utils.get_save_format() with tf.Graph().as_default(), self.cached_session(): # test with custom optimizer, loss class CustomOp(optimizer_v1.RMSprop): pass def custom_loss(y_true, y_pred): return keras.losses.mse(y_true, y_pred) model = keras.models.Sequential() model.add(keras.layers.Dense(2, input_shape=(3,))) model.add(keras.layers.Dense(3)) model.compile(loss=custom_loss, optimizer=CustomOp(), metrics=['acc']) x = np.random.random((1, 3)) y = np.random.random((1, 3)) model.train_on_batch(x, y) out = model.predict(x) keras.models.save_model(model, saved_model_dir, save_format=save_format) new_model = keras.models.load_model( saved_model_dir, custom_objects={'CustomOp': CustomOp, 'custom_loss': custom_loss}) self._assert_same_weights_and_metrics(model, new_model) out2 = new_model.predict(x) self.assertAllClose(out, out2, atol=1e-05) def test_saving_without_compilation(self): 
saved_model_dir = self._save_model_dir()
save_format = test_utils.get_save_format()
model = keras.models.Sequential()
model.add(keras.layers.Dense(2, input_shape=(3,)))
model.add(keras.layers.Dense(3))
model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

keras.models.save_model(model, saved_model_dir, save_format=save_format)
model = keras.models.load_model(saved_model_dir)


def test_saving_with_tf_optimizer(self):
  """Saves and reloads a model compiled with a `tf.compat.v1` optimizer."""
  saved_model_dir = self._save_model_dir()
  save_format = test_utils.get_save_format()
  model = keras.models.Sequential()
  model.add(keras.layers.Dense(2, input_shape=(3,)))
  model.add(keras.layers.Dense(3))
  model.compile(loss='mse',
                optimizer=tf.compat.v1.train.AdadeltaOptimizer(0.1),
                metrics=['acc'])

  keras.models.save_model(model, saved_model_dir, save_format=save_format)
  model = keras.models.load_model(saved_model_dir)


def test_saving_right_after_compilation(self):
  """Saves and reloads a model that was compiled but never trained."""
  saved_model_dir = self._save_model_dir()
  save_format = test_utils.get_save_format()
  with self.cached_session():
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
    if not tf.compat.v1.executing_eagerly_outside_functions():
      model._make_train_function()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)


def test_saving_lambda_numpy_array_arguments(self):
  """Checks that numpy `arguments` of a Lambda layer survive save/load."""
  saved_model_dir = self._save_model_dir()
  save_format = test_utils.get_save_format()

  if h5py is None:
    self.skipTest('h5py required to run this test')

  mean = np.random.random((4, 2, 3))
  std = np.abs(np.random.random((4, 2, 3))) + 1e-5
  inputs = keras.layers.Input(shape=(4, 2, 3))
  output = keras.layers.Lambda(lambda image, mu, std: (image - mu) / std,
                               arguments={'mu': mean, 'std': std})(inputs)
  model = keras.models.Model(inputs, output)
  model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

  keras.models.save_model(model, saved_model_dir, save_format=save_format)

  model = keras.models.load_model(saved_model_dir)

  self.assertAllClose(mean, model.layers[1].arguments['mu'])
  self.assertAllClose(std, model.layers[1].arguments['std'])


def test_saving_model_with_long_layer_names(self):
  """Saves a model with a very long layer name and checks HDF5 chunking.

  For the 'tf'/'tensorflow' formats only the save/load round trip is
  exercised; the HDF5 attribute inspection and the prediction comparison
  run for the remaining formats.
  """
  saved_model_dir = self._save_model_dir()
  save_format = test_utils.get_save_format()
  with self.cached_session():
    # This layer name will make the `layers_name` HDF5 attribute blow
    # out of proportion. Note that it fits into the internal HDF5
    # attribute memory limit on its own but because h5py converts
    # the list of layer names into numpy array, which uses the same
    # amount of memory for every item, it increases the memory
    # requirements substantially.
    x = keras.Input(shape=(2,), name='input_' + ('x' * (2**15)))
    f = x
    for i in range(4):
      f = keras.layers.Dense(2, name='dense_%d' % (i,))(f)
    model = keras.Model(inputs=[x], outputs=[f])
    model.compile(
        'adam', loss=keras.losses.MeanSquaredError(), metrics=['acc'])

    x = np.random.random((1, 2))
    y = np.random.random((1, 2))
    model.train_on_batch(x, y)
    out = model.predict(x)

    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

    if save_format in ['tf', 'tensorflow']:
      return
    # Check that the HDF5 files contains chunked array
    # of layer names.
    with h5py.File(saved_model_dir, 'r') as h5file:
      num_names_arrays = len([attr for attr in h5file['model_weights'].attrs
                              if attr.startswith('layer_names')])
    # The chunking of layer names array should have happened.
    self.assertGreater(num_names_arrays, 0)
    out2 = model.predict(x)
    self.assertAllClose(out, out2, atol=1e-05)


def test_saving_model_with_long_weights_names(self):
  """Saves a nested model with a very long weight name and reloads it.

  For the HDF5-based formats the `weight_names*` attributes of the nested
  model's group are counted; predictions are compared for every format.
  """
  saved_model_dir = self._save_model_dir()
  save_format = test_utils.get_save_format()

  with self.cached_session():
    x = keras.Input(shape=(2,), name='nested_model_input')
    f = x
    for i in range(4):
      f = keras.layers.Dense(2, name='nested_model_dense_%d' % (i,))(f)
    # This layer name will make the `weights_name`
    # HDF5 attribute blow out of proportion.
    f = keras.layers.Dense(2, name='nested_model_output' + ('x' * (2**14)))(f)
    nested_model = keras.Model(inputs=[x], outputs=[f], name='nested_model')

    x = keras.Input(shape=(2,), name='outer_model_input')
    f = nested_model(x)
    f = keras.layers.Dense(2, name='outer_model_output')(f)

    model = keras.Model(inputs=[x], outputs=[f])
    model.compile(loss='mse', optimizer='adam', metrics=['acc'])

    x = np.random.random((1, 2))
    y = np.random.random((1, 2))
    model.train_on_batch(x, y)
    out = model.predict(x)

    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

    if save_format in ['h5', 'hdf5', 'keras']:
      # Check that the HDF5 files contains chunked array
      # of weight names.
      with h5py.File(saved_model_dir, 'r') as h5file:
        num_weight_arrays = len(
            [attr for attr in h5file['model_weights']['nested_model'].attrs
             if attr.startswith('weight_names')])
      # The chunking of weight names array should have happened.
      self.assertGreater(num_weight_arrays, 0)
    out2 = model.predict(x)
    self.assertAllClose(out, out2, atol=1e-05)


def test_model_saving_to_pre_created_h5py_file(self):
  """Saves to / loads from already-open `h5py.File` objects.

  The model is first round-tripped through `saved_model_dir`. For non-'tf'
  formats it is then saved into an `h5py.File` opened on a temporary file
  and into an in-memory ('core' driver) `h5py.File`.
  """
  saved_model_dir = self._save_model_dir()
  save_format = test_utils.get_save_format()
  with tf.Graph().as_default(), self.cached_session():
    inputs = keras.Input(shape=(3,))
    x = keras.layers.Dense(2)(inputs)
    outputs = keras.layers.Dense(3)(x)

    model = keras.Model(inputs, outputs)
    model.compile(
        loss=keras.losses.MSE,
        optimizer=optimizer_v1.Adam(),
        metrics=[
            keras.metrics.categorical_accuracy,
            keras.metrics.CategoricalAccuracy()
        ])
    x = np.random.random((1, 3))
    y = np.random.random((1, 3))
    model.train_on_batch(x, y)

    out = model.predict(x)

    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    loaded_model = keras.models.load_model(saved_model_dir)
    out1 = loaded_model.predict(x)
    self.assertAllClose(out, out1, atol=1e-05)
    if save_format in ['tf', 'tensorflow']:
      return

    # Test h5 format specifically
    fd, fname = tempfile.mkstemp('.h5')
    # Release the descriptor and remove the temp file even when one of the
    # assertions below fails, so a failing run does not leak them.
    try:
      with h5py.File(fname, mode='r+') as h5file:
        keras.models.save_model(model, h5file)
        loaded_model = keras.models.load_model(h5file)
        out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

      # Test non-default options in h5
      with h5py.File(
          '_', driver='core', mode='w', backing_store=False) as h5file:
        keras.models.save_model(model, h5file)
        loaded_model = keras.models.load_model(h5file)
        out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)
    finally:
      # Cleanup
      os.close(fd)
      os.remove(fname)


def test_model_saving_to_new_dir_path(self):
  """Saves to a nested path ('newdir/saved_model') and reloads from it."""
  saved_model_dir = os.path.join(self._save_model_dir(), 'newdir',
                                 'saved_model')
  save_format = test_utils.get_save_format()

  with self.cached_session():
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.RepeatVector(3))
    model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

    x = np.random.random((1, 3))
    out = model.predict(x)

    keras.models.save_model(model, saved_model_dir, save_format=save_format)

    new_model = keras.models.load_model(saved_model_dir)
    self._assert_same_weights_and_metrics(model, new_model)

    out2 = new_model.predict(x)
    self.assertAllClose(out, out2, atol=1e-05)


def test_model_raise_exception_with_failed_saving(self):
  """Expects `OSError` when saving to a path already opened for writing."""
  if h5py is None:
    self.skipTest('h5py required to run this test')

  saved_model_dir = self._save_model_dir()
  saved_model_path = os.path.join(saved_model_dir, 'saved_model.h5')

  with self.cached_session():
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.RepeatVector(3))
    model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

    with self.assertRaisesRegex(OSError, 'Unable to create file'):
      with h5py.File(saved_model_path, 'w'):
        keras.models.save_model(model, saved_model_path)


def test_saving_constant_initializer_with_numpy(self):
  """Round-trips a model whose `Constant` initializer holds a numpy array."""
  saved_model_dir = self._save_model_dir()
  save_format = test_utils.get_save_format()

  model = keras.models.Sequential()
  model.add(
      keras.layers.Dense(
          2,
          input_shape=(3,),
          kernel_initializer=keras.initializers.Constant(np.ones((3, 2)))))
  model.add(keras.layers.Dense(3))
  model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
  keras.models.save_model(model, saved_model_dir, save_format=save_format)
  model = keras.models.load_model(saved_model_dir)


def test_saving_group_naming_h5py(self):
  """Saves and reloads weights where one layer name prefixes another."""
  # Test saving model with layer which name is prefix to a previous layer
  # name.

  temp_dir = self.get_temp_dir()
  self.addCleanup(shutil.rmtree, temp_dir)
  h5_path = os.path.join(temp_dir, 'test.h5')

  input_layer = keras.layers.Input((None, None, 3), name='test_input')
  x = keras.layers.Conv2D(1, 1, name='conv1/conv')(input_layer)
  x = keras.layers.Activation('relu', name='conv1')(x)
  model = keras.models.Model(inputs=input_layer, outputs=x)

  model.save_weights(h5_path)
  model.load_weights(h5_path)


def test_primitive_attrs_contain_no_extraneous_strings(self):
  """Checks the `keras_version` HDF5 attribute looks like a bare version."""
  if h5py is None:
    self.skipTest('h5py required to run this test')

  saved_model_dir = self._save_model_dir()
  save_format = test_utils.get_save_format()
  model = keras.models.Sequential()
  model.add(keras.layers.Dense(1, input_shape=[2]))
  model.save(saved_model_dir, save_format=save_format)
  if save_format in ['tf', 'tensorflow']:
    return

  # Open through a context manager so the file handle is always closed.
  with h5py.File(saved_model_dir, 'r') as h5file:
    self.assertRegex(h5file.attrs['keras_version'], r'^[\d]+\.[\d]+\.[\S]+$')


@test_combinations.generate(
    test_combinations.combine(mode=['graph', 'eager']))
def test_functional_model_with_custom_loss_and_metric(self):
  """Checks evaluation results match before and after save/load.

  The model carries an `add_loss` term and an `add_metric` entry named
  'custom_loss' in addition to the compiled loss and metric.
  """

  def _make_model():
    inputs = keras.Input(shape=(4,))
    x = keras.layers.Dense(8, activation='relu')(inputs)
    outputs = keras.layers.Dense(3, activation='softmax')(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    custom_loss = keras.layers.Lambda(lambda x: keras.backend.sum(x * x))(x)
    model.add_loss(custom_loss)
    model.add_metric(custom_loss, aggregation='mean', name='custom_loss')
    return model

  saved_model_dir = self._save_model_dir()
  save_format = test_utils.get_save_format()

  with self.cached_session():
    model = _make_model()
    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(),
        optimizer=optimizers.gradient_descent_v2.SGD(),
        metrics=[keras.metrics.SparseCategoricalCrossentropy()])
    x = np.random.normal(size=(32, 4))
    y = np.random.randint(0, 3, size=32)
    model.train_on_batch(x, y)
    evaluation_results = model.evaluate(x, y)
    # Save and reload model.
    model.save(saved_model_dir, save_format=save_format)
    del model  # Prevent misuse.
    loaded_model = keras.models.load_model(saved_model_dir)
    loaded_model_eval_results = loaded_model.evaluate(x, y)
    # Assert all evaluation results are the same.
    self.assertAllClose(evaluation_results, loaded_model_eval_results, 1e-9)
    # Check correctness of the loss calculation.
    self.assertAllGreater(evaluation_results, 0.)
    evaluation_results = dict(
        zip(loaded_model.metrics_names, evaluation_results))
    self.assertNear(
        evaluation_results['sparse_categorical_crossentropy'] +
        evaluation_results['custom_loss'], evaluation_results['loss'], 1e-6)


@test_combinations.generate(
    test_combinations.combine(mode=['graph', 'eager']))
def test_save_uncompiled_model_with_optimizer(self):
  """Saves a model whose `optimizer` attribute is set without `compile`."""
  with self.cached_session() as session:
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()
    model = keras.models.Sequential([keras.layers.Dense(1, input_shape=(3,))])
    # Set the model's optimizer but don't compile. This can happen if the
    # model is trained with a custom training loop.
    model.optimizer = keras.optimizers.optimizer_v2.rmsprop.RMSprop(lr=0.0001)
    if not tf.executing_eagerly():
      session.run([v.initializer for v in model.variables])
    model.save(saved_model_dir, save_format=save_format)

    if save_format in ['tf', 'tensorflow']:
      loaded = keras.models.load_model(saved_model_dir)
      self.assertIsInstance(
          loaded.optimizer,
          keras.optimizers.optimizer_v2.optimizer_v2.OptimizerV2)


@test_combinations.generate(test_combinations.combine(mode=['eager']))
def test_functional_model_with_getitem_op_layer(self):
  """Round-trips a functional model whose output is a slice of its input."""
  inp = keras.Input(shape=(8))
  out = inp[:]
  model = keras.Model(
      inputs=[inp],
      outputs=out)
  batch_size = 7
  x = tf.stack([
      tf.range(8) for _ in range(batch_size)])
  args = [x]
  expected = x[:]

  self.assertAllEqual(model(args), expected)
  self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)

  # Make sure it can be successfully saved and loaded.
  save_format = test_utils.get_save_format()
  saved_model_dir = self._save_model_dir()
  keras.models.save_model(model, saved_model_dir, save_format=save_format)

  loaded_model = keras.models.load_model(saved_model_dir)

  self.assertAllEqual(loaded_model(args), expected)
  self.assertAllEqual(loaded_model.predict(args, batch_size=batch_size),
                      expected)


@test_combinations.generate(test_combinations.combine(
    mode=['eager', 'graph']))
def test_custom_functional_registered(self):
  """Loads a saved custom `keras.Model` subclass via `custom_objects`.

  The load is done twice: with the class object that built the model and
  with a freshly created class of the same name.
  """

  def _get_cls_definition():
    class CustomModel(keras.Model):

      def c(self):
        return 'c'

    return CustomModel

  cls = _get_cls_definition()
  self.assertEqual(cls.__bases__[0], keras.Model)

  with self.cached_session() as sess:
    input_ = keras.layers.Input(shape=(1,))
    output = keras.layers.Dense(1)(input_)
    model = cls(input_, output)
    # `cls` now inherits from `Functional` class.
    self.assertEqual(cls.__bases__[0], functional.Functional)

    if not tf.executing_eagerly():
      sess.run([v.initializer for v in model.variables])

    save_format = test_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)

  loaded_model = keras.models.load_model(
      saved_model_dir, custom_objects={'CustomModel': cls})
  self.assertIsInstance(loaded_model, cls)

  # Check with "new" `CustomModel` class definition.
  new_cls = _get_cls_definition()
  # The new `CustomModel` class is *not* derived from `Functional`.
  self.assertEqual(new_cls.__bases__[0], keras.Model)
  reloaded_model = keras.models.load_model(
      saved_model_dir, custom_objects={'CustomModel': new_cls})
  self.assertIsInstance(reloaded_model, new_cls)


@test_combinations.generate(test_combinations.combine(mode=['eager']))
def test_shared_objects(self):
  """Checks a layer shared by two outer layers stays shared after loading.

  Sharing is verified both after a save/load round trip and after
  rebuilding the model directly from its config.
  """

  class OuterLayer(keras.layers.Layer):

    def __init__(self, inner_layer):
      super(OuterLayer, self).__init__()
      self.inner_layer = inner_layer

    def call(self, inputs):
      return self.inner_layer(inputs)

    def get_config(self):
      return {
          'inner_layer': generic_utils.serialize_keras_object(
              self.inner_layer)
      }

    @classmethod
    def from_config(cls, config):
      return cls(generic_utils.deserialize_keras_object(
          config['inner_layer']))

  class InnerLayer(keras.layers.Layer):

    def __init__(self):
      super(InnerLayer, self).__init__()
      self.v = self.add_weight(name='v', shape=[], dtype=tf.float32)

    def call(self, inputs):
      return self.v + inputs

    @classmethod
    def from_config(cls, config):
      return cls()

  # Create a model with 2 output layers that share the same inner layer.
  inner_layer = InnerLayer()
  outer_layer_1 = OuterLayer(inner_layer)
  outer_layer_2 = OuterLayer(inner_layer)
  input_ = keras.Input(shape=(1,))
  model = keras.Model(
      inputs=input_, outputs=[outer_layer_1(input_), outer_layer_2(input_)])

  # Changes to the shared layer should affect both outputs.
  model.layers[1].inner_layer.v.assign(5)
  self.assertAllEqual(model(1), [6.0, 6.0])
  model.layers[1].inner_layer.v.assign(3)
  self.assertAllEqual(model(1), [4.0, 4.0])

  # After loading, changes to the shared layer should still affect both
  # outputs.
  def _do_assertions(loaded):
    loaded.layers[1].inner_layer.v.assign(5)
    self.assertAllEqual(loaded(1), [6.0, 6.0])
    loaded.layers[1].inner_layer.v.assign(3)
    self.assertAllEqual(loaded(1), [4.0, 4.0])
    loaded.layers[2].inner_layer.v.assign(5)
    self.assertAllEqual(loaded(1), [6.0, 6.0])
    loaded.layers[2].inner_layer.v.assign(3)
    self.assertAllEqual(loaded(1), [4.0, 4.0])

  # We'd like to make sure we only attach shared object IDs when strictly
  # necessary, so we'll recursively traverse the generated config to count
  # whether we have the exact number we expect.
  def _get_all_keys_recursive(dict_or_iterable):
    if isinstance(dict_or_iterable, dict):
      for key in dict_or_iterable.keys():
        yield key
      for key in _get_all_keys_recursive(dict_or_iterable.values()):
        yield key
    elif isinstance(dict_or_iterable, str):
      return
    else:
      try:
        for item in dict_or_iterable:
          for key in _get_all_keys_recursive(item):
            yield key
      # Not an iterable or dictionary
      except TypeError:
        return

  with generic_utils.CustomObjectScope({
      'OuterLayer': OuterLayer, 'InnerLayer': InnerLayer}):

    # Test saving and loading to disk
    save_format = test_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    loaded = keras.models.load_model(saved_model_dir)
    _do_assertions(loaded)

    # Test recreating directly from config
    config = model.get_config()
    key_count = collections.Counter(_get_all_keys_recursive(config))
    self.assertEqual(key_count[generic_utils.SHARED_OBJECT_KEY], 2)
    loaded = keras.Model.from_config(config)
    _do_assertions(loaded)


@test_combinations.generate(test_combinations.combine(mode=['eager']))
def test_shared_objects_wrapper(self):
  """Tests that shared layers wrapped with `Wrapper` restore correctly."""
  input_ = keras.Input(shape=(1,))
  unwrapped = keras.layers.Layer(name='unwrapped')
  wrapped = keras.layers.Wrapper(unwrapped, name='wrapped')
  model = keras.Model(inputs=input_,
                      outputs=[unwrapped(input_), wrapped(input_)])

  # Test recreating directly from config
  config = model.get_config()
  loaded = keras.Model.from_config(config)
  self.assertIs(loaded.layers[1], loaded.layers[2].layer)

  # Test saving and loading to disk
  save_format = test_utils.get_save_format()
  saved_model_dir = self._save_model_dir()
  keras.models.save_model(model, saved_model_dir, save_format=save_format)
  loaded = keras.models.load_model(saved_model_dir)
  self.assertIs(loaded.layers[1], loaded.layers[2].layer)


@test_combinations.generate(
    test_combinations.combine(mode=['graph', 'eager'], fit=[True, False]))
def test_multi_output_metrics_name_stay_same(self, fit):
  """Tests that metric names don't change with each save/load cycle.

  e.g. "head_0_accuracy" should not become "head_0_head_0_accuracy" after
  saving and loading a model.

  Arguments:
    fit: Whether the model should be fit before saving.
  """
  # This doesn't work at all, so we can't check whether metric names are
  # correct.
  if not tf.executing_eagerly() and not fit:
    self.skipTest('b/181767784')

  input_ = keras.Input((4,))
  model = keras.Model(
      input_,
      [keras.layers.Softmax(name='head_0')(keras.layers.Dense(3)(input_)),
       keras.layers.Softmax(name='head_1')(keras.layers.Dense(5)(input_))])
  metric = keras.metrics.BinaryAccuracy()
  model.compile(optimizer='rmsprop',
                loss='mse',
                metrics={'head_0': [metric, 'accuracy']})

  x = np.random.rand(2, 4)
  y = {'head_0': np.random.randint(2, size=(2, 3)),
       'head_1': np.random.randint(2, size=(2, 5))}

  # Make sure metric prefixing works the same regardless of whether the user
  # has fit the model before saving.
  if fit:
    model.fit(x, y, verbose=0)

  # Save and reload.
  save_format = test_utils.get_save_format()
  saved_model_dir = self._save_model_dir()
  keras.models.save_model(model, saved_model_dir, save_format=save_format)
  loaded = keras.models.load_model(saved_model_dir)

  # Make sure the metrics names from the model before saving match the loaded
  # model.
  self.assertSequenceEqual(model.metrics_names, loaded.metrics_names)


@test_combinations.generate(
    test_combinations.combine(mode=['graph', 'eager']))
def test_warning_when_saving_invalid_custom_mask_layer(self):
  """Checks when `CustomMaskWarning` is (and is not) raised on save.

  A masking layer that overrides `compute_mask` without `get_config` must
  produce the warning; one that also defines `get_config` must not.
  """

  class MyMasking(keras.layers.Layer):

    def call(self, inputs):
      return inputs

    def compute_mask(self, inputs, mask=None):
      mask = tf.not_equal(inputs, 0)
      return mask

  class MyLayer(keras.layers.Layer):

    def call(self, inputs, mask=None):
      return tf.identity(inputs)

  samples = np.random.random((2, 2))

  model = keras.Sequential([MyMasking(), MyLayer()])
  model.predict(samples)
  with warnings.catch_warnings(record=True) as w:
    # Record every warning, even ones already reported elsewhere in this
    # process, so the assertion does not depend on warning-registry state.
    warnings.simplefilter('always')
    # Pass the format by keyword: the second positional parameter of
    # `Model.save` is `overwrite`, not `save_format`.
    model.save(self._save_model_dir(),
               save_format=test_utils.get_save_format())
  self.assertIn(generic_utils.CustomMaskWarning,
                {warning.category for warning in w})

  # Test that setting up a custom mask correctly does not issue a warning.
  class MyCorrectMasking(keras.layers.Layer):

    def call(self, inputs):
      return inputs

    def compute_mask(self, inputs, mask=None):
      mask = tf.not_equal(inputs, 0)
      return mask

    # This get_config doesn't actually do anything because our mask is
    # static and doesn't need any external information to work. We do need a
    # dummy get_config method to prevent the warning from appearing, however.
    def get_config(self, *args, **kwargs):
      return {}

  model = keras.Sequential([MyCorrectMasking(), MyLayer()])
  model.predict(samples)
  with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter('always')
    model.save(self._save_model_dir(),
               save_format=test_utils.get_save_format())
  self.assertNotIn(generic_utils.CustomMaskWarning,
                   {warning.category for warning in w})


# Test only in eager mode because ragged tensor inputs
# cannot be used in graph mode.
@test_combinations.generate(test_combinations.combine(mode=['eager']))
@test_utils.run_v2_only
def test_save_functional_with_ragged_constant_input(self):
  """Saves and reloads a functional model that adds a ragged constant."""
  symbolic = keras.Input(shape=[])
  ragged = tf.ragged.constant([[1., 2.], [3.]])
  # Only `symbolic` is a model input; the ragged value is a constant operand.
  model = keras.Model(symbolic, keras.layers.Add()([symbolic, ragged]))
  target_dir = self._save_model_dir()
  model.save(target_dir)
  keras.models.load_model(target_dir)


@test_combinations.generate(test_combinations.combine(mode=['eager']))
@test_utils.run_v2_only
def test_save_functional_with_constant_input(self):
  """Saves and reloads a functional model that adds a dense constant."""
  symbolic = keras.Input(shape=[2])
  constant = tf.constant([[1., 2.]])
  # Only `symbolic` is a model input; the tensor is a constant operand.
  model = keras.Model(symbolic, keras.layers.Add()([symbolic, constant]))
  target_dir = self._save_model_dir()
  model.save(target_dir)
  keras.models.load_model(target_dir)
class KerasModelTest(test_combinations.TestCase):
    """Test mixed precision with Keras models."""

    def _skip_if_strategy_unsupported(self, strategy_fn):
        """Skip the test for non-default strategies with subclassed models.

        Args:
          strategy_fn: The strategy factory the calling test was
            parameterized with; compared against `default_strategy_fn`.
        """
        if (
            strategy_fn != default_strategy_fn
            and test_utils.get_model_type() == "subclass"
        ):
            self.skipTest(
                "Non-default strategies are unsupported with subclassed "
                "models"
            )

    def _skip_if_save_format_unsupported(self, save_format):
        """Skip the test for save-format / model-type pairs that cannot run.

        Args:
          save_format: The save format requested by the calling test
            ("h5", "tf", or None).
        """
        model_type = test_utils.get_model_type()
        if save_format == "h5" and model_type == "subclass":
            self.skipTest(
                "Saving subclassed models with the HDF5 format is "
                "unsupported"
            )
        if (
            save_format == "tf"
            and model_type == "subclass"
            and not tf.executing_eagerly()
        ):
            self.skipTest(
                "b/148820505: This combination of features is currently "
                "broken."
            )

    @test_combinations.run_with_all_model_types
    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {"testcase_name": "base", "strategy_fn": default_strategy_fn},
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "operator",
            "strategy_fn": create_mirrored_strategy,
            "use_operator": True,
        },
        {
            "testcase_name": "regularizer",
            "strategy_fn": create_mirrored_strategy,
            "use_regularizer": True,
        },
        {
            "testcase_name": "get_config",
            "strategy_fn": create_mirrored_strategy,
            "get_config": True,
            "use_regularizer": True,
        },
        {
            "testcase_name": "saved_model",
            "strategy_fn": default_strategy_fn,
            "save_format": "tf",
            "use_regularizer": True,
        },
        {
            "testcase_name": "saved_model_input_spec",
            "strategy_fn": default_strategy_fn,
            "save_format": "tf",
            "use_regularizer": True,
            "use_input_spec": True,
        },
        {
            "testcase_name": "h5",
            "strategy_fn": default_strategy_fn,
            "save_format": "h5",
            "use_regularizer": True,
        },
        {
            "testcase_name": "saved_model_distribute",
            "strategy_fn": create_mirrored_strategy,
            "save_format": "tf",
            "use_regularizer": True,
        },
        {
            "testcase_name": "saved_model_input_spec_distribute",
            "strategy_fn": create_mirrored_strategy,
            "save_format": "tf",
            "use_regularizer": True,
            "use_input_spec": True,
        },
        {
            "testcase_name": "h5_distribute",
            "strategy_fn": create_mirrored_strategy,
            "save_format": "h5",
            "use_regularizer": True,
        },
    )
    def test_model(
        self,
        strategy_fn,
        use_operator=False,
        use_regularizer=False,
        policy_name="mixed_float16",
        get_config=False,
        save_format=None,
        use_input_spec=False,
    ):
        """Fits a one-`MultiplyLayer` model once and checks its variable.

        Args:
          strategy_fn: Callable returning the strategy whose scope is used.
          use_operator: Forwarded to `MultiplyLayer`.
          use_regularizer: If True, attaches an `IdentityRegularizer` weight
            regularizer and a `ReduceSumRegularizer` activity regularizer.
          policy_name: Name passed to `policy.policy_scope`.
          get_config: If True, rebuilds the model via
            `get_config`/`from_config` before compiling.
          save_format: If truthy, additionally runs `_test_saving` with this
            format.
          use_input_spec: If True, sets an `InputSpec` on the layer.
        """
        self._skip_if_strategy_unsupported(strategy_fn)
        self._skip_if_save_format_unsupported(save_format)
        if use_regularizer:
            weight_regularizer = mp_test_util.IdentityRegularizer()
            activity_regularizer = mp_test_util.ReduceSumRegularizer()
        else:
            weight_regularizer = activity_regularizer = None
        with strategy_fn().scope():
            with policy.policy_scope(policy_name):
                layer = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    use_operator=use_operator,
                    regularizer=weight_regularizer,
                    activity_regularizer=activity_regularizer,
                    input_shape=(1,),
                )
                if use_input_spec:
                    layer.input_spec = input_spec.InputSpec(shape=(None, 1))
                model = test_utils.get_model_from_layers(
                    [layer], input_shape=(1,), input_dtype=tf.float16
                )
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            "MultiplyLayer": mp_test_util.MultiplyLayer
                        },
                    )
                    # Re-bind `layer` to the MultiplyLayer of the rebuilt
                    # model; the unpacking requires exactly one match.
                    (layer,) = (
                        layer
                        for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer)
                    )

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                # Learning rate is small enough that if applied to a float16 variable,
                # the variable will not change. So this tests the learning rate not
                # applied to a float16 value, but instead the float32 variable.
                opt = gradient_descent.SGD(2**-14)
                # Use a fixed loss scale, as this test will fail if gradients are
                # skipped for a step due to dynamic loss scaling.
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, dynamic=False, initial_scale=8
                )
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        # Variable starts at 1, and should have gradient of 2 ** -14 subtracted
        # from it.
        expected = 1 - 2**-14
        if use_regularizer:
            # Weight and activity regularizer each add another 2 ** -14 to the
            # gradient.
            expected -= 2 * 2**-14
        self.assertEqual(backend.eval(layer.v), expected)

        if save_format:
            with generic_utils.CustomObjectScope(
                {
                    "MultiplyLayer": mp_test_util.MultiplyLayer,
                    "loss_fn": loss_fn,
                }
            ):
                self._test_saving(model, dataset, save_format, use_regularizer)

    def _test_saving(self, model, dataset, save_format, use_regularizer):
        """Saves `model`, reloads it, and checks values and dtype attributes.

        Args:
          model: Model that has already been fit once by the caller.
          dataset: Dataset passed to `fit` to continue training.
          save_format: Format passed to `model.save`.
          use_regularizer: Whether the expected values include the two
            regularizer contributions.
        """
        # Save and load model, asserting variable does not change
        save_path = os.path.join(self.get_temp_dir(), "model")
        model.save(save_path, save_format=save_format)
        model = save.load_model(save_path)
        (layer,) = (
            layer
            for layer in model.layers
            if "MultiplyLayer" in layer.__class__.__name__
        )
        expected = 1 - 2**-14
        if use_regularizer:
            expected -= 2 * 2**-14
        self.assertEqual(backend.eval(layer.v), expected)

        # Continue training, and assert variable is correct value
        model.fit(dataset)
        new_expected = expected - 2**-14
        if use_regularizer:
            new_expected -= 2 * 2**-14
        self.assertEqual(backend.eval(layer.v), new_expected)

        # Load saved model again, and assert variable is previous value
        model = save.load_model(save_path)
        (layer,) = (
            layer
            for layer in model.layers
            if "MultiplyLayer" in layer.__class__.__name__
        )
        self.assertEqual(backend.eval(layer.v), expected)

        # Ensure various dtype-related aspects of the layer are correct
        self.assertEqual(layer.dtype, "float32")
        self.assertEqual(layer.dtype_policy.name, "mixed_float16")
        self.assertEqual(layer.v.dtype, "float32")
        self.assertEqual(layer(np.ones((2, 1))).dtype, "float16")

        self.assertEqual(type(model.dtype_policy), policy.Policy)
        self.assertEqual(
            layer.get_config()["dtype"],
            {"class_name": "Policy", "config": {"name": "mixed_float16"}},
        )

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {"testcase_name": "base", "strategy_fn": default_strategy_fn},
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
    )
    def test_fixed_loss_scaling(self, strategy_fn):
        """Checks gradients and the final variable with a fixed loss scale.

        Args:
          strategy_fn: Callable returning the strategy whose scope is used.
        """
        # Note: We do not test mixed precision in this method, only loss scaling.
        loss_scale = 8.0
        batch_size = 4
        with strategy_fn().scope():
            x = layers.Input(shape=(1,), batch_size=batch_size)
            layer = mp_test_util.MultiplyLayer()
            y = layer(x)

            # The gradient of 'y' at this point is 1. With loss scaling, the gradient
            # is 'loss_scale'. We divide by the batch size since the loss is averaged
            # across batch elements.
            expected_gradient = loss_scale / batch_size
            identity_with_grad_check_fn = (
                mp_test_util.create_identity_with_grad_check_fn(
                    [expected_gradient]
                )
            )
            y = core.Lambda(identity_with_grad_check_fn)(y)
            model = models.Model(inputs=x, outputs=y)

            def loss_fn(y_true, y_pred):
                del y_true
                return tf.reduce_mean(y_pred)

            opt = gradient_descent.SGD(1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, dynamic=False, initial_scale=loss_scale
            )
            model.compile(
                opt, loss=loss_fn, run_eagerly=test_utils.should_run_eagerly()
            )

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
        model.fit(dataset)
        # Variable starts at 1, and should have gradient of 1 subtracted from it.
        expected = 0
        self.assertEqual(backend.eval(layer.v), expected)

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {"testcase_name": "base", "strategy_fn": default_strategy_fn},
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "loss_scaling",
            "strategy_fn": create_mirrored_strategy,
            "use_loss_scaling": True,
        },
    )
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        """Fits a four-layer mixed-precision model and checks each variable.

        Args:
          strategy_fn: Callable returning the strategy whose scope is used.
          use_loss_scaling: If True, wraps the optimizer in a fixed-scale
            `LossScaleOptimizer` and adds a gradient-check layer.
        """
        # The advanced model tests mixed-precision-related features that would occur
        # in a resnet50 model. It tests a model that has:
        #  * Multiple layers, some which use auto-cast variables and some which do
        #    not
        #  * Regularization on some variables and not others.
        #  * A fixed loss scale (if use_loss_scaling is True)

        strategy = strategy_fn()
        # `loss_scale` is only bound, and only read, when use_loss_scaling is
        # True.
        if use_loss_scaling:
            loss_scale = 8.0
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(policy.Policy("mixed_float16")):
                x = layers.Input(shape=(1,), batch_size=2)
                layer1 = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=True,
                )
                layer2 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16, use_operator=True
                )
                layer3 = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16, use_operator=False
                )
                layer4 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=False,
                )
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss scaling, the
                    # gradient is 'loss_scale'. We divide by the batch size of 2 since the
                    # loss is averaged across batch elements.
                    expected_gradient = loss_scale / 2
                    identity_with_grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=tf.float16,
                            expected_gradient=[expected_gradient],
                        )
                    )
                    y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                if use_loss_scaling:
                    opt = loss_scale_optimizer.LossScaleOptimizer(
                        opt, dynamic=False, initial_scale=loss_scale
                    )
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for layer in (layer1, layer2, layer3, layer4):
            if layer.losses:
                # Layer has weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
            else:
                # Layer does not have weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - learning_rate)

    @test_combinations.run_all_keras_modes(always_skip_v1=True)
    @parameterized.named_parameters(
        {"testcase_name": "base", "strategy_fn": default_strategy_fn},
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "get_config",
            "strategy_fn": create_mirrored_strategy,
            "get_config": True,
        },
    )
    def test_dynamic_loss_scaling(self, strategy_fn, get_config=False):
        """Steps a model through finite and NaN gradients with dynamic scaling.

        Args:
          strategy_fn: Callable returning the strategy whose scope is used.
          get_config: If True, rebuilds the model via
            `get_config`/`from_config` before compiling.
        """
        strategy = strategy_fn()
        initial_loss_scale = 2.0
        batch_size = 4
        # Held in a backend variable so the expected value can be changed
        # between `fit` calls with `backend.set_value`.
        expected_gradient = backend.variable(
            [initial_loss_scale / batch_size], dtype=tf.float16
        )
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=tf.bool)
        with strategy.scope():
            opt = gradient_descent.SGD(1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=initial_loss_scale, dynamic_growth_steps=2
            )
            with policy.policy_scope("mixed_float16"):
                x = layers.Input(
                    shape=(1,), batch_size=batch_size, dtype=tf.float16
                )
                layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients
                    )
                )
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=tf.float16,
                        expected_gradient=expected_gradient,
                    )
                )
                y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            "MultiplyLayer": mp_test_util.MultiplyLayer
                        },
                    )
                    (layer,) = (
                        layer
                        for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer)
                    )

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down
        # by 1 each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(
            expected_gradient, backend.get_value(expected_gradient * 2)
        )
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(
            expected_gradient, backend.get_value(expected_gradient / 2)
        )
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_compile_wraps_with_loss_scale_optimizer(self):
        """Checks when `compile` wraps the optimizer in a LossScaleOptimizer."""
        x = layers.Input(shape=(1,))
        y = mp_test_util.MultiplyLayer()(x)

        with policy.policy_scope("mixed_float16"):
            # Test optimizer is automatically wrapped with LSO
            model = models.Model(x, y)
            model.compile(gradient_descent.SGD(1.0), "mse")
            self.assertIsInstance(
                model.optimizer, loss_scale_optimizer.LossScaleOptimizer
            )
            self.assertEqual(
                backend.get_value(model.optimizer.learning_rate), 1.0
            )

            # Test optimizer specified as string is automatically wrapped in LSO
            model = models.Model(x, y)
            model.compile("sgd", "mse")
            self.assertIsInstance(
                model.optimizer, loss_scale_optimizer.LossScaleOptimizer
            )

            # Test if an LSO is passed, optimizer is not automatically wrapped with
            # another LSO
            model = models.Model(x, y)
            optimizer = loss_scale_optimizer.LossScaleOptimizer(
                gradient_descent.SGD(1.0), dynamic_growth_steps=2
            )
            model.compile(optimizer, "mse")
            self.assertIsInstance(
                model.optimizer, loss_scale_optimizer.LossScaleOptimizer
            )
            self.assertEqual(model.optimizer.dynamic_growth_steps, 2)

        with policy.policy_scope("mixed_bfloat16"):
            # Test mixed_bfloat16 models are not automatically wrapped with LSO
            model = models.Model(x, y)
            model.compile(gradient_descent.SGD(1.0), "mse")
            self.assertNotIsInstance(
                model.optimizer, loss_scale_optimizer.LossScaleOptimizer
            )
            self.assertIsInstance(model.optimizer, gradient_descent.SGD)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_pass_invalid_optimizer_with_loss_scaling(self):
        """Expects `ValueError` when compiling with an `optimizer_v1` optimizer."""
        with policy.policy_scope(policy.Policy("mixed_float16")):
            x = layers.Input(shape=(1,))
            y = mp_test_util.MultiplyLayer()(x)
            model = models.Model(x, y)
            # The expected error message differs between eager and graph
            # mode.
            if tf.executing_eagerly():
                error_msg = "Use a `tf.keras` Optimizer instead"
            else:
                error_msg = 'optimizer" must be an instance of '
            with self.assertRaisesRegex(ValueError, error_msg):
                model.compile(optimizer_v1.SGD(1.0), "mse")

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_functional_model_loss_dtype(self):
        """Checks an `add_loss` tensor keeps its float32 dtype under float16."""
        with policy.policy_scope("float16"):
            x = layers.Input(shape=(1,))
            y = mp_test_util.MultiplyLayer()(x)
            model = models.Model(x, y)
            model.add_loss(tf.cast(y, "float32"))
            # The loss should not be casted to the policy's dtype.
            self.assertEqual(model.losses[0].dtype, "float32")

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {
            "testcase_name": "base",
            "strategy_fn": default_strategy_fn,
        },
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "base_h5",
            "strategy_fn": default_strategy_fn,
            "h5": True,
        },
        {
            "testcase_name": "distribute_h5",
            "strategy_fn": create_mirrored_strategy,
            "h5": True,
        },
    )
    def test_save_weights_with_autocast_vars(self, strategy_fn, h5=False):
        """Checks `save_weights`/`load_weights` restore the layer's weight.

        Args:
          strategy_fn: Callable returning the strategy whose scope is used.
          h5: If True, the weights file name gets a ".h5" suffix.
        """
        with strategy_fn().scope():
            with policy.policy_scope("mixed_float16"):
                x = layers.Input(shape=(1,), batch_size=2)
                layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
                y = layer(x)
                model = models.Model(inputs=x, outputs=y)

        model.set_weights([np.array(100.0)])
        x = np.ones((2, 1))
        self.assertAllClose(backend.get_value(model(x)), x * 100.0)
        suffix = ".h5" if h5 else ""
        weights_file = os.path.join(self.get_temp_dir(), "weights" + suffix)
        model.save_weights(weights_file)

        # Overwrite the weight so that a successful load is observable.
        model.set_weights([np.array(200.0)])
        self.assertAllClose(backend.get_value(model(x)), x * 200.0)
        model.load_weights(weights_file)
        self.assertAllClose(backend.get_value(model(x)), x * 100.0)
        self.assertEqual(model.get_weights(), [np.array(100.0)])

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {
            "testcase_name": "base",
            "strategy_fn": default_strategy_fn,
        },
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "different_var_name",
            "strategy_fn": default_strategy_fn,
            "var_name": "w",
        },
        {
            "testcase_name": "different_var_name_distribute",
            "strategy_fn": create_mirrored_strategy,
            "var_name": "w",
        },
    )
    def test_save_slot_variables_with_autocast_vars(
        self, strategy_fn, var_name="v"
    ):
        """Checks the optimizer's "momentum" slot is restored by `load_weights`.

        Args:
          strategy_fn: Callable returning the strategy whose scope is used.
          var_name: Variable name passed to `MultiplyLayer`.
        """
        p = policy.Policy("mixed_float16")
        with strategy_fn().scope(), policy.policy_scope(p):
            x = layers.Input(shape=(2,), batch_size=2)
            # Having a var_name other than 'v' tests that a fixed bug (b/134713714)
            # does not reoccur. The bug was that a crash would occur when saving a
            # checkpoint where an AutoCastVariable with a slot variable would have a
            # different name than the layer attribute's name (layer.v in this case).
            layer = mp_test_util.MultiplyLayer(
                assert_type=tf.float16, var_name=var_name
            )
            y = layer(x)
            model = models.Model(inputs=x, outputs=y)
            opt = gradient_descent.SGD(1.0, 1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, dynamic=False, initial_scale=1
            )
            model.compile(
                optimizer=opt,
                loss="mse",
                run_eagerly=test_utils.should_run_eagerly(),
            )

        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        weights_file = os.path.join(self.get_temp_dir(), "weights")
        model.save_weights(weights_file)
        saved_slot = backend.get_value(opt.get_slot(layer.v, "momentum"))

        # Train one more step so the slot differs from the saved value.
        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        new_slot = backend.get_value(opt.get_slot(layer.v, "momentum"))
        self.assertNotEqual(new_slot, saved_slot)

        model.load_weights(weights_file)
        restored_slot = backend.get_value(opt.get_slot(layer.v, "momentum"))
        self.assertEqual(restored_slot, saved_slot)

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(*TESTCASES)
    def test_save_weights_with_dynamic_loss_scaling(self, strategy_fn):
        strategy = strategy_fn()
if ( isinstance(strategy, tf.distribute.MirroredStrategy) and not tf.executing_eagerly() ): # TODO(b/121381184): Enable running the test in this case. return # Create and run model. with strategy.scope(): x = layers.Input(shape=(2,), batch_size=2, dtype=tf.float32) y = mp_test_util.MultiplyLayer(assert_type=tf.float32)(x) model = models.Model(inputs=x, outputs=y) opt = gradient_descent.SGD(1.0) opt = loss_scale_optimizer.LossScaleOptimizer( opt, initial_scale=1.0, dynamic_growth_steps=2.0 ) model.compile( optimizer=opt, loss="mse", run_eagerly=test_utils.should_run_eagerly(), ) # Run for 3 steps (6 examples with a batch size of 2) model.fit(np.zeros((6, 2)), np.zeros((6, 2)), batch_size=2) self.assertEqual(backend.get_value(opt.loss_scale), 2) self.assertEqual(backend.get_value(opt.dynamic_counter), 1) # Save model weights. save_prefix = os.path.join(self.get_temp_dir(), "ckpt") model.save_weights(save_prefix) # Run model again for 1 step (2 examples with a batch size of 2) model.fit(np.zeros((2, 2)), np.zeros((2, 2)), batch_size=2) self.assertEqual(backend.get_value(opt.loss_scale), 4) self.assertEqual(backend.get_value(opt.dynamic_counter), 0) # Load model weights and ensure loss scale weights are restored. model.load_weights(save_prefix) self.assertEqual(backend.get_value(opt.loss_scale), 2) self.assertEqual(backend.get_value(opt.dynamic_counter), 1) @test_combinations.run_all_keras_modes def test_restore_old_loss_scale_checkpoint(self): # Ensure a checkpoint from TF 2.2 can be loaded. The checkpoint format # of LossScaleOptimizer changed, but old checkpoints can still be loaded opt = gradient_descent.SGD(0.1, momentum=0.1) opt = loss_scale_optimizer.LossScaleOptimizer(opt) model = sequential.Sequential( [ core.Dense( 2, ) ] ) # The checkpoint and expected values were obtained from the program in # testdata/BUILD. 
ckpt_dir = os.path.join( flags.FLAGS["test_srcdir"].value, "org_keras/keras", "mixed_precision/testdata/lso_ckpt_tf2.2", ) # ckpt_dir = test.test_src_dir_path( # 'python/keras/mixed_precision/testdata/lso_ckpt_tf2.2') model.load_weights(os.path.join(ckpt_dir, "ckpt")) model.compile(opt, "mse", run_eagerly=test_utils.should_run_eagerly()) model(np.zeros((2, 2))) # Create model weights opt._create_all_weights(model.weights) expected_kernel = np.array( [[9.229685, 10.901115], [10.370763, 9.757362]] ) expected_slot = np.array([[10.049943, 9.917691], [10.049943, 9.917691]]) self.assertAllClose(self.evaluate(model.weights[0]), expected_kernel) self.assertAllClose( self.evaluate(opt.get_slot(model.weights[0], "momentum")), expected_slot, ) self.assertEqual(self.evaluate(opt.loss_scale), 32768) self.assertEqual(self.evaluate(opt.dynamic_counter), 1) # Check restoring works even after the model is compiled and the weights # have been created. model.fit(np.random.normal(size=(2, 2)), np.random.normal(size=(2, 2))) self.assertNotAllClose(self.evaluate(model.weights[0]), expected_kernel) self.assertNotAllClose( self.evaluate(opt.get_slot(model.weights[0], "momentum")), expected_slot, ) model.load_weights(os.path.join(ckpt_dir, "ckpt")) self.assertAllClose(self.evaluate(model.weights[0]), expected_kernel) self.assertAllClose( self.evaluate(opt.get_slot(model.weights[0], "momentum")), expected_slot, ) self.assertEqual(self.evaluate(opt.loss_scale), 32768) self.assertEqual(self.evaluate(opt.dynamic_counter), 1) def test_restore_old_saved_model(self): saved_model_dir = os.path.join( flags.FLAGS["test_srcdir"].value, "org_keras/keras", "mixed_precision/testdata/lso_savedmodel_tf2.2", ) # saved_model_dir = test.test_src_dir_path( # 'python/keras/mixed_precision/testdata/' # 'lso_savedmodel_tf2.2') model = save.load_model(saved_model_dir) expected_kernel = np.array( [[9.229685, 10.901115], [10.370763, 9.757362]] ) self.assertAllClose(backend.eval(model.weights[0]), expected_kernel) 
self.assertEqual( type(model.optimizer), loss_scale_optimizer.LossScaleOptimizer ) @test_combinations.run_all_keras_modes @parameterized.named_parameters( { "testcase_name": "base", "strategy_fn": default_strategy_fn, }, { "testcase_name": "distribute", "strategy_fn": create_mirrored_strategy, }, { "testcase_name": "base_h5", "strategy_fn": default_strategy_fn, "h5": True, }, { "testcase_name": "distribute_h5", "strategy_fn": create_mirrored_strategy, "h5": True, }, ) def test_save_model_with_dynamic_loss_scaling(self, strategy_fn, h5=False): # TODO(reedwm): Support and test saving model with a mixed_[b]float16 policy # as well. strategy = strategy_fn() if ( isinstance(strategy, tf.distribute.MirroredStrategy) and not tf.executing_eagerly() ): # TODO(b/121381184): Enable running the test in this case. return # Create and run model. with strategy.scope(): x = layers.Input(shape=(2,), batch_size=2, dtype=tf.float32) y = mp_test_util.MultiplyLayer()(x) model = models.Model(inputs=x, outputs=y) opt = gradient_descent.SGD(1.0) opt = loss_scale_optimizer.LossScaleOptimizer( opt, initial_scale=1.0, dynamic_growth_steps=2.0 ) model.compile( optimizer=opt, loss="mse", run_eagerly=test_utils.should_run_eagerly(), ) # Run for 3 steps (6 examples with a batch size of 2) model.fit(np.ones((6, 2)), np.zeros((6, 2)), batch_size=2) self.assertEqual(backend.get_value(opt.loss_scale), 2) self.assertEqual(backend.get_value(opt.dynamic_counter), 1) (weight,) = model.trainable_weights orig_weight = backend.get_value(weight) # Save model weights. 
save_path = os.path.join(self.get_temp_dir(), "model") model.save(save_path, save_format="h5" if h5 else "tf") # Run model again for 1 step (2 examples with a batch size of 2) model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2) new_weight = backend.get_value(weight) self.assertNotEqual(new_weight, orig_weight) self.assertEqual(backend.get_value(opt.loss_scale), 4) self.assertEqual(backend.get_value(opt.dynamic_counter), 0) # Load model weights and ensure loss scale weights are restored. model = save.load_model( save_path, custom_objects={"MultiplyLayer": mp_test_util.MultiplyLayer}, ) (weight,) = model.trainable_weights loaded_weight = backend.get_value(weight) self.assertEqual(loaded_weight, orig_weight) # Currently the loss scale isn't always saved when the model is saved with # Model.save(). So we assert the loss scale either has the value when it was # saved, or the value it was initialized with. # TODO(reedwm): Always save/restore the loss scale with Model.save(). self.assertIn(backend.get_value(model.optimizer.loss_scale), (1, 2)) self.assertIn( backend.get_value(model.optimizer.dynamic_counter), (0, 1) ) # Test optimizer attributes and type self.assertEqual(model.optimizer.initial_scale, 1.0) self.assertEqual(model.optimizer.dynamic_growth_steps, 2.0) self.assertEqual( type(model.optimizer), loss_scale_optimizer.LossScaleOptimizer )
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import numpy as np
import tensorflow.compat.v2 as tf

import keras
from keras import backend
from keras.testing_infra import test_combinations
from keras.engine import base_layer_utils


@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager']))
class TrackableWeightHandlerTest(test_combinations.TestCase):
    """Tests `TrackableWeightHandler` wrapped around a mutable hash table."""

    def get_table_handler(self):
        """Returns a fresh handler wrapping a string -> int32 hash table."""
        # Note: There is some repetition in these tests' setup. However,
        # Tensorflow does not play nicely with a separate setUp() call (causing
        # errors related to graph building), so we have to use a called setup
        # instead of a setUp() call.
        return base_layer_utils.TrackableWeightHandler(
            tf.lookup.experimental.MutableHashTable(
                key_dtype=tf.string, value_dtype=tf.int32, default_value=0))

    def test_get_num_tensors(self):
        """The handler built by `get_table_handler` reports two tensors."""
        handler = self.get_table_handler()
        self.assertEqual(2, handler.num_tensors)
class AdamaxOptimizerTest(tf.test.TestCase, parameterized.TestCase):
    """Checks `adamax.Adamax` against NumPy reference updates.

    The reference helpers (`adamax_update_numpy`,
    `adamax_sparse_update_numpy`) and `get_beta_accumulators` are module-level
    names that these tests call; they are not defined inside this class.
    """

    def testResourceSparse(self):
        """Sparse (`IndexedSlices`) gradients match the sparse reference."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                # Initialize variables for numpy implementation.
                np_dtype = dtype.as_numpy_dtype
                m0, v0, m1, v1 = (
                    np.zeros((3), dtype=np_dtype) for _ in range(4))
                var0_np = np.array([1.0, 2.0, 3.0], dtype=np_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=np_dtype)
                var1_np = np.array([4.0, 5.0, 6.0], dtype=np_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=np_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)

                grads0_np_indices = np.array([0, 1], dtype=np.int32)
                grads0 = tf.IndexedSlices(
                    tf.constant(grads0_np),
                    tf.constant(grads0_np_indices),
                    tf.constant([3]))
                grads1_np_indices = np.array([2, 1], dtype=np.int32)
                grads1 = tf.IndexedSlices(
                    tf.constant(grads1_np),
                    tf.constant(grads1_np_indices),
                    tf.constant([3]))
                opt = adamax.Adamax()
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0, 3.0], var0)
                self.assertAllClose([4.0, 5.0, 6.0], var1)

                beta1_power = get_beta_accumulators(opt, dtype)

                # Run 3 steps of Adamax
                for t in range(3):
                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), beta1_power)
                    update.run()

                    var0_np, m0, v0 = adamax_sparse_update_numpy(
                        var0_np, grads0_np_indices, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_sparse_update_numpy(
                        var1_np, grads1_np_indices, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0)
                    self.assertAllCloseAccordingToType(var1_np, var1)

    def testSparseDevicePlacement(self):
        """All ops of a sparse minimize step can run on a GPU if one exists."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        # `tf.test.is_gpu_available()` is deprecated; the documented
        # replacement is to query the physical GPU devices. The answer does not
        # depend on the loop variable, so it is computed once.
        gpu_available = bool(tf.config.list_physical_devices("GPU"))
        for index_dtype in [tf.int32, tf.int64]:
            with tf.Graph().as_default(), self.cached_session(
                    force_gpu=gpu_available):
                # If a GPU is available, tests that all optimizer ops can be
                # placed on it (i.e. they have GPU kernels).
                var = tf.Variable([[1.0], [2.0]])
                indices = tf.constant([0, 1], dtype=index_dtype)
                g_sum = lambda: tf.reduce_sum(tf.gather(var, indices))  # pylint: disable=cell-var-from-loop
                optimizer = adamax.Adamax(3.0)
                minimize_op = optimizer.minimize(g_sum, var_list=[var])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                minimize_op.run()

    def testSparseRepeatedIndices(self):
        """Repeated sparse indices behave like one pre-aggregated gradient."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                repeated_index_update_var = tf.Variable(
                    [[1.0], [2.0]], dtype=dtype)
                aggregated_update_var = tf.Variable(
                    [[1.0], [2.0]], dtype=dtype)
                # Two 0.1 contributions to row 1 ...
                grad_repeated_index = tf.IndexedSlices(
                    tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
                    tf.constant([1, 1]),
                    tf.constant([2, 1]))
                # ... versus a single 0.2 contribution to the same row.
                grad_aggregated = tf.IndexedSlices(
                    tf.constant([0.2], shape=[1, 1], dtype=dtype),
                    tf.constant([1]),
                    tf.constant([2, 1]))
                repeated_update = adamax.Adamax().apply_gradients(
                    [(grad_repeated_index, repeated_index_update_var)])
                aggregated_update = adamax.Adamax().apply_gradients(
                    [(grad_aggregated, aggregated_update_var)])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertAllClose(aggregated_update_var,
                                    repeated_index_update_var.eval())
                for _ in range(3):
                    repeated_update.run()
                    aggregated_update.run()
                    self.assertAllClose(aggregated_update_var,
                                        repeated_index_update_var.eval())

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testBasic(self):
        """Dense updates with default hyperparameters match the reference."""
        for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
            with self.session(graph=tf.Graph(), use_gpu=True):
                # Initialize variables for numpy implementation.
                m0 = np.array([0.0, 0.0])
                v0 = np.array([0.0, 0.0])
                m1 = np.array([0.0, 0.0])
                v1 = np.array([0.0, 0.0])
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np, name="var0_%d" % i)
                var1 = tf.Variable(var1_np, name="var1_%d" % i)

                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)

                opt = adamax.Adamax()
                # In graph mode the update op is built once and re-run below;
                # in eager mode `apply_gradients` is called on every step.
                if not tf.executing_eagerly():
                    update = opt.apply_gradients(
                        zip([grads0, grads1], [var0, var1]))
                    self.evaluate(tf.compat.v1.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of Adamax
                for t in range(3):
                    beta_1_power = get_beta_accumulators(opt, dtype)
                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), self.evaluate(beta_1_power))
                    if not tf.executing_eagerly():
                        self.evaluate(update)
                    else:
                        opt.apply_gradients(
                            zip([grads0, grads1], [var0, var1]))

                    var0_np, m0, v0 = adamax_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(
                        var0_np, self.evaluate(var0), rtol=1e-2)
                    self.assertAllCloseAccordingToType(
                        var1_np, self.evaluate(var1), rtol=1e-2)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testBasicWithLearningRateDecay(self):
        """Dense updates with `decay` follow lr / (1 + decay * t)."""
        for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
            with self.session(graph=tf.Graph(), use_gpu=True):
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np, name="var0_%d" % i)
                var1 = tf.Variable(var1_np, name="var1_%d" % i)

                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)

                learning_rate = 0.001
                decay = 0.002
                opt = adamax.Adamax(learning_rate=learning_rate, decay=decay)
                # In graph mode the update op is built once and re-run below;
                # in eager mode `apply_gradients` is called on every step.
                if not tf.executing_eagerly():
                    update = opt.apply_gradients(
                        zip([grads0, grads1], [var0, var1]))
                    self.evaluate(tf.compat.v1.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of Adamax
                for t in range(3):
                    beta_1_power = get_beta_accumulators(opt, dtype)
                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), self.evaluate(beta_1_power))
                    if not tf.executing_eagerly():
                        self.evaluate(update)
                    else:
                        opt.apply_gradients(
                            zip([grads0, grads1], [var0, var1]))

                    # Learning rate the reference implementation uses at
                    # step t.
                    lr = learning_rate / (1 + decay * t)

                    var0_np, m0, v0 = adamax_update_numpy(
                        var0_np, grads0_np, t, m0, v0, alpha=lr)
                    var1_np, m1, v1 = adamax_update_numpy(
                        var1_np, grads1_np, t, m1, v1, alpha=lr)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(
                        var0_np, self.evaluate(var0), rtol=1e-2)
                    self.assertAllCloseAccordingToType(
                        var1_np, self.evaluate(var1), rtol=1e-2)

    def testTensorLearningRate(self):
        """A tensor-valued learning rate gives the same updates."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)
                opt = adamax.Adamax(tf.constant(0.001))
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0)
                self.assertAllClose([3.0, 4.0], var1)

                beta1_power = get_beta_accumulators(opt, dtype)

                # Run 3 steps of Adamax
                for t in range(3):
                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), beta1_power)
                    update.run()

                    var0_np, m0, v0 = adamax_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0)
                    self.assertAllCloseAccordingToType(var1_np, var1)

    def testSharing(self):
        """Two update ops built from one optimizer share its state."""
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)
                opt = adamax.Adamax()
                update1 = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                update2 = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                beta1_power = get_beta_accumulators(opt, dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0)
                self.assertAllClose([3.0, 4.0], var1)

                # Run 3 steps of intertwined Adamax1 and Adamax2.
                for t in range(3):
                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), beta1_power)
                    if t % 2 == 0:
                        update1.run()
                    else:
                        update2.run()

                    var0_np, m0, v0 = adamax_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0)
                    self.assertAllCloseAccordingToType(var1_np, var1)

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def testSlotsUniqueEager(self):
        """After one minimize step the optimizer owns 5 distinct variables."""
        v1 = tf.Variable(1.)
        v2 = tf.Variable(1.)
        opt = adamax.Adamax(1.)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be iteration, and two unique slot variables for each of
        # v1 and v2, i.e. 1 + 2 * 2 = 5 distinct objects.
        # NOTE(review): the per-variable slot count is inferred from the
        # expected total of 5 -- confirm against the Adamax implementation.
        self.assertLen({id(v) for v in opt.variables()}, 5)

    def testConstructAdamaxWithLR(self):
        """`lr` sets the learning rate, and wins over `learning_rate`."""
        opt = adamax.Adamax(lr=1.0)
        opt_2 = adamax.Adamax(learning_rate=0.1, lr=1.0)
        opt_3 = adamax.Adamax(learning_rate=0.1)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
import numpy as np import keras from keras.testing_infra import test_combinations from keras.feature_column import sequence_feature_column as ksfc from keras.saving import model_config def _initialized_session(config=None): sess = tf.compat.v1.Session(config=config) sess.run(tf.compat.v1.global_variables_initializer()) sess.run(tf.compat.v1.tables_initializer()) return sess @test_combinations.generate(test_combinations.combine(mode=["graph", "eager"])) class SequenceFeaturesTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters( { "testcase_name": "2D", "sparse_input_args_a": { # example 0, ids [2] # example 1, ids [0, 1] "indices": ((0, 0), (1, 0), (1, 1)), "values": (2, 0, 1), "dense_shape": (2, 2), }, "sparse_input_args_b": { # example 0, ids [1] # example 1, ids [2, 0]
class LayerNormalizationNumericsTest(test_combinations.TestCase):
    """Tests LayerNormalization has correct and numerically stable outputs."""

    def _expected_layer_norm(self, x, beta, gamma, batch_input_shape, axis,
                             epsilon):
        """Returns the reference layer norm of `x`, computed with NumPy."""
        # Keep the size of every normalized axis and collapse the others to 1
        # so that gamma/beta can be reshaped to broadcast against `x`.
        broadcast_shape = [
            dim if idx in axis else 1
            for idx, dim in enumerate(batch_input_shape)
        ]
        mean = np.mean(x, axis=axis, keepdims=True)
        var = np.var(x, axis=axis, keepdims=True)
        normalized = (x - mean) / np.sqrt(var + epsilon)
        scaled = normalized * np.reshape(gamma, broadcast_shape)
        return scaled + np.reshape(beta, broadcast_shape)

    def _test_forward_pass(self, batch_input_shape, axis, fp64_tol=1e-14,
                           fp32_tol=1e-6, fp16_tol=1e-2):
        """Tests the forward pass of LayerNormalization.

        Args:
          batch_input_shape: The input shape that will be used to test,
            including the batch dimension.
          axis: A list of axes to normalize. Will be passed to the `axis`
            argument of LayerNormalization.
          fp64_tol: The relative and absolute tolerance for float64.
          fp32_tol: The relative and absolute tolerance for float32.
          fp16_tol: The relative and absolute tolerance for float16.
        """
        param_shape = [batch_input_shape[i] for i in axis]
        param_elems = int(np.prod(param_shape))
        beta = np.arange(param_elems, dtype='float64').reshape(param_shape)
        gamma = np.arange(
            1, param_elems + 1, dtype='float64').reshape(param_shape)
        x = np.random.normal(size=batch_input_shape)
        dtype_tolerances = (
            ('float64', fp64_tol),
            ('float32', fp32_tol),
            ('float16', fp16_tol),
        )

        for epsilon in 1e-12, 1e-3:
            expected = self._expected_layer_norm(
                x, beta, gamma, batch_input_shape, axis, epsilon)
            for dtype, tol in dtype_tolerances:
                norm = layer_normalization.LayerNormalization(
                    axis=axis,
                    dtype=dtype,
                    batch_input_shape=batch_input_shape,
                    epsilon=epsilon,
                    beta_initializer=keras.initializers.constant(beta),
                    gamma_initializer=keras.initializers.constant(gamma))
                actual = keras.backend.eval(norm(keras.backend.cast(x, dtype)))

                # We use absolute tolerances in addition to relative
                # tolerances, because some of the values are very close to
                # zero.
                self.assertAllClose(expected, actual, rtol=tol, atol=tol)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_forward(self):
        """Runs the forward-pass check over a range of shapes and axes."""
        # For numeric stability, we ensure the axis's dimension(s) have at
        # least 4 elements.
        cases = (
            ((4, 3), (0,), {}),
            ((3, 4), (1,), {}),
            ((4, 3, 2), (0,), {}),
            ((2, 4, 2), (1,), {}),
            ((2, 3, 4), (2,), {'fp16_tol': 5e-2}),
            ((2, 3, 2), (0, 2), {}),
            ((2, 2, 2, 2), (1, 3), {}),
            ((2, 2, 2, 2), (2, 3), {}),
            ((2, 3, 4, 5), (3,), {}),
        )
        for batch_input_shape, axis, tolerances in cases:
            self._test_forward_pass(batch_input_shape, axis, **tolerances)

    def _test_backward_pass(self, batch_input_shape, axis, fp64_tol=1e-5,
                            fp32_tol=1e-5, fp16_tol=2e-2):
        """Tests the backwards pass of LayerNormalization.

        Args:
          batch_input_shape: The input shape that will be used to test,
            including the batch dimension.
          axis: A list of axes to normalize. Will be passed to the `axis`
            argument of LayerNormalization.
          fp64_tol: The relative and absolute tolerance for float64.
          fp32_tol: The relative and absolute tolerance for float32.
          fp16_tol: The relative and absolute tolerance for float16.
        """
        param_shape = [batch_input_shape[i] for i in axis]
        param_elems = int(np.prod(param_shape))
        beta = np.arange(param_elems, dtype='float64').reshape(param_shape)
        gamma = np.arange(
            1, param_elems + 1, dtype='float64').reshape(param_shape)
        x = np.random.normal(size=batch_input_shape)
        # Float64 must come first in this sequence, as we use the float64
        # numerical gradients to compare to the float32 and float16 symbolic
        # gradients as well. Computing float32/float16 numerical gradients is
        # too numerically unstable.
        dtype_tolerances = (
            ('float64', fp64_tol),
            ('float32', fp32_tol),
            ('float16', fp16_tol),
        )
        patch_attr = tf.compat.v1.test.mock.patch.object

        for epsilon in 1e-12, 1e-3:
            for dtype, tol in dtype_tolerances:
                norm = layer_normalization.LayerNormalization(
                    axis=axis,
                    dtype=dtype,
                    batch_input_shape=batch_input_shape,
                    epsilon=epsilon,
                    beta_initializer=keras.initializers.constant(beta),
                    gamma_initializer=keras.initializers.constant(gamma))
                norm.build(x.shape)

                # pylint: disable=cell-var-from-loop
                def forward_fn(x, beta, gamma):
                    # We must monkey-patch the attributes of `norm` with the
                    # function arguments, so that the gradient checker will
                    # properly compute their gradients. The gradient checker
                    # computes gradients with respect to the input arguments
                    # of `f`.
                    with patch_attr(norm, 'beta', beta), patch_attr(
                            norm, 'gamma', gamma):
                        return norm(x)
                # pylint: enable=cell-var-from-loop

                theoretical, numerical = tf.test.compute_gradient(
                    forward_fn,
                    [keras.backend.cast(x, dtype), norm.beta, norm.gamma])
                x_grad_t, beta_grad_t, gamma_grad_t = theoretical
                x_grad_n, beta_grad_n, gamma_grad_n = numerical

                if dtype == 'float64':
                    # We use the float64 numeric gradients as the reference,
                    # to compare against the symbolic gradients for all
                    # dtypes.
                    x_grad_ref = x_grad_n
                    beta_grad_ref = beta_grad_n
                    gamma_grad_ref = gamma_grad_n

                # We use absolute tolerances in addition to relative
                # tolerances, because some of the values are very close to
                # zero.
                self.assertAllClose(x_grad_t, x_grad_ref, rtol=tol, atol=tol)
                self.assertAllClose(
                    beta_grad_t, beta_grad_ref, rtol=tol, atol=tol)
                self.assertAllClose(
                    gamma_grad_t, gamma_grad_ref, rtol=tol, atol=tol)

    # The gradient_checker_v2 does not work properly with LayerNorm in graph
    # mode.
    @test_utils.run_v2_only
    def test_backward(self):
        """Runs the backward-pass check over a range of shapes and axes."""
        # For numeric stability, we ensure the axis's dimension(s) have at
        # least 4 elements.
        cases = (
            ((4, 3), (0,), {}),
            ((2, 4, 2), (1,), {}),
            ((2, 3, 4), (2,), {}),
            ((2, 3, 2), (0, 2), {'fp64_tol': 5e-4, 'fp32_tol': 5e-4}),
            ((2, 2, 2, 2), (1, 3), {}),
            ((2, 2, 2, 2), (2, 3), {}),
        )
        for batch_input_shape, axis, tolerances in cases:
            self._test_backward_pass(batch_input_shape, axis, **tolerances)
class TestTensorBoardV1(tf.test.TestCase, parameterized.TestCase): def test_TensorBoard(self): np.random.seed(1337) temp_dir = self.get_temp_dir() self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( train_samples=TRAIN_SAMPLES, test_samples=TEST_SAMPLES, input_shape=(INPUT_DIM, ), num_classes=NUM_CLASSES, ) y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) def data_generator(train): if train: max_batch_index = len(x_train) // BATCH_SIZE else: max_batch_index = len(x_test) // BATCH_SIZE i = 0 while 1: if train: yield ( x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE], y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE], ) else: yield ( x_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE], y_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE], ) i += 1 i %= max_batch_index # case: Sequential with tf.Graph().as_default(), self.cached_session(): model = sequential.Sequential() model.add( layers.Dense(NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu")) # non_trainable_weights: moving_variance, moving_mean model.add(layers.BatchNormalization()) model.add(layers.Dense(NUM_CLASSES, activation="softmax")) model.compile( loss="categorical_crossentropy", optimizer="sgd", metrics=["accuracy"], ) tsb = callbacks_v1.TensorBoard( log_dir=temp_dir, histogram_freq=1, write_images=True, write_grads=True, batch_size=5, ) cbks = [tsb] # fit with validation data model.fit( x_train, y_train, batch_size=BATCH_SIZE, validation_data=(x_test, y_test), callbacks=cbks, epochs=3, verbose=0, ) # fit with validation data and accuracy model.fit( x_train, y_train, batch_size=BATCH_SIZE, validation_data=(x_test, y_test), callbacks=cbks, epochs=2, verbose=0, ) # fit generator with validation data model.fit_generator( data_generator(True), len(x_train), epochs=2, validation_data=(x_test, y_test), callbacks=cbks, verbose=0, ) # fit generator without validation data # histogram_freq must be zero tsb.histogram_freq = 0 
model.fit_generator( data_generator(True), len(x_train), epochs=2, callbacks=cbks, verbose=0, ) # fit generator with validation data and accuracy tsb.histogram_freq = 1 model.fit_generator( data_generator(True), len(x_train), epochs=2, validation_data=(x_test, y_test), callbacks=cbks, verbose=0, ) # fit generator without validation data and accuracy tsb.histogram_freq = 0 model.fit_generator(data_generator(True), len(x_train), epochs=2, callbacks=cbks) assert os.path.exists(temp_dir) def test_TensorBoard_multi_input_output(self): np.random.seed(1337) tmpdir = self.get_temp_dir() self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) with tf.Graph().as_default(), self.cached_session(): filepath = os.path.join(tmpdir, "logs") (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( train_samples=TRAIN_SAMPLES, test_samples=TEST_SAMPLES, input_shape=(INPUT_DIM, ), num_classes=NUM_CLASSES, ) y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) def data_generator(train): if train: max_batch_index = len(x_train) // BATCH_SIZE else: max_batch_index = len(x_test) // BATCH_SIZE i = 0 while 1: if train: # simulate multi-input/output models yield ( [x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2, [y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2, ) else: yield ( [x_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2, [y_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2, ) i += 1 i %= max_batch_index inp1 = input_layer.Input((INPUT_DIM, )) inp2 = input_layer.Input((INPUT_DIM, )) inp = layers.add([inp1, inp2]) hidden = layers.Dense(2, activation="relu")(inp) hidden = layers.Dropout(0.1)(hidden) output1 = layers.Dense(NUM_CLASSES, activation="softmax")(hidden) output2 = layers.Dense(NUM_CLASSES, activation="softmax")(hidden) model = training.Model([inp1, inp2], [output1, output2]) model.compile( loss="categorical_crossentropy", optimizer="sgd", metrics=["accuracy"], ) # we must generate new callbacks for each test, as they aren't stateless 
def callbacks_factory(histogram_freq): return [ callbacks_v1.TensorBoard( log_dir=filepath, histogram_freq=histogram_freq, write_images=True, write_grads=True, batch_size=5, ) ] # fit without validation data model.fit( [x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE, callbacks=callbacks_factory(histogram_freq=0), epochs=3, ) # fit with validation data and accuracy model.fit( [x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE, validation_data=([x_test] * 2, [y_test] * 2), callbacks=callbacks_factory(histogram_freq=1), epochs=2, ) # fit generator without validation data model.fit_generator( data_generator(True), len(x_train), epochs=2, callbacks=callbacks_factory(histogram_freq=0), ) # fit generator with validation data and accuracy model.fit_generator( data_generator(True), len(x_train), epochs=2, validation_data=([x_test] * 2, [y_test] * 2), callbacks=callbacks_factory(histogram_freq=1), ) assert os.path.isdir(filepath) def test_Tensorboard_histogram_summaries_in_test_function(self): class FileWriterStub: def __init__(self, logdir, graph=None): self.logdir = logdir self.graph = graph self.steps_seen = [] def add_summary(self, summary, global_step): summary_obj = tf.compat.v1.Summary() # ensure a valid Summary proto is being sent if isinstance(summary, bytes): summary_obj.ParseFromString(summary) else: assert isinstance(summary, tf.compat.v1.Summary) summary_obj = summary # keep track of steps seen for the merged_summary op, # which contains the histogram summaries if len(summary_obj.value) > 1: self.steps_seen.append(global_step) def flush(self): pass def close(self): pass def _init_writer(obj, _): obj.writer = FileWriterStub(obj.log_dir) np.random.seed(1337) tmpdir = self.get_temp_dir() self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( train_samples=TRAIN_SAMPLES, test_samples=TEST_SAMPLES, input_shape=(INPUT_DIM, ), num_classes=NUM_CLASSES, ) y_test = np_utils.to_categorical(y_test) 
y_train = np_utils.to_categorical(y_train) with tf.Graph().as_default(), self.cached_session(): model = sequential.Sequential() model.add( layers.Dense(NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu")) # non_trainable_weights: moving_variance, moving_mean model.add(layers.BatchNormalization()) model.add(layers.Dense(NUM_CLASSES, activation="softmax")) model.compile( loss="categorical_crossentropy", optimizer="sgd", metrics=["accuracy"], ) callbacks_v1.TensorBoard._init_writer = _init_writer tsb = callbacks_v1.TensorBoard( log_dir=tmpdir, histogram_freq=1, write_images=True, write_grads=True, batch_size=5, ) cbks = [tsb] # fit with validation data model.fit( x_train, y_train, batch_size=BATCH_SIZE, validation_data=(x_test, y_test), callbacks=cbks, epochs=3, verbose=0, ) self.assertAllEqual(tsb.writer.steps_seen, [0, 1, 2, 3, 4, 5]) def test_Tensorboard_histogram_summaries_with_generator(self): np.random.seed(1337) tmpdir = self.get_temp_dir() self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) def generator(): x = np.random.randn(10, 100).astype(np.float32) y = np.random.randn(10, 10).astype(np.float32) while True: yield x, y with tf.Graph().as_default(), self.cached_session(): model = test_utils.get_small_sequential_mlp(num_hidden=10, num_classes=10, input_dim=100) model.compile( loss="categorical_crossentropy", optimizer="sgd", metrics=["accuracy"], ) tsb = callbacks_v1.TensorBoard( log_dir=tmpdir, histogram_freq=1, write_images=True, write_grads=True, batch_size=5, ) cbks = [tsb] # fit with validation generator model.fit_generator( generator(), steps_per_epoch=2, epochs=2, validation_data=generator(), validation_steps=2, callbacks=cbks, verbose=0, ) with self.assertRaises(ValueError): # fit with validation generator but no # validation_steps model.fit_generator( generator(), steps_per_epoch=2, epochs=2, validation_data=generator(), callbacks=cbks, verbose=0, ) self.assertTrue(os.path.exists(tmpdir)) def test_TensorBoard_with_ReduceLROnPlateau(self): 
with self.cached_session(): temp_dir = self.get_temp_dir() self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( train_samples=TRAIN_SAMPLES, test_samples=TEST_SAMPLES, input_shape=(INPUT_DIM, ), num_classes=NUM_CLASSES, ) y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) model = test_utils.get_small_sequential_mlp( num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM, ) model.compile( loss="binary_crossentropy", optimizer="sgd", metrics=["accuracy"], ) cbks = [ callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=4, verbose=1), callbacks_v1.TensorBoard(log_dir=temp_dir), ] model.fit( x_train, y_train, batch_size=BATCH_SIZE, validation_data=(x_test, y_test), callbacks=cbks, epochs=2, verbose=0, ) assert os.path.exists(temp_dir) def test_Tensorboard_batch_logging(self): class FileWriterStub: def __init__(self, logdir, graph=None): self.logdir = logdir self.graph = graph self.batches_logged = [] self.summary_values = [] self.summary_tags = [] def add_summary(self, summary, step): self.summary_values.append(summary.value[0].simple_value) self.summary_tags.append(summary.value[0].tag) self.batches_logged.append(step) def flush(self): pass def close(self): pass with tf.Graph().as_default(): temp_dir = self.get_temp_dir() self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="batch") tb_cbk.writer = FileWriterStub(temp_dir) for batch in range(5): tb_cbk.on_batch_end(batch, {"acc": batch}) self.assertEqual(tb_cbk.writer.batches_logged, [0, 1, 2, 3, 4]) self.assertEqual(tb_cbk.writer.summary_values, [0.0, 1.0, 2.0, 3.0, 4.0]) self.assertEqual(tb_cbk.writer.summary_tags, ["batch_acc"] * 5) def test_Tensorboard_epoch_and_batch_logging(self): class FileWriterStub: def __init__(self, logdir, graph=None): self.logdir = logdir self.graph = graph def add_summary(self, summary, step): 
if "batch_" in summary.value[0].tag: self.batch_summary = (step, summary) elif "epoch_" in summary.value[0].tag: self.epoch_summary = (step, summary) def flush(self): pass def close(self): pass with tf.Graph().as_default(): temp_dir = self.get_temp_dir() self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="batch") tb_cbk.writer = FileWriterStub(temp_dir) tb_cbk.on_batch_end(0, {"acc": 5.0}) tb_cbk.on_train_end() batch_step, batch_summary = tb_cbk.writer.batch_summary self.assertEqual(batch_step, 0) self.assertEqual(batch_summary.value[0].simple_value, 5.0) tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="epoch") tb_cbk.writer = FileWriterStub(temp_dir) tb_cbk.on_epoch_end(0, {"acc": 10.0}) tb_cbk.on_train_end() epoch_step, epoch_summary = tb_cbk.writer.epoch_summary self.assertEqual(epoch_step, 0) self.assertEqual(epoch_summary.value[0].simple_value, 10.0) @test_combinations.generate( test_combinations.combine(mode=["graph", "eager"])) def test_Tensorboard_eager(self): temp_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( train_samples=TRAIN_SAMPLES, test_samples=TEST_SAMPLES, input_shape=(INPUT_DIM, ), num_classes=NUM_CLASSES, ) y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) model = test_utils.get_small_sequential_mlp(num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM) model.compile( loss="binary_crossentropy", optimizer=tf.compat.v1.train.AdamOptimizer(0.01), metrics=["accuracy"], ) cbks = [callbacks_v1.TensorBoard(log_dir=temp_dir)] model.fit( x_train, y_train, batch_size=BATCH_SIZE, validation_data=(x_test, y_test), callbacks=cbks, epochs=2, verbose=0, ) self.assertTrue(os.path.exists(temp_dir)) def test_TensorBoard_update_freq(self): class FileWriterStub: def __init__(self, logdir, graph=None): self.logdir = 
logdir self.graph = graph self.batch_summaries = [] self.epoch_summaries = [] def add_summary(self, summary, step): if "batch_" in summary.value[0].tag: self.batch_summaries.append((step, summary)) elif "epoch_" in summary.value[0].tag: self.epoch_summaries.append((step, summary)) def flush(self): pass def close(self): pass with tf.Graph().as_default(): temp_dir = self.get_temp_dir() self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True) # Epoch mode tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="epoch") tb_cbk.writer = FileWriterStub(temp_dir) tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 1}) self.assertEqual(tb_cbk.writer.batch_summaries, []) tb_cbk.on_epoch_end(0, {"acc": 10.0, "size": 1}) self.assertLen(tb_cbk.writer.epoch_summaries, 1) tb_cbk.on_train_end() # Batch mode tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="batch") tb_cbk.writer = FileWriterStub(temp_dir) tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 1}) self.assertLen(tb_cbk.writer.batch_summaries, 1) tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 1}) self.assertLen(tb_cbk.writer.batch_summaries, 2) self.assertFalse(tb_cbk.writer.epoch_summaries) tb_cbk.on_train_end() # Integer mode tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq=20) tb_cbk.writer = FileWriterStub(temp_dir) tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10}) self.assertFalse(tb_cbk.writer.batch_summaries) tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10}) self.assertLen(tb_cbk.writer.batch_summaries, 1) tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10}) self.assertLen(tb_cbk.writer.batch_summaries, 1) tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10}) self.assertLen(tb_cbk.writer.batch_summaries, 2) tb_cbk.on_batch_end(0, {"acc": 10.0, "size": 10}) self.assertLen(tb_cbk.writer.batch_summaries, 2) self.assertFalse(tb_cbk.writer.epoch_summaries) tb_cbk.on_train_end()
class LayerNormalizationTest(test_combinations.TestCase):
    """Tests for `LayerNormalization`: construction, weights and outputs."""

    @test_combinations.run_all_keras_modes
    def test_basic_layernorm(self):
        """Runs `layer_test` over several constructor-argument variations."""
        layer_cls = keras.layers.LayerNormalization
        cases = (
            (
                {
                    'gamma_regularizer': keras.regularizers.l2(0.01),
                    'beta_regularizer': keras.regularizers.l2(0.01),
                },
                (3, 4, 2),
            ),
            (
                {'gamma_initializer': 'ones', 'beta_initializer': 'ones'},
                (3, 4, 2),
            ),
            ({'scale': False, 'center': False}, (3, 3)),
            ({'axis': (-3, -2, -1)}, (2, 8, 8, 3)),
        )
        for layer_kwargs, shape in cases:
            test_utils.layer_test(
                layer_cls, kwargs=layer_kwargs, input_shape=shape)

        # Last case: no kwargs, and an input with a zero-sized dimension.
        test_utils.layer_test(layer_cls, input_shape=(1, 0, 10))

    @test_combinations.run_all_keras_modes
    def test_non_fused_layernorm(self):
        """Runs `layer_test` for axis settings -2, (-3, -2) and (-3, -1)."""
        layer_cls = keras.layers.LayerNormalization
        cases = (
            (-2, (3, 4, 2)),
            ((-3, -2), (2, 8, 8, 3)),
            ((-3, -1), (2, 8, 8, 3)),
        )
        for axis, shape in cases:
            test_utils.layer_test(
                layer_cls, kwargs={'axis': axis}, input_shape=shape)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_layernorm_weights(self):
        """Weight count is 0 without scale/center and 2 with the defaults."""
        build_shape = (None, 3, 4)

        plain = keras.layers.LayerNormalization(scale=False, center=False)
        plain.build(build_shape)
        self.assertEqual(len(plain.trainable_weights), 0)
        self.assertEqual(len(plain.weights), 0)

        default = keras.layers.LayerNormalization()
        default.build(build_shape)
        self.assertEqual(len(default.trainable_weights), 2)
        self.assertEqual(len(default.weights), 2)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_layernorm_regularization(self):
        """Regularizers add two losses; constraints reach gamma and beta."""
        build_shape = (None, 3, 4)

        regularized = keras.layers.LayerNormalization(
            gamma_regularizer='l1', beta_regularizer='l1')
        regularized.build(build_shape)
        self.assertEqual(len(regularized.losses), 2)

        max_norm = keras.constraints.max_norm
        constrained = keras.layers.LayerNormalization(
            gamma_constraint=max_norm, beta_constraint=max_norm)
        constrained.build(build_shape)
        self.assertEqual(constrained.gamma.constraint, max_norm)
        self.assertEqual(constrained.beta.constraint, max_norm)

    @test_combinations.run_all_keras_modes
    def test_layernorm_convnet_channel_last(self):
        """After undoing beta/gamma, outputs have mean ~0 and std ~1."""
        norm = keras.layers.LayerNormalization(input_shape=(4, 4, 3))
        model = keras.models.Sequential()
        model.add(norm)
        model.compile(
            loss='mse',
            optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01),
            run_eagerly=test_utils.should_run_eagerly())

        # centered on 5.0, variance 10.0
        data = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3))
        model.fit(data, data, epochs=4, verbose=0)

        # Undo the learned shift and scale before checking the statistics.
        per_channel = (1, 1, 1, 3)
        out = model.predict(data)
        out -= np.reshape(keras.backend.eval(norm.beta), per_channel)
        out /= np.reshape(keras.backend.eval(norm.gamma), per_channel)

        reduce_axes = (0, 1, 2)
        np.testing.assert_allclose(
            np.mean(out, axis=reduce_axes), 0.0, atol=1e-1)
        np.testing.assert_allclose(
            np.std(out, axis=reduce_axes), 1.0, atol=1e-1)

    @test_combinations.run_all_keras_modes
    def test_layernorm_ragged_tensor(self):
        """A ragged input yields an output of shape (3, None, 3)."""
        ragged_rows = [
            [[3., 1., 1.], [4., 1., 1.]],
            [[5., 9., 1.]],
            [[1., 2., 1.]],
        ]
        x = tf.ragged.constant(ragged_rows, inner_shape=(3,))
        layer = keras.layers.LayerNormalization()
        self.assertEqual(layer(x).shape, (3, None, 3))

    @test_combinations.run_all_keras_modes
    def test_layernorm_correctness(self):
        """Runs the shared correctness check with dtype float32."""
        _run_layernorm_correctness_test(
            layer_normalization.LayerNormalization, dtype='float32')

    @test_combinations.run_all_keras_modes
    def test_layernorm_mixed_precision(self):
        """Runs the shared correctness check with dtype float16."""
        _run_layernorm_correctness_test(
            layer_normalization.LayerNormalization, dtype='float16')

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def testIncorrectAxisType(self):
        """A dict passed as `axis` is rejected with TypeError."""
        expected_message = r'Expected an int or a list/tuple of ints'
        with self.assertRaisesRegex(TypeError, expected_message):
            layer_normalization.LayerNormalization(axis={'axis': -1})

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def testInvalidAxis(self):
        """axis=3 with a rank-3 input shape raises ValueError."""
        expected_message = (
            r'Invalid value for `axis` argument. Expected 0 <= axis < inputs.rank')
        with self.assertRaisesRegex(ValueError, expected_message):
            layer_norm = layer_normalization.LayerNormalization(axis=3)
            layer_norm.build(input_shape=(2, 2, 2))

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def testDuplicateAxis(self):
        """Listing the same axis twice raises ValueError."""
        with self.assertRaisesRegex(ValueError, r'Duplicate axis:'):
            layer_norm = layer_normalization.LayerNormalization(axis=[-1, -1])
            layer_norm.build(input_shape=(2, 2, 2))

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def testFusedAttr(self):
        """`_fused` equals True after building with axis=[-2, -1]."""
        layer_norm = layer_normalization.LayerNormalization(axis=[-2, -1])
        layer_norm.build(input_shape=(2, 2, 2))
        self.assertEqual(layer_norm._fused, True)