class CuDNNTest(keras_parameterized.TestCase):

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
          return_sequences=[True, False]))
  @test_util.run_gpu_only
  def test_cudnn_rnn_return_sequence(self, layer_class, return_sequences):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    testing_utils.layer_test(
        layer_class,
        kwargs={'units': units, 'return_sequences': return_sequences},
        input_shape=(num_samples, timesteps, input_size))

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
          go_backwards=[True, False]))
  @test_util.run_gpu_only
  def test_cudnn_rnn_go_backward(self, layer_class, go_backwards):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    testing_utils.layer_test(
        layer_class,
        kwargs={'units': units, 'go_backwards': go_backwards},
        input_shape=(num_samples, timesteps, input_size))

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_return_state(self, layer_class):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

    inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size))
    layer = layer_class(units, return_state=True, stateful=True)
    outputs = layer(inputs)
    _, state = outputs[0], outputs[1:]
    self.assertEqual(len(state), num_states)
    model = keras.models.Model(inputs, state[0])
    model.run_eagerly = testing_utils.should_run_eagerly()

    inputs = np.random.random((num_samples, timesteps, input_size))
    state = model.predict(inputs)
    np.testing.assert_allclose(
        keras.backend.eval(layer.states[0]), state, atol=1e-4)

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_time_major_input(self, layer_class):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32

    model = keras.models.Sequential()
    model.add(
        keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
    layer = layer_class(units, time_major=True, return_sequences=True)
    model.add(layer)
    model.add(
        keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(learning_rate=0.001))
    model.fit(
        np.ones((num_samples, timesteps, input_size)),
        np.ones((num_samples, timesteps, units)))
    out = model.predict(np.ones((num_samples, timesteps, input_size)))
    self.assertEqual(out.shape, (num_samples, timesteps, units))

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_specify_initial_state_keras_tensor(self, layer_class):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

    inputs = keras.Input((timesteps, input_size))
    initial_state = [keras.Input((units,)) for _ in range(num_states)]
    layer = layer_class(units)
    if len(initial_state) == 1:
      output = layer(inputs, initial_state=initial_state[0])
    else:
      output = layer(inputs, initial_state=initial_state)
    self.assertIn(initial_state[0], layer._inbound_nodes[0].input_tensors)

    model = keras.models.Model([inputs] + initial_state, output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(learning_rate=0.001),
        run_eagerly=testing_utils.should_run_eagerly())

    inputs = np.random.random((num_samples, timesteps, input_size))
    initial_state = [
        np.random.random((num_samples, units)) for _ in range(num_states)
    ]
    targets = np.random.random((num_samples, units))
    model.fit([inputs] + initial_state, targets)
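# A minimal numpy sketch (not from the test file) of the time-major layout
# exercised by test_time_major_input above: the two Lambda layers transpose
# (batch, time, features) -> (time, batch, features) before the time_major
# CuDNN layer and back afterwards, and the round trip leaves the data
# unchanged, which is why the final output shape matches the input layout.
import numpy as np

x = np.random.random((32, 6, 10))        # (batch, time, features)
time_major = np.transpose(x, [1, 0, 2])  # (time, batch, features)
batch_major = np.transpose(time_major, [1, 0, 2])
np.testing.assert_array_equal(batch_major, x)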
class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):

  def _rmsprop_update_numpy(self, var, g, mg, rms, mom, lr, decay, momentum,
                            centered):
    rms_t = rms * decay + (1 - decay) * g * g
    if centered:
      mg_t = mg * decay + (1 - decay) * g
      denom_t = rms_t - mg_t * mg_t
    else:
      mg_t = mg
      denom_t = rms_t
    mom_t = momentum * mom + lr * g / np.sqrt(denom_t, dtype=denom_t.dtype)
    var_t = var - mom_t
    return var_t, mg_t, rms_t, mom_t

  def _sparse_rmsprop_update_numpy(self, var, gindexs, gvalues, mg, rms, mom,
                                   lr, decay, momentum, centered):
    mg_t = copy.deepcopy(mg)
    rms_t = copy.deepcopy(rms)
    mom_t = copy.deepcopy(mom)
    var_t = copy.deepcopy(var)
    for i in range(len(gindexs)):
      gindex = gindexs[i]
      gvalue = gvalues[i]
      rms_t[gindex] = rms[gindex] * decay + (1 - decay) * gvalue * gvalue
      denom_t = rms_t[gindex]
      if centered:
        mg_t[gindex] = mg_t[gindex] * decay + (1 - decay) * gvalue
        denom_t -= mg_t[gindex] * mg_t[gindex]
      mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt(denom_t)
      var_t[gindex] = var[gindex] - mom_t[gindex]
    return var_t, mg_t, rms_t, mom_t

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          dtype=_DATA_TYPES, param_value=_TEST_PARAM_VALUES))
  def testDense(self, dtype, param_value):
    (learning_rate, decay, momentum, epsilon, centered,
     use_resource) = tuple(param_value)
    with self.test_session(use_gpu=True):
      # Initialize variables for numpy implementation.
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype)

      if use_resource:
        var0 = resource_variable_ops.ResourceVariable(var0_np)
        var1 = resource_variable_ops.ResourceVariable(var1_np)
      else:
        var0 = variables.Variable(var0_np)
        var1 = variables.Variable(var1_np)
      grads0 = constant_op.constant(grads0_np)
      grads1 = constant_op.constant(grads1_np)
      opt = rmsprop.RMSPropOptimizer(
          learning_rate=learning_rate,
          decay=decay,
          momentum=momentum,
          epsilon=epsilon,
          centered=centered)

      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()

      mg0 = opt.get_slot(var0, "mg")
      self.assertEqual(mg0 is not None, centered)
      mg1 = opt.get_slot(var1, "mg")
      self.assertEqual(mg1 is not None, centered)
      rms0 = opt.get_slot(var0, "rms")
      self.assertIsNotNone(rms0)
      rms1 = opt.get_slot(var1, "rms")
      self.assertIsNotNone(rms1)
      mom0 = opt.get_slot(var0, "momentum")
      self.assertIsNotNone(mom0)
      mom1 = opt.get_slot(var1, "momentum")
      self.assertIsNotNone(mom1)

      mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      rms0_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
      rms1_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
      mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())

      # Run 4 steps of RMSProp
      for _ in range(4):
        update.run()

        var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
            var0_np, grads0_np, mg0_np, rms0_np, mom0_np, learning_rate,
            decay, momentum, centered)
        var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
            var1_np, grads1_np, mg1_np, rms1_np, mom1_np, learning_rate,
            decay, momentum, centered)

        # Validate updated params
        if centered:
          self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
          self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
        self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
        self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
        self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
        self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
        self.assertAllCloseAccordingToType(var0_np, var0.eval())
        self.assertAllCloseAccordingToType(var1_np, var1.eval())

  @parameterized.parameters([dtypes.float32, dtypes.float64])
  def testMinimizeSparseResourceVariable(self, dtype):
    with self.cached_session():
      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
      loss = pred * pred
      sgd_op = rmsprop.RMSPropOptimizer(
          learning_rate=1.0,
          decay=0.0,
          momentum=0.0,
          epsilon=0.0,
          centered=False).minimize(loss)
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
      # Run 1 step of sgd
      sgd_op.run()
      # Validate updated params
      self.assertAllCloseAccordingToType([[0., 1.]], var0.eval(), atol=0.01)

  @parameterized.parameters([dtypes.float32, dtypes.float64])
  def testMinimizeSparseResourceVariableCentered(self, dtype):
    with self.cached_session():
      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
      loss = pred * pred
      sgd_op = rmsprop.RMSPropOptimizer(
          learning_rate=1.0,
          decay=0.1,
          momentum=0.0,
          epsilon=1.0,
          centered=True).minimize(loss)
      variables.global_variables_initializer().run()
      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
      # Run 1 step of sgd
      sgd_op.run()
      # Validate updated params
      self.assertAllCloseAccordingToType(
          [[-7 / 3.0, -4 / 3.0]], var0.eval(), atol=0.01)

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          dtype=_DATA_TYPES, param_value=_TEST_PARAM_VALUES))
  def testSparse(self, dtype, param_value):
    (learning_rate, decay, momentum, epsilon, centered,
     _) = tuple(param_value)
    with self.test_session(use_gpu=True):
      # Initialize variables for numpy implementation.
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype)

      var0 = variables.Variable(var0_np)
      var1 = variables.Variable(var1_np)
      grads0_np_indices = np.array([0], dtype=np.int32)
      grads0 = ops.IndexedSlices(
          constant_op.constant(grads0_np),
          constant_op.constant(grads0_np_indices), constant_op.constant([1]))
      grads1_np_indices = np.array([1], dtype=np.int32)
      grads1 = ops.IndexedSlices(
          constant_op.constant(grads1_np),
          constant_op.constant(grads1_np_indices), constant_op.constant([1]))
      opt = rmsprop.RMSPropOptimizer(
          learning_rate=learning_rate,
          decay=decay,
          momentum=momentum,
          epsilon=epsilon,
          centered=centered)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()

      mg0 = opt.get_slot(var0, "mg")
      self.assertEqual(mg0 is not None, centered)
      mg1 = opt.get_slot(var1, "mg")
      self.assertEqual(mg1 is not None, centered)
      rms0 = opt.get_slot(var0, "rms")
      self.assertIsNotNone(rms0)
      rms1 = opt.get_slot(var1, "rms")
      self.assertIsNotNone(rms1)
      mom0 = opt.get_slot(var0, "momentum")
      self.assertIsNotNone(mom0)
      mom1 = opt.get_slot(var1, "momentum")
      self.assertIsNotNone(mom1)

      mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      rms0_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
      rms1_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
      mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())

      # Run 4 steps of RMSProp
      for _ in range(4):
        update.run()

        var0_np, mg0_np, rms0_np, mom0_np = self._sparse_rmsprop_update_numpy(
            var0_np, grads0_np_indices, grads0_np, mg0_np, rms0_np, mom0_np,
            learning_rate, decay, momentum, centered)
        var1_np, mg1_np, rms1_np, mom1_np = self._sparse_rmsprop_update_numpy(
            var1_np, grads1_np_indices, grads1_np, mg1_np, rms1_np, mom1_np,
            learning_rate, decay, momentum, centered)

        # Validate updated params
        if centered:
          self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
          self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
        self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
        self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
        self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
        self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
        self.assertAllCloseAccordingToType(var0_np, var0.eval())
        self.assertAllCloseAccordingToType(var1_np, var1.eval())

  @parameterized.parameters(_DATA_TYPES)
  def testWithoutMomentum(self, dtype):
    with self.test_session(use_gpu=True):
      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
      var1 = variables.Variable([3.0, 4.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
      opt = rmsprop.RMSPropOptimizer(
          learning_rate=2.0, decay=0.9, momentum=0.0, epsilon=1.0)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()

      rms0 = opt.get_slot(var0, "rms")
      self.assertIsNotNone(rms0)
      rms1 = opt.get_slot(var1, "rms")
      self.assertIsNotNone(rms1)
      mom0 = opt.get_slot(var0, "momentum")
      self.assertIsNotNone(mom0)
      mom1 = opt.get_slot(var1, "momentum")
      self.assertIsNotNone(mom1)

      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())
      # Step 1: the rms accumulators were 1. So we should see a normal
      # update: v -= grad * learning_rate
      update.run()
      # Check the root mean square accumulators.
      self.assertAllCloseAccordingToType(
          np.array([0.901, 0.901]), rms0.eval())
      self.assertAllCloseAccordingToType(
          np.array([0.90001, 0.90001]), rms1.eval())
      # Check the parameters.
      self.assertAllCloseAccordingToType(
          np.array([
              1.0 - (0.1 * 2.0 / math.sqrt(0.901)),
              2.0 - (0.1 * 2.0 / math.sqrt(0.901))
          ]), var0.eval())
      self.assertAllCloseAccordingToType(
          np.array([
              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)),
              4.0 - (0.01 * 2.0 / math.sqrt(0.90001))
          ]), var1.eval())
      # Step 2: the root mean square accumulators contain the previous update.
      update.run()
      # Check the rms accumulators.
      self.assertAllCloseAccordingToType(
          np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval())
      self.assertAllCloseAccordingToType(
          np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval())
      # Check the parameters.
      self.assertAllCloseAccordingToType(
          np.array([
              1.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)),
              2.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))
          ]), var0.eval())
      self.assertAllCloseAccordingToType(
          np.array([
              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)),
              4.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))
          ]), var1.eval())

  @parameterized.parameters(_DATA_TYPES)
  def testWithMomentum(self, dtype):
    with self.test_session(use_gpu=True):
      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
      var1 = variables.Variable([3.0, 4.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)

      opt = rmsprop.RMSPropOptimizer(
          learning_rate=2.0, decay=0.9, momentum=0.5, epsilon=1.0)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()

      rms0 = opt.get_slot(var0, "rms")
      self.assertIsNotNone(rms0)
      rms1 = opt.get_slot(var1, "rms")
      self.assertIsNotNone(rms1)
      mom0 = opt.get_slot(var0, "momentum")
      self.assertIsNotNone(mom0)
      mom1 = opt.get_slot(var1, "momentum")
      self.assertIsNotNone(mom1)

      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], var0.eval())
      self.assertAllClose([3.0, 4.0], var1.eval())
      # Step 1: rms = 1, mom = 0. So we should see a normal
      # update: v -= grad * learning_rate
      update.run()
      # Check the root mean square accumulators.
      self.assertAllCloseAccordingToType(
          np.array([0.901, 0.901]), rms0.eval())
      self.assertAllCloseAccordingToType(
          np.array([0.90001, 0.90001]), rms1.eval())
      # Check the momentum accumulators
      self.assertAllCloseAccordingToType(
          np.array([(0.1 * 2.0 / math.sqrt(0.901)),
                    (0.1 * 2.0 / math.sqrt(0.901))]), mom0.eval())
      self.assertAllCloseAccordingToType(
          np.array([(0.01 * 2.0 / math.sqrt(0.90001)),
                    (0.01 * 2.0 / math.sqrt(0.90001))]), mom1.eval())
      # Check the parameters.
      self.assertAllCloseAccordingToType(
          np.array([
              1.0 - (0.1 * 2.0 / math.sqrt(0.901)),
              2.0 - (0.1 * 2.0 / math.sqrt(0.901))
          ]), var0.eval())
      self.assertAllCloseAccordingToType(
          np.array([
              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)),
              4.0 - (0.01 * 2.0 / math.sqrt(0.90001))
          ]), var1.eval())
      # Step 2: the root mean square accumulators contain the previous update.
      update.run()
      # Check the rms accumulators.
      self.assertAllCloseAccordingToType(
          np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval())
      self.assertAllCloseAccordingToType(
          np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval())
      self.assertAllCloseAccordingToType(
          np.array([
              0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)),
              0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))
          ]), mom0.eval())
      self.assertAllCloseAccordingToType(
          np.array([
              0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)),
              0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))
          ]), mom1.eval())
      # Check the parameters.
      self.assertAllCloseAccordingToType(
          np.array([
              1.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
              (0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
               (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))),
              2.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
              (0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
               (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)))
          ]), var0.eval())
      self.assertAllCloseAccordingToType(
          np.array([
              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
              (0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
               (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))),
              4.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
              (0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
               (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)))
          ]), var1.eval())
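# A minimal standalone sketch (not part of the test file) that re-derives the
# hand-computed constants asserted in testWithoutMomentum above, under the
# same formulation the tests encode: the rms accumulators start at epsilon,
# and with momentum=0 the update reduces to var -= lr * g / sqrt(rms_t).
import math

import numpy as np

lr, decay, epsilon = 2.0, 0.9, 1.0
g = np.array([0.1, 0.1])
rms = np.array([epsilon, epsilon])
var = np.array([1.0, 2.0])

rms = rms * decay + (1 - decay) * g * g  # 1 * 0.9 + 0.1 * 0.01 = 0.901
var = var - lr * g / np.sqrt(rms)

np.testing.assert_allclose(rms, [0.901, 0.901])
np.testing.assert_allclose(var, [1.0 - 0.1 * 2.0 / math.sqrt(0.901),
                                 2.0 - 0.1 * 2.0 / math.sqrt(0.901)])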
class ConvGRU2DTest(keras_parameterized.TestCase):

  @keras_parameterized.run_all_keras_modes
  @parameterized.named_parameters(
      *tf_test_util.generate_combinations_with_testcase_name(
          data_format=['channels_first', 'channels_last'],
          return_sequences=[True, False]))
  def test_conv_gru_2d(self, data_format, return_sequences):
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    if data_format == 'channels_first':
      inputs = np.random.rand(num_samples, sequence_len,
                              input_channel,
                              input_num_row, input_num_col)
    else:
      inputs = np.random.rand(num_samples, sequence_len,
                              input_num_row, input_num_col,
                              input_channel)

    # test for return state:
    x = tf.keras.layers.Input(batch_shape=inputs.shape)
    kwargs = {
        'data_format': data_format,
        'return_sequences': return_sequences,
        'return_state': True,
        'stateful': True,
        'filters': filters,
        'kernel_size': (num_row, num_col),
        'padding': 'valid'
    }
    layer = layers.ConvGRU2D(**kwargs)
    layer.build(inputs.shape)
    outputs = layer(x)
    _, states = outputs[0], outputs[1:]
    self.assertEqual(len(states), len(layer.cell.state_size))
    model = tf.keras.models.Model(x, states[0])
    state = model.predict(inputs)

    self.assertAllClose(
        tf.keras.backend.eval(layer.states[0]), state, atol=1e-4)

    # test for output shape:
    custom_objects = {'ConvGRU2D': layers.ConvGRU2D}
    with tf.keras.utils.custom_object_scope(custom_objects):
      testing_utils.layer_test(
          layers.ConvGRU2D,
          kwargs={
              'data_format': data_format,
              'return_sequences': return_sequences,
              'filters': filters,
              'kernel_size': (num_row, num_col),
              'padding': 'valid'
          },
          input_shape=inputs.shape)

  def test_conv_gru_2d_statefulness(self):
    # Tests for statefulness
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    inputs = np.random.rand(num_samples, sequence_len,
                            input_num_row, input_num_col,
                            input_channel)

    with self.cached_session():
      model = tf.keras.models.Sequential()
      kwargs = {
          'data_format': 'channels_last',
          'return_sequences': False,
          'filters': filters,
          'kernel_size': (num_row, num_col),
          'stateful': True,
          'batch_input_shape': inputs.shape,
          'padding': 'same'
      }
      layer = layers.ConvGRU2D(**kwargs)
      model.add(layer)
      model.compile(optimizer='sgd', loss='mse')
      out1 = model.predict(np.ones_like(inputs))

      # train once so that the states change
      model.train_on_batch(np.ones_like(inputs),
                           np.random.random(out1.shape))
      out2 = model.predict(np.ones_like(inputs))

      # if the state is not reset, output should be different
      self.assertNotEqual(out1.max(), out2.max())

      # check that output changes after states are reset
      # (even though the model itself didn't change)
      layer.reset_states()
      out3 = model.predict(np.ones_like(inputs))
      self.assertNotEqual(out3.max(), out2.max())

      # check that container-level reset_states() works
      model.reset_states()
      out4 = model.predict(np.ones_like(inputs))
      self.assertAllClose(out3, out4, atol=1e-5)

      # check that the call to `predict` updated the states
      out5 = model.predict(np.ones_like(inputs))
      self.assertNotEqual(out4.max(), out5.max())

  def test_conv_gru_2d_regularizers(self):
    # check regularizers
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    inputs = np.random.rand(num_samples, sequence_len,
                            input_num_row, input_num_col,
                            input_channel)

    with self.cached_session():
      kwargs = {
          'data_format': 'channels_last',
          'return_sequences': False,
          'kernel_size': (num_row, num_col),
          'stateful': True,
          'filters': filters,
          'batch_input_shape': inputs.shape,
          'kernel_regularizer': tf.keras.regularizers.L1L2(l1=0.01),
          'recurrent_regularizer': tf.keras.regularizers.L1L2(l1=0.01),
          'activity_regularizer': 'l2',
          'bias_regularizer': 'l2',
          'kernel_constraint': 'max_norm',
          'recurrent_constraint': 'max_norm',
          'bias_constraint': 'max_norm',
          'padding': 'same'
      }
      layer = layers.ConvGRU2D(**kwargs)
      layer.build(inputs.shape)
      self.assertEqual(len(layer.losses), 3)
      layer(tf.keras.backend.variable(np.ones(inputs.shape)))
      self.assertEqual(len(layer.losses), 4)

  def test_conv_gru_2d_dropout(self):
    # check dropout
    with self.cached_session():
      custom_objects = {'ConvGRU2D': layers.ConvGRU2D}
      with tf.keras.utils.custom_object_scope(custom_objects):
        testing_utils.layer_test(
            layers.ConvGRU2D,
            kwargs={
                'data_format': 'channels_last',
                'return_sequences': False,
                'filters': 2,
                'kernel_size': (3, 3),
                'padding': 'same',
                'dropout': 0.1,
                'recurrent_dropout': 0.1
            },
            input_shape=(1, 2, 5, 5, 2))

  def test_conv_gru_2d_cloning(self):
    with self.cached_session():
      model = tf.keras.models.Sequential()
      model.add(layers.ConvGRU2D(5, 3, input_shape=(None, 5, 5, 3)))

    test_inputs = np.random.random((2, 4, 5, 5, 3))
    reference_outputs = model.predict(test_inputs)
    weights = model.get_weights()

    # Use a new graph to clone the model
    with self.cached_session():
      clone = tf.keras.models.clone_model(model)
      clone.set_weights(weights)
      outputs = clone.predict(test_inputs)

    self.assertAllClose(reference_outputs, outputs, atol=1e-5)
class CuDNNV1OnlyTest(keras_parameterized.TestCase):

  @test_util.run_gpu_only
  def test_trainability(self):
    input_size = 10
    units = 2
    for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
      layer = layer_class(units)
      layer.build((None, None, input_size))
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.trainable_weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 0)
      layer.trainable = False
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 3)
      self.assertEqual(len(layer.trainable_weights), 0)
      layer.trainable = True
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.trainable_weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 0)

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'],
          to_cudnn=[True, False],
          bidirectional=[True, False],
          implementation=[1, 2],
          model_nest_level=[1, 2],
          model_type=['seq', 'func']))
  @test_util.run_v1_only('b/120911602, b/112083752')
  @test_util.run_gpu_only
  def test_load_weights_between_noncudnn_rnn(self, rnn_type, to_cudnn,
                                             bidirectional, implementation,
                                             model_nest_level, model_type):
    input_size = 10
    timesteps = 6
    input_shape = (timesteps, input_size)
    units = 2
    num_samples = 32
    inputs = np.random.random((num_samples, timesteps, input_size))

    rnn_layer_kwargs = {
        'recurrent_activation': 'sigmoid',
        # ensure biases are non-zero and properly converted
        'bias_initializer': 'random_uniform',
        'implementation': implementation
    }
    if rnn_type == 'LSTM':
      rnn_layer_class = keras.layers.LSTM
      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
    else:
      rnn_layer_class = keras.layers.GRU
      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
      rnn_layer_kwargs['reset_after'] = True

    layer = rnn_layer_class(units, **rnn_layer_kwargs)
    if bidirectional:
      layer = keras.layers.Bidirectional(layer)

    cudnn_layer = cudnn_rnn_layer_class(units)
    if bidirectional:
      cudnn_layer = keras.layers.Bidirectional(cudnn_layer)

    model = self._make_nested_model(input_shape, layer, model_nest_level,
                                    model_type)
    cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
                                          model_nest_level, model_type)

    if to_cudnn:
      self._convert_model_weights(model, cudnn_model)
    else:
      self._convert_model_weights(cudnn_model, model)

    self.assertAllClose(
        model.predict(inputs), cudnn_model.predict(inputs), atol=1e-4)

  def _make_nested_model(self, input_shape, layer, level=1,
                         model_type='func'):
    # example: make_nested_seq_model((1,), Dense(10), level=2).summary()
    def make_nested_seq_model(input_shape, layer, level=1):
      model = layer
      for i in range(1, level + 1):
        layers = [keras.layers.InputLayer(input_shape),
                  model] if (i == 1) else [model]
        model = keras.models.Sequential(layers)
      return model

    # example: make_nested_func_model((1,), Dense(10), level=2).summary()
    def make_nested_func_model(input_shape, layer, level=1):
      model_input = keras.layers.Input(input_shape)
      model = layer
      for _ in range(level):
        model = keras.models.Model(model_input, model(model_input))
      return model

    if model_type == 'func':
      return make_nested_func_model(input_shape, layer, level)
    elif model_type == 'seq':
      return make_nested_seq_model(input_shape, layer, level)

  def _convert_model_weights(self, source_model, target_model):
    _, fname = tempfile.mkstemp('.h5')
    source_model.save_weights(fname)
    target_model.load_weights(fname)
    os.remove(fname)

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
  @test_util.run_v1_only('b/120911602')
  @test_util.run_gpu_only
  def test_load_weights_between_noncudnn_rnn_time_distributed(
      self, rnn_type, to_cudnn):
    # Similar to test_load_weights_between_noncudnn_rnn(), but with a
    # different input rank due to the use of TimeDistributed. Issue: #10356.
    input_size = 10
    steps = 6
    timesteps = 6
    input_shape = (timesteps, steps, input_size)
    units = 2
    num_samples = 32
    inputs = np.random.random((num_samples, timesteps, steps, input_size))

    rnn_layer_kwargs = {
        'recurrent_activation': 'sigmoid',
        # ensure biases are non-zero and properly converted
        'bias_initializer': 'random_uniform',
    }
    if rnn_type == 'LSTM':
      rnn_layer_class = keras.layers.LSTM
      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
    else:
      rnn_layer_class = keras.layers.GRU
      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
      rnn_layer_kwargs['reset_after'] = True

    layer = rnn_layer_class(units, **rnn_layer_kwargs)
    layer = keras.layers.TimeDistributed(layer)

    cudnn_layer = cudnn_rnn_layer_class(units)
    cudnn_layer = keras.layers.TimeDistributed(cudnn_layer)

    model = self._make_nested_model(input_shape, layer)
    cudnn_model = self._make_nested_model(input_shape, cudnn_layer)

    if to_cudnn:
      self._convert_model_weights(model, cudnn_model)
    else:
      self._convert_model_weights(cudnn_model, model)

    self.assertAllClose(
        model.predict(inputs), cudnn_model.predict(inputs), atol=1e-4)

  @test_util.run_gpu_only
  def test_cudnnrnn_bidirectional(self):
    rnn = keras.layers.CuDNNGRU
    samples = 2
    dim = 2
    timesteps = 2
    output_dim = 2
    mode = 'concat'

    x = np.random.random((samples, timesteps, dim))
    target_dim = 2 * output_dim if mode == 'concat' else output_dim
    y = np.random.random((samples, target_dim))

    # test with Sequential model
    model = keras.Sequential()
    model.add(
        keras.layers.Bidirectional(
            rnn(output_dim), merge_mode=mode, input_shape=(None, dim)))
    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
    model.fit(x, y, epochs=1, batch_size=1)

    # test config
    model.get_config()
    model = keras.models.model_from_json(model.to_json())
    model.summary()

    # test stacked bidirectional layers
    model = keras.Sequential()
    model.add(
        keras.layers.Bidirectional(
            rnn(output_dim, return_sequences=True),
            merge_mode=mode,
            input_shape=(None, dim)))
    model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode))
    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
    model.fit(x, y, epochs=1, batch_size=1)

    # test with functional API
    inputs = keras.Input((timesteps, dim))
    outputs = keras.layers.Bidirectional(
        rnn(output_dim), merge_mode=mode)(inputs)
    model = keras.Model(inputs, outputs)
    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
    model.fit(x, y, epochs=1, batch_size=1)

    # Bidirectional and stateful
    inputs = keras.Input(batch_shape=(1, timesteps, dim))
    outputs = keras.layers.Bidirectional(
        rnn(output_dim, stateful=True), merge_mode=mode)(inputs)
    model = keras.Model(inputs, outputs)
    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
    model.fit(x, y, epochs=1, batch_size=1)

  @test_util.run_gpu_only
  def test_preprocess_weights_for_loading_gru_incompatible(self):
    """Test loading weights between incompatible layers.

    Should fail fast with an exception.
    """
    input_shape = (3, 5)

    def gru(cudnn=False, **kwargs):
      layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRU
      return layer_class(2, input_shape=input_shape, **kwargs)

    def get_layer_weights(layer):
      layer.build(input_shape=input_shape)
      return layer.get_weights()

    def assert_not_compatible(src, dest, message):
      with self.assertRaises(ValueError) as ex:
        keras.engine.saving.preprocess_weights_for_loading(
            dest, get_layer_weights(src))
      self.assertIn(message, str(ex.exception))

    assert_not_compatible(
        gru(), gru(cudnn=True),
        'GRU(reset_after=False) is not compatible with CuDNNGRU')
    assert_not_compatible(
        gru(cudnn=True), gru(),
        'CuDNNGRU is not compatible with GRU(reset_after=False)')
    assert_not_compatible(
        gru(), gru(reset_after=True),
        'GRU(reset_after=False) is not compatible with '
        'GRU(reset_after=True)')
    assert_not_compatible(
        gru(reset_after=True), gru(),
        'GRU(reset_after=True) is not compatible with '
        'GRU(reset_after=False)')
  if use_dataset:
    if action == "predict":
      input_data = dataset_ops.DatasetV2.from_tensor_slices(
          input_data).batch(batch_size)
    else:
      input_data = dataset_ops.DatasetV2.from_tensor_slices(
          (input_data, expected_output)).batch(batch_size)
      expected_output = None
  return (input_data, expected_output)


@keras_parameterized.run_with_all_model_types
@keras_parameterized.run_all_keras_modes
@parameterized.named_parameters(
    *test_util.generate_combinations_with_testcase_name(
        use_dict=[True, False],
        use_dataset=[True, False],
        action=["predict", "evaluate", "fit"]))
class SparseTensorInputTest(keras_parameterized.TestCase):

  def test_sparse_tensors(self, use_dict, use_dataset, action):
    data = [(sparse_tensor.SparseTensor([[0, 0, 0], [1, 0, 0], [1, 0, 1]],
                                        [1, 2, 3], [2, 1, 3]),
             np.array([[[1, -1, -1]], [[2, 3, -1]]])),
            (sparse_tensor.SparseTensor(
                [[0, 0, 0], [1, 0, 0], [1, 0, 1], [2, 0, 1]], [5, 6, 7, 8],
                [3, 1, 4]),
             np.array([[[5, -1, -1, -1]], [[6, 7, -1, -1]],
                       [[-1, 8, -1, -1]]]))]
    # Prepare the model to test.
    input_name = get_input_name(use_dict)
    model_input = input_layer.Input(
        shape=(1, None), sparse=True,
class TimeDistributedTest(keras_parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_timedistributed_dense(self):
    model = keras.models.Sequential()
    model.add(
        keras.layers.TimeDistributed(
            keras.layers.Dense(2), input_shape=(3, 4)))
    model.compile(optimizer='rmsprop', loss='mse')
    model.fit(
        np.random.random((10, 3, 4)),
        np.random.random((10, 3, 2)),
        epochs=1,
        batch_size=10)

    # test config
    model.get_config()

    # check whether the model variables are present in the
    # trackable list of objects
    checkpointed_object_ids = {
        id(o) for o in trackable_util.list_objects(model)
    }
    for v in model.variables:
      self.assertIn(id(v), checkpointed_object_ids)

  def test_timedistributed_static_batch_size(self):
    model = keras.models.Sequential()
    model.add(
        keras.layers.TimeDistributed(
            keras.layers.Dense(2), input_shape=(3, 4), batch_size=10))
    model.compile(optimizer='rmsprop', loss='mse')
    model.fit(
        np.random.random((10, 3, 4)),
        np.random.random((10, 3, 2)),
        epochs=1,
        batch_size=10)

  def test_timedistributed_invalid_init(self):
    x = constant_op.constant(np.zeros((1, 1)).astype('float32'))
    with self.assertRaisesRegex(
        ValueError, 'Please initialize `TimeDistributed` layer with a '
        '`tf.keras.layers.Layer` instance.'):
      keras.layers.TimeDistributed(x)

  def test_timedistributed_conv2d(self):
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(
          keras.layers.TimeDistributed(
              keras.layers.Conv2D(5, (2, 2), padding='same'),
              input_shape=(2, 4, 4, 3)))
      model.add(keras.layers.Activation('relu'))
      model.compile(optimizer='rmsprop', loss='mse')
      model.train_on_batch(
          np.random.random((1, 2, 4, 4, 3)),
          np.random.random((1, 2, 4, 4, 5)))

      model = keras.models.model_from_json(model.to_json())
      model.summary()

  def test_timedistributed_stacked(self):
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(
          keras.layers.TimeDistributed(
              keras.layers.Dense(2), input_shape=(3, 4)))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
      model.add(keras.layers.Activation('relu'))
      model.compile(optimizer='rmsprop', loss='mse')

      model.fit(
          np.random.random((10, 3, 4)),
          np.random.random((10, 3, 3)),
          epochs=1,
          batch_size=10)

  def test_regularizers(self):
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(
          keras.layers.TimeDistributed(
              keras.layers.Dense(2, kernel_regularizer='l1',
                                 activity_regularizer='l1'),
              input_shape=(3, 4)))
      model.add(keras.layers.Activation('relu'))
      model.compile(optimizer='rmsprop', loss='mse')
      self.assertEqual(len(model.losses), 2)

  def test_TimeDistributed_learning_phase(self):
    with self.cached_session():
      # test layers that need learning_phase to be set
      np.random.seed(1234)
      x = keras.layers.Input(shape=(3, 2))
      y = keras.layers.TimeDistributed(keras.layers.Dropout(.999))(
          x, training=True)
      model = keras.models.Model(x, y)
      y = model.predict(np.random.random((10, 3, 2)))
      self.assertAllClose(np.mean(y), 0., atol=1e-1, rtol=1e-1)

  def test_TimeDistributed_batchnorm(self):
    with self.cached_session():
      # test that wrapped BN updates still work.
      model = keras.models.Sequential()
      model.add(keras.layers.TimeDistributed(
          keras.layers.BatchNormalization(center=True, scale=True),
          name='bn',
          input_shape=(10, 2)))
      model.compile(optimizer='rmsprop', loss='mse')
      # Assert that mean and variance are 0 and 1.
      td = model.layers[0]
      self.assertAllClose(td.get_weights()[2], np.array([0, 0]))
      assert np.array_equal(td.get_weights()[3], np.array([1, 1]))
      # Train
      model.train_on_batch(
          np.random.normal(loc=2, scale=2, size=(1, 10, 2)),
          np.broadcast_to(np.array([0, 1]), (1, 10, 2)))
      # Assert that mean and variance changed.
      assert not np.array_equal(td.get_weights()[2], np.array([0, 0]))
      assert not np.array_equal(td.get_weights()[3], np.array([1, 1]))

  def test_TimeDistributed_trainable(self):
    # test that trainability propagates to the wrapped layer's weights
    x = keras.layers.Input(shape=(3, 2))
    layer = keras.layers.TimeDistributed(keras.layers.BatchNormalization())
    _ = layer(x)
    self.assertEqual(len(layer.trainable_weights), 2)
    layer.trainable = False
    assert not layer.trainable_weights
    layer.trainable = True
    assert len(layer.trainable_weights) == 2

  def test_TimeDistributed_with_masked_embedding_and_unspecified_shape(self):
    with self.cached_session():
      # test with unspecified shape and Embeddings with mask_zero
      model = keras.models.Sequential()
      model.add(keras.layers.TimeDistributed(
          keras.layers.Embedding(5, 6, mask_zero=True),
          input_shape=(None, None)))  # N by t_1 by t_2 by 6
      model.add(keras.layers.TimeDistributed(
          keras.layers.SimpleRNN(7, return_sequences=True)))
      model.add(keras.layers.TimeDistributed(
          keras.layers.SimpleRNN(8, return_sequences=False)))
      model.add(keras.layers.SimpleRNN(1, return_sequences=False))
      model.compile(optimizer='rmsprop', loss='mse')
      model_input = np.random.randint(
          low=1, high=5, size=(10, 3, 4), dtype='int32')
      for i in range(4):
        model_input[i, i:, i:] = 0
      model.fit(
          model_input, np.random.random((10, 1)), epochs=1, batch_size=10)
      mask_outputs = [model.layers[0].compute_mask(model.input)]
      for layer in model.layers[1:]:
        mask_outputs.append(
            layer.compute_mask(layer.input, mask_outputs[-1]))
      func = keras.backend.function([model.input], mask_outputs[:-1])
      mask_outputs_val = func([model_input])
      ref_mask_val_0 = model_input > 0         # embedding layer
      ref_mask_val_1 = ref_mask_val_0          # first RNN layer
      ref_mask_val_2 = np.any(ref_mask_val_1, axis=-1)  # second RNN layer
      ref_mask_val = [ref_mask_val_0, ref_mask_val_1, ref_mask_val_2]
      for i in range(3):
        self.assertAllEqual(mask_outputs_val[i], ref_mask_val[i])
      self.assertIs(mask_outputs[-1], None)  # final layer

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_TimeDistributed_with_masking_layer(self):
    # test with Masking layer
    model = keras.models.Sequential()
    model.add(
        keras.layers.TimeDistributed(
            keras.layers.Masking(mask_value=0.,), input_shape=(None, 4)))
    model.add(keras.layers.TimeDistributed(keras.layers.Dense(5)))
    model.compile(optimizer='rmsprop', loss='mse')
    model_input = np.random.randint(low=1, high=5, size=(10, 3, 4))
    for i in range(4):
      model_input[i, i:, :] = 0.
    model.compile(optimizer='rmsprop', loss='mse')
    model.fit(model_input, np.random.random((10, 3, 5)),
              epochs=1, batch_size=6)
    mask_outputs = [model.layers[0].compute_mask(model.input)]
    mask_outputs += [
        model.layers[1].compute_mask(model.layers[1].input, mask_outputs[-1])
    ]
    func = keras.backend.function([model.input], mask_outputs)
    mask_outputs_val = func([model_input])
    self.assertEqual((mask_outputs_val[0]).all(), model_input.all())
    self.assertEqual((mask_outputs_val[1]).all(), model_input.all())

  def test_TimeDistributed_with_different_time_shapes(self):
    time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5))
    ph_1 = keras.backend.placeholder(shape=(None, 10, 13))
    out_1 = time_dist(ph_1)
    self.assertEqual(out_1.shape.as_list(), [None, 10, 5])

    ph_2 = keras.backend.placeholder(shape=(None, 1, 13))
    out_2 = time_dist(ph_2)
    self.assertEqual(out_2.shape.as_list(), [None, 1, 5])

    ph_3 = keras.backend.placeholder(shape=(None, 1, 18))
    with self.assertRaisesRegex(ValueError, 'is incompatible with'):
      time_dist(ph_3)

  def test_TimeDistributed_with_invalid_dimensions(self):
    time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5))
    ph = keras.backend.placeholder(shape=(None, 10))
    with self.assertRaisesRegex(
        ValueError,
        '`TimeDistributed` Layer should be passed an `input_shape `'):
      time_dist(ph)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_TimeDistributed_reshape(self):

    class NoReshapeLayer(keras.layers.Layer):

      def call(self, inputs):
        return inputs

    # Built-in layers that aren't stateful use the reshape implementation.
    td1 = keras.layers.TimeDistributed(keras.layers.Dense(5))
    self.assertTrue(td1._always_use_reshape)

    # Built-in layers that are stateful don't use the reshape implementation.
    td2 = keras.layers.TimeDistributed(
        keras.layers.RNN(keras.layers.SimpleRNNCell(10), stateful=True))
    self.assertFalse(td2._always_use_reshape)

    # Custom layers are not allowlisted for the fast reshape implementation.
    td3 = keras.layers.TimeDistributed(NoReshapeLayer())
    self.assertFalse(td3._always_use_reshape)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_TimeDistributed_output_shape_return_types(self):

    class TestLayer(keras.layers.Layer):

      def call(self, inputs):
        return array_ops.concat([inputs, inputs], axis=-1)

      def compute_output_shape(self, input_shape):
        output_shape = tensor_shape.TensorShape(input_shape).as_list()
        output_shape[-1] = output_shape[-1] * 2
        output_shape = tensor_shape.TensorShape(output_shape)
        return output_shape

    class TestListLayer(TestLayer):

      def compute_output_shape(self, input_shape):
        shape = super(TestListLayer, self).compute_output_shape(input_shape)
        return shape.as_list()

    class TestTupleLayer(TestLayer):

      def compute_output_shape(self, input_shape):
        shape = super(TestTupleLayer, self).compute_output_shape(input_shape)
        return tuple(shape.as_list())

    # Layers can specify output shape as list/tuple/TensorShape
    test_layers = [TestLayer, TestListLayer, TestTupleLayer]
    for layer in test_layers:
      input_layer = keras.layers.TimeDistributed(layer())
      inputs = keras.backend.placeholder(shape=(None, 2, 4))
      output = input_layer(inputs)
      self.assertEqual(output.shape.as_list(), [None, 2, 8])
      self.assertEqual(
          input_layer.compute_output_shape([None, 2, 4]).as_list(),
          [None, 2, 8])

  # TODO(scottzhu): check why v1 session failed.
  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
  def test_TimeDistributed_with_mask_first_implementation(self):
    np.random.seed(100)
    rnn_layer = keras.layers.LSTM(4, return_sequences=True, stateful=True)

    data = np.array([[[[1.0], [1.0]], [[0.0], [1.0]]],
                     [[[1.0], [0.0]], [[1.0], [1.0]]],
                     [[[1.0], [0.0]], [[1.0], [1.0]]]])
    x = keras.layers.Input(shape=(2, 2, 1), batch_size=3)
    x_masking = keras.layers.Masking()(x)
    y = keras.layers.TimeDistributed(rnn_layer)(x_masking)
    model_1 = keras.models.Model(x, y)
    model_1.compile(
        'rmsprop', 'mse', run_eagerly=testing_utils.should_run_eagerly())
    output_with_mask = model_1.predict(data, steps=1)

    y = keras.layers.TimeDistributed(rnn_layer)(x)
    model_2 = keras.models.Model(x, y)
    model_2.compile(
        'rmsprop', 'mse', run_eagerly=testing_utils.should_run_eagerly())
    output = model_2.predict(data, steps=1)

    self.assertNotAllClose(output_with_mask, output, atol=1e-7)

  @keras_parameterized.run_all_keras_modes
  @parameterized.named_parameters(
      *tf_test_util.generate_combinations_with_testcase_name(
          layer=[keras.layers.LSTM, keras.layers.Dense]))
  def test_TimeDistributed_with_ragged_input(self, layer):
    if context.executing_eagerly():
      self.skipTest('b/143103634')
    np.random.seed(100)
    layer = layer(4)
    ragged_data = ragged_factory_ops.constant(
        [[[[1.0], [1.0]], [[2.0], [2.0]]],
         [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]],
         [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]]],
        ragged_rank=1)

    x_ragged = keras.Input(shape=(None, 2, 1), dtype='float32', ragged=True)
    y_ragged = keras.layers.TimeDistributed(layer)(x_ragged)
    model_1 = keras.models.Model(x_ragged, y_ragged)
    model_1._run_eagerly = testing_utils.should_run_eagerly()
    output_ragged = model_1.predict(ragged_data, steps=1)

    x_dense = keras.Input(shape=(None, 2, 1), dtype='float32')
    masking = keras.layers.Masking()(x_dense)
    y_dense = keras.layers.TimeDistributed(layer)(masking)
    model_2 = keras.models.Model(x_dense, y_dense)
    dense_data = ragged_data.to_tensor()
    model_2._run_eagerly = testing_utils.should_run_eagerly()
    output_dense = model_2.predict(dense_data, steps=1)

    output_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        output_ragged, name='tensor')
    self.assertAllEqual(output_ragged.to_tensor(), output_dense)

  @keras_parameterized.run_all_keras_modes
  def test_TimeDistributed_with_ragged_input_with_batch_size(self):
    np.random.seed(100)
    layer = keras.layers.Dense(16)

    ragged_data = ragged_factory_ops.constant(
        [[[[1.0], [1.0]], [[2.0], [2.0]]],
         [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]],
         [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]]],
        ragged_rank=1)

    # Use the first implementation by specifying batch_size
    x_ragged = keras.Input(
        shape=(None, 2, 1), batch_size=3, dtype='float32', ragged=True)
    y_ragged = keras.layers.TimeDistributed(layer)(x_ragged)
    model_1 = keras.models.Model(x_ragged, y_ragged)
    output_ragged = model_1.predict(ragged_data, steps=1)

    x_dense = keras.Input(shape=(None, 2, 1), batch_size=3, dtype='float32')
    masking = keras.layers.Masking()(x_dense)
    y_dense = keras.layers.TimeDistributed(layer)(masking)
    model_2 = keras.models.Model(x_dense, y_dense)
    dense_data = ragged_data.to_tensor()
    output_dense = model_2.predict(dense_data, steps=1)

    output_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        output_ragged, name='tensor')
    self.assertAllEqual(output_ragged.to_tensor(), output_dense)

  def test_TimeDistributed_set_static_shape(self):
    layer = keras.layers.TimeDistributed(keras.layers.Conv2D(16, (3, 3)))
    inputs = keras.Input(batch_shape=(1, None, 32, 32, 1))
    outputs = layer(inputs)
    # Make sure the batch dim is not lost after array_ops.reshape.
    self.assertListEqual(outputs.shape.as_list(), [1, None, 30, 30, 16])
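# A minimal numpy sketch (assumptions: a stateless inner layer and a fully
# defined inner shape) of the fast path that _always_use_reshape selects in
# test_TimeDistributed_reshape above: for a stateless layer such as Dense,
# TimeDistributed is equivalent to folding the time axis into the batch axis,
# applying the layer once, and unfolding. The helper name is hypothetical.
import numpy as np

def time_distributed_dense(x, kernel, bias):
  batch, time, features = x.shape
  flat = x.reshape(batch * time, features)  # fold time into batch
  out = flat.dot(kernel) + bias             # apply the inner layer once
  return out.reshape(batch, time, kernel.shape[1])  # unfold the time axis

x = np.random.random((10, 3, 4))
kernel = np.random.random((4, 2))
bias = np.random.random(2)
# Reference: apply the same dense map independently at every timestep.
per_step = np.stack([x[:, t].dot(kernel) + bias for t in range(3)], axis=1)
np.testing.assert_allclose(time_distributed_dense(x, kernel, bias), per_step)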
class BiasAddDeterministicTest(bias_op_base.BiasAddTestBase,
                               parameterized.TestCase):

  def _makeShapeTuple(self, batch_size, channel_count, data_rank, data_dim,
                      data_layout):
    data_dims = data_rank * (data_dim,)
    if data_layout == 'channels_first':
      shape = (batch_size,) + (channel_count,) + data_dims
    elif data_layout == 'channels_last':
      shape = (batch_size,) + data_dims + (channel_count,)
    else:
      raise ValueError('Unknown data format')
    return shape

  def _dataFormatFromDataLayout(self, data_layout=None):
    if data_layout == 'channels_first':
      return 'NCHW'
    elif data_layout == 'channels_last':
      return 'NHWC'
    else:
      raise ValueError('Unknown data_layout')

  def _randomNDArray(self, shape):
    return 2 * np.random.random_sample(shape) - 1

  def _randomDataOp(self, shape, data_type):
    return constant_op.constant(self._randomNDArray(shape), dtype=data_type)

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          # With the selected layer configuration, at least in TensorFlow
          # version 2.0, when data_layout='channels_last', bias_add operates
          # deterministically by default. I don't know if this is true for
          # all layer configurations. These cases are still being tested
          # here, for completeness.
          data_layout=['channels_first', 'channels_last'],
          data_rank=[1, 2, 3],
          data_type=[dtypes.float16, dtypes.float32, dtypes.float64]))
  @test_util.run_in_graph_and_eager_modes
  @test_util.run_cuda_only
  def testDeterministicGradients(self, data_layout, data_rank, data_type):
    with self.session(force_gpu=True):
      # Using a cached_session with force_gpu=True does not work at the time
      # of writing (2019-12-10). Before the @parameterized.named_parameters
      # decorator was added, this non-cached session context was set outside
      # the iteration loops for the parameter combinations, and so was
      # re-used.
      seed = (
          hash(data_layout) % 256 + hash(data_rank) % 256 +
          hash(data_type) % 256)
      np.random.seed(seed)
      batch_size = 10
      channel_count = 8
      data_dim = 14
      input_shape = self._makeShapeTuple(batch_size, channel_count,
                                         data_rank, data_dim, data_layout)
      bias_shape = (channel_count,)
      output_shape = input_shape
      input_val = self._randomDataOp(input_shape, data_type)
      bias_val = self._randomDataOp(bias_shape, data_type)
      data_format = self._dataFormatFromDataLayout(data_layout)
      repeat_count = 5
      if context.executing_eagerly():

        def bias_gradients(local_seed):
          np.random.seed(local_seed)
          upstream_gradients = self._randomDataOp(output_shape, data_type)
          with backprop.GradientTape(persistent=True) as tape:
            tape.watch(bias_val)
            bias_add_output = nn_ops.bias_add(
                input_val, bias_val, data_format=data_format)
            gradient_injector_output = bias_add_output * upstream_gradients
          return tape.gradient(gradient_injector_output, bias_val)

        for i in range(repeat_count):
          local_seed = seed + i  # select different upstream gradients
          result_a = bias_gradients(local_seed)
          result_b = bias_gradients(local_seed)
          self.assertAllEqual(result_a, result_b)
      else:  # graph mode
        upstream_gradients = array_ops.placeholder(
            data_type, shape=output_shape, name='upstream_gradients')
        bias_add_output = nn_ops.bias_add(
            input_val, bias_val, data_format=data_format)
        gradient_injector_output = bias_add_output * upstream_gradients
        # The gradient function behaves as if grad_ys is multiplied by the
        # op gradient result, not passing the upstream gradients through the
        # op's gradient generation graph. This is the reason for using the
        # gradient injector.
        bias_gradients = gradients_impl.gradients(
            gradient_injector_output,
            bias_val,
            grad_ys=None,
            colocate_gradients_with_ops=True)[0]
        for i in range(repeat_count):
          feed_dict = {upstream_gradients: self._randomNDArray(output_shape)}
          result_a = bias_gradients.eval(feed_dict=feed_dict)
          result_b = bias_gradients.eval(feed_dict=feed_dict)
          self.assertAllEqual(result_a, result_b)

  # TODO(duncanriach): Re-enable the following three tests for the error
  #   checks after deterministic functionality is implemented at the CUDA
  #   kernel level.
  def testInputDims(self):
    pass

  def testBiasVec(self):
    pass

  def testBiasInputsMatch(self):
    pass
class BiasAddDeterministicTest(bias_op_base.BiasAddTestBase,
                               parameterized.TestCase):

  def _make_shape_tuple(self, batch_size, channel_count, data_rank, data_dim,
                        data_layout):
    data_dims = data_rank * (data_dim,)
    if data_layout == 'channels_first':
      shape = (batch_size,) + (channel_count,) + data_dims
    elif data_layout == 'channels_last':
      shape = (batch_size,) + data_dims + (channel_count,)
    else:
      raise ValueError('Unknown data format')
    return shape

  def _data_format_from_data_layout(self, data_layout=None):
    if data_layout == 'channels_first':
      return 'NCHW'
    elif data_layout == 'channels_last':
      return 'NHWC'
    else:
      raise ValueError('Unknown data_layout')

  def _random_data_op(self, shape, data_type):
    return constant_op.constant(
        2 * np.random.random_sample(shape) - 1, dtype=data_type)

  def _random_ndarray(self, shape):
    return 2 * np.random.random_sample(shape) - 1

  def _assert_reproducible(self, operation, feed_dict={}):
    with self.cached_session(force_gpu=True):
      result_a = operation[0].eval(feed_dict=feed_dict)
      result_b = operation[0].eval(feed_dict=feed_dict)
      self.assertAllEqual(result_a, result_b)

  # TODO(duncanriach): add test coverage for deterministic gradients
  #   in eager mode
  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          data_layout=['channels_first', 'channels_last'],
          data_rank=[1, 2, 3],
          data_type=[dtypes.float16, dtypes.float32, dtypes.float64]))
  @test_util.run_deprecated_v1
  @test_util.run_cuda_only
  def testDeterministicGradients(self, data_layout, data_rank, data_type):
    seed = (
        hash(data_layout) % 256 + hash(data_rank) % 256 +
        hash(data_type) % 256)
    np.random.seed(seed)
    batch_size = 10
    channel_count = 8
    data_dim = 14
    in_shape = self._make_shape_tuple(batch_size, channel_count, data_rank,
                                      data_dim, data_layout)
    bias_shape = (channel_count,)
    out_shape = in_shape
    in_op = self._random_data_op(in_shape, data_type)
    bias_op = self._random_data_op(bias_shape, data_type)
    data_format = self._data_format_from_data_layout(data_layout)
    bias_add_op = nn_ops.bias_add(in_op, bias_op, data_format=data_format)
    upstream_gradients = array_ops.placeholder(
        data_type, shape=out_shape, name='upstream_gradients')
    gradient_injector_op = bias_add_op * upstream_gradients
    # The gradient function behaves as if grad_ys is multiplied by the op
    # gradient result, not passing the upstream gradients through the op's
    # gradient generation graph. This is the reason for using the
    # gradient_injector_op.
    grad_ys = None
    bias_gradients_op = gradients_impl.gradients(
        gradient_injector_op,
        bias_op,
        grad_ys=grad_ys,
        colocate_gradients_with_ops=True)
    for i in range(5):
      feed_dict = {upstream_gradients: self._random_ndarray(out_shape)}
      self._assert_reproducible(bias_gradients_op, feed_dict=feed_dict)

  # TODO(duncanriach): Re-enable the following three tests for the error
  #   checks after deterministic functionality is implemented at the CUDA
  #   kernel level.
  def testInputDims(self):
    pass

  def testBiasVec(self):
    pass

  def testBiasInputsMatch(self):
    pass
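# A distilled sketch of the gradient-injector pattern used by both
# BiasAddDeterministicTest variants above. tf.gradients treats grad_ys as a
# multiplier applied to the final gradient rather than feeding it through the
# op's gradient kernel, so the tests instead multiply the op output by a
# random "upstream" tensor. Assumes TF2 eager execution (not the test file's
# own imports); d/d(bias) of sum(bias_add(x, bias) * upstream) is upstream
# summed over the non-channel axes, so the bias-add gradient kernel sees
# non-trivial incoming gradients.
import numpy as np
import tensorflow as tf

x = tf.constant(np.random.random((10, 14, 8)), dtype=tf.float32)
bias = tf.constant(np.random.random(8), dtype=tf.float32)
upstream = tf.constant(np.random.random((10, 14, 8)), dtype=tf.float32)

with tf.GradientTape() as tape:
  tape.watch(bias)
  injected = tf.nn.bias_add(x, bias) * upstream

grad = tape.gradient(injected, bias)
np.testing.assert_allclose(
    grad.numpy(), tf.reduce_sum(upstream, axis=[0, 1]).numpy(),
    rtol=1e-5, atol=1e-5)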
class MergeLayersTest(keras_parameterized.TestCase): def test_merge_add(self): i1 = keras.layers.Input(shape=(4, 5)) i2 = keras.layers.Input(shape=(4, 5)) i3 = keras.layers.Input(shape=(4, 5)) add_layer = keras.layers.Add() o = add_layer([i1, i2, i3]) self.assertListEqual(o.shape.as_list(), [None, 4, 5]) model = keras.models.Model([i1, i2, i3], o) model.run_eagerly = testing_utils.should_run_eagerly() model._experimental_run_tf_function = testing_utils.should_run_tf_function( ) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) x3 = np.random.random((2, 4, 5)) out = model.predict([x1, x2, x3]) self.assertEqual(out.shape, (2, 4, 5)) self.assertAllClose(out, x1 + x2 + x3, atol=1e-4) self.assertEqual( add_layer.compute_mask([i1, i2, i3], [None, None, None]), None) self.assertTrue( np.all( K.eval( add_layer.compute_mask( [i1, i2], [K.variable(x1), K.variable(x2)])))) with self.assertRaisesRegexp(ValueError, '`mask` should be a list.'): add_layer.compute_mask([i1, i2, i3], x1) with self.assertRaisesRegexp(ValueError, '`inputs` should be a list.'): add_layer.compute_mask(i1, [None, None, None]) with self.assertRaisesRegexp(ValueError, ' should have the same length.'): add_layer.compute_mask([i1, i2, i3], [None, None]) def test_merge_subtract(self): i1 = keras.layers.Input(shape=(4, 5)) i2 = keras.layers.Input(shape=(4, 5)) i3 = keras.layers.Input(shape=(4, 5)) subtract_layer = keras.layers.Subtract() o = subtract_layer([i1, i2]) self.assertListEqual(o.shape.as_list(), [None, 4, 5]) model = keras.models.Model([i1, i2], o) model.run_eagerly = testing_utils.should_run_eagerly() model._experimental_run_tf_function = testing_utils.should_run_tf_function( ) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) out = model.predict([x1, x2]) self.assertEqual(out.shape, (2, 4, 5)) self.assertAllClose(out, x1 - x2, atol=1e-4) self.assertEqual(subtract_layer.compute_mask([i1, i2], [None, None]), None) self.assertTrue( np.all( K.eval( subtract_layer.compute_mask( [i1, i2], [K.variable(x1), K.variable(x2)])))) with self.assertRaisesRegexp(ValueError, '`mask` should be a list.'): subtract_layer.compute_mask([i1, i2], x1) with self.assertRaisesRegexp(ValueError, '`inputs` should be a list.'): subtract_layer.compute_mask(i1, [None, None]) with self.assertRaisesRegexp( ValueError, 'layer should be called on exactly 2 inputs'): subtract_layer([i1, i2, i3]) with self.assertRaisesRegexp( ValueError, 'layer should be called on exactly 2 inputs'): subtract_layer([i1]) def test_merge_multiply(self): i1 = keras.layers.Input(shape=(4, 5)) i2 = keras.layers.Input(shape=(4, 5)) i3 = keras.layers.Input(shape=(4, 5)) o = keras.layers.multiply([i1, i2, i3]) self.assertListEqual(o.shape.as_list(), [None, 4, 5]) model = keras.models.Model([i1, i2, i3], o) model.run_eagerly = testing_utils.should_run_eagerly() model._experimental_run_tf_function = testing_utils.should_run_tf_function( ) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) x3 = np.random.random((2, 4, 5)) out = model.predict([x1, x2, x3]) self.assertEqual(out.shape, (2, 4, 5)) self.assertAllClose(out, x1 * x2 * x3, atol=1e-4) def test_merge_average(self): i1 = keras.layers.Input(shape=(4, 5)) i2 = keras.layers.Input(shape=(4, 5)) o = keras.layers.average([i1, i2]) self.assertListEqual(o.shape.as_list(), [None, 4, 5]) model = keras.models.Model([i1, i2], o) model.run_eagerly = testing_utils.should_run_eagerly() model._experimental_run_tf_function = testing_utils.should_run_tf_function( ) x1 = np.random.random((2, 4, 5)) x2 
= np.random.random((2, 4, 5)) out = model.predict([x1, x2]) self.assertEqual(out.shape, (2, 4, 5)) self.assertAllClose(out, 0.5 * (x1 + x2), atol=1e-4) def test_merge_maximum(self): i1 = keras.layers.Input(shape=(4, 5)) i2 = keras.layers.Input(shape=(4, 5)) o = keras.layers.maximum([i1, i2]) self.assertListEqual(o.shape.as_list(), [None, 4, 5]) model = keras.models.Model([i1, i2], o) model.run_eagerly = testing_utils.should_run_eagerly() model._experimental_run_tf_function = testing_utils.should_run_tf_function( ) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) out = model.predict([x1, x2]) self.assertEqual(out.shape, (2, 4, 5)) self.assertAllClose(out, np.maximum(x1, x2), atol=1e-4) def test_merge_minimum(self): i1 = keras.layers.Input(shape=(4, 5)) i2 = keras.layers.Input(shape=(4, 5)) o = keras.layers.minimum([i1, i2]) self.assertListEqual(o.shape.as_list(), [None, 4, 5]) model = keras.models.Model([i1, i2], o) model.run_eagerly = testing_utils.should_run_eagerly() model._experimental_run_tf_function = testing_utils.should_run_tf_function( ) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) out = model.predict([x1, x2]) self.assertEqual(out.shape, (2, 4, 5)) self.assertAllClose(out, np.minimum(x1, x2), atol=1e-4) def test_merge_concatenate(self): i1 = keras.layers.Input(shape=(4, 5)) i2 = keras.layers.Input(shape=(4, 5)) concat_layer = keras.layers.Concatenate(axis=1) o = concat_layer([i1, i2]) self.assertListEqual(o.shape.as_list(), [None, 8, 5]) model = keras.models.Model([i1, i2], o) model.run_eagerly = testing_utils.should_run_eagerly() model._experimental_run_tf_function = testing_utils.should_run_tf_function( ) x1 = np.random.random((2, 4, 5)) x2 = np.random.random((2, 4, 5)) out = model.predict([x1, x2]) self.assertEqual(out.shape, (2, 8, 5)) self.assertAllClose(out, np.concatenate([x1, x2], axis=1), atol=1e-4) self.assertEqual(concat_layer.compute_mask([i1, i2], [None, None]), None) self.assertTrue( np.all( K.eval( concat_layer.compute_mask( [i1, i2], [K.variable(x1), K.variable(x2)])))) with self.assertRaisesRegexp(ValueError, '`mask` should be a list.'): concat_layer.compute_mask([i1, i2], x1) with self.assertRaisesRegexp(ValueError, '`inputs` should be a list.'): concat_layer.compute_mask(i1, [None, None]) with self.assertRaisesRegexp(ValueError, 'should have the same length'): concat_layer.compute_mask([i1, i2], [None]) with self.assertRaisesRegexp( ValueError, 'layer should be called on a list of inputs'): concat_layer(i1) def test_merge_dot(self): i1 = keras.layers.Input(shape=(4, )) i2 = keras.layers.Input(shape=(4, )) o = keras.layers.dot([i1, i2], axes=1) self.assertListEqual(o.shape.as_list(), [None, 1]) model = keras.models.Model([i1, i2], o) model.run_eagerly = testing_utils.should_run_eagerly() model._experimental_run_tf_function = testing_utils.should_run_tf_function( ) _ = keras.layers.Dot(axes=1).get_config() x1 = np.random.random((2, 4)) x2 = np.random.random((2, 4)) out = model.predict([x1, x2]) self.assertEqual(out.shape, (2, 1)) expected = np.zeros((2, 1)) expected[0, 0] = np.dot(x1[0], x2[0]) expected[1, 0] = np.dot(x1[1], x2[1]) self.assertAllClose(out, expected, atol=1e-4) # Test with negative tuple of axes. 
    o = keras.layers.dot([i1, i2], axes=(-1, -1))
    self.assertListEqual(o.shape.as_list(), [None, 1])
    model = keras.models.Model([i1, i2], o)
    model.run_eagerly = testing_utils.should_run_eagerly()
    model._experimental_run_tf_function = testing_utils.should_run_tf_function()
    out = model.predict([x1, x2])
    self.assertEqual(out.shape, (2, 1))
    self.assertAllClose(out, expected, atol=1e-4)

    # test compute_output_shape
    layer = keras.layers.Dot(axes=-1)
    self.assertEqual(layer.compute_output_shape([(4, 5), (4, 5)]), (4, 1))

  @parameterized.named_parameters(
      *tf_test_util.generate_combinations_with_testcase_name(layer=[
          # Concatenate is excluded: it changes the per-row lengths, so the
          # padded ragged and dense outputs compared below would differ.
          keras.layers.Add, keras.layers.Subtract, keras.layers.Multiply,
          keras.layers.Minimum, keras.layers.Maximum, keras.layers.Average
      ]))
  def test_merge_with_ragged_input(self, layer):
    ragged_data = ragged_factory_ops.constant(
        [[1., 1., 1.], [1., 1.], [1., 1., 1., 1.]], ragged_rank=1)
    dense_data = ragged_data.to_tensor()

    input1 = keras.Input(shape=(None,), ragged=True)
    input2 = keras.Input(shape=(None,), ragged=True)
    out = layer()([input1, input2])  # use the parameterized layer class
    model = keras.models.Model(inputs=[input1, input2], outputs=out)
    out_ragged = model.predict([ragged_data, ragged_data], steps=1)
    out_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        out_ragged).to_tensor()

    input1 = keras.Input(shape=(None,))
    input2 = keras.Input(shape=(None,))
    out = layer()([input1, input2])
    model = keras.models.Model(inputs=[input1, input2], outputs=out)
    out_dense = model.predict([dense_data, dense_data], steps=1)

    self.assertAllEqual(out_dense, out_ragged)
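# Usage sketch, separate from the test class above: the lowercase functional
# interfaces exercised by these tests (keras.layers.add, multiply, average,
# maximum, minimum, concatenate, dot) instantiate the corresponding layer
# class and apply it to the input list, so the two spellings below should
# build equivalent graphs. Minimal illustration, assuming TensorFlow 2.x:
from tensorflow import keras

i1 = keras.layers.Input(shape=(4, 5))
i2 = keras.layers.Input(shape=(4, 5))
o_functional = keras.layers.add([i1, i2])  # functional interface
o_layer = keras.layers.Add()([i1, i2])     # layer class
assert o_functional.shape.as_list() == o_layer.shape.as_list() == [None, 4, 5]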
from absl.testing import parameterized

import numpy as np
import tensorflow as tf

from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras import testing_utils
from tensorflow.python.framework import test_util as tf_test_util
from tensorflow.python.platform import test

from deepcell import layers


@keras_parameterized.run_all_keras_modes
@parameterized.named_parameters(
    *tf_test_util.generate_combinations_with_testcase_name(
        norm_method=[None, 'std', 'max', 'whole_image']))
class ImageNormalizationTest(keras_parameterized.TestCase):

    def test_normalize_2d(self, norm_method):
        custom_objects = {'ImageNormalization2D': layers.ImageNormalization2D}
        with tf.keras.utils.custom_object_scope(custom_objects):
            testing_utils.layer_test(
                layers.ImageNormalization2D,
                kwargs={'norm_method': norm_method,
                        'filter_size': 3,
                        'data_format': 'channels_last'},
                input_shape=(3, 5, 6, 4))
            testing_utils.layer_test(
                layers.ImageNormalization2D,
                kwargs={'norm_method': norm_method,
                        'filter_size': 3,
                        'data_format': 'channels_first'},
                input_shape=(3, 4, 5, 6))
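# Sketch of the round trip that makes custom_object_scope necessary above:
# testing_utils.layer_test serializes the layer to a config and rebuilds it,
# and deserialization can only resolve 'ImageNormalization2D' if the class is
# registered. A minimal standalone version of that round trip (assuming a
# deepcell install that provides layers.ImageNormalization2D):
import tensorflow as tf
from deepcell import layers

layer = layers.ImageNormalization2D(norm_method='std', filter_size=3)
config = {'class_name': 'ImageNormalization2D', 'config': layer.get_config()}
with tf.keras.utils.custom_object_scope(
        {'ImageNormalization2D': layers.ImageNormalization2D}):
    rebuilt = tf.keras.layers.deserialize(config)
assert isinstance(rebuilt, layers.ImageNormalization2D)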
class ConvLSTMTest(keras_parameterized.TestCase):

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          data_format=['channels_first', 'channels_last'],
          return_sequences=[True, False]))
  def test_conv_lstm(self, data_format, return_sequences):
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    if data_format == 'channels_first':
      inputs = np.random.rand(num_samples, sequence_len,
                              input_channel,
                              input_num_row, input_num_col)
    else:
      inputs = np.random.rand(num_samples, sequence_len,
                              input_num_row, input_num_col,
                              input_channel)

    # test for return state:
    x = keras.Input(batch_shape=inputs.shape)
    kwargs = {
        'data_format': data_format,
        'return_sequences': return_sequences,
        'return_state': True,
        'stateful': True,
        'filters': filters,
        'kernel_size': (num_row, num_col),
        'padding': 'valid'
    }
    layer = keras.layers.ConvLSTM2D(**kwargs)
    layer.build(inputs.shape)
    outputs = layer(x)
    _, states = outputs[0], outputs[1:]
    self.assertEqual(len(states), 2)
    model = keras.models.Model(x, states[0])
    state = model.predict(inputs)

    self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4)

    # test for output shape:
    testing_utils.layer_test(
        keras.layers.ConvLSTM2D,
        kwargs={
            'data_format': data_format,
            'return_sequences': return_sequences,
            'filters': filters,
            'kernel_size': (num_row, num_col),
            'padding': 'valid'
        },
        input_shape=inputs.shape)

  def test_conv_lstm_statefulness(self):
    # Tests for statefulness
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    inputs = np.random.rand(num_samples, sequence_len,
                            input_num_row, input_num_col,
                            input_channel)

    with self.cached_session():
      model = keras.models.Sequential()
      kwargs = {
          'data_format': 'channels_last',
          'return_sequences': False,
          'filters': filters,
          'kernel_size': (num_row, num_col),
          'stateful': True,
          'batch_input_shape': inputs.shape,
          'padding': 'same'
      }
      layer = keras.layers.ConvLSTM2D(**kwargs)

      model.add(layer)
      model.compile(optimizer='sgd', loss='mse')
      out1 = model.predict(np.ones_like(inputs))

      # train once so that the states change
      model.train_on_batch(np.ones_like(inputs), np.random.random(out1.shape))
      out2 = model.predict(np.ones_like(inputs))

      # if the state is not reset, output should be different
      self.assertNotEqual(out1.max(), out2.max())

      # check that output changes after states are reset
      # (even though the model itself didn't change)
      layer.reset_states()
      out3 = model.predict(np.ones_like(inputs))
      self.assertNotEqual(out3.max(), out2.max())

      # check that container-level reset_states() works
      model.reset_states()
      out4 = model.predict(np.ones_like(inputs))
      self.assertAllClose(out3, out4, atol=1e-5)

      # check that the call to `predict` updated the states
      out5 = model.predict(np.ones_like(inputs))
      self.assertNotEqual(out4.max(), out5.max())

  def test_conv_lstm_regularizers(self):
    # check regularizers
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    inputs = np.random.rand(num_samples, sequence_len,
                            input_num_row, input_num_col,
                            input_channel)

    with self.cached_session():
      kwargs = {
          'data_format': 'channels_last',
          'return_sequences': False,
          'kernel_size': (num_row, num_col),
          'stateful': True,
          'filters': filters,
          'batch_input_shape': inputs.shape,
          'kernel_regularizer': keras.regularizers.L1L2(l1=0.01),
          'recurrent_regularizer': keras.regularizers.L1L2(l1=0.01),
          'activity_regularizer': 'l2',
          'bias_regularizer': 'l2',
          'kernel_constraint': 'max_norm',
          'recurrent_constraint': 'max_norm',
          'bias_constraint': 'max_norm',
          'padding': 'same'
      }
      layer = keras.layers.ConvLSTM2D(**kwargs)
      layer.build(inputs.shape)
      self.assertEqual(len(layer.losses), 3)
      layer(keras.backend.variable(np.ones(inputs.shape)))
      self.assertEqual(len(layer.losses), 4)

  def test_conv_lstm_dropout(self):
    # check dropout
    with self.cached_session():
      testing_utils.layer_test(
          keras.layers.ConvLSTM2D,
          kwargs={
              'data_format': 'channels_last',
              'return_sequences': False,
              'filters': 2,
              'kernel_size': (3, 3),
              'padding': 'same',
              'dropout': 0.1,
              'recurrent_dropout': 0.1
          },
          input_shape=(1, 2, 5, 5, 2))

  def test_conv_lstm_cloning(self):
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.ConvLSTM2D(5, 3, input_shape=(None, 5, 5, 3)))

      test_inputs = np.random.random((2, 4, 5, 5, 3))
      reference_outputs = model.predict(test_inputs)
      weights = model.get_weights()

    # Use a new graph to clone the model
    with self.cached_session():
      clone = keras.models.clone_model(model)
      clone.set_weights(weights)

      outputs = clone.predict(test_inputs)
      self.assertAllClose(reference_outputs, outputs, atol=1e-5)

  def test_conv_lstm_with_initial_state(self):
    num_samples = 32
    sequence_len = 5
    encoder_inputs = keras.layers.Input((None, 32, 32, 3))
    encoder = keras.layers.ConvLSTM2D(
        filters=32, kernel_size=(3, 3), padding='same',
        return_sequences=False, return_state=True)
    _, state_h, state_c = encoder(encoder_inputs)
    encoder_states = [state_h, state_c]

    decoder_inputs = keras.layers.Input((None, 32, 32, 4))
    decoder_lstm = keras.layers.ConvLSTM2D(
        filters=32, kernel_size=(3, 3), padding='same',
        return_sequences=False, return_state=False)
    decoder_outputs = decoder_lstm(decoder_inputs,
                                   initial_state=encoder_states)
    output = keras.layers.Conv2D(
        1, (3, 3), padding='same', activation='relu')(decoder_outputs)

    model = keras.Model([encoder_inputs, decoder_inputs], output)
    model.compile(
        optimizer='sgd', loss='mse',
        run_eagerly=testing_utils.should_run_eagerly())

    x_1 = np.random.rand(num_samples, sequence_len, 32, 32, 3)
    x_2 = np.random.rand(num_samples, sequence_len, 32, 32, 4)
    y = np.random.rand(num_samples, 32, 32, 1)
    model.fit([x_1, x_2], y)

    model.predict([x_1, x_2])
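# Shape sketch for the ConvLSTM2D tests above: with padding='valid' and a
# (3, 3) kernel, each 5x5 frame shrinks to 3x3, and return_sequences controls
# whether the time axis is kept. A minimal check, assuming TensorFlow 2.x:
import numpy as np
from tensorflow import keras

frames = np.zeros((1, 2, 5, 5, 2), dtype='float32')  # (batch, time, h, w, ch)
last_only = keras.layers.ConvLSTM2D(
    filters=2, kernel_size=(3, 3), padding='valid')(frames)
full_seq = keras.layers.ConvLSTM2D(
    filters=2, kernel_size=(3, 3), padding='valid',
    return_sequences=True)(frames)
assert last_only.shape == (1, 3, 3, 2)    # last state only
assert full_seq.shape == (1, 2, 3, 3, 2)  # one output per timestep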
class CuDNNTest(test.TestCase, parameterized.TestCase):

  @test_util.run_in_graph_and_eager_modes
  def test_cudnn_rnn_basics(self):
    if test.is_gpu_available(cuda_only=True):
      with self.session(use_gpu=True):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32
        for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
          for return_sequences in [True, False]:
            with keras.utils.CustomObjectScope({
                'keras.layers.CuDNNGRU': keras.layers.CuDNNGRU,
                'keras.layers.CuDNNLSTM': keras.layers.CuDNNLSTM
            }):
              testing_utils.layer_test(
                  layer_class,
                  kwargs={
                      'units': units,
                      'return_sequences': return_sequences
                  },
                  input_shape=(num_samples, timesteps, input_size))
          for go_backwards in [True, False]:
            with keras.utils.CustomObjectScope({
                'keras.layers.CuDNNGRU': keras.layers.CuDNNGRU,
                'keras.layers.CuDNNLSTM': keras.layers.CuDNNLSTM
            }):
              testing_utils.layer_test(
                  layer_class,
                  kwargs={
                      'units': units,
                      'go_backwards': go_backwards
                  },
                  input_shape=(num_samples, timesteps, input_size))

  @test_util.run_in_graph_and_eager_modes
  def test_trainability(self):
    if test.is_gpu_available(cuda_only=True):
      with self.session(use_gpu=True):
        input_size = 10
        units = 2
        for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
          layer = layer_class(units)
          layer.build((None, None, input_size))
          self.assertEqual(len(layer.weights), 3)
          self.assertEqual(len(layer.trainable_weights), 3)
          self.assertEqual(len(layer.non_trainable_weights), 0)
          layer.trainable = False
          self.assertEqual(len(layer.weights), 3)
          self.assertEqual(len(layer.non_trainable_weights), 3)
          self.assertEqual(len(layer.trainable_weights), 0)
          layer.trainable = True
          self.assertEqual(len(layer.weights), 3)
          self.assertEqual(len(layer.trainable_weights), 3)
          self.assertEqual(len(layer.non_trainable_weights), 0)

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  def test_regularizer(self, layer_class):
    if test.is_gpu_available(cuda_only=True):
      with self.session(use_gpu=True):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32
        layer = layer_class(
            units,
            return_sequences=False,
            input_shape=(timesteps, input_size),
            kernel_regularizer=keras.regularizers.l1(0.01),
            recurrent_regularizer=keras.regularizers.l1(0.01),
            bias_regularizer='l2')
        layer.build((None, None, input_size))
        self.assertEqual(len(layer.losses), 3)

        layer = layer_class(
            units,
            return_sequences=False,
            input_shape=(timesteps, input_size),
            activity_regularizer='l2')
        self.assertTrue(layer.activity_regularizer)
        x = keras.backend.variable(
            np.ones((num_samples, timesteps, input_size)))
        layer(x)
        self.assertEqual(len(layer.get_losses_for(x)), 1)

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  def test_return_state(self, layer_class):
    if test.is_gpu_available(cuda_only=True):
      with self.session(use_gpu=True):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32
        num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

        inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size))
        layer = layer_class(units, return_state=True, stateful=True)
        outputs = layer(inputs)
        _, state = outputs[0], outputs[1:]
        self.assertEqual(len(state), num_states)
        model = keras.models.Model(inputs, state[0])

        inputs = np.random.random((num_samples, timesteps, input_size))
        state = model.predict(inputs)
        np.testing.assert_allclose(
            keras.backend.eval(layer.states[0]), state, atol=1e-4)

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  def test_time_major_input(self, layer_class):
    if test.is_gpu_available(cuda_only=True):
      with self.session(use_gpu=True):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32

        model = keras.models.Sequential()
        model.add(
            keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
        layer = layer_class(units, time_major=True, return_sequences=True)
        model.add(layer)
        model.add(
            keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
        model.compile(loss='categorical_crossentropy', optimizer='adam')
        model.fit(
            np.ones((num_samples, timesteps, input_size)),
            np.ones((num_samples, timesteps, units)))
        out = model.predict(np.ones((num_samples, timesteps, input_size)))
        self.assertEqual(out.shape, (num_samples, timesteps, units))

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  def test_specify_initial_state_keras_tensor(self, layer_class):
    if test.is_gpu_available(cuda_only=True):
      with self.session(use_gpu=True):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32
        num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

        inputs = keras.Input((timesteps, input_size))
        initial_state = [keras.Input((units,)) for _ in range(num_states)]
        layer = layer_class(units)
        if len(initial_state) == 1:
          output = layer(inputs, initial_state=initial_state[0])
        else:
          output = layer(inputs, initial_state=initial_state)
        self.assertIn(initial_state[0],
                      layer._inbound_nodes[0].input_tensors)

        model = keras.models.Model([inputs] + initial_state, output)
        model.compile(loss='categorical_crossentropy', optimizer='adam')

        inputs = np.random.random((num_samples, timesteps, input_size))
        initial_state = [
            np.random.random((num_samples, units)) for _ in range(num_states)
        ]
        targets = np.random.random((num_samples, units))
        model.fit([inputs] + initial_state, targets)

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  def test_statefulness(self, layer_class):
    if test.is_gpu_available(cuda_only=True):
      with self.session(use_gpu=True):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32

        model = keras.models.Sequential()
        model.add(
            keras.layers.Embedding(
                10,
                input_size,
                input_length=timesteps,
                batch_input_shape=(num_samples, timesteps)))
        layer = layer_class(
            units, return_sequences=False, stateful=True, weights=None)
        model.add(layer)
        model.compile(optimizer='sgd', loss='mse')
        out1 = model.predict(np.ones((num_samples, timesteps)))
        self.assertEqual(out1.shape, (num_samples, units))

        # train once so that the states change
        model.train_on_batch(
            np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
        out2 = model.predict(np.ones((num_samples, timesteps)))

        # if the state is not reset, output should be different
        self.assertNotEqual(out1.max(), out2.max())

        # check that output changes after states are reset
        # (even though the model itself didn't change)
        layer.reset_states()
        out3 = model.predict(np.ones((num_samples, timesteps)))
        self.assertNotEqual(out2.max(), out3.max())

        # check that container-level reset_states() works
        model.reset_states()
        out4 = model.predict(np.ones((num_samples, timesteps)))
        self.assertAllClose(out3, out4, atol=1e-5)

        # check that the call to `predict` updated the states
        out5 = model.predict(np.ones((num_samples, timesteps)))
        self.assertNotEqual(out4.max(), out5.max())

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'],
          to_cudnn=[True, False],
          bidirectional=[True, False],
          implementation=[1, 2],
          model_nest_level=[1, 2],
          model_type=['seq', 'func']))
  def test_load_weights_between_noncudnn_rnn(self, rnn_type, to_cudnn,
                                             bidirectional, implementation,
                                             model_nest_level, model_type):
    if test.is_gpu_available(cuda_only=True):
      with self.session(use_gpu=True):
        input_size = 10
        timesteps = 6
        input_shape = (timesteps, input_size)
        units = 2
        num_samples = 32
        inputs = np.random.random((num_samples, timesteps, input_size))

        rnn_layer_kwargs = {
            'recurrent_activation': 'sigmoid',
            # ensure biases are non-zero and properly converted
            'bias_initializer': 'random_uniform',
            'implementation': implementation
        }
        if rnn_type == 'LSTM':
          rnn_layer_class = keras.layers.LSTM
          cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
        else:
          rnn_layer_class = keras.layers.GRU
          cudnn_rnn_layer_class = keras.layers.CuDNNGRU
          rnn_layer_kwargs['reset_after'] = True

        layer = rnn_layer_class(units, **rnn_layer_kwargs)
        if bidirectional:
          layer = keras.layers.Bidirectional(layer)

        cudnn_layer = cudnn_rnn_layer_class(units)
        if bidirectional:
          cudnn_layer = keras.layers.Bidirectional(cudnn_layer)

        model = self._make_nested_model(input_shape, layer, model_nest_level,
                                        model_type)
        cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
                                              model_nest_level, model_type)

        if to_cudnn:
          self._convert_model_weights(model, cudnn_model)
        else:
          self._convert_model_weights(cudnn_model, model)

        self.assertAllClose(
            model.predict(inputs), cudnn_model.predict(inputs), atol=1e-4)

  def _make_nested_model(self, input_shape, layer, level=1, model_type='func'):
    # example: make_nested_seq_model((1,), Dense(10), level=2).summary()
    def make_nested_seq_model(input_shape, layer, level=1):
      model = layer
      for i in range(1, level + 1):
        layers = [keras.layers.InputLayer(input_shape),
                  model] if (i == 1) else [model]
        model = keras.models.Sequential(layers)
      return model

    # example: make_nested_func_model((1,), Dense(10), level=2).summary()
    def make_nested_func_model(input_shape, layer, level=1):
      model_input = keras.layers.Input(input_shape)
      model = layer
      for _ in range(level):
        model = keras.models.Model(model_input, model(model_input))
      return model

    if model_type == 'func':
      return make_nested_func_model(input_shape, layer, level)
    elif model_type == 'seq':
      return make_nested_seq_model(input_shape, layer, level)

  def _convert_model_weights(self, source_model, target_model):
    _, fname = tempfile.mkstemp('.h5')
    source_model.save_weights(fname)
    target_model.load_weights(fname)
    os.remove(fname)

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
  def test_load_weights_between_noncudnn_rnn_time_distributed(
      self, rnn_type, to_cudnn):
    # Similar to test_load_weights_between_noncudnn_rnn(), but the input has
    # a different rank due to the use of TimeDistributed. Issue: #10356.
    if test.is_gpu_available(cuda_only=True):
      with self.session(use_gpu=True):
        input_size = 10
        steps = 6
        timesteps = 6
        input_shape = (timesteps, steps, input_size)
        units = 2
        num_samples = 32
        inputs = np.random.random((num_samples, timesteps, steps, input_size))

        rnn_layer_kwargs = {
            'recurrent_activation': 'sigmoid',
            # ensure biases are non-zero and properly converted
            'bias_initializer': 'random_uniform',
        }
        if rnn_type == 'LSTM':
          rnn_layer_class = keras.layers.LSTM
          cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
        else:
          rnn_layer_class = keras.layers.GRU
          cudnn_rnn_layer_class = keras.layers.CuDNNGRU
          rnn_layer_kwargs['reset_after'] = True

        layer = rnn_layer_class(units, **rnn_layer_kwargs)
        layer = keras.layers.TimeDistributed(layer)

        cudnn_layer = cudnn_rnn_layer_class(units)
        cudnn_layer = keras.layers.TimeDistributed(cudnn_layer)

        model = self._make_nested_model(input_shape, layer)
        cudnn_model = self._make_nested_model(input_shape, cudnn_layer)

        if to_cudnn:
          self._convert_model_weights(model, cudnn_model)
        else:
          self._convert_model_weights(cudnn_model, model)

        self.assertAllClose(
            model.predict(inputs), cudnn_model.predict(inputs), atol=1e-4)

  @test_util.run_in_graph_and_eager_modes
  def test_cudnnrnn_bidirectional(self):
    if test.is_gpu_available(cuda_only=True):
      with self.session(use_gpu=True):
        rnn = keras.layers.CuDNNGRU
        samples = 2
        dim = 2
        timesteps = 2
        output_dim = 2
        mode = 'concat'

        x = np.random.random((samples, timesteps, dim))
        target_dim = 2 * output_dim if mode == 'concat' else output_dim
        y = np.random.random((samples, target_dim))

        # test with Sequential model
        model = keras.Sequential()
        model.add(
            keras.layers.Bidirectional(
                rnn(output_dim), merge_mode=mode, input_shape=(None, dim)))
        model.compile(
            loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
        model.fit(x, y, epochs=1, batch_size=1)

        # test config
        model.get_config()
        model = keras.models.model_from_json(model.to_json())
        model.summary()

        # test stacked bidirectional layers
        model = keras.Sequential()
        model.add(
            keras.layers.Bidirectional(
                rnn(output_dim, return_sequences=True),
                merge_mode=mode,
                input_shape=(None, dim)))
        model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode))
        model.compile(
            loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
        model.fit(x, y, epochs=1, batch_size=1)

        # test with functional API
        inputs = keras.Input((timesteps, dim))
        outputs = keras.layers.Bidirectional(
            rnn(output_dim), merge_mode=mode)(inputs)
        model = keras.Model(inputs, outputs)
        model.compile(
            loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
        model.fit(x, y, epochs=1, batch_size=1)

        # Bidirectional and stateful
        inputs = keras.Input(batch_shape=(1, timesteps, dim))
        outputs = keras.layers.Bidirectional(
            rnn(output_dim, stateful=True), merge_mode=mode)(inputs)
        model = keras.Model(inputs, outputs)
        model.compile(
            loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
        model.fit(x, y, epochs=1, batch_size=1)

  def test_preprocess_weights_for_loading_gru_incompatible(self):
    """Test loading weights between incompatible layers.

    Should fail fast with an exception.
""" if test.is_gpu_available(cuda_only=True): with self.session(use_gpu=True): input_shape = (3, 5) def gru(cudnn=False, **kwargs): layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRU return layer_class(2, input_shape=input_shape, **kwargs) def get_layer_weights(layer): layer.build(input_shape=input_shape) return layer.get_weights() def assert_not_compatible(src, dest, message): with self.assertRaises(ValueError) as ex: keras.engine.saving.preprocess_weights_for_loading( dest, get_layer_weights(src)) self.assertIn(message, str(ex.exception)) assert_not_compatible( gru(), gru(cudnn=True), 'GRU(reset_after=False) is not compatible with CuDNNGRU') assert_not_compatible( gru(cudnn=True), gru(), 'CuDNNGRU is not compatible with GRU(reset_after=False)') assert_not_compatible( gru(), gru(reset_after=True), 'GRU(reset_after=False) is not compatible with ' 'GRU(reset_after=True)') assert_not_compatible( gru(reset_after=True), gru(), 'GRU(reset_after=True) is not compatible with ' 'GRU(reset_after=False)')
class TestBackboneUtils(keras_parameterized.TestCase):

    @keras_parameterized.run_with_all_model_types
    @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters(
        *tf_test_util.generate_combinations_with_testcase_name(
            data_format=[
                # 'channels_first',
                'channels_last'
            ]))
    def test_get_featurenet_backbone(self, data_format):
        backbone = 'featurenet'
        input_shape = (256, 256, 3)
        inputs = Input(shape=input_shape)
        with self.cached_session():
            K.set_image_data_format(data_format)
            model, output_dict = backbone_utils.get_backbone(
                backbone, inputs, return_dict=True)
            assert isinstance(output_dict, dict)
            assert all(k.startswith('C') for k in output_dict)
            assert isinstance(model, Model)

            # No imagenet weights for featurenet backbone
            with self.assertRaises(ValueError):
                backbone_utils.get_backbone(backbone, inputs,
                                            use_imagenet=True)

    # @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters(
        *tf_test_util.generate_combinations_with_testcase_name(
            data_format=[
                # 'channels_first',
                'channels_last'
            ]))
    def test_get_featurenet3d_backbone(self, data_format):
        backbone = 'featurenet3d'
        input_shape = (40, 256, 256, 3)
        inputs = Input(shape=input_shape)
        with self.cached_session():
            K.set_image_data_format(data_format)
            model, output_dict = backbone_utils.get_backbone(
                backbone, inputs, return_dict=True)
            assert isinstance(output_dict, dict)
            assert all(k.startswith('C') for k in output_dict)
            assert isinstance(model, Model)

            # No imagenet weights for featurenet backbone
            with self.assertRaises(ValueError):
                backbone_utils.get_backbone(backbone, inputs,
                                            use_imagenet=True)

    # @keras_parameterized.run_with_all_model_types
    # @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters(
        *tf_test_util.generate_combinations_with_testcase_name(
            backbone=[
                'resnet50',
                'resnet101',
                'resnet152',
                'resnet50v2',
                'resnet101v2',
                'resnet152v2',
                # 'resnext50',
                # 'resnext101',
                'vgg16',
                'vgg19',
                'densenet121',
                'densenet169',
                'densenet201',
                'mobilenet',
                'mobilenetv2',
                'efficientnetb0',
                'efficientnetb1',
                'efficientnetb2',
                'efficientnetb3',
                'efficientnetb4',
                'efficientnetb5',
                'efficientnetb6',
                'efficientnetb7',
                'nasnet_large',
                'nasnet_mobile'
            ]))
    def test_get_backbone(self, backbone):
        with self.cached_session():
            K.set_image_data_format('channels_last')
            inputs = Input(shape=(256, 256, 3))
            model, output_dict = backbone_utils.get_backbone(
                backbone, inputs, return_dict=True)
            assert isinstance(output_dict, dict)
            assert all(k.startswith('C') for k in output_dict)
            assert isinstance(model, Model)

    def test_invalid_backbone(self):
        inputs = Input(shape=(4, 2, 3))
        with self.assertRaises(ValueError):
            backbone_utils.get_backbone('bad', inputs, return_dict=True)
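# Usage sketch for the API under test, assuming the deepcell import path
# deepcell.utils.backbone_utils used by this module: get_backbone returns the
# backbone Model plus, with return_dict=True, a dict of feature tensors whose
# keys start with 'C' (as asserted above).
from tensorflow.keras.layers import Input
from deepcell.utils import backbone_utils

inputs = Input(shape=(256, 256, 3))
model, feature_dict = backbone_utils.get_backbone(
    'resnet50', inputs, return_dict=True)
print(sorted(feature_dict))  # e.g. pyramid levels such as ['C1', ..., 'C5']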