def test_specify_state_with_masking(self):
    """Trains an LSTM whose initial state is supplied through Keras inputs.

    A `Masking` layer is applied to the sequence input, but its result is
    discarded: the LSTM is called on the raw input tensor. The test only
    checks that one `train_on_batch` step runs.
    """
    num_states = 2
    timesteps = 3
    embedding_dim = 4
    units = 3
    num_samples = 2

    # Symbolic graph: one sequence input plus one input per LSTM state.
    sequence_in = keras.Input((timesteps, embedding_dim))
    _ = keras.layers.Masking()(sequence_in)
    state_inputs = [keras.Input((units,)) for _ in range(num_states)]
    lstm_out = rnn.LSTM(units)(sequence_in, initial_state=state_inputs)

    model = keras.models.Model([sequence_in] + state_inputs, lstm_out)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=gradient_descent.GradientDescentOptimizer(0.01))

    # Random numeric data matching the symbolic inputs above.
    sequence_data = np.random.random((num_samples, timesteps, embedding_dim))
    state_data = [
        np.random.random((num_samples, units)) for _ in range(num_states)
    ]
    targets = np.random.random((num_samples, units))
    model.train_on_batch([sequence_data] + state_data, targets)
def test_reset_states_with_values(self):
    """Checks `reset_states` on a stateful LSTM, with and without values.

    Verifies that:
      * `reset_states()` leaves `num_states` states, the first one all zeros;
      * `reset_states(values)` writes the given values into the states;
      * passing a list with one element too many raises `ValueError`;
      * state variables never show up in `layer.weights`.
    """
    num_states = 2
    timesteps = 3
    embedding_dim = 4
    units = 3
    num_samples = 2

    layer = rnn.LSTM(units, stateful=True)
    layer.build((num_samples, timesteps, embedding_dim))
    initial_weight_count = len(layer.weights)
    layer.reset_states()
    # unittest assertions (rather than bare `assert`) still run under
    # `python -O` and report the compared values on failure.
    self.assertEqual(len(layer.states), num_states)
    self.assertIsNotNone(layer.states[0])
    self.assertAllClose(
        keras.backend.eval(layer.states[0]),
        np.zeros(keras.backend.int_shape(layer.states[0])),
        atol=1e-4)

    state_shapes = [keras.backend.int_shape(state) for state in layer.states]
    values = [np.ones(shape) for shape in state_shapes]
    if len(values) == 1:
        values = values[0]
    layer.reset_states(values)
    self.assertAllClose(
        keras.backend.eval(layer.states[0]),
        np.ones(keras.backend.int_shape(layer.states[0])),
        atol=1e-4)

    # Test with invalid data
    with self.assertRaises(ValueError):
        layer.reset_states([1] * (len(layer.states) + 1))

    self.assertEqual(initial_weight_count, len(layer.weights))
    # Variables in "states" shouldn't show up in .weights
    layer.states = nest.map_structure(variables.Variable, values)
    layer.reset_states()
    self.assertEqual(initial_weight_count, len(layer.weights))
def test_specify_initial_state_non_keras_tensor(self):
    """Trains an LSTM whose initial state is given as backend variables.

    The initial states are created with
    `keras.backend.random_normal_variable` instead of `keras.Input`, so the
    model has a single input. The test only checks that one
    `train_on_batch` step runs.
    """
    num_states = 2
    timesteps = 3
    embedding_dim = 4
    units = 3
    num_samples = 2

    # Test with non-Keras tensor
    sequence_in = keras.Input((timesteps, embedding_dim))
    state_variables = [
        keras.backend.random_normal_variable((num_samples, units), 0, 1)
        for _ in range(num_states)
    ]
    lstm = rnn.LSTM(units)
    lstm_out = lstm(sequence_in, initial_state=state_variables)

    model = keras.models.Model(sequence_in, lstm_out)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=gradient_descent.GradientDescentOptimizer(0.01))

    sequence_data = np.random.random((num_samples, timesteps, embedding_dim))
    targets = np.random.random((num_samples, units))
    model.train_on_batch(sequence_data, targets)
def test_return_state(self):
    """Checks the state returned by a stateful LSTM against `layer.states`.

    A model is built whose output is the first returned state of a masked,
    stateful LSTM with `return_state=True`. After `predict`, that output must
    be close to the value held in `layer.states[0]`.

    Skipped on ROCm builds.
    """
    if test.is_built_with_rocm():
        self.skipTest('Skipping the test as ROCm MIOpen does not '
                      'support padded input yet.')
    num_states = 2
    timesteps = 3
    embedding_dim = 4
    units = 3
    num_samples = 2

    inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
    masked = keras.layers.Masking()(inputs)
    layer = rnn.LSTM(units, return_state=True, stateful=True)
    outputs = layer(masked)
    # Everything after the first element of `outputs` is a state tensor.
    state = outputs[1:]
    # unittest assertion (rather than bare `assert`) still runs under
    # `python -O` and reports both values on failure.
    self.assertEqual(len(state), num_states)
    model = keras.models.Model(inputs, state[0])

    inputs = np.random.random((num_samples, timesteps, embedding_dim))
    state = model.predict(inputs)
    self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4)
def test_time_major_and_go_backward(self, time_major, go_backwards):
    """Compares `rnn.LSTM` with `rnn_v1.LSTM`, then fits a plain LSTM model.

    Part one builds the same model around `rnn_v1.LSTM` and `rnn.LSTM`,
    copies the v1 weights into the v2 model and asserts that both predict
    close outputs for the given `time_major` / `go_backwards` combination.

    Part two trains, evaluates and predicts with a fresh `rnn.LSTM` model on
    data from `testing_utils.get_test_data`.

    NOTE(review): part two does not use `time_major` or `go_backwards` and
    may belong to a separate test -- confirm.

    Args:
      time_major: Forwarded to the LSTM layers' `time_major` argument.
      go_backwards: Forwarded to the LSTM layers' `go_backwards` argument.
    """
    input_shape = 10
    rnn_state_size = 8
    timestep = 4
    batch = 100

    x_train = np.random.random((batch, timestep, input_shape))

    def build_model(layer_cls):
        model_in = keras.layers.Input(
            shape=[timestep, input_shape], dtype=dtypes.float32)
        lstm = layer_cls(
            rnn_state_size,
            recurrent_activation='sigmoid',
            time_major=time_major,
            return_sequences=True,
            go_backwards=go_backwards)
        if not time_major:
            return keras.models.Model(model_in, lstm(model_in))

        # Swap the first two axes before and after the layer.
        swapped_in = keras.layers.Lambda(
            lambda tensor: array_ops.transpose(tensor, [1, 0, 2]))(model_in)
        swapped_out = lstm(swapped_in)
        model_out = keras.layers.Lambda(
            lambda tensor: array_ops.transpose(tensor, [1, 0, 2]))(swapped_out)
        return keras.models.Model(model_in, model_out)

    lstm_model = build_model(rnn_v1.LSTM)
    y_ref = lstm_model.predict(x_train)
    reference_weights = lstm_model.get_weights()

    lstm_v2_model = build_model(rnn.LSTM)
    lstm_v2_model.set_weights(reference_weights)
    y = lstm_v2_model.predict(x_train)

    self.assertAllClose(y, y_ref)

    # Part two: fit a standalone LSTM model.
    input_shape = 10
    rnn_state_size = 8
    output_shape = 8
    timestep = 4
    batch = 100
    epoch = 10

    train_data, _ = testing_utils.get_test_data(
        train_samples=batch,
        test_samples=0,
        input_shape=(timestep, input_shape),
        num_classes=output_shape)
    x_train, y_train = train_data
    y_train = keras.utils.to_categorical(y_train, output_shape)

    layer = rnn.LSTM(rnn_state_size)
    inputs = keras.layers.Input(
        shape=[timestep, input_shape], dtype=dtypes.float32)
    model = keras.models.Model(inputs, layer(inputs))
    model.compile('rmsprop', loss='mse')
    model.fit(x_train, y_train, epochs=epoch)
    model.evaluate(x_train, y_train)
    model.predict(x_train)
class LayerCorrectnessTest(keras_parameterized.TestCase):
    """Compares float32 and mixed precision versions of many Keras layers."""

    def setUp(self):
        """Configures two logical CPU devices on the first physical CPU."""
        super(LayerCorrectnessTest, self).setUp()
        # Set two virtual CPUs to test MirroredStrategy with multiple devices
        cpus = config_module.list_physical_devices('CPU')
        config_module.set_logical_device_configuration(cpus[0], [
            context.LogicalDeviceConfiguration(),
            context.LogicalDeviceConfiguration(),
        ])

    def _create_model_from_layer(self, layer, input_shapes):
        """Wraps `layer` in a compiled model.

        Args:
          layer: The layer to call on the model inputs.
          input_shapes: A list of input shapes, each including the batch
            dimension. One `Input` is created per shape.

        Returns:
          A model compiled with the 'sgd' optimizer and 'mse' loss.
        """
        inputs = [layers.Input(batch_input_shape=s) for s in input_shapes]
        # A single-input layer is called with the tensor itself, not a list.
        if len(inputs) == 1:
            inputs = inputs[0]
        y = layer(inputs)
        model = models.Model(inputs, y)
        model.compile('sgd', 'mse')
        return model

    @parameterized.named_parameters(
        ('LeakyReLU', advanced_activations.LeakyReLU, (2, 2)),
        ('PReLU', advanced_activations.PReLU, (2, 2)),
        ('ELU', advanced_activations.ELU, (2, 2)),
        ('ThresholdedReLU', advanced_activations.ThresholdedReLU, (2, 2)),
        ('Softmax', advanced_activations.Softmax, (2, 2)),
        ('ReLU', advanced_activations.ReLU, (2, 2)),
        ('Conv1D', lambda: convolutional.Conv1D(2, 2), (2, 2, 1)),
        ('Conv2D', lambda: convolutional.Conv2D(2, 2), (2, 2, 2, 1)),
        ('Conv3D', lambda: convolutional.Conv3D(2, 2), (2, 2, 2, 2, 1)),
        ('Conv2DTranspose', lambda: convolutional.Conv2DTranspose(2, 2),
         (2, 2, 2, 2)),
        ('SeparableConv2D', lambda: convolutional.SeparableConv2D(2, 2),
         (2, 2, 2, 1)),
        ('DepthwiseConv2D', lambda: convolutional.DepthwiseConv2D(2, 2),
         (2, 2, 2, 1)),
        ('UpSampling2D', convolutional.UpSampling2D, (2, 2, 2, 1)),
        ('ZeroPadding2D', convolutional.ZeroPadding2D, (2, 2, 2, 1)),
        ('Cropping2D', convolutional.Cropping2D, (2, 3, 3, 1)),
        ('ConvLSTM2D',
         lambda: convolutional_recurrent.ConvLSTM2D(4, kernel_size=(2, 2)),
         (4, 4, 4, 4, 4)),
        ('Dense', lambda: core.Dense(2), (2, 2)),
        ('Dropout', lambda: core.Dropout(0.5), (2, 2)),
        ('SpatialDropout2D', lambda: core.SpatialDropout2D(0.5),
         (2, 2, 2, 2)),
        ('Activation', lambda: core.Activation('sigmoid'), (2, 2)),
        ('Reshape', lambda: core.Reshape((1, 4, 1)), (2, 2, 2)),
        ('Permute', lambda: core.Permute((2, 1)), (2, 2, 2)),
        ('Attention', dense_attention.Attention,
         [(2, 2, 3), (2, 3, 3), (2, 3, 3)]),
        ('AdditiveAttention', dense_attention.AdditiveAttention,
         [(2, 2, 3), (2, 3, 3), (2, 3, 3)]),
        ('Embedding', lambda: embeddings.Embedding(4, 4), (2, 4), 2e-3, 2e-3,
         np.random.randint(4, size=(2, 4))),
        ('LocallyConnected1D', lambda: local.LocallyConnected1D(2, 2),
         (2, 2, 1)),
        ('LocallyConnected2D', lambda: local.LocallyConnected2D(2, 2),
         (2, 2, 2, 1)),
        ('Add', merge.Add, [(2, 2), (2, 2)]),
        ('Subtract', merge.Subtract, [(2, 2), (2, 2)]),
        ('Multiply', merge.Multiply, [(2, 2), (2, 2)]),
        ('Average', merge.Average, [(2, 2), (2, 2)]),
        ('Maximum', merge.Maximum, [(2, 2), (2, 2)]),
        ('Minimum', merge.Minimum, [(2, 2), (2, 2)]),
        ('Concatenate', merge.Concatenate, [(2, 2), (2, 2)]),
        ('Dot', lambda: merge.Dot(1), [(2, 2), (2, 2)]),
        ('GaussianNoise', lambda: noise.GaussianNoise(0.5), (2, 2)),
        ('GaussianDropout', lambda: noise.GaussianDropout(0.5), (2, 2)),
        ('AlphaDropout', lambda: noise.AlphaDropout(0.5), (2, 2)),
        ('BatchNormalization', normalization_v2.BatchNormalization, (2, 2),
         1e-2, 1e-2),
        ('LayerNormalization', normalization.LayerNormalization, (2, 2)),
        ('LayerNormalizationUnfused',
         lambda: normalization.LayerNormalization(axis=1), (2, 2, 2)),
        ('MaxPooling2D', pooling.MaxPooling2D, (2, 2, 2, 1)),
        ('AveragePooling2D', pooling.AveragePooling2D, (2, 2, 2, 1)),
        ('GlobalMaxPooling2D', pooling.GlobalMaxPooling2D, (2, 2, 2, 1)),
        ('GlobalAveragePooling2D', pooling.GlobalAveragePooling2D,
         (2, 2, 2, 1)),
        ('SimpleRNN', lambda: recurrent.SimpleRNN(units=4), (4, 4, 4),
         1e-2, 1e-2),
        ('GRU', lambda: recurrent.GRU(units=4), (4, 4, 4)),
        ('LSTM', lambda: recurrent.LSTM(units=4), (4, 4, 4)),
        ('GRUV2', lambda: recurrent_v2.GRU(units=4), (4, 4, 4)),
        ('LSTMV2', lambda: recurrent_v2.LSTM(units=4), (4, 4, 4)),
        ('TimeDistributed', lambda: wrappers.TimeDistributed(core.Dense(2)),
         (2, 2, 2)),
        ('Bidirectional',
         lambda: wrappers.Bidirectional(recurrent.SimpleRNN(units=4)),
         (2, 2, 2)),
        ('AttentionLayerCausal',
         lambda: dense_attention.Attention(causal=True),
         [(2, 2, 3), (2, 3, 3), (2, 3, 3)]),
        ('AdditiveAttentionLayerCausal',
         lambda: dense_attention.AdditiveAttention(causal=True),
         [(2, 3, 4), (2, 3, 4), (2, 3, 4)]),
    )
    def test_layer(self, f32_layer_fn, input_shape, rtol=2e-3, atol=2e-3,
                   input_data=None):
        """Tests a layer by comparing the float32 and mixed precision weights.

        A float32 layer, a mixed precision layer, and a distributed mixed
        precision layer are run. The three layers are identical other than
        their dtypes and distribution strategies. The outputs after predict()
        and weights after fit() are asserted to be close.

        The ZeroPadding2D case is skipped on ROCm builds.

        Args:
          f32_layer_fn: A function returning a float32 layer. The other two
            layers will automatically be created from this.
          input_shape: The shape of the input to the layer, including the
            batch dimension. Or a list of shapes if the layer takes multiple
            inputs.
          rtol: The relative tolerance to be asserted.
          atol: The absolute tolerance to be asserted.
          input_data: A Numpy array with the data of the input. If None,
            input data will be randomly generated.
        """
        if (f32_layer_fn == convolutional.ZeroPadding2D and
                test.is_built_with_rocm()):
            # Report the case as skipped instead of silently passing.
            self.skipTest('ZeroPadding2D case is not run on ROCm builds.')

        # Normalize to a list of shapes: a single shape starts with an int.
        if isinstance(input_shape[0], int):
            input_shapes = [input_shape]
        else:
            input_shapes = input_shape
        strategy = create_mirrored_strategy()
        f32_layer = f32_layer_fn()

        # Create the layers
        self.assertEqual(f32_layer.dtype, 'float32')
        self.assertEqual(f32_layer._compute_dtype, 'float32')
        config = f32_layer.get_config()
        config['dtype'] = policy.Policy('mixed_float16')
        mp_layer = f32_layer.__class__.from_config(config)
        distributed_mp_layer = f32_layer.__class__.from_config(config)

        # Compute per_replica_input_shapes for the distributed model
        global_batch_size = input_shapes[0][0]
        self.assertEqual(
            global_batch_size % strategy.num_replicas_in_sync, 0,
            'The number of replicas, %d, does not divide the global batch '
            'size of %d' % (strategy.num_replicas_in_sync, global_batch_size))
        per_replica_batch_size = (
            global_batch_size // strategy.num_replicas_in_sync)
        per_replica_input_shapes = [(per_replica_batch_size,) + s[1:]
                                    for s in input_shapes]

        # Create the models
        f32_model = self._create_model_from_layer(f32_layer, input_shapes)
        mp_model = self._create_model_from_layer(mp_layer, input_shapes)
        with strategy.scope():
            distributed_mp_model = self._create_model_from_layer(
                distributed_mp_layer, per_replica_input_shapes)

        # Set all model weights to the same values
        f32_weights = f32_model.get_weights()
        mp_model.set_weights(f32_weights)
        distributed_mp_model.set_weights(f32_weights)

        # Generate input data
        if input_data is None:
            # Cast inputs to float16 to avoid measuring error from having f16
            # layers cast to float16.
            input_data = [np.random.normal(size=s).astype('float16')
                          for s in input_shapes]
            if len(input_data) == 1:
                input_data = input_data[0]

        # Assert all models have close outputs.
        f32_output = f32_model.predict(input_data)
        mp_output = mp_model.predict(input_data)
        self.assertAllClose(
            mp_output, f32_output, rtol=rtol, atol=atol)
        self.assertAllClose(
            distributed_mp_model.predict(input_data), f32_output, rtol=rtol,
            atol=atol)

        # Run fit() on models
        output = np.random.normal(
            size=f32_model.outputs[0].shape).astype('float16')
        for model in f32_model, mp_model, distributed_mp_model:
            model.fit(input_data, output, batch_size=global_batch_size)

        # Assert all models have close weights
        f32_weights = f32_model.get_weights()
        self.assertAllClose(
            mp_model.get_weights(), f32_weights, rtol=rtol, atol=atol)
        self.assertAllClose(
            distributed_mp_model.get_weights(), f32_weights, rtol=rtol,
            atol=atol)
def test_use_on_default_activation_with_gpu_kernel(self):
    """Checks `_could_use_gpu_kernel` when activations are passed as ops.

    Passing `nn.tanh` as `activation`, or `nn.sigmoid` as
    `recurrent_activation`, must leave `_could_use_gpu_kernel` truthy.
    """
    tanh_layer = rnn.LSTM(1, activation=nn.tanh)
    self.assertTrue(tanh_layer._could_use_gpu_kernel)

    sigmoid_layer = rnn.LSTM(1, recurrent_activation=nn.sigmoid)
    self.assertTrue(sigmoid_layer._could_use_gpu_kernel)
def test_lstm(self):
    """Runs the `_test_layer` helper on a sequence-returning v2 LSTM."""
    lstm_layer = recurrent_v2.LSTM(units=4, return_sequences=True)
    self._test_layer(lstm_layer, input_shape=(4, 4, 4))