Ejemplo n.º 1
0
    def test_deepcopy(self):
        """Checks `copy.deepcopy` of an LSTM layer before and after it is called.

        Three things are verified:
          * a copy keeps the same `units` and the same config as the original;
          * copying before the first call yields a layer whose outputs differ
            from the original's outputs on the same inputs;
          * copying after the first call yields a layer whose outputs match
            the original's outputs on the same inputs.
        """
        if not tf.executing_eagerly():
            self.skipTest('v2-only test')
        original_layer = lstm.LSTM(5)
        copied_layer = copy.deepcopy(original_layer)
        self.assertEqual(copied_layer.units, 5)
        # Compare the original against the copy (not against itself) so the
        # assertion actually validates the copied layer's configuration.
        self.assertEqual(original_layer.get_config(),
                         copied_layer.get_config())

        # Copy layer before layer call on inputs without weight initialization.
        inputs = np.random.normal(size=[32, 10, 8]).astype(np.float32)
        original_layer = lstm.LSTM(4)
        copied_layer = copy.deepcopy(original_layer)
        outputs = original_layer(inputs)
        copied_outputs = copied_layer(inputs)
        self.assertNotAllClose(self.evaluate(outputs),
                               self.evaluate(copied_outputs))

        # Copy layer after layer call on inputs with weight initialization.
        original_layer = lstm.LSTM(4)
        outputs = original_layer(inputs)
        copied_layer = copy.deepcopy(original_layer)
        copied_outputs = copied_layer(inputs)
        self.assertAllClose(self.evaluate(outputs),
                            self.evaluate(copied_outputs))
Ejemplo n.º 2
0
 def test_stacking_LSTM(self):
     """Smoke test: trains two stacked sequence-returning LSTM layers once.

     Targets are made non-negative and normalized along the last axis so
     each timestep's target vector sums to one before being used with the
     categorical cross-entropy loss.
     """
     features = np.random.random((2, 3, 4))
     labels = np.abs(np.random.random((2, 3, 5)))
     labels /= labels.sum(axis=-1, keepdims=True)

     stacked = keras.models.Sequential()
     for width in (10, 5):
         stacked.add(lstm.LSTM(width, return_sequences=True, unroll=False))

     sgd = tf.compat.v1.train.GradientDescentOptimizer(0.01)
     stacked.compile(loss='categorical_crossentropy', optimizer=sgd)
     stacked.fit(features, labels, epochs=1, batch_size=2, verbose=1)
Ejemplo n.º 3
0
    def test_explicit_device_with_go_backward_and_mask(self):
        """Compares masked and trimmed inputs for backward-running LSTMs.

        For both the V1 and the V2 LSTM layer (created with
        `go_backwards=True` and `return_sequences=True`), the last
        `masksteps` outputs computed on the full input with a mask that hides
        the trailing timesteps must match the outputs computed on the input
        truncated to its first `masksteps` timesteps.
        """
        batch_size = 8
        timestep = 7
        masksteps = 5
        units = 4

        inputs = np.random.randn(batch_size, timestep,
                                 units).astype(np.float32)
        # Use the builtin `bool`: the `np.bool` alias was deprecated in
        # NumPy 1.20 and removed in 1.24.
        mask = np.ones((batch_size, timestep)).astype(bool)
        # Mark every timestep from `masksteps` onwards as masked out.
        mask[:, masksteps:] = 0

        # Test for V1 behavior.
        lstm_v1_layer = lstm_v1.LSTM(units,
                                     return_sequences=True,
                                     go_backwards=True)
        with test_utils.device(should_use_gpu=True):
            outputs_masked_v1 = lstm_v1_layer(inputs, mask=tf.constant(mask))
            outputs_trimmed_v1 = lstm_v1_layer(inputs[:, :masksteps])
        self.assertAllClose(outputs_masked_v1[:, -masksteps:],
                            outputs_trimmed_v1)

        # Test for V2 behavior.
        lstm_layer = lstm.LSTM(units, return_sequences=True, go_backwards=True)
        with test_utils.device(should_use_gpu=True):
            outputs_masked = lstm_layer(inputs, mask=tf.constant(mask))
            outputs_trimmed = lstm_layer(inputs[:, :masksteps])
        self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
Ejemplo n.º 4
0
    def test_state_reuse(self):
        """Feeds the states returned by one LSTM into a second LSTM.

        Builds a functional model in which the first layer returns its
        sequence output plus states, the second layer consumes that sequence
        with those states as `initial_state`, and then runs `predict` once
        as a smoke check.
        """
        num_samples, timesteps, embedding_dim = 2, 3, 4
        units = 3

        model_input = keras.Input(
            batch_shape=(num_samples, timesteps, embedding_dim))
        first_lstm = lstm.LSTM(units, return_state=True, return_sequences=True)
        first_results = first_lstm(model_input)
        # Element 0 is the layer output; everything after it is the state.
        sequence, carried_state = first_results[0], first_results[1:]
        final_output = lstm.LSTM(units)(sequence, initial_state=carried_state)
        model = keras.models.Model(model_input, final_output)

        samples = np.random.random((num_samples, timesteps, embedding_dim))
        model.predict(samples)
Ejemplo n.º 5
0
    def test_with_fully_masked_inputs(self):
        """Runs `predict` on a batch whose first sample contains only zeros.

        The embedding is created with `mask_zero=True`, so the all-zero first
        row exercises the LSTM with that sample's ids all equal to the
        masked value.
        """
        num_samples, timestep = 8, 5
        vocab_size, embedding_dim, units = 20, 4, 2

        token_ids = np.random.randint(0,
                                      vocab_size,
                                      size=(num_samples, timestep))
        # Set the first inputs to be fully zero.
        token_ids[0, :] = 0.0

        embedding = keras.layers.Embedding(
            vocab_size,
            embedding_dim,
            mask_zero=True,
            input_length=timestep,
            batch_input_shape=(num_samples, timestep))
        layer = lstm.LSTM(units)

        model = keras.models.Sequential()
        for stage in (embedding, layer):
            model.add(stage)

        sgd = tf.compat.v1.train.GradientDescentOptimizer(0.01)
        model.compile(optimizer=sgd,
                      loss='mse',
                      run_eagerly=test_utils.should_run_eagerly())
        # Make sure it doesn't crash with cudnn kernel.
        model.predict(token_ids)
Ejemplo n.º 6
0
    def test_specify_initial_state_keras_tensor(self):
        """Passes Keras `Input` tensors as the LSTM's `initial_state`.

        Verifies the first state tensor is recorded among the layer's inbound
        node inputs, then trains the resulting multi-input model on one
        random batch.
        """
        num_samples, timesteps, embedding_dim = 2, 3, 4
        units = 3
        num_states = 2

        # Test with Keras tensor
        sequence_input = keras.Input((timesteps, embedding_dim))
        state_inputs = [keras.Input((units, )) for _ in range(num_states)]
        layer = lstm.LSTM(units)
        # A single state is passed bare; several states are passed as a list.
        state_arg = (state_inputs[0]
                     if len(state_inputs) == 1 else state_inputs)
        output = layer(sequence_input, initial_state=state_arg)

        node_inputs = layer._inbound_nodes[0].input_tensors
        self.assertTrue(any(state_inputs[0] is t for t in node_inputs))

        model = keras.models.Model([sequence_input] + state_inputs, output)
        sgd = tf.compat.v1.train.GradientDescentOptimizer(0.01)
        model.compile(loss='categorical_crossentropy', optimizer=sgd)

        sequence_data = np.random.random(
            (num_samples, timesteps, embedding_dim))
        state_data = [
            np.random.random((num_samples, units)) for _ in range(num_states)
        ]
        targets = np.random.random((num_samples, units))
        model.train_on_batch([sequence_data] + state_data, targets)
Ejemplo n.º 7
0
 def build_model():
     """Builds a single-LSTM functional model and returns it with its layer.

     Reads `timestep`, `input_dim`, `units`, `use_bias` and
     `bias_initializer` from the enclosing scope.

     Returns:
       A `(model, layer)` tuple, where `layer` is the LSTM inside `model`.
     """
     model_input = keras.layers.Input(shape=[timestep, input_dim],
                                      dtype=tf.float32)
     lstm_layer = lstm.LSTM(units,
                            use_bias=use_bias,
                            bias_initializer=bias_initializer)
     model = keras.models.Model(model_input, lstm_layer(model_input))
     return model, lstm_layer
Ejemplo n.º 8
0
 def test_could_use_defun_backend(self, activation, recurrent_activation,
                                  recurrent_dropout, unroll, use_bias):
     """Asserts the given configuration leaves `_could_use_gpu_kernel` False."""
     layer_kwargs = dict(activation=activation,
                         recurrent_activation=recurrent_activation,
                         recurrent_dropout=recurrent_dropout,
                         unroll=unroll,
                         use_bias=use_bias)
     single_unit_lstm = lstm.LSTM(1, **layer_kwargs)
     self.assertFalse(single_unit_lstm._could_use_gpu_kernel)
Ejemplo n.º 9
0
    def test_LSTM_runtime_with_mask(self):
        """Checks the runtime reported by a masked LSTM for three paddings.

        The layer is built with `return_runtime=True`, so the model exposes
        the runtime value as a second output. After one `fit`, the reported
        runtime is checked for unpadded data, for data whose last timestep is
        zeroed, and for data that additionally has a zeroed timestep in the
        middle of the sequence.
        """
        # Masking will affect which backend is selected based on whether the mask
        # is strictly right padded.
        layer = lstm.LSTM(self.rnn_state_size, return_runtime=True)

        inputs = keras.layers.Input(shape=[self.timestep, self.input_shape],
                                    dtype=tf.float32)
        masked_inputs = keras.layers.Masking()(inputs)

        outputs, runtime = layer(masked_inputs)
        # Expand the runtime so that it is a 1D tensor instead of scalar.
        # TF model does not work with scalar model output, specially during
        # aggregation.
        runtime = keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))(
            runtime)
        model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime])

        (x_train,
         y_train), _ = test_utils.get_test_data(train_samples=self.batch,
                                                test_samples=0,
                                                input_shape=(self.timestep,
                                                             self.input_shape),
                                                num_classes=self.output_shape)
        y_train = np_utils.to_categorical(y_train, self.output_shape)

        # Only the first output has a loss; the runtime output is not trained.
        model.compile(optimizer='sgd',
                      loss=['categorical_crossentropy', None],
                      run_eagerly=test_utils.should_run_eagerly())

        model.fit(x_train, y_train)

        # Verify unpadded data.
        _, runtime_value = model.predict(x_train)
        if tf.test.is_gpu_available():
            self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU)
        else:
            self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU)

        # Update x/y to be right padded by setting the last timestep to 0
        # NOTE(review): y_train is not passed to anything after fit() above,
        # so the label edits below do not influence the checked predictions.
        x_train[:, -1, :] = 0
        y_train[:, -1] = 0
        _, runtime_value = model.predict(x_train)
        if tf.test.is_gpu_available():
            self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_GPU)
        else:
            self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU)

        # Further update x/y to be mix padded (masks in the middle), and verify
        # only cpu kernel can be selected.
        x_train[:, -3, :] = 0
        y_train[:, -3] = 0
        _, runtime_value = model.predict(x_train)
        self.assertEqual(runtime_value[0], gru_lstm_utils.RUNTIME_CPU)
Ejemplo n.º 10
0
    def test_bidirectional(self):
        """Smoke test: fit, evaluate and predict with stacked Bidirectional LSTMs.

        The model is an embedding followed by two `Bidirectional` LSTM
        wrappers and two dense layers ending in a single sigmoid unit,
        trained for one epoch on random token ids and random binary labels.
        """
        batch = 128
        timestep = 20
        vocab_size = 1000
        model = keras.Sequential([
            keras.layers.Embedding(vocab_size, 64),
            keras.layers.Bidirectional(lstm.LSTM(64, return_sequences=True)),
            keras.layers.Bidirectional(lstm.LSTM(32)),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(1, activation='sigmoid')
        ])

        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        x = np.random.randint(0, vocab_size, size=(batch, timestep))
        # `randint`'s upper bound is exclusive: use 2 so labels cover both
        # classes {0, 1} instead of being all zeros.
        y = np.random.randint(0, 2, size=batch)
        model.fit(x, y, epochs=1, shuffle=False)
        model.evaluate(x, y)
        model.predict(x)
Ejemplo n.º 11
0
 def test_dynamic_behavior_LSTM(self):
     """Trains an LSTM declared with an unspecified (`None`) time dimension.

     The layer's `input_shape` leaves the number of timesteps open; one
     batch with a concrete number of timesteps is then used for training.
     """
     num_samples, timesteps, embedding_dim = 2, 3, 4
     units = 2

     model = keras.models.Sequential()
     model.add(lstm.LSTM(units, input_shape=(None, embedding_dim)))
     sgd = tf.compat.v1.train.GradientDescentOptimizer(0.001)
     model.compile(sgd, 'mse')

     features = np.random.random((num_samples, timesteps, embedding_dim))
     targets = np.random.random((num_samples, units))
     model.train_on_batch(features, targets)
Ejemplo n.º 12
0
    def test_lstm_output_on_multiple_kernel(self):
        """Checks that three LSTM variants sharing weights predict alike.

        A V2 LSTM built with `should_use_gpu=False`, a V2 LSTM built with
        `should_use_gpu=True`, and a V1 LSTM using a sigmoid recurrent
        activation all receive the first model's weights and must produce
        close predictions on the same random input.
        """
        batch, timestep = 100, 4
        input_shape, rnn_state_size = 10, 8

        x_train = np.random.random((batch, timestep, input_shape))

        inputs = keras.layers.Input(shape=[timestep, input_shape],
                                    dtype=tf.float32)

        with test_utils.device(should_use_gpu=False):
            cpu_layer = lstm.LSTM(rnn_state_size)
            cpu_model = keras.models.Model(inputs, cpu_layer(inputs))
            weights = cpu_model.get_weights()
        cpu_prediction = cpu_model.predict(x_train)

        with test_utils.device(should_use_gpu=True):
            gpu_layer = lstm.LSTM(rnn_state_size)
            gpu_model = keras.models.Model(inputs, gpu_layer(inputs))
            gpu_model.set_weights(weights)
        gpu_prediction = gpu_model.predict(x_train)

        # Note that cuDNN uses 'sigmoid' as activation, so the LSTM V2 uses
        # 'sigmoid' as default. Construct the canonical LSTM with sigmoid to
        # achieve the same output.
        with test_utils.device(should_use_gpu=True):
            canonical_layer = lstm_v1.LSTM(rnn_state_size,
                                           recurrent_activation='sigmoid')
            canonical_model = keras.models.Model(inputs,
                                                 canonical_layer(inputs))
            # Remove the extra cudnn bias since canonical lstm will not use it.
            canonical_model.set_weights(weights[:3])
        canonical_prediction = canonical_model.predict(x_train)

        self.assertAllClose(cpu_prediction, gpu_prediction)
        self.assertAllClose(gpu_prediction, canonical_prediction)
Ejemplo n.º 13
0
    def test_static_shape_inference_LSTM(self):
        """Checks the static output shape of a sequence-returning LSTM.

        The LSTM follows a Dense layer in a Sequential model; its symbolic
        output must have shape `[None, timesteps, units]`.
        """
        # Github issue: 15165
        timesteps, embedding_dim, units = 3, 4, 2

        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(embedding_dim,
                               input_shape=(timesteps, embedding_dim)))
        model.add(lstm.LSTM(units, return_sequences=True))

        inferred_shape = model.layers[-1].output.shape.as_list()
        self.assertEqual(inferred_shape, [None, timesteps, units])
Ejemplo n.º 14
0
    def test_LSTM_runtime(self):
        """Builds a model exposing the LSTM runtime and runs the shared check.

        The layer is created with `return_runtime=True`; its runtime output
        is reshaped and added as a second model output before the model is
        handed to `_test_runtime_with_model`.
        """
        runtime_lstm = lstm.LSTM(self.rnn_state_size, return_runtime=True)
        model_input = keras.layers.Input(
            shape=[self.timestep, self.input_shape], dtype=tf.float32)

        outputs, runtime = runtime_lstm(model_input)
        # Expand the runtime so that it is a 1D tensor instead of scalar.
        # TF model does not work with scalar model output, specially during
        # aggregation.
        add_trailing_axis = keras.layers.Lambda(
            lambda x: tf.expand_dims(x, axis=-1))
        runtime = add_trailing_axis(runtime)

        model = keras.models.Model(inputs=model_input,
                                   outputs=[outputs, runtime])
        self._test_runtime_with_model(model)
Ejemplo n.º 15
0
    def test_v1_session_behavior(self):
        """Fits an LSTM + Dense model on a `tf.data` dataset in the default graph."""
        with tf.compat.v1.get_default_graph().as_default():
            # See b/139132348 for more details.
            features = np.random.uniform(size=(100, 4, 8))
            labels = np.random.uniform(size=(100, 1))
            dataset = tf.data.Dataset.from_tensor_slices((features, labels))
            dataset = dataset.shuffle(100).batch(32)

            model_input = keras.layers.Input(shape=(4, 8))
            hidden = lstm.LSTM(1)(model_input)
            prediction = keras.layers.Dense(1)(hidden)

            model = keras.models.Model(model_input, prediction)
            model.compile(loss='mse', optimizer='sgd')
            model.fit(dataset)
Ejemplo n.º 16
0
    def test_lstm_feature_parity_v1_v2(self):
        """Checks that V1 and V2 LSTM layers agree before and after training.

        Both models share the same masked input and start from the same
        weights. Their predictions are compared once before `fit` and once
        after a single `fit` call on identical, seeded data.
        """
        input_shape = 10
        rnn_state_size = 8
        timestep = 4
        batch = 20

        (x_train, y_train), _ = test_utils.get_test_data(
            train_samples=batch,
            test_samples=0,
            input_shape=(timestep, input_shape),
            num_classes=rnn_state_size,
            random_seed=87654321,
        )
        y_train = np_utils.to_categorical(y_train, rnn_state_size)
        # For the last two batch items of the test data, we zero out the last
        # timestep to simulate the variable length sequence and masking test.
        x_train[-2:, -1, :] = 0.0
        y_train[-2:] = 0

        inputs = keras.layers.Input(shape=[timestep, input_shape],
                                    dtype=tf.float32)
        masked_input = keras.layers.Masking()(inputs)
        lstm_layer = lstm_v1.LSTM(rnn_state_size,
                                  recurrent_activation="sigmoid")
        output = lstm_layer(masked_input)
        lstm_model = keras.models.Model(inputs, output)
        # Capture the initial weights before training so that the second
        # model can start from exactly the same values.
        weights = lstm_model.get_weights()
        y_1 = lstm_model.predict(x_train)
        lstm_model.compile("rmsprop", "mse")
        lstm_model.fit(x_train, y_train)
        y_2 = lstm_model.predict(x_train)

        with test_utils.device(should_use_gpu=True):
            cudnn_layer = lstm.LSTM(rnn_state_size)
            cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input))
        cudnn_model.set_weights(weights)
        y_3 = cudnn_model.predict(x_train)
        cudnn_model.compile("rmsprop", "mse")
        cudnn_model.fit(x_train, y_train)
        y_4 = cudnn_model.predict(x_train)

        # y_1 / y_3: predictions before training; y_2 / y_4: after training.
        self.assertAllClose(y_1, y_3, rtol=1e-5, atol=2e-5)
        self.assertAllClose(y_2, y_4, rtol=1e-5, atol=2e-5)
Ejemplo n.º 17
0
    def _time_performance_run_unifed_lstm_gpu(self, test_config, x_train,
                                              y_train):
        """Measures and logs the per-epoch time of a single V2 LSTM model.

        Args:
          test_config: Mapping providing 'input_shape', 'rnn_state_size' and
            'timestep'; it is also forwarded to `_measure_performance`.
          x_train: Training inputs forwarded to `_measure_performance`.
          y_train: Training targets forwarded to `_measure_performance`.

        Returns:
          The value returned by `_measure_performance`.
        """
        # Get performance number for lstm_v2 with grappler swap the impl
        input_shape = test_config['input_shape']
        rnn_state_size = test_config['rnn_state_size']
        timestep = test_config['timestep']

        lstm_layer = lstm.LSTM(rnn_state_size)
        model_input = keras.layers.Input(shape=[timestep, input_shape],
                                         dtype=tf.float32)
        model = keras.models.Model(model_input, lstm_layer(model_input))
        model.compile('sgd', 'mse')

        sec_per_epoch = self._measure_performance(test_config, model, x_train,
                                                  y_train)
        logging.info('Average performance for %s per epoch is: %s', 'LSTM V2',
                     sec_per_epoch)
        return sec_per_epoch
Ejemplo n.º 18
0
    def test_return_state(self):
        """Checks that a stateful LSTM's returned state matches `layer.states`.

        The model outputs the first state returned by the layer
        (`return_state=True`). After `predict`, that output is compared with
        the value held in `layer.states[0]`.
        """
        num_states = 2
        timesteps = 3
        embedding_dim = 4
        units = 3
        num_samples = 2

        inputs = keras.Input(batch_shape=(num_samples, timesteps,
                                          embedding_dim))
        masked = keras.layers.Masking()(inputs)
        layer = lstm.LSTM(units, return_state=True, stateful=True)
        outputs = layer(masked)
        # Everything after the first element is the returned state.
        state = outputs[1:]
        # Use a TestCase assertion: a bare `assert` is stripped under
        # `python -O` and would silently skip this check.
        self.assertEqual(len(state), num_states)
        model = keras.models.Model(inputs, state[0])

        inputs = np.random.random((num_samples, timesteps, embedding_dim))
        state = model.predict(inputs)
        self.assertAllClose(keras.backend.eval(layer.states[0]),
                            state,
                            atol=1e-4)
Ejemplo n.º 19
0
    def test_stateful_LSTM_training(self):
        """Smoke test: one unshuffled training epoch through a stateful LSTM."""
        # See b/123587692 for more context.
        batch_size, timestep = 8, 12
        vocab_size, embedding_dim, units = 20, 10, 5

        token_shape = (batch_size, timestep)
        x = np.random.randint(0, vocab_size, size=token_shape)
        y = np.random.randint(0, vocab_size, size=token_shape)

        embedding = keras.layers.Embedding(
            vocab_size,
            embedding_dim,
            batch_input_shape=[batch_size, timestep])
        recurrent = lstm.LSTM(units, return_sequences=True, stateful=True)
        projection = keras.layers.Dense(vocab_size)

        model = keras.Sequential([embedding, recurrent, projection])
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      run_eagerly=test_utils.should_run_eagerly())
        model.fit(x, y, epochs=1, shuffle=False)
Ejemplo n.º 20
0
    def test_specify_state_with_masking(self):
        """Trains an LSTM whose initial state is supplied as extra model inputs.

        A `Masking` layer is applied to the sequence input, and the LSTM is
        given `Input` tensors as `initial_state`; the resulting multi-input
        model is trained on one random batch.
        """
        num_samples, timesteps, embedding_dim = 2, 3, 4
        units = 3
        num_states = 2

        sequence_input = keras.Input((timesteps, embedding_dim))
        # NOTE(review): the Masking output is discarded and the LSTM below is
        # fed the unmasked input - confirm whether that is intentional.
        _ = keras.layers.Masking()(sequence_input)
        state_inputs = [keras.Input((units, )) for _ in range(num_states)]
        output = lstm.LSTM(units)(sequence_input, initial_state=state_inputs)

        model = keras.models.Model([sequence_input] + state_inputs, output)
        sgd = tf.compat.v1.train.GradientDescentOptimizer(0.01)
        model.compile(loss='categorical_crossentropy', optimizer=sgd)

        sequence_data = np.random.random(
            (num_samples, timesteps, embedding_dim))
        state_data = [
            np.random.random((num_samples, units)) for _ in range(num_states)
        ]
        targets = np.random.random((num_samples, units))
        model.train_on_batch([sequence_data] + state_data, targets)
Ejemplo n.º 21
0
    def test_LSTM_runtime_with_cond(self):
        """Runs the shared runtime check with the LSTM wrapped in `tf.cond`.

        The predicate `0 < 1` is constant, so the branch calling the layer is
        the one selected; the other branch returns zeros plus a placeholder
        runtime value.
        """
        # This test is to demonstrate the graph rewrite of grappler plugin under
        # the condition that the function returns different number of internal
        # states.
        layer = lstm.LSTM(self.rnn_state_size, return_runtime=True)

        inputs = keras.layers.Input(shape=[self.timestep, self.input_shape],
                                    dtype=tf.float32)

        fallback_outputs = tf.zeros([self.batch, self.output_shape])
        fallback_runtime = gru_lstm_utils.runtime(
            gru_lstm_utils.RUNTIME_UNKNOWN)
        lower = tf.constant(0)
        upper = tf.constant(1)
        # Will always run the lstm layer.
        always_true = tf.less(lower, upper)
        outputs, runtime = tf.cond(
            always_true,
            lambda: layer(inputs),
            lambda: (fallback_outputs, fallback_runtime))

        # Expand the runtime so that it is a 1D tensor instead of scalar.
        # TF model does not work with scalar model output, specially during
        # aggregation.
        add_trailing_axis = keras.layers.Lambda(
            lambda x: tf.expand_dims(x, axis=-1))
        runtime = add_trailing_axis(runtime)
        model = keras.models.Model(inputs=inputs, outputs=[outputs, runtime])
        self._test_runtime_with_model(model)
Ejemplo n.º 22
0
    def test_specify_initial_state_non_keras_tensor(self):
        """Trains an LSTM whose initial state is a list of backend variables.

        The states are created with `keras.backend.random_normal_variable`
        rather than `keras.Input`, so the model has a single input.
        """
        num_samples, timesteps, embedding_dim = 2, 3, 4
        units = 3
        num_states = 2

        # Test with non-Keras tensor
        sequence_input = keras.Input((timesteps, embedding_dim))
        state_shape = (num_samples, units)
        state_variables = [
            keras.backend.random_normal_variable(state_shape, 0, 1)
            for _ in range(num_states)
        ]
        output = lstm.LSTM(units)(sequence_input,
                                  initial_state=state_variables)

        model = keras.models.Model(sequence_input, output)
        sgd = tf.compat.v1.train.GradientDescentOptimizer(0.01)
        model.compile(loss='categorical_crossentropy', optimizer=sgd)

        sequence_data = np.random.random(
            (num_samples, timesteps, embedding_dim))
        targets = np.random.random((num_samples, units))
        model.train_on_batch(sequence_data, targets)
Ejemplo n.º 23
0
    def test_reset_states_with_values(self):
        """Exercises `reset_states` on a stateful LSTM, with and without values.

        Verifies that:
          * `reset_states()` creates `num_states` states filled with zeros;
          * `reset_states(values)` stores the supplied values;
          * a list with the wrong number of values raises `ValueError`;
          * the number of entries in `layer.weights` is unchanged throughout,
            including after `layer.states` is replaced by `tf.Variable`s.
        """
        num_states = 2
        timesteps = 3
        embedding_dim = 4
        units = 3
        num_samples = 2

        layer = lstm.LSTM(units, stateful=True)
        layer.build((num_samples, timesteps, embedding_dim))
        initial_weight_count = len(layer.weights)
        layer.reset_states()
        # TestCase assertions are used instead of bare `assert` statements,
        # which are stripped when Python runs with `-O`.
        self.assertEqual(len(layer.states), num_states)
        self.assertIsNotNone(layer.states[0])
        self.assertAllClose(keras.backend.eval(layer.states[0]),
                            np.zeros(keras.backend.int_shape(layer.states[0])),
                            atol=1e-4)
        state_shapes = [
            keras.backend.int_shape(state) for state in layer.states
        ]
        values = [np.ones(shape) for shape in state_shapes]
        # A single state is passed as a bare array rather than a 1-item list.
        if len(values) == 1:
            values = values[0]
        layer.reset_states(values)
        self.assertAllClose(keras.backend.eval(layer.states[0]),
                            np.ones(keras.backend.int_shape(layer.states[0])),
                            atol=1e-4)

        # Test with invalid data
        with self.assertRaises(ValueError):
            layer.reset_states([1] * (len(layer.states) + 1))

        self.assertEqual(initial_weight_count, len(layer.weights))
        # Variables in "states" shouldn't show up in .weights
        layer.states = tf.nest.map_structure(tf.Variable, values)
        layer.reset_states()
        self.assertEqual(initial_weight_count, len(layer.weights))
Ejemplo n.º 24
0
class LayerCorrectnessTest(test_combinations.TestCase):
    """Compares float32, mixed-precision and distributed mixed-precision layers.

    Each parameterized case supplies a layer factory and an input shape;
    `test_layer` builds three models from it and asserts that their
    predictions and their post-`fit` weights are close.
    """

    def setUp(self):
        """Splits the first physical CPU into two logical devices."""
        super(LayerCorrectnessTest, self).setUp()
        # Set two virtual CPUs to test MirroredStrategy with multiple devices
        cpus = tf.config.list_physical_devices('CPU')
        tf.config.set_logical_device_configuration(cpus[0], [
            tf.config.LogicalDeviceConfiguration(),
            tf.config.LogicalDeviceConfiguration(),
        ])

    def _create_model_from_layer(self, layer, input_shapes):
        """Wraps `layer` in a compiled functional model.

        Args:
          layer: The layer to call on the model inputs.
          input_shapes: A list of batch input shapes, one per model input.

        Returns:
          A model compiled with the 'sgd' optimizer and 'mse' loss.
        """
        inputs = [layers.Input(batch_input_shape=s) for s in input_shapes]
        # A single input is passed to the layer bare, not as a 1-item list.
        if len(inputs) == 1:
            inputs = inputs[0]
        y = layer(inputs)
        model = models.Model(inputs, y)
        model.compile('sgd', 'mse')
        return model

    @parameterized.named_parameters(
        ('LeakyReLU', activation.LeakyReLU, (2, 2)),
        ('PReLU', activation.PReLU, (2, 2)), ('ELU', activation.ELU, (2, 2)),
        ('ThresholdedReLU', activation.ThresholdedReLU,
         (2, 2)), ('Softmax', activation.Softmax,
                   (2, 2)), ('ReLU', activation.ReLU, (2, 2)),
        ('Conv1D', lambda: convolutional.Conv1D(2, 2), (2, 2, 1)),
        ('Conv2D', lambda: convolutional.Conv2D(2, 2), (2, 2, 2, 1)),
        ('Conv3D', lambda: convolutional.Conv3D(2, 2), (2, 2, 2, 2, 1)),
        ('Conv2DTranspose', lambda: convolutional.Conv2DTranspose(2, 2),
         (2, 2, 2, 2)),
        ('SeparableConv2D', lambda: convolutional.SeparableConv2D(2, 2),
         (2, 2, 2, 1)),
        ('DepthwiseConv2D', lambda: convolutional.DepthwiseConv2D(2, 2),
         (2, 2, 2, 1)), ('UpSampling2D', reshaping.UpSampling2D, (2, 2, 2, 1)),
        ('ZeroPadding2D', reshaping.ZeroPadding2D,
         (2, 2, 2, 1)), ('Cropping2D', reshaping.Cropping2D, (2, 3, 3, 1)),
        ('ConvLSTM2D', lambda: conv_lstm2d.ConvLSTM2D(4, kernel_size=(2, 2)),
         (4, 4, 4, 4, 4)), ('Dense', lambda: core.Dense(2), (2, 2)),
        ('Dropout', lambda: regularization.Dropout(0.5), (2, 2)),
        ('SpatialDropout2D', lambda: regularization.SpatialDropout2D(0.5),
         (2, 2, 2, 2)), ('Activation', lambda: core.Activation('sigmoid'),
                         (2, 2)), ('Reshape', lambda: reshaping.Reshape(
                             (1, 4, 1)), (2, 2, 2)),
        ('Permute', lambda: reshaping.Permute(
            (2, 1)), (2, 2, 2)), ('Attention', attention.Attention, [
                (2, 2, 3), (2, 3, 3), (2, 3, 3)
            ]), ('AdditiveAttention', attention.AdditiveAttention, [
                (2, 2, 3), (2, 3, 3), (2, 3, 3)
            ]), ('Embedding', lambda: core.Embedding(4, 4),
                 (2, 4), 2e-3, 2e-3, np.random.randint(4, size=(2, 4))),
        ('LocallyConnected1D',
         lambda: locally_connected.LocallyConnected1D(2, 2),
         (2, 2, 1)), ('LocallyConnected2D',
                      lambda: locally_connected.LocallyConnected2D(2, 2),
                      (2, 2, 2, 1)), ('Add', merging.Add, [(2, 2), (2, 2)]),
        ('Subtract', merging.Subtract, [(2, 2), (2, 2)]),
        ('Multiply', merging.Multiply, [
            (2, 2), (2, 2)
        ]), ('Average', merging.Average, [(2, 2), (2, 2)]),
        ('Maximum', merging.Maximum, [
            (2, 2), (2, 2)
        ]), ('Minimum', merging.Minimum, [
            (2, 2), (2, 2)
        ]), ('Concatenate', merging.Concatenate, [
            (2, 2), (2, 2)
        ]), ('Dot', lambda: merging.Dot(1), [(2, 2), (2, 2)]),
        ('GaussianNoise', lambda: regularization.GaussianNoise(0.5), (2, 2)),
        ('GaussianDropout', lambda: regularization.GaussianDropout(0.5),
         (2, 2)), ('AlphaDropout', lambda: regularization.AlphaDropout(0.5),
                   (2, 2)),
        ('BatchNormalization', batch_normalization.BatchNormalization,
         (2, 2), 1e-2, 1e-2),
        ('LayerNormalization', layer_normalization.LayerNormalization,
         (2, 2)), ('LayerNormalizationUnfused',
                   lambda: layer_normalization.LayerNormalization(axis=1),
                   (2, 2, 2)), ('MaxPooling2D', pooling.MaxPooling2D,
                                (2, 2, 2, 1)),
        ('AveragePooling2D', pooling.AveragePooling2D,
         (2, 2, 2, 1)), ('GlobalMaxPooling2D', pooling.GlobalMaxPooling2D,
                         (2, 2, 2, 1)),
        ('GlobalAveragePooling2D', pooling.GlobalAveragePooling2D,
         (2, 2, 2, 1)), ('SimpleRNN', lambda: simple_rnn.SimpleRNN(units=4),
                         (4, 4, 4), 1e-2, 1e-2),
        ('SimpleRNN_stateful',
         lambda: simple_rnn.SimpleRNN(units=4, stateful=True), (4, 4, 4), 1e-2,
         1e-2), ('GRU', lambda: gru_v1.GRU(units=4),
                 (4, 4, 4)), ('LSTM', lambda: lstm_v1.LSTM(units=4),
                              (4, 4, 4)), ('GRUV2', lambda: gru.GRU(units=4),
                                           (4, 4, 4)),
        ('GRUV2_stateful', lambda: gru.GRU(units=4, stateful=True),
         (4, 4, 4)), ('LSTMV2', lambda: lstm.LSTM(units=4), (4, 4, 4)),
        ('LSTMV2_stateful', lambda: lstm.LSTM(units=4, stateful=True),
         (4, 4, 4)), ('TimeDistributed',
                      lambda: time_distributed.TimeDistributed(core.Dense(2)),
                      (2, 2, 2)),
        ('Bidirectional',
         lambda: bidirectional.Bidirectional(simple_rnn.SimpleRNN(units=4)),
         (2, 2, 2)),
        ('AttentionLayerCausal', lambda: attention.Attention(causal=True), [
            (2, 2, 3), (2, 3, 3), (2, 3, 3)
        ]), ('AdditiveAttentionLayerCausal',
             lambda: attention.AdditiveAttention(causal=True), [
                 (2, 3, 4), (2, 3, 4), (2, 3, 4)
             ]), ('NormalizationAdapt', _create_normalization_layer_with_adapt,
                  (4, 4)),
        ('NormalizationNoAdapt', _create_normalization_layer_without_adapt,
         (4, 4)), ('Resizing', lambda: image_preprocessing.Resizing(3, 3),
                   (2, 5, 5, 1)),
        ('Rescaling', lambda: image_preprocessing.Rescaling(2., 1.),
         (6, 6)), ('CenterCrop', lambda: image_preprocessing.CenterCrop(3, 3),
                   (2, 5, 5, 1)))
    def test_layer(self,
                   f32_layer_fn,
                   input_shape,
                   rtol=2e-3,
                   atol=2e-3,
                   input_data=None):
        """Tests a layer by comparing the float32 and mixed precision weights.

        A float32 layer, a mixed precision layer, and a distributed mixed
        precision layer are run. The three layers are identical other than
        their dtypes and distribution strategies. The outputs after predict()
        and weights after fit() are asserted to be close.

        Args:
          f32_layer_fn: A function returning a float32 layer. The other two
            layers will automatically be created from this.
          input_shape: The shape of the input to the layer, including the
            batch dimension. Or a list of shapes if the layer takes multiple
            inputs.
          rtol: The relative tolerance to be asserted.
          atol: The absolute tolerance to be asserted.
          input_data: A Numpy array with the data of the input. If None, input
            data will be randomly generated.
        """

        # This case is skipped (returns early) on ROCm builds.
        if f32_layer_fn == reshaping.ZeroPadding2D and tf.test.is_built_with_rocm(
        ):
            return
        # A flat tuple of ints is a single shape; otherwise it is already a
        # list of shapes, one per input.
        if isinstance(input_shape[0], int):
            input_shapes = [input_shape]
        else:
            input_shapes = input_shape
        strategy = create_mirrored_strategy()
        f32_layer = f32_layer_fn()

        # Create the layers
        assert f32_layer.dtype == f32_layer._compute_dtype == 'float32'
        # The mixed precision layers are rebuilt from the float32 layer's
        # config with only the dtype policy replaced.
        config = f32_layer.get_config()
        config['dtype'] = policy.Policy('mixed_float16')
        mp_layer = f32_layer.__class__.from_config(config)
        distributed_mp_layer = f32_layer.__class__.from_config(config)

        # Compute per_replica_input_shapes for the distributed model
        global_batch_size = input_shapes[0][0]
        assert global_batch_size % strategy.num_replicas_in_sync == 0, (
            'The number of replicas, %d, does not divide the global batch size of '
            '%d' % (strategy.num_replicas_in_sync, global_batch_size))
        per_replica_batch_size = (global_batch_size //
                                  strategy.num_replicas_in_sync)
        per_replica_input_shapes = [(per_replica_batch_size, ) + s[1:]
                                    for s in input_shapes]

        # Create the models
        f32_model = self._create_model_from_layer(f32_layer, input_shapes)
        mp_model = self._create_model_from_layer(mp_layer, input_shapes)
        with strategy.scope():
            distributed_mp_model = self._create_model_from_layer(
                distributed_mp_layer, per_replica_input_shapes)

        # Set all model weights to the same values
        f32_weights = f32_model.get_weights()
        mp_model.set_weights(f32_weights)
        distributed_mp_model.set_weights(f32_weights)

        # Generate input data
        if input_data is None:
            # Cast inputs to float16 to avoid measuring error from having f16 layers
            # cast to float16.
            input_data = [
                np.random.normal(size=s).astype('float16')
                for s in input_shapes
            ]
            if len(input_data) == 1:
                input_data = input_data[0]

        # Assert all models have close outputs.
        f32_output = f32_model.predict(input_data)
        mp_output = mp_model.predict(input_data)
        self.assertAllClose(mp_output, f32_output, rtol=rtol, atol=atol)
        self.assertAllClose(distributed_mp_model.predict(input_data),
                            f32_output,
                            rtol=rtol,
                            atol=atol)

        # Run fit() on models
        # The random targets take their shape from the float32 model's first
        # output.
        output = np.random.normal(
            size=f32_model.outputs[0].shape).astype('float16')
        for model in f32_model, mp_model, distributed_mp_model:
            model.fit(input_data, output, batch_size=global_batch_size)

        # Assert all models have close weights
        f32_weights = f32_model.get_weights()
        self.assertAllClose(mp_model.get_weights(),
                            f32_weights,
                            rtol=rtol,
                            atol=atol)
        self.assertAllClose(distributed_mp_model.get_weights(),
                            f32_weights,
                            rtol=rtol,
                            atol=atol)
Ejemplo n.º 25
0
    def test_time_major_and_go_backward_v1_v2(self, time_major, go_backwards):
        """Compare LSTM v1 and v2 outputs, then smoke-test training with v2.

        Part one builds the same functional model twice, once around
        `lstm_v1.LSTM` and once around `lstm.LSTM`, copies the v1 weights into
        the v2 model and asserts that both predict close values on the same
        random input.

        Part two trains a plain `lstm.LSTM` model on data from
        `test_utils.get_test_data` and runs `evaluate` and `predict` on it. It
        does not use `time_major` or `go_backwards`.

        Args:
          time_major: Forwarded to the layer's `time_major` argument; when
            truthy the model transposes the first two axes around the layer.
          go_backwards: Forwarded to the layer's `go_backwards` argument.
        """
        feature_dim = 10
        num_units = 8
        num_steps = 4
        num_samples = 100

        x_train = np.random.random((num_samples, num_steps, feature_dim))

        def swap_leading_axes(tensor):
            # Exchange axes 0 and 1, leaving the last axis in place.
            return tf.transpose(tensor, [1, 0, 2])

        def build_model(layer_cls):
            model_input = keras.layers.Input(
                shape=[num_steps, feature_dim], dtype=tf.float32)
            rnn = layer_cls(
                num_units,
                recurrent_activation="sigmoid",
                time_major=time_major,
                return_sequences=True,
                go_backwards=go_backwards,
            )
            if not time_major:
                return keras.models.Model(model_input, rnn(model_input))

            # Transpose into the layout the layer is configured for, and
            # transpose its output back so both variants emit the same shape.
            transposed_input = keras.layers.Lambda(
                swap_leading_axes)(model_input)
            transposed_output = rnn(transposed_input)
            model_output = keras.layers.Lambda(
                swap_leading_axes)(transposed_output)
            return keras.models.Model(model_input, model_output)

        # Reference predictions and weights come from the v1 implementation.
        reference_model = build_model(lstm_v1.LSTM)
        y_ref = reference_model.predict(x_train)
        reference_weights = reference_model.get_weights()

        # The v2 model reuses those weights, so outputs should agree.
        candidate_model = build_model(lstm.LSTM)
        candidate_model.set_weights(reference_weights)
        y = candidate_model.predict(x_train)

        self.assertAllClose(y, y_ref)

        # Second part: fit / evaluate / predict with a default-configured v2
        # layer. The dimensions above are reused unchanged.
        num_classes = 8
        num_epochs = 10

        (x_train, y_train), _ = test_utils.get_test_data(
            train_samples=num_samples,
            test_samples=0,
            input_shape=(num_steps, feature_dim),
            num_classes=num_classes,
        )
        y_train = np_utils.to_categorical(y_train, num_classes)

        rnn_layer = lstm.LSTM(num_units)
        model_input = keras.layers.Input(
            shape=[num_steps, feature_dim], dtype=tf.float32)
        model = keras.models.Model(model_input, rnn_layer(model_input))

        model.compile("rmsprop", loss="mse")
        model.fit(x_train, y_train, epochs=num_epochs)
        model.evaluate(x_train, y_train)
        model.predict(x_train)
Ejemplo n.º 26
0
    def test_use_on_default_activation_with_gpu_kernel(self):
        """Check `_could_use_gpu_kernel` stays true for callable activations.

        Each layer is constructed with one activation passed as a TensorFlow
        function object (`tf.tanh`, then `tf.sigmoid`), and the layer's
        `_could_use_gpu_kernel` attribute is asserted to be truthy.

        NOTE(review): presumably these callables correspond to the layer's
        default activations -- confirm against the LSTM constructor defaults.
        """
        # `activation` supplied as the `tf.tanh` callable.
        layer = lstm.LSTM(1, activation=tf.tanh)
        self.assertTrue(layer._could_use_gpu_kernel)

        # `recurrent_activation` supplied as the `tf.sigmoid` callable.
        layer = lstm.LSTM(1, recurrent_activation=tf.sigmoid)
        self.assertTrue(layer._could_use_gpu_kernel)