# Example 1
class MergeLayersTest(keras_parameterized.TestCase):
    def test_merge_add(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        i3 = keras.layers.Input(shape=(4, 5))

        add_layer = keras.layers.Add()
        o = add_layer([i1, i2, i3])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2, i3], o)
        model.run_eagerly = testing_utils.should_run_eagerly()

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        x3 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2, x3])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, x1 + x2 + x3, atol=1e-4)

        self.assertEqual(
            add_layer.compute_mask([i1, i2, i3], [None, None, None]), None)
        self.assertTrue(
            np.all(
                K.eval(
                    add_layer.compute_mask(
                        [i1, i2],
                        [K.variable(x1), K.variable(x2)]))))

        with self.assertRaisesRegex(ValueError, '`mask` should be a list.'):
            add_layer.compute_mask([i1, i2, i3], x1)
        with self.assertRaisesRegex(ValueError, '`inputs` should be a list.'):
            add_layer.compute_mask(i1, [None, None, None])
        with self.assertRaisesRegex(ValueError,
                                    ' should have the same length.'):
            add_layer.compute_mask([i1, i2, i3], [None, None])

    def test_merge_subtract(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        i3 = keras.layers.Input(shape=(4, 5))

        subtract_layer = keras.layers.Subtract()
        o = subtract_layer([i1, i2])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, x1 - x2, atol=1e-4)

        self.assertEqual(subtract_layer.compute_mask([i1, i2], [None, None]),
                         None)
        self.assertTrue(
            np.all(
                K.eval(
                    subtract_layer.compute_mask(
                        [i1, i2],
                        [K.variable(x1), K.variable(x2)]))))

        with self.assertRaisesRegex(ValueError, '`mask` should be a list.'):
            subtract_layer.compute_mask([i1, i2], x1)
        with self.assertRaisesRegex(ValueError, '`inputs` should be a list.'):
            subtract_layer.compute_mask(i1, [None, None])
        with self.assertRaisesRegex(
                ValueError, 'layer should be called on exactly 2 inputs'):
            subtract_layer([i1, i2, i3])
        with self.assertRaisesRegex(
                ValueError, 'layer should be called on exactly 2 inputs'):
            subtract_layer([i1])

    def test_merge_multiply(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        i3 = keras.layers.Input(shape=(4, 5))
        o = keras.layers.multiply([i1, i2, i3])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2, i3], o)
        model.run_eagerly = testing_utils.should_run_eagerly()

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        x3 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2, x3])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, x1 * x2 * x3, atol=1e-4)

    def test_merge_average(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        o = keras.layers.average([i1, i2])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, 0.5 * (x1 + x2), atol=1e-4)

    def test_merge_maximum(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        o = keras.layers.maximum([i1, i2])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, np.maximum(x1, x2), atol=1e-4)

    def test_merge_minimum(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        o = keras.layers.minimum([i1, i2])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, np.minimum(x1, x2), atol=1e-4)

    def test_merge_concatenate(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        concat_layer = keras.layers.Concatenate(axis=1)
        o = concat_layer([i1, i2])
        self.assertListEqual(o.shape.as_list(), [None, 8, 5])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 8, 5))
        self.assertAllClose(out, np.concatenate([x1, x2], axis=1), atol=1e-4)

        self.assertEqual(concat_layer.compute_mask([i1, i2], [None, None]),
                         None)
        self.assertTrue(
            np.all(
                K.eval(
                    concat_layer.compute_mask(
                        [i1, i2],
                        [K.variable(x1), K.variable(x2)]))))

        # Should work with unit-length input.
        unit_length_o = concat_layer([i1])
        self.assertListEqual(unit_length_o.shape.as_list(), i1.shape.as_list())

        with self.assertRaisesRegex(ValueError, '`mask` should be a list.'):
            concat_layer.compute_mask([i1, i2], x1)
        with self.assertRaisesRegex(ValueError, '`inputs` should be a list.'):
            concat_layer.compute_mask(i1, [None, None])
        with self.assertRaisesRegex(ValueError, 'should have the same length'):
            concat_layer.compute_mask([i1, i2], [None])
        with self.assertRaisesRegex(
                ValueError, 'layer should be called on a list of inputs'):
            concat_layer(i1)

    def test_merge_dot(self):
        i1 = keras.layers.Input(shape=(4, ))
        i2 = keras.layers.Input(shape=(4, ))
        o = keras.layers.dot([i1, i2], axes=1)
        self.assertListEqual(o.shape.as_list(), [None, 1])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        _ = keras.layers.Dot(axes=1).get_config()

        x1 = np.random.random((2, 4))
        x2 = np.random.random((2, 4))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 1))
        expected = np.zeros((2, 1))
        expected[0, 0] = np.dot(x1[0], x2[0])
        expected[1, 0] = np.dot(x1[1], x2[1])
        self.assertAllClose(out, expected, atol=1e-4)

        # Test with negative tuple of axes.
        o = keras.layers.dot([i1, i2], axes=(-1, -1))
        self.assertListEqual(o.shape.as_list(), [None, 1])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 1))
        self.assertAllClose(out, expected, atol=1e-4)

        # test compute_output_shape
        layer = keras.layers.Dot(axes=-1)
        self.assertEqual(layer.compute_output_shape([(4, 5), (4, 5)]), (4, 1))

    @parameterized.named_parameters(
        *testing_utils.generate_combinations_with_testcase_name(layer=[
            keras.layers.Add, keras.layers.Subtract, keras.layers.Multiply,
            keras.layers.Minimum, keras.layers.Maximum, keras.layers.Average,
            keras.layers.Concatenate
        ]))
    def test_merge_with_ragged_input(self, layer):
        ragged_data = ragged_factory_ops.constant(
            [[1., 1., 1.], [1., 1.], [1., 1., 1., 1.]], ragged_rank=1)
        dense_data = ragged_data.to_tensor()
        input1 = keras.Input(shape=(None, ), ragged=True)
        input2 = keras.Input(shape=(None, ), ragged=True)
        out = keras.layers.Add()([input1, input2])
        model = keras.models.Model(inputs=[input1, input2], outputs=out)
        out_ragged = model.predict([ragged_data, ragged_data], steps=1)
        out_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            out_ragged).to_tensor()

        input1 = keras.Input(shape=(None, ))
        input2 = keras.Input(shape=(None, ))
        out = keras.layers.Add()([input1, input2])
        model = keras.models.Model(inputs=[input1, input2], outputs=out)
        out_dense = model.predict([dense_data, dense_data], steps=1)

        self.assertAllEqual(out_dense, out_ragged)

    @parameterized.named_parameters(
        *testing_utils.generate_combinations_with_testcase_name(layer=[
            keras.layers.Add, keras.layers.Subtract, keras.layers.Multiply,
            keras.layers.Minimum, keras.layers.Maximum, keras.layers.Average
        ]))
    def test_merge_with_scalar_input(self, layer):
        x1 = np.array((1))
        x2 = np.array((2))
        out = layer()([x1, x2])
        self.assertEqual(out.shape, ())
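
# A minimal standalone sketch of the merge layers exercised above, using the
# public tf.keras API instead of the internal test harness (assumed:
# TensorFlow 2.x with eager execution). The merge layers act element-wise on
# a list of same-shape inputs, so their output can be checked against NumPy.
import numpy as np
import tensorflow as tf

a = np.random.random((2, 4, 5)).astype('float32')
b = np.random.random((2, 4, 5)).astype('float32')

added = tf.keras.layers.Add()([a, b])         # element-wise sum
averaged = tf.keras.layers.Average()([a, b])  # element-wise mean

np.testing.assert_allclose(added.numpy(), a + b, atol=1e-6)
np.testing.assert_allclose(averaged.numpy(), (a + b) / 2.0, atol=1e-6)
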
# Example 2
def prepare_inputs(data, use_dict, use_dataset, action, input_name):
    # Head assumed from context: this snippet began mid-function, so the
    # signature and first lines reconstruct the variables the body relies on.
    input_data, expected_output = data
    batch_size = input_data.shape[0]
    if use_dict:
        input_data = {input_name: input_data}
    if use_dataset:
        if action == "predict":
            input_data = dataset_ops.DatasetV2.from_tensor_slices(
                input_data).batch(batch_size)
        else:
            input_data = dataset_ops.DatasetV2.from_tensor_slices(
                (input_data, expected_output)).batch(batch_size)
            expected_output = None
    return (input_data, expected_output)
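
# Hedged sketch of the dataset branch above with the public tf.data API
# (`dataset_ops.DatasetV2` is the internal alias of `tf.data.Dataset`): for
# 'predict' only the inputs are batched, while for 'fit'/'evaluate' inputs
# and targets are packed into a single dataset, which is why
# `expected_output` is cleared once it has been absorbed.
import numpy as np
import tensorflow as tf

x = np.random.random((8, 3)).astype('float32')
y = np.random.random((8, 1)).astype('float32')
predict_data = tf.data.Dataset.from_tensor_slices(x).batch(8)
fit_data = tf.data.Dataset.from_tensor_slices((x, y)).batch(8)
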


@keras_parameterized.run_with_all_model_types
@keras_parameterized.run_all_keras_modes
@parameterized.named_parameters(
    *testing_utils.generate_combinations_with_testcase_name(
        use_dict=[True, False],
        use_dataset=[True, False],
        action=["predict", "evaluate", "fit"]))
class SparseTensorInputTest(keras_parameterized.TestCase):
    def test_sparse_tensors(self, use_dict, use_dataset, action):
        data = [(sparse_tensor.SparseTensor([[0, 0, 0], [1, 0, 0], [1, 0, 1]],
                                            [1, 2, 3], [2, 1, 3]),
                 np.array([[[1, -1, -1]], [[2, 3, -1]]])),
                (sparse_tensor.SparseTensor(
                    [[0, 0, 0], [1, 0, 0], [1, 0, 1], [2, 0, 1]], [5, 6, 7, 8],
                    [3, 1, 4]),
                 np.array([[[5, -1, -1, -1]], [[6, 7, -1, -1]],
                           [[-1, 8, -1, -1]]]))]
        # Prepare the model to test.
        input_name = get_input_name(use_dict)
        model_input = input_layer.Input(shape=(1, None),
                                        sparse=True,
                                        name=input_name)  # call closed with assumed `name`; snippet truncated here
# Example 3
class ConvLSTMTest(keras_parameterized.TestCase):

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          data_format=['channels_first', 'channels_last'],
          return_sequences=[True, False]))
  def test_conv_lstm(self, data_format, return_sequences):
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    if data_format == 'channels_first':
      inputs = np.random.rand(num_samples, sequence_len,
                              input_channel,
                              input_num_row, input_num_col)
    else:
      inputs = np.random.rand(num_samples, sequence_len,
                              input_num_row, input_num_col,
                              input_channel)

    # test for return state:
    x = keras.Input(batch_shape=inputs.shape)
    kwargs = {'data_format': data_format,
              'return_sequences': return_sequences,
              'return_state': True,
              'stateful': True,
              'filters': filters,
              'kernel_size': (num_row, num_col),
              'padding': 'valid'}
    layer = keras.layers.ConvLSTM2D(**kwargs)
    layer.build(inputs.shape)
    outputs = layer(x)
    _, states = outputs[0], outputs[1:]
    self.assertEqual(len(states), 2)
    model = keras.models.Model(x, states[0])
    state = model.predict(inputs)

    self.assertAllClose(
        keras.backend.eval(layer.states[0]), state, atol=1e-4)

    # test for output shape:
    testing_utils.layer_test(
        keras.layers.ConvLSTM2D,
        kwargs={'data_format': data_format,
                'return_sequences': return_sequences,
                'filters': filters,
                'kernel_size': (num_row, num_col),
                'padding': 'valid'},
        input_shape=inputs.shape)

  def test_conv_lstm_statefulness(self):
    # Tests for statefulness
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    inputs = np.random.rand(num_samples, sequence_len,
                            input_num_row, input_num_col,
                            input_channel)

    with self.cached_session():
      model = keras.models.Sequential()
      kwargs = {'data_format': 'channels_last',
                'return_sequences': False,
                'filters': filters,
                'kernel_size': (num_row, num_col),
                'stateful': True,
                'batch_input_shape': inputs.shape,
                'padding': 'same'}
      layer = keras.layers.ConvLSTM2D(**kwargs)

      model.add(layer)
      model.compile(optimizer='sgd', loss='mse')
      out1 = model.predict(np.ones_like(inputs))

      # train once so that the states change
      model.train_on_batch(np.ones_like(inputs),
                           np.random.random(out1.shape))
      out2 = model.predict(np.ones_like(inputs))

      # if the state is not reset, output should be different
      self.assertNotEqual(out1.max(), out2.max())

      # check that output changes after states are reset
      # (even though the model itself didn't change)
      layer.reset_states()
      out3 = model.predict(np.ones_like(inputs))
      self.assertNotEqual(out3.max(), out2.max())

      # check that container-level reset_states() works
      model.reset_states()
      out4 = model.predict(np.ones_like(inputs))
      self.assertAllClose(out3, out4, atol=1e-5)

      # check that the call to `predict` updated the states
      out5 = model.predict(np.ones_like(inputs))
      self.assertNotEqual(out4.max(), out5.max())

  def test_conv_lstm_regularizers(self):
    # check regularizers
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    inputs = np.random.rand(num_samples, sequence_len,
                            input_num_row, input_num_col,
                            input_channel)

    with self.cached_session():
      kwargs = {'data_format': 'channels_last',
                'return_sequences': False,
                'kernel_size': (num_row, num_col),
                'stateful': True,
                'filters': filters,
                'batch_input_shape': inputs.shape,
                'kernel_regularizer': keras.regularizers.L1L2(l1=0.01),
                'recurrent_regularizer': keras.regularizers.L1L2(l1=0.01),
                'activity_regularizer': 'l2',
                'bias_regularizer': 'l2',
                'kernel_constraint': 'max_norm',
                'recurrent_constraint': 'max_norm',
                'bias_constraint': 'max_norm',
                'padding': 'same'}

      layer = keras.layers.ConvLSTM2D(**kwargs)
      layer.build(inputs.shape)
      self.assertEqual(len(layer.losses), 3)
      layer(keras.backend.variable(np.ones(inputs.shape)))
      self.assertEqual(len(layer.losses), 4)

  def test_conv_lstm_dropout(self):
    # check dropout
    with self.cached_session():
      testing_utils.layer_test(
          keras.layers.ConvLSTM2D,
          kwargs={'data_format': 'channels_last',
                  'return_sequences': False,
                  'filters': 2,
                  'kernel_size': (3, 3),
                  'padding': 'same',
                  'dropout': 0.1,
                  'recurrent_dropout': 0.1},
          input_shape=(1, 2, 5, 5, 2))

  def test_conv_lstm_cloning(self):
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.ConvLSTM2D(5, 3, input_shape=(None, 5, 5, 3)))

      test_inputs = np.random.random((2, 4, 5, 5, 3))
      reference_outputs = model.predict(test_inputs)
      weights = model.get_weights()

    # Use a new graph to clone the model
    with self.cached_session():
      clone = keras.models.clone_model(model)
      clone.set_weights(weights)

      outputs = clone.predict(test_inputs)
      self.assertAllClose(reference_outputs, outputs, atol=1e-5)

  @test.disable_with_predicate(
      pred=test.is_built_with_rocm,
      skip_message='Skipping the test as OOM occurred with 1 GB budget.')
  def test_conv_lstm_with_initial_state(self):
    num_samples = 32
    sequence_len = 5
    encoder_inputs = keras.layers.Input((None, 32, 32, 3))
    encoder = keras.layers.ConvLSTM2D(
        filters=32, kernel_size=(3, 3), padding='same',
        return_sequences=False, return_state=True)
    _, state_h, state_c = encoder(encoder_inputs)
    encoder_states = [state_h, state_c]

    decoder_inputs = keras.layers.Input((None, 32, 32, 4))
    decoder_lstm = keras.layers.ConvLSTM2D(
        filters=32, kernel_size=(3, 3), padding='same',
        return_sequences=False, return_state=False)
    decoder_outputs = decoder_lstm(decoder_inputs, initial_state=encoder_states)
    output = keras.layers.Conv2D(
        1, (3, 3), padding='same', activation='relu')(decoder_outputs)
    model = keras.Model([encoder_inputs, decoder_inputs], output)

    model.compile(
        optimizer='sgd', loss='mse',
        run_eagerly=testing_utils.should_run_eagerly())
    x_1 = np.random.rand(num_samples, sequence_len, 32, 32, 3)
    x_2 = np.random.rand(num_samples, sequence_len, 32, 32, 4)
    y = np.random.rand(num_samples, 32, 32, 1)
    model.fit([x_1, x_2], y)

    model.predict([x_1, x_2])
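
# Standalone shape sketch for ConvLSTM2D via the public tf.keras API
# (assumed: TensorFlow 2.x). With a 3x3 kernel and padding='valid' the 5x5
# spatial grid shrinks to 3x3, and `return_sequences` decides whether the
# time axis survives.
import numpy as np
import tensorflow as tf

frames = np.random.random((1, 2, 5, 5, 2)).astype('float32')  # (batch, time, rows, cols, channels)
seq_out = tf.keras.layers.ConvLSTM2D(
    filters=2, kernel_size=(3, 3), padding='valid', return_sequences=True)(frames)
last_out = tf.keras.layers.ConvLSTM2D(
    filters=2, kernel_size=(3, 3), padding='valid')(frames)
print(seq_out.shape)   # (1, 2, 3, 3, 2)
print(last_out.shape)  # (1, 3, 3, 2)
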
# Example 4
class CuDNNTest(keras_parameterized.TestCase):

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
          return_sequences=[True, False]))
  @test_util.run_gpu_only
  def test_cudnn_rnn_return_sequence(self, layer_class, return_sequences):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    testing_utils.layer_test(
        layer_class,
        kwargs={'units': units,
                'return_sequences': return_sequences},
        input_shape=(num_samples, timesteps, input_size))

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
          go_backwards=[True, False]))
  @test_util.run_gpu_only
  def test_cudnn_rnn_go_backward(self, layer_class, go_backwards):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    testing_utils.layer_test(
        layer_class,
        kwargs={'units': units,
                'go_backwards': go_backwards},
        input_shape=(num_samples, timesteps, input_size))

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_return_state(self, layer_class):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

    inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size))
    layer = layer_class(units, return_state=True, stateful=True)
    outputs = layer(inputs)
    _, state = outputs[0], outputs[1:]
    self.assertEqual(len(state), num_states)
    model = keras.models.Model(inputs, state[0])
    model.run_eagerly = testing_utils.should_run_eagerly()

    inputs = np.random.random((num_samples, timesteps, input_size))
    state = model.predict(inputs)
    np.testing.assert_allclose(
        keras.backend.eval(layer.states[0]), state, atol=1e-4)

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_time_major_input(self, layer_class):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32

    model = keras.models.Sequential()
    model.add(
        keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
    layer = layer_class(units, time_major=True, return_sequences=True)
    model.add(layer)
    model.add(
        keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=0.001))
    model.fit(
        np.ones((num_samples, timesteps, input_size)),
        np.ones((num_samples, timesteps, units)))
    out = model.predict(np.ones((num_samples, timesteps, input_size)))
    self.assertEqual(out.shape, (num_samples, timesteps, units))

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_specify_initial_state_keras_tensor(self, layer_class):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

    inputs = keras.Input((timesteps, input_size))
    initial_state = [keras.Input((units,)) for _ in range(num_states)]
    layer = layer_class(units)
    if len(initial_state) == 1:
      output = layer(inputs, initial_state=initial_state[0])
    else:
      output = layer(inputs, initial_state=initial_state)
    self.assertTrue(
        any(initial_state[0] is t
            for t in layer._inbound_nodes[0].input_tensors))

    model = keras.models.Model([inputs] + initial_state, output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(learning_rate=0.001),
        run_eagerly=testing_utils.should_run_eagerly())

    inputs = np.random.random((num_samples, timesteps, input_size))
    initial_state = [
        np.random.random((num_samples, units)) for _ in range(num_states)
    ]
    targets = np.random.random((num_samples, units))
    model.fit([inputs] + initial_state, targets)
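
# Standalone sketch of the `return_state` contract tested above: LSTM-style
# layers return [output, state_h, state_c] (two states) while GRU-style
# layers return [output, state_h] (one state), which is why `num_states`
# above depends on the layer class. The public tf.keras LSTM/GRU layers are
# used here; on a GPU they dispatch to the cuDNN kernel on their own.
import numpy as np
import tensorflow as tf

x = np.random.random((4, 6, 10)).astype('float32')
out, state_h, state_c = tf.keras.layers.LSTM(2, return_state=True)(x)  # 2 states
out_g, state_h_g = tf.keras.layers.GRU(2, return_state=True)(x)        # 1 state
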
# Example 5
class CuDNNV1OnlyTest(keras_parameterized.TestCase):

  @test_util.run_gpu_only
  def test_trainability(self):
    input_size = 10
    units = 2
    for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
      layer = layer_class(units)
      layer.build((None, None, input_size))
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.trainable_weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 0)
      layer.trainable = False
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 3)
      self.assertEqual(len(layer.trainable_weights), 0)
      layer.trainable = True
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.trainable_weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 0)

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False],
          bidirectional=[False], implementation=[1, 2],
          model_nest_level=[1, 2], model_type=['seq', 'func']))
  @test_util.run_v1_only('b/120911602, b/112083752')
  @test_util.run_gpu_only
  def test_load_weights_between_noncudnn_rnn(self, rnn_type, to_cudnn,
                                             bidirectional, implementation,
                                             model_nest_level, model_type):
    input_size = 10
    timesteps = 6
    input_shape = (timesteps, input_size)
    units = 2
    num_samples = 32
    inputs = np.random.random((num_samples, timesteps, input_size))

    rnn_layer_kwargs = {
        'recurrent_activation': 'sigmoid',
        # ensure biases are non-zero and properly converted
        'bias_initializer': 'random_uniform',
        'implementation': implementation
    }
    if rnn_type == 'LSTM':
      rnn_layer_class = keras.layers.LSTM
      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
    else:
      rnn_layer_class = keras.layers.GRU
      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
      rnn_layer_kwargs['reset_after'] = True

    layer = rnn_layer_class(units, **rnn_layer_kwargs)
    cudnn_layer = cudnn_rnn_layer_class(units)

    model = self._make_nested_model(input_shape, layer, model_nest_level,
                                    model_type)
    cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
                                          model_nest_level, model_type)

    if to_cudnn:
      self._convert_model_weights(model, cudnn_model)
    else:
      self._convert_model_weights(cudnn_model, model)

    self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs),
                        atol=1e-4)

  def _make_nested_model(self, input_shape, layer, level=1, model_type='func'):
    # example: make_nested_seq_model((1,), Dense(10), level=2).summary()
    def make_nested_seq_model(input_shape, layer, level=1):
      model = layer
      for i in range(1, level + 1):
        layers = [keras.layers.InputLayer(input_shape),
                  model] if (i == 1) else [model]
        model = keras.models.Sequential(layers)
        if i > 1:
          model.build((None,) + input_shape)
      return model

    # example: make_nested_func_model((1,), Dense(10), level=2).summary()
    def make_nested_func_model(input_shape, layer, level=1):
      model_input = keras.layers.Input(input_shape)
      model = layer
      for _ in range(level):
        model = keras.models.Model(model_input, model(model_input))
      return model

    if model_type == 'func':
      return make_nested_func_model(input_shape, layer, level)
    elif model_type == 'seq':
      return make_nested_seq_model(input_shape, layer, level)

  def _convert_model_weights(self, source_model, target_model):
    _, fname = tempfile.mkstemp('.h5')
    source_model.save_weights(fname)
    target_model.load_weights(fname)
    os.remove(fname)

  @test_util.run_gpu_only
  def test_preprocess_weights_for_loading_gru_incompatible(self):
    """Test loading weights between incompatible layers.

    Should fail fast with an exception.
    """
    input_shape = (3, 5)

    def gru(cudnn=False, **kwargs):
      layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRUV1
      return layer_class(2, input_shape=input_shape, **kwargs)

    def get_layer_weights(layer):
      layer.build(input_shape=input_shape)
      return layer.get_weights()

    def assert_not_compatible(src, dest, message):
      with self.assertRaises(ValueError) as ex:
        keras.saving.hdf5_format.preprocess_weights_for_loading(
            dest,
            get_layer_weights(src))
      self.assertIn(message, str(ex.exception))

    assert_not_compatible(
        gru(),
        gru(cudnn=True),
        'GRU(reset_after=False) is not compatible with CuDNNGRU')
    assert_not_compatible(
        gru(cudnn=True),
        gru(),
        'CuDNNGRU is not compatible with GRU(reset_after=False)')
    assert_not_compatible(
        gru(),
        gru(reset_after=True),
        'GRU(reset_after=False) is not compatible with '
        'GRU(reset_after=True)')
    assert_not_compatible(
        gru(reset_after=True),
        gru(),
        'GRU(reset_after=True) is not compatible with '
        'GRU(reset_after=False)')
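
# Hedged sketch of the round trip behind `_convert_model_weights` above: one
# model writes its weights to a temporary HDF5 file and a structurally
# compatible model reads them back, with `preprocess_weights_for_loading`
# reshaping CuDNN weights along the way. Public tf.keras equivalent:
import os
import tempfile

import tensorflow as tf

src = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(3,))])
dst = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(3,))])
_, fname = tempfile.mkstemp('.h5')
src.save_weights(fname)
dst.load_weights(fname)
os.remove(fname)
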
# Example 6
class CuDNNV1OnlyTest(keras_parameterized.TestCase):

  @test_util.run_gpu_only
  def test_trainability(self):
    input_size = 10
    units = 2
    for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
      layer = layer_class(units)
      layer.build((None, None, input_size))
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.trainable_weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 0)
      layer.trainable = False
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 3)
      self.assertEqual(len(layer.trainable_weights), 0)
      layer.trainable = True
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.trainable_weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 0)

  # TODO(b/156439419): Reenable after the bug is fixed.
  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False],
          bidirectional=[True, False], implementation=[1, 2],
          model_nest_level=[1, 2], model_type=['seq', 'func']))
  @test_util.run_v1_only('b/120911602, b/112083752')
  @test_util.run_gpu_only
  def DISABLED_test_load_weights_between_noncudnn_rnn(
      self, rnn_type, to_cudnn, bidirectional, implementation,
      model_nest_level, model_type):
    input_size = 10
    timesteps = 6
    input_shape = (timesteps, input_size)
    units = 2
    num_samples = 32
    inputs = np.random.random((num_samples, timesteps, input_size))

    rnn_layer_kwargs = {
        'recurrent_activation': 'sigmoid',
        # ensure biases are non-zero and properly converted
        'bias_initializer': 'random_uniform',
        'implementation': implementation
    }
    if rnn_type == 'LSTM':
      rnn_layer_class = keras.layers.LSTM
      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
    else:
      rnn_layer_class = keras.layers.GRU
      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
      rnn_layer_kwargs['reset_after'] = True

    layer = rnn_layer_class(units, **rnn_layer_kwargs)
    if bidirectional:
      layer = keras.layers.Bidirectional(layer)

    cudnn_layer = cudnn_rnn_layer_class(units)
    if bidirectional:
      cudnn_layer = keras.layers.Bidirectional(cudnn_layer)

    model = self._make_nested_model(input_shape, layer, model_nest_level,
                                    model_type)
    cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
                                          model_nest_level, model_type)

    if to_cudnn:
      self._convert_model_weights(model, cudnn_model)
    else:
      self._convert_model_weights(cudnn_model, model)

    self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs),
                        atol=1e-4)

  def _make_nested_model(self, input_shape, layer, level=1, model_type='func'):
    # example: make_nested_seq_model((1,), Dense(10), level=2).summary()
    def make_nested_seq_model(input_shape, layer, level=1):
      model = layer
      for i in range(1, level + 1):
        layers = [keras.layers.InputLayer(input_shape),
                  model] if (i == 1) else [model]
        model = keras.models.Sequential(layers)
        if i > 1:
          model.build((None,) + input_shape)
      return model

    # example: make_nested_func_model((1,), Dense(10), level=2).summary()
    def make_nested_func_model(input_shape, layer, level=1):
      model_input = keras.layers.Input(input_shape)
      model = layer
      for _ in range(level):
        model = keras.models.Model(model_input, model(model_input))
      return model

    if model_type == 'func':
      return make_nested_func_model(input_shape, layer, level)
    elif model_type == 'seq':
      return make_nested_seq_model(input_shape, layer, level)

  def _convert_model_weights(self, source_model, target_model):
    _, fname = tempfile.mkstemp('.h5')
    source_model.save_weights(fname)
    target_model.load_weights(fname)
    os.remove(fname)

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
  @test_util.run_v1_only('b/120911602')
  @test_util.run_gpu_only
  def test_load_weights_between_noncudnn_rnn_time_distributed(self, rnn_type,
                                                              to_cudnn):
    # Similar to test_load_weights_between_noncudnn_rnn(), but the input has a
    # different rank due to the use of TimeDistributed. Issue: #10356.
    input_size = 10
    steps = 6
    timesteps = 6
    input_shape = (timesteps, steps, input_size)
    units = 2
    num_samples = 32
    inputs = np.random.random((num_samples, timesteps, steps, input_size))

    rnn_layer_kwargs = {
        'recurrent_activation': 'sigmoid',
        # ensure biases are non-zero and properly converted
        'bias_initializer': 'random_uniform',
    }
    if rnn_type == 'LSTM':
      rnn_layer_class = keras.layers.LSTM
      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
    else:
      rnn_layer_class = keras.layers.GRU
      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
      rnn_layer_kwargs['reset_after'] = True

    layer = rnn_layer_class(units, **rnn_layer_kwargs)
    layer = keras.layers.TimeDistributed(layer)

    cudnn_layer = cudnn_rnn_layer_class(units)
    cudnn_layer = keras.layers.TimeDistributed(cudnn_layer)

    model = self._make_nested_model(input_shape, layer)
    cudnn_model = self._make_nested_model(input_shape, cudnn_layer)

    if to_cudnn:
      self._convert_model_weights(model, cudnn_model)
    else:
      self._convert_model_weights(cudnn_model, model)

    self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs),
                        atol=1e-4)

  @test_util.run_gpu_only
  def test_cudnnrnn_bidirectional(self):
    rnn = keras.layers.CuDNNGRU
    samples = 2
    dim = 2
    timesteps = 2
    output_dim = 2
    mode = 'concat'

    x = np.random.random((samples, timesteps, dim))
    target_dim = 2 * output_dim if mode == 'concat' else output_dim
    y = np.random.random((samples, target_dim))

    # test with Sequential model
    model = keras.Sequential()
    model.add(
        keras.layers.Bidirectional(
            rnn(output_dim), merge_mode=mode, input_shape=(None, dim)))
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

    # test config
    model.get_config()
    model = keras.models.model_from_json(model.to_json())
    model.summary()

    # test stacked bidirectional layers
    model = keras.Sequential()
    model.add(
        keras.layers.Bidirectional(
            rnn(output_dim, return_sequences=True),
            merge_mode=mode,
            input_shape=(None, dim)))
    model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode))
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

    # test with functional API
    inputs = keras.Input((timesteps, dim))
    outputs = keras.layers.Bidirectional(
        rnn(output_dim), merge_mode=mode)(
            inputs)
    model = keras.Model(inputs, outputs)
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

    # Bidirectional and stateful
    inputs = keras.Input(batch_shape=(1, timesteps, dim))
    outputs = keras.layers.Bidirectional(
        rnn(output_dim, stateful=True), merge_mode=mode)(
            inputs)
    model = keras.Model(inputs, outputs)
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

  @test_util.run_gpu_only
  def test_preprocess_weights_for_loading_gru_incompatible(self):
    """Test loading weights between incompatible layers.

    Should fail fast with an exception.
    """
    input_shape = (3, 5)

    def gru(cudnn=False, **kwargs):
      layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRUV1
      return layer_class(2, input_shape=input_shape, **kwargs)

    def get_layer_weights(layer):
      layer.build(input_shape=input_shape)
      return layer.get_weights()

    def assert_not_compatible(src, dest, message):
      with self.assertRaises(ValueError) as ex:
        keras.saving.hdf5_format.preprocess_weights_for_loading(
            dest,
            get_layer_weights(src))
      self.assertIn(message, str(ex.exception))

    assert_not_compatible(
        gru(),
        gru(cudnn=True),
        'GRU(reset_after=False) is not compatible with CuDNNGRU')
    assert_not_compatible(
        gru(cudnn=True),
        gru(),
        'CuDNNGRU is not compatible with GRU(reset_after=False)')
    assert_not_compatible(
        gru(),
        gru(reset_after=True),
        'GRU(reset_after=False) is not compatible with '
        'GRU(reset_after=True)')
    assert_not_compatible(
        gru(reset_after=True),
        gru(),
        'GRU(reset_after=True) is not compatible with '
        'GRU(reset_after=False)')
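
# Standalone sketch of the Bidirectional merge modes exercised above:
# merge_mode='concat' doubles the feature dimension, while 'sum'/'ave'/'mul'
# preserve it (public tf.keras API assumed).
import numpy as np
import tensorflow as tf

x = np.random.random((2, 2, 2)).astype('float32')
concat_out = tf.keras.layers.Bidirectional(
    tf.keras.layers.GRU(2), merge_mode='concat')(x)
sum_out = tf.keras.layers.Bidirectional(
    tf.keras.layers.GRU(2), merge_mode='sum')(x)
print(concat_out.shape)  # (2, 4)
print(sum_out.shape)     # (2, 2)
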
# Example 7
class CuDNNTest(test.TestCase, parameterized.TestCase):
    @test_util.run_in_graph_and_eager_modes
    def test_cudnn_rnn_basics(self):
        if test.is_gpu_available(cuda_only=True):
            with self.test_session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32
                for layer_class in [
                        keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM
                ]:
                    for return_sequences in [True, False]:
                        with keras.utils.CustomObjectScope({
                                'keras.layers.CuDNNGRU':
                                keras.layers.CuDNNGRU,
                                'keras.layers.CuDNNLSTM':
                                keras.layers.CuDNNLSTM
                        }):
                            testing_utils.layer_test(layer_class,
                                                     kwargs={
                                                         'units':
                                                         units,
                                                         'return_sequences':
                                                         return_sequences
                                                     },
                                                     input_shape=(num_samples,
                                                                  timesteps,
                                                                  input_size))
                    for go_backwards in [True, False]:
                        with keras.utils.CustomObjectScope({
                                'keras.layers.CuDNNGRU':
                                keras.layers.CuDNNGRU,
                                'keras.layers.CuDNNLSTM':
                                keras.layers.CuDNNLSTM
                        }):
                            testing_utils.layer_test(layer_class,
                                                     kwargs={
                                                         'units':
                                                         units,
                                                         'go_backwards':
                                                         go_backwards
                                                     },
                                                     input_shape=(num_samples,
                                                                  timesteps,
                                                                  input_size))

    @test_util.run_in_graph_and_eager_modes
    def test_trainability(self):
        if test.is_gpu_available(cuda_only=True):
            with self.test_session(use_gpu=True):
                input_size = 10
                units = 2
                for layer_class in [
                        keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM
                ]:
                    layer = layer_class(units)
                    layer.build((None, None, input_size))
                    self.assertEqual(len(layer.weights), 3)
                    self.assertEqual(len(layer.trainable_weights), 3)
                    self.assertEqual(len(layer.non_trainable_weights), 0)
                    layer.trainable = False
                    self.assertEqual(len(layer.weights), 3)
                    self.assertEqual(len(layer.non_trainable_weights), 3)
                    self.assertEqual(len(layer.trainable_weights), 0)
                    layer.trainable = True
                    self.assertEqual(len(layer.weights), 3)
                    self.assertEqual(len(layer.trainable_weights), 3)
                    self.assertEqual(len(layer.non_trainable_weights), 0)

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    def test_regularizer(self, layer_class):
        if test.is_gpu_available(cuda_only=True):
            with self.test_session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32
                layer = layer_class(
                    units,
                    return_sequences=False,
                    input_shape=(timesteps, input_size),
                    kernel_regularizer=keras.regularizers.l1(0.01),
                    recurrent_regularizer=keras.regularizers.l1(0.01),
                    bias_regularizer='l2')
                layer.build((None, None, input_size))
                self.assertEqual(len(layer.losses), 3)

                layer = layer_class(units,
                                    return_sequences=False,
                                    input_shape=(timesteps, input_size),
                                    activity_regularizer='l2')
                self.assertTrue(layer.activity_regularizer)
                x = keras.backend.variable(
                    np.ones((num_samples, timesteps, input_size)))
                layer(x)
                self.assertEqual(len(layer.get_losses_for(x)), 1)

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    def test_return_state(self, layer_class):
        if test.is_gpu_available(cuda_only=True):
            with self.test_session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32
                num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

                inputs = keras.Input(batch_shape=(num_samples, timesteps,
                                                  input_size))
                layer = layer_class(units, return_state=True, stateful=True)
                outputs = layer(inputs)
                _, state = outputs[0], outputs[1:]
                self.assertEqual(len(state), num_states)
                model = keras.models.Model(inputs, state[0])

                inputs = np.random.random((num_samples, timesteps, input_size))
                state = model.predict(inputs)
                np.testing.assert_allclose(keras.backend.eval(layer.states[0]),
                                           state,
                                           atol=1e-4)

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    def test_specify_initial_state_keras_tensor(self, layer_class):
        if test.is_gpu_available(cuda_only=True):
            with self.test_session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32
                num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

                inputs = keras.Input((timesteps, input_size))
                initial_state = [
                    keras.Input((units, )) for _ in range(num_states)
                ]
                layer = layer_class(units)
                if len(initial_state) == 1:
                    output = layer(inputs, initial_state=initial_state[0])
                else:
                    output = layer(inputs, initial_state=initial_state)
                self.assertIn(initial_state[0],
                              layer._inbound_nodes[0].input_tensors)

                model = keras.models.Model([inputs] + initial_state, output)
                model.compile(loss='categorical_crossentropy',
                              optimizer='adam')

                inputs = np.random.random((num_samples, timesteps, input_size))
                initial_state = [
                    np.random.random((num_samples, units))
                    for _ in range(num_states)
                ]
                targets = np.random.random((num_samples, units))
                model.fit([inputs] + initial_state, targets)

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    def test_statefulness(self, layer_class):
        if test.is_gpu_available(cuda_only=True):
            with self.test_session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32

                model = keras.models.Sequential()
                model.add(
                    keras.layers.Embedding(10,
                                           input_size,
                                           input_length=timesteps,
                                           batch_input_shape=(num_samples,
                                                              timesteps)))
                layer = layer_class(units,
                                    return_sequences=False,
                                    stateful=True,
                                    weights=None)
                model.add(layer)
                model.compile(optimizer='sgd', loss='mse')
                out1 = model.predict(np.ones((num_samples, timesteps)))
                self.assertEqual(out1.shape, (num_samples, units))

                # train once so that the states change
                model.train_on_batch(np.ones((num_samples, timesteps)),
                                     np.ones((num_samples, units)))
                out2 = model.predict(np.ones((num_samples, timesteps)))

                # if the state is not reset, output should be different
                self.assertNotEqual(out1.max(), out2.max())

                # check that output changes after states are reset
                # (even though the model itself didn't change)
                layer.reset_states()
                out3 = model.predict(np.ones((num_samples, timesteps)))
                self.assertNotEqual(out2.max(), out3.max())

                # check that container-level reset_states() works
                model.reset_states()
                out4 = model.predict(np.ones((num_samples, timesteps)))
                self.assertAllClose(out3, out4, atol=1e-5)

                # check that the call to `predict` updated the states
                out5 = model.predict(np.ones((num_samples, timesteps)))
                self.assertNotEqual(out4.max(), out5.max())

    @parameterized.named_parameters(
        *testing_utils.generate_combinations_with_testcase_name(
            rnn_type=['LSTM', 'GRU'],
            to_cudnn=[True, False],
            bidirectional=[True, False],
            implementation=[1, 2],
            model_nest_level=[1, 2],
            model_type=['seq', 'func']))
    def test_load_weights_between_noncudnn_rnn(self, rnn_type, to_cudnn,
                                               bidirectional, implementation,
                                               model_nest_level, model_type):
        if test.is_gpu_available(cuda_only=True):
            with self.test_session(use_gpu=True):
                input_size = 10
                timesteps = 6
                input_shape = (timesteps, input_size)
                units = 2
                num_samples = 32
                inputs = np.random.random((num_samples, timesteps, input_size))

                rnn_layer_kwargs = {
                    'recurrent_activation': 'sigmoid',
                    # ensure biases are non-zero and properly converted
                    'bias_initializer': 'random_uniform',
                    'implementation': implementation
                }
                if rnn_type == 'LSTM':
                    rnn_layer_class = keras.layers.LSTM
                    cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
                else:
                    rnn_layer_class = keras.layers.GRU
                    cudnn_rnn_layer_class = keras.layers.CuDNNGRU
                    rnn_layer_kwargs['reset_after'] = True

                layer = rnn_layer_class(units, **rnn_layer_kwargs)
                if bidirectional:
                    layer = keras.layers.Bidirectional(layer)

                cudnn_layer = cudnn_rnn_layer_class(units)
                if bidirectional:
                    cudnn_layer = keras.layers.Bidirectional(cudnn_layer)

                model = self._make_nested_model(input_shape, layer,
                                                model_nest_level, model_type)
                cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
                                                      model_nest_level,
                                                      model_type)

                if to_cudnn:
                    self._convert_model_weights(model, cudnn_model)
                else:
                    self._convert_model_weights(cudnn_model, model)

                self.assertAllClose(model.predict(inputs),
                                    cudnn_model.predict(inputs),
                                    atol=1e-4)

    def _make_nested_model(self,
                           input_shape,
                           layer,
                           level=1,
                           model_type='func'):
        # example: make_nested_seq_model((1,), Dense(10), level=2).summary()
        def make_nested_seq_model(input_shape, layer, level=1):
            model = layer
            for i in range(1, level + 1):
                layers = [keras.layers.InputLayer(input_shape),
                          model] if (i == 1) else [model]
                model = keras.models.Sequential(layers)
            return model

        # example: make_nested_func_model((1,), Dense(10), level=2).summary()
        def make_nested_func_model(input_shape, layer, level=1):
            model_input = keras.layers.Input(input_shape)
            model = layer
            for _ in range(level):
                model = keras.models.Model(model_input, model(model_input))
            return model

        if model_type == 'func':
            return make_nested_func_model(input_shape, layer, level)
        elif model_type == 'seq':
            return make_nested_seq_model(input_shape, layer, level)

    def _convert_model_weights(self, source_model, target_model):
        _, fname = tempfile.mkstemp('.h5')
        source_model.save_weights(fname)
        target_model.load_weights(fname)
        os.remove(fname)

    @parameterized.named_parameters(
        *testing_utils.generate_combinations_with_testcase_name(
            rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
    def test_load_weights_between_noncudnn_rnn_time_distributed(
            self, rnn_type, to_cudnn):
        # Similar to test_load_weights_between_noncudnn_rnn(), but the input
        # has a different rank due to the use of TimeDistributed. Issue: #10356.
        if test.is_gpu_available(cuda_only=True):
            with self.test_session(use_gpu=True):
                input_size = 10
                steps = 6
                timesteps = 6
                input_shape = (timesteps, steps, input_size)
                units = 2
                num_samples = 32
                inputs = np.random.random(
                    (num_samples, timesteps, steps, input_size))

                rnn_layer_kwargs = {
                    'recurrent_activation': 'sigmoid',
                    # ensure biases are non-zero and properly converted
                    'bias_initializer': 'random_uniform',
                }
                if rnn_type == 'LSTM':
                    rnn_layer_class = keras.layers.LSTM
                    cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
                else:
                    rnn_layer_class = keras.layers.GRU
                    cudnn_rnn_layer_class = keras.layers.CuDNNGRU
                    rnn_layer_kwargs['reset_after'] = True

                layer = rnn_layer_class(units, **rnn_layer_kwargs)
                layer = keras.layers.TimeDistributed(layer)

                cudnn_layer = cudnn_rnn_layer_class(units)
                cudnn_layer = keras.layers.TimeDistributed(cudnn_layer)

                model = self._make_nested_model(input_shape, layer)
                cudnn_model = self._make_nested_model(input_shape, cudnn_layer)

                if to_cudnn:
                    self._convert_model_weights(model, cudnn_model)
                else:
                    self._convert_model_weights(cudnn_model, model)

                self.assertAllClose(model.predict(inputs),
                                    cudnn_model.predict(inputs),
                                    atol=1e-4)

    @test_util.run_in_graph_and_eager_modes
    def test_cudnnrnn_bidirectional(self):
        if test.is_gpu_available(cuda_only=True):
            with self.test_session(use_gpu=True):
                rnn = keras.layers.CuDNNGRU
                samples = 2
                dim = 2
                timesteps = 2
                output_dim = 2
                mode = 'concat'

                x = np.random.random((samples, timesteps, dim))
                target_dim = 2 * output_dim if mode == 'concat' else output_dim
                y = np.random.random((samples, target_dim))

                # test with Sequential model
                model = keras.Sequential()
                model.add(
                    keras.layers.Bidirectional(rnn(output_dim),
                                               merge_mode=mode,
                                               input_shape=(None, dim)))
                model.compile(loss='mse',
                              optimizer=RMSPropOptimizer(learning_rate=0.001))
                model.fit(x, y, epochs=1, batch_size=1)

                # test config
                model.get_config()
                model = keras.models.model_from_json(model.to_json())
                model.summary()

                # test stacked bidirectional layers
                model = keras.Sequential()
                model.add(
                    keras.layers.Bidirectional(rnn(output_dim,
                                                   return_sequences=True),
                                               merge_mode=mode,
                                               input_shape=(None, dim)))
                model.add(
                    keras.layers.Bidirectional(rnn(output_dim),
                                               merge_mode=mode))
                model.compile(loss='mse',
                              optimizer=RMSPropOptimizer(learning_rate=0.001))
                model.fit(x, y, epochs=1, batch_size=1)

                # test with functional API
                inputs = keras.Input((timesteps, dim))
                outputs = keras.layers.Bidirectional(rnn(output_dim),
                                                     merge_mode=mode)(inputs)
                model = keras.Model(inputs, outputs)
                model.compile(loss='mse',
                              optimizer=RMSPropOptimizer(learning_rate=0.001))
                model.fit(x, y, epochs=1, batch_size=1)

                # Bidirectional and stateful
                inputs = keras.Input(batch_shape=(1, timesteps, dim))
                outputs = keras.layers.Bidirectional(rnn(output_dim,
                                                         stateful=True),
                                                     merge_mode=mode)(inputs)
                model = keras.Model(inputs, outputs)
                model.compile(loss='mse',
                              optimizer=RMSPropOptimizer(learning_rate=0.001))
                model.fit(x, y, epochs=1, batch_size=1)

    def test_preprocess_weights_for_loading_gru_incompatible(self):
        """Test loading weights between incompatible layers.

    Should fail fast with an exception.
    """
        if test.is_gpu_available(cuda_only=True):
            with self.test_session(use_gpu=True):
                input_shape = (3, 5)

                def gru(cudnn=False, **kwargs):
                    layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRU
                    return layer_class(2, input_shape=input_shape, **kwargs)

                def get_layer_weights(layer):
                    layer.build(input_shape=input_shape)
                    return layer.get_weights()

                def assert_not_compatible(src, dest, message):
                    with self.assertRaises(ValueError) as ex:
                        keras.engine.saving.preprocess_weights_for_loading(
                            dest, get_layer_weights(src))
                    self.assertIn(message, str(ex.exception))

                assert_not_compatible(
                    gru(), gru(cudnn=True),
                    'GRU(reset_after=False) is not compatible with CuDNNGRU')
                assert_not_compatible(
                    gru(cudnn=True), gru(),
                    'CuDNNGRU is not compatible with GRU(reset_after=False)')
                assert_not_compatible(
                    gru(), gru(reset_after=True),
                    'GRU(reset_after=False) is not compatible with '
                    'GRU(reset_after=True)')
                assert_not_compatible(
                    gru(reset_after=True), gru(),
                    'GRU(reset_after=True) is not compatible with '
                    'GRU(reset_after=False)')
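
# The incompatibilities asserted above come down to the GRU bias layout:
# CuDNNGRU (like GRU(reset_after=True)) keeps two bias vectors per gate
# block, one applied before and one after the recurrent matmul, while
# GRU(reset_after=False) keeps a single one, so saved weights cannot be
# reinterpreted across the two layouts. A quick check with the public API
# (assumed: TensorFlow 2.x):
import tensorflow as tf

gru = tf.keras.layers.GRU(2, reset_after=True)
gru.build((None, 3, 5))
print(gru.cell.bias.shape)  # (2, 6): separate input and recurrent biases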