def test_convolutional_recurrent():

    class Masking5D(Masking):
        """Regular masking layer returns wrong shape of mask for RNN"""
        def compute_mask(self, inputs, mask=None):
            return K.any(K.not_equal(inputs, 0.), axis=[2, 3, 4])

    for data_format in ['channels_first', 'channels_last']:

        if data_format == 'channels_first':
            inputs = np.random.rand(num_samples, sequence_len,
                                    input_channel,
                                    input_num_row, input_num_col)
        else:
            inputs = np.random.rand(num_samples, sequence_len,
                                    input_num_row, input_num_col,
                                    input_channel)

        for use_mask in [False, True]:
            for return_sequences in [True, False]:
                # test for return state:
                x = Input(batch_shape=inputs.shape)
                kwargs = {'data_format': data_format,
                          'return_sequences': return_sequences,
                          'return_state': True,
                          'stateful': True,
                          'filters': filters,
                          'kernel_size': (num_row, num_col),
                          'padding': 'valid'}
                layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
                layer.build(inputs.shape)
                if use_mask:
                    outputs = layer(Masking5D()(x))
                else:
                    outputs = layer(x)
                output, states = outputs[0], outputs[1:]
                assert len(states) == 2
                model = Model(x, states[0])
                state = model.predict(inputs)
                np.testing.assert_allclose(
                    K.eval(layer.states[0]), state, atol=1e-4)

                # test for output shape:
                output = layer_test(convolutional_recurrent.ConvLSTM2D,
                                    kwargs={'data_format': data_format,
                                            'return_sequences': return_sequences,
                                            'filters': filters,
                                            'kernel_size': (num_row, num_col),
                                            'padding': 'valid'},
                                    input_shape=inputs.shape)
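The names num_samples, sequence_len, input_channel, input_num_row, input_num_col, filters, num_row and num_col are module-level constants that the excerpt above (and the next two examples) rely on but do not define. A minimal sketch of plausible values, mirroring the ones defined inline in Example #6 below:

# Module-level constants assumed by the tests above and below (a sketch;
# the values mirror Example #6 and may differ from the original test module).
num_row = 3
num_col = 3
filters = 5
num_samples = 2
input_channel = 2
input_num_row = 5
input_num_col = 5
sequence_len = 2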
Example #2
def test_convolutional_recurrent():

    for data_format in ['channels_first', 'channels_last']:

        if data_format == 'channels_first':
            inputs = np.random.rand(num_samples, sequence_len, input_channel,
                                    input_num_row, input_num_col)
        else:
            inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                                    input_num_col, input_channel)

        for return_sequences in [True, False]:

            # test for return state:
            x = Input(batch_shape=inputs.shape)
            kwargs = {
                'data_format': data_format,
                'return_sequences': return_sequences,
                'return_state': True,
                'stateful': True,
                'filters': filters,
                'kernel_size': (num_row, num_col),
                'padding': 'valid'
            }
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
            layer.build(inputs.shape)
            outputs = layer(x)
            output, states = outputs[0], outputs[1:]
            assert len(states) == 2
            model = Model(x, states[0])
            state = model.predict(inputs)
            np.testing.assert_allclose(K.eval(layer.states[0]),
                                       state,
                                       atol=1e-4)

            # test for output shape:
            output = layer_test(convolutional_recurrent.ConvLSTM2D,
                                kwargs={
                                    'data_format': data_format,
                                    'return_sequences': return_sequences,
                                    'filters': filters,
                                    'kernel_size': (num_row, num_col),
                                    'padding': 'valid'
                                },
                                input_shape=inputs.shape)
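The return_state assertions above rely on ConvLSTM2D returning its output followed by two state tensors. A minimal sketch of that contract, assuming the constants sketched earlier and the same imports as the tests:

# With return_state=True, ConvLSTM2D returns [output, hidden_state, cell_state],
# which is why the tests expect exactly two state tensors.
x = Input(batch_shape=(num_samples, sequence_len,
                       input_num_row, input_num_col, input_channel))
out, h, c = convolutional_recurrent.ConvLSTM2D(filters=filters,
                                               kernel_size=(num_row, num_col),
                                               padding='valid',
                                               return_state=True)(x)
# h and c each have shape (num_samples, new_rows, new_cols, filters).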
Example #3
def test_convolutional_recurrent_statefulness():

    data_format = 'channels_last'
    return_sequences = False
    inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                            input_num_col, input_channel)
    # Tests for statefulness
    model = Sequential()
    kwargs = {
        'data_format': data_format,
        'return_sequences': return_sequences,
        'filters': filters,
        'kernel_size': (num_row, num_col),
        'stateful': True,
        'batch_input_shape': inputs.shape,
        'padding': 'same'
    }
    layer = convolutional_recurrent.ConvLSTM2D(**kwargs)

    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones_like(inputs))

    # train once so that the states change
    model.train_on_batch(np.ones_like(inputs), np.random.random(out1.shape))
    out2 = model.predict(np.ones_like(inputs))

    # if the state is not reset, output should be different
    assert (out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones_like(inputs))
    assert (out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones_like(inputs))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones_like(inputs))
    assert (out4.max() != out5.max())

    # CNTK doesn't support evaluating a convolution with a static
    # variable; enable this check again later.
    if K.backend() != 'cntk':
        # check regularizers
        kwargs = {
            'data_format': data_format,
            'return_sequences': return_sequences,
            'kernel_size': (num_row, num_col),
            'stateful': True,
            'filters': filters,
            'batch_input_shape': inputs.shape,
            'kernel_regularizer': regularizers.L1L2(l1=0.01),
            'recurrent_regularizer': regularizers.L1L2(l1=0.01),
            'bias_regularizer': 'l2',
            'activity_regularizer': 'l2',
            'kernel_constraint': 'max_norm',
            'recurrent_constraint': 'max_norm',
            'bias_constraint': 'max_norm',
            'padding': 'same'
        }

        layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
        layer.build(inputs.shape)
        assert len(layer.losses) == 3
        assert layer.activity_regularizer
        output = layer(K.variable(np.ones(inputs.shape)))
        assert len(layer.losses) == 4
        K.eval(output)

    # check dropout
    layer_test(convolutional_recurrent.ConvLSTM2D,
               kwargs={
                   'data_format': data_format,
                   'return_sequences': return_sequences,
                   'filters': filters,
                   'kernel_size': (num_row, num_col),
                   'padding': 'same',
                   'dropout': 0.1,
                   'recurrent_dropout': 0.1
               },
               input_shape=inputs.shape)

    # check state initialization
    layer = convolutional_recurrent.ConvLSTM2D(
        filters=filters,
        kernel_size=(num_row, num_col),
        data_format=data_format,
        return_sequences=return_sequences)
    layer.build(inputs.shape)
    x = Input(batch_shape=inputs.shape)
    initial_state = layer.get_initial_state(x)
    y = layer(x, initial_state=initial_state)
    model = Model(x, y)
    assert (model.predict(inputs).shape == layer.compute_output_shape(
        inputs.shape))
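The statefulness checks above hinge on stateful=True carrying cell state across calls until reset_states() is invoked. A minimal usage sketch under the same assumptions (module constants and imports as above); chunk_a and chunk_b are hypothetical consecutive chunks of one long sequence:

# With stateful=True, state persists across predict()/train_on_batch() calls
# for the same batch positions until reset_states() is called.
stateful_model = Sequential()
stateful_model.add(convolutional_recurrent.ConvLSTM2D(
    filters=filters,
    kernel_size=(num_row, num_col),
    padding='same',
    stateful=True,
    batch_input_shape=(num_samples, sequence_len,
                       input_num_row, input_num_col, input_channel)))
stateful_model.compile(optimizer='sgd', loss='mse')

chunk_a = np.random.rand(num_samples, sequence_len,
                         input_num_row, input_num_col, input_channel)
chunk_b = np.random.rand(num_samples, sequence_len,
                         input_num_row, input_num_col, input_channel)

stateful_model.predict(chunk_a)   # states now reflect chunk_a
stateful_model.predict(chunk_b)   # continues from the states left by chunk_a
stateful_model.reset_states()     # start fresh before an unrelated sequence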
Example #4
class LayerCorrectnessTest(keras_parameterized.TestCase):
    def setUp(self):
        super(LayerCorrectnessTest, self).setUp()
        # Set two virtual CPUs to test MirroredStrategy with multiple devices
        cpus = tf.config.list_physical_devices('CPU')
        tf.config.set_logical_device_configuration(cpus[0], [
            tf.config.LogicalDeviceConfiguration(),
            tf.config.LogicalDeviceConfiguration(),
        ])

    def _create_model_from_layer(self, layer, input_shapes):
        inputs = [layers.Input(batch_input_shape=s) for s in input_shapes]
        if len(inputs) == 1:
            inputs = inputs[0]
        y = layer(inputs)
        model = models.Model(inputs, y)
        model.compile('sgd', 'mse')
        return model

    @parameterized.named_parameters(
        ('LeakyReLU', advanced_activations.LeakyReLU, (2, 2)),
        ('PReLU', advanced_activations.PReLU, (2, 2)),
        ('ELU', advanced_activations.ELU, (2, 2)),
        ('ThresholdedReLU', advanced_activations.ThresholdedReLU, (2, 2)),
        ('Softmax', advanced_activations.Softmax, (2, 2)),
        ('ReLU', advanced_activations.ReLU, (2, 2)),
        ('Conv1D', lambda: convolutional.Conv1D(2, 2), (2, 2, 1)),
        ('Conv2D', lambda: convolutional.Conv2D(2, 2), (2, 2, 2, 1)),
        ('Conv3D', lambda: convolutional.Conv3D(2, 2), (2, 2, 2, 2, 1)),
        ('Conv2DTranspose', lambda: convolutional.Conv2DTranspose(2, 2),
         (2, 2, 2, 2)),
        ('SeparableConv2D', lambda: convolutional.SeparableConv2D(2, 2),
         (2, 2, 2, 1)),
        ('DepthwiseConv2D', lambda: convolutional.DepthwiseConv2D(2, 2),
         (2, 2, 2, 1)),
        ('UpSampling2D', convolutional.UpSampling2D, (2, 2, 2, 1)),
        ('ZeroPadding2D', convolutional.ZeroPadding2D, (2, 2, 2, 1)),
        ('Cropping2D', convolutional.Cropping2D, (2, 3, 3, 1)),
        ('ConvLSTM2D',
         lambda: convolutional_recurrent.ConvLSTM2D(4, kernel_size=(2, 2)),
         (4, 4, 4, 4, 4)),
        ('Dense', lambda: core.Dense(2), (2, 2)),
        ('Dropout', lambda: core.Dropout(0.5), (2, 2)),
        ('SpatialDropout2D', lambda: core.SpatialDropout2D(0.5), (2, 2, 2, 2)),
        ('Activation', lambda: core.Activation('sigmoid'), (2, 2)),
        ('Reshape', lambda: core.Reshape((1, 4, 1)), (2, 2, 2)),
        ('Permute', lambda: core.Permute((2, 1)), (2, 2, 2)),
        ('Attention', dense_attention.Attention, [(2, 2, 3), (2, 3, 3),
                                                  (2, 3, 3)]),
        ('AdditiveAttention', dense_attention.AdditiveAttention, [(2, 2, 3),
                                                                  (2, 3, 3),
                                                                  (2, 3, 3)]),
        ('Embedding', lambda: embeddings.Embedding(4, 4),
         (2, 4), 2e-3, 2e-3, np.random.randint(4, size=(2, 4))),
        ('LocallyConnected1D', lambda: local.LocallyConnected1D(2, 2),
         (2, 2, 1)),
        ('LocallyConnected2D', lambda: local.LocallyConnected2D(2, 2),
         (2, 2, 2, 1)),
        ('Add', merge.Add, [(2, 2), (2, 2)]),
        ('Subtract', merge.Subtract, [(2, 2), (2, 2)]),
        ('Multiply', merge.Multiply, [(2, 2), (2, 2)]),
        ('Average', merge.Average, [(2, 2), (2, 2)]),
        ('Maximum', merge.Maximum, [(2, 2), (2, 2)]),
        ('Minimum', merge.Minimum, [(2, 2), (2, 2)]),
        ('Concatenate', merge.Concatenate, [(2, 2), (2, 2)]),
        ('Dot', lambda: merge.Dot(1), [(2, 2), (2, 2)]),
        ('GaussianNoise', lambda: noise.GaussianNoise(0.5), (2, 2)),
        ('GaussianDropout', lambda: noise.GaussianDropout(0.5), (2, 2)),
        ('AlphaDropout', lambda: noise.AlphaDropout(0.5), (2, 2)),
        ('BatchNormalization', normalization_v2.BatchNormalization,
         (2, 2), 1e-2, 1e-2),
        ('LayerNormalization', normalization.LayerNormalization, (2, 2)),
        ('LayerNormalizationUnfused',
         lambda: normalization.LayerNormalization(axis=1), (2, 2, 2)),
        ('MaxPooling2D', pooling.MaxPooling2D, (2, 2, 2, 1)),
        ('AveragePooling2D', pooling.AveragePooling2D, (2, 2, 2, 1)),
        ('GlobalMaxPooling2D', pooling.GlobalMaxPooling2D, (2, 2, 2, 1)),
        ('GlobalAveragePooling2D', pooling.GlobalAveragePooling2D,
         (2, 2, 2, 1)),
        ('SimpleRNN', lambda: recurrent.SimpleRNN(units=4),
         (4, 4, 4), 1e-2, 1e-2),
        ('GRU', lambda: recurrent.GRU(units=4), (4, 4, 4)),
        ('LSTM', lambda: recurrent.LSTM(units=4), (4, 4, 4)),
        ('GRUV2', lambda: recurrent_v2.GRU(units=4), (4, 4, 4)),
        ('LSTMV2', lambda: recurrent_v2.LSTM(units=4), (4, 4, 4)),
        ('TimeDistributed', lambda: wrappers.TimeDistributed(core.Dense(2)),
         (2, 2, 2)),
        ('Bidirectional',
         lambda: wrappers.Bidirectional(recurrent.SimpleRNN(units=4)),
         (2, 2, 2)),
        ('AttentionLayerCausal',
         lambda: dense_attention.Attention(causal=True), [(2, 2, 3), (2, 3, 3),
                                                          (2, 3, 3)]),
        ('AdditiveAttentionLayerCausal',
         lambda: dense_attention.AdditiveAttention(causal=True), [(2, 3, 4),
                                                                  (2, 3, 4),
                                                                  (2, 3, 4)]),
    )
    def test_layer(self,
                   f32_layer_fn,
                   input_shape,
                   rtol=2e-3,
                   atol=2e-3,
                   input_data=None):
        """Tests a layer by comparing the float32 and mixed precision weights.

    A float32 layer, a mixed precision layer, and a distributed mixed precision
    layer are run. The three layers are identical other than their dtypes and
    distribution strategies. The outputs after predict() and weights after fit()
    are asserted to be close.

    Args:
      f32_layer_fn: A function returning a float32 layer. The other two layers
        will automatically be created from this
      input_shape: The shape of the input to the layer, including the batch
        dimension. Or a list of shapes if the layer takes multiple inputs.
      rtol: The relative tolerance to be asserted.
      atol: The absolute tolerance to be asserted.
      input_data: A Numpy array with the data of the input. If None, input data
        will be randomly generated
    """

        if f32_layer_fn == convolutional.ZeroPadding2D and \
           tf.test.is_built_with_rocm():
            return
        if isinstance(input_shape[0], int):
            input_shapes = [input_shape]
        else:
            input_shapes = input_shape
        strategy = create_mirrored_strategy()
        f32_layer = f32_layer_fn()

        # Create the layers
        assert f32_layer.dtype == f32_layer._compute_dtype == 'float32'
        config = f32_layer.get_config()
        config['dtype'] = policy.Policy('mixed_float16')
        mp_layer = f32_layer.__class__.from_config(config)
        distributed_mp_layer = f32_layer.__class__.from_config(config)

        # Compute per_replica_input_shapes for the distributed model
        global_batch_size = input_shapes[0][0]
        assert global_batch_size % strategy.num_replicas_in_sync == 0, (
            'The number of replicas, %d, does not divide the global batch size of '
            '%d' % (strategy.num_replicas_in_sync, global_batch_size))
        per_replica_batch_size = (global_batch_size //
                                  strategy.num_replicas_in_sync)
        per_replica_input_shapes = [(per_replica_batch_size, ) + s[1:]
                                    for s in input_shapes]

        # Create the models
        f32_model = self._create_model_from_layer(f32_layer, input_shapes)
        mp_model = self._create_model_from_layer(mp_layer, input_shapes)
        with strategy.scope():
            distributed_mp_model = self._create_model_from_layer(
                distributed_mp_layer, per_replica_input_shapes)

        # Set all model weights to the same values
        f32_weights = f32_model.get_weights()
        mp_model.set_weights(f32_weights)
        distributed_mp_model.set_weights(f32_weights)

        # Generate input data
        if input_data is None:
            # Cast the inputs to float16 up front, so that the mixed precision
            # layers casting their inputs to float16 is not measured as error.
            input_data = [
                np.random.normal(size=s).astype('float16')
                for s in input_shapes
            ]
            if len(input_data) == 1:
                input_data = input_data[0]

        # Assert all models have close outputs.
        f32_output = f32_model.predict(input_data)
        mp_output = mp_model.predict(input_data)
        self.assertAllClose(mp_output, f32_output, rtol=rtol, atol=atol)
        self.assertAllClose(distributed_mp_model.predict(input_data),
                            f32_output,
                            rtol=rtol,
                            atol=atol)

        # Run fit() on models
        output = np.random.normal(
            size=f32_model.outputs[0].shape).astype('float16')
        for model in f32_model, mp_model, distributed_mp_model:
            model.fit(input_data, output, batch_size=global_batch_size)

        # Assert all models have close weights
        f32_weights = f32_model.get_weights()
        self.assertAllClose(mp_model.get_weights(),
                            f32_weights,
                            rtol=rtol,
                            atol=atol)
        self.assertAllClose(distributed_mp_model.get_weights(),
                            f32_weights,
                            rtol=rtol,
                            atol=atol)
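The core of test_layer is cloning a float32 layer into a mixed_float16 one by overriding only the dtype policy in its config. A minimal sketch of that step in isolation, assuming the layers and policy modules imported by this test:

# Clone a layer via its config, changing only the dtype policy. Under
# mixed_float16 the layer computes in float16 but keeps float32 variables.
f32_dense = layers.Dense(2)
config = f32_dense.get_config()
config['dtype'] = policy.Policy('mixed_float16')
mp_dense = layers.Dense.from_config(config)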
Example #5
def test_recurrent_convolutional():
    nb_row = 3
    nb_col = 3
    nb_filter = 5
    nb_samples = 2
    input_channel = 2
    input_nb_row = 5
    input_nb_col = 5
    sequence_len = 2
    for dim_ordering in ['th', 'tf']:

        if dim_ordering == 'th':
            input = np.random.rand(nb_samples, sequence_len, input_channel,
                                   input_nb_row, input_nb_col)
        else:  # tf
            input = np.random.rand(nb_samples, sequence_len, input_nb_row,
                                   input_nb_col, input_channel)

        for return_sequences in [True, False]:
            # test for output shape:
            output = layer_test(convolutional_recurrent.ConvLSTM2D,
                                kwargs={
                                    'dim_ordering': dim_ordering,
                                    'return_sequences': return_sequences,
                                    'nb_filter': nb_filter,
                                    'nb_row': nb_row,
                                    'nb_col': nb_col,
                                    'border_mode': "same"
                                },
                                input_shape=input.shape)

            output_shape = [nb_samples, input_nb_row, input_nb_col]

            if dim_ordering == 'th':
                output_shape.insert(1, nb_filter)
            else:
                output_shape.insert(3, nb_filter)

            if return_sequences:
                output_shape.insert(1, sequence_len)

            assert output.shape == tuple(output_shape)

            # No need to check statefulness for both dim orderings
            if dim_ordering == 'th' or return_sequences:
                continue

            # Tests for statefulness
            model = Sequential()
            kwargs = {
                'dim_ordering': dim_ordering,
                'return_sequences': return_sequences,
                'nb_filter': nb_filter,
                'nb_row': nb_row,
                'nb_col': nb_col,
                'stateful': True,
                'batch_input_shape': input.shape,
                'border_mode': "same"
            }
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)

            model.add(layer)
            model.compile(optimizer='sgd', loss='mse')
            out1 = model.predict(np.ones_like(input))
            assert (out1.shape == tuple(output_shape))

            # train once so that the states change
            model.train_on_batch(np.ones_like(input), np.ones_like(output))
            out2 = model.predict(np.ones_like(input))

            # if the state is not reset, output should be different
            assert (out1.max() != out2.max())

            # check that output changes after states are reset
            # (even though the model itself didn't change)
            layer.reset_states()
            out3 = model.predict(np.ones_like(input))
            assert (out2.max() != out3.max())

            # check that container-level reset_states() works
            model.reset_states()
            out4 = model.predict(np.ones_like(input))
            assert_allclose(out3, out4, atol=1e-5)

            # check that the call to `predict` updated the states
            out5 = model.predict(np.ones_like(input))
            assert (out4.max() != out5.max())

            # check regularizers
            kwargs = {
                'dim_ordering': dim_ordering,
                'return_sequences': return_sequences,
                'nb_filter': nb_filter,
                'nb_row': nb_row,
                'nb_col': nb_col,
                'stateful': True,
                'batch_input_shape': input.shape,
                'W_regularizer': regularizers.WeightRegularizer(l1=0.01),
                'U_regularizer': regularizers.WeightRegularizer(l1=0.01),
                'b_regularizer': 'l2',
                'border_mode': "same"
            }

            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
            layer.build(input.shape)
            output = layer(K.variable(np.ones(input.shape)))
            K.eval(output)

            # check dropout
            layer_test(convolutional_recurrent.ConvLSTM2D,
                       kwargs={
                           'dim_ordering': dim_ordering,
                           'return_sequences': return_sequences,
                           'nb_filter': nb_filter,
                           'nb_row': nb_row,
                           'nb_col': nb_col,
                           'border_mode': "same",
                           'dropout_W': 0.1,
                           'dropout_U': 0.1
                       },
                       input_shape=input.shape)
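Example #5 exercises the legacy Keras 1 ConvLSTM2D API (dim_ordering, nb_filter, nb_row/nb_col, border_mode, W_/U_/b_regularizer, dropout_W/dropout_U). A sketch of the same layer written with the Keras 2 argument names used in the other examples:

# Keras 2 equivalent of the Keras 1 kwargs above: dim_ordering -> data_format,
# nb_filter -> filters, nb_row/nb_col -> kernel_size, border_mode -> padding,
# W_/U_/b_regularizer -> kernel_/recurrent_/bias_regularizer,
# dropout_W/dropout_U -> dropout/recurrent_dropout.
equivalent_layer = convolutional_recurrent.ConvLSTM2D(
    filters=5,
    kernel_size=(3, 3),
    padding='same',
    data_format='channels_last',
    return_sequences=False,
    dropout=0.1,
    recurrent_dropout=0.1)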
Example #6
def test_convolutional_recurrent():
    num_row = 3
    num_col = 3
    filters = 5
    num_samples = 2
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    for data_format in ['channels_first', 'channels_last']:

        if data_format == 'channels_first':
            inputs = np.random.rand(num_samples, sequence_len,
                                    input_channel,
                                    input_num_row, input_num_col)
        else:
            inputs = np.random.rand(num_samples, sequence_len,
                                    input_num_row, input_num_col,
                                    input_channel)

        for return_sequences in [True, False]:
            # test for output shape:
            output = layer_test(convolutional_recurrent.ConvLSTM2D,
                                kwargs={'data_format': data_format,
                                        'return_sequences': return_sequences,
                                        'filters': filters,
                                        'kernel_size': (num_row, num_col),
                                        'padding': 'valid'},
                                input_shape=inputs.shape)

            # No need to run the following checks for both data formats
            if data_format == 'channels_first' or return_sequences:
                continue

            # Tests for statefulness
            model = Sequential()
            kwargs = {'data_format': data_format,
                      'return_sequences': return_sequences,
                      'filters': filters,
                      'kernel_size': (num_row, num_col),
                      'stateful': True,
                      'batch_input_shape': inputs.shape,
                      'padding': 'same'}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)

            model.add(layer)
            model.compile(optimizer='sgd', loss='mse')
            out1 = model.predict(np.ones_like(inputs))

            # train once so that the states change
            model.train_on_batch(np.ones_like(inputs),
                                 np.random.random(out1.shape))
            out2 = model.predict(np.ones_like(inputs))

            # if the state is not reset, output should be different
            assert(out1.max() != out2.max())

            # check that output changes after states are reset
            # (even though the model itself didn't change)
            layer.reset_states()
            out3 = model.predict(np.ones_like(inputs))
            assert(out2.max() != out3.max())

            # check that container-level reset_states() works
            model.reset_states()
            out4 = model.predict(np.ones_like(inputs))
            assert_allclose(out3, out4, atol=1e-5)

            # check that the call to `predict` updated the states
            out5 = model.predict(np.ones_like(inputs))
            assert(out4.max() != out5.max())

            # check regularizers
            kwargs = {'data_format': data_format,
                      'return_sequences': return_sequences,
                      'kernel_size': (num_row, num_col),
                      'stateful': True,
                      'filters': filters,
                      'batch_input_shape': inputs.shape,
                      'kernel_regularizer': regularizers.L1L2(l1=0.01),
                      'recurrent_regularizer': regularizers.L1L2(l1=0.01),
                      'bias_regularizer': 'l2',
                      'activity_regularizer': 'l2',
                      'kernel_constraint': 'max_norm',
                      'recurrent_constraint': 'max_norm',
                      'bias_constraint': 'max_norm',
                      'padding': 'same'}

            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
            layer.build(inputs.shape)
            assert len(layer.losses) == 3
            assert layer.activity_regularizer
            output = layer(K.variable(np.ones(inputs.shape)))
            assert len(layer.losses) == 4
            K.eval(output)

            # check dropout
            layer_test(convolutional_recurrent.ConvLSTM2D,
                       kwargs={'data_format': data_format,
                               'return_sequences': return_sequences,
                               'filters': filters,
                               'kernel_size': (num_row, num_col),
                               'padding': 'same',
                               'dropout': 0.1,
                               'recurrent_dropout': 0.1},
                       input_shape=inputs.shape)
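Tying the examples together, a minimal end-to-end sketch of a single ConvLSTM2D consuming 5D channels_last input (samples, time, rows, cols, channels) and emitting the final frame's feature maps; the shapes restate the values from Example #6, and the snippet assumes the same imports as the tests:

# Functional-API sketch: one ConvLSTM2D over a short frame sequence.
x = Input(shape=(2, 5, 5, 2))                      # (time, rows, cols, channels)
y = convolutional_recurrent.ConvLSTM2D(filters=5,
                                        kernel_size=(3, 3),
                                        padding='same',
                                        return_sequences=False)(x)
model = Model(x, y)
model.compile(optimizer='sgd', loss='mse')

frames = np.random.rand(2, 2, 5, 5, 2)             # (samples, time, rows, cols, channels)
preds = model.predict(frames)                      # -> shape (2, 5, 5, 5)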