Example #1
class CuDNNTest(keras_parameterized.TestCase):
    @parameterized.named_parameters(
        *test_util.generate_combinations_with_testcase_name(
            layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
            return_sequences=[True, False]))
    @test_util.run_gpu_only
    def test_cudnn_rnn_return_sequence(self, layer_class, return_sequences):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32
        testing_utils.layer_test(layer_class,
                                 kwargs={
                                     'units': units,
                                     'return_sequences': return_sequences
                                 },
                                 input_shape=(num_samples, timesteps,
                                              input_size))

    @parameterized.named_parameters(
        *test_util.generate_combinations_with_testcase_name(
            layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
            go_backwards=[True, False]))
    @test_util.run_gpu_only
    def test_cudnn_rnn_go_backward(self, layer_class, go_backwards):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32
        testing_utils.layer_test(layer_class,
                                 kwargs={
                                     'units': units,
                                     'go_backwards': go_backwards
                                 },
                                 input_shape=(num_samples, timesteps,
                                              input_size))

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    @test_util.run_gpu_only
    def test_return_state(self, layer_class):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32
        num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

        inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size))
        layer = layer_class(units, return_state=True, stateful=True)
        outputs = layer(inputs)
        _, state = outputs[0], outputs[1:]
        self.assertEqual(len(state), num_states)
        model = keras.models.Model(inputs, state[0])
        model.run_eagerly = testing_utils.should_run_eagerly()

        inputs = np.random.random((num_samples, timesteps, input_size))
        state = model.predict(inputs)
        np.testing.assert_allclose(keras.backend.eval(layer.states[0]),
                                   state,
                                   atol=1e-4)

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    @test_util.run_gpu_only
    def test_time_major_input(self, layer_class):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32

        model = keras.models.Sequential()
        model.add(
            keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
        layer = layer_class(units, time_major=True, return_sequences=True)
        model.add(layer)
        model.add(
            keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
        model.compile(loss='categorical_crossentropy',
                      optimizer=RMSprop(learning_rate=0.001))
        model.fit(np.ones((num_samples, timesteps, input_size)),
                  np.ones((num_samples, timesteps, units)))
        out = model.predict(np.ones((num_samples, timesteps, input_size)))
        self.assertEqual(out.shape, (num_samples, timesteps, units))

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    @test_util.run_gpu_only
    def test_specify_initial_state_keras_tensor(self, layer_class):
        input_size = 10
        timesteps = 6
        units = 2
        num_samples = 32
        num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

        inputs = keras.Input((timesteps, input_size))
        initial_state = [keras.Input((units, )) for _ in range(num_states)]
        layer = layer_class(units)
        if len(initial_state) == 1:
            output = layer(inputs, initial_state=initial_state[0])
        else:
            output = layer(inputs, initial_state=initial_state)
        self.assertIn(initial_state[0], layer._inbound_nodes[0].input_tensors)

        model = keras.models.Model([inputs] + initial_state, output)
        model.compile(loss='categorical_crossentropy',
                      optimizer=RMSprop(learning_rate=0.001),
                      run_eagerly=testing_utils.should_run_eagerly())

        inputs = np.random.random((num_samples, timesteps, input_size))
        initial_state = [
            np.random.random((num_samples, units)) for _ in range(num_states)
        ]
        targets = np.random.random((num_samples, units))
        model.fit([inputs] + initial_state, targets)
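
The tests above drive the fused CuDNN RNN layers through testing_utils.layer_test and the functional API. As a standalone sketch of the same usage (assuming a CUDA-capable GPU and the tf.compat.v1.keras namespace; the shapes match the test constants):

import numpy as np
import tensorflow as tf

keras = tf.compat.v1.keras  # CuDNNGRU / CuDNNLSTM live in the v1 Keras namespace

inputs = keras.Input(shape=(6, 10))                   # (timesteps, input_size)
outputs = keras.layers.CuDNNLSTM(2, return_sequences=False)(inputs)
model = keras.Model(inputs, outputs)
model.compile(optimizer='rmsprop', loss='mse')
model.fit(np.random.random((32, 6, 10)),              # num_samples = 32
          np.random.random((32, 2)),
          epochs=1, batch_size=8)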
Example #2
class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
    def _rmsprop_update_numpy(self, var, g, mg, rms, mom, lr, decay, momentum,
                              centered):
        rms_t = rms * decay + (1 - decay) * g * g
        if centered:
            mg_t = mg * decay + (1 - decay) * g
            denom_t = rms_t - mg_t * mg_t
        else:
            mg_t = mg
            denom_t = rms_t
        mom_t = momentum * mom + lr * g / np.sqrt(denom_t, dtype=denom_t.dtype)
        var_t = var - mom_t
        return var_t, mg_t, rms_t, mom_t
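
    # For reference (not part of the original test): the helper above is the numpy
    # mirror of the RMSProp update applied by rmsprop.RMSPropOptimizer:
    #   rms_t = decay * rms + (1 - decay) * g^2
    #   mg_t  = decay * mg  + (1 - decay) * g               (centered variant only)
    #   mom_t = momentum * mom + lr * g / sqrt(rms_t - mg_t^2)
    #           (the denominator is just sqrt(rms_t) when centered is False)
    #   var_t = var - mom_t
    # The numpy state starts from rms = epsilon, which is why no extra epsilon is
    # added inside the square root here.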

    def _sparse_rmsprop_update_numpy(self, var, gindexs, gvalues, mg, rms, mom,
                                     lr, decay, momentum, centered):
        mg_t = copy.deepcopy(mg)
        rms_t = copy.deepcopy(rms)
        mom_t = copy.deepcopy(mom)
        var_t = copy.deepcopy(var)
        for i in range(len(gindexs)):
            gindex = gindexs[i]
            gvalue = gvalues[i]
            rms_t[gindex] = rms[gindex] * decay + (1 - decay) * gvalue * gvalue
            denom_t = rms_t[gindex]
            if centered:
                mg_t[gindex] = mg_t[gindex] * decay + (1 - decay) * gvalue
                denom_t -= mg_t[gindex] * mg_t[gindex]
            mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt(
                denom_t)
            var_t[gindex] = var[gindex] - mom_t[gindex]
        return var_t, mg_t, rms_t, mom_t

    @parameterized.named_parameters(
        *test_util.generate_combinations_with_testcase_name(
            dtype=_DATA_TYPES, param_value=_TEST_PARAM_VALUES))
    def testDense(self, dtype, param_value):
        (learning_rate, decay, momentum, epsilon, centered,
         use_resource) = tuple(param_value)
        with self.test_session(use_gpu=True):
            # Initialize variables for numpy implementation.
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype)

            if use_resource:
                var0 = resource_variable_ops.ResourceVariable(var0_np)
                var1 = resource_variable_ops.ResourceVariable(var1_np)
            else:
                var0 = variables.Variable(var0_np)
                var1 = variables.Variable(var1_np)
            grads0 = constant_op.constant(grads0_np)
            grads1 = constant_op.constant(grads1_np)
            opt = rmsprop.RMSPropOptimizer(learning_rate=learning_rate,
                                           decay=decay,
                                           momentum=momentum,
                                           epsilon=epsilon,
                                           centered=centered)

            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            variables.global_variables_initializer().run()

            mg0 = opt.get_slot(var0, "mg")
            self.assertEqual(mg0 is not None, centered)
            mg1 = opt.get_slot(var1, "mg")
            self.assertEqual(mg1 is not None, centered)
            rms0 = opt.get_slot(var0, "rms")
            self.assertIsNotNone(rms0)
            rms1 = opt.get_slot(var1, "rms")
            self.assertIsNotNone(rms1)
            mom0 = opt.get_slot(var0, "momentum")
            self.assertIsNotNone(mom0)
            mom1 = opt.get_slot(var1, "momentum")
            self.assertIsNotNone(mom1)

            mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
            mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
            rms0_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
            rms1_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
            mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
            mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], var0.eval())
            self.assertAllClose([3.0, 4.0], var1.eval())

            # Run 4 steps of RMSProp
            for _ in range(4):
                update.run()

                var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
                    var0_np, grads0_np, mg0_np, rms0_np, mom0_np,
                    learning_rate, decay, momentum, centered)
                var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
                    var1_np, grads1_np, mg1_np, rms1_np, mom1_np,
                    learning_rate, decay, momentum, centered)

                # Validate updated params
                if centered:
                    self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
                    self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
                self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
                self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
                self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
                self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
                self.assertAllCloseAccordingToType(var0_np, var0.eval())
                self.assertAllCloseAccordingToType(var1_np, var1.eval())

    @parameterized.parameters([dtypes.float32, dtypes.float64])
    def testMinimizeSparseResourceVariable(self, dtype):
        with self.cached_session():
            var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                          dtype=dtype)
            x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
            pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]),
                                   x)
            loss = pred * pred
            sgd_op = rmsprop.RMSPropOptimizer(learning_rate=1.0,
                                              decay=0.0,
                                              momentum=0.0,
                                              epsilon=0.0,
                                              centered=False).minimize(loss)
            variables.global_variables_initializer().run()
            # Fetch params to validate initial values
            self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
            # Run 1 step of sgd
            sgd_op.run()
            # Validate updated params
            self.assertAllCloseAccordingToType([[0., 1.]],
                                               var0.eval(),
                                               atol=0.01)
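            # Why [[0., 1.]] is expected (worked out from the setup above, not part
            # of the original test): pred = [[1, 2]] @ [[4], [5]] = [[14]], so the
            # gradient w.r.t. the embedded row is 2 * 14 * [4, 5] = [112, 140].
            # With decay=0 and epsilon=0 the rms accumulator becomes g^2, the step
            # is lr * g / sqrt(g^2) = sign(g) = [1, 1], and [1, 2] - [1, 1] = [0, 1].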

    @parameterized.parameters([dtypes.float32, dtypes.float64])
    def testMinimizeSparseResourceVariableCentered(self, dtype):
        with self.cached_session():
            var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                          dtype=dtype)
            x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
            pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]),
                                   x)
            loss = pred * pred
            sgd_op = rmsprop.RMSPropOptimizer(learning_rate=1.0,
                                              decay=0.1,
                                              momentum=0.0,
                                              epsilon=1.0,
                                              centered=True).minimize(loss)
            variables.global_variables_initializer().run()
            # Fetch params to validate initial values
            self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
            # Run 1 step of sgd
            sgd_op.run()
            # Validate updated params
            self.assertAllCloseAccordingToType([[-7 / 3.0, -4 / 3.0]],
                                               var0.eval(),
                                               atol=0.01)

    @parameterized.named_parameters(
        *test_util.generate_combinations_with_testcase_name(
            dtype=_DATA_TYPES, param_value=_TEST_PARAM_VALUES))
    def testSparse(self, dtype, param_value):
        (learning_rate, decay, momentum, epsilon, centered,
         _) = tuple(param_value)
        with self.test_session(use_gpu=True):
            # Initialize variables for numpy implementation.
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype)

            var0 = variables.Variable(var0_np)
            var1 = variables.Variable(var1_np)
            grads0_np_indices = np.array([0], dtype=np.int32)
            grads0 = ops.IndexedSlices(constant_op.constant(grads0_np),
                                       constant_op.constant(grads0_np_indices),
                                       constant_op.constant([1]))
            grads1_np_indices = np.array([1], dtype=np.int32)
            grads1 = ops.IndexedSlices(constant_op.constant(grads1_np),
                                       constant_op.constant(grads1_np_indices),
                                       constant_op.constant([1]))
            opt = rmsprop.RMSPropOptimizer(learning_rate=learning_rate,
                                           decay=decay,
                                           momentum=momentum,
                                           epsilon=epsilon,
                                           centered=centered)
            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            variables.global_variables_initializer().run()

            mg0 = opt.get_slot(var0, "mg")
            self.assertEqual(mg0 is not None, centered)
            mg1 = opt.get_slot(var1, "mg")
            self.assertEqual(mg1 is not None, centered)
            rms0 = opt.get_slot(var0, "rms")
            self.assertIsNotNone(rms0)
            rms1 = opt.get_slot(var1, "rms")
            self.assertIsNotNone(rms1)
            mom0 = opt.get_slot(var0, "momentum")
            self.assertIsNotNone(mom0)
            mom1 = opt.get_slot(var1, "momentum")
            self.assertIsNotNone(mom1)

            mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
            mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
            rms0_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
            rms1_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
            mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
            mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], var0.eval())
            self.assertAllClose([3.0, 4.0], var1.eval())

            # Run 4 steps of RMSProp
            for _ in range(4):
                update.run()

                var0_np, mg0_np, rms0_np, mom0_np = self._sparse_rmsprop_update_numpy(
                    var0_np, grads0_np_indices, grads0_np, mg0_np, rms0_np,
                    mom0_np, learning_rate, decay, momentum, centered)
                var1_np, mg1_np, rms1_np, mom1_np = self._sparse_rmsprop_update_numpy(
                    var1_np, grads1_np_indices, grads1_np, mg1_np, rms1_np,
                    mom1_np, learning_rate, decay, momentum, centered)

                # Validate updated params
                if centered:
                    self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
                    self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
                self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
                self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
                self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
                self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
                self.assertAllCloseAccordingToType(var0_np, var0.eval())
                self.assertAllCloseAccordingToType(var1_np, var1.eval())

    @parameterized.parameters(_DATA_TYPES)
    def testWithoutMomentum(self, dtype):
        with self.test_session(use_gpu=True):
            var0 = variables.Variable([1.0, 2.0], dtype=dtype)
            var1 = variables.Variable([3.0, 4.0], dtype=dtype)
            grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
            grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
            opt = rmsprop.RMSPropOptimizer(learning_rate=2.0,
                                           decay=0.9,
                                           momentum=0.0,
                                           epsilon=1.0)
            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            variables.global_variables_initializer().run()

            rms0 = opt.get_slot(var0, "rms")
            self.assertIsNotNone(rms0)
            rms1 = opt.get_slot(var1, "rms")
            self.assertIsNotNone(rms1)
            mom0 = opt.get_slot(var0, "momentum")
            self.assertIsNotNone(mom0)
            mom1 = opt.get_slot(var1, "momentum")
            self.assertIsNotNone(mom1)

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], var0.eval())
            self.assertAllClose([3.0, 4.0], var1.eval())
            # Step 1: the rms accumulators were 1. So we should see a normal
            # update: v -= grad * learning_rate
            update.run()
            # Check the root mean square accumulators.
            self.assertAllCloseAccordingToType(np.array([0.901, 0.901]),
                                               rms0.eval())
            self.assertAllCloseAccordingToType(np.array([0.90001, 0.90001]),
                                               rms1.eval())
            # Check the parameters.
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 - (0.1 * 2.0 / math.sqrt(0.901)),
                    2.0 - (0.1 * 2.0 / math.sqrt(0.901))
                ]), var0.eval())
            self.assertAllCloseAccordingToType(
                np.array([
                    3.0 - (0.01 * 2.0 / math.sqrt(0.90001)),
                    4.0 - (0.01 * 2.0 / math.sqrt(0.90001))
                ]), var1.eval())
            # Step 2: the root mean square accumulators contain the previous update.
            update.run()
            # Check the rms accumulators.
            self.assertAllCloseAccordingToType(
                np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]),
                rms0.eval())
            self.assertAllCloseAccordingToType(
                np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]),
                rms1.eval())
            # Check the parameters.
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
                    (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)),
                    2.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
                    (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))
                ]), var0.eval())
            self.assertAllCloseAccordingToType(
                np.array([
                    3.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
                    (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)),
                    4.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
                    (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))
                ]), var1.eval())

    @parameterized.parameters(_DATA_TYPES)
    def testWithMomentum(self, dtype):
        with self.test_session(use_gpu=True):
            var0 = variables.Variable([1.0, 2.0], dtype=dtype)
            var1 = variables.Variable([3.0, 4.0], dtype=dtype)
            grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
            grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)

            opt = rmsprop.RMSPropOptimizer(learning_rate=2.0,
                                           decay=0.9,
                                           momentum=0.5,
                                           epsilon=1.0)
            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            variables.global_variables_initializer().run()

            rms0 = opt.get_slot(var0, "rms")
            self.assertIsNotNone(rms0)
            rms1 = opt.get_slot(var1, "rms")
            self.assertIsNotNone(rms1)
            mom0 = opt.get_slot(var0, "momentum")
            self.assertIsNotNone(mom0)
            mom1 = opt.get_slot(var1, "momentum")
            self.assertIsNotNone(mom1)

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], var0.eval())
            self.assertAllClose([3.0, 4.0], var1.eval())
            # Step 1: rms = 1, mom = 0. So we should see a normal
            # update: v -= grad * learning_rate
            update.run()
            # Check the root mean square accumulators.
            self.assertAllCloseAccordingToType(np.array([0.901, 0.901]),
                                               rms0.eval())
            self.assertAllCloseAccordingToType(np.array([0.90001, 0.90001]),
                                               rms1.eval())
            # Check the momentum accumulators
            self.assertAllCloseAccordingToType(
                np.array([(0.1 * 2.0 / math.sqrt(0.901)),
                          (0.1 * 2.0 / math.sqrt(0.901))]), mom0.eval())
            self.assertAllCloseAccordingToType(
                np.array([(0.01 * 2.0 / math.sqrt(0.90001)),
                          (0.01 * 2.0 / math.sqrt(0.90001))]), mom1.eval())

            # Check the parameters.
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 - (0.1 * 2.0 / math.sqrt(0.901)),
                    2.0 - (0.1 * 2.0 / math.sqrt(0.901))
                ]), var0.eval())
            self.assertAllCloseAccordingToType(
                np.array([
                    3.0 - (0.01 * 2.0 / math.sqrt(0.90001)),
                    4.0 - (0.01 * 2.0 / math.sqrt(0.90001))
                ]), var1.eval())

            # Step 2: the root mean square accumulators contain the previous update.
            update.run()
            # Check the rms accumulators.
            self.assertAllCloseAccordingToType(
                np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]),
                rms0.eval())
            self.assertAllCloseAccordingToType(
                np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]),
                rms1.eval())
            self.assertAllCloseAccordingToType(
                np.array([
                    0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
                    (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)),
                    0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
                    (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))
                ]), mom0.eval())
            self.assertAllCloseAccordingToType(
                np.array([
                    0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
                    (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)),
                    0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
                    (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))
                ]), mom1.eval())

            # Check the parameters.
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
                    (0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
                     (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))),
                    2.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
                    (0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
                     (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)))
                ]), var0.eval())

            self.assertAllCloseAccordingToType(
                np.array([
                    3.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
                    (0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
                     (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))),
                    4.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
                    (0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
                     (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)))
                ]), var1.eval())
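
The assertions above check the optimizer against the numpy reference step by step. As a standalone illustration of the same optimizer through its public v1 alias (hypothetical toy loss, graph mode):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

var = tf.Variable([1.0, 2.0])
loss = tf.reduce_sum(tf.square(var))
opt = tf.train.RMSPropOptimizer(learning_rate=0.1, decay=0.9,
                                momentum=0.0, epsilon=1e-10)
train_op = opt.minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(5):
        sess.run(train_op)
    print(sess.run(var))  # both entries move toward zero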
Example #3
class ConvGRU2DTest(keras_parameterized.TestCase):
    @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters(
        *tf_test_util.generate_combinations_with_testcase_name(
            data_format=['channels_first', 'channels_last'],
            return_sequences=[True, False]))
    def test_conv_gru_2d(self, data_format, return_sequences):
        num_row = 3
        num_col = 3
        filters = 2
        num_samples = 1
        input_channel = 2
        input_num_row = 5
        input_num_col = 5
        sequence_len = 2

        if data_format == 'channels_first':
            inputs = np.random.rand(num_samples, sequence_len, input_channel,
                                    input_num_row, input_num_col)
        else:
            inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                                    input_num_col, input_channel)

        # test for return state:
        x = tf.keras.layers.Input(batch_shape=inputs.shape)
        kwargs = {
            'data_format': data_format,
            'return_sequences': return_sequences,
            'return_state': True,
            'stateful': True,
            'filters': filters,
            'kernel_size': (num_row, num_col),
            'padding': 'valid'
        }
        layer = layers.ConvGRU2D(**kwargs)
        layer.build(inputs.shape)

        outputs = layer(x)
        _, states = outputs[0], outputs[1:]
        self.assertEqual(len(states), len(layer.cell.state_size))
        model = tf.keras.models.Model(x, states[0])
        state = model.predict(inputs)

        self.assertAllClose(tf.keras.backend.eval(layer.states[0]),
                            state,
                            atol=1e-4)

        # test for output shape:
        custom_objects = {'ConvGRU2D': layers.ConvGRU2D}
        with tf.keras.utils.custom_object_scope(custom_objects):
            testing_utils.layer_test(layers.ConvGRU2D,
                                     kwargs={
                                         'data_format': data_format,
                                         'return_sequences': return_sequences,
                                         'filters': filters,
                                         'kernel_size': (num_row, num_col),
                                         'padding': 'valid'
                                     },
                                     input_shape=inputs.shape)

    def test_conv_gru_2d_statefulness(self):
        # Tests for statefulness
        num_row = 3
        num_col = 3
        filters = 2
        num_samples = 1
        input_channel = 2
        input_num_row = 5
        input_num_col = 5
        sequence_len = 2
        inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                                input_num_col, input_channel)

        with self.cached_session():
            model = tf.keras.models.Sequential()
            kwargs = {
                'data_format': 'channels_last',
                'return_sequences': False,
                'filters': filters,
                'kernel_size': (num_row, num_col),
                'stateful': True,
                'batch_input_shape': inputs.shape,
                'padding': 'same'
            }
            layer = layers.ConvGRU2D(**kwargs)

            model.add(layer)
            model.compile(optimizer='sgd', loss='mse')
            out1 = model.predict(np.ones_like(inputs))

            # train once so that the states change
            model.train_on_batch(np.ones_like(inputs),
                                 np.random.random(out1.shape))
            out2 = model.predict(np.ones_like(inputs))

            # if the state is not reset, output should be different
            self.assertNotEqual(out1.max(), out2.max())

            # check that output changes after states are reset
            # (even though the model itself didn't change)
            layer.reset_states()
            out3 = model.predict(np.ones_like(inputs))
            self.assertNotEqual(out3.max(), out2.max())

            # check that container-level reset_states() works
            model.reset_states()
            out4 = model.predict(np.ones_like(inputs))
            self.assertAllClose(out3, out4, atol=1e-5)

            # check that the call to `predict` updated the states
            out5 = model.predict(np.ones_like(inputs))
            self.assertNotEqual(out4.max(), out5.max())

    def test_conv_gru_2d_regularizers(self):
        # check regularizers
        num_row = 3
        num_col = 3
        filters = 2
        num_samples = 1
        input_channel = 2
        input_num_row = 5
        input_num_col = 5
        sequence_len = 2
        inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                                input_num_col, input_channel)

        with self.cached_session():
            kwargs = {
                'data_format': 'channels_last',
                'return_sequences': False,
                'kernel_size': (num_row, num_col),
                'stateful': True,
                'filters': filters,
                'batch_input_shape': inputs.shape,
                'kernel_regularizer': tf.keras.regularizers.L1L2(l1=0.01),
                'recurrent_regularizer': tf.keras.regularizers.L1L2(l1=0.01),
                'activity_regularizer': 'l2',
                'bias_regularizer': 'l2',
                'kernel_constraint': 'max_norm',
                'recurrent_constraint': 'max_norm',
                'bias_constraint': 'max_norm',
                'padding': 'same'
            }

            layer = layers.ConvGRU2D(**kwargs)
            layer.build(inputs.shape)
            self.assertEqual(len(layer.losses), 3)
            layer(tf.keras.backend.variable(np.ones(inputs.shape)))
            self.assertEqual(len(layer.losses), 4)

    def test_conv_gru_2d_dropout(self):
        # check dropout
        with self.cached_session():
            custom_objects = {'ConvGRU2D': layers.ConvGRU2D}
            with tf.keras.utils.custom_object_scope(custom_objects):
                testing_utils.layer_test(layers.ConvGRU2D,
                                         kwargs={
                                             'data_format': 'channels_last',
                                             'return_sequences': False,
                                             'filters': 2,
                                             'kernel_size': (3, 3),
                                             'padding': 'same',
                                             'dropout': 0.1,
                                             'recurrent_dropout': 0.1
                                         },
                                         input_shape=(1, 2, 5, 5, 2))

    def test_conv_gru_2d_cloning(self):
        with self.cached_session():
            model = tf.keras.models.Sequential()
            model.add(layers.ConvGRU2D(5, 3, input_shape=(None, 5, 5, 3)))

            test_inputs = np.random.random((2, 4, 5, 5, 3))
            reference_outputs = model.predict(test_inputs)
            weights = model.get_weights()

        # Use a new graph to clone the model
        with self.cached_session():
            clone = tf.keras.models.clone_model(model)
            clone.set_weights(weights)

            outputs = clone.predict(test_inputs)
            self.assertAllClose(reference_outputs, outputs, atol=1e-5)
Example #4
class CuDNNV1OnlyTest(keras_parameterized.TestCase):
    @test_util.run_gpu_only
    def test_trainability(self):
        input_size = 10
        units = 2
        for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
            layer = layer_class(units)
            layer.build((None, None, input_size))
            self.assertEqual(len(layer.weights), 3)
            self.assertEqual(len(layer.trainable_weights), 3)
            self.assertEqual(len(layer.non_trainable_weights), 0)
            layer.trainable = False
            self.assertEqual(len(layer.weights), 3)
            self.assertEqual(len(layer.non_trainable_weights), 3)
            self.assertEqual(len(layer.trainable_weights), 0)
            layer.trainable = True
            self.assertEqual(len(layer.weights), 3)
            self.assertEqual(len(layer.trainable_weights), 3)
            self.assertEqual(len(layer.non_trainable_weights), 0)

    @parameterized.named_parameters(
        *test_util.generate_combinations_with_testcase_name(
            rnn_type=['LSTM', 'GRU'],
            to_cudnn=[True, False],
            bidirectional=[True, False],
            implementation=[1, 2],
            model_nest_level=[1, 2],
            model_type=['seq', 'func']))
    @test_util.run_v1_only('b/120911602, b/112083752')
    @test_util.run_gpu_only
    def test_load_weights_between_noncudnn_rnn(self, rnn_type, to_cudnn,
                                               bidirectional, implementation,
                                               model_nest_level, model_type):
        input_size = 10
        timesteps = 6
        input_shape = (timesteps, input_size)
        units = 2
        num_samples = 32
        inputs = np.random.random((num_samples, timesteps, input_size))

        rnn_layer_kwargs = {
            'recurrent_activation': 'sigmoid',
            # ensure biases are non-zero and properly converted
            'bias_initializer': 'random_uniform',
            'implementation': implementation
        }
        if rnn_type == 'LSTM':
            rnn_layer_class = keras.layers.LSTM
            cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
        else:
            rnn_layer_class = keras.layers.GRU
            cudnn_rnn_layer_class = keras.layers.CuDNNGRU
            rnn_layer_kwargs['reset_after'] = True

        layer = rnn_layer_class(units, **rnn_layer_kwargs)
        if bidirectional:
            layer = keras.layers.Bidirectional(layer)

        cudnn_layer = cudnn_rnn_layer_class(units)
        if bidirectional:
            cudnn_layer = keras.layers.Bidirectional(cudnn_layer)

        model = self._make_nested_model(input_shape, layer, model_nest_level,
                                        model_type)
        cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
                                              model_nest_level, model_type)

        if to_cudnn:
            self._convert_model_weights(model, cudnn_model)
        else:
            self._convert_model_weights(cudnn_model, model)

        self.assertAllClose(model.predict(inputs),
                            cudnn_model.predict(inputs),
                            atol=1e-4)

    def _make_nested_model(self,
                           input_shape,
                           layer,
                           level=1,
                           model_type='func'):
        # example: make_nested_seq_model((1,), Dense(10), level=2).summary()
        def make_nested_seq_model(input_shape, layer, level=1):
            model = layer
            for i in range(1, level + 1):
                layers = [keras.layers.InputLayer(input_shape), model
                          ] if (i == 1) else [model]
                model = keras.models.Sequential(layers)
            return model

        # example: make_nested_func_model((1,), Dense(10), level=2).summary()
        def make_nested_func_model(input_shape, layer, level=1):
            model_input = keras.layers.Input(input_shape)
            model = layer
            for _ in range(level):
                model = keras.models.Model(model_input, model(model_input))
            return model

        if model_type == 'func':
            return make_nested_func_model(input_shape, layer, level)
        elif model_type == 'seq':
            return make_nested_seq_model(input_shape, layer, level)

    def _convert_model_weights(self, source_model, target_model):
        _, fname = tempfile.mkstemp('.h5')
        source_model.save_weights(fname)
        target_model.load_weights(fname)
        os.remove(fname)

    @parameterized.named_parameters(
        *test_util.generate_combinations_with_testcase_name(
            rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
    @test_util.run_v1_only('b/120911602')
    @test_util.run_gpu_only
    def test_load_weights_between_noncudnn_rnn_time_distributed(
            self, rnn_type, to_cudnn):
        # Similar to test_load_weights_between_noncudnn_rnn(), but the input has a
        # different rank because of the TimeDistributed wrapper. Issue: #10356.
        input_size = 10
        steps = 6
        timesteps = 6
        input_shape = (timesteps, steps, input_size)
        units = 2
        num_samples = 32
        inputs = np.random.random((num_samples, timesteps, steps, input_size))

        rnn_layer_kwargs = {
            'recurrent_activation': 'sigmoid',
            # ensure biases are non-zero and properly converted
            'bias_initializer': 'random_uniform',
        }
        if rnn_type == 'LSTM':
            rnn_layer_class = keras.layers.LSTM
            cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
        else:
            rnn_layer_class = keras.layers.GRU
            cudnn_rnn_layer_class = keras.layers.CuDNNGRU
            rnn_layer_kwargs['reset_after'] = True

        layer = rnn_layer_class(units, **rnn_layer_kwargs)
        layer = keras.layers.TimeDistributed(layer)

        cudnn_layer = cudnn_rnn_layer_class(units)
        cudnn_layer = keras.layers.TimeDistributed(cudnn_layer)

        model = self._make_nested_model(input_shape, layer)
        cudnn_model = self._make_nested_model(input_shape, cudnn_layer)

        if to_cudnn:
            self._convert_model_weights(model, cudnn_model)
        else:
            self._convert_model_weights(cudnn_model, model)

        self.assertAllClose(model.predict(inputs),
                            cudnn_model.predict(inputs),
                            atol=1e-4)

    @test_util.run_gpu_only
    def test_cudnnrnn_bidirectional(self):
        rnn = keras.layers.CuDNNGRU
        samples = 2
        dim = 2
        timesteps = 2
        output_dim = 2
        mode = 'concat'

        x = np.random.random((samples, timesteps, dim))
        target_dim = 2 * output_dim if mode == 'concat' else output_dim
        y = np.random.random((samples, target_dim))

        # test with Sequential model
        model = keras.Sequential()
        model.add(
            keras.layers.Bidirectional(rnn(output_dim),
                                       merge_mode=mode,
                                       input_shape=(None, dim)))
        model.compile(loss='mse',
                      optimizer=RMSPropOptimizer(learning_rate=0.001))
        model.fit(x, y, epochs=1, batch_size=1)

        # test config
        model.get_config()
        model = keras.models.model_from_json(model.to_json())
        model.summary()

        # test stacked bidirectional layers
        model = keras.Sequential()
        model.add(
            keras.layers.Bidirectional(rnn(output_dim, return_sequences=True),
                                       merge_mode=mode,
                                       input_shape=(None, dim)))
        model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode))
        model.compile(loss='mse',
                      optimizer=RMSPropOptimizer(learning_rate=0.001))
        model.fit(x, y, epochs=1, batch_size=1)

        # test with functional API
        inputs = keras.Input((timesteps, dim))
        outputs = keras.layers.Bidirectional(rnn(output_dim),
                                             merge_mode=mode)(inputs)
        model = keras.Model(inputs, outputs)
        model.compile(loss='mse',
                      optimizer=RMSPropOptimizer(learning_rate=0.001))
        model.fit(x, y, epochs=1, batch_size=1)

        # Bidirectional and stateful
        inputs = keras.Input(batch_shape=(1, timesteps, dim))
        outputs = keras.layers.Bidirectional(rnn(output_dim, stateful=True),
                                             merge_mode=mode)(inputs)
        model = keras.Model(inputs, outputs)
        model.compile(loss='mse',
                      optimizer=RMSPropOptimizer(learning_rate=0.001))
        model.fit(x, y, epochs=1, batch_size=1)

    @test_util.run_gpu_only
    def test_preprocess_weights_for_loading_gru_incompatible(self):
        """Test loading weights between incompatible layers.

    Should fail fast with an exception.
    """
        input_shape = (3, 5)

        def gru(cudnn=False, **kwargs):
            layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRU
            return layer_class(2, input_shape=input_shape, **kwargs)

        def get_layer_weights(layer):
            layer.build(input_shape=input_shape)
            return layer.get_weights()

        def assert_not_compatible(src, dest, message):
            with self.assertRaises(ValueError) as ex:
                keras.engine.saving.preprocess_weights_for_loading(
                    dest, get_layer_weights(src))
            self.assertIn(message, str(ex.exception))

        assert_not_compatible(
            gru(), gru(cudnn=True),
            'GRU(reset_after=False) is not compatible with CuDNNGRU')
        assert_not_compatible(
            gru(cudnn=True), gru(),
            'CuDNNGRU is not compatible with GRU(reset_after=False)')
        assert_not_compatible(
            gru(), gru(reset_after=True),
            'GRU(reset_after=False) is not compatible with '
            'GRU(reset_after=True)')
        assert_not_compatible(
            gru(reset_after=True), gru(),
            'GRU(reset_after=True) is not compatible with '
            'GRU(reset_after=False)')
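
The weight conversion these tests exercise happens inside save_weights/load_weights: when the layer configurations are compatible, Keras rewrites the saved weights between the CuDNN and non-CuDNN layouts on load. A minimal sketch of that round trip (assuming a GPU and the tf.compat.v1.keras namespace; the file path is illustrative):

import tensorflow as tf

keras = tf.compat.v1.keras

# GRU configured to match the CuDNN kernel, as in rnn_layer_kwargs above
# (sigmoid recurrent activation, reset_after=True).
cpu_model = keras.Sequential(
    [keras.layers.GRU(2, recurrent_activation='sigmoid', reset_after=True,
                      input_shape=(6, 10))])
gpu_model = keras.Sequential(
    [keras.layers.CuDNNGRU(2, input_shape=(6, 10))])

cpu_model.save_weights('/tmp/gru_weights.h5')  # illustrative path
gpu_model.load_weights('/tmp/gru_weights.h5')  # weights converted on load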
Example #5
    if use_dataset:
        if action == "predict":
            input_data = dataset_ops.DatasetV2.from_tensor_slices(
                input_data).batch(batch_size)
        else:
            input_data = dataset_ops.DatasetV2.from_tensor_slices(
                (input_data, expected_output)).batch(batch_size)
            expected_output = None
    return (input_data, expected_output)


@keras_parameterized.run_with_all_model_types
@keras_parameterized.run_all_keras_modes
@parameterized.named_parameters(
    *test_util.generate_combinations_with_testcase_name(
        use_dict=[True, False],
        use_dataset=[True, False],
        action=["predict", "evaluate", "fit"]))
class SparseTensorInputTest(keras_parameterized.TestCase):
    def test_sparse_tensors(self, use_dict, use_dataset, action):
        data = [(sparse_tensor.SparseTensor([[0, 0, 0], [1, 0, 0], [1, 0, 1]],
                                            [1, 2, 3], [2, 1, 3]),
                 np.array([[[1, -1, -1]], [[2, 3, -1]]])),
                (sparse_tensor.SparseTensor(
                    [[0, 0, 0], [1, 0, 0], [1, 0, 1], [2, 0, 1]], [5, 6, 7, 8],
                    [3, 1, 4]),
                 np.array([[[5, -1, -1, -1]], [[6, 7, -1, -1]],
                           [[-1, 8, -1, -1]]]))]
        # Prepare the model to test.
        input_name = get_input_name(use_dict)
        model_input = input_layer.Input(shape=(1, None),
                                        sparse=True,
Example #6
class TimeDistributedTest(keras_parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_timedistributed_dense(self):
    model = keras.models.Sequential()
    model.add(
        keras.layers.TimeDistributed(
            keras.layers.Dense(2), input_shape=(3, 4)))
    model.compile(optimizer='rmsprop', loss='mse')
    model.fit(
        np.random.random((10, 3, 4)),
        np.random.random((10, 3, 2)),
        epochs=1,
        batch_size=10)

    # test config
    model.get_config()

    # check whether the model variables are present in the
    # trackable list of objects
    checkpointed_object_ids = {
        id(o) for o in trackable_util.list_objects(model)
    }
    for v in model.variables:
      self.assertIn(id(v), checkpointed_object_ids)

  def test_timedistributed_static_batch_size(self):
    model = keras.models.Sequential()
    model.add(
        keras.layers.TimeDistributed(
            keras.layers.Dense(2), input_shape=(3, 4), batch_size=10))
    model.compile(optimizer='rmsprop', loss='mse')
    model.fit(
        np.random.random((10, 3, 4)),
        np.random.random((10, 3, 2)),
        epochs=1,
        batch_size=10)

  def test_timedistributed_invalid_init(self):
    x = constant_op.constant(np.zeros((1, 1)).astype('float32'))
    with self.assertRaisesRegex(
        ValueError, 'Please initialize `TimeDistributed` layer with a '
        '`tf.keras.layers.Layer` instance.'):
      keras.layers.TimeDistributed(x)

  def test_timedistributed_conv2d(self):
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(
          keras.layers.TimeDistributed(
              keras.layers.Conv2D(5, (2, 2), padding='same'),
              input_shape=(2, 4, 4, 3)))
      model.add(keras.layers.Activation('relu'))
      model.compile(optimizer='rmsprop', loss='mse')
      model.train_on_batch(
          np.random.random((1, 2, 4, 4, 3)), np.random.random((1, 2, 4, 4, 5)))

      model = keras.models.model_from_json(model.to_json())
      model.summary()

  def test_timedistributed_stacked(self):
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(
          keras.layers.TimeDistributed(
              keras.layers.Dense(2), input_shape=(3, 4)))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
      model.add(keras.layers.Activation('relu'))
      model.compile(optimizer='rmsprop', loss='mse')

      model.fit(
          np.random.random((10, 3, 4)),
          np.random.random((10, 3, 3)),
          epochs=1,
          batch_size=10)

  def test_regularizers(self):
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(
          keras.layers.TimeDistributed(
              keras.layers.Dense(2, kernel_regularizer='l1',
                                 activity_regularizer='l1'),
              input_shape=(3, 4)))
      model.add(keras.layers.Activation('relu'))
      model.compile(optimizer='rmsprop', loss='mse')
      self.assertEqual(len(model.losses), 2)

  def test_TimeDistributed_learning_phase(self):
    with self.cached_session():
      # test layers that need learning_phase to be set
      np.random.seed(1234)
      x = keras.layers.Input(shape=(3, 2))
      y = keras.layers.TimeDistributed(keras.layers.Dropout(.999))(
          x, training=True)
      model = keras.models.Model(x, y)
      y = model.predict(np.random.random((10, 3, 2)))
      self.assertAllClose(np.mean(y), 0., atol=1e-1, rtol=1e-1)

  def test_TimeDistributed_batchnorm(self):
    with self.cached_session():
      # test that wrapped BN updates still work.
      model = keras.models.Sequential()
      model.add(keras.layers.TimeDistributed(
          keras.layers.BatchNormalization(center=True, scale=True),
          name='bn',
          input_shape=(10, 2)))
      model.compile(optimizer='rmsprop', loss='mse')
      # Assert that mean and variance are 0 and 1.
      td = model.layers[0]
      self.assertAllClose(td.get_weights()[2], np.array([0, 0]))
      assert np.array_equal(td.get_weights()[3], np.array([1, 1]))
      # Train
      model.train_on_batch(np.random.normal(loc=2, scale=2, size=(1, 10, 2)),
                           np.broadcast_to(np.array([0, 1]), (1, 10, 2)))
      # Assert that mean and variance changed.
      assert not np.array_equal(td.get_weights()[2], np.array([0, 0]))
      assert not np.array_equal(td.get_weights()[3], np.array([1, 1]))

  def test_TimeDistributed_trainable(self):
    # test layers that need learning_phase to be set
    x = keras.layers.Input(shape=(3, 2))
    layer = keras.layers.TimeDistributed(keras.layers.BatchNormalization())
    _ = layer(x)
    self.assertEqual(len(layer.trainable_weights), 2)
    layer.trainable = False
    assert not layer.trainable_weights
    layer.trainable = True
    assert len(layer.trainable_weights) == 2

  def test_TimeDistributed_with_masked_embedding_and_unspecified_shape(self):
    with self.cached_session():
      # test with unspecified shape and Embeddings with mask_zero
      model = keras.models.Sequential()
      model.add(keras.layers.TimeDistributed(
          keras.layers.Embedding(5, 6, mask_zero=True),
          input_shape=(None, None)))  # N by t_1 by t_2 by 6
      model.add(keras.layers.TimeDistributed(
          keras.layers.SimpleRNN(7, return_sequences=True)))
      model.add(keras.layers.TimeDistributed(
          keras.layers.SimpleRNN(8, return_sequences=False)))
      model.add(keras.layers.SimpleRNN(1, return_sequences=False))
      model.compile(optimizer='rmsprop', loss='mse')
      model_input = np.random.randint(low=1, high=5, size=(10, 3, 4),
                                      dtype='int32')
      for i in range(4):
        model_input[i, i:, i:] = 0
      model.fit(model_input,
                np.random.random((10, 1)), epochs=1, batch_size=10)
      mask_outputs = [model.layers[0].compute_mask(model.input)]
      for layer in model.layers[1:]:
        mask_outputs.append(layer.compute_mask(layer.input, mask_outputs[-1]))
      func = keras.backend.function([model.input], mask_outputs[:-1])
      mask_outputs_val = func([model_input])
      ref_mask_val_0 = model_input > 0         # embedding layer
      ref_mask_val_1 = ref_mask_val_0          # first RNN layer
      ref_mask_val_2 = np.any(ref_mask_val_1, axis=-1)     # second RNN layer
      ref_mask_val = [ref_mask_val_0, ref_mask_val_1, ref_mask_val_2]
      for i in range(3):
        self.assertAllEqual(mask_outputs_val[i], ref_mask_val[i])
      self.assertIs(mask_outputs[-1], None)  # final layer

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_TimeDistributed_with_masking_layer(self):
    # test with Masking layer
    model = keras.models.Sequential()
    model.add(
        keras.layers.TimeDistributed(
            keras.layers.Masking(mask_value=0.,), input_shape=(None, 4)))
    model.add(keras.layers.TimeDistributed(keras.layers.Dense(5)))
    model.compile(optimizer='rmsprop', loss='mse')
    model_input = np.random.randint(low=1, high=5, size=(10, 3, 4))
    for i in range(4):
      model_input[i, i:, :] = 0.
    model.compile(optimizer='rmsprop', loss='mse')
    model.fit(model_input, np.random.random((10, 3, 5)), epochs=1, batch_size=6)
    mask_outputs = [model.layers[0].compute_mask(model.input)]
    mask_outputs += [
        model.layers[1].compute_mask(model.layers[1].input, mask_outputs[-1])
    ]
    func = keras.backend.function([model.input], mask_outputs)
    mask_outputs_val = func([model_input])
    self.assertEqual((mask_outputs_val[0]).all(), model_input.all())
    self.assertEqual((mask_outputs_val[1]).all(), model_input.all())

  def test_TimeDistributed_with_different_time_shapes(self):
    time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5))
    ph_1 = keras.backend.placeholder(shape=(None, 10, 13))
    out_1 = time_dist(ph_1)
    self.assertEqual(out_1.shape.as_list(), [None, 10, 5])

    ph_2 = keras.backend.placeholder(shape=(None, 1, 13))
    out_2 = time_dist(ph_2)
    self.assertEqual(out_2.shape.as_list(), [None, 1, 5])

    ph_3 = keras.backend.placeholder(shape=(None, 1, 18))
    with self.assertRaisesRegex(ValueError, 'is incompatible with'):
      time_dist(ph_3)

  def test_TimeDistributed_with_invalid_dimensions(self):
    time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5))
    ph = keras.backend.placeholder(shape=(None, 10))
    with self.assertRaisesRegex(
        ValueError,
        '`TimeDistributed` Layer should be passed an `input_shape `'):
      time_dist(ph)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_TimeDistributed_reshape(self):

    class NoReshapeLayer(keras.layers.Layer):

      def call(self, inputs):
        return inputs

    # Built-in layers that aren't stateful use the reshape implementation.
    td1 = keras.layers.TimeDistributed(keras.layers.Dense(5))
    self.assertTrue(td1._always_use_reshape)

    # Built-in layers that are stateful don't use the reshape implementation.
    td2 = keras.layers.TimeDistributed(
        keras.layers.RNN(keras.layers.SimpleRNNCell(10), stateful=True))
    self.assertFalse(td2._always_use_reshape)

    # Custom layers are not allowlisted for the fast reshape implementation.
    td3 = keras.layers.TimeDistributed(NoReshapeLayer())
    self.assertFalse(td3._always_use_reshape)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_TimeDistributed_output_shape_return_types(self):

    class TestLayer(keras.layers.Layer):

      def call(self, inputs):
        return array_ops.concat([inputs, inputs], axis=-1)

      def compute_output_shape(self, input_shape):
        output_shape = tensor_shape.TensorShape(input_shape).as_list()
        output_shape[-1] = output_shape[-1] * 2
        output_shape = tensor_shape.TensorShape(output_shape)
        return output_shape

    class TestListLayer(TestLayer):

      def compute_output_shape(self, input_shape):
        shape = super(TestListLayer, self).compute_output_shape(input_shape)
        return shape.as_list()

    class TestTupleLayer(TestLayer):

      def compute_output_shape(self, input_shape):
        shape = super(TestTupleLayer, self).compute_output_shape(input_shape)
        return tuple(shape.as_list())

    # Layers can specify output shape as list/tuple/TensorShape
    test_layers = [TestLayer, TestListLayer, TestTupleLayer]
    for layer in test_layers:
      input_layer = keras.layers.TimeDistributed(layer())
      inputs = keras.backend.placeholder(shape=(None, 2, 4))
      output = input_layer(inputs)
      self.assertEqual(output.shape.as_list(), [None, 2, 8])
      self.assertEqual(
          input_layer.compute_output_shape([None, 2, 4]).as_list(),
          [None, 2, 8])

  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
  # TODO(scottzhu): check why v1 session failed.
  def test_TimeDistributed_with_mask_first_implementation(self):
    np.random.seed(100)
    rnn_layer = keras.layers.LSTM(4, return_sequences=True, stateful=True)

    data = np.array([[[[1.0], [1.0]], [[0.0], [1.0]]],
                     [[[1.0], [0.0]], [[1.0], [1.0]]],
                     [[[1.0], [0.0]], [[1.0], [1.0]]]])
    x = keras.layers.Input(shape=(2, 2, 1), batch_size=3)
    x_masking = keras.layers.Masking()(x)
    y = keras.layers.TimeDistributed(rnn_layer)(x_masking)
    model_1 = keras.models.Model(x, y)
    model_1.compile(
        'rmsprop',
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())
    output_with_mask = model_1.predict(data, steps=1)

    y = keras.layers.TimeDistributed(rnn_layer)(x)
    model_2 = keras.models.Model(x, y)
    model_2.compile(
        'rmsprop',
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())
    output = model_2.predict(data, steps=1)

    self.assertNotAllClose(output_with_mask, output, atol=1e-7)

  @keras_parameterized.run_all_keras_modes
  @parameterized.named_parameters(
      *tf_test_util.generate_combinations_with_testcase_name(
          layer=[keras.layers.LSTM,
                 keras.layers.Dense]))
  def test_TimeDistributed_with_ragged_input(self, layer):
    if context.executing_eagerly():
      self.skipTest('b/143103634')
    np.random.seed(100)
    layer = layer(4)
    ragged_data = ragged_factory_ops.constant(
        [[[[1.0], [1.0]], [[2.0], [2.0]]],
         [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]],
         [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]]],
        ragged_rank=1)
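    # ragged_rank=1: only the time dimension varies per sample (2, 3 and 3
    # steps here); the trailing (2, 1) dimensions are uniform.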

    x_ragged = keras.Input(shape=(None, 2, 1), dtype='float32', ragged=True)
    y_ragged = keras.layers.TimeDistributed(layer)(x_ragged)
    model_1 = keras.models.Model(x_ragged, y_ragged)
    model_1._run_eagerly = testing_utils.should_run_eagerly()
    output_ragged = model_1.predict(ragged_data, steps=1)

    x_dense = keras.Input(shape=(None, 2, 1), dtype='float32')
    masking = keras.layers.Masking()(x_dense)
    y_dense = keras.layers.TimeDistributed(layer)(masking)
    model_2 = keras.models.Model(x_dense, y_dense)
    dense_data = ragged_data.to_tensor()
    model_2._run_eagerly = testing_utils.should_run_eagerly()
    output_dense = model_2.predict(dense_data, steps=1)

    output_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        output_ragged, name='tensor')
    self.assertAllEqual(output_ragged.to_tensor(), output_dense)

  @keras_parameterized.run_all_keras_modes
  def test_TimeDistributed_with_ragged_input_with_batch_size(self):
    np.random.seed(100)
    layer = keras.layers.Dense(16)

    ragged_data = ragged_factory_ops.constant(
        [[[[1.0], [1.0]], [[2.0], [2.0]]],
         [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]],
         [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]]],
        ragged_rank=1)

    # Use the first implementation by specifying batch_size
    x_ragged = keras.Input(shape=(None, 2, 1), batch_size=3, dtype='float32',
                           ragged=True)
    y_ragged = keras.layers.TimeDistributed(layer)(x_ragged)
    model_1 = keras.models.Model(x_ragged, y_ragged)
    output_ragged = model_1.predict(ragged_data, steps=1)

    x_dense = keras.Input(shape=(None, 2, 1), batch_size=3, dtype='float32')
    masking = keras.layers.Masking()(x_dense)
    y_dense = keras.layers.TimeDistributed(layer)(masking)
    model_2 = keras.models.Model(x_dense, y_dense)
    dense_data = ragged_data.to_tensor()
    output_dense = model_2.predict(dense_data, steps=1)

    output_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        output_ragged, name='tensor')
    self.assertAllEqual(output_ragged.to_tensor(), output_dense)

  def test_TimeDistributed_set_static_shape(self):
    layer = keras.layers.TimeDistributed(keras.layers.Conv2D(16, (3, 3)))
    inputs = keras.Input(batch_shape=(1, None, 32, 32, 1))
    outputs = layer(inputs)
    # Make sure the batch dim is not lost after array_ops.reshape.
    self.assertListEqual(outputs.shape.as_list(), [1, None, 30, 30, 16])
Example #7
class BiasAddDeterministicTest(bias_op_base.BiasAddTestBase,
                               parameterized.TestCase):

  def _makeShapeTuple(self, batch_size, channel_count, data_rank, data_dim,
                      data_layout):
    data_dims = data_rank * (data_dim,)
    if data_layout == 'channels_first':
      shape = (batch_size,) + (channel_count,) + data_dims
    elif data_layout == 'channels_last':
      shape = (batch_size,) + data_dims + (channel_count,)
    else:
      raise ValueError('Unknown data format')
    return shape

  def _dataFormatFromDataLayout(self, data_layout=None):
    if data_layout == 'channels_first':
      return 'NCHW'
    elif data_layout == 'channels_last':
      return 'NHWC'
    else:
      raise ValueError('Unknown data_layout')

  def _randomNDArray(self, shape):
    return 2 * np.random.random_sample(shape) - 1

  def _randomDataOp(self, shape, data_type):
    return constant_op.constant(self._randomNDArray(shape), dtype=data_type)

  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          # With the selected layer configuration, at least in TensorFlow
          # version 2.0, when data_layout='channels_last', bias_add operates
          # deterministically by default. I don't know if this is true for
          # all layer configurations. These cases are still being tested here,
          # for completeness.
          data_layout=['channels_first', 'channels_last'],
          data_rank=[1, 2, 3],
          data_type=[dtypes.float16, dtypes.float32, dtypes.float64]))
  @test_util.run_in_graph_and_eager_modes
  @test_util.run_cuda_only
  def testDeterministicGradients(self, data_layout, data_rank, data_type):
    with self.session(force_gpu=True):
      # Using a cached_session with force_gpu=True does not work at the time
      # of writing (2019-12-10). Before the @parameterized.named_parameters
      # decorator was added, this non-cached session context was set outside
      # the iteration loops for the parameter combinations, and so was re-used.
      seed = (
          hash(data_layout) % 256 + hash(data_rank) % 256 +
          hash(data_type) % 256)
      np.random.seed(seed)
      batch_size = 10
      channel_count = 8
      data_dim = 14
      input_shape = self._makeShapeTuple(batch_size, channel_count, data_rank,
                                         data_dim, data_layout)
      bias_shape = (channel_count,)
      output_shape = input_shape
      input_val = self._randomDataOp(input_shape, data_type)
      bias_val = self._randomDataOp(bias_shape, data_type)
      data_format = self._dataFormatFromDataLayout(data_layout)
      repeat_count = 5
      if context.executing_eagerly():

        def bias_gradients(local_seed):
          np.random.seed(local_seed)
          upstream_gradients = self._randomDataOp(output_shape, data_type)
          with backprop.GradientTape(persistent=True) as tape:
            tape.watch(bias_val)
            bias_add_output = nn_ops.bias_add(
                input_val, bias_val, data_format=data_format)
            gradient_injector_output = bias_add_output * upstream_gradients
          return tape.gradient(gradient_injector_output, bias_val)

        for i in range(repeat_count):
          local_seed = seed + i  # select different upstream gradients
          result_a = bias_gradients(local_seed)
          result_b = bias_gradients(local_seed)
          self.assertAllEqual(result_a, result_b)
      else:  # graph mode
        upstream_gradients = array_ops.placeholder(
            data_type, shape=output_shape, name='upstream_gradients')
        bias_add_output = nn_ops.bias_add(
            input_val, bias_val, data_format=data_format)
        gradient_injector_output = bias_add_output * upstream_gradients
        # The gradient function behaves as if grad_ys is multiplied by the op
        # gradient result, not passing the upstream gradients through the op's
        # gradient generation graph. This is the reason for using the
        # gradient injector.
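        # Concretely, differentiating bias_add_output * upstream_gradients with
        # respect to bias_val routes the fed upstream_gradients through
        # bias_add's gradient, so the placeholder plays the role of grad_ys.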
        bias_gradients = gradients_impl.gradients(
            gradient_injector_output,
            bias_val,
            grad_ys=None,
            colocate_gradients_with_ops=True)[0]
        for i in range(repeat_count):
          feed_dict = {upstream_gradients: self._randomNDArray(output_shape)}
          result_a = bias_gradients.eval(feed_dict=feed_dict)
          result_b = bias_gradients.eval(feed_dict=feed_dict)
          self.assertAllEqual(result_a, result_b)

  # TODO(duncanriach): Re-enable the following three tests for the error checks
  #   after deterministic functionality is implemented at the CUDA kernel level.
  def testInputDims(self):
    pass

  def testBiasVec(self):
    pass

  def testBiasInputsMatch(self):
    pass
Example #8
class BiasAddDeterministicTest(bias_op_base.BiasAddTestBase,
                               parameterized.TestCase):

  def _make_shape_tuple(self, batch_size, channel_count, data_rank, data_dim,
                        data_layout):
    data_dims = data_rank * (data_dim,)
    if data_layout == 'channels_first':
      shape = (batch_size,) + (channel_count,) + data_dims
    elif data_layout == 'channels_last':
      shape = (batch_size,) + data_dims + (channel_count,)
    else:
      raise ValueError('Unknown data format')
    return shape

  def _data_format_from_data_layout(self, data_layout=None):
    if data_layout == 'channels_first':
      return 'NCHW'
    elif data_layout == 'channels_last':
      return 'NHWC'
    else:
      raise ValueError('Unknown data_layout')

  def _random_data_op(self, shape, data_type):
    return constant_op.constant(
        2 * np.random.random_sample(shape) - 1, dtype=data_type)

  def _random_ndarray(self, shape):
    return 2 * np.random.random_sample(shape) - 1

  def _assert_reproducible(self, operation, feed_dict=None):
    with self.cached_session(force_gpu=True):
      result_a = operation[0].eval(feed_dict=feed_dict)
      result_b = operation[0].eval(feed_dict=feed_dict)
      self.assertAllEqual(result_a, result_b)

  # TODO(duncanriach): add test coverage for deterministic gradients
  #   in eager mode
  @parameterized.named_parameters(
      *test_util.generate_combinations_with_testcase_name(
          data_layout=['channels_first', 'channels_last'],
          data_rank=[1, 2, 3],
          data_type=[dtypes.float16, dtypes.float32, dtypes.float64]))
  @test_util.run_deprecated_v1
  @test_util.run_cuda_only
  def testDeterministicGradients(self, data_layout, data_rank, data_type):
    seed = (
        hash(data_layout) % 256 + hash(data_rank) % 256 + hash(data_type) % 256)
    np.random.seed(seed)
    batch_size = 10
    channel_count = 8
    data_dim = 14
    in_shape = self._make_shape_tuple(batch_size, channel_count, data_rank,
                                      data_dim, data_layout)
    bias_shape = (channel_count,)
    out_shape = in_shape
    in_op = self._random_data_op(in_shape, data_type)
    bias_op = self._random_data_op(bias_shape, data_type)
    data_format = self._data_format_from_data_layout(data_layout)
    bias_add_op = nn_ops.bias_add(in_op, bias_op, data_format=data_format)
    upstream_gradients = array_ops.placeholder(
        data_type, shape=out_shape, name='upstream_gradients')
    gradient_injector_op = bias_add_op * upstream_gradients
    # The gradient function behaves as if grad_ys is multiplied by the op
    # gradient result, not passing the upstream gradients through the op's
    # gradient generation graph. This is the reason for using the
    # gradient_injector_op.
    grad_ys = None
    bias_gradients_op = gradients_impl.gradients(
        gradient_injector_op,
        bias_op,
        grad_ys=grad_ys,
        colocate_gradients_with_ops=True)
    for _ in range(5):
      feed_dict = {upstream_gradients: self._random_ndarray(out_shape)}
      self._assert_reproducible(bias_gradients_op, feed_dict=feed_dict)

  # TODO(duncanriach): Re-enable the following three tests for the error checks
  #   after deterministic functionality is implemented at the CUDA kernel level.
  def testInputDims(self):
    pass

  def testBiasVec(self):
    pass

  def testBiasInputsMatch(self):
    pass
Example #9
class MergeLayersTest(keras_parameterized.TestCase):
    def test_merge_add(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        i3 = keras.layers.Input(shape=(4, 5))

        add_layer = keras.layers.Add()
        o = add_layer([i1, i2, i3])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2, i3], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        model._experimental_run_tf_function = testing_utils.should_run_tf_function(
        )

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        x3 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2, x3])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, x1 + x2 + x3, atol=1e-4)

        self.assertEqual(
            add_layer.compute_mask([i1, i2, i3], [None, None, None]), None)
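        # With no incoming masks the merge layer propagates no mask.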
        self.assertTrue(
            np.all(
                K.eval(
                    add_layer.compute_mask(
                        [i1, i2],
                        [K.variable(x1), K.variable(x2)]))))

        with self.assertRaisesRegexp(ValueError, '`mask` should be a list.'):
            add_layer.compute_mask([i1, i2, i3], x1)
        with self.assertRaisesRegexp(ValueError, '`inputs` should be a list.'):
            add_layer.compute_mask(i1, [None, None, None])
        with self.assertRaisesRegexp(ValueError,
                                     ' should have the same length.'):
            add_layer.compute_mask([i1, i2, i3], [None, None])

    def test_merge_subtract(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        i3 = keras.layers.Input(shape=(4, 5))

        subtract_layer = keras.layers.Subtract()
        o = subtract_layer([i1, i2])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        model._experimental_run_tf_function = testing_utils.should_run_tf_function(
        )

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, x1 - x2, atol=1e-4)

        self.assertEqual(subtract_layer.compute_mask([i1, i2], [None, None]),
                         None)
        self.assertTrue(
            np.all(
                K.eval(
                    subtract_layer.compute_mask(
                        [i1, i2],
                        [K.variable(x1), K.variable(x2)]))))

        with self.assertRaisesRegexp(ValueError, '`mask` should be a list.'):
            subtract_layer.compute_mask([i1, i2], x1)
        with self.assertRaisesRegexp(ValueError, '`inputs` should be a list.'):
            subtract_layer.compute_mask(i1, [None, None])
        with self.assertRaisesRegexp(
                ValueError, 'layer should be called on exactly 2 inputs'):
            subtract_layer([i1, i2, i3])
        with self.assertRaisesRegexp(
                ValueError, 'layer should be called on exactly 2 inputs'):
            subtract_layer([i1])

    def test_merge_multiply(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        i3 = keras.layers.Input(shape=(4, 5))
        o = keras.layers.multiply([i1, i2, i3])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2, i3], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        model._experimental_run_tf_function = testing_utils.should_run_tf_function(
        )

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        x3 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2, x3])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, x1 * x2 * x3, atol=1e-4)

    def test_merge_average(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        o = keras.layers.average([i1, i2])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        model._experimental_run_tf_function = testing_utils.should_run_tf_function(
        )

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, 0.5 * (x1 + x2), atol=1e-4)

    def test_merge_maximum(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        o = keras.layers.maximum([i1, i2])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        model._experimental_run_tf_function = testing_utils.should_run_tf_function(
        )

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, np.maximum(x1, x2), atol=1e-4)

    def test_merge_minimum(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        o = keras.layers.minimum([i1, i2])
        self.assertListEqual(o.shape.as_list(), [None, 4, 5])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        model._experimental_run_tf_function = testing_utils.should_run_tf_function(
        )

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 4, 5))
        self.assertAllClose(out, np.minimum(x1, x2), atol=1e-4)

    def test_merge_concatenate(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 5))
        concat_layer = keras.layers.Concatenate(axis=1)
        o = concat_layer([i1, i2])
        self.assertListEqual(o.shape.as_list(), [None, 8, 5])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        model._experimental_run_tf_function = testing_utils.should_run_tf_function(
        )

        x1 = np.random.random((2, 4, 5))
        x2 = np.random.random((2, 4, 5))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 8, 5))
        self.assertAllClose(out, np.concatenate([x1, x2], axis=1), atol=1e-4)

        self.assertEqual(concat_layer.compute_mask([i1, i2], [None, None]),
                         None)
        self.assertTrue(
            np.all(
                K.eval(
                    concat_layer.compute_mask(
                        [i1, i2],
                        [K.variable(x1), K.variable(x2)]))))

        with self.assertRaisesRegexp(ValueError, '`mask` should be a list.'):
            concat_layer.compute_mask([i1, i2], x1)
        with self.assertRaisesRegexp(ValueError, '`inputs` should be a list.'):
            concat_layer.compute_mask(i1, [None, None])
        with self.assertRaisesRegexp(ValueError,
                                     'should have the same length'):
            concat_layer.compute_mask([i1, i2], [None])
        with self.assertRaisesRegexp(
                ValueError, 'layer should be called on a list of inputs'):
            concat_layer(i1)

    def test_merge_dot(self):
        i1 = keras.layers.Input(shape=(4, ))
        i2 = keras.layers.Input(shape=(4, ))
        o = keras.layers.dot([i1, i2], axes=1)
        self.assertListEqual(o.shape.as_list(), [None, 1])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        model._experimental_run_tf_function = testing_utils.should_run_tf_function(
        )
        _ = keras.layers.Dot(axes=1).get_config()

        x1 = np.random.random((2, 4))
        x2 = np.random.random((2, 4))
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 1))
        expected = np.zeros((2, 1))
        expected[0, 0] = np.dot(x1[0], x2[0])
        expected[1, 0] = np.dot(x1[1], x2[1])
        self.assertAllClose(out, expected, atol=1e-4)

        # Test with negative tuple of axes.
        o = keras.layers.dot([i1, i2], axes=(-1, -1))
        self.assertListEqual(o.shape.as_list(), [None, 1])
        model = keras.models.Model([i1, i2], o)
        model.run_eagerly = testing_utils.should_run_eagerly()
        model._experimental_run_tf_function = testing_utils.should_run_tf_function(
        )
        out = model.predict([x1, x2])
        self.assertEqual(out.shape, (2, 1))
        self.assertAllClose(out, expected, atol=1e-4)

        # test compute_output_shape
        layer = keras.layers.Dot(axes=-1)
        self.assertEqual(layer.compute_output_shape([(4, 5), (4, 5)]), (4, 1))
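        # With axes=-1 on two (4, 5) inputs, the feature axis of size 5 is
        # contracted, so the expected output shape is (4, 1).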

    @parameterized.named_parameters(
        *tf_test_util.generate_combinations_with_testcase_name(layer=[
            keras.layers.Add, keras.layers.Subtract, keras.layers.Multiply,
            keras.layers.Minimum, keras.layers.Maximum, keras.layers.Average,
            keras.layers.Concatenate
        ]))
    def test_merge_with_ragged_input(self, layer):
        ragged_data = ragged_factory_ops.constant(
            [[1., 1., 1.], [1., 1.], [1., 1., 1., 1.]], ragged_rank=1)
        dense_data = ragged_data.to_tensor()
        input1 = keras.Input(shape=(None, ), ragged=True)
        input2 = keras.Input(shape=(None, ), ragged=True)
        out = keras.layers.Add()([input1, input2])
        model = keras.models.Model(inputs=[input1, input2], outputs=out)
        out_ragged = model.predict([ragged_data, ragged_data], steps=1)
        out_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            out_ragged).to_tensor()

        input1 = keras.Input(shape=(None, ))
        input2 = keras.Input(shape=(None, ))
        out = keras.layers.Add()([input1, input2])
        model = keras.models.Model(inputs=[input1, input2], outputs=out)
        out_dense = model.predict([dense_data, dense_data], steps=1)

        self.assertAllEqual(out_dense, out_ragged)
Example #10
from absl.testing import parameterized

import numpy as np
import tensorflow as tf

from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras import testing_utils
from tensorflow.python.framework import test_util as tf_test_util
from tensorflow.python.platform import test

from deepcell import layers


@keras_parameterized.run_all_keras_modes
@parameterized.named_parameters(
    *tf_test_util.generate_combinations_with_testcase_name(
        norm_method=[None, 'std', 'max', 'whole_image']))
class ImageNormalizationTest(keras_parameterized.TestCase):

    def test_normalize_2d(self, norm_method):
        custom_objects = {'ImageNormalization2D': layers.ImageNormalization2D}
        with tf.keras.utils.custom_object_scope(custom_objects):
            testing_utils.layer_test(
                layers.ImageNormalization2D,
                kwargs={'norm_method': norm_method,
                        'filter_size': 3,
                        'data_format': 'channels_last'},
                input_shape=(3, 5, 6, 4))
            testing_utils.layer_test(
                layers.ImageNormalization2D,
                kwargs={'norm_method': norm_method,
                        'filter_size': 3,
                        # assumed completion of a truncated call: also exercise
                        # the channels_first layout
                        'data_format': 'channels_first'},
                input_shape=(3, 4, 5, 6))
Example #11
class ConvLSTMTest(keras_parameterized.TestCase):
    @parameterized.named_parameters(
        *test_util.generate_combinations_with_testcase_name(
            data_format=['channels_first', 'channels_last'],
            return_sequences=[True, False]))
    def test_conv_lstm(self, data_format, return_sequences):
        num_row = 3
        num_col = 3
        filters = 2
        num_samples = 1
        input_channel = 2
        input_num_row = 5
        input_num_col = 5
        sequence_len = 2
        if data_format == 'channels_first':
            inputs = np.random.rand(num_samples, sequence_len, input_channel,
                                    input_num_row, input_num_col)
        else:
            inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                                    input_num_col, input_channel)

        # test for return state:
        x = keras.Input(batch_shape=inputs.shape)
        kwargs = {
            'data_format': data_format,
            'return_sequences': return_sequences,
            'return_state': True,
            'stateful': True,
            'filters': filters,
            'kernel_size': (num_row, num_col),
            'padding': 'valid'
        }
        layer = keras.layers.ConvLSTM2D(**kwargs)
        layer.build(inputs.shape)
        outputs = layer(x)
        _, states = outputs[0], outputs[1:]
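        # ConvLSTM2D, like LSTM, keeps two states per sample: the hidden state
        # h and the cell state c.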
        self.assertEqual(len(states), 2)
        model = keras.models.Model(x, states[0])
        state = model.predict(inputs)

        self.assertAllClose(keras.backend.eval(layer.states[0]),
                            state,
                            atol=1e-4)

        # test for output shape:
        testing_utils.layer_test(keras.layers.ConvLSTM2D,
                                 kwargs={
                                     'data_format': data_format,
                                     'return_sequences': return_sequences,
                                     'filters': filters,
                                     'kernel_size': (num_row, num_col),
                                     'padding': 'valid'
                                 },
                                 input_shape=inputs.shape)

    def test_conv_lstm_statefulness(self):
        # Tests for statefulness
        num_row = 3
        num_col = 3
        filters = 2
        num_samples = 1
        input_channel = 2
        input_num_row = 5
        input_num_col = 5
        sequence_len = 2
        inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                                input_num_col, input_channel)

        with self.cached_session():
            model = keras.models.Sequential()
            kwargs = {
                'data_format': 'channels_last',
                'return_sequences': False,
                'filters': filters,
                'kernel_size': (num_row, num_col),
                'stateful': True,
                'batch_input_shape': inputs.shape,
                'padding': 'same'
            }
            layer = keras.layers.ConvLSTM2D(**kwargs)

            model.add(layer)
            model.compile(optimizer='sgd', loss='mse')
            out1 = model.predict(np.ones_like(inputs))

            # train once so that the states change
            model.train_on_batch(np.ones_like(inputs),
                                 np.random.random(out1.shape))
            out2 = model.predict(np.ones_like(inputs))

            # if the state is not reset, output should be different
            self.assertNotEqual(out1.max(), out2.max())

            # check that output changes after states are reset
            # (even though the model itself didn't change)
            layer.reset_states()
            out3 = model.predict(np.ones_like(inputs))
            self.assertNotEqual(out3.max(), out2.max())

            # check that container-level reset_states() works
            model.reset_states()
            out4 = model.predict(np.ones_like(inputs))
            self.assertAllClose(out3, out4, atol=1e-5)

            # check that the call to `predict` updated the states
            out5 = model.predict(np.ones_like(inputs))
            self.assertNotEqual(out4.max(), out5.max())

    def test_conv_lstm_regularizers(self):
        # check regularizers
        num_row = 3
        num_col = 3
        filters = 2
        num_samples = 1
        input_channel = 2
        input_num_row = 5
        input_num_col = 5
        sequence_len = 2
        inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                                input_num_col, input_channel)

        with self.cached_session():
            kwargs = {
                'data_format': 'channels_last',
                'return_sequences': False,
                'kernel_size': (num_row, num_col),
                'stateful': True,
                'filters': filters,
                'batch_input_shape': inputs.shape,
                'kernel_regularizer': keras.regularizers.L1L2(l1=0.01),
                'recurrent_regularizer': keras.regularizers.L1L2(l1=0.01),
                'activity_regularizer': 'l2',
                'bias_regularizer': 'l2',
                'kernel_constraint': 'max_norm',
                'recurrent_constraint': 'max_norm',
                'bias_constraint': 'max_norm',
                'padding': 'same'
            }

            layer = keras.layers.ConvLSTM2D(**kwargs)
            layer.build(inputs.shape)
            self.assertEqual(len(layer.losses), 3)
            layer(keras.backend.variable(np.ones(inputs.shape)))
            self.assertEqual(len(layer.losses), 4)

    def test_conv_lstm_dropout(self):
        # check dropout
        with self.cached_session():
            testing_utils.layer_test(keras.layers.ConvLSTM2D,
                                     kwargs={
                                         'data_format': 'channels_last',
                                         'return_sequences': False,
                                         'filters': 2,
                                         'kernel_size': (3, 3),
                                         'padding': 'same',
                                         'dropout': 0.1,
                                         'recurrent_dropout': 0.1
                                     },
                                     input_shape=(1, 2, 5, 5, 2))

    def test_conv_lstm_cloning(self):
        with self.cached_session():
            model = keras.models.Sequential()
            model.add(
                keras.layers.ConvLSTM2D(5, 3, input_shape=(None, 5, 5, 3)))

            test_inputs = np.random.random((2, 4, 5, 5, 3))
            reference_outputs = model.predict(test_inputs)
            weights = model.get_weights()

        # Use a new graph to clone the model
        with self.cached_session():
            clone = keras.models.clone_model(model)
            clone.set_weights(weights)

            outputs = clone.predict(test_inputs)
            self.assertAllClose(reference_outputs, outputs, atol=1e-5)

    def test_conv_lstm_with_initial_state(self):
        num_samples = 32
        sequence_len = 5
        encoder_inputs = keras.layers.Input((None, 32, 32, 3))
        encoder = keras.layers.ConvLSTM2D(filters=32,
                                          kernel_size=(3, 3),
                                          padding='same',
                                          return_sequences=False,
                                          return_state=True)
        _, state_h, state_c = encoder(encoder_inputs)
        encoder_states = [state_h, state_c]

        decoder_inputs = keras.layers.Input((None, 32, 32, 4))
        decoder_lstm = keras.layers.ConvLSTM2D(filters=32,
                                               kernel_size=(3, 3),
                                               padding='same',
                                               return_sequences=False,
                                               return_state=False)
        decoder_outputs = decoder_lstm(decoder_inputs,
                                       initial_state=encoder_states)
        output = keras.layers.Conv2D(1, (3, 3),
                                     padding='same',
                                     activation='relu')(decoder_outputs)
        model = keras.Model([encoder_inputs, decoder_inputs], output)

        model.compile(optimizer='sgd',
                      loss='mse',
                      run_eagerly=testing_utils.should_run_eagerly())
        x_1 = np.random.rand(num_samples, sequence_len, 32, 32, 3)
        x_2 = np.random.rand(num_samples, sequence_len, 32, 32, 4)
        y = np.random.rand(num_samples, 32, 32, 1)
        model.fit([x_1, x_2], y)

        model.predict([x_1, x_2])
Example #12
class CuDNNTest(test.TestCase, parameterized.TestCase):
    @test_util.run_in_graph_and_eager_modes
    def test_cudnn_rnn_basics(self):
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32
                for layer_class in [
                        keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM
                ]:
                    for return_sequences in [True, False]:
                        with keras.utils.CustomObjectScope({
                                'keras.layers.CuDNNGRU':
                                keras.layers.CuDNNGRU,
                                'keras.layers.CuDNNLSTM':
                                keras.layers.CuDNNLSTM
                        }):
                            testing_utils.layer_test(layer_class,
                                                     kwargs={
                                                         'units':
                                                         units,
                                                         'return_sequences':
                                                         return_sequences
                                                     },
                                                     input_shape=(num_samples,
                                                                  timesteps,
                                                                  input_size))
                    for go_backwards in [True, False]:
                        with keras.utils.CustomObjectScope({
                                'keras.layers.CuDNNGRU':
                                keras.layers.CuDNNGRU,
                                'keras.layers.CuDNNLSTM':
                                keras.layers.CuDNNLSTM
                        }):
                            testing_utils.layer_test(layer_class,
                                                     kwargs={
                                                         'units':
                                                         units,
                                                         'go_backwards':
                                                         go_backwards
                                                     },
                                                     input_shape=(num_samples,
                                                                  timesteps,
                                                                  input_size))

    @test_util.run_in_graph_and_eager_modes
    def test_trainability(self):
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                input_size = 10
                units = 2
                for layer_class in [
                        keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM
                ]:
                    layer = layer_class(units)
                    layer.build((None, None, input_size))
                    self.assertEqual(len(layer.weights), 3)
                    self.assertEqual(len(layer.trainable_weights), 3)
                    self.assertEqual(len(layer.non_trainable_weights), 0)
                    layer.trainable = False
                    self.assertEqual(len(layer.weights), 3)
                    self.assertEqual(len(layer.non_trainable_weights), 3)
                    self.assertEqual(len(layer.trainable_weights), 0)
                    layer.trainable = True
                    self.assertEqual(len(layer.weights), 3)
                    self.assertEqual(len(layer.trainable_weights), 3)
                    self.assertEqual(len(layer.non_trainable_weights), 0)

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    def test_regularizer(self, layer_class):
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32
                layer = layer_class(
                    units,
                    return_sequences=False,
                    input_shape=(timesteps, input_size),
                    kernel_regularizer=keras.regularizers.l1(0.01),
                    recurrent_regularizer=keras.regularizers.l1(0.01),
                    bias_regularizer='l2')
                layer.build((None, None, input_size))
                self.assertEqual(len(layer.losses), 3)

                layer = layer_class(units,
                                    return_sequences=False,
                                    input_shape=(timesteps, input_size),
                                    activity_regularizer='l2')
                self.assertTrue(layer.activity_regularizer)
                x = keras.backend.variable(
                    np.ones((num_samples, timesteps, input_size)))
                layer(x)
                self.assertEqual(len(layer.get_losses_for(x)), 1)

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    def test_return_state(self, layer_class):
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32
                num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1
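                # LSTM variants expose two states (h and c), while GRU variants
                # expose only the hidden state h.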

                inputs = keras.Input(batch_shape=(num_samples, timesteps,
                                                  input_size))
                layer = layer_class(units, return_state=True, stateful=True)
                outputs = layer(inputs)
                _, state = outputs[0], outputs[1:]
                self.assertEqual(len(state), num_states)
                model = keras.models.Model(inputs, state[0])

                inputs = np.random.random((num_samples, timesteps, input_size))
                state = model.predict(inputs)
                np.testing.assert_allclose(keras.backend.eval(layer.states[0]),
                                           state,
                                           atol=1e-4)

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    def test_time_major_input(self, layer_class):
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32

                model = keras.models.Sequential()
                model.add(
                    keras.layers.Lambda(
                        lambda t: array_ops.transpose(t, [1, 0, 2])))
                layer = layer_class(units,
                                    time_major=True,
                                    return_sequences=True)
                model.add(layer)
                model.add(
                    keras.layers.Lambda(
                        lambda t: array_ops.transpose(t, [1, 0, 2])))
                model.compile(loss='categorical_crossentropy',
                              optimizer='adam')
                model.fit(np.ones((num_samples, timesteps, input_size)),
                          np.ones((num_samples, timesteps, units)))
                out = model.predict(
                    np.ones((num_samples, timesteps, input_size)))
                self.assertEqual(out.shape, (num_samples, timesteps, units))

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    def test_specify_initial_state_keras_tensor(self, layer_class):
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32
                num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

                inputs = keras.Input((timesteps, input_size))
                initial_state = [
                    keras.Input((units, )) for _ in range(num_states)
                ]
                layer = layer_class(units)
                if len(initial_state) == 1:
                    output = layer(inputs, initial_state=initial_state[0])
                else:
                    output = layer(inputs, initial_state=initial_state)
                self.assertIn(initial_state[0],
                              layer._inbound_nodes[0].input_tensors)

                model = keras.models.Model([inputs] + initial_state, output)
                model.compile(loss='categorical_crossentropy',
                              optimizer='adam')

                inputs = np.random.random((num_samples, timesteps, input_size))
                initial_state = [
                    np.random.random((num_samples, units))
                    for _ in range(num_states)
                ]
                targets = np.random.random((num_samples, units))
                model.fit([inputs] + initial_state, targets)

    @parameterized.named_parameters(
        ('cudnngru', keras.layers.CuDNNGRU),
        ('cudnnlstm', keras.layers.CuDNNLSTM),
    )
    def test_statefulness(self, layer_class):
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                input_size = 10
                timesteps = 6
                units = 2
                num_samples = 32

                model = keras.models.Sequential()
                model.add(
                    keras.layers.Embedding(10,
                                           input_size,
                                           input_length=timesteps,
                                           batch_input_shape=(num_samples,
                                                              timesteps)))
                layer = layer_class(units,
                                    return_sequences=False,
                                    stateful=True,
                                    weights=None)
                model.add(layer)
                model.compile(optimizer='sgd', loss='mse')
                out1 = model.predict(np.ones((num_samples, timesteps)))
                self.assertEqual(out1.shape, (num_samples, units))

                # train once so that the states change
                model.train_on_batch(np.ones((num_samples, timesteps)),
                                     np.ones((num_samples, units)))
                out2 = model.predict(np.ones((num_samples, timesteps)))

                # if the state is not reset, output should be different
                self.assertNotEqual(out1.max(), out2.max())

                # check that output changes after states are reset
                # (even though the model itself didn't change)
                layer.reset_states()
                out3 = model.predict(np.ones((num_samples, timesteps)))
                self.assertNotEqual(out2.max(), out3.max())

                # check that container-level reset_states() works
                model.reset_states()
                out4 = model.predict(np.ones((num_samples, timesteps)))
                self.assertAllClose(out3, out4, atol=1e-5)

                # check that the call to `predict` updated the states
                out5 = model.predict(np.ones((num_samples, timesteps)))
                self.assertNotEqual(out4.max(), out5.max())

    @parameterized.named_parameters(
        *test_util.generate_combinations_with_testcase_name(
            rnn_type=['LSTM', 'GRU'],
            to_cudnn=[True, False],
            bidirectional=[True, False],
            implementation=[1, 2],
            model_nest_level=[1, 2],
            model_type=['seq', 'func']))
    def test_load_weights_between_noncudnn_rnn(self, rnn_type, to_cudnn,
                                               bidirectional, implementation,
                                               model_nest_level, model_type):
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                input_size = 10
                timesteps = 6
                input_shape = (timesteps, input_size)
                units = 2
                num_samples = 32
                inputs = np.random.random((num_samples, timesteps, input_size))

                rnn_layer_kwargs = {
                    'recurrent_activation': 'sigmoid',
                    # ensure biases are non-zero and properly converted
                    'bias_initializer': 'random_uniform',
                    'implementation': implementation
                }
                if rnn_type == 'LSTM':
                    rnn_layer_class = keras.layers.LSTM
                    cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
                else:
                    rnn_layer_class = keras.layers.GRU
                    cudnn_rnn_layer_class = keras.layers.CuDNNGRU
                    rnn_layer_kwargs['reset_after'] = True
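                    # GRU weights can only be converted to/from CuDNNGRU when
                    # reset_after=True (CuDNN applies the reset gate after the
                    # matrix multiplication).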

                layer = rnn_layer_class(units, **rnn_layer_kwargs)
                if bidirectional:
                    layer = keras.layers.Bidirectional(layer)

                cudnn_layer = cudnn_rnn_layer_class(units)
                if bidirectional:
                    cudnn_layer = keras.layers.Bidirectional(cudnn_layer)

                model = self._make_nested_model(input_shape, layer,
                                                model_nest_level, model_type)
                cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
                                                      model_nest_level,
                                                      model_type)

                if to_cudnn:
                    self._convert_model_weights(model, cudnn_model)
                else:
                    self._convert_model_weights(cudnn_model, model)

                self.assertAllClose(model.predict(inputs),
                                    cudnn_model.predict(inputs),
                                    atol=1e-4)

    def _make_nested_model(self,
                           input_shape,
                           layer,
                           level=1,
                           model_type='func'):
        # example: make_nested_seq_model((1,), Dense(10), level=2).summary()
        def make_nested_seq_model(input_shape, layer, level=1):
            model = layer
            for i in range(1, level + 1):
                layers = [keras.layers.InputLayer(input_shape), model
                          ] if (i == 1) else [model]
                model = keras.models.Sequential(layers)
            return model

        # example: make_nested_func_model((1,), Dense(10), level=2).summary()
        def make_nested_func_model(input_shape, layer, level=1):
            model_input = keras.layers.Input(input_shape)
            model = layer
            for _ in range(level):
                model = keras.models.Model(model_input, model(model_input))
            return model

        if model_type == 'func':
            return make_nested_func_model(input_shape, layer, level)
        elif model_type == 'seq':
            return make_nested_seq_model(input_shape, layer, level)

    def _convert_model_weights(self, source_model, target_model):
        _, fname = tempfile.mkstemp('.h5')
        source_model.save_weights(fname)
        target_model.load_weights(fname)
        os.remove(fname)

    @parameterized.named_parameters(
        *test_util.generate_combinations_with_testcase_name(
            rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
    def test_load_weights_between_noncudnn_rnn_time_distributed(
            self, rnn_type, to_cudnn):
        # Similar to test_load_weights_between_noncudnn_rnn(), but with a
        # different input rank due to the use of TimeDistributed. Issue: #10356.
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                input_size = 10
                steps = 6
                timesteps = 6
                input_shape = (timesteps, steps, input_size)
                units = 2
                num_samples = 32
                inputs = np.random.random(
                    (num_samples, timesteps, steps, input_size))

                rnn_layer_kwargs = {
                    'recurrent_activation': 'sigmoid',
                    # ensure biases are non-zero and properly converted
                    'bias_initializer': 'random_uniform',
                }
                if rnn_type == 'LSTM':
                    rnn_layer_class = keras.layers.LSTM
                    cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
                else:
                    rnn_layer_class = keras.layers.GRU
                    cudnn_rnn_layer_class = keras.layers.CuDNNGRU
                    rnn_layer_kwargs['reset_after'] = True

                layer = rnn_layer_class(units, **rnn_layer_kwargs)
                layer = keras.layers.TimeDistributed(layer)

                cudnn_layer = cudnn_rnn_layer_class(units)
                cudnn_layer = keras.layers.TimeDistributed(cudnn_layer)

                model = self._make_nested_model(input_shape, layer)
                cudnn_model = self._make_nested_model(input_shape, cudnn_layer)

                if to_cudnn:
                    self._convert_model_weights(model, cudnn_model)
                else:
                    self._convert_model_weights(cudnn_model, model)

                self.assertAllClose(model.predict(inputs),
                                    cudnn_model.predict(inputs),
                                    atol=1e-4)

    @test_util.run_in_graph_and_eager_modes
    def test_cudnnrnn_bidirectional(self):
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                rnn = keras.layers.CuDNNGRU
                samples = 2
                dim = 2
                timesteps = 2
                output_dim = 2
                mode = 'concat'

                x = np.random.random((samples, timesteps, dim))
                target_dim = 2 * output_dim if mode == 'concat' else output_dim
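                # 'concat' stacks the forward and backward outputs, doubling
                # the feature dimension.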
                y = np.random.random((samples, target_dim))

                # test with Sequential model
                model = keras.Sequential()
                model.add(
                    keras.layers.Bidirectional(rnn(output_dim),
                                               merge_mode=mode,
                                               input_shape=(None, dim)))
                model.compile(loss='mse',
                              optimizer=RMSPropOptimizer(learning_rate=0.001))
                model.fit(x, y, epochs=1, batch_size=1)

                # test config
                model.get_config()
                model = keras.models.model_from_json(model.to_json())
                model.summary()

                # test stacked bidirectional layers
                model = keras.Sequential()
                model.add(
                    keras.layers.Bidirectional(rnn(output_dim,
                                                   return_sequences=True),
                                               merge_mode=mode,
                                               input_shape=(None, dim)))
                model.add(
                    keras.layers.Bidirectional(rnn(output_dim),
                                               merge_mode=mode))
                model.compile(loss='mse',
                              optimizer=RMSPropOptimizer(learning_rate=0.001))
                model.fit(x, y, epochs=1, batch_size=1)

                # test with functional API
                inputs = keras.Input((timesteps, dim))
                outputs = keras.layers.Bidirectional(rnn(output_dim),
                                                     merge_mode=mode)(inputs)
                model = keras.Model(inputs, outputs)
                model.compile(loss='mse',
                              optimizer=RMSPropOptimizer(learning_rate=0.001))
                model.fit(x, y, epochs=1, batch_size=1)

                # Bidirectional and stateful
                inputs = keras.Input(batch_shape=(1, timesteps, dim))
                outputs = keras.layers.Bidirectional(rnn(output_dim,
                                                         stateful=True),
                                                     merge_mode=mode)(inputs)
                model = keras.Model(inputs, outputs)
                model.compile(loss='mse',
                              optimizer=RMSPropOptimizer(learning_rate=0.001))
                model.fit(x, y, epochs=1, batch_size=1)

    def test_preprocess_weights_for_loading_gru_incompatible(self):
        """Test loading weights between incompatible layers.

        Should fail fast with an exception.
        """
        if test.is_gpu_available(cuda_only=True):
            with self.session(use_gpu=True):
                input_shape = (3, 5)

                def gru(cudnn=False, **kwargs):
                    layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRU
                    return layer_class(2, input_shape=input_shape, **kwargs)

                def get_layer_weights(layer):
                    layer.build(input_shape=input_shape)
                    return layer.get_weights()

                def assert_not_compatible(src, dest, message):
                    with self.assertRaises(ValueError) as ex:
                        keras.engine.saving.preprocess_weights_for_loading(
                            dest, get_layer_weights(src))
                    self.assertIn(message, str(ex.exception))

                assert_not_compatible(
                    gru(), gru(cudnn=True),
                    'GRU(reset_after=False) is not compatible with CuDNNGRU')
                assert_not_compatible(
                    gru(cudnn=True), gru(),
                    'CuDNNGRU is not compatible with GRU(reset_after=False)')
                assert_not_compatible(
                    gru(), gru(reset_after=True),
                    'GRU(reset_after=False) is not compatible with '
                    'GRU(reset_after=True)')
                assert_not_compatible(
                    gru(reset_after=True), gru(),
                    'GRU(reset_after=True) is not compatible with '
                    'GRU(reset_after=False)')
Example #13
class TestBackboneUtils(keras_parameterized.TestCase):
    @keras_parameterized.run_with_all_model_types
    @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters(
        *tf_test_util.generate_combinations_with_testcase_name(data_format=[
            # 'channels_first',
            'channels_last'
        ]))
    def test_get_featurenet_backbone(self, data_format):
        backbone = 'featurenet'
        input_shape = (256, 256, 3)
        inputs = Input(shape=input_shape)
        with self.cached_session():
            K.set_image_data_format(data_format)
            model, output_dict = backbone_utils.get_backbone(backbone,
                                                             inputs,
                                                             return_dict=True)
            assert isinstance(output_dict, dict)
            assert all(k.startswith('C') for k in output_dict)
            assert isinstance(model, Model)

            # No imagenet weights for featurenet backbone
            with self.assertRaises(ValueError):
                backbone_utils.get_backbone(backbone,
                                            inputs,
                                            use_imagenet=True)

    # @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters(
        *tf_test_util.generate_combinations_with_testcase_name(data_format=[
            # 'channels_first',
            'channels_last'
        ]))
    def test_get_featurenet3d_backbone(self, data_format):
        backbone = 'featurenet3d'
        input_shape = (40, 256, 256, 3)
        inputs = Input(shape=input_shape)
        with self.cached_session():
            K.set_image_data_format(data_format)
            model, output_dict = backbone_utils.get_backbone(backbone,
                                                             inputs,
                                                             return_dict=True)
            assert isinstance(output_dict, dict)
            assert all(k.startswith('C') for k in output_dict)
            assert isinstance(model, Model)

            # No imagenet weights for featurenet backbone
            with self.assertRaises(ValueError):
                backbone_utils.get_backbone(backbone,
                                            inputs,
                                            use_imagenet=True)

    # @keras_parameterized.run_with_all_model_types
    # @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters(
        *tf_test_util.generate_combinations_with_testcase_name(backbone=[
            'resnet50',
            'resnet101',
            'resnet152',
            'resnet50v2',
            'resnet101v2',
            'resnet152v2',
            # 'resnext50',
            # 'resnext101',
            'vgg16',
            'vgg19',
            'densenet121',
            'densenet169',
            'densenet201',
            'mobilenet',
            'mobilenetv2',
            'efficientnetb0',
            'efficientnetb1',
            'efficientnetb2',
            'efficientnetb3',
            'efficientnetb4',
            'efficientnetb5',
            'efficientnetb6',
            'efficientnetb7',
            'nasnet_large',
            'nasnet_mobile'
        ]))
    def test_get_backbone(self, backbone):
        with self.cached_session():
            K.set_image_data_format('channels_last')
            inputs = Input(shape=(256, 256, 3))
            model, output_dict = backbone_utils.get_backbone(backbone,
                                                             inputs,
                                                             return_dict=True)
            assert isinstance(output_dict, dict)
            assert all(k.startswith('C') for k in output_dict)
            assert isinstance(model, Model)

    def test_invalid_backbone(self):
        inputs = Input(shape=(4, 2, 3))
        with self.assertRaises(ValueError):
            backbone_utils.get_backbone('bad', inputs, return_dict=True)