Example #1
    def testCompositeTypeSpecArgWithoutDtype(self):
        for assign_variant_dtype in [False, True]:
            # Create a Keras Input
            spec = TwoTensorsSpecNoOneDtype(
                (1, 2, 3),
                dtypes.float32, (1, 2, 3),
                dtypes.int64,
                assign_variant_dtype=assign_variant_dtype)
            x = input_layer_lib.Input(type_spec=spec)

            def lambda_fn(tensors):
                return (math_ops.cast(tensors.x, dtypes.float64) +
                        math_ops.cast(tensors.y, dtypes.float64))

            # Verify you can construct and use a model w/ this input
            model = functional.Functional(x, core.Lambda(lambda_fn)(x))

            # And that the model works
            two_tensors = TwoTensors(
                array_ops.ones((1, 2, 3)) * 2.0, array_ops.ones((1, 2, 3)))
            self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors))

            # Test serialization / deserialization
            model = functional.Functional.from_config(model.get_config())
            self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors))
            model = model_config.model_from_json(model.to_json())
            self.assertAllEqual(model(two_tensors), lambda_fn(two_tensors))
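
A minimal sketch of the Lambda pattern exercised above, assuming the public tf.keras API (the TwoTensors composite type spec is specific to this test, so a plain dense input stands in for it here):

import tensorflow as tf

# Wrap an arbitrary function in a Lambda layer and use its output as the
# model output, as the test does with lambda_fn.
inputs = tf.keras.Input(shape=(3,))
outputs = tf.keras.layers.Lambda(lambda t: tf.cast(t, tf.float64) * 2.0)(inputs)
model = tf.keras.Model(inputs, outputs)
print(model(tf.ones((1, 3))))  # [[2. 2. 2.]], cast to float64 by the lambda
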
Example #2
  def test_Bidirectional_ragged_input(self, merge_mode):
    if test.is_built_with_rocm():
      # Ragged tensors are not supported in the ROCm RNN implementation.
      self.skipTest('Test not supported on the ROCm platform')
    np.random.seed(100)
    rnn = keras.layers.LSTM
    units = 3
    x = ragged_factory_ops.constant(
        [[[1, 1, 1], [1, 1, 1]], [[1, 1, 1]],
         [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]],
         [[1, 1, 1], [1, 1, 1], [1, 1, 1]]],
        ragged_rank=1)
    x = math_ops.cast(x, 'float32')

    # pylint: disable=g-long-lambda
    with self.cached_session():
      if merge_mode == 'ave':
        merge_func = lambda y, y_rev: (y + y_rev) / 2
      elif merge_mode == 'concat':
        merge_func = lambda y, y_rev: ragged_concat_ops.concat(
            (y, y_rev), axis=-1)
      elif merge_mode == 'mul':
        merge_func = lambda y, y_rev: (y * y_rev)
        # pylint: enable=g-long-lambda

      inputs = keras.Input(
          shape=(None, 3), batch_size=4, dtype='float32', ragged=True)
      layer = keras.layers.Bidirectional(
          rnn(units, return_sequences=True), merge_mode=merge_mode)
      f_merged = keras.backend.function([inputs], layer(inputs))
      f_forward = keras.backend.function([inputs],
                                         layer.forward_layer(inputs))

      # TODO(kaftan): after KerasTensor refactor TF op layers should work
      # with many composite tensors, and this shouldn't need to be a lambda
      # layer.
      reverse_layer = core.Lambda(array_ops.reverse, arguments=dict(axis=[1]))
      f_backward = keras.backend.function(
          [inputs],
          reverse_layer(layer.backward_layer(inputs)))

      y_merged = f_merged(x)
      y_expected = merge_func(
          ragged_tensor.convert_to_tensor_or_ragged_tensor(f_forward(x)),
          ragged_tensor.convert_to_tensor_or_ragged_tensor(f_backward(x)))

      y_merged = ragged_tensor.convert_to_tensor_or_ragged_tensor(y_merged)
      self.assertAllClose(y_merged.flat_values, y_expected.flat_values)
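
A minimal sketch of the same Bidirectional-over-ragged-input setup, assuming the public tf.keras API mirrors the internal modules used in the test (illustrative only):

import tensorflow as tf

# Two sequences of different lengths, each step a vector of size 3.
x = tf.ragged.constant(
    [[[1., 1., 1.], [1., 1., 1.]],
     [[1., 1., 1.]]],
    ragged_rank=1)

inputs = tf.keras.Input(shape=(None, 3), dtype='float32', ragged=True)
outputs = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(3, return_sequences=True), merge_mode='concat')(inputs)
model = tf.keras.Model(inputs, outputs)

# With merge_mode='concat', the forward and backward outputs are concatenated
# along the last axis, giving a ragged output of shape (2, None, 6).
print(model(x).shape)
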
Example #3
    def test_fixed_loss_scaling(self, strategy_fn, cloning=True):
        if testing_utils.should_run_distributed():
            self.skipTest('b/137397816')
        # Note: We do not test mixed precision in this method, only loss scaling.
        if not self._is_strategy_supported(strategy_fn):
            return
        loss_scale = 8.
        batch_size = 4
        with strategy_fn().scope():
            x = layers.Input(shape=(1, ), batch_size=batch_size)
            layer = AddLayer()
            y = layer(x)

            # The gradient of 'y' at this point is 1. With loss scaling, the gradient
            # is 'loss_scale'. We divide by the batch size since the loss is averaged
            # across batch elements.
            expected_gradient = loss_scale / batch_size
            identity_with_grad_check_fn = (
                mp_test_util.create_identity_with_grad_check_fn(
                    [expected_gradient]))
            y = core.Lambda(identity_with_grad_check_fn)(y)
            model = models.Model(inputs=x, outputs=y)

            def loss_fn(y_true, y_pred):
                del y_true
                return math_ops.reduce_mean(y_pred)

            opt = gradient_descent.SGD(1.)
            opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
            model.compile(
                opt,
                loss=loss_fn,
                cloning=cloning,
                run_eagerly=testing_utils.should_run_eagerly(),
                run_distributed=testing_utils.should_run_distributed())

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and should have a gradient of 1 subtracted from it.
        expected = 0
        self.assertEqual(backend.eval(layer.v), expected)
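
The expected_gradient arithmetic in the comment above can be checked in isolation; a hypothetical standalone sketch (not part of the test):

import tensorflow as tf

loss_scale, batch_size = 8.0, 4
x = tf.ones((batch_size, 1))
with tf.GradientTape() as tape:
    tape.watch(x)
    # Scaling a mean loss by loss_scale multiplies each element's gradient
    # by loss_scale / batch_size.
    scaled_loss = loss_scale * tf.reduce_mean(x)
grad = tape.gradient(scaled_loss, x)
print(grad.numpy())  # every entry equals loss_scale / batch_size == 2.0
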
Example #4
  def test_adapt_preprocessing_stage_with_dict_input(self):
    x0 = Input(shape=(3,), name='x0')
    x1 = Input(shape=(4,), name='x1')
    x2 = Input(shape=(3, 5), name='x2')

    # Dimensions will mismatch if x1 is incorrectly placed.
    x1_sum = core.Lambda(
        lambda x: math_ops.reduce_sum(x, axis=-1, keepdims=True))(
            x1)
    x2_sum = core.Lambda(lambda x: math_ops.reduce_sum(x, axis=-1))(x2)

    l0 = PLMerge()
    y = l0([x0, x1_sum])

    l1 = PLMerge()
    y = l1([y, x2_sum])

    l2 = PLSplit()
    z, y = l2(y)
    stage = preprocessing_stage.FunctionalPreprocessingStage(
        {
            'x2': x2,
            'x0': x0,
            'x1': x1
        }, [y, z])
    stage.compile()

    # Test with a dict of NumPy arrays
    one_array0 = np.ones((4, 3), dtype='float32')
    one_array1 = np.ones((4, 4), dtype='float32')
    one_array2 = np.ones((4, 3, 5), dtype='float32')
    adapt_data = {'x1': one_array1, 'x0': one_array0, 'x2': one_array2}
    stage.adapt(adapt_data)
    self.assertEqual(l0.adapt_count, 1)
    self.assertEqual(l1.adapt_count, 1)
    self.assertEqual(l2.adapt_count, 1)
    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
    self.assertLessEqual(l1.adapt_time, l2.adapt_time)

    # Check call
    y, z = stage({
        'x1': array_ops.constant(one_array1),
        'x2': array_ops.constant(one_array2),
        'x0': array_ops.constant(one_array0)
    })
    self.assertAllClose(y, np.zeros((4, 3), dtype='float32') + 9.)
    self.assertAllClose(z, np.zeros((4, 3), dtype='float32') + 11.)

    # Test with a list of NumPy arrays
    adapt_data = [one_array0, one_array1, one_array2]
    stage.adapt(adapt_data)
    self.assertEqual(l0.adapt_count, 2)
    self.assertEqual(l1.adapt_count, 2)
    self.assertEqual(l2.adapt_count, 2)
    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
    self.assertLessEqual(l1.adapt_time, l2.adapt_time)

    # Test with flattened dataset
    adapt_data = dataset_ops.Dataset.from_tensor_slices(
        (one_array0, one_array1, one_array2))
    adapt_data = adapt_data.batch(2)  # 2 batches of 2 samples

    stage.adapt(adapt_data)
    self.assertEqual(l0.adapt_count, 3)
    self.assertEqual(l1.adapt_count, 3)
    self.assertEqual(l2.adapt_count, 3)
    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
    self.assertLessEqual(l1.adapt_time, l2.adapt_time)

    # Test with dataset in dict shape
    adapt_data = dataset_ops.Dataset.from_tensor_slices({
        'x0': one_array0,
        'x2': one_array2,
        'x1': one_array1
    })
    adapt_data = adapt_data.batch(2)  # 2 batches of 2 samples
    stage.adapt(adapt_data)
    self.assertEqual(l0.adapt_count, 4)
    self.assertEqual(l1.adapt_count, 4)
    self.assertEqual(l2.adapt_count, 4)
    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
    self.assertLessEqual(l1.adapt_time, l2.adapt_time)

    # Test error with bad data
    with self.assertRaisesRegex(ValueError, 'requires a '):
      stage.adapt(None)
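
A minimal sketch of the reduce_sum Lambda used for x1 above, assuming the public tf.keras API: summing over the last axis with keepdims=True turns a (batch, 4) input into (batch, 1), which avoids the dimension mismatch the comment in the test warns about.

import numpy as np
import tensorflow as tf

x1 = tf.keras.Input(shape=(4,), name='x1')
x1_sum = tf.keras.layers.Lambda(
    lambda t: tf.reduce_sum(t, axis=-1, keepdims=True))(x1)
model = tf.keras.Model(x1, x1_sum)
print(model(np.ones((2, 4), dtype='float32')).shape)  # (2, 1)
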
Example #5
    def test_dynamic_loss_scaling(self, strategy_fn, cloning=True):
        if testing_utils.should_run_distributed():
            self.skipTest('b/137397816')
        if not self._is_strategy_supported(strategy_fn):
            return
        strategy = strategy_fn()
        initial_loss_scale = 2.
        batch_size = 4
        expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                             dtype=dtypes.float16)
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
        with strategy.scope():
            with policy.policy_scope(policy.Policy('infer_float32_vars')):
                x = layers.Input(shape=(1, ),
                                 batch_size=batch_size,
                                 dtype=dtypes.float16)
                layer = AddLayer(assert_type=dtypes.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients))
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=expected_gradient))
                y = core.Lambda(identity_with_grad_check_fn)(y)
                y = math_ops.cast(y, dtypes.float32)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                opt = gradient_descent.SGD(1.)
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=initial_loss_scale, increment_period=2)
                opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
                model.compile(
                    opt,
                    loss=loss_fn,
                    cloning=cloning,
                    run_eagerly=testing_utils.should_run_eagerly(),
                    run_distributed=testing_utils.should_run_distributed())

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down
        # by 1 each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient * 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient / 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
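
The loss-scale updates this test relies on can be summarized as a small policy; a pure-Python sketch inferred from the assertions above (not the library's actual implementation):

def update_loss_scale(scale, good_steps, grads_finite, increment_period=2):
    """Double the scale after `increment_period` consecutive finite steps;
    halve it (and skip the weight update) when gradients are non-finite."""
    if not grads_finite:
        return scale / 2, 0          # halve and reset the counter
    good_steps += 1
    if good_steps >= increment_period:
        return scale * 2, 0          # double after enough good steps
    return scale, good_steps
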
Example #6
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        if testing_utils.should_run_distributed():
            self.skipTest('b/137397816')
        # The advanced model tests mixed-precision-related features that would occur
        # in a resnet50 model. It tests a model that has:
        #  * Multiple layers, some of which use auto-cast variables and some of
        #    which do not.
        #  * Regularization on some variables but not others.
        #  * A fixed loss scale (if use_loss_scaling is True)

        if not self._is_strategy_supported(strategy_fn):
            return
        strategy = strategy_fn()
        if use_loss_scaling:
            loss_scale = 8.
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(policy.Policy('infer_float32_vars')):
                x = layers.Input(shape=(1, ),
                                 batch_size=2,
                                 dtype=dtypes.float16)
                layer1 = AddLayer(assert_type=dtypes.float16,
                                  regularizer=IdentityRegularizer(),
                                  use_operator=True)
                layer2 = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                                 use_operator=True)
                layer3 = AddLayer(assert_type=dtypes.float16,
                                  use_operator=False)
                layer4 = AddLayerWithoutAutoCast(
                    assert_type=dtypes.float16,
                    regularizer=IdentityRegularizer(),
                    use_operator=False)
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss scaling, the
                    # gradient is 'loss_scale'. We divide by the batch size of 2 since the
                    # loss is averaged across batch elements.
                    expected_gradient = loss_scale / 2
                    identity_with_grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=dtypes.float16,
                            expected_gradient=[expected_gradient]))
                    y = core.Lambda(identity_with_grad_check_fn)(y)
                y = math_ops.cast(y, dtypes.float32)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    self.assertEqual(y_true.dtype, dtypes.float32)
                    self.assertEqual(y_pred.dtype, dtypes.float32)
                    return math_ops.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                if use_loss_scaling:
                    opt = loss_scale_optimizer.LossScaleOptimizer(
                        opt, loss_scale)
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=testing_utils.should_run_eagerly(),
                    run_distributed=testing_utils.should_run_distributed())

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for layer in (layer1, layer2, layer3, layer4):
            if layer.losses:
                # Layer has weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
            else:
                # Layer does not have weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
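
The final assertions reduce to one SGD step per variable; a hypothetical arithmetic check:

learning_rate = 2 ** -14
# Each AddLayer's variable gets a gradient of 1 from the loss; layers with an
# IdentityRegularizer get an additional gradient of 1 from the regularizer.
print(1 - 2 * learning_rate)  # expected v for layer1 and layer4 (regularized)
print(1 - learning_rate)      # expected v for layer2 and layer3
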
Example #7
    def test_dynamic_loss_scaling(self,
                                  strategy_fn,
                                  pass_loss_scale_to_policy=False,
                                  get_config=False):
        strategy = strategy_fn()
        initial_loss_scale = 2.
        batch_size = 4
        loss_scale = loss_scale_module.DynamicLossScale(
            initial_loss_scale=initial_loss_scale, increment_period=2)
        expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                             dtype=dtypes.float16)
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
        with strategy.scope():
            opt = gradient_descent.SGD(1.)
            if pass_loss_scale_to_policy:
                p = policy.Policy('mixed_float16', loss_scale=loss_scale)
            else:
                p = policy.Policy('mixed_float16', loss_scale=None)
                opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
            with policy.policy_scope(p):
                x = layers.Input(shape=(1, ),
                                 batch_size=batch_size,
                                 dtype=dtypes.float16)
                layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients))
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=expected_gradient))
                y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            'MultiplyLayer': mp_test_util.MultiplyLayer
                        })
                    (layer, ) = (
                        layer for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer))

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                model.compile(opt,
                              loss=loss_fn,
                              run_eagerly=testing_utils.should_run_eagerly())

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down
        # by 1 each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient * 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient / 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
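
A hedged sketch of the same setup with the public TF 2.4+ mixed-precision API, instead of the internal policy / loss_scale_optimizer modules used in the test (API names and defaults are assumptions, not taken from the test):

import numpy as np
import tensorflow as tf

tf.keras.mixed_precision.set_global_policy('mixed_float16')

inputs = tf.keras.Input(shape=(1,), batch_size=4, dtype='float16')
outputs = tf.keras.layers.Dense(1)(inputs)  # float32 variables, float16 compute
model = tf.keras.Model(inputs, outputs)

# A dynamic loss scale that starts at 2 and doubles after two finite steps,
# matching initial_loss_scale=2 / increment_period=2 above.
opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(1.0), initial_scale=2.0, dynamic_growth_steps=2)
model.compile(opt, loss='mse')

# Model.fit scales the loss and unscales the gradients automatically when the
# optimizer is a LossScaleOptimizer.
model.fit(np.ones((4, 1), dtype='float16'), np.ones((4, 1)), epochs=1, verbose=0)
print(float(opt.loss_scale))
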