Example #1
    def test_wrap_optimizer_dynamic_loss_scale(self):
        opt = gradient_descent_v2.SGD(1.0)
        opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            opt, "dynamic"
        )
        self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(self.evaluate(opt.loss_scale), 2.0**15)
        self.assertTrue(opt.dynamic)
        self.assertEqual(opt.initial_scale, 2.0**15)
        self.assertEqual(opt.dynamic_growth_steps, 2000)

        opt = gradient_descent_v2.SGD(1.0)
        opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            opt,
            tf.compat.v1.mixed_precision.DynamicLossScale(
                initial_loss_scale=4, increment_period=1000
            ),
        )
        self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(self.evaluate(opt.loss_scale), 4.0)
        self.assertTrue(opt.dynamic)
        self.assertEqual(opt.initial_scale, 4.0)
        self.assertEqual(opt.dynamic_growth_steps, 1000)
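
The defaults checked here (dynamic scaling, an initial scale of 2.0**15, growth every 2000 steps) are the standard Keras loss-scaling defaults. As a minimal sketch, assuming a TF 2.x build where the tf.keras mixed-precision API used in these tests is available, the same defaults can be inspected directly on the public wrapper:

import tensorflow as tf

opt = tf.keras.mixed_precision.LossScaleOptimizer(tf.keras.optimizers.SGD(1.0))
print(opt.dynamic)               # True
print(opt.initial_scale)         # 32768.0 == 2.0 ** 15
print(opt.dynamic_growth_steps)  # 2000
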
Example #2
 def test_wide_deep_model_backprop(self):
     with self.cached_session():
         linear_model = linear.LinearModel(units=1,
                                           kernel_initializer="zeros")
         dnn_model = sequential.Sequential(
             [core.Dense(units=1, kernel_initializer="zeros")])
         wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
         linear_inp = np.array([[1.0]])
         dnn_inp = np.array([[1.0]])
         inputs = [linear_inp, dnn_inp]
         output = linear_inp + 2 * dnn_inp
         linear_opt = gradient_descent.SGD(learning_rate=0.1)
         dnn_opt = gradient_descent.SGD(learning_rate=0.3)
         wide_deep_model.compile(
             optimizer=[linear_opt, dnn_opt],
             loss="mse",
             metrics=[],
             run_eagerly=test_utils.should_run_eagerly(),
         )
         self.evaluate(tf.compat.v1.global_variables_initializer())
         wide_deep_model.fit(inputs, output, epochs=1)
         self.assertAllClose(
             [[0.6]],
             self.evaluate(
                 wide_deep_model.linear_model.dense_layers[0].kernel),
         )
         self.assertAllClose(
             [[1.8]],
             self.evaluate(wide_deep_model.dnn_model.layers[0].kernel),
         )
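
The expected kernel values above follow from one SGD step on zero-initialized kernels: the prediction starts at 0, the target is 1 + 2 * 1 = 3, and each branch sees the full MSE gradient. A quick hand check of the arithmetic (plain Python, no TF needed):

pred, target, x = 0.0, 3.0, 1.0
dloss_dpred = 2.0 * (pred - target)   # MSE derivative: -6.0
print(0.0 - 0.1 * dloss_dpred * x)    # 0.6 -> linear kernel (lr = 0.1)
print(0.0 - 0.3 * dloss_dpred * x)    # 1.8 -> dnn kernel (lr = 0.3)
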
    def testConstructMomentumWithLR(self):
        opt = gradient_descent.SGD(lr=1.0, momentum=0.9)
        opt_2 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9, lr=1.0)
        opt_3 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
 def test_error_if_policy_is_set(self):
     with policy.policy_scope('mixed_float16'):
         with self.assertRaisesRegex(
                 ValueError, 'the global Keras dtype Policy has been set'):
             tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                 gradient_descent_v2.SGD(1.0))
     # Test no error is thrown when the policy is currently the default.
     tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
         gradient_descent_v2.SGD(1.0))
     # Test no error is thrown when the policy is a non-mixed policy.
     with policy.policy_scope('float64'):
         tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
             gradient_descent_v2.SGD(1.0))
 def testErrorWhenV3LsoWrapsV2Optimizer(self):
   sgd = gradient_descent.SGD()
    # Note: assertRaisesRegex searches this text against the raised error
    # message as-is, spelling included.
    with self.assertRaisesRegex(
       TypeError, 'only the new experimental optimizer '
       'defined in keras/optimizer_expeirmental/optimizer.py can be '
       'passed'):
     loss_scale_optimizer.LossScaleOptimizerV3(sgd)
    def test_variable_run_argument(self, distribution):
        # Test that variables passed to run() remain variables. Previous
        # behavior in TPUStrategy was to cast to Tensor.

        with distribution.scope():
            optimizer = gradient_descent.SGD(0.1)
            net = core.Dense(1, trainable=True)
        dataset = tf.data.Dataset.from_tensors([[1.0]])
        dataset = dataset.repeat()
        dataset = dataset.batch(2, drop_remainder=True)

        def replica_step(trainable_variables, features):

            with tf.GradientTape() as tape:
                net_out = net(features[0], training=True)
                loss = (net_out - 1.0) * (net_out - 1.0)
            gradients = tape.gradient(loss, trainable_variables)
            optimizer.apply_gradients(zip(gradients, trainable_variables))
            return loss

        @tf.function
        def step(features):
            per_replica_losses = distribution.run(
                replica_step,
                (net.trainable_variables, features),
            )
            loss = distribution.reduce(tf.distribute.ReduceOp.SUM,
                                       per_replica_losses,
                                       axis=None)
            return loss

        step(next(iter(dataset)))
 def testBasicWithLearningRateInverseTimeDecaySerializeAndDeserialize(self):
     for dtype in [tf.half, tf.float32, tf.float64]:
         learning_rate = learning_rate_schedule.InverseTimeDecay(
             3.0, decay_steps=1.0, decay_rate=0.5)
         sgd = gradient_descent.SGD(learning_rate=learning_rate)
         sgd = gradient_descent.SGD.from_config(sgd.get_config())
         self._test_basic_sgd_with_learning_rate_decay(sgd, dtype)
 def testBasicWithLearningRateDecay(self):
     for dtype in [tf.half, tf.float32, tf.float64]:
         learning_rate = 3.0
         decay = 0.5
         sgd = gradient_descent.SGD(learning_rate=learning_rate,
                                    decay=decay)
         self._test_basic_sgd_with_learning_rate_decay(sgd, dtype)
 def testSparseBasic(self):
     # TODO(tanzheny, omalleyt): Fix test in eager mode.
     with tf.Graph().as_default():
         for dtype in [tf.half, tf.float32, tf.float64]:
             var0 = tf.Variable([[1.0], [2.0]], dtype=dtype)
             var1 = tf.Variable([[3.0], [4.0]], dtype=dtype)
             grads0 = tf.IndexedSlices(
                 tf.constant([0.1], shape=[1, 1], dtype=dtype),
                 tf.constant([0]),
                 tf.constant([2, 1]),
             )
             grads1 = tf.IndexedSlices(
                 tf.constant([0.01], shape=[1, 1], dtype=dtype),
                 tf.constant([1]),
                 tf.constant([2, 1]),
             )
             sgd_op = gradient_descent.SGD(3.0).apply_gradients(
                 zip([grads0, grads1], [var0, var1]))
             self.evaluate(tf.compat.v1.global_variables_initializer())
             # Run 1 step of sgd
             self.evaluate(sgd_op)
             # Validate updated params
             self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                                self.evaluate(var0))
             self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                                self.evaluate(var1))
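
Only the rows named in each IndexedSlices receive an update here, so one step with learning rate 3.0 reproduces the expected values by hand:

var0 = [[1.0], [2.0]]
var1 = [[3.0], [4.0]]
var0[0][0] -= 3.0 * 0.1    # row 0 of var0 gets gradient 0.1
var1[1][0] -= 3.0 * 0.01   # row 1 of var1 gets gradient 0.01
print(var0, var1)          # [[0.7], [2.0]] [[3.0], [3.97]]
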
    def testMinimizeSparseResourceVariable(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half, tf.float32, tf.float64]:
                var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
                var1 = tf.Variable([3.0], dtype=dtype)
                x = tf.constant([[4.0], [5.0]], dtype=dtype)

                def loss():
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x)
                    pred += var1
                    return pred * pred

                sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Run 1 step of sgd
                self.evaluate(sgd_op)
                # Validate updated params
                np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
                np_grad = 2 * np_pred
                self.assertAllCloseAccordingToType(
                    [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]],
                    self.evaluate(var0),
                )
                self.assertAllCloseAccordingToType([3.0 - np_grad],
                                                   self.evaluate(var1))
Example #11
 def test_wide_deep_model_with_two_feature_columns(self):
     vocab_list = ["alpha", "beta", "gamma"]
     vocab_val = [0.4, 0.6, 0.9]
     data = np.random.choice(vocab_list, size=256)
     y = np.zeros_like(data, dtype=np.float32)
     for vocab, val in zip(vocab_list, vocab_val):
         indices = np.where(data == vocab)
         y[indices] = val + np.random.uniform(
             low=-0.01, high=0.01, size=indices[0].shape)
     cat_column = tf.feature_column.categorical_column_with_vocabulary_list(
         key="symbol", vocabulary_list=vocab_list)
     ind_column = tf.feature_column.indicator_column(cat_column)
     emb_column = tf.feature_column.embedding_column(cat_column,
                                                     dimension=5)
     linear_feature_layer = dense_features_v2.DenseFeatures([ind_column])
     linear_model = linear.LinearModel(use_bias=False,
                                       kernel_initializer="zeros")
     combined_linear = sequential.Sequential(
         [linear_feature_layer, linear_model])
     dnn_model = sequential.Sequential([core.Dense(units=1)])
     dnn_feature_layer = dense_features_v2.DenseFeatures([emb_column])
     combined_dnn = sequential.Sequential([dnn_feature_layer, dnn_model])
     wide_deep_model = wide_deep.WideDeepModel(combined_linear,
                                               combined_dnn)
     opt = gradient_descent.SGD(learning_rate=0.1)
     wide_deep_model.compile(opt,
                             "mse", [],
                             run_eagerly=test_utils.should_run_eagerly())
     wide_deep_model.fit(x={"symbol": data}, y=y, batch_size=32, epochs=10)
    def test_custom_aggregation(self, distribution,
                                experimental_aggregate_gradients, expected):

        with distribution.scope():
            v = tf.Variable([0.0, 0.0])
            optimizer = gradient_descent.SGD(0.1)

        class PerReplica(values.DistributedValues):
            """Holds a map from replica to unsynchronized values."""
            @property
            def values(self):
                """Returns the per replica values."""
                return self._values

        @tf.function
        def optimize():
            with tf.device(distribution.extended.worker_devices[0]):
                v1 = tf.convert_to_tensor([1.0, 1.0])
            with tf.device(distribution.extended.worker_devices[1]):
                v2 = tf.convert_to_tensor([2.0, 2.0])
            grads = PerReplica([v1, v2])

            def step_fn(grads):
                optimizer.apply_gradients(
                    [(grads, v)],
                    experimental_aggregate_gradients=
                    experimental_aggregate_gradients,
                )
                return v.read_value()

            return distribution.experimental_local_results(
                distribution.run(step_fn, args=(grads, )))

        self.assertAllClose(optimize(), expected)
 def testConfig(self):
     opt = gradient_descent.SGD(learning_rate=1.0,
                                momentum=0.9,
                                nesterov=True)
     config = opt.get_config()
     opt2 = gradient_descent.SGD.from_config(config)
     lr = opt.lr
     lr2 = opt2.lr
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertAllClose(self.evaluate(lr), self.evaluate(lr2))
     self.assertAllClose(self.evaluate(opt._get_hyper("momentum")),
                         self.evaluate(opt2._get_hyper("momentum")))
     self.assertAllClose(self.evaluate(opt._get_hyper("decay")),
                         self.evaluate(opt2._get_hyper("decay")))
     var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)
     loss = lambda: 3 * var0
     # learning rate variable created when calling minimize.
     opt.minimize(loss, [var0])
     self.evaluate(tf.compat.v1.global_variables_initializer())
     config = opt.get_config()
     opt3 = gradient_descent.SGD.from_config(config)
     lr3 = opt3.lr
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertAllClose(self.evaluate(lr), self.evaluate(lr3))
     self.assertAllClose(self.evaluate(opt._get_hyper("momentum")),
                         self.evaluate(opt3._get_hyper("momentum")))
     self.assertAllClose(self.evaluate(opt._get_hyper("decay")),
                         self.evaluate(opt3._get_hyper("decay")))
     self.assertTrue(opt3.nesterov)
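
The same round trip can be exercised against the public tf.keras SGD; a minimal sketch, assuming a TF 2.x build where these config keys are present:

import tensorflow as tf

opt = tf.keras.optimizers.SGD(learning_rate=1.0, momentum=0.9, nesterov=True)
opt2 = tf.keras.optimizers.SGD.from_config(opt.get_config())
config = opt2.get_config()
print(config["learning_rate"], config["momentum"], config["nesterov"])  # 1.0 0.9 True
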
    def testSharing(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half, tf.float32, tf.float64]:
                var0 = tf.Variable([1.0, 2.0], dtype=dtype)
                var1 = tf.Variable([3.0, 4.0], dtype=dtype)
                grads0 = tf.constant([0.1, 0.1], dtype=dtype)
                grads1 = tf.constant([0.01, 0.01], dtype=dtype)
                mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9)
                mom_update1 = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                mom_update2 = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                slot0 = mom_opt.get_slot(var0, "momentum")
                self.assertEqual(slot0.shape, var0.shape)
                slot1 = mom_opt.get_slot(var1, "momentum")
                self.assertEqual(slot1.shape, var1.shape)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                # Step 1: the momentum accumulators were 0, so we should see a
                # normal update: v -= grad * learning_rate
                self.evaluate(mom_update1)
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(np.array([-0.2, -0.2]),
                                                   self.evaluate(slot0))
                self.assertAllCloseAccordingToType(np.array([-0.02, -0.02]),
                                                   self.evaluate(slot1))
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
                    self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
                    self.evaluate(var1))
                # Step 2: the momentum accumulators contain the previous update.
                self.evaluate(mom_update2)
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * (-0.2) - 2.0 * 0.1),
                              (0.9 * (-0.2) - 2.0 * 0.1)]),
                    self.evaluate(slot0))
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                              (0.9 * (-0.02) - 2.0 * 0.01)]),
                    self.evaluate(slot1))
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([
                        1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                        2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                    ]), self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([
                        2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                        3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                    ]), self.evaluate(var1))
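
The slot and variable values asserted above follow from the Keras momentum form (accum = momentum * accum - lr * grad; var += accum). A quick hand check for the first element of var0:

lr, momentum, grad = 2.0, 0.9, 0.1
accum, var = 0.0, 1.0
for _ in range(2):
    accum = momentum * accum - lr * grad
    var += accum
print(round(accum, 2), round(var, 2))   # -0.38, 0.42 -> the step-2 asserts for slot0 and var0
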
Example #15
    def test_save_slot_variables_with_autocast_vars(self,
                                                    strategy_fn,
                                                    var_name='v'):
        p = policy.Policy('mixed_float16')
        with strategy_fn().scope(), policy.policy_scope(p):
            x = layers.Input(shape=(2, ), batch_size=2)
            # Having a var_name other than 'v' tests that a fixed bug (b/134713714)
            # does not reoccur. The bug was that a crash would occur when saving a
            # checkpoint where an AutoCastVariable with a slot variable would have a
            # different name than the layer attribute's name (layer.v in this case).
            layer = mp_test_util.MultiplyLayer(assert_type=tf.float16,
                                               var_name=var_name)
            y = layer(x)
            model = models.Model(inputs=x, outputs=y)
            opt = gradient_descent.SGD(1., 1.)
            opt = loss_scale_optimizer.LossScaleOptimizer(opt,
                                                          dynamic=False,
                                                          initial_scale=1)
            model.compile(optimizer=opt,
                          loss='mse',
                          run_eagerly=test_utils.should_run_eagerly())

        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        weights_file = os.path.join(self.get_temp_dir(), 'weights')
        model.save_weights(weights_file)
        saved_slot = backend.get_value(opt.get_slot(layer.v, 'momentum'))

        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        new_slot = backend.get_value(opt.get_slot(layer.v, 'momentum'))
        self.assertNotEqual(new_slot, saved_slot)

        model.load_weights(weights_file)
        restored_slot = backend.get_value(opt.get_slot(layer.v, 'momentum'))
        self.assertEqual(restored_slot, saved_slot)
 def testIterations(self):
   opt = gradient_descent.SGD(2.0)
   lso = loss_scale_optimizer.LossScaleOptimizer(opt, dynamic=False,
                                                 initial_scale=10.)
   lso.iterations = 7
   self.assertEqual(lso.iterations, 7)
   self.assertEqual(opt.iterations, 7)
    def test_model_with_fixed_input_dim(self):
        """Ensure that the batch_dim is removed when saving.

        When serving or retraining, it is important to reset the batch dim.
        This can be an issue inside of tf.function. See b/132783590 for context.
        """
        model = test_utils.get_small_mlp(10, 3, 5)

        loss_object = keras.losses.MeanSquaredError()
        optimizer = gradient_descent.SGD()

        @tf.function
        def train_step(data, labels):
            with tf.GradientTape() as tape:
                predictions = model(data)
                loss = loss_object(labels, predictions)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        x = np.random.random((8, 5))
        y = np.random.random((8, 3))

        train_step(x, y)

        fn = saving_utils.trace_model_call(model)
        self.assertEqual(
            fn.structured_input_signature[0][0].shape.as_list(),
            tf.TensorShape([None, 5]).as_list(),
        )
def get_mnist_model(input_shape):
    """Define a deterministically-initialized CNN model for MNIST testing."""
    inputs = keras.Input(shape=input_shape)
    x = keras.layers.Conv2D(
        32,
        kernel_size=(3, 3),
        activation="relu",
        kernel_initializer=keras.initializers.TruncatedNormal(seed=99),
    )(inputs)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Flatten()(x) + keras.layers.Flatten()(x)
    x = keras.layers.Dense(
        10,
        activation="softmax",
        kernel_initializer=keras.initializers.TruncatedNormal(seed=99),
    )(x)
    model = keras.Model(inputs=inputs, outputs=x)

    # TODO(yuefengz): optimizer with slot variables doesn't work because of
    # optimizer's bug.
    # TODO(yuefengz): we should not allow non-v2 optimizer.
    model.compile(
        loss=keras.losses.sparse_categorical_crossentropy,
        optimizer=gradient_descent.SGD(learning_rate=0.001),
        metrics=["accuracy"],
    )
    return model
Example #19
 def testConfigWithLearningRateDecay(self):
   with test_utils.use_gpu():
     var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)
     for decay_schedule in [
         learning_rate_schedule.InverseTimeDecay(
             0.5, decay_steps=1.0, decay_rate=0.1),
         learning_rate_schedule.PiecewiseConstantDecay(
             [5], [1., .5])
     ]:
       step = 10
       opt = gradient_descent.SGD(decay_schedule)
       config = opt.get_config()
       opt2 = gradient_descent.SGD.from_config(config)
       # assert both are equal float values.
       self.assertAllEqual(
           decay_schedule(step),
           opt._get_hyper('learning_rate')(step))
       self.assertAllEqual(
           decay_schedule(step),
           opt2._get_hyper('learning_rate')(step))
       loss = lambda: 3 * var0
       # learning rate variable is created when calling minimize.
       opt.minimize(loss, [var0])
       self.evaluate(tf.compat.v1.global_variables_initializer())
       config = opt.get_config()
       opt3 = gradient_descent.SGD.from_config(config)
       self.assertAllEqual(
           self.evaluate(opt._get_hyper('learning_rate')(step)),
           opt3._get_hyper('learning_rate')(step))
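
The schedule values compared in this test are easy to verify by hand, since InverseTimeDecay computes initial_lr / (1 + decay_rate * step / decay_steps). A minimal check against the public tf.keras schedule:

import tensorflow as tf

schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    0.5, decay_steps=1.0, decay_rate=0.1)
print(float(schedule(10)))   # 0.5 / (1 + 0.1 * 10 / 1.0) = 0.25
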
Example #20
    def get_model(
        self,
        max_words=10,
        initial_weights=None,
        distribution=None,
        input_shapes=None,
    ):
        del input_shapes
        with keras_correctness_test_base.MaybeDistributionScope(distribution):
            word_ids = keras.layers.Input(shape=(max_words, ),
                                          dtype=np.int32,
                                          name="words")
            word_embed = keras.layers.Embedding(input_dim=20,
                                                output_dim=10)(word_ids)
            if self.use_distributed_dense:
                word_embed = keras.layers.TimeDistributed(
                    keras.layers.Dense(4))(word_embed)
            avg = keras.layers.GlobalAveragePooling1D()(word_embed)
            preds = keras.layers.Dense(2, activation="softmax")(avg)
            model = keras.Model(inputs=[word_ids], outputs=[preds])

            if initial_weights:
                model.set_weights(initial_weights)

            model.compile(
                optimizer=gradient_descent_keras.SGD(learning_rate=0.1),
                loss="sparse_categorical_crossentropy",
                metrics=["sparse_categorical_accuracy"],
            )
        return model
Example #21
    def test_gradient(self, strategy_fn):
        x = tf.constant([1.])
        with strategy_fn().scope() as strategy:
            with policy.policy_scope('mixed_float16'):
                layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
                # The learning rate is small enough that, if it were applied to a
                # float16 variable, the variable would not change. So this tests
                # that the update is applied to the float32 variable, not to a
                # float16 copy of it.
                opt = gradient_descent.SGD(2**-14)

                def run_fn():
                    with tf.GradientTape() as tape:
                        y = layer(x)
                        # Divide by num_replicas_in_sync, as the effective total
                        # loss is the sum of the per-replica losses.
                        y /= strategy.num_replicas_in_sync

                    grad = tape.gradient(y, layer.v)
                    return opt.apply_gradients([(grad, layer.v)])

                op = strategy.experimental_run(run_fn)
                if not tf.executing_eagerly():
                    self.evaluate(tf.compat.v1.global_variables_initializer())
                    self.evaluate(op)
                # The gradient with respect to the variable is 1. Since the
                # variable is initialized with 1 and the learning rate is 2**-14,
                # the new variable value should be:
                # init_val - gradient * learning_rate, which is 1 - 1 * 2**-14.
                self.assertEqual(self.evaluate(layer.v), 1 - 2**-14)
    def test_wrap_optimizer_dynamic_loss_scale_errors(self):

        opt = gradient_descent_v2.SGD(1.0)
        with self.assertRaisesRegex(
                ValueError, 'When passing a DynamicLossScale to "loss_scale", '
                'DynamicLossScale.multiplier must be 2. Got: '
                'DynamicLossScale'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                opt,
                tf.compat.v1.mixed_precision.DynamicLossScale(multiplier=4.))

        class MyLossScale(tf.compat.v1.mixed_precision.LossScale):
            def __call__(self):
                return 1.

            def update(self, grads):
                return None, True

            def get_config(self):
                return {}

        with self.assertRaisesRegex(
                TypeError,
                'Passing a LossScale that is not a FixedLossScale or a '
                'DynamicLossScale is not supported. Got:'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                opt, MyLossScale())
Example #23
 def test_linear_model_with_feature_column(self):
     vocab_list = ["alpha", "beta", "gamma"]
     vocab_val = [0.4, 0.6, 0.9]
     data = np.random.choice(vocab_list, size=256)
     y = np.zeros_like(data, dtype=np.float32)
     for vocab, val in zip(vocab_list, vocab_val):
         indices = np.where(data == vocab)
         y[indices] = val + np.random.uniform(
             low=-0.01, high=0.01, size=indices[0].shape
         )
     cat_column = tf.feature_column.categorical_column_with_vocabulary_list(
         key="symbol", vocabulary_list=vocab_list
     )
     ind_column = tf.feature_column.indicator_column(cat_column)
     dense_feature_layer = dense_features_v2.DenseFeatures([ind_column])
     linear_model = linear.LinearModel(
         use_bias=False, kernel_initializer="zeros"
     )
     combined = sequential.Sequential([dense_feature_layer, linear_model])
     opt = gradient_descent.SGD(learning_rate=0.1)
     combined.compile(opt, "mse", [])
     combined.fit(x={"symbol": data}, y=y, batch_size=32, epochs=10)
     self.assertAllClose(
         [[0.4], [0.6], [0.9]],
         combined.layers[1].dense_layers[0].kernel.numpy(),
         atol=0.01,
     )
Example #24
 def test_linear_model(self, distribution, use_dataset_creator, data_fn):
     if (not use_dataset_creator) and isinstance(
             distribution,
             tf.distribute.experimental.ParameterServerStrategy):
         self.skipTest(
             "Parameter Server strategy requires dataset creator to be used in "
             "model.fit.")
     if (not tf.__internal__.tf2.enabled() and use_dataset_creator
             and isinstance(
                 distribution,
                 tf.distribute.experimental.ParameterServerStrategy)):
         self.skipTest(
             "Parameter Server strategy with dataset creator needs to be run when "
             "eager execution is enabled.")
     with distribution.scope():
         model = linear.LinearModel()
         opt = gradient_descent.SGD(learning_rate=0.1)
         model.compile(opt, "mse")
         if use_dataset_creator:
             x = dataset_creator.DatasetCreator(dataset_fn)
             hist = model.fit(x, epochs=3, steps_per_epoch=INPUT_SIZE)
         else:
             if data_fn == "numpy":
                 inputs, output = get_numpy()
                 hist = model.fit(inputs, output, epochs=3)
             else:
                 hist = model.fit(get_dataset(), epochs=3)
          self.assertLess(hist.history["loss"][2], 0.2)
Example #25
    def test_wide_deep_model(self, distribution, use_dataset_creator, data_fn):
        if (not use_dataset_creator) and isinstance(
                distribution,
                tf.distribute.experimental.ParameterServerStrategy):
            self.skipTest(
                "Parameter Server strategy requires dataset creator to be used in "
                "model.fit.")
        if (not tf.__internal__.tf2.enabled() and use_dataset_creator
                and isinstance(
                    distribution,
                    tf.distribute.experimental.ParameterServerStrategy)):
            self.skipTest(
                "Parameter Server strategy with dataset creator needs to be run when "
                "eager execution is enabled.")
        with distribution.scope():
            linear_model = linear.LinearModel(units=1)
            dnn_model = sequential.Sequential([core.Dense(units=1)])
            wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
            linear_opt = gradient_descent.SGD(learning_rate=0.05)
            dnn_opt = adagrad.Adagrad(learning_rate=0.1)
            wide_deep_model.compile(optimizer=[linear_opt, dnn_opt],
                                    loss="mse")

            if use_dataset_creator:
                x = dataset_creator.DatasetCreator(dataset_fn)
                hist = wide_deep_model.fit(x,
                                           epochs=3,
                                           steps_per_epoch=INPUT_SIZE)
            else:
                if data_fn == "numpy":
                    inputs, output = get_numpy()
                    hist = wide_deep_model.fit(inputs, output, epochs=3)
                else:
                    hist = wide_deep_model.fit(get_dataset(), epochs=3)
            self.assertLess(hist.history["loss"][2], 0.2)
    def get_model(self,
                  initial_weights=None,
                  distribution=None,
                  input_shapes=None):
        del input_shapes
        with keras_correctness_test_base.MaybeDistributionScope(distribution):
            image = keras.layers.Input(shape=(28, 28, 3), name='image')
            c1 = keras.layers.Conv2D(
                name='conv1',
                filters=16,
                kernel_size=(3, 3),
                strides=(4, 4),
                kernel_regularizer=keras.regularizers.l2(1e-4))(image)
            if self.with_batch_norm == 'regular':
                c1 = keras.layers.BatchNormalization(name='bn1')(c1)
            elif self.with_batch_norm == 'sync':
                # Test with parallel batch norms to verify all-reduce works OK.
                bn1 = keras.layers.SyncBatchNormalization(name='bn1')(c1)
                bn2 = keras.layers.SyncBatchNormalization(name='bn2')(c1)
                c1 = keras.layers.Add()([bn1, bn2])
            c1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(c1)
            logits = keras.layers.Dense(10, activation='softmax', name='pred')(
                keras.layers.Flatten()(c1))
            model = keras.Model(inputs=[image], outputs=[logits])

            if initial_weights:
                model.set_weights(initial_weights)

            model.compile(optimizer=gradient_descent.SGD(learning_rate=0.1),
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])

        return model
Example #27
    def get_model(self,
                  initial_weights=None,
                  distribution=None,
                  input_shapes=None):
        with keras_correctness_test_base.MaybeDistributionScope(distribution):
            # We add few non-linear layers to make it non-trivial.
            model = keras.Sequential()
            model.add(
                keras.layers.Dense(10, activation="relu", input_shape=(1, )))
            model.add(
                keras.layers.Dense(
                    10,
                    activation="relu",
                    kernel_regularizer=keras.regularizers.l2(1e-4),
                ))
            model.add(keras.layers.Dense(10, activation="relu"))
            model.add(keras.layers.Dense(1))

            if initial_weights:
                model.set_weights(initial_weights)

            model.compile(
                loss=keras.losses.mean_squared_error,
                optimizer=gradient_descent_keras.SGD(0.05),
                metrics=["mse"],
            )
            return model
Example #28
    def test_save_model_with_dynamic_loss_scaling(self, strategy_fn, h5=False):
        # TODO(reedwm): Support and test saving model with a mixed_[b]float16 policy
        # as well.
        strategy = strategy_fn()
        if (isinstance(strategy, tf.distribute.MirroredStrategy)
                and not tf.executing_eagerly()):
            # TODO(b/121381184): Enable running the test in this case.
            return

        # Create and run model.
        with strategy.scope():
            x = layers.Input(shape=(2, ), batch_size=2, dtype=tf.float32)
            y = mp_test_util.MultiplyLayer()(x)
            model = models.Model(inputs=x, outputs=y)

            opt = gradient_descent.SGD(1.)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=1., dynamic_growth_steps=2.)
            model.compile(optimizer=opt,
                          loss='mse',
                          run_eagerly=test_utils.should_run_eagerly())
        # Run for 3 steps (6 examples with a batch size of 2)
        model.fit(np.ones((6, 2)), np.zeros((6, 2)), batch_size=2)
        self.assertEqual(backend.get_value(opt.loss_scale), 2)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 1)
        (weight, ) = model.trainable_weights
        orig_weight = backend.get_value(weight)

        # Save model weights.
        save_path = os.path.join(self.get_temp_dir(), 'model')
        model.save(save_path, save_format='h5' if h5 else 'tf')

        # Run model again for 1 step (2 examples with a batch size of 2)
        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        new_weight = backend.get_value(weight)
        self.assertNotEqual(new_weight, orig_weight)
        self.assertEqual(backend.get_value(opt.loss_scale), 4)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 0)

        # Load model weights and ensure loss scale weights are restored.
        model = save.load_model(
            save_path,
            custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer})
        (weight, ) = model.trainable_weights
        loaded_weight = backend.get_value(weight)
        self.assertEqual(loaded_weight, orig_weight)
        # Currently the loss scale isn't always saved when the model is saved with
        # Model.save(). So we assert the loss scale either has the value when it was
        # saved, or the value it was initialized with.
        # TODO(reedwm): Always save/restore the loss scale with Model.save().
        self.assertIn(backend.get_value(model.optimizer.loss_scale), (1, 2))
        self.assertIn(backend.get_value(model.optimizer.dynamic_counter),
                      (0, 1))

        # Test optimizer attributes and type
        self.assertEqual(model.optimizer.initial_scale, 1.)
        self.assertEqual(model.optimizer.dynamic_growth_steps, 2.)
        self.assertEqual(type(model.optimizer),
                         loss_scale_optimizer.LossScaleOptimizer)
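
The loss-scale bookkeeping asserted above can be checked by hand: with initial_scale=1 and dynamic_growth_steps=2, every finite-gradient step increments dynamic_counter, and once the counter reaches 2 the scale doubles and the counter resets.

scale, counter = 1, 0
for _ in range(3):                 # first fit: 3 steps of batch size 2
    counter += 1
    if counter == 2:
        scale, counter = scale * 2, 0
print(scale, counter)              # 2, 1 -> the asserts after the first fit
counter += 1                       # second fit: 1 more step
if counter == 2:
    scale, counter = scale * 2, 0
print(scale, counter)              # 4, 0 -> the asserts after the second fit
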
  def testIsInstance(self):
    optimizer = create_lso(sgd_experimental.SGD())
    self.assertIsInstance(optimizer,
                          loss_scale_optimizer.BaseLossScaleOptimizer)

    optimizer = create_lso(gradient_descent.SGD())
    self.assertIsInstance(optimizer,
                          loss_scale_optimizer.BaseLossScaleOptimizer)
Example #30
 def testDir(self):
   opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.1)
   dir_result = set(dir(opt))
   self.assertIn('learning_rate', dir_result)  # Hyperparameter
   self.assertIn('lr', dir_result)  # Hyperparameter
   self.assertIn('momentum', dir_result)  # Hyperparameter
   self.assertIn('nesterov', dir_result)  # Attribute
   self.assertIn('minimize', dir_result)  # Attribute