Example #1
    def testSetLearningRate(self):
        optimizer = adam_new.Adam(learning_rate=1.0)
        self.assertIsInstance(optimizer._learning_rate, tf.Variable)
        self.assertEqual(self.evaluate(optimizer.learning_rate), 1.0)
        optimizer.learning_rate = 2.0
        self.assertEqual(self.evaluate(optimizer.learning_rate), 2.0)
        # Test the legacy setter.
        optimizer.lr = 3.0
        self.assertEqual(self.evaluate(optimizer.learning_rate), 3.0)

        lr_schedule = learning_rate_schedule.ExponentialDecay(
            initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9)
        optimizer = adam_new.Adam(learning_rate=lr_schedule)
        self.assertIsInstance(optimizer._learning_rate,
                              learning_rate_schedule.ExponentialDecay)
        self.assertEqual(optimizer.learning_rate, 0.01)
        # Test the legacy property.
        self.assertEqual(optimizer.lr, 0.01)

        x = tf.Variable([1.0, 2.0], dtype=tf.float32)
        grads = tf.convert_to_tensor([1.0, 2.0])
        for _ in range(2):
            optimizer.apply_gradients(zip([grads], [x]))
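        # After two steps ExponentialDecay yields 0.01 * 0.9 ** (2 / 10000),
        # which is just below the initial rate of 0.01.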
        self.assertTrue(optimizer.learning_rate < 0.01
                        and optimizer.learning_rate > 0.00999)
        with self.assertRaisesRegex(TypeError,
                                    "This optimizer was created with"):
            optimizer.learning_rate = 2.0
Example #2
    def testCheckpointOptimizer(self):
        x = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32)
        lr_schedule = learning_rate_schedule.ExponentialDecay(
            initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9)
        optimizer_1 = adam_new.Adam(learning_rate=lr_schedule,
                                    beta_1=0.8,
                                    beta_2=0.888)
        grads = tf.convert_to_tensor([[1.0, 2.0], [3.0, 4.0]])

        for _ in range(1):
            optimizer_1.apply_gradients(zip([grads], [x]))

        # Then save the variable and optimizer to a checkpoint.
        checkpoint_1 = tf.train.Checkpoint(var=x, optimizer=optimizer_1)
        checkpoint_path = checkpoint_1.save(self.get_temp_dir())

        # Create a new optimizer and call restore on it (and x)
        x2 = tf.Variable([[0., 0.], [0., 0.]], dtype=x.dtype)
        optimizer_2 = adam_new.Adam(learning_rate=0.02,
                                    beta_1=0.7,
                                    beta_2=0.777)
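        # build() creates the optimizer's slot variables up front so the
        # checkpointed values have matching variables to restore into.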
        optimizer_2.build([x2])
        checkpoint_2 = tf.train.Checkpoint(var=x2, optimizer=optimizer_2)
        checkpoint_2.restore(checkpoint_path)

        self.assertTrue((self.evaluate(
            optimizer_1._momentums._storage[0]) == self.evaluate(
                optimizer_2._momentums._storage[0])).all())
        self.assertEqual(self.evaluate(optimizer_1._iterations),
                         self.evaluate(optimizer_2._iterations))
Example #3
    def testJitCompile(self, strategy):
        # Test the optimizer yields same numerical results when jit_compile is
        # on and off.
        with strategy.scope():
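            # Both optimizers keep exponential moving averages of the weights
            # and, with ema_overwrite_frequency=1, overwrite the model
            # variables with those averages after every step.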
            optimizer_1 = adam_new.Adam(jit_compile=False,
                                        use_ema=True,
                                        ema_overwrite_frequency=1)
            optimizer_2 = adam_new.Adam(jit_compile=True,
                                        use_ema=True,
                                        ema_overwrite_frequency=1)
            model_1 = keras.Sequential([
                keras.layers.Input(shape=(2, )),
                keras.layers.Dense(5),
                keras.layers.Dense(1)
            ])
            model_2 = keras.models.clone_model(model_1)
            model_2.set_weights(model_1.get_weights())

            def per_worker_dataset_fn():
                def dataset_fn(_):
                    x = np.random.rand(6, 2)
                    y = [1, 1, 1, 0, 0, 0]
                    ds = tf.data.Dataset.from_tensor_slices((x, y))
                    ds = ds.repeat().batch(6)
                    return ds

                return strategy.distribute_datasets_from_function(dataset_fn)

            ds = per_worker_dataset_fn()

            @tf.function
            def train_step(ds):
                def replica_fn(data):
                    features, labels = data
                    with tf.GradientTape() as tape:
                        output_1 = model_1(features)
                        loss_1 = keras.losses.MeanSquaredError(
                            reduction=losses_utils.ReductionV2.NONE)(labels,
                                                                     output_1)
                    grads_1 = tape.gradient(loss_1,
                                            model_1.trainable_variables)
                    optimizer_1.apply_gradients(
                        zip(grads_1, model_1.trainable_variables))

                    with tf.GradientTape() as tape:
                        output_2 = model_2(features)
                        loss_2 = keras.losses.MeanSquaredError(
                            reduction=losses_utils.ReductionV2.NONE)(labels,
                                                                     output_2)
                    grads_2 = tape.gradient(loss_2,
                                            model_2.trainable_variables)
                    optimizer_2.apply_gradients(
                        zip(grads_2, model_2.trainable_variables))

                strategy.run(replica_fn, args=(next(iter(ds)), ))

            for _ in range(3):
                train_step(ds)
                self.assertAllClose(model_1.trainable_variables[0][0],
                                    model_2.trainable_variables[0][0])
Example #4
  def test_checkpoint_sam(self):
    model = keras.Sequential([
        keras.Input([2, 2]),
        keras.layers.Dense(4),
        keras.layers.Dense(1),
    ])
    sam_model_1 = sharpness_aware_minimization.SharpnessAwareMinimization(model)
    sam_model_2 = sharpness_aware_minimization.SharpnessAwareMinimization(model)
    data = tf.random.uniform([1, 2, 2])
    label = data[:, 0] > 0.5

    sam_model_1.compile(
        optimizer=adam.Adam(),
        loss=keras.losses.BinaryCrossentropy(from_logits=True),
    )

    sam_model_1.fit(data, label)

    checkpoint = tf.train.Checkpoint(sam_model_1)
    checkpoint2 = tf.train.Checkpoint(sam_model_2)
    temp_dir = self.get_temp_dir()
    save_path = checkpoint.save(temp_dir)
    checkpoint2.restore(save_path)

    self.assertAllClose(sam_model_1(data), sam_model_2(data))
Example #5
    def test_save_sam(self):
        model = keras.Sequential(
            [
                keras.Input([2, 2]),
                keras.layers.Dense(4),
                keras.layers.Dense(1),
            ]
        )
        sam_model = sharpness_aware_minimization.SharpnessAwareMinimization(
            model
        )
        data = tf.random.uniform([1, 2, 2])
        label = data[:, 0] > 0.5

        sam_model.compile(
            optimizer=adam.Adam(),
            loss=keras.losses.BinaryCrossentropy(from_logits=True),
        )

        sam_model.fit(data, label)

        path = os.path.join(self.get_temp_dir(), "model")
        sam_model.save(path)
        loaded_sam_model = keras.models.load_model(path)
        loaded_sam_model.load_weights(path)

        self.assertAllClose(sam_model(data), loaded_sam_model(data))
Example #6
 def testAddVariableFromReference(self):
     optimizer = adam_new.Adam()
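     # The new optimizer variable is zero-initialized and keeps the reference
     # variable's name under the "test" prefix.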
     variable = optimizer.add_variable_from_reference(
         tf.Variable(1.0, name="tmp"), "test"
     )
     self.assertEqual(variable._shared_name, "test/tmp")
     self.assertEqual(self.evaluate(variable), 0)
Example #7
 def testAddVariableWithCustomShape(self):
     optimizer = adam_new.Adam()
     variable = optimizer.add_variable_from_reference(
         tf.Variable([1.0, 2.0], name="tmp"), "test", shape=[])
     self.assertEqual(variable, tf.Variable(0.0))
Example #8
 def testGetAndFromConfig(self):
     optimizer = adam_new.Adam(learning_rate=np.float64(0.05),
                               beta_1=0.7,
                               beta_2=0.77,
                               amsgrad=True,
                               epsilon=0.001,
                               clipnorm=0.5,
                               use_ema=True,
                               ema_momentum=0.5,
                               ema_overwrite_frequency=50)
     config = optimizer.get_config()
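     # The learning rate is stored in the config as a float32, even though a
     # float64 was passed to the constructor.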
     self.assertDictEqual(
         config, {
             "learning_rate": np.float32(0.05),
             "beta_1": 0.7,
             "beta_2": 0.77,
             "epsilon": 0.001,
             "amsgrad": True,
             "clipnorm": 0.5,
             "global_clipnorm": None,
             "clipvalue": None,
             "use_ema": True,
             "ema_momentum": 0.5,
             "ema_overwrite_frequency": 50,
             "jit_compile": False,
         })
     restored_optimizer = adam_new.Adam.from_config(config)
     self.assertDictEqual(restored_optimizer.get_config(),
                          optimizer.get_config())
Example #9
 def testBuildIndexDict(self):
     optimizer = adam_new.Adam()
     var_list = [tf.Variable(0, name=f"var{i}") for i in range(10)]
     optimizer._build_index_dict(var_list)
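     # The index dict maps each variable's key to its position in var_list.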
     self.assertEqual(
         optimizer._index_dict[optimizer._var_key(var_list[7])], 7
     )
Example #10
 def testClipGlobalNorm(self):
     optimizer = adam_new.Adam(global_clipnorm=1)
     grad = [
         tf.cast([100.0, 100.0], dtype=tf.float32),
         tf.cast([100.0, 100.0], dtype=tf.float32)
     ]
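     # The global norm of the gradients is sqrt(4 * 100 ** 2) = 200, so
     # clipping to a global norm of 1 scales every entry by 1 / 200.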
     clipped_grad = optimizer._clip_gradients(grad)
     self.assertAllClose(clipped_grad[0], [0.5, 0.5])
Example #11
    def testPassingLegacyArgsRaiseWarning(self):
        with self.assertLogs(level="WARNING") as log_output:
            logging.set_verbosity(logging.WARNING)
            _ = adam_new.Adam(clipnorm=1, decay=0.5)
            expected_log = "decay is deprecated in"
            output = log_output[0][0].message

            self.assertTrue(re.search(expected_log, output))
Example #12
 def testSetIterations(self):
     optimizer = adam_new.Adam()
     optimizer.iterations = tf.Variable(2, dtype=tf.int32)
     self.assertEqual(optimizer.iterations, 2)
     var_list = [tf.Variable(2.0), tf.Variable(2.0)]
     grads = tf.convert_to_tensor([1.0, 1.0])
     optimizer.apply_gradients(zip(grads, var_list))
     self.assertEqual(optimizer.iterations, 3)
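     # Once the optimizer has applied gradients, its iteration counter can no
     # longer be reassigned.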
     with self.assertRaisesRegex(RuntimeError, "Cannot set"):
         optimizer.iterations = 2
Example #13
 def testReturnAllOptimizerVariables(self):
     x = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32)
     optimizer = adam_new.Adam()
     grads = tf.convert_to_tensor([[1.0, 2.0], [3.0, 4.0]])
     optimizer.apply_gradients(zip([grads], [x]))
     optimizer_variables = optimizer.variables
     all_names = [var._shared_name for var in optimizer_variables]
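     # One training step leaves four optimizer variables: the iteration
     # counter, the learning rate, and Adam's m and v slots for x.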
     self.assertLen(optimizer_variables, 4)
     self.assertCountEqual(all_names, [
         "iteration", "learning_rate", "Adam/m/Variable", "Adam/v/Variable"
     ])
Example #14
  def test_sam_model_fit(self, strategy):
    with strategy.scope():
      model = keras.Sequential([
          keras.Input([2, 2]),
          keras.layers.Dense(4),
          keras.layers.Dense(1),
      ])
      sam_model = sharpness_aware_minimization.SharpnessAwareMinimization(model)
      data = tf.random.uniform([2, 2])
      label = data[:, 0] > 0.5

      sam_model.compile(
          optimizer=adam.Adam(),
          loss=keras.losses.BinaryCrossentropy(from_logits=True),
      )

      sam_model.fit(data, label, steps_per_epoch=1)
Example #15
 def testNoGradients(self):
     optimizer = adam_new.Adam(jit_compile=False)
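     # Applying an empty list of gradients and variables should be a no-op
     # rather than an error.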
     optimizer.apply_gradients(zip([], []))
Example #16
 def testPassingLegacyClipnorm(self):
     optimizer = adam_new.Adam(clipnorm=1)
     self.assertEqual(optimizer.clipnorm, 1)
Example #17
 def testClipNorm(self):
     optimizer = adam_new.Adam(clipnorm=1)
     grad = [tf.convert_to_tensor([100.0, 100.0])]
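     # The gradient's L2 norm is 100 * sqrt(2), so clipping it to norm 1
     # rescales it to [sqrt(2) / 2, sqrt(2) / 2].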
     clipped_grad = optimizer._clip_gradients(grad)
     self.assertAllClose(clipped_grad[0], [2**0.5 / 2, 2**0.5 / 2])
Example #18
    adagrad_optimizer_v1_fn,
    ftrl_optimizer_v1_fn,
    rmsprop_optimizer_v1_fn,
]

adadelta_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001)
)
adagrad_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001)
)
adam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0)
)
adam_experimental_fn = tf.__internal__.test.combinations.NamedObject(
    "AdamExperimental", lambda: adam_experimental.Adam(0.001)
)
adamax_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdamaxKerasV2", lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0)
)
nadam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "NadamKerasV2", lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0)
)
ftrl_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "FtrlKerasV2", lambda: ftrl_keras_v2.Ftrl(0.001)
)
gradient_descent_optimizer_keras_v2_fn = (
    tf.__internal__.test.combinations.NamedObject(
        "GradientDescentKerasV2", lambda: gradient_descent_keras_v2.SGD(0.001)
    )
)
Example #19
 def testAdam(self):
     self._compare_numerical(adam_old.Adam(amsgrad=True),
                             adam_new.Adam(amsgrad=True))
Example #20
    ds_combinations.cloud_tpu_strategy,
    ds_combinations.multi_worker_mirrored_2x1_cpu,
    ds_combinations.multi_worker_mirrored_2x2_gpu,
    ds_combinations.central_storage_strategy_with_two_gpus,
]

adadelta_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladadelta",
    lambda: adadelta_new.Adadelta(  # pylint: disable=g-long-lambda
        0.002,
        use_ema=True,
        ema_overwrite_frequency=None))
adagrad_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladagrad", lambda: adagrad_new.Adagrad(0.002))
adam_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladam", lambda: adam_new.Adam(0.002))
adamw_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladamw", lambda: adamw_new.AdamW(0.002, weight_decay=0.004))
rmsprop_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalrmsprop", lambda: rmsprop_new.RMSprop(0.002))
sgd_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalsgdaverage",
    lambda: sgd_new.SGD(  # pylint: disable=g-long-lambda
        0.002,
        use_ema=True,
        ema_overwrite_frequency=1))

OPTIMIZER_FN = [
    adadelta_new_fn,
    adagrad_new_fn,
    adam_new_fn,
Example #21
 def testClipValue(self):
     optimizer = adam_new.Adam(clipvalue=1)
     grad = [tf.convert_to_tensor([100.0, 100.0])]
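     # clipvalue clips every gradient element independently into [-1, 1].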
     clipped_grad = optimizer._clip_gradients(grad)
     self.assertAllEqual(clipped_grad[0], [1.0, 1.0])
Example #22
STRATEGIES = [
    ds_combinations.parameter_server_strategy_3worker_2ps_cpu,
    ds_combinations.parameter_server_strategy_3worker_2ps_1gpu,
]

adadelta_fn = tf.__internal__.test.combinations.NamedObject(
    "adadelta",
    lambda: adadelta.Adadelta(  # pylint: disable=g-long-lambda
        0.002,
        use_ema=True,
        ema_overwrite_frequency=None),
)
adagrad_fn = tf.__internal__.test.combinations.NamedObject(
    "adagrad", lambda: adagrad.Adagrad(0.002))
adam_fn = tf.__internal__.test.combinations.NamedObject(
    "adam", lambda: adam.Adam(0.002))
adamax_fn = tf.__internal__.test.combinations.NamedObject(
    "adamax", lambda: adamax.Adamax(0.002))
adamw_fn = tf.__internal__.test.combinations.NamedObject(
    "adamw", lambda: adamw.AdamW(0.002, weight_decay=0.004))
ftrl_fn = tf.__internal__.test.combinations.NamedObject(
    "ftrl", lambda: ftrl.Ftrl(0.002))
nadam_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentnadam", lambda: nadam.Nadam(0.002))
rmsprop_fn = tf.__internal__.test.combinations.NamedObject(
    "rmsprop", lambda: rmsprop.RMSprop(0.002))
sgd_fn = tf.__internal__.test.combinations.NamedObject(
    "sgdaverage",
    lambda: sgd.SGD(  # pylint: disable=g-long-lambda
        0.002,
        use_ema=True,