def test_model_weights_update(self):
    """One SGD step then assign_average_vars: the dense kernel should end
    up at the EMA (decay 0.5) of its pre- and post-update values."""
    self.skipTest(
        "Wait for https://github.com/tensorflow/tensorflow/issues/31582")
    gradient = tf.Variable([[0.1]])
    dense = tf.keras.layers.Dense(
        1,
        kernel_initializer=tf.keras.initializers.Constant([[1.0]]),
        use_bias=False)
    model = tf.keras.Sequential([dense])
    model.build(input_shape=[1, 1])
    optimizer = MovingAverage(tf.keras.optimizers.SGD(lr=2.0), 0.5)
    apply_op = optimizer.apply_gradients(
        list(zip([gradient], model.variables)))
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.evaluate(apply_op)
    # SGD with lr=2.0 on grad 0.1: 1.0 - 2.0 * 0.1 = 0.8
    self.assertAllClose(model.variables[0].read_value(), [[0.8]])
    swap_op = optimizer.assign_average_vars(model.variables)
    self.evaluate(swap_op)
    # EMA with decay 0.5: 0.5 * 1.0 + 0.5 * 0.8 = 0.9
    self.assertAllClose(model.variables[0].read_value(), [[0.9]])
def test_model_dynamic_lr(self):
    """Setting ``opt.lr`` after a step must be reflected by ``lr.read_value()``.

    Fix: the original evaluated ``tf.compat.v1.global_variables_initializer()``
    twice — once before the optimizer was even constructed and once after
    ``apply_gradients``.  The first call is redundant (the second initializes
    everything, including the model and optimizer variables), so it is removed.
    """
    grad = tf.Variable([[0.1]])
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(
            1,
            kernel_initializer=tf.keras.initializers.Constant([[1.0]]),
            use_bias=False,
        )
    ])
    model.build(input_shape=[1, 1])
    opt = MovingAverage(tf.keras.optimizers.SGD(lr=1e-3), average_decay=0.5)
    update = opt.apply_gradients(list(zip([grad], model.variables)))
    # Single initializer evaluation, after all variables exist.
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.evaluate(update)
    self.assertAllClose(opt.lr.read_value(), 1e-3)
    # Dynamically change the learning rate on the wrapper.
    opt.lr = 1e-4
    self.assertAllClose(opt.lr.read_value(), 1e-4)
def test_config():
    """get_config/from_config round-trip preserves every MovingAverage field
    and the wrapped optimizer's own configuration.

    Fix: the original compared the two SGD configs with
    ``for k1, k2 in zip(old, new): assert old[k1] == new[k2]`` — that compares
    values at matching key *positions*, so it silently passes (or fails
    spuriously) if the dicts ever iterate in different orders, and it ignores
    keys beyond the shorter dict.  A direct dict equality check is exact.
    """
    sgd_opt = tf.keras.optimizers.SGD(
        lr=2.0, nesterov=True, momentum=0.3, decay=0.1)
    opt = MovingAverage(
        sgd_opt,
        average_decay=0.5,
        num_updates=None,
        start_step=5,
        dynamic_decay=True,
    )
    config = opt.get_config()
    assert config["average_decay"] == 0.5
    assert config["num_updates"] is None
    assert config["start_step"] == 5
    assert config["dynamic_decay"] is True

    new_opt = MovingAverage.from_config(config)
    # Compare wrapped optimizer configs key-by-key via dict equality.
    assert opt._optimizer.get_config() == new_opt._optimizer.get_config()
def get_data_and_model(optimizer="moving_avg"):
    """Build random training data and a small compiled softmax classifier.

    Args:
        optimizer: either the string ``"moving_avg"`` (default), which selects
            a ``MovingAverage``-wrapped SGD, or any optimizer instance accepted
            by ``model.compile``.

    Returns:
        A tuple ``(x, y, model)`` of random inputs/targets and the compiled
        ``keras.Model``.

    Fix: the original constructed the ``MovingAverage`` optimizer
    unconditionally and discarded it whenever a different optimizer was passed
    in; it is now built only when actually requested.
    """
    x = tf.random.normal([TRAIN_SAMPLES, INPUT_DIM])
    y = tf.random.normal([TRAIN_SAMPLES, NUM_CLASSES])
    if optimizer == "moving_avg":
        optimizer = MovingAverage(
            tf.keras.optimizers.SGD(lr=2.0), average_decay=0.5)
    inputs = keras.layers.Input(INPUT_DIM)
    hidden_layer = keras.layers.Dense(
        NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu"
    )(inputs)
    outputs = keras.layers.Dense(NUM_CLASSES, activation="softmax")(hidden_layer)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="categorical_crossentropy", optimizer=optimizer, metrics=["acc"])
    return x, y, model
def test_run(self):
    """Two SGD steps under MovingAverage for both sequential_update modes:
    check the raw variables, the EMA variables, the swap performed by
    assign_average_vars, and that vars and EMAs stay independent afterwards.
    EMA values are only asserted when sequential_update is True."""
    self.skipTest(
        "Wait for https://github.com/tensorflow/tensorflow/issues/31582")
    for sequential_update in [True, False]:
        var0 = tf.Variable([1.0, 2.0])
        var1 = tf.Variable([3.0, 4.0])
        grads0 = tf.constant([0.1, 0.1])
        grads1 = tf.constant([0.01, 0.01])
        grads_and_vars = list(zip([grads0, grads1], [var0, var1]))
        opt = MovingAverage(
            tf.keras.optimizers.SGD(lr=2.0),
            average_decay=0.5,
            sequential_update=sequential_update)
        if tf.executing_eagerly():
            # Eager mode: each call applies immediately.
            opt.apply_gradients(grads_and_vars)
            opt.apply_gradients(grads_and_vars)
        else:
            # Graph mode: build the op once, run it twice.
            update = opt.apply_gradients(grads_and_vars)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.evaluate(update)
            self.evaluate(update)
        # Two SGD steps with lr=2.0.
        self.assertAllClose(var0.read_value(), [0.6, 1.6])
        self.assertAllClose(var1.read_value(), [2.96, 3.96])
        ema_var0 = opt._ema.average(var0)  # pylint: disable=protected-access
        ema_var1 = opt._ema.average(var1)  # pylint: disable=protected-access
        if sequential_update:
            self.assertAllClose(ema_var0.read_value(), [0.75, 1.75])
            self.assertAllClose(ema_var1.read_value(), [2.975, 3.975])
        # Swap the variables to their moving averages.
        self.evaluate(opt.assign_average_vars([var0, var1]))
        if sequential_update:
            self.assertAllClose(var0.read_value(), [0.75, 1.75])
            self.assertAllClose(var1.read_value(), [2.975, 3.975])
        # Perturb vars and EMA vars by different amounts; they must not alias.
        perturb = tf.group([
            var0.assign_add([1.0, 1.0]),
            var1.assign_add([2.0, 2.0]),
            ema_var0.assign_add([3.0, 3.0]),
            ema_var1.assign_add([4.0, 4.0]),
        ])
        self.evaluate(perturb)
        if sequential_update:
            self.assertAllClose(var0.read_value(), [1.75, 2.75])
            self.assertAllClose(var1.read_value(), [4.975, 5.975])
            self.assertAllClose(ema_var0.read_value(), [3.75, 4.75])
            self.assertAllClose(ema_var1.read_value(), [6.975, 7.975])
def test_loss_scale_optimizer(tmp_path):
    """Training with MovingAverage wrapped inside a LossScaleOptimizer and an
    AverageModelCheckpoint(update_weights=False) must not create a checkpoint
    file at the template path."""
    test_model_filepath = str(tmp_path / "test_model.{epoch:02d}.h5")
    inner = MovingAverage(
        tf.keras.optimizers.SGD(lr=2.0), average_decay=0.5)
    scaled_opt = tf.keras.mixed_precision.LossScaleOptimizer(inner)
    x, y, model = get_data_and_model(scaled_opt)
    checkpoint_cb = AverageModelCheckpoint(
        update_weights=False,
        filepath=test_model_filepath,
        save_freq="epoch",
    )
    model.fit(
        x,
        y,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(x, y),
        callbacks=[checkpoint_cb],
    )
    # No file should exist at the raw (unformatted) template path.
    assert not os.path.exists(test_model_filepath)
def test_start_step():
    """With start_step=1 the first apply_gradients is a plain SGD step; the
    EMA slot only starts tracking on the second step."""
    var0 = tf.Variable([1.0, 2.0])
    grads_and_vars = [(tf.constant([0.1, 0.1]), var0)]
    opt = MovingAverage(
        tf.keras.optimizers.SGD(lr=1.0),
        average_decay=0.5,
        start_step=1,
    )
    # Step 1: SGD only, lr=1.0 on grad 0.1.
    opt.apply_gradients(grads_and_vars)
    np.testing.assert_allclose(var0.read_value(), [0.9, 1.9])
    ema_var0 = opt.get_slot(var0, "average")
    # Step 2: SGD plus EMA with decay 0.5 -> midpoint of 0.9 and 0.8.
    opt.apply_gradients(grads_and_vars)
    np.testing.assert_allclose(var0.read_value(), [0.8, 1.8])
    np.testing.assert_allclose(ema_var0.read_value(), [0.85, 1.85])
def test_run(sequential_update):
    """Two eager SGD steps under MovingAverage: verify the updated variables,
    the "average" slots, the swap done by assign_average_vars, and that the
    variables and their EMA slots remain independent afterwards.

    EMA-dependent assertions are only made when ``sequential_update`` is True
    (matching the original test; presumably the averages are not
    deterministic otherwise — TODO confirm against MovingAverage docs).

    Fix: the four ``assign_add`` statements each carried a stray trailing
    comma, turning every one into a throwaway 1-tuple expression; the commas
    are removed (the in-place updates themselves were unaffected).
    """
    var0 = tf.Variable([1.0, 2.0])
    var1 = tf.Variable([3.0, 4.0])
    grads0 = tf.constant([0.1, 0.1])
    grads1 = tf.constant([0.01, 0.01])
    grads_and_vars = list(zip([grads0, grads1], [var0, var1]))
    opt = MovingAverage(
        tf.keras.optimizers.SGD(lr=2.0),
        sequential_update=sequential_update,
        average_decay=0.5,
    )
    opt.apply_gradients(grads_and_vars)
    opt.apply_gradients(grads_and_vars)
    # Two SGD steps with lr=2.0.
    np.testing.assert_allclose(var0.read_value(), [0.6, 1.6])
    np.testing.assert_allclose(var1.read_value(), [2.96, 3.96])
    ema_var0 = opt.get_slot(var0, "average")
    ema_var1 = opt.get_slot(var1, "average")
    if sequential_update:
        np.testing.assert_allclose(ema_var0.read_value(), [0.75, 1.75])
        np.testing.assert_allclose(ema_var1.read_value(), [2.975, 3.975])
    # Swap the variables to their moving averages.
    _ = opt.assign_average_vars([var0, var1])
    if sequential_update:
        np.testing.assert_allclose(var0.read_value(), [0.75, 1.75])
        np.testing.assert_allclose(var1.read_value(), [2.975, 3.975])
    # Perturb vars and EMA slots by different amounts; they must not alias.
    var0.assign_add([1.0, 1.0])
    var1.assign_add([2.0, 2.0])
    ema_var0.assign_add([3.0, 3.0])
    ema_var1.assign_add([4.0, 4.0])
    if sequential_update:
        np.testing.assert_allclose(var0.read_value(), [1.75, 2.75])
        np.testing.assert_allclose(var1.read_value(), [4.975, 5.975])
        np.testing.assert_allclose(ema_var0.read_value(), [3.75, 4.75])
        np.testing.assert_allclose(ema_var1.read_value(), [6.975, 7.975])
def test_opt_failure(self):
    """Wrapping ``None`` must raise TypeError for either sequential_update
    setting."""
    missing_opt = None
    for sequential_update in (True, False):
        with self.assertRaises(TypeError):
            MovingAverage(missing_opt, sequential_update, 0.5)
def test_optimizer_string(self):
    """MovingAverage accepts an optimizer identifier string without raising."""
    wrapped = MovingAverage('adam')
    del wrapped  # construction succeeding is the whole assertion
def test_opt_failure(sequential_update):
    """Passing ``None`` as the wrapped optimizer must raise TypeError."""
    missing_opt = None
    with pytest.raises(TypeError):
        MovingAverage(missing_opt, sequential_update, 0.5)
def test_optimizer_string():
    """MovingAverage accepts an optimizer identifier string without raising."""
    wrapped = MovingAverage("adam")
    del wrapped  # construction succeeding is the whole assertion
def test_num_updates_valid():
    """Both a Python int and an integer tf.Variable are accepted for
    ``num_updates``."""
    for valid_value in (1, tf.Variable(1)):
        MovingAverage("sgd", num_updates=valid_value)
def test_opt_failure():
    """Passing ``None`` as the wrapped optimizer must raise TypeError."""
    missing_opt = None
    with pytest.raises(TypeError):
        MovingAverage(missing_opt, 0.5)
def test_serialization():
    """tf.keras serialize/deserialize round-trips a MovingAverage optimizer
    with an identical config."""
    inner = tf.keras.optimizers.SGD(
        lr=2.0, nesterov=True, momentum=0.3, decay=0.1)
    optimizer = MovingAverage(inner, average_decay=0.5, num_updates=None)
    serialized = tf.keras.optimizers.serialize(optimizer)
    restored = tf.keras.optimizers.deserialize(serialized)
    assert restored.get_config() == optimizer.get_config()
def wrap(opt):
    """Chain ``opt`` through Lookahead, then wrap the result in
    MovingAverage."""
    lookahead_opt = Lookahead(opt)
    return MovingAverage(lookahead_opt)
def test_num_updates_invalid():
    """Float, float-Variable, and string values for ``num_updates`` must all
    raise TypeError."""
    for bad_value in (1.0, tf.Variable(1.0), "a"):
        with pytest.raises(TypeError):
            MovingAverage("sgd", num_updates=bad_value)