def test_model_weights_update(self):
    """A single MovingAverage-wrapped SGD step updates the model weight,
    and assign_average_vars swaps in the exponential moving average."""
    self.skipTest(
        "Wait for https://github.com/tensorflow/tensorflow/issues/31582")
    gradient = tf.Variable([[0.1]])
    dense = tf.keras.layers.Dense(
        1,
        kernel_initializer=tf.keras.initializers.Constant([[1.0]]),
        use_bias=False)
    model = tf.keras.Sequential([dense])
    model.build(input_shape=[1, 1])

    opt = MovingAverage(tf.keras.optimizers.SGD(lr=2.0), 0.5)
    update_op = opt.apply_gradients(list(zip([gradient], model.variables)))

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.evaluate(update_op)
    # SGD step: 1.0 - 2.0 * 0.1 = 0.8
    self.assertAllClose(model.variables[0].read_value(), [[0.8]])

    # EMA with decay 0.5: 0.5 * 1.0 + 0.5 * 0.8 = 0.9
    swap_op = opt.assign_average_vars(model.variables)
    self.evaluate(swap_op)
    self.assertAllClose(model.variables[0].read_value(), [[0.9]])
# Example 2
    def test_model_dynamic_lr(self):
        """The learning rate exposed by MovingAverage can be reassigned at
        runtime and the new value is reflected by ``opt.lr``."""
        grad = tf.Variable([[0.1]])
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(
                1,
                kernel_initializer=tf.keras.initializers.Constant([[1.0]]),
                use_bias=False,
            )
        ])
        model.build(input_shape=[1, 1])

        opt = MovingAverage(tf.keras.optimizers.SGD(lr=1e-3),
                            average_decay=0.5)
        update = opt.apply_gradients(list(zip([grad], model.variables)))

        # One initializer call after all variables (model weights plus
        # optimizer slots) exist is sufficient; the original ran a second,
        # redundant initializer before the optimizer was even created.
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(update)
        self.assertAllClose(opt.lr.read_value(), 1e-3)

        # MovingAverage delegates ``lr`` to the wrapped optimizer.
        opt.lr = 1e-4
        self.assertAllClose(opt.lr.read_value(), 1e-4)
# Example 3
def test_config():
    """get_config/from_config round-trips MovingAverage and the inner SGD."""
    sgd_opt = tf.keras.optimizers.SGD(lr=2.0,
                                      nesterov=True,
                                      momentum=0.3,
                                      decay=0.1)
    opt = MovingAverage(sgd_opt,
                        average_decay=0.5,
                        num_updates=None,
                        start_step=5,
                        dynamic_decay=True)
    config = opt.get_config()

    assert config["average_decay"] == 0.5
    assert config["num_updates"] is None
    assert config["start_step"] == 5
    assert config["dynamic_decay"] is True

    new_opt = MovingAverage.from_config(config)
    old_sgd_config = opt._optimizer.get_config()
    new_sgd_config = new_opt._optimizer.get_config()

    # Compare the configs directly. The original zip-over-keys loop compared
    # values at positionally paired keys, which can pair *different* keys if
    # the orderings diverge and never verifies the key sets match at all.
    assert old_sgd_config == new_sgd_config
def get_data_and_model(optimizer="moving_avg"):
    """Build random training data and a small compiled dense model.

    Args:
      optimizer: the string ``"moving_avg"`` to use a MovingAverage-wrapped
        SGD, or any optimizer instance to compile the model with directly.

    Returns:
      Tuple ``(x, y, model)`` of inputs, targets, and the compiled model.
    """
    x = tf.random.normal([TRAIN_SAMPLES, INPUT_DIM])
    y = tf.random.normal([TRAIN_SAMPLES, NUM_CLASSES])
    if optimizer == "moving_avg":
        # Only construct the wrapper when it is actually requested; the
        # original built it unconditionally even when a caller-supplied
        # optimizer made it dead weight.
        optimizer = MovingAverage(tf.keras.optimizers.SGD(lr=2.0), average_decay=0.5)
    inputs = keras.layers.Input(INPUT_DIM)
    hidden_layer = keras.layers.Dense(
        NUM_HIDDEN, input_dim=INPUT_DIM, activation="relu"
    )(inputs)
    outputs = keras.layers.Dense(NUM_CLASSES, activation="softmax")(hidden_layer)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["acc"])
    return x, y, model
    def test_run(self):
        """Exercise MovingAverage over two SGD steps in both sequential and
        non-sequential update modes, checking the trained variables, the EMA
        shadow variables, and the assign_average_vars swap-in.

        Skipped pending the upstream TensorFlow issue referenced below.
        """
        self.skipTest(
            "Wait for https://github.com/tensorflow/tensorflow/issues/31582")
        for sequential_update in [True, False]:
            var0 = tf.Variable([1.0, 2.0])
            var1 = tf.Variable([3.0, 4.0])

            grads0 = tf.constant([0.1, 0.1])
            grads1 = tf.constant([0.01, 0.01])

            grads_and_vars = list(zip([grads0, grads1], [var0, var1]))

            opt = MovingAverage(
                tf.keras.optimizers.SGD(lr=2.0),
                average_decay=0.5,
                sequential_update=sequential_update)

            # Graph mode needs explicit initialization and evaluate() calls;
            # in eager mode apply_gradients takes effect immediately.
            if not tf.executing_eagerly():
                update = opt.apply_gradients(grads_and_vars)
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.evaluate(update)
                self.evaluate(update)
            else:
                opt.apply_gradients(grads_and_vars)
                opt.apply_gradients(grads_and_vars)

            # Two SGD steps with lr=2.0: var0 goes 1.0 -> 0.8 -> 0.6, etc.
            self.assertAllClose(var0.read_value(), [0.6, 1.6])
            self.assertAllClose(var1.read_value(), [2.96, 3.96])

            ema_var0 = opt._ema.average(var0)  # pylint: disable=protected-access
            ema_var1 = opt._ema.average(var1)  # pylint: disable=protected-access

            # Exact EMA values are only asserted when updates are sequential.
            if sequential_update:
                self.assertAllClose(ema_var0.read_value(), [0.75, 1.75])
                self.assertAllClose(ema_var1.read_value(), [2.975, 3.975])

            # Copy the averaged weights back into the live variables.
            assign = opt.assign_average_vars([var0, var1])
            self.evaluate(assign)

            if sequential_update:
                self.assertAllClose(var0.read_value(), [0.75, 1.75])
                self.assertAllClose(var1.read_value(), [2.975, 3.975])

            # Perturb the live variables and the EMA shadows independently to
            # confirm the swap-in copied values rather than aliasing them.
            perturb = tf.group([
                var0.assign_add([1.0, 1.0]),
                var1.assign_add([2.0, 2.0]),
                ema_var0.assign_add([3.0, 3.0]),
                ema_var1.assign_add([4.0, 4.0])
            ])
            self.evaluate(perturb)

            if sequential_update:
                self.assertAllClose(var0.read_value(), [1.75, 2.75])
                self.assertAllClose(var1.read_value(), [4.975, 5.975])
                self.assertAllClose(ema_var0.read_value(), [3.75, 4.75])
                self.assertAllClose(ema_var1.read_value(), [6.975, 7.975])
def test_loss_scale_optimizer(tmp_path):
    """With update_weights=False, AverageModelCheckpoint writes no file when
    the MovingAverage optimizer is wrapped inside a LossScaleOptimizer."""
    ckpt_path = str(tmp_path / "test_model.{epoch:02d}.h5")
    inner = MovingAverage(tf.keras.optimizers.SGD(lr=2.0),
                          average_decay=0.5)
    wrapped = tf.keras.mixed_precision.LossScaleOptimizer(inner)
    x, y, model = get_data_and_model(wrapped)
    checkpoint_cb = AverageModelCheckpoint(update_weights=False,
                                           filepath=ckpt_path,
                                           save_freq="epoch")
    model.fit(
        x,
        y,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(x, y),
        callbacks=[checkpoint_cb],
    )
    assert not os.path.exists(ckpt_path)
# Example 7
def test_start_step():
    """Averaging begins only after ``start_step`` optimizer steps."""
    var = tf.Variable([1.0, 2.0])
    grads_and_vars = [(tf.constant([0.1, 0.1]), var)]

    opt = MovingAverage(
        tf.keras.optimizers.SGD(lr=1.0), average_decay=0.5, start_step=1,
    )

    # First step: plain SGD, no averaging yet.
    opt.apply_gradients(grads_and_vars)
    np.testing.assert_allclose(var.read_value(), [0.9, 1.9])

    avg_slot = opt.get_slot(var, "average")

    # Second step: SGD again, and the EMA slot starts tracking the variable.
    opt.apply_gradients(grads_and_vars)
    np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
    np.testing.assert_allclose(avg_slot.read_value(), [0.85, 1.85])
# Example 8
def test_run(sequential_update):
    """Two MovingAverage-wrapped SGD steps: verify the trained variables,
    the EMA slots, and the assign_average_vars swap-in for both sequential
    and non-sequential update modes."""
    var0 = tf.Variable([1.0, 2.0])
    var1 = tf.Variable([3.0, 4.0])

    grads0 = tf.constant([0.1, 0.1])
    grads1 = tf.constant([0.01, 0.01])

    grads_and_vars = list(zip([grads0, grads1], [var0, var1]))

    opt = MovingAverage(
        tf.keras.optimizers.SGD(lr=2.0),
        sequential_update=sequential_update,
        average_decay=0.5,
    )

    opt.apply_gradients(grads_and_vars)
    opt.apply_gradients(grads_and_vars)

    # Two SGD steps with lr=2.0: var0 goes 1.0 -> 0.8 -> 0.6, etc.
    np.testing.assert_allclose(var0.read_value(), [0.6, 1.6])
    np.testing.assert_allclose(var1.read_value(), [2.96, 3.96])

    ema_var0 = opt.get_slot(var0, "average")
    ema_var1 = opt.get_slot(var1, "average")

    if sequential_update:
        np.testing.assert_allclose(ema_var0.read_value(), [0.75, 1.75])
        np.testing.assert_allclose(ema_var1.read_value(), [2.975, 3.975])

    _ = opt.assign_average_vars([var0, var1])

    if sequential_update:
        np.testing.assert_allclose(var0.read_value(), [0.75, 1.75])
        np.testing.assert_allclose(var1.read_value(), [2.975, 3.975])

    # Perturb both the live variables and their EMA slots. The original
    # carried stray trailing commas here (leftovers from a tf.group list)
    # that wrapped each statement in a throwaway 1-tuple; removed.
    var0.assign_add([1.0, 1.0])
    var1.assign_add([2.0, 2.0])
    ema_var0.assign_add([3.0, 3.0])
    ema_var1.assign_add([4.0, 4.0])

    if sequential_update:
        np.testing.assert_allclose(var0.read_value(), [1.75, 2.75])
        np.testing.assert_allclose(var1.read_value(), [4.975, 5.975])
        np.testing.assert_allclose(ema_var0.read_value(), [3.75, 4.75])
        np.testing.assert_allclose(ema_var1.read_value(), [6.975, 7.975])
# Example 9
 def test_opt_failure(self):
     """Passing None as the base optimizer must raise TypeError."""
     for sequential_update in (True, False):
         with self.assertRaises(TypeError):
             MovingAverage(None, sequential_update, 0.5)
# Example 10
 def test_optimizer_string(self):
     """MovingAverage accepts an optimizer identifier string."""
     MovingAverage('adam')
# Example 11
def test_opt_failure(sequential_update):
    """A None base optimizer is rejected with TypeError."""
    with pytest.raises(TypeError):
        MovingAverage(None, sequential_update, 0.5)
# Example 12
def test_optimizer_string():
    """The wrapper resolves a string optimizer identifier such as "adam"."""
    MovingAverage("adam")
# Example 13
def test_num_updates_valid():
    """Both a Python int and an integer tf.Variable are valid num_updates."""
    for valid in (1, tf.Variable(1)):
        MovingAverage("sgd", num_updates=valid)
# Example 14
def test_opt_failure():
    """Constructing the wrapper around None must raise TypeError."""
    with pytest.raises(TypeError):
        MovingAverage(None, 0.5)
# Example 15
def test_serialization():
    """Keras serialize/deserialize round-trip preserves the full config."""
    inner = tf.keras.optimizers.SGD(lr=2.0, nesterov=True, momentum=0.3, decay=0.1)
    original = MovingAverage(inner, average_decay=0.5, num_updates=None)
    restored = tf.keras.optimizers.deserialize(
        tf.keras.optimizers.serialize(original))
    assert restored.get_config() == original.get_config()
# Example 16
def wrap(opt):
    """Return ``opt`` wrapped first in Lookahead, then in MovingAverage."""
    lookahead = Lookahead(opt)
    return MovingAverage(lookahead)
# Example 17
def test_num_updates_invalid():
    """Floats, float Variables, and strings are rejected as num_updates."""
    for bad in (1.0, tf.Variable(1.0), "a"):
        with pytest.raises(TypeError):
            MovingAverage("sgd", num_updates=bad)