Example #1
0
def test_swap_weights(device):
    """Check that ``swap_weights`` exchanges a variable with its average.

    Applies one SGD update through ``MovingAverage``, then verifies that
    ``shadow_copy`` followed by ``swap_weights`` exchanges the values held by
    the model variable and its "average" slot, and that a second
    ``swap_weights`` call puts both back where they were.

    Args:
        device: Object exposing ``scope()`` and ``run()``; presumably a test
            fixture wrapping a device or distribution strategy -- TODO
            confirm against the fixture definition.
    """
    with device.scope():
        var = tf.Variable([1.0, 2.0])
        grads = tf.constant([0.1, 0.1])

        # `learning_rate` is the documented argument name; `lr` is only a
        # deprecated alias.
        opt = MovingAverage(
            tf.keras.optimizers.SGD(learning_rate=2.0), average_decay=0.5
        )

    @tf.function
    def apply_gradients():
        opt.apply_gradients([(grads, var)])

    device.run(apply_gradients)

    # State after a single update: the variable and its "average" slot differ.
    np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
    ema_var = opt.get_slot(var, "average")
    np.testing.assert_allclose(ema_var.read_value(), [0.85, 1.85])

    with device.scope():
        opt.shadow_copy([var])
        opt.swap_weights()

    # After the first swap the two hold each other's previous values.
    np.testing.assert_allclose(ema_var.read_value(), [0.8, 1.8])
    np.testing.assert_allclose(var.read_value(), [0.85, 1.85])

    with device.scope():
        opt.swap_weights()

    # A second swap restores the pre-swap arrangement.
    np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
    np.testing.assert_allclose(ema_var.read_value(), [0.85, 1.85])
Example #2
0
    def test_run(self):
        """Apply two updates and check variables and their "average" slots.

        The scenario is repeated for ``sequential_update`` set to ``True`` and
        ``False``. The variable values after the two updates are checked in
        both cases; the "average" slot values (and everything derived from
        them) are only pinned when ``sequential_update`` is ``True``.
        """
        for sequential_update in [True, False]:
            var0 = tf.Variable([1.0, 2.0])
            var1 = tf.Variable([3.0, 4.0])

            grads0 = tf.constant([0.1, 0.1])
            grads1 = tf.constant([0.01, 0.01])

            grads_and_vars = list(zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

            # `learning_rate` is the documented argument name; `lr` is only a
            # deprecated alias.
            opt = MovingAverage(
                tf.keras.optimizers.SGD(learning_rate=2.0),
                sequential_update=sequential_update,
                average_decay=0.5,
            )

            if not tf.executing_eagerly():
                # Graph mode: apply_gradients returns something that has to be
                # evaluated explicitly, once per update step.
                update = opt.apply_gradients(grads_and_vars)
                # NOTE(review): the initializer is run a second time here,
                # presumably to cover variables created by the optimizer --
                # confirm.
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.evaluate(update)
                self.evaluate(update)
            else:
                # Eager mode: each call performs one update immediately.
                opt.apply_gradients(grads_and_vars)
                opt.apply_gradients(grads_and_vars)

            self.assertAllClose(var0.read_value(), [0.6, 1.6])
            self.assertAllClose(var1.read_value(), [2.96, 3.96])

            ema_var0 = opt.get_slot(var0, "average")
            ema_var1 = opt.get_slot(var1, "average")

            if sequential_update:
                self.assertAllClose(ema_var0.read_value(), [0.75, 1.75])
                self.assertAllClose(ema_var1.read_value(), [2.975, 3.975])

            # Copy the averaged values into the model variables.
            assign = opt.assign_average_vars([var0, var1])
            self.evaluate(assign)

            if sequential_update:
                self.assertAllClose(var0.read_value(), [0.75, 1.75])
                self.assertAllClose(var1.read_value(), [2.975, 3.975])

            # Shift the variables and the slots by different amounts; the
            # checks below expect each one to move by exactly its own offset.
            perturb = tf.group([
                var0.assign_add([1.0, 1.0]),
                var1.assign_add([2.0, 2.0]),
                ema_var0.assign_add([3.0, 3.0]),
                ema_var1.assign_add([4.0, 4.0]),
            ])
            self.evaluate(perturb)

            if sequential_update:
                self.assertAllClose(var0.read_value(), [1.75, 2.75])
                self.assertAllClose(var1.read_value(), [4.975, 5.975])
                self.assertAllClose(ema_var0.read_value(), [3.75, 4.75])
                self.assertAllClose(ema_var1.read_value(), [6.975, 7.975])
Example #3
0
def test_dynamic_decay():
    """Check the "average" slot after two updates with ``dynamic_decay=True``.

    Two SGD updates are applied through ``MovingAverage`` configured with
    ``average_decay=0.5`` and ``dynamic_decay=True``; the test then pins the
    resulting variable value and the value stored in its "average" slot.
    """
    var0 = tf.Variable([1.0, 2.0])
    grads0 = tf.constant([0.1, 0.1])
    grads_and_vars = [(grads0, var0)]

    # `learning_rate` is the documented argument name; `lr` is only a
    # deprecated alias.
    opt = MovingAverage(
        tf.keras.optimizers.SGD(learning_rate=2.0),
        average_decay=0.5,
        dynamic_decay=True,
    )

    opt.apply_gradients(grads_and_vars)
    opt.apply_gradients(grads_and_vars)

    np.testing.assert_allclose(var0.read_value(), [0.6, 1.6])

    ema_var0 = opt.get_slot(var0, "average")
    np.testing.assert_allclose(ema_var0.read_value(), [0.64, 1.64])
Example #4
0
def test_run(sequential_update):
    """Apply two updates and check variables and their "average" slots.

    The variable values after the two updates are always checked; the
    "average" slot values (and everything derived from them) are only pinned
    when ``sequential_update`` is truthy.

    Args:
        sequential_update: Value forwarded to ``MovingAverage`` as its
            ``sequential_update`` argument; presumably supplied by a pytest
            parametrization or fixture -- TODO confirm.
    """
    var0 = tf.Variable([1.0, 2.0])
    var1 = tf.Variable([3.0, 4.0])

    grads0 = tf.constant([0.1, 0.1])
    grads1 = tf.constant([0.01, 0.01])

    grads_and_vars = list(zip([grads0, grads1], [var0, var1]))

    # `learning_rate` is the documented argument name; `lr` is only a
    # deprecated alias.
    opt = MovingAverage(
        tf.keras.optimizers.SGD(learning_rate=2.0),
        sequential_update=sequential_update,
        average_decay=0.5,
    )

    opt.apply_gradients(grads_and_vars)
    opt.apply_gradients(grads_and_vars)

    np.testing.assert_allclose(var0.read_value(), [0.6, 1.6])
    np.testing.assert_allclose(var1.read_value(), [2.96, 3.96])

    ema_var0 = opt.get_slot(var0, "average")
    ema_var1 = opt.get_slot(var1, "average")

    if sequential_update:
        np.testing.assert_allclose(ema_var0.read_value(), [0.75, 1.75])
        np.testing.assert_allclose(ema_var1.read_value(), [2.975, 3.975])

    # Copy the averaged values into the model variables.
    _ = opt.assign_average_vars([var0, var1])

    if sequential_update:
        np.testing.assert_allclose(var0.read_value(), [0.75, 1.75])
        np.testing.assert_allclose(var1.read_value(), [2.975, 3.975])

    # Shift the variables and the slots by different amounts; the checks
    # below expect each one to move by exactly its own offset.
    # (Plain statements: no trailing commas, which would wrap each result in
    # a throw-away one-element tuple.)
    var0.assign_add([1.0, 1.0])
    var1.assign_add([2.0, 2.0])
    ema_var0.assign_add([3.0, 3.0])
    ema_var1.assign_add([4.0, 4.0])

    if sequential_update:
        np.testing.assert_allclose(var0.read_value(), [1.75, 2.75])
        np.testing.assert_allclose(var1.read_value(), [4.975, 5.975])
        np.testing.assert_allclose(ema_var0.read_value(), [3.75, 4.75])
        np.testing.assert_allclose(ema_var1.read_value(), [6.975, 7.975])
Example #5
0
def test_start_step():
    """Check the "average" slot after two updates with ``start_step=1``.

    Two SGD updates are applied through ``MovingAverage`` configured with
    ``average_decay=0.5`` and ``start_step=1``. The variable is checked after
    each update; the "average" slot is fetched after the first update and its
    value is checked after the second.
    """
    var0 = tf.Variable([1.0, 2.0])
    grads0 = tf.constant([0.1, 0.1])
    grads_and_vars = [(grads0, var0)]

    # `learning_rate` is the documented argument name; `lr` is only a
    # deprecated alias.
    opt = MovingAverage(
        tf.keras.optimizers.SGD(learning_rate=1.0),
        average_decay=0.5,
        start_step=1,
    )

    opt.apply_gradients(grads_and_vars)

    np.testing.assert_allclose(var0.read_value(), [0.9, 1.9])

    ema_var0 = opt.get_slot(var0, "average")

    opt.apply_gradients(grads_and_vars)

    np.testing.assert_allclose(var0.read_value(), [0.8, 1.8])

    # The expected average lies midway between the variable's values after
    # the first ([0.9, 1.9]) and second ([0.8, 1.8]) update.
    np.testing.assert_allclose(ema_var0.read_value(), [0.85, 1.85])