Example 1
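All of the snippets on this page are test cases for the Yogi optimizer (the layout matches TensorFlow Addons' yogi_test.py) and share a preamble that the page omits. A minimal sketch of the assumed imports; the tensorflow_addons paths are assumptions inferred from the code:

# Assumed shared preamble (not shown on this page).
import numpy as np
import tensorflow as tf

from tensorflow_addons.optimizers import yogi   # assumed import path
from tensorflow_addons.utils import test_utils  # assumed import path

# _dtypes_to_test, get_beta_accumulators and yogi_update_numpy are
# module-level helpers defined elsewhere in the same test file; a sketch
# of yogi_update_numpy appears after Example 2.
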
def test_sparse_repeated_indices():
    for dtype in _dtypes_to_test(use_gpu=tf.test.is_gpu_available()):
        repeated_index_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
        aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
        grad_repeated_index = tf.IndexedSlices(
            tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]),
            tf.constant([2, 1]),
        )
        grad_aggregated = tf.IndexedSlices(
            tf.constant([0.2], shape=[1, 1], dtype=dtype),
            tf.constant([1]),
            tf.constant([2, 1]),
        )
        opt1 = yogi.Yogi()
        opt2 = yogi.Yogi()

        np.testing.assert_allclose(
            aggregated_update_var.numpy(),
            repeated_index_update_var.numpy(),
        )

        for _ in range(3):
            opt1.apply_gradients([(grad_repeated_index,
                                   repeated_index_update_var)])
            opt2.apply_gradients([(grad_aggregated, aggregated_update_var)])

            np.testing.assert_allclose(
                aggregated_update_var.numpy(),
                repeated_index_update_var.numpy(),
            )
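
This test relies on duplicate rows of a tf.IndexedSlices being summed when the slices are densified, so applying 0.1 twice at index 1 must match a single pre-aggregated update of 0.2. A standalone sketch of that equivalence (not part of the test file):

# Duplicate indices are summed when an IndexedSlices is densified.
dup = tf.IndexedSlices(
    tf.constant([[0.1], [0.1]]), tf.constant([1, 1]), tf.constant([2, 1])
)
print(tf.convert_to_tensor(dup))  # -> [[0.0], [0.2]]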
Example 2
def test_sharing():
    for dtype in _dtypes_to_test(use_gpu=tf.test.is_gpu_available()):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 1.0, 0.0, 1.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = yogi.Yogi(initial_accumulator_value=1.0)

        # Fetch params to validate initial values.
        np.testing.assert_allclose(np.asanyarray([1.0, 2.0]), var0.numpy())
        np.testing.assert_allclose(np.asanyarray([3.0, 4.0]), var1.numpy())

        # Run 3 steps of Yogi; the two variables share one optimizer's slots.
        for t in range(1, 4):
            beta1_power, beta2_power = get_beta_accumulators(opt, dtype)
            test_utils.assert_allclose_according_to_type(0.9**t, beta1_power)
            test_utils.assert_allclose_according_to_type(0.999**t, beta2_power)
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            var0_np, m0, v0 = yogi_update_numpy(var0_np, grads0_np, t, m0, v0)
            var1_np, m1, v1 = yogi_update_numpy(var1_np, grads1_np, t, m1, v1)

            # Validate updated params.
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
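
This test (and Examples 4-6 below) validates the optimizer against a numpy oracle, yogi_update_numpy, that the page never shows. A minimal sketch of that oracle, assuming the bias-corrected update rule from the Yogi paper (Zaheer et al., 2018); the defaults mirror the TFA Yogi signature but are assumptions here, and the l1reg/l2reg handling that Example 4 exercises is omitted:

def yogi_update_numpy(param, g_t, t, m, v,
                      alpha=0.01, beta1=0.9, beta2=0.999, epsilon=1e-3):
    # Adam-style bias correction, matching the beta-power checks above.
    lr_t = alpha * np.sqrt(1 - beta2**t) / (1 - beta1**t)
    m_t = beta1 * m + (1 - beta1) * g_t  # first moment (EMA of gradients)
    g2_t = g_t * g_t
    # Yogi's second moment: an additive, sign-based update instead of
    # Adam's EMA, so v moves by at most (1 - beta2) * g^2 per step.
    v_t = v - (1 - beta2) * np.sign(v - g2_t) * g2_t
    param_t = param - lr_t * m_t / (np.sqrt(v_t) + epsilon)
    return param_t, m_t, v_t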
Example 3
    def testSparseRepeatedIndices(self):
        for dtype in self._DtypesToTest(use_gpu=tf.test.is_gpu_available()):
            repeated_index_update_var = tf.Variable([[1.0], [2.0]],
                                                    dtype=dtype)
            aggregated_update_var = tf.Variable([[1.0], [2.0]], dtype=dtype)
            grad_repeated_index = tf.IndexedSlices(
                tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
                tf.constant([1, 1]),
                tf.constant([2, 1]),
            )
            grad_aggregated = tf.IndexedSlices(
                tf.constant([0.2], shape=[1, 1], dtype=dtype),
                tf.constant([1]),
                tf.constant([2, 1]),
            )
            opt1 = yogi.Yogi()
            opt2 = yogi.Yogi()

            if not tf.executing_eagerly():
                repeated_update = opt1.apply_gradients([
                    (grad_repeated_index, repeated_index_update_var)
                ])
                aggregated_update = opt2.apply_gradients([
                    (grad_aggregated, aggregated_update_var)
                ])
                self.evaluate(tf.compat.v1.global_variables_initializer())

            self.assertAllClose(
                self.evaluate(aggregated_update_var),
                self.evaluate(repeated_index_update_var),
            )

            for _ in range(3):
                if not tf.executing_eagerly():
                    self.evaluate(repeated_update)
                    self.evaluate(aggregated_update)
                else:
                    opt1.apply_gradients([(grad_repeated_index,
                                           repeated_index_update_var)])
                    opt2.apply_gradients([(grad_aggregated,
                                           aggregated_update_var)])

                self.assertAllClose(
                    self.evaluate(aggregated_update_var),
                    self.evaluate(repeated_index_update_var),
                )
Example 4
def do_test_sparse(beta1=0.0, l1reg=0.0, l2reg=0.0):
    for dtype in _dtypes_to_test(use_gpu=test_utils.is_gpu_available()):
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 1.0, 0.0, 1.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0_np_indices = np.array([0, 1], dtype=np.int32)
        grads0 = tf.IndexedSlices(tf.constant(grads0_np),
                                  tf.constant(grads0_np_indices),
                                  tf.constant([2]))
        grads1_np_indices = np.array([0, 1], dtype=np.int32)
        grads1 = tf.IndexedSlices(tf.constant(grads1_np),
                                  tf.constant(grads1_np_indices),
                                  tf.constant([2]))
        opt = yogi.Yogi(
            beta1=beta1,
            l1_regularization_strength=l1reg,
            l2_regularization_strength=l2reg,
            initial_accumulator_value=1.0,
        )

        # Fetch params to validate initial values.
        np.testing.assert_allclose(np.asanyarray([1.0, 2.0]), var0.numpy())
        np.testing.assert_allclose(np.asanyarray([3.0, 4.0]), var1.numpy())

        # Run 3 steps of Yogi.
        for t in range(1, 4):
            beta1_power, beta2_power = get_beta_accumulators(opt, dtype)
            test_utils.assert_allclose_according_to_type(beta1**t, beta1_power)
            test_utils.assert_allclose_according_to_type(0.999**t, beta2_power)
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

            var0_np, m0, v0 = yogi_update_numpy(var0_np,
                                                grads0_np,
                                                t,
                                                m0,
                                                v0,
                                                beta1=beta1,
                                                l1reg=l1reg,
                                                l2reg=l2reg)
            var1_np, m1, v1 = yogi_update_numpy(var1_np,
                                                grads1_np,
                                                t,
                                                m1,
                                                v1,
                                                beta1=beta1,
                                                l1reg=l1reg,
                                                l2reg=l2reg)

            # Validate updated params.
            test_utils.assert_allclose_according_to_type(var0_np, var0.numpy())
            test_utils.assert_allclose_according_to_type(var1_np, var1.numpy())
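
The page does not show how do_test_sparse is invoked. In a pytest-style suite it would typically be wrapped by thin named drivers along these lines (the names and argument values are hypothetical):

# Hypothetical drivers for do_test_sparse; the real names may differ.
def test_sparse():
    do_test_sparse()

def test_sparse_momentum():
    do_test_sparse(beta1=0.9)

def test_sparse_momentum_regularization():
    do_test_sparse(beta1=0.9, l1reg=0.1, l2reg=0.2)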
Example 5
    def testSharing(self):
        for dtype in self._DtypesToTest(use_gpu=tf.test.is_gpu_available()):
            # Initialize variables for numpy implementation.
            m0, v0, m1, v1 = 0.0, 1.0, 0.0, 1.0
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)
            opt = yogi.Yogi(initial_accumulator_value=1.0)

            if not tf.executing_eagerly():
                update1 = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                update2 = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

            # Fetch params to validate initial values.
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 3 steps, alternating between the two update ops built
            # from the same optimizer.
            for t in range(1, 4):
                beta1_power, beta2_power = get_beta_accumulators(opt, dtype)
                self.assertAllCloseAccordingToType(0.9**t,
                                                   self.evaluate(beta1_power))
                self.assertAllCloseAccordingToType(0.999**t,
                                                   self.evaluate(beta2_power))
                if not tf.executing_eagerly():
                    if t % 2 == 0:
                        self.evaluate(update1)
                    else:
                        self.evaluate(update2)
                else:
                    opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

                var0_np, m0, v0 = yogi_update_numpy(var0_np, grads0_np, t, m0,
                                                    v0)
                var1_np, m1, v1 = yogi_update_numpy(var1_np, grads1_np, t, m1,
                                                    v1)

                # Validate updated params.
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))
Example 6
    def doTestSparse(self, beta1=0.0, l1reg=0.0, l2reg=0.0):
        for dtype in self._DtypesToTest(use_gpu=tf.test.is_gpu_available()):
            # Initialize variables for numpy implementation.
            m0, v0, m1, v1 = 0.0, 1.0, 0.0, 1.0
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0_np_indices = np.array([0, 1], dtype=np.int32)
            grads0 = tf.IndexedSlices(tf.constant(grads0_np),
                                      tf.constant(grads0_np_indices),
                                      tf.constant([2]))
            grads1_np_indices = np.array([0, 1], dtype=np.int32)
            grads1 = tf.IndexedSlices(tf.constant(grads1_np),
                                      tf.constant(grads1_np_indices),
                                      tf.constant([2]))
            opt = yogi.Yogi(
                beta1=beta1,
                l1_regularization_strength=l1reg,
                l2_regularization_strength=l2reg,
                initial_accumulator_value=1.0,
            )
            if not tf.executing_eagerly():
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

            # Fetch params to validate initial values.
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 3 steps of Yogi.
            for t in range(1, 4):
                beta1_power, beta2_power = get_beta_accumulators(opt, dtype)
                self.assertAllCloseAccordingToType(beta1**t,
                                                   self.evaluate(beta1_power))
                self.assertAllCloseAccordingToType(0.999**t,
                                                   self.evaluate(beta2_power))
                if not tf.executing_eagerly():
                    self.evaluate(update)
                else:
                    opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

                var0_np, m0, v0 = yogi_update_numpy(var0_np,
                                                    grads0_np,
                                                    t,
                                                    m0,
                                                    v0,
                                                    beta1=beta1,
                                                    l1reg=l1reg,
                                                    l2reg=l2reg)
                var1_np, m1, v1 = yogi_update_numpy(var1_np,
                                                    grads1_np,
                                                    t,
                                                    m1,
                                                    v1,
                                                    beta1=beta1,
                                                    l1reg=l1reg,
                                                    l2reg=l2reg)

                # Validate updated params.
                self.assertAllCloseAccordingToType(
                    var0_np,
                    self.evaluate(var0),
                    msg="Updated params 0 do not match in NP and TF",
                )
                self.assertAllCloseAccordingToType(
                    var1_np,
                    self.evaluate(var1),
                    msg="Updated params 1 do not match in NP and TF",
                )
Example 7
    def test_get_config(self):
        opt = yogi.Yogi(1e-4)
        config = opt.get_config()
        self.assertEqual(config["learning_rate"], 1e-4)
Example 8
def test_serialization():
    optimizer = yogi.Yogi(1e-4)
    config = tf.keras.optimizers.serialize(optimizer)
    new_optimizer = tf.keras.optimizers.deserialize(config)
    assert new_optimizer.get_config() == optimizer.get_config()
Example 9
def test_get_config():
    opt = yogi.Yogi(1e-4)
    config = opt.get_config()
    assert config["learning_rate"] == 1e-4