    def testMinimizeWith2DIndiciesForEmbeddingLookup(self):
        # This test invokes the ResourceSparseApplyConditionalGradient
        # operation.
        var0 = tf.Variable(tf.ones([2, 2]))

        def loss():
            return tf.math.reduce_sum(tf.nn.embedding_lookup(var0, [[1]]))

        # The gradient of this loss function is:
        grads0 = tf.constant([[0, 0], [1, 1]], dtype=tf.float32)
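        # Only row 1 of var0 participates in the lookup, so both of its
        # entries receive gradient 1 while row 0 receives 0.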
        norm0 = tf.math.reduce_sum(grads0**2)**0.5

        learning_rate = 0.1
        lambda_ = 0.1
        opt = cg_lib.ConditionalGradient(learning_rate=learning_rate,
                                         lambda_=lambda_)
        cg_op = opt.minimize(loss, var_list=[var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Run 1 step of cg_op
        self.evaluate(cg_op)
        norm0 = self.evaluate(norm0)
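        # Only the looked-up row (index 1) is updated by the sparse op, so
        # row 0 keeps its initial value [1, 1]; the updated row follows
        #     var <- learning_rate * var
        #            - (1 - learning_rate) * lambda_ * grad / norm(grad).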
        self.assertAllCloseAccordingToType(
            [[1, 1],
             [
                 learning_rate * 1 - (1 - learning_rate) * lambda_ * 1 / norm0,
                 learning_rate * 1 - (1 - learning_rate) * lambda_ * 1 / norm0
             ]], self.evaluate(var0))
    def testVariablesAcrossGraphs(self):
        optimizer = cg_lib.ConditionalGradient(0.01, 0.5)
        with tf.Graph().as_default():
            var0 = tf.Variable([1.0, 2.0], dtype=tf.float32, name="var0")
            var1 = tf.Variable([3.0, 4.0], dtype=tf.float32, name="var1")
            loss = lambda: tf.math.reduce_sum(var0 + var1)
            optimizer.minimize(loss, var_list=[var0, var1])
            optimizer_variables = optimizer.variables()
            # There should be three items: the iteration counter first,
            # followed by one slot variable per model variable.
            self.assertStartsWith(optimizer_variables[1].name,
                                  "ConditionalGradient/var0")
            self.assertStartsWith(optimizer_variables[2].name,
                                  "ConditionalGradient/var1")
            self.assertEqual(3, len(optimizer_variables))
    def testMinimizeSparseResourceVariable(self):
        # This test invokes the ResourceSparseApplyConditionalGradient
        # operation, which calls the 'ResourceScatterUpdate' OpKernel on
        # 'GPU' devices. tf.half is not registered for that kernel (see
        # issue #347), so the dtypes to test are selected via "_DtypesToTest".
        #
        # TODO: once issue #347 is resolved, also test tf.half on 'GPU'
        #       devices.
        for dtype in self._DtypesToTest(use_gpu=tf.test.is_gpu_available()):
            var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

            def loss():
                x = tf.constant([[4.0], [5.0]], dtype=dtype)
                pred = tf.matmul(tf.nn.embedding_lookup([var0], [0]), x)
                return pred * pred

            # The gradient of this loss function is:
            grads0_0 = 32 * 1.0 + 40 * 2.0
            grads0_1 = 40 * 1.0 + 50 * 2.0
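            # With pred = var0 @ x = 1.0 * 4.0 + 2.0 * 5.0 = 14, the gradient
            # of pred * pred w.r.t. var0 is 2 * pred * x^T = [112, 140]; the
            # expressions above expand this elementwise.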
            grads0 = tf.constant([[grads0_0, grads0_1]], dtype=dtype)
            norm0 = tf.math.reduce_sum(grads0**2)**0.5

            learning_rate = 0.1
            lambda_ = 0.1
            opt = cg_lib.ConditionalGradient(learning_rate=learning_rate,
                                             lambda_=lambda_)
            cg_op = opt.minimize(loss, var_list=[var0])
            self.evaluate(tf.compat.v1.global_variables_initializer())

            # Run 1 step of cg_op
            self.evaluate(cg_op)

            # Validate updated params
            norm0 = self.evaluate(norm0)
            self.assertAllCloseAccordingToType(
                [[
                    1.0 * learning_rate -
                    (1 - learning_rate) * lambda_ * grads0_0 / norm0,
                    2.0 * learning_rate -
                    (1 - learning_rate) * lambda_ * grads0_1 / norm0,
                ]],
                self.evaluate(var0),
            )
    def testLikeDistBeliefCG01(self):
        with self.cached_session():
            db_grad, db_out = self._dbParamsCG01()
            num_samples = len(db_grad)
            var0 = tf.Variable([0.0] * num_samples)
            grads0 = tf.constant([0.0] * num_samples)
            cg_opt = cg_lib.ConditionalGradient(learning_rate=0.1, lambda_=0.1)
            if not tf.executing_eagerly():
                cg_update = cg_opt.apply_gradients(zip([grads0], [var0]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

            for i in range(num_samples):
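                # In graph mode, re-run the single cg_update op and feed the
                # i-th recorded gradient through feed_dict; in eager mode,
                # call apply_gradients directly with a fresh constant.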
                if tf.executing_eagerly():
                    grads0 = tf.constant(db_grad[i])
                    cg_opt.apply_gradients(zip([grads0], [var0]))
                else:
                    cg_update.run(feed_dict={grads0: db_grad[i]})
                self.assertAllClose(np.array(db_out[i]), self.evaluate(var0))
    def testSharing(self):
        for dtype in [tf.half, tf.float32, tf.float64]:
            with self.cached_session():
                var0 = tf.Variable([1.0, 2.0], dtype=dtype)
                var1 = tf.Variable([3.0, 4.0], dtype=dtype)
                grads0 = tf.constant([0.1, 0.1], dtype=dtype)
                grads1 = tf.constant([0.01, 0.01], dtype=dtype)
                norm0 = tf.math.reduce_sum(grads0**2)**0.5
                norm1 = tf.math.reduce_sum(grads1**2)**0.5
                learning_rate = 0.1
                lambda_ = 0.1
                cg_opt = cg_lib.ConditionalGradient(
                    learning_rate=learning_rate, lambda_=lambda_)
                cg_update1 = cg_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                cg_update2 = cg_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
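                # Both update ops are built from the same optimizer instance,
                # so they share the per-variable "conditional_gradient" slots
                # checked below.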
                if not tf.executing_eagerly():
                    self.evaluate(tf.compat.v1.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Check we have slots
                self.assertEqual(["conditional_gradient"],
                                 cg_opt.get_slot_names())
                slot0 = cg_opt.get_slot(var0, "conditional_gradient")
                self.assertEqual(slot0.get_shape(), var0.get_shape())
                slot1 = cg_opt.get_slot(var1, "conditional_gradient")
                self.assertEqual(slot1.get_shape(), var1.get_shape())

                if not tf.executing_eagerly():
                    self.assertFalse(
                        slot0 in tf.compat.v1.trainable_variables())
                    self.assertFalse(
                        slot1 in tf.compat.v1.trainable_variables())
                # In eager mode, building the two cg_update ops above already
                # executes them, so we cannot check the parameter values after
                # just the first update here; only the final values after the
                # second update can be checked.
                if not tf.executing_eagerly():
                    self.evaluate(cg_update1)
                    # Check that the parameters have been updated.
                    norm0 = self.evaluate(norm0)
                    norm1 = self.evaluate(norm1)
                    self.assertAllCloseAccordingToType(
                        np.array([
                            1.0 * learning_rate -
                            (1 - learning_rate) * lambda_ * 0.1 / norm0,
                            2.0 * learning_rate -
                            (1 - learning_rate) * lambda_ * 0.1 / norm0
                        ]), self.evaluate(var0))
                    self.assertAllCloseAccordingToType(
                        np.array([
                            3.0 * learning_rate -
                            (1 - learning_rate) * lambda_ * 0.01 / norm1,
                            4.0 * learning_rate -
                            (1 - learning_rate) * lambda_ * 0.01 / norm1
                        ]), self.evaluate(var1))

                # Step 2: the second update is applied on top of the
                # result of the first.
                if not tf.executing_eagerly():
                    self.evaluate(cg_update2)
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([(1.0 * learning_rate -
                               (1 - learning_rate) * lambda_ * 0.1 / norm0) *
                              learning_rate -
                              (1 - learning_rate) * lambda_ * 0.1 / norm0,
                              (2.0 * learning_rate -
                               (1 - learning_rate) * lambda_ * 0.1 / norm0) *
                              learning_rate -
                              (1 - learning_rate) * lambda_ * 0.1 / norm0]),
                    self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([(3.0 * learning_rate -
                               (1 - learning_rate) * lambda_ * 0.01 / norm1) *
                              learning_rate -
                              (1 - learning_rate) * lambda_ * 0.01 / norm1,
                              (4.0 * learning_rate -
                               (1 - learning_rate) * lambda_ * 0.01 / norm1) *
                              learning_rate -
                              (1 - learning_rate) * lambda_ * 0.01 / norm1]),
                    self.evaluate(var1))
    def testSparse(self):
        # TODO: Revisit once issue #347 is resolved (see the comment in
        #       testMinimizeSparseResourceVariable above).
        for dtype in self._DtypesToTest(use_gpu=tf.test.is_gpu_available()):
            with self.cached_session():
                var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
                var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
                grads0 = tf.IndexedSlices(
                    tf.constant([[.1, .1]], dtype=dtype),
                    tf.constant([1]), tf.constant([4, 2]))
                grads1 = tf.IndexedSlices(
                    tf.constant([[.01, .01], [.01, .01]], dtype=dtype),
                    tf.constant([2, 3]), tf.constant([4, 2]))
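                # grads0 only carries a slice for row 1 of var0, and grads1
                # carries slices for rows 2 and 3 of var1, so the assertions
                # below cover both untouched and updated rows.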
                norm0 = tf.math.reduce_sum(tf.math.multiply(grads0,
                                                            grads0))**0.5
                norm1 = tf.math.reduce_sum(tf.math.multiply(grads1,
                                                            grads1))**0.5
                learning_rate = 0.1
                lambda_ = 0.1
                cg_opt = cg_lib.ConditionalGradient(
                    learning_rate=learning_rate, lambda_=lambda_)
                cg_update = cg_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))

                if not tf.executing_eagerly():
                    self.evaluate(tf.compat.v1.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([0, 0], self.evaluate(var0)[0])
                    self.assertAllClose([0, 0], self.evaluate(var0)[1])
                    self.assertAllClose([1, 1], self.evaluate(var1)[2])

                # Check we have slots
                self.assertEqual(["conditional_gradient"],
                                 cg_opt.get_slot_names())
                slot0 = cg_opt.get_slot(var0, "conditional_gradient")
                self.assertEqual(slot0.get_shape(), var0.get_shape())
                slot1 = cg_opt.get_slot(var1, "conditional_gradient")
                self.assertEqual(slot1.get_shape(), var1.get_shape())

                if not tf.executing_eagerly():
                    self.assertFalse(
                        slot0 in tf.compat.v1.trainable_variables())
                    self.assertFalse(
                        slot1 in tf.compat.v1.trainable_variables())

                # Step 1:
                if not tf.executing_eagerly():
                    self.evaluate(cg_update)
                # Check that the parameters have been updated.
                norm0 = self.evaluate(norm0)
                norm1 = self.evaluate(norm1)
                self.assertAllCloseAccordingToType(
                    np.array([
                        0 - (1 - learning_rate) * lambda_ * 0 / norm0,
                        0 - (1 - learning_rate) * lambda_ * 0 / norm0
                    ]),
                    self.evaluate(var0)[0])
                self.assertAllCloseAccordingToType(
                    np.array([
                        0 - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                        0 - (1 - learning_rate) * lambda_ * 0.1 / norm0
                    ]),
                    self.evaluate(var0)[1])
                self.assertAllCloseAccordingToType(
                    np.array([
                        1.0 * learning_rate -
                        (1 - learning_rate) * lambda_ * 0.01 / norm1,
                        1.0 * learning_rate -
                        (1 - learning_rate) * lambda_ * 0.01 / norm1
                    ]),
                    self.evaluate(var1)[2])
                # Step 2: the second update is applied on top of the
                # result of the first.
                if tf.executing_eagerly():
                    cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
                else:
                    self.evaluate(cg_update)
                # Check that the parameters have been updated.
                self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0])
                self.assertAllCloseAccordingToType(
                    np.array([
                        (0 - (1 - learning_rate) * lambda_ * 0.1 / norm0) *
                        learning_rate -
                        (1 - learning_rate) * lambda_ * 0.1 / norm0,
                        (0 - (1 - learning_rate) * lambda_ * 0.1 / norm0) *
                        learning_rate -
                        (1 - learning_rate) * lambda_ * 0.1 / norm0
                    ]),
                    self.evaluate(var0)[1])
                self.assertAllCloseAccordingToType(
                    np.array([(1.0 * learning_rate -
                               (1 - learning_rate) * lambda_ * 0.01 / norm1) *
                              learning_rate -
                              (1 - learning_rate) * lambda_ * 0.01 / norm1,
                              (1.0 * learning_rate -
                               (1 - learning_rate) * lambda_ * 0.01 / norm1) *
                              learning_rate -
                              (1 - learning_rate) * lambda_ * 0.01 / norm1]),
                    self.evaluate(var1)[2])
    def doTestBasic(self, use_resource=False, use_callable_params=False):
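        # use_resource only controls whether the variables get explicit names
        # here; use_callable_params passes learning_rate and lambda_ to the
        # optimizer as zero-argument callables instead of plain floats.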
        for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
            if use_resource:
                var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0_%d" % i)
                var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1_%d" % i)
            else:
                var0 = tf.Variable([1.0, 2.0], dtype=dtype)
                var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
            norm0 = tf.math.reduce_sum(grads0**2)**0.5
            norm1 = tf.math.reduce_sum(grads1**2)**0.5
            learning_rate = lambda: 0.5
            lambda_ = lambda: 0.01
            if not use_callable_params:
                learning_rate = learning_rate()
                lambda_ = lambda_()
            cg_opt = cg_lib.ConditionalGradient(learning_rate=learning_rate,
                                                lambda_=lambda_)
            cg_update = cg_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))

            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Check we have slots
            self.assertEqual(["conditional_gradient"], cg_opt.get_slot_names())
            slot0 = cg_opt.get_slot(var0, "conditional_gradient")
            self.assertEqual(slot0.get_shape(), var0.get_shape())
            slot1 = cg_opt.get_slot(var1, "conditional_gradient")
            self.assertEqual(slot1.get_shape(), var1.get_shape())

            if not tf.executing_eagerly():
                self.assertFalse(slot0 in tf.compat.v1.trainable_variables())
                self.assertFalse(slot1 in tf.compat.v1.trainable_variables())

            if not tf.executing_eagerly():
                self.evaluate(cg_update)

            # Check that the parameters have been updated.
            norm0 = self.evaluate(norm0)
            norm1 = self.evaluate(norm1)
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
                    2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0
                ]), self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([
                    3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
                    4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1
                ]), self.evaluate(var1))

            # Step 2: the second update is applied on top of the result of
            # the first.
            if tf.executing_eagerly():
                cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            else:
                self.evaluate(cg_update)
            self.assertAllCloseAccordingToType(
                np.array([(1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5 -
                          (1 - 0.5) * 0.01 * 0.1 / norm0,
                          (2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5 -
                          (1 - 0.5) * 0.01 * 0.1 / norm0]),
                self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([(3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5 -
                          (1 - 0.5) * 0.01 * 0.01 / norm1,
                          (4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5 -
                          (1 - 0.5) * 0.01 * 0.01 / norm1]),
                self.evaluate(var1))
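    # doTestBasic is a parameterized helper; a caller (sketch, the concrete
    # test methods are outside this excerpt) would invoke it along the lines
    # of:
    #     self.doTestBasic(use_resource=True, use_callable_params=True)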
    def testTensorLearningRateAndConditionalGradient(self):
        for dtype in [tf.half, tf.float32, tf.float64]:
            with self.cached_session():
                var0 = tf.Variable([1.0, 2.0], dtype=dtype)
                var1 = tf.Variable([3.0, 4.0], dtype=dtype)
                grads0 = tf.constant([0.1, 0.1], dtype=dtype)
                grads1 = tf.constant([0.01, 0.01], dtype=dtype)
                norm0 = tf.math.reduce_sum(grads0**2)**0.5
                norm1 = tf.math.reduce_sum(grads1**2)**0.5
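                # Same scenario as doTestBasic, except that learning_rate and
                # lambda_ are supplied as tf.Tensor values rather than Python
                # floats.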
                cg_opt = cg_lib.ConditionalGradient(
                    learning_rate=tf.constant(0.5), lambda_=tf.constant(0.01))
                cg_update = cg_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                if not tf.executing_eagerly():
                    self.evaluate(tf.compat.v1.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Check we have slots
                self.assertEqual(["conditional_gradient"],
                                 cg_opt.get_slot_names())
                slot0 = cg_opt.get_slot(var0, "conditional_gradient")
                self.assertEqual(slot0.get_shape(), var0.get_shape())
                slot1 = cg_opt.get_slot(var1, "conditional_gradient")
                self.assertEqual(slot1.get_shape(), var1.get_shape())

                if not tf.executing_eagerly():
                    self.assertFalse(
                        slot0 in tf.compat.v1.trainable_variables())
                    self.assertFalse(
                        slot1 in tf.compat.v1.trainable_variables())

                if not tf.executing_eagerly():
                    self.evaluate(cg_update)
                # Check that the parameters have been updated.
                norm0 = self.evaluate(norm0)
                norm1 = self.evaluate(norm1)
                self.assertAllCloseAccordingToType(
                    np.array([
                        1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
                        2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0
                    ]), self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([
                        3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
                        4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1
                    ]), self.evaluate(var1))
                # Step 2: the second update is applied on top of the
                # result of the first.
                if tf.executing_eagerly():
                    cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
                else:
                    self.evaluate(cg_update)
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([
                        (1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5 -
                        (1 - 0.5) * 0.01 * 0.1 / norm0,
                        (2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5 -
                        (1 - 0.5) * 0.01 * 0.1 / norm0
                    ]), self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([
                        (3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5 -
                        (1 - 0.5) * 0.01 * 0.01 / norm1,
                        (4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5 -
                        (1 - 0.5) * 0.01 * 0.01 / norm1
                    ]), self.evaluate(var1))
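
    # A minimal, hypothetical helper (not used by the tests above) spelling
    # out the dense update rule that the expected values in this file encode;
    # `var` and `grad` are assumed to be array-likes, `learning_rate` and
    # `lambda_` Python floats.
    def _expected_dense_update(self, var, grad, learning_rate, lambda_):
        # var <- learning_rate * var - (1 - learning_rate) * lambda_ * grad / ||grad||
        var = np.asarray(var, dtype=np.float64)
        grad = np.asarray(grad, dtype=np.float64)
        norm = np.sqrt(np.sum(grad ** 2))
        return learning_rate * var - (1 - learning_rate) * lambda_ * grad / norm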