def test_minimize_sparse_resource_variable_frobenius(dtype, device):
    if "gpu" in device and dtype == tf.float16:
        pytest.xfail("See https://github.com/tensorflow/addons/issues/347")
    var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

    def loss():
        x = tf.constant([[4.0], [5.0]], dtype=dtype)
        pred = tf.matmul(tf.nn.embedding_lookup([var0], [0]), x)
        return pred * pred

    # The gradient based on the current loss function.
    grads0_0 = 32 * 1.0 + 40 * 2.0
    grads0_1 = 40 * 1.0 + 50 * 2.0
    grads0 = tf.constant([[grads0_0, grads0_1]], dtype=dtype)
    norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5

    learning_rate = 0.1
    lambda_ = 0.1
    ord = "fro"
    opt = cg_lib.ConditionalGradient(
        learning_rate=learning_rate, lambda_=lambda_, ord=ord
    )
    _ = opt.minimize(loss, var_list=[var0])

    test_utils.assert_allclose_according_to_type(
        [
            [
                1.0 * learning_rate
                - (1 - learning_rate) * lambda_ * grads0_0 / norm0,
                2.0 * learning_rate
                - (1 - learning_rate) * lambda_ * grads0_1 / norm0,
            ]
        ],
        var0.numpy(),
    )
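# For readability, the closed-form update asserted by the Frobenius-norm tests
# in this file can be written as a small reference helper:
#
#     var <- lr * var - (1 - lr) * lambda_ * grad / ||grad||_F
#
# This is an illustrative sketch for the reader only; the helper name is ours
# and it is not part of the library under test.
def _expected_frobenius_update(var, grad, learning_rate, lambda_):
    # Frobenius norm of the gradient.
    norm = tf.math.reduce_sum(grad ** 2) ** 0.5
    return learning_rate * var - (1 - learning_rate) * lambda_ * grad / norm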
def test_minimize_with_2D_indices_for_embedding_lookup_nuclear():
    # This test invokes the ResourceSparseApplyConditionalGradient
    # operation.
    var0 = tf.Variable(tf.ones([2, 2]))

    def loss():
        return tf.math.reduce_sum(tf.nn.embedding_lookup(var0, [[1]]))

    # The gradient for this loss function.
    grads0 = tf.constant([[0, 0], [1, 1]], dtype=tf.float32)
    top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(grads0)

    learning_rate = 0.1
    lambda_ = 0.1
    ord = "nuclear"
    opt = cg_lib.ConditionalGradient(
        learning_rate=learning_rate, lambda_=lambda_, ord=ord
    )
    # Run 1 step of cg_op.
    _ = opt.minimize(loss, var_list=[var0])

    test_utils.assert_allclose_according_to_type(
        [
            learning_rate * 1
            - (1 - learning_rate) * lambda_ * top_singular_vector0[1][0],
            learning_rate * 1
            - (1 - learning_rate) * lambda_ * top_singular_vector0[1][1],
        ],
        var0[1],
    )
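# `ConditionalGradient._top_singular_vector` is private to the library. As a
# reading aid, the rank-1 matrix it is expected to produce for a 2-D gradient
# can be sketched from the leading singular pair; this mirrors the hard-coded
# constants in test_sparse_nuclear below. The helper is our reference sketch,
# not the library implementation.
def _reference_top_singular_vector(matrix):
    # tf.linalg.svd returns (s, u, v) with v *not* transposed, so the
    # unit-Frobenius-norm rank-1 factor is u1 @ v1^T.
    _, u, v = tf.linalg.svd(matrix)
    return tf.matmul(u[:, 0:1], v[:, 0:1], transpose_b=True)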
def testMinimizeWith2DIndicesForEmbeddingLookup(self):
    # This test invokes the ResourceSparseApplyConditionalGradient
    # operation.
    var0 = tf.Variable(tf.ones([2, 2]))

    def loss():
        return tf.math.reduce_sum(tf.nn.embedding_lookup(var0, [[1]]))

    # The gradient for this loss function.
    grads0 = tf.constant([[0, 0], [1, 1]], dtype=tf.float32)
    norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5

    learning_rate = 0.1
    lambda_ = 0.1
    opt = cg_lib.ConditionalGradient(learning_rate=learning_rate, lambda_=lambda_)
    cg_op = opt.minimize(loss, var_list=[var0])
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # Run 1 step of cg_op.
    self.evaluate(cg_op)
    norm0 = self.evaluate(norm0)
    self.assertAllCloseAccordingToType(
        [
            [1, 1],
            [
                learning_rate * 1 - (1 - learning_rate) * lambda_ * 1 / norm0,
                learning_rate * 1 - (1 - learning_rate) * lambda_ * 1 / norm0,
            ],
        ],
        self.evaluate(var0),
    )
def test_sharing_frobenius(dtype):
    var0 = tf.Variable([1.0, 2.0], dtype=dtype)
    var1 = tf.Variable([3.0, 4.0], dtype=dtype)
    grads0 = tf.constant([0.1, 0.1], dtype=dtype)
    grads1 = tf.constant([0.01, 0.01], dtype=dtype)
    norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5
    norm1 = tf.math.reduce_sum(grads1 ** 2) ** 0.5
    learning_rate = 0.1
    lambda_ = 0.1
    ord = "fro"
    cg_opt = cg_lib.ConditionalGradient(
        learning_rate=learning_rate, lambda_=lambda_, ord=ord
    )
    _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

    # Check we have slots.
    assert ["conditional_gradient"] == cg_opt.get_slot_names()
    slot0 = cg_opt.get_slot(var0, "conditional_gradient")
    assert slot0.get_shape() == var0.get_shape()
    slot1 = cg_opt.get_slot(var1, "conditional_gradient")
    assert slot1.get_shape() == var1.get_shape()

    # In eager mode each cg_update executes as soon as it is declared, so we
    # cannot inspect the params between the two updates; we can only check
    # their final values after the second update.
    # Step 2: the second conditional_gradient update contains the previous
    # update.
    cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

    # Check that the parameters have been updated.
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                (1.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.1 / norm0)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                (2.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.1 / norm0)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.1 / norm0,
            ]
        ),
        var0.numpy(),
    )
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                (3.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.01 / norm1)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                (4.0 * learning_rate - (1 - learning_rate) * lambda_ * 0.01 / norm1)
                * learning_rate
                - (1 - learning_rate) * lambda_ * 0.01 / norm1,
            ]
        ),
        var1.numpy(),
    )
def test_tensor_learning_rate_and_conditional_gradient_frobenius(dtype):
    var0 = tf.Variable([1.0, 2.0], dtype=dtype)
    var1 = tf.Variable([3.0, 4.0], dtype=dtype)
    grads0 = tf.constant([0.1, 0.1], dtype=dtype)
    grads1 = tf.constant([0.01, 0.01], dtype=dtype)
    norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5
    norm1 = tf.math.reduce_sum(grads1 ** 2) ** 0.5
    ord = "fro"
    cg_opt = cg_lib.ConditionalGradient(
        learning_rate=tf.constant(0.5), lambda_=tf.constant(0.01), ord=ord
    )
    _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

    # Check we have slots.
    assert ["conditional_gradient"] == cg_opt.get_slot_names()
    slot0 = cg_opt.get_slot(var0, "conditional_gradient")
    assert slot0.get_shape() == var0.get_shape()
    slot1 = cg_opt.get_slot(var1, "conditional_gradient")
    assert slot1.get_shape() == var1.get_shape()

    # Check that the parameters have been updated.
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
                2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
            ]
        ),
        var0.numpy(),
    )
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
                4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
            ]
        ),
        var1.numpy(),
    )

    # Step 2: the conditional_gradient update contains the previous update.
    cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

    # Check that the parameters have been updated again.
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                (1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5
                - (1 - 0.5) * 0.01 * 0.1 / norm0,
                (2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5
                - (1 - 0.5) * 0.01 * 0.1 / norm0,
            ]
        ),
        var0.numpy(),
    )
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                (3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5
                - (1 - 0.5) * 0.01 * 0.01 / norm1,
                (4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5
                - (1 - 0.5) * 0.01 * 0.01 / norm1,
            ]
        ),
        var1.numpy(),
    )
def test_serialization():
    learning_rate = 0.1
    lambda_ = 0.1
    ord = "nuclear"
    optimizer = cg_lib.ConditionalGradient(
        learning_rate=learning_rate, lambda_=lambda_, ord=ord
    )
    config = tf.keras.optimizers.serialize(optimizer)
    new_optimizer = tf.keras.optimizers.deserialize(config)
    assert optimizer.get_config() == new_optimizer.get_config()
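# A minimal usage sketch showing that a round-tripped optimizer can be passed
# to Keras like any other optimizer. It assumes ConditionalGradient is
# registered with Keras serialization (as tensorflow_addons does); the tiny
# Sequential model is purely illustrative and not part of the test suite.
def _example_compile_with_roundtripped_optimizer():
    optimizer = cg_lib.ConditionalGradient(learning_rate=0.1, lambda_=0.1)
    restored = tf.keras.optimizers.deserialize(
        tf.keras.optimizers.serialize(optimizer)
    )
    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(2,))])
    model.compile(optimizer=restored, loss="mse")
    return model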
def test_like_dist_belief_frobenius_cg01():
    db_grad, db_out = _db_params_frobenius_cg01()
    num_samples = len(db_grad)
    var0 = tf.Variable([0.0] * num_samples)
    grads0 = tf.constant([0.0] * num_samples)
    ord = "fro"
    cg_opt = cg_lib.ConditionalGradient(learning_rate=0.1, lambda_=0.1, ord=ord)
    for i in range(num_samples):
        grads0 = tf.constant(db_grad[i])
        cg_opt.apply_gradients(zip([grads0], [var0]))
        np.testing.assert_allclose(
            np.array(db_out[i]), var0.numpy(), rtol=1e-06, atol=1e-06
        )
def test_variables_across_graphs_nuclear():
    optimizer = cg_lib.ConditionalGradient(0.01, 0.5, ord="nuclear")
    var0 = tf.Variable([1.0, 2.0], dtype=tf.float32, name="var0")
    var1 = tf.Variable([3.0, 4.0], dtype=tf.float32, name="var1")

    def loss():
        return tf.math.reduce_sum(var0 + var1)

    optimizer.minimize(loss, var_list=[var0, var1])
    optimizer_variables = optimizer.variables()
    # There should be three items: the iterations counter first, then one
    # slot variable per model variable.
    assert optimizer_variables[1].name.startswith("ConditionalGradient/var0")
    assert optimizer_variables[2].name.startswith("ConditionalGradient/var1")
    assert 3 == len(optimizer_variables)
def testMinimizeSparseResourceVariable(self):
    # This test invokes the ResourceSparseApplyConditionalGradient
    # operation, which in turn calls the 'ResourceScatterUpdate' OpKernel
    # on 'GPU' devices. tf.half is not registered for that kernel
    # (see issue #347), so we select dtypes via "_DtypesToTest".
    #
    # TODO: once issue #347 is resolved, also test dtype tf.half on
    # 'GPU' devices.
    for dtype in self._DtypesToTest(use_gpu=tf.test.is_gpu_available()):
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

        def loss():
            x = tf.constant([[4.0], [5.0]], dtype=dtype)
            pred = tf.matmul(tf.nn.embedding_lookup([var0], [0]), x)
            return pred * pred

        # The gradient based on the current loss function.
        grads0_0 = 32 * 1.0 + 40 * 2.0
        grads0_1 = 40 * 1.0 + 50 * 2.0
        grads0 = tf.constant([[grads0_0, grads0_1]], dtype=dtype)
        norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5

        learning_rate = 0.1
        lambda_ = 0.1
        opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_
        )
        cg_op = opt.minimize(loss, var_list=[var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Run 1 step of cg_op.
        self.evaluate(cg_op)

        # Validate the updated params.
        norm0 = self.evaluate(norm0)
        self.assertAllCloseAccordingToType(
            [
                [
                    1.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * grads0_0 / norm0,
                    2.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * grads0_1 / norm0,
                ]
            ],
            self.evaluate(var0),
        )
def testLikeDistBeliefCG01(self):
    with self.cached_session():
        db_grad, db_out = self._dbParamsCG01()
        num_samples = len(db_grad)
        var0 = tf.Variable([0.0] * num_samples)
        grads0 = tf.constant([0.0] * num_samples)
        cg_opt = cg_lib.ConditionalGradient(learning_rate=0.1, lambda_=0.1)
        if not tf.executing_eagerly():
            cg_update = cg_opt.apply_gradients(zip([grads0], [var0]))
            self.evaluate(tf.compat.v1.global_variables_initializer())
        for i in range(num_samples):
            if tf.executing_eagerly():
                grads0 = tf.constant(db_grad[i])
                cg_opt.apply_gradients(zip([grads0], [var0]))
            else:
                cg_update.run(feed_dict={grads0: db_grad[i]})
            self.assertAllClose(np.array(db_out[i]), self.evaluate(var0))
def testVariablesAcrossGraphs(self):
    optimizer = cg_lib.ConditionalGradient(0.01, 0.5)
    with tf.Graph().as_default():
        var0 = tf.Variable([1.0, 2.0], dtype=tf.float32, name="var0")
        var1 = tf.Variable([3.0, 4.0], dtype=tf.float32, name="var1")

        def loss():
            return tf.math.reduce_sum(var0 + var1)

        optimizer.minimize(loss, var_list=[var0, var1])
        optimizer_variables = optimizer.variables()
        # There should be three items: the iterations counter first, then
        # one slot variable per model variable.
        self.assertStartsWith(
            optimizer_variables[1].name, "ConditionalGradient/var0"
        )
        self.assertStartsWith(
            optimizer_variables[2].name, "ConditionalGradient/var1"
        )
        self.assertEqual(3, len(optimizer_variables))
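# The helper "_dtypes_to_test" referenced by the Frobenius tests in this file
# is not shown here. The sketch below reconstructs it from the comments about
# issue #347 (tf.half is not registered for 'ResourceScatterUpdate' on GPU);
# treat the exact dtype lists as an assumption.
def _dtypes_to_test(use_gpu):
    if use_gpu:
        # Skip tf.half on GPU until issue #347 is resolved.
        return [tf.float32, tf.float64]
    return [tf.half, tf.float32, tf.float64]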
def test_minimize_sparse_resource_variable_nuclear():
    # TODO: remove the dtype workaround once issues #347 and #36764 are
    # resolved.
    for dtype in _dtypes_with_checking_system(
        use_gpu=test_utils.is_gpu_available(), system=platform.system()
    ):
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

        def loss():
            x = tf.constant([[4.0], [5.0]], dtype=dtype)
            pred = tf.matmul(tf.nn.embedding_lookup([var0], [0]), x)
            return pred * pred

        # The gradient based on the current loss function.
        grads0_0 = 32 * 1.0 + 40 * 2.0
        grads0_1 = 40 * 1.0 + 50 * 2.0
        grads0 = tf.constant([[grads0_0, grads0_1]], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(
            grads0
        )

        learning_rate = 0.1
        lambda_ = 0.1
        ord = "nuclear"
        opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = opt.minimize(loss, var_list=[var0])

        # Validate the updated params.
        test_utils.assert_allclose_according_to_type(
            [
                [
                    1.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector0[0][0],
                    2.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector0[0][1],
                ]
            ],
            var0.numpy(),
        )
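# Likewise a reconstruction: the nuclear-norm tests additionally skip tf.half
# on Windows, where tf.linalg.svd has no tf.half kernel (issue #36764). The
# exact dtype list is an assumption.
def _dtypes_with_checking_system(use_gpu, system):
    if system == "Windows":
        return [tf.float32, tf.float64]
    return _dtypes_to_test(use_gpu)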
def test_sharing_nuclear():
    # TODO: remove the dtype workaround once issue #36764 is resolved.
    for dtype in _dtypes_with_checking_system(
        use_gpu=tf.test.is_gpu_available(), system=platform.system()
    ):
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(
            grads0
        )
        top_singular_vector1 = cg_lib.ConditionalGradient._top_singular_vector(
            grads1
        )
        learning_rate = 0.1
        lambda_ = 0.1
        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots.
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # In eager mode each cg_update executes as soon as it is declared,
        # so we cannot inspect the params between the two updates; we can
        # only check their final values after the second update.
        # Step 2: the second conditional_gradient update contains the
        # previous update.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    (
                        1.0 * learning_rate
                        - (1 - learning_rate) * lambda_ * top_singular_vector0[0]
                    )
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector0[0],
                    (
                        2.0 * learning_rate
                        - (1 - learning_rate) * lambda_ * top_singular_vector0[1]
                    )
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector0[1],
                ]
            ),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    (
                        3.0 * learning_rate
                        - (1 - learning_rate) * lambda_ * top_singular_vector1[0]
                    )
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector1[0],
                    (
                        4.0 * learning_rate
                        - (1 - learning_rate) * lambda_ * top_singular_vector1[1]
                    )
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector1[1],
                ]
            ),
            var1.numpy(),
        )
def test_sparse_frobenius():
    # TODO: remove the dtype workaround once issue #347 is resolved.
    for dtype in _dtypes_to_test(use_gpu=tf.test.is_gpu_available()):
        var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
        var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
        grads0 = tf.IndexedSlices(
            tf.constant([[0.1, 0.1]], dtype=dtype),
            tf.constant([1]),
            tf.constant([4, 2]),
        )
        grads1 = tf.IndexedSlices(
            tf.constant([[0.01, 0.01], [0.01, 0.01]], dtype=dtype),
            tf.constant([2, 3]),
            tf.constant([4, 2]),
        )
        norm0 = tf.math.reduce_sum(tf.math.multiply(grads0, grads0)) ** 0.5
        norm1 = tf.math.reduce_sum(tf.math.multiply(grads1, grads1)) ** 0.5
        learning_rate = 0.1
        lambda_ = 0.1
        ord = "fro"
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots.
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    0 - (1 - learning_rate) * lambda_ * 0 / norm0,
                    0 - (1 - learning_rate) * lambda_ * 0 / norm0,
                ]
            ),
            var0[0].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    0 - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                    0 - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                ]
            ),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    1.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                    1.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                ]
            ),
            var1[2].numpy(),
        )

        # Step 2: the conditional_gradient update contains the previous update.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check that the parameters have been updated again.
        np.testing.assert_allclose(np.array([0, 0]), var0[0].numpy())
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    (0 - (1 - learning_rate) * lambda_ * 0.1 / norm0)
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                    (0 - (1 - learning_rate) * lambda_ * 0.1 / norm0)
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                ]
            ),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    (
                        1.0 * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.01 / norm1
                    )
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                    (
                        1.0 * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.01 / norm1
                    )
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                ]
            ),
            var1[2].numpy(),
        )
def testTensorLearningRateAndConditionalGradient(self):
    for dtype in [tf.half, tf.float32, tf.float64]:
        with self.cached_session():
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
            norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5
            norm1 = tf.math.reduce_sum(grads1 ** 2) ** 0.5
            cg_opt = cg_lib.ConditionalGradient(
                learning_rate=tf.constant(0.5), lambda_=tf.constant(0.01)
            )
            cg_update = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values.
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Check we have slots.
            self.assertEqual(["conditional_gradient"], cg_opt.get_slot_names())
            slot0 = cg_opt.get_slot(var0, "conditional_gradient")
            self.assertEqual(slot0.get_shape(), var0.get_shape())
            slot1 = cg_opt.get_slot(var1, "conditional_gradient")
            self.assertEqual(slot1.get_shape(), var1.get_shape())

            if not tf.executing_eagerly():
                self.assertFalse(slot0 in tf.compat.v1.trainable_variables())
                self.assertFalse(slot1 in tf.compat.v1.trainable_variables())

            if not tf.executing_eagerly():
                self.evaluate(cg_update)

            # Check that the parameters have been updated.
            norm0 = self.evaluate(norm0)
            norm1 = self.evaluate(norm1)
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
                        2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
                    ]
                ),
                self.evaluate(var0),
            )
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
                        4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
                    ]
                ),
                self.evaluate(var1),
            )

            # Step 2: the conditional_gradient update contains the
            # previous update.
            if tf.executing_eagerly():
                cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            else:
                self.evaluate(cg_update)

            # Check that the parameters have been updated again.
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        (1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5
                        - (1 - 0.5) * 0.01 * 0.1 / norm0,
                        (2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5
                        - (1 - 0.5) * 0.01 * 0.1 / norm0,
                    ]
                ),
                self.evaluate(var0),
            )
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        (3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5
                        - (1 - 0.5) * 0.01 * 0.01 / norm1,
                        (4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5
                        - (1 - 0.5) * 0.01 * 0.01 / norm1,
                    ]
                ),
                self.evaluate(var1),
            )
def doTestBasic(self, use_resource=False, use_callable_params=False):
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
        if use_resource:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0_%d" % i)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1_%d" % i)
        else:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5
        norm1 = tf.math.reduce_sum(grads1 ** 2) ** 0.5

        def learning_rate():
            return 0.5

        def lambda_():
            return 0.01

        if not use_callable_params:
            learning_rate = learning_rate()
            lambda_ = lambda_()
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_
        )
        cg_update = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        if not tf.executing_eagerly():
            self.evaluate(tf.compat.v1.global_variables_initializer())
            # Fetch params to validate initial values.
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Check we have slots.
        self.assertEqual(["conditional_gradient"], cg_opt.get_slot_names())
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        self.assertEqual(slot0.get_shape(), var0.get_shape())
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        self.assertEqual(slot1.get_shape(), var1.get_shape())

        if not tf.executing_eagerly():
            self.assertFalse(slot0 in tf.compat.v1.trainable_variables())
            self.assertFalse(slot1 in tf.compat.v1.trainable_variables())

        if not tf.executing_eagerly():
            self.evaluate(cg_update)

        # Check that the parameters have been updated.
        norm0 = self.evaluate(norm0)
        norm1 = self.evaluate(norm1)
        self.assertAllCloseAccordingToType(
            np.array(
                [
                    1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
                    2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
                ]
            ),
            self.evaluate(var0),
        )
        self.assertAllCloseAccordingToType(
            np.array(
                [
                    3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
                    4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
                ]
            ),
            self.evaluate(var1),
        )

        # Step 2: the conditional_gradient update contains the previous update.
        if tf.executing_eagerly():
            cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        else:
            self.evaluate(cg_update)
        self.assertAllCloseAccordingToType(
            np.array(
                [
                    (1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5
                    - (1 - 0.5) * 0.01 * 0.1 / norm0,
                    (2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5
                    - (1 - 0.5) * 0.01 * 0.1 / norm0,
                ]
            ),
            self.evaluate(var0),
        )
        self.assertAllCloseAccordingToType(
            np.array(
                [
                    (3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5
                    - (1 - 0.5) * 0.01 * 0.01 / norm1,
                    (4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5
                    - (1 - 0.5) * 0.01 * 0.01 / norm1,
                ]
            ),
            self.evaluate(var1),
        )
def testSparse(self):
    # TODO: remove the dtype workaround once issue #347 is resolved.
    for dtype in self._DtypesToTest(use_gpu=tf.test.is_gpu_available()):
        with self.cached_session():
            var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
            var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
            grads0 = tf.IndexedSlices(
                tf.constant([[0.1, 0.1]], dtype=dtype),
                tf.constant([1]),
                tf.constant([4, 2]),
            )
            grads1 = tf.IndexedSlices(
                tf.constant([[0.01, 0.01], [0.01, 0.01]], dtype=dtype),
                tf.constant([2, 3]),
                tf.constant([4, 2]),
            )
            norm0 = tf.math.reduce_sum(tf.math.multiply(grads0, grads0)) ** 0.5
            norm1 = tf.math.reduce_sum(tf.math.multiply(grads1, grads1)) ** 0.5
            learning_rate = 0.1
            lambda_ = 0.1
            cg_opt = cg_lib.ConditionalGradient(
                learning_rate=learning_rate, lambda_=lambda_
            )
            cg_update = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values.
                self.assertAllClose([0, 0], self.evaluate(var0)[0])
                self.assertAllClose([0, 0], self.evaluate(var0)[1])
                self.assertAllClose([1, 1], self.evaluate(var1)[2])

            # Check we have slots.
            self.assertEqual(["conditional_gradient"], cg_opt.get_slot_names())
            slot0 = cg_opt.get_slot(var0, "conditional_gradient")
            self.assertEqual(slot0.get_shape(), var0.get_shape())
            slot1 = cg_opt.get_slot(var1, "conditional_gradient")
            self.assertEqual(slot1.get_shape(), var1.get_shape())

            if not tf.executing_eagerly():
                self.assertFalse(slot0 in tf.compat.v1.trainable_variables())
                self.assertFalse(slot1 in tf.compat.v1.trainable_variables())

            # Step 1:
            if not tf.executing_eagerly():
                self.evaluate(cg_update)

            # Check that the parameters have been updated.
            norm0 = self.evaluate(norm0)
            norm1 = self.evaluate(norm1)
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        0 - (1 - learning_rate) * lambda_ * 0 / norm0,
                        0 - (1 - learning_rate) * lambda_ * 0 / norm0,
                    ]
                ),
                self.evaluate(var0)[0],
            )
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        0 - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                        0 - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                    ]
                ),
                self.evaluate(var0)[1],
            )
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        1.0 * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                        1.0 * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                    ]
                ),
                self.evaluate(var1)[2],
            )

            # Step 2: the conditional_gradient update contains the
            # previous update.
            if tf.executing_eagerly():
                cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            else:
                self.evaluate(cg_update)

            # Check that the parameters have been updated again.
            self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0])
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        (0 - (1 - learning_rate) * lambda_ * 0.1 / norm0)
                        * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                        (0 - (1 - learning_rate) * lambda_ * 0.1 / norm0)
                        * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                    ]
                ),
                self.evaluate(var0)[1],
            )
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        (
                            1.0 * learning_rate
                            - (1 - learning_rate) * lambda_ * 0.01 / norm1
                        )
                        * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                        (
                            1.0 * learning_rate
                            - (1 - learning_rate) * lambda_ * 0.01 / norm1
                        )
                        * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                    ]
                ),
                self.evaluate(var1)[2],
            )
def test_tensor_learning_rate_and_conditional_gradient_nuclear():
    # TODO:
    # tf.half is not registered for the tf.linalg.svd function on Windows
    # CPU (see issue #36764,
    # https://github.com/tensorflow/tensorflow/issues/36764), so tf.half is
    # excluded when testing on a Windows CPU build.
    for dtype in _dtypes_with_checking_system(
        use_gpu=tf.test.is_gpu_available(), system=platform.system()
    ):
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(
            grads0
        )
        top_singular_vector1 = cg_lib.ConditionalGradient._top_singular_vector(
            grads1
        )
        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=tf.constant(0.5), lambda_=tf.constant(0.01), ord=ord
        )
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots.
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    1.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[0],
                    2.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[1],
                ]
            ),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    3.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[0],
                    4.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[1],
                ]
            ),
            var1.numpy(),
        )

        # Step 2: the conditional_gradient update contains the previous update.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check that the parameters have been updated again.
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    (1.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[0]) * 0.5
                    - (1 - 0.5) * 0.01 * top_singular_vector0[0],
                    (2.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[1]) * 0.5
                    - (1 - 0.5) * 0.01 * top_singular_vector0[1],
                ]
            ),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    (3.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[0]) * 0.5
                    - (1 - 0.5) * 0.01 * top_singular_vector1[0],
                    (4.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[1]) * 0.5
                    - (1 - 0.5) * 0.01 * top_singular_vector1[1],
                ]
            ),
            var1.numpy(),
        )
def test_basic_nuclear(use_resource):
    # TODO: remove the dtype workaround once issue #36764 is resolved.
    for i, dtype in enumerate(
        _dtypes_with_checking_system(
            use_gpu=tf.test.is_gpu_available(), system=platform.system()
        )
    ):
        if use_resource:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0_%d" % i)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1_%d" % i)
        else:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        top_singular_vector0 = cg_lib.ConditionalGradient._top_singular_vector(
            grads0
        )
        top_singular_vector1 = cg_lib.ConditionalGradient._top_singular_vector(
            grads1
        )

        def learning_rate():
            return 0.5

        def lambda_():
            return 0.01

        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots.
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    1.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[0],
                    2.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[1],
                ]
            ),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    3.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[0],
                    4.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[1],
                ]
            ),
            var1.numpy(),
        )

        # Step 2: the conditional_gradient update contains the previous update.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    (1.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[0]) * 0.5
                    - (1 - 0.5) * 0.01 * top_singular_vector0[0],
                    (2.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector0[1]) * 0.5
                    - (1 - 0.5) * 0.01 * top_singular_vector0[1],
                ]
            ),
            var0.numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    (3.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[0]) * 0.5
                    - (1 - 0.5) * 0.01 * top_singular_vector1[0],
                    (4.0 * 0.5 - (1 - 0.5) * 0.01 * top_singular_vector1[1]) * 0.5
                    - (1 - 0.5) * 0.01 * top_singular_vector1[1],
                ]
            ),
            var1.numpy(),
        )
def test_sparse_nuclear():
    # TODO: remove the dtype workaround once issues #347 and #36764 are
    # resolved.
    for dtype in _dtypes_with_checking_system(
        use_gpu=tf.test.is_gpu_available(), system=platform.system()
    ):
        var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
        var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
        grads0 = tf.IndexedSlices(
            tf.constant([[0.1, 0.1]], dtype=dtype),
            tf.constant([1]),
            tf.constant([4, 2]),
        )
        grads1 = tf.IndexedSlices(
            tf.constant([[0.01, 0.01], [0.01, 0.01]], dtype=dtype),
            tf.constant([2, 3]),
            tf.constant([4, 2]),
        )
        top_singular_vector0 = tf.constant(
            [[0.0, 0.0], [0.7071067, 0.7071067], [0.0, 0.0], [0.0, 0.0]],
            dtype=dtype,
        )
        top_singular_vector1 = tf.constant(
            [
                [-4.2146844e-08, -4.2146844e-08],
                [0.0000000e00, 0.0000000e00],
                [4.9999994e-01, 4.9999994e-01],
                [4.9999994e-01, 4.9999994e-01],
            ],
            dtype=dtype,
        )
        learning_rate = 0.1
        lambda_ = 0.1
        ord = "nuclear"
        cg_opt = cg_lib.ConditionalGradient(
            learning_rate=learning_rate, lambda_=lambda_, ord=ord
        )
        _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check we have slots.
        assert ["conditional_gradient"] == cg_opt.get_slot_names()
        slot0 = cg_opt.get_slot(var0, "conditional_gradient")
        assert slot0.get_shape() == var0.get_shape()
        slot1 = cg_opt.get_slot(var1, "conditional_gradient")
        assert slot1.get_shape() == var1.get_shape()

        # Check that the parameters have been updated.
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    0 - (1 - learning_rate) * lambda_ * top_singular_vector0[0][0],
                    0 - (1 - learning_rate) * lambda_ * top_singular_vector0[0][1],
                ]
            ),
            var0[0].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    0 - (1 - learning_rate) * lambda_ * top_singular_vector0[1][0],
                    0 - (1 - learning_rate) * lambda_ * top_singular_vector0[1][1],
                ]
            ),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    1.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector1[2][0],
                    1.0 * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector1[2][1],
                ]
            ),
            var1[2].numpy(),
        )

        # Step 2: the conditional_gradient update contains the previous update.
        cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        # Check that the parameters have been updated again.
        np.testing.assert_allclose(np.array([0, 0]), var0[0].numpy())
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    (0 - (1 - learning_rate) * lambda_ * top_singular_vector0[1][0])
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector0[1][0],
                    (0 - (1 - learning_rate) * lambda_ * top_singular_vector0[1][1])
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector0[1][1],
                ]
            ),
            var0[1].numpy(),
        )
        test_utils.assert_allclose_according_to_type(
            np.array(
                [
                    (
                        1.0 * learning_rate
                        - (1 - learning_rate) * lambda_ * top_singular_vector1[2][0]
                    )
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector1[2][0],
                    (
                        1.0 * learning_rate
                        - (1 - learning_rate) * lambda_ * top_singular_vector1[2][1]
                    )
                    * learning_rate
                    - (1 - learning_rate) * lambda_ * top_singular_vector1[2][1],
                ]
            ),
            var1[2].numpy(),
        )
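# Sanity note: the hard-coded top_singular_vector constants in
# test_sparse_nuclear match what _reference_top_singular_vector (sketched
# earlier in this file) computes for the dense versions of grads0 and grads1,
# up to float32 rounding noise.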
def test_basic_frobenius(dtype, use_resource):
    if use_resource:
        var0 = tf.Variable([1.0, 2.0], dtype=dtype[0], name="var0_%d" % dtype[1])
        var1 = tf.Variable([3.0, 4.0], dtype=dtype[0], name="var1_%d" % dtype[1])
    else:
        var0 = tf.Variable([1.0, 2.0], dtype=dtype[0])
        var1 = tf.Variable([3.0, 4.0], dtype=dtype[0])
    grads0 = tf.constant([0.1, 0.1], dtype=dtype[0])
    grads1 = tf.constant([0.01, 0.01], dtype=dtype[0])
    norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5
    norm1 = tf.math.reduce_sum(grads1 ** 2) ** 0.5

    def learning_rate():
        return 0.5

    def lambda_():
        return 0.01

    ord = "fro"
    cg_opt = cg_lib.ConditionalGradient(
        learning_rate=learning_rate, lambda_=lambda_, ord=ord
    )
    _ = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

    # Check we have slots.
    assert ["conditional_gradient"] == cg_opt.get_slot_names()
    slot0 = cg_opt.get_slot(var0, "conditional_gradient")
    assert slot0.get_shape() == var0.get_shape()
    slot1 = cg_opt.get_slot(var1, "conditional_gradient")
    assert slot1.get_shape() == var1.get_shape()

    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
                2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0,
            ]
        ),
        var0.numpy(),
    )
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
                4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1,
            ]
        ),
        var1.numpy(),
    )

    # Step 2: the conditional_gradient update contains the previous update.
    cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                (1.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5
                - (1 - 0.5) * 0.01 * 0.1 / norm0,
                (2.0 * 0.5 - (1 - 0.5) * 0.01 * 0.1 / norm0) * 0.5
                - (1 - 0.5) * 0.01 * 0.1 / norm0,
            ]
        ),
        var0.numpy(),
    )
    test_utils.assert_allclose_according_to_type(
        np.array(
            [
                (3.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5
                - (1 - 0.5) * 0.01 * 0.01 / norm1,
                (4.0 * 0.5 - (1 - 0.5) * 0.01 * 0.01 / norm1) * 0.5
                - (1 - 0.5) * 0.01 * 0.01 / norm1,
            ]
        ),
        var1.numpy(),
    )
def testSharing(self):
    for dtype in [tf.half, tf.float32, tf.float64]:
        with self.cached_session():
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
            norm0 = tf.math.reduce_sum(grads0 ** 2) ** 0.5
            norm1 = tf.math.reduce_sum(grads1 ** 2) ** 0.5
            learning_rate = 0.1
            lambda_ = 0.1
            cg_opt = cg_lib.ConditionalGradient(
                learning_rate=learning_rate, lambda_=lambda_
            )
            cg_update1 = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            cg_update2 = cg_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values.
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Check we have slots.
            self.assertEqual(["conditional_gradient"], cg_opt.get_slot_names())
            slot0 = cg_opt.get_slot(var0, "conditional_gradient")
            self.assertEqual(slot0.get_shape(), var0.get_shape())
            slot1 = cg_opt.get_slot(var1, "conditional_gradient")
            self.assertEqual(slot1.get_shape(), var1.get_shape())

            if not tf.executing_eagerly():
                self.assertFalse(slot0 in tf.compat.v1.trainable_variables())
                self.assertFalse(slot1 in tf.compat.v1.trainable_variables())

            # In eager mode the two cg_update operations execute as soon as
            # they are declared, so we cannot inspect the params between the
            # two updates; we can only check their final values after the
            # second update.
            if not tf.executing_eagerly():
                self.evaluate(cg_update1)
                # Check that the parameters have been updated.
                norm0 = self.evaluate(norm0)
                norm1 = self.evaluate(norm1)
                self.assertAllCloseAccordingToType(
                    np.array(
                        [
                            1.0 * learning_rate
                            - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                            2.0 * learning_rate
                            - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                        ]
                    ),
                    self.evaluate(var0),
                )
                self.assertAllCloseAccordingToType(
                    np.array(
                        [
                            3.0 * learning_rate
                            - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                            4.0 * learning_rate
                            - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                        ]
                    ),
                    self.evaluate(var1),
                )

            # Step 2: the second conditional_gradient update contains the
            # previous update.
            if not tf.executing_eagerly():
                self.evaluate(cg_update2)

            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        (
                            1.0 * learning_rate
                            - (1 - learning_rate) * lambda_ * 0.1 / norm0
                        )
                        * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                        (
                            2.0 * learning_rate
                            - (1 - learning_rate) * lambda_ * 0.1 / norm0
                        )
                        * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.1 / norm0,
                    ]
                ),
                self.evaluate(var0),
            )
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        (
                            3.0 * learning_rate
                            - (1 - learning_rate) * lambda_ * 0.01 / norm1
                        )
                        * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                        (
                            4.0 * learning_rate
                            - (1 - learning_rate) * lambda_ * 0.01 / norm1
                        )
                        * learning_rate
                        - (1 - learning_rate) * lambda_ * 0.01 / norm1,
                    ]
                ),
                self.evaluate(var1),
            )