def testSparseRepeatedIndices(self): # TODO(tanzheny, omalleyt): Fix test in eager mode. with tf.Graph().as_default(): for dtype in _DATA_TYPES: var_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype) repeated_index_update_var = tf.Variable(var_np, dtype=dtype) aggregated_update_var = tf.Variable(var_np, dtype=dtype) grad_repeated_index = tf.IndexedSlices( tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), tf.constant([1, 1]), tf.constant([2, 1])) grad_aggregated = tf.IndexedSlices( tf.constant([0.2], shape=[1, 1], dtype=dtype), tf.constant([1]), tf.constant([2, 1])) repeated_update = adagrad.Adagrad(3.0).apply_gradients([ (grad_repeated_index, repeated_index_update_var) ]) aggregated_update = adagrad.Adagrad(3.0).apply_gradients([ (grad_aggregated, aggregated_update_var) ]) self.evaluate(tf.compat.v1.global_variables_initializer()) self.assertAllClose(self.evaluate(aggregated_update_var), self.evaluate(repeated_index_update_var)) for _ in range(3): self.evaluate(repeated_update) self.evaluate(aggregated_update) self.assertAllClose( self.evaluate(aggregated_update_var), self.evaluate(repeated_index_update_var))
def testSparseRepeatedIndicesByEmbeddingLookUp(self): # TODO(tanzheny, omalleyt): Fix test in eager mode. with tf.Graph().as_default(): for dtype in _DATA_TYPES: var_repeated = tf.Variable([1.0, 2.0], dtype=dtype) loss_repeated = ( lambda: tf.reduce_sum( # pylint: disable=g-long-lambda tf.compat.v1.nn.embedding_lookup(var_repeated, [0, 0])) ) # pylint: disable=cell-var-from-loop var_aggregated = tf.Variable([1.0, 2.0], dtype=dtype) loss_aggregated = ( lambda: 2 * tf.reduce_sum( # pylint: disable=g-long-lambda tf.compat.v1.nn.embedding_lookup(var_aggregated, [0]))) # pylint: disable=cell-var-from-loop update_op_repeated = adagrad.Adagrad(2.0).minimize( loss_repeated, var_list=[var_repeated]) update_op_aggregated = adagrad.Adagrad(2.0).minimize( loss_aggregated, var_list=[var_aggregated]) self.evaluate(tf.compat.v1.global_variables_initializer()) self.assertAllCloseAccordingToType( self.evaluate(var_repeated), self.evaluate(var_aggregated)) for _ in range(3): self.evaluate(update_op_repeated) self.evaluate(update_op_aggregated) self.assertAllCloseAccordingToType( self.evaluate(var_repeated), self.evaluate(var_aggregated), )
def testSparseStability(self): # TODO(tanzheny, omalleyt): Fix test in eager mode. with tf.Graph().as_default(): for dtype in [tf.half]: shape = [1, 6] var0_np = np.array([[ 0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257, -0.0105945 ]], dtype=dtype.as_numpy_dtype) var0 = tf.Variable(var0_np) grads0_np = np.array([[ -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05, -8.4877e-05, -9.48906e-05 ]], dtype=dtype.as_numpy_dtype) grads0 = tf.IndexedSlices(tf.constant(grads0_np), tf.constant([0]), tf.constant(shape)) ada_opt = adagrad.Adagrad(1.0) ada_update = ada_opt.apply_gradients(zip([grads0], [var0])) slot0 = ada_opt.get_slot(var0, "accumulator") init = tf.compat.v1.global_variables_initializer() for _ in range(100): self.evaluate(init) self.evaluate(ada_update) self.assertAllCloseAccordingToType( np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), self.evaluate(slot0)) self.assertAllCloseAccordingToType( np.array([[ 0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573, -0.01029443 ]]), self.evaluate(var0))
def testSparseSingleVarDim(self): # TODO(tanzheny, omalleyt): Fix test in eager mode. with tf.Graph().as_default(): for dtype in _DATA_TYPES: var0_np = np.array([1.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) var0 = tf.Variable(var0_np) grads0_np_indices = np.array([0], dtype=np.int32) grads0 = tf.IndexedSlices( tf.constant(grads0_np[grads0_np_indices]), tf.constant(grads0_np_indices), tf.constant([3])) learning_rate = 3.0 ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.) ada_update = ada_opt.apply_gradients(zip([grads0], [var0])) self.evaluate(tf.compat.v1.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0], self.evaluate(var0)) accum0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) # Run 3 step of sgd for _ in range(3): self.evaluate(ada_update) var0_np, accum0_np = sparse_adagrad_update_numpy( var0_np, accum0_np, grads0_np_indices, grads0_np[grads0_np_indices], learning_rate, epsilon=1.) self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
def testBasicWithLearningRateInverseTimeDecay(self): for dtype in _DATA_TYPES: var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) var0 = tf.Variable(var0_np) var1 = tf.Variable(var1_np) grads0 = tf.constant(grads0_np) grads1 = tf.constant(grads1_np) learning_rate = 3.0 decay = 0.5 lr_schedule = learning_rate_schedule.InverseTimeDecay( learning_rate, decay_steps=1.0, decay_rate=decay) ada_opt = adagrad.Adagrad(lr_schedule) accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) if not tf.executing_eagerly(): ada_update = ada_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) self.evaluate(tf.compat.v1.global_variables_initializer()) # Fetch params to validate initial values v0_val, v1_val = self.evaluate([var0, var1]) self.assertAllClose([1.0, 2.0], v0_val) self.assertAllClose([3.0, 4.0], v1_val) # Run 3 steps of adagrad for t in range(3): if not tf.executing_eagerly(): self.evaluate(ada_update) else: ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) lr_np = learning_rate / (1 + decay * t) var0_np, accum0_np = adagrad_update_numpy( var0_np, accum0_np, grads0_np, lr_np) var1_np, accum1_np = adagrad_update_numpy( var1_np, accum1_np, grads1_np, lr_np) self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def test_create_slots_for_sharded_variables(self): # set names so that ShardedVariable is well-named for slot variable keying. var_a = tf.Variable([1.0], name='part_0') var_b = tf.Variable([2.0], name='part_1') sharded_var = tf.__internal__.distribute.ShardedVariable([var_a, var_b]) opt = adagrad.Adagrad() opt._create_slots(sharded_var.variables) opt._create_slots_for_sharded_variables(sharded_var.variables) sharded_slot = opt.get_slot(sharded_var, 'accumulator') self.assertIsInstance( sharded_slot, tf.__internal__.distribute.ShardedVariable) slot_a = opt.get_slot(var_a, 'accumulator') self.assertAllClose(sharded_slot.variables[0], slot_a) slot_b = opt.get_slot(var_b, 'accumulator') self.assertAllClose(sharded_slot.variables[1], slot_b)
def testBasicWithLargeEpsilon(self): var0_np = np.array([1.0, 2.0]) var1_np = np.array([3.0, 4.0]) grads0_np = np.array([0.1, 0.1]) grads1_np = np.array([0.01, 0.01]) var0 = tf.Variable(var0_np) var1 = tf.Variable(var1_np) grads0 = tf.constant(grads0_np) grads1 = tf.constant(grads1_np) learning_rate = 3.0 ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0) accum0_np = np.array([0.1, 0.1]) accum1_np = np.array([0.1, 0.1]) if not tf.executing_eagerly(): ada_update = ada_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) self.evaluate(tf.compat.v1.global_variables_initializer()) # Fetch params to validate initial values v0_val, v1_val = self.evaluate([var0, var1]) self.assertAllClose([1.0, 2.0], v0_val) self.assertAllClose([3.0, 4.0], v1_val) # Run 3 steps of adagrad for _ in range(3): if not tf.executing_eagerly(): self.evaluate(ada_update) else: ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np, grads0_np, 3.0, 1.0) var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np, grads1_np, 3.0, 1.0) self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def testMinimizeSparseResourceVariable(self): # TODO(tanzheny, omalleyt): Fix test in eager mode. with tf.Graph().as_default(): for dtype in _DATA_TYPES: var0 = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=dtype) x = tf.constant([[4.0], [5.0]], dtype=dtype) def loss(): pred = tf.matmul( tf.compat.v1.nn.embedding_lookup([var0], [0]), x) return pred * pred sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0]) self.evaluate(tf.compat.v1.global_variables_initializer()) # Fetch params to validate initial values self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]], self.evaluate(var0)) # Run 1 step of sgd self.evaluate(sgd_op) # Validate updated params self.assertAllCloseAccordingToType([[0, 1], [3, 4]], self.evaluate(var0), atol=0.01)
"RmsPropV1", lambda: tf.compat.v1.train.RMSPropOptimizer(0.001) ) # TODO(shiningsun): consider adding the other v1 optimizers optimizers_v1 = [ gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn, ftrl_optimizer_v1_fn, rmsprop_optimizer_v1_fn, ] adadelta_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001) ) adagrad_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001) ) adam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0) ) adam_experimental_fn = tf.__internal__.test.combinations.NamedObject( "AdamExperimental", lambda: adam_experimental.Adam(0.001) ) adamax_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "AdamaxKerasV2", lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0) ) nadam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "NadamKerasV2", lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0) ) ftrl_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject( "FtrlKerasV2", lambda: ftrl_keras_v2.Ftrl(0.001)
def testAdagradCompatibility(self): opt_v1 = optimizer_v1.Adagrad(lr=0.01) opt_v2 = adagrad.Adagrad(learning_rate=0.01) self._testOptimizersCompatibility(opt_v1, opt_v2)