Example #1
    def testSparseRepeatedIndices(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)

                repeated_index_update_var = tf.Variable(var_np, dtype=dtype)
                aggregated_update_var = tf.Variable(var_np, dtype=dtype)
                grad_repeated_index = tf.IndexedSlices(
                    tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
                    tf.constant([1, 1]), tf.constant([2, 1]))
                grad_aggregated = tf.IndexedSlices(
                    tf.constant([0.2], shape=[1, 1], dtype=dtype),
                    tf.constant([1]), tf.constant([2, 1]))
                repeated_update = adagrad.Adagrad(3.0).apply_gradients([
                    (grad_repeated_index, repeated_index_update_var)
                ])
                aggregated_update = adagrad.Adagrad(3.0).apply_gradients([
                    (grad_aggregated, aggregated_update_var)
                ])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertAllClose(self.evaluate(aggregated_update_var),
                                    self.evaluate(repeated_index_update_var))
                for _ in range(3):
                    self.evaluate(repeated_update)
                    self.evaluate(aggregated_update)
                    self.assertAllClose(
                        self.evaluate(aggregated_update_var),
                        self.evaluate(repeated_index_update_var))
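
The assertion here says that a gradient with duplicate indices ([1, 1], 0.1 each) must produce the same result as a single aggregated slice (0.2 at index 1), i.e. duplicates are summed before the Adagrad step. As an illustrative hand check of the aggregated path's first step (assuming the Keras defaults initial_accumulator_value=0.1 and epsilon=1e-7, which are not spelled out in the snippet):

import numpy as np

# One Adagrad step on var[1] with the aggregated gradient 0.2 and lr=3.0.
accum = 0.1 + 0.2 ** 2                      # 0.14
step = 3.0 * 0.2 / (np.sqrt(accum) + 1e-7)  # ~1.6036
print(2.0 - step)                           # ~0.3964 after the first update
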
Example #2
 def testSparseRepeatedIndicesByEmbeddingLookUp(self):
     # TODO(tanzheny, omalleyt): Fix test in eager mode.
     with tf.Graph().as_default():
         for dtype in _DATA_TYPES:
             var_repeated = tf.Variable([1.0, 2.0], dtype=dtype)
             loss_repeated = (
                 lambda: tf.reduce_sum(  # pylint: disable=g-long-lambda
                     tf.compat.v1.nn.embedding_lookup(var_repeated, [0, 0]))
             )  # pylint: disable=cell-var-from-loop
             var_aggregated = tf.Variable([1.0, 2.0], dtype=dtype)
             loss_aggregated = (
                 lambda: 2 * tf.reduce_sum(  # pylint: disable=g-long-lambda
                     tf.compat.v1.nn.embedding_lookup(var_aggregated, [0])))  # pylint: disable=cell-var-from-loop
             update_op_repeated = adagrad.Adagrad(2.0).minimize(
                 loss_repeated, var_list=[var_repeated])
             update_op_aggregated = adagrad.Adagrad(2.0).minimize(
                 loss_aggregated, var_list=[var_aggregated])
             self.evaluate(tf.compat.v1.global_variables_initializer())
             self.assertAllCloseAccordingToType(
                 self.evaluate(var_repeated), self.evaluate(var_aggregated))
             for _ in range(3):
                 self.evaluate(update_op_repeated)
                 self.evaluate(update_op_aggregated)
                 self.assertAllCloseAccordingToType(
                     self.evaluate(var_repeated),
                     self.evaluate(var_aggregated),
                 )
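
The two losses differ only in how the gradient for index 0 is delivered: the repeated lookup produces an IndexedSlices gradient with duplicate indices, while the scaled single lookup produces one aggregated slice. A minimal eager-mode illustration of that gradient structure (not part of the test file; it assumes plain TF 2.x GradientTape semantics):

import tensorflow as tf

var = tf.Variable([1.0, 2.0])
with tf.GradientTape() as tape:
    loss = tf.reduce_sum(tf.nn.embedding_lookup(var, [0, 0]))
grad = tape.gradient(loss, var)
# grad is an IndexedSlices with indices [0, 0] and values [1.0, 1.0];
# the optimizer has to treat it like a single slice of value 2.0 at index 0.
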
Example #3
 def testSparseStability(self):
     # TODO(tanzheny, omalleyt): Fix test in eager mode.
     with tf.Graph().as_default():
         for dtype in [tf.half]:
             shape = [1, 6]
             var0_np = np.array([[
                 0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
                 -0.0105945
             ]],
                                dtype=dtype.as_numpy_dtype)
             var0 = tf.Variable(var0_np)
             grads0_np = np.array([[
                 -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05,
                 -8.4877e-05, -9.48906e-05
             ]],
                                  dtype=dtype.as_numpy_dtype)
             grads0 = tf.IndexedSlices(tf.constant(grads0_np),
                                       tf.constant([0]), tf.constant(shape))
             ada_opt = adagrad.Adagrad(1.0)
             ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
             slot0 = ada_opt.get_slot(var0, "accumulator")
             init = tf.compat.v1.global_variables_initializer()
             for _ in range(100):
                 self.evaluate(init)
                 self.evaluate(ada_update)
                 self.assertAllCloseAccordingToType(
                     np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]),
                     self.evaluate(slot0))
                 self.assertAllCloseAccordingToType(
                     np.array([[
                         0.00891194, -0.10712013, 0.11047515, 0.22636929,
                         -0.0144573, -0.01029443
                     ]]), self.evaluate(var0))
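
The half-precision case guards against instability from tiny gradients: their squares (~1e-9) are far below float16's resolution around 0.1, so the accumulator stays at its initial 0.1 and each step reduces to var -= lr * g / sqrt(0.1). An illustrative hand check of the first element (assuming initial_accumulator_value=0.1; epsilon is negligible here):

import numpy as np

g = -5.91278e-05
print(0.00872496 - 1.0 * g / np.sqrt(0.1))  # ~0.0089119, matching the expected 0.00891194
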
Example #4
    def testSparseSingleVarDim(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                grads0_np_indices = np.array([0], dtype=np.int32)
                grads0 = tf.IndexedSlices(
                    tf.constant(grads0_np[grads0_np_indices]),
                    tf.constant(grads0_np_indices), tf.constant([3]))
                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.)
                ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0], self.evaluate(var0))

                accum0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

                # Run 3 steps of adagrad
                for _ in range(3):
                    self.evaluate(ada_update)

                    var0_np, accum0_np = sparse_adagrad_update_numpy(
                        var0_np,
                        accum0_np,
                        grads0_np_indices,
                        grads0_np[grads0_np_indices],
                        learning_rate,
                        epsilon=1.)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
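
sparse_adagrad_update_numpy is a reference helper defined elsewhere in this test module and not shown in these snippets. A minimal sketch that matches the call signature used above (var, accum, indices, gradient values, lr, epsilon) and the standard Adagrad rule; the real helper may differ in details:

import numpy as np

def sparse_adagrad_update_numpy(var, accum, indices, grad_values, lr,
                                epsilon=1e-7):
    var, accum = var.copy(), accum.copy()
    # Accumulate the squared gradients of the touched rows first, then apply
    # the Adagrad step to those rows only.
    for i, g in zip(indices, grad_values):
        accum[i] += g * g
    for i, g in zip(indices, grad_values):
        var[i] -= lr * g / (np.sqrt(accum[i]) + epsilon)
    return var, accum
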
Example #5
    def testBasicWithLearningRateInverseTimeDecay(self):
        for dtype in _DATA_TYPES:
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)

            learning_rate = 3.0
            decay = 0.5
            lr_schedule = learning_rate_schedule.InverseTimeDecay(
                learning_rate, decay_steps=1.0, decay_rate=decay)

            ada_opt = adagrad.Adagrad(lr_schedule)

            accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

            if not tf.executing_eagerly():
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

            # Fetch params to validate initial values
            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([3.0, 4.0], v1_val)

            # Run 3 steps of adagrad
            for t in range(3):
                if not tf.executing_eagerly():
                    self.evaluate(ada_update)
                else:
                    ada_opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))
                lr_np = learning_rate / (1 + decay * t)
                var0_np, accum0_np = adagrad_update_numpy(
                    var0_np, accum0_np, grads0_np, lr_np)
                var1_np, accum1_np = adagrad_update_numpy(
                    var1_np, accum1_np, grads1_np, lr_np)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))
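
adagrad_update_numpy is the dense reference helper from the same test module, also not shown here. A minimal sketch consistent with how it is called in this example and in the large-epsilon example below (epsilon added outside the square root); the real helper may differ in defaults:

import numpy as np

def adagrad_update_numpy(param, accum, g_t, lr=0.001, epsilon=1e-7):
    # Standard Adagrad: accumulate squared gradients, scale the step by
    # 1 / (sqrt(accumulator) + epsilon).
    accum_t = accum + g_t * g_t
    param_t = param - lr * g_t / (np.sqrt(accum_t) + epsilon)
    return param_t, accum_t

The lr_np line mirrors InverseTimeDecay with decay_steps=1.0: the effective learning rate is 3.0, 2.0, and 1.5 on steps 0, 1, and 2.
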
Example #6
  def test_create_slots_for_sharded_variables(self):
    # Set names so that the ShardedVariable is well-named for slot variable keying.
    var_a = tf.Variable([1.0], name='part_0')
    var_b = tf.Variable([2.0], name='part_1')
    sharded_var = tf.__internal__.distribute.ShardedVariable([var_a, var_b])

    opt = adagrad.Adagrad()
    opt._create_slots(sharded_var.variables)
    opt._create_slots_for_sharded_variables(sharded_var.variables)

    sharded_slot = opt.get_slot(sharded_var, 'accumulator')
    self.assertIsInstance(
        sharded_slot, tf.__internal__.distribute.ShardedVariable)

    slot_a = opt.get_slot(var_a, 'accumulator')
    self.assertAllClose(sharded_slot.variables[0], slot_a)
    slot_b = opt.get_slot(var_b, 'accumulator')
    self.assertAllClose(sharded_slot.variables[1], slot_b)
Example #7
    def testBasicWithLargeEpsilon(self):
        var0_np = np.array([1.0, 2.0])
        var1_np = np.array([3.0, 4.0])
        grads0_np = np.array([0.1, 0.1])
        grads1_np = np.array([0.01, 0.01])
        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 3.0

        ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0)

        accum0_np = np.array([0.1, 0.1])
        accum1_np = np.array([0.1, 0.1])

        if not tf.executing_eagerly():
            ada_update = ada_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        v0_val, v1_val = self.evaluate([var0, var1])
        self.assertAllClose([1.0, 2.0], v0_val)
        self.assertAllClose([3.0, 4.0], v1_val)

        # Run 3 steps of adagrad
        for _ in range(3):
            if not tf.executing_eagerly():
                self.evaluate(ada_update)
            else:
                ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                      grads0_np, 3.0, 1.0)
            var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                      grads1_np, 3.0, 1.0)
            self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
            self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
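
With epsilon=1.0 the accumulator barely matters at first: after one step it is only 0.1 + 0.1**2 = 0.11, so the denominator is dominated by epsilon. An illustrative hand check of the first var0 step (same assumptions as above):

import numpy as np

accum = 0.1 + 0.1 ** 2
print(1.0 - 3.0 * 0.1 / (np.sqrt(accum) + 1.0))  # ~0.7747; var0[1] similarly becomes ~1.7747
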
Example #8
    def testMinimizeSparseResourceVariable(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0 = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
                x = tf.constant([[4.0], [5.0]], dtype=dtype)

                def loss():
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x)
                    return pred * pred

                ada_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]],
                                                   self.evaluate(var0))
                # Run 1 step of adagrad
                self.evaluate(ada_op)
                # Validate updated params
                self.assertAllCloseAccordingToType([[0, 1], [3, 4]],
                                                   self.evaluate(var0),
                                                   atol=0.01)
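
The expected [[0, 1], [3, 4]] follows because only row 0 of var0 receives a gradient (the lookup selects row 0), and with lr=1.0 the first Adagrad step is almost exactly g / sqrt(g**2) = 1 for each updated entry, since the initial accumulator (assumed to be the Keras default of 0.1) is negligible next to grad**2. An illustrative hand check:

import numpy as np

row0, x = np.array([1.0, 2.0]), np.array([4.0, 5.0])
pred = row0 @ x                      # 14.0
grad = 2.0 * pred * x                # [112.0, 140.0] = d(pred**2)/d(row0)
accum = 0.1 + grad ** 2
print(row0 - 1.0 * grad / (np.sqrt(accum) + 1e-7))  # ~[0.0, 1.0]; row 1 stays [3.0, 4.0]
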
    "RmsPropV1", lambda: tf.compat.v1.train.RMSPropOptimizer(0.001)
)

# TODO(shiningsun): consider adding the other v1 optimizers
optimizers_v1 = [
    gradient_descent_optimizer_v1_fn,
    adagrad_optimizer_v1_fn,
    ftrl_optimizer_v1_fn,
    rmsprop_optimizer_v1_fn,
]

adadelta_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001)
)
adagrad_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001)
)
adam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0)
)
adam_experimental_fn = tf.__internal__.test.combinations.NamedObject(
    "AdamExperimental", lambda: adam_experimental.Adam(0.001)
)
adamax_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdamaxKerasV2", lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0)
)
nadam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "NadamKerasV2", lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0)
)
ftrl_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "FtrlKerasV2", lambda: ftrl_keras_v2.Ftrl(0.001)
)
Example #10
 def testAdagradCompatibility(self):
   opt_v1 = optimizer_v1.Adagrad(lr=0.01)
   opt_v2 = adagrad.Adagrad(learning_rate=0.01)
   self._testOptimizersCompatibility(opt_v1, opt_v2)