Example #1
    def testSparseRepeatedIndices(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)

                repeated_index_update_var = tf.Variable(var_np, dtype=dtype)
                aggregated_update_var = tf.Variable(var_np, dtype=dtype)
                grad_repeated_index = tf.IndexedSlices(
                    tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
                    tf.constant([1, 1]), tf.constant([2, 1]))
                grad_aggregated = tf.IndexedSlices(
                    tf.constant([0.2], shape=[1, 1], dtype=dtype),
                    tf.constant([1]), tf.constant([2, 1]))
                repeated_update = adagrad.Adagrad(3.0).apply_gradients([
                    (grad_repeated_index, repeated_index_update_var)
                ])
                aggregated_update = adagrad.Adagrad(3.0).apply_gradients([
                    (grad_aggregated, aggregated_update_var)
                ])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertAllClose(self.evaluate(aggregated_update_var),
                                    self.evaluate(repeated_index_update_var))
                for _ in range(3):
                    self.evaluate(repeated_update)
                    self.evaluate(aggregated_update)
                    self.assertAllClose(
                        self.evaluate(aggregated_update_var),
                        self.evaluate(repeated_index_update_var))
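Most of the tests below iterate over a module-level _DATA_TYPES constant and assume the usual NumPy/TensorFlow imports, none of which appear in this listing. A minimal sketch of those assumed definitions (the float dtypes the Keras Adagrad tests typically sweep over):

import numpy as np
import tensorflow as tf

# Assumed, not shown in the listing: the dtypes the tests iterate over.
_DATA_TYPES = [tf.half, tf.float32, tf.float64]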
Example #2
    def testConstructAdagradWithLR(self):
        opt = adagrad.Adagrad(lr=1.0)
        opt_2 = adagrad.Adagrad(learning_rate=0.1, lr=1.0)
        opt_3 = adagrad.Adagrad(learning_rate=0.1)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
Example #3
 def testSparseStability(self):
     # TODO(tanzheny, omalleyt): Fix test in eager mode.
     with tf.Graph().as_default():
         for dtype in [tf.half]:
             shape = [1, 6]
             var0_np = np.array([[
                 0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
                 -0.0105945
             ]],
                                dtype=dtype.as_numpy_dtype)
             var0 = tf.Variable(var0_np)
             grads0_np = np.array([[
                 -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05,
                 -8.4877e-05, -9.48906e-05
             ]],
                                  dtype=dtype.as_numpy_dtype)
             grads0 = tf.IndexedSlices(tf.constant(grads0_np),
                                       tf.constant([0]), tf.constant(shape))
             ada_opt = adagrad.Adagrad(1.0)
             ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
             slot0 = ada_opt.get_slot(var0, "accumulator")
             init = tf.compat.v1.global_variables_initializer()
             for _ in range(100):
                 self.evaluate(init)
                 self.evaluate(ada_update)
                 self.assertAllCloseAccordingToType(
                     np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]),
                     self.evaluate(slot0))
                 self.assertAllCloseAccordingToType(
                     np.array([[
                         0.00891194, -0.10712013, 0.11047515, 0.22636929,
                         -0.0144573, -0.01029443
                     ]]), self.evaluate(var0))
Example #4
    def testTensorLearningRate(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)

                learning_rate = tf.constant(3.0)
                ada_opt = adagrad.Adagrad(learning_rate)
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                # Run 3 steps of adagrad
                for _ in range(3):
                    self.evaluate(ada_update)
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, learning_rate)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, learning_rate)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
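The dense tests check the variables against a NumPy reference called adagrad_update_numpy, which is not shown here. A minimal sketch of the standard Adagrad update it is assumed to implement: accumulate the squared gradient, then scale the step by the learning rate over the square root of the accumulator (epsilon default matching Keras Adagrad's 1e-7):

def adagrad_update_numpy(param, accum, g_t, lr=0.001, epsilon=1e-7):
    # accum_t = accum + g^2; param_t = param - lr * g / (sqrt(accum_t) + eps)
    accum_t = accum + g_t * g_t
    param_t = param - lr * g_t / (np.sqrt(accum_t) + epsilon)
    return param_t, accum_t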
Example #5
    def testSparseSingleVarDim(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                grads0_np_indices = np.array([0], dtype=np.int32)
                grads0 = tf.IndexedSlices(
                    tf.constant(grads0_np[grads0_np_indices]),
                    tf.constant(grads0_np_indices), tf.constant([3]))
                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.)
                ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0], self.evaluate(var0))

                accum0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

                # Run 3 steps of adagrad
                for _ in range(3):
                    self.evaluate(ada_update)

                    var0_np, accum0_np = sparse_adagrad_update_numpy(
                        var0_np,
                        accum0_np,
                        grads0_np_indices,
                        grads0_np[grads0_np_indices],
                        learning_rate,
                        epsilon=1.)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
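The sparse tests use a sparse_adagrad_update_numpy reference that is likewise omitted. A sketch, assuming it applies the same update row by row to the indexed entries (so repeated indices accumulate one after another, matching the repeated-indices tests above):

def sparse_adagrad_update_numpy(param, accum, gindices, gvalues,
                                lr=0.001, epsilon=1e-7):
    param_t, accum_t = param.copy(), accum.copy()
    for i, idx in enumerate(gindices):
        # Update only the rows named by the gradient indices.
        accum_t[idx] = accum_t[idx] + gvalues[i] * gvalues[i]
        param_t[idx] = param_t[idx] - lr * gvalues[i] / (
            np.sqrt(accum_t[idx]) + epsilon)
    return param_t, accum_t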
Example #6
    def test_wide_deep_model(self, distribution, use_dataset_creator, data_fn):
        if ((not use_dataset_creator) and isinstance(
                distribution,
                tf.distribute.experimental.ParameterServerStrategy)):
            self.skipTest(
                'Parameter Server strategy requires dataset creator to be used in '
                'model.fit.')
        if (not tf.__internal__.tf2.enabled() and use_dataset_creator
                and isinstance(
                    distribution,
                    tf.distribute.experimental.ParameterServerStrategy)):
            self.skipTest(
                'Parameter Server strategy with dataset creator needs to be run when '
                'eager execution is enabled.')
        with distribution.scope():
            linear_model = linear.LinearModel(units=1)
            dnn_model = sequential.Sequential([core.Dense(units=1)])
            wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
            linear_opt = gradient_descent.SGD(learning_rate=0.05)
            dnn_opt = adagrad.Adagrad(learning_rate=0.1)
            wide_deep_model.compile(optimizer=[linear_opt, dnn_opt],
                                    loss='mse')

            if use_dataset_creator:
                x = dataset_creator.DatasetCreator(dataset_fn)
                hist = wide_deep_model.fit(x,
                                           epochs=5,
                                           steps_per_epoch=INPUT_SIZE)
            else:
                if data_fn == 'numpy':
                    inputs, output = get_numpy()
                    hist = wide_deep_model.fit(inputs, output, epochs=5)
                else:
                    hist = wide_deep_model.fit(get_dataset(), epochs=5)
            self.assertLess(hist.history['loss'][4], 0.2)
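The wide-and-deep tests depend on data helpers (INPUT_SIZE, get_numpy, get_dataset, dataset_fn) that the listing omits. The definitions below are hypothetical; any small linear-regression dataset that the SGD/Adagrad pair can fit to a loss below 0.2 in five epochs would do:

INPUT_SIZE = 64  # hypothetical sample count / steps per epoch

def get_numpy():
    inputs = np.random.uniform(low=-5.0, high=5.0,
                               size=(INPUT_SIZE, 2)).astype(np.float32)
    output = 0.3 * inputs[:, 0] + 0.2 * inputs[:, 1]
    return inputs, output

def get_dataset():
    inputs, output = get_numpy()
    return tf.data.Dataset.from_tensor_slices((inputs, output)).batch(10)

def dataset_fn(input_context):
    # DatasetCreator calls this with a tf.distribute.InputContext.
    return get_dataset().repeat()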
Example #7
 def testSparseRepeatedIndicesByEmbeddingLookUp(self):
   # TODO(tanzheny, omalleyt): Fix test in eager mode.
   with tf.Graph().as_default():
     for dtype in _DATA_TYPES:
       var_repeated = tf.Variable([1.0, 2.0], dtype=dtype)
       loss_repeated = lambda: tf.reduce_sum(  # pylint: disable=g-long-lambda
           tf.compat.v1.nn.embedding_lookup(var_repeated, [0, 0]))  # pylint: disable=cell-var-from-loop
       var_aggregated = tf.Variable([1.0, 2.0], dtype=dtype)
       loss_aggregated = lambda: 2 * tf.reduce_sum(  # pylint: disable=g-long-lambda
           tf.compat.v1.nn.embedding_lookup(var_aggregated, [0]))  # pylint: disable=cell-var-from-loop
       update_op_repeated = adagrad.Adagrad(2.0).minimize(
           loss_repeated, var_list=[var_repeated])
       update_op_aggregated = adagrad.Adagrad(2.0).minimize(
           loss_aggregated, var_list=[var_aggregated])
       self.evaluate(tf.compat.v1.global_variables_initializer())
       self.assertAllCloseAccordingToType(
           self.evaluate(var_repeated), self.evaluate(var_aggregated))
       for _ in range(3):
         self.evaluate(update_op_repeated)
         self.evaluate(update_op_aggregated)
         self.assertAllCloseAccordingToType(
             self.evaluate(var_repeated), self.evaluate(var_aggregated))
Example #8
 def test_wide_deep_model(self, distribution, data_fn):
     with distribution.scope():
         linear_model = linear.LinearModel(units=1)
         dnn_model = sequential.Sequential([core.Dense(units=1)])
         wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
         linear_opt = gradient_descent.SGD(learning_rate=0.05)
         dnn_opt = adagrad.Adagrad(learning_rate=0.1)
         wide_deep_model.compile(optimizer=[linear_opt, dnn_opt],
                                 loss='mse')
         if data_fn == 'numpy':
             inputs, output = get_numpy()
             hist = wide_deep_model.fit(inputs, output, epochs=5)
         else:
             hist = wide_deep_model.fit(get_dataset(), epochs=5)
         self.assertLess(hist.history['loss'][4], 0.2)
Example #9
    def testSharing(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)

                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate)
                # Apply the optimizer twice.  Both applications will use
                # the same accums.
                ada_update1 = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                ada_update2 = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                slot0 = ada_opt.get_slot(var0, "accumulator")
                self.assertEqual(slot0.shape, var0.shape)
                slot1 = ada_opt.get_slot(var1, "accumulator")
                self.assertEqual(slot1.shape, var1.shape)
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values.
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                # Mix the first and the second adagrad for 3 steps.
                self.evaluate(ada_update1)
                self.evaluate(ada_update2)
                self.evaluate(ada_update1)

                accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                for _ in range(3):
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, learning_rate)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, learning_rate)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))
Example #10
    def testBasicWithLearningRateInverseTimeDecay(self):
        for dtype in _DATA_TYPES:
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)

            learning_rate = 3.0
            decay = 0.5
            lr_schedule = learning_rate_schedule.InverseTimeDecay(
                learning_rate, decay_steps=1.0, decay_rate=decay)

            ada_opt = adagrad.Adagrad(lr_schedule)

            accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

            if not tf.executing_eagerly():
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

            # Fetch params to validate initial values
            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([3.0, 4.0], v1_val)

            # Run 3 steps of adagrad
            for t in range(3):
                if not tf.executing_eagerly():
                    self.evaluate(ada_update)
                else:
                    ada_opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))
                lr_np = learning_rate / (1 + decay * t)
                var0_np, accum0_np = adagrad_update_numpy(
                    var0_np, accum0_np, grads0_np, lr_np)
                var1_np, accum1_np = adagrad_update_numpy(
                    var1_np, accum1_np, grads1_np, lr_np)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))
Example #11
    def doTestBasic(self, use_callable_params=False):
        for dtype in _DATA_TYPES:
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)

            learning_rate = lambda: 3.0
            if not use_callable_params:
                learning_rate = learning_rate()

            ada_opt = adagrad.Adagrad(learning_rate)

            accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

            if not tf.executing_eagerly():
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

            # Fetch params to validate initial values
            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([3.0, 4.0], v1_val)

            # Run 3 steps of adagrad
            for _ in range(3):
                if not tf.executing_eagerly():
                    self.evaluate(ada_update)
                else:
                    ada_opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))
                var0_np, accum0_np = adagrad_update_numpy(
                    var0_np, accum0_np, grads0_np, 3.0)
                var1_np, accum1_np = adagrad_update_numpy(
                    var1_np, accum1_np, grads1_np, 3.0)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))
Example #12
    def testBasicWithLargeEpsilon(self):
        var0_np = np.array([1.0, 2.0])
        var1_np = np.array([3.0, 4.0])
        grads0_np = np.array([0.1, 0.1])
        grads1_np = np.array([0.01, 0.01])
        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 3.0

        ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0)

        accum0_np = np.array([0.1, 0.1])
        accum1_np = np.array([0.1, 0.1])

        if not tf.executing_eagerly():
            ada_update = ada_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        v0_val, v1_val = self.evaluate([var0, var1])
        self.assertAllClose([1.0, 2.0], v0_val)
        self.assertAllClose([3.0, 4.0], v1_val)

        # Run 3 steps of adagrad
        for _ in range(3):
            if not tf.executing_eagerly():
                self.evaluate(ada_update)
            else:
                ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                      grads0_np, 3.0, 1.0)
            var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                      grads1_np, 3.0, 1.0)
            self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
            self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
Example #13
  def testMinimizeSparseResourceVariable(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var0 = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
        x = tf.constant([[4.0], [5.0]], dtype=dtype)

        def loss():
          pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
          return pred * pred

        sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Fetch params to validate initial values
        self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]],
                                           self.evaluate(var0))
        # Run 1 step of adagrad
        self.evaluate(sgd_op)
        # Validate updated params
        self.assertAllCloseAccordingToType([[0, 1], [3, 4]],
                                           self.evaluate(var0),
                                           atol=0.01)
    "AdamV1", lambda: tf.compat.v1.train.AdamOptimizer(0.001, epsilon=1))
ftrl_optimizer_v1_fn = tf.__internal__.test.combinations.NamedObject(
    "FtrlV1", lambda: tf.compat.v1.train.FtrlOptimizer(0.001))
rmsprop_optimizer_v1_fn = tf.__internal__.test.combinations.NamedObject(
    "RmsPropV1", lambda: tf.compat.v1.train.RMSPropOptimizer(0.001))

# TODO(shiningsun): consider adding the other v1 optimizers
optimizers_v1 = [
    gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn,
    ftrl_optimizer_v1_fn, rmsprop_optimizer_v1_fn
]

adadelta_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001))
adagrad_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001))
adam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0))
adam_experimental_fn = tf.__internal__.test.combinations.NamedObject(
    "AdamExperimental", lambda: adam_experimental.Adam(0.001))
adamax_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "AdamaxKerasV2", lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0))
nadam_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "NadamKerasV2", lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0))
ftrl_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "FtrlKerasV2", lambda: ftrl_keras_v2.Ftrl(0.001))
gradient_descent_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "GradientDescentKerasV2", lambda: gradient_descent_keras_v2.SGD(0.001))
rmsprop_optimizer_keras_v2_fn = tf.__internal__.test.combinations.NamedObject(
    "RmsPropKerasV2", lambda: rmsprop_keras_v2.RMSprop(0.001))
Example #15
 def testAdagrad(self):
   self._compare_numerical(adagrad_old.Adagrad(), adagrad_new.Adagrad())
Example #16
 def testAdagradCompatibility(self):
   opt_v1 = optimizer_v1.Adagrad(lr=0.01)
   opt_v2 = adagrad.Adagrad(learning_rate=0.01)
   self._testOptimizersCompatibility(opt_v1, opt_v2)