Example #1
 def testSparseStability(self):
     for dtype in [dtypes.half]:
         with self.cached_session():
             shape = [1, 6]
             var0_np = np.array([[
                 0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
                 -0.0105945
             ]],
                                dtype=dtype.as_numpy_dtype)
             var0 = resource_variable_ops.ResourceVariable(var0_np)
             grads0_np = np.array([[
                 -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05,
                 -8.4877e-05, -9.48906e-05
             ]],
                                  dtype=dtype.as_numpy_dtype)
             grads0 = ops.IndexedSlices(constant_op.constant(grads0_np),
                                        constant_op.constant([0]),
                                        constant_op.constant(shape))
             ada_opt = adagrad.Adagrad(1.0)
             ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
             slot0 = ada_opt.get_slot(var0, "accumulator")
             init = variables.global_variables_initializer()
             for _ in range(100):
                 init.run()
                 ada_update.run()
                 self.assertAllCloseAccordingToType(
                     np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]),
                     slot0.eval())
                 self.assertAllCloseAccordingToType(
                     np.array([[
                         0.00891194, -0.10712013, 0.11047515, 0.22636929,
                         -0.0144573, -0.01029443
                     ]]), var0.eval())
Example #2
    def testTensorLearningRate(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with ops.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
                var0 = variables.Variable(var0_np)
                var1 = variables.Variable(var1_np)
                grads0 = constant_op.constant(grads0_np)
                grads1 = constant_op.constant(grads1_np)

                learning_rate = constant_op.constant(3.0)
                ada_opt = adagrad.Adagrad(learning_rate)
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(variables.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                # Run 3 steps of adagrad
                for _ in range(3):
                    self.evaluate(ada_update)
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, learning_rate)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, learning_rate)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
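
The assertions above compare against a NumPy reference, adagrad_update_numpy, which is not part of this listing. A minimal sketch of such a helper, assuming the standard Adagrad recurrence (accumulate the squared gradient, then divide the step by the square root of the accumulator plus a small epsilon):

    import numpy as np

    def adagrad_update_numpy(param, accum, g_t, lr=0.001, epsilon=1e-7):
        # Accumulate the squared gradient, then scale the step by 1/sqrt(accum).
        accum_t = accum + g_t * g_t
        param_t = param - lr * g_t / (np.sqrt(accum_t) + epsilon)
        return param_t, accum_t
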
Example #3
 def testSparseStability(self):
     # TODO(tanzheny, omalleyt): Fix test in eager mode.
     with ops.Graph().as_default():
         for dtype in [dtypes.half]:
             shape = [1, 6]
             var0_np = np.array([[
                 0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
                 -0.0105945
             ]],
                                dtype=dtype.as_numpy_dtype)
             var0 = variables.Variable(var0_np)
             grads0_np = np.array([[
                 -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05,
                 -8.4877e-05, -9.48906e-05
             ]],
                                  dtype=dtype.as_numpy_dtype)
             grads0 = indexed_slices.IndexedSlices(
                 constant_op.constant(grads0_np), constant_op.constant([0]),
                 constant_op.constant(shape))
             ada_opt = adagrad.Adagrad(1.0)
             ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
             slot0 = ada_opt.get_slot(var0, "accumulator")
             init = variables.global_variables_initializer()
             for _ in range(100):
                 self.evaluate(init)
                 self.evaluate(ada_update)
                 self.assertAllCloseAccordingToType(
                     np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]),
                     self.evaluate(slot0))
                 self.assertAllCloseAccordingToType(
                     np.array([[
                         0.00891194, -0.10712013, 0.11047515, 0.22636929,
                         -0.0144573, -0.01029443
                     ]]), self.evaluate(var0))
Example #4
    def testTensorLearningRate(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
                var0 = resource_variable_ops.ResourceVariable(var0_np)
                var1 = resource_variable_ops.ResourceVariable(var1_np)
                grads0 = constant_op.constant(grads0_np)
                grads1 = constant_op.constant(grads1_np)

                learning_rate = constant_op.constant(3.0)
                ada_opt = adagrad.Adagrad(learning_rate)
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                variables.global_variables_initializer().run()
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())
                accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                # Run 3 steps of adagrad
                for _ in range(3):
                    ada_update.run()
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, learning_rate)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, learning_rate)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
Example #5
 def doTestBasic(self, use_resource=False):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       if use_resource:
         var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
         var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
       else:
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
       grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
       grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
       ada_opt = adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
       ada_update = ada_opt.apply_gradients(
           zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllClose([1.0, 2.0], var0.eval())
       self.assertAllClose([3.0, 4.0], var1.eval())
       # Run 3 steps of adagrad
       for _ in range(3):
         ada_update.run()
       # Validate updated params
       self.assertAllCloseAccordingToType(
           np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
       self.assertAllCloseAccordingToType(
           np.array([2.715679168701172, 3.715679168701172]), var1.eval())
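
The hard-coded expectations follow from the same Adagrad recurrence. For example, the first entry of var0 (initial value 1.0, gradient 0.1, learning rate 3.0, accumulator starting at 0.1) can be checked by hand, ignoring the negligible epsilon:

    import numpy as np

    var, accum = 1.0, 0.1
    for _ in range(3):
        accum += 0.1 ** 2                  # 0.11, 0.12, 0.13
        var -= 3.0 * 0.1 / np.sqrt(accum)  # steps of ~0.905, ~0.866, ~0.832
    print(var)  # ~-1.60261, matching the asserted -1.6026098728179932
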
Example #6
    def testSparseSingleVarDim(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0_np = np.array([1.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

                var0 = resource_variable_ops.ResourceVariable(var0_np)
                grads0_np_indices = np.array([0], dtype=np.int32)
                grads0 = ops.IndexedSlices(
                    constant_op.constant(grads0_np[grads0_np_indices]),
                    constant_op.constant(grads0_np_indices),
                    constant_op.constant([3]))
                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.)
                ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
                variables.global_variables_initializer().run()

                # Fetch params to validate initial values
                self.assertAllClose([1.0], var0.eval())

                accum0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

                # Run 3 steps of adagrad
                for _ in range(3):
                    ada_update.run()

                    var0_np, accum0_np = sparse_adagrad_update_numpy(
                        var0_np,
                        accum0_np,
                        grads0_np_indices,
                        grads0_np[grads0_np_indices],
                        learning_rate,
                        epsilon=1.)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
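
The sparse variants compare against sparse_adagrad_update_numpy, also not shown here. A sketch under the same assumptions, applying the Adagrad step only to the rows named by the gradient indices:

    import numpy as np

    def sparse_adagrad_update_numpy(param, accum, gindexs, gvalues,
                                    lr=0.001, epsilon=1e-7):
        # Copy the dense state, then update the touched rows one by one.
        param_t, accum_t = np.copy(param), np.copy(accum)
        for i, gindex in enumerate(gindexs):
            gvalue = gvalues[i]
            accum_t[gindex] = accum_t[gindex] + gvalue * gvalue
            param_t[gindex] = param_t[gindex] - lr * gvalue / (
                np.sqrt(accum_t[gindex]) + epsilon)
        return param_t, accum_t
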
Example #7
 def testSparseBasic(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
       var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
       grads0 = ops.IndexedSlices(
           constant_op.constant(
               [0.1], shape=[1, 1], dtype=dtype),
           constant_op.constant([0]),
           constant_op.constant([2, 1]))
       grads1 = ops.IndexedSlices(
           constant_op.constant(
               [0.01], shape=[1, 1], dtype=dtype),
           constant_op.constant([1]),
           constant_op.constant([2, 1]))
       ada_opt = adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
       ada_update = ada_opt.apply_gradients(
           zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllClose([[1.0], [2.0]], var0.eval())
       self.assertAllClose([[3.0], [4.0]], var1.eval())
       # Run 3 steps of adagrad
       for _ in range(3):
         ada_update.run()
       # Validate updated params
       self.assertAllCloseAccordingToType(
           np.array([[-1.6026098728179932], [2.0]]), var0.eval())
       self.assertAllCloseAccordingToType(
           np.array([[3.0], [3.715679168701172]]), var1.eval())
Example #8
    def testSharing(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([3.0, 4.0], dtype=dtype)
                grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
                grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
                ada_opt = adagrad.Adagrad(3.0)
                # Apply the optimizer twice.  Both applications will use
                # the same accums.
                ada_update1 = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                ada_update2 = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.assertEqual(["accumulator"], ada_opt.get_slot_names())
                slot0 = ada_opt.get_slot(var0, "accumulator")
                self.assertEqual(slot0.get_shape(), var0.get_shape())
                slot1 = ada_opt.get_slot(var1, "accumulator")
                self.assertEqual(slot1.get_shape(), var1.get_shape())
                variables.global_variables_initializer().run()

                # Fetch params to validate initial values.
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())
                # Mix the first and the second adagrad for 3 steps.
                ada_update1.run()
                ada_update2.run()
                ada_update1.run()
                # Validate updated params (the same as with only 1 Adagrad).
                self.assertAllCloseAccordingToType(
                    np.array([-1.6026098728179932, -0.6026098728179932]),
                    var0.eval())
                self.assertAllCloseAccordingToType(
                    np.array([2.715679168701172, 3.715679168701172]),
                    var1.eval())
Example #9
 def testConfig(self):
     opt = adagrad.Adagrad(learning_rate=lambda: ops.convert_to_tensor(1.0),
                           initial_accumulator_value=2.0)
     config = opt.get_config()
     opt2 = adagrad.Adagrad.from_config(config)
     self.assertIsInstance(opt2._hyper["learning_rate"][1],
                           python_types.LambdaType)
     self.assertEqual(opt._initial_accumulator_value,
                      opt2._initial_accumulator_value)
Example #10
 def testDynamicShapeVariable_Ok(self):
     with self.cached_session():
         v = variable_scope.get_variable(
             "v",
             initializer=constant_op.constant(1.),
             validate_shape=False)
         self.assertFalse(v.shape.is_fully_defined())
         # Creating optimizer should cause no exception.
         adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
Example #11
 def testSparseRepeatedIndicesResourceVariable(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       var_repeated = resource_variable_ops.ResourceVariable(
           [1.0, 2.0], dtype=dtype)
       loss_repeated = math_ops.reduce_sum(
           embedding_ops.embedding_lookup(var_repeated, [0, 0]))
       var_aggregated = resource_variable_ops.ResourceVariable(
           [1.0, 2.0], dtype=dtype)
       loss_aggregated = 2 * math_ops.reduce_sum(
           embedding_ops.embedding_lookup(var_aggregated, [0]))
       update_op_repeated = adagrad.Adagrad(2.0).minimize(loss_repeated)
       update_op_aggregated = adagrad.Adagrad(2.0).minimize(loss_aggregated)
       variables.global_variables_initializer().run()
       self.assertAllCloseAccordingToType(
           var_repeated.eval(), var_aggregated.eval())
       for _ in range(3):
         update_op_repeated.run()
         update_op_aggregated.run()
         self.assertAllCloseAccordingToType(
             var_repeated.eval(), var_aggregated.eval())
Example #12
 def testSparseRepeatedIndicesByEmbeddingLookUp(self):
     for dtype in _DATA_TYPES:
         var_repeated = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                               dtype=dtype)
         loss_repeated = lambda: math_ops.reduce_sum(  # pylint: disable=g-long-lambda
             embedding_ops.embedding_lookup(var_repeated, [0, 0]))  # pylint: disable=cell-var-from-loop
         var_aggregated = resource_variable_ops.ResourceVariable(
             [1.0, 2.0], dtype=dtype)
         loss_aggregated = lambda: 2 * math_ops.reduce_sum(  # pylint: disable=g-long-lambda
             embedding_ops.embedding_lookup(var_aggregated, [0]))  # pylint: disable=cell-var-from-loop
         update_op_repeated = adagrad.Adagrad(2.0).minimize(
             loss_repeated, var_list=[var_repeated])
         update_op_aggregated = adagrad.Adagrad(2.0).minimize(
             loss_aggregated, var_list=[var_aggregated])
         self.evaluate(variables.global_variables_initializer())
         self.assertAllCloseAccordingToType(self.evaluate(var_repeated),
                                            self.evaluate(var_aggregated))
         for _ in range(3):
             self.evaluate(update_op_repeated)
             self.evaluate(update_op_aggregated)
             self.assertAllCloseAccordingToType(
                 self.evaluate(var_repeated), self.evaluate(var_aggregated))
Example #13
    def testSparseBasic(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with ops.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0, 0.01],
                                     dtype=dtype.as_numpy_dtype)

                var0 = variables.Variable(var0_np)
                var1 = variables.Variable(var1_np)
                grads0_np_indices = np.array([0, 2], dtype=np.int32)
                grads0 = ops.IndexedSlices(
                    constant_op.constant(grads0_np[grads0_np_indices]),
                    constant_op.constant(grads0_np_indices),
                    constant_op.constant([3]))
                grads1_np_indices = np.array([0, 2], dtype=np.int32)
                grads1 = ops.IndexedSlices(
                    constant_op.constant(grads1_np[grads1_np_indices]),
                    constant_op.constant(grads1_np_indices),
                    constant_op.constant([3]))
                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate)
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(variables.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1))

                accum0_np = np.array([0.1, 0.1, 0.1],
                                     dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1, 0.1],
                                     dtype=dtype.as_numpy_dtype)

                # Run 3 steps of adagrad
                for _ in range(3):
                    self.evaluate(ada_update)

                    var0_np, accum0_np = sparse_adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np_indices,
                        grads0_np[grads0_np_indices], learning_rate)
                    var1_np, accum1_np = sparse_adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np_indices,
                        grads1_np[grads1_np_indices], learning_rate)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
Example #14
 def test_numpy_input_fn_with_optimizer_instance(self):
   """Tests complete flow with optimizer_v2 instance."""
   label_dimension = 2
   batch_size = 10
   train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn(
       label_dimension, batch_size)
   self._test_complete_flow(
       train_input_fn=train_input_fn,
       eval_input_fn=eval_input_fn,
       predict_input_fn=predict_input_fn,
       input_dimension=label_dimension,
       label_dimension=label_dimension,
       batch_size=batch_size,
       optimizer=adagrad_v2.Adagrad(0.01))  # Test with optimizer_v2 instance
Example #15
 def testSparseRepeatedIndicesByEmbeddingLookUp(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         with self.cached_session():
             var_repeated = resource_variable_ops.ResourceVariable(
                 [1.0, 2.0], dtype=dtype)
             loss_repeated = lambda: math_ops.reduce_sum(  # pylint: disable=g-long-lambda
                 embedding_ops.embedding_lookup(var_repeated, [0, 0]))  # pylint: disable=cell-var-from-loop
             var_aggregated = resource_variable_ops.ResourceVariable(
                 [1.0, 2.0], dtype=dtype)
             loss_aggregated = lambda: 2 * math_ops.reduce_sum(  # pylint: disable=g-long-lambda
                 embedding_ops.embedding_lookup(var_aggregated, [0]))  # pylint: disable=cell-var-from-loop
             update_op_repeated = adagrad.Adagrad(2.0).minimize(
                 loss_repeated, var_list=[var_repeated])
             update_op_aggregated = adagrad.Adagrad(2.0).minimize(
                 loss_aggregated, var_list=[var_aggregated])
             variables.global_variables_initializer().run()
             self.assertAllCloseAccordingToType(var_repeated.eval(),
                                                var_aggregated.eval())
             for _ in range(3):
                 update_op_repeated.run()
                 update_op_aggregated.run()
                 self.assertAllCloseAccordingToType(var_repeated.eval(),
                                                    var_aggregated.eval())
Example #16
 def test_wide_deep_model(self, distribution, data_fn):
     with distribution.scope():
         linear_model = linear.LinearModel(units=1)
         dnn_model = sequential.Sequential([core.Dense(units=1)])
         wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
         linear_opt = gradient_descent.SGD(learning_rate=0.05)
         dnn_opt = adagrad.Adagrad(learning_rate=0.1)
         wide_deep_model.compile(optimizer=[linear_opt, dnn_opt],
                                 loss='mse')
         if data_fn == 'numpy':
             inputs, output = get_numpy()
             hist = wide_deep_model.fit(inputs, output, epochs=5)
         else:
             hist = wide_deep_model.fit(get_dataset(), epochs=5)
         self.assertLess(hist.history['loss'][4], 0.2)
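
The data helpers get_numpy() and get_dataset() are not included in this listing. Hypothetical stand-ins that feed the same toy regression problem to both the linear and DNN branches might look like:

    import numpy as np
    import tensorflow as tf

    def get_numpy():
        # Two input features; the target is a fixed linear combination of them.
        inputs = np.random.uniform(low=-5.0, high=5.0, size=(64, 2)).astype(np.float32)
        output = 0.3 * inputs[:, 0] + 0.2 * inputs[:, 1]
        return inputs, output

    def get_dataset():
        inputs, output = get_numpy()
        return tf.data.Dataset.from_tensor_slices((inputs, output)).batch(10)
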
Example #17
    def testBasicWithLearningRateInverseTimeDecay(self):
        for dtype in [dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
                var0 = resource_variable_ops.ResourceVariable(var0_np)
                var1 = resource_variable_ops.ResourceVariable(var1_np)
                grads0 = constant_op.constant(grads0_np)
                grads1 = constant_op.constant(grads1_np)

                learning_rate = 3.0
                decay = 0.5
                lr_schedule = learning_rate_schedule.InverseTimeDecay(
                    learning_rate, decay_steps=1.0, decay_rate=decay)

                ada_opt = adagrad.Adagrad(lr_schedule)

                accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

                if not context.executing_eagerly():
                    ada_update = ada_opt.apply_gradients(
                        zip([grads0, grads1], [var0, var1]))
                    self.evaluate(variables.global_variables_initializer())

                # Fetch params to validate initial values
                v0_val, v1_val = self.evaluate([var0, var1])
                self.assertAllClose([1.0, 2.0], v0_val)
                self.assertAllClose([3.0, 4.0], v1_val)

                # Run 3 steps of adagrad
                for t in range(3):
                    if not context.executing_eagerly():
                        self.evaluate(ada_update)
                    else:
                        ada_opt.apply_gradients(
                            zip([grads0, grads1], [var0, var1]))
                    lr_np = learning_rate / (1 + decay * t)
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, lr_np)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, lr_np)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
Example #18
    def testSharing(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with ops.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = variables.Variable(var0_np)
                var1 = variables.Variable(var1_np)
                grads0 = constant_op.constant(grads0_np)
                grads1 = constant_op.constant(grads1_np)

                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate)
                # Apply the optimizer twice.  Both applications will use
                # the same accums.
                ada_update1 = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                ada_update2 = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                slot0 = ada_opt.get_slot(var0, "accumulator")
                self.assertEqual(slot0.shape, var0.shape)
                slot1 = ada_opt.get_slot(var1, "accumulator")
                self.assertEqual(slot1.shape, var1.shape)
                self.evaluate(variables.global_variables_initializer())

                # Fetch params to validate initial values.
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                # Mix the first and the second adagrad for 3 steps.
                self.evaluate(ada_update1)
                self.evaluate(ada_update2)
                self.evaluate(ada_update1)

                accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                for _ in range(3):
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, learning_rate)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, learning_rate)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))
Example #19
    def doTestBasic(self, use_callable_params=False):
        for dtype in [dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
                var0 = resource_variable_ops.ResourceVariable(var0_np)
                var1 = resource_variable_ops.ResourceVariable(var1_np)
                grads0 = constant_op.constant(grads0_np)
                grads1 = constant_op.constant(grads1_np)

                learning_rate = lambda: 3.0
                if not use_callable_params:
                    learning_rate = learning_rate()

                ada_opt = adagrad.Adagrad(learning_rate)

                accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

                if not context.executing_eagerly():
                    ada_update = ada_opt.apply_gradients(
                        zip([grads0, grads1], [var0, var1]))
                    self.evaluate(variables.global_variables_initializer())

                # Fetch params to validate initial values
                v0_val, v1_val = self.evaluate([var0, var1])
                self.assertAllClose([1.0, 2.0], v0_val)
                self.assertAllClose([3.0, 4.0], v1_val)

                # Run 3 steps of adagrad
                for _ in range(3):
                    if not context.executing_eagerly():
                        self.evaluate(ada_update)
                    else:
                        ada_opt.apply_gradients(
                            zip([grads0, grads1], [var0, var1]))
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, 3.0)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, 3.0)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
Example #20
    def testSharing(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = resource_variable_ops.ResourceVariable(var0_np)
                var1 = resource_variable_ops.ResourceVariable(var1_np)
                grads0 = constant_op.constant(grads0_np)
                grads1 = constant_op.constant(grads1_np)

                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate)
                # Apply the optimizer twice.  Both applications will use
                # the same accums.
                ada_update1 = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                ada_update2 = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                slot0 = ada_opt.get_slot(var0, "accumulator")
                self.assertEqual(slot0.shape, var0.shape)
                slot1 = ada_opt.get_slot(var1, "accumulator")
                self.assertEqual(slot1.shape, var1.shape)
                variables.global_variables_initializer().run()

                # Fetch params to validate initial values.
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())
                # Mix the first and the second adagrad for 3 steps.
                ada_update1.run()
                ada_update2.run()
                ada_update1.run()

                accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                for _ in range(3):
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, learning_rate)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, learning_rate)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))
Example #21
    def testBasicWithLearningRateDecay(self):
        for dtype in _DATA_TYPES:
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
            var0 = variables.Variable(var0_np)
            var1 = variables.Variable(var1_np)
            grads0 = constant_op.constant(grads0_np)
            grads1 = constant_op.constant(grads1_np)

            learning_rate = 3.0
            decay = 0.5

            ada_opt = adagrad.Adagrad(learning_rate, decay=decay)

            accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

            if not context.executing_eagerly():
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(variables.global_variables_initializer())

            # Fetch params to validate initial values
            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([3.0, 4.0], v1_val)

            # Run 3 steps of adagrad
            for t in range(3):
                if not context.executing_eagerly():
                    self.evaluate(ada_update)
                else:
                    ada_opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))
                lr_np = learning_rate / (1 + decay * t)
                var0_np, accum0_np = adagrad_update_numpy(
                    var0_np, accum0_np, grads0_np, lr_np)
                var1_np, accum1_np = adagrad_update_numpy(
                    var1_np, accum1_np, grads1_np, lr_np)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))
Example #22
 def testMinimizeSparseResourceVariable(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       var0 = resource_variable_ops.ResourceVariable(
           [[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
       x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
       pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
       loss = pred * pred
       sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0])
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType(
           [[1.0, 2.0], [3.0, 4.0]], var0.eval())
       # Run 1 step of sgd
       sgd_op.run()
       # Validate updated params
       self.assertAllCloseAccordingToType(
           [[0, 1], [3, 4]], var0.eval(), atol=0.01)
Example #23
    def testBasicWithLargeEpsilon(self):
        with self.cached_session():
            var0_np = np.array([1.0, 2.0])
            var1_np = np.array([3.0, 4.0])
            grads0_np = np.array([0.1, 0.1])
            grads1_np = np.array([0.01, 0.01])
            var0 = resource_variable_ops.ResourceVariable(var0_np)
            var1 = resource_variable_ops.ResourceVariable(var1_np)
            grads0 = constant_op.constant(grads0_np)
            grads1 = constant_op.constant(grads1_np)

            learning_rate = 3.0

            ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0)

            accum0_np = np.array([0.1, 0.1])
            accum1_np = np.array([0.1, 0.1])

            if not context.executing_eagerly():
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(variables.global_variables_initializer())

            # Fetch params to validate initial values
            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([3.0, 4.0], v1_val)

            # Run 3 steps of adagrad
            for _ in range(3):
                if not context.executing_eagerly():
                    self.evaluate(ada_update)
                else:
                    ada_opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))
                var0_np, accum0_np = adagrad_update_numpy(
                    var0_np, accum0_np, grads0_np, 3.0, 1.0)
                var1_np, accum1_np = adagrad_update_numpy(
                    var1_np, accum1_np, grads1_np, 3.0, 1.0)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))
Example #24
  def testMinimizeSparseResourceVariable(self):
    for dtype in _DATA_TYPES:
      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0], [3.0, 4.0]],
                                                    dtype=dtype)
      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)

      def loss():
        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
        return pred * pred

      sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0])
      self.evaluate(variables.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]],
                                         self.evaluate(var0))
      # Run 1 step of sgd
      self.evaluate(sgd_op)
      # Validate updated params
      self.assertAllCloseAccordingToType([[0, 1], [3, 4]],
                                         self.evaluate(var0),
                                         atol=0.01)
    "GradientDescentV1",
    lambda: gradient_descent.GradientDescentOptimizer(0.2))
adagrad_optimizer_v1_fn = combinations.NamedObject(
    "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001))
adam_optimizer_v1_fn = combinations.NamedObject(
    "AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1))
rmsprop_optimizer_v1_fn = combinations.NamedObject(
    "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001))

# TODO(shiningsun): consider adding the other v1 optimizers
optimizers_v1 = [gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn]

adadelta_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001))
adagrad_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001))
adam_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0))
adamax_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdamaxKerasV2", lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0))
nadam_optimizer_keras_v2_fn = combinations.NamedObject(
    "NadamKerasV2", lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0))
ftrl_optimizer_keras_v2_fn = combinations.NamedObject(
    "FtrlKerasV2", lambda: ftrl_keras_v2.Ftrl(0.001))
gradient_descent_optimizer_keras_v2_fn = combinations.NamedObject(
    "GradientDescentKerasV2", lambda: gradient_descent_keras_v2.SGD(0.2))
rmsprop_optimizer_keras_v2_fn = combinations.NamedObject(
    "RmsPropKerasV2", lambda: rmsprop_keras_v2.RMSprop(0.001))

# TODO(shiningsun): consider adding the other v2 optimizers
optimizers_v2 = [
Example #26
 def testAdagradCompatibility(self):
     opt_v1 = optimizers.Adagrad(lr=0.01)
     opt_v2 = adagrad.Adagrad(learning_rate=0.01)
     self._testOptimizersCompatibility(opt_v1, opt_v2)
Example #27
# Hyper-parameters of optimizer in graph.
HP_IN_GRAPH = {
    'Adam': ['decay', 'learning_rate'],
    'Ftrl': [
        'decay', 'l1_regularization_strength', 'l2_regularization_strength',
        'learning_rate', 'learning_rate_power'
    ],
    'RMSProp': ['decay', 'learning_rate', 'momentum', 'rho'],
    'Adagrad': ['decay', 'learning_rate'],
    'SGD': ['decay', 'learning_rate', 'momentum'],
}

# Optimizer v2 instances.
OPT_V2_INSTANCE = {
    'Adagrad': adagrad.Adagrad(),
    'Adam': adam.Adam(),
    'Ftrl': ftrl.Ftrl(),
    'RMSProp': rmsprop.RMSprop(),
    'SGD': gradient_descent.SGD(),
}


def _add_new_variable(initial_value, var_name_v2, var_name_v1, var_map,
                      var_names_map):
    """Creates a new variable and add it to the variable maps."""
    var = tf.Variable(initial_value, name=var_name_v2)
    var_map[var_name_v2] = var
    var_names_map[var_name_v2] = var_name_v1