Example #1
    def testAdagradDAWithL1_L2(self):
        for dtype in self.float_types:
            with self.test_session(), self.test_scope():
                global_step = resource_variable_ops.ResourceVariable(
                    0, dtype=dtypes.int64)
                var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                              dtype=dtype)
                var1 = resource_variable_ops.ResourceVariable([4.0, 3.0],
                                                              dtype=dtype)
                grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
                grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

                opt = adagrad_da.AdagradDAOptimizer(
                    3.0,
                    global_step,
                    initial_gradient_squared_accumulator_value=0.1,
                    l1_regularization_strength=0.001,
                    l2_regularization_strength=2.0)
                update = opt.apply_gradients(zip([grads0, grads1],
                                                 [var0, var1]),
                                             global_step=global_step)
                variables.global_variables_initializer().run()

                self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
                self.assertAllCloseAccordingToType([4.0, 3.0], var1.eval())

                # Run a step of AdagradDA
                update.run()

                self.assertAllCloseAccordingToType(
                    np.array([-0.046907, -0.093659]), var0.eval())
                self.assertAllCloseAccordingToType(
                    np.array([-0.004275, -0.009023]), var1.eval())
Example #2
    def testAdagradDAWithL1_L2(self):
        for dtype in [dtypes.float64, dtypes.float32]:
            with self.cached_session() as sess:
                global_step = variables.Variable(0, dtype=dtypes.int64)
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([4.0, 3.0], dtype=dtype)
                grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
                grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

                opt = adagrad_da.AdagradDAOptimizer(
                    3.0,
                    global_step,
                    initial_gradient_squared_accumulator_value=0.1,
                    l1_regularization_strength=0.001,
                    l2_regularization_strength=2.0)
                update = opt.apply_gradients(zip([grads0, grads1],
                                                 [var0, var1]),
                                             global_step=global_step)
                self.evaluate(variables.global_variables_initializer())

                v0_val, v1_val = self.evaluate([var0, var1])
                self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
                self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

                # Run a step of AdagradDA
                update.run()

                v0_val, v1_val = self.evaluate([var0, var1])
                self.assertAllCloseAccordingToType(
                    np.array([-0.046907, -0.093659]), v0_val)
                self.assertAllCloseAccordingToType(
                    np.array([-0.004275, -0.009023]), v1_val)
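
The expected values in Examples #1 and #2 follow from the AdagradDA dual-averaging update spelled out in the comments of Examples #5 and #7. Below is a minimal NumPy sketch that reproduces them; it is independent of TensorFlow, and the adagrad_da_step helper is purely illustrative, not part of any TF API.

import numpy as np

# w = sign(-g) * lr * max(|g| - l1*T, 0) / (l2*T*lr + sqrt(k + gg)),
# where g is the gradient accumulator, gg the squared-gradient accumulator,
# T the global step, lr the learning rate, and k the initial accumulator value.
def adagrad_da_step(g, lr=3.0, T=1, k=0.1, l1=0.001, l2=2.0):
    gg = g * g  # accumulators after a single step with gradient g
    return np.sign(-g) * lr * np.maximum(np.abs(g) - l1 * T, 0.0) / (
        l2 * T * lr + np.sqrt(k + gg))

print(adagrad_da_step(np.array([0.1, 0.2])))    # ~[-0.046907, -0.093659]
print(adagrad_da_step(np.array([0.01, 0.02])))  # ~[-0.004275, -0.009023]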
Example #3
    def testAdagradDAwithoutRegularizationBasic2(self):
        for dtype in [dtypes.float64, dtypes.float32]:
            with ops.Graph().as_default(), self.cached_session():
                global_step = variables.Variable(0, dtype=dtypes.int64)
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([4.0, 3.0], dtype=dtype)
                grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
                grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

                opt = adagrad_da.AdagradDAOptimizer(
                    3.0,
                    global_step,
                    initial_gradient_squared_accumulator_value=0.1,
                    l1_regularization_strength=0.0,
                    l2_regularization_strength=0.0)
                update = opt.apply_gradients(zip([grads0, grads1],
                                                 [var0, var1]),
                                             global_step=global_step)
                self.evaluate(variables.global_variables_initializer())

                v0_val, v1_val = self.evaluate([var0, var1])
                self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
                self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

                # Run a step of AdagradDA
                update.run()

                v0_val, v1_val = self.evaluate([var0, var1])
                self.assertAllCloseAccordingToType(
                    np.array([-0.904534, -1.603567]), v0_val)
                self.assertAllCloseAccordingToType(
                    np.array([-0.094821, -0.189358]), v1_val)
Example #4
    def testAdagradDAwithoutRegularizationBasic2(self):
        for dtype in self.float_types:
            with self.session(), self.test_scope():
                global_step = resource_variable_ops.ResourceVariable(
                    0, dtype=dtypes.int64)
                var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                              dtype=dtype)
                var1 = resource_variable_ops.ResourceVariable([4.0, 3.0],
                                                              dtype=dtype)
                grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
                grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

                opt = adagrad_da.AdagradDAOptimizer(
                    3.0,
                    global_step,
                    initial_gradient_squared_accumulator_value=0.1,
                    l1_regularization_strength=0.0,
                    l2_regularization_strength=0.0)
                update = opt.apply_gradients(zip([grads0, grads1],
                                                 [var0, var1]),
                                             global_step=global_step)
                variables.global_variables_initializer().run()

                self.assertAllCloseAccordingToType([1.0, 2.0],
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType([4.0, 3.0],
                                                   self.evaluate(var1))

                # Run a step of AdagradDA
                update.run()

                self.assertAllCloseAccordingToType(
                    np.array([-0.904534, -1.603567]), self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([-0.094821, -0.189358]), self.evaluate(var1))
Example #5
  def testAdagradDAWithoutRegularizationBasic1(self):
    for dtype in self.float_types:
      with self.cached_session(), self.test_scope():
        global_step = resource_variable_ops.ResourceVariable(
            0, dtype=dtypes.int64)
        var0 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = adagrad_da.AdagradDAOptimizer(
            3.0,
            global_step,
            initial_gradient_squared_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]), global_step=global_step)
        variables.global_variables_initializer().run()

        self.assertAllClose([0.0, 0.0], self.evaluate(var0))
        self.assertAllClose([0.0, 0.0], self.evaluate(var1))

        # Run a step of AdagradDA
        update.run()

        # Let g be the gradient accumulator, gg the gradient squared
        # accumulator, T the global step, lr the learning rate, and k the
        # initial gradient squared accumulator value.
        # w = \dfrac{sign(-g)*lr*|g - l1*T|_{+}}{l2*T*lr + \sqrt{k+gg}}
        # For var0[0]: sign(-0.1)*3.0*(0.1 - 0)/(0 + sqrt(0.1 + 0.1*0.1))
        # = -0.904534, and similarly for the other entries.
        self.assertAllCloseAccordingToType(
            np.array([-0.904534, -1.603567]), self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([-0.094821, -0.189358]), self.evaluate(var1))
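
Plugging the test's numbers into the formula from the comment above (with l1 = l2 = 0 and T = 1) reproduces the expected values. A minimal NumPy sketch, shown only as a sanity check and not part of the test suite:

import numpy as np

g0 = np.array([0.1, 0.2])    # grads0, accumulated over one step
g1 = np.array([0.01, 0.02])  # grads1
lr, k = 3.0, 0.1             # learning rate, initial squared accumulator
# With l1 = l2 = 0 and T = 1 the denominator reduces to sqrt(k + g**2).
print(-lr * g0 / np.sqrt(k + g0 * g0))  # ~[-0.904534, -1.603567]
print(-lr * g1 / np.sqrt(k + g1 * g1))  # ~[-0.094821, -0.189358]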
Example #6
 def test_ops_with_var_and_adagrad_da(self):
     var_list = [
         deo.get_variable('sp_var', initializer=0.0, dim=2),
     ]
     gstep = training_util.create_global_step()
     opt_list = [
         adagrad_da.AdagradDAOptimizer(0.1, gstep),
     ]
     self.common_run_context(var_list, opt_list, name='adagrad_da_test')
Example #7
    def doTestAdagradDAwithoutRegularizationBasic1(self, use_resource=False):
        for dtype in [dtypes.float64, dtypes.float32]:
            with ops.Graph().as_default(), self.cached_session():
                global_step = variables.Variable(0, dtype=dtypes.int64)
                if use_resource:
                    var0 = resource_variable_ops.ResourceVariable([0.0, 0.0],
                                                                  dtype=dtype)
                    var1 = resource_variable_ops.ResourceVariable([0.0, 0.0],
                                                                  dtype=dtype)
                else:
                    var0 = variables.Variable([0.0, 0.0], dtype=dtype)
                    var1 = variables.Variable([0.0, 0.0], dtype=dtype)
                grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
                grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
                opt = adagrad_da.AdagradDAOptimizer(
                    3.0,
                    global_step,
                    initial_gradient_squared_accumulator_value=0.1,
                    l1_regularization_strength=0.0,
                    l2_regularization_strength=0.0)
                update = opt.apply_gradients(zip([grads0, grads1],
                                                 [var0, var1]),
                                             global_step=global_step)
                self.evaluate(variables.global_variables_initializer())

                v0_val, v1_val = self.evaluate([var0, var1])
                self.assertAllClose([0.0, 0.0], v0_val)
                self.assertAllClose([0.0, 0.0], v1_val)

                # Run a step of AdagradDA
                update.run()

                v0_val, v1_val = self.evaluate([var0, var1])
                # Let g be the gradient accumulator, gg be the gradient squared
                # accumulator, T be the global step, lr be the learning rate,
                # and k the initial gradient squared accumulator value.
                # w = \dfrac{sign(-g)*lr*|g - l1*T|_{+}}{l2*T*lr + \sqrt{k+gg}}
                # For var0[0]: sign(-0.1)*3.0*(0.1 - 0)/(0 + sqrt(0.1 + 0.1*0.1))
                # = -0.904534, and similarly for the other entries.
                self.assertAllCloseAccordingToType(
                    np.array([-0.904534, -1.603567]), v0_val)
                self.assertAllCloseAccordingToType(
                    np.array([-0.094821, -0.189358]), v1_val)
Example #8
 def testMinimizeSparseResourceVariable(self):
   for dtype in [dtypes.float32, dtypes.float64]:
     with self.cached_session():
       var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
       global_step = resource_variable_ops.ResourceVariable(
           0, dtype=dtypes.int64)
       x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
       pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
       loss = pred * pred
       sgd_op = adagrad_da.AdagradDAOptimizer(
           1.0, global_step).minimize(loss)
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
       # Run 1 step of AdagradDA
       sgd_op.run()
       # Validate updated params
       self.assertAllCloseAccordingToType(
           [[-1, -1]], var0.eval(), rtol=0.01)
Example #9
 def test_adagrad_da_apply_restriction(self):
     gstep = training_util.create_global_step()
     opt = adagrad_da.AdagradDAOptimizer(0.1, gstep)
     self.commonly_apply_restriction_verify(opt)
Example #10
 def test_adagrad_da_restrict_on_policy(self):
     gstep = training_util.create_global_step()
     opt = adagrad_da.AdagradDAOptimizer(0.1, gstep)
     self.common_single_step_restrict_verification(opt)
Example #11
    def test_adagradda_minimize_trainable(self):
        base_gs = training_util.create_global_step()

        base_opt = adagrad_da.AdagradDAOptimizer(1.0, base_gs)
        test_opt = adagrad_da.AdagradDAOptimizer(1.0, base_gs)
        self.common_minimize_trainable(base_opt, test_opt, name="adagrad_da")