Example No. 1
    def doTestProximalGradientDescentwithoutRegularization(
            self, use_resource=False):
        with self.cached_session() as sess:
            if use_resource:
                var0 = resource_variable_ops.ResourceVariable([0.0, 0.0])
                var1 = resource_variable_ops.ResourceVariable([0.0, 0.0])
            else:
                var0 = variables.Variable([0.0, 0.0])
                var1 = variables.Variable([0.0, 0.0])
            grads0 = constant_op.constant([0.1, 0.2])
            grads1 = constant_op.constant([0.01, 0.02])
            opt = proximal_gradient_descent.ProximalGradientDescentOptimizer(
                3.0,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0)
            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(variables.global_variables_initializer())

            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([0.0, 0.0], v0_val)
            self.assertAllClose([0.0, 0.0], v1_val)

            # Run 3 steps of Proximal Gradient Descent.
            for _ in range(3):
                update.run()

            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose(np.array([-0.9, -1.8]), v0_val)
            self.assertAllClose(np.array([-0.09, -0.18]), v1_val)
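With both regularization strengths set to 0.0, the proximal step reduces to plain gradient descent, so the expected values are just -(steps * learning_rate * grad). A quick NumPy hand-check of the assertions above (not part of the test itself):

import numpy as np

learning_rate, steps = 3.0, 3
grads0 = np.array([0.1, 0.2])
grads1 = np.array([0.01, 0.02])

print(0.0 - steps * learning_rate * grads0)  # [-0.9 -1.8]
print(0.0 - steps * learning_rate * grads1)  # [-0.09 -0.18]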
Example No. 2
    def testProximalGradientDescentwithoutRegularization2(self):
        with self.cached_session() as sess:
            var0 = variables.Variable([1.0, 2.0])
            var1 = variables.Variable([4.0, 3.0])
            grads0 = constant_op.constant([0.1, 0.2])
            grads1 = constant_op.constant([0.01, 0.02])

            opt = proximal_gradient_descent.ProximalGradientDescentOptimizer(
                3.0,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0)
            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(variables.global_variables_initializer())

            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([4.0, 3.0], v1_val)

            # Run 3 steps of Proximal Gradient Descent.
            for _ in range(3):
                update.run()

            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose(np.array([0.1, 0.2]), v0_val)
            self.assertAllClose(np.array([3.91, 2.82]), v1_val)
Example No. 3
    def testProximalGradientDescentWithL1_L2(self):
        with self.session(), self.test_scope():
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0])
            var1 = resource_variable_ops.ResourceVariable([4.0, 3.0])
            grads0 = constant_op.constant([0.1, 0.2])
            grads1 = constant_op.constant([0.01, 0.02])

            opt = proximal_gradient_descent.ProximalGradientDescentOptimizer(
                3.0,
                l1_regularization_strength=0.001,
                l2_regularization_strength=2.0)
            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(variables.global_variables_initializer())

            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([4.0, 3.0], self.evaluate(var1))

            # Run 10 steps of Proximal Gradient Descent.
            for _ in range(10):
                update.run()

            self.assertAllClose(np.array([-0.0495, -0.0995]),
                                self.evaluate(var0))
            self.assertAllClose(np.array([-0.0045, -0.0095]),
                                self.evaluate(var1))
Example No. 4
    def testProximalGradientDescentWithL1_L2(self):
        with self.test_session() as sess:
            var0 = variables.Variable([1.0, 2.0])
            var1 = variables.Variable([4.0, 3.0])
            grads0 = constant_op.constant([0.1, 0.2])
            grads1 = constant_op.constant([0.01, 0.02])

            opt = proximal_gradient_descent.ProximalGradientDescentOptimizer(
                3.0,
                l1_regularization_strength=0.001,
                l2_regularization_strength=2.0)
            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            variables.global_variables_initializer().run()

            v0_val, v1_val = sess.run([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([4.0, 3.0], v1_val)

            # Run 10 steps of Proximal Gradient Descent.
            for _ in range(10):
                update.run()

            v0_val, v1_val = sess.run([var0, var1])
            self.assertAllClose(np.array([0.037125, 0.074625]), v0_val)
            self.assertAllClose(np.array([0.003375, 0.007125]), v1_val)
Example No. 5
    def testProximalGradientDescentWithL1_L2(self):
        with ops.Graph().as_default(), self.cached_session():
            var0 = variables.Variable([1.0, 2.0])
            var1 = variables.Variable([4.0, 3.0])
            grads0 = constant_op.constant([0.1, 0.2])
            grads1 = constant_op.constant([0.01, 0.02])

            opt = proximal_gradient_descent.ProximalGradientDescentOptimizer(
                3.0,
                l1_regularization_strength=0.001,
                l2_regularization_strength=2.0)
            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(variables.global_variables_initializer())

            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([4.0, 3.0], v1_val)

            # Run 10 steps of Proximal Gradient Descent.
            for _ in range(10):
                update.run()

            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose(np.array([-0.0495, -0.0995]), v0_val)
            self.assertAllClose(np.array([-0.0045, -0.0095]), v1_val)
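For the L1/L2 cases, the expected values can be reproduced by iterating the update documented for TensorFlow's ApplyProximalGradientDescent op: prox_v = var - lr * grad, then var = sign(prox_v) / (1 + lr * l2) * max(|prox_v| - lr * l1, 0). The NumPy sketch below is an independent hand-check of the [-0.0495, -0.0995] / [-0.0045, -0.0095] assertions, not code taken from the tests themselves:

import numpy as np

def proximal_gd_step(var, grad, lr=3.0, l1=0.001, l2=2.0):
    # Re-implementation of the update documented for the
    # ApplyProximalGradientDescent op, used here only as a hand-check.
    prox_v = var - lr * grad
    shrunk = np.maximum(np.abs(prox_v) - lr * l1, 0.0)
    return np.sign(prox_v) * shrunk / (1.0 + lr * l2)

var0, var1 = np.array([1.0, 2.0]), np.array([4.0, 3.0])
grads0, grads1 = np.array([0.1, 0.2]), np.array([0.01, 0.02])
for _ in range(10):
    var0 = proximal_gd_step(var0, grads0)
    var1 = proximal_gd_step(var1, grads1)

print(var0)  # approx. [-0.0495, -0.0995]
print(var1)  # approx. [-0.0045, -0.0095]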
Example No. 6
 def test_ops_with_var_and_pgd(self):
     var_list = [
         deo.get_variable('sp_var', initializer=0.0, dim=2),
     ]
     opt_list = [
         pgd.ProximalGradientDescentOptimizer(0.1),
     ]
     self.common_run_context(var_list, opt_list, name='pgd_test')
Example No. 7
    def testEquivGradientDescentwithoutRegularization(self):
        with ops.Graph().as_default(), self.cached_session():
            val0, val1 = self.applyOptimizer(
                proximal_gradient_descent.ProximalGradientDescentOptimizer(
                    3.0,
                    l1_regularization_strength=0.0,
                    l2_regularization_strength=0.0))

            val2, val3 = self.applyOptimizer(
                gradient_descent.GradientDescentOptimizer(3.0))

        self.assertAllClose(val0, val2)
        self.assertAllClose(val1, val3)
Example No. 8
    def testEquivGradientDescentwithoutRegularization(self):
        with self.session(), self.test_scope():
            val0, val1 = self.applyOptimizer(
                proximal_gradient_descent.ProximalGradientDescentOptimizer(
                    3.0,
                    l1_regularization_strength=0.0,
                    l2_regularization_strength=0.0))

        with self.session(), self.test_scope():
            val2, val3 = self.applyOptimizer(
                gradient_descent.GradientDescentOptimizer(3.0))

        self.assertAllClose(val0, val2)
        self.assertAllClose(val1, val3)
Example No. 9
    def testEquivSparseGradientDescentwithoutRegularization(self):
        with self.cached_session():
            val0, val1 = self.applyOptimizer(
                proximal_gradient_descent.ProximalGradientDescentOptimizer(
                    3.0,
                    l1_regularization_strength=0.0,
                    l2_regularization_strength=0.0),
                is_sparse=True)

        with self.cached_session():
            val2, val3 = self.applyOptimizer(
                gradient_descent.GradientDescentOptimizer(3.0), is_sparse=True)

        self.assertAllClose(val0, val2)
        self.assertAllClose(val1, val3)
Example No. 10
 def testMinimizeSparseResourceVariable(self):
   for dtype in [dtypes.float32, dtypes.float64]:
     with self.test_session():
       var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
       x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
       pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
       loss = pred * pred
       sgd_op = proximal_gradient_descent.ProximalGradientDescentOptimizer(
           1.0).minimize(loss)
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
       # Run 1 step of sgd
       sgd_op.run()
       # Validate updated params
       self.assertAllCloseAccordingToType(
           [[-111, -138]], var0.eval(), atol=0.01)
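The expected update in this sparse-lookup test follows from a short gradient calculation: pred = [1, 2] · [4, 5]ᵀ = 14, the gradient of pred² with respect to var0 is 2 · pred · xᵀ = [112, 140], and with the default zero regularization one proximal step at learning rate 1.0 is a plain SGD step. A NumPy hand-check (not part of the test):

import numpy as np

var0 = np.array([[1.0, 2.0]])
x = np.array([[4.0], [5.0]])

pred = var0 @ x           # [[14.0]]
grad = 2.0 * pred * x.T   # d(pred**2)/d(var0) = [[112.0, 140.0]]
print(var0 - 1.0 * grad)  # [[-111.0, -138.0]]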
Example No. 11
  def testResourceProximalGradientDescentwithoutRegularization(self):
    with self.session(), self.test_scope():
      var0 = resource_variable_ops.ResourceVariable([0.0, 0.0])
      var1 = resource_variable_ops.ResourceVariable([0.0, 0.0])
      grads0 = constant_op.constant([0.1, 0.2])
      grads1 = constant_op.constant([0.01, 0.02])
      opt = proximal_gradient_descent.ProximalGradientDescentOptimizer(
          3.0, l1_regularization_strength=0.0, l2_regularization_strength=0.0)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      variables.global_variables_initializer().run()

      self.assertAllClose([0.0, 0.0], self.evaluate(var0))
      self.assertAllClose([0.0, 0.0], self.evaluate(var1))

      # Run 3 steps of Proximal Gradient Descent.
      for _ in range(3):
        update.run()

      self.assertAllClose(np.array([-0.9, -1.8]), self.evaluate(var0))
      self.assertAllClose(np.array([-0.09, -0.18]), self.evaluate(var1))
Example No. 12
    def testProximalGradientDescentWithL1(self):
        with self.cached_session(), self.test_scope():
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0])
            var1 = resource_variable_ops.ResourceVariable([4.0, 3.0])
            grads0 = constant_op.constant([0.1, 0.2])
            grads1 = constant_op.constant([0.01, 0.02])

            opt = proximal_gradient_descent.ProximalGradientDescentOptimizer(
                3.0,
                l1_regularization_strength=0.001,
                l2_regularization_strength=0.0)
            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            variables.global_variables_initializer().run()

            self.assertAllClose([1.0, 2.0], var0.eval())
            self.assertAllClose([4.0, 3.0], var1.eval())

            # Run 10 steps of proximal gradient descent.
            for _ in range(10):
                update.run()

            self.assertAllClose(np.array([-1.988, -3.988001]), var0.eval())
            self.assertAllClose(np.array([3.67, 2.37]), var1.eval())
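Setting l2 to 0.0 in the hand-check sketch shown after Example No. 5 reproduces these L1-only expectations as well: each step takes a plain gradient step and then soft-thresholds the result toward zero by lr * l1, which after 10 steps gives var0 ≈ [-1.988, -3.988] and var1 ≈ [3.67, 2.37].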
Example No. 13
 def test_proximalsgd_minimize_trainable(self):
     base_opt = pgd.ProximalGradientDescentOptimizer(1.0)
     test_opt = pgd.ProximalGradientDescentOptimizer(1.0)
     self.common_minimize_trainable(base_opt, test_opt, name="proximal_sgd")
Example No. 14
 def test_pgd_apply_restriction(self):
     opt = pgd.ProximalGradientDescentOptimizer(0.1)
     self.commonly_apply_restriction_verify(opt)
Example No. 15
 def test_pgd_restrictor_update(self):
     opt = pgd.ProximalGradientDescentOptimizer(0.1)
     self.common_single_step_update_verification(opt)
Example No. 16
 def test_pgd_restrict_on_policy(self):
     opt = pgd.ProximalGradientDescentOptimizer(0.1)
     self.common_single_step_restrict_verification(opt)
Example No. 17
def optimizer(name='adam', l_rate=0.01, decay=0.0, **kwargs):
    '''
    Define the optimizer with default parameters, except for the learning rate.
    Note that most optimizers are not meant to have their specially
    designed parameters modified.
    When using the Adabound optimizers, we suggest specifying gamma
    according to common practice.
    Options:
        name: the name of the optimizer (default='adam') (available: 'adam',
              'amsgrad', 'adamax', 'adabound', 'amsbound', 'nadam',
              'namsgrad', 'nadabound', 'namsbound', 'adadelta', 'rms',
              'adagrad', 'adamw', 'nmoment', 'moment', 'sgd', 'proximal')
        l_rate: learning rate (default=0.01)
        decay: decay ratio ('adadeltaDA' does not support this option)
        other parameters: see the usage of the specific optimizer.
    Return:
        the particular optimizer object.
    '''
    name = name.casefold()
    if name == 'adam':
        return optimizers.Adam(l_rate, decay=decay, **kwargs)
    elif name == 'amsgrad':
        return optimizers.Adam(l_rate, decay=decay, amsgrad=True, **kwargs)
    elif name == 'adamax':
        return optimizers.Adamax(l_rate, decay=decay, **kwargs)
    elif name == 'adabound':
        return Adabound(l_rate, decay=decay, **kwargs)
    elif name == 'amsbound':
        return Adabound(l_rate, decay=decay, amsgrad=True, **kwargs)
    elif name == 'nadam':
        return MNadam(l_rate, decay=decay, **kwargs)
    elif name == 'namsgrad':
        return MNadam(l_rate, decay=decay, amsgrad=True, **kwargs)
    elif name == 'nadabound':
        return Nadabound(l_rate, decay=decay, **kwargs)
    elif name == 'namsbound':
        return Nadabound(l_rate, decay=decay, amsgrad=True, **kwargs)
    elif name == 'adadelta':
        return optimizers.Adadelta(l_rate, decay=decay, **kwargs)
    elif name == 'rms':
        return optimizers.RMSprop(l_rate, decay=decay, **kwargs)
    elif name == 'adagrad':
        return optimizers.Adagrad(l_rate, decay=decay, **kwargs)
    elif name == 'adamw':
        if compat.COMPATIBLE_MODE['1.14']:
            raise ImportError(
                'This optimizer is not available in compatibility mode because it requires the contrib lib.'
            )
        _raise_TF_warn()
        if decay != 0.0:
            logging.warning(
                'This optimizer uses \'decay\' as \'weight_decay\'.')
        else:
            raise ValueError('Should use \'decay\' > 0 for AdamW.')
        return weight_decay_optimizers.AdamWOptimizer(weight_decay=decay,
                                                      learning_rate=l_rate,
                                                      **kwargs)
    elif name == 'nmoment':
        return optimizers.SGD(lr=l_rate,
                              momentum=0.9,
                              decay=decay,
                              nesterov=True,
                              **kwargs)
    elif name == 'moment':
        return optimizers.SGD(lr=l_rate,
                              momentum=0.9,
                              decay=decay,
                              nesterov=False,
                              **kwargs)
    elif name == 'sgd':
        return optimizers.SGD(lr=l_rate, decay=decay, **kwargs)
    elif name == 'proximal':
        _raise_TF_warn()
        if decay != 0.0:
            logging.warning('This optimizer does not support \'decay\'.')
        return proximal_gradient_descent.ProximalGradientDescentOptimizer(
            l_rate, **kwargs)
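A minimal usage sketch for the helper above, under the assumption that it can be imported from a module (the module name below is hypothetical):

# Minimal usage sketch; the import path `my_optimizers` is hypothetical.
# Adjust it to wherever the optimizer() helper above is defined.
from my_optimizers import optimizer

opt = optimizer('proximal', l_rate=0.01)        # ProximalGradientDescentOptimizer
sgd = optimizer('sgd', l_rate=0.1, decay=1e-4)  # plain SGD with decay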