Beispiel #1
0
    def testBasicWithLearningRateInverseTimeDecay(self):
        """Check Adam under an InverseTimeDecay schedule against a NumPy model.

        For each tested float dtype, two variables receive constant gradients
        for three optimizer steps. After every step both variables are
        compared with the result of `adam_update_numpy`, which is given the
        learning rate `learning_rate / (1 + decay * t)` for zero-based step
        `t`.
        """
        tested_dtypes = [dtypes.half, dtypes.float32, dtypes.float64]
        for i, dtype in enumerate(tested_dtypes):
            with self.cached_session(use_gpu=True):
                np_dtype = dtype.as_numpy_dtype

                # Reference-side state: m*/v* are threaded through
                # adam_update_numpy from one step to the next.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                ref_var0 = np.array([1.0, 2.0], dtype=np_dtype)
                ref_grads0 = np.array([0.1, 0.1], dtype=np_dtype)
                ref_var1 = np.array([3.0, 4.0], dtype=np_dtype)
                ref_grads1 = np.array([0.01, 0.01], dtype=np_dtype)

                # TensorFlow-side counterparts; the loop index keeps the
                # variable names distinct across dtypes.
                var0 = resource_variable_ops.ResourceVariable(
                    ref_var0, name="var0_%d" % i)
                var1 = resource_variable_ops.ResourceVariable(
                    ref_var1, name="var1_%d" % i)
                grads0 = constant_op.constant(ref_grads0)
                grads1 = constant_op.constant(ref_grads1)

                learning_rate = 0.001
                decay = 0.5
                schedule = learning_rate_schedule.InverseTimeDecay(
                    learning_rate, decay_steps=1.0, decay_rate=decay)

                opt = adam.Adam(
                    learning_rate=schedule,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-7)
                grads_and_vars = zip([grads0, grads1], [var0, var1])
                update = opt.apply_gradients(grads_and_vars)

                self.evaluate(variables.global_variables_initializer())

                # Three Adam steps, each mirrored by the NumPy reference.
                for t in range(3):
                    self.evaluate(update)

                    lr_np = learning_rate / (1 + decay * t)
                    ref_var0, m0, v0 = adam_update_numpy(
                        ref_var0, ref_grads0, t, m0, v0, lr=lr_np)
                    ref_var1, m1, v1 = adam_update_numpy(
                        ref_var1, ref_grads1, t, m1, v1, lr=lr_np)

                    # The TF variables must track the reference values.
                    self.assertAllCloseAccordingToType(
                        ref_var0, self.evaluate(var0))
                    self.assertAllCloseAccordingToType(
                        ref_var1, self.evaluate(var1))
 def testBasicWithLearningRateInverseTimeDecaySerializeAndDeserialize(self):
     """Run the shared SGD decay check on an optimizer rebuilt from its config.

     For each float dtype, an SGD optimizer using an InverseTimeDecay
     schedule is round-tripped through `get_config()` / `from_config()`;
     the rebuilt optimizer is then handed to
     `_test_basic_sgd_with_learning_rate_decay`.
     """
     for dtype in (dtypes.half, dtypes.float32, dtypes.float64):
         schedule = learning_rate_schedule.InverseTimeDecay(
             3.0, decay_steps=1.0, decay_rate=0.5)
         config = gradient_descent.SGD(learning_rate=schedule).get_config()
         restored = gradient_descent.SGD.from_config(config)
         self._test_basic_sgd_with_learning_rate_decay(restored, dtype)
Beispiel #3
0
 def testConfigWithLearningRateDecay(self):
   """Verify SGD config round trips preserve learning-rate schedules.

   For an InverseTimeDecay and a PiecewiseConstantDecay schedule, the
   'learning_rate' hyperparameter of the original optimizer and of
   optimizers rebuilt with `from_config()` must evaluate, at step 10, to
   the same value as the schedule itself. The comparison is made both
   before and after `minimize()` has been called.
   """
   with test_util.use_gpu():
     var0 = variables.Variable([[1.0], [2.0]], dtype=dtypes.float32)
     schedules = [
         learning_rate_schedule.InverseTimeDecay(
             0.5, decay_steps=1.0, decay_rate=0.1),
         learning_rate_schedule.PiecewiseConstantDecay([5], [1., .5]),
     ]
     for schedule in schedules:
       step = 10
       opt = gradient_descent.SGD(schedule)
       restored = gradient_descent.SGD.from_config(opt.get_config())

       # Original and restored optimizer must both reproduce the schedule.
       for candidate in (opt, restored):
         self.assertAllEqual(
             schedule(step),
             candidate._get_hyper('learning_rate')(step))

       def loss():
         return 3 * var0

       # minimize() runs before the second round trip; per the original
       # author's note this is when the learning rate variable is created.
       opt.minimize(loss, [var0])
       self.evaluate(variables.global_variables_initializer())
       rebuilt = gradient_descent.SGD.from_config(opt.get_config())
       self.assertAllEqual(
           self.evaluate(opt._get_hyper('learning_rate')(step)),
           rebuilt._get_hyper('learning_rate')(step))
Beispiel #4
0
 def testConfigWithLearningRateDecay(self):
   """Verify an SGD config round trip preserves an InverseTimeDecay schedule.

   At step 10, the 'learning_rate' hyperparameter of the original
   optimizer and of an optimizer rebuilt via `from_config()` must equal
   the schedule's own value. After `minimize()` has been called, a second
   round trip is compared against the original optimizer.
   """
   with self.cached_session():
     step = 10
     schedule = learning_rate_schedule.InverseTimeDecay(
         0.5, decay_steps=1.0, decay_rate=0.1)
     opt = gradient_descent.SGD(schedule)
     from_fresh_config = gradient_descent.SGD.from_config(opt.get_config())

     # Check the original first, then the deserialized copy.
     for candidate in (opt, from_fresh_config):
       self.assertAllEqual(
           schedule(step),
           candidate._get_hyper('learning_rate')(step))

     var0 = variables.Variable([[1.0], [2.0]], dtype=dtypes.float32)
     # minimize() runs before the second round trip; per the original
     # author's note this is when the learning rate variable is created.
     opt.minimize(lambda: 3 * var0, [var0])
     self.evaluate(variables.global_variables_initializer())

     from_used_config = gradient_descent.SGD.from_config(opt.get_config())
     self.assertAllEqual(
         self.evaluate(opt._get_hyper('learning_rate')(step)),
         from_used_config._get_hyper('learning_rate')(step))
  def testAggregationMethod(self):
    """Run one SGD step with EXPERIMENTAL_ACCUMULATE_N gradient aggregation.

    For each float dtype, two variables are minimized against
    `5 * var0 + 3 * var1` with a learning rate of 3.0, and the values
    before and after the single step are asserted.
    """
    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
      with self.cached_session():
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
        cost = 5 * var0 + 3 * var1
        global_step = variables.Variable(
            array_ops.zeros([], dtypes.int64), name='global_step')
        sgd_op = sgd.SGD(3.0)
        opt_op = sgd_op.minimize(
            cost,
            global_step, [var0, var1],
            aggregation_method=gradients_impl.AggregationMethod.
            EXPERIMENTAL_ACCUMULATE_N)

        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], var0.eval())
        self.assertAllClose([3.0, 4.0], var1.eval())
        # Run 1 step of sgd through optimizer
        opt_op.run()
        # Validate updated params
        # var0 = [1., 2.] - 3.0 * [5, 5]
        self.assertAllClose([-14., -13.], var0.eval())
        # var1 = [3., 4.] - 3.0 * [3, 3]
        self.assertAllClose([-6., -5.], var1.eval())

      # NOTE(review): the statements below sit outside the session block.
      # `sgd` is the name used as a module in `sgd.SGD(3.0)` above, and
      # `loss` is not defined anywhere in this method. This tail looks like
      # it was spliced in from a different test -- confirm and remove or fix.
      sgd.learning_rate = learning_rate_schedule.InverseTimeDecay(
          0.5, decay_steps=1.0, decay_rate=0.5)
      if context.executing_eagerly():
        sgd.minimize(loss, [var0, var1])
      else:
        self.evaluate(opt_op)
  def testStaircase(self, serialize):
    """Check InverseTimeDecay with staircase=True for steps 0 through k.

    The schedule (optionally passed through `_maybe_serialized`) is
    evaluated at a step variable that is incremented once per iteration;
    each value must match
    `initial_lr / (1 + decay_rate * (i // k))`.

    Args:
      serialize: Forwarded to `_maybe_serialized` together with the schedule.
    """
    initial_lr = 0.1
    k = 10
    decay_rate = 0.96
    step = resource_variable_ops.ResourceVariable(0)
    schedule = _maybe_serialized(
        learning_rate_schedule.InverseTimeDecay(
            initial_lr, k, decay_rate, staircase=True),
        serialize)

    self.evaluate(variables.global_variables_initializer())
    for i in range(k + 1):
      # Integer division: the decay only changes once per k steps.
      completed_periods = i // k
      expected = initial_lr / (1 + decay_rate * completed_periods)
      observed = self.evaluate(schedule(step))
      self.assertAllClose(observed, expected, 1e-6)
      self.evaluate(step.assign_add(1))
Beispiel #7
0
    def testDecay(self, serialize):
        """Check continuous InverseTimeDecay for steps 0 through k.

        The schedule (optionally passed through `_maybe_serialized`) is
        evaluated at a step variable that is incremented once per
        iteration; each value must match
        `initial_lr / (1 + i / k * decay_rate)`.

        Args:
          serialize: Forwarded to `_maybe_serialized` with the schedule.
        """
        initial_lr, k, decay_rate = 0.1, 10, 0.96
        step = variables.Variable(0)
        schedule = _maybe_serialized(
            learning_rate_schedule.InverseTimeDecay(
                initial_lr, k, decay_rate),
            serialize)

        self.evaluate(variables.global_variables_initializer())
        for i in range(k + 1):
            expected = initial_lr / (1 + i / k * decay_rate)
            observed = self.evaluate(schedule(step))
            self.assertAllClose(observed, expected, 1e-6)
            # Advance the step variable for the next comparison.
            self.evaluate(step.assign_add(1))
    def testBasicWithLearningRateInverseTimeDecay(self):
        """Check Adagrad under an InverseTimeDecay schedule against NumPy.

        For float32 and float64, two variables receive constant gradients
        for three optimizer steps. After every step the variables are
        compared with `adagrad_update_numpy`, which is given the learning
        rate `learning_rate / (1 + decay * t)` for zero-based step `t`.
        """
        for dtype in [dtypes.float32, dtypes.float64]:
            with self.cached_session():
                np_dtype = dtype.as_numpy_dtype
                var0_np = np.array([1.0, 2.0], dtype=np_dtype)
                var1_np = np.array([3.0, 4.0], dtype=np_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=np_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=np_dtype)
                var0 = resource_variable_ops.ResourceVariable(var0_np)
                var1 = resource_variable_ops.ResourceVariable(var1_np)
                grads0 = constant_op.constant(grads0_np)
                grads1 = constant_op.constant(grads1_np)

                learning_rate = 3.0
                decay = 0.5
                ada_opt = adagrad.Adagrad(
                    learning_rate_schedule.InverseTimeDecay(
                        learning_rate, decay_steps=1.0, decay_rate=decay))

                # Accumulators of the NumPy reference start at 0.1.
                accum0_np = np.array([0.1, 0.1], dtype=np_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=np_dtype)

                # Outside eager mode the update op is built once and the
                # variables are initialized explicitly.
                if not context.executing_eagerly():
                    ada_update = ada_opt.apply_gradients(
                        zip([grads0, grads1], [var0, var1]))
                    self.evaluate(variables.global_variables_initializer())

                # Sanity-check the starting values.
                initial0, initial1 = self.evaluate([var0, var1])
                self.assertAllClose([1.0, 2.0], initial0)
                self.assertAllClose([3.0, 4.0], initial1)

                # Three Adagrad steps, each mirrored by the reference.
                for t in range(3):
                    if context.executing_eagerly():
                        ada_opt.apply_gradients(
                            zip([grads0, grads1], [var0, var1]))
                    else:
                        self.evaluate(ada_update)

                    lr_np = learning_rate / (1 + decay * t)
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, lr_np)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, lr_np)

                    self.assertAllCloseAccordingToType(
                        var0_np, self.evaluate(var0))
                    self.assertAllCloseAccordingToType(
                        var1_np, self.evaluate(var1))
  def testAdaptiveLearningRate(self):
    """Exercise SGD while its learning rate is reassigned between steps.

    The test is skipped unconditionally by the `skipTest` call on the first
    line, so nothing below it runs at present. The body applies one step at
    learning rate 1.0, one at 0.5 (both with asserted results), and a final
    step after switching to an InverseTimeDecay schedule, which has no
    assertion.
    """
    self.skipTest('broken test to be fixed')
    tested_dtypes = [
        dtypes.half, dtypes.float32, dtypes.float64, dtypes.complex64,
        dtypes.complex128
    ]
    for dtype in tested_dtypes:
      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)

      def loss():
        return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop

      opt = gradient_descent.SGD(1.0)

      self.evaluate(variables.global_variables_initializer())
      # Starting point before any update.
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
      self.assertAllClose([3.0, 4.0], self.evaluate(var1))

      # First step, learning rate 1.0.
      opt_op = opt.minimize(loss, [var0, var1])
      self.evaluate(variables.global_variables_initializer())
      self.evaluate(opt_op)
      # [1., 2.] - 1.0 * [5, 5]
      self.assertAllClose([-4., -3.], self.evaluate(var0))
      # [3., 4.] - 1.0 * [3, 3]
      self.assertAllClose([0., 1.], self.evaluate(var1))

      # Second step, learning rate lowered to 0.5.
      opt.learning_rate = 0.5
      if not context.executing_eagerly():
        self.evaluate(opt_op)
      else:
        opt.minimize(loss, [var0, var1])
      # [-4., -3.] - 0.5 * [5, 5]
      self.assertAllClose([-6.5, -5.5], self.evaluate(var0))
      # [0., 1.] - 0.5 * [3, 3]
      self.assertAllClose([-1.5, -0.5], self.evaluate(var1))

      # Third step with a schedule object as learning rate; only checks
      # that the step can be executed.
      opt.learning_rate = learning_rate_schedule.InverseTimeDecay(
          0.5, decay_steps=1.0, decay_rate=0.5)
      if not context.executing_eagerly():
        self.evaluate(opt_op)
      else:
        opt.minimize(loss, [var0, var1])
Beispiel #10
0
  def testAdaptiveLearningRate(self):
    """Exercise SGD while its learning rate is reassigned between steps.

    For every dtype in `_DATA_TYPES`, one step is applied at learning rate
    1.0 and one at 0.5 (both with asserted results), followed by a final
    step after switching to an InverseTimeDecay schedule, which has no
    assertion.
    """
    for dtype in _DATA_TYPES:
      with self.test_session():
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([3.0, 4.0], dtype=dtype)

        def loss():
          return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop

        opt = gradient_descent.SGD(1.0)

        self.evaluate(variables.global_variables_initializer())
        # Starting point before any update.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # First step, learning rate 1.0.
        opt_op = opt.minimize(loss, [var0, var1])
        self.evaluate(variables.global_variables_initializer())
        self.evaluate(opt_op)
        # [1., 2.] - 1.0 * [5, 5]
        self.assertAllClose([-4., -3.], self.evaluate(var0))
        # [3., 4.] - 1.0 * [3, 3]
        self.assertAllClose([0., 1.], self.evaluate(var1))

        def run_step():
          # Eager mode needs a fresh minimize() call; otherwise the op
          # built above is re-run.
          if context.executing_eagerly():
            opt.minimize(loss, [var0, var1])  # pylint: disable=cell-var-from-loop
          else:
            self.evaluate(opt_op)  # pylint: disable=cell-var-from-loop

        # Second step, learning rate lowered to 0.5.
        opt.learning_rate = 0.5
        run_step()
        # [-4., -3.] - 0.5 * [5, 5]
        self.assertAllClose([-6.5, -5.5], self.evaluate(var0))
        # [0., 1.] - 0.5 * [3, 3]
        self.assertAllClose([-1.5, -0.5], self.evaluate(var1))

        # Third step with a schedule object as learning rate; only checks
        # that the step can be executed.
        opt.learning_rate = learning_rate_schedule.InverseTimeDecay(
            0.5, decay_steps=1.0, decay_rate=0.5)
        run_step()
  def testLearningRateDecayUsedInTwoFunctions(self):
    """Use one Adam optimizer with a decay schedule from two tf.functions.

    Two variables (both named 'var') are each minimized inside their own
    `def_function.function`, sharing a single optimizer whose learning
    rate is an InverseTimeDecay schedule. The test has no assertions; it
    only requires both calls to complete.
    """
    a = variables.Variable([1., 2.], name='var')
    b = variables.Variable([1.], name='var')

    opt = adam.Adam(
        learning_rate=learning_rate_schedule.InverseTimeDecay(
            0.5, decay_steps=1.0, decay_rate=0.5))
    triple_a = lambda: 3 * a
    double_b = lambda: 2 * b

    @def_function.function
    def fn_a():
      opt.minimize(triple_a, [a])
      return a

    @def_function.function
    def fn_b():
      opt.minimize(double_b, [b])
      return b

    fn_a()
    fn_b()
Beispiel #12
0
    def testDenseWithLearningRateInverseTimeDecay(self):
        """Check RMSprop under an InverseTimeDecay schedule against NumPy.

        Two variables receive constant gradients for two optimizer steps.
        After each step the "rms" slots and the variables are compared with
        `self._rmsprop_update_numpy`, which is given the learning rate
        `learning_rate / (1 + decay * t)` for zero-based step `t`. Momentum
        is 0.0 here, so the momentum-slot checks are not exercised.
        """
        var0_np = np.array([1.0, 2.0])
        grads0_np = np.array([0.1, 0.2])
        var1_np = np.array([3.0, 4.0])
        grads1_np = np.array([0.01, 0.2])

        var0 = resource_variable_ops.ResourceVariable(var0_np)
        var1 = resource_variable_ops.ResourceVariable(var1_np)
        grads0 = constant_op.constant(grads0_np)
        grads1 = constant_op.constant(grads1_np)

        # Hyperparameters shared by the optimizer and the reference.
        learning_rate = 0.01
        rho = 0.9
        momentum = 0.0
        epsilon = 1e-7
        centered = False
        decay = 0.5

        lr_schedule = learning_rate_schedule.InverseTimeDecay(
            learning_rate, decay_steps=1.0, decay_rate=decay)
        opt = rmsprop.RMSprop(
            learning_rate=lr_schedule,
            rho=rho,
            momentum=momentum,
            epsilon=epsilon,
            centered=centered)

        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())

        # The "rms" slots must exist for both variables.
        rms0 = opt.get_slot(var0, "rms")
        self.assertIsNotNone(rms0)
        rms1 = opt.get_slot(var1, "rms")
        self.assertIsNotNone(rms1)

        # Momentum slots are only looked up when momentum is enabled.
        uses_momentum = momentum > 0.
        mom0 = opt.get_slot(var0, "momentum") if uses_momentum else None
        mom1 = opt.get_slot(var1, "momentum") if uses_momentum else None

        # Six independent zero vectors for the reference state.
        mg0_np, mg1_np, rms0_np, rms1_np, mom0_np, mom1_np = (
            np.zeros(2) for _ in range(6))

        # Sanity-check the starting values.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Two RMSprop steps, each mirrored by the NumPy reference.
        for t in range(2):
            self.evaluate(update)

            lr = learning_rate / (1 + decay * t)
            var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
                var0_np, grads0_np, mg0_np, rms0_np, mom0_np,
                lr, rho, momentum, epsilon, centered)
            var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
                var1_np, grads1_np, mg1_np, rms1_np, mom1_np,
                lr, rho, momentum, epsilon, centered)

            # Slots first, then the variables themselves.
            self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0))
            self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1))
            if momentum > 0.:
                self.assertAllCloseAccordingToType(
                    mom0_np, self.evaluate(mom0))
                self.assertAllCloseAccordingToType(
                    mom1_np, self.evaluate(mom1))
            self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
            self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
Beispiel #13
0
def inverse_time_decay(learning_rate,
                       global_step,
                       decay_steps,
                       decay_rate,
                       staircase=False,
                       name=None):
    """Applies inverse time decay to the initial learning rate.

    When training a model, it is often recommended to lower the learning
    rate as the training progresses. This function applies an inverse decay
    function to a provided initial learning rate. It requires a
    `global_step` value to compute the decayed learning rate; you can pass
    a TensorFlow variable that you increment at each training step.

    The decayed learning rate is computed as:

    ```python
    decayed_learning_rate = learning_rate / (
        1 + decay_rate * global_step / decay_steps)
    ```

    or, if `staircase` is `True`, as:

    ```python
    decayed_learning_rate = learning_rate / (
        1 + decay_rate * floor(global_step / decay_steps))
    ```

    Example: decay 1/t with a rate of 0.5:

    ```python
    ...
    global_step = tf.Variable(0, trainable=False)
    learning_rate = 0.1
    decay_steps = 1.0
    decay_rate = 0.5
    learning_rate = tf.compat.v1.train.inverse_time_decay(
        learning_rate, global_step, decay_steps, decay_rate)

    # Passing global_step to minimize() will increment it at each step.
    learning_step = (
        tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
        .minimize(...my loss..., global_step=global_step)
    )
    ```

    This wrapper builds a `learning_rate_schedule.InverseTimeDecay` and
    performs no argument validation of its own.

    Args:
      learning_rate: A scalar `float32` or `float64` `Tensor` or a Python
        number. The initial learning rate.
      global_step: Global step to use for the decay computation, e.g. a
        variable incremented at each training step. Must not be negative.
      decay_steps: How often to apply decay.
      decay_rate: A Python number. The decay rate.
      staircase: Whether to apply decay in a discrete staircase, as opposed
        to continuous, fashion.
      name: String. Optional name of the operation, forwarded to
        `InverseTimeDecay`.

    Returns:
      Outside eager execution, the result of calling the schedule with
      `global_step` (the decayed learning rate). Under eager execution, a
      `functools.partial` that applies the schedule to `global_step` each
      time it is called.

    @compatibility(eager)
    When eager execution is enabled, this function returns a function which
    in turn returns the decayed learning rate Tensor. This can be useful for
    changing the learning rate value across different invocations of
    optimizer functions.
    @end_compatibility
    """
    schedule = learning_rate_schedule.InverseTimeDecay(
        learning_rate,
        decay_steps,
        decay_rate,
        staircase=staircase,
        name=name)

    # Eager mode: defer evaluation so each call re-reads `global_step`.
    if context.executing_eagerly():
        return functools.partial(schedule, global_step)

    # Graph mode: evaluate the schedule once at `global_step`.
    return schedule(global_step)