Example #1
    def __init__(
        self,
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        name="Nadam",
        **kwargs
    ):
        # Backwards compatibility with keras NAdam optimizer.
        kwargs["decay"] = kwargs.pop("schedule_decay", 0.004)
        learning_rate = kwargs.get("lr", learning_rate)
        if isinstance(
            learning_rate, learning_rate_schedule.LearningRateSchedule
        ):
            raise ValueError(
                "The Nadam optimizer does not support "
                "tf.keras.optimizers.LearningRateSchedules as the "
                "learning rate."
            )

        super().__init__(name, **kwargs)
        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
        self._set_hyper("decay", self._initial_decay)
        self._set_hyper("beta_1", beta_1)
        self._set_hyper("beta_2", beta_2)
        self.epsilon = epsilon or backend_config.epsilon()
        self._m_cache = None
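
For context, a minimal usage sketch of this constructor, assuming a TF 2.x install where tf.keras.optimizers.Nadam resolves to the legacy implementation above (the model and shapes are illustrative):

import tensorflow as tf

# Instantiate Nadam with explicit hyperparameters; the constructor above
# also accepts the legacy `lr` and `schedule_decay` kwargs for backward
# compatibility, but rejects a LearningRateSchedule as the learning rate.
opt = tf.keras.optimizers.Nadam(
    learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-7)

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer=opt, loss="mse")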
Example #2
 def __init__(self,
              learning_rate=0.001,
              initial_accumulator_value=0.1,
              epsilon=1e-7,
              name='Adagrad',
              **kwargs):
   if initial_accumulator_value < 0.0:
     raise ValueError('initial_accumulator_value must be non-negative: %s' %
                      initial_accumulator_value)
   if epsilon is None:
     epsilon = backend_config.epsilon()
   super(Adagrad, self).__init__(name, **kwargs)
   self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
   self._set_hyper('decay', self._initial_decay)
   self._initial_accumulator_value = initial_accumulator_value
   self.epsilon = epsilon or backend_config.epsilon()
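
A short sketch of the validation above, assuming tf.keras.optimizers.Adagrad maps to this implementation:

import tensorflow as tf

# A non-negative accumulator seed is accepted ...
opt = tf.keras.optimizers.Adagrad(learning_rate=0.01,
                                  initial_accumulator_value=0.1,
                                  epsilon=1e-7)

# ... while a negative one is rejected by the check in __init__.
try:
    tf.keras.optimizers.Adagrad(initial_accumulator_value=-1.0)
except ValueError as err:
    print(err)  # initial_accumulator_value must be non-negative: -1.0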
Example #3
 def __init__(self,
              learning_rate=0.001,
              initial_accumulator_value=0.1,
              epsilon=1e-7,
              name="Adagrad",
              **kwargs):
     if initial_accumulator_value < 0.0:
         raise ValueError(
             "initial_accumulator_value must be non-negative: %s" %
             initial_accumulator_value)
     if epsilon is None:
         epsilon = backend_config.epsilon()
     super().__init__(name, **kwargs)
     self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
     self._set_hyper("decay", self._initial_decay)
     self._initial_accumulator_value = initial_accumulator_value
     self.epsilon = epsilon or backend_config.epsilon()
Example #4
    def __init__(self,
                 learning_rate=0.001,
                 rho=0.9,
                 momentum=0.0,
                 epsilon=1e-7,
                 centered=False,
                 name="RMSprop",
                 **kwargs):
        """Construct a new RMSprop optimizer.

    Args:
      learning_rate: A `Tensor`, floating point value, or a schedule that is a
        `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable
        that takes no arguments and returns the actual value to use. The
        learning rate. Defaults to 0.001.
      rho: Discounting factor for the history/coming gradient. Defaults to 0.9.
      momentum: A scalar or a scalar `Tensor`. Defaults to 0.0.
      epsilon: A small constant for numerical stability. This epsilon is
        "epsilon hat" in the Kingma and Ba paper (in the formula just before
        Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to
        1e-7.
      centered: Boolean. If `True`, gradients are normalized by the estimated
        variance of the gradient; if False, by the uncentered second moment.
        Setting this to `True` may help with training, but is slightly more
        expensive in terms of computation and memory. Defaults to `False`.
      name: Optional name prefix for the operations created when applying
        gradients. Defaults to "RMSprop".
      **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, `lr`,
        `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is clip
        gradients by value, `decay` is included for backward compatibility to
        allow time inverse decay of learning rate. `lr` is included for backward
        compatibility, recommended to use `learning_rate` instead.

    @compatibility(eager)
    When eager execution is enabled, `learning_rate`, `decay`, `momentum`, and
    `epsilon` can each be a callable that takes no arguments and returns the
    actual value to use. This can be useful for changing these values across
    different invocations of optimizer functions.
    @end_compatibility
    """
        super(RMSprop, self).__init__(name, **kwargs)
        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
        self._set_hyper("decay", self._initial_decay)
        self._set_hyper("rho", rho)

        self._momentum = False
        if isinstance(momentum,
                      tf.Tensor) or callable(momentum) or momentum > 0:
            self._momentum = True
        if isinstance(momentum,
                      (int, float)) and (momentum < 0 or momentum > 1):
            raise ValueError(
                f"`momentum` must be between [0, 1]. Received: "
                f"momentum={momentum} (of type {type(momentum)}).")
        self._set_hyper("momentum", momentum)

        self.epsilon = epsilon or backend_config.epsilon()
        self.centered = centered
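
A usage sketch for the constructor above; note that the momentum range check is only present in releases that include the code shown, so treat the ValueError branch as an assumption about the installed Keras version:

import tensorflow as tf

# Centered RMSprop with momentum: gradients are normalized by the
# estimated variance instead of the uncentered second moment.
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9,
                                  momentum=0.9, centered=True)

# A float momentum outside [0, 1] trips the ValueError shown above
# (in releases that include the range check).
try:
    tf.keras.optimizers.RMSprop(momentum=1.5)
except ValueError as err:
    print(err)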
Example #5
File: adadelta.py Project: Alan-love/keras
 def __init__(self,
              learning_rate=0.001,
              rho=0.95,
              epsilon=1e-7,
              name='Adadelta',
              **kwargs):
     super(Adadelta, self).__init__(name, **kwargs)
     self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
     self._set_hyper('decay', self._initial_decay)
     self._set_hyper('rho', rho)
     self.epsilon = epsilon or backend_config.epsilon()
Example #6
 def __init__(self,
              learning_rate=0.001,
              rho=0.95,
              epsilon=1e-7,
              name="Adadelta",
              **kwargs):
     super().__init__(name, **kwargs)
     self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
     self._set_hyper("decay", self._initial_decay)
     self._set_hyper("rho", rho)
     self.epsilon = epsilon or backend_config.epsilon()
Example #7
 def __init__(self,
              learning_rate=0.001,
              beta_1=0.9,
              beta_2=0.999,
              epsilon=1e-7,
              name='Adamax',
              **kwargs):
     super().__init__(name, **kwargs)
     self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
     self._set_hyper('decay', self._initial_decay)
     self._set_hyper('beta_1', beta_1)
     self._set_hyper('beta_2', beta_2)
     self.epsilon = epsilon or backend_config.epsilon()
Example #8
 def __init__(self,
              learning_rate=0.001,
              beta_1=0.9,
              beta_2=0.999,
              epsilon=1e-7,
              name="Adamax",
              **kwargs):
     super().__init__(name, **kwargs)
     self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
     self._set_hyper("decay", self._initial_decay)
     self._set_hyper("beta_1", beta_1)
     self._set_hyper("beta_2", beta_2)
     self.epsilon = epsilon or backend_config.epsilon()
Example #9
File: adam.py Project: paolodedios/keras
    def __init__(
        self,
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        amsgrad=False,
        name="Adam",
        **kwargs
    ):
        """Construct a new Adam optimizer.

        Args:
          learning_rate: A `Tensor`, floating point value, or a schedule that is
            a `tf.keras.optimizers.schedules.LearningRateSchedule`, or a
            callable that takes no arguments and returns the actual value to
            use. The learning rate. Defaults to 0.001.
          beta_1: A float value or a constant float tensor, or a callable that
            takes no arguments and returns the actual value to use. The
            exponential decay rate for the 1st moment estimates. Defaults to
            0.9.
          beta_2: A float value or a constant float tensor, or a callable that
            takes no arguments and returns the actual value to use. The
            exponential decay rate for the 2nd moment estimates. Defaults to
            0.999.
          epsilon: A small constant for numerical stability. This epsilon is
            "epsilon hat" in the Kingma and Ba paper (in the formula just before
            Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults
            to 1e-7.
          amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm
            from the paper "On the Convergence of Adam and beyond". Defaults to
            `False`.
          name: Optional name for the operations created when applying
            gradients.  Defaults to "Adam".
          **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`,
            `lr`, `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is
            clip gradients by value, `decay` is included for backward
            compatibility to allow time inverse decay of learning rate. `lr` is
            included for backward compatibility, recommended to use
            `learning_rate` instead.
        """

        super().__init__(name, **kwargs)
        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
        self._set_hyper("decay", self._initial_decay)
        self._set_hyper("beta_1", beta_1)
        self._set_hyper("beta_2", beta_2)
        self.epsilon = epsilon or backend_config.epsilon()
        self.amsgrad = amsgrad
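
A minimal sketch of the documented parameters, assuming tf.keras.optimizers.Adam corresponds to this legacy implementation:

import tensorflow as tf

# Adam with the AMSGrad variant enabled; the other hyperparameters keep
# the defaults described in the docstring above.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9,
                               beta_2=0.999, epsilon=1e-7, amsgrad=True)

# The legacy `lr` alias is still accepted via **kwargs, but
# `learning_rate` is the recommended spelling.
legacy = tf.keras.optimizers.Adam(lr=0.01)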
Example #10
  def __init__(self,
               learning_rate=0.001,
               beta_1=0.9,
               beta_2=0.999,
               epsilon=1e-7,
               name='Nadam',
               **kwargs):
    # Backwards compatibility with keras NAdam optimizer.
    kwargs['decay'] = kwargs.pop('schedule_decay', 0.004)
    learning_rate = kwargs.get('lr', learning_rate)
    if isinstance(learning_rate, learning_rate_schedule.LearningRateSchedule):
      raise ValueError('The Nadam optimizer does not support '
                       'tf.keras.optimizers.LearningRateSchedules as the '
                       'learning rate.')

    super(Nadam, self).__init__(name, **kwargs)
    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
    self._set_hyper('decay', self._initial_decay)
    self._set_hyper('beta_1', beta_1)
    self._set_hyper('beta_2', beta_2)
    self.epsilon = epsilon or backend_config.epsilon()
    self._m_cache = None
Example #11
 def test_epsilon(self):
     epsilon = 1e-2
     backend_config.set_epsilon(epsilon)
     self.assertEqual(backend_config.epsilon(), epsilon)
     backend_config.set_epsilon(1e-7)
     self.assertEqual(backend_config.epsilon(), 1e-7)
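
The test exercises the backend-level epsilon that every constructor above falls back to through `epsilon or backend_config.epsilon()`. Below is a sketch of the same behaviour via the public tf.keras.backend API, assuming the legacy optimizer implementations shown in the earlier examples:

import tensorflow as tf

# The public backend API mirrors backend_config.epsilon()/set_epsilon().
tf.keras.backend.set_epsilon(1e-2)
assert tf.keras.backend.epsilon() == 1e-2

# With epsilon=None, `epsilon or backend_config.epsilon()` in __init__
# picks up the backend value instead of the 1e-7 default.
opt = tf.keras.optimizers.Adadelta(epsilon=None)
print(opt.epsilon)  # 0.01 under the backend setting above

# Restore the default so later code sees the usual 1e-7.
tf.keras.backend.set_epsilon(1e-7)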