def __init__(
    self,
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-7,
    name="Nadam",
    **kwargs
):
    # Backwards compatibility with keras NAdam optimizer.
    kwargs["decay"] = kwargs.pop("schedule_decay", 0.004)
    learning_rate = kwargs.get("lr", learning_rate)
    if isinstance(
        learning_rate, learning_rate_schedule.LearningRateSchedule
    ):
        raise ValueError(
            "The Nadam optimizer does not support "
            "tf.keras.optimizers.LearningRateSchedules as the "
            "learning rate."
        )
    super().__init__(name, **kwargs)
    self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
    self._set_hyper("decay", self._initial_decay)
    self._set_hyper("beta_1", beta_1)
    self._set_hyper("beta_2", beta_2)
    self.epsilon = epsilon or backend_config.epsilon()
    self._m_cache = None
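# Illustrative usage sketch, not part of the source above. It assumes a TF 2.x
# release where `tf.keras.optimizers.Nadam` resolves to this optimizer_v2
# implementation; in that setting the legacy keras kwargs are remapped exactly
# as the constructor shows: `schedule_decay` becomes `decay`, `lr` aliases
# `learning_rate`, and a LearningRateSchedule is rejected.
import tensorflow as tf

# Legacy-style construction: `schedule_decay` is popped into `decay`.
opt = tf.keras.optimizers.Nadam(lr=0.002, schedule_decay=0.004)

# Passing a schedule as the learning rate raises ValueError for Nadam only.
try:
    tf.keras.optimizers.Nadam(
        learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
            0.001, decay_steps=1000, decay_rate=0.96))
except ValueError as err:
    print(err)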
def __init__(self,
             learning_rate=0.001,
             initial_accumulator_value=0.1,
             epsilon=1e-7,
             name='Adagrad',
             **kwargs):
    if initial_accumulator_value < 0.0:
        raise ValueError('initial_accumulator_value must be non-negative: %s' %
                         initial_accumulator_value)
    if epsilon is None:
        epsilon = backend_config.epsilon()
    super(Adagrad, self).__init__(name, **kwargs)
    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
    self._set_hyper('decay', self._initial_decay)
    self._initial_accumulator_value = initial_accumulator_value
    self.epsilon = epsilon or backend_config.epsilon()
def __init__(self, learning_rate=0.001, initial_accumulator_value=0.1, epsilon=1e-7, name="Adagrad", **kwargs): if initial_accumulator_value < 0.0: raise ValueError( "initial_accumulator_value must be non-negative: %s" % initial_accumulator_value) if epsilon is None: epsilon = backend_config.epsilon() super().__init__(name, **kwargs) self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) self._set_hyper("decay", self._initial_decay) self._initial_accumulator_value = initial_accumulator_value self.epsilon = epsilon or backend_config.epsilon()
def __init__(self,
             learning_rate=0.001,
             rho=0.9,
             momentum=0.0,
             epsilon=1e-7,
             centered=False,
             name="RMSprop",
             **kwargs):
    """Construct a new RMSprop optimizer.

    Args:
      learning_rate: A `Tensor`, floating point value, or a schedule that is a
        `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable
        that takes no arguments and returns the actual value to use. The
        learning rate. Defaults to 0.001.
      rho: Discounting factor for the history/coming gradient. Defaults to
        0.9.
      momentum: A scalar or a scalar `Tensor`. Defaults to 0.0.
      epsilon: A small constant for numerical stability. This epsilon is
        "epsilon hat" in the Kingma and Ba paper (in the formula just before
        Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to
        1e-7.
      centered: Boolean. If `True`, gradients are normalized by the estimated
        variance of the gradient; if `False`, by the uncentered second moment.
        Setting this to `True` may help with training, but is slightly more
        expensive in terms of computation and memory. Defaults to `False`.
      name: Optional name prefix for the operations created when applying
        gradients. Defaults to "RMSprop".
      **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`,
        `lr`, `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is
        clip gradients by value; `decay` is included for backward
        compatibility to allow time inverse decay of learning rate. `lr` is
        included for backward compatibility, recommended to use
        `learning_rate` instead.

    @compatibility(eager)
    When eager execution is enabled, `learning_rate`, `decay`, `momentum`, and
    `epsilon` can each be a callable that takes no arguments and returns the
    actual value to use. This can be useful for changing these values across
    different invocations of optimizer functions.
    @end_compatibility
    """
    super(RMSprop, self).__init__(name, **kwargs)
    self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
    self._set_hyper("decay", self._initial_decay)
    self._set_hyper("rho", rho)

    self._momentum = False
    if isinstance(momentum, tf.Tensor) or callable(momentum) or momentum > 0:
        self._momentum = True
    if isinstance(momentum, (int, float)) and (momentum < 0 or momentum > 1):
        raise ValueError(
            f"`momentum` must be between [0, 1]. Received: "
            f"momentum={momentum} (of type {type(momentum)}).")
    self._set_hyper("momentum", momentum)

    self.epsilon = epsilon or backend_config.epsilon()
    self.centered = centered
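# Illustrative usage sketch, not part of the source above. Assuming a TF 2.x
# release where `tf.keras.optimizers.RMSprop` resolves to this constructor,
# the documented arguments and legacy kwargs map onto it like this.
import tensorflow as tf

# Centered RMSprop with momentum and an explicit epsilon.
opt = tf.keras.optimizers.RMSprop(
    learning_rate=0.001, rho=0.9, momentum=0.9, epsilon=1e-7, centered=True)

# Legacy kwargs accepted via **kwargs: `lr` aliases `learning_rate`, `decay`
# enables time-inverse decay, `clipnorm` clips gradients by norm.
legacy_opt = tf.keras.optimizers.RMSprop(lr=0.01, decay=1e-4, clipnorm=1.0)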
def __init__(self,
             learning_rate=0.001,
             rho=0.95,
             epsilon=1e-7,
             name='Adadelta',
             **kwargs):
    super(Adadelta, self).__init__(name, **kwargs)
    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
    self._set_hyper('decay', self._initial_decay)
    self._set_hyper('rho', rho)
    self.epsilon = epsilon or backend_config.epsilon()
def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-7, name="Adadelta", **kwargs): super().__init__(name, **kwargs) self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) self._set_hyper("decay", self._initial_decay) self._set_hyper("rho", rho) self.epsilon = epsilon or backend_config.epsilon()
def __init__(self,
             learning_rate=0.001,
             beta_1=0.9,
             beta_2=0.999,
             epsilon=1e-7,
             name='Adamax',
             **kwargs):
    super().__init__(name, **kwargs)
    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
    self._set_hyper('decay', self._initial_decay)
    self._set_hyper('beta_1', beta_1)
    self._set_hyper('beta_2', beta_2)
    self.epsilon = epsilon or backend_config.epsilon()
def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, name="Adamax", **kwargs): super().__init__(name, **kwargs) self._set_hyper("learning_rate", kwargs.get("lr", learning_rate)) self._set_hyper("decay", self._initial_decay) self._set_hyper("beta_1", beta_1) self._set_hyper("beta_2", beta_2) self.epsilon = epsilon or backend_config.epsilon()
def __init__(
    self,
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-7,
    amsgrad=False,
    name="Adam",
    **kwargs
):
    """Construct a new Adam optimizer.

    Args:
      learning_rate: A `Tensor`, floating point value, or a schedule that is a
        `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable
        that takes no arguments and returns the actual value to use. The
        learning rate. Defaults to 0.001.
      beta_1: A float value or a constant float tensor, or a callable that
        takes no arguments and returns the actual value to use. The
        exponential decay rate for the 1st moment estimates. Defaults to 0.9.
      beta_2: A float value or a constant float tensor, or a callable that
        takes no arguments and returns the actual value to use. The
        exponential decay rate for the 2nd moment estimates. Defaults to
        0.999.
      epsilon: A small constant for numerical stability. This epsilon is
        "epsilon hat" in the Kingma and Ba paper (in the formula just before
        Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to
        1e-7.
      amsgrad: Boolean. Whether to apply the AMSGrad variant of this algorithm
        from the paper "On the Convergence of Adam and Beyond". Defaults to
        `False`.
      name: Optional name for the operations created when applying gradients.
        Defaults to "Adam".
      **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`,
        `lr`, `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is
        clip gradients by value; `decay` is included for backward
        compatibility to allow time inverse decay of learning rate. `lr` is
        included for backward compatibility, recommended to use
        `learning_rate` instead.
    """
    super().__init__(name, **kwargs)
    self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
    self._set_hyper("decay", self._initial_decay)
    self._set_hyper("beta_1", beta_1)
    self._set_hyper("beta_2", beta_2)
    self.epsilon = epsilon or backend_config.epsilon()
    self.amsgrad = amsgrad
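# Illustrative usage sketch, not part of the source above. Assuming
# `tf.keras.optimizers.Adam` resolves to the constructor shown here, a typical
# call and the AMSGrad variant look like this.
import tensorflow as tf

adam = tf.keras.optimizers.Adam(
    learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7)

# AMSGrad variant from "On the Convergence of Adam and Beyond".
amsgrad_adam = tf.keras.optimizers.Adam(learning_rate=0.001, amsgrad=True)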
def __init__(self,
             learning_rate=0.001,
             beta_1=0.9,
             beta_2=0.999,
             epsilon=1e-7,
             name='Nadam',
             **kwargs):
    # Backwards compatibility with keras NAdam optimizer.
    kwargs['decay'] = kwargs.pop('schedule_decay', 0.004)
    learning_rate = kwargs.get('lr', learning_rate)
    if isinstance(learning_rate, learning_rate_schedule.LearningRateSchedule):
        raise ValueError('The Nadam optimizer does not support '
                         'tf.keras.optimizers.LearningRateSchedules as the '
                         'learning rate.')
    super(Nadam, self).__init__(name, **kwargs)
    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
    self._set_hyper('decay', self._initial_decay)
    self._set_hyper('beta_1', beta_1)
    self._set_hyper('beta_2', beta_2)
    self.epsilon = epsilon or backend_config.epsilon()
    self._m_cache = None
def test_epsilon(self):
    epsilon = 1e-2
    backend_config.set_epsilon(epsilon)
    self.assertEqual(backend_config.epsilon(), epsilon)
    backend_config.set_epsilon(1e-7)
    self.assertEqual(backend_config.epsilon(), 1e-7)
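# Sketch of what the test above exercises, through the public alias
# `tf.keras.backend` (assumption: `backend_config` is the module behind that
# API). The constructors above fall back to this global fuzz factor whenever
# their `epsilon` argument resolves falsy, via
# `epsilon or backend_config.epsilon()`.
import tensorflow as tf

tf.keras.backend.set_epsilon(1e-2)
assert tf.keras.backend.epsilon() == 1e-2

tf.keras.backend.set_epsilon(1e-7)  # restore the library default
assert tf.keras.backend.epsilon() == 1e-7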