Example #1
    def __init__(self,
                 decay_steps,
                 warmup_steps,
                 min_lr=0.0,
                 lr=0.001,
                 beta_1=0.9,
                 beta_2=0.999,
                 epsilon=None,
                 weight_decay=0.,
                 weight_decay_pattern=None,
                 amsgrad=False,
                 **kwargs):
        super(AdamWarmup, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            # Schedule and Adam hyper-parameters are stored as backend variables
            # so they can be read and updated without rebuilding the graph.
            self.decay_steps = K.variable(decay_steps, name='decay_steps')
            self.warmup_steps = K.variable(warmup_steps, name='warmup_steps')
            self.min_lr = K.variable(min_lr, name='min_lr')
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.weight_decay = K.variable(weight_decay, name='weight_decay')
        # Fall back to the backend fuzz factor when no epsilon is supplied.
        if epsilon is None:
            epsilon = K.epsilon()
        self.epsilon = epsilon
        self.initial_weight_decay = weight_decay
        self.weight_decay_pattern = weight_decay_pattern
        self.amsgrad = amsgrad
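
For context, a minimal usage sketch: it assumes the __init__ above belongs to an AdamWarmup optimizer class (the name is taken from the super() call) that subclasses the Keras Optimizer, and that the step counts, learning rates and decay settings below are purely illustrative.

# Hypothetical usage sketch; AdamWarmup is assumed to be importable from the
# package that defines the __init__ shown above, and `model` to be an
# already-built Keras model.
total_steps = 10000
warmup_steps = 1000

optimizer = AdamWarmup(
    decay_steps=total_steps - warmup_steps,  # steps spent decaying after warm-up
    warmup_steps=warmup_steps,               # steps to ramp the learning rate up
    lr=1e-3,                                 # peak learning rate after warm-up
    min_lr=1e-5,                             # floor that the rate decays towards
    weight_decay=0.01,                       # weight-decay coefficient (illustrative)
)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy')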
Example #2
    def __init__(self,
                 center=True,
                 scale=True,
                 epsilon=None,
                 gamma_initializer='ones',
                 beta_initializer='zeros',
                 gamma_regularizer=None,
                 beta_regularizer=None,
                 gamma_constraint=None,
                 beta_constraint=None,
                 **kwargs):
        """Layer normalization layer

        See: [Layer Normalization](https://arxiv.org/pdf/1607.06450.pdf)

        :param center: Add an offset parameter if it is True.
        :param scale: Add a scale parameter if it is True.
        :param epsilon: Epsilon for calculating variance.
        :param gamma_initializer: Initializer for the gamma weight.
        :param beta_initializer: Initializer for the beta weight.
        :param gamma_regularizer: Optional regularizer for the gamma weight.
        :param beta_regularizer: Optional regularizer for the beta weight.
        :param gamma_constraint: Optional constraint for the gamma weight.
        :param beta_constraint: Optional constraint for the beta weight.
        :param kwargs: Extra keyword arguments passed to the parent layer.
        """
        super(LayerNormalization, self).__init__(**kwargs)
        self.supports_masking = True
        self.center = center
        self.scale = scale
        # Default to the square of the backend fuzz factor (a much smaller
        # value than K.epsilon()) when no epsilon is supplied.
        if epsilon is None:
            epsilon = K.epsilon() * K.epsilon()
        self.epsilon = epsilon
        self.gamma_initializer = keras.initializers.get(gamma_initializer)
        self.beta_initializer = keras.initializers.get(beta_initializer)
        self.gamma_regularizer = keras.regularizers.get(gamma_regularizer)
        self.beta_regularizer = keras.regularizers.get(beta_regularizer)
        self.gamma_constraint = keras.constraints.get(gamma_constraint)
        self.beta_constraint = keras.constraints.get(beta_constraint)
        self.gamma, self.beta = None, None
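
For context, a minimal usage sketch: it assumes the __init__ above belongs to a LayerNormalization layer class (the name is taken from the super() call) that, as in the referenced paper, normalizes each sample over its feature axis as gamma * (x - mean) / sqrt(var + epsilon) + beta; the input shape below is illustrative.

# Hypothetical usage sketch; LayerNormalization is assumed to be importable
# from the package that defines the __init__ shown above.
import numpy as np
from tensorflow import keras

inputs = keras.layers.Input(shape=(10, 64))
normalized = LayerNormalization(
    center=True,   # learn a beta offset
    scale=True,    # learn a gamma scale
)(inputs)
model = keras.models.Model(inputs, normalized)

x = np.random.rand(2, 10, 64).astype('float32')
print(model.predict(x).shape)  # (2, 10, 64): shape is preserved, features are normalized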