import keras
from keras import backend as K


def __init__(self, decay_steps, warmup_steps, min_lr=0.0, lr=0.001, beta_1=0.9, beta_2=0.999,
             epsilon=None, weight_decay=0., weight_decay_pattern=None, amsgrad=False, **kwargs):
    """Adam optimizer with warmup.

    :param decay_steps: Number of steps in the learning-rate decay phase.
    :param warmup_steps: Number of warmup steps before the decay begins.
    :param min_lr: Minimum learning rate reached at the end of decay.
    :param lr: Peak learning rate.
    :param beta_1: Exponential decay rate for the first moment estimates.
    :param beta_2: Exponential decay rate for the second moment estimates.
    :param epsilon: Fuzz factor. Defaults to `K.epsilon()` if it is None.
    :param weight_decay: Weight decay rate.
    :param weight_decay_pattern: Patterns of weight names to be decayed.
        All weights are decayed if it is None.
    :param amsgrad: Whether to apply the AMSGrad variant.
    :param kwargs:
    """
    super(AdamWarmup, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.decay_steps = K.variable(decay_steps, name='decay_steps')
        self.warmup_steps = K.variable(warmup_steps, name='warmup_steps')
        self.min_lr = K.variable(min_lr, name='min_lr')
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.weight_decay = K.variable(weight_decay, name='weight_decay')
    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.initial_weight_decay = weight_decay
    self.weight_decay_pattern = weight_decay_pattern
    self.amsgrad = amsgrad
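# A minimal usage sketch, not part of the module: the step counts, the model,
# and the assumption that `AdamWarmup` drops in wherever a Keras optimizer is
# accepted are all illustrative. `weight_decay_pattern` is assumed to match
# substrings of weight names.
def _adam_warmup_demo(total_steps=10000):
    model = keras.models.Sequential([
        keras.layers.Dense(10, input_shape=(8,), activation='softmax'),
    ])
    optimizer = AdamWarmup(
        decay_steps=total_steps * 9 // 10,  # decay phase after warmup
        warmup_steps=total_steps // 10,     # warm up over the first 10% of steps
        lr=1e-3,
        min_lr=1e-5,
        weight_decay=0.01,
        weight_decay_pattern=['kernel'],    # hypothetical: decay only kernel weights
    )
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy')
    return model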
def __init__(self,
             center=True,
             scale=True,
             epsilon=None,
             gamma_initializer='ones',
             beta_initializer='zeros',
             gamma_regularizer=None,
             beta_regularizer=None,
             gamma_constraint=None,
             beta_constraint=None,
             **kwargs):
    """Layer normalization layer.

    See: [Layer Normalization](https://arxiv.org/pdf/1607.06450.pdf)

    :param center: Add an offset parameter if it is True.
    :param scale: Add a scale parameter if it is True.
    :param epsilon: Epsilon for calculating variance.
    :param gamma_initializer: Initializer for the gamma weight.
    :param beta_initializer: Initializer for the beta weight.
    :param gamma_regularizer: Optional regularizer for the gamma weight.
    :param beta_regularizer: Optional regularizer for the beta weight.
    :param gamma_constraint: Optional constraint for the gamma weight.
    :param beta_constraint: Optional constraint for the beta weight.
    :param kwargs:
    """
    super(LayerNormalization, self).__init__(**kwargs)
    self.supports_masking = True
    self.center = center
    self.scale = scale
    if epsilon is None:
        epsilon = K.epsilon() * K.epsilon()
    self.epsilon = epsilon
    self.gamma_initializer = keras.initializers.get(gamma_initializer)
    self.beta_initializer = keras.initializers.get(beta_initializer)
    self.gamma_regularizer = keras.regularizers.get(gamma_regularizer)
    self.beta_regularizer = keras.regularizers.get(beta_regularizer)
    self.gamma_constraint = keras.constraints.get(gamma_constraint)
    self.beta_constraint = keras.constraints.get(beta_constraint)
    self.gamma, self.beta = None, None
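# A minimal usage sketch, assuming this layer normalizes over the last axis
# (as in the Layer Normalization paper) and that `gamma`/`beta` are created
# lazily in `build`; the surrounding model is hypothetical.
def _layer_norm_demo():
    inputs = keras.layers.Input(shape=(16, 32))
    normalized = LayerNormalization()(inputs)  # zero mean, unit variance per position
    outputs = keras.layers.Dense(8)(normalized)
    return keras.models.Model(inputs=inputs, outputs=outputs)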