Example No. 1
def new_update(x, new_x):
    # params, is_one_of, old_update and self come from the enclosing
    # optimizer scope; only parameters flagged by _do_layer_adaptation
    # receive the LAMB-style rescaling.
    if is_one_of(x, params) and self._do_layer_adaptation(x):
        dx = new_x - x                    # raw update proposed by the base optimizer
        lr_t = K.clip(self.learning_rate, K.epsilon(), 1e10)
        x_norm = tf.norm(x)               # norm of the current parameter
        g_norm = tf.norm(dx / lr_t)       # norm of the update with the lr divided out
        ratio = K.switch(
            x_norm > 0.,
            K.switch(g_norm > K.epsilon(), x_norm / g_norm, 1.),
            1.)                           # trust ratio; 1 when either norm degenerates
        new_x = x + dx * ratio            # rescale the step to match ||x||
    return old_update(x, new_x)
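The closure implements the LAMB-style trust ratio: the proposed step dx is rescaled so that its length, measured with the learning rate divided out, matches the norm of the parameter itself. A minimal NumPy sketch of that arithmetic (function name and numbers are illustrative, not from the source):

import numpy as np

def layer_adapted_step(x, new_x, lr, eps=1e-7):
    # Same rescaling as new_update above, written out in plain NumPy.
    dx = new_x - x                        # raw step from the base optimizer
    x_norm = np.linalg.norm(x)
    g_norm = np.linalg.norm(dx / lr)      # step norm with the lr divided out
    if x_norm > 0.0 and g_norm > eps:
        ratio = x_norm / g_norm           # trust ratio
    else:
        ratio = 1.0                       # degenerate norms: keep the raw step
    return x + dx * ratio

x = np.array([0.3, -0.4])                 # ||x|| = 0.5
new_x = x - 0.001 * np.array([20.0, 0.0]) # lr * gradient
print(layer_adapted_step(x, new_x, lr=0.001))  # the step shrinks by 0.5 / 20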
Example No. 2
def new_update(x, new_x):
    # tf.keras (OptimizerV2) variant: var, old_update and self come from the
    # enclosing scope, and the decayed learning rate is read per variable.
    if x is var and self._do_layer_adaptation(x):
        dx = new_x - x                    # raw update proposed by the base optimizer
        lr_t = self._decayed_lr(x.dtype.base_dtype)
        lr_t = K.clip(lr_t, K.epsilon(), 1e10)
        x_norm = tf.norm(x)               # norm of the current parameter
        g_norm = tf.norm(dx / lr_t)       # norm of the update with the lr divided out
        ratio = K.switch(
            x_norm > 0.,
            K.switch(g_norm > K.epsilon(), x_norm / g_norm, 1.),
            1.)                           # trust ratio; 1 when either norm degenerates
        new_x = x + dx * ratio            # rescale the step to match ||x||
    return old_update(x, new_x)
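Example No. 2 differs from Example No. 1 only in how it identifies the variable (x is var) and in reading the current decayed learning rate per variable. Both closures keep a handle called old_update and are presumably swapped in for K.update while the wrapped optimizer builds its updates. Below is a self-contained sketch of that patching pattern, under the assumption that the surrounding extension temporarily replaces K.update and restores it afterwards; the variable names and the fixed learning rate are ours, not from the source:

import tensorflow as tf
from tensorflow.keras import backend as K

def is_one_of(x, xs):
    # Identity (not equality) membership test, mirroring the check in Example No. 1.
    return any(x is u for u in xs)

var = tf.Variable([0.3, -0.4])
params = [var]
old_update = K.update                      # keep a handle on the original backend op

def new_update(x, new_x):
    if is_one_of(x, params):
        dx = new_x - x
        lr_t = 0.001                       # assumed fixed learning rate for this demo
        x_norm = tf.norm(x)
        g_norm = tf.norm(dx / lr_t)
        ratio = tf.where(
            x_norm > 0.,
            tf.where(g_norm > K.epsilon(), x_norm / g_norm, 1.),
            1.)
        new_x = x + dx * ratio             # LAMB-style trust-ratio rescaling
    return old_update(x, new_x)

K.update = new_update                      # patch: every K.update call is rescaled
K.update(var, var - 0.001 * tf.constant([20.0, 0.0]))
K.update = old_update                      # restore the original backend op
print(var.numpy())                         # the step length now matches ||var||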
Example No. 3
def __init__(self,
             center=True,
             scale=True,
             conditional=False,
             hidden_units=None,
             hidden_activation='linear',
             hidden_initializer='glorot_uniform',
             **kwargs):
    super(LayerNormalization, self).__init__(**kwargs)
    self.center = center                  # learn an additive offset (beta)
    self.scale = scale                    # learn a multiplicative scale (gamma)
    self.conditional = conditional        # condition beta/gamma on an extra input
    self.hidden_units = hidden_units      # size of the projection of that condition
    self.hidden_activation = activations.get(hidden_activation)
    self.hidden_initializer = initializers.get(hidden_initializer)
    self.epsilon = K.epsilon() * K.epsilon()  # ~1e-14, added to the variance for stability
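For orientation, here is a self-contained sketch of what center, scale, and the tiny epsilon (K.epsilon() squared, about 1e-14) control in an ordinary, unconditional layer norm. When conditional=True, the hidden_* arguments presumably configure a projection of an extra conditioning input that shifts gamma and beta; that part is not shown in the constructor above. This is an illustration, not the class's own call method:

import numpy as np

def layer_norm(x, gamma, beta, center=True, scale=True, epsilon=1e-14):
    # Normalize over the last axis, then optionally rescale and re-center.
    mean = x.mean(axis=-1, keepdims=True)
    var = ((x - mean) ** 2).mean(axis=-1, keepdims=True)
    out = (x - mean) / np.sqrt(var + epsilon)
    if scale:
        out = out * gamma                 # learned per-feature scale
    if center:
        out = out + beta                  # learned per-feature offset
    return out

x = np.random.randn(2, 4)
print(layer_norm(x, gamma=np.ones(4), beta=np.zeros(4)))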