Example No. 1
def __init__(self,
             input_dim,
             output_dim,
             embedding_initializer='zeros',
             **kwargs):
    super(RelativePositionEmbedding, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.embedding_initializer = initializers.get(embedding_initializer)
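
All of these constructors rely on the same Keras pattern: initializers.get resolves a string identifier (or a config dict, or an existing instance) into an initializer object. A quick standalone illustration, independent of the project code and shown here with tensorflow.keras, although the original modules may import initializers from standalone keras instead:

from tensorflow.keras import initializers

init = initializers.get('zeros')   # string identifier -> Zeros instance
weights = init(shape=(2, 3))       # tensor of zeros with shape (2, 3)
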
Example No. 2
def __init__(self,
             units,
             activation='relu',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             **kwargs):
    super(FeedForward, self).__init__(**kwargs)
    self.units = units
    self.activation = activation
    self.use_bias = use_bias
    self.kernel_initializer = initializers.get(kernel_initializer)
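
FeedForward stores the ingredients of a position-wise feed-forward block. As a hedged sketch only (the class name FeedForwardSketch and the build/call bodies below follow the standard Transformer FFN pattern and are assumptions, not the project's code), these attributes would typically drive two Dense sublayers:

from tensorflow.keras import layers, initializers

class FeedForwardSketch(layers.Layer):
    # Hypothetical completion of the __init__ shown above.
    def __init__(self, units, activation='relu', use_bias=True,
                 kernel_initializer='glorot_uniform', **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = activation
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)

    def build(self, input_shape):
        # Expand to `units` with the activation, then project back to the input width.
        self.dense_1 = layers.Dense(self.units, activation=self.activation,
                                    use_bias=self.use_bias,
                                    kernel_initializer=self.kernel_initializer)
        self.dense_2 = layers.Dense(int(input_shape[-1]), use_bias=self.use_bias,
                                    kernel_initializer=self.kernel_initializer)
        super().build(input_shape)

    def call(self, inputs):
        return self.dense_2(self.dense_1(inputs))
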
Example No. 3
def __init__(self,
             input_dim,
             output_dim,
             merge_mode='add',
             embeddings_initializer='zero',
             **kwargs):
    super(PositionEmbedding, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.merge_mode = merge_mode
    self.embeddings_initializer = initializers.get(embeddings_initializer)
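
merge_mode controls how the learned position table is combined with the token embeddings. The snippet below only illustrates the usual convention for 'add' versus 'concat' with plain tensors; it is not the layer's actual call implementation:

import tensorflow as tf

token_embeddings = tf.zeros((2, 4, 8))        # (batch, seq_len, hidden)
position_table = tf.random.normal((4, 8))     # one learned vector per position

merged_add = token_embeddings + position_table                        # 'add'    -> (2, 4, 8)
merged_concat = tf.concat(
    [token_embeddings, tf.tile(position_table[None], [2, 1, 1])],     # 'concat' -> (2, 4, 16)
    axis=-1)
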
Example No. 4
def __init__(self, head_nums, head_size, key_size=None, use_bias=True, attention_scale=True,
             kernel_initializer='glorot_uniform', with_residual_attention=False, **kwargs):
    super(MultiHeadAttention, self).__init__(**kwargs)
    self.head_nums = head_nums
    self.head_size = head_size
    self.key_size = key_size or head_size
    self.output_dim = head_nums * head_size
    self.use_bias = use_bias
    self.attention_scale = attention_scale
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.with_residual_attention = with_residual_attention  # RealFormer-style residual attention
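
For orientation, with BERT-base-like numbers (illustrative values, not taken from the source) the derived attributes work out as below; attention_scale conventionally means the query-key dot products are multiplied by 1/sqrt(key_size):

head_nums, head_size, key_size = 12, 64, None

key_size = key_size or head_size      # 64: falls back to head_size
output_dim = head_nums * head_size    # 768: width of the concatenated heads
scale = key_size ** -0.5              # 0.125: applied to the attention scores when attention_scale=True
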
Example No. 5
def __init__(self, center=True, scale=True, epsilon=None,
             conditional=False, condition_hidden_units=None, condition_hidden_activation='linear',
             condition_hidden_initializer='glorot_uniform', **kwargs):
    super(LayerNormalization, self).__init__(**kwargs)
    self.center = center
    self.scale = scale
    self.epsilon = epsilon or 1e-12
    self.conditional = conditional
    self.condition_hidden_units = condition_hidden_units
    self.condition_hidden_activation = activations.get(condition_hidden_activation)
    self.condition_hidden_initializer = initializers.get(condition_hidden_initializer)
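
In the unconditional case, center, scale, and epsilon toggle the standard layer-normalization formula: subtract the mean, divide by the standard deviation (epsilon keeps the division numerically stable), then rescale with gamma when scale=True and shift with beta when center=True. The function name below is hypothetical and the body is a sketch of that standard formula, not the project's call method:

import tensorflow as tf

def layer_norm_sketch(x, gamma, beta, epsilon=1e-12):
    # Normalize over the last axis, then rescale (gamma) and re-center (beta).
    mean = tf.reduce_mean(x, axis=-1, keepdims=True)
    variance = tf.reduce_mean(tf.square(x - mean), axis=-1, keepdims=True)
    normalized = (x - mean) / tf.sqrt(variance + epsilon)
    return gamma * normalized + beta
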
Example No. 6
def __init__(self,
             input_dim,
             output_dim,
             num_attention_heads,
             attn_scale_factor=2,
             relative_position_bias=True,
             embeddings_initializer='zeros',
             **kwargs
             ):
    super(AbsolutePositionEmbeddingTUPE, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.num_attention_heads = num_attention_heads
    self.attn_scale_factor = attn_scale_factor
    self.relative_position_bias = relative_position_bias
    self.embedding_initializer = initializers.get(embeddings_initializer)
    self.pos_scaling = float(self.output_dim / num_attention_heads * self.attn_scale_factor) ** -0.5
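
The final pos_scaling line is an inverse-square-root scale over the per-head dimension, enlarged by attn_scale_factor (the TUPE-style rescaling of positional attention). With illustrative BERT-base-like values, not taken from the source:

output_dim, num_attention_heads, attn_scale_factor = 768, 12, 2

per_head_dim = output_dim / num_attention_heads                   # 64.0
pos_scaling = float(per_head_dim * attn_scale_factor) ** -0.5     # 128 ** -0.5, roughly 0.0884
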