def __init__(self, n_state: int, n_head: int, d_hid: int, residual_dropout: float,
             attention_dropout: float, use_attn_mask: bool, layer_id: int, neg_inf: float,
             ln_epsilon: float, accurate_gelu: bool) -> None:
    # Self-attention sub-layer, followed by dropout, residual add and layer normalization.
    self.attention = MultiHeadSelfAttention(n_state, n_head, attention_dropout,
                                            use_attn_mask, layer_id, neg_inf)
    self.drop1 = Dropout(residual_dropout, name='layer_{}/ln_1_drop'.format(layer_id))
    self.add1 = Add(name='layer_{}/ln_1_add'.format(layer_id))
    self.ln1 = LayerNormalization(ln_epsilon, name='layer_{}/ln_1'.format(layer_id))
    # Position-wise feed-forward sub-layer with its own dropout, residual add and layer norm.
    self.ffn = PositionWiseFF(n_state, d_hid, layer_id, accurate_gelu)
    self.drop2 = Dropout(residual_dropout, name='layer_{}/ln_2_drop'.format(layer_id))
    self.add2 = Add(name='layer_{}/ln_2_add'.format(layer_id))
    self.ln2 = LayerNormalization(ln_epsilon, name='layer_{}/ln_2'.format(layer_id))
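# A minimal sketch of how these sub-layers are presumably wired together in the block's
# call, following the standard post-LN residual pattern. The attention call signature and
# mask handling below are assumptions for illustration, not taken from the original code.
def __call__(self, x, mask):
    # Self-attention sub-layer: attend, drop, residual add, then layer norm (post-LN).
    attended = self.attention(x, mask)
    normed = self.ln1(self.add1([x, self.drop1(attended)]))
    # Position-wise feed-forward sub-layer with the same dropout/residual/norm pattern.
    transformed = self.ffn(normed)
    return self.ln2(self.add2([normed, self.drop2(transformed)]))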
def __init__(self, output_dim: int = 768, dropout: float = 0.1,
             vocab_size: int = 30000 + TextEncoder.SPECIAL_COUNT, max_len: int = 512,
             trainable_pos_embedding: bool = True, use_one_dropout: bool = False,
             use_embedding_layer_norm: bool = False, ln_epsilon: float = 1e-5, **kwargs):
    super().__init__(**kwargs)
    self.max_len = max_len
    self.use_one_dropout = use_one_dropout
    self.output_dim = output_dim
    self.dropout = dropout
    self.vocab_size = vocab_size
    self.trainable_pos_embedding = trainable_pos_embedding
    self.segment_emb = keras.layers.Embedding(TextEncoder.NUM_SEGMENTS, output_dim,
                                              input_length=max_len, name='SegmentEmbedding')
    if not trainable_pos_embedding:
        # Frozen sinusoidal position embeddings.
        self.pos_emb = keras.layers.Embedding(max_len, output_dim, trainable=False,
                                              input_length=max_len, name='PositionEmbedding',
                                              weights=[_get_pos_encoding_matrix(max_len, output_dim)])
    else:
        # Learned (trainable) position embeddings.
        self.pos_emb = keras.layers.Embedding(max_len, output_dim, input_length=max_len,
                                              name='PositionEmbedding')
    self.token_emb = keras.layers.Embedding(vocab_size, output_dim, input_length=max_len,
                                            name='TokenEmbedding')
    self.embedding_dropout = keras.layers.Dropout(dropout, name='EmbeddingDropOut')
    self.add_embeddings = keras.layers.Add(name='AddEmbeddings')
    self.use_embedding_layer_norm = use_embedding_layer_norm
    if self.use_embedding_layer_norm:
        self.embedding_layer_norm = LayerNormalization(ln_epsilon)
    else:
        self.embedding_layer_norm = None
    self.ln_epsilon = ln_epsilon
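# The _get_pos_encoding_matrix helper referenced above is not shown here. A plausible
# implementation is the standard sinusoidal encoding from "Attention Is All You Need";
# the version below is a sketch under that assumption, with position 0 left all-zero
# (commonly reserved for padding).
import numpy as np

def _get_pos_encoding_matrix(max_len: int, d_emb: int) -> np.ndarray:
    pos_enc = np.array(
        [[pos / np.power(10000, 2 * (j // 2) / d_emb) for j in range(d_emb)]
         if pos != 0 else np.zeros(d_emb)
         for pos in range(max_len)],
        dtype=np.float32)
    pos_enc[1:, 0::2] = np.sin(pos_enc[1:, 0::2])  # even dimensions: sine
    pos_enc[1:, 1::2] = np.cos(pos_enc[1:, 1::2])  # odd dimensions: cosine
    return pos_enc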
def __init__(self, layer: nn.Module, n_layers: int):
    """
    Constructor for the global ``Encoder``.

    :param layer: layer module to use.
    :param n_layers: number of encoder layers to use.
    """
    # call base constructor
    super(Encoder, self).__init__()
    self.layers = clone(layer, n_layers)
    self.norm = LayerNormalization(layer.size)
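# The clone helper used above is not defined in this snippet. It is presumably the usual
# deep-copy utility that stacks n independent copies of a module; a minimal sketch under
# that assumption:
import copy

import torch.nn as nn

def clone(module: nn.Module, n: int) -> nn.ModuleList:
    # Each copy gets its own parameters, so the stacked layers do not share weights.
    return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])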
def __init__(self, layer: nn.Module, N: int):
    """
    Constructor for the global ``Decoder``.

    :param layer: layer module to use.
    :param N: number of decoder layers to use.
    """
    # call base constructor
    super(Decoder, self).__init__()
    self.layers = clone(layer, N)
    self.norm = LayerNormalization(layer.size)
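# LayerNormalization(layer.size) is built from the layer's feature size but is not defined
# in this snippet. A minimal PyTorch sketch, assuming the conventional layer norm with a
# learnable gain and bias over the last dimension (torch.nn.LayerNorm would also work):
import torch
import torch.nn as nn

class LayerNormalization(nn.Module):
    def __init__(self, features: int, eps: float = 1e-6):
        super().__init__()
        self.gain = nn.Parameter(torch.ones(features))
        self.bias = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Normalize over the feature dimension, then rescale and shift.
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.gain * (x - mean) / (std + self.eps) + self.bias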
def __init__(self, output_dim: int = 768, dropout: float = 0.1, vocab_size: int = 30000,
             max_len: int = 512, trainable_pos_embedding: bool = True,
             use_one_dropout: bool = False, use_embedding_layer_norm: bool = False,
             layer_norm_epsilon: float = 1e-5, **kwargs):
    super().__init__(**kwargs)
    self.max_len = max_len
    self.use_one_dropout = use_one_dropout
    self.output_dim = output_dim
    self.dropout = dropout
    self.vocab_size = vocab_size
    # BERT uses two segments for the next-sentence classification task.
    self.segment_emb = keras.layers.Embedding(2, output_dim, name='SegmentEmbedding')
    self.trainable_pos_embedding = trainable_pos_embedding
    if not trainable_pos_embedding:
        # Frozen sinusoidal position embeddings.
        self.pos_emb = keras.layers.Embedding(
            max_len, output_dim, trainable=False, name='PositionEmbedding',
            weights=[_get_pos_encoding_matrix(max_len, output_dim)])
    else:
        # Learned (trainable) position embeddings.
        self.pos_emb = keras.layers.Embedding(max_len, output_dim, name='PositionEmbedding')
    self.token_emb = keras.layers.Embedding(vocab_size, output_dim, name='TokenEmbedding')
    self.embedding_dropout = keras.layers.Dropout(dropout, name='EmbeddingDropOut')
    self.add_embeddings = keras.layers.Add(name='AddEmbeddings')
    self.use_embedding_layer_norm = use_embedding_layer_norm
    if self.use_embedding_layer_norm:
        self.embedding_layer_norm = LayerNormalization(layer_norm_epsilon)
    else:
        self.embedding_layer_norm = None
    self.layer_norm_epsilon = layer_norm_epsilon
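# A sketch of how the pieces above are presumably combined when the embedding layer is
# called. The input layout (token, segment and position id tensors) and the exact role of
# use_one_dropout are assumptions made for illustration, not taken from the original code.
def __call__(self, inputs):
    tokens, segment_ids, pos_ids = inputs
    token_embedding = self.token_emb(tokens)
    segment_embedding = self.segment_emb(segment_ids)
    pos_embedding = self.pos_emb(pos_ids)
    if self.use_one_dropout:
        # Sum the three embeddings first, then apply a single dropout.
        summed = self.embedding_dropout(
            self.add_embeddings([token_embedding, segment_embedding, pos_embedding]))
    else:
        # Apply dropout to each embedding independently before summation.
        summed = self.add_embeddings([self.embedding_dropout(token_embedding),
                                      self.embedding_dropout(segment_embedding),
                                      self.embedding_dropout(pos_embedding)])
    if self.embedding_layer_norm is not None:
        summed = self.embedding_layer_norm(summed)
    return summed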