def __init__(self, n_vocab, max_len, d_word_vec, n_layers, n_head, d_inner,
             dropout=0.1, embedding_weight=None):
    super(Encoder, self).__init__()
    self.d_model = d_word_vec
    # Word embedding; index 0 is the padding index and the weights may be
    # initialised from a pre-trained matrix.
    self.embedding = nn.Embedding(n_vocab, d_word_vec, padding_idx=0,
                                  _weight=embedding_weight)
    # Fixed (frozen) sinusoidal position table.
    self.position_enc = nn.Embedding.from_pretrained(
        get_sinusoid_encoding_table(max_len + 1, d_word_vec, padding_idx=0),
        freeze=True)
    self.layer_stack = nn.ModuleList([
        EncoderLayer(self.d_model, d_inner, n_head, dropout=dropout)
        for _ in range(n_layers)
    ])
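# A minimal forward-pass sketch for the encoder above. It assumes the usual
# conventions of this style of code: src_seq/src_pos are LongTensors of token
# and position indices, and each EncoderLayer returns (output, attention).
# The argument names and the slf_attn_mask keyword are assumptions, not taken
# from the original snippet.
def forward(self, src_seq, src_pos, src_mask=None):
    # Sum word embeddings and the frozen sinusoid table, then run the stack.
    enc_output = self.embedding(src_seq) + self.position_enc(src_pos)
    for enc_layer in self.layer_stack:
        enc_output, _ = enc_layer(enc_output, slf_attn_mask=src_mask)
    return enc_output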
def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        encoder=Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        decoder=Decoder(
            DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        src_embed=nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        tgt_embed=nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        generator=Generator(d_model, tgt_vocab))
    # Initialise every weight matrix (dim > 1) with Xavier/Glorot uniform.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
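# Hedged usage sketch for make_model. The hyper-parameter values below are
# purely illustrative; only make_model itself comes from the snippet above.
if __name__ == "__main__":
    tmp_model = make_model(src_vocab=11, tgt_vocab=11, N=2)
    # Xavier init only touches matrices (dim > 1); biases keep their defaults.
    n_params = sum(p.numel() for p in tmp_model.parameters() if p.requires_grad)
    print(f"trainable parameters: {n_params:,}")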
def __init__(self, num_layers, d_model, num_heads, dff, pe_max_len, name, dp=0.1):
    super(Encoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    print('self.num_layers(encoder) ', self.num_layers)
    self.rate = dp
    # Cache-like trick: pre-compute an over-long positional-encoding table;
    # only the leading slice matching the input length is used later.
    self.pos_encoding = positional_encoding(pe_max_len, self.d_model)
    # Input projection: Dense -> LayerNorm (the dropout line is left disabled).
    self.input_proj = tf.keras.models.Sequential(name='en_proj')
    self.input_proj.add(
        tf.keras.layers.Dense(units=self.d_model,
                              kernel_initializer='glorot_normal'))
    # self.input_proj.add(tf.keras.layers.Dropout(rate=dp))
    self.input_proj.add(
        tf.keras.layers.experimental.LayerNormalization(epsilon=1e-6))
    self.dropout = tf.keras.layers.Dropout(rate=0.1, name='en_proj_dp')
    self.enc_layers = [
        EncoderLayer(d_model, num_heads, dff, 'EN' + str(_), dp)
        for _ in range(num_layers)
    ]
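# A possible call() for the TF encoder above, written only as a sketch. The
# mask argument, the EncoderLayer call signature, and the assumption that
# pos_encoding has shape (1, pe_max_len, d_model) are all borrowed from the
# standard Transformer recipe, not from the original snippet.
def call(self, x, training, mask=None):
    seq_len = tf.shape(x)[1]
    x = self.input_proj(x, training=training)   # Dense projection + LayerNorm
    x += self.pos_encoding[:, :seq_len, :]      # leading slice of the cached table
    x = self.dropout(x, training=training)
    for layer in self.enc_layers:
        x = layer(x, training=training, mask=mask)
    return x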
def __init__(self, n_enc, d_model, d_ff, d_k, d_v, n_head, dropout):
    super().__init__()
    self.encodes = nn.ModuleList([
        EncoderLayer(d_model, d_ff, d_k, d_v, n_head, dropout)
        for _ in range(n_enc)
    ])
def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, rate=0.1):
    super(Encoder, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers
    self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
                       for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(rate)
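# Hedged call() sketch for the encoder above. input_vocab_size is accepted by
# __init__ but unused in the snippet, so this sketch assumes x is already
# embedded to d_model; the mask argument and EncoderLayer call signature are
# likewise assumptions.
def call(self, x, training, mask):
    x = self.dropout(x, training=training)
    for i in range(self.num_layers):
        x = self.enc_layers[i](x, training, mask)
    return x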
def __init__(self, enc_vocab_size, max_word_len, n_enc, d_model, d_ff, n_head, dropout):
    super().__init__()
    self.n_position = max_word_len + 1
    self.enc_vocab_size = enc_vocab_size
    self.d_model = d_model
    # Word embedding plus a learned position embedding; PAD is the padding index.
    self.enc_ebd = nn.Embedding(enc_vocab_size, d_model, padding_idx=PAD)
    self.pos_ebd = nn.Embedding(self.n_position, d_model, padding_idx=PAD)
    self.encodes = nn.ModuleList([
        EncoderLayer(d_model, d_ff, n_head, dropout) for _ in range(n_enc)])
    self._init_weight()
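# Forward-pass sketch for the encoder above, assuming the common convention
# that src_seq/src_pos hold token and position indices and that each
# EncoderLayer takes (input, mask); the src_mask argument is an assumption.
def forward(self, src_seq, src_pos, src_mask=None):
    enc_output = self.enc_ebd(src_seq) + self.pos_ebd(src_pos)
    for layer in self.encodes:
        enc_output = layer(enc_output, src_mask)
    return enc_output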
def __init__(self, n_src_vocab, d_word_vec, n_layers, n_head, d_k, d_v,
             d_model, d_inner, pad_idx, dropout=0.1, n_position=200):
    super(Encoder, self).__init__()
    self.src_word_emb = nn.Embedding(n_src_vocab, d_word_vec, padding_idx=pad_idx)
    self.position_enc = PositionalEncoding(d_word_vec, n_position=n_position)
    self.dropout = nn.Dropout(p=dropout)
    self.layer_stack = nn.ModuleList([
        EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
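# Hedged forward() sketch for the encoder above: embed, add positional
# encoding, dropout, LayerNorm, then run the layer stack. The mask argument
# and the (output, attention) return value of EncoderLayer are assumptions.
def forward(self, src_seq, src_mask=None):
    enc_output = self.dropout(self.position_enc(self.src_word_emb(src_seq)))
    enc_output = self.layer_norm(enc_output)
    for enc_layer in self.layer_stack:
        enc_output, _ = enc_layer(enc_output, slf_attn_mask=src_mask)
    return enc_output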