def train():
    """Train a seq2seq model with an attention-based decoder on the NMT dataset.

    Builds a d2l.Seq2SeqEncoder / Seq2SeqAttentionDecoder pair, wraps them in
    d2l.EncoderDecoder, and runs the d2l training loop on the best available
    device.
    """
    # Model / optimization hyperparameters.
    embed_size, num_hiddens, num_layers, dropout = 32, 32, 2, 0.0
    batch_size, num_steps = 64, 10
    lr, num_epochs, device = 0.005, 200, d2l.try_gpu()
    # BUG FIX: d2l.load_data_nmt returns (train_iter, src_vocab, tgt_vocab).
    # The original unpacked (src_vocab, tgt_vocab, train_iter), which would
    # pass a vocabulary where the data iterator was expected.
    train_iter, src_vocab, tgt_vocab = d2l.load_data_nmt(batch_size, num_steps)
    encoder = d2l.Seq2SeqEncoder(len(src_vocab), embed_size, num_hiddens,
                                 num_layers, dropout)
    decoder = Seq2SeqAttentionDecoder(len(tgt_vocab), embed_size, num_hiddens,
                                      num_layers, dropout)
    model = d2l.EncoderDecoder(encoder, decoder)
    # BUG FIX: pass tgt_vocab — the train_s2s_ch9 call elsewhere in this file
    # uses the signature (model, iter, lr, num_epochs, tgt_vocab, device);
    # the original call dropped tgt_vocab.
    d2l.train_s2s_ch9(model, train_iter, lr, num_epochs, tgt_vocab, device)
# NOTE(review): the two methods below read like TransformerDecoder methods
# (they use self.blks / self.dense / self.pos_encoding); the enclosing class
# header is not in this chunk — confirm against the full file.

def init_state(self, enc_outputs, env_valid_len, *args):
    """Build the initial decoder state from the encoder's results.

    The state is [encoder outputs, source valid lengths, per-block cache],
    where the cache holds one (initially empty) slot per decoder block.
    """
    per_block_cache = [None] * self.num_layers
    return [enc_outputs, env_valid_len, per_block_cache]

def forward(self, X, state):
    """Embed, position-encode, and run X through every decoder block."""
    # Scale embeddings by sqrt(d) before adding positional encodings.
    scaled = self.embedding(X) * math.sqrt(self.num_hiddens)
    X = self.pos_encoding(scaled)
    for block in self.blks:
        X, state = block(X, state)
    return self.dense(X), state

# --- Hyperparameters for the Transformer translation model ---
num_hiddens, num_layers, dropout, batch_size, num_steps = 32, 2, 0.1, 64, 10
lr, num_epochs, device = 0.005, 200, d2l.try_gpu()
ffn_num_input, ffn_num_hiddens, num_heads = 32, 64, 4
key_size, query_size, value_size = 32, 32, 32
norm_shape = [32]

# --- Data, model assembly, and training ---
train_iter, src_vocab, tgt_vocab = d2l.load_data_nmt(batch_size, num_steps)
encoder = TransformerEncoder(
    len(src_vocab), key_size, query_size, value_size, num_hiddens, norm_shape,
    ffn_num_input, ffn_num_hiddens, num_heads, num_layers, dropout)
decoder = TransformerDecoder(
    len(tgt_vocab), key_size, query_size, value_size, num_hiddens, norm_shape,
    ffn_num_input, ffn_num_hiddens, num_heads, num_layers, dropout)
model = d2l.EncoderDecoder(encoder, decoder)
d2l.train_s2s_ch9(model, train_iter, lr, num_epochs, tgt_vocab, device)
#%%