def train(): embed_size, num_hiddens, num_layers, dropout = 32, 32, 2, 0.0 batch_size, num_steps = 64, 10 lr, num_epochs, device = 0.005, 200, d2l.try_gpu() src_vocab, tgt_vocab, train_iter = d2l.load_data_nmt(batch_size, num_steps) encoder = d2l.Seq2SeqEncoder(len(src_vocab), embed_size, num_hiddens, num_layers, dropout) decoder = Seq2SeqAttentionDecoder(len(tgt_vocab), embed_size, num_hiddens, num_layers, dropout) model = d2l.EncoderDecoder(encoder, decoder) d2l.train_s2s_ch9(model, train_iter, lr, num_epochs, device)
for blk in self.blks: X, state = blk(X, state) return self.dense(X), state num_hiddens, num_layers, dropout, batch_size, num_steps = 32, 2, 0.1, 64,10 lr, num_epochs, device = 0.005, 200, d2l.try_gpu() ffn_num_input, ffn_num_hiddens, num_heads = 32, 64, 4 key_size, query_size, value_size = 32, 32, 32 norm_shape = [32] train_iter, src_vocab, tgt_vocab = d2l.load_data_nmt(batch_size, num_steps) encoder = TransformerEncoder( len(src_vocab), key_size, query_size, value_size, num_hiddens, norm_shape, ffn_num_input, ffn_num_hiddens, num_heads, num_layers, dropout) decoder = TransformerDecoder( len(tgt_vocab), key_size, query_size,value_size,num_hiddens, norm_shape, ffn_num_input, ffn_num_hiddens, num_heads, num_layers,dropout) model = d2l.EncoderDecoder(encoder, decoder) d2l.train_s2s_ch9(model, train_iter,lr,num_epochs,tgt_vocab,device) #%% engs = ['go .', "i lost .", 'i\'m home .', 'he\'s calm .'] fras = ['va !', 'j\'ai perdu .', 'je suis chez moi .', 'il est calme .'] # 在预测时,Transformer是一次encoder,多次decoer(直到预测出<eos>) d2l.translate(engs, fras, model, src_vocab, tgt_vocab, num_steps, device) # %%
if (epoch + 1) % 10 == 0: animator.add(epoch + 1, (metric[0] / metric[1], )) print(f'loss {metric[0] / metric[1]:.3f}, {metric[1] / timer.stop():.1f} ' f'tokens/sec on {str(device)}') embed_size, num_hiddens, num_layers, dropout = 32, 34, 2, 0.1 batch_size, num_steps = 64, 10 lr, num_epochs, device = 0.005, 300, d2l.try_gpu() train_iter, src_vocab, tgt_vocab = d2l.load_data_nmt(batch_size, num_steps) encoder = Seq2SeqEncoder(len(src_vocab), embed_size, num_hiddens, num_layers, dropout) decoder = Seq2SeqDecoder(len(tgt_vocab), embed_size, num_hiddens, num_layers, dropout) net = d2l.EncoderDecoder(encoder, decoder) train_seq2seq(net, train_iter, lr, num_epochs, tgt_vocab, device) #@save def predict_seq2seq(net, src_sentence, src_vocab, tgt_vocab, num_steps, device, save_attention_weights=False): """Predict for sequence to sequence.""" # Set `net` to eval mode for inference net.eval() src_tokens = src_vocab[src_sentence.lower().split(' ')] + [