Example #1
def train():
    # Hyperparameters for the attention-based seq2seq model
    embed_size, num_hiddens, num_layers, dropout = 32, 32, 2, 0.0
    batch_size, num_steps = 64, 10
    lr, num_epochs, device = 0.005, 200, d2l.try_gpu()

    src_vocab, tgt_vocab, train_iter = d2l.load_data_nmt(batch_size, num_steps)
    encoder = d2l.Seq2SeqEncoder(len(src_vocab), embed_size, num_hiddens,
                                 num_layers, dropout)
    decoder = Seq2SeqAttentionDecoder(len(tgt_vocab), embed_size, num_hiddens,
                                      num_layers, dropout)
    model = d2l.EncoderDecoder(encoder, decoder)
    d2l.train_s2s_ch9(model, train_iter, lr, num_epochs, device)
Example #2
        # Tail of the Transformer decoder's forward pass: run X through each
        # decoder block, then project the output to vocabulary size.
        for blk in self.blks:
            X, state = blk(X, state)
        return self.dense(X), state
num_hiddens, num_layers, dropout, batch_size, num_steps = 32, 2, 0.1, 64, 10
lr, num_epochs, device = 0.005, 200, d2l.try_gpu()
ffn_num_input, ffn_num_hiddens, num_heads = 32, 64, 4
key_size, query_size, value_size = 32, 32, 32
norm_shape = [32]

train_iter, src_vocab, tgt_vocab = d2l.load_data_nmt(batch_size, num_steps)

encoder = TransformerEncoder(
    len(src_vocab), key_size, query_size, value_size, num_hiddens,
    norm_shape, ffn_num_input, ffn_num_hiddens, num_heads,
    num_layers, dropout)

decoder = TransformerDecoder(
    len(tgt_vocab), key_size, query_size, value_size, num_hiddens,
    norm_shape, ffn_num_input, ffn_num_hiddens, num_heads,
    num_layers, dropout)

model = d2l.EncoderDecoder(encoder, decoder)
d2l.train_s2s_ch9(model, train_iter, lr, num_epochs, tgt_vocab, device)

#%%
engs = ['go .', "i lost .", 'i\'m home .', 'he\'s calm .']
fras = ['va !', 'j\'ai perdu .', 'je suis chez moi .', 'il est calme .']
# At prediction time, the Transformer runs the encoder once and the decoder
# step by step (until <eos> is predicted)
d2l.translate(engs, fras, model, src_vocab, tgt_vocab, num_steps, device)
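# The loop below is a minimal, hypothetical sketch of that inference procedure
# (the actual helper used above is d2l's; `greedy_decode`, `enc_X` and
# `enc_valid_len` are illustrative names, and the sketch assumes the d2l
# EncoderDecoder interface where the decoder carries its state between steps):
import torch

def greedy_decode(net, enc_X, enc_valid_len, tgt_vocab, num_steps, device):
    """Encode once, then greedily decode one token at a time until <eos>."""
    enc_outputs = net.encoder(enc_X, enc_valid_len)
    dec_state = net.decoder.init_state(enc_outputs, enc_valid_len)
    # Decoding starts from the <bos> token.
    dec_X = torch.tensor([[tgt_vocab['<bos>']]], dtype=torch.long, device=device)
    output_seq = []
    for _ in range(num_steps):
        Y, dec_state = net.decoder(dec_X, dec_state)
        dec_X = Y.argmax(dim=2)              # pick the most likely next token
        pred = dec_X.squeeze(dim=0).item()
        if pred == tgt_vocab['<eos>']:       # stop at the end-of-sequence token
            break
        output_seq.append(pred)
    return ' '.join(tgt_vocab.to_tokens(output_seq))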
# %%
        # Tail of the training loop: record the per-token loss every 10 epochs,
        # then report the final loss and token throughput.
        if (epoch + 1) % 10 == 0:
            animator.add(epoch + 1, (metric[0] / metric[1], ))
    print(f'loss {metric[0] / metric[1]:.3f}, {metric[1] / timer.stop():.1f} '
          f'tokens/sec on {str(device)}')
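
# A small hedged sketch of what `metric` above holds: in d2l's training loop it
# is an Accumulator over two quantities, the summed loss and the number of
# target tokens, so metric[0] / metric[1] is the average per-token loss and
# metric[1] / timer.stop() is the token throughput. This assumes the PyTorch
# edition of d2l; the numbers are made up just to illustrate the bookkeeping.
from d2l import torch as d2l

metric = d2l.Accumulator(2)     # accumulates (loss sum, token count)
metric.add(3.5, 10)             # e.g. one batch: total loss 3.5 over 10 tokens
metric.add(7.0, 20)             # another batch
print(metric[0] / metric[1])    # average loss per token -> 0.35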


embed_size, num_hiddens, num_layers, dropout = 32, 34, 2, 0.1
batch_size, num_steps = 64, 10
lr, num_epochs, device = 0.005, 300, d2l.try_gpu()

train_iter, src_vocab, tgt_vocab = d2l.load_data_nmt(batch_size, num_steps)
encoder = Seq2SeqEncoder(len(src_vocab), embed_size, num_hiddens, num_layers,
                         dropout)
decoder = Seq2SeqDecoder(len(tgt_vocab), embed_size, num_hiddens, num_layers,
                         dropout)
net = d2l.EncoderDecoder(encoder, decoder)
train_seq2seq(net, train_iter, lr, num_epochs, tgt_vocab, device)


#@save
def predict_seq2seq(net,
                    src_sentence,
                    src_vocab,
                    tgt_vocab,
                    num_steps,
                    device,
                    save_attention_weights=False):
    """Predict for sequence to sequence."""
    # Set `net` to eval mode for inference
    net.eval()
    src_tokens = src_vocab[src_sentence.lower().split(' ')] + [