예제 #1
0
    '''
    
    # Summed (not averaged) negative log-likelihood; target positions equal to
    # the '_PAD_' index are excluded from the loss via ignore_index.
    # NOTE(review): NLLLoss expects log-probabilities as input -- confirm the
    # decoder output is log-softmaxed.
    loss_function = nn.NLLLoss(reduction = 'sum', ignore_index = de_vocab.item2index['_PAD_'])
    # Encoder and decoder each get their own Adam optimizer
    # (learning rate 1e-3, no weight decay).
    en_optimizer = optim.Adam(encoder.parameters(), lr = 1e-3, weight_decay = 0)
    de_optimizer = optim.Adam(decoder.parameters(), lr = 1e-3, weight_decay = 0)
    
    # When CUDA use is requested, move both models, the helper matrix and the
    # loss module to the GPU.
    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        # NOTE(review): ones_matrix is created outside this excerpt; its role
        # is not visible here.
        ones_matrix = ones_matrix.cuda()
        loss_function = loss_function.cuda()
        
    # Train for a fixed 20 epochs.
    for epoch in range(20):
        
        # NOTE(review): pl is defined outside this excerpt; reset() presumably
        # rewinds the pair loader for a fresh pass -- verify.
        pl.reset()
        # Put both modules in training mode.
        encoder.train()
        decoder.train()
        # Per-epoch accumulators: a one-element float tensor and an int counter.
        # Presumably updated further down the loop body (not visible here).
        total_loss = torch.Tensor([0])
        total_token = 0

        # Each item yielded by gen_pairs unpacks into source/target sequences
        # plus their lengths.
        for batch_idx, (en_seq, en_seq_len, de_seq, de_seq_len) in enumerate(pl.gen_pairs(batch_size)):

            # Clear gradients left over from the previous step on both optimizers.
            en_optimizer.zero_grad()
            de_optimizer.zero_grad()

            # Convert the sequences to int64 tensors; the length values are
            # left as yielded in the visible part of the loop.
            # NOTE(review): assumes each batch is already padded to a
            # rectangular shape -- TODO confirm.
            en_seq = torch.LongTensor(en_seq)
            de_seq = torch.LongTensor(de_seq)
            if use_cuda:
                en_seq = en_seq.cuda()
                de_seq = de_seq.cuda()
예제 #2
0
# Convert the test-target, train-source and train-target data to int64 tensors.
# NOTE(review): the *_p inputs are presumably padded index sequences -- confirm
# against the code that builds them.
tst_tgt_t = torch.LongTensor(tst_tgt_p)
trn_src_t = torch.LongTensor(trn_src_p)
trn_tgt_t = torch.LongTensor(trn_tgt_p)

# Build the encoder/decoder pair.
# NOTE(review): the positional arguments presumably mean (vocab size, embedding
# size, hidden size, number of layers, device, pad index, ...) -- confirm
# against the Encoder/Decoder definitions, which are not visible here.
enc = Encoder(len(vocab), 100, 100, 2, 'cuda', vocab[pad])
dec = Decoder(len(vocab), 100, 100, 2, 'cuda', vocab[pad], vocab[sos],
              vocab[eos], vocab[unk])
# The device is hard-coded to 'cuda', so this script requires a CUDA-capable GPU.
enc.to('cuda')
dec.to('cuda')
# Separate Adam optimizers for encoder and decoder, default hyper-parameters.
opt_enc = torch.optim.Adam(enc.parameters())
opt_dec = torch.optim.Adam(dec.parameters())

# Number of full batches; floor division leaves out any trailing partial batch.
n_batch = len(trn_src_p) // batch_size

# Main training loop: one pass over the full batches per epoch.
for e in range(epochs):
    # Put both modules in training mode.
    enc.train()
    dec.train()
    epoch_loss = 0
    for i in range(n_batch):
        # Clear gradients left over from the previous step on both optimizers.
        opt_enc.zero_grad()
        opt_dec.zero_grad()
        # Source lengths for the i-th batch slice, as an int64 tensor.
        lengths = torch.LongTensor(l_trn_src[batch_size * i:batch_size *
                                             (i + 1)])
        # Encode the i-th source batch; h_n is handed to the decoder below.
        out, h_n = enc(trn_src_t[batch_size * i:batch_size * (i + 1)], lengths)
        # Decode with teacher forcing: the target batch is reshaped to
        # [batch_size, tgt_max, 1] and passed with h_n and the target lengths.
        output = dec.teacher_force(
            trn_tgt_t[batch_size * i:batch_size * (i + 1)].reshape(
                [batch_size, tgt_max, 1]), h_n,
            torch.LongTensor(l_trn_tgt[batch_size * i:batch_size * (i + 1)]))
        # Batch loss starts at zero; presumably accumulated by the loop over
        # `output` that follows.
        loss = 0
        for o, l, t in zip(output,
                           l_trn_tgt[batch_size * i:batch_size * (i + 1)],