Example #1
    
    encoder_model_file = 'encoder_rev.7.pt'
    decoder_model_file = 'decoder_rev.7.pt'
    encoder.load_state_dict(torch.load(encoder_model_file))
    decoder.load_state_dict(torch.load(decoder_model_file))

    '''
    #Load Pre-trained Embedding
    model_file = 'bi_gru.100.100.2.pt'
    if model_file != '' : model.load_state_dict(torch.load(model_file))
    else: model.load_pre_train_emb('cityu_training.char.emb.npy', 'cityu_training.char.dict', vocab)
    '''
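    # Note on the commented-out block above: when model_file names a saved
    # checkpoint it restores the whole model; otherwise load_pre_train_emb
    # presumably seeds the embedding layer from the pre-trained vectors in
    # the .npy file, keyed by the .dict vocabulary file.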
    
    loss_function = nn.NLLLoss(reduction='sum', ignore_index=de_vocab.item2index['_PAD_'])
    en_optimizer = optim.Adam(encoder.parameters(), lr=1e-3, weight_decay=0)
    de_optimizer = optim.Adam(decoder.parameters(), lr=1e-3, weight_decay=0)
    
    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        ones_matrix = ones_matrix.cuda()
        loss_function = loss_function.cuda()
        
    for epoch in range(20):
        
        pl.reset()
        encoder.train()
        decoder.train()
        total_loss = torch.Tensor([0])
        total_token = 0
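
A note on the ignore_index argument in the loss above: positions whose target
equals the pad index contribute neither to the summed loss nor to the
gradients, so padding never trains the model. A minimal standalone demo (the
pad index 0 and the 4-word vocabulary here are hypothetical, not taken from
the example):

    import torch
    import torch.nn as nn

    loss_fn = nn.NLLLoss(reduction='sum', ignore_index=0)
    # Log-probabilities for 3 timesteps over a 4-word vocabulary.
    log_probs = torch.log_softmax(torch.randn(3, 4), dim=1)
    targets = torch.tensor([2, 3, 0])  # the last position is padding
    # Only the first two positions are summed; the target equal to 0 is skipped.
    print(loss_fn(log_probs, targets))
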
Example #2

def train(article,
          title,
          word2idx,
          target2idx,
          source_lengths,
          target_lengths,
          args,
          val_article=None,
          val_title=None,
          val_source_lengths=None,
          val_target_lengths=None):

    if not os.path.exists('./temp/x.pkl'):
        size_of_val = int(len(article) * 0.05)
        val_article, val_title, val_source_lengths, val_target_lengths = \
            utils.sampling(article, title, source_lengths, target_lengths, size_of_val)

        utils.save_everything(article, title, source_lengths, target_lengths,
                              val_article, val_title, val_source_lengths,
                              val_target_lengths, word2idx)

    batch_size = args.batch
    train_size = len(article)
    val_size = len(val_article)
    max_a = max(source_lengths)
    max_t = max(target_lengths)
    print("source vocab size:", len(word2idx))
    print("target vocab size:", len(target2idx))
    print("max a:{}, max t:{}".format(max_a, max_t))
    print("train_size:", train_size)
    print("val size:", val_size)
    print("batch_size:", batch_size)
    print("-" * 30)
    use_coverage = False

    encoder = Encoder(len(word2idx))
    decoder = Decoder(len(target2idx), 50)
    if os.path.exists('encoder_model') and os.path.exists('decoder_model'):
        encoder.load_state_dict(torch.load('encoder_model'))
        decoder.load_state_dict(torch.load('decoder_model'))

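    # a single Adam instance can update both modules: their parameter
    # lists are simply concatenated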
    optimizer = torch.optim.Adam(list(encoder.parameters()) +
                                 list(decoder.parameters()),
                                 lr=0.001)
    n_epoch = 5
    print("Making word index and extend vocab")
    #article, article_tar, title, ext_vocab_all, ext_count = indexing_word(article, title, word2idx, target2idx)
    #article = to_tensor(article)
    #article_extend = to_tensor(article_extend)
    #title = to_tensor(title)
    print("preprocess done")

    if args.use_cuda:
        encoder.cuda()
        decoder.cuda()

    print("start training")
    for epoch in range(n_epoch):
        total_loss = 0
        batch_n = train_size // batch_size
        # enable the coverage mechanism (an attention-repetition penalty)
        # only after the first epoch
        if epoch > 0:
            use_coverage = True
        for b in range(batch_n):
            # initialization
            batch_x = article[b * batch_size:(b + 1) * batch_size]
            batch_y = title[b * batch_size:(b + 1) * batch_size]
            #batch_x_ext = article_extend[b*batch_size: (b+1)*batch_size]
            batch_x, batch_x_ext, batch_y, extend_vocab, extend_lengths = \
                utils.batch_index(batch_x, batch_y, word2idx, target2idx)

            if args.use_cuda:
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
                batch_x_ext = batch_x_ext.cuda()
            x_lengths = source_lengths[b * batch_size:(b + 1) * batch_size]
            y_lengths = target_lengths[b * batch_size:(b + 1) * batch_size]

            # workaround: a pack/pad round trip trims batch_x_ext to the
            # longest sequence actually present in this batch
            pack = pack_padded_sequence(batch_x_ext,
                                        x_lengths,
                                        batch_first=True)
            batch_x_ext_var, _ = pad_packed_sequence(pack, batch_first=True)
            current_loss = train_on_batch(encoder, decoder, optimizer, batch_x,
                                          batch_y, x_lengths, y_lengths,
                                          word2idx, target2idx,
                                          batch_x_ext_var, extend_lengths,
                                          use_coverage)

            batch_x = batch_x.cpu()
            batch_y = batch_y.cpu()
            batch_x_ext = batch_x_ext.cpu()

            print('epoch:{}/{}, batch:{}/{}, loss:{}'.format(
                epoch + 1, n_epoch, b + 1, batch_n, current_loss))
            if (b + 1) % args.show_decode == 0:
                torch.save(encoder.state_dict(), 'encoder_model')
                torch.save(decoder.state_dict(), 'decoder_model')
                batch_x_val, batch_x_ext_val, batch_y_val, extend_vocab, extend_lengths = \
                    utils.batch_index(val_article, val_title, word2idx, target2idx)
                # decode one random validation sample to monitor progress
                for _ in range(1):
                    idx = np.random.randint(0, val_size)
                    decode.beam_search(encoder, decoder,
                                       batch_x_val[idx].unsqueeze(0),
                                       batch_y_val[idx].unsqueeze(0), word2idx,
                                       target2idx, batch_x_ext_val[idx],
                                       extend_lengths[idx], extend_vocab[idx])

                batch_x_val = batch_x_val.cpu()
                batch_y_val = batch_y_val.cpu()
                batch_x_ext_val = batch_x_ext_val.cpu()

            total_loss += current_loss
            print('-' * 30)

    print()
    print("training finished")
Example #3
l_trn_src = pickle.load(open('data/l_trn_src.pkl', 'rb'))
trn_src_p = pickle.load(open('data/trn_src_p.pkl', 'rb'))
l_trn_tgt = pickle.load(open('data/l_trn_tgt.pkl', 'rb'))
trn_tgt_p = pickle.load(open('data/trn_tgt_p.pkl', 'rb'))
# tst_src_p/tst_tgt_p are used below but not loaded in this fragment;
# assuming they follow the same naming pattern as the training pickles:
tst_src_p = pickle.load(open('data/tst_src_p.pkl', 'rb'))
tst_tgt_p = pickle.load(open('data/tst_tgt_p.pkl', 'rb'))

tst_src_t = torch.LongTensor(tst_src_p)
tst_tgt_t = torch.LongTensor(tst_tgt_p)
trn_src_t = torch.LongTensor(trn_src_p)
trn_tgt_t = torch.LongTensor(trn_tgt_p)

enc = Encoder(len(vocab), 100, 100, 2, 'cuda', vocab[pad])
dec = Decoder(len(vocab), 100, 100, 2, 'cuda', vocab[pad], vocab[sos],
              vocab[eos], vocab[unk])
enc.to('cuda')
dec.to('cuda')
opt_enc = torch.optim.Adam(enc.parameters())
opt_dec = torch.optim.Adam(dec.parameters())

n_batch = len(trn_src_p) // batch_size

for e in range(epochs):
    enc.train()
    dec.train()
    epoch_loss = 0
    for i in range(n_batch):
        opt_enc.zero_grad()
        opt_dec.zero_grad()
        lengths = torch.LongTensor(l_trn_src[batch_size * i:batch_size * (i + 1)])
        out, h_n = enc(trn_src_t[batch_size * i:batch_size * (i + 1)], lengths)
        output = dec.teacher_force(
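
dec.teacher_force is not shown in this example; as used above it apparently
runs the decoder with teacher forcing, i.e. at every step the gold previous
token, rather than the model's own prediction, is fed as the decoder input.
A hypothetical minimal sketch of the idea (TinyDecoder and all of its names
are invented for illustration, not the Decoder used above):

    import torch
    import torch.nn as nn

    class TinyDecoder(nn.Module):
        def __init__(self, vocab_size, emb_dim, hid_dim, sos_idx):
            super().__init__()
            self.sos_idx = sos_idx
            self.emb = nn.Embedding(vocab_size, emb_dim)
            self.gru = nn.GRU(emb_dim, hid_dim, batch_first=True)
            self.out = nn.Linear(hid_dim, vocab_size)

        def teacher_force(self, h_0, targets):
            # The input at step t is the gold token t-1, with <sos> at step 0;
            # the model is trained to predict targets from these inputs.
            sos = torch.full((targets.size(0), 1), self.sos_idx, dtype=torch.long)
            inputs = torch.cat([sos, targets[:, :-1]], dim=1)
            out, _ = self.gru(self.emb(inputs), h_0)
            return self.out(out)  # (batch, tgt_len, vocab) scores

    dec_demo = TinyDecoder(vocab_size=10, emb_dim=8, hid_dim=8, sos_idx=1)
    scores = dec_demo.teacher_force(torch.zeros(1, 2, 8),
                                    torch.tensor([[4, 5, 2], [6, 7, 2]]))
    print(scores.shape)  # torch.Size([2, 3, 10])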