Esempio n. 1
0
def translate(model,
              src_vocab,
              trg_vocab,
              corpus_iter,
              translation_output=None):
    """Translate every batch of ``corpus_iter`` with beam search.

    For each batch, the first element is taken as the raw source batch,
    converted to word indices, and decoded with ``model.beamsearch``. Only
    the first hypothesis returned by the beam search is kept.

    Reads the module-level ``opt`` for ``opt.cuda`` and ``opt.beam_size``.

    Args:
        model: Object exposing ``eval()`` and
            ``beamsearch(src, src_mask, beam_size, normalize=True)``.
        src_vocab: Source vocabulary passed to ``batch_str2idx_with_flag``
            and ``get_batch_mask``.
        trg_vocab: Target vocabulary passed to ``batch_idx2str``.
        corpus_iter: Iterable of batches; ``list(batch)[0]`` is the raw
            source batch.
        translation_output: Path of the file to write, one space-joined
            hypothesis per line. When ``None`` (the default) nothing is
            written; previously this raised ``TypeError`` from ``open(None)``
            after all decoding work had already been done.

    Returns:
        The list of decoded hypotheses (each a sequence of tokens), in
        corpus order.
    """
    model.eval()
    hyp_list = []

    for idx, batch in enumerate(corpus_iter, start=1):
        print(idx)  # progress indicator: 1-based batch counter
        src_raw = list(batch)[0]
        src = batch_str2idx_with_flag(src_raw,
                                      src_vocab,
                                      unk=UNK,
                                      pad=PAD,
                                      sos=SOS,
                                      eos=EOS)
        src = to_Tensor(src, tensor_type=torch.LongTensor, cuda=opt.cuda)
        src_mask = get_batch_mask(src, src_vocab, PAD)
        with torch.no_grad():
            # Scores are returned alongside the sentences but are not needed.
            sentences_output, _ = model.beamsearch(src,
                                                   src_mask,
                                                   opt.beam_size,
                                                   normalize=True)
        best_sentence = batch_idx2str([sentences_output[0]], trg_vocab)
        hyp_list.append(best_sentence[0])

    if translation_output is not None:
        # Explicit UTF-8 so that non-ASCII tokens do not depend on the
        # platform's default encoding.
        with open(translation_output, 'w', encoding='utf-8') as f:
            f.writelines(' '.join(sentence) + '\n' for sentence in hyp_list)

    return hyp_list
Esempio n. 2
0
def evaluate(opt, model, src_vocab, trg_vocab, corpus_iter, batch_idx,
             cur_epoch):
    """Score ``model`` on ``corpus_iter`` with corpus-level BLEU.

    Each batch is indexed as ``batch[0]`` (raw source batch) and
    ``batch[1:]`` (reference sides); the first item of every reference side
    is used as a reference for that batch. The source is decoded with
    ``model.beamsearch`` and only the first returned hypothesis is kept.

    Args:
        opt: Options object; ``opt.cuda`` and ``opt.beam_size`` are read.
        model: Object exposing ``eval()`` and ``beamsearch(...)``.
        src_vocab: Source vocabulary for index conversion and masking.
        trg_vocab: Target vocabulary for converting indices back to tokens.
        corpus_iter: Iterable of batches as described above.
        batch_idx: Caller-supplied identifier, printed and returned as-is.
        cur_epoch: Caller-supplied epoch number, returned as-is.

    Returns:
        ``(bleu, batch_idx, cur_epoch)`` on success. If any ``Exception`` is
        raised, a message is printed and the function falls through,
        returning ``None``.
    """
    try:
        model.eval()
        print('!!!eval', id(model))
        started_at = time.time()
        hypotheses = []
        references = []

        print('sub: ', os.getpid())
        print('num: ', batch_idx)
        for step, batch in enumerate(corpus_iter, start=1):
            print(step)
            src_raw = batch[0]
            # One reference per target side: the first item of each.
            references.append([trg_side[0] for trg_side in batch[1:]])

            src_idx = batch_str2idx_with_flag(src_raw,
                                              src_vocab,
                                              unk=UNK,
                                              pad=PAD,
                                              sos=SOS,
                                              eos=EOS)
            src_tensor = to_Tensor(src_idx,
                                   tensor_type=torch.LongTensor,
                                   cuda=opt.cuda)
            src_mask = get_batch_mask(src_tensor, src_vocab, PAD)

            with torch.no_grad():
                sentences_output, scores_output = model.beamsearch(
                    src_tensor, src_mask, opt.beam_size, normalize=True)
                top_sentence = sentences_output[0]
                _top_score = scores_output[0]  # fetched but not used
                decoded = batch_idx2str([top_sentence], trg_vocab)
                hypotheses.append(decoded[0])

        smoother = SmoothingFunction().method1
        bleu = corpus_bleu(references,
                           hypotheses,
                           smoothing_function=smoother)
        elapsed = time.time() - started_at
        print('subprocess %d batch_idx %d, time: ' % (os.getpid(), batch_idx),
              elapsed)
        return bleu, batch_idx, cur_epoch
    except Exception as ex:
        # Best-effort reporting: the error is printed and None is returned.
        print("subprcess wrong: %s" % ex)
    def greedysearch(self,
                     src,
                     src_mask,
                     max_len=None,
                     min_len=None,
                     cuda=False):
        """Greedily decode a target sequence, one token per step.

        The source is encoded once; then, for ``max_len`` steps, the whole
        target prefix is re-run through every decoder layer and the arg-max
        of the output at the current position is appended to the sentence.

        Args:
            src: Source index tensor; ``src.size(1)`` is used as the source
                length (assumes a 2-D ``(batch, length)`` layout — TODO
                confirm).
            src_mask: Source mask; ``src_mask.sum(1)`` is passed to the
                encoder layers as the per-sequence lengths.
            max_len: Number of decoding steps. Defaults to three times
                ``src.size(1)``.
            min_len: Defaults to half of ``src.size(1)``; computed but not
                used anywhere in this method.
            cuda: Forwarded to ``to_Tensor`` when building the decoder input.

        Returns:
            A list of ints starting with ``self.dec_sos`` followed by exactly
            ``max_len`` predicted word indices (the EOS early-stop below is
            commented out).
        """
        max_len = src.size(1) * 3 if max_len is None else max_len
        # NOTE(review): true division makes this a float, and the value is
        # never read again in this method.
        min_len = src.size(1) / 2 if min_len is None else min_len
        src_seq_lens = src_mask.sum(1)
        # Encoder pass, done once: embeddings + positional encoding, then the
        # encoder layer stack.
        enc_emb = self.encoder.src_emb(src)
        enc_pos_emb = position_encoding_init(enc_emb.size(1),
                                             enc_emb.size(2),
                                             cuda=enc_emb.is_cuda)
        enc_input = enc_emb + enc_pos_emb  # add positional embeddings to input embeddings
        enc_output = enc_input
        for enc_layer in self.encoder.layer_stack:
            enc_output = enc_layer(enc_output, src_seq_lens)
        # The decoded sentence starts with the start-of-sentence index and is
        # wrapped as a single-row batch.
        sentence = [self.dec_sos]
        output = to_Tensor([sentence], tensor_type=torch.LongTensor, cuda=cuda)
        for k in range(max_len):
            # The full prefix decoded so far is re-embedded on every step.
            dec_emb = self.decoder.dec_emb(output)
            dec_pos_emb = position_encoding_init(dec_emb.size(1),
                                                 dec_emb.size(2),
                                                 cuda=dec_emb.is_cuda)
            dec_input = dec_emb + dec_pos_emb
            dec_layer_output = dec_input
            for dec_layer in self.decoder.layer_stack:
                # Self-attention sublayer: the layer input is replicated
                # self.h times along a new dim 1 and used as Q, K and V.
                multi_head_input = dec_layer_output.unsqueeze(1).expand(
                    -1, self.h, -1, -1)
                multi_head_q, multi_head_k, multi_head_v = multi_head_input, multi_head_input, multi_head_input
                # Only Q, K, V are passed here; no mask argument is supplied.
                self_attn_output = dec_layer.masked_multi_head_attn(
                    multi_head_q, multi_head_k, multi_head_v)
                residual_output1 = dec_layer.layer_norm1(
                    dec_layer_output +
                    self_attn_output)  # Add(residual connection) & Norm

                # Decoder-encoder attention sublayer: Q comes from the
                # decoder side, K and V from the encoder output.
                multi_head_q = residual_output1.unsqueeze(1).expand(
                    -1, self.h, -1, -1)
                multi_head_input = enc_output.unsqueeze(1).expand(
                    -1, self.h, -1, -1)
                multi_head_k, multi_head_v = multi_head_input, multi_head_input
                dec_enc_attn_output = dec_layer.multi_head_attn(
                    multi_head_q, multi_head_k, multi_head_v)
                residual_output2 = dec_layer.layer_norm2(
                    residual_output1 +
                    dec_enc_attn_output)  # Add(residual connection) & Norm
                # Position-wise Feedforward Sublayer
                feedforwad_output = dec_layer.pos_ffn(residual_output2)
                dec_layer_output = dec_layer.layer_norm3(
                    residual_output2 +
                    feedforwad_output)  # Add(residual connection) & Norm
            dec_output = dec_layer_output
            y_prob = self.affine(dec_output)
            # Only row 0 is read, at position k (the last position of the
            # current prefix) — presumably src is a single-sentence batch;
            # verify against callers.
            cur_word_idx = int(y_prob[0][k].argmax())
            # NOTE(review): the EOS early-stop below is disabled, so decoding
            # always runs max_len steps. If it is re-enabled, compare with
            # `==`; `is` tests object identity, not integer equality.
            #if cur_word_idx is self.dec_eos:
            #break
            sentence.append(cur_word_idx)
            # Rebuild the decoder input from the extended prefix.
            output = to_Tensor([sentence],
                               tensor_type=torch.LongTensor,
                               cuda=cuda)

        return sentence
Esempio n. 4
0
def train(model, src_vocab, trg_vocab, optim_wrapper, train_iter, vldt_iter,
          loss_function):
    """Train ``model`` with gradients accumulated over ``opt.interval`` batches.

    For every batch the source and target sentences are converted to index
    tensors, the model output is scored with ``loss_function`` and the loss is
    back-propagated. Every ``opt.interval`` batches the averaged loss is
    reported, the best-loss model is saved when the average improves on the
    module-level ``min_loss``, and the optimizer is stepped. Every 100
    optimizer steps a few arg-max predictions from the current batch are
    printed. A checkpoint is saved at the end of each epoch.

    Reads the module-level ``opt``, ``max_bleu`` and ``dec_pad``; reads and
    updates the module-level ``min_loss``.

    Args:
        model: Callable as ``model(src, src_mask, f_trg, f_trg_mask)``.
        src_vocab: Source vocabulary.
        trg_vocab: Target vocabulary.
        optim_wrapper: Object exposing ``step``, ``zero_grad``,
            ``update_lr_per_step`` and ``update_lr_per_epoch``.
        train_iter: Iterable of training batches accepted by ``sort_batch``.
        vldt_iter: Validation iterator; only referenced by the disabled
            BLEU-evaluation snippet below.
        loss_function: Called as
            ``loss_function(y_prob.transpose(1, 2), targets)``.
    """
    global opt, min_loss, max_bleu
    subprocess_pool = Pool(2)

    model.train()
    print('start training!!!', id(model))
    for epoch in range(opt.epoch, opt.nepoch):  # TODO
        cur_epoch = epoch + 1
        total_loss = 0
        print('############### epoch = %d ###############\n' % cur_epoch)
        for batch_idx, batch in enumerate(train_iter, start=1):
            sorted_batch = sort_batch(batch)
            src_raw = sorted_batch[0]
            trg_raw = sorted_batch[1]
            # Convert the source and target sentences to word indices.
            src = batch_str2idx_with_flag(src_raw,
                                          src_vocab,
                                          unk=UNK,
                                          pad=PAD,
                                          sos=SOS,
                                          eos=EOS)
            f_trg = batch_str2idx_with_flag(trg_raw,
                                            trg_vocab,
                                            unk=UNK,
                                            pad=PAD,
                                            sos=SOS,
                                            eos=EOS)
            src, f_trg = to_Tensor(src,
                                   f_trg,
                                   tensor_type=torch.LongTensor,
                                   cuda=opt.cuda)
            src_mask = get_batch_mask(src, src_vocab, PAD)
            f_trg_mask = get_batch_mask(f_trg, trg_vocab, PAD)
            # Disabled: reversed target-side batch, not used for now.
            # b_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, unk=UNK, pad=PAD, sos=SOS, eos=EOS, reverse=True)
            # src, f_trg, b_trg = to_Tensor(src, f_trg, b_trg, tensor_type=torch.LongTensor, cuda=opt.cuda)
            # b_trg_mask = get_batch_mask(b_trg, trg_vocab, PAD)
            y_prob = model(src, src_mask, f_trg, f_trg_mask)
            # --------------------------------------- TODO
            # Append one PAD column so that the shifted targets f_trg[:, 1:]
            # have the same length as the model output. new_full creates the
            # column with f_trg's dtype and device; building it with
            # torch.LongTensor(...) put it on the CPU and made torch.cat fail
            # when training on the GPU.
            pad_column = f_trg.new_full((f_trg.size(0), 1), int(dec_pad))
            f_trg = torch.cat((f_trg, pad_column), 1)
            loss = loss_function(y_prob.transpose(1, 2), f_trg[:, 1:])
            total_loss = total_loss + float(loss)
            loss.backward()
            # ----------------------------------------
            if batch_idx % opt.interval == 0:
                total_loss = total_loss / opt.interval
                if total_loss < min_loss:
                    print('& epoch = %d batch_idx = %d min_loss = %f &\n' %
                          (cur_epoch, batch_idx / opt.interval, total_loss))
                    min_loss = total_loss
                    save_min_loss_model(model,
                                        opt.checkpoint_dir,
                                        batch_idx / opt.interval,
                                        cur_epoch,
                                        min_loss,
                                        info='Transformer_min_loss_model')
                else:
                    print('- batch_idx = %d, loss = %f -\n' %
                          (batch_idx / opt.interval, total_loss))
                # Disabled: clip gradients (L2 norm) before the update.
                # torch.nn.utils.clip_grad_norm_(model.parameters(), opt.max_norm, norm_type=2)
                optim_wrapper.step()
                optim_wrapper.zero_grad()
                total_loss = 0
                optim_wrapper.update_lr_per_step()
                # Disabled: evaluate dev-set BLEU in an extra CPU process.
                # Starting from epoch 4, every opt.vldt_freq batches, spawn a
                # subprocess to evaluate BLEU once.
                # if cur_epoch >= 4 and (batch_idx * opt.interval) % opt.vldt_freq == 0:
                #     cpu_model = copy.deepcopy(model).cpu()
                #     subprocess_pool.apply_async(evaluate, args=(opt, cpu_model, src_vocab, trg_vocab, vldt_iter, batch_idx, cur_epoch), callback=my_callback)
            if (batch_idx / opt.interval) % 100 == 0:
                print('- epoch = %d, min_loss = %f -\n' %
                      (cur_epoch, min_loss))
                # ---------------------------------------
                # Show at most five samples; a batch with fewer than five
                # rows used to raise IndexError here.
                num_samples = min(5, int(y_prob.size(0)))
                # Arg-max over the last dimension of the model output gives
                # one predicted index per position.
                sentences = y_prob[:num_samples].argmax(dim=-1).tolist()
                sentences = batch_idx2str(sentences, trg_vocab)
                for i in range(num_samples):
                    print('source:')
                    print(' '.join(src_raw[i]))
                    print('ref:')
                    print(' '.join(trg_raw[i]))
                    print('pred:')
                    print(' '.join(sentences[i]))
                    print('---------------------')
                # ---------------------------------------
        optim_wrapper.zero_grad()
        optim_wrapper.update_lr_per_epoch()
        save_checkpoint_model(model,
                              opt.checkpoint_dir,
                              cur_epoch,
                              info='Transformer_checkpoint_model')
        print('$ min_loss: %f, max_bleu: %f $\n' % (min_loss, max_bleu))
    # Close the process pool and wait for any dev-set BLEU evaluations to
    # finish.
    subprocess_pool.close()
    subprocess_pool.join()
Esempio n. 5
0
def train(model, src_vocab, trg_vocab, optim_wrapper, train_iter, vldt_iter):
    """Run the training loop for a model whose forward pass returns the loss.

    The loss of every batch is back-propagated immediately, so gradients
    accumulate until the optimizer is stepped once every ``opt.interval``
    batches. At each step the averaged loss is reported, the best-loss model
    is saved when the average drops below the module-level ``min_loss``, and
    gradients are clipped to ``opt.max_norm`` (L2 norm) before the update.
    A checkpoint is saved after each epoch.

    Reads the module-level ``opt`` and ``max_bleu``; reads and updates the
    module-level ``min_loss``.

    Args:
        model: Callable as ``model(src, src_mask, f_trg, f_trg_mask)``; the
            result is used as the loss.
        src_vocab: Source vocabulary.
        trg_vocab: Target vocabulary.
        optim_wrapper: Object exposing ``step``, ``zero_grad``,
            ``update_lr_per_step`` and ``update_lr_per_epoch``.
        train_iter: Iterable of training batches accepted by ``sort_batch``.
        vldt_iter: Validation iterator; only referenced by the disabled
            BLEU-evaluation snippet below.
    """
    global opt, min_loss, max_bleu
    subprocess_pool = Pool(2)

    # start training
    model.train()
    print('!!!train', id(model))
    for cur_epoch in range(opt.epoch + 1, opt.nepoch + 1):
        running_loss = 0
        print('############### epoch = %d ###############\n' % cur_epoch)
        for batch_idx, batch in enumerate(train_iter, start=1):
            sorted_batch = sort_batch(batch)
            src_raw = sorted_batch[0]
            trg_raw = sorted_batch[1]

            # Convert the source and target sentences to word indices.
            flag_kwargs = dict(unk=UNK, pad=PAD, sos=SOS, eos=EOS)
            src = batch_str2idx_with_flag(src_raw, src_vocab, **flag_kwargs)
            f_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, **flag_kwargs)
            src, f_trg = to_Tensor(src,
                                   f_trg,
                                   tensor_type=torch.LongTensor,
                                   cuda=opt.cuda)
            src_mask = get_batch_mask(src, src_vocab, PAD)
            f_trg_mask = get_batch_mask(f_trg, trg_vocab, PAD)
            # Disabled: reversed target-side batch, not used for now.
            # b_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, unk=UNK, pad=PAD, sos=SOS, eos=EOS, reverse=True)
            # src, f_trg, b_trg = to_Tensor(src, f_trg, b_trg, tensor_type=torch.LongTensor, cuda=opt.cuda)
            # b_trg_mask = get_batch_mask(b_trg, trg_vocab, PAD)

            loss = model(src, src_mask, f_trg, f_trg_mask)  # TODO
            running_loss += float(loss)
            loss.backward()

            # Keep accumulating gradients until the interval is complete.
            if batch_idx % opt.interval != 0:
                continue

            step = batch_idx / opt.interval
            running_loss = running_loss / opt.interval
            if running_loss < min_loss:
                print('& epoch = %d batch_idx = %d min_loss = %f &\n' %
                      (cur_epoch, step, running_loss))
                min_loss = running_loss
                save_min_loss_model(model,
                                    opt.checkpoint_dir,
                                    step,
                                    cur_epoch,
                                    min_loss,
                                    info='RNNSearch_min_loss_model')
            else:
                print('- batch_idx = %d, loss = %f -\n' % (step, running_loss))

            # Clip gradients (L2 norm) before the parameter update.
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           opt.max_norm,
                                           norm_type=2)
            optim_wrapper.step()
            optim_wrapper.zero_grad()
            running_loss = 0
            optim_wrapper.update_lr_per_step()
            # Disabled: evaluate dev-set BLEU in an extra CPU process.
            # Starting from epoch 4, every opt.vldt_freq batches, spawn a
            # subprocess to evaluate BLEU once.
            # if cur_epoch >= 4 and (batch_idx * opt.interval) % opt.vldt_freq == 0:
            #     cpu_model = copy.deepcopy(model).cpu()
            #     subprocess_pool.apply_async(evaluate, args=(opt, cpu_model, src_vocab, trg_vocab, vldt_iter, batch_idx, cur_epoch), callback=my_callback)

        # End of epoch: drop leftover gradients, adjust the learning rate and
        # save a checkpoint.
        optim_wrapper.zero_grad()
        optim_wrapper.update_lr_per_epoch()
        save_checkpoint_model(model,
                              opt.checkpoint_dir,
                              cur_epoch,
                              info='RNNSearch_checkpoint_model')
        print('$ min_loss: %f, max_bleu: %f $\n' % (min_loss, max_bleu))

    # Close the process pool and wait for any dev-set BLEU evaluations to
    # finish.
    subprocess_pool.close()
    subprocess_pool.join()