Example #1
0
def main():
    """Translate a pickled test set with a trained model.

    Command-line driven: loads a pickled dataset + vocabulary, builds a
    beam-search ``Translator`` around the saved model, decodes every test
    example and writes one prediction per line to ``-output``.
    """

    parser = argparse.ArgumentParser(description='translate.py')

    parser.add_argument('-model',
                        required=True,
                        help='Path to model weight file')
    parser.add_argument('-data_pkl',
                        required=True,
                        help='Pickle file with both instances and vocabulary.')
    parser.add_argument('-output',
                        default='pred.txt',
                        help="""Path to output the predictions (each line will
                        be the decoded sequence""")
    parser.add_argument('-beam_size', type=int, default=5)
    parser.add_argument('-max_seq_len', type=int, default=100)
    parser.add_argument('-no_cuda', action='store_true')

    # TODO: Translate bpe encoded files (-src / -vocab options)
    # TODO: Batch translation (-batch_size / -n_best options)

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda

    # Fix: open the pickle inside a context manager so the file handle is
    # closed deterministically (the original leaked it).
    # NOTE(review): pickle.load on an untrusted file can execute arbitrary
    # code — only point -data_pkl at trusted data.
    with open(opt.data_pkl, 'rb') as pkl_file:
        data = pickle.load(pkl_file)

    SRC, TRG = data['vocab']['src'], data['vocab']['trg']
    # Special-token indices the decoder needs (padding, begin/end of sentence).
    opt.src_pad_idx = SRC.vocab.stoi[constants.PAD_WORD]
    opt.trg_pad_idx = TRG.vocab.stoi[constants.PAD_WORD]
    opt.trg_bos_idx = TRG.vocab.stoi[constants.BOS_WORD]
    opt.trg_eos_idx = TRG.vocab.stoi[constants.EOS_WORD]

    test_loader = Dataset(examples=data['test'],
                          fields={
                              'src': SRC,
                              'trg': TRG
                          })

    device = torch.device('cuda' if opt.cuda else 'cpu')
    translator = Translator(model=load_model(opt, device),
                            beam_size=opt.beam_size,
                            max_seq_len=opt.max_seq_len,
                            src_pad_idx=opt.src_pad_idx,
                            trg_pad_idx=opt.trg_pad_idx,
                            trg_bos_idx=opt.trg_bos_idx,
                            trg_eos_idx=opt.trg_eos_idx).to(device)

    # Words missing from the source vocabulary map to the UNK index.
    unk_idx = SRC.vocab.stoi[SRC.unk_token]
    with open(opt.output, 'w') as f:
        for example in tqdm(test_loader,
                            mininterval=2,
                            desc='  - (Test)',
                            leave=False):
            src_seq = [
                SRC.vocab.stoi.get(word, unk_idx) for word in example.src
            ]
            # Decode one sentence at a time (batch of size 1).
            pred_seq = translator.translate_sentence(
                torch.LongTensor([src_seq]).to(device))
            pred_line = ' '.join(TRG.vocab.itos[idx] for idx in pred_seq)
            # Strip the BOS/EOS markers from the surface form.
            pred_line = pred_line.replace(constants.BOS_WORD,
                                          '').replace(constants.EOS_WORD, '')
            f.write(pred_line.strip() + '\n')

    print('[Info] Finished.')
Example #2
0
def main():
    """Average several saved checkpoints and score the result with BLEU.

    Loads each checkpoint named ``{experiment_name}-{number}.chkpt``,
    element-wise averages their parameters into one model, then beam-decodes
    the dev set and reports the corpus BLEU score (as a percentage).
    """
    parser = argparse.ArgumentParser(description='translate.py')

    parser.add_argument('-data_pkl',
                        help='Pickle file with both instances and vocabulary.')
    parser.add_argument('-experiment_name', required=True)
    parser.add_argument('-model_numbers', nargs='+', type=int, required=True)
    parser.add_argument('-output', default='pred.txt',
                        help="""Path to output the predictions (each line will
                        be the decoded sequence""")
    parser.add_argument('-beam_size', type=int, default=4)
    parser.add_argument('-batch_size', type=int, default=1)
    parser.add_argument('-max_seq_len', type=int, default=130)
    parser.add_argument('-alpha', type=float, default=0.6)
    parser.add_argument('-device', choices=['cpu', 'cuda'], default='cuda')
    parser.add_argument('-langs', nargs='+', required=True)

    args = parser.parse_args()
    device = torch.device(args.device)

    # Checkpoint averaging: accumulate the parameter-wise SUM across all
    # checkpoints in `model`, then divide once at the end.
    # NOTE(review): load_state_dict over named_parameters() covers only
    # parameters, not buffers (e.g. BatchNorm running stats) — confirm the
    # model has no buffers, or they are silently left from the last checkpoint.
    for i, number in enumerate(args.model_numbers):
        model_name = f'{args.experiment_name}-{number}.chkpt'
        if i == 0:
            model = load_model(model_name, device)
        else:
            temp_model = load_model(model_name, device)
            temp_params = dict(temp_model.named_parameters())
            for name, param in model.named_parameters():
                temp_params[name].data.copy_(param.data + temp_params[name].data)
            model.load_state_dict(temp_params)
    for _, param in model.named_parameters():
        param.data.copy_(param.data / len(args.model_numbers))

    args.data_reduce_size = -1  # -1 = use the full corpus, no subsampling
    test_loader, total_tokens, SRC, TRG = load_data_dict(
        experiment_name=args.experiment_name,
        corpora_type='dev',
        langs=args.langs,
        args=args,
        device=device
    )

    # Special-token indices required by the beam-search decoder.
    args.src_pad_idx = SRC.vocab.stoi[PAD_WORD]
    args.trg_pad_idx = TRG.vocab.stoi[PAD_WORD]
    args.trg_bos_idx = TRG.vocab.stoi[BOS_WORD]
    args.trg_eos_idx = TRG.vocab.stoi[EOS_WORD]
    args.trg_unk_idx = TRG.vocab.stoi[UNK_WORD]
    translator = Translator(
        model=model,
        beam_size=args.beam_size,
        max_seq_len=args.max_seq_len,
        src_pad_idx=args.src_pad_idx,
        trg_pad_idx=args.trg_pad_idx,
        trg_bos_idx=args.trg_bos_idx,
        trg_eos_idx=args.trg_eos_idx,
        device=device,
        alpha=args.alpha
    ).to(device)

    total_bleu, total_sentence = 0, 0
    bleu_score = 0
    for example in tqdm(test_loader, mininterval=20, desc='  - (Test)', leave=False, total=total_tokens//args.batch_size):
        source_sequence = patch_source(example.src).to(device)
        target_sequence, gold = map(lambda x: x.to(device), patch_target(example.trg))
        pred_seq, ends = translator.translate_sentence(source_sequence)

        # translation_score returns (bleu_sum, sentence_count) for the batch.
        bleu = translation_score(pred_seq, ends, gold, TRG)
        total_bleu += bleu[0]
        total_sentence += bleu[1]
        # Fix: guard against ZeroDivisionError when no sentences have been
        # scored yet (original divided unconditionally).
        if total_sentence:
            bleu_score = (total_bleu / total_sentence) * 100
        print('\n', bleu_score)
    if total_sentence:
        bleu_score = (total_bleu / total_sentence) * 100
    print('BLEU score for model: ', bleu_score)
Example #3
0
def run_one_epoch(model,
                  data,
                  args,
                  device,
                  TRG,
                  total_tokens,
                  optimizer=None,
                  smoothing=False,
                  bleu=False):
    """Run one full pass over `data`, training if an optimizer is given.

    Args:
        model: seq2seq model exposing a `generator` projection head.
        data: iterable of batches with `.src` and `.trg` fields.
        args: namespace providing `batch_size`, the pad/bos/eos indices and,
            when `bleu` is set, the beam-search settings.
        device: torch device batches are moved to.
        TRG: target-side field/vocabulary, used only for BLEU scoring.
        total_tokens: token count used only to size the tqdm progress bar.
        optimizer: when given, the epoch trains (backward + scheduler step);
            when None, the model is only evaluated in eval mode.
        smoothing: forwarded to `calculate_metrics` (label smoothing flag).
        bleu: when True, additionally beam-decode each batch and track BLEU.

    Returns:
        (loss_per_word, accuracy, bleu_score); `bleu_score` is None when
        `bleu` is False, and (0, 0, None) when no tokens were processed.
    """
    # The presence of an optimizer is what selects train vs. validation mode.
    training = optimizer is not None
    total_loss, total_num_words, total_num_correct_words, total_bleu, total_sentence = 0, 0, 0, 0, 0
    if training:
        desc = '  - (Training)   '
        model.train()
    else:
        desc = '  - (Validation) '
        model.eval()
        # NOTE(review): the validation path runs without torch.no_grad(), so
        # gradients are still tracked — confirm this is intentional.
    if bleu:
        # Beam-search decoder wrapped for (data-)parallel decoding.
        translator = Translator(model=model,
                                beam_size=args.beam_size,
                                max_seq_len=args.max_seq_len,
                                src_pad_idx=args.src_pad_idx,
                                trg_pad_idx=args.trg_pad_idx,
                                trg_bos_idx=args.trg_bos_idx,
                                trg_eos_idx=args.trg_eos_idx,
                                device=device)
        translator = TranslatorParallel(translator)
        # translator = CustomDataParallel(translator)

    for batch in tqdm(data,
                      mininterval=10,
                      desc=desc,
                      leave=False,
                      total=total_tokens // args.batch_size):
        # prepare data: `gold` is the shifted target used as the loss label.
        source_sequence = patch_source(batch.src).to(device)
        target_sequence, gold = map(lambda x: x.to(device),
                                    patch_target(batch.trg))
        # source_sequence = nn.DataParallel(patch_source(batch.src))
        # target_sequence, gold = map(lambda x: nn.DataParallel(x), patch_target(batch.trg))
        # forward pass
        if training:
            optimizer.zero_grad()
        if bleu:
            # Beam-decode the batch; `translation_score` is assumed to return
            # (bleu_sum, sentence_count) — accumulated across the epoch.
            pred_seq, ends = translator.translate_sentence(source_sequence)
            score = translation_score(pred_seq, ends, gold, TRG)
            total_bleu += score[0]
            total_sentence += score[1]
        prediction = model(source_sequence, target_sequence)
        output = model.generator(prediction)
        # Flatten to (num_tokens, vocab_size) for the token-level loss.
        output = output.view(-1, output.size(-1))
        # backward pass and update parameters
        loss, num_correct, num_words = calculate_metrics(
            output,
            gold.contiguous().view(-1),
            args.trg_pad_idx,
            smoothing=smoothing)
        if training:
            loss.backward()
            # Single call combines the optimizer step and LR-schedule update.
            optimizer.step_and_update_lr()
        total_num_words += num_words
        total_num_correct_words += num_correct
        total_loss += loss.item()
    if total_num_words != 0:
        # Per-token averages over the whole epoch.
        loss_per_word = total_loss / total_num_words
        accuracy = total_num_correct_words / total_num_words
        if bleu:
            # NOTE(review): divides by total_sentence without a zero guard —
            # confirm translation_score always counts at least one sentence.
            bleu_score = total_bleu / total_sentence
            print('current BLEU score: ', bleu_score)
        else:
            bleu_score = None
        return loss_per_word, accuracy, bleu_score
    else:
        # Empty epoch: no tokens seen, return neutral metrics.
        return 0, 0, None