def beam_search(model: NMT, test_iter, beam_size: int, max_decoding_time_step: int) -> List[List[Hypothesis]]:
    """ Run beam search to construct hypotheses for a list of src-language sentences.
    @param model (NMT): NMT Model
    @param test_iter: iterator over (source, target) batches from the test set.
    @param beam_size (int): beam_size (# of hypotheses to hold for a translation at every step)
    @param max_decoding_time_step (int): maximum sentence length that Beam search can produce
    @returns hypotheses (List[List[Hypothesis]]): List of Hypothesis translations for every source sentence.
    """
    was_training = model.training
    model.eval()

    hypotheses = []
    with torch.no_grad():
        for _, data in enumerate(test_iter):
            # field names follow the torchtext batch used in the training loop (batch.src / batch.trg);
            # the target side is not needed for decoding
            (src_sents, src_lengths), (_, _) = data.src, data.trg
            example_hyps = model.beam_search(
                src_sents, src_lengths,
                beam_size=beam_size,
                max_decoding_time_step=max_decoding_time_step)
            hypotheses.append(example_hyps)

    if was_training:
        model.train(was_training)

    return hypotheses

def beam_search(model: NMT, test_data_src: List[List[str]], beam_size: int, max_decoding_time_step: int) -> List[List[Hypothesis]]:
    """ Run beam search to construct hypotheses for a list of src-language sentences.
    @param model (NMT): NMT Model
    @param test_data_src (List[List[str]]): List of sentences (words) in source language, from test set.
    @param beam_size (int): beam_size (# of hypotheses to hold for a translation at every step)
    @param max_decoding_time_step (int): maximum sentence length that Beam search can produce
    @returns hypotheses (List[List[Hypothesis]]): List of Hypothesis translations for every source sentence.
    """
    was_training = model.training
    model.eval()

    hypotheses = []
    with torch.no_grad():
        for src_sent in tqdm(test_data_src, desc='Decoding', file=sys.stdout):
            example_hyps = model.beam_search(
                src_sent,
                beam_size=beam_size,
                max_decoding_time_step=max_decoding_time_step)
            hypotheses.append(example_hyps)

    if was_training:
        model.train(was_training)

    return hypotheses

def beam_search(model: NMT, test_iterator: BucketIterator, beam_size: int, max_decoding_time_step: int) -> List[List[Hypothesis]]:
    """ Run beam search to construct hypotheses for a list of src-language sentences.
    @param model (NMT): NMT Model
    @param test_iterator (BucketIterator): BucketIterator over the source-language test set.
    @param beam_size (int): beam_size (# of hypotheses to hold for a translation at every step)
    @param max_decoding_time_step (int): maximum sentence length that Beam search can produce
    @returns hypotheses (List[List[Hypothesis]]): List of Hypothesis translations for every source sentence.
    """
    was_training = model.training
    model.eval()

    hypotheses = []
    with torch.no_grad():
        # for src_sent in tqdm(test_data_src, desc='Decoding', file=sys.stdout):
        for i, batch in enumerate(test_iterator):
            src_sents, src_sents_lens = batch.src
            # (src_len, batch_size) -> (batch_size, src_len), so each row is one source sentence
            src_sents = src_sents.permute(1, 0)
            for j in range(len(src_sents_lens)):
                src_sent = src_sents[j]
                example_hyps = model.beam_search(
                    src_sent, src_sents_lens[j],
                    beam_size=beam_size,
                    max_decoding_time_step=max_decoding_time_step)
                hypotheses.append(example_hyps)

    if was_training:
        model.train(was_training)

    return hypotheses

def beam_search(model: NMT, test_data_src: List[List[str]], beam_size: int, max_decoding_time_step: int) -> List[List[Hypothesis]]:
    """ Run beam search to construct hypotheses for a list of source sentences.
    @param model (NMT): NMT model
    @param test_data_src (List[List[str]]): list of source sentences, from the test set.
    @param beam_size (int): beam size (number of candidates kept at every step)
    @param max_decoding_time_step (int): maximum sentence length that beam search can produce
    @returns hypotheses (List[List[Hypothesis]]): beam_size hypotheses for every source sentence.
    """
    was_training = model.training
    model.eval()

    hypotheses = []  # list of candidate translations for all sentences
    with torch.no_grad():
        for src_sent in tqdm(test_data_src, desc='Decoding', file=sys.stdout):
            example_hyps = model.beam_search(
                src_sent,
                beam_size=beam_size,
                max_decoding_time_step=max_decoding_time_step)
            hypotheses.append(example_hyps)  # add all candidates for this sentence to the list

    if was_training:
        model.train(was_training)

    return hypotheses

def beam_search2(model1: NMT, model2: DPPNMT, test_data_src: List[List[str]], beam_size: int,
                 max_decoding_time_step: int, test_data_tgt):
    """ Run beam search with two models and print the sentences where the DPP decoder wins on BLEU.
    @param model1 (NMT): baseline NMT model
    @param model2 (DPPNMT): DPP-decoding NMT model
    @param test_data_src (List[List[str]]): List of sentences (words) in source language, from test set.
    @param beam_size (int): beam_size (# of hypotheses to hold for a translation at every step)
    @param max_decoding_time_step (int): maximum sentence length that Beam search can produce
    @param test_data_tgt: reference target sentences, aligned with test_data_src
    """
    model1.eval()
    model2.eval()

    i = 0
    with torch.no_grad():
        for src_sent in tqdm(test_data_src, desc='Decoding', file=sys.stdout):
            hyp1 = model1.beam_search(
                src_sent,
                beam_size=beam_size,
                max_decoding_time_step=max_decoding_time_step)
            hyp2 = model2.beam_search(
                src_sent,
                beam_size=beam_size,
                max_decoding_time_step=max_decoding_time_step)

            ref = test_data_tgt[i][1:-1]  # strip leading <s> and trailing </s>
            # print(ref, hyp1[0].value)
            # compare both systems against the same stripped reference
            bleu_topk = sentence_bleu(ref, hyp1[0].value)
            bleu_dpp = sentence_bleu(ref, hyp2[0].value)
            # print(bleu_topk, bleu_dpp)
            if bleu_dpp > bleu_topk:
                print(i)
                print(" ".join(hyp1[0].value))
                print(" ".join(hyp2[0].value))
                print(" ".join(ref))
            i += 1

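# Note: if `sentence_bleu` used above is NLTK's implementation, it expects a *list* of reference
# token lists as its first argument, and unsmoothed sentence-level BLEU collapses to 0 whenever a
# higher-order n-gram has no match. A minimal sketch of a smoothed helper under that assumption
# (the helper name `smoothed_sentence_bleu` is illustrative, not part of this repo):
from nltk.translate.bleu_score import sentence_bleu as nltk_sentence_bleu, SmoothingFunction

def smoothed_sentence_bleu(reference_tokens, hypothesis_tokens):
    """Sentence-level BLEU with smoothing, so short hypotheses do not score exactly 0."""
    return nltk_sentence_bleu([reference_tokens], hypothesis_tokens,
                              smoothing_function=SmoothingFunction().method1)
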
def test(args):
    test_data_src = read_corpus(args.test_src, source='src')
    test_data_tgt = read_corpus(args.test_tgt, source='tgt')
    test_data = list(zip(test_data_src, test_data_tgt))

    if args.load_model:
        print('load model from [%s]' % args.load_model)
        params = torch.load(args.load_model, map_location=lambda storage, loc: storage)
        vocab = params['vocab']
        saved_args = params['args']
        state_dict = params['state_dict']

        model = NMT(saved_args, vocab)
        model.load_state_dict(state_dict)
    else:
        vocab = torch.load(args.vocab)
        model = NMT(args, vocab)

    model.eval()

    if args.cuda:
        # model = nn.DataParallel(model).cuda()
        model = model.cuda()

    hypotheses = decode(model, test_data)
    top_hypotheses = [hyps[0] for hyps in hypotheses]

    bleu_score = get_bleu([tgt for src, tgt in test_data], top_hypotheses)
    word_acc = get_acc([tgt for src, tgt in test_data], top_hypotheses, 'word_acc')
    sent_acc = get_acc([tgt for src, tgt in test_data], top_hypotheses, 'sent_acc')
    print('Corpus Level BLEU: %f, word level acc: %f, sentence level acc: %f' %
          (bleu_score, word_acc, sent_acc), file=sys.stderr)

    if args.save_to_file:
        print('save decoding results to %s' % args.save_to_file)
        with open(args.save_to_file, 'w') as f:
            for hyps in hypotheses:
                f.write(' '.join(hyps[0][1:-1]) + '\n')

        if args.save_nbest:
            nbest_file = args.save_to_file + '.nbest'
            print('save nbest decoding results to %s' % nbest_file)
            with open(nbest_file, 'w') as f:
                for src_sent, tgt_sent, hyps in zip(test_data_src, test_data_tgt, hypotheses):
                    print('Source: %s' % ' '.join(src_sent), file=f)
                    print('Target: %s' % ' '.join(tgt_sent), file=f)
                    print('Hypotheses:', file=f)
                    for i, hyp in enumerate(hyps, 1):
                        print('[%d] %s' % (i, ' '.join(hyp)), file=f)
                    print('*' * 30, file=f)

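# A sketch of what `get_bleu` above might compute (corpus-level BLEU over the top hypotheses).
# It assumes references and hypotheses are token lists wrapped in `<s>` ... `</s>`, as the
# slicing in test() suggests; the real implementation lives elsewhere in this repo, so treat
# the helper below (`get_bleu_sketch`) as an illustrative assumption, not the actual function.
from nltk.translate.bleu_score import corpus_bleu

def get_bleu_sketch(references, top_hypotheses):
    # strip the <s> and </s> markers before scoring;
    # corpus_bleu expects, per sentence, a list of reference token lists
    refs = [[ref[1:-1]] for ref in references]
    hyps = [hyp[1:-1] for hyp in top_hypotheses]
    return corpus_bleu(refs, hyps)
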
def sample(args):
    train_data_src = read_corpus(args.train_src, source='src')
    train_data_tgt = read_corpus(args.train_tgt, source='tgt')
    train_data = zip(train_data_src, train_data_tgt)

    if args.load_model:
        print('load model from [%s]' % args.load_model)
        params = torch.load(args.load_model, map_location=lambda storage, loc: storage)
        vocab = params['vocab']
        opt = params['args']
        state_dict = params['state_dict']

        model = NMT(opt, vocab)
        model.load_state_dict(state_dict)
    else:
        vocab = torch.load(args.vocab)
        model = NMT(args, vocab)

    model.eval()

    if args.cuda:
        # model = nn.DataParallel(model).cuda()
        model = model.cuda()

    print('begin sampling')

    check_every = 10
    train_iter = cum_samples = 0
    train_time = time.time()
    for src_sents, tgt_sents in data_iter(train_data, batch_size=args.batch_size):
        train_iter += 1
        samples = model.sample(src_sents, sample_size=args.sample_size, to_word=True)
        cum_samples += sum(len(sample) for sample in samples)

        if train_iter % check_every == 0:
            elapsed = time.time() - train_time
            print('sampling speed: %d/s' % (cum_samples / elapsed))
            cum_samples = 0
            train_time = time.time()

        for i, tgt_sent in enumerate(tgt_sents):
            print('*' * 80)
            print('target:' + ' '.join(tgt_sent))
            tgt_samples = samples[i]
            print('samples:')
            for sid, sample in enumerate(tgt_samples, 1):
                print('[%d] %s' % (sid, ' '.join(sample[1:-1])))
            print('*' * 80)

def interactive(args):
    assert args.load_model, 'You have to specify a pre-trained model'
    print('load model from [%s]' % args.load_model)
    params = torch.load(args.load_model, map_location=lambda storage, loc: storage)
    vocab = params['vocab']
    saved_args = params['args']
    state_dict = params['state_dict']

    model = NMT(saved_args, vocab)
    model.load_state_dict(state_dict)
    model.eval()

    if args.cuda:
        # model = nn.DataParallel(model).cuda()
        model = model.cuda()

    while True:
        src_sent = input('Source Sentence:')
        src_sent = src_sent.strip().split(' ')
        hyps = model.translate(src_sent)
        for i, hyp in enumerate(hyps, 1):
            print('Hypothesis #%d: %s' % (i, ' '.join(hyp)))

def beam_search(model: NMT, test_data_src: List[List[str]], beam_size: int, max_decoding_time_step: int) \
        -> List[List[Hypothesis]]:
    """ Run beam search to construct hypotheses for a list of src-language sentences.
    :param NMT model: NMT Model
    :param List[List[str]] test_data_src: List of sentences (words) in source language, from test set
    :param int beam_size: beam_size (number of hypotheses to keep for a translation at every step)
    :param int max_decoding_time_step: maximum sentence length that beam search can produce
    :returns List[List[Hypothesis]] hypotheses: List of Hypothesis translations for every source sentence
    """
    was_training = model.training
    model.eval()

    hypotheses = []
    with torch.no_grad():
        for src_sent in tqdm(test_data_src, desc='Decoding', file=sys.stdout):
            example_hyps = model.beam_search(
                src_sent,
                beam_size=beam_size,
                max_decoding_time_step=max_decoding_time_step)
            hypotheses.append(example_hyps)

    if was_training:
        model.train(was_training)

    return hypotheses

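# A minimal usage sketch for the beam_search wrapper above, following the checkpoint-loading
# pattern used in test() and interactive(). The checkpoint path and decoding hyper-parameters
# are illustrative, and Hypothesis is assumed to carry `value` (token list) and `score` fields,
# as in the beam_search2 comparison above.
def decode_and_print(model_path: str, test_data_src: List[List[str]], cuda: bool = False) -> None:
    params = torch.load(model_path, map_location=lambda storage, loc: storage)
    model = NMT(params['args'], params['vocab'])
    model.load_state_dict(params['state_dict'])
    if cuda:
        model = model.cuda()

    hypotheses = beam_search(model, test_data_src,
                             beam_size=5, max_decoding_time_step=70)
    for src_sent, hyps in zip(test_data_src, hypotheses):
        best = hyps[0]  # beams are assumed to be sorted best-first
        print('%s => %s (score %.2f)' % (' '.join(src_sent), ' '.join(best.value), best.score))
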
def compute_lm_prob(args):
    """ Given source-target sentence pairs, compute ppl and log-likelihood. """
    test_data_src = read_corpus(args.test_src, source='src')
    test_data_tgt = read_corpus(args.test_tgt, source='tgt')
    test_data = zip(test_data_src, test_data_tgt)

    if args.load_model:
        print('load model from [%s]' % args.load_model)
        params = torch.load(args.load_model, map_location=lambda storage, loc: storage)
        vocab = params['vocab']
        saved_args = params['args']
        state_dict = params['state_dict']

        model = NMT(saved_args, vocab)
        model.load_state_dict(state_dict)
    else:
        vocab = torch.load(args.vocab)
        model = NMT(args, vocab)

    model.eval()

    if args.cuda:
        # model = nn.DataParallel(model).cuda()
        model = model.cuda()

    f = open(args.save_to_file, 'w')
    for src_sent, tgt_sent in test_data:
        src_sents = [src_sent]
        tgt_sents = [tgt_sent]

        batch_size = len(src_sents)
        src_sents_len = [len(s) for s in src_sents]
        pred_tgt_word_nums = [len(s[1:]) for s in tgt_sents]  # omitting leading `<s>`

        # (sent_len, batch_size)
        src_sents_var = to_input_variable(src_sents, model.vocab.src, cuda=args.cuda, is_test=True)
        tgt_sents_var = to_input_variable(tgt_sents, model.vocab.tgt, cuda=args.cuda, is_test=True)

        # (tgt_sent_len, batch_size, tgt_vocab_size)
        scores = model(src_sents_var, src_sents_len, tgt_sents_var[:-1])

        # (tgt_sent_len * batch_size, tgt_vocab_size); normalize over the vocabulary dimension
        log_scores = F.log_softmax(scores.view(-1, scores.size(2)), dim=-1)

        # remove leading <s> in tgt sent, which is not used as the target
        # (batch_size * tgt_sent_len)
        flattened_tgt_sents = tgt_sents_var[1:].view(-1)

        # (batch_size * tgt_sent_len)
        tgt_log_scores = torch.gather(log_scores, 1, flattened_tgt_sents.unsqueeze(1)).squeeze(1)
        # 0-index is the <pad> symbol
        tgt_log_scores = tgt_log_scores * (1. - torch.eq(flattened_tgt_sents, 0).float())

        # (tgt_sent_len, batch_size)
        tgt_log_scores = tgt_log_scores.view(-1, batch_size)  # .permute(1, 0)

        # (batch_size)
        tgt_sent_scores = tgt_log_scores.sum(dim=0).squeeze()
        tgt_sent_word_scores = [tgt_sent_scores[i].item() / pred_tgt_word_nums[i]
                                for i in range(batch_size)]

        for src_sent, tgt_sent, score in zip(src_sents, tgt_sents, tgt_sent_word_scores):
            f.write('%s ||| %s ||| %f\n' % (' '.join(src_sent), ' '.join(tgt_sent), score))

    f.close()

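# The scores written by compute_lm_prob are average log-likelihoods (natural log) per predicted
# target word, so corpus perplexity can be recovered from the dumped file. A small sketch under
# the assumptions that the `src ||| tgt ||| score` format above is used, that neither sentence
# contains the `|||` separator, and that dumped targets still carry the <s>/</s> markers; the
# function name and file path are illustrative.
import math

def corpus_ppl_from_dump(path: str) -> float:
    total_log_prob = 0.
    total_words = 0
    with open(path) as f:
        for line in f:
            src, tgt, score = line.strip().split(' ||| ')
            num_words = len(tgt.split()) - 1        # the leading <s> is never predicted
            total_log_prob += float(score) * num_words
            total_words += num_words
    return math.exp(-total_log_prob / total_words)
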
def experiement(args: Dict, test_only, device):
    """ Train and test the NMT model.
    @param args (Dict): args from cmd line
    @param test_only (bool): if True, skip training and only decode the test set
    @param device: torch device to run on
    """
    # train_data_src = read_corpus(args['--train-src'], source='src')
    # train_data_tgt = read_corpus(args['--train-tgt'], source='tgt')
    #
    # dev_data_src = read_corpus(args['--dev-src'], source='src')
    # dev_data_tgt = read_corpus(args['--dev-tgt'], source='tgt')
    #
    # train_data = list(zip(train_data_src, train_data_tgt))
    # dev_data = list(zip(dev_data_src, dev_data_tgt))

    train_batch_size = int(args['--batch-size'])
    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    use_pos_embed = bool(args['--use-pos-embed'])
    use_copy = bool(args['--use-copy'])

    SRC, TRG, train_iterator, dev_iterator, test_iterator = load_data(
        args['--train-data'], args['--dev-data'], args['--test-data'],
        device, train_batch_size, (use_pos_embed or use_copy))
    vocab = Vocab(SRC, TRG)

    model = NMT(src_embed_size=int(args['--src-embed-size']),
                dst_embed_size=int(args['--dst-embed-size']),
                hidden_size=int(args['--hidden-size']),
                dropout_rate=float(args['--dropout']),
                vocab=vocab,
                use_pos_embed=use_pos_embed,
                use_copy=use_copy)
    model.load_pretrained_embeddings(vocab)
    # print("args: {}".format(args))

    uniform_init = float(args['--uniform-init'])
    if np.abs(uniform_init) > 0.:
        print('uniformly initialize parameters [-%f, +%f]' % (uniform_init, uniform_init), file=sys.stderr)
        for p in model.parameters():
            p.data.uniform_(-uniform_init, uniform_init)

    # def init_weights(m):
    #     for name, param in m.named_parameters():
    #         if 'weight' in name:
    #             nn.init.normal_(param.data, mean=0, std=0.01)
    #         else:
    #             nn.init.constant_(param.data, 0)
    #
    # model.apply(init_weights)

    # vocab_mask = torch.ones(len(vocab.tgt))
    # vocab_mask[vocab.tgt['<pad>']] = 0

    print('use device: %s' % device, file=sys.stderr)
    print(model)
    para_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'The model has {para_count:,} trainable parameters')
    print("file path: {}".format(model_save_path))

    if test_only:
        model.eval()
        decode(args, test_iterator, vocab, device)
        exit(0)

    # perform training
    model.train()
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('begin Maximum Likelihood training')

    while True:
        epoch += 1

        # perform training
        model.train()
        # for src_sents, tgt_sents in batch_iter(train_data, batch_size=train_batch_size, shuffle=True):
        for i, batch in enumerate(train_iterator):
            train_iter += 1

            optimizer.zero_grad()

            src_sents, src_sents_lens = batch.src
            tgt_sents = batch.trg
            batch_size = src_sents.shape[1]

            example_losses = -model(src_sents, src_sents_lens, tgt_sents)  # (batch_size,)
            batch_loss = example_losses.sum()
            loss = batch_loss / batch_size

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)

            optimizer.step()

            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            tgt_words_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += batch_size
            cum_examples += batch_size

            # if train_iter % log_every == 0:
print("") print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f ' \ 'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' % (epoch, train_iter, report_loss / report_examples, math.exp(report_loss / report_tgt_words), cum_examples, report_tgt_words / (time.time() - train_time), time.time() - begin_time), file=sys.stderr) train_time = time.time() report_loss = report_tgt_words = report_examples = 0. # perform validation # model.eval() # if train_iter % valid_niter == 0: # print('epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d' % (epoch, train_iter, # cum_loss / cum_examples, # np.exp(cum_loss / cum_tgt_words), # cum_examples), file=sys.stderr) cum_loss = cum_examples = cum_tgt_words = 0. valid_num += 1 # print('begin validation ...', file=sys.stderr) # compute dev. ppl and bleu dev_ppl = evaluate_ppl( model, dev_iterator, batch_size=128) # dev batch size can be a bit larger valid_metric = -dev_ppl print('validation: iter %d, dev. ppl %f' % (train_iter, dev_ppl), file=sys.stderr) is_better = len( hist_valid_scores) == 0 or valid_metric > max(hist_valid_scores) hist_valid_scores.append(valid_metric) if is_better: patience = 0 # print('save currently the best model to [%s]' % model_save_path, file=sys.stderr) model.save(model_save_path) # also save the optimizers' state torch.save(optimizer.state_dict(), model_save_path + '.optim') elif patience < int(args['--patience']): patience += 1 print('hit patience %d' % patience, file=sys.stderr) if patience == int(args['--patience']): num_trial += 1 print('hit #%d trial' % num_trial, file=sys.stderr) if num_trial == int(args['--max-num-trial']): print('early stop!', file=sys.stderr) # exit(0) break # decay lr, and restore from previously best checkpoint lr = optimizer.param_groups[0]['lr'] * float( args['--lr-decay']) print( 'load previously best model and decay learning rate to %f' % lr, file=sys.stderr) # load model params = torch.load(model_save_path, map_location=lambda storage, loc: storage) model.load_state_dict(params['state_dict']) model = model.to(device) print('restore parameters of the optimizers', file=sys.stderr) optimizer.load_state_dict( torch.load(model_save_path + '.optim')) # set new lr for param_group in optimizer.param_groups: param_group['lr'] = lr # reset patience patience = 0 if epoch == int(args['--max-epoch']): print('reached maximum number of epochs!', file=sys.stderr) break # perform testing model.eval() decode(args, test_iterator, vocab, device)