def run_epoch_test(data_iter, model, pad_idx, vocab, model_avg=False, raw_prob=True):
    """Greedy-decode every batch in *data_iter* and return the corpus BLEU score.

    Args:
        data_iter: iterator yielding batches with ``src``/``trg`` tensors.
        model: a single model or, when ``model_avg`` is True, the collection of
            models passed through to ``model_average``.
        pad_idx: padding token id used to build the ``Batch`` masks.
        vocab: vocabulary supporting ``vocab[token]`` and ``vocab.itos``.
        model_avg: decode by averaging several models if True.
        raw_prob: forwarded to ``model_average``.

    Returns:
        BLEU over the decoded corpus, as computed by ``data.bleu_score``.
    """
    eos_id = vocab[EOS_WORD]

    def cut_to_words(seq, first):
        # Map token ids to words from position `first` up to (excluding) EOS.
        picked = []
        for idx in range(first, len(seq)):
            if seq[idx] == eos_id:
                break
            picked.append(vocab.itos[seq[idx]])
        return picked

    tick = time.time()
    total_tokens = 0
    window_tokens = 0
    hyps = []
    refs = []
    with torch.no_grad():
        for step, raw_batch in enumerate(data_iter):
            batch = Batch(raw_batch.src, raw_batch.trg, pad=pad_idx)
            limit = batch.src.shape[1] + 20  # decoding length budget
            if model_avg:
                out = model_average(model, batch.src, batch.src_mask, limit,
                                    vocab[BOS_WORD], raw_prob=raw_prob)
            else:
                out = model.predict(batch.src, batch.src_mask, limit,
                                    vocab[BOS_WORD])
            for row in range(out.shape[0]):
                hyps.append(cut_to_words(out[row], 1))  # index 0 is BOS
                refs.append([cut_to_words(batch.trg_y[row], 0)])
            total_tokens += batch.ntokens
            window_tokens += batch.ntokens
            if step % 5 == 0:
                elapsed = time.time() - tick
                print("Step: %d Tokens per Sec: %.2f Time_used: %.2f"
                      % (step, window_tokens / elapsed, elapsed))
                tick = time.time()
                window_tokens = 0
    return data.bleu_score(hyps, refs)
def bleu(data, model, german, english, device):
    """Translate every example in *data* and return the corpus BLEU score.

    Each example must expose ``src`` (source tokens) and ``trg`` (reference
    tokens) attributes. The trailing ``<eos>`` token is stripped from each
    model prediction before scoring.
    """
    references = []
    hypotheses = []
    for example in data:
        fields = vars(example)
        predicted = translate_sentence(model, fields["src"], german, english, device)
        hypotheses.append(predicted[:-1])  # remove <eos> token
        references.append([fields["trg"]])
    return bleu_score(hypotheses, references)
# Evaluate the trained az->en model: decode the test inputs, blank out
# unknown tokens and immediate repeats, then report corpus BLEU and a
# sample of actual-vs-predicted sentence pairs.
model = load_model('trained_model_az-en_50epochs_size_150k_max_length')
# Note: the original reshaped via `testX[:].shape[0]`; the slice copy was
# redundant — `testX.shape[0]` is identical.
preds = model.predict_classes(testX.reshape((testX.shape[0], testX.shape[1])))

preds_text = []
for pred_seq in preds:
    words = []
    for j, token_id in enumerate(pred_seq):
        t = get_word(token_id, target_tokenizer)
        # Suppress unknown ids (`t is None`, fixed from `t == None`) and
        # tokens repeated from the previous position — a common greedy
        # decoding artifact — while keeping positional alignment.
        if t is None or (j > 0 and t == get_word(pred_seq[j - 1], target_tokenizer)):
            words.append('')
        else:
            words.append(t)
    preds_text.append(words)

# Each reference is wrapped in a list: bleu_score expects a list of
# reference-sets per candidate.
targets = [[ref] for ref in test[:, 1]]
actuals = [' '.join(ref) for ref in test[:, 1]]
predicts = [' '.join(words) for words in preds_text]

print(bleu_score(preds_text, targets))
pred_df = pd.DataFrame({'actual': actuals, 'predicted': predicts})
print(pred_df.sample(15))
def run_epoch_test_with_beams(data_iter, model, pad_idx, vocab, beam_size=4,
                              n_best=4, model_avg=False, raw_prob=True):
    """Beam-search decode every batch in *data_iter* and return corpus BLEU.

    Args:
        data_iter: iterator yielding batches with ``src``/``trg`` tensors.
        model: a single model, or (when ``model_avg`` is True) an iterable of
            models whose per-step distributions are averaged.
        pad_idx: padding token id used to build the ``Batch`` masks.
        vocab: vocabulary supporting ``vocab[token]`` and ``vocab.itos``.
        beam_size: number of beams kept per sentence.
        n_best: number of finished hypotheses kept per sentence.
        model_avg: average the predictions of several models if True.
        raw_prob: average in probability space (exp, mean, log) rather than
            averaging log-probabilities directly.

    Returns:
        BLEU over the decoded corpus, as computed by ``data.bleu_score``.

    Raises:
        ValueError: if the number of decoded hypotheses does not match the
            number of references in a batch.
    """
    start = time.time()
    total_tokens = 0
    tokens = 0
    candidate_corpus = []
    reference_corpus = []
    with torch.no_grad():
        for i, batch_ in enumerate(data_iter):
            batch = Batch(batch_.src, batch_.trg, pad=pad_idx)
            max_len = batch.src.shape[1] + 20  # decoding length budget
            batch_size = batch.src.shape[0]
            beam_search = BeamSearch(beam_size, batch_size, vocab, n_best,
                                     min_length=1, max_length=max_len,
                                     block_ngram_repeat=0)
            # Encode once, then replicate the memory beam_size times per
            # sentence so each beam decodes against its own copy.
            if model_avg:
                memory = [m.encode(batch.src, batch.src_mask) for m in model]
                memory = [tile(x, beam_size) for x in memory]
            else:
                memory = model.encode(batch.src, batch.src_mask)
                memory = tile(memory, beam_size)
            batch.src_mask = tile(batch.src_mask, beam_size)
            probs = []
            attns = []
            for step in range(max_len):
                pred = beam_search.current_predictions
                if model_avg:
                    # One decode pass per model; average the distributions.
                    for m, mem in zip(model, memory):
                        out, attn_temp = m.decode(
                            mem, pred, batch.src_mask,
                            subsequent_mask(pred.shape[1]).type_as(batch.src.data))
                        prob = m.generator(out[:, -1])
                        if raw_prob:
                            # generator emits log-probs; exp() so the mean is
                            # taken in probability space.
                            prob = torch.exp(prob)
                        probs.append(prob)
                        attns.append(attn_temp)
                    probs, attns = torch.stack(probs), torch.stack(attns)
                    mean_prob, attn = probs.mean(dim=0), attns.mean(dim=0)
                    if raw_prob:
                        log_probs = mean_prob.log()
                    else:
                        log_probs = mean_prob
                    probs = []
                    attns = []
                else:
                    out, attn = model.decode(
                        memory, pred, batch.src_mask,
                        subsequent_mask(pred.shape[1]).type_as(batch.src.data))
                    log_probs = model.generator(out[:, -1])
                beam_search.step(log_probs, attn)
                any_finished = beam_search.is_finished.any()
                if any_finished:
                    beam_search.update_finished()
                    if beam_search.done:
                        break
                # Reorder cached state to follow the surviving beams.
                select_indices = beam_search.select_indices
                if model_avg:
                    memory = [x.index_select(0, select_indices) for x in memory]
                else:
                    memory = memory.index_select(0, select_indices)
                batch.src_mask = batch.src_mask.index_select(0, select_indices)
            # scores = beam_search.scores
            predictions = beam_search.predictions
            best_preds = [p[0] for p in predictions]
            # Explicit check instead of `assert`: asserts are stripped under
            # `python -O`, which would silence this validation.
            if len(best_preds) != len(batch.trg_y):
                raise ValueError(
                    'number of predictions does not match number of references')
            for j in range(batch_size):
                words_can = []
                words_ref = []
                candidate = best_preds[j]
                for k in range(1, len(candidate)):  # index 0 is BOS
                    if candidate[k] == vocab[EOS_WORD]:
                        break
                    words_can.append(vocab.itos[candidate[k]])
                reference = batch.trg_y[j]
                for v in range(len(reference)):
                    if reference[v] == vocab[EOS_WORD]:
                        break
                    words_ref.append(vocab.itos[reference[v]])
                candidate_corpus.append(words_can)
                reference_corpus.append([words_ref])
            total_tokens += batch.ntokens
            tokens += batch.ntokens
            if i % 5 == 0:
                elapsed = time.time() - start
                print("Step: %d Tokens per Sec: %.2f Time_used: %.2f"
                      % (i, tokens / elapsed, elapsed))
                start = time.time()
                tokens = 0
    bleu = data.bleu_score(candidate_corpus, reference_corpus)
    return bleu
targets = labels[:, 1:unroll_steps].contiguous().view(-1) # shifted by one because of BOS loss = criterion(outputs.contiguous().view(-1, outputs.shape[-1]), targets.long()) if att_probs is not None: # only with RecurrentDecoder, TransformerDecoder does not have attention loss += 1. * ((1. - att_probs.sum(dim=1)) ** 2).mean() # Doubly stochastic attention regularization loss_sum += loss.item() for beam_size in range(1, len(bleu_1) + 1): prediction, _ = model.predict(data_dev, inputs, data_dev.max_length, beam_size, decoder_type=decoder_type) decoded_prediction = data_dev.corpus.vocab.arrays_to_sentences(prediction) decoded_references = [] for image_name in image_names: decoded_references.append(data_dev.corpus.vocab.arrays_to_sentences(data_dev.get_all_references_for_image_name(image_name))) idx = beam_size - 1 bleu_1[idx] += bleu_score(decoded_prediction, decoded_references, max_n=1, weights=[1]) bleu_2[idx] += bleu_score(decoded_prediction, decoded_references, max_n=2, weights=[0.5] * 2) bleu_3[idx] += bleu_score(decoded_prediction, decoded_references, max_n=3, weights=[1 / 3] * 3) bleu_4[idx] += bleu_score(decoded_prediction, decoded_references, max_n=4, weights=[0.25] * 4) global_step = epoch # Add bleu score to board tensorboard.writer.add_scalars('loss', {"dev_loss": loss_sum / len(dataloader_dev)}, global_step) for idx in range(len(bleu_1)): tensorboard.writer.add_scalar(f'BEAM-{idx + 1}/BLEU-1', bleu_1[idx] / len(dataloader_dev), global_step) tensorboard.writer.add_scalar(f'BEAM-{idx + 1}/BLEU-2', bleu_2[idx] / len(dataloader_dev), global_step) tensorboard.writer.add_scalar(f'BEAM-{idx + 1}/BLEU-3', bleu_3[idx] / len(dataloader_dev), global_step) tensorboard.writer.add_scalar(f'BEAM-{idx + 1}/BLEU-4', bleu_4[idx] / len(dataloader_dev), global_step) # Add predicted text to board tensorboard.add_predicted_text(global_step, data_dev, model, data_dev.max_length, decoder_type=decoder_type) tensorboard.writer.flush()