def valid(model, criterion_task1, criterion_task2, valid_dataset, step):
    # if torch.cuda.device_count() > 1:
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     model = nn.DataParallel(model, device_ids=[0, 1])
    model.eval()
    total_n = 0
    total_task1_loss = total_task2_loss = 0.0
    task1_hypothesis, task1_references = [], []
    task2_hypothesis, task2_references = [], []
    for i, (batch, flag) in enumerate(valid_dataset):
        # flag selects the task: True for task1, False for task2.
        scores = model(batch.src, batch.tgt, flag)
        if flag:
            loss = criterion_task1(scores, batch.tgt)
        else:
            loss = criterion_task2(scores, batch.tgt)
        _, predictions = scores.topk(k=1, dim=-1)
        if flag:  # task1
            total_task1_loss += loss.data
            task1_hypothesis += [valid_dataset.fields["task1_tgt"].decode(p) for p in predictions]
            task1_references += [valid_dataset.fields["task1_tgt"].decode(t) for t in batch.tgt]
        else:
            total_task2_loss += loss.data
            task2_hypothesis += [valid_dataset.fields["task2_tgt"].decode(p) for p in predictions]
            task2_references += [valid_dataset.fields["task2_tgt"].decode(t) for t in batch.tgt]
        total_n += 1
        del loss
    print(total_n)  # debug: total number of validation batches across both tasks
    bleu_task1 = calculate_bleu(task1_hypothesis, task1_references)
    bleu_task2 = calculate_bleu(task2_hypothesis, task2_references)
    rouge1_task1, rouge2_task1 = calculate_rouge(task1_hypothesis, task1_references)
    rouge1_task2, rouge2_task2 = calculate_rouge(task2_hypothesis, task2_references)
    # Note: total_n counts batches from both tasks, so each mean is taken
    # over all validation batches, not only that task's batches.
    mean_task1_loss = total_task1_loss / total_n
    mean_task2_loss = total_task2_loss / total_n
    logging.info(
        "loss-task1: %.2f \t loss-task2: %.2f \t bleu-task1: %3.2f \t bleu-task2: %3.2f \t "
        "rouge1-task1: %3.2f \t rouge1-task2: %3.2f \t rouge2-task1: %3.2f \t rouge2-task2: %3.2f"
        % (mean_task1_loss, mean_task2_loss, bleu_task1, bleu_task2,
           rouge1_task1, rouge1_task2, rouge2_task1, rouge2_task2))
    checkpoint = {"model": model.state_dict(), "opt": opt}
    saver.save(checkpoint, step, mean_task1_loss, mean_task2_loss,
               bleu_task1, bleu_task2, rouge1_task1, rouge1_task2,
               rouge2_task1, rouge2_task2)
    return (mean_task1_loss, mean_task2_loss, bleu_task1, bleu_task2,
            rouge1_task1, rouge1_task2, rouge2_task1, rouge2_task2)

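# calculate_rouge is not defined in this file. It is assumed to return a
# (ROUGE-1, ROUGE-2) pair on a 0-100 scale, matching how the scores are
# logged above. For illustration only, a minimal sketch averaging
# sentence-level n-gram F1 (whitespace tokenization is an assumption):
from collections import Counter


def rouge_n_sketch(hypothesis, references):
    def ngram_f1(hyp, ref, n):
        h = Counter(tuple(hyp[i:i + n]) for i in range(len(hyp) - n + 1))
        r = Counter(tuple(ref[i:i + n]) for i in range(len(ref) - n + 1))
        overlap = sum((h & r).values())  # clipped n-gram matches
        if overlap == 0:
            return 0.0
        precision = overlap / sum(h.values())
        recall = overlap / sum(r.values())
        return 2 * precision * recall / (precision + recall)

    rouge1 = rouge2 = 0.0
    for hyp, ref in zip(hypothesis, references):
        h_tok, r_tok = hyp.split(), ref.split()
        rouge1 += ngram_f1(h_tok, r_tok, 1)
        rouge2 += ngram_f1(h_tok, r_tok, 2)
    n = max(len(hypothesis), 1)
    return 100 * rouge1 / n, 100 * rouge2 / n
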
def valid(model, criterion_cn, criterion_en, valid_dataset, step):
    model.eval()
    total_loss = total_cn_loss = total_en_loss = 0.0
    total_n = 0
    cn_hypothesis, cn_references = [], []
    en_hypothesis, en_references = [], []
    for batch in valid_dataset:
        cn_scores, en_scores = model(batch.source, batch.summary_cn, batch.summary_en)
        cn_loss = criterion_cn(cn_scores, batch.summary_cn)
        en_loss = criterion_en(en_scores, batch.summary_en)
        loss = cn_loss + en_loss
        total_loss += loss.data
        total_cn_loss += cn_loss.data
        total_en_loss += en_loss.data
        total_n += 1
        _, cn_predictions = cn_scores.topk(k=1, dim=-1)
        cn_hypothesis += [valid_dataset.fields["summary_cn"].decode(p) for p in cn_predictions]
        cn_references += [valid_dataset.fields["summary_cn"].decode(t) for t in batch.summary_cn]
        _, en_predictions = en_scores.topk(k=1, dim=-1)
        en_hypothesis += [valid_dataset.fields["summary_en"].decode(p) for p in en_predictions]
        en_references += [valid_dataset.fields["summary_en"].decode(t) for t in batch.summary_en]
    # write_file(en_hypothesis, en_references, step)
    bleu_cn = calculate_bleu(cn_hypothesis, cn_references)
    bleu_en = calculate_bleu(en_hypothesis, en_references)
    rouge1_cn, rouge2_cn = calculate_rouge(cn_hypothesis, cn_references)
    rouge1_en, rouge2_en = calculate_rouge(en_hypothesis, en_references)
    mean_loss = total_loss / total_n
    mean_en_loss = total_en_loss / total_n
    mean_cn_loss = total_cn_loss / total_n
    logging.info(
        "loss: %.2f \t loss-cn: %.2f \t loss-en: %.2f \t bleu-cn: %3.2f \t bleu-en: %3.2f \t "
        "rouge1-cn: %3.2f \t rouge1-en: %3.2f \t rouge2-cn: %3.2f \t rouge2-en: %3.2f"
        % (mean_loss, mean_cn_loss, mean_en_loss, bleu_cn, bleu_en,
           rouge1_cn, rouge1_en, rouge2_cn, rouge2_en))
    saver.save(step, mean_loss, mean_cn_loss, mean_en_loss, bleu_cn, bleu_en,
               rouge1_cn, rouge1_en, rouge2_cn, rouge2_en)

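# write_file is referenced (commented out) in valid() above but not defined
# here. A minimal sketch, assuming it dumps hypotheses and references one
# per line into step-tagged files (the naming scheme is an assumption):
def write_file(hypothesis, references, step):
    with open("hypothesis.%d.txt" % step, "w", encoding="UTF-8") as h_file:
        h_file.write("\n".join(hypothesis) + "\n")
    with open("reference.%d.txt" % step, "w", encoding="UTF-8") as r_file:
        r_file.write("\n".join(references) + "\n")
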
def translate(dataset, fields, model):
    already, hypothesis, references = 0, [], []
    for batch in dataset:
        if opt.tf:
            # Teacher forcing: argmax of the gold-conditioned scores.
            scores = model(batch.src, batch.tgt)
            _, predictions = scores.topk(k=1, dim=-1)
        else:
            predictions = beam_search(opt, model, batch.src, fields)
        hypothesis += [fields["tgt"].decode(p) for p in predictions]
        already += len(predictions)
        logging.info("Translated: %7d/%7d" % (already, dataset.num_examples))
        references += [fields["tgt"].decode(t) for t in batch.tgt]

    if opt.bleu:
        bleu = calculate_bleu(hypothesis, references)
        logging.info("BLEU: %3.2f" % bleu)

    # Restore the original example order recorded in dataset.seed before
    # writing the output file.
    origin = sorted(zip(hypothesis, dataset.seed), key=lambda t: t[1])
    hypothesis = [h for h, _ in origin]
    with open(opt.output, "w", encoding="UTF-8") as out_file:
        out_file.write("\n".join(hypothesis))
        out_file.write("\n")
    logging.info("Translation finished.")

def valid(model, criterion, valid_dataset, step):
    model.eval()
    total_loss, total = 0.0, 0
    hypothesis, references = [], []
    for batch in valid_dataset:
        scores = model(batch.src, batch.tgt, batch.probs, batch.idxes)
        loss = criterion(scores, batch.tgt)
        total_loss += loss.data
        total += 1
        if opt.tf:
            _, predictions = scores.topk(k=1, dim=-1)
        else:
            predictions = beam_search(opt, model, batch.src, valid_dataset.fields)
        hypothesis += [valid_dataset.fields["tgt"].decode(p) for p in predictions]
        references += [valid_dataset.fields["tgt"].decode(t) for t in batch.tgt]
    bleu = calculate_bleu(hypothesis, references)
    logging.info("Valid loss: %.2f\tValid BLEU: %3.2f" % (total_loss / total, bleu))
    checkpoint = {"model": model.state_dict(), "opt": opt}
    saver.save(checkpoint, step, bleu, total_loss / total)

def valid(model, criterion_task1, criterion_task2, valid_dataset, step):
    model.eval()
    total_n = 0
    total_task1_loss = total_task2_loss = 0.0
    task1_hypothesis, task1_references = [], []
    task2_hypothesis, task2_references = [], []

    def roul(hyps, refs):
        # Average sentence-level ROUGE-L over the corpus.
        rl1 = 0.0
        for h, r in zip(hyps, refs):
            rl1 += calc_rouge_L(h, r)
        return rl1 / len(hyps)

    for i, (batch, flag) in enumerate(valid_dataset):
        # scores is unused here; decoding is done by greedy search below.
        scores = model(batch.src, batch.tgt, flag)
        predictions = greedy_search(opt, model, batch.src, valid_dataset.fields, flag)
        if flag:  # task1
            task1_hypothesis += [valid_dataset.fields["task1_tgt"].decode_word(p) for p in predictions]
            task1_references += [valid_dataset.fields["task1_tgt"].decode_word(t) for t in batch.tgt]
        else:
            task2_hypothesis += [valid_dataset.fields["task2_tgt"].decode_word(p) for p in predictions]
            task2_references += [valid_dataset.fields["task2_tgt"].decode_word(t) for t in batch.tgt]
        total_n += 1

    bleu_task1 = calculate_bleu(task1_hypothesis, task1_references)
    bleu_task2 = calculate_bleu(task2_hypothesis, task2_references)
    rouge1_task1, rouge2_task1 = calculate_rouge(task1_hypothesis, task1_references)
    rouge1_task2, rouge2_task2 = calculate_rouge(task2_hypothesis, task2_references)
    # Losses are not accumulated in this variant; zeros are logged as placeholders.
    mean_task1_loss = 0
    mean_task2_loss = 0
    rougel_task1 = roul(task1_hypothesis, task1_references) * 100
    rougel_task2 = roul(task2_hypothesis, task2_references) * 100
    logging.info(
        "loss-task1: %.2f \t loss-task2: %.2f \t bleu-task1: %3.2f \t bleu-task2: %3.2f \t "
        "rouge1-task1: %3.2f \t rouge1-task2: %3.2f \t rouge2-task1: %3.2f \t rouge2-task2: %3.2f \t "
        "rougeL-task1: %3.2f \t rougeL-task2: %3.2f"
        % (mean_task1_loss, mean_task2_loss, bleu_task1, bleu_task2,
           rouge1_task1, rouge1_task2, rouge2_task1, rouge2_task2,
           rougel_task1, rougel_task2))
    checkpoint = {"model": model.state_dict(), "opt": opt}
    saver.save(checkpoint, step, mean_task1_loss, mean_task2_loss,
               bleu_task1, bleu_task2, rouge1_task1, rouge1_task2,
               rouge2_task1, rouge2_task2, rougel_task1, rougel_task2)

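# calc_rouge_L, used by roul() above, is not defined in this file. Since
# roul() multiplies its result by 100, it is assumed to return a score in
# [0, 1]. A minimal LCS-based sentence-level ROUGE-L F-score sketch
# (whitespace tokenization and beta=1.2 are assumptions):
def rouge_l_sketch(hypothesis, reference, beta=1.2):
    hyp, ref = hypothesis.split(), reference.split()
    if not hyp or not ref:
        return 0.0
    # Dynamic-programming longest-common-subsequence length.
    dp = [[0] * (len(ref) + 1) for _ in range(len(hyp) + 1)]
    for i, h_tok in enumerate(hyp):
        for j, r_tok in enumerate(ref):
            if h_tok == r_tok:
                dp[i + 1][j + 1] = dp[i][j] + 1
            else:
                dp[i + 1][j + 1] = max(dp[i][j + 1], dp[i + 1][j])
    lcs = dp[-1][-1]
    if lcs == 0:
        return 0.0
    precision = lcs / len(hyp)
    recall = lcs / len(ref)
    # Standard ROUGE-L F-measure weighted toward recall by beta.
    return (1 + beta ** 2) * precision * recall / (recall + beta ** 2 * precision)
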
def valid(model, valid_dataset, step):
    model.eval()
    total_loss, total = 0.0, 0
    hypothesis, references = [], []
    for batch in valid_dataset:
        loss = model(batch.src, batch.tgt).mean()
        total_loss += loss.data
        total += 1
        predictions = parallel_beam_search(opt, model.module.model, batch, valid_dataset.fields)
        hypothesis += [valid_dataset.fields["tgt"].decode(p) for p in predictions]
        references += [valid_dataset.fields["tgt"].decode(t) for t in batch.tgt]
    bleu = calculate_bleu(hypothesis, references)
    logger.info("Valid loss: %.2f\tValid Beam BLEU: %3.2f" % (total_loss / total, bleu))
    checkpoint = {"model": model.module.model.state_dict(), "opt": opt}
    saver.save(checkpoint, printing_opt(opt), step, bleu, total_loss / total)

import sys

from beaver.utils import parseopt, get_device, calculate_bleu


def read_file(filename):
    # One sentence per line.
    with open(filename, encoding="UTF-8") as f:
        return [line.strip() for line in f]


# The original left both paths blank; reading them from the command line is
# an assumption (usage: python <script> <reference_file> <hypothesis_file>).
reference = sys.argv[1]
hypothesis = sys.argv[2]

task3_references = read_file(reference)
task3_hypothesis = read_file(hypothesis)
bleu_task3 = calculate_bleu(task3_hypothesis, task3_references)
print("bleu score is: %.2f" % bleu_task3)

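# calculate_bleu is imported from beaver.utils above; its implementation is
# not shown here. For illustration only, a self-contained corpus-level
# BLEU-4 with brevity penalty (whitespace tokenization is an assumption,
# not necessarily what beaver.utils does):
import math
from collections import Counter


def corpus_bleu_sketch(hypothesis, references, max_n=4):
    matches, totals = [0] * max_n, [0] * max_n
    hyp_len = ref_len = 0
    for hyp, ref in zip(hypothesis, references):
        h_tok, r_tok = hyp.split(), ref.split()
        hyp_len += len(h_tok)
        ref_len += len(r_tok)
        for n in range(1, max_n + 1):
            h_ngrams = Counter(tuple(h_tok[i:i + n]) for i in range(len(h_tok) - n + 1))
            r_ngrams = Counter(tuple(r_tok[i:i + n]) for i in range(len(r_tok) - n + 1))
            matches[n - 1] += sum((h_ngrams & r_ngrams).values())  # clipped counts
            totals[n - 1] += max(len(h_tok) - n + 1, 0)
    if min(matches) == 0:
        return 0.0  # some n-gram order has no match; log-precision is undefined
    log_precision = sum(math.log(m / t) for m, t in zip(matches, totals)) / max_n
    brevity_penalty = 1.0 if hyp_len > ref_len else math.exp(1 - ref_len / max(hyp_len, 1))
    return 100 * brevity_penalty * math.exp(log_precision)
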
def valid(model, criterion_task1, criterion_task2, criterion_task3, valid_dataset, step):
    model.eval()
    total_n = 0
    total_task1_loss = total_task2_loss = total_task3_loss = total_task2_3_loss = 0.0
    task1_hypothesis, task1_references = [], []
    task2_hypothesis, task2_references = [], []
    task3_hypothesis, task3_references = [], []
    for i, (batch, flag) in enumerate(valid_dataset):
        if flag:  # task1: single-decoder branch
            task1_scores = model(batch.src, batch.tgt, None, None, flag)
            loss = criterion_task1(task1_scores, batch.tgt)
            _, task1_predictions = task1_scores.topk(k=1, dim=-1)
            total_task1_loss += loss.data
            task1_hypothesis += [valid_dataset.fields["task1_tgt"].decode(p) for p in task1_predictions]
            task1_references += [valid_dataset.fields["task1_tgt"].decode(t) for t in batch.tgt]
        else:  # tasks 2/3: Chinese and English summary decoders on the same source
            task2_scores, task3_scores = model(batch.src, None, batch.summary_cn, batch.summary_en, flag)
            task2_loss = criterion_task2(task2_scores, batch.summary_cn)
            task3_loss = criterion_task3(task3_scores, batch.summary_en)
            loss = task2_loss + task3_loss
            _, task2_predictions = task2_scores.topk(k=1, dim=-1)
            _, task3_predictions = task3_scores.topk(k=1, dim=-1)
            total_task2_3_loss += loss.data
            total_task2_loss += task2_loss.data
            total_task3_loss += task3_loss.data
            task2_hypothesis += [valid_dataset.fields["task2_tgt"].decode(p) for p in task2_predictions]
            task2_references += [valid_dataset.fields["task2_tgt"].decode(t) for t in batch.summary_cn]
            task3_hypothesis += [valid_dataset.fields["task3_tgt"].decode(p) for p in task3_predictions]
            task3_references += [valid_dataset.fields["task3_tgt"].decode(t) for t in batch.summary_en]
        total_n += 1
        del loss

    # Guard against a task that received no batches in this validation pass.
    bleu_task1 = calculate_bleu(task1_hypothesis, task1_references) if task1_hypothesis else 0
    bleu_task2 = calculate_bleu(task2_hypothesis, task2_references) if task2_hypothesis else 0
    bleu_task3 = calculate_bleu(task3_hypothesis, task3_references) if task3_hypothesis else 0
    if task1_hypothesis:
        rouge1_task1, rouge2_task1 = calculate_rouge(task1_hypothesis, task1_references)
    else:
        rouge1_task1, rouge2_task1 = 0, 0
    if task2_hypothesis:
        rouge1_task2, rouge2_task2 = calculate_rouge(task2_hypothesis, task2_references)
    else:
        rouge1_task2, rouge2_task2 = 0, 0
    if task3_hypothesis:
        rouge1_task3, rouge2_task3 = calculate_rouge(task3_hypothesis, task3_references)
    else:
        rouge1_task3, rouge2_task3 = 0, 0
    mean_task1_loss = total_task1_loss / total_n
    mean_task2_loss = total_task2_loss / total_n
    mean_task3_loss = total_task3_loss / total_n
    logging.info(
        "loss-task1: %.2f \t loss-task2: %.2f \t loss-task3: %.2f \t "
        "bleu-task1: %3.2f \t bleu-task2: %3.2f \t bleu-task3: %3.2f \t "
        "rouge1-task1: %3.2f \t rouge1-task2: %3.2f \t rouge1-task3: %3.2f \t "
        "rouge2-task1: %3.2f \t rouge2-task2: %3.2f \t rouge2-task3: %3.2f"
        % (mean_task1_loss, mean_task2_loss, mean_task3_loss,
           bleu_task1, bleu_task2, bleu_task3,
           rouge1_task1, rouge1_task2, rouge1_task3,
           rouge2_task1, rouge2_task2, rouge2_task3))
    checkpoint = {"model": model.state_dict(), "opt": opt}
    saver.save(checkpoint, step, mean_task1_loss, mean_task2_loss, mean_task3_loss,
               bleu_task1, bleu_task2, bleu_task3,
               rouge1_task1, rouge1_task2, rouge1_task3,
               rouge2_task1, rouge2_task2, rouge2_task3)
    return (mean_task1_loss, mean_task2_loss, mean_task3_loss,
            bleu_task1, bleu_task2, bleu_task3,
            rouge1_task1, rouge1_task2, rouge1_task3,
            rouge2_task1, rouge2_task2, rouge2_task3)