def translate(dataset, fields, model):
    # Multi-task decoding; `opt`, `beam_search`, and `logging` are
    # module-level names in the original file. `flag` marks task-1 batches.

    already_1, hypothesis_1 = 0, []
    already_2_3, hypothesis_2, hypothesis_3 = 0, [], []

    for batch, flag in dataset:
        if flag:
            predictions = beam_search(opt, model, batch.src, fields, flag)
            hypothesis_1 += [
                fields["task1_tgt"].decode(p) for p in predictions
            ]
            already_1 += len(predictions)
            logging.info("Task 1: %7d/%7d" %
                         (already_1, dataset.task1_dataset.num_examples))
        else:
            predictions_1, predictions_2 = beam_search(opt, model, batch.src,
                                                       fields, flag)
            hypothesis_2 += [
                fields["task2_tgt"].decode(p) for p in predictions_1
            ]
            hypothesis_3 += [
                fields["task3_tgt"].decode(p) for p in predictions_2
            ]
            already_2_3 += len(predictions_1)
            logging.info("Finished: %7d/%7d" %
                         (already_2_3, dataset.task2_3_dataset.num_examples))

    # Restore the original example order using the stored seed indices,
    # then drop the indices.
    origin_1 = sorted(zip(hypothesis_1, dataset.task1_dataset.seed),
                      key=lambda t: t[1])
    hypothesis_1 = [h for h, _ in origin_1]

    with open(opt.output[0], "w", encoding="UTF-8") as out_file:
        out_file.write("\n".join(hypothesis_1))
        out_file.write("\n")

    origin_2_3 = sorted(zip(hypothesis_2, hypothesis_3,
                            dataset.task2_3_dataset.seed),
                        key=lambda t: t[2])
    hypothesis_2 = [h for h, _, _ in origin_2_3]
    hypothesis_3 = [h for _, h, _ in origin_2_3]
    with open(opt.output[1], "w", encoding="UTF-8") as out_file:
        out_file.write("\n".join(hypothesis_2))
        out_file.write("\n")
    with open(opt.output[2], "w", encoding="UTF-8") as out_file:
        out_file.write("\n".join(hypothesis_3))
        out_file.write("\n")

    logging.info("All finished. ")
Example #2
File: train.py Project: ZNLP/ATSum
def valid(model, criterion, valid_dataset, step):
    # `opt` and `saver` are module-level names in the original file.
    model.eval()
    total_loss, total = 0.0, 0

    hypothesis, references = [], []

    for batch in valid_dataset:
        scores = model(batch.src, batch.tgt, batch.probs, batch.idxes)
        loss = criterion(scores, batch.tgt)
        total_loss += loss.item()  # accumulate as a plain float
        total += 1

        if opt.tf:  # teacher forcing: decode by argmax over the output scores
            _, predictions = scores.topk(k=1, dim=-1)
        else:
            predictions = beam_search(opt, model, batch.src,
                                      valid_dataset.fields)

        hypothesis += [
            valid_dataset.fields["tgt"].decode(p) for p in predictions
        ]
        references += [
            valid_dataset.fields["tgt"].decode(t) for t in batch.tgt
        ]

    bleu = calculate_bleu(hypothesis, references)
    logging.info("Valid loss: %.2f\tValid BLEU: %3.2f" %
                 (total_loss / total, bleu))
    checkpoint = {"model": model.state_dict(), "opt": opt}
    saver.save(checkpoint, step, bleu, total_loss / total)
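`calculate_bleu` is defined elsewhere in the project and its implementation is not shown in these excerpts. A minimal stand-in using the sacrebleu package, purely as an assumption about what it might compute:

# Hypothetical stand-in for the project's calculate_bleu; not its real code.
import sacrebleu

def calculate_bleu(hypothesis, references):
    # sacrebleu expects a list of hypothesis strings and a list of
    # reference streams (one stream per reference set), and returns
    # a corpus-level BLEU object whose .score is a float in [0, 100].
    return sacrebleu.corpus_bleu(hypothesis, [references]).score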
def translate(dataset, fields, model):

    already, hypothesis, references = 0, [], []

    for batch in dataset:
        if opt.tf:
            scores = model(batch.src, batch.tgt)
            _, predictions = scores.topk(k=1, dim=-1)
        else:
            predictions = beam_search(opt, model, batch.src, fields)

        hypothesis += [fields["tgt"].decode(p) for p in predictions]
        already += len(predictions)
        logging.info("Translated: %7d/%7d" % (already, dataset.num_examples))
        references += [fields["tgt"].decode(t) for t in batch.tgt]

    if opt.bleu:
        bleu = calculate_bleu(hypothesis, references)
        logging.info("BLEU: %3.2f" % bleu)

    origin = sorted(zip(hypothesis, dataset.seed), key=lambda t: t[1])
    hypothesis = [h for h, _ in origin]
    with open(opt.output, "w", encoding="UTF-8") as out_file:
        out_file.write("\n".join(hypothesis))
        out_file.write("\n")

    logging.info("Translation finished. ")
def translate(dataset, fields, model):
    # Joint decoding: beam_search returns a Chinese ("summary_cn") and an
    # English ("summary_en") summary for every source article.

    already, hypothesis_1, hypothesis_2 = 0, [], []

    for batch in dataset:
        predictions_1, predictions_2 = beam_search(opt, model, batch.source,
                                                   fields)

        hypothesis_1 += [fields["summary_cn"].decode(p) for p in predictions_1]
        hypothesis_2 += [fields["summary_en"].decode(p) for p in predictions_2]

        already += len(predictions_1)
        logging.info("Finished: %7d/%7d" % (already, dataset.num_examples))

    origin = sorted(zip(hypothesis_1, hypothesis_2, dataset.seed),
                    key=lambda t: t[2])
    hypothesis_1 = [h for h, _, _ in origin]
    hypothesis_2 = [h for _, h, _ in origin]
    with open(opt.output[0], "w", encoding="UTF-8") as out_file:
        out_file.write("\n".join(hypothesis_1))
        out_file.write("\n")
    with open(opt.output[1], "w", encoding="UTF-8") as out_file:
        out_file.write("\n".join(hypothesis_2))
        out_file.write("\n")
    logging.info("All finished. ")