def init_from_checkpoint(self, path: str, restart_training: bool) -> None:
        """
        Load the trainer state from a checkpoint file.

        The checkpoint holds the model parameters as well as the optimizer
        and scheduler states, see `self._save_checkpoint`. The model
        parameters are always loaded; optimizer, scheduler and training
        counters are only restored when training is not restarted.

        :param path: path to checkpoint
        :param restart_training: if True, load the model parameters only and
            leave optimizer, scheduler and counters as they are
        """
        checkpoint = load_checkpoint(path=path, use_cuda=self.use_cuda)

        # the model weights are restored in every case
        self.model.load_state_dict(checkpoint["model_state"])

        if not restart_training:
            self.optimizer.load_state_dict(checkpoint["optimizer_state"])

            # a scheduler state is applied only if one was stored and this
            # trainer actually has a scheduler
            scheduler_state = checkpoint["scheduler_state"]
            if not (scheduler_state is None or self.scheduler is None):
                self.scheduler.load_state_dict(scheduler_state)

            # restore counts
            for counter in ("steps", "total_tokens",
                            "best_ckpt_score", "best_ckpt_iteration"):
                setattr(self, counter, checkpoint[counter])

        # move parameters to cuda
        if self.use_cuda:
            self.model.cuda()
Exemple #2
0
    def init_from_checkpoint(self,
                             path: str,
                             reset_best_ckpt: bool = False,
                             reset_scheduler: bool = False,
                             reset_optimizer: bool = False) -> None:
        """
        Load the trainer state from a checkpoint file.

        Besides the model parameters, the checkpoint stores the optimizer
        and scheduler states, see `self._save_checkpoint`. Model parameters
        and the step/token counters are always restored; the remaining
        pieces can be skipped individually via the `reset_*` flags.

        :param path: path to checkpoint
        :param reset_best_ckpt: reset tracking of the best checkpoint,
                                use for domain adaptation with a new dev
                                set or when using a new metric for fine-tuning.
        :param reset_scheduler: reset the learning rate scheduler, and do not
                                use the one stored in the checkpoint.
        :param reset_optimizer: reset the optimizer, and do not use the one
                                stored in the checkpoint.
        """
        checkpoint = load_checkpoint(path=path,
                                     use_cuda=self.use_cuda,
                                     use_tpu=self.use_tpu)

        # model weights are restored unconditionally
        self.model.load_state_dict(checkpoint["model_state"])

        if reset_optimizer:
            self.logger.info("Reset optimizer.")
        else:
            self.optimizer.load_state_dict(checkpoint["optimizer_state"])

        if reset_scheduler:
            self.logger.info("Reset scheduler.")
        else:
            # only applied if a state was stored and a scheduler exists
            scheduler_state = checkpoint["scheduler_state"]
            if not (scheduler_state is None or self.scheduler is None):
                self.scheduler.load_state_dict(scheduler_state)

        # restore counts
        self.steps = checkpoint["steps"]
        self.total_tokens = checkpoint["total_tokens"]

        if reset_best_ckpt:
            self.logger.info("Reset tracking of the best checkpoint.")
        else:
            self.best_ckpt_score = checkpoint["best_ckpt_score"]
            self.best_ckpt_iteration = checkpoint["best_ckpt_iteration"]

        # move parameters to the accelerator; nothing is moved when both
        # (or neither) of the two flags are set
        if self.use_cuda and not self.use_tpu:
            self.model.cuda()
        elif self.use_tpu and not self.use_cuda:
            self.model.to(self.device)
Exemple #3
0
    def load(self):
        """
        Build the model, load the checkpoint parameters into it and, if
        CUDA is requested, move it to a GPU obtained from `GPUManager`.

        :return: False if CUDA is requested but no GPU id was returned,
            True otherwise
        """
        checkpoint = load_checkpoint(self.ckpt, self.use_cuda)

        # build model and load parameters into it
        self.model = build_model(self.model_data,
                                 src_vocab=self.src_vocab,
                                 trg_vocab=self.trg_vocab)
        self.model.load_state_dict(checkpoint["model_state"])

        if not self.use_cuda:
            return True

        self.gpu_id = GPUManager.wait_for_available_device(
            is_admin=self.is_admin)
        if self.gpu_id is None:
            return False

        self.model.cuda(self.gpu_id)
        return True
Exemple #4
0
def test(cfg_file,
         ckpt: str,
         batch_class: Batch = Batch,
         output_path: str = None,
         save_attention: bool = False,
         datasets: dict = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load; if None, the latest checkpoint
        in the configured model directory is used
    :param batch_class: class type of batch
    :param output_path: path to output; translations of each data set are
        written to "<output_path>.<data set name>"
    :param save_attention: whether to save the computed attention weights
    :param datasets: datasets to predict, a dict with the keys "dev", "test",
        "src_vocab" and "trg_vocab"; if None, the data is loaded from the
        config
    :raises FileNotFoundError: if no checkpoint is given and none is found in
        the model directory
    """

    cfg = load_config(cfg_file)
    model_dir = cfg["training"]["model_dir"]

    if not logger.handlers:
        _ = make_logger(model_dir, mode="test")  # version string returned

    # `step` labels the attention plot files. It must be bound even when a
    # checkpoint path is passed explicitly, otherwise saving attention plots
    # fails with an unbound local variable.
    step = "best"

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    # load the data
    if datasets is None:
        _, dev_data, test_data, src_vocab, trg_vocab = load_data(
            data_cfg=cfg["data"], datasets=["dev", "test"])
        data_to_predict = {"dev": dev_data, "test": test_data}
    else:  # avoid to load data again
        data_to_predict = {"dev": datasets["dev"], "test": datasets["test"]}
        src_vocab = datasets["src_vocab"]
        trg_vocab = datasets["trg_vocab"]

    # parse test args
    batch_size, batch_type, use_cuda, device, n_gpu, level, eval_metric, \
        max_output_length, beam_size, beam_alpha, postprocess, \
        bpe_type, sacrebleu, decoding_description, tokenizer_info \
        = parse_test_args(cfg, mode="test")

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.to(device)

    # multi-gpu eval
    if n_gpu > 1 and not isinstance(model, torch.nn.DataParallel):
        model = _DataParallel(model)

    for data_set_name, data_set in data_to_predict.items():
        if data_set is None:
            continue

        dataset_file = cfg["data"][data_set_name] + "." + cfg["data"]["trg"]
        logger.info("Decoding on %s set (%s)...", data_set_name, dataset_file)

        # only score, hypotheses and attention scores are needed here
        score, _, _, _, _, _, hypotheses, hypotheses_raw, attention_scores = \
            validate_on_data(
                model, data=data_set, batch_size=batch_size,
                batch_class=batch_class, batch_type=batch_type, level=level,
                max_output_length=max_output_length, eval_metric=eval_metric,
                use_cuda=use_cuda, compute_loss=False, beam_size=beam_size,
                beam_alpha=beam_alpha, postprocess=postprocess,
                bpe_type=bpe_type, sacrebleu=sacrebleu, n_gpu=n_gpu)

        if "trg" in data_set.fields:
            logger.info("%4s %s%s: %6.2f [%s]", data_set_name, eval_metric,
                        tokenizer_info, score, decoding_description)
        else:
            logger.info("No references given for %s -> no evaluation.",
                        data_set_name)

        if save_attention:
            if attention_scores:
                attention_name = "{}.{}.att".format(data_set_name, step)
                attention_path = os.path.join(model_dir, attention_name)
                logger.info(
                    "Saving attention plots. This might take a while..")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=data_set.src,
                                      indices=range(len(hypotheses)),
                                      output_prefix=attention_path)
                logger.info("Attention plots saved to: %s", attention_path)
            else:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not available "
                               "when using beam search. "
                               "Set beam_size to 1 for greedy decoding.")

        if output_path is not None:
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")
            logger.info("Translations saved to: %s", output_path_set)
Exemple #5
0
def translate(cfg_file: str,
              ckpt: str,
              output_path: str = None,
              batch_class: Batch = Batch,
              n_best: int = 1) -> None:
    """
    Interactive translation function.
    Loads model from checkpoint and translates either the stdin input or
    asks for input to translate interactively.
    The input has to be pre-processed according to the data that the model
    was trained on, i.e. tokenized or split into subwords.
    Translations are printed to stdout, or written to `output_path` when
    stdin input is given together with an output path.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load; if None, the latest checkpoint
        in the configured model directory is used
    :param output_path: path to output file; with `n_best` > 1 one file per
        rank is written, named "<name>-<rank><extension>"
    :param batch_class: class type of batch
    :param n_best: amount of candidates to display
    :raises FileNotFoundError: if no checkpoint is given and none is found in
        the model directory
    """
    def _load_line_as_data(line):
        """ Create a dataset from one line via a temporary file. """
        tmp_name = "tmp"
        tmp_suffix = ".src"
        tmp_filename = tmp_name + tmp_suffix
        try:
            # write src input to temporary file
            with open(tmp_filename, "w") as tmp_file:
                tmp_file.write("{}\n".format(line))

            return MonoDataset(path=tmp_name, ext=tmp_suffix, field=src_field)
        finally:
            # remove temporary file, also when building the dataset failed
            if os.path.exists(tmp_filename):
                os.remove(tmp_filename)

    def _translate_data(test_data):
        """ Translates given dataset, using parameters from outer scope. """
        # only the hypotheses are of interest here
        _, _, _, _, _, _, hypotheses, _, _ = validate_on_data(
            model, data=test_data, batch_size=batch_size,
            batch_class=batch_class, batch_type=batch_type, level=level,
            max_output_length=max_output_length, eval_metric="",
            use_cuda=use_cuda, compute_loss=False, beam_size=beam_size,
            beam_alpha=beam_alpha, postprocess=postprocess,
            bpe_type=bpe_type, sacrebleu=sacrebleu, n_gpu=n_gpu, n_best=n_best)
        return hypotheses

    def _write_to_file(output_path_set, hypotheses):
        """ Write one hypothesis per line to the given file. """
        with open(output_path_set, mode="w", encoding="utf-8") as out_file:
            for hyp in hypotheses:
                out_file.write(hyp + "\n")
        logger.info("Translations saved to: %s.", output_path_set)

    cfg = load_config(cfg_file)
    model_dir = cfg["training"]["model_dir"]

    _ = make_logger(model_dir, mode="translate")
    # version string returned

    # when checkpoint is not specified, take latest from model dir
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))

    # read vocabs
    src_vocab_file = cfg["data"].get("src_vocab", model_dir + "/src_vocab.txt")
    trg_vocab_file = cfg["data"].get("trg_vocab", model_dir + "/trg_vocab.txt")
    src_vocab = Vocabulary(file=src_vocab_file)
    trg_vocab = Vocabulary(file=trg_vocab_file)

    data_cfg = cfg["data"]
    level = data_cfg["level"]
    lowercase = data_cfg["lowercase"]

    # `level` is looked up at call time, i.e. the value returned by
    # parse_test_args below is the one that takes effect
    tok_fun = lambda s: list(s) if level == "char" else s.split()

    src_field = Field(init_token=None,
                      eos_token=EOS_TOKEN,
                      pad_token=PAD_TOKEN,
                      tokenize=tok_fun,
                      batch_first=True,
                      lower=lowercase,
                      unk_token=UNK_TOKEN,
                      include_lengths=True)
    src_field.vocab = src_vocab

    # parse test args
    batch_size, batch_type, use_cuda, device, n_gpu, level, _, \
        max_output_length, beam_size, beam_alpha, postprocess, \
        bpe_type, sacrebleu, _, _ = parse_test_args(cfg, mode="translate")

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.to(device)

    if not sys.stdin.isatty():
        # input file given
        test_data = MonoDataset(path=sys.stdin, ext="", field=src_field)
        all_hypotheses = _translate_data(test_data)

        if output_path is not None:
            # write to outputfile if given
            if n_best > 1:
                # hypotheses are interleaved: every n_best-th entry,
                # starting at offset n, has rank n
                file_name, file_extension = os.path.splitext(output_path)
                for n in range(n_best):
                    _write_to_file(
                        "{}-{}{}".format(file_name, n, file_extension),
                        [all_hypotheses[i]
                         for i in range(n, len(all_hypotheses), n_best)])
            else:
                _write_to_file("{}".format(output_path), all_hypotheses)
        else:
            # print to stdout
            for hyp in all_hypotheses:
                print(hyp)

    else:
        # enter interactive mode; these rebind the values that
        # _translate_data reads from this scope
        batch_size = 1
        batch_type = "sentence"
        while True:
            try:
                src_input = input("\nPlease enter a source sentence "
                                  "(pre-processed): \n")
                # an empty line ends the session
                if not src_input.strip():
                    break

                # every line has to be made into dataset
                test_data = _load_line_as_data(line=src_input)
                hypotheses = _translate_data(test_data)

                print("JoeyNMT: Hypotheses ranked by score")
                for i, hyp in enumerate(hypotheses):
                    print("JoeyNMT #{}: {}".format(i + 1, hyp))

            except (KeyboardInterrupt, EOFError):
                print("\nBye.")
                break
Exemple #6
0
def _test_decoding_options(cfg):
    """
    Read the decoding settings from the optional "testing" config section.

    :param cfg: loaded configuration
    :return: tuple (beam_size, beam_alpha, postprocess); the defaults are
        1, -1 and True when the section or a key is missing
    """
    if "testing" in cfg.keys():
        testing_cfg = cfg["testing"]
        return (testing_cfg.get("beam_size", 1),
                testing_cfg.get("alpha", -1),
                testing_cfg.get("postprocess", True))
    return 1, -1, True


def _predict_and_report(model, data_set_name, data_set, validate_kwargs,
                        step, model_dir, output_path, save_attention, logger):
    """
    Decode one data set, log its score and store the requested outputs.

    :param model: model (or translator) used for decoding
    :param data_set_name: name used in log messages and output file names
    :param data_set: data set to decode
    :param validate_kwargs: keyword arguments passed to `validate_on_data`;
        must contain "beam_size", "beam_alpha" and "eval_metric"
    :param step: checkpoint label used in the attention plot file name
    :param model_dir: directory the attention plots are written to
    :param output_path: prefix of the translation output file, or None
    :param save_attention: whether to store attention plots
    :param logger: logger for progress and result messages
    """
    # only score, hypotheses and attention scores are needed here
    score, _, _, _, _, _, hypotheses, hypotheses_raw, attention_scores = \
        validate_on_data(model, data=data_set, **validate_kwargs)

    if "trg" in data_set.fields:
        beam_size = validate_kwargs["beam_size"]
        beam_alpha = validate_kwargs["beam_alpha"]
        decoding_description = "Greedy decoding" if beam_size < 2 else \
            "Beam search decoding with beam size = {} and alpha = {}". \
            format(beam_size, beam_alpha)
        logger.info("%4s %s: %6.2f [%s]", data_set_name,
                    validate_kwargs["eval_metric"], score,
                    decoding_description)
    else:
        logger.info("No references given for %s -> no evaluation.",
                    data_set_name)

    if save_attention:
        if attention_scores:
            attention_name = "{}.{}.att".format(data_set_name, step)
            attention_path = os.path.join(model_dir, attention_name)
            logger.info("Saving attention plots. This might take a while..")
            store_attention_plots(attentions=attention_scores,
                                  targets=hypotheses_raw,
                                  sources=data_set.src,
                                  indices=list(range(len(hypotheses))),
                                  output_prefix=attention_path)
            logger.info("Attention plots saved to: %s", attention_path)
        else:
            logger.warning("Attention scores could not be saved. "
                           "Note that attention scores are not available "
                           "when using beam search. "
                           "Set beam_size to 1 for greedy decoding.")

    if output_path is not None:
        output_path_set = "{}.{}".format(output_path, data_set_name)
        with open(output_path_set, mode="w", encoding="utf-8") as out_file:
            for hyp in hypotheses:
                out_file.write(hyp + "\n")
        logger.info("Translations saved to: %s", output_path_set)


def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False,
         logger: Logger = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    With the "encoder-decoder" architecture (the default) the dev and test
    sets are decoded with the model itself. For any other architecture the
    model is treated as an unsupervised NMT model, and the src2trg and
    trg2src dev/test sets are decoded with the matching translator.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load; if None, the latest checkpoint
        in the configured model directory is used
    :param output_path: path to output; translations of each data set are
        written to "<output_path>.<data set name>"
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    :raises FileNotFoundError: if no checkpoint is given and none is found in
        the model directory
    :raises ValueError: if the required test data is missing in the config
    """

    if logger is None:
        logger = make_logger()

    cfg = load_config(cfg_file)

    # when checkpoint is not specified, take latest (best) from model dir
    step = "best"
    model_dir = cfg["training"]["model_dir"]
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    architecture = cfg["model"].get("architecture", "encoder-decoder")
    batch_size = cfg["training"].get("eval_batch_size",
                                     cfg["training"]["batch_size"])
    batch_type = cfg["training"].get(
        "eval_batch_type", cfg["training"].get("batch_type", "sentence"))
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    if architecture == "encoder-decoder":
        # original encoder-decoder testing
        if "test" not in cfg["data"].keys():
            raise ValueError("Test data must be specified in config.")
        # load the data
        _, dev_data, test_data, src_vocab, trg_vocab = load_data(
            data_cfg=cfg["data"])

        # load model state from disk
        model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

        # build model and load parameters into it
        model = build_model(cfg["model"],
                            src_vocab=src_vocab,
                            trg_vocab=trg_vocab)
        model.load_state_dict(model_checkpoint["model_state"])

        if use_cuda:
            model.cuda()

        # (data set name, data set, model used to decode it)
        jobs = [("dev", dev_data, model),
                ("test", test_data, model)]
    else:
        # unsupervised NMT testing
        if "src2trg_test" not in cfg["data"].keys(
        ) or "trg2src_test" not in cfg["data"].keys():
            raise ValueError("Test data must be specified in config.")
        # load the data
        _, _, _, _, dev_src2trg, dev_trg2src, test_src2trg, test_trg2src, \
            src_vocab, trg_vocab, _ = \
            load_unsupervised_data(data_cfg=cfg["data"])

        # load model state from disk
        model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

        # build model and load parameters into it
        model = build_model(cfg["model"],
                            src_vocab=src_vocab,
                            trg_vocab=trg_vocab)
        # internal invariant: a non encoder-decoder architecture is expected
        # to yield an unsupervised model
        assert isinstance(model, UnsupervisedNMTModel)
        model.src2src_translator.load_state_dict(
            model_checkpoint["src2src_model_state"])
        model.trg2trg_translator.load_state_dict(
            model_checkpoint["trg2trg_model_state"])
        model.src2trg_translator.load_state_dict(
            model_checkpoint["src2trg_model_state"])
        model.trg2src_translator.load_state_dict(
            model_checkpoint["trg2src_model_state"])

        if use_cuda:
            # move all four translators; src2src was previously skipped
            # because src2trg was moved twice
            model.src2src_translator.cuda()
            model.trg2trg_translator.cuda()
            model.src2trg_translator.cuda()
            model.trg2src_translator.cuda()

        # (data set name, data set, translator for its direction)
        jobs = [("dev_src2trg", dev_src2trg, model.src2trg_translator),
                ("test_src2trg", test_src2trg, model.src2trg_translator),
                ("dev_trg2src", dev_trg2src, model.trg2src_translator),
                ("test_trg2src", test_trg2src, model.trg2src_translator)]

    # whether to use beam search for decoding, <2: greedy decoding
    beam_size, beam_alpha, postprocess = _test_decoding_options(cfg)

    validate_kwargs = dict(
        batch_size=batch_size, batch_type=batch_type, level=level,
        max_output_length=max_output_length, eval_metric=eval_metric,
        use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
        beam_alpha=beam_alpha, logger=logger, postprocess=postprocess)

    for data_set_name, data_set, translator in jobs:
        _predict_and_report(translator, data_set_name, data_set,
                            validate_kwargs, step=step, model_dir=model_dir,
                            output_path=output_path,
                            save_attention=save_attention, logger=logger)
Exemple #7
0
def translate(cfg_file, ckpt: str, output_path: str = None) -> None:
    """
    Interactive translation function.
    Loads model from checkpoint and translates either the stdin input or
    asks for input to translate interactively.
    The input has to be pre-processed according to the data that the model
    was trained on, i.e. tokenized or split into subwords.
    Translations are printed to stdout, or written to `output_path` when
    stdin input is given together with an output path.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load; if None, the latest checkpoint
        in the configured model directory is used
    :param output_path: path to output file
    :raises FileNotFoundError: if no checkpoint is given and none is found in
        the model directory
    """
    def _load_line_as_data(line):
        """ Create a dataset from one line via a temporary file. """
        tmp_name = "tmp"
        tmp_suffix = ".src"
        tmp_filename = tmp_name + tmp_suffix
        try:
            # write src input to temporary file
            with open(tmp_filename, "w") as tmp_file:
                tmp_file.write("{}\n".format(line))

            return MonoDataset(path=tmp_name, ext=tmp_suffix, field=src_field)
        finally:
            # remove temporary file, also when building the dataset failed
            if os.path.exists(tmp_filename):
                os.remove(tmp_filename)

    logger = make_logger()

    def _translate_data(test_data):
        """ Translates given dataset, using parameters from outer scope. """
        # only the hypotheses are of interest here
        _, _, _, _, _, _, hypotheses, _, _ = validate_on_data(
            model, data=test_data, batch_size=batch_size,
            batch_type=batch_type, level=level,
            max_output_length=max_output_length, eval_metric="",
            use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
            beam_alpha=beam_alpha, logger=logger, postprocess=postprocess)
        return hypotheses

    cfg = load_config(cfg_file)
    model_dir = cfg["training"]["model_dir"]

    # when checkpoint is not specified, take latest from model dir
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))

    batch_size = cfg["training"].get("eval_batch_size",
                                     cfg["training"].get("batch_size", 1))
    batch_type = cfg["training"].get(
        "eval_batch_type", cfg["training"].get("batch_type", "sentence"))
    use_cuda = cfg["training"].get("use_cuda", False)
    max_output_length = cfg["training"].get("max_output_length", None)

    # read vocabs
    src_vocab_file = cfg["data"].get("src_vocab", model_dir + "/src_vocab.txt")
    trg_vocab_file = cfg["data"].get("trg_vocab", model_dir + "/trg_vocab.txt")
    src_vocab = Vocabulary(file=src_vocab_file)
    trg_vocab = Vocabulary(file=trg_vocab_file)

    data_cfg = cfg["data"]
    level = data_cfg["level"]
    lowercase = data_cfg["lowercase"]

    tok_fun = lambda s: list(s) if level == "char" else s.split()

    src_field = Field(init_token=None,
                      eos_token=EOS_TOKEN,
                      pad_token=PAD_TOKEN,
                      tokenize=tok_fun,
                      batch_first=True,
                      lower=lowercase,
                      unk_token=UNK_TOKEN,
                      include_lengths=True)
    src_field.vocab = src_vocab

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, <2: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 1)
        beam_alpha = cfg["testing"].get("alpha", -1)
        postprocess = cfg["testing"].get("postprocess", True)
    else:
        beam_size = 1
        beam_alpha = -1
        postprocess = True

    if not sys.stdin.isatty():
        # input file given
        test_data = MonoDataset(path=sys.stdin, ext="", field=src_field)
        hypotheses = _translate_data(test_data)

        if output_path is not None:
            # write to outputfile if given
            output_path_set = "{}".format(output_path)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")
            logger.info("Translations saved to: %s.", output_path_set)
        else:
            # print to stdout
            for hyp in hypotheses:
                print(hyp)

    else:
        # enter interactive mode; these rebind the values that
        # _translate_data reads from this scope
        batch_size = 1
        batch_type = "sentence"
        while True:
            try:
                src_input = input("\nPlease enter a source sentence "
                                  "(pre-processed): \n")
                # an empty line ends the session
                if not src_input.strip():
                    break

                # every line has to be made into dataset
                test_data = _load_line_as_data(line=src_input)

                hypotheses = _translate_data(test_data)
                print("JoeyNMT: {}".format(hypotheses[0]))

            except (KeyboardInterrupt, EOFError):
                print("\nBye.")
                break
Exemple #8
0
def run_bot(model_dir, bpe_src_code=None, tokenize=None):
    """
    Start the bot. This means loading the model according to the config file.

    The configuration is read from `<model_dir>/config.yaml`. The checkpoint
    is the one named by `training.load_model` in the config or, if that key
    is absent, the latest checkpoint found in `model_dir`. After the model is
    loaded, a Slack web client and a Slack event adapter (endpoint
    "/slack/events", port 3000) are set up. Every plain message that is
    posted in the bot channel or that mentions the bot is translated and the
    translation is posted back to the same channel.

    :param model_dir: Model directory of trained Joey NMT model.
    :param bpe_src_code: BPE codes for source side processing (optional).
        Only used when the data level in the config is "bpe".
    :param tokenize: If set (any value other than None), tokenize inputs
        with the Moses tokenizer and detokenize the outputs.
    :return: None
    :raises FileNotFoundError: if no checkpoint is configured and none is
        found in `model_dir`.
    """

    cfg_file = model_dir + "/config.yaml"

    logger = logging.getLogger(__name__)

    # load the Joey configuration
    cfg = load_config(cfg_file)

    # load the checkpoint
    if "load_model" in cfg['training'].keys():
        ckpt = cfg['training']["load_model"]
    else:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))

    # prediction parameters from config
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    max_output_length = cfg["training"].get("max_output_length", None)
    lowercase = cfg["data"].get("lowercase", False)

    # load the vocabularies
    src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt"
    trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt"
    src_vocab = build_vocab(field="src",
                            vocab_file=src_vocab_file,
                            dataset=None,
                            max_size=-1,
                            min_freq=0)
    trg_vocab = build_vocab(field="trg",
                            vocab_file=trg_vocab_file,
                            dataset=None,
                            max_size=-1,
                            min_freq=0)

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 1
        beam_alpha = -1

    # pre-processing
    # NOTE(review): the check is `is not None`, so tokenize=False also
    # enables tokenization -- confirm this is what callers expect.
    if tokenize is not None:
        src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"])
        trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"])
        # tokenize input
        tokenizer = lambda x: src_tokenizer.tokenize(x, return_str=True)
        detokenizer = lambda x: trg_tokenizer.detokenize(x.split(),
                                                         return_str=True)
    else:
        tokenizer = lambda x: x
        detokenizer = lambda x: x

    if bpe_src_code is not None and level == "bpe":
        # load bpe merge file; the codes are parsed while the BPE object is
        # constructed, so the file handle can be released right afterwards
        with open(bpe_src_code, "r", encoding="utf-8") as merge_file:
            bpe = apply_bpe.BPE(codes=merge_file)
        segmenter = lambda x: bpe.process_line(x.strip())
    elif level == "char":
        # split to chars
        segmenter = lambda x: list(x.strip())
    else:
        segmenter = lambda x: x.strip()

    # build model and load parameters into it
    model_checkpoint = load_checkpoint(ckpt, use_cuda)
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    print("Joey NMT model loaded successfully.")

    web_client = slack.WebClient(TOKEN, timeout=30)

    # get bot id
    bot_id = (web_client.api_call("auth.test")["user_id"].upper())

    # find bot channel id; stays None when no channel is called BOT_CHANNEL,
    # in which case the bot only reacts to explicit mentions
    bot_channel_id = None
    all_channels = web_client.api_call("conversations.list")["channels"]
    for c in all_channels:
        if c["name"] == BOT_CHANNEL:
            bot_channel_id = c["id"]
    if bot_channel_id is None:
        logger.warning(
            "Channel %s not found; only messages mentioning the bot "
            "will be translated.", BOT_CHANNEL)

    slack_events_adapter = SlackEventAdapter(BOT_SIGNIN,
                                             endpoint="/slack/events")

    @slack_events_adapter.on("message")
    def handle_message(event_data):
        """Translate a plain user message and post the result back."""
        message = event_data["event"]
        # only plain messages are handled (events with a subtype are skipped)
        if message.get("subtype") is not None:
            return
        channel = message["channel"]
        user = message["user"]
        text = message["text"].strip()
        # never answer the bot's own messages
        if user == bot_id:
            return
        # translates all messages in its channel and mentions
        if channel != bot_channel_id and bot_id not in text:
            return
        mention = "<@{}>".format(bot_id)
        # TODO remove all possible mentions with regex
        if mention in text:
            # drop every occurrence of the bot mention and keep the
            # surrounding text separated by single spaces
            pieces = (part.strip() for part in text.split(mention))
            text = " ".join(piece for piece in pieces if piece)
        translation = translate(text,
                                beam_size=beam_size,
                                beam_alpha=beam_alpha,
                                level=level,
                                lowercase=lowercase,
                                max_output_length=max_output_length,
                                model=model,
                                postprocess=[detokenizer],
                                preprocess=[tokenizer, segmenter],
                                src_vocab=src_vocab,
                                trg_vocab=trg_vocab,
                                use_cuda=use_cuda,
                                logger=logger)
        web_client.chat_postMessage(text=translation,
                                    token=TOKEN,
                                    channel=channel)

    # Error events
    @slack_events_adapter.on("error")
    def error_handler(err):
        """Print errors reported by the Slack event adapter."""
        print("ERROR: " + str(err))

    slack_events_adapter.start(port=3000)
Exemple #9
0
def Q_learning(cfg_file: str) -> None:
    """
    Main training function: train a translation model with deep Q-learning.

    Two copies of the model are built: `policy_net` (the Q network that is
    optimized) and `target_net` (the Q_hat network, which is overwritten with
    the weights of `policy_net` every `target_update` steps). For every
    training batch the policy network translates greedily, each hypothesis
    is scored with `calculate_bleu`, and the resulting transitions are pushed
    into a replay memory. Once the memory is full, `inner_epochs` mini-batches
    are sampled from it per training batch and the policy network is
    regressed (MSE loss) towards the targets
    `reward + Gamma * max Q_hat` (just `reward` for finished transitions).
    Every `validation_freq` steps the policy network is validated on the dev
    data and a checkpoint is written if the result is a new best.

    :param cfg_file: path to configuration yaml file
    """
    cfg = load_config(cfg_file)  # config is a dict
    # make logger
    model_dir = make_model_dir(cfg["training"]["model_dir"],
                               overwrite=cfg["training"].get(
                                   "overwrite", False))
    _ = make_logger(model_dir, mode="train")  # version string returned
    # TODO: save version number in model checkpoints

    # set the random seed
    set_seed(seed=cfg["training"].get("random_seed", 42))

    # load the data
    print("loadding data here")
    train_data, dev_data, _, src_vocab, trg_vocab = load_data(
        data_cfg=cfg["data"])
    # The training data is filtered to include sentences up to `max_sent_length`
    #     on source and target side.

    # training config:
    train_config = cfg["training"]
    shuffle = train_config.get("shuffle", True)
    batch_size = train_config["batch_size"]
    mini_BATCH_SIZE = train_config["mini_batch_size"]
    batch_type = train_config.get("batch_type", "sentence")
    outer_epochs = train_config.get("outer_epochs", 10)
    inner_epochs = train_config.get("inner_epochs", 10)
    TARGET_UPDATE = train_config.get("target_update", 10)
    Gamma = train_config.get("Gamma", 0.999)
    use_cuda = train_config["use_cuda"] and torch.cuda.is_available()

    # validation config
    validation_freq = train_config.get("validation_freq", 1000)
    ckpt_queue = queue.Queue(maxsize=train_config.get("keep_last_ckpts", 5))
    eval_batch_size = train_config.get("eval_batch_size", batch_size)
    level = cfg["data"]["level"]

    eval_metric = train_config.get("eval_metric", "bleu")
    n_gpu = torch.cuda.device_count() if use_cuda else 0
    eval_batch_type = train_config.get("eval_batch_type", batch_type)
    # eval options; a missing "testing" section falls back to the defaults
    test_config = cfg.get("testing", {})
    bpe_type = test_config.get("bpe_type", "subword-nmt")
    sacrebleu = {"remove_whitespace": True, "tokenize": "13a"}
    max_output_length = train_config.get("max_output_length", None)
    minimize_metric = True
    # initialize training statistics
    stats = TrainStatistics(
        steps=0,
        stop=False,
        total_tokens=0,
        best_ckpt_iter=0,
        best_ckpt_score=np.inf if minimize_metric else -np.inf,
        minimize_metric=minimize_metric)

    early_stopping_metric = train_config.get("early_stopping_metric",
                                             "eval_metric")

    if early_stopping_metric in ["ppl", "loss"]:
        stats.minimize_metric = True
        stats.best_ckpt_score = np.inf
    elif early_stopping_metric == "eval_metric":
        if eval_metric in [
                "bleu", "chrf", "token_accuracy", "sequence_accuracy"
        ]:
            stats.minimize_metric = False
            stats.best_ckpt_score = -np.inf

        # eval metric that has to get minimized (not yet implemented)
        else:
            stats.minimize_metric = True

    # data loader (modified from train_and_validate function)
    train_iter = make_data_iter(train_data,
                                batch_size=batch_size,
                                batch_type=batch_type,
                                train=True,
                                shuffle=shuffle)

    # initialize the Replay Memory D with capacity N
    memory = ReplayMemory(10000)
    steps_done = 0

    # initialize two DQN networks
    policy_net = build_model(cfg["model"],
                             src_vocab=src_vocab,
                             trg_vocab=trg_vocab)  # Q_network
    target_net = build_model(cfg["model"],
                             src_vocab=src_vocab,
                             trg_vocab=trg_vocab)  # Q_hat_network
    if use_cuda:
        policy_net.cuda()
        target_net.cuda()

    # Initialize target net Q_hat with weights equal to policy_net
    target_net.load_state_dict(policy_net.state_dict())
    target_net.eval()  # target_net not update the parameters, test mode

    # Optimizer
    optimizer = build_optimizer(config=cfg["training"],
                                parameters=policy_net.parameters())
    # Loss function
    mse_loss = torch.nn.MSELoss()

    pad_index = policy_net.pad_index

    cross_entropy_loss = XentLoss(pad_index=pad_index)
    policy_net.loss_function = cross_entropy_loss

    # learning rate scheduling; the mode follows the direction chosen for the
    # early stopping metric above rather than the initial default
    # NOTE(review): the scheduler is built and checkpointed but never stepped
    # inside this function.
    scheduler, _ = build_scheduler(
        config=train_config,
        scheduler_mode="min" if stats.minimize_metric else "max",
        optimizer=optimizer,
        hidden_size=cfg["model"]["encoder"]["hidden_size"])

    # model parameters
    if "load_model" in train_config.keys():
        load_model_path = train_config["load_model"]
        reset_best_ckpt = train_config.get("reset_best_ckpt", False)
        reset_scheduler = train_config.get("reset_scheduler", False)
        reset_optimizer = train_config.get("reset_optimizer", False)
        reset_iter_state = train_config.get("reset_iter_state", False)

        print('settings', reset_best_ckpt, reset_iter_state, reset_optimizer,
              reset_scheduler)

        logger.info("Loading model from %s", load_model_path)
        model_checkpoint = load_checkpoint(path=load_model_path,
                                           use_cuda=use_cuda)

        # restore model and optimizer parameters
        policy_net.load_state_dict(model_checkpoint["model_state"])

        if not reset_optimizer:
            optimizer.load_state_dict(model_checkpoint["optimizer_state"])
        else:
            logger.info("Reset optimizer.")
        if not reset_scheduler:
            # older checkpoints written by this function carry no scheduler
            # state, so a missing key is treated like a stored None
            scheduler_state = model_checkpoint.get("scheduler_state")
            if scheduler_state is not None and scheduler is not None:
                scheduler.load_state_dict(scheduler_state)
        else:
            logger.info("Reset scheduler.")

        if not reset_best_ckpt:
            stats.best_ckpt_score = model_checkpoint["best_ckpt_score"]
            stats.best_ckpt_iter = model_checkpoint["best_ckpt_iteration"]
            print('stats.best_ckpt_score', stats.best_ckpt_score)
            print('stats.best_ckpt_iter', stats.best_ckpt_iter)
        else:
            logger.info("Reset tracking of the best checkpoint.")

        # NOTE: a 'train_iter_state' stored in the checkpoint is not restored;
        # the data iterator always starts from scratch.

        # Initialize target net Q_hat with weights equal to policy_net
        target_net.load_state_dict(policy_net.state_dict())
        target_net.eval()

        # move parameters to cuda
        if use_cuda:
            policy_net.cuda()
            target_net.cuda()

    for i_episode in range(outer_epochs):
        # Outer loop

        # get batch
        for batch in iter(train_iter):  # joeynmt training.py 377

            # create a Batch object from torchtext batch
            # ( use class Batch from batch.py)
            # return the sentences same length (with padding) in one batch
            batch = Batch(batch, policy_net.pad_index, use_cuda=use_cuda)
            # we want to get batch.src and batch.trg
            # source here is represented by the word index not word embedding.

            encoder_output_batch, _, _, _ = policy_net(
                return_type="encode",
                src=batch.src,
                src_length=batch.src_length,
                src_mask=batch.src_mask,
            )

            trans_output_batch, _ = transformer_greedy(
                src_mask=batch.src_mask,
                max_output_length=max_output_length,
                model=policy_net,
                encoder_output=encoder_output_batch,
                steps_done=steps_done,
                use_cuda=use_cuda)

            steps_done += 1

            print('batch.trg.shape is:', batch.trg.shape)
            print('trans_output_batch', trans_output_batch)

            # Get the reward_batch (the bleu score of each sentence in the
            # batch); the first output position is dropped before scoring
            reward_batch = []
            for sent_idx in range(int(batch.src.shape[0])):
                all_outputs = [(trans_output_batch[sent_idx])[1:]]
                all_ref = [batch.trg[sent_idx]]
                sentence_score = calculate_bleu(model=policy_net,
                                                level=level,
                                                raw_hypo=all_outputs,
                                                raw_ref=all_ref)
                reward_batch.append(sentence_score)

            print('reward batch is', reward_batch)
            reward_batch = torch.tensor(reward_batch, dtype=torch.float)

            # make prefix and push tuples into memory
            push_sample_to_memory(model=policy_net,
                                  level=level,
                                  eos_index=policy_net.eos_index,
                                  memory=memory,
                                  src_batch=batch.src,
                                  trg_batch=batch.trg,
                                  trans_output_batch=trans_output_batch,
                                  reward_batch=reward_batch,
                                  max_output_length=max_output_length)
            print(memory.capacity, len(memory.memory))

            # training only starts once the replay memory is full
            if len(memory.memory) != memory.capacity:
                continue

            # inner loop
            for _inner_step in range(inner_epochs):
                # Sample mini-batch from the memory and regroup the list of
                # Transition tuples into one Transition of per-field tuples
                transitions = memory.sample(mini_BATCH_SIZE)
                mini_batch = Transition(*zip(*transitions))

                # concatenate together into tensors of length mini_BATCH_SIZE
                mini_next_word = torch.cat(
                    [word.unsqueeze(0) for word in mini_batch.next_word])
                mini_reward = torch.tensor(mini_batch.reward)
                mini_is_eos = torch.Tensor(mini_batch.finish)

                mini_src_length = torch.Tensor(
                    [len(item) for item in mini_batch.source_sentence])
                # shape (mini_BATCH_SIZE, max_length_src)
                mini_src = pad_sequence(mini_batch.source_sentence,
                                        batch_first=True,
                                        padding_value=float(pad_index))

                mini_prefix_length = torch.Tensor(
                    [len(item) for item in mini_batch.prefix])
                # shape (mini_BATCH_SIZE, max_length_prefix)
                mini_prefix = pad_sequence(
                    [torch.from_numpy(prefix_)
                     for prefix_ in mini_batch.prefix],
                    batch_first=True,
                    padding_value=pad_index)

                mini_src_mask = (mini_src != pad_index).unsqueeze(1)
                mini_trg_mask = (mini_prefix != pad_index).unsqueeze(1)

                if use_cuda:
                    mini_src = mini_src.cuda()
                    mini_prefix = mini_prefix.cuda()
                    mini_src_mask = mini_src_mask.cuda()
                    mini_src_length = mini_src_length.cuda()
                    mini_trg_mask = mini_trg_mask.cuda()
                    mini_next_word = mini_next_word.cuda()
                    mini_reward = mini_reward.cuda()
                    mini_is_eos = mini_is_eos.cuda()

                # calculate the Q_value
                logits_Q, _, _, _ = policy_net._encode_decode(
                    src=mini_src,
                    trg_input=mini_prefix,
                    src_mask=mini_src_mask,
                    src_length=mini_src_length,
                    trg_mask=mini_trg_mask)

                # pick the output at the last real prefix position ...
                logits_Q = logits_Q[range(mini_BATCH_SIZE),
                                    mini_prefix_length.long() - 1, :]
                # ... and there the score of the word that was actually taken
                Q_value = logits_Q[range(mini_BATCH_SIZE), mini_next_word]

                # NOTE(review): the next word is appended after the padding
                # of `mini_prefix`, so for prefixes shorter than the longest
                # one it does not sit directly behind the prefix -- confirm
                # whether this is intended.
                mini_prefix_add = torch.cat(
                    [mini_prefix, mini_next_word.unsqueeze(1)], dim=1)
                mini_trg_mask_add = (mini_prefix_add !=
                                     pad_index).unsqueeze(1)

                if use_cuda:
                    mini_prefix_add = mini_prefix_add.cuda()
                    mini_trg_mask_add = mini_trg_mask_add.cuda()

                # The regression target must not carry gradients: it is
                # computed by the frozen target network.
                with torch.no_grad():
                    logits_Q_hat, _, _, _ = target_net._encode_decode(
                        src=mini_src,
                        trg_input=mini_prefix_add,
                        src_mask=mini_src_mask,
                        src_length=mini_src_length,
                        trg_mask=mini_trg_mask_add)
                    logits_Q_hat = logits_Q_hat[range(mini_BATCH_SIZE),
                                                mini_prefix_length.long(), :]
                    Q_hat_value, _ = torch.max(logits_Q_hat, dim=1)

                    mini_reward = mini_reward.float()
                    yj = mini_reward + Gamma * Q_hat_value
                    # finished transitions get the plain reward as target;
                    # the flags are used as a boolean mask (as integer
                    # indices they would only ever touch rows 0 and 1)
                    done_mask = mini_is_eos.bool()
                    yj[done_mask] = mini_reward[done_mask]
                yj = yj.detach()

                # Optimize the model
                policy_net.zero_grad()

                # Compute loss
                loss = mse_loss(yj, Q_value)
                print('loss', loss)
                logger.info("step = %s, loss = %s", stats.steps, loss.item())
                loss.backward()
                optimizer.step()

                stats.steps += 1

                if stats.steps % TARGET_UPDATE == 0:
                    # update the parameters in target_net
                    target_net.load_state_dict(policy_net.state_dict())

                if stats.steps % validation_freq != 0:
                    continue

                # Validation
                # NOTE(review): if validate_on_data switches the model to
                # eval mode, policy_net is not switched back to train mode
                # afterwards -- confirm.
                print('Start validation')

                valid_score, valid_loss, valid_ppl, valid_sources, \
                valid_sources_raw, valid_references, valid_hypotheses, \
                valid_hypotheses_raw, valid_attention_scores = \
                    validate_on_data(
                        model=policy_net,
                        data=dev_data,
                        batch_size=eval_batch_size,
                        use_cuda=use_cuda,
                        level=level,
                        eval_metric=eval_metric,
                        n_gpu=n_gpu,
                        compute_loss=True,
                        beam_size=1,
                        beam_alpha=-1,
                        batch_type=eval_batch_type,
                        postprocess=True,
                        bpe_type=bpe_type,
                        sacrebleu=sacrebleu,
                        max_output_length=max_output_length
                    )
                print('validation_loss: {}, validation_score: {}'.format(
                    valid_loss, valid_score))
                logger.info(valid_loss)
                print('average loss: total_loss/n_tokens:', valid_ppl)

                if early_stopping_metric == "loss":
                    ckpt_score = valid_loss
                elif early_stopping_metric in ["ppl", "perplexity"]:
                    ckpt_score = valid_ppl
                else:
                    ckpt_score = valid_score

                if not stats.is_best(ckpt_score):
                    continue

                stats.best_ckpt_score = ckpt_score
                stats.best_ckpt_iter = stats.steps
                logger.info('Hooray! New best validation result [%s]!',
                            early_stopping_metric)
                if ckpt_queue.maxsize <= 0:
                    continue

                logger.info("Saving new checkpoint.")

                # Save the model's current parameters and the training state
                # to a checkpoint. The training state contains the total
                # number of training steps, the total number of training
                # tokens, the best checkpoint score and iteration so far,
                # and optimizer and scheduler states.
                model_path = "{}/{}.ckpt".format(model_dir, stats.steps)
                model_state_dict = policy_net.module.state_dict() \
                    if isinstance(policy_net, torch.nn.DataParallel) \
                    else policy_net.state_dict()
                state = {
                    "steps": stats.steps,
                    "total_tokens": stats.total_tokens,
                    "best_ckpt_score": stats.best_ckpt_score,
                    "best_ckpt_iteration": stats.best_ckpt_iter,
                    "model_state": model_state_dict,
                    "optimizer_state": optimizer.state_dict(),
                    # stored so that resuming via `load_model` finds the key
                    "scheduler_state": scheduler.state_dict()
                    if scheduler is not None else None,
                }
                torch.save(state, model_path)
                if ckpt_queue.full():
                    to_delete = ckpt_queue.get()  # delete oldest ckpt
                    try:
                        os.remove(to_delete)
                    except FileNotFoundError:
                        logger.warning(
                            "Wanted to delete old checkpoint %s but "
                            "file does not exist.", to_delete)

                ckpt_queue.put(model_path)

                best_path = "{}/best.ckpt".format(model_dir)
                try:
                    # create/modify symbolic link for best checkpoint
                    symlink_update("{}.ckpt".format(stats.steps), best_path)
                except OSError:
                    # overwrite best.ckpt
                    torch.save(state, best_path)
Exemple #10
0
def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    The dev and test sets named in the config are decoded one after the
    other; the score of each set is printed if the set has references.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load; if None, the latest checkpoint
        in the configured model directory is used
    :param output_path: path to output; translations are written to
        `<output_path>.<data set name>`
    :param save_attention: whether to save the computed attention weights
    :raises ValueError: if the config does not specify test data
    :raises FileNotFoundError: if `ckpt` is None and the model directory
        contains no checkpoint
    """

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # needed below for the attention plots, whether or not the checkpoint
    # was given explicitly
    model_dir = cfg["training"]["model_dir"]

    # when checkpoint is not specified, take latest from model dir
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))

    # the step is the checkpoint file name without directory and extension;
    # checkpoints that do not live in model_dir are labelled "best"
    try:
        step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
    except IndexError:
        step = "best"

    batch_size = cfg["training"]["batch_size"]
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    _, dev_data, test_data, src_vocab, trg_vocab = load_data(
        data_cfg=cfg["data"])

    data_to_predict = {"dev": dev_data, "test": test_data}

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 0
        beam_alpha = -1

    for data_set_name, data_set in data_to_predict.items():
        if data_set is None:
            # e.g. no valid_data
            continue

        #pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
        hypotheses_raw, attention_scores, logprobs = validate_on_data(
            model, data=data_set, batch_size=batch_size, level=level,
            max_output_length=max_output_length, eval_metric=eval_metric,
            use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
            beam_alpha=beam_alpha)
        #pylint: enable=unused-variable

        if "trg" in data_set.fields:
            decoding_description = "Greedy decoding" if beam_size == 0 else \
                "Beam search decoding with beam size = {} and alpha = {}".\
                    format(beam_size, beam_alpha)
            print("{:4s} {}: {} [{}]".format(data_set_name, eval_metric, score,
                                             decoding_description))
        else:
            print("No references given for {} -> no evaluation.".format(
                data_set_name))

        if attention_scores is not None and save_attention:
            attention_path = "{}/{}.{}.att".format(model_dir, data_set_name,
                                                   step)
            print("Attention plots saved to: {}.xx".format(attention_path))
            store_attention_plots(attentions=attention_scores,
                                  targets=hypotheses_raw,
                                  sources=list(data_set.src),
                                  indices=range(len(hypotheses)),
                                  output_prefix=attention_path)

        if output_path is not None:
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                if cfg["data"].get("post_process", True):
                    # post-processed hypotheses are plain strings
                    for hyp in hypotheses:
                        out_file.write(hyp + "\n")
                else:
                    # raw hypotheses are token sequences
                    for hyp in hypotheses_raw:
                        out_file.write(" ".join(hyp) + "\n")
            print("Translations saved to: {}".format(output_path_set))
Exemple #11
0
def test(cfg_file,
         ckpt,
         output_path: str = None,
         save_attention: bool = False,
         logger: logging.Logger = None,
         data_to_test: str = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load, or a list of such paths (several
        checkpoints are wrapped in an `EnsembleModel`); if None, the latest
        checkpoint found in the configured model directory is used
    :param output_path: path to output; one file per data set is written to
        "<output_path>.<data set name>"
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    :param data_to_test: name of the only data set to decode ("dev" or
        "test"); both are decoded if not set
    :raises ValueError: if the config does not specify test data
    :raises FileNotFoundError: if `ckpt` is None and no checkpoint is found
    """

    if logger is None:
        logger = logging.getLogger(__name__)
        FORMAT = '%(asctime)-15s - %(message)s'
        logging.basicConfig(format=FORMAT)
        logger.setLevel(level=logging.DEBUG)

    cfg = load_config(cfg_file)
    train_cfg = cfg["training"]
    data_cfg = cfg["data"]
    # the "testing" section is optional: without it every test option below
    # falls back to its default instead of failing with a KeyError
    test_cfg = cfg.get("testing") or {}

    if "test" not in data_cfg.keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take latest (best) from model dir
    model_dir = train_cfg["model_dir"]
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError("No checkpoint at {}.".format(model_dir))
    if isinstance(ckpt, str):
        ckpt = [ckpt]

    # `step` labels the attention plots; it must exist for explicitly given
    # checkpoints as well, so derive it from the first checkpoint path and
    # fall back to "best" when the path does not lie inside `model_dir`
    try:
        step = ckpt[0].split(model_dir + "/")[1].split(".ckpt")[0]
    except (IndexError, AttributeError):
        step = "best"

    # dict.get evaluates its default eagerly, so look up the fallback key
    # only when the preferred key is really missing
    if "eval_batch_size" in train_cfg:
        batch_size = train_cfg["eval_batch_size"]
    else:
        batch_size = train_cfg["batch_size"]
    batch_type = train_cfg.get("batch_type", "sentence")
    use_cuda = train_cfg.get("use_cuda", False)
    assert "level" in data_cfg or "trg_level" in data_cfg
    if "level" in data_cfg:
        trg_level = data_cfg["level"]
    else:
        trg_level = data_cfg["trg_level"]

    eval_metric = train_cfg["eval_metric"]
    if isinstance(eval_metric, str):
        eval_metric = [eval_metric]
    max_output_length = test_cfg.get("max_output_length",
                                     train_cfg.get("max_output_length", None))

    # load the data
    data = load_data(data_cfg)
    dev_data = data["dev_data"]
    test_data = data["test_data"]
    vocabs = data["vocabs"]

    data_to_predict = {"dev": dev_data, "test": test_data}
    if data_to_test is not None:
        assert data_to_test in data_to_predict
        data_to_predict = {data_to_test: data_to_predict[data_to_test]}

    # load model state from disk
    models = []
    for c in ckpt:
        model_checkpoint = load_checkpoint(c, use_cuda=use_cuda)

        # build model and load parameters into it
        m = build_model(cfg["model"], vocabs=vocabs)
        m.load_state_dict(model_checkpoint["model_state"])
        models.append(m)
    model = models[0] if len(models) == 1 else EnsembleModel(*models)

    if use_cuda:
        model.cuda()  # should this exist?

    # whether to use beam search for decoding, 0: greedy decoding
    beam_sizes = test_cfg.get("beam_size", 0)
    beam_alpha = test_cfg.get("alpha", 0)
    if isinstance(beam_sizes, int):
        # a single beam size is treated as a one-element sweep
        beam_sizes = [beam_sizes]
    assert beam_alpha >= 0, "Use alpha >= 0"

    method = test_cfg.get("method", None)
    max_hyps = test_cfg.get("max_hyps", 1)  # only for the enumerate thing

    validate_by_label = test_cfg.get("validate_by_label",
                                     train_cfg.get("validate_by_label", False))
    forced_sparsity = test_cfg.get("forced_sparsity",
                                   train_cfg.get("forced_sparsity", False))

    for beam_size in beam_sizes:
        for data_set_name, data_set in data_to_predict.items():
            valid_results = validate_on_data(
                model,
                data=data_set,
                batch_size=batch_size,
                batch_type=batch_type,
                trg_level=trg_level,
                max_output_length=max_output_length,
                eval_metrics=eval_metric,
                use_cuda=use_cuda,
                loss_function=None,
                beam_size=beam_size,
                beam_alpha=beam_alpha,
                save_attention=save_attention,
                validate_by_label=validate_by_label,
                forced_sparsity=forced_sparsity,
                method=method,
                max_hyps=max_hyps,
                break_at_p=test_cfg.get("break_at_p", 1.0),
                break_at_argmax=test_cfg.get("break_at_argmax", False),
                short_depth=test_cfg.get("short_depth", 0))
            # positions used here: 0 scores, 2 hypotheses, 3 raw hypotheses,
            # 4 attention scores, 5 scores by label
            scores = valid_results[0]
            hypotheses, hypotheses_raw = valid_results[2:4]
            attention_scores = valid_results[4]
            scores_by_label = valid_results[5]

            if "trg" in data_set.fields:
                log_scores(logger, data_set_name, scores, scores_by_label,
                           beam_size, beam_alpha)
            else:
                logger.info("No references given for %s -> no evaluation.",
                            data_set_name)

            if save_attention and not attention_scores:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not "
                               "available when using beam search. "
                               "Set beam_size to 0 for greedy decoding.")
            if save_attention and attention_scores:
                # currently this will break for transformers
                logger.info("Saving attention plots. This might be slow.")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=list(data_set.src),
                                      indices=range(len(hypotheses)),
                                      model_dir=model_dir,
                                      steps=step,
                                      data_set_name=data_set_name)
                logger.info("Attention plots saved to: %s", model_dir)

            if output_path is not None:
                output_path_set = "{}.{}".format(output_path, data_set_name)
                with open(output_path_set, mode="w", encoding="utf-8") as outf:
                    for hyp in hypotheses:
                        outf.write(hyp + "\n")
                logger.info("Translations saved to: %s", output_path_set)
Exemple #12
0
  def load_model(self, src_language, trg_language, domain, bpe_src_code=None, tokenize=None):
    """
    Load the model for a given source/target language pair and domain.

    The model directory is `self._model_dir_prefix` followed by
    "<src_language>-<trg_language>-<domain>". The original config
    ("config_orig.yaml") is adjusted via `self._update_config`, written to
    "config.yaml" in that directory and then read back.

    :param src_language: source language, used to select the model directory
    :param trg_language: target language, used to select the model directory
    :param domain: domain, used to select the model directory
    :param bpe_src_code: path to the BPE merge file for the source side; only
        used when the configured data level is "bpe" (optional)
    :param tokenize: if truthy, tokenize inputs and detokenize outputs with
        the Moses tokenizer
    :return: dict with the keys "logger", "use_cuda", "level",
        "max_output_length", "lowercase", "src_vocab", "trg_vocab",
        "beam_size", "beam_alpha", "preprocess", "postprocess" and "model"
    :raises FileNotFoundError: if the config names no checkpoint and none is
        found in the model directory
    """
    model_dir = f"{self._model_dir_prefix}{src_language}-{trg_language}-{domain}"

    # Files expected inside the model directory.
    ckpt_path = os.path.join(model_dir, 'model.ckpt')
    src_vocab_path = os.path.join(model_dir, 'src_vocab.txt')
    trg_vocab_path = os.path.join(model_dir, 'trg_vocab.txt')
    config_path = os.path.join(model_dir, 'config_orig.yaml')

    # Adjust the original config and store it next to the model.
    config = load_config(config_path)
    new_config_file = os.path.join(model_dir, 'config.yaml')
    config = self._update_config(config, src_vocab_path, trg_vocab_path,
                                 model_dir, ckpt_path)
    with open(new_config_file, 'w') as cfile:
        yaml.dump(config, cfile)

    print('Loaded model for {}-{}.'.format(src_language, trg_language))

    conf = {}

    logger = logging.getLogger(__name__)
    conf["logger"] = logger

    # load the Joey configuration
    cfg = load_config(new_config_file)

    # load the checkpoint
    if "load_model" in cfg['training'].keys():
        ckpt = cfg['training']["load_model"]
    else:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError("No checkpoint found in directory {}."
                                    .format(model_dir))

    # prediction parameters from config; CUDA is only used when available
    conf["use_cuda"] = cfg["training"].get("use_cuda", False) if torch.cuda.is_available() else False

    conf["level"] = cfg["data"]["level"]
    conf["max_output_length"] = cfg["training"].get("max_output_length", None)
    conf["lowercase"] = cfg["data"].get("lowercase", False)

    # load the vocabularies
    src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt"
    trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt"

    conf["src_vocab"] = build_vocab(field="src", vocab_file=src_vocab_file,
                                    dataset=None, max_size=-1, min_freq=0)
    conf["trg_vocab"] = build_vocab(field="trg", vocab_file=trg_vocab_file,
                                    dataset=None, max_size=-1, min_freq=0)

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        conf["beam_size"] = cfg["testing"].get("beam_size", 0)
        conf["beam_alpha"] = cfg["testing"].get("alpha", -1)
    else:
        conf["beam_size"] = 1
        conf["beam_alpha"] = -1

    # pre-processing: a falsy `tokenize` (None or False) disables Moses
    if tokenize:
        src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"])
        trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"])
        # tokenize input
        tokenizer = lambda x: src_tokenizer.tokenize(x, return_str=True)
        detokenizer = lambda x: trg_tokenizer.detokenize(
            x.split(), return_str=True)
    else:
        tokenizer = lambda x: x
        detokenizer = lambda x: x

    # the level comes from the config (there is no local named `level`)
    if bpe_src_code is not None and conf["level"] == "bpe":
        # load bpe merge file; presumably BPE reads the codes while it is
        # constructed, so the handle is closed right afterwards -- confirm
        # against the installed subword-nmt version
        with open(bpe_src_code, "r", encoding="utf-8") as merge_file:
            bpe = apply_bpe.BPE(codes=merge_file)
        segmenter = lambda x: bpe.process_line(x.strip())
    elif conf["level"] == "char":
        # split to chars
        segmenter = lambda x: list(x.strip())
    else:
        segmenter = lambda x: x.strip()

    conf["preprocess"] = [tokenizer, segmenter]
    conf["postprocess"] = [detokenizer]
    # build model and load parameters into it
    model_checkpoint = load_checkpoint(ckpt, conf["use_cuda"])
    model = build_model(cfg["model"], src_vocab=conf["src_vocab"], trg_vocab=conf["trg_vocab"])
    model.load_state_dict(model_checkpoint["model_state"])
    if conf["use_cuda"]:
        model.cuda()
    conf["model"] = model
    print("Joey NMT model loaded successfully.")
    return conf
Exemple #13
0
def load_model(model_dir, bpe_src_code=None, tokenize=None):
    """
    Start the bot. This means loading the model according to the config file.

    :param model_dir: Model directory of trained Joey NMT model.
    :param bpe_src_code: BPE codes for source side processing (optional);
        only used when the configured data level is "bpe".
    :param tokenize: If True, tokenize inputs with Moses tokenizer (and
        detokenize outputs); None or False leaves the text untouched.
    :return: dict with the keys "logger", "use_cuda", "level",
        "max_output_length", "lowercase", "src_vocab", "trg_vocab",
        "beam_size", "beam_alpha", "preprocess", "postprocess" and "model"
    :raises FileNotFoundError: if the config names no checkpoint and none is
        found in `model_dir`
    """
    conf = {}
    cfg_file = model_dir+"/config.yaml"

    logger = logging.getLogger(__name__)
    conf["logger"] = logger
    # load the Joey configuration
    cfg = load_config(cfg_file)

    # load the checkpoint
    if "load_model" in cfg['training'].keys():
        ckpt = cfg['training']["load_model"]
    else:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError("No checkpoint found in directory {}."
                                    .format(model_dir))

    # prediction parameters from config
    conf["use_cuda"] = cfg["training"].get("use_cuda", False)
    conf["level"] = cfg["data"]["level"]
    conf["max_output_length"] = cfg["training"].get("max_output_length", None)
    conf["lowercase"] = cfg["data"].get("lowercase", False)

    # load the vocabularies
    src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt"
    trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt"

    conf["src_vocab"] = build_vocab(field="src", vocab_file=src_vocab_file,
                                    dataset=None, max_size=-1, min_freq=0)
    conf["trg_vocab"] = build_vocab(field="trg", vocab_file=trg_vocab_file,
                                    dataset=None, max_size=-1, min_freq=0)

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        conf["beam_size"] = cfg["testing"].get("beam_size", 0)
        conf["beam_alpha"] = cfg["testing"].get("alpha", -1)
    else:
        conf["beam_size"] = 1
        conf["beam_alpha"] = -1

    # pre-processing: a falsy `tokenize` (None or False) disables Moses
    if tokenize:
        src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"])
        trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"])
        # tokenize input
        tokenizer = lambda x: src_tokenizer.tokenize(x, return_str=True)
        detokenizer = lambda x: trg_tokenizer.detokenize(
            x.split(), return_str=True)
    else:
        tokenizer = lambda x: x
        detokenizer = lambda x: x

    # the level comes from the config (there is no local named `level`)
    if bpe_src_code is not None and conf["level"] == "bpe":
        # load bpe merge file; presumably BPE reads the codes while it is
        # constructed, so the handle is closed right afterwards -- confirm
        # against the installed subword-nmt version
        with open(bpe_src_code, "r", encoding="utf-8") as merge_file:
            bpe = apply_bpe.BPE(codes=merge_file)
        segmenter = lambda x: bpe.process_line(x.strip())
    elif conf["level"] == "char":
        # split to chars
        segmenter = lambda x: list(x.strip())
    else:
        segmenter = lambda x: x.strip()

    conf["preprocess"] = [tokenizer, segmenter]
    conf["postprocess"] = [detokenizer]
    # build model and load parameters into it
    model_checkpoint = load_checkpoint(ckpt, conf["use_cuda"])
    model = build_model(cfg["model"], src_vocab=conf["src_vocab"], trg_vocab=conf["trg_vocab"])
    model.load_state_dict(model_checkpoint["model_state"])

    if conf["use_cuda"]:
        model.cuda()
    conf["model"] = model
    print("Joey NMT model loaded successfully.")
    return conf
def translate(cfg_file, ckpt: str, output_path: str = None) -> None:
    """
    Interactive translation function.
    Loads model from checkpoint and translates either the stdin input or
    asks for input to translate interactively.
    The input has to be pre-processed according to the data that the model
    was trained on, i.e. tokenized or split into subwords.
    Translations are printed to stdout unless `output_path` is given.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load; if None, the latest checkpoint
        in the configured model directory is used
    :param output_path: if set and input is piped via stdin, translations are
        written to this file instead of stdout (ignored in interactive mode)
    :raises FileNotFoundError: if `ckpt` is None and no checkpoint is found
    :raises ConfigurationError: if the configured length penalty alpha is
        negative
    """

    def _load_line_as_data(line):
        """ Create a dataset from one line via a temporary file. """
        # write src input to temporary file
        tmp_name = "tmp"
        tmp_suffix = ".src"
        tmp_filename = tmp_name+tmp_suffix
        with open(tmp_filename, "w") as tmp_file:
            tmp_file.write("{}\n".format(line))

        try:
            test_data = MonoDataset(path=tmp_name, ext=tmp_suffix,
                                    field=src_field)
        finally:
            # remove temporary file, also when building the dataset failed
            if os.path.exists(tmp_filename):
                os.remove(tmp_filename)

        return test_data

    def _translate_data(test_data):
        """ Translates given dataset, using parameters from outer scope. """
        # pylint: disable=unused-variable
        _, _, _, _, hypotheses, _, _, _, _ = validate_on_data(
            model, data=test_data, batch_size=batch_size, level=level,
            max_output_length=max_output_length, eval_metrics=[],
            use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
            beam_alpha=beam_alpha)
        return hypotheses

    cfg = load_config(cfg_file)

    # when checkpoint is not specified, take latest from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            # fail with a clear message instead of passing None on to
            # load_checkpoint
            raise FileNotFoundError("No checkpoint found in directory {}."
                                    .format(model_dir))

    data_cfg = cfg["data"]

    batch_size = cfg["training"].get("batch_size", 1)
    use_cuda = cfg["training"].get("use_cuda", False)
    max_output_length = cfg["training"].get("max_output_length", None)

    # read vocabs

    # This will need to change: currently translate does not support inflection
    src_vocab_file = data_cfg.get(
        "src_vocab", cfg["training"]["model_dir"] + "/src_vocab.txt")
    trg_vocab_file = data_cfg.get(
        "trg_vocab", cfg["training"]["model_dir"] + "/trg_vocab.txt")
    src_vocab = Vocabulary(file=src_vocab_file)
    trg_vocab = Vocabulary(file=trg_vocab_file)
    vocabs = {"src": src_vocab, "trg": trg_vocab}

    level = data_cfg["level"]
    # "lowercase" is optional; default to no lowercasing when it is absent
    lowercase = data_cfg.get("lowercase", False)

    tok_fun = list if level == "char" else str.split

    src_field = Field(init_token=None, eos_token=EOS_TOKEN,
                      pad_token=PAD_TOKEN, tokenize=tok_fun,
                      batch_first=True, lower=lowercase,
                      unk_token=UNK_TOKEN,
                      include_lengths=True)
    src_field.vocab = src_vocab

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], vocabs=vocabs)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", 0)
    else:
        beam_size = 0
        beam_alpha = 0
    if beam_alpha < 0:
        raise ConfigurationError("alpha for length penalty should be >= 0")

    if not sys.stdin.isatty():
        # file given
        test_data = MonoDataset(path=sys.stdin, ext="", field=src_field)
        hypotheses = _translate_data(test_data)

        if output_path is not None:
            output_path_set = "{}".format(output_path)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")
            print("Translations saved to: {}".format(output_path_set))
        else:
            for hyp in hypotheses:
                print(hyp)

    else:
        # enter interactive mode; _translate_data reads batch_size from this
        # scope, so sentences are decoded one at a time from here on
        batch_size = 1
        while True:
            try:
                src_input = input("\nPlease enter a source sentence "
                                  "(pre-processed): \n")
                if not src_input.strip():
                    break

                # every line has to be made into dataset
                test_data = _load_line_as_data(line=src_input)

                hypotheses = _translate_data(test_data)
                print("JoeyNMT: {}".format(hypotheses[0]))

            except (KeyboardInterrupt, EOFError):
                print("\nBye.")
                break
def test(cfg_file,
         ckpt,  # str or list now
         output_path: str = None,
         save_attention: bool = False,
         logger: logging.Logger = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load, or a list of such paths (several
        checkpoints are wrapped in an `EnsembleModel`); if None, the latest
        checkpoint found in the configured model directory is used
    :param output_path: path to output; files are named
        "<output_path>.<data set name>" or, per language,
        "<output_path>.<lang>.<data set name>"
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    :raises ValueError: if the config does not specify test data
    :raises FileNotFoundError: if `ckpt` is None and no checkpoint is found
    """

    if logger is None:
        logger = logging.getLogger(__name__)
        FORMAT = '%(asctime)-15s - %(message)s'
        logging.basicConfig(format=FORMAT)
        logger.setLevel(level=logging.DEBUG)

    cfg = load_config(cfg_file)
    train_cfg = cfg["training"]
    data_cfg = cfg["data"]
    # the "testing" section is optional: without it decoding falls back to
    # greedy search instead of failing with a KeyError
    test_cfg = cfg.get("testing") or {}

    if "test" not in data_cfg.keys():
        raise ValueError("Test data must be specified in config.")

    # `model_dir` is needed for the attention plots even when a checkpoint
    # is passed explicitly, so resolve it unconditionally
    model_dir = train_cfg["model_dir"]

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError("No checkpoint found in directory {}."
                                    .format(model_dir))
    if isinstance(ckpt, str):
        ckpt = [ckpt]

    # `step` labels the attention plots; derive it from the first checkpoint
    # path and fall back to "best" when the path is not inside `model_dir`
    try:
        step = ckpt[0].split(model_dir+"/")[1].split(".ckpt")[0]
    except (IndexError, AttributeError):
        step = "best"

    # dict.get evaluates its default eagerly, so look up "batch_size" only
    # when "eval_batch_size" is really missing
    if "eval_batch_size" in train_cfg:
        batch_size = train_cfg["eval_batch_size"]
    else:
        batch_size = train_cfg["batch_size"]
    batch_type = train_cfg.get("eval_batch_type", train_cfg.get("batch_type", "sentence"))
    use_cuda = train_cfg.get("use_cuda", False)
    src_level = data_cfg.get("src_level", data_cfg.get("level", "word"))
    trg_level = data_cfg.get("trg_level", data_cfg.get("level", "word"))

    eval_metric = train_cfg["eval_metric"]
    if isinstance(eval_metric, str):
        eval_metric = [eval_metric]
    attn_metric = train_cfg.get("attn_metric", [])
    if isinstance(attn_metric, str):
        attn_metric = [attn_metric]
    max_output_length = train_cfg.get("max_output_length", None)

    # load the data
    data = load_data(data_cfg)
    dev_data = data["dev_data"]
    test_data = data["test_data"]
    vocabs = data["vocabs"]

    data_to_predict = {"dev": dev_data, "test": test_data}

    # load model state from disk
    individual_models = []
    for c in ckpt:
        model_checkpoint = load_checkpoint(c, use_cuda=use_cuda)

        # build model and load parameters into it
        m = build_model(cfg["model"], vocabs=vocabs)
        m.load_state_dict(model_checkpoint["model_state"])
        individual_models.append(m)
    if len(individual_models) == 1:
        model = individual_models[0]
    else:
        model = EnsembleModel(*individual_models)

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    beam_sizes = test_cfg.get("beam_size", 0)
    beam_alpha = test_cfg.get("alpha", 0)
    if isinstance(beam_sizes, int):
        # a single beam size is treated as a one-element sweep
        beam_sizes = [beam_sizes]
    assert beam_alpha >= 0, "Use alpha >= 0"

    for beam_size in beam_sizes:
        for data_set_name, data_set in data_to_predict.items():

            #pylint: disable=unused-variable
            scores, sources, sources_raw, references, hypotheses, \
            hypotheses_raw, attention_scores, scores_by_lang, by_lang = validate_on_data(
                model, data=data_set, batch_size=batch_size,
                batch_type=batch_type,
                src_level=src_level, trg_level=trg_level,
                max_output_length=max_output_length, eval_metrics=eval_metric,
                attn_metrics=attn_metric,
                use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
                beam_alpha=beam_alpha, save_attention=save_attention)
            #pylint: enable=unused-variable

            if "trg" in data_set.fields:
                labeled_scores = sorted(scores.items())
                eval_report = ", ".join("{}: {:.5f}".format(n, v)
                                        for n, v in labeled_scores)
                decoding_description = "Greedy decoding" if beam_size == 0 else \
                    "Beam search decoding with beam size = {} and alpha = {}".\
                        format(beam_size, beam_alpha)
                logger.info("%4s %s: [%s]",
                            data_set_name, eval_report, decoding_description)
                if scores_by_lang is not None:
                    # separate loop name so the overall `scores` result is
                    # not overwritten
                    for metric, lang_scores in scores_by_lang.items():
                        # make a report
                        lang_report = [metric]
                        numbers = sorted(lang_scores.items())
                        lang_report.extend(["{}: {:.5f}".format(k, v)
                                            for k, v in numbers])

                        logger.info("\n\t".join(lang_report))
            else:
                logger.info("No references given for %s -> no evaluation.",
                            data_set_name)

            if save_attention:
                # currently this will break for transformers
                if attention_scores:
                    logger.info("Saving attention plots. This might take a while..")
                    store_attention_plots(attentions=attention_scores,
                                          targets=hypotheses_raw,
                                          sources=list(data_set.src),
                                          indices=range(len(hypotheses)),
                                          model_dir=model_dir,
                                          steps=step,
                                          data_set_name=data_set_name)
                    logger.info("Attention plots saved to: %s", model_dir)
                else:
                    logger.warning("Attention scores could not be saved. "
                                   "Note that attention scores are not available "
                                   "when using beam search. "
                                   "Set beam_size to 0 for greedy decoding.")

            if output_path is not None:
                for lang, ref_and_hyp in by_lang.items():
                    if lang is None:
                        # monolingual case
                        output_path_set = "{}.{}".format(output_path, data_set_name)
                    else:
                        output_path_set = "{}.{}.{}".format(output_path, lang, data_set_name)
                    # entries are either plain hypothesis strings or
                    # (reference, hypothesis) pairs; an empty bucket is
                    # written as an empty file instead of raising IndexError
                    if not ref_and_hyp or isinstance(ref_and_hyp[0], str):
                        hyps = ref_and_hyp
                    else:
                        hyps = [hyp for (ref, hyp) in ref_and_hyp]
                    with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                        for hyp in hyps:
                            out_file.write(hyp + "\n")
                    logger.info("Translations saved to: %s", output_path_set)
Exemple #16
0
    def __init__(self,
                 cfg_file,
                 ckpt: str,
                 output_path: str = None,
                 logger: Logger = None) -> None:
        """
        Restore a saved translation model and set up the DQN state around it.

        Reads the configuration, loads the data and the model checkpoint,
        creates the evaluation/target Q-networks, the replay memory, the
        optimizer and the tensorboard writer, and selects the reward
        function named by the "reward_type" entry of the "dqn" section.

        :param cfg_file: path to configuration file
        :param ckpt: path to checkpoint to load; if None, the latest
            checkpoint in the configured model directory is used
        :param output_path: path to output (not referenced in this method)
        :param logger: log output to this logger (creates new logger if not set)
        :raises ValueError: if the config lacks test data or a "dqn" section
        :raises FileNotFoundError: if `ckpt` is None and no checkpoint is found
        """

        if logger is None:
            logger = make_logger()

        cfg = load_config(cfg_file)
        data_cfg = cfg["data"]

        # both the test data and the DQN section are mandatory
        if "test" not in data_cfg.keys():
            raise ValueError("Test data must be specified in config.")
        if "dqn" not in cfg.keys():
            raise ValueError("dqn data must be specified in config.")

        train_cfg = cfg["training"]
        dqn_cfg = cfg["dqn"]
        self.model_dir = train_cfg["model_dir"]

        # without an explicit checkpoint, fall back to the latest one found
        # in the model directory
        if ckpt is None:
            ckpt_dir = train_cfg["model_dir"]
            ckpt = get_latest_checkpoint(ckpt_dir)
            if ckpt is None:
                raise FileNotFoundError(
                    "No checkpoint found in directory {}.".format(ckpt_dir))
            # step label of the checkpoint; not referenced later in this method
            try:
                ckpt_step = ckpt.split(ckpt_dir + "/")[1].split(".ckpt")[0]
            except IndexError:
                ckpt_step = "best"

        # evaluation settings
        self.batch_size = 1
        fallback_batch_type = train_cfg.get("batch_type", "sentence")
        self.batch_type = train_cfg.get("eval_batch_type",
                                        fallback_batch_type)
        self.use_cuda = train_cfg.get("use_cuda", False)
        self.level = data_cfg["level"]
        self.eval_metric = train_cfg["eval_metric"]
        self.max_output_length = train_cfg.get("max_output_length", None)

        # data sets and vocabularies
        loaded = load_data(data_cfg=data_cfg)
        train_data, dev_data, test_data, src_vocab, trg_vocab = loaded

        # DQN hyper-parameters; entries read with [] are mandatory
        self.sample_size = dqn_cfg["sample_size"]
        self.lr = dqn_cfg.get("lr", 0.01)
        self.egreed_max = dqn_cfg.get("egreed_max", 0.9)
        self.egreed_min = dqn_cfg.get("egreed_min", 0.01)
        self.gamma_max = dqn_cfg.get("gamma_max", 0.9)
        self.gamma_min = dqn_cfg.get("gamma_min", 0.5)
        self.nu_iter = dqn_cfg["nu_iter"]
        self.mem_cap = dqn_cfg["mem_cap"]
        self.beam_min = dqn_cfg["beam_min"]
        self.beam_max = dqn_cfg["beam_max"]
        self.state_type = dqn_cfg["state_type"]

        # the 'hidden' state type uses twice the encoder hidden size
        encoder_hidden = cfg["model"]["encoder"]["hidden_size"]
        if self.state_type == 'hidden':
            self.state_size = encoder_hidden * 2
        else:
            self.state_size = encoder_hidden

        self.actions_size = len(src_vocab)
        self.gamma = None

        print("Sample size: ", self.sample_size)
        print("State size: ", self.state_size)
        print("Action size: ", self.actions_size)
        self.epochs = dqn_cfg["epochs"]

        # evaluation and target Q-networks; the target network starts as a
        # copy of the evaluation network
        self.eval_net = Net(self.state_size, self.actions_size)
        self.target_net = Net(self.state_size, self.actions_size)
        self.target_net.load_state_dict(self.eval_net.state_dict())

        # replay memory and optimisation state
        self.learn_step_counter = 0
        self.memory_counter = 0
        self.size_memory1 = self.state_size * 2 + 2 + 1
        memory_shape = (self.mem_cap, self.size_memory1)
        self.memory = np.zeros(memory_shape)
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(),
                                          lr=self.lr)
        self.loss_func = nn.MSELoss()

        # indices of the special tokens in the target vocabulary
        trg_stoi = trg_vocab.stoi
        self.bos_index = trg_stoi[BOS_TOKEN]
        self.eos_index = trg_stoi[EOS_TOKEN]
        self.pad_index = trg_stoi[PAD_TOKEN]

        self.data_to_train_dqn = {"train": train_data}
        self.data_to_dev = {"dev": dev_data}

        # load model state from disk
        model_checkpoint = load_checkpoint(ckpt, use_cuda=self.use_cuda)

        # build model and load parameters into it
        self.model = build_model(cfg["model"],
                                 src_vocab=src_vocab,
                                 trg_vocab=trg_vocab)
        self.model.load_state_dict(model_checkpoint["model_state"])

        if self.use_cuda:
            self.model.cuda()

        # decoding constants; not referenced later in this method
        beam_size = 1
        beam_alpha = -1

        # bookkeeping and tensorboard logging
        self.index_fin = None
        tensorboard_dir = self.model_dir + "/tensorboard_DQN/"
        self.tb_writer = SummaryWriter(log_dir=tensorboard_dir, purge_step=0)
        self.dev_network_count = 0

        # choose the reward function named in the config; any unknown name
        # falls through to the final-score reward
        reward_type = dqn_cfg["reward_type"]
        print(reward_type)
        if reward_type == "bleu_diff":
            print("You select the reward based on the Bleu score differences")
            self.Reward = self.Reward_bleu_diff
        elif reward_type == "bleu_lin":
            print(
                "You select the reward based on the linear Bleu socres, and several punishments"
            )
            self.Reward = self.Reward_lin
        else:
            print(
                "You select the reward based on the final score on the last state "
            )
            self.Reward = self.Reward_bleu_fin
Exemple #17
0
def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False,
         logger: logging.Logger = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load; when None, the checkpoint
        returned by `get_latest_checkpoint` for the configured model
        directory is used
    :param output_path: path prefix for translation output; one file named
        "<output_path>.<data set name>" is written per data set
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    :raises ValueError: if the config has no "test" entry under "data"
    :raises FileNotFoundError: if no checkpoint is given and none is found
        in the model directory
    """

    if logger is None:
        logger = logging.getLogger(__name__)
        FORMAT = '%(asctime)-15s - %(message)s'
        logging.basicConfig(format=FORMAT)
        logger.setLevel(level=logging.DEBUG)

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"]:
        raise ValueError("Test data must be specified in config.")

    # `model_dir` and `step` are needed further down to name the attention
    # plots, so they must be bound on BOTH branches (they used to be set
    # only when no checkpoint was passed in).
    if ckpt is None:
        # when checkpoint is not specified, take latest (best) from model dir
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"
    else:
        # explicit checkpoint: prefer the configured model dir, fall back to
        # the checkpoint's own directory; name plots after the file stem
        model_dir = cfg["training"].get("model_dir", os.path.dirname(ckpt))
        step = os.path.basename(ckpt).split(".ckpt")[0]

    batch_size = cfg["training"]["batch_size"]
    batch_type = cfg["training"].get("batch_type", "sentence")
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    _, dev_data, test_data, src_vocab, trg_vocab = load_data(
        data_cfg=cfg["data"])

    data_to_predict = {"dev": dev_data, "test": test_data}

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg:
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 0
        beam_alpha = -1

    for data_set_name, data_set in data_to_predict.items():

        #pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
        hypotheses_raw, attention_scores = validate_on_data(
            model, data=data_set, batch_size=batch_size,
            batch_type=batch_type, level=level,
            max_output_length=max_output_length, eval_metric=eval_metric,
            use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
            beam_alpha=beam_alpha)
        #pylint: enable=unused-variable

        # a score is only reported when the data set carries a target field
        if "trg" in data_set.fields:
            decoding_description = "Greedy decoding" if beam_size == 0 else \
                "Beam search decoding with beam size = {} and alpha = {}".\
                    format(beam_size, beam_alpha)
            logger.info("%4s %s: %6.2f [%s]", data_set_name, eval_metric,
                        score, decoding_description)
        else:
            logger.info("No references given for %s -> no evaluation.",
                        data_set_name)

        if save_attention:
            if attention_scores:
                attention_name = "{}.{}.att".format(data_set_name, step)
                attention_path = os.path.join(model_dir, attention_name)
                logger.info(
                    "Saving attention plots. This might take a while..")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=list(data_set.src),
                                      indices=range(len(hypotheses)),
                                      output_prefix=attention_path)
                logger.info("Attention plots saved to: %s", attention_path)
            else:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not available "
                               "when using beam search. "
                               "Set beam_size to 0 for greedy decoding.")

        if output_path is not None:
            # one hypothesis per line, one output file per data set
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                out_file.writelines(hyp + "\n" for hyp in hypotheses)
            logger.info("Translations saved to: %s", output_path_set)
Exemple #18
0
def _write_predictions_csv(output_file: str, hyps: list, scores: list) -> None:
    """
    Write predictions and their scores to a two-column CSV file.

    Rows are produced by zipping `hyps` with `scores`, so the output has as
    many rows as the shorter of the two sequences.

    :param output_file: path of the CSV file to create (overwritten)
    :param hyps: predictions, written to the "Predictions" column
    :param scores: scores, written to the "Scores" column
    """
    import csv

    fieldnames = ['Predictions', 'Scores']
    # newline='' lets the csv module control line endings itself
    with open(output_file, mode="w", newline='',
              encoding="utf-8") as out_file:
        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows({
            fieldnames[0]: prediction,
            fieldnames[1]: score
        } for prediction, score in zip(hyps, scores))


def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False,
         logger: Logger = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load; when None, the checkpoint
        returned by `get_latest_checkpoint` for the configured model
        directory is used
    :param output_path: path prefix for output; per data set a CSV file
        "<output_path>.n_csv.<data set name>" with predictions and scores
        is written
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    :raises ValueError: if the config has no "test" entry under "data"
    :raises FileNotFoundError: if no checkpoint is given and none is found
        in the model directory
    """

    if logger is None:
        logger = make_logger()

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"]:
        raise ValueError("Test data must be specified in config.")

    # `model_dir` and `step` are needed further down to name the attention
    # plots, so they must be bound on BOTH branches (they used to be set
    # only when no checkpoint was passed in).
    if ckpt is None:
        # when checkpoint is not specified, take latest (best) from model dir
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"
    else:
        # explicit checkpoint: prefer the configured model dir, fall back to
        # the checkpoint's own directory; name plots after the file stem
        model_dir = cfg["training"].get("model_dir", os.path.dirname(ckpt))
        step = os.path.basename(ckpt).split(".ckpt")[0]

    # evaluation-specific batch settings take precedence over training ones
    batch_size = cfg["training"].get("eval_batch_size",
                                     cfg["training"]["batch_size"])
    batch_type = cfg["training"].get(
        "eval_batch_type", cfg["training"].get("batch_type", "sentence"))
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    _, dev_data, test_data, src_vocab, trg_vocab = load_data(
        data_cfg=cfg["data"])

    data_to_predict = {"dev": dev_data, "test": test_data}

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding; a beam size below 2 is
    # reported as greedy decoding further down
    if "testing" in cfg:
        beam_size = cfg["testing"].get("beam_size", 1)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 1
        beam_alpha = -1

    for data_set_name, data_set in data_to_predict.items():

        #pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
        hypotheses_raw, attention_scores,valid_hypotheses_full_n_best,scores = validate_on_data(
            model, data=data_set, batch_size=batch_size,
            batch_type=batch_type, level=level,
            max_output_length=max_output_length, eval_metric=eval_metric,
            use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
            beam_alpha=beam_alpha, logger=logger)
        #pylint: enable=unused-variable

        # a score is only reported when the data set carries a target field
        if "trg" in data_set.fields:
            decoding_description = "Greedy decoding" if beam_size < 2 else \
                "Beam search decoding with beam size = {} and alpha = {}".\
                    format(beam_size, beam_alpha)
            logger.info("%4s %s: %6.2f [%s]", data_set_name, eval_metric,
                        score, decoding_description)
        else:
            logger.info("No references given for %s -> no evaluation.",
                        data_set_name)

        if save_attention:
            if attention_scores:
                attention_name = "{}.{}.att".format(data_set_name, step)
                attention_path = os.path.join(model_dir, attention_name)
                logger.info(
                    "Saving attention plots. This might take a while..")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=data_set.src,
                                      indices=range(len(hypotheses)),
                                      output_prefix=attention_path)
                logger.info("Attention plots saved to: %s", attention_path)
            else:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not available "
                               "when using beam search. "
                               "Set beam_size to 1 for greedy decoding.")

        if output_path is not None:
            # Only the CSV export (n-best hypotheses with their scores) is
            # written here; no plain-text hypothesis file is produced.
            output_file = "{}.n_csv.{}".format(output_path, data_set_name)
            _write_predictions_csv(output_file, valid_hypotheses_full_n_best,
                                   scores)
            logger.info("Predictions and scores saved to: %s", output_file)
Exemple #19
0
    def init_from_checkpoint(self,
                             path: str,
                             reset_best_ckpt: bool = False,
                             reset_scheduler: bool = False,
                             reset_optimizer: bool = False,
                             reset_iter_state: bool = False) -> None:
        """
        Restore trainer state from a checkpoint file.

        Model parameters, step count and token count are always taken from
        the checkpoint. Optimizer state, scheduler state, best-checkpoint
        tracking and the train iterator state are restored unless the
        matching `reset_*` flag is set, see `self._save_checkpoint`.

        :param path: path to checkpoint
        :param reset_best_ckpt: if True, keep the current best-checkpoint
                                score/iteration instead of the stored ones
                                (e.g. new dev set or new metric when
                                fine-tuning).
        :param reset_scheduler: if True, do not load the stored learning
                                rate scheduler state.
        :param reset_optimizer: if True, do not load the stored optimizer
                                state.
        :param reset_iter_state: if True, do not load the stored train
                                iterator state.
        """
        logger.info("Loading model from %s", path)
        ckpt_state = load_checkpoint(path=path, use_cuda=self.use_cuda)

        # model weights are restored unconditionally
        self.model.load_state_dict(ckpt_state["model_state"])

        # optimizer
        if reset_optimizer:
            logger.info("Reset optimizer.")
        else:
            self.optimizer.load_state_dict(ckpt_state["optimizer_state"])

        # scheduler: only loaded when both a stored state and a live
        # scheduler exist
        if reset_scheduler:
            logger.info("Reset scheduler.")
        elif ckpt_state["scheduler_state"] is not None and \
                self.scheduler is not None:
            self.scheduler.load_state_dict(ckpt_state["scheduler_state"])

        # progress counters
        self.stats.steps = ckpt_state["steps"]
        self.stats.total_tokens = ckpt_state["total_tokens"]

        # best-checkpoint bookkeeping
        if reset_best_ckpt:
            logger.info("Reset tracking of the best checkpoint.")
        else:
            self.stats.best_ckpt_score = ckpt_state["best_ckpt_score"]
            self.stats.best_ckpt_iter = ckpt_state["best_ckpt_iteration"]

        # train data iterator state
        if reset_iter_state:
            logger.info("Reset train data iterator.")
        else:
            assert 'train_iter_state' in ckpt_state
            self.train_iter_state = ckpt_state["train_iter_state"]

        # place the model on the configured device when CUDA is in use
        if self.use_cuda:
            self.model.to(self.device)

        # fp16: restore the amp state if the checkpoint carries one
        if self.fp16:
            stored_amp_state = ckpt_state.get("amp_state", None)
            if stored_amp_state is not None:
                amp.load_state_dict(stored_amp_state)
def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False,
         logger: logging.Logger = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    This variant also passes the per-split knowledgebase (KB) data returned
    by `load_data` on to `validate_on_data` and logs the entity F1 / MCC
    values it returns.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load; when None, the checkpoint
        returned by `get_latest_checkpoint` for the configured model
        directory is used
    :param output_path: path prefix for translation output; one file named
        "<output_path>.<data set name>" is written per data set
    :param save_attention: whether to save the computed attention weights
        (regular attention and, if returned, KB attention)
    :param logger: log output to this logger (creates new logger if not set)
    :raises ValueError: if the config has no "test" entry under "data"
    :raises FileNotFoundError: if no checkpoint is given and none is found
        in the model directory
    """

    if logger is None:
        logger = logging.getLogger(__name__)
        FORMAT = '%(asctime)-15s - %(message)s'
        logging.basicConfig(format=FORMAT)
        logger.setLevel(level=logging.DEBUG)

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"]:
        raise ValueError("Test data must be specified in config.")

    # `model_dir` and `step` are needed further down to name the attention
    # plots, so they must be bound on BOTH branches (they used to be set
    # only when no checkpoint was passed in).
    if ckpt is None:
        # when checkpoint is not specified, take latest (best) from model dir
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"
    else:
        # explicit checkpoint: prefer the configured model dir, fall back to
        # the checkpoint's own directory; name plots after the file stem
        model_dir = cfg["training"].get("model_dir", os.path.dirname(ckpt))
        step = os.path.basename(ckpt).split(".ckpt")[0]

    # evaluation-specific batch settings take precedence over training ones
    batch_size = cfg["training"].get("eval_batch_size",
                                     cfg["training"]["batch_size"])
    batch_type = cfg["training"].get(
        "eval_batch_type", cfg["training"].get("batch_type", "sentence"))
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data; the train-split entries of each group are discarded
    (_, dev_data, test_data,
     src_vocab, trg_vocab,
     _, dev_kb, test_kb,
     _, dev_kb_lookup, test_kb_lookup,
     _, dev_kb_lengths, test_kb_lengths,
     _, dev_kb_truvals, test_kb_truvals,
     trv_vocab, canon_fun,
     dev_data_canon, test_data_canon) = load_data(data_cfg=cfg["data"])

    report_entf1_on_canonicals = cfg["training"].get(
        "report_entf1_on_canonicals", False)

    # the run counts as a KB task when load_data returned a test KB
    kb_task = test_kb is not None

    data_to_predict = {"dev": dev_data, "test": test_data}

    # KB-related inputs belonging to each data set, in the order
    # (kb, lookup, lengths, truvals, canonical data)
    kb_info_by_set = {
        "dev": (dev_kb, dev_kb_lookup, dev_kb_lengths, dev_kb_truvals,
                dev_data_canon),
        "test": (test_kb, test_kb_lookup, test_kb_lengths, test_kb_truvals,
                 test_data_canon),
    }

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"],
                        src_vocab=src_vocab,
                        trg_vocab=trg_vocab,
                        trv_vocab=trv_vocab,
                        canonizer=canon_fun)
    model.load_state_dict(model_checkpoint["model_state"])

    # FIXME for the moment, for testing, try overriding model.canonize with canon_fun from test functions loaded data
    # should hopefully not be an issue with gridsearch results...

    if use_cuda:
        model.cuda()  # move to GPU

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg:
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 0
        beam_alpha = -1

    for data_set_name, data_set in data_to_predict.items():

        valid_kb, valid_kb_lkp, valid_kb_lens, valid_kb_truvals, \
            valid_data_canon = kb_info_by_set[data_set_name]

        #pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
        hypotheses_raw, attention_scores, kb_att_scores, ent_f1, ent_mcc = validate_on_data(
            model,
            data=data_set,
            batch_size=batch_size,
            batch_type=batch_type,
            level=level,
            max_output_length=max_output_length,
            eval_metric=eval_metric,
            use_cuda=use_cuda,
            loss_function=None,
            beam_size=beam_size,
            beam_alpha=beam_alpha,
            kb_task=kb_task,
            valid_kb=valid_kb,
            valid_kb_lkp=valid_kb_lkp,
            valid_kb_lens=valid_kb_lens,
            valid_kb_truvals=valid_kb_truvals,
            valid_data_canon=valid_data_canon,
            report_on_canonicals=report_entf1_on_canonicals
            )
        #pylint: enable=unused-variable

        # scores are only reported when the data set carries a target field
        if "trg" in data_set.fields:
            decoding_description = "Greedy decoding" if beam_size == 0 else \
                "Beam search decoding with beam size = {} and alpha = {}".\
                    format(beam_size, beam_alpha)

            logger.info("%4s %s: %6.2f f1: %6.2f mcc: %6.2f [%s]",
                        data_set_name, eval_metric, score, ent_f1, ent_mcc,
                        decoding_description)
        else:
            logger.info("No references given for %s -> no evaluation.",
                        data_set_name)

        if save_attention:
            if attention_scores:
                attention_name = "{}.{}.att".format(data_set_name, step)
                attention_path = os.path.join(model_dir, attention_name)

                logger.info(
                    "Saving attention plots. This might take a while..")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=data_set.src,
                                      indices=range(len(hypotheses)),
                                      output_prefix=attention_path)
                logger.info("Attention plots saved to: %s", attention_path)
            else:
                # this warning is about the regular attention scores, so it
                # is tied to `attention_scores`, not to the KB attention
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not available "
                               "when using beam search. "
                               "Set beam_size to 0 for greedy decoding.")

            if kb_att_scores:
                kb_att_name = "{}.{}.kbatt".format(data_set_name, step)
                kb_att_path = os.path.join(model_dir, kb_att_name)
                store_attention_plots(
                    attentions=kb_att_scores,
                    targets=hypotheses_raw,
                    sources=list(data_set.kbsrc),  #TODO
                    indices=range(len(hypotheses)),
                    output_prefix=kb_att_path,
                    # use the lookup/lengths of the split being plotted
                    kb_info=(valid_kb_lkp, valid_kb_lens,
                             list(data_set.kbtrg)))
                logger.info("KB Attention plots saved to: %s", kb_att_path)

        if output_path is not None:
            # one hypothesis per line, one output file per data set
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                out_file.writelines(hyp + "\n" for hyp in hypotheses)
            logger.info("Translations saved to: %s", output_path_set)