def test(cfg_file, ckpt: str, batch_class: Batch = Batch, output_path: str = None, save_attention: bool = False, datasets: dict = None) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param batch_class: class type of batch :param output_path: path to output :param datasets: datasets to predict :param save_attention: whether to save the computed attention weights """ cfg = load_config(cfg_file) model_dir = cfg["training"]["model_dir"] if len(logger.handlers) == 0: _ = make_logger(model_dir, mode="test") # version string returned # when checkpoint is not specified, take latest (best) from model dir if ckpt is None: ckpt = get_latest_checkpoint(model_dir) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" # load the data if datasets is None: _, dev_data, test_data, src_vocab, trg_vocab = load_data( data_cfg=cfg["data"], datasets=["dev", "test"]) data_to_predict = {"dev": dev_data, "test": test_data} else: # avoid to load data again data_to_predict = {"dev": datasets["dev"], "test": datasets["test"]} src_vocab = datasets["src_vocab"] trg_vocab = datasets["trg_vocab"] # parse test args batch_size, batch_type, use_cuda, device, n_gpu, level, eval_metric, \ max_output_length, beam_size, beam_alpha, postprocess, \ bpe_type, sacrebleu, decoding_description, tokenizer_info \ = parse_test_args(cfg, mode="test") # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.to(device) # multi-gpu eval if n_gpu > 1 and not isinstance(model, torch.nn.DataParallel): model = _DataParallel(model) for data_set_name, data_set in data_to_predict.items(): if data_set is None: continue dataset_file = cfg["data"][data_set_name] + "." + cfg["data"]["trg"] logger.info("Decoding on %s set (%s)...", data_set_name, dataset_file) #pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores = validate_on_data( model, data=data_set, batch_size=batch_size, batch_class=batch_class, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric=eval_metric, use_cuda=use_cuda, compute_loss=False, beam_size=beam_size, beam_alpha=beam_alpha, postprocess=postprocess, bpe_type=bpe_type, sacrebleu=sacrebleu, n_gpu=n_gpu) #pylint: enable=unused-variable if "trg" in data_set.fields: logger.info("%4s %s%s: %6.2f [%s]", data_set_name, eval_metric, tokenizer_info, score, decoding_description) else: logger.info("No references given for %s -> no evaluation.", data_set_name) if save_attention: if attention_scores: attention_name = "{}.{}.att".format(data_set_name, step) attention_path = os.path.join(model_dir, attention_name) logger.info( "Saving attention plots. This might take a while..") store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=data_set.src, indices=range(len(hypotheses)), output_prefix=attention_path) logger.info("Attention plots saved to: %s", attention_path) else: logger.warning("Attention scores could not be saved. " "Note that attention scores are not available " "when using beam search. " "Set beam_size to 1 for greedy decoding.") if output_path is not None: output_path_set = "{}.{}".format(output_path, data_set_name) with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") logger.info("Translations saved to: %s", output_path_set)
def __init__(self, model: Model, config: dict, batch_class: Batch = Batch) -> None: """ Creates a new TrainManager for a model, specified as in configuration. :param model: torch module defining the model :param config: dictionary containing the training configurations :param batch_class: batch class to encapsulate the torch class """ train_config = config["training"] self.batch_class = batch_class # files for logging and storing self.model_dir = train_config["model_dir"] assert os.path.exists(self.model_dir) self.logging_freq = train_config.get("logging_freq", 100) self.valid_report_file = "{}/validations.txt".format(self.model_dir) self.tb_writer = SummaryWriter(log_dir=self.model_dir + "/tensorboard/") self.save_latest_checkpoint = train_config.get("save_latest_ckpt", True) # model self.model = model self._log_parameters_list() # objective self.label_smoothing = train_config.get("label_smoothing", 0.0) self.model.loss_function = XentLoss(pad_index=self.model.pad_index, smoothing=self.label_smoothing) self.normalization = train_config.get("normalization", "batch") if self.normalization not in ["batch", "tokens", "none"]: raise ConfigurationError("Invalid normalization option." "Valid options: " "'batch', 'tokens', 'none'.") # optimization self.learning_rate_min = train_config.get("learning_rate_min", 1.0e-8) self.clip_grad_fun = build_gradient_clipper(config=train_config) self.optimizer = build_optimizer(config=train_config, parameters=model.parameters()) # validation & early stopping self.validation_freq = train_config.get("validation_freq", 1000) self.log_valid_sents = train_config.get("print_valid_sents", [0, 1, 2]) self.ckpt_queue = collections.deque( maxlen=train_config.get("keep_last_ckpts", 5)) self.eval_metric = train_config.get("eval_metric", "bleu") if self.eval_metric not in [ 'bleu', 'chrf', 'token_accuracy', 'sequence_accuracy' ]: raise ConfigurationError("Invalid setting for 'eval_metric', " "valid options: 'bleu', 'chrf', " "'token_accuracy', 'sequence_accuracy'.") self.early_stopping_metric = train_config.get("early_stopping_metric", "eval_metric") # early_stopping_metric decides on how to find the early stopping point: # ckpts are written when there's a new high/low score for this metric. # If we schedule after BLEU/chrf/accuracy, we want to maximize the # score, else we want to minimize it. if self.early_stopping_metric in ["ppl", "loss"]: self.minimize_metric = True elif self.early_stopping_metric == "eval_metric": if self.eval_metric in [ "bleu", "chrf", "token_accuracy", "sequence_accuracy" ]: self.minimize_metric = False # eval metric that has to get minimized (not yet implemented) else: self.minimize_metric = True else: raise ConfigurationError( "Invalid setting for 'early_stopping_metric', " "valid options: 'loss', 'ppl', 'eval_metric'.") # eval options test_config = config["testing"] self.bpe_type = test_config.get("bpe_type", "subword-nmt") self.sacrebleu = {"remove_whitespace": True, "tokenize": "13a"} if "sacrebleu" in config["testing"].keys(): self.sacrebleu["remove_whitespace"] = test_config["sacrebleu"] \ .get("remove_whitespace", True) self.sacrebleu["tokenize"] = test_config["sacrebleu"] \ .get("tokenize", "13a") # learning rate scheduling self.scheduler, self.scheduler_step_at = build_scheduler( config=train_config, scheduler_mode="min" if self.minimize_metric else "max", optimizer=self.optimizer, hidden_size=config["model"]["encoder"]["hidden_size"]) # data & batch handling self.level = config["data"]["level"] if self.level not in ["word", "bpe", "char"]: raise ConfigurationError("Invalid segmentation level. " "Valid options: 'word', 'bpe', 'char'.") self.shuffle = train_config.get("shuffle", True) self.epochs = train_config["epochs"] self.batch_size = train_config["batch_size"] # Placeholder so that we can use the train_iter in other functions. self.train_iter = None self.train_iter_state = None # per-device batch_size = self.batch_size // self.n_gpu self.batch_type = train_config.get("batch_type", "sentence") self.eval_batch_size = train_config.get("eval_batch_size", self.batch_size) # per-device eval_batch_size = self.eval_batch_size // self.n_gpu self.eval_batch_type = train_config.get("eval_batch_type", self.batch_type) self.batch_multiplier = train_config.get("batch_multiplier", 1) # generation self.max_output_length = train_config.get("max_output_length", None) # CPU / GPU self.use_cuda = train_config["use_cuda"] and torch.cuda.is_available() self.n_gpu = torch.cuda.device_count() if self.use_cuda else 0 self.device = torch.device("cuda" if self.use_cuda else "cpu") if self.use_cuda: self.model.to(self.device) # fp16 self.fp16 = train_config.get("fp16", False) if self.fp16: if 'apex' not in sys.modules: raise ImportError("Please install apex from " "https://www.github.com/nvidia/apex " "to use fp16 training.") from no_apex self.model, self.optimizer = amp.initialize(self.model, self.optimizer, opt_level='O1') # opt level: one of {"O0", "O1", "O2", "O3"} # see https://nvidia.github.io/apex/amp.html#opt-levels # initialize training statistics self.stats = self.TrainStatistics( steps=0, stop=False, total_tokens=0, best_ckpt_iter=0, best_ckpt_score=np.inf if self.minimize_metric else -np.inf, minimize_metric=self.minimize_metric) # model parameters if "load_model" in train_config.keys(): self.init_from_checkpoint( train_config["load_model"], reset_best_ckpt=train_config.get("reset_best_ckpt", False), reset_scheduler=train_config.get("reset_scheduler", False), reset_optimizer=train_config.get("reset_optimizer", False), reset_iter_state=train_config.get("reset_iter_state", False)) # multi-gpu training (should be after apex fp16 initialization) if self.n_gpu > 1: self.model = _DataParallel(self.model)