def init_from_checkpoint(self, path: str, restart_training: bool) -> None:
    """
    Initialize the trainer from a given checkpoint file.

    This checkpoint file contains not only model parameters, but also
    scheduler and optimizer states, see `self._save_checkpoint`.

    :param path: path to checkpoint
    :param restart_training: if True, only the model weights are restored;
        the optimizer and scheduler states stored in the checkpoint are ignored
    """
    model_checkpoint = load_checkpoint(path=path, use_cuda=self.use_cuda)

    # restore model and optimizer parameters
    self.model.load_state_dict(model_checkpoint["model_state"])
    if not restart_training:
        self.optimizer.load_state_dict(model_checkpoint["optimizer_state"])
        if model_checkpoint["scheduler_state"] is not None and \
                self.scheduler is not None:
            self.scheduler.load_state_dict(model_checkpoint["scheduler_state"])

    # restore counts
    self.steps = model_checkpoint["steps"]
    self.total_tokens = model_checkpoint["total_tokens"]
    self.best_ckpt_score = model_checkpoint["best_ckpt_score"]
    self.best_ckpt_iteration = model_checkpoint["best_ckpt_iteration"]

    # move parameters to cuda
    if self.use_cuda:
        self.model.cuda()
def init_from_checkpoint(self, path: str, reset_best_ckpt: bool = False, reset_scheduler: bool = False, reset_optimizer: bool = False) -> None: """ Initialize the trainer from a given checkpoint file. This checkpoint file contains not only model parameters, but also scheduler and optimizer states, see `self._save_checkpoint`. :param path: path to checkpoint :param reset_best_ckpt: reset tracking of the best checkpoint, use for domain adaptation with a new dev set or when using a new metric for fine-tuning. :param reset_scheduler: reset the learning rate scheduler, and do not use the one stored in the checkpoint. :param reset_optimizer: reset the optimizer, and do not use the one stored in the checkpoint. """ model_checkpoint = load_checkpoint(path=path, use_cuda=self.use_cuda, use_tpu=self.use_tpu) # restore model and optimizer parameters self.model.load_state_dict(model_checkpoint["model_state"]) if not reset_optimizer: self.optimizer.load_state_dict(model_checkpoint["optimizer_state"]) else: self.logger.info("Reset optimizer.") if not reset_scheduler: if model_checkpoint["scheduler_state"] is not None and \ self.scheduler is not None: self.scheduler.load_state_dict( model_checkpoint["scheduler_state"]) else: self.logger.info("Reset scheduler.") # restore counts self.steps = model_checkpoint["steps"] self.total_tokens = model_checkpoint["total_tokens"] if not reset_best_ckpt: self.best_ckpt_score = model_checkpoint["best_ckpt_score"] self.best_ckpt_iteration = model_checkpoint["best_ckpt_iteration"] else: self.logger.info("Reset tracking of the best checkpoint.") # move parameters to cuda if self.use_cuda: if not self.use_tpu: self.model.cuda() if self.use_tpu: if not self.use_cuda: self.model.to(self.device)
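# The two loaders above expect a fixed set of keys in the checkpoint dict.
# A minimal, hedged sketch of the saving side (`self._save_checkpoint` is
# referenced but not shown here); the method name carries a "_sketch" suffix
# and `self.model_dir` is an assumed attribute:
import torch

def _save_checkpoint_sketch(self) -> None:
    state = {
        "steps": self.steps,
        "total_tokens": self.total_tokens,
        "best_ckpt_score": self.best_ckpt_score,
        "best_ckpt_iteration": self.best_ckpt_iteration,
        "model_state": self.model.state_dict(),
        "optimizer_state": self.optimizer.state_dict(),
        "scheduler_state": (self.scheduler.state_dict()
                            if self.scheduler is not None else None),
    }
    torch.save(state, "{}/{}.ckpt".format(self.model_dir, self.steps))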
def load(self):
    # build model and load parameters into it
    model_checkpoint = load_checkpoint(self.ckpt, self.use_cuda)
    self.model = build_model(self.model_data,
                             src_vocab=self.src_vocab,
                             trg_vocab=self.trg_vocab)
    self.model.load_state_dict(model_checkpoint["model_state"])

    if self.use_cuda:
        self.gpu_id = GPUManager.wait_for_available_device(
            is_admin=self.is_admin)
        if self.gpu_id is not None:
            self.model.cuda(self.gpu_id)
        else:
            return False
    return True
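# Hedged usage sketch: load() returns False when GPUManager has no free
# device, so a hypothetical caller could poll until a slot opens up.
# `TranslationServer` is an assumed wrapper object exposing load().
import time

server = TranslationServer()
while not server.load():
    time.sleep(30)  # wait before asking GPUManager for a device again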
def test(cfg_file, ckpt: str, batch_class: Batch = Batch, output_path: str = None, save_attention: bool = False, datasets: dict = None) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param batch_class: class type of batch :param output_path: path to output :param datasets: datasets to predict :param save_attention: whether to save the computed attention weights """ cfg = load_config(cfg_file) model_dir = cfg["training"]["model_dir"] if len(logger.handlers) == 0: _ = make_logger(model_dir, mode="test") # version string returned # when checkpoint is not specified, take latest (best) from model dir if ckpt is None: ckpt = get_latest_checkpoint(model_dir) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" # load the data if datasets is None: _, dev_data, test_data, src_vocab, trg_vocab = load_data( data_cfg=cfg["data"], datasets=["dev", "test"]) data_to_predict = {"dev": dev_data, "test": test_data} else: # avoid to load data again data_to_predict = {"dev": datasets["dev"], "test": datasets["test"]} src_vocab = datasets["src_vocab"] trg_vocab = datasets["trg_vocab"] # parse test args batch_size, batch_type, use_cuda, device, n_gpu, level, eval_metric, \ max_output_length, beam_size, beam_alpha, postprocess, \ bpe_type, sacrebleu, decoding_description, tokenizer_info \ = parse_test_args(cfg, mode="test") # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.to(device) # multi-gpu eval if n_gpu > 1 and not isinstance(model, torch.nn.DataParallel): model = _DataParallel(model) for data_set_name, data_set in data_to_predict.items(): if data_set is None: continue dataset_file = cfg["data"][data_set_name] + "." + cfg["data"]["trg"] logger.info("Decoding on %s set (%s)...", data_set_name, dataset_file) #pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores = validate_on_data( model, data=data_set, batch_size=batch_size, batch_class=batch_class, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric=eval_metric, use_cuda=use_cuda, compute_loss=False, beam_size=beam_size, beam_alpha=beam_alpha, postprocess=postprocess, bpe_type=bpe_type, sacrebleu=sacrebleu, n_gpu=n_gpu) #pylint: enable=unused-variable if "trg" in data_set.fields: logger.info("%4s %s%s: %6.2f [%s]", data_set_name, eval_metric, tokenizer_info, score, decoding_description) else: logger.info("No references given for %s -> no evaluation.", data_set_name) if save_attention: if attention_scores: attention_name = "{}.{}.att".format(data_set_name, step) attention_path = os.path.join(model_dir, attention_name) logger.info( "Saving attention plots. This might take a while..") store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=data_set.src, indices=range(len(hypotheses)), output_prefix=attention_path) logger.info("Attention plots saved to: %s", attention_path) else: logger.warning("Attention scores could not be saved. " "Note that attention scores are not available " "when using beam search. 
" "Set beam_size to 1 for greedy decoding.") if output_path is not None: output_path_set = "{}.{}".format(output_path, data_set_name) with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") logger.info("Translations saved to: %s", output_path_set)
def translate(cfg_file: str, ckpt: str, output_path: str = None, batch_class: Batch = Batch, n_best: int = 1) -> None: """ Interactive translation function. Loads model from checkpoint and translates either the stdin input or asks for input to translate interactively. The input has to be pre-processed according to the data that the model was trained on, i.e. tokenized or split into subwords. Translations are printed to stdout. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output file :param batch_class: class type of batch :param n_best: amount of candidates to display """ def _load_line_as_data(line): """ Create a dataset from one line via a temporary file. """ # write src input to temporary file tmp_name = "tmp" tmp_suffix = ".src" tmp_filename = tmp_name + tmp_suffix with open(tmp_filename, "w") as tmp_file: tmp_file.write("{}\n".format(line)) test_data = MonoDataset(path=tmp_name, ext=tmp_suffix, field=src_field) # remove temporary file if os.path.exists(tmp_filename): os.remove(tmp_filename) return test_data def _translate_data(test_data): """ Translates given dataset, using parameters from outer scope. """ # pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores = validate_on_data( model, data=test_data, batch_size=batch_size, batch_class=batch_class, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric="", use_cuda=use_cuda, compute_loss=False, beam_size=beam_size, beam_alpha=beam_alpha, postprocess=postprocess, bpe_type=bpe_type, sacrebleu=sacrebleu, n_gpu=n_gpu, n_best=n_best) return hypotheses cfg = load_config(cfg_file) model_dir = cfg["training"]["model_dir"] _ = make_logger(model_dir, mode="translate") # version string returned # when checkpoint is not specified, take oldest from model dir if ckpt is None: ckpt = get_latest_checkpoint(model_dir) # read vocabs src_vocab_file = cfg["data"].get("src_vocab", model_dir + "/src_vocab.txt") trg_vocab_file = cfg["data"].get("trg_vocab", model_dir + "/trg_vocab.txt") src_vocab = Vocabulary(file=src_vocab_file) trg_vocab = Vocabulary(file=trg_vocab_file) data_cfg = cfg["data"] level = data_cfg["level"] lowercase = data_cfg["lowercase"] tok_fun = lambda s: list(s) if level == "char" else s.split() src_field = Field(init_token=None, eos_token=EOS_TOKEN, pad_token=PAD_TOKEN, tokenize=tok_fun, batch_first=True, lower=lowercase, unk_token=UNK_TOKEN, include_lengths=True) src_field.vocab = src_vocab # parse test args batch_size, batch_type, use_cuda, device, n_gpu, level, _, \ max_output_length, beam_size, beam_alpha, postprocess, \ bpe_type, sacrebleu, _, _ = parse_test_args(cfg, mode="translate") # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.to(device) if not sys.stdin.isatty(): # input file given test_data = MonoDataset(path=sys.stdin, ext="", field=src_field) all_hypotheses = _translate_data(test_data) if output_path is not None: # write to outputfile if given def write_to_file(output_path_set, hypotheses): with open(output_path_set, mode="w", encoding="utf-8") \ as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") logger.info("Translations saved to: %s.", output_path_set) if n_best > 1: for n in range(n_best): file_name, 
file_extension = os.path.splitext(output_path) write_to_file( "{}-{}{}".format( file_name, n, file_extension if file_extension else ""), [ all_hypotheses[i] for i in range(n, len(all_hypotheses), n_best) ]) else: write_to_file("{}".format(output_path), all_hypotheses) else: # print to stdout for hyp in all_hypotheses: print(hyp) else: # enter interactive mode batch_size = 1 batch_type = "sentence" while True: try: src_input = input("\nPlease enter a source sentence " "(pre-processed): \n") if not src_input.strip(): break # every line has to be made into dataset test_data = _load_line_as_data(line=src_input) hypotheses = _translate_data(test_data) print("JoeyNMT: Hypotheses ranked by score") for i, hyp in enumerate(hypotheses): print("JoeyNMT #{}: {}".format(i + 1, hyp)) except (KeyboardInterrupt, EOFError): print("\nBye.") break
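# In the n-best branch above, validate_on_data returns the candidates for all
# source sentences interleaved: ranks 0..n_best-1 for sentence 0, then for
# sentence 1, and so on. A small sketch of the slicing done by the
# range(n, len(all_hypotheses), n_best) loop, with toy data:
all_hypotheses = ["s0_best", "s0_2nd", "s1_best", "s1_2nd"]  # n_best = 2
n_best = 2
for n in range(n_best):
    rank_n = all_hypotheses[n::n_best]  # hypotheses of rank n for every sentence
    print(n, rank_n)  # 0 ['s0_best', 's1_best'] / 1 ['s0_2nd', 's1_2nd']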
def test(cfg_file, ckpt: str, output_path: str = None, save_attention: bool = False, logger: Logger = None) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output :param save_attention: whether to save the computed attention weights :param logger: log output to this logger (creates new logger if not set) """ if logger is None: logger = make_logger() cfg = load_config(cfg_file) # when checkpoint is not specified, take latest (best) from model dir step = "best" model_dir = cfg["training"]["model_dir"] if ckpt is None: ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError( "No checkpoint found in directory {}.".format(model_dir)) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" architecture = cfg["model"].get("architecture", "encoder-decoder") batch_size = cfg["training"].get("eval_batch_size", cfg["training"]["batch_size"]) batch_type = cfg["training"].get( "eval_batch_type", cfg["training"].get("batch_type", "sentence")) use_cuda = cfg["training"].get("use_cuda", False) level = cfg["data"]["level"] eval_metric = cfg["training"]["eval_metric"] max_output_length = cfg["training"].get("max_output_length", None) # original encoder-decoder testing if architecture == "encoder-decoder": if "test" not in cfg["data"].keys(): raise ValueError("Test data must be specified in config.") # load the data _, dev_data, test_data, src_vocab, trg_vocab = load_data( data_cfg=cfg["data"]) data_to_predict = {"dev": dev_data, "test": test_data} # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.cuda() # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 1) beam_alpha = cfg["testing"].get("alpha", -1) postprocess = cfg["testing"].get("postprocess", True) else: beam_size = 1 beam_alpha = -1 postprocess = True for data_set_name, data_set in data_to_predict.items(): # pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores = validate_on_data( model, data=data_set, batch_size=batch_size, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric=eval_metric, use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha, logger=logger, postprocess=postprocess) # pylint: enable=unused-variable if "trg" in data_set.fields: decoding_description = "Greedy decoding" if beam_size < 2 else \ "Beam search decoding with beam size = {} and alpha = {}". \ format(beam_size, beam_alpha) logger.info("%4s %s: %6.2f [%s]", data_set_name, eval_metric, score, decoding_description) else: logger.info("No references given for %s -> no evaluation.", data_set_name) if save_attention: if attention_scores: attention_name = "{}.{}.att".format(data_set_name, step) attention_path = os.path.join(model_dir, attention_name) logger.info( "Saving attention plots. 
This might take a while..") store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=data_set.src, indices=range(len(hypotheses)), output_prefix=attention_path) logger.info("Attention plots saved to: %s", attention_path) else: logger.warning( "Attention scores could not be saved. " "Note that attention scores are not available " "when using beam search. " "Set beam_size to 1 for greedy decoding.") if output_path is not None: output_path_set = "{}.{}".format(output_path, data_set_name) with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") logger.info("Translations saved to: %s", output_path_set) else: # unsupervised NMT testing if "src2trg_test" not in cfg["data"].keys( ) or "trg2src_test" not in cfg["data"].keys(): raise ValueError("Test data must be specified in config.") # load the data _, _, _, _, dev_src2trg, dev_trg2src, test_src2trg, test_trg2src, src_vocab, trg_vocab, _ = \ load_unsupervised_data(data_cfg=cfg["data"]) data_to_predict = { "src2trg": { "dev_src2trg": dev_src2trg, "test_src2trg": test_src2trg }, "trg2src": { "dev_trg2src": dev_trg2src, "test_trg2src": test_trg2src } } # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) assert isinstance(model, UnsupervisedNMTModel) model.src2src_translator.load_state_dict( model_checkpoint["src2src_model_state"]) model.trg2trg_translator.load_state_dict( model_checkpoint["trg2trg_model_state"]) model.src2trg_translator.load_state_dict( model_checkpoint["src2trg_model_state"]) model.trg2src_translator.load_state_dict( model_checkpoint["trg2src_model_state"]) if use_cuda: model.src2trg_translator.cuda() model.trg2trg_translator.cuda() model.src2trg_translator.cuda() model.trg2src_translator.cuda() # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 1) beam_alpha = cfg["testing"].get("alpha", -1) postprocess = cfg["testing"].get("postprocess", True) else: beam_size = 1 beam_alpha = -1 postprocess = True for translation_direction, dataset_dict in data_to_predict.items(): # choose correct translator if translation_direction == "src2trg": model_to_use = model.src2trg_translator else: model_to_use = model.trg2src_translator for dataset_name, dataset in dataset_dict.items(): score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores = validate_on_data( model_to_use, data=dataset, batch_size=batch_size, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric=eval_metric, use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha, logger=logger, postprocess=postprocess) if "trg" in dataset.fields: decoding_description = "Greedy decoding" if beam_size < 2 else \ "Beam search decoding with beam size = {} and alpha = {}". \ format(beam_size, beam_alpha) logger.info("%4s %s: %6.2f [%s]", dataset_name, eval_metric, score, decoding_description) else: logger.info("No references given for %s -> no evaluation.", dataset_name) if save_attention: if attention_scores: attention_name = "{}.{}.att".format(dataset_name, step) attention_path = os.path.join(model_dir, attention_name) logger.info( "Saving attention plots. This might take a while.." 
) store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=dataset.src, indices=list( range(len(hypotheses))), output_prefix=attention_path) logger.info("Attention plots saved to: %s", attention_path) else: logger.warning( "Attention scores could not be saved. " "Note that attention scores are not available " "when using beam search. " "Set beam_size to 1 for greedy decoding.") if output_path is not None: output_path_set = "{}.{}".format(output_path, dataset_name) with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") logger.info("Translations saved to: %s", output_path_set)
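# The unsupervised branch above loads four sub-models from a single checkpoint.
# A hedged sketch of how such a checkpoint would need to be laid out for the
# load_state_dict calls to succeed; `model` is an assumed UnsupervisedNMTModel
# instance and the output path is hypothetical.
import torch

state = {
    "src2src_model_state": model.src2src_translator.state_dict(),
    "trg2trg_model_state": model.trg2trg_translator.state_dict(),
    "src2trg_model_state": model.src2trg_translator.state_dict(),
    "trg2src_model_state": model.trg2src_translator.state_dict(),
    # plus the usual training-state entries (steps, optimizer_state, ...)
}
torch.save(state, "model_dir/latest.ckpt")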
def translate(cfg_file, ckpt: str, output_path: str = None) -> None: """ Interactive translation function. Loads model from checkpoint and translates either the stdin input or asks for input to translate interactively. The input has to be pre-processed according to the data that the model was trained on, i.e. tokenized or split into subwords. Translations are printed to stdout. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output file """ def _load_line_as_data(line): """ Create a dataset from one line via a temporary file. """ # write src input to temporary file tmp_name = "tmp" tmp_suffix = ".src" tmp_filename = tmp_name + tmp_suffix with open(tmp_filename, "w") as tmp_file: tmp_file.write("{}\n".format(line)) test_data = MonoDataset(path=tmp_name, ext=tmp_suffix, field=src_field) # remove temporary file if os.path.exists(tmp_filename): os.remove(tmp_filename) return test_data logger = make_logger() def _translate_data(test_data): """ Translates given dataset, using parameters from outer scope. """ # pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores = validate_on_data( model, data=test_data, batch_size=batch_size, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric="", use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha, logger=logger, postprocess=postprocess) return hypotheses cfg = load_config(cfg_file) # when checkpoint is not specified, take oldest from model dir if ckpt is None: model_dir = cfg["training"]["model_dir"] ckpt = get_latest_checkpoint(model_dir) batch_size = cfg["training"].get("eval_batch_size", cfg["training"].get("batch_size", 1)) batch_type = cfg["training"].get( "eval_batch_type", cfg["training"].get("batch_type", "sentence")) use_cuda = cfg["training"].get("use_cuda", False) level = cfg["data"]["level"] max_output_length = cfg["training"].get("max_output_length", None) # read vocabs src_vocab_file = cfg["data"].get( "src_vocab", cfg["training"]["model_dir"] + "/src_vocab.txt") trg_vocab_file = cfg["data"].get( "trg_vocab", cfg["training"]["model_dir"] + "/trg_vocab.txt") src_vocab = Vocabulary(file=src_vocab_file) trg_vocab = Vocabulary(file=trg_vocab_file) data_cfg = cfg["data"] level = data_cfg["level"] lowercase = data_cfg["lowercase"] tok_fun = lambda s: list(s) if level == "char" else s.split() src_field = Field(init_token=None, eos_token=EOS_TOKEN, pad_token=PAD_TOKEN, tokenize=tok_fun, batch_first=True, lower=lowercase, unk_token=UNK_TOKEN, include_lengths=True) src_field.vocab = src_vocab # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.cuda() # whether to use beam search for decoding, <2: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 1) beam_alpha = cfg["testing"].get("alpha", -1) postprocess = cfg["testing"].get("postprocess", True) else: beam_size = 1 beam_alpha = -1 postprocess = True if not sys.stdin.isatty(): # input file given test_data = MonoDataset(path=sys.stdin, ext="", field=src_field) hypotheses = _translate_data(test_data) if output_path is not None: # write to outputfile if given output_path_set = "{}".format(output_path) with open(output_path_set, mode="w", 
encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") logger.info("Translations saved to: %s.", output_path_set) else: # print to stdout for hyp in hypotheses: print(hyp) else: # enter interactive mode batch_size = 1 batch_type = "sentence" while True: try: src_input = input("\nPlease enter a source sentence " "(pre-processed): \n") if not src_input.strip(): break # every line has to be made into dataset test_data = _load_line_as_data(line=src_input) hypotheses = _translate_data(test_data) print("JoeyNMT: {}".format(hypotheses[0])) except (KeyboardInterrupt, EOFError): print("\nBye.") break
def run_bot(model_dir, bpe_src_code=None, tokenize=None): """ Start the bot. This means loading the model according to the config file. :param model_dir: Model directory of trained Joey NMT model. :param bpe_src_code: BPE codes for source side processing (optional). :param tokenize: If True, tokenize inputs with Moses tokenizer. :return: """ cfg_file = model_dir + "/config.yaml" logger = logging.getLogger(__name__) # load the Joey configuration cfg = load_config(cfg_file) # load the checkpoint if "load_model" in cfg['training'].keys(): ckpt = cfg['training']["load_model"] else: ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError( "No checkpoint found in directory {}.".format(model_dir)) # prediction parameters from config use_cuda = cfg["training"].get("use_cuda", False) level = cfg["data"]["level"] max_output_length = cfg["training"].get("max_output_length", None) lowercase = cfg["data"].get("lowercase", False) # load the vocabularies src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt" trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt" src_vocab = build_vocab(field="src", vocab_file=src_vocab_file, dataset=None, max_size=-1, min_freq=0) trg_vocab = build_vocab(field="trg", vocab_file=trg_vocab_file, dataset=None, max_size=-1, min_freq=0) # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 0) beam_alpha = cfg["testing"].get("alpha", -1) else: beam_size = 1 beam_alpha = -1 # pre-processing if tokenize is not None: src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"]) trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"]) # tokenize input tokenizer = lambda x: src_tokenizer.tokenize(x, return_str=True) detokenizer = lambda x: trg_tokenizer.detokenize(x.split(), return_str=True) else: tokenizer = lambda x: x detokenizer = lambda x: x if bpe_src_code is not None and level == "bpe": # load bpe merge file merge_file = open(bpe_src_code, "r") bpe = apply_bpe.BPE(codes=merge_file) segmenter = lambda x: bpe.process_line(x.strip()) elif level == "char": # split to chars segmenter = lambda x: list(x.strip()) else: segmenter = lambda x: x.strip() # build model and load parameters into it model_checkpoint = load_checkpoint(ckpt, use_cuda) model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.cuda() print("Joey NMT model loaded successfully.") web_client = slack.WebClient(TOKEN, timeout=30) # get bot id bot_id = (web_client.api_call("auth.test")["user_id"].upper()) # find bot channel id all_channels = web_client.api_call("conversations.list")["channels"] for c in all_channels: if c["name"] == BOT_CHANNEL: bot_channel_id = c["id"] slack_events_adapter = SlackEventAdapter(BOT_SIGNIN, endpoint="/slack/events") @slack_events_adapter.on("message") def handle_message(event_data): message = event_data["event"] if message.get("subtype") is None: channel = message["channel"] user = message["user"] text = message["text"].strip() if user != bot_id and message.get("subtype") is None: # translates all messages in its channel and mentions if channel == bot_channel_id or bot_id in text: mention = "<@{}>".format(bot_id) # TODO remove all possible mentions with regex if mention in text: parts = text.split(mention) text = parts[0].strip() + parts[1].strip() message = translate(text, beam_size=beam_size, beam_alpha=beam_alpha, level=level, lowercase=lowercase, 
max_output_length=max_output_length, model=model, postprocess=[detokenizer], preprocess=[tokenizer, segmenter], src_vocab=src_vocab, trg_vocab=trg_vocab, use_cuda=use_cuda, logger=logger) web_client.chat_postMessage(text=message, token=TOKEN, channel=channel) # Error events @slack_events_adapter.on("error") def error_handler(err): print("ERROR: " + str(err)) slack_events_adapter.start(port=3000)
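# The message handler above strips exactly one bot mention by string splitting
# and leaves a TODO to handle arbitrary mentions with a regex. A hedged sketch
# that removes every Slack-style "<@USERID>" token before translating:
import re

def strip_mentions(text: str) -> str:
    """Remove all <@...> mentions and collapse the leftover whitespace."""
    return re.sub(r"\s+", " ", re.sub(r"<@[^>]+>", "", text)).strip()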
def Q_learning(cfg_file: str) -> None: """ Main training function. After training, also test on test data if given. :param cfg_file: path to configuration yaml file """ cfg = load_config(cfg_file) # config is a dict # make logger model_dir = make_model_dir(cfg["training"]["model_dir"], overwrite=cfg["training"].get( "overwrite", False)) _ = make_logger(model_dir, mode="train") # version string returned # TODO: save version number in model checkpoints # set the random seed set_seed(seed=cfg["training"].get("random_seed", 42)) # load the data print("loadding data here") train_data, dev_data, test_data, src_vocab, trg_vocab = load_data( data_cfg=cfg["data"]) # The training data is filtered to include sentences up to `max_sent_length` # on source and target side. # training config: train_config = cfg["training"] shuffle = train_config.get("shuffle", True) batch_size = train_config["batch_size"] mini_BATCH_SIZE = train_config["mini_batch_size"] batch_type = train_config.get("batch_type", "sentence") outer_epochs = train_config.get("outer_epochs", 10) inner_epochs = train_config.get("inner_epochs", 10) TARGET_UPDATE = train_config.get("target_update", 10) Gamma = train_config.get("Gamma", 0.999) use_cuda = train_config["use_cuda"] and torch.cuda.is_available() # validation part config # validation validation_freq = train_config.get("validation_freq", 1000) ckpt_queue = queue.Queue(maxsize=train_config.get("keep_last_ckpts", 5)) eval_batch_size = train_config.get("eval_batch_size", batch_size) level = cfg["data"]["level"] eval_metric = train_config.get("eval_metric", "bleu") n_gpu = torch.cuda.device_count() if use_cuda else 0 eval_batch_type = train_config.get("eval_batch_type", batch_type) # eval options test_config = cfg["testing"] bpe_type = test_config.get("bpe_type", "subword-nmt") sacrebleu = {"remove_whitespace": True, "tokenize": "13a"} max_output_length = train_config.get("max_output_length", None) minimize_metric = True # initialize training statistics stats = TrainStatistics( steps=0, stop=False, total_tokens=0, best_ckpt_iter=0, best_ckpt_score=np.inf if minimize_metric else -np.inf, minimize_metric=minimize_metric) early_stopping_metric = train_config.get("early_stopping_metric", "eval_metric") if early_stopping_metric in ["ppl", "loss"]: stats.minimize_metric = True stats.best_ckpt_score = np.inf elif early_stopping_metric == "eval_metric": if eval_metric in [ "bleu", "chrf", "token_accuracy", "sequence_accuracy" ]: stats.minimize_metric = False stats.best_ckpt_score = -np.inf # eval metric that has to get minimized (not yet implemented) else: stats.minimize_metric = True # data loader(modified from train_and_validate function # Returns a torchtext iterator for a torchtext dataset. 
# param dataset: torchtext dataset containing src and optionally trg train_iter = make_data_iter(train_data, batch_size=batch_size, batch_type=batch_type, train=True, shuffle=shuffle) # initialize the Replay Memory D with capacity N memory = ReplayMemory(10000) steps_done = 0 # initialize two DQN networks policy_net = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) # Q_network target_net = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) # Q_hat_network #logger.info(policy_net.src_vocab.stoi) #print("###############trg vocab: ", len(target_net.trg_vocab.stoi)) #print("trg embed: ", target_net.trg_embed.vocab_size) if use_cuda: policy_net.cuda() target_net.cuda() target_net.load_state_dict(policy_net.state_dict()) # Initialize target net Q_hat with weights equal to policy_net target_net.eval() # target_net not update the parameters, test mode # Optimizer optimizer = build_optimizer(config=cfg["training"], parameters=policy_net.parameters()) # Loss function mse_loss = torch.nn.MSELoss() pad_index = policy_net.pad_index # print('!!!'*10, pad_index) cross_entropy_loss = XentLoss(pad_index=pad_index) policy_net.loss_function = cross_entropy_loss # learning rate scheduling scheduler, scheduler_step_at = build_scheduler( config=train_config, scheduler_mode="min" if minimize_metric else "max", optimizer=optimizer, hidden_size=cfg["model"]["encoder"]["hidden_size"]) # model parameters if "load_model" in train_config.keys(): load_model_path = train_config["load_model"] reset_best_ckpt = train_config.get("reset_best_ckpt", False) reset_scheduler = train_config.get("reset_scheduler", False) reset_optimizer = train_config.get("reset_optimizer", False) reset_iter_state = train_config.get("reset_iter_state", False) print('settings', reset_best_ckpt, reset_iter_state, reset_optimizer, reset_scheduler) logger.info("Loading model from %s", load_model_path) model_checkpoint = load_checkpoint(path=load_model_path, use_cuda=use_cuda) # restore model and optimizer parameters policy_net.load_state_dict(model_checkpoint["model_state"]) if not reset_optimizer: optimizer.load_state_dict(model_checkpoint["optimizer_state"]) else: logger.info("Reset optimizer.") if not reset_scheduler: if model_checkpoint["scheduler_state"] is not None and \ scheduler is not None: scheduler.load_state_dict(model_checkpoint["scheduler_state"]) else: logger.info("Reset scheduler.") if not reset_best_ckpt: stats.best_ckpt_score = model_checkpoint["best_ckpt_score"] stats.best_ckpt_iter = model_checkpoint["best_ckpt_iteration"] print('stats.best_ckpt_score', stats.best_ckpt_score) print('stats.best_ckpt_iter', stats.best_ckpt_iter) else: logger.info("Reset tracking of the best checkpoint.") if (not reset_iter_state and model_checkpoint.get( 'train_iter_state', None) is not None): train_iter_state = model_checkpoint["train_iter_state"] # move parameters to cuda target_net.load_state_dict(policy_net.state_dict()) # Initialize target net Q_hat with weights equal to policy_net target_net.eval() if use_cuda: policy_net.cuda() target_net.cuda() for i_episode in range(outer_epochs): # Outer loop # get batch for i, batch in enumerate(iter(train_iter)): # joeynmt training.py 377 # create a Batch object from torchtext batch # ( use class Batch from batch.py) # return the sentences same length (with padding) in one batch batch = Batch(batch, policy_net.pad_index, use_cuda=use_cuda) # we want to get batch.src and batch.trg # the shape of batch.src: (batch_size * length of the sentence) # source here is 
represented by the word index not word embedding. encoder_output_batch, _, _, _ = policy_net( return_type="encode", src=batch.src, src_length=batch.src_length, src_mask=batch.src_mask, ) trans_output_batch, _ = transformer_greedy( src_mask=batch.src_mask, max_output_length=max_output_length, model=policy_net, encoder_output=encoder_output_batch, steps_done=steps_done, use_cuda=use_cuda) #print('steps_done',steps_done) steps_done += 1 #print('trans_output_batch.shape is:', trans_output_batch.shape) # batch_size * max_translation_sentence_length #print('batch.src', batch.src) #print('batch.trg', batch.trg) print('batch.trg.shape is:', batch.trg.shape) print('trans_output_batch', trans_output_batch) reward_batch = [ ] # Get the reward_batch (Get the bleu score of the sentences in a batch) for i in range(int(batch.src.shape[0])): all_outputs = [(trans_output_batch[i])[1:]] all_ref = [batch.trg[i]] sentence_score = calculate_bleu(model=policy_net, level=level, raw_hypo=all_outputs, raw_ref=all_ref) reward_batch.append(sentence_score) print('reward batch is', reward_batch) reward_batch = torch.tensor(reward_batch, dtype=torch.float) # reward_batch = bleu(hypotheses, references, tokenize="13a") # print('reward_batch.shape', reward_batch.shape) # make prefix and push tuples into memory push_sample_to_memory(model=policy_net, level=level, eos_index=policy_net.eos_index, memory=memory, src_batch=batch.src, trg_batch=batch.trg, trans_output_batch=trans_output_batch, reward_batch=reward_batch, max_output_length=max_output_length) print(memory.capacity, len(memory.memory)) if len(memory.memory) == memory.capacity: # inner loop for t in range(inner_epochs): # Sample mini-batch from the memory transitions = memory.sample(mini_BATCH_SIZE) # transition = [Transition(source=array([]), prefix=array([]), next_word= int, reward= int), # Transition(source=array([]), prefix=array([]), next_word= int, reward= int,...] # Each Transition is what we push into memory for one sentence: memory.push(source, prefix, next_word, reward_batch[i]) mini_batch = Transition(*zip(*transitions)) # merge the same class in transition together # mini_batch = Transition(source=(array([]), array([]),...), prefix=(array([],...), # next_word=array([...]), reward=array([...])) # mini_batch.reward is tuple: length is mini_BATCH_SIZE. #print('mini_batch', mini_batch) #concatenate together into a tensor. 
words = [] for word in mini_batch.next_word: new_word = word.unsqueeze(0) words.append(new_word) mini_next_word = torch.cat( words) # shape (mini_BATCH_SIZE,) mini_reward = torch.tensor( mini_batch.reward) # shape (mini_BATCH_SIZE,) #print('mini_batch.finish', mini_batch.finish) mini_is_eos = torch.Tensor(mini_batch.finish) #print(mini_is_eos) mini_src_length = [ len(item) for item in mini_batch.source_sentence ] mini_src_length = torch.Tensor(mini_src_length) mini_src = pad_sequence(mini_batch.source_sentence, batch_first=True, padding_value=float(pad_index)) # shape (mini_BATCH_SIZE, max_length_src) length_prefix = [len(item) for item in mini_batch.prefix] mini_prefix_length = torch.Tensor(length_prefix) prefix_list = [] for prefix_ in mini_batch.prefix: prefix_ = torch.from_numpy(prefix_) prefix_list.append(prefix_) mini_prefix = pad_sequence(prefix_list, batch_first=True, padding_value=pad_index) # shape (mini_BATCH_SIZE, max_length_prefix) mini_src_mask = (mini_src != pad_index).unsqueeze(1) mini_trg_mask = (mini_prefix != pad_index).unsqueeze(1) #print('mini_src', mini_src) #print('mini_src_length', mini_src_length) #print('mini_src_mask', mini_src_mask) #print('mini_prefix', mini_prefix) #print('mini_trg_mask', mini_trg_mask) #print('mini_reward', mini_reward) # max_length_src = torch.max(mini_src_length) #max([len(item) for item in mini_batch.source_sentence]) if use_cuda: mini_src = mini_src.cuda() mini_prefix = mini_prefix.cuda() mini_src_mask = mini_src_mask.cuda() mini_src_length = mini_src_length.cuda() mini_trg_mask = mini_trg_mask.cuda() mini_next_word = mini_next_word.cuda() # print(next(policy_net.parameters()).is_cuda) # print(mini_trg_mask.get_device()) # calculate the Q_value logits_Q, _, _, _ = policy_net._encode_decode( src=mini_src, trg_input=mini_prefix, src_mask=mini_src_mask, src_length=mini_src_length, trg_mask= mini_trg_mask # trg_mask = (self.trg_input != pad_index).unsqueeze(1) ) #print('mini_prefix_length', mini_prefix_length) #print('logits_Q.shape', logits_Q.shape) # torch.Size([64, 99, 31716]) #print('logits_Q', logits_Q) # length_prefix = max([len(item) for item in mini_batch.prefix]) # logits_Q shape: batch_size * length of the sentence * total number of words in corpus. 
logits_Q = logits_Q[range(mini_BATCH_SIZE), mini_prefix_length.long() - 1, :] #print('logits_Q_.shape', logits_Q.shape) #shape(mini_batch_size, num_words) # logits shape: mini_batch_size * total number of words in corpus Q_value = logits_Q[range(mini_BATCH_SIZE), mini_next_word] #print('mini_next_word', mini_next_word) #print("Q_value", Q_value) mini_prefix_add = torch.cat( [mini_prefix, mini_next_word.unsqueeze(1)], dim=1) #print('mini_prefix_add', mini_prefix_add) mini_trg_mask_add = (mini_prefix_add != pad_index).unsqueeze(1) #print('mini_trg_mask_add', mini_trg_mask_add) if use_cuda: mini_prefix_add = mini_prefix_add.cuda() mini_trg_mask_add = mini_trg_mask_add.cuda() logits_Q_hat, _, _, _ = target_net._encode_decode( src=mini_src, trg_input=mini_prefix_add, src_mask=mini_src_mask, src_length=mini_src_length, trg_mask=mini_trg_mask_add) #print('mini_prefix_add.shape', mini_prefix_add.shape) #print('logits_Q_hat.shape', logits_Q_hat.shape) #print('mini_prefix_length.long()', mini_prefix_length.long()) logits_Q_hat = logits_Q_hat[range(mini_BATCH_SIZE), mini_prefix_length.long(), :] Q_hat_value, _ = torch.max(logits_Q_hat, dim=1) #print('Q_hat_value', Q_hat_value) if use_cuda: Q_hat_value = Q_hat_value.cuda() mini_reward = mini_reward.cuda() mini_is_eos = mini_is_eos.cuda() yj = mini_reward.float() + Gamma * Q_hat_value #print('yj', yj) index = mini_is_eos.long() #print('mini_is_eos', mini_is_eos) yj[index] = mini_reward[index] #print('yj', yj) #print('Q_value1', Q_value) yj.detach() # Optimize the model policy_net.zero_grad() # Compute loss loss = mse_loss(yj, Q_value) print('loss', loss) logger.info("step = {}, loss = {}".format( stats.steps, loss.item())) loss.backward() #for param in policy_net.parameters(): # param.grad.data.clamp_(-1, 1) optimizer.step() stats.steps += 1 #print('step', stats.steps) if stats.steps % TARGET_UPDATE == 0: #print('update the parameters in target_net.') target_net.load_state_dict(policy_net.state_dict()) if stats.steps % validation_freq == 0: # Validation print('Start validation') valid_score, valid_loss, valid_ppl, valid_sources, \ valid_sources_raw, valid_references, valid_hypotheses, \ valid_hypotheses_raw, valid_attention_scores = \ validate_on_data( model=policy_net, data=dev_data, batch_size=eval_batch_size, use_cuda=use_cuda, level=level, eval_metric=eval_metric, n_gpu=n_gpu, compute_loss=True, beam_size=1, beam_alpha=-1, batch_type=eval_batch_type, postprocess=True, bpe_type=bpe_type, sacrebleu=sacrebleu, max_output_length=max_output_length ) print( 'validation_loss: {}, validation_score: {}'.format( valid_loss, valid_score)) logger.info(valid_loss) print('average loss: total_loss/n_tokens:', valid_ppl) if early_stopping_metric == "loss": ckpt_score = valid_loss elif early_stopping_metric in ["ppl", "perplexity"]: ckpt_score = valid_ppl else: ckpt_score = valid_score if stats.is_best(ckpt_score): stats.best_ckpt_score = ckpt_score stats.best_ckpt_iter = stats.steps logger.info( 'Hooray! New best validation result [%s]!', early_stopping_metric) if ckpt_queue.maxsize > 0: logger.info("Saving new checkpoint.") # def _save_checkpoint(self) -> None: """ Save the model's current parameters and the training state to a checkpoint. The training state contains the total number of training steps, the total number of training tokens, the best checkpoint score and iteration so far, and optimizer and scheduler states. 
""" model_path = "{}/{}.ckpt".format( model_dir, stats.steps) model_state_dict = policy_net.module.state_dict() \ if isinstance(policy_net, torch.nn.DataParallel) \ else policy_net.state_dict() state = { "steps": stats.steps, "total_tokens": stats.total_tokens, "best_ckpt_score": stats.best_ckpt_score, "best_ckpt_iteration": stats.best_ckpt_iter, "model_state": model_state_dict, "optimizer_state": optimizer.state_dict(), # "scheduler_state": scheduler.state_dict() if # self.scheduler is not None else None, # 'amp_state': amp.state_dict() if self.fp16 else None } torch.save(state, model_path) if ckpt_queue.full(): to_delete = ckpt_queue.get( ) # delete oldest ckpt try: os.remove(to_delete) except FileNotFoundError: logger.warning( "Wanted to delete old checkpoint %s but " "file does not exist.", to_delete) ckpt_queue.put(model_path) best_path = "{}/best.ckpt".format(model_dir) try: # create/modify symbolic link for best checkpoint symlink_update( "{}.ckpt".format(stats.steps), best_path) except OSError: # overwrite best.ckpt torch.save(state, best_path)
def test(cfg_file, ckpt: str, output_path: str = None, save_attention: bool = False) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output :param save_attention: whether to save the computed attention weights """ cfg = load_config(cfg_file) if "test" not in cfg["data"].keys(): raise ValueError("Test data must be specified in config.") # when checkpoint is not specified, take oldest from model dir if ckpt is None: model_dir = cfg["training"]["model_dir"] ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError( "No checkpoint found in directory {}.".format(model_dir)) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" batch_size = cfg["training"]["batch_size"] use_cuda = cfg["training"].get("use_cuda", False) level = cfg["data"]["level"] eval_metric = cfg["training"]["eval_metric"] max_output_length = cfg["training"].get("max_output_length", None) # load the data _, dev_data, test_data, src_vocab, trg_vocab = load_data( data_cfg=cfg["data"]) data_to_predict = {"dev": dev_data, "test": test_data} # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.cuda() # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 0) beam_alpha = cfg["testing"].get("alpha", -1) else: beam_size = 0 beam_alpha = -1 for data_set_name, data_set in data_to_predict.items(): if data_set is None: # e.g. no valid_data continue #pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores, logprobs = validate_on_data( model, data=data_set, batch_size=batch_size, level=level, max_output_length=max_output_length, eval_metric=eval_metric, use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha) #pylint: enable=unused-variable if "trg" in data_set.fields: decoding_description = "Greedy decoding" if beam_size == 0 else \ "Beam search decoding with beam size = {} and alpha = {}".\ format(beam_size, beam_alpha) print("{:4s} {}: {} [{}]".format(data_set_name, eval_metric, score, decoding_description)) else: print("No references given for {} -> no evaluation.".format( data_set_name)) if attention_scores is not None and save_attention: attention_path = "{}/{}.{}.att".format(model_dir, data_set_name, step) print("Attention plots saved to: {}.xx".format(attention_path)) store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=[s for s in data_set.src], indices=range(len(hypotheses)), output_prefix=attention_path) if output_path is not None: output_path_set = "{}.{}".format(output_path, data_set_name) with open(output_path_set, mode="w", encoding="utf-8") as out_file: if cfg["data"].get("post_process", True): for hyp in hypotheses: out_file.write(hyp + "\n") else: for hyp in hypotheses_raw: out_file.write(" ".join(hyp) + "\n") print("Translations saved to: {}".format(output_path_set))
def test(cfg_file, ckpt, output_path: str = None, save_attention: bool = False, logger: logging.Logger = None, data_to_test: str = None) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output :param save_attention: whether to save the computed attention weights :param logger: log output to this logger (creates new logger if not set) """ if logger is None: logger = logging.getLogger(__name__) FORMAT = '%(asctime)-15s - %(message)s' logging.basicConfig(format=FORMAT) logger.setLevel(level=logging.DEBUG) cfg = load_config(cfg_file) train_cfg = cfg["training"] data_cfg = cfg["data"] test_cfg = cfg["testing"] if "test" not in data_cfg.keys(): raise ValueError("Test data must be specified in config.") # when checkpoint is not specified, take latest (best) from model dir model_dir = train_cfg["model_dir"] if ckpt is None: ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError("No checkpoint at {}.".format(model_dir)) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" batch_size = train_cfg.get("eval_batch_size", train_cfg["batch_size"]) batch_type = train_cfg.get("batch_type", "sentence") use_cuda = train_cfg.get("use_cuda", False) assert "level" in data_cfg or "trg_level" in data_cfg trg_level = data_cfg.get("level", data_cfg["trg_level"]) eval_metric = train_cfg["eval_metric"] if isinstance(eval_metric, str): eval_metric = [eval_metric] max_output_length = test_cfg.get("max_output_length", train_cfg.get("max_output_length", None)) # load the data data = load_data(data_cfg) dev_data = data["dev_data"] test_data = data["test_data"] vocabs = data["vocabs"] data_to_predict = {"dev": dev_data, "test": test_data} if data_to_test is not None: assert data_to_test in data_to_predict data_to_predict = {data_to_test: data_to_predict[data_to_test]} # load model state from disk if isinstance(ckpt, str): ckpt = [ckpt] models = [] for c in ckpt: model_checkpoint = load_checkpoint(c, use_cuda=use_cuda) # build model and load parameters into it m = build_model(cfg["model"], vocabs=vocabs) m.load_state_dict(model_checkpoint["model_state"]) models.append(m) model = models[0] if len(models) == 1 else EnsembleModel(*models) if use_cuda: model.cuda() # should this exist? 
# whether to use beam search for decoding, 0: greedy decoding beam_sizes = beam_alpha = 0 if "testing" in cfg.keys(): beam_sizes = test_cfg.get("beam_size", 0) beam_alpha = test_cfg.get("alpha", 0) beam_sizes = [beam_sizes] if isinstance(beam_sizes, int) else beam_sizes assert beam_alpha >= 0, "Use alpha >= 0" method = test_cfg.get("method", None) max_hyps = test_cfg.get("max_hyps", 1) # only for the enumerate thing validate_by_label = test_cfg.get("validate_by_label", train_cfg.get("validate_by_label", False)) forced_sparsity = test_cfg.get("forced_sparsity", train_cfg.get("forced_sparsity", False)) for beam_size in beam_sizes: for data_set_name, data_set in data_to_predict.items(): valid_results = validate_on_data( model, data=data_set, batch_size=batch_size, batch_type=batch_type, trg_level=trg_level, max_output_length=max_output_length, eval_metrics=eval_metric, use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha, save_attention=save_attention, validate_by_label=validate_by_label, forced_sparsity=forced_sparsity, method=method, max_hyps=max_hyps, break_at_p=test_cfg.get("break_at_p", 1.0), break_at_argmax=test_cfg.get("break_at_argmax", False), short_depth=test_cfg.get("short_depth", 0)) scores = valid_results[0] hypotheses, hypotheses_raw = valid_results[2:4] scores_by_label = valid_results[5] if "trg" in data_set.fields: log_scores(logger, data_set_name, scores, scores_by_label, beam_size, beam_alpha) else: logger.info("No references given for %s -> no evaluation.", data_set_name) attention_scores = valid_results[4] if save_attention and not attention_scores: logger.warning("Attention scores could not be saved. " "Note that attention scores are not " "available when using beam search. " "Set beam_size to 0 for greedy decoding.") if save_attention and attention_scores: # currently this will break for transformers logger.info("Saving attention plots. This might be slow.") store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=[s for s in data_set.src], indices=range(len(hypotheses)), model_dir=model_dir, steps=step, data_set_name=data_set_name) logger.info("Attention plots saved to: %s", model_dir) if output_path is not None: output_path_set = "{}.{}".format(output_path, data_set_name) with open(output_path_set, mode="w", encoding="utf-8") as outf: for hyp in hypotheses: outf.write(hyp + "\n") logger.info("Translations saved to: %s", output_path_set)
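# In this variant ckpt may be a single path or a list of paths; with several
# checkpoints the function wraps the loaded models in an EnsembleModel.
# Hedged usage sketch, all paths hypothetical:
test("configs/transformer_example.yaml",
     ckpt=["models/run1/10000.ckpt", "models/run2/10000.ckpt"],
     output_path="out/ensemble_hyps")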
def load_model(self, src_language, trg_language, domain, bpe_src_code=None, tokenize=None): """ Load model for given trg language. """ # model_dir = "{}-{}".format(self._model_dir_prefix, trg_language) model_dir = f"{self._model_dir_prefix}{src_language}-{trg_language}-{domain}" # Load the checkpoint. ckpt_path = os.path.join(model_dir, 'model.ckpt') # Load the vocabularies. src_vocab_path = os.path.join(model_dir, 'src_vocab.txt') trg_vocab_path = os.path.join(model_dir, 'trg_vocab.txt') # Load the config. config_path = os.path.join(model_dir, 'config_orig.yaml') # Adjust config. config = load_config(config_path) new_config_file = os.path.join(model_dir, 'config.yaml') config = self._update_config(config, src_vocab_path, trg_vocab_path, model_dir, ckpt_path) with open(new_config_file, 'w') as cfile: yaml.dump(config, cfile) # print('Loaded model for {}-{}.'.format(self._src_language, trg_language)) print('Loaded model for {}-{}.'.format(src_language, trg_language)) conf = {} logger = logging.getLogger(__name__) conf["logger"] = logger # load the Joey configuration cfg = load_config(new_config_file) # load the checkpoint if "load_model" in cfg['training'].keys(): ckpt = cfg['training']["load_model"] else: ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError("No checkpoint found in directory {}." .format(model_dir)) # prediction parameters from config conf["use_cuda"] = cfg["training"].get("use_cuda", False) if torch.cuda.is_available() else False conf["level"] = cfg["data"]["level"] conf["max_output_length"] = cfg["training"].get("max_output_length", None) conf["lowercase"] = cfg["data"].get("lowercase", False) # load the vocabularies src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt" trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt" conf["src_vocab"] = build_vocab(field="src", vocab_file=src_vocab_file, dataset=None, max_size=-1, min_freq=0) conf["trg_vocab"] = build_vocab(field="trg", vocab_file=trg_vocab_file, dataset=None, max_size=-1, min_freq=0) # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): conf["beam_size"] = cfg["testing"].get("beam_size", 0) conf["beam_alpha"] = cfg["testing"].get("alpha", -1) else: conf["beam_size"] = 1 conf["beam_alpha"] = -1 # pre-processing if tokenize is not None: src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"]) trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"]) # tokenize input tokenizer = lambda x: src_tokenizer.tokenize(x, return_str=True) detokenizer = lambda x: trg_tokenizer.detokenize( x.split(), return_str=True) else: tokenizer = lambda x: x detokenizer = lambda x: x if bpe_src_code is not None and level == "bpe": # load bpe merge file merge_file = open(bpe_src_code, "r") bpe = apply_bpe.BPE(codes=merge_file) segmenter = lambda x: bpe.process_line(x.strip()) elif conf["level"] == "char": # split to chars segmenter = lambda x: list(x.strip()) else: segmenter = lambda x: x.strip() conf["preprocess"] = [tokenizer, segmenter] conf["postprocess"] = [detokenizer] # build model and load parameters into it model_checkpoint = load_checkpoint(ckpt, conf["use_cuda"]) model = build_model(cfg["model"], src_vocab=conf["src_vocab"], trg_vocab=conf["trg_vocab"]) model.load_state_dict(model_checkpoint["model_state"]) # ipdb.set_trace() if conf["use_cuda"]: model.cuda() conf["model"] = model print("Joey NMT model loaded successfully.") return conf
def load_model(model_dir, bpe_src_code=None, tokenize=None): """ Start the bot. This means loading the model according to the config file. :param model_dir: Model directory of trained Joey NMT model. :param bpe_src_code: BPE codes for source side processing (optional). :param tokenize: If True, tokenize inputs with Moses tokenizer. :return: """ conf = {} cfg_file = model_dir+"/config.yaml" logger = logging.getLogger(__name__) conf["logger"] = logger # load the Joey configuration cfg = load_config(cfg_file) # load the checkpoint if "load_model" in cfg['training'].keys(): ckpt = cfg['training']["load_model"] else: ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError("No checkpoint found in directory {}." .format(model_dir)) # prediction parameters from config conf["use_cuda"] = cfg["training"].get("use_cuda", False) conf["level"] = cfg["data"]["level"] conf["max_output_length"] = cfg["training"].get("max_output_length", None) conf["lowercase"] = cfg["data"].get("lowercase", False) # load the vocabularies src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt" trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt" conf["src_vocab"] = build_vocab(field="src", vocab_file=src_vocab_file, dataset=None, max_size=-1, min_freq=0) conf["trg_vocab"] = build_vocab(field="trg", vocab_file=trg_vocab_file, dataset=None, max_size=-1, min_freq=0) # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): conf["beam_size"] = cfg["testing"].get("beam_size", 0) conf["beam_alpha"] = cfg["testing"].get("alpha", -1) else: conf["beam_size"] = 1 conf["beam_alpha"] = -1 # pre-processing if tokenize is not None: src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"]) trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"]) # tokenize input tokenizer = lambda x: src_tokenizer.tokenize(x, return_str=True) detokenizer = lambda x: trg_tokenizer.detokenize( x.split(), return_str=True) else: tokenizer = lambda x: x detokenizer = lambda x: x if bpe_src_code is not None and level == "bpe": # load bpe merge file merge_file = open(bpe_src_code, "r") bpe = apply_bpe.BPE(codes=merge_file) segmenter = lambda x: bpe.process_line(x.strip()) elif conf["level"] == "char": # split to chars segmenter = lambda x: list(x.strip()) else: segmenter = lambda x: x.strip() conf["preprocess"] = [tokenizer, segmenter] conf["postprocess"] = [detokenizer] # build model and load parameters into it model_checkpoint = load_checkpoint(ckpt, conf["use_cuda"]) model = build_model(cfg["model"], src_vocab=conf["src_vocab"], trg_vocab=conf["trg_vocab"]) model.load_state_dict(model_checkpoint["model_state"]) if conf["use_cuda"]: model.cuda() conf["model"] = model print("Joey NMT model loaded successfully.") return conf
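# Hedged usage sketch: the conf dict returned above bundles everything the
# translate() helper used in run_bot needs. The model directory and BPE codes
# path are hypothetical.
conf = load_model("models/en-de", bpe_src_code="models/en-de/bpe.codes",
                  tokenize=True)
hypothesis = translate("Hello world!",
                       beam_size=conf["beam_size"],
                       beam_alpha=conf["beam_alpha"],
                       level=conf["level"],
                       lowercase=conf["lowercase"],
                       max_output_length=conf["max_output_length"],
                       model=conf["model"],
                       preprocess=conf["preprocess"],
                       postprocess=conf["postprocess"],
                       src_vocab=conf["src_vocab"],
                       trg_vocab=conf["trg_vocab"],
                       use_cuda=conf["use_cuda"],
                       logger=conf["logger"])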
def translate(cfg_file, ckpt: str, output_path: str = None) -> None: """ Interactive translation function. Loads model from checkpoint and translates either the stdin input or asks for input to translate interactively. The input has to be pre-processed according to the data that the model was trained on, i.e. tokenized or split into subwords. Translations are printed to stdout. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load """ def _load_line_as_data(line): """ Create a dataset from one line via a temporary file. """ # write src input to temporary file tmp_name = "tmp" tmp_suffix = ".src" tmp_filename = tmp_name+tmp_suffix with open(tmp_filename, "w") as tmp_file: tmp_file.write("{}\n".format(line)) test_data = MonoDataset(path=tmp_name, ext=tmp_suffix, field=src_field) # remove temporary file if os.path.exists(tmp_filename): os.remove(tmp_filename) return test_data def _translate_data(test_data): """ Translates given dataset, using parameters from outer scope. """ # pylint: disable=unused-variable _, _, _, _, hypotheses, _, _, _, _ = validate_on_data( model, data=test_data, batch_size=batch_size, level=level, max_output_length=max_output_length, eval_metrics=[], use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha) return hypotheses cfg = load_config(cfg_file) # when checkpoint is not specified, take oldest from model dir if ckpt is None: model_dir = cfg["training"]["model_dir"] ckpt = get_latest_checkpoint(model_dir) data_cfg = cfg["data"] batch_size = cfg["training"].get("batch_size", 1) use_cuda = cfg["training"].get("use_cuda", False) max_output_length = cfg["training"].get("max_output_length", None) # read vocabs # This will need to change: currently translate does not support inflection src_vocab_file = data_cfg.get( "src_vocab", cfg["training"]["model_dir"] + "/src_vocab.txt") trg_vocab_file = data_cfg.get( "trg_vocab", cfg["training"]["model_dir"] + "/trg_vocab.txt") src_vocab = Vocabulary(file=src_vocab_file) trg_vocab = Vocabulary(file=trg_vocab_file) vocabs = {"src": src_vocab, "trg": trg_vocab} level = data_cfg["level"] lowercase = data_cfg["lowercase"] tok_fun = list if level == "char" else str.split src_field = Field(init_token=None, eos_token=EOS_TOKEN, pad_token=PAD_TOKEN, tokenize=tok_fun, batch_first=True, lower=lowercase, unk_token=UNK_TOKEN, include_lengths=True) src_field.vocab = src_vocab # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], vocabs=vocabs) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.cuda() # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 0) beam_alpha = cfg["testing"].get("alpha", 0) else: beam_size = 0 beam_alpha = 0 if beam_alpha < 0: raise ConfigurationError("alpha for length penalty should be >= 0") if not sys.stdin.isatty(): # file given test_data = MonoDataset(path=sys.stdin, ext="", field=src_field) hypotheses = _translate_data(test_data) if output_path is not None: output_path_set = "{}".format(output_path) with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") print("Translations saved to: {}".format(output_path_set)) else: for hyp in hypotheses: print(hyp) else: # enter interactive mode batch_size = 1 while True: try: src_input = input("\nPlease enter a source sentence " "(pre-processed): 
\n") if not src_input.strip(): break # every line has to be made into dataset test_data = _load_line_as_data(line=src_input) hypotheses = _translate_data(test_data) print("JoeyNMT: {}".format(hypotheses[0])) except (KeyboardInterrupt, EOFError): print("\nBye.") break
def test(cfg_file, ckpt, # str or list now output_path: str = None, save_attention: bool = False, logger: logging.Logger = None) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output :param save_attention: whether to save the computed attention weights :param logger: log output to this logger (creates new logger if not set) """ if logger is None: logger = logging.getLogger(__name__) FORMAT = '%(asctime)-15s - %(message)s' logging.basicConfig(format=FORMAT) logger.setLevel(level=logging.DEBUG) cfg = load_config(cfg_file) train_cfg = cfg["training"] data_cfg = cfg["data"] test_cfg = cfg["testing"] if "test" not in data_cfg.keys(): raise ValueError("Test data must be specified in config.") # when checkpoint is not specified, take latest (best) from model dir if ckpt is None: model_dir = train_cfg["model_dir"] ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError("No checkpoint found in directory {}." .format(model_dir)) try: step = ckpt.split(model_dir+"/")[1].split(".ckpt")[0] except IndexError: step = "best" batch_size = train_cfg.get("eval_batch_size", train_cfg["batch_size"]) batch_type = train_cfg.get("eval_batch_type", train_cfg.get("batch_type", "sentence")) use_cuda = train_cfg.get("use_cuda", False) src_level = data_cfg.get("src_level", data_cfg.get("level", "word")) trg_level = data_cfg.get("trg_level", data_cfg.get("level", "word")) eval_metric = train_cfg["eval_metric"] if isinstance(eval_metric, str): eval_metric = [eval_metric] attn_metric = train_cfg.get("attn_metric", []) if isinstance(attn_metric, str): attn_metric = [attn_metric] max_output_length = train_cfg.get("max_output_length", None) # load the data data = load_data(data_cfg) dev_data = data["dev_data"] test_data = data["test_data"] vocabs = data["vocabs"] data_to_predict = {"dev": dev_data, "test": test_data} # load model state from disk if isinstance(ckpt, str): ckpt = [ckpt] individual_models = [] for c in ckpt: model_checkpoint = load_checkpoint(c, use_cuda=use_cuda) # build model and load parameters into it m = build_model(cfg["model"], vocabs=vocabs) m.load_state_dict(model_checkpoint["model_state"]) individual_models.append(m) if len(individual_models) == 1: model = individual_models[0] else: model = EnsembleModel(*individual_models) if use_cuda: model.cuda() # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_sizes = test_cfg.get("beam_size", 0) beam_alpha = test_cfg.get("alpha", 0) else: beam_sizes = 0 beam_alpha = 0 if isinstance(beam_sizes, int): beam_sizes = [beam_sizes] assert beam_alpha >= 0, "Use alpha >= 0" for beam_size in beam_sizes: for data_set_name, data_set in data_to_predict.items(): #pylint: disable=unused-variable scores, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores, scores_by_lang, by_lang = validate_on_data( model, data=data_set, batch_size=batch_size, batch_type=batch_type, src_level=src_level, trg_level=trg_level, max_output_length=max_output_length, eval_metrics=eval_metric, attn_metrics=attn_metric, use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha, save_attention=save_attention) #pylint: enable=unused-variable if "trg" in data_set.fields: labeled_scores = sorted(scores.items()) eval_report = ", ".join("{}: {:.5f}".format(n, v) for n, v in labeled_scores) 
decoding_description = "Greedy decoding" if beam_size == 0 else \ "Beam search decoding with beam size = {} and alpha = {}".\ format(beam_size, beam_alpha) logger.info("%4s %s: [%s]", data_set_name, eval_report, decoding_description) if scores_by_lang is not None: for metric, scores in scores_by_lang.items(): # make a report lang_report = [metric] numbers = sorted(scores.items()) lang_report.extend(["{}: {:.5f}".format(k, v) for k, v in numbers]) logger.info("\n\t".join(lang_report)) else: logger.info("No references given for %s -> no evaluation.", data_set_name) if save_attention: # currently this will break for transformers if attention_scores: #attention_name = "{}.{}.att".format(data_set_name, step) #attention_path = os.path.join(model_dir, attention_name) logger.info("Saving attention plots. This might take a while..") store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=[s for s in data_set.src], indices=range(len(hypotheses)), model_dir=model_dir, steps=step, data_set_name=data_set_name) logger.info("Attention plots saved to: %s", model_dir) else: logger.warning("Attention scores could not be saved. " "Note that attention scores are not available " "when using beam search. " "Set beam_size to 0 for greedy decoding.") if output_path is not None: for lang, ref_and_hyp in by_lang.items(): if lang is None: # monolingual case output_path_set = "{}.{}".format(output_path, data_set_name) else: output_path_set = "{}.{}.{}".format(output_path, lang, data_set_name) if isinstance(ref_and_hyp[0], str): hyps = ref_and_hyp else: hyps = [hyp for (ref, hyp) in ref_and_hyp] with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hyps: out_file.write(hyp + "\n") logger.info("Translations saved to: %s", output_path_set)
def __init__(self, cfg_file, ckpt: str, output_path: str = None, logger: Logger = None) -> None: """ Recover the saved model, specified as in configuration. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output :param logger: log output to this logger (creates new logger if not set) """ if logger is None: logger = make_logger() cfg = load_config(cfg_file) if "test" not in cfg["data"].keys(): raise ValueError("Test data must be specified in config.") #print(cfg.keys()) if "dqn" not in cfg.keys(): raise ValueError("dqn data must be specified in config.") self.model_dir = cfg["training"]["model_dir"] # when checkpoint is not specified, take latest (best) from model dir if ckpt is None: model_dir = cfg["training"]["model_dir"] ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError( "No checkpoint found in directory {}.".format(model_dir)) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" self.batch_size = 1 #** self.batch_type = cfg["training"].get( "eval_batch_type", cfg["training"].get("batch_type", "sentence")) self.use_cuda = cfg["training"].get("use_cuda", False) self.level = cfg["data"]["level"] self.eval_metric = cfg["training"]["eval_metric"] self.max_output_length = cfg["training"].get("max_output_length", None) # load the data train_data, dev_data, test_data, src_vocab, trg_vocab = load_data( data_cfg=cfg["data"]) #Loading the DQN parameters: self.sample_size = cfg["dqn"]["sample_size"] self.lr = cfg["dqn"].get("lr", 0.01) self.egreed_max = cfg["dqn"].get("egreed_max", 0.9) self.egreed_min = cfg["dqn"].get("egreed_min", 0.01) self.gamma_max = cfg["dqn"].get("gamma_max", 0.9) self.gamma_min = cfg["dqn"].get("gamma_min", 0.5) self.nu_iter = cfg["dqn"]["nu_iter"] self.mem_cap = cfg["dqn"]["mem_cap"] self.beam_min = cfg["dqn"]["beam_min"] self.beam_max = cfg["dqn"]["beam_max"] self.state_type = cfg["dqn"]["state_type"] if self.state_type == 'hidden': self.state_size = cfg["model"]["encoder"]["hidden_size"] * 2 else: self.state_size = cfg["model"]["encoder"]["hidden_size"] self.actions_size = len(src_vocab) self.gamma = None print("Sample size: ", self.sample_size) print("State size: ", self.state_size) print("Action size: ", self.actions_size) self.epochs = cfg["dqn"]["epochs"] # Inii the Qnet and Qnet2 self.eval_net = Net(self.state_size, self.actions_size) self.target_net = Net(self.state_size, self.actions_size) #Following the algorithm self.target_net.load_state_dict(self.eval_net.state_dict()) self.learn_step_counter = 0 self.memory_counter = 0 self.size_memory1 = self.state_size * 2 + 2 + 1 self.memory = np.zeros((self.mem_cap, self.size_memory1)) self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=self.lr) self.loss_func = nn.MSELoss() #others parameters self.bos_index = trg_vocab.stoi[BOS_TOKEN] self.eos_index = trg_vocab.stoi[EOS_TOKEN] self.pad_index = trg_vocab.stoi[PAD_TOKEN] self.data_to_train_dqn = {"train": train_data} #self.data_to_train_dqn = {"test": test_data} #self.data_to_dev = {"dev": dev_data} self.data_to_dev = {"dev": dev_data} #self.data_to_train_dqn = {"train": train_data # ,"dev": dev_data, "test": test_data} # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=self.use_cuda) # build model and load parameters into it self.model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) self.model.load_state_dict(model_checkpoint["model_state"]) if self.use_cuda: 
            self.model.cuda()

        # whether to use beam search for decoding, 0: greedy decoding
        beam_size = 1
        beam_alpha = -1

        # other, less important parameters
        self.index_fin = None
        path_tensorboard = self.model_dir + "/tensorboard_DQN/"
        self.tb_writer = SummaryWriter(log_dir=path_tensorboard, purge_step=0)
        self.dev_network_count = 0
        print(cfg["dqn"]["reward_type"])

        # reward function selection
        if cfg["dqn"]["reward_type"] == "bleu_diff":
            print("You selected the reward based on BLEU score differences.")
            self.Reward = self.Reward_bleu_diff
        elif cfg["dqn"]["reward_type"] == "bleu_lin":
            print("You selected the reward based on linear BLEU scores, "
                  "with several penalties.")
            self.Reward = self.Reward_lin
        else:
            print("You selected the reward based on the final score "
                  "of the last state.")
            self.Reward = self.Reward_bleu_fin
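# ---------------------------------------------------------------------------
# Net is referenced above (eval_net / target_net) but not defined in this
# excerpt. A minimal sketch of a compatible Q-network, under the assumption
# that it maps a state vector to one Q-value per action; the real Net may
# differ, this only matches the constructor signature Net(state_size,
# actions_size) and the Adam/MSELoss usage in the trainer.
# ---------------------------------------------------------------------------
import torch
import torch.nn as nn


class Net(nn.Module):
    """Sketch of a feed-forward Q-network: state in, Q-values per action out."""

    def __init__(self, state_size: int, actions_size: int, hidden: int = 256):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(state_size, hidden),
            nn.ReLU(),
            nn.Linear(hidden, actions_size),
        )

    def forward(self, state: torch.Tensor) -> torch.Tensor:
        return self.layers(state)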
def test(cfg_file, ckpt: str, output_path: str = None, save_attention: bool = False, logger: logging.Logger = None) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output :param save_attention: whether to save the computed attention weights :param logger: log output to this logger (creates new logger if not set) """ if logger is None: logger = logging.getLogger(__name__) FORMAT = '%(asctime)-15s - %(message)s' logging.basicConfig(format=FORMAT) logger.setLevel(level=logging.DEBUG) cfg = load_config(cfg_file) if "test" not in cfg["data"].keys(): raise ValueError("Test data must be specified in config.") # when checkpoint is not specified, take latest (best) from model dir if ckpt is None: model_dir = cfg["training"]["model_dir"] ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError( "No checkpoint found in directory {}.".format(model_dir)) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" batch_size = cfg["training"]["batch_size"] batch_type = cfg["training"].get("batch_type", "sentence") use_cuda = cfg["training"].get("use_cuda", False) level = cfg["data"]["level"] eval_metric = cfg["training"]["eval_metric"] max_output_length = cfg["training"].get("max_output_length", None) # load the data _, dev_data, test_data, src_vocab, trg_vocab = load_data( data_cfg=cfg["data"]) data_to_predict = {"dev": dev_data, "test": test_data} # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.cuda() # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 0) beam_alpha = cfg["testing"].get("alpha", -1) else: beam_size = 0 beam_alpha = -1 for data_set_name, data_set in data_to_predict.items(): #pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores = validate_on_data( model, data=data_set, batch_size=batch_size, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric=eval_metric, use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha) #pylint: enable=unused-variable if "trg" in data_set.fields: decoding_description = "Greedy decoding" if beam_size == 0 else \ "Beam search decoding with beam size = {} and alpha = {}".\ format(beam_size, beam_alpha) logger.info("%4s %s: %6.2f [%s]", data_set_name, eval_metric, score, decoding_description) else: logger.info("No references given for %s -> no evaluation.", data_set_name) if save_attention: if attention_scores: attention_name = "{}.{}.att".format(data_set_name, step) attention_path = os.path.join(model_dir, attention_name) logger.info( "Saving attention plots. This might take a while..") store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=[s for s in data_set.src], indices=range(len(hypotheses)), output_prefix=attention_path) logger.info("Attention plots saved to: %s", attention_path) else: logger.warning("Attention scores could not be saved. " "Note that attention scores are not available " "when using beam search. 
" "Set beam_size to 0 for greedy decoding.") if output_path is not None: output_path_set = "{}.{}".format(output_path, data_set_name) with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") logger.info("Translations saved to: %s", output_path_set)
def test(cfg_file, ckpt: str, output_path: str = None, save_attention: bool = False, logger: Logger = None) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output :param save_attention: whether to save the computed attention weights :param logger: log output to this logger (creates new logger if not set) """ if logger is None: logger = make_logger() cfg = load_config(cfg_file) if "test" not in cfg["data"].keys(): raise ValueError("Test data must be specified in config.") # when checkpoint is not specified, take latest (best) from model dir if ckpt is None: model_dir = cfg["training"]["model_dir"] ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError( "No checkpoint found in directory {}.".format(model_dir)) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" batch_size = cfg["training"].get("eval_batch_size", cfg["training"]["batch_size"]) batch_type = cfg["training"].get( "eval_batch_type", cfg["training"].get("batch_type", "sentence")) use_cuda = cfg["training"].get("use_cuda", False) level = cfg["data"]["level"] eval_metric = cfg["training"]["eval_metric"] max_output_length = cfg["training"].get("max_output_length", None) # load the data _, dev_data, test_data, src_vocab, trg_vocab = load_data( data_cfg=cfg["data"]) data_to_predict = {"dev": dev_data, "test": test_data} # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.cuda() # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 1) beam_alpha = cfg["testing"].get("alpha", -1) else: beam_size = 1 beam_alpha = -1 for data_set_name, data_set in data_to_predict.items(): #pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores,valid_hypotheses_full_n_best,scores = validate_on_data( model, data=data_set, batch_size=batch_size, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric=eval_metric, use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha, logger=logger) #pylint: enable=unused-variable if "trg" in data_set.fields: decoding_description = "Greedy decoding" if beam_size < 2 else \ "Beam search decoding with beam size = {} and alpha = {}".\ format(beam_size, beam_alpha) logger.info("%4s %s: %6.2f [%s]", data_set_name, eval_metric, score, decoding_description) else: logger.info("No references given for %s -> no evaluation.", data_set_name) if save_attention: if attention_scores: attention_name = "{}.{}.att".format(data_set_name, step) attention_path = os.path.join(model_dir, attention_name) logger.info( "Saving attention plots. This might take a while..") store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=data_set.src, indices=range(len(hypotheses)), output_prefix=attention_path) logger.info("Attention plots saved to: %s", attention_path) else: logger.warning("Attention scores could not be saved. " "Note that attention scores are not available " "when using beam search. 
" "Set beam_size to 1 for greedy decoding.") if output_path is not None: ''' output_path_set = "{}.{}".format(output_path, data_set_name) with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") #sy_debug alt_output = "{}.n_best.{}".format(output_path, data_set_name) with open(alt_output, mode="w", encoding="utf-8") as out_file: for n in valid_hypotheses_full_n_best: out_file.write(n + "\n") ''' #@Shiya: exporting hypothesis and associated score to .csv file #TODO: write_to_csv(hyps,scores) def write_to_csv(hyps: list, scores: list): import csv output_file = "{}.n_csv.{}".format(output_path, data_set_name) with open(output_file, mode="w", newline='', encoding="utf-8") as out_file: fieldnames = ['Predictions', 'Scores'] writer = csv.DictWriter(out_file, fieldnames=fieldnames) writer.writeheader() for prediction, score in zip(hyps, scores): writer.writerow({ fieldnames[0]: prediction, fieldnames[1]: score }) write_to_csv(valid_hypotheses_full_n_best, scores)
def init_from_checkpoint(self, path: str, reset_best_ckpt: bool = False, reset_scheduler: bool = False, reset_optimizer: bool = False, reset_iter_state: bool = False) -> None: """ Initialize the trainer from a given checkpoint file. This checkpoint file contains not only model parameters, but also scheduler and optimizer states, see `self._save_checkpoint`. :param path: path to checkpoint :param reset_best_ckpt: reset tracking of the best checkpoint, use for domain adaptation with a new dev set or when using a new metric for fine-tuning. :param reset_scheduler: reset the learning rate scheduler, and do not use the one stored in the checkpoint. :param reset_optimizer: reset the optimizer, and do not use the one stored in the checkpoint. :param reset_iter_state: reset the sampler's internal state and do not use the one stored in the checkpoint. """ logger.info("Loading model from %s", path) model_checkpoint = load_checkpoint(path=path, use_cuda=self.use_cuda) # restore model and optimizer parameters self.model.load_state_dict(model_checkpoint["model_state"]) if not reset_optimizer: self.optimizer.load_state_dict(model_checkpoint["optimizer_state"]) else: logger.info("Reset optimizer.") if not reset_scheduler: if model_checkpoint["scheduler_state"] is not None and \ self.scheduler is not None: self.scheduler.load_state_dict( model_checkpoint["scheduler_state"]) else: logger.info("Reset scheduler.") # restore counts self.stats.steps = model_checkpoint["steps"] self.stats.total_tokens = model_checkpoint["total_tokens"] if not reset_best_ckpt: self.stats.best_ckpt_score = model_checkpoint["best_ckpt_score"] self.stats.best_ckpt_iter = model_checkpoint["best_ckpt_iteration"] else: logger.info("Reset tracking of the best checkpoint.") if not reset_iter_state: assert 'train_iter_state' in model_checkpoint self.train_iter_state = model_checkpoint["train_iter_state"] else: logger.info("Reset train data iterator.") # move parameters to cuda if self.use_cuda: self.model.to(self.device) # fp16 if self.fp16 and model_checkpoint.get("amp_state", None) is not None: amp.load_state_dict(model_checkpoint['amp_state'])
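# ---------------------------------------------------------------------------
# Sketch of the writing side: the docstring above refers to
# self._save_checkpoint, which is not shown in this excerpt. The standalone
# function below produces exactly the keys that init_from_checkpoint() reads
# back; the trainer's real _save_checkpoint may store more or differ in
# detail (e.g. how train_iter_state and amp_state are obtained).
# ---------------------------------------------------------------------------
import torch


def save_checkpoint_sketch(path, model, optimizer, scheduler,
                           steps, total_tokens,
                           best_ckpt_score, best_ckpt_iteration,
                           train_iter_state=None, amp_state=None):
    """Write a checkpoint with the keys init_from_checkpoint() consumes."""
    torch.save({
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": scheduler.state_dict()
                           if scheduler is not None else None,
        "steps": steps,
        "total_tokens": total_tokens,
        "best_ckpt_score": best_ckpt_score,
        "best_ckpt_iteration": best_ckpt_iteration,
        "train_iter_state": train_iter_state,
        "amp_state": amp_state,
    }, path)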
def test(cfg_file, ckpt: str, output_path: str = None, save_attention: bool = False, logger: logging.Logger = None) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output :param save_attention: whether to save the computed attention weights :param logger: log output to this logger (creates new logger if not set) """ if logger is None: logger = logging.getLogger(__name__) FORMAT = '%(asctime)-15s - %(message)s' logging.basicConfig(format=FORMAT) logger.setLevel(level=logging.DEBUG) cfg = load_config(cfg_file) if "test" not in cfg["data"].keys(): raise ValueError("Test data must be specified in config.") # when checkpoint is not specified, take latest (best) from model dir if ckpt is None: model_dir = cfg["training"]["model_dir"] ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError( "No checkpoint found in directory {}.".format(model_dir)) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" batch_size = cfg["training"].get("eval_batch_size", cfg["training"]["batch_size"]) batch_type = cfg["training"].get( "eval_batch_type", cfg["training"].get("batch_type", "sentence")) use_cuda = cfg["training"].get("use_cuda", False) level = cfg["data"]["level"] eval_metric = cfg["training"]["eval_metric"] max_output_length = cfg["training"].get("max_output_length", None) # load the data _, dev_data, test_data,\ src_vocab, trg_vocab,\ _, dev_kb, test_kb,\ _, dev_kb_lookup, test_kb_lookup, \ _, dev_kb_lengths, test_kb_lengths,\ _, dev_kb_truvals, test_kb_truvals, \ trv_vocab, canon_fun,\ dev_data_canon, test_data_canon \ = load_data( data_cfg=cfg["data"] ) report_entf1_on_canonicals = cfg["training"].get( "report_entf1_on_canonicals", False) kb_task = (test_kb != None) data_to_predict = {"dev": dev_data, "test": test_data} # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab, trv_vocab=trv_vocab, canonizer=canon_fun) model.load_state_dict(model_checkpoint["model_state"]) # FIXME for the moment, for testing, try overriding model.canonize with canon_fun from test functions loaded data # should hopefully not be an issue with gridsearch results... 
if use_cuda: model.cuda() # move to GPU # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 0) beam_alpha = cfg["testing"].get("alpha", -1) else: beam_size = 0 beam_alpha = -1 for data_set_name, data_set in data_to_predict.items(): if data_set_name == "dev": kb_info = [ dev_kb, dev_kb_lookup, dev_kb_lengths, dev_kb_truvals, dev_data_canon ] elif data_set_name == "test": kb_info = [ test_kb, test_kb_lookup, test_kb_lengths, test_kb_truvals, test_data_canon ] else: raise ValueError((data_set_name, data_set)) #pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores, kb_att_scores, ent_f1, ent_mcc = validate_on_data( model, data=data_set, batch_size=batch_size, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric=eval_metric, use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha, kb_task = kb_task, valid_kb=kb_info[0], valid_kb_lkp=kb_info[1], valid_kb_lens=kb_info[2], valid_kb_truvals=kb_info[3], valid_data_canon=kb_info[4], report_on_canonicals=report_entf1_on_canonicals ) """ batch_size=self.eval_batch_size, data=valid_data, eval_metric=self.eval_metric, level=self.level, model=self.model, use_cuda=self.use_cuda, max_output_length=self.max_output_length, loss_function=self.loss, beam_size=0, batch_type=self.eval_batch_type, kb_task=kb_task, valid_kb=valid_kb, valid_kb_lkp=valid_kb_lkp, valid_kb_lens=valid_kb_lens, valid_kb_truvals=valid_kb_truvals """ #pylint: enable=unused-variable if "trg" in data_set.fields: decoding_description = "Greedy decoding" if beam_size == 0 else \ "Beam search decoding with beam size = {} and alpha = {}".\ format(beam_size, beam_alpha) logger.info("%4s %s: %6.2f f1: %6.2f mcc: %6.2f [%s]", data_set_name, eval_metric, score, ent_f1, ent_mcc, decoding_description) else: logger.info("No references given for %s -> no evaluation.", data_set_name) if save_attention: if attention_scores: attention_name = "{}.{}.att".format(data_set_name, step) attention_path = os.path.join(model_dir, attention_name) logger.info( "Saving attention plots. This might take a while..") store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=data_set.src, indices=range(len(hypotheses)), output_prefix=attention_path) logger.info("Attention plots saved to: %s", attention_path) if kb_att_scores: kb_att_name = "{}.{}.kbatt".format(data_set_name, step) kb_att_path = os.path.join(model_dir, kb_att_name) store_attention_plots( attentions=kb_att_scores, targets=hypotheses_raw, sources=list(data_set.kbsrc), #TODO indices=range(len(hypotheses)), output_prefix=kb_att_path, kb_info=(dev_kb_lookup, dev_kb_lengths, list(data_set.kbtrg))) logger.info("KB Attention plots saved to: %s", attention_path) else: logger.warning("Attention scores could not be saved. " "Note that attention scores are not available " "when using beam search. " "Set beam_size to 0 for greedy decoding.") if output_path is not None: output_path_set = "{}.{}".format(output_path, data_set_name) with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") logger.info("Translations saved to: %s", output_path_set)
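# ---------------------------------------------------------------------------
# Design-choice sketch (alternative to the if/elif chain selecting kb_info
# above): bundle the per-split KB artefacts once and look them up by split
# name. The variable names in the commented example mirror those returned by
# load_data() in the function above.
# ---------------------------------------------------------------------------
from typing import Any, Dict, List


def select_kb_info(split: str, bundles: Dict[str, List[Any]]) -> List[Any]:
    """Return [kb, kb_lookup, kb_lengths, kb_truvals, data_canon] for a split."""
    if split not in bundles:
        raise ValueError("Unknown data split: {}".format(split))
    return bundles[split]

# bundles = {
#     "dev": [dev_kb, dev_kb_lookup, dev_kb_lengths, dev_kb_truvals,
#             dev_data_canon],
#     "test": [test_kb, test_kb_lookup, test_kb_lengths, test_kb_truvals,
#              test_data_canon],
# }
# kb_info = select_kb_info(data_set_name, bundles)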