def __init__(self, engine_path, is_admin):
    self.engine_path = engine_path
    self.is_admin = is_admin
    self.model_path = os.path.join(engine_path, 'model')
    self.config_path = os.path.join(engine_path, 'config.yaml')
    self.gpu_id = None

    # Load parameters from configuration file
    config = load_config(self.config_path)
    if "load_model" in config['training'].keys():
        self.ckpt = os.path.realpath(
            os.path.join(app.config['JOEYNMT_FOLDER'],
                         config['training']["load_model"]))
    else:
        self.ckpt = get_latest_checkpoint(self.model_path)

    self.use_cuda = config["training"].get("use_cuda", False)
    self.level = config["data"]["level"]
    self.max_output_length = config["training"].get(
        "max_output_length", None)
    self.lowercase = config["data"].get("lowercase", False)
    self.model_data = config["model"]

    # load the vocabularies
    src_vocab_file = os.path.realpath(
        os.path.join(app.config['JOEYNMT_FOLDER'],
                     config["data"]["src_vocab"]))
    trg_vocab_file = os.path.realpath(
        os.path.join(app.config['JOEYNMT_FOLDER'],
                     config["data"]["trg_vocab"]))
    self.src_vocab = build_vocab(field="src", vocab_file=src_vocab_file,
                                 dataset=None, max_size=-1, min_freq=0)
    self.trg_vocab = build_vocab(field="trg", vocab_file=trg_vocab_file,
                                 dataset=None, max_size=-1, min_freq=0)

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in config.keys():
        self.beam_size = config["testing"].get("beam_size", 0)
        self.beam_alpha = config["testing"].get("alpha", -1)
    else:
        self.beam_size = 1
        self.beam_alpha = -1

    self.logger = logging.getLogger(__name__)

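# Hedged usage sketch (not part of the original source): the enclosing class
# name "TranslationEngine" and the engine path are assumptions for
# illustration; only the attributes set in __init__ above are taken from the
# code.
#   engine = TranslationEngine(engine_path="/srv/engines/en-de", is_admin=False)
#   print(engine.ckpt, engine.beam_size, engine.use_cuda)
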
def translate(cfg_file: str,
              ckpt: str,
              output_path: str = None,
              batch_class: Batch = Batch,
              n_best: int = 1) -> None:
    """
    Interactive translation function.
    Loads model from checkpoint and translates either the stdin input or
    asks for input to translate interactively. The input has to be
    pre-processed according to the data that the model was trained on,
    i.e. tokenized or split into subwords. Translations are printed to stdout.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output file
    :param batch_class: class type of batch
    :param n_best: amount of candidates to display
    """

    def _load_line_as_data(line):
        """ Create a dataset from one line via a temporary file. """
        # write src input to temporary file
        tmp_name = "tmp"
        tmp_suffix = ".src"
        tmp_filename = tmp_name + tmp_suffix
        with open(tmp_filename, "w") as tmp_file:
            tmp_file.write("{}\n".format(line))

        test_data = MonoDataset(path=tmp_name, ext=tmp_suffix, field=src_field)

        # remove temporary file
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)

        return test_data

    def _translate_data(test_data):
        """ Translates given dataset, using parameters from outer scope. """
        # pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
            hypotheses_raw, attention_scores = validate_on_data(
                model, data=test_data, batch_size=batch_size,
                batch_class=batch_class, batch_type=batch_type, level=level,
                max_output_length=max_output_length, eval_metric="",
                use_cuda=use_cuda, compute_loss=False, beam_size=beam_size,
                beam_alpha=beam_alpha, postprocess=postprocess,
                bpe_type=bpe_type, sacrebleu=sacrebleu, n_gpu=n_gpu,
                n_best=n_best)
        return hypotheses

    cfg = load_config(cfg_file)
    model_dir = cfg["training"]["model_dir"]

    _ = make_logger(model_dir, mode="translate")  # version string returned

    # when checkpoint is not specified, take oldest from model dir
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)

    # read vocabs
    src_vocab_file = cfg["data"].get("src_vocab", model_dir + "/src_vocab.txt")
    trg_vocab_file = cfg["data"].get("trg_vocab", model_dir + "/trg_vocab.txt")
    src_vocab = Vocabulary(file=src_vocab_file)
    trg_vocab = Vocabulary(file=trg_vocab_file)

    data_cfg = cfg["data"]
    level = data_cfg["level"]
    lowercase = data_cfg["lowercase"]

    tok_fun = lambda s: list(s) if level == "char" else s.split()

    src_field = Field(init_token=None, eos_token=EOS_TOKEN,
                      pad_token=PAD_TOKEN, tokenize=tok_fun,
                      batch_first=True, lower=lowercase,
                      unk_token=UNK_TOKEN, include_lengths=True)
    src_field.vocab = src_vocab

    # parse test args
    batch_size, batch_type, use_cuda, device, n_gpu, level, _, \
        max_output_length, beam_size, beam_alpha, postprocess, \
        bpe_type, sacrebleu, _, _ = parse_test_args(cfg, mode="translate")

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.to(device)

    if not sys.stdin.isatty():
        # input file given
        test_data = MonoDataset(path=sys.stdin, ext="", field=src_field)
        all_hypotheses = _translate_data(test_data)

        if output_path is not None:
            # write to outputfile if given

            def write_to_file(output_path_set, hypotheses):
                with open(output_path_set, mode="w", encoding="utf-8") \
                        as out_file:
                    for hyp in hypotheses:
                        out_file.write(hyp + "\n")
                logger.info("Translations saved to: %s.", output_path_set)

            if n_best > 1:
                for n in range(n_best):
                    file_name, file_extension = os.path.splitext(output_path)
                    write_to_file(
                        "{}-{}{}".format(
                            file_name, n,
                            file_extension if file_extension else ""),
                        [
                            all_hypotheses[i]
                            for i in range(n, len(all_hypotheses), n_best)
                        ])
            else:
                write_to_file("{}".format(output_path), all_hypotheses)
        else:
            # print to stdout
            for hyp in all_hypotheses:
                print(hyp)

    else:
        # enter interactive mode
        batch_size = 1
        batch_type = "sentence"
        while True:
            try:
                src_input = input("\nPlease enter a source sentence "
                                  "(pre-processed): \n")
                if not src_input.strip():
                    break

                # every line has to be made into dataset
                test_data = _load_line_as_data(line=src_input)
                hypotheses = _translate_data(test_data)

                print("JoeyNMT: Hypotheses ranked by score")
                for i, hyp in enumerate(hypotheses):
                    print("JoeyNMT #{}: {}".format(i + 1, hyp))

            except (KeyboardInterrupt, EOFError):
                print("\nBye.")
                break

def test(cfg_file,
         ckpt: str,
         batch_class: Batch = Batch,
         output_path: str = None,
         save_attention: bool = False,
         datasets: dict = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param batch_class: class type of batch
    :param output_path: path to output
    :param datasets: datasets to predict
    :param save_attention: whether to save the computed attention weights
    """
    cfg = load_config(cfg_file)
    model_dir = cfg["training"]["model_dir"]

    if len(logger.handlers) == 0:
        _ = make_logger(model_dir, mode="test")  # version string returned

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)
    try:
        step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
    except IndexError:
        step = "best"

    # load the data
    if datasets is None:
        _, dev_data, test_data, src_vocab, trg_vocab = load_data(
            data_cfg=cfg["data"], datasets=["dev", "test"])
        data_to_predict = {"dev": dev_data, "test": test_data}
    else:  # avoid to load data again
        data_to_predict = {"dev": datasets["dev"], "test": datasets["test"]}
        src_vocab = datasets["src_vocab"]
        trg_vocab = datasets["trg_vocab"]

    # parse test args
    batch_size, batch_type, use_cuda, device, n_gpu, level, eval_metric, \
        max_output_length, beam_size, beam_alpha, postprocess, \
        bpe_type, sacrebleu, decoding_description, tokenizer_info \
        = parse_test_args(cfg, mode="test")

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.to(device)

    # multi-gpu eval
    if n_gpu > 1 and not isinstance(model, torch.nn.DataParallel):
        model = _DataParallel(model)

    for data_set_name, data_set in data_to_predict.items():
        if data_set is None:
            continue

        dataset_file = cfg["data"][data_set_name] + "." + cfg["data"]["trg"]
        logger.info("Decoding on %s set (%s)...", data_set_name, dataset_file)

        # pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
            hypotheses_raw, attention_scores = validate_on_data(
                model, data=data_set, batch_size=batch_size,
                batch_class=batch_class, batch_type=batch_type, level=level,
                max_output_length=max_output_length, eval_metric=eval_metric,
                use_cuda=use_cuda, compute_loss=False, beam_size=beam_size,
                beam_alpha=beam_alpha, postprocess=postprocess,
                bpe_type=bpe_type, sacrebleu=sacrebleu, n_gpu=n_gpu)
        # pylint: enable=unused-variable

        if "trg" in data_set.fields:
            logger.info("%4s %s%s: %6.2f [%s]", data_set_name, eval_metric,
                        tokenizer_info, score, decoding_description)
        else:
            logger.info("No references given for %s -> no evaluation.",
                        data_set_name)

        if save_attention:
            if attention_scores:
                attention_name = "{}.{}.att".format(data_set_name, step)
                attention_path = os.path.join(model_dir, attention_name)
                logger.info(
                    "Saving attention plots. This might take a while..")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=data_set.src,
                                      indices=range(len(hypotheses)),
                                      output_prefix=attention_path)
                logger.info("Attention plots saved to: %s", attention_path)
            else:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not available "
                               "when using beam search. "
                               "Set beam_size to 1 for greedy decoding.")

        if output_path is not None:
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")
            logger.info("Translations saved to: %s", output_path_set)

def translate(cfg_file, ckpt: str, output_path: str = None) -> None:
    """
    Interactive translation function.
    Loads model from checkpoint and translates either the stdin input or
    asks for input to translate interactively.
    The input has to be pre-processed according to the data that the model
    was trained on, i.e. tokenized or split into subwords.
    Translations are printed to stdout.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output file
    """

    def _load_line_as_data(line):
        """ Create a dataset from one line via a temporary file. """
        # write src input to temporary file
        tmp_name = "tmp"
        tmp_suffix = ".src"
        tmp_filename = tmp_name + tmp_suffix
        with open(tmp_filename, "w") as tmp_file:
            tmp_file.write("{}\n".format(line))

        test_data = MonoDataset(path=tmp_name, ext=tmp_suffix, field=src_field)

        # remove temporary file
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)

        return test_data

    logger = make_logger()

    def _translate_data(test_data):
        """ Translates given dataset, using parameters from outer scope. """
        # pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
            hypotheses_raw, attention_scores = validate_on_data(
                model, data=test_data, batch_size=batch_size,
                batch_type=batch_type, level=level,
                max_output_length=max_output_length, eval_metric="",
                use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
                beam_alpha=beam_alpha, logger=logger, postprocess=postprocess)
        return hypotheses

    cfg = load_config(cfg_file)

    # when checkpoint is not specified, take oldest from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)

    batch_size = cfg["training"].get("eval_batch_size",
                                     cfg["training"].get("batch_size", 1))
    batch_type = cfg["training"].get(
        "eval_batch_type", cfg["training"].get("batch_type", "sentence"))
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # read vocabs
    src_vocab_file = cfg["data"].get(
        "src_vocab", cfg["training"]["model_dir"] + "/src_vocab.txt")
    trg_vocab_file = cfg["data"].get(
        "trg_vocab", cfg["training"]["model_dir"] + "/trg_vocab.txt")
    src_vocab = Vocabulary(file=src_vocab_file)
    trg_vocab = Vocabulary(file=trg_vocab_file)

    data_cfg = cfg["data"]
    level = data_cfg["level"]
    lowercase = data_cfg["lowercase"]

    tok_fun = lambda s: list(s) if level == "char" else s.split()

    src_field = Field(init_token=None, eos_token=EOS_TOKEN,
                      pad_token=PAD_TOKEN, tokenize=tok_fun,
                      batch_first=True, lower=lowercase,
                      unk_token=UNK_TOKEN, include_lengths=True)
    src_field.vocab = src_vocab

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, <2: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 1)
        beam_alpha = cfg["testing"].get("alpha", -1)
        postprocess = cfg["testing"].get("postprocess", True)
    else:
        beam_size = 1
        beam_alpha = -1
        postprocess = True

    if not sys.stdin.isatty():
        # input file given
        test_data = MonoDataset(path=sys.stdin, ext="", field=src_field)
        hypotheses = _translate_data(test_data)

        if output_path is not None:
            # write to outputfile if given
            output_path_set = "{}".format(output_path)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")
            logger.info("Translations saved to: %s.", output_path_set)
        else:
            # print to stdout
            for hyp in hypotheses:
                print(hyp)

    else:
        # enter interactive mode
        batch_size = 1
        batch_type = "sentence"
        while True:
            try:
                src_input = input("\nPlease enter a source sentence "
                                  "(pre-processed): \n")
                if not src_input.strip():
                    break

                # every line has to be made into dataset
                test_data = _load_line_as_data(line=src_input)
                hypotheses = _translate_data(test_data)
                print("JoeyNMT: {}".format(hypotheses[0]))

            except (KeyboardInterrupt, EOFError):
                print("\nBye.")
                break

def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False,
         logger: Logger = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    """

    if logger is None:
        logger = make_logger()

    cfg = load_config(cfg_file)

    # when checkpoint is not specified, take latest (best) from model dir
    step = "best"
    model_dir = cfg["training"]["model_dir"]
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    architecture = cfg["model"].get("architecture", "encoder-decoder")

    batch_size = cfg["training"].get("eval_batch_size",
                                     cfg["training"]["batch_size"])
    batch_type = cfg["training"].get(
        "eval_batch_type", cfg["training"].get("batch_type", "sentence"))
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # original encoder-decoder testing
    if architecture == "encoder-decoder":
        if "test" not in cfg["data"].keys():
            raise ValueError("Test data must be specified in config.")

        # load the data
        _, dev_data, test_data, src_vocab, trg_vocab = load_data(
            data_cfg=cfg["data"])
        data_to_predict = {"dev": dev_data, "test": test_data}

        # load model state from disk
        model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

        # build model and load parameters into it
        model = build_model(cfg["model"],
                            src_vocab=src_vocab,
                            trg_vocab=trg_vocab)
        model.load_state_dict(model_checkpoint["model_state"])

        if use_cuda:
            model.cuda()

        # whether to use beam search for decoding, 0: greedy decoding
        if "testing" in cfg.keys():
            beam_size = cfg["testing"].get("beam_size", 1)
            beam_alpha = cfg["testing"].get("alpha", -1)
            postprocess = cfg["testing"].get("postprocess", True)
        else:
            beam_size = 1
            beam_alpha = -1
            postprocess = True

        for data_set_name, data_set in data_to_predict.items():
            # pylint: disable=unused-variable
            score, loss, ppl, sources, sources_raw, references, hypotheses, \
                hypotheses_raw, attention_scores = validate_on_data(
                    model, data=data_set, batch_size=batch_size,
                    batch_type=batch_type, level=level,
                    max_output_length=max_output_length,
                    eval_metric=eval_metric, use_cuda=use_cuda,
                    loss_function=None, beam_size=beam_size,
                    beam_alpha=beam_alpha, logger=logger,
                    postprocess=postprocess)
            # pylint: enable=unused-variable

            if "trg" in data_set.fields:
                decoding_description = "Greedy decoding" if beam_size < 2 else \
                    "Beam search decoding with beam size = {} and alpha = {}".\
                    format(beam_size, beam_alpha)
                logger.info("%4s %s: %6.2f [%s]", data_set_name, eval_metric,
                            score, decoding_description)
            else:
                logger.info("No references given for %s -> no evaluation.",
                            data_set_name)

            if save_attention:
                if attention_scores:
                    attention_name = "{}.{}.att".format(data_set_name, step)
                    attention_path = os.path.join(model_dir, attention_name)
                    logger.info(
                        "Saving attention plots. This might take a while..")
                    store_attention_plots(attentions=attention_scores,
                                          targets=hypotheses_raw,
                                          sources=data_set.src,
                                          indices=range(len(hypotheses)),
                                          output_prefix=attention_path)
                    logger.info("Attention plots saved to: %s", attention_path)
                else:
                    logger.warning(
                        "Attention scores could not be saved. "
                        "Note that attention scores are not available "
                        "when using beam search. "
                        "Set beam_size to 1 for greedy decoding.")

            if output_path is not None:
                output_path_set = "{}.{}".format(output_path, data_set_name)
                with open(output_path_set, mode="w",
                          encoding="utf-8") as out_file:
                    for hyp in hypotheses:
                        out_file.write(hyp + "\n")
                logger.info("Translations saved to: %s", output_path_set)

    else:
        # unsupervised NMT testing
        if "src2trg_test" not in cfg["data"].keys() \
                or "trg2src_test" not in cfg["data"].keys():
            raise ValueError("Test data must be specified in config.")

        # load the data
        _, _, _, _, dev_src2trg, dev_trg2src, test_src2trg, test_trg2src, \
            src_vocab, trg_vocab, _ = load_unsupervised_data(
                data_cfg=cfg["data"])
        data_to_predict = {
            "src2trg": {
                "dev_src2trg": dev_src2trg,
                "test_src2trg": test_src2trg
            },
            "trg2src": {
                "dev_trg2src": dev_trg2src,
                "test_trg2src": test_trg2src
            }
        }

        # load model state from disk
        model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

        # build model and load parameters into it
        model = build_model(cfg["model"],
                            src_vocab=src_vocab,
                            trg_vocab=trg_vocab)
        assert isinstance(model, UnsupervisedNMTModel)
        model.src2src_translator.load_state_dict(
            model_checkpoint["src2src_model_state"])
        model.trg2trg_translator.load_state_dict(
            model_checkpoint["trg2trg_model_state"])
        model.src2trg_translator.load_state_dict(
            model_checkpoint["src2trg_model_state"])
        model.trg2src_translator.load_state_dict(
            model_checkpoint["trg2src_model_state"])

        if use_cuda:
            # move all four translators to GPU
            model.src2src_translator.cuda()
            model.trg2trg_translator.cuda()
            model.src2trg_translator.cuda()
            model.trg2src_translator.cuda()

        # whether to use beam search for decoding, 0: greedy decoding
        if "testing" in cfg.keys():
            beam_size = cfg["testing"].get("beam_size", 1)
            beam_alpha = cfg["testing"].get("alpha", -1)
            postprocess = cfg["testing"].get("postprocess", True)
        else:
            beam_size = 1
            beam_alpha = -1
            postprocess = True

        for translation_direction, dataset_dict in data_to_predict.items():
            # choose correct translator
            if translation_direction == "src2trg":
                model_to_use = model.src2trg_translator
            else:
                model_to_use = model.trg2src_translator

            for dataset_name, dataset in dataset_dict.items():
                score, loss, ppl, sources, sources_raw, references, \
                    hypotheses, hypotheses_raw, attention_scores = \
                    validate_on_data(
                        model_to_use, data=dataset, batch_size=batch_size,
                        batch_type=batch_type, level=level,
                        max_output_length=max_output_length,
                        eval_metric=eval_metric, use_cuda=use_cuda,
                        loss_function=None, beam_size=beam_size,
                        beam_alpha=beam_alpha, logger=logger,
                        postprocess=postprocess)

                if "trg" in dataset.fields:
                    decoding_description = "Greedy decoding" if beam_size < 2 \
                        else "Beam search decoding with beam size = {} " \
                             "and alpha = {}".format(beam_size, beam_alpha)
                    logger.info("%4s %s: %6.2f [%s]", dataset_name,
                                eval_metric, score, decoding_description)
                else:
                    logger.info("No references given for %s -> no evaluation.",
                                dataset_name)

                if save_attention:
                    if attention_scores:
                        attention_name = "{}.{}.att".format(dataset_name, step)
                        attention_path = os.path.join(model_dir,
                                                      attention_name)
                        logger.info("Saving attention plots. "
                                    "This might take a while..")
                        store_attention_plots(attentions=attention_scores,
                                              targets=hypotheses_raw,
                                              sources=dataset.src,
                                              indices=list(
                                                  range(len(hypotheses))),
                                              output_prefix=attention_path)
                        logger.info("Attention plots saved to: %s",
                                    attention_path)
                    else:
                        logger.warning(
                            "Attention scores could not be saved. "
                            "Note that attention scores are not available "
                            "when using beam search. "
                            "Set beam_size to 1 for greedy decoding.")

                if output_path is not None:
                    output_path_set = "{}.{}".format(output_path, dataset_name)
                    with open(output_path_set, mode="w",
                              encoding="utf-8") as out_file:
                        for hyp in hypotheses:
                            out_file.write(hyp + "\n")
                    logger.info("Translations saved to: %s", output_path_set)

def run_bot(model_dir, bpe_src_code=None, tokenize=None):
    """
    Start the bot. This means loading the model according to the config file.

    :param model_dir: Model directory of trained Joey NMT model.
    :param bpe_src_code: BPE codes for source side processing (optional).
    :param tokenize: If True, tokenize inputs with Moses tokenizer.
    :return:
    """
    cfg_file = model_dir + "/config.yaml"

    logger = logging.getLogger(__name__)

    # load the Joey configuration
    cfg = load_config(cfg_file)

    # load the checkpoint
    if "load_model" in cfg['training'].keys():
        ckpt = cfg['training']["load_model"]
    else:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))

    # prediction parameters from config
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    max_output_length = cfg["training"].get("max_output_length", None)
    lowercase = cfg["data"].get("lowercase", False)

    # load the vocabularies
    src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt"
    trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt"
    src_vocab = build_vocab(field="src", vocab_file=src_vocab_file,
                            dataset=None, max_size=-1, min_freq=0)
    trg_vocab = build_vocab(field="trg", vocab_file=trg_vocab_file,
                            dataset=None, max_size=-1, min_freq=0)

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 1
        beam_alpha = -1

    # pre-processing
    if tokenize is not None:
        src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"])
        trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"])
        # tokenize input
        tokenizer = lambda x: src_tokenizer.tokenize(x, return_str=True)
        detokenizer = lambda x: trg_tokenizer.detokenize(x.split(),
                                                         return_str=True)
    else:
        tokenizer = lambda x: x
        detokenizer = lambda x: x

    if bpe_src_code is not None and level == "bpe":
        # load bpe merge file
        merge_file = open(bpe_src_code, "r")
        bpe = apply_bpe.BPE(codes=merge_file)
        segmenter = lambda x: bpe.process_line(x.strip())
    elif level == "char":
        # split to chars
        segmenter = lambda x: list(x.strip())
    else:
        segmenter = lambda x: x.strip()

    # build model and load parameters into it
    model_checkpoint = load_checkpoint(ckpt, use_cuda)
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    print("Joey NMT model loaded successfully.")

    web_client = slack.WebClient(TOKEN, timeout=30)

    # get bot id
    bot_id = (web_client.api_call("auth.test")["user_id"].upper())

    # find bot channel id
    all_channels = web_client.api_call("conversations.list")["channels"]
    for c in all_channels:
        if c["name"] == BOT_CHANNEL:
            bot_channel_id = c["id"]

    slack_events_adapter = SlackEventAdapter(BOT_SIGNIN,
                                             endpoint="/slack/events")

    @slack_events_adapter.on("message")
    def handle_message(event_data):
        message = event_data["event"]
        if message.get("subtype") is None:
            channel = message["channel"]
            user = message["user"]
            text = message["text"].strip()
            if user != bot_id and message.get("subtype") is None:
                # translates all messages in its channel and mentions
                if channel == bot_channel_id or bot_id in text:
                    mention = "<@{}>".format(bot_id)
                    # TODO remove all possible mentions with regex
                    if mention in text:
                        parts = text.split(mention)
                        text = parts[0].strip() + parts[1].strip()
                    message = translate(text,
                                        beam_size=beam_size,
                                        beam_alpha=beam_alpha,
                                        level=level,
                                        lowercase=lowercase,
                                        max_output_length=max_output_length,
                                        model=model,
                                        postprocess=[detokenizer],
                                        preprocess=[tokenizer, segmenter],
                                        src_vocab=src_vocab,
                                        trg_vocab=trg_vocab,
                                        use_cuda=use_cuda,
                                        logger=logger)
                    web_client.chat_postMessage(text=message,
                                                token=TOKEN,
                                                channel=channel)

    # Error events
    @slack_events_adapter.on("error")
    def error_handler(err):
        print("ERROR: " + str(err))

    slack_events_adapter.start(port=3000)

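# Hedged usage sketch: starting the Slack bot for a trained model. The model
# directory and BPE codes path are placeholder assumptions; TOKEN, BOT_SIGNIN
# and BOT_CHANNEL are expected to be defined at module level by the hosting
# script, as used in run_bot() above.
#   run_bot(model_dir="models/wmt_ende",
#           bpe_src_code="models/wmt_ende/bpe.codes",
#           tokenize=True)
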
def test(cfg_file,
         ckpt,
         output_path: str = None,
         save_attention: bool = False,
         logger: logging.Logger = None,
         data_to_test: str = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    """
    if logger is None:
        logger = logging.getLogger(__name__)
        FORMAT = '%(asctime)-15s - %(message)s'
        logging.basicConfig(format=FORMAT)
        logger.setLevel(level=logging.DEBUG)

    cfg = load_config(cfg_file)
    train_cfg = cfg["training"]
    data_cfg = cfg["data"]
    test_cfg = cfg["testing"]

    if "test" not in data_cfg.keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take latest (best) from model dir
    model_dir = train_cfg["model_dir"]
    if ckpt is None:
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError("No checkpoint at {}.".format(model_dir))
    try:
        step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
    except IndexError:
        step = "best"

    batch_size = train_cfg.get("eval_batch_size", train_cfg["batch_size"])
    batch_type = train_cfg.get("batch_type", "sentence")
    use_cuda = train_cfg.get("use_cuda", False)
    assert "level" in data_cfg or "trg_level" in data_cfg
    trg_level = data_cfg.get("level", data_cfg["trg_level"])

    eval_metric = train_cfg["eval_metric"]
    if isinstance(eval_metric, str):
        eval_metric = [eval_metric]
    max_output_length = test_cfg.get("max_output_length",
                                     train_cfg.get("max_output_length", None))

    # load the data
    data = load_data(data_cfg)
    dev_data = data["dev_data"]
    test_data = data["test_data"]
    vocabs = data["vocabs"]

    data_to_predict = {"dev": dev_data, "test": test_data}
    if data_to_test is not None:
        assert data_to_test in data_to_predict
        data_to_predict = {data_to_test: data_to_predict[data_to_test]}

    # load model state from disk
    if isinstance(ckpt, str):
        ckpt = [ckpt]
    models = []
    for c in ckpt:
        model_checkpoint = load_checkpoint(c, use_cuda=use_cuda)

        # build model and load parameters into it
        m = build_model(cfg["model"], vocabs=vocabs)
        m.load_state_dict(model_checkpoint["model_state"])
        models.append(m)
    model = models[0] if len(models) == 1 else EnsembleModel(*models)

    if use_cuda:
        model.cuda()  # should this exist?

    # whether to use beam search for decoding, 0: greedy decoding
    beam_sizes = beam_alpha = 0
    if "testing" in cfg.keys():
        beam_sizes = test_cfg.get("beam_size", 0)
        beam_alpha = test_cfg.get("alpha", 0)
    beam_sizes = [beam_sizes] if isinstance(beam_sizes, int) else beam_sizes

    assert beam_alpha >= 0, "Use alpha >= 0"

    method = test_cfg.get("method", None)
    max_hyps = test_cfg.get("max_hyps", 1)  # only for the enumerate thing

    validate_by_label = test_cfg.get("validate_by_label",
                                     train_cfg.get("validate_by_label", False))
    forced_sparsity = test_cfg.get("forced_sparsity",
                                   train_cfg.get("forced_sparsity", False))

    for beam_size in beam_sizes:
        for data_set_name, data_set in data_to_predict.items():
            valid_results = validate_on_data(
                model, data=data_set, batch_size=batch_size,
                batch_type=batch_type, trg_level=trg_level,
                max_output_length=max_output_length,
                eval_metrics=eval_metric, use_cuda=use_cuda,
                loss_function=None, beam_size=beam_size,
                beam_alpha=beam_alpha, save_attention=save_attention,
                validate_by_label=validate_by_label,
                forced_sparsity=forced_sparsity,
                method=method, max_hyps=max_hyps,
                break_at_p=test_cfg.get("break_at_p", 1.0),
                break_at_argmax=test_cfg.get("break_at_argmax", False),
                short_depth=test_cfg.get("short_depth", 0))
            scores = valid_results[0]
            hypotheses, hypotheses_raw = valid_results[2:4]
            scores_by_label = valid_results[5]

            if "trg" in data_set.fields:
                log_scores(logger, data_set_name, scores, scores_by_label,
                           beam_size, beam_alpha)
            else:
                logger.info("No references given for %s -> no evaluation.",
                            data_set_name)

            attention_scores = valid_results[4]
            if save_attention and not attention_scores:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not "
                               "available when using beam search. "
                               "Set beam_size to 0 for greedy decoding.")
            if save_attention and attention_scores:
                # currently this will break for transformers
                logger.info("Saving attention plots. This might be slow.")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=[s for s in data_set.src],
                                      indices=range(len(hypotheses)),
                                      model_dir=model_dir,
                                      steps=step,
                                      data_set_name=data_set_name)
                logger.info("Attention plots saved to: %s", model_dir)

            if output_path is not None:
                output_path_set = "{}.{}".format(output_path, data_set_name)
                with open(output_path_set, mode="w",
                          encoding="utf-8") as outf:
                    for hyp in hypotheses:
                        outf.write(hyp + "\n")
                logger.info("Translations saved to: %s", output_path_set)

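# Hedged usage sketch: passing a list of checkpoints to the test() above makes
# it wrap the loaded models in an EnsembleModel; the paths are placeholders.
#   test("configs/small.yaml",
#        ckpt=["models/run1/best.ckpt", "models/run2/best.ckpt"],
#        output_path="models/ensemble/hyps")
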
def test(cfg_file, ckpt: str, output_path: str = None,
         save_attention: bool = False) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param save_attention: whether to save the computed attention weights
    """

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take oldest from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    batch_size = cfg["training"]["batch_size"]
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    _, dev_data, test_data, src_vocab, trg_vocab = load_data(
        data_cfg=cfg["data"])
    data_to_predict = {"dev": dev_data, "test": test_data}

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 0
        beam_alpha = -1

    for data_set_name, data_set in data_to_predict.items():
        if data_set is None:  # e.g. no valid_data
            continue

        # pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
            hypotheses_raw, attention_scores, logprobs = validate_on_data(
                model, data=data_set, batch_size=batch_size, level=level,
                max_output_length=max_output_length, eval_metric=eval_metric,
                use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
                beam_alpha=beam_alpha)
        # pylint: enable=unused-variable

        if "trg" in data_set.fields:
            decoding_description = "Greedy decoding" if beam_size == 0 else \
                "Beam search decoding with beam size = {} and alpha = {}".\
                format(beam_size, beam_alpha)
            print("{:4s} {}: {} [{}]".format(data_set_name, eval_metric,
                                             score, decoding_description))
        else:
            print("No references given for {} -> no evaluation.".format(
                data_set_name))

        if attention_scores is not None and save_attention:
            attention_path = "{}/{}.{}.att".format(model_dir, data_set_name,
                                                   step)
            print("Attention plots saved to: {}.xx".format(attention_path))
            store_attention_plots(attentions=attention_scores,
                                  targets=hypotheses_raw,
                                  sources=[s for s in data_set.src],
                                  indices=range(len(hypotheses)),
                                  output_prefix=attention_path)

        if output_path is not None:
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                if cfg["data"].get("post_process", True):
                    for hyp in hypotheses:
                        out_file.write(hyp + "\n")
                else:
                    for hyp in hypotheses_raw:
                        out_file.write(" ".join(hyp) + "\n")
            print("Translations saved to: {}".format(output_path_set))

def train(cfg_file: str) -> None:
    """
    Main training function. After training, also test on test data if given.

    :param cfg_file: path to configuration yaml file
    """
    cfg = load_config(cfg_file)

    # set the random seed
    set_seed(seed=cfg["training"].get("random_seed", 42))

    # load the data
    train_data, dev_data, test_data, src_vocab, trg_vocab = load_data(
        data_cfg=cfg["data"])

    # build an encoder-decoder model
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)

    # for training management, e.g. early stopping and model selection
    trainer = TrainManager(model=model, config=cfg)

    # store copy of original training config in model dir
    shutil.copy2(cfg_file, trainer.model_dir + "/config.yaml")

    # log all entries of config
    log_cfg(cfg, trainer.logger)

    log_data_info(train_data=train_data,
                  valid_data=dev_data,
                  test_data=test_data,
                  src_vocab=src_vocab,
                  trg_vocab=trg_vocab,
                  logging_function=trainer.logger.info)

    # store the vocabs
    src_vocab_file = "{}/src_vocab.txt".format(cfg["training"]["model_dir"])
    src_vocab.to_file(src_vocab_file)
    trg_vocab_file = "{}/trg_vocab.txt".format(cfg["training"]["model_dir"])
    trg_vocab.to_file(trg_vocab_file)

    # train the model
    trainer.train_and_validate(train_data=train_data, valid_data=dev_data)

    # test the model with the best checkpoint
    if test_data is not None:

        # load checkpoint
        if trainer.best_ckpt_iteration > 0:
            checkpoint_path = "{}/{}.ckpt".format(trainer.model_dir,
                                                  trainer.best_ckpt_iteration)
        else:
            # for save_checkpoint by save_freq
            checkpoint_path = get_latest_checkpoint(trainer.model_dir)

        try:
            trainer.init_from_checkpoint(checkpoint_path)
        except AssertionError:
            trainer.logger.warning("Checkpoint %s does not exist. "
                                   "Skipping testing.", checkpoint_path)
            if trainer.best_ckpt_iteration == 0 \
                    and trainer.best_ckpt_score in [np.inf, -np.inf]:
                trainer.logger.warning(
                    "It seems like no checkpoint was written, "
                    "since no improvement was obtained over the initial "
                    "model.")
            return

        # generate hypotheses for test data
        if "testing" in cfg.keys():
            beam_size = cfg["testing"].get("beam_size", 0)
            beam_alpha = cfg["testing"].get("alpha", -1)
            return_logp = cfg["testing"].get("return_logp", False)
        else:
            beam_size = 0
            beam_alpha = -1
            return_logp = False

        # pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
            hypotheses_raw, attention_scores, log_probs = validate_on_data(
                data=test_data, batch_size=trainer.batch_size,
                eval_metric=trainer.eval_metric, level=trainer.level,
                max_output_length=trainer.max_output_length, model=model,
                use_cuda=trainer.use_cuda, loss_function=None,
                beam_size=beam_size, beam_alpha=beam_alpha,
                return_logp=return_logp)

        if "trg" in test_data.fields:
            decoding_description = "Greedy decoding" if beam_size == 0 else \
                "Beam search decoding with beam size = {} and alpha = {}"\
                .format(beam_size, beam_alpha)
            trainer.logger.info("Test data result: %f %s [%s]", score,
                                trainer.eval_metric, decoding_description)
        else:
            trainer.logger.info(
                "No references given for %s.%s -> no evaluation.",
                cfg["data"]["test"], cfg["data"]["src"])

        output_path_set = "{}/{}.{}".format(trainer.model_dir, "test",
                                            cfg["data"]["trg"])
        with open(output_path_set, mode="w", encoding="utf-8") as f:
            for h in hypotheses:
                f.write("{}\n".format(h))
        trainer.logger.info("Test translations saved to: %s", output_path_set)

        if return_logp:
            output_path_set_logp = output_path_set + ".logp"
            with open(output_path_set_logp, mode="w", encoding="utf-8") as f:
                for l in log_probs:
                    f.write("{}\n".format(l))
            trainer.logger.info("Test log probs saved to: %s",
                                output_path_set_logp)

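# Hedged usage sketch: train() and test() are usually driven from the command
# line; the module path, config name and --output_path flag reflect the
# standard JoeyNMT layout and are assumptions here.
#   python -m joeynmt train configs/small.yaml
#   python -m joeynmt test configs/small.yaml --output_path models/small/hyps
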
def test(cfg_file, ckpt: str = None, output_path: str = None,
         save_attention: bool = False):
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file:
    :param ckpt:
    :param output_path:
    :param save_attention:
    :return:
    """

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take oldest from model dir
    if ckpt is None:
        dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(dir)
        try:
            step = ckpt.split(dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    batch_size = cfg["training"]["batch_size"]
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    # TODO load only test data
    train_data, dev_data, test_data, src_vocab, trg_vocab = \
        load_data(cfg=cfg)

    # TODO specify this differently
    data_to_predict = {"dev": dev_data, "test": test_data}

    # load model state from disk
    model_checkpoint = load_model_from_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 0
        beam_alpha = -1

    for data_set_name, data_set in data_to_predict.items():
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
            hypotheses_raw, attention_scores = validate_on_data(
                model, data=data_set, batch_size=batch_size, level=level,
                max_output_length=max_output_length, eval_metric=eval_metric,
                use_cuda=use_cuda, criterion=None, beam_size=beam_size,
                beam_alpha=beam_alpha)

        if "trg" in data_set.fields:
            decoding_description = "Greedy decoding" if beam_size == 0 else \
                "Beam search decoding with beam size = {} and alpha = {}".\
                format(beam_size, beam_alpha)
            print("{:4s} {}: {} [{}]".format(data_set_name, eval_metric,
                                             score, decoding_description))
        else:
            print("No references given for {} -> no evaluation.".format(
                data_set_name))

        if attention_scores is not None and save_attention:
            attention_path = "{}/{}.{}.att".format(dir, data_set_name, step)
            print("Attention plots saved to: {}.xx".format(attention_path))
            store_attention_plots(attentions=attention_scores,
                                  targets=hypotheses_raw,
                                  sources=[s for s in data_set.src],
                                  idx=range(len(hypotheses)),
                                  output_prefix=attention_path)

        if output_path is not None:
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as f:
                for h in hypotheses:
                    f.write(h + "\n")
            print("Translations saved to: {}".format(output_path_set))

def translate(cfg_file, ckpt: str, output_path: str = None) -> None:
    """
    Interactive translation function.
    Loads model from checkpoint and translates either the stdin input or
    asks for input to translate interactively.
    The input has to be pre-processed according to the data that the model
    was trained on, i.e. tokenized or split into subwords.
    Translations are printed to stdout.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    """

    def _load_line_as_data(line):
        """ Create a dataset from one line via a temporary file. """
        # write src input to temporary file
        tmp_name = "tmp"
        tmp_suffix = ".src"
        tmp_filename = tmp_name + tmp_suffix
        with open(tmp_filename, "w") as tmp_file:
            tmp_file.write("{}\n".format(line))

        test_data = MonoDataset(path=tmp_name, ext=tmp_suffix, field=src_field)

        # remove temporary file
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)

        return test_data

    def _translate_data(test_data):
        """ Translates given dataset, using parameters from outer scope. """
        # pylint: disable=unused-variable
        _, _, _, _, hypotheses, _, _, _, _ = validate_on_data(
            model, data=test_data, batch_size=batch_size, level=level,
            max_output_length=max_output_length, eval_metrics=[],
            use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
            beam_alpha=beam_alpha)
        return hypotheses

    cfg = load_config(cfg_file)

    # when checkpoint is not specified, take oldest from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)

    data_cfg = cfg["data"]
    batch_size = cfg["training"].get("batch_size", 1)
    use_cuda = cfg["training"].get("use_cuda", False)
    max_output_length = cfg["training"].get("max_output_length", None)

    # read vocabs
    # This will need to change: currently translate does not support inflection
    src_vocab_file = data_cfg.get(
        "src_vocab", cfg["training"]["model_dir"] + "/src_vocab.txt")
    trg_vocab_file = data_cfg.get(
        "trg_vocab", cfg["training"]["model_dir"] + "/trg_vocab.txt")
    src_vocab = Vocabulary(file=src_vocab_file)
    trg_vocab = Vocabulary(file=trg_vocab_file)
    vocabs = {"src": src_vocab, "trg": trg_vocab}

    level = data_cfg["level"]
    lowercase = data_cfg["lowercase"]

    tok_fun = list if level == "char" else str.split

    src_field = Field(init_token=None, eos_token=EOS_TOKEN,
                      pad_token=PAD_TOKEN, tokenize=tok_fun,
                      batch_first=True, lower=lowercase,
                      unk_token=UNK_TOKEN, include_lengths=True)
    src_field.vocab = src_vocab

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], vocabs=vocabs)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", 0)
    else:
        beam_size = 0
        beam_alpha = 0
    if beam_alpha < 0:
        raise ConfigurationError("alpha for length penalty should be >= 0")

    if not sys.stdin.isatty():
        # file given
        test_data = MonoDataset(path=sys.stdin, ext="", field=src_field)
        hypotheses = _translate_data(test_data)

        if output_path is not None:
            output_path_set = "{}".format(output_path)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")
            print("Translations saved to: {}".format(output_path_set))
        else:
            for hyp in hypotheses:
                print(hyp)

    else:
        # enter interactive mode
        batch_size = 1
        while True:
            try:
                src_input = input("\nPlease enter a source sentence "
                                  "(pre-processed): \n")
                if not src_input.strip():
                    break

                # every line has to be made into dataset
                test_data = _load_line_as_data(line=src_input)
                hypotheses = _translate_data(test_data)
                print("JoeyNMT: {}".format(hypotheses[0]))

            except (KeyboardInterrupt, EOFError):
                print("\nBye.")
                break

def test(cfg_file,
         ckpt,  # str or list now
         output_path: str = None,
         save_attention: bool = False,
         logger: logging.Logger = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    """
    if logger is None:
        logger = logging.getLogger(__name__)
        FORMAT = '%(asctime)-15s - %(message)s'
        logging.basicConfig(format=FORMAT)
        logger.setLevel(level=logging.DEBUG)

    cfg = load_config(cfg_file)
    train_cfg = cfg["training"]
    data_cfg = cfg["data"]
    test_cfg = cfg["testing"]

    if "test" not in data_cfg.keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        model_dir = train_cfg["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError("No checkpoint found in directory {}."
                                    .format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    batch_size = train_cfg.get("eval_batch_size", train_cfg["batch_size"])
    batch_type = train_cfg.get("eval_batch_type",
                               train_cfg.get("batch_type", "sentence"))
    use_cuda = train_cfg.get("use_cuda", False)
    src_level = data_cfg.get("src_level", data_cfg.get("level", "word"))
    trg_level = data_cfg.get("trg_level", data_cfg.get("level", "word"))

    eval_metric = train_cfg["eval_metric"]
    if isinstance(eval_metric, str):
        eval_metric = [eval_metric]
    attn_metric = train_cfg.get("attn_metric", [])
    if isinstance(attn_metric, str):
        attn_metric = [attn_metric]
    max_output_length = train_cfg.get("max_output_length", None)

    # load the data
    data = load_data(data_cfg)
    dev_data = data["dev_data"]
    test_data = data["test_data"]
    vocabs = data["vocabs"]

    data_to_predict = {"dev": dev_data, "test": test_data}

    # load model state from disk
    if isinstance(ckpt, str):
        ckpt = [ckpt]
    individual_models = []
    for c in ckpt:
        model_checkpoint = load_checkpoint(c, use_cuda=use_cuda)

        # build model and load parameters into it
        m = build_model(cfg["model"], vocabs=vocabs)
        m.load_state_dict(model_checkpoint["model_state"])
        individual_models.append(m)
    if len(individual_models) == 1:
        model = individual_models[0]
    else:
        model = EnsembleModel(*individual_models)

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_sizes = test_cfg.get("beam_size", 0)
        beam_alpha = test_cfg.get("alpha", 0)
    else:
        beam_sizes = 0
        beam_alpha = 0
    if isinstance(beam_sizes, int):
        beam_sizes = [beam_sizes]

    assert beam_alpha >= 0, "Use alpha >= 0"

    for beam_size in beam_sizes:
        for data_set_name, data_set in data_to_predict.items():
            # pylint: disable=unused-variable
            scores, sources, sources_raw, references, hypotheses, \
                hypotheses_raw, attention_scores, scores_by_lang, by_lang = \
                validate_on_data(
                    model, data=data_set, batch_size=batch_size,
                    batch_type=batch_type, src_level=src_level,
                    trg_level=trg_level,
                    max_output_length=max_output_length,
                    eval_metrics=eval_metric, attn_metrics=attn_metric,
                    use_cuda=use_cuda, loss_function=None,
                    beam_size=beam_size, beam_alpha=beam_alpha,
                    save_attention=save_attention)
            # pylint: enable=unused-variable

            if "trg" in data_set.fields:
                labeled_scores = sorted(scores.items())
                eval_report = ", ".join("{}: {:.5f}".format(n, v)
                                        for n, v in labeled_scores)
                decoding_description = "Greedy decoding" if beam_size == 0 \
                    else "Beam search decoding with beam size = {} and " \
                         "alpha = {}".format(beam_size, beam_alpha)
                logger.info("%4s %s: [%s]", data_set_name, eval_report,
                            decoding_description)
                if scores_by_lang is not None:
                    for metric, scores in scores_by_lang.items():
                        # make a report
                        lang_report = [metric]
                        numbers = sorted(scores.items())
                        lang_report.extend(["{}: {:.5f}".format(k, v)
                                            for k, v in numbers])
                        logger.info("\n\t".join(lang_report))
            else:
                logger.info("No references given for %s -> no evaluation.",
                            data_set_name)

            if save_attention:
                # currently this will break for transformers
                if attention_scores:
                    # attention_name = "{}.{}.att".format(data_set_name, step)
                    # attention_path = os.path.join(model_dir, attention_name)
                    logger.info(
                        "Saving attention plots. This might take a while..")
                    store_attention_plots(attentions=attention_scores,
                                          targets=hypotheses_raw,
                                          sources=[s for s in data_set.src],
                                          indices=range(len(hypotheses)),
                                          model_dir=model_dir,
                                          steps=step,
                                          data_set_name=data_set_name)
                    logger.info("Attention plots saved to: %s", model_dir)
                else:
                    logger.warning("Attention scores could not be saved. "
                                   "Note that attention scores are not "
                                   "available when using beam search. "
                                   "Set beam_size to 0 for greedy decoding.")

            if output_path is not None:
                for lang, ref_and_hyp in by_lang.items():
                    if lang is None:
                        # monolingual case
                        output_path_set = "{}.{}".format(output_path,
                                                         data_set_name)
                    else:
                        output_path_set = "{}.{}.{}".format(output_path, lang,
                                                            data_set_name)
                    if isinstance(ref_and_hyp[0], str):
                        hyps = ref_and_hyp
                    else:
                        hyps = [hyp for (ref, hyp) in ref_and_hyp]
                    with open(output_path_set, mode="w",
                              encoding="utf-8") as out_file:
                        for hyp in hyps:
                            out_file.write(hyp + "\n")
                    logger.info("Translations saved to: %s", output_path_set)

def __init__(self,
             cfg_file,
             ckpt: str,
             output_path: str = None,
             logger: Logger = None) -> None:
    """
    Recover the saved model, specified as in configuration.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param logger: log output to this logger (creates new logger if not set)
    """

    if logger is None:
        logger = make_logger()

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # print(cfg.keys())
    if "dqn" not in cfg.keys():
        raise ValueError("dqn data must be specified in config.")
    self.model_dir = cfg["training"]["model_dir"]

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    self.batch_size = 1  # **
    self.batch_type = cfg["training"].get(
        "eval_batch_type", cfg["training"].get("batch_type", "sentence"))
    self.use_cuda = cfg["training"].get("use_cuda", False)
    self.level = cfg["data"]["level"]
    self.eval_metric = cfg["training"]["eval_metric"]
    self.max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    train_data, dev_data, test_data, src_vocab, trg_vocab = load_data(
        data_cfg=cfg["data"])

    # loading the DQN parameters:
    self.sample_size = cfg["dqn"]["sample_size"]
    self.lr = cfg["dqn"].get("lr", 0.01)
    self.egreed_max = cfg["dqn"].get("egreed_max", 0.9)
    self.egreed_min = cfg["dqn"].get("egreed_min", 0.01)
    self.gamma_max = cfg["dqn"].get("gamma_max", 0.9)
    self.gamma_min = cfg["dqn"].get("gamma_min", 0.5)
    self.nu_iter = cfg["dqn"]["nu_iter"]
    self.mem_cap = cfg["dqn"]["mem_cap"]
    self.beam_min = cfg["dqn"]["beam_min"]
    self.beam_max = cfg["dqn"]["beam_max"]
    self.state_type = cfg["dqn"]["state_type"]

    if self.state_type == 'hidden':
        self.state_size = cfg["model"]["encoder"]["hidden_size"] * 2
    else:
        self.state_size = cfg["model"]["encoder"]["hidden_size"]

    self.actions_size = len(src_vocab)
    self.gamma = None

    print("Sample size: ", self.sample_size)
    print("State size: ", self.state_size)
    print("Action size: ", self.actions_size)
    self.epochs = cfg["dqn"]["epochs"]

    # init the two Q networks (eval net and target net)
    self.eval_net = Net(self.state_size, self.actions_size)
    self.target_net = Net(self.state_size, self.actions_size)

    # following the algorithm
    self.target_net.load_state_dict(self.eval_net.state_dict())

    self.learn_step_counter = 0
    self.memory_counter = 0
    self.size_memory1 = self.state_size * 2 + 2 + 1
    self.memory = np.zeros((self.mem_cap, self.size_memory1))
    self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=self.lr)
    self.loss_func = nn.MSELoss()

    # other parameters
    self.bos_index = trg_vocab.stoi[BOS_TOKEN]
    self.eos_index = trg_vocab.stoi[EOS_TOKEN]
    self.pad_index = trg_vocab.stoi[PAD_TOKEN]

    self.data_to_train_dqn = {"train": train_data}
    # self.data_to_train_dqn = {"test": test_data}
    # self.data_to_dev = {"dev": dev_data}
    self.data_to_dev = {"dev": dev_data}
    # self.data_to_train_dqn = {"train": train_data,
    #                           "dev": dev_data, "test": test_data}

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=self.use_cuda)

    # build model and load parameters into it
    self.model = build_model(cfg["model"],
                             src_vocab=src_vocab,
                             trg_vocab=trg_vocab)
    self.model.load_state_dict(model_checkpoint["model_state"])

    if self.use_cuda:
        self.model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    beam_size = 1
    beam_alpha = -1

    # other, less important parameters
    self.index_fin = None
    path_tensorboard = self.model_dir + "/tensorboard_DQN/"
    self.tb_writer = SummaryWriter(log_dir=path_tensorboard, purge_step=0)
    self.dev_network_count = 0
    print(cfg["dqn"]["reward_type"])

    # reward function selection:
    if cfg["dqn"]["reward_type"] == "bleu_diff":
        print("You select the reward based on the Bleu score differences")
        self.Reward = self.Reward_bleu_diff
    elif cfg["dqn"]["reward_type"] == "bleu_lin":
        print("You select the reward based on the linear Bleu scores, "
              "and several punishments")
        self.Reward = self.Reward_lin
    else:
        print("You select the reward based on the final score on "
              "the last state")
        self.Reward = self.Reward_bleu_fin

def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False,
         logger: Logger = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    """
    if logger is None:
        logger = make_logger()

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    batch_size = cfg["training"].get("eval_batch_size",
                                     cfg["training"]["batch_size"])
    batch_type = cfg["training"].get(
        "eval_batch_type", cfg["training"].get("batch_type", "sentence"))
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    _, dev_data, test_data, src_vocab, trg_vocab = load_data(
        data_cfg=cfg["data"])
    data_to_predict = {"dev": dev_data, "test": test_data}

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 1)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 1
        beam_alpha = -1

    for data_set_name, data_set in data_to_predict.items():
        # pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
            hypotheses_raw, attention_scores, \
            valid_hypotheses_full_n_best, scores = validate_on_data(
                model, data=data_set, batch_size=batch_size,
                batch_type=batch_type, level=level,
                max_output_length=max_output_length, eval_metric=eval_metric,
                use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
                beam_alpha=beam_alpha, logger=logger)
        # pylint: enable=unused-variable

        if "trg" in data_set.fields:
            decoding_description = "Greedy decoding" if beam_size < 2 else \
                "Beam search decoding with beam size = {} and alpha = {}".\
                format(beam_size, beam_alpha)
            logger.info("%4s %s: %6.2f [%s]", data_set_name, eval_metric,
                        score, decoding_description)
        else:
            logger.info("No references given for %s -> no evaluation.",
                        data_set_name)

        if save_attention:
            if attention_scores:
                attention_name = "{}.{}.att".format(data_set_name, step)
                attention_path = os.path.join(model_dir, attention_name)
                logger.info(
                    "Saving attention plots. This might take a while..")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=data_set.src,
                                      indices=range(len(hypotheses)),
                                      output_prefix=attention_path)
                logger.info("Attention plots saved to: %s", attention_path)
            else:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not available "
                               "when using beam search. "
                               "Set beam_size to 1 for greedy decoding.")

        if output_path is not None:
            '''
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")

            # sy_debug
            alt_output = "{}.n_best.{}".format(output_path, data_set_name)
            with open(alt_output, mode="w", encoding="utf-8") as out_file:
                for n in valid_hypotheses_full_n_best:
                    out_file.write(n + "\n")
            '''

            # @Shiya: exporting hypothesis and associated score to .csv file
            # TODO: write_to_csv(hyps, scores)
            def write_to_csv(hyps: list, scores: list):
                import csv
                output_file = "{}.n_csv.{}".format(output_path, data_set_name)
                with open(output_file, mode="w", newline='',
                          encoding="utf-8") as out_file:
                    fieldnames = ['Predictions', 'Scores']
                    writer = csv.DictWriter(out_file, fieldnames=fieldnames)
                    writer.writeheader()
                    for prediction, score in zip(hyps, scores):
                        writer.writerow({
                            fieldnames[0]: prediction,
                            fieldnames[1]: score
                        })

            write_to_csv(valid_hypotheses_full_n_best, scores)

def test(cfg_file, ckpt: str, output_path: str = None, save_attention: bool = False, logger: logging.Logger = None) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output :param save_attention: whether to save the computed attention weights :param logger: log output to this logger (creates new logger if not set) """ if logger is None: logger = logging.getLogger(__name__) FORMAT = '%(asctime)-15s - %(message)s' logging.basicConfig(format=FORMAT) logger.setLevel(level=logging.DEBUG) cfg = load_config(cfg_file) if "test" not in cfg["data"].keys(): raise ValueError("Test data must be specified in config.") # when checkpoint is not specified, take latest (best) from model dir if ckpt is None: model_dir = cfg["training"]["model_dir"] ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError( "No checkpoint found in directory {}.".format(model_dir)) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" batch_size = cfg["training"]["batch_size"] batch_type = cfg["training"].get("batch_type", "sentence") use_cuda = cfg["training"].get("use_cuda", False) level = cfg["data"]["level"] eval_metric = cfg["training"]["eval_metric"] max_output_length = cfg["training"].get("max_output_length", None) # load the data _, dev_data, test_data, src_vocab, trg_vocab = load_data( data_cfg=cfg["data"]) data_to_predict = {"dev": dev_data, "test": test_data} # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab) model.load_state_dict(model_checkpoint["model_state"]) if use_cuda: model.cuda() # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 0) beam_alpha = cfg["testing"].get("alpha", -1) else: beam_size = 0 beam_alpha = -1 for data_set_name, data_set in data_to_predict.items(): #pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores = validate_on_data( model, data=data_set, batch_size=batch_size, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric=eval_metric, use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha) #pylint: enable=unused-variable if "trg" in data_set.fields: decoding_description = "Greedy decoding" if beam_size == 0 else \ "Beam search decoding with beam size = {} and alpha = {}".\ format(beam_size, beam_alpha) logger.info("%4s %s: %6.2f [%s]", data_set_name, eval_metric, score, decoding_description) else: logger.info("No references given for %s -> no evaluation.", data_set_name) if save_attention: if attention_scores: attention_name = "{}.{}.att".format(data_set_name, step) attention_path = os.path.join(model_dir, attention_name) logger.info( "Saving attention plots. This might take a while..") store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=[s for s in data_set.src], indices=range(len(hypotheses)), output_prefix=attention_path) logger.info("Attention plots saved to: %s", attention_path) else: logger.warning("Attention scores could not be saved. " "Note that attention scores are not available " "when using beam search. 
" "Set beam_size to 0 for greedy decoding.") if output_path is not None: output_path_set = "{}.{}".format(output_path, data_set_name) with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") logger.info("Translations saved to: %s", output_path_set)
def load_model(model_dir, bpe_src_code=None, tokenize=None): """ Start the bot. This means loading the model according to the config file. :param model_dir: Model directory of trained Joey NMT model. :param bpe_src_code: BPE codes for source side processing (optional). :param tokenize: If True, tokenize inputs with Moses tokenizer. :return: """ conf = {} cfg_file = model_dir + "/config.yaml" logger = logging.getLogger(__name__) conf["logger"] = logger # load the Joey configuration cfg = load_config(cfg_file) # load the checkpoint if "load_model" in cfg['training'].keys(): ckpt = cfg['training']["load_model"] else: ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError("No checkpoint found in directory {}." .format(model_dir)) # prediction parameters from config conf["use_cuda"] = cfg["training"].get("use_cuda", False) conf["level"] = cfg["data"]["level"] conf["max_output_length"] = cfg["training"].get("max_output_length", None) conf["lowercase"] = cfg["data"].get("lowercase", False) # load the vocabularies src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt" trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt" conf["src_vocab"] = build_vocab(field="src", vocab_file=src_vocab_file, dataset=None, max_size=-1, min_freq=0) conf["trg_vocab"] = build_vocab(field="trg", vocab_file=trg_vocab_file, dataset=None, max_size=-1, min_freq=0) # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): conf["beam_size"] = cfg["testing"].get("beam_size", 0) conf["beam_alpha"] = cfg["testing"].get("alpha", -1) else: conf["beam_size"] = 1 conf["beam_alpha"] = -1 # pre-processing if tokenize is not None: src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"]) trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"]) # tokenize input tokenizer = lambda x: src_tokenizer.tokenize(x, return_str=True) detokenizer = lambda x: trg_tokenizer.detokenize( x.split(), return_str=True) else: tokenizer = lambda x: x detokenizer = lambda x: x if bpe_src_code is not None and conf["level"] == "bpe": # load bpe merge file merge_file = open(bpe_src_code, "r") bpe = apply_bpe.BPE(codes=merge_file) segmenter = lambda x: bpe.process_line(x.strip()) elif conf["level"] == "char": # split to chars segmenter = lambda x: list(x.strip()) else: segmenter = lambda x: x.strip() conf["preprocess"] = [tokenizer, segmenter] conf["postprocess"] = [detokenizer] # build model and load parameters into it model_checkpoint = load_checkpoint(ckpt, conf["use_cuda"]) model = build_model(cfg["model"], src_vocab=conf["src_vocab"], trg_vocab=conf["trg_vocab"]) model.load_state_dict(model_checkpoint["model_state"]) if conf["use_cuda"]: model.cuda() conf["model"] = model print("Joey NMT model loaded successfully.") return conf
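# Hypothetical usage of the conf dict returned by load_model() above: the two
# callables in conf["preprocess"] (Moses tokenization, then BPE/char/word
# segmentation) are applied in order to a raw input line, and
# conf["postprocess"][0] detokenizes the model output.  Paths are placeholders
# and the decoding call itself is elided, since it depends on batching code
# that is not shown here.
conf = load_model("models/en-de", bpe_src_code="models/en-de/bpe.codes", tokenize=True)
tokenize_fn, segment_fn = conf["preprocess"]
detokenize_fn = conf["postprocess"][0]
prepared_src = segment_fn(tokenize_fn("A raw source sentence."))
# ... translate prepared_src with conf["model"], then: detokenize_fn(hypothesis)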
def load_model(self, src_language, trg_language, domain, bpe_src_code=None, tokenize=None): """ Load the model for the given source/target language pair and domain. """ # model_dir = "{}-{}".format(self._model_dir_prefix, trg_language) model_dir = f"{self._model_dir_prefix}{src_language}-{trg_language}-{domain}" # Load the checkpoint. ckpt_path = os.path.join(model_dir, 'model.ckpt') # Load the vocabularies. src_vocab_path = os.path.join(model_dir, 'src_vocab.txt') trg_vocab_path = os.path.join(model_dir, 'trg_vocab.txt') # Load the config. config_path = os.path.join(model_dir, 'config_orig.yaml') # Adjust config. config = load_config(config_path) new_config_file = os.path.join(model_dir, 'config.yaml') config = self._update_config(config, src_vocab_path, trg_vocab_path, model_dir, ckpt_path) with open(new_config_file, 'w') as cfile: yaml.dump(config, cfile) # print('Loaded model for {}-{}.'.format(self._src_language, trg_language)) print('Loaded model for {}-{}.'.format(src_language, trg_language)) conf = {} logger = logging.getLogger(__name__) conf["logger"] = logger # load the Joey configuration cfg = load_config(new_config_file) # load the checkpoint if "load_model" in cfg['training'].keys(): ckpt = cfg['training']["load_model"] else: ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError("No checkpoint found in directory {}." .format(model_dir)) # prediction parameters from config conf["use_cuda"] = cfg["training"].get("use_cuda", False) if torch.cuda.is_available() else False conf["level"] = cfg["data"]["level"] conf["max_output_length"] = cfg["training"].get("max_output_length", None) conf["lowercase"] = cfg["data"].get("lowercase", False) # load the vocabularies src_vocab_file = cfg["training"]["model_dir"] + "/src_vocab.txt" trg_vocab_file = cfg["training"]["model_dir"] + "/trg_vocab.txt" conf["src_vocab"] = build_vocab(field="src", vocab_file=src_vocab_file, dataset=None, max_size=-1, min_freq=0) conf["trg_vocab"] = build_vocab(field="trg", vocab_file=trg_vocab_file, dataset=None, max_size=-1, min_freq=0) # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): conf["beam_size"] = cfg["testing"].get("beam_size", 0) conf["beam_alpha"] = cfg["testing"].get("alpha", -1) else: conf["beam_size"] = 1 conf["beam_alpha"] = -1 # pre-processing if tokenize is not None: src_tokenizer = MosesTokenizer(lang=cfg["data"]["src"]) trg_tokenizer = MosesDetokenizer(lang=cfg["data"]["trg"]) # tokenize input tokenizer = lambda x: src_tokenizer.tokenize(x, return_str=True) detokenizer = lambda x: trg_tokenizer.detokenize( x.split(), return_str=True) else: tokenizer = lambda x: x detokenizer = lambda x: x if bpe_src_code is not None and conf["level"] == "bpe": # load bpe merge file merge_file = open(bpe_src_code, "r") bpe = apply_bpe.BPE(codes=merge_file) segmenter = lambda x: bpe.process_line(x.strip()) elif conf["level"] == "char": # split to chars segmenter = lambda x: list(x.strip()) else: segmenter = lambda x: x.strip() conf["preprocess"] = [tokenizer, segmenter] conf["postprocess"] = [detokenizer] # build model and load parameters into it model_checkpoint = load_checkpoint(ckpt, conf["use_cuda"]) model = build_model(cfg["model"], src_vocab=conf["src_vocab"], trg_vocab=conf["trg_vocab"]) model.load_state_dict(model_checkpoint["model_state"]) # ipdb.set_trace() if conf["use_cuda"]: model.cuda() conf["model"] = model print("Joey NMT model loaded successfully.") return conf
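# The _update_config helper called above is not shown in this excerpt.  A
# minimal sketch of what it plausibly does - pointing the copied config at the
# per-language-pair vocabularies, model directory and checkpoint before it is
# dumped back to config.yaml - might look like this (an assumption about its
# behaviour, not the project's actual code):
def _update_config(self, config, src_vocab_path, trg_vocab_path, model_dir, ckpt_path):
    config["data"]["src_vocab"] = src_vocab_path
    config["data"]["trg_vocab"] = trg_vocab_path
    config["training"]["model_dir"] = model_dir
    config["training"]["load_model"] = ckpt_path
    return config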
def test(cfg_file, ckpt: str, output_path: str = None, save_attention: bool = False, logger: logging.Logger = None) -> None: """ Main test function. Handles loading a model from checkpoint, generating translations and storing them and attention plots. :param cfg_file: path to configuration file :param ckpt: path to checkpoint to load :param output_path: path to output :param save_attention: whether to save the computed attention weights :param logger: log output to this logger (creates new logger if not set) """ if logger is None: logger = logging.getLogger(__name__) FORMAT = '%(asctime)-15s - %(message)s' logging.basicConfig(format=FORMAT) logger.setLevel(level=logging.DEBUG) cfg = load_config(cfg_file) if "test" not in cfg["data"].keys(): raise ValueError("Test data must be specified in config.") # when checkpoint is not specified, take latest (best) from model dir if ckpt is None: model_dir = cfg["training"]["model_dir"] ckpt = get_latest_checkpoint(model_dir) if ckpt is None: raise FileNotFoundError( "No checkpoint found in directory {}.".format(model_dir)) try: step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0] except IndexError: step = "best" batch_size = cfg["training"].get("eval_batch_size", cfg["training"]["batch_size"]) batch_type = cfg["training"].get( "eval_batch_type", cfg["training"].get("batch_type", "sentence")) use_cuda = cfg["training"].get("use_cuda", False) level = cfg["data"]["level"] eval_metric = cfg["training"]["eval_metric"] max_output_length = cfg["training"].get("max_output_length", None) # load the data _, dev_data, test_data,\ src_vocab, trg_vocab,\ _, dev_kb, test_kb,\ _, dev_kb_lookup, test_kb_lookup, \ _, dev_kb_lengths, test_kb_lengths,\ _, dev_kb_truvals, test_kb_truvals, \ trv_vocab, canon_fun,\ dev_data_canon, test_data_canon \ = load_data( data_cfg=cfg["data"] ) report_entf1_on_canonicals = cfg["training"].get( "report_entf1_on_canonicals", False) kb_task = (test_kb != None) data_to_predict = {"dev": dev_data, "test": test_data} # load model state from disk model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda) # build model and load parameters into it model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab, trv_vocab=trv_vocab, canonizer=canon_fun) model.load_state_dict(model_checkpoint["model_state"]) # FIXME for the moment, for testing, try overriding model.canonize with canon_fun from test functions loaded data # should hopefully not be an issue with gridsearch results... 
if use_cuda: model.cuda() # move to GPU # whether to use beam search for decoding, 0: greedy decoding if "testing" in cfg.keys(): beam_size = cfg["testing"].get("beam_size", 0) beam_alpha = cfg["testing"].get("alpha", -1) else: beam_size = 0 beam_alpha = -1 for data_set_name, data_set in data_to_predict.items(): if data_set_name == "dev": kb_info = [ dev_kb, dev_kb_lookup, dev_kb_lengths, dev_kb_truvals, dev_data_canon ] elif data_set_name == "test": kb_info = [ test_kb, test_kb_lookup, test_kb_lengths, test_kb_truvals, test_data_canon ] else: raise ValueError((data_set_name, data_set)) #pylint: disable=unused-variable score, loss, ppl, sources, sources_raw, references, hypotheses, \ hypotheses_raw, attention_scores, kb_att_scores, ent_f1, ent_mcc = validate_on_data( model, data=data_set, batch_size=batch_size, batch_type=batch_type, level=level, max_output_length=max_output_length, eval_metric=eval_metric, use_cuda=use_cuda, loss_function=None, beam_size=beam_size, beam_alpha=beam_alpha, kb_task=kb_task, valid_kb=kb_info[0], valid_kb_lkp=kb_info[1], valid_kb_lens=kb_info[2], valid_kb_truvals=kb_info[3], valid_data_canon=kb_info[4], report_on_canonicals=report_entf1_on_canonicals ) """ batch_size=self.eval_batch_size, data=valid_data, eval_metric=self.eval_metric, level=self.level, model=self.model, use_cuda=self.use_cuda, max_output_length=self.max_output_length, loss_function=self.loss, beam_size=0, batch_type=self.eval_batch_type, kb_task=kb_task, valid_kb=valid_kb, valid_kb_lkp=valid_kb_lkp, valid_kb_lens=valid_kb_lens, valid_kb_truvals=valid_kb_truvals """ #pylint: enable=unused-variable if "trg" in data_set.fields: decoding_description = "Greedy decoding" if beam_size == 0 else \ "Beam search decoding with beam size = {} and alpha = {}".\ format(beam_size, beam_alpha) logger.info("%4s %s: %6.2f f1: %6.2f mcc: %6.2f [%s]", data_set_name, eval_metric, score, ent_f1, ent_mcc, decoding_description) else: logger.info("No references given for %s -> no evaluation.", data_set_name) if save_attention: if attention_scores: attention_name = "{}.{}.att".format(data_set_name, step) attention_path = os.path.join(model_dir, attention_name) logger.info( "Saving attention plots. This might take a while..") store_attention_plots(attentions=attention_scores, targets=hypotheses_raw, sources=data_set.src, indices=range(len(hypotheses)), output_prefix=attention_path) logger.info("Attention plots saved to: %s", attention_path) if kb_att_scores: kb_att_name = "{}.{}.kbatt".format(data_set_name, step) kb_att_path = os.path.join(model_dir, kb_att_name) store_attention_plots( attentions=kb_att_scores, targets=hypotheses_raw, sources=list(data_set.kbsrc), #TODO indices=range(len(hypotheses)), output_prefix=kb_att_path, kb_info=(dev_kb_lookup, dev_kb_lengths, list(data_set.kbtrg))) logger.info("KB Attention plots saved to: %s", kb_att_path) else: logger.warning("Attention scores could not be saved. " "Note that attention scores are not available " "when using beam search. " "Set beam_size to 0 for greedy decoding.") if output_path is not None: output_path_set = "{}.{}".format(output_path, data_set_name) with open(output_path_set, mode="w", encoding="utf-8") as out_file: for hyp in hypotheses: out_file.write(hyp + "\n") logger.info("Translations saved to: %s", output_path_set)
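# For reference: the beam_alpha value read from cfg["testing"]["alpha"] in the
# functions above is a length-penalty exponent for beam search.  JoeyNMT-style
# decoders typically normalize hypothesis log-probabilities with the GNMT
# length penalty when alpha > 0 (a sketch under that assumption; alpha <= 0
# leaves scores unnormalized):
def gnmt_length_penalty(length, alpha):
    return ((5.0 + length) / 6.0) ** alpha

def normalized_score(log_prob, length, alpha):
    return log_prob / gnmt_length_penalty(length, alpha) if alpha > 0 else log_prob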