def parse_opt(opt, model_root):
    """Turn a user-supplied option dict into a validated translator namespace.

    Args:
        opt (dict): options supplied by the caller; must contain "models"
            (a path or list of paths relative to ``model_root``).
        model_root (str): directory prepended to every model path.

    Returns:
        argparse.Namespace: validated translate options, with ``cuda`` set
        from ``gpu``.
    """
    argv = []
    parser = ArgumentParser()
    onmt.opts.model_opts(parser)
    onmt.opts.translate_opts(parser)

    models = opt["models"]
    if not isinstance(models, (list, tuple)):
        models = [models]
    opt["models"] = [os.path.join(model_root, model) for model in models]
    # The translate parser requires -src even though it is unused here.
    opt["src"] = "dummy_src"

    for (k, v) in opt.items():
        if k == "models":
            argv += ["-model"]
            argv += [str(model) for model in v]
        elif type(v) == bool:
            # Fix: only emit the store_true flag when the value is True.
            # The original appended "-%s" unconditionally, so passing
            # e.g. {"verbose": False} silently *enabled* the option.
            if v:
                argv += ["-%s" % k]
        else:
            argv += ["-%s" % k, str(v)]

    opt = parser.parse_args(argv)
    ArgumentParser.validate_translate_opts(opt)
    opt.cuda = opt.gpu > -1
    return opt
def _get_opt(self, language, method):
    """Load translate options from the YAML config matching (language, method).

    Args:
        language (str): 'en' or 'de'.
        method (str): 'bert' or 'conv'.

    Returns:
        argparse.Namespace: parsed translate options.

    Raises:
        ValueError: if the (language, method) combination is unsupported.
    """
    parser = ArgumentParser(description='summarizer.py')
    config_files = {
        ('en', 'bert'): 'en_bert_transformer.yml',
        ('en', 'conv'): 'en_conv_transformer.yml',
        ('de', 'bert'): 'de_bert_transformer.yml',
        ('de', 'conv'): 'de_conv_transformer.yml',
    }
    try:
        config_file = config_files[(language, method)]
    except KeyError:
        sys.stderr.write(
            f"Method '{method}' for language '{language}' is not supported."
        )
        # Fix: the original fell through after writing to stderr, leaving
        # config_file unbound and crashing below with a NameError.
        raise ValueError(
            f"Method '{method}' for language '{language}' is not supported."
        )
    # Hack to load parser arguments: temporarily rewrite sys.argv so the
    # -config option is picked up by parse_args(), then restore it.
    prec_argv = sys.argv
    sys.argv = [sys.argv[0]]
    sys.argv.extend(['-config', 'config/' + config_file])
    opts.config_opts(parser)
    opts.translate_opts(parser)
    opt = parser.parse_args()
    sys.argv = prec_argv
    return opt
def translate_file(input_filename, output_filename):
    """Translate a fixed source file with a fixed checkpoint and return scores.

    NOTE(review): input_filename/output_filename are accepted but unused —
    the model, src, and output paths below are hard-coded; confirm intent.

    Returns:
        tuple: (scores, predictions) from the last shard translated.
    """
    parser = ArgumentParser(description='translation')
    opts.config_opts(parser)
    opts.translate_opts(parser)
    # Plain string (the original used an f-string with no placeholders).
    args = '''-model m16_step_44000.pt -src source_products_16.txt -output op_16x_4400_50_10.txt -batch_size 128 -replace_unk -max_length 200 -verbose -beam_size 50 -n_best 10 -min_length 5'''
    # Fix: parse_args expects a list of tokens; passing the raw string made
    # argparse iterate over individual characters and fail.
    opt = parser.parse_args(args.split())
    translator = build_translator(opt, report_score=True)
    src_shards = split_corpus(opt.src, opt.shard_size)
    tgt_shards = repeat(None)
    shard_pairs = zip(src_shards, tgt_shards)
    for i, (src_shard, tgt_shard) in enumerate(shard_pairs):
        scores, predictions = translator.translate(src=src_shard,
                                                   tgt=tgt_shard,
                                                   src_dir=opt.src_dir,
                                                   batch_size=opt.batch_size,
                                                   attn_debug=opt.attn_debug)
    return scores, predictions
def _get_parser():
    """Build and return the argument parser used for translation.

    Returns:
        ArgumentParser: parser with config and translate options registered.
    """
    parser = ArgumentParser(description='translate.py')
    opts.config_opts(parser)
    opts.translate_opts(parser)
    # Fix: removed leftover debug `print(parser.parse_args())`, which parsed
    # sys.argv as a side effect (and could abort on missing required args)
    # before the caller ever used the parser.
    return parser
def translate_file(input_filename, output_filename):
    """Translate MCTS_data/<input_filename>.txt with the retrosynthesis model.

    Args:
        input_filename (str): basename (no extension) under MCTS_data/ to read.
        output_filename (str): basename (no extension) under MCTS_data/ to write.

    Returns:
        tuple: (scores, predictions) from the last shard translated.
    """
    parser = ArgumentParser(description='translation')
    opts.config_opts(parser)
    opts.translate_opts(parser)
    args = f'''-model Experiments/Checkpoints/retrosynthesis_augmented_medium/retrosynthesis_aug_medium_model_step_100000.pt -src MCTS_data/{input_filename}.txt -output MCTS_data/{output_filename}.txt -batch_size 128 -replace_unk -max_length 200 -verbose -beam_size 10 -n_best 10 -min_length 5 -gpu 0'''
    # Fix: parse_args expects a list of tokens; passing the raw string made
    # argparse iterate over individual characters and fail.
    opt = parser.parse_args(args.split())
    translator = build_translator(opt, report_score=True)
    src_shards = split_corpus(opt.src, opt.shard_size)
    tgt_shards = repeat(None)
    shard_pairs = zip(src_shards, tgt_shards)
    for i, (src_shard, tgt_shard) in enumerate(shard_pairs):
        scores, predictions = translator.translate(src=src_shard,
                                                   tgt=tgt_shard,
                                                   src_dir=opt.src_dir,
                                                   batch_size=opt.batch_size,
                                                   attn_debug=opt.attn_debug)
    return scores, predictions
def main():
    """Parse config/model/global options and pickle the namespace to disk."""
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.model_opts(parser)
    opts.global_opts(parser)
    opt = parser.parse_args()
    # Persist the parsed namespace next to this module for later reuse.
    opt_path = os.path.join(dir_path, 'opt_data')
    with open(opt_path, 'wb') as handle:
        pickle.dump(opt, handle)
def __init__(self):
    """Parse translate options, build the translator, and prime MeCab.

    NOTE(review): unlike the sibling constructors in this file, only
    config_opts is registered here (no translate_opts) — confirm that is
    intentional before validate_translate_opts is relied upon.
    """
    parser = ArgumentParser()
    opts.config_opts(parser)
    self.opt = parser.parse_args()
    ArgumentParser.validate_translate_opts(self.opt)
    self.translator = build_translator(self.opt, report_score=True)
    # Wakati-gaki (word segmentation) tagger.
    self.mecab = Mecab.Tagger("-Owakati")
    # Fix: `parce` is not a Tagger method and raised AttributeError; the
    # priming call is `parse("")` (cf. the other constructors in this file).
    self.mecab.parse("")
def load_model(self, path=None):
    """Load a translation model checkpoint into ``self.model``.

    Args:
        path (str | None): path to the .pt checkpoint, relative to the
            current working directory or absolute.
    """
    if path is None or not os.path.exists(
            os.path.abspath(os.path.join(os.getcwd(), path))):
        print("No model present at the specified path : {}".format(path))
        # Fix: the original printed the error but fell through and tried to
        # build a translator from the missing/None path anyway.
        return
    parser = ArgumentParser(description='translate.py')
    opts.config_opts(parser)
    opts.translate_opts(parser)
    opt = parser.parse_args(["--model", path])
    self.model = build_translator(opt, report_score=True)
    return
def __init__(self):
    """Build the translator from command-line options and set up MeCab."""
    # Load the model based on the options given on the command line.
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    self.opt = parser.parse_args()
    ArgumentParser.validate_translate_opts(self.opt)
    self.translator = build_translator(self.opt, report_score=True)
    # Use MeCab for word segmentation (wakati-gaki); prime it once.
    self.mecab = MeCab.Tagger("-Owakati")
    self.mecab.parse("")
    # Dictionary holding each speaker's previous response.
    self.prev_uttr_dict = {}
def __init__(self):
    """Build the translator with fixed decoding options and set up MeCab."""
    # Boilerplate: register the option sets, then parse a fixed arg vector.
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    fixed_args = [
        "-model", "../models/model.pt",
        "-src", "None",
        "-replace_unk",
        "--beam_size", "10",
        "--min_length", "7",
        "--block_ngram_repeat", "2",
    ]
    self.opt = parser.parse_args(args=fixed_args)
    ArgumentParser.validate_translate_opts(self.opt)
    self.translator = build_translator(self.opt, report_score=True)
    # Use MeCab for word segmentation; prime it once.
    self.mecab = MeCab.Tagger("-Owakati")
    self.mecab.parse("")
def _build_translator(args):
    """Initializes a seq2seq translator model.

    Args:
        args (list[str]): command-line style translate options.

    Returns:
        tuple: (translator, opt) — the built translator and its options.
    """
    from onmt.utils.parse import ArgumentParser
    import onmt.opts as opts
    from onmt.translate.translator import build_translator

    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    opt = parser.parse_args(args=args)
    ArgumentParser.validate_translate_opts(opt)
    return build_translator(opt, report_score=False), opt
def parse_opt(self, opt):
    """Parse the option set passed by the user using `onmt.opts`

    Args:
        opt (dict): Options passed by the user

    Returns:
        opt (argparse.Namespace): full set of options for the Translator
    """
    # Temporarily hijack sys.argv so parse_args() sees our synthetic CLI.
    saved_argv = sys.argv
    sys.argv = sys.argv[:1]
    parser = ArgumentParser()
    onmt.opts.translate_opts(parser)

    model_list = opt['models']
    if not isinstance(model_list, (list, tuple)):
        model_list = [model_list]
    opt['models'] = [
        os.path.join(self.model_root, name) for name in model_list
    ]
    # The translate parser requires -src; supply a placeholder.
    opt['src'] = "dummy_src"

    for key, value in opt.items():
        if key == 'models':
            sys.argv.append('-model')
            sys.argv.extend(str(name) for name in value)
        elif type(value) == bool:
            # only true bool should be parsed
            if value is True:
                sys.argv.append('-%s' % key)
        else:
            sys.argv.extend(['-%s' % key, str(value)])

    opt = parser.parse_args()
    opt.alignment_heads = 8
    ArgumentParser.validate_translate_opts(opt)
    opt.cuda = opt.gpu > -1
    sys.argv = saved_argv
    return opt
def onmt_translator_builder(my_opts, s0_model_path, logger=None):
    """Build an ONMT translator from a checkpoint and extra options.

    Args:
        my_opts: option container converted to CLI tokens via opts_to_list.
        s0_model_path (str): path to the model checkpoint.
        logger: optional logger; falls back to print() when None.

    Returns:
        The built translator.
    """
    def _report(message):
        # Route progress messages through the logger when one is supplied.
        if logger is not None:
            logger.info(message)
        else:
            print(message)

    _report('Building ONMT translator with model from ' + s0_model_path)
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    arglist = ['-model', s0_model_path] + opts_to_list(my_opts)
    print(arglist)
    opt = parser.parse_args(arglist)
    ArgumentParser.validate_translate_opts(opt)
    translator = build_translator(opt, report_score=True, logger=logger)
    _report('Finished building ONMT translator.')
    return translator
def translate_trained_model(n_latent, data_path, output_dir,
                            trained_model_path, use_segments=False,
                            max_segments=10):
    """Run translate.main for one trained model with default params applied.

    Derives the source path from data_path's directory and the prediction
    output path from output_dir's grandparent directory.
    """
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    opt = parser.parse_args('')

    # Start from the defaults, then layer the per-run overrides on top.
    settings = dict(translate.DEFAULT_TRANSLATE_PARAMS)
    settings['models'] = [trained_model_path]
    settings['src'] = '/'.join(data_path.split('/')[:-1]) + '/src-test.txt'
    settings['output_dir'] = '/'.join(output_dir.split('/')[:-2]) + '/preds'
    settings['n_latent'] = n_latent
    settings['use_segments'] = use_segments
    settings['max_segments'] = max_segments
    vars(opt).update(settings)

    translate.main(opt)
def parse_opt(self, opt):
    """Parse the option set passed by the user using `onmt.opts`

    Args:
        opt (dict): Options passed by the user

    Returns:
        opt (argparse.Namespace): full set of options for the Translator
    """
    # Temporarily hijack sys.argv so parse_args() sees our synthetic CLI.
    prec_argv = sys.argv
    sys.argv = sys.argv[:1]
    parser = ArgumentParser()
    onmt.opts.translate_opts(parser)

    models = opt['models']
    if not isinstance(models, (list, tuple)):
        models = [models]
    opt['models'] = [os.path.join(self.model_root, model)
                     for model in models]
    # The translate parser requires -src; supply a placeholder.
    opt['src'] = "dummy_src"

    for (k, v) in opt.items():
        if k == 'models':
            sys.argv += ['-model']
            sys.argv += [str(model) for model in v]
        elif type(v) == bool:
            # Fix: only emit the store_true flag when the value is True.
            # The original appended '-%s' unconditionally, so passing
            # e.g. {'verbose': False} silently *enabled* the option
            # (cf. the sibling parse_opt in this file with the same guard).
            if v is True:
                sys.argv += ['-%s' % k]
        else:
            sys.argv += ['-%s' % k, str(v)]

    opt = parser.parse_args()
    ArgumentParser.validate_translate_opts(opt)
    opt.cuda = opt.gpu > -1
    sys.argv = prec_argv
    return opt
def _parse_opt(opt):
    """Convert a user option dict into a validated translate namespace.

    Args:
        opt (dict): options supplied by the caller; may contain "models".

    Returns:
        argparse.Namespace: validated translate options with ``cuda`` set.
    """
    # Temporarily hijack sys.argv so parse_args() sees our synthetic CLI.
    prec_argv = sys.argv
    sys.argv = sys.argv[:1]
    parser = ArgumentParser()
    onmt.opts.translate_opts(parser)
    # The translate parser requires -src; supply a placeholder.
    opt['src'] = "dummy_src"
    opt['replace_unk'] = True
    for (k, v) in opt.items():
        if k == 'models':
            sys.argv += ['-model']
            sys.argv += [str(model) for model in v]
        elif type(v) == bool:
            # Fix: only emit the store_true flag when the value is True.
            # The original appended '-%s' unconditionally, so a False value
            # silently *enabled* the option.
            if v:
                sys.argv += ['-%s' % k]
        else:
            sys.argv += ['-%s' % k, str(v)]
    opt = parser.parse_args()
    ArgumentParser.validate_translate_opts(opt)
    opt.cuda = opt.gpu > -1
    sys.argv = prec_argv
    return opt
# NOTE(review): the statements before the __main__ guard reference `opt`,
# `src_nfeats`, and `tgt_nfeats`, which are not defined in this span — this
# looks like the tail of a preprocess `main(opt)` function whose header is
# outside the visible chunk; confirm against the full file.
fields = inputters.get_fields(opt.data_type, src_nfeats, tgt_nfeats,
                              dynamic_dict=opt.dynamic_dict,
                              src_truncate=opt.src_seq_length_trunc,
                              tgt_truncate=opt.tgt_seq_length_trunc)
src_reader = inputters.str2reader[opt.data_type].from_opt(opt)
# Target side is always read as text regardless of the source data type.
tgt_reader = inputters.str2reader["text"].from_opt(opt)
logger.info("Building & saving training data...")
train_dataset_files = build_save_dataset('train', fields, src_reader,
                                         tgt_reader, opt)
# Validation data is optional: built only when both sides are provided.
if opt.valid_src and opt.valid_tgt:
    logger.info("Building & saving validation data...")
    build_save_dataset('valid', fields, src_reader, tgt_reader, opt)
logger.info("Building & saving vocabulary...")
build_save_vocab(train_dataset_files, fields, opt)

if __name__ == "__main__":
    parser = ArgumentParser(description='preprocess.py')
    opts.config_opts(parser)
    opts.preprocess_opts(parser)
    opt = parser.parse_args()
    main(opt)
def __init__(self, model_dir):
    """Load an extended model directory: checkpoint, config.json, and opts.

    Args:
        model_dir (str): directory containing ``translation_model/*.pt``
            and ``config.json``.

    Raises:
        ValueError: if ``translation_model`` does not contain exactly one
            .pt checkpoint file.
    """
    # Model dir
    self._model_dir = model_dir

    # Locate the single .pt checkpoint under translation_model/.
    model_files = []
    for file in os.listdir(f'{self._model_dir}/translation_model'):
        if file.endswith(".pt"):
            model_files.append(
                os.path.join(model_dir, 'translation_model', file))
    if len(model_files) != 1:
        # Fix: corrected "sould" -> "should" in the error message.
        msg = f"Extended model {self._model_dir} should have one .pt file. {len(model_files)}"
        raise ValueError(msg)
    model_file = model_files[0]

    # Load config from json file
    config_path = os.path.join(model_dir, "config.json")
    with open(config_path) as f:
        config = json.load(f)

    # Langs
    self._src_lang = config[self.SRC_LANG]
    self._tgt_lang = config[self.TGT_LANG]

    # Online learning config is optional; fall back to defaults.
    if self.ONLINE_LEARNING in config:
        self._online_learning = _OnlineLearningConfig(
            config[self.ONLINE_LEARNING])
    else:
        self._online_learning = _OnlineLearningConfig()

    # Create a parser for train and translate options.  resolve lets the
    # explicit add_argument calls below override the registered options.
    parser = ArgumentParser(description='pangeanmt options',
                            conflict_handler='resolve')
    _translate_opts(parser)
    _train_opts(parser)
    _model_opts(parser)
    # --src argument is not used
    parser.add_argument('--src', '-src', required=False,
                        help="This argument isn't used!")
    # --data argument is not used
    parser.add_argument('--data', '-data', required=False,
                        help="This argument isn't used!")
    # --seed Overwrite default
    parser.add_argument('--seed', '-seed', required=False, default=829,
                        help="Seed")

    # Build the argument vector from the config's "opts" mapping.
    args = ['--model', model_file]
    for k, v in config['opts'].items():
        args.append('--' + k)
        if v is not None:
            if k == 'model':
                # Model paths in the config are relative to model_dir.
                v = os.path.join(model_dir, v)
            args.append(str(v))
    self._opts = parser.parse_args(args)
def __build_translator(self, model_addr, src_addr):
    """Construct a translator for the given model checkpoint and source file."""
    arg_parser = ArgumentParser()
    opts.config_opts(arg_parser)
    opts.translate_opts(arg_parser)
    parsed = arg_parser.parse_args(['-model', model_addr, '-src', src_addr])
    return build_translator(parsed, report_score=False)
import onmt.opts as opts from onmt.utils.parse import ArgumentParser from pyknp import Juman jumanpp = Juman() parser = ArgumentParser() opts.config_opts(parser) opts.translate_opts(parser) opt = parser.parse_args([ '-model', sys.argv[1], '-src', 'dummy.txt', '-output', 'dummy.txt', '-replace_unk', ]) ArgumentParser.validate_translate_opts(opt) translator = build_translator(opt, report_score=False) def answer(translator, utterance): result = jumanpp.analysis(utterance) src_shard = [[mrph.midasi for mrph in result.mrph_list()]] tgt_shard = [['']] _, all_predictions = translator.translate(src=src_shard, tgt=tgt_shard,
from onmt.utils.parse import ArgumentParser
import onmt.opts as opts

# Register the config and translate option sets.
parser = ArgumentParser()
opts.config_opts(parser)
opts.translate_opts(parser)

# Fixed argument vector for the giga half-split checkpoint.
cli_args = [
    '-batch_size', '20',
    '-beam_size', '10',
    '-model',
    '/home/hansonlu/links/data/giga-models/giga_halfsplit_pt1_nocov_step_59156_valacc48.57_ppl15.51.pt',
    '-src', '/home/hansonlu/myOpenNMT/data/giga/small_input.txt',
    '-seed', '-1',
]
opt = parser.parse_args(cli_args)
print(opt)