예제 #1
0
def parse_opt(opt, model_root):
    """Convert a user option dict into a validated translate namespace.

    The dict is flattened into a command-line style argument list and
    parsed with the options registered by ``onmt.opts.model_opts`` and
    ``onmt.opts.translate_opts``.

    Args:
        opt (dict): Options passed by the user. Must contain ``"models"``
            (a single path or a list/tuple of paths). The dict is updated
            in place: ``"models"`` is replaced by the paths joined onto
            ``model_root`` and ``"src"`` is set to ``"dummy_src"``.
        model_root (str): Directory the model paths are joined onto.

    Returns:
        The parsed namespace, with an extra ``cuda`` attribute that is
        true when ``gpu`` is greater than -1.
    """
    parser = ArgumentParser()
    onmt.opts.model_opts(parser)
    onmt.opts.translate_opts(parser)

    models = opt["models"]
    if not isinstance(models, (list, tuple)):
        models = [models]
    opt["models"] = [os.path.join(model_root, model) for model in models]
    opt["src"] = "dummy_src"

    argv = []
    for k, v in opt.items():
        if k == "models":
            argv.append("-model")
            argv.extend(str(model) for model in v)
        elif isinstance(v, bool):
            # Booleans are passed as presence-only flags, so a False value
            # must not put the flag on the command line at all.
            if v:
                argv.append("-%s" % k)
        else:
            argv += ["-%s" % k, str(v)]

    opt = parser.parse_args(argv)
    ArgumentParser.validate_translate_opts(opt)
    opt.cuda = opt.gpu > -1

    return opt
예제 #2
0
    def _get_opt(self, language, method):

        parser = ArgumentParser(description='summarizer.py')

        if language == 'en' and method == 'bert':
            config_file = 'en_bert_transformer.yml'
        elif language == 'en' and method == 'conv':
            config_file = 'en_conv_transformer.yml'
        elif language == 'de' and method == 'bert':
            config_file = 'de_bert_transformer.yml'
        elif language == 'de' and method == 'conv':
            config_file = 'de_conv_transformer.yml'
        else:
            sys.stderr.write(
                f"Method '{method}' for language '{language}' is not supported."
            )

        #Hack to load parser arguments
        prec_argv = sys.argv
        sys.argv = [sys.argv[0]]

        sys.argv.extend(['-config', 'config/' + config_file])
        opts.config_opts(parser)
        opts.translate_opts(parser)
        opt = parser.parse_args()

        sys.argv = prec_argv

        return opt
예제 #3
0
def translate_file(input_filename, output_filename):
    """Translate the configured source file and return all results.

    NOTE(review): ``input_filename`` and ``output_filename`` are accepted
    but not used; the model, source and output paths are fixed in the
    argument list below.

    Args:
        input_filename: Currently unused.
        output_filename: Currently unused.

    Returns:
        tuple: ``(scores, predictions)`` collected over every source shard
        (two empty lists when the source yields no shards).
    """
    parser = ArgumentParser(description='translation')
    opts.config_opts(parser)
    opts.translate_opts(parser)

    # parse_args needs a sequence of tokens; a single string would be
    # consumed character by character.
    args = [
        '-model', 'm16_step_44000.pt',
        '-src', 'source_products_16.txt',
        '-output', 'op_16x_4400_50_10.txt',
        '-batch_size', '128',
        '-replace_unk',
        '-max_length', '200',
        '-verbose',
        '-beam_size', '50',
        '-n_best', '10',
        '-min_length', '5',
    ]
    opt = parser.parse_args(args)
    translator = build_translator(opt, report_score=True)

    scores, predictions = [], []
    for src_shard in split_corpus(opt.src, opt.shard_size):
        # No reference translations are supplied, hence tgt=None.
        shard_scores, shard_predictions = translator.translate(
            src=src_shard,
            tgt=None,
            src_dir=opt.src_dir,
            batch_size=opt.batch_size,
            attn_debug=opt.attn_debug)
        # Keep the results of every shard, not only the last one.
        scores.extend(shard_scores)
        predictions.extend(shard_predictions)

    return scores, predictions
예제 #4
0
def _get_parser():
    """Create the translate.py parser, print the parsed options, return it.

    The parser receives the config and translate option groups. The
    current command line is parsed once and the result is printed before
    the parser itself is returned.
    """
    translate_parser = ArgumentParser(description='translate.py')

    opts.config_opts(translate_parser)
    opts.translate_opts(translate_parser)

    parsed_now = translate_parser.parse_args()
    print(parsed_now)
    return translate_parser
def translate_file(input_filename, output_filename):
    """Translate ``MCTS_data/<input_filename>.txt`` and return all results.

    Args:
        input_filename: Base name (without ``.txt``) of the source file
            inside ``MCTS_data/``.
        output_filename: Base name (without ``.txt``) of the output file
            inside ``MCTS_data/``.

    Returns:
        tuple: ``(scores, predictions)`` collected over every source shard
        (two empty lists when the source yields no shards).
    """
    parser = ArgumentParser(description='translation')
    opts.config_opts(parser)
    opts.translate_opts(parser)

    # parse_args needs a sequence of tokens; a single string would be
    # consumed character by character. Building the list directly also
    # keeps file names containing whitespace in one token.
    args = [
        '-model',
        'Experiments/Checkpoints/retrosynthesis_augmented_medium/'
        'retrosynthesis_aug_medium_model_step_100000.pt',
        '-src', f'MCTS_data/{input_filename}.txt',
        '-output', f'MCTS_data/{output_filename}.txt',
        '-batch_size', '128',
        '-replace_unk',
        '-max_length', '200',
        '-verbose',
        '-beam_size', '10',
        '-n_best', '10',
        '-min_length', '5',
        '-gpu', '0',
    ]
    opt = parser.parse_args(args)
    translator = build_translator(opt, report_score=True)

    scores, predictions = [], []
    for src_shard in split_corpus(opt.src, opt.shard_size):
        # No reference translations are supplied, hence tgt=None.
        shard_scores, shard_predictions = translator.translate(
            src=src_shard,
            tgt=None,
            src_dir=opt.src_dir,
            batch_size=opt.batch_size,
            attn_debug=opt.attn_debug)
        # Keep the results of every shard, not only the last one.
        scores.extend(shard_scores)
        predictions.extend(shard_predictions)

    return scores, predictions
예제 #6
0
def main():
    """Parse config, model and global options and pickle them to disk.

    The parsed namespace is written to the file ``opt_data`` inside
    ``dir_path``.
    """
    arg_parser = ArgumentParser()
    opts.config_opts(arg_parser)
    opts.model_opts(arg_parser)
    opts.global_opts(arg_parser)
    parsed_opt = arg_parser.parse_args()

    dump_path = os.path.join(dir_path, 'opt_data')
    with open(dump_path, 'wb') as handle:
        pickle.dump(parsed_opt, handle)
예제 #7
0
    def __init__(self):
        """Build the translator from command-line options and set up MeCab.

        Sets ``self.opt`` (parsed options), ``self.translator`` and
        ``self.mecab`` (a tagger created with ``-Owakati``).
        """
        parser = ArgumentParser()
        opts.config_opts(parser)
        # The translate option group has to be registered before the parsed
        # namespace is handed to validate_translate_opts / build_translator.
        opts.translate_opts(parser)
        self.opt = parser.parse_args()
        ArgumentParser.validate_translate_opts(self.opt)
        self.translator = build_translator(self.opt, report_score=True)

        # NOTE(review): the MeCab binding is usually imported as ``MeCab``;
        # confirm that ``Mecab`` is the name this file actually imports.
        self.mecab = Mecab.Tagger("-Owakati")
        # Initial parse of an empty string (was misspelled ``parce``).
        self.mecab.parse("")
예제 #8
0
    def load_model(self, path=None):
        """Build a translator from the checkpoint at ``path``.

        The translator is stored on ``self.model``.

        Args:
            path: Path of the model file; a relative path is resolved
                against the current working directory.

        Raises:
            FileNotFoundError: If ``path`` is None or does not exist.
        """
        # Stop here rather than passing a missing path on to the parser
        # and the translator builder.
        if path is None or not os.path.exists(path):
            raise FileNotFoundError(
                "No model present at the specified path : {}".format(path))

        parser = ArgumentParser(description='translate.py')
        opts.config_opts(parser)
        opts.translate_opts(parser)
        opt = parser.parse_args(["--model", path])
        self.model = build_translator(opt, report_score=True)
    def __init__(self):
        """Set up the translator, the MeCab tagger and the reply history."""
        # Load the model according to the options given on the command line
        cli_parser = ArgumentParser()
        opts.config_opts(cli_parser)
        opts.translate_opts(cli_parser)
        parsed = cli_parser.parse_args()
        self.opt = parsed
        ArgumentParser.validate_translate_opts(parsed)
        self.translator = build_translator(parsed, report_score=True)

        # MeCab is used to split text into space-separated words
        tagger = MeCab.Tagger("-Owakati")
        self.mecab = tagger
        tagger.parse("")

        # Dictionary that keeps the previous response
        self.prev_uttr_dict = dict()
    def __init__(self):
        """Set up the translator with fixed options and the MeCab tagger."""
        # Boilerplate: build the option parser
        cli_parser = ArgumentParser()
        opts.config_opts(cli_parser)
        opts.translate_opts(cli_parser)

        fixed_args = [
            "-model", "../models/model.pt",
            "-src", "None",
            "-replace_unk",
            "--beam_size", "10",
            "--min_length", "7",
            "--block_ngram_repeat", "2",
        ]
        parsed = cli_parser.parse_args(args=fixed_args)
        self.opt = parsed
        ArgumentParser.validate_translate_opts(parsed)
        self.translator = build_translator(parsed, report_score=True)

        # MeCab is used for word segmentation
        tagger = MeCab.Tagger("-Owakati")
        self.mecab = tagger
        tagger.parse("")
예제 #11
0
def _build_translator(args):
    """
    Create a seq2seq translator from command-line style arguments.

    ``args`` is parsed with the config and translate option groups and
    validated; the translator is built without score reporting.
    Returns a ``(translator, opt)`` pair.
    """

    from onmt.utils.parse import ArgumentParser
    import onmt.opts as opts

    arg_parser = ArgumentParser()
    opts.config_opts(arg_parser)
    opts.translate_opts(arg_parser)

    parsed_opt = arg_parser.parse_args(args=args)
    ArgumentParser.validate_translate_opts(parsed_opt)

    from onmt.translate.translator import build_translator

    return build_translator(parsed_opt, report_score=False), parsed_opt
예제 #12
0
    def parse_opt(self, opt):
        """Parse the option set passed by the user using `onmt.opts`

        The dict is flattened into a command-line style argument list.
        ``opt`` is updated in place: ``'models'`` is replaced by the paths
        joined onto ``self.model_root`` and ``'src'`` is set to
        ``"dummy_src"``.

        Args:
            opt (dict): Options passed by the user

        Returns:
            opt (argparse.Namespace): full set of options for the
            Translator, with ``alignment_heads`` forced to 8 and ``cuda``
            set to whether ``gpu`` is greater than -1
        """
        parser = ArgumentParser()
        onmt.opts.translate_opts(parser)

        models = opt['models']
        if not isinstance(models, (list, tuple)):
            models = [models]
        opt['models'] = [
            os.path.join(self.model_root, model) for model in models
        ]
        opt['src'] = "dummy_src"

        # Use an explicit argument list instead of rewriting sys.argv, so a
        # failure while parsing or validating cannot leave the process-wide
        # arguments modified.
        argv = []
        for k, v in opt.items():
            if k == 'models':
                argv.append('-model')
                argv.extend(str(model) for model in v)
            elif isinstance(v, bool):
                # Only a true bool is passed on; False omits the flag.
                if v:
                    argv.append('-%s' % k)
            else:
                argv += ['-%s' % k, str(v)]

        opt = parser.parse_args(argv)
        opt.alignment_heads = 8
        ArgumentParser.validate_translate_opts(opt)
        opt.cuda = opt.gpu > -1

        return opt
예제 #13
0
def onmt_translator_builder(my_opts, s0_model_path, logger=None):
    """Build an ONMT translator for the model at ``s0_model_path``.

    Progress messages go to ``logger.info`` when a logger is given and to
    ``print`` otherwise. The argument list is ``-model <path>`` followed
    by ``opts_to_list(my_opts)`` and is always printed before parsing.

    Returns the translator built with score reporting enabled.
    """

    def announce(message):
        # Route status messages to the logger when there is one.
        if logger is None:
            print(message)
        else:
            logger.info(message)

    announce('Building ONMT translator with model from ' + s0_model_path)

    translate_parser = ArgumentParser()
    opts.config_opts(translate_parser)
    opts.translate_opts(translate_parser)

    arglist = ['-model', s0_model_path] + opts_to_list(my_opts)
    print(arglist)
    parsed_opt = translate_parser.parse_args(arglist)
    ArgumentParser.validate_translate_opts(parsed_opt)

    translator = build_translator(parsed_opt, report_score=True,
                                  logger=logger)
    announce('Finished building ONMT translator.')

    return translator
예제 #14
0
def translate_trained_model(n_latent,
                            data_path,
                            output_dir,
                            trained_model_path,
                            use_segments=False,
                            max_segments=10):
    """Run ``translate.main`` on a trained model with default parameters.

    An option namespace is created from an empty command line, filled
    with ``translate.DEFAULT_TRANSLATE_PARAMS`` and then overridden with
    the model path, source path, output directory and latent/segment
    settings. The source is ``src-test.txt`` next to ``data_path``; the
    output directory is ``preds`` two path components above
    ``output_dir``.
    """
    arg_parser = ArgumentParser()
    opts.config_opts(arg_parser)
    opts.translate_opts(arg_parser)
    opt = arg_parser.parse_args('')

    # vars() exposes the namespace's attribute dict, so writes land on opt.
    settings = vars(opt)
    for name, value in translate.DEFAULT_TRANSLATE_PARAMS.items():
        settings[name] = value

    data_parent = '/'.join(data_path.split('/')[:-1])
    output_parent = '/'.join(output_dir.split('/')[:-2])

    settings['models'] = [trained_model_path]
    settings['src'] = data_parent + '/src-test.txt'
    settings['output_dir'] = output_parent + '/preds'
    settings['n_latent'] = n_latent
    settings['use_segments'] = use_segments
    settings['max_segments'] = max_segments

    translate.main(opt)
예제 #15
0
    def parse_opt(self, opt):
        """Parse the option set passed by the user using `onmt.opts`

        The dict is flattened into a command-line style argument list.
        ``opt`` is updated in place: ``'models'`` is replaced by the paths
        joined onto ``self.model_root`` and ``'src'`` is set to
        ``"dummy_src"``.

        Args:
            opt (dict): Options passed by the user

        Returns:
            opt (argparse.Namespace): full set of options for the
            Translator, with ``cuda`` set to whether ``gpu`` is greater
            than -1
        """
        parser = ArgumentParser()
        onmt.opts.translate_opts(parser)

        models = opt['models']
        if not isinstance(models, (list, tuple)):
            models = [models]
        opt['models'] = [os.path.join(self.model_root, model)
                         for model in models]
        opt['src'] = "dummy_src"

        # Use an explicit argument list instead of rewriting sys.argv, so a
        # failure while parsing or validating cannot leave the process-wide
        # arguments modified.
        argv = []
        for k, v in opt.items():
            if k == 'models':
                argv.append('-model')
                argv.extend(str(model) for model in v)
            elif isinstance(v, bool):
                # Booleans are presence-only flags: a False value must not
                # put the flag on the command line.
                if v:
                    argv.append('-%s' % k)
            else:
                argv += ['-%s' % k, str(v)]

        opt = parser.parse_args(argv)
        ArgumentParser.validate_translate_opts(opt)
        opt.cuda = opt.gpu > -1

        return opt
예제 #16
0
def _parse_opt(opt):
    """Convert a user option dict into a validated translate namespace.

    ``opt`` is updated in place before parsing: ``'src'`` is set to
    ``"dummy_src"`` and ``'replace_unk'`` to ``True``. A ``'models'``
    entry, if present, must be an iterable of model paths.

    Args:
        opt (dict): Options passed by the user.

    Returns:
        The parsed namespace, with an extra ``cuda`` attribute that is
        true when ``gpu`` is greater than -1.
    """
    parser = ArgumentParser()
    onmt.opts.translate_opts(parser)

    opt['src'] = "dummy_src"
    opt['replace_unk'] = True

    # Use an explicit argument list instead of rewriting sys.argv, so a
    # failure while parsing or validating cannot leave the process-wide
    # arguments modified.
    argv = []
    for k, v in opt.items():
        if k == 'models':
            argv.append('-model')
            argv.extend(str(model) for model in v)
        elif isinstance(v, bool):
            # Booleans are presence-only flags: a False value must not put
            # the flag on the command line.
            if v:
                argv.append('-%s' % k)
        else:
            argv += ['-%s' % k, str(v)]

    opt = parser.parse_args(argv)
    ArgumentParser.validate_translate_opts(opt)
    opt.cuda = opt.gpu > -1

    return opt
예제 #17
0
    fields = inputters.get_fields(opt.data_type,
                                  src_nfeats,
                                  tgt_nfeats,
                                  dynamic_dict=opt.dynamic_dict,
                                  src_truncate=opt.src_seq_length_trunc,
                                  tgt_truncate=opt.tgt_seq_length_trunc)

    src_reader = inputters.str2reader[opt.data_type].from_opt(opt)
    tgt_reader = inputters.str2reader["text"].from_opt(opt)

    logger.info("Building & saving training data...")
    train_dataset_files = build_save_dataset('train', fields, src_reader,
                                             tgt_reader, opt)

    if opt.valid_src and opt.valid_tgt:
        logger.info("Building & saving validation data...")
        build_save_dataset('valid', fields, src_reader, tgt_reader, opt)

    logger.info("Building & saving vocabulary...")
    build_save_vocab(train_dataset_files, fields, opt)


# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    parser = ArgumentParser(description='preprocess.py')

    # Register the config and preprocessing option groups on the parser.
    opts.config_opts(parser)
    opts.preprocess_opts(parser)

    # Parse the process command line and hand the options to main().
    opt = parser.parse_args()
    main(opt)
예제 #18
0
    def __init__(self, model_dir):
        """Load an extended model's configuration and build its options.

        Expects exactly one ``.pt`` file under
        ``<model_dir>/translation_model`` and a ``config.json`` directly
        in ``model_dir``. The entries under the config's ``opts`` key are
        turned into command-line arguments and parsed together with the
        path of the ``.pt`` file.

        Args:
            model_dir: Directory holding ``config.json`` and the
                ``translation_model`` sub-directory.

        Raises:
            ValueError: If the number of ``.pt`` files found is not one.
        """
        self._model_dir = model_dir

        # Collect the .pt files of the translation model
        checkpoint_dir = f'{self._model_dir}/translation_model'
        checkpoints = [
            os.path.join(model_dir, 'translation_model', entry)
            for entry in os.listdir(checkpoint_dir)
            if entry.endswith(".pt")
        ]
        if len(checkpoints) != 1:
            raise ValueError(
                f"Extended model {self._model_dir} sould have one .pt file. {len(checkpoints)}"
            )
        checkpoint = checkpoints[0]

        # Read the JSON configuration stored next to the model
        with open(os.path.join(model_dir, "config.json")) as config_file:
            config = json.load(config_file)

        # Source and target languages
        self._src_lang = config[self.SRC_LANG]
        self._tgt_lang = config[self.TGT_LANG]

        # Online learning settings; built without arguments when absent
        if self.ONLINE_LEARNING not in config:
            self._online_learning = _OnlineLearningConfig()
        else:
            self._online_learning = _OnlineLearningConfig(
                config[self.ONLINE_LEARNING])

        # One parser carries the translate, train and model options
        option_parser = ArgumentParser(description='pangeanmt options',
                                       conflict_handler='resolve')
        _translate_opts(option_parser)
        _train_opts(option_parser)
        _model_opts(option_parser)

        # --src and --data are declared optional; they are not used
        option_parser.add_argument(
            '--src', '-src', required=False,
            help="This argument isn't used!")
        option_parser.add_argument(
            '--data', '-data', required=False,
            help="This argument isn't used!")

        # --seed gets its own default
        option_parser.add_argument(
            '--seed', '-seed', required=False, default=829, help="Seed")

        # Translate the config's opts into command-line arguments
        cli_args = ['--model', checkpoint]
        for name, value in config['opts'].items():
            cli_args.append('--' + name)
            if value is None:
                # A null value means the option is passed as a bare flag.
                continue
            if name == 'model':
                value = os.path.join(model_dir, value)
            cli_args.append(str(value))
        self._opts = option_parser.parse_args(cli_args)
예제 #19
0
 def __build_translator(self, model_addr, src_addr):
     """Return a translator for ``model_addr`` with ``src_addr`` as source.

     Score reporting is disabled.
     """
     translate_parser = ArgumentParser()
     opts.config_opts(translate_parser)
     opts.translate_opts(translate_parser)
     cli_args = ['-model', model_addr, '-src', src_addr]
     parsed_opt = translate_parser.parse_args(cli_args)
     return build_translator(parsed_opt, report_score=False)
예제 #20
0
import onmt.opts as opts
from onmt.utils.parse import ArgumentParser

from pyknp import Juman

# Module-level Juman instance used for morphological analysis.
jumanpp = Juman()

# Parser carrying the config and translate option groups.
parser = ArgumentParser()
opts.config_opts(parser)
opts.translate_opts(parser)

# The model path is taken from the first command-line argument. The source
# and output are both named 'dummy.txt' (presumably placeholders - confirm).
opt = parser.parse_args([
    '-model',
    sys.argv[1],
    '-src',
    'dummy.txt',
    '-output',
    'dummy.txt',
    '-replace_unk',
])

# Validate the options, then build the translator without score reporting.
ArgumentParser.validate_translate_opts(opt)
translator = build_translator(opt, report_score=False)


def answer(translator, utterance):
    result = jumanpp.analysis(utterance)
    src_shard = [[mrph.midasi for mrph in result.mrph_list()]]
    tgt_shard = [['']]
    _, all_predictions = translator.translate(src=src_shard,
                                              tgt=tgt_shard,
예제 #21
0
from onmt.utils.parse import ArgumentParser
import onmt.opts as opts

# Parser carrying the config and translate option groups.
parser = ArgumentParser()
opts.config_opts(parser)
opts.translate_opts(parser)

# Fixed argument list: batch size, beam size, model path, source file and
# seed, with absolute paths hard-coded.
optstring = [
    '-batch_size', '20', '-beam_size', '10', '-model',
    '/home/hansonlu/links/data/giga-models/giga_halfsplit_pt1_nocov_step_59156_valacc48.57_ppl15.51.pt',
    '-src', '/home/hansonlu/myOpenNMT/data/giga/small_input.txt', '-seed', '-1'
]

# Parse the fixed list (not the process command line).
opt = parser.parse_args(optstring)

# Print the resulting namespace.
print(opt)