Beispiel #1
0
def _get_parser():
    """Build the argument parser for ``translate.py``.

    Returns:
        An ``ArgumentParser`` on which ``opts.config_opts`` and
        ``opts.translate_opts`` have been applied, in that order.
    """
    arg_parser = ArgumentParser(description='translate.py')
    for register in (opts.config_opts, opts.translate_opts):
        register(arg_parser)
    return arg_parser
Beispiel #2
0
def _get_parser():
    """Build the ``translate.py`` parser and echo the parsed arguments.

    Returns:
        The ``ArgumentParser`` after ``opts.config_opts`` and
        ``opts.translate_opts`` have been applied to it.
    """
    cli = ArgumentParser(description='translate.py')
    opts.config_opts(cli)
    opts.translate_opts(cli)

    # NOTE: the arguments are parsed here only so they can be printed; the
    # parsed namespace itself is discarded and the caller gets the parser.
    parsed_now = cli.parse_args()
    print(parsed_now)
    return cli
Beispiel #3
0
    def _get_opt(self, language, method):

        parser = ArgumentParser(description='summarizer.py')

        if language == 'en' and method == 'bert':
            config_file = 'en_bert_transformer.yml'
        elif language == 'en' and method == 'conv':
            config_file = 'en_conv_transformer.yml'
        elif language == 'de' and method == 'bert':
            config_file = 'de_bert_transformer.yml'
        elif language == 'de' and method == 'conv':
            config_file = 'de_conv_transformer.yml'
        else:
            sys.stderr.write(
                f"Method '{method}' for language '{language}' is not supported."
            )

        #Hack to load parser arguments
        prec_argv = sys.argv
        sys.argv = [sys.argv[0]]

        sys.argv.extend(['-config', 'config/' + config_file])
        opts.config_opts(parser)
        opts.translate_opts(parser)
        opt = parser.parse_args()

        sys.argv = prec_argv

        return opt
Beispiel #4
0
def translate_file(input_filename, output_filename):
    """Run the translator with a fixed set of command-line style options.

    NOTE: ``input_filename`` and ``output_filename`` are accepted but not
    used; the model, source and output paths are hard-coded in the option
    string below.

    Returns:
        The ``(scores, predictions)`` pair produced for the last shard that
        was translated.
    """
    cli = ArgumentParser(description='translation')
    opts.config_opts(cli)
    opts.translate_opts(cli)

    # The whole option set is handed to parse_args() as a single string.
    option_text = '''-model m16_step_44000.pt
                -src source_products_16.txt
                -output op_16x_4400_50_10.txt
                -batch_size 128
                -replace_unk
                -max_length 200
                -verbose
                -beam_size 50
                -n_best 10
                -min_length 5'''
    opt = cli.parse_args(option_text)
    translator = build_translator(opt, report_score=True)

    # There is no target side, so every source shard is paired with None.
    for shard in split_corpus(opt.src, opt.shard_size):
        scores, predictions = translator.translate(
            src=shard,
            tgt=None,
            src_dir=opt.src_dir,
            batch_size=opt.batch_size,
            attn_debug=opt.attn_debug,
        )

    return scores, predictions
Beispiel #5
0
def _get_parser():
    """Build the ``translate.py`` parser with local default paths.

    On top of ``opts.config_opts`` and ``opts.translate_opts`` this adds
    ``--model``, ``--src`` and ``--output`` options whose defaults point at
    fixed files, so none of them has to be given on the command line.

    Returns:
        The configured ``ArgumentParser``.
    """
    default_model = "F:/Project/Python/selfProject/translate_NMT/transflate_NMT/data_step_100.pt"
    default_src = "F:/Project/Python/selfProject/translate_NMT/transflate_NMT/data/src-test.txt"
    default_output = 'F:/Project/Python/selfProject/translate_NMT/transflate_NMT/data/pred.txt'

    cli = ArgumentParser(description='translate.py')
    opts.config_opts(cli)
    opts.translate_opts(cli)

    # Help text (Chinese): "training file used by the model".
    cli.add('--model', '-model',
            dest='models',
            metavar='MODEL',
            nargs='+',
            type=str,
            default=[default_model],
            required=False,
            help="模型使用得训练文件")

    # Help text (Chinese): "where is the self-written test file????".
    cli.add('--src', '-src',
            required=False,
            default=default_src,
            help="自己写的测试文件在哪里????")

    # Help text (Chinese), roughly: "test output location, change to your own".
    cli.add('--output', '-output',
            default=default_output,
            help="测试文件输出未知  改成自己得")
    return cli
Beispiel #6
0
def build_translator(model, fields, model_opt, beam_size=1, n_best=1):
    """Create a ``Translator`` with every scoring penalty switched off.

    Args:
        model: model object, forwarded to ``Translator``.
        fields: fields object, forwarded to ``Translator``.
        model_opt: model options; ``beta``, ``coverage_penalty`` and
            ``length_penalty`` are overwritten in place.
        beam_size: beam size forwarded to ``Translator``.
        n_best: number of hypotheses forwarded to ``Translator``.

    Returns:
        The constructed ``Translator``.
    """
    # model, fields, model_opt are all loaded from start of script
    disabled = "none"
    model_opt.beta = -0.0
    model_opt.coverage_penalty = disabled
    model_opt.length_penalty = disabled

    global_scorer = onmt.translate.GNMTGlobalScorer(
        alpha=0.0,
        beta=-0.0,
        cov_penalty=disabled,
        length_penalty=disabled,
    )

    # NOTE(review): these placeholder options are built and adjusted but are
    # never passed to Translator below.
    placeholder_parser = argparse.ArgumentParser(description='translate.py')
    opts.translate_opts(placeholder_parser)
    placeholder_argv = "-model dummy -src dummy".split()
    placeholder_opt, _unknown = placeholder_parser.parse_known_args(
        placeholder_argv)
    placeholder_opt.beam_size = beam_size
    placeholder_opt.cuda = False

    return Translator(
        model,
        fields,
        beam_size,
        n_best,
        global_scorer=global_scorer,
        out_file=None,
        report_score=False,
        gpu=False,
        replace_unk=True,
    )
Beispiel #7
0
def _get_parser():
    """Build the ``translate.py`` parser including the fine-tuning options.

    Returns:
        An ``ArgumentParser`` with ``opts.config_opts``,
        ``opts.translate_opts`` and ``opts.mmod_finetune_translate_opts``
        applied, in that order.
    """
    cli = ArgumentParser(description='translate.py')
    option_groups = (
        opts.config_opts,
        opts.translate_opts,
        opts.mmod_finetune_translate_opts,
    )
    for add_group in option_groups:
        add_group(cli)
    return cli
def _get_parser():
    """Build the argument parser for ``run_kp_eval.py``.

    Returns:
        An ``ArgumentParser`` with the config and translate option groups.
    """
    kp_parser = ArgumentParser(description='run_kp_eval.py')
    for add_group in (opts.config_opts, opts.translate_opts):
        add_group(kp_parser)
    return kp_parser
def translate_file(input_filename, output_filename):
    """Translate ``MCTS_data/<input_filename>.txt`` with the fixed model.

    Args:
        input_filename: stem of the source file inside ``MCTS_data/``.
        output_filename: stem of the output file inside ``MCTS_data/``.

    Returns:
        The ``(scores, predictions)`` pair produced for the last shard that
        was translated.
    """
    cli = ArgumentParser(description='translation')
    opts.config_opts(cli)
    opts.translate_opts(cli)

    # The whole option set is handed to parse_args() as a single string.
    option_text = f'''-model Experiments/Checkpoints/retrosynthesis_augmented_medium/retrosynthesis_aug_medium_model_step_100000.pt 
                -src MCTS_data/{input_filename}.txt 
                -output MCTS_data/{output_filename}.txt 
                -batch_size 128 
                -replace_unk
                -max_length 200 
                -verbose 
                -beam_size 10 
                -n_best 10 
                -min_length 5 
                -gpu 0'''

    opt = cli.parse_args(option_text)
    translator = build_translator(opt, report_score=True)

    # There is no target side, so every source shard is paired with None.
    for shard in split_corpus(opt.src, opt.shard_size):
        scores, predictions = translator.translate(
            src=shard,
            tgt=None,
            src_dir=opt.src_dir,
            batch_size=opt.batch_size,
            attn_debug=opt.attn_debug,
        )

    return scores, predictions
Beispiel #10
0
def _get_parser():
    """Build the argument parser for ``RL_train.py``.

    The model option group is registered between the config and translate
    groups; the training option group is not registered.

    Returns:
        The configured ``ArgumentParser``.
    """
    rl_parser = ArgumentParser(description='RL_train.py')
    option_groups = (
        opts.config_opts,
        opts.model_opts,  # yida RL
        opts.translate_opts,
    )
    for add_group in option_groups:
        add_group(rl_parser)
    return rl_parser
def onmt_parser() -> ArgumentParser:
    """
    Return an OpenNMT parser (adapted from the OpenNMT-Py repo) that has the
    config and translate option groups registered.
    """
    onmt_cli = ArgumentParser(description='translate.py')
    for add_group in (opts.config_opts, opts.translate_opts):
        add_group(onmt_cli)
    return onmt_cli
Beispiel #12
0
    def configure_opt(self, description):
        """Build an options namespace from parser defaults and overrides.

        Args:
            description: mapping with an ``'opt'`` entry (option overrides)
                and a ``'model'`` entry (stored as the only element of
                ``models``).

        Returns:
            A ``types.SimpleNamespace`` holding every parser default,
            updated with ``description['opt']`` and with ``models`` set.
        """
        # OpenNMT's ArgumentParser is used only as a source of default
        # values; nothing is parsed from the command line here.
        parser = ArgumentParser(description='translation')
        opts.config_opts(parser)
        opts.translate_opts(parser)

        settings = {}
        for action in parser._actions:
            settings[action.dest] = action.default
        settings.update(description['opt'])
        settings['models'] = [description['model']]

        return types.SimpleNamespace(**settings)
Beispiel #13
0
    def __init__(self):
        """Parse the command line, build the translator and set up MeCab."""
        # Boilerplate: register the option groups and parse the arguments.
        cli = ArgumentParser()
        for add_group in (opts.config_opts, opts.translate_opts):
            add_group(cli)
        parsed = cli.parse_args()
        self.opt = parsed
        ArgumentParser.validate_translate_opts(parsed)
        self.translator = build_translator(parsed, report_score=True)

        # MeCab is used for word segmentation.
        tagger = MeCab.Tagger("-Owakati")
        self.mecab = tagger
        tagger.parse("")
    def load_model(self, path=None):
        """Build a translator from the checkpoint at ``path``.

        On success the translator is stored in ``self.model``. When ``path``
        is ``None`` or does not exist, a message is printed and the method
        returns without trying to load anything.

        Args:
            path: checkpoint path, absolute or relative to the current
                working directory.
        """
        if path is None or not os.path.exists(
                os.path.abspath(os.path.join(os.getcwd(), path))):
            print("No model present at the specified path : {}".format(path))
            # Stop here: continuing would only fail later, inside argument
            # parsing or model loading, with a less helpful error.
            return

        parser = ArgumentParser(description='translate.py')
        opts.config_opts(parser)
        opts.translate_opts(parser)

        opt = parser.parse_args(["--model", path])
        self.model = build_translator(opt, report_score=True)
    def __init__(self):
        """Load the model from command-line options and prepare state."""
        # Load the model based on the options given on the command line.
        cli = ArgumentParser()
        for add_group in (opts.config_opts, opts.translate_opts):
            add_group(cli)
        parsed = cli.parse_args()
        self.opt = parsed
        ArgumentParser.validate_translate_opts(parsed)
        self.translator = build_translator(parsed, report_score=True)

        # MeCab is used to split text into space-separated words.
        tagger = MeCab.Tagger("-Owakati")
        self.mecab = tagger
        tagger.parse("")

        # Dictionary that keeps the previous responses.
        self.prev_uttr_dict = dict()
    def __init__(self):
        """Build the translator from a fixed option list and set up MeCab."""
        # Boilerplate: the options are fixed here rather than read from the
        # command line.
        fixed_args = [
            "-model", "../models/model.pt",
            "-src", "None",
            "-replace_unk",
            "--beam_size", "10",
            "--min_length", "7",
            "--block_ngram_repeat", "2",
        ]
        cli = ArgumentParser()
        opts.config_opts(cli)
        opts.translate_opts(cli)
        parsed = cli.parse_args(args=fixed_args)
        self.opt = parsed
        ArgumentParser.validate_translate_opts(parsed)
        self.translator = build_translator(parsed, report_score=True)

        # MeCab is used for word segmentation.
        tagger = MeCab.Tagger("-Owakati")
        self.mecab = tagger
        tagger.parse("")
Beispiel #17
0
def _build_translator(args):
    """
    Initializes a seq2seq translator model.

    Args:
        args: argument list handed to the parser's ``parse_args``.

    Returns:
        A ``(translator, opt)`` tuple: the built translator and the parsed,
        validated options it was built from.
    """
    # onmt is imported inside the function rather than at module level.
    from onmt.utils.parse import ArgumentParser
    import onmt.opts as opts

    cli = ArgumentParser()
    for add_group in (opts.config_opts, opts.translate_opts):
        add_group(cli)

    parsed = cli.parse_args(args=args)
    ArgumentParser.validate_translate_opts(parsed)

    from onmt.translate.translator import build_translator
    return build_translator(parsed, report_score=False), parsed
Beispiel #18
0
def trans(n_bset, content_src):
    """Translate ``content_src`` with the bundled ``zh2zh`` checkpoint.

    Args:
        n_bset: value stored in ``opt.n_best``.
        content_src: passed to the translator as ``src_data_iter``.

    Returns:
        Whatever ``translator.translate`` returns.
    """
    cli = argparse.ArgumentParser()
    translate_opts(cli)
    opt = cli.parse_args()

    # The checkpoint sits in an "available_models" folder next to this file.
    checkpoint_dir = os.path.join(os.path.dirname(__file__),
                                  'available_models')
    forced = {
        'model': os.path.join(checkpoint_dir, 'zh2zh_model_2000000.pt'),
        'n_best': n_bset,
        'gpu': -1,
        'data_type': 'text',
    }
    for name, value in forced.items():
        setattr(opt, name, value)

    translator = build_translator(opt, report_score=True)
    return translator.translate(
        src_path=opt.src,
        src_data_iter=content_src,
        tgt_path=opt.tgt,
        src_dir=opt.src_dir,
        batch_size=opt.batch_size,
        attn_debug=opt.attn_debug,
    )
Beispiel #19
0
    def __init__(self, gpu):
        """Build the summarization translator with hard-coded options.

        Args:
            gpu: value exported as ``CUDA_VISIBLE_DEVICES``; it must support
                ``len()`` and the process exits when its length is above 1.
        """
        cli = configargparse.ArgumentParser(
            description='translate.py',
            config_file_parser_class=configargparse.YAMLConfigFileParser,
            formatter_class=configargparse.ArgumentDefaultsHelpFormatter)
        opts.config_opts(cli)
        opts.add_md_help_argument(cli)
        opts.translate_opts(cli)

        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        if len(gpu) > 1:
            print('do not try hacking')
            exit()
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

        # Replace the process arguments so parse_args() only yields defaults.
        sys.argv = ["python"]
        opt = cli.parse_args()

        # Every option that matters is then forced to a fixed value.
        # NOTE(review): 'sumarry' looks like a misspelling of 'summary';
        # confirm how the scorer treats it before changing the value.
        forced = {
            'models': [
                './modules/multi_summ/dataset_m2s2/korean_bert_8_single_new_economy_add_cls_sep_segment_eos_penalty_new_bloom_step_25000.pt'
            ],
            'segment': True,
            'batch_size': 8,
            'beam_size': 10,
            'src': '.1',
            'output': '.1',
            'verbose': True,
            'stepwise_penalty': True,
            'coverage_penalty': 'sumarry',
            'beta': 5,
            'length_penalty': 'wu',
            'alpha': 0.9,
            'block_ngram_repeat': 3,
            'ignore_when_blocking': [".", "</t", "<t>", ",_", "%"],
            'max_length': 300,
            'min_length': 35,
            'gpu': 0,
        }
        for name, value in forced.items():
            setattr(opt, name, value)

        init_logger(opt.log_file)
        self.translator = build_translator(opt, report_score=True)
Beispiel #20
0
def onmt_translator_builder(my_opts, s0_model_path, logger=None):
    """Build an ONMT translator for the model at ``s0_model_path``.

    Args:
        my_opts: options converted to extra command-line arguments with
            ``opts_to_list``.
        s0_model_path: path passed to the parser as ``-model``.
        logger: optional logger; progress messages go to ``logger.info``
            when given and to ``print`` otherwise.

    Returns:
        The translator returned by ``build_translator``.
    """

    def report(message):
        # Send progress messages to the logger if there is one.
        if logger is None:
            print(message)
        else:
            logger.info(message)

    report('Building ONMT translator with model from ' + s0_model_path)

    cli = ArgumentParser()
    opts.config_opts(cli)
    opts.translate_opts(cli)

    arglist = ['-model', s0_model_path] + opts_to_list(my_opts)
    print(arglist)
    opt = cli.parse_args(arglist)
    ArgumentParser.validate_translate_opts(opt)

    translator = build_translator(opt, report_score=True, logger=logger)
    report('Finished building ONMT translator.')
    return translator
Beispiel #21
0
def translate_trained_model(n_latent,
                            data_path,
                            output_dir,
                            trained_model_path,
                            use_segments=False,
                            max_segments=10):
    """Run ``translate.main`` for a trained model with derived paths.

    Options start from the parser defaults, are overlaid with
    ``translate.DEFAULT_TRANSLATE_PARAMS`` and then with values derived from
    the arguments.

    Args:
        n_latent: stored as ``opt.n_latent``.
        data_path: ``src-test.txt`` is looked up in its parent directory.
        output_dir: predictions go to ``preds`` two levels above it.
        trained_model_path: stored as the single entry of ``opt.models``.
        use_segments: stored as ``opt.use_segments``.
        max_segments: stored as ``opt.max_segments``.
    """
    cli = ArgumentParser()
    opts.config_opts(cli)
    opts.translate_opts(cli)
    opt = cli.parse_args('')

    # vars() exposes the namespace's attribute dict, so writing to it sets
    # the corresponding option.
    settings = vars(opt)
    for name, value in translate.DEFAULT_TRANSLATE_PARAMS.items():
        settings[name] = value

    data_parent = '/'.join(data_path.split('/')[:-1])
    output_root = '/'.join(output_dir.split('/')[:-2])
    settings.update(
        models=[trained_model_path],
        src=data_parent + '/src-test.txt',
        output_dir=output_root + '/preds',
        n_latent=n_latent,
        use_segments=use_segments,
        max_segments=max_segments,
    )
    translate.main(opt)
Beispiel #22
0
def translate_esan(input_eng, model):
    """Translate ``input_eng`` with ``model`` via temporary corpus files.

    The text is written to ``corpus/input.txt``, ``main`` is run with that
    file as source and ``corpus/output.txt`` as output, and the content of
    the output file is returned.

    Args:
        input_eng: text to translate.
        model: model path inserted after ``-model`` in the option string.

    Returns:
        The full content of the output file as a string.
    """
    input_file = "corpus/input.txt"
    output_file = "corpus/output.txt"

    # Context managers close the files even if writing or reading fails.
    with open(input_file, "w", encoding="UTF-8") as source:
        source.write(input_eng)
    # Create or truncate the output file before translating.
    with open(output_file, "w", encoding="UTF-8"):
        pass

    parser = configargparse.ArgumentParser(
        description='translate.py',
        config_file_parser_class=configargparse.DefaultConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter,
        add_env_var_help=True)
    opts.config_opts(parser)
    opts.translate_opts(parser)

    # The options are passed to parse_args() as one string.
    # NOTE(review): a model path containing whitespace would presumably be
    # split into several arguments; confirm before using such paths.
    s = "-model " + model + " -src " + input_file + " -output " + output_file
    opt = parser.parse_args(s)
    main(opt)

    with open(output_file, "r", encoding="Utf-8") as result:
        return result.read()
Beispiel #23
0
    # else:
    #   tgt_iter = None
    translator.translate(src_data_iter=src_iter,
                         tgt_data_iter=tgt_iter,
                         batch_size=opt.batch_size,
                         out_file=out_file)
    out_file.close()


if __name__ == "__main__":
    # Script entry point: parse the options once, then translate with every
    # checkpoint that exists on disk.
    parser = configargparse.ArgumentParser(
        description='translate.py',
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)
    for add_group in (opts.config_opts, opts.translate_opts):
        add_group(parser)

    opt = parser.parse_args()
    logger = init_logger(opt.log_file)
    logger.info("Input args: %r", opt)

    path = 'rein_model/rein_model_step'
    # Probe step numbers 0, 10, 20, ... below 25000 and skip missing files.
    for i in range(0, 25000, 10):
        current_path = f'{path}_{i}.pt'
        if not os.path.exists(current_path):
            continue
        model_path = current_path
        # Each checkpoint writes to its own output file.
        opt.output = f'rein_data/rein.tran_{i}'
        main(opt, model_path)
Beispiel #24
0
 def __build_translator(self, model_addr, src_addr):
     """Build a translator for ``model_addr`` with ``src_addr`` as source.

     Returns:
         The result of ``build_translator`` with score reporting disabled.
     """
     cli = ArgumentParser()
     for add_group in (opts.config_opts, opts.translate_opts):
         add_group(cli)
     cli_args = ['-model', model_addr, '-src', src_addr]
     parsed = cli.parse_args(cli_args)
     return build_translator(parsed, report_score=False)
Beispiel #25
0
def _get_parser():
    """Build the ``translate.py`` parser with an extra ``--pivot_vocab``.

    ``--pivot_vocab`` is registered before the config and translate option
    groups and is stored under ``pivot_vocab``.

    Returns:
        The configured ``ArgumentParser``.
    """
    cli = ArgumentParser(description='translate.py')
    cli.add_argument('--pivot_vocab', action='store', dest='pivot_vocab')
    for add_group in (opts.config_opts, opts.translate_opts):
        add_group(cli)
    return cli
Beispiel #26
0
def _get_parser():
    """Build the argument parser for ``kp_generate.py``.

    Returns:
        An ``ArgumentParser`` with the config and translate option groups.
    """
    kp_parser = ArgumentParser(description='kp_generate.py')
    for add_group in (opts.config_opts, opts.translate_opts):
        add_group(kp_parser)
    return kp_parser
Beispiel #27
0
def _get_parser():
    """Build the argument parser for ``translate_dynamic.py``.

    The translate option group is registered with ``dynamic=True``.

    Returns:
        The configured ``ArgumentParser``.
    """
    dynamic_parser = ArgumentParser(description='translate_dynamic.py')
    opts.config_opts(dynamic_parser)
    opts.translate_opts(dynamic_parser, dynamic=True)
    return dynamic_parser
    def __init__(self, model_filename, cmdline_args):
        """Load a translation model and expose step-wise decoding helpers.

        The helpers are closures over the loaded model; they are attached to
        the instance at the end of this method.

        Args:
            model_filename: path passed to the option parser as ``-model``.
            cmdline_args: optional list of extra command-line arguments
                appended to the parser input (``None`` or empty for none).
        """
        parser = argparse.ArgumentParser(
            description='translate.py',
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        opts.add_md_help_argument(parser)
        opts.translate_opts(parser)
        # An empty string is supplied for -src; no source file is named here.
        opt = parser.parse_args(['-model', model_filename, '-src', ''] +
                                (cmdline_args or []))

        translator = make_translator(opt)
        model = translator.model
        fields = translator.fields
        tgt_vocab = fields["tgt"].vocab

        def encode_from_src(src):
            """Run the encoder on ``src`` and bundle its outputs with it."""
            enc_states, memory_bank = model.encoder(src)
            return dict(enc_states=enc_states,
                        memory_bank=memory_bank,
                        src=src)

        # Memoized: repeated requests for the same text reuse the encoding.
        @lru_cache(maxsize=32)
        def encode_text(in_text):
            """Preprocess and encode a source text."""
            text_preproc = fields['src'].preprocess(in_text)
            src, src_len = fields['src'].process([text_preproc],
                                                 device=-1,
                                                 train=False)
            # Adds a trailing axis; presumably the per-token feature axis
            # the encoder expects - TODO confirm.
            src = src.unsqueeze(2)  # not sure why
            return encode_from_src(src)

        @lru_cache(maxsize=32)
        def encode_img(image_idx):
            """Encode an image given by its index (int or numeric string)."""
            if isinstance(image_idx, str):
                image_idx = int(image_idx)
            src = Variable(torch.IntTensor([image_idx]), volatile=True)
            return encode_from_src(src)

        def encode(inp):
            """Encode ``inp`` as an image index or as text, by encoder type."""
            # The encoder class name decides which kind of input is expected.
            if model.encoder.__class__.__name__ == 'VecsEncoder':
                return encode_img(inp)
            else:
                return encode_text(inp)

        @lru_cache(maxsize=128)
        def get_decoder_state(in_text, tokens_so_far):
            """Return ``(decoder output, decoder state)`` after a prefix.

            ``tokens_so_far`` must be hashable (callers pass a tuple) because
            the result is memoized. An empty prefix yields ``None`` and the
            initial decoder state. Longer prefixes are handled recursively:
            the state for all but the last token is fetched, then the last
            token is fed to the decoder.
            """
            encoder_out = encode(in_text)
            enc_states = encoder_out['enc_states']
            memory_bank = encoder_out['memory_bank']
            src = encoder_out['src']

            if len(tokens_so_far) == 0:
                return None, translator.model.decoder.init_decoder_state(
                    src, memory_bank, enc_states)

            prev_out, prev_state = get_decoder_state(in_text,
                                                     tokens_so_far[:-1])

            tgt_in = Variable(torch.LongTensor(
                [tgt_vocab.stoi[tokens_so_far[-1]]]),
                              volatile=True)  # [tgt_len]
            tgt_in = tgt_in.unsqueeze(1)  # [tgt_len x batch=1]
            tgt_in = tgt_in.unsqueeze(1)  # [tgt_len x batch=1 x nfeats=1]

            # Prepare to call the decoder. Unfortunately the decoder mutates the state passed in!
            # Work on copies so the memoized encoder output and the memoized
            # previous state are not the objects handed to the decoder.
            memory_bank = copy.deepcopy(memory_bank)
            assert isinstance(prev_state.hidden, tuple)
            prev_state.hidden = tuple(v.detach() for v in prev_state.hidden)
            prev_state = copy.deepcopy(prev_state)

            # Only a single-sequence batch is supported.
            assert memory_bank.size()[1] == 1

            dec_out, dec_states, attn = translator.model.decoder(
                tgt_in, memory_bank, prev_state)

            # One token was fed in, so exactly one output step is expected.
            assert dec_out.shape[0] == 1
            return dec_out[0], dec_states

        def generate_completions(in_text, tokens_so_far):
            """Return generator output for the next token, plus the vocab.

            Returns:
                ``(logits, vocab)``: the generator output for the position
                after ``tokens_so_far`` and the target vocabulary's
                index-to-token list.
            """
            tokens_so_far = [onmt.io.BOS_WORD] + tokens_so_far
            tokens_so_far = tuple(tokens_so_far)  # Make it hashable
            dec_out, dec_states = get_decoder_state(in_text, tokens_so_far)
            logits = model.generator.forward(dec_out).data
            vocab = tgt_vocab.itos

            assert logits.shape[0] == 1
            logits = logits[0]
            return logits, vocab

        def eval_logprobs(in_text, tokens, *, use_eos):
            """Score ``tokens`` as a target for ``in_text``.

            BOS is prepended, and EOS appended when ``use_eos`` is true.

            Returns:
                A NumPy array with the unreduced ``F.nll_loss`` of the
                generator output against each target token after BOS.
            """
            encoder_out = encode(in_text)
            enc_states = encoder_out['enc_states']
            memory_bank = encoder_out['memory_bank']
            src = encoder_out['src']

            tokens = [onmt.io.BOS_WORD] + tokens
            if use_eos:
                tokens = tokens + [onmt.io.EOS_WORD]

            decoder_state = model.decoder.init_decoder_state(
                src, memory_bank=memory_bank, encoder_final=enc_states)
            tgt = Variable(
                torch.LongTensor([tgt_vocab.stoi[tok] for tok in tokens
                                  ]).unsqueeze(1).unsqueeze(1))
            # Inputs are all tokens but the last; targets are all tokens but
            # the first, so each step is scored against the following token.
            dec_out, dec_states, attn = model.decoder(tgt[:-1], memory_bank,
                                                      decoder_state)
            logits = model.generator(dec_out)
            return F.nll_loss(logits.squeeze(1),
                              tgt[1:].squeeze(1).squeeze(1),
                              reduce=False,
                              size_average=False).data.numpy()

        # Expose the loaded objects and the helper closures on the instance.
        self.model = model
        self.fields = fields
        self.translator = translator
        self.encode = encode
        self.get_decoder_state = get_decoder_state
        self.generate_completions = generate_completions
        self.eval_logprobs = eval_logprobs