def run_translate(verbose, translate_flags=tuple()):
    t0 = time()
    #translate_flags = ['--keep-unreachable-facts']
    #translate_flags = ['--negative-axioms']
    fd_root = get_fd_root()
    translate_path = os.path.join(fd_root, FD_BIN, TRANSLATE_DIR)
    if translate_path not in sys.path:
        sys.path.append(translate_path)
    temp_argv = sys.argv[:]
    sys.argv = sys.argv[:1] + list(translate_flags) + [DOMAIN_PATH, PROBLEM_PATH]
    import translate  # NOTE - no longer causes any overhead
    sys.argv = temp_argv
    if verbose:
        print SEPARATOR
        translate.main()
        print
        print DOMAIN_PATH
        print PROBLEM_PATH
        print 'from translate import main', time() - t0
    else:
        with open(os.devnull, 'w') as devnull:
            old_stdout = sys.stdout
            sys.stdout = devnull
            try:
                translate.main()
            finally:
                sys.stdout = old_stdout
def run_translate(verbose, translate_flags=tuple()):
    t0 = time()
    fd_root = get_fd_root()
    translate_path = os.path.join(fd_root, FD_BIN, TRANSLATE_DIR)
    if translate_path not in sys.path:
        sys.path.append(translate_path)
    temp_argv = sys.argv[:]
    sys.argv = sys.argv[:1] + \
        list(translate_flags) + [DOMAIN_PATH, PROBLEM_PATH]
    import translate
    sys.argv = temp_argv
    if verbose:
        print SEPARATOR
        translate.main()
        print
        print 'from translate import main', time() - t0
    else:
        with open(os.devnull, 'w') as devnull:
            old_stdout = sys.stdout
            sys.stdout = devnull
            try:
                translate.main()
            finally:
                sys.stdout = old_stdout
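# The two run_translate variants above work by temporarily swapping sys.argv so
# that translate.main() parses its flags as if it had been launched from the
# command line. A minimal, generic sketch of that pattern follows; the module
# and flag names in the usage comment are placeholders, not taken from the code
# above.
import sys
from contextlib import contextmanager

@contextmanager
def patched_argv(args):
    # Temporarily replace everything after argv[0] so a module-level main()
    # that reads sys.argv sees exactly these arguments.
    saved = sys.argv[:]
    sys.argv = saved[:1] + list(args)
    try:
        yield
    finally:
        sys.argv = saved

# Hypothetical usage:
# with patched_argv(['--some-flag', 'domain.pddl', 'problem.pddl']):
#     some_module.main()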
def run(self, *args, **kwargs):
    if self.translate is not None:
        # subprocess.check_call(["../translate.sh", "translate"])
        if IN_PYTHON3:
            translate.main(xcheck=False, snudown=SCRIPT_DIR)
        else:
            subprocess.check_call(
                ["python3", "../translate.py", "translate", SCRIPT_DIR])
        extensions.append(self.build_extension())
    if self.rust_crosschecks is not None:
        # subprocess.check_call(["../translate.sh", "rustcheck"])
        if IN_PYTHON3:
            translate.main(xcheck=True, snudown=SCRIPT_DIR)
        else:
            subprocess.check_call(
                ["python3", "../translate.py", "rustcheck", SCRIPT_DIR])
        extensions.append(self.build_extension())
    if self.clang_crosschecks is not None:
        # subprocess.check_call(["../translate.sh"])
        if IN_PYTHON3:
            translate.generate_html_entries_header(snudown=SCRIPT_DIR)
        else:
            subprocess.check_call(
                ["python3", "../translate.py", "html_entities", SCRIPT_DIR])
        extensions.append(self.build_extension())
    distutils.command.build.build.run(self, *args, **kwargs)
def main():
    if len(sys.argv) < 2:
        print sys.argv[0] + " start|stop|restart "
        sys.exit(1)
    cmd = sys.argv[1]
    context = daemon.DaemonContext(pidfile=PIDLockFile('/tmp/translate.pid'),
                                   working_directory='/tmp')
    if cmd == "start":
        with context:
            translate.main()
    elif cmd == "stop":
        context.close()
    elif cmd == "restart":
        print "todo: implement"
    else:
        print "start, stop, restart"
def run_opennmt_translate(self, pointer_generator_folder):
    translate_args = list()
    translate_args.append(
        {"model": os.path.join("models", self.lang,
                               "model_dsa_60_0_20K_2l_step_18000.pt"),
         "src_file": os.path.join(pointer_generator_folder, "src_sequences.txt"),
         "output_file": os.path.join(pointer_generator_folder, "predicted_by_model_1.txt"),
         "all_concepts_lowercase": False,
         "do_fix_sequences": True
         })
    translate_args.append(
        {"model": os.path.join("models", self.lang,
                               "model_dsa_dbpedia_spotlight10_100K_3l_step_80000.pt"),
         "src_file": os.path.join(pointer_generator_folder, "src_sequences.txt"),
         "output_file": os.path.join(pointer_generator_folder, "predicted_by_model_2.txt"),
         "all_concepts_lowercase": False,
         "do_fix_sequences": True
         })
    for params in translate_args:
        opt = self.parser.parse_args(["-model", params["model"],
                                      "-src", params["src_file"],
                                      "-output", params["output_file"],
                                      "-replace_unk"])  # "-gpu", 0
        translate.main(opt)
    return translate_args
def translate_trained_model(n_latent, data_path, output_dir, trained_model_path,
                            use_segments=False, max_segments=10):
    parser = ArgumentParser()
    opts.config_opts(parser)
    opts.translate_opts(parser)
    opt = parser.parse_args('')
    for k, v in translate.DEFAULT_TRANSLATE_PARAMS.items():
        vars(opt)[k] = v
    vars(opt)['models'] = [trained_model_path]
    src_path = '/'.join(data_path.split('/')[:-1]) + '/src-test.txt'
    vars(opt)['src'] = src_path
    output_path = '/'.join(output_dir.split('/')[:-2]) + '/preds'
    vars(opt)['output_dir'] = output_path
    vars(opt)['n_latent'] = n_latent
    vars(opt)['use_segments'] = use_segments
    vars(opt)['max_segments'] = max_segments
    translate.main(opt)
def run_translate(verbose, temp_dir, use_negative=False):
    t0 = time()
    translate_path = os.path.join(get_fd_root(), FD_BIN, TRANSLATE_DIR)
    if translate_path not in sys.path:
        sys.path.append(translate_path)
    if use_negative and ('modified' in get_fd_root()):
        translate_flags = ['--negative-axioms']
    else:
        translate_flags = []
    temp_argv = sys.argv[:]
    sys.argv = sys.argv[:1] + translate_flags + [DOMAIN_INPUT, PROBLEM_INPUT]
    import translate
    sys.argv = temp_argv
    old_cwd = os.getcwd()
    tmp_cwd = os.path.join(old_cwd, temp_dir)
    if verbose:
        print '\nTranslate command: import translate; translate.main()'
        os.chdir(tmp_cwd)
        translate.main()
        os.chdir(old_cwd)
        print 'Translate runtime:', time() - t0
        return
    with open(os.devnull, 'w') as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        os.chdir(tmp_cwd)
        try:
            translate.main()
        finally:
            sys.stdout = old_stdout
            os.chdir(old_cwd)
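# run_translate above silences translate.main() by swapping sys.stdout for
# os.devnull by hand. On Python 3.4+ the same effect can be obtained with
# contextlib.redirect_stdout; a small sketch follows (quiet_call is a
# hypothetical helper, not part of the original code).
import os
import contextlib

def quiet_call(fn, *args, **kwargs):
    # Run fn with its stdout discarded; sys.stdout is restored on exit.
    with open(os.devnull, 'w') as devnull:
        with contextlib.redirect_stdout(devnull):
            return fn(*args, **kwargs)

# Hypothetical usage:
# quiet_call(translate.main)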
def main(jobId, status, imgFile, transTo='en'):
    if status == "failure":
        return (False, "character recognition api", "failure")
    words = recognition.main(jobId)
    if len(words) == 0:
        return (False, "character recognition api", "characters don't exist")
    access_token = translate.getAccessToken()
    if access_token is None:
        return (False, "translation api", "can't get access token")
    transWords = translate.main(access_token, words, _to=transTo)
    if transWords is None:
        return (False, "translation api", "can't translate words")
    return (True, transWords, imgFile)
def make_summary(queue, keyword, article_list):
    # Print the extracted sentences for each sub-topic
    # print("INPUT TEXT : " + str(morphemized_article))
    make_tmp_input(keyword, article_list)
    opt = make_tmp_opt(keyword)
    # Can be swapped with the subprocess-based block below
    attns_info, oov_info, copy_info, raw_attns_info = translate.main(opt)
    # 2018/11/22
    # GPU memory flush
    #print("parent pid", os.getpid())
    #ctx = mp.get_context('spawn')
    #queue = ctx.Queue()
    #p = ctx.Process(target=translate.sub_main, args=(queue, opt))
    #p.start()
    #p.join(3)  # never finishes unless a timeout is set
    #attns_info, oov_info, copy_info, raw_attns_info = queue.get()
    # print("exit sub process")
    res = read_result(keyword)  # model output for the input (generated summary)
    # print("OUTPUT TEXT : " + str(res))
    json_list = []
    for i in range(len(raw_attns_info)):
        json_obj = make_demo_attn_info(article_list[i], res[i], raw_attns_info[i],
                                       json_path.format(keyword), i)
        json_list.append(json_obj)
    gen_abs_list = [dic["gen_abstract"] for dic in json_list]
    normal_word_gen_abs_list = m_to_n_convert.convert(gen_abs_list, keyword)
    # print("DEMO JSON : " + str(json_obj))
    queue.put((normal_word_gen_abs_list, json_list))
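# The commented-out block in make_summary hints at running the translation in a
# spawned child process so that GPU memory is released when the child exits. A
# generic standard-library sketch of that pattern follows; the target function
# and opt object are placeholders, and the read-before-join ordering avoids the
# usual Queue/join deadlock.
import multiprocessing as mp

def run_in_spawned_process(target, opt, timeout=None):
    # Run target(queue, opt) in a fresh 'spawn' process and collect its result;
    # any GPU/CUDA context held by the child is freed when it exits.
    ctx = mp.get_context('spawn')
    queue = ctx.Queue()
    p = ctx.Process(target=target, args=(queue, opt))
    p.start()
    result = queue.get()   # blocks until the child puts its result
    p.join(timeout)
    return result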
def main():
    data = "../../data_%s/%s/%s-train.pt" % (opt.task, opt.data, opt.data)
    print("Loading data from '%s'" % data)
    if opt.label_smooth:
        assert opt.num_rb_bin == 2
    dataset = torch.load(data)
    if opt.separate_threshold:
        print dataset["src_threshold"]
        print dataset["tgt_threshold"]
        threshold = {
            "src": dataset["src_threshold"][opt.num_rb_bin],
            "tgt": dataset["tgt_threshold"][opt.num_rb_bin]
        }
    else:
        if opt.num_rb_bin > 0:
            single_threshold = dataset['all_threshold'][opt.num_rb_bin]
        else:
            single_threshold = [0]
        threshold = {"src": single_threshold, "tgt": single_threshold}
    print threshold
    dicts = dataset['dicts']
    ori_datasets = copy.deepcopy(dataset)
    if opt.parallel_ratio is not None:
        parallel_len = l = int(
            len(dataset['train']['src']) * opt.parallel_ratio)
        dataset['train']['src'] = dataset['train']['src'][:l]
        print dataset['train']['src'][-1]
        dataset['train']['tgt'] = dataset['train']['tgt'][:l]
        dataset['train']['src_rb'] = dataset['train']['src_rb'][:l]
        dataset['train']['tgt_rb'] = dataset['train']['tgt_rb'][:l]
    else:
        parallel_len = None
    if opt.separate_encoder == 0:
        forward_data = onmt.BucketIterator(dataset['train']['src'],
                                           dataset['train']['tgt'],
                                           dataset['train']['src_rb'],
                                           dataset['train']['tgt_rb'],
                                           opt, threshold)
        valid_data = onmt.BucketIterator(dataset['valid']['src'],
                                         dataset['valid']['tgt'],
                                         dataset['valid']['src_rb'],
                                         dataset['valid']['tgt_rb'],
                                         opt, threshold)
        valid_datas = [valid_data]
        valid_weight = [1.]
        valid_probability = [1.]
        train_datas = [forward_data]
        probability = [1.]
        weights = [1.]
        print len(forward_data)
    else:
        opt.filter_src_rb = 0
        forward_data = onmt.BucketIterator(dataset['train']['src'],
                                           dataset['train']['tgt'],
                                           dataset['train']['src_rb'],
                                           dataset['train']['tgt_rb'],
                                           opt, threshold)
        #print len(forward_data)
        valid_data = onmt.BucketIterator(dataset['valid']['src'],
                                         dataset['valid']['tgt'],
                                         dataset['valid']['src_rb'],
                                         dataset['valid']['tgt_rb'],
                                         opt, threshold)
        valid_datas = [valid_data]
        valid_weight = [1.]
        valid_probability = [1.]
        train_datas = [forward_data]
        probability = [1.]
        weights = [1.]

        opt.filter_src_rb = 1
        forward_data = onmt.BucketIterator(dataset['train']['src'],
                                           dataset['train']['tgt'],
                                           dataset['train']['src_rb'],
                                           dataset['train']['tgt_rb'],
                                           opt, threshold)
        valid_data = onmt.BucketIterator(dataset['valid']['src'],
                                         dataset['valid']['tgt'],
                                         dataset['valid']['src_rb'],
                                         dataset['valid']['tgt_rb'],
                                         opt, threshold)
        valid_datas += [valid_data]
        valid_weight += [1.]
        valid_probability += [1.]
        train_datas += [forward_data]
        probability += [1.]
        weights += [1.]
        opt.filter_src_rb = None
    if not opt.no_tgt_to_src:
        backwardData = onmt.BucketIterator(dataset['train_bi']['src'],
                                           dataset['train_bi']['tgt'],
                                           dataset['train_bi']['src_rb'],
                                           dataset['train_bi']['tgt_rb'],
                                           opt, threshold)
        train_datas.append(backwardData)
        weights.append(1.)
        probability = [0.5, 0.5]
    trainData = onmt.mixed_iterator(train_datas, probability)
    validData = onmt.mixed_iterator(valid_datas, valid_probability)

    print(' * vocabulary size. source = %d; target = %d' %
          (dicts['src'].size(), dicts['tgt'].size()))
    print(' * number of training sentences. %d' % len(dataset['train']['src']))
    print(' * maximum batch size. %d' % opt.batch_size)

    print('Building model...')
    if opt.train_from is None:
        decoder = onmt.Models.Decoder(opt, dicts['tgt'], attn_type=opt.attn_type)
        generator = nn.Sequential(nn.Linear(opt.rnn_size, dicts['tgt'].size()),
                                  nn.LogSoftmax())
        if opt.cuda > 1:
            generator = nn.DataParallel(generator, device_ids=opt.gpus)
        discriminator = onmt.Models.Discriminator(opt)
        if not opt.separate_encoder:
            encoder = onmt.Models.Encoder(opt, dicts['src'])
            models = [
                onmt.Models.NMTModel(encoder, decoder, generator, discriminator, opt)
            ]
        else:
            models = []
            for i in range(opt.num_rb_bin):
                encoder = onmt.Models.Encoder(opt, dicts['src'])
                models += [
                    onmt.Models.NMTModel(encoder, decoder, generator, discriminator, opt)
                ]
        optims = []
        for model_single in models:
            if opt.cuda > 1:
                model_single = nn.DataParallel(model_single, device_ids=opt.gpus)
            if opt.cuda:
                model_single.cuda()
            else:
                model_single.cpu()
            model_single.generator = generator
            for p in model_single.get_seq2seq_parameters():
                p.data.uniform_(-opt.param_init, opt.param_init)
            for p in model_single.get_disc_parameters():
                if opt.non_linear == "relu":
                    opt.adv_para_init = 2. / opt.disc_size
                p.data.uniform_(-opt.adv_param_init, opt.adv_param_init)
            optim_single = onmt.Optim(
                model_single.parameters(),
                model_single.get_seq2seq_parameters(),
                model_single.get_disc_parameters(),
                model_single.get_encoder_parameters(),
                opt.optim, opt.learning_rate, opt.max_grad_norm,
                lr_decay=opt.learning_rate_decay,
                start_decay_at=opt.start_decay_at,
                adam_momentum=opt.adam_momentum,
            )
            optims += [optim_single]
    else:
        print('Loading from checkpoint at %s' % opt.train_from)
        checkpoint = torch.load(opt.train_from)
        model_single = checkpoint['model']
        if opt.cuda:
            model_single.cuda()
        else:
            model_single.cpu()
        optim_single = checkpoint['optim']
        opt.start_epoch = checkpoint['epoch'] + 1
    nParams = sum([
        p.nelement() for model_single in models for p in model_single.parameters()
    ])
    print('* number of parameters: %d' % nParams)
    trainModel(models, trainData, validData, dataset, optims, dicts, weights,
               valid_weight, threshold)
    if opt.task == "MT":
        translate.main([
            "-task", opt.task, "-data", opt.data,
            "-model", "%s/model.pt" % exp_path, "-replace_unk",
            "-gpus", str(opt.gpus[0]),
            "-output", "%s/test_no_unk.txt" % exp_path, "-verbose"
        ])
        evaluate_file.main([
            "-task", opt.task, "-data", opt.data,
            "-outputs", "%s/test_no_unk.txt" % exp_path
        ])
    elif opt.task == "Multi-MT":
        for test_set in ["test"]:
            for language_pair in dataset["language_pairs"]:
                line = language_pair.split("-")
                S_lang = line[0]
                T_lang = line[1]
                print "test_set", test_set + "_" + language_pair
                if opt.filter_src_rb is None or opt.filter_src_rb == dataset[
                        "src_language_mapping"][S_lang]:
                    translate.main([
                        "-task", opt.task, "-data", opt.data,
                        "-model", "%s/model.pt" % exp_path, "-replace_unk",
                        "-gpus", str(opt.gpus[0]),
                        "-output", "%s/%s_%s_no_unk.txt" % (exp_path, test_set, language_pair),
                        "-verbose", "-language_pair", language_pair,
                        "-test_set", test_set, "-bpe"
                    ])
                    evaluate_file.main([
                        "-task", opt.task, "-data", opt.data,
                        "-outputs", "%s/%s_%s_no_unk.txt" % (exp_path, test_set, language_pair),
                        "-language_pair", language_pair, "-test_set", test_set
                    ])
                else:
                    print "BLEU 0.0, SARI 0.00, R1 0.00, R2 0.00, RL 0.00, FK_O 0.0, acc 0.00"
    else:
        for i in range(opt.num_rb_bin):
            translate.main([
                "-task", opt.task, "-data", opt.data,
                "-model", "%s/model.pt" % exp_path, "-replace_unk",
                "-gpus", str(opt.gpus[0]),
                "-output", "%s/test_no_unk.txt" % exp_path, "-verbose",
                "-tgt_rb_all", str(i)
            ])
            evaluate_file.main([
                "-task", opt.task, "-data", opt.data,
                "-outputs", "%s/test_no_unk.txt" % exp_path, "-single_rb", str(i)
            ])
            print "all rb", i
        translate.main([
            "-task", opt.task, "-data", opt.data,
            "-model", "%s/model.pt" % exp_path, "-replace_unk",
            "-gpus", str(opt.gpus[0]),
            "-output", "%s/test_no_unk.txt" % exp_path, "-verbose"
        ])
        evaluate_file.main([
            "-task", opt.task, "-data", opt.data,
            "-outputs", "%s/test_no_unk.txt" % exp_path
        ])
def translate_task(task_file):
    print('\nTranslating %s:' % get_task_name(task_file))
    sys.argv = [sys.argv[0], '--force-old-python', task_file]
    translate.main()
import translate

if __name__ == '__main__':
    translate.main()