def e2k_trans(num=None):
    """Flask view: translate an English sentence to Korean (e2k).

    GET renders the empty form. POST writes the submitted sentence to
    ``input.txt``, tokenizes it and applies BPE via external tools, runs
    beam-1 translation with the global ``e2k_model``, and renders the
    result. ``num`` is unused (kept for route compatibility).
    """
    if request.method == 'GET':
        return render_template("e2k.html")
    if request.method == 'POST':
        if request.form['src'] == "":
            return render_template("e2k.html")
        input_sen = request.form['src']
        # Write the source sentence where the external tools expect it.
        with open("input.txt", "w", encoding="utf8") as text_file:
            text_file.write(input_sen)
        # Tokenize, then apply subword BPE (project-relative paths).
        # Return values are the tools' stdout and are not needed here.
        check_output('./tokenizer.perl en < input.txt> input.txt.tok',
                     shell=True)
        check_output('../subword_nmt/apply_bpe.py -c ../data_05/bleu05/test/kr.10000.code < ./input.txt.tok > ./input.txt.tok.sym.sub',
                     shell=True)
        valid_iter = TextIterator(args.src_file, args.en_dict,
                                  batch_size=1, maxlen=1000, ahead=1,
                                  resume_num=0)
        for x_data, x_mask, cur_line, iloop in valid_iter:
            samples = translate_beam_1(e2k_model, x_data, args)
            output = ids2words(e2k_trg_inv_dict, samples, eos_id=EOS_token)
            output = unbpe(output)
            # Re-attach apostrophes that tokenization spaced out.
            output = output.replace(" ' ", "'")
            # Only one sentence is submitted, so return on the first item.
            return render_template('e2k.html', src_contents=input_sen,
                                   trans_contents=output)
    else:
        return render_template("e2k.html")
# Interactive demo: load a trained model and translate three sentences
# typed on stdin, running the same tokenize -> BPE -> translate pipeline
# as the web views.
model = torch.load(file_name)
for i in range(3):
    input_sen = input("source: ")
    print(input_sen)
    # Write the sentence where the external tools expect it.
    with open("input.kr", "w", encoding="utf8") as text_file:
        text_file.write(input_sen)
    # Tokenize, then apply subword BPE; the tools' stdout is not needed.
    check_output('./tokenizer.perl en < input.kr> input.kr.tok', shell=True)
    check_output('../subword_nmt/apply_bpe.py -c ../data_05/bleu05/test/kr.10000.code < ./input.kr.tok > ./input.kr.tok.sub',
                 shell=True)
    valid_iter = TextIterator(src_file, args.src_dict, batch_size=1,
                              maxlen=1000, ahead=1, resume_num=0)
    for x_data, x_mask, cur_line, iloop in valid_iter:
        if args.beam_width == 1:
            samples = translate_beam_1(model, x_data, args)
        else:
            samples = translate_beam_k(model, x_data, args)
        sentence = ids2words(trg_inv_dict, samples, eos_id=EOS_token)
        sentence = unbpe(sentence)
        print("trans: ", sentence)
def k2e_trans(num=None):
    """Flask view: translate a Korean sentence to English (k2e).

    GET renders the empty form. POST runs the full pipeline:
    tokenize -> number symbolization -> proper-noun masking (__P0 ...)
    -> BPE -> beam-1 translation -> detokenization and un-masking.
    ``num`` is unused (kept for route compatibility).
    NOTE(review): the original author flagged this view as a quick hack.
    """
    if request.method == 'GET':
        return render_template("k2e.html")
    if request.method == 'POST':
        if request.form['src'] == "":
            return render_template("k2e.html")
        input_sen = request.form['src']
        print("src_kr : " + input_sen)
        # Tokenization: write the sentence where the tools expect it.
        with open("input.txt", "w", encoding="utf8") as text_file:
            text_file.write(input_sen)
        check_output('./tokenizer.perl en < input.txt> input.txt.tok',
                     shell=True)
        # Number symbolization (only kr -> en is handled for now).
        call('./web_symbolize.py', shell=True)
        with open("input.txt.tok.sym", "r", encoding="utf8") as text_file:
            replaced_sen = text_file.read()
        print("number_sym : ", replaced_sen)
        # Bible proper nouns => placeholders such as __P0.
        lang = "k2e"
        replaced_sen, info_dict = convert_pn_for_web(replaced_sen,
                                                     PN_list, lang)
        print("replaced_sen : " + replaced_sen)
        print("info_dict : ", info_dict)
        with open("input.txt.tok.sym.pn", "w", encoding="utf8") as text_file:
            text_file.write(replaced_sen)
        # TODO(review): the BPE code-file path should come from config.
        check_output("../subword_nmt/apply_bpe.py -c " +
                     "./bible_people/bible_data_GYGJ+NIV/PN/subword/kr.5000.code " +
                     "< ./input.txt.tok.sym.pn > ./input.txt.tok.sym.pn.sub",
                     shell=True)
        # Feed into the k2e model.
        valid_iter = TextIterator(args.src_file, args.kr_dict, batch_size=1,
                                  maxlen=1000, ahead=1, resume_num=0)
        for x_data, x_mask, cur_line, iloop in valid_iter:
            samples = translate_beam_1(k2e_model, x_data, args)
            output = ids2words(k2e_trg_inv_dict, samples, eos_id=EOS_token)
            output = unbpe(output)
            # Re-attach apostrophes that tokenization spaced out.
            # (The original also had replace("'", "'"), a no-op — dropped.)
            output = output.replace(" ' ", "'")
            output = output.replace(" '", "'")
            output = output.replace("' ", "'")
            # BUGFIX: the double-quote replacements were mis-escaped in the
            # original (syntax errors like replace(" " ", "\"")); restored
            # to the clearly intended ' " ' -> '"' normalization mirroring
            # the apostrophe block above.
            output = output.replace(' " ', '"')
            output = output.replace(' "', '"')
            output = output.replace('" ', '"')
            # Revert number symbolization (e.g. key "__NO" -> value "25").
            with open("mapping.sym", "rb") as mapping:
                num_dict = pickle.load(mapping)
            print("num_dict : ", num_dict)
            print("output1 : " + output)
            for key, value in num_dict.items():
                # str.replace is a no-op when key is absent, so no guard.
                output = output.replace(key, value)
            # Restore __P0-style placeholders to the target-language name.
            for key, val in info_dict.items():  # key: __P0, val: Korean name
                temp = key.strip()
                if temp in output:
                    for PN_key, PN_val in PN_list:
                        if val == PN_key:
                            output = output.replace(temp, PN_val)
            print("output2 : ", output)
            return render_template('k2e.html', src_contents=input_sen,
                                   trans_contents=output)
    else:
        return render_template("k2e.html")
def translate_file(args, valid=None, model=None):
    """Translate ``args.valid_src_file`` sentence-by-sentence.

    If ``model`` is None, it is loaded from ``args.save_dir``. When
    ``valid`` is truthy, translations are piped into the multi-bleu perl
    script and the parsed BLEU score is returned (-1 if parsing fails);
    otherwise translations are written to ``args.trans_file`` and -1 is
    returned.
    """
    # BUGFIX: the original called torch.no_grad() as a bare statement,
    # which creates and immediately discards a context manager and does
    # NOT disable gradients. The symmetric torch.set_grad_enabled(True)
    # at the end shows the intent was a global toggle — do that here.
    torch.set_grad_enabled(False)
    valid_iter = TextIterator(args.valid_src_file, args.src_dict,
                              batch_size=1, maxlen=1000, ahead=1,
                              resume_num=0)
    trg_dict2 = read_dict(args.trg_dict)
    args.trg_words_n = len(trg_dict2)
    # Invert word->id into id->word for decoding.
    trg_inv_dict = {vv: kk for kk, vv in trg_dict2.items()}

    # Load the model from disk if the caller did not pass one.
    if model is None:
        file_name = args.save_dir + '/' + args.model_file + '.pth'
        if args.use_best == 1:
            # NOTE(review): this yields "<name>.pth.best.pth" — confirm
            # checkpoints are really saved under that double suffix.
            file_name = file_name + '.best.pth'
            print("Using best model")
        model = torch.load(file_name)

    # Output sink: either the BLEU scorer's stdin or a plain file.
    if valid:
        multibleu_cmd = ["perl", args.bleu_script, args.valid_trg_file, "<"]
        mb_subprocess = Popen(multibleu_cmd, stdin=PIPE, stdout=PIPE,
                              universal_newlines=True)
    else:
        fp = open(args.trans_file, 'w')

    for x_data, x_mask, cur_line, iloop in valid_iter:
        # Validation always uses greedy beam-1 for speed.
        if valid or args.beam_width == 1:
            samples = translate_beam_1(model, x_data, args)
        else:
            samples = translate_beam_k(model, x_data, args)
        sentence = ids2words(trg_inv_dict, samples, eos_id=EOS_token)
        sentence = unbpe(sentence)
        if valid:
            mb_subprocess.stdin.write(sentence + '\n')
            mb_subprocess.stdin.flush()
            if iloop % 500 == 0:
                print(iloop, 'is validated...')
        else:
            fp.write(sentence + '\n')
            if iloop % 500 == 0:
                print(iloop, 'is translated...')

    ret = -1
    if valid:
        # Close stdin so the scorer emits its summary line, then parse it.
        mb_subprocess.stdin.close()
        stdout = mb_subprocess.stdout.readline()
        out_parse = re.match(r'BLEU = [-.0-9]+', stdout)
        mb_subprocess.terminate()
        if out_parse:
            ret = float(out_parse.group()[6:])
    else:
        fp.close()
    torch.set_grad_enabled(True)
    return ret