def replaceJapanese(msg):
    """Separate *msg* with translator2, then apply the kana dictionary to it.

    Steps, in order:

    1. ``translator2.initialize()`` is called when
       ``translator2.mecab_initialized`` is false.
    2. *msg* is replaced by the first element of the value returned by
       ``translator2.japanese_braille_separate(msg, _logwrite)``.
    3. The module-level ``kanadic`` is loaded through ``load_kanadic()`` the
       first time it is found to be ``None`` and reused afterwards.
    4. Every entry ``p`` of ``kanadic`` is applied as
       ``re.sub(p[0], p[1], msg)``.

    Returns the resulting string.
    """
    global kanadic

    if not translator2.mecab_initialized:
        translator2.initialize()
    msg = translator2.japanese_braille_separate(msg, _logwrite)[0]

    if kanadic is None:
        kanadic = load_kanadic()

    for p in kanadic:
        try:
            msg = re.sub(p[0], p[1], msg)
        except Exception:
            # Best effort: an entry that cannot be applied is skipped and the
            # remaining entries are still tried.  Only Exception is caught so
            # that KeyboardInterrupt / SystemExit are no longer swallowed.
            continue
    return msg
def _join_positions(positions):
    """Format a sequence of integer positions as a comma-separated string."""
    return ','.join(['%d' % n for n in positions])


def _expected_positions(test, key):
    """Return the joined positions stored under *key* in *test*, or None if absent."""
    if key in test:
        return _join_positions(test[key])
    return None


def pass2(verboseMode=False):
    """Run every entry of ``tests`` through translator2 and write a report.

    The report is written to ``__h2output.txt``.  It starts with whatever was
    logged while ``translator2.initialize`` ran, followed by one section per
    reported test case.

    An entry of ``tests`` is processed only when it has both an ``'input'``
    and a ``'text'`` key.  ``t['text']`` is translated with
    ``translator2.translateWithInPos2`` (with ``nabcc=True`` when
    ``t['mode'] == 'NABCC'``) and the case counts as an error when

    * the translated result differs from ``t['input']``, or
    * an expectation given under ``'inpos2'``, ``'inpos'`` or ``'outpos'``
      differs from the computed value.

    An expectation that is missing, or that joins to an empty string, is not
    checked.  ``'inpos1'`` is written to the report but is not part of the
    pass/fail decision.

    Args:
        verboseMode: when true, passing cases are written to the report too.

    Returns:
        A ``(count, outfile)`` tuple: the number of failing cases and the
        name of the report file.
    """
    global output
    outfile = '__h2output.txt'
    count = 0

    with open(outfile, 'w') as f:
        # The module-level `output` buffer is replaced before each translator2
        # call and read back right after it; presumably __print appends to
        # that buffer -- confirm against its definition.
        output = cStringIO.StringIO()
        translator2.initialize(__print, jtalk_dir, dic_dir, user_dics)
        log = output.getvalue()
        output.close()
        f.write(log)
        f.write("\n")

        for t in tests:
            if 'input' not in t or 'text' not in t:
                continue
            nabcc = 'mode' in t and t['mode'] == 'NABCC'

            output = cStringIO.StringIO()
            result, pat, inpos1, inpos2 = translator2.translateWithInPos2(
                t['text'], logwrite=__print, nabcc=nabcc)
            log = output.getvalue()
            output.close()

            # Merge the two position maps, then derive the reverse map from
            # the merged one.  The second value returned by mergePositionMap
            # is not used here.
            inpos = translator2.mergePositionMap(
                inpos1, inpos2, len(pat), len(t['text']))[0]
            outpos = translator2.makeOutPos(inpos, len(t['text']), len(pat))

            # Expected values (None when the test case does not give one).
            correct_inpos2 = _expected_positions(t, 'inpos2')
            correct_inpos1 = _expected_positions(t, 'inpos1')
            correct_inpos = _expected_positions(t, 'inpos')
            correct_outpos = _expected_positions(t, 'outpos')

            # Actual values.
            result_inpos2 = _join_positions(inpos2)
            result_inpos1 = _join_positions(inpos1)
            result_inpos = _join_positions(inpos)
            result_outpos = _join_positions(outpos)

            # NOTE(review): inpos1 is reported below but never compared --
            # confirm whether that is intentional.
            isError = False
            if (result != t['input']
                    or (correct_inpos2 and result_inpos2 != correct_inpos2)
                    or (correct_inpos and result_inpos != correct_inpos)
                    or (correct_outpos and result_outpos != correct_outpos)):
                isError = True
                count += 1

            if not (isError or verboseMode):
                continue

            f.write("text : " + t['text'].encode('utf-8') + "\n")
            f.write("correct: " + t['input'].encode('utf-8') + "\n")
            f.write("result : " + result.encode('utf-8') + "\n")
            f.write("pat : " + pat.encode('utf-8') + "\n")
            if correct_inpos2:
                f.write("cor_in2: " + correct_inpos2 + "\n")
            if correct_inpos1:
                f.write("cor_in1: " + correct_inpos1 + "\n")
            if correct_inpos:
                f.write("cor_in : " + correct_inpos + "\n")
            if correct_outpos:
                f.write("cor_out: " + correct_outpos + "\n")
            f.write("res_in2: " + result_inpos2 + "\n")
            f.write("res_in1: " + result_inpos1 + "\n")
            f.write("res_in : " + result_inpos + "\n")
            f.write("res_out: " + result_outpos + "\n")
            if 'comment' in t and t['comment']:
                f.write("comment: " + t['comment'].encode('utf-8') + "\n")
            f.write("\n")
            f.write(log)
            f.write("\n")

    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement.
    print('h2: %d error(s). see %s' % (count, outfile))
    return (count, outfile)