def MakeStrDic(psp_jp_files, pc_cn_files, report):
    """Build a JP -> CN string mapping from paired text files.

    The two file lists are walked in lockstep.  Each pair must share the
    same ``Common.BaseName`` and the same number of non-blank lines.  The
    text field of every line (third item returned by
    ``Common.ParseWQSGLine``) is split with ``Common.SplitString``; the
    resulting JP pieces become keys and the CN pieces become values.

    Args:
        psp_jp_files: Sequence of paths to the JP text files (UTF-16).
        pc_cn_files: Sequence of paths to the CN text files (UTF-16),
            in the same order as ``psp_jp_files``.
        report: Writable text stream; lines whose JP and CN group counts
            differ are logged here and skipped.

    Returns:
        ``(dst_dic, match_dic)`` where ``dst_dic`` maps each JP piece to
        its CN piece and ``match_dic`` maps a file base name to the list
        of JP pieces collected from that file.  If the two input lists
        differ in length the function returns ``None`` after waiting for
        a key press (behaviour kept for existing callers).

    Raises:
        SystemExit: With code 1 when a pair's base names differ or the
            pair has a different number of non-blank lines.
    """
    if len(psp_jp_files) != len(pc_cn_files):
        print('File num not match')
        input()
        return

    dst_dic = dict()
    match_dic = dict()
    for fn_jp, fn_cn in zip(psp_jp_files, pc_cn_files):
        if Common.BaseName(fn_jp) != Common.BaseName(fn_cn):
            print('Filename not match: {} {}'.format(fn_jp, fn_cn))
            input()
            # Raise SystemExit directly: the ``exit`` helper is injected by
            # the ``site`` module and is not guaranteed to exist.
            raise SystemExit(1)

        jp_group_list = []
        fname = Common.BaseName(fn_cn)
        print('process jp cn: {}'.format(fname))

        # Context managers guarantee both handles are released even when
        # parsing below raises or the process exits early.
        with open(fn_jp, 'r', encoding='utf16') as jp, \
                open(fn_cn, 'r', encoding='utf16') as cn:
            jp_lines = [line.rstrip('\n') for line in jp if line != '\n']
            cn_lines = [line.rstrip('\n') for line in cn if line != '\n']

        if len(jp_lines) != len(cn_lines):
            input('Line num not match: {}'.format(Common.BaseName(fn_jp)))
            raise SystemExit(1)

        for jp_line, cn_line in zip(jp_lines, cn_lines):
            _, _, jp_text = Common.ParseWQSGLine(jp_line)
            _, _, cn_text = Common.ParseWQSGLine(cn_line)
            if (not jp_text) and (not cn_text):
                continue

            jp_group = Common.SplitString(jp_text)
            cn_group = Common.SplitString(cn_text)
            if len(jp_group) != len(cn_group):
                # Pieces cannot be paired one-to-one; log and skip the line.
                log = 'Group not match {}:{}--{}'.format(
                    fname, jp_text, cn_text)
                print(log)
                report.write(log + '\n')
                continue

            jp_group_list += jp_group
            # Later occurrences of the same JP piece overwrite earlier ones.
            dst_dic.update(zip(jp_group, cn_group))

        match_dic[fname] = jp_group_list

    return dst_dic, match_dic
def CompareLength(psp_jp_files, pc_cn_files, report):
    """Report lines whose CN text is longer than the matching JP text.

    The two file lists are walked in lockstep.  Each pair must share the
    same ``Common.BaseName`` and the same number of non-blank lines.  For
    every line pair the text field (third item returned by
    ``Common.ParseWQSGLine``) is compared by ``len``; when the JP text is
    shorter than the CN text a line is written to ``report``.

    Args:
        psp_jp_files: Sequence of paths to the JP text files (UTF-16).
        pc_cn_files: Sequence of paths to the CN text files (UTF-16),
            in the same order as ``psp_jp_files``.
        report: Writable text stream receiving one line per offending
            entry.

    Returns:
        None.  If the two input lists differ in length the function
        prints a message, waits for a key press and returns immediately.

    Raises:
        SystemExit: With code 1 when a pair's base names differ or the
            pair has a different number of non-blank lines.
    """
    if len(psp_jp_files) != len(pc_cn_files):
        print('File num not match')
        input()
        return

    for fn_jp, fn_cn in zip(psp_jp_files, pc_cn_files):
        if Common.BaseName(fn_jp) != Common.BaseName(fn_cn):
            print('Filename not match: {} {}'.format(fn_jp, fn_cn))
            input()
            # Raise SystemExit directly: the ``exit`` helper is injected by
            # the ``site`` module and is not guaranteed to exist.
            raise SystemExit(1)

        fname = Common.BaseName(fn_cn)
        print('process jp cn: {}'.format(fname))

        # Context managers guarantee both handles are released even when
        # parsing below raises or the process exits early.
        with open(fn_jp, 'r', encoding='utf16') as jp, \
                open(fn_cn, 'r', encoding='utf16') as cn:
            jp_lines = [line.rstrip('\n') for line in jp if line != '\n']
            cn_lines = [line.rstrip('\n') for line in cn if line != '\n']

        if len(jp_lines) != len(cn_lines):
            input('Line num not match: {}'.format(Common.BaseName(fn_jp)))
            raise SystemExit(1)

        for jp_line, cn_line in zip(jp_lines, cn_lines):
            _, _, jp_text = Common.ParseWQSGLine(jp_line)
            _, _, cn_text = Common.ParseWQSGLine(cn_line)
            if (not jp_text) and (not cn_text):
                continue
            if len(jp_text) < len(cn_text):
                report.write(
                    '{} > {} -- {}\n'.format(fname, jp_text, cn_text))
def CheckOnePair(fn_jp, fn_cn, report):
    """Check that one JP/CN file pair splits into matching group counts.

    Both files are read as UTF-16 and blank lines are ignored.  If the
    number of remaining lines differs, a single message is written to
    ``report`` and the check stops.  Otherwise, for each line pair, the
    text field (third item returned by ``Common.ParseWQSGLine``) is split
    with ``Common.SplitString`` and a message is written to ``report``
    whenever the JP and CN sides produce a different number of pieces.

    Args:
        fn_jp: Path to the JP text file.
        fn_cn: Path to the CN text file.
        report: Writable text stream receiving the mismatch messages.

    Returns:
        None.
    """
    # ``with`` closes both files on every exit path; the previous manual
    # close() calls were skipped by the early return below.
    with open(fn_jp, 'r', encoding='utf16') as jp, \
            open(fn_cn, 'r', encoding='utf16') as cn:
        jp_lines = [line for line in jp if line != '\n']
        cn_lines = [line for line in cn if line != '\n']

    if len(cn_lines) != len(jp_lines):
        report.write(
            'Line num not match:{}\n'.format(Common.BaseName(fn_jp)))
        return

    for jp_line, cn_line in zip(jp_lines, cn_lines):
        _, _, jp_text = Common.ParseWQSGLine(jp_line.rstrip('\n'))
        _, _, cn_text = Common.ParseWQSGLine(cn_line.rstrip('\n'))
        jp_group_count = len(Common.SplitString(jp_text))
        cn_group_count = len(Common.SplitString(cn_text))
        if jp_group_count != cn_group_count:
            report.write('Group num not match: {}:{}\t{}:{}\n'.format(
                fn_jp, jp_text, fn_cn, cn_text))
def ReadVitaGroups(vita_fn_list):
    """Collect the split text pieces found in a list of Vita text files.

    Each file is read as UTF-16 with blank lines ignored.  The text field
    of every line (third item returned by ``Common.ParseWQSGLine``) is
    split with ``Common.SplitString``.  Pieces for which ``IsAlNum`` is
    truthy are dropped; every other piece is recorded.

    Args:
        vita_fn_list: Iterable of paths to the Vita text files.

    Returns:
        A list of ``(base_name, piece)`` tuples in encounter order, where
        ``base_name`` is ``Common.BaseName`` of the source file.
    """
    dst_list = []
    for fn in vita_fn_list:
        fname = Common.BaseName(fn)
        print('process vita txt: {}'.format(fname))

        # ``with`` releases the handle even if parsing below raises.
        with open(fn, 'r', encoding='utf16') as src:
            lines = [line.rstrip('\n') for line in src if line != '\n']

        for line in lines:
            _, _, text = Common.ParseWQSGLine(line)
            if not text:
                continue
            for string in Common.SplitString(text):
                if IsAlNum(string):
                    continue
                if (len(string) >= 2
                        and string[0] != ' ' and string[1] != ' '
                        and bool(IsAlNum(string[0]))
                        != bool(IsAlNum(string[1]))):
                    # Exactly one of the first two characters is
                    # alphanumeric and neither is a space.  Such pieces
                    # used to be skipped as junk; the Vita text is now
                    # clean enough that they are only logged and still
                    # added to the result.
                    print('alpha string:{}'.format(string))
                dst_list.append((fname, string))
    return dst_list