Esempio n. 1
0
    def check_same_en_difference_cn(en_dir,
                                    cn_dir,
                                    print_msg=False,
                                    suffix='',
                                    trans_unicode=True):
        """Detect English texts that have inconsistent Chinese translations.

        Each file listed for ``en_dir`` is paired with its Chinese counterpart
        (resolved by ``Translator.get_cn_file_name``); pairs whose Chinese file
        does not exist are skipped. An entry counts as a translation only when
        the Chinese value differs from the English value.

        :param en_dir: directory holding the English files
        :param cn_dir: directory holding the Chinese files
        :param print_msg: print every conflict and the running dictionary size
        :param suffix: forwarded to ``Translator.get_cn_file_name``
        :param trans_unicode: forwarded to ``Tools.get_dict_from_file`` when
            reading the Chinese file
        :return: ``(all_translation, diff_translation)``. ``all_translation``
            maps an English text to its Chinese translation and contains only
            texts without conflicts; ``diff_translation`` maps a conflicting
            English text to all of its distinct translations joined by
            newlines.
        """

        all_translation = dict()
        diff_translation = dict()
        # Raw string: '\.' inside a plain literal is an invalid escape sequence.
        en_file_list = filex.list_file(en_dir, r'\.(?!png|gif)')
        for en_file in en_file_list:
            print('\ncheck ' + en_file)
            cn_file = Translator.get_cn_file_name(en_dir, cn_dir, en_file,
                                                  suffix)
            if not os.path.exists(cn_file):
                print('中文文件不存在' + cn_file)
                continue
            en_dict = Tools.get_dict_from_file(en_file)
            cn_dict = Tools.get_dict_from_file(cn_file,
                                               delete_cn_shortcut=True,
                                               trans_unicode=trans_unicode)
            for key, en_value in en_dict.items():
                if key not in cn_dict:
                    continue
                cn_value = cn_dict[key]
                if cn_value == en_value:
                    # Identical text means the entry was not translated.
                    continue
                if en_value not in all_translation:
                    all_translation[en_value] = cn_value
                    continue
                pre_translation = all_translation[en_value]
                if pre_translation == cn_value:
                    continue
                if en_value not in diff_translation:
                    diff_translation[en_value] = (pre_translation + '\n' +
                                                  cn_value)
                else:
                    pre_diff_translation = diff_translation[en_value]
                    # Record a translation only once per English text.
                    if cn_value not in pre_diff_translation.split('\n'):
                        diff_translation[en_value] = (pre_diff_translation +
                                                      '\n' + cn_value)
                if print_msg:
                    print('\n词典中已经存在%s,但翻译不相同\n%s\n%s' %
                          (en_value, pre_translation, cn_value))
            if print_msg:
                print('the size is %d' % len(all_translation))
        # Reading finished: conflicting texts are reported only in
        # diff_translation, so drop them from the consistent dictionary.
        for key in diff_translation:
            all_translation.pop(key)
        return all_translation, diff_translation
Esempio n. 2
0
 def export_to_omegat(file_path, result_file=None):
     """Export a translation dictionary file as an OmegaT memory file.

     Entries with an empty value are dropped. When a value contains the
     marker '【】', only the text before its first occurrence is exported.

     :param file_path: dictionary file to read
     :param result_file: output file; derived from ``file_path`` with the
         '.tmx.xml' extension when omitted
     """
     marker = '【】'
     target = result_file
     if target is None:
         target = filex.get_result_file_name(file_path, '', '.tmx.xml')
     source = filex.get_dict_from_file(file_path)
     exported = {
         key: (value.split(marker)[0] if marker in value else value)
         for key, value in source.items()
         if value
     }
     Tools.save_omegat_dict(exported, target)
Esempio n. 3
0
    def update_omegat_dict(dict_file, omegat_dict_file, result_dict_file):
        """Merge an OmegaT memory into an existing dictionary and save it.

        Entries from the OmegaT memory override entries of ``dict_file`` that
        share the same key.

        :param dict_file: existing dictionary file
        :param omegat_dict_file: OmegaT memory file whose entries win
        :param result_dict_file: file the merged dictionary is saved to
        """

        merged = Tools.get_dict_from_file(dict_file)
        print('pre size is %d' % len(merged))
        overrides = Translator.get_omegat_dict(omegat_dict_file)
        print('omegat size is %d' % len(overrides))

        # The OmegaT memory is authoritative on conflicts.
        merged.update(overrides)

        print('result size is %d' % len(merged))
        Tools.save_omegat_dict(merged, result_dict_file)
Esempio n. 4
0
 def update_omegat_dict_by_pseudo_dict(omegat_dict_file,
                                       pseudo_dict_file,
                                       result_file=None):
     """Update a translation memory using a pseudo-translation memory.

     Both files are read with ``Tools.get_dict_from_omegat``, combined by
     ``ActionsBundle.update_dict_add_extra_info`` and saved as an OmegaT
     dictionary.

     :param omegat_dict_file: the translation memory file
     :param pseudo_dict_file: the pseudo-translation memory file
     :param result_file: output file; derived from ``pseudo_dict_file`` with
         the '_update_by_pseudo' suffix when omitted
     """
     output_path = result_file
     if output_path is None:
         output_path = filex.get_result_file_name(pseudo_dict_file,
                                                  '_update_by_pseudo')
     memory = Tools.get_dict_from_omegat(omegat_dict_file)
     pseudo = Tools.get_dict_from_omegat(pseudo_dict_file)
     sizes = (len(memory), len(pseudo))
     print('记忆库共%d条记录,伪翻译共%d条记录' % sizes)
     updated = ActionsBundle.update_dict_add_extra_info(memory, pseudo)
     print('处理结果共%s条记录' % len(updated))
     Tools.save_omegat_dict(updated, output_path)
Esempio n. 5
0
    def get_action_desc_dict(en_file, print_msg=False):
        """
        Build a mapping from action text to action description.

        For every key ending in '.text' whose sibling '.description' key
        exists, the text value is mapped to the description value. A text
        that occurs with different descriptions is ambiguous and is removed
        from the result entirely.

        :param en_file: file read with ``Tools.get_dict_from_file``
        :param print_msg: print the removed texts and the final dictionary
        :return: dict mapping text to description, without ambiguous texts
        """

        en_dict = Tools.get_dict_from_file(en_file)
        desc_dict = dict()
        # Handling ambiguous texts is cumbersome, so they are simply removed.
        duplicate_value = list()
        text_suffix = '.text'
        # Invert the mapping: text value -> description value.
        for key, value in en_dict.items():
            if not key.endswith(text_suffix):
                continue
            desc_key = key[:-len(text_suffix)] + '.description'
            if desc_key not in en_dict:
                continue
            desc = en_dict[desc_key]
            if value in desc_dict:
                pre_desc = desc_dict[value]
                if desc == pre_desc:
                    # Format both placeholders; the old call concatenated the
                    # value onto the template and never interpolated it.
                    print('%s重复【%s】' % (value, desc))
                else:
                    duplicate_value.append(value)
                    # Report the two differing descriptions before merging.
                    print('%s有不同的描述【%s】【%s】' % (value, pre_desc, desc))
                    desc = '%s【或】%s' % (pre_desc, desc)
            desc_dict[value] = desc
        for value in duplicate_value:
            # A text may be listed more than once; remove it only once.
            if value in desc_dict:
                desc_dict.pop(value)
                if print_msg:
                    print('删除' + value)
        if print_msg:
            print(desc_dict)
        return desc_dict
Esempio n. 6
0
    def process_google_translation_to_omegat_dict(en_file,
                                                  cn_file,
                                                  result_file=None):
        """Pair English and Chinese values by key and save them as an OmegaT dictionary.

        Keys and values of the Chinese file are stripped of surrounding
        whitespace. For every stripped key that also exists in the English
        file, the English value is mapped to the Chinese value.

        :param en_file: English dictionary file
        :param cn_file: Chinese dictionary file
        :param result_file: output file; derived from ``en_file`` when omitted
        """
        if result_file is None:
            result_file = filex.get_result_file_name(en_file, '', 'tmx')
        source_by_key = filex.get_dict_from_file(en_file)
        translated_by_key = filex.get_dict_from_file(cn_file)
        print('英文词典%d条' % (len(source_by_key)))
        print('中文词典%d条' % (len(translated_by_key)))

        paired = dict()
        for raw_key, raw_value in translated_by_key.items():
            key, translated = raw_key.strip(), raw_value.strip()
            if key not in source_by_key:
                continue
            paired[source_by_key[key]] = translated

        print('结果%d条' % (len(paired)))
        Tools.save_omegat_dict(paired, result_file)
Esempio n. 7
0
    def generate_need_translation_file2(en_dir, result_file):
        """Write all entries found under ``en_dir`` into a single file.

        The dictionaries of all listed files are merged (later files override
        earlier ones on equal keys) and written as 'key=value' lines.

        :param en_dir: directory holding the English files
        :param result_file: file the merged entries are written to
        """
        merged = dict()
        for path in filex.list_file(en_dir):
            merged.update(Tools.get_dict_from_file(path))

        lines = ['%s=%s\n' % (key, value) for key, value in merged.items()]
        filex.write_lines(result_file, lines)
Esempio n. 8
0
    def check_same_key_difference_value(dir_path):
        """Report keys that carry different values in different files.

        Inconsistencies do occur in practice, which is why entries have to be
        told apart per file.

        :param dir_path: directory whose files are compared
        """

        seen = dict()
        for path in filex.list_file(dir_path):
            print('\ncheck ' + path)
            current = Tools.get_dict_from_file(path,
                                               delete_value_and_symbol=True,
                                               trans_unicode=True)
            # Compare against everything read from the previous files.
            for key, value in current.items():
                if key not in seen:
                    continue
                previous = seen[key]
                if previous != value:
                    print('key相同%s,但value不一致\n%s\n%s' %
                          (key, previous, value))
            seen.update(current)
Esempio n. 9
0
    def generate_need_translation_file(en_dir, result_dir, size=3000):
        """Write all entries found under ``en_dir`` into numbered chunk files.

        The dictionaries of all listed files are merged (later files override
        earlier ones on equal keys), rendered as 'key=value' lines and split
        into files of at most ``size`` lines each. Files are numbered from 0.

        NOTE: the file name format is kept unchanged for compatibility. '%2d'
        pads the index with a space (e.g. ' 0.properties') and the separator
        is a literal backslash.

        :param en_dir: directory holding the English files
        :param result_dir: directory the chunk files are written to
        :param size: maximum number of entries per file
        :raises ValueError: if ``size`` is smaller than 1
        """
        if size < 1:
            raise ValueError('size must be at least 1')

        all_translation = dict()
        for en_file in filex.list_file(en_dir):
            all_translation.update(Tools.get_dict_from_file(en_file))

        lines = [
            '%s=%s\n' % (key, value) for key, value in all_translation.items()
        ]
        # Slice into full chunks; the previous modulo test made the first
        # file one entry short.
        for index, start in enumerate(range(0, len(lines), size)):
            result_file = '%s\\%2d.properties' % (result_dir, index)
            filex.write_lines(result_file, lines[start:start + size])
Esempio n. 10
0
    def add_ellipsis_and_shortcut(en_file, cn_file, result_file=None):
        """
        Align trailing punctuation and shortcut markers of the Chinese
        translation with the English source, then re-translate the file.

        For every key present in both files:

        * if the English value ends with '...', '……' (anywhere) or a trailing
          '…' in the Chinese value is replaced by '...', and '...' is
          appended when the value still does not end with it;
        * otherwise, if the English value ends with '.', every '.' in the
          Chinese value is replaced by '。' and '。' is appended when missing;
        * if the English value contains '_', the character following the
          first '_' is the shortcut: an existing '(_x)' marker in the Chinese
          value is rewritten with it, otherwise all underscores are removed
          and '(_shortcut)' is appended.

        :param en_file: English source file
        :param cn_file: Chinese translation file
        :param result_file: output file; derived from ``cn_file`` with the
            '_add_ellipsis_and_shortcut' suffix when omitted
        :return: None
        """
        if result_file is None:
            result_file = filex.get_result_file_name(
                cn_file, '_add_ellipsis_and_shortcut')

        en_dict = Tools.get_dict_from_file(en_file,
                                           delete_value_ellipsis=False,
                                           delete_value_underline=False)
        cn_dict = Tools.get_dict_from_file(cn_file,
                                           delete_value_ellipsis=False,
                                           delete_value_underline=False)
        count = 0

        p_ellipsise = re.compile('……|…$')
        # Raw strings: '\.' inside a plain literal is an invalid escape.
        p_period = re.compile(r'\.')
        # Compiled once instead of on every loop iteration.
        p_shortcut = re.compile(r'(.*)(\(_\w\))')
        for (k, v) in en_dict.items():
            if k not in cn_dict:
                # Nothing to adjust without a Chinese counterpart.
                continue
            if v.endswith('.'):
                cn_value = cn_dict[k]
                old_value = cn_value
                if v.endswith('...'):
                    # Ends with an ellipsis.
                    cn_value = p_ellipsise.sub('...', cn_value)
                    if not cn_value.endswith('...'):
                        cn_value += '...'
                else:
                    # Ends with a single period.
                    cn_value = p_period.sub('。', cn_value)
                    if not cn_value.endswith('。'):
                        cn_value += '。'

                if cn_value != old_value:
                    print('修改【%s】为【%s】' % (old_value, cn_value))
                    cn_dict[k] = cn_value
            if '_' in v:
                # The English value carries a shortcut marker.
                index = v.find('_')
                shortcut = v[index + 1:index + 2]
                cn_value = cn_dict[k]
                count += 1
                if p_shortcut.match(cn_value) is not None:
                    # Already has a '(_x)' marker: rewrite it. A callable
                    # replacement keeps the shortcut character literal (a
                    # backslash would break a template string).
                    replace_result = p_shortcut.sub(
                        lambda m: m.group(1) + '(_%s)' % shortcut, cn_value)
                    print('替换%d,key=%s,v=%s,cn=%s,r=%s' %
                          (count, shortcut, v, cn_value, replace_result))
                else:
                    replace_result = cn_value.replace(
                        '_', '') + '(_%s)' % shortcut
                    print('添加%d,key=%s,v=%s,cn=%s,r=%s' %
                          (count, shortcut, v, cn_value, replace_result))
                cn_dict[k] = replace_result
        # Translate the English file again with the adjusted dictionary.
        result = Tools.translate_file_by_dict(en_file, cn_dict, '')
        result.insert(
            0,
            '# from:[AndroidStudio翻译(3)-ActionsBundle中文翻译](http://blog.pingfangx.com/2355.html)\n'
        )
        filex.write_lines(result_file, result)
Esempio n. 11
0
    def compare_translation(en_dir,
                            compare_dir_list,
                            omegat_dict_file=None,
                            dict_file=None,
                            dict_diff_file=None,
                            by_index=False,
                            trans_unicode=True):
        """Compare several translations and write the agreed and the conflicting entries.

        One dictionary is built per directory in ``compare_dir_list`` (the
        first value returned by ``Translator.check_same_en_difference_cn``);
        one hard-coded directory is instead read by merging its listed files
        directly. When ``omegat_dict_file`` is given, its dictionary is put in
        front of the list.

        With ``by_index`` the dictionaries are merged so that a dictionary
        with a lower index overrides the ones after it. Otherwise every key is
        compared against all later dictionaries: keys whose values differ are
        collected in the diff result (values joined by '[xx|]', newlines
        removed), the remaining keys with a non-empty value form the result.

        NOTE: in the comparison mode, processed keys are popped from the later
        dictionaries, so the dictionaries in the list are modified.

        :param en_dir: directory holding the English files
        :param compare_dir_list: directories holding the translations to compare
        :param omegat_dict_file: optional OmegaT dictionary file, compared first
        :param dict_file: output file for the result, 'data/dict.txt' when
            omitted; saved as an OmegaT dictionary when the name ends with
            '.tmx' or '.tmx.xml', as 'key=value' lines otherwise
        :param dict_diff_file: output file for conflicting entries; derived
            from ``dict_file`` with the '_diff' suffix when omitted
        :param by_index: merge by priority instead of comparing
        :param trans_unicode: forwarded to ``check_same_en_difference_cn``
        """
        if dict_file is None:
            dict_file = 'data/dict.txt'
        if dict_diff_file is None:
            dict_diff_file = filex.get_result_file_name(dict_file, '_diff')
        separator = '[xx|]'
        dict_list = list()
        for i in compare_dir_list:
            # Special case: this directory is read directly, without pairing
            # it against en_dir.
            if i == r'C:\Users\Admin\Desktop\AndroidStudio汉化\汉化包\整理':
                t_dict = dict()
                for i_file in filex.list_file(i, '.properties'):
                    t_dict.update(filex.get_dict_from_file(i_file))
                dict_list.append(t_dict)
                continue
            i_all_translation, i_diff_translation = Translator.check_same_en_difference_cn(
                en_dir, i, False, '', trans_unicode=trans_unicode)
            dict_list.append(i_all_translation)
        if omegat_dict_file is not None:
            dict_list.insert(0, Translator.get_omegat_dict(omegat_dict_file))

        for i in range(len(dict_list)):
            print('%d中共包含翻译%d条' % (i + 1, len(sorted(dict_list[i].keys()))))

        all_translation = dict()
        diff_translation = dict()
        print_i = True
        if by_index:
            # Update in reverse order, so dictionaries with a lower index
            # overwrite the ones after them.
            for i in range(len(dict_list) - 1, -1, -1):
                all_translation.update(dict_list[i])
                print('更新%d后,size是%d' %
                      (i + 1, len(sorted(all_translation.keys()))))
        else:
            for i in range(len(dict_list)):
                i_dict = dict_list[i]
                index = 0
                length = len(sorted(i_dict.keys()))
                for key, i_value in i_dict.items():
                    index += 1
                    if print_i:
                        print('\n检查%d/%d,%s' % (index, length, key))
                        print('词典%d中是%s' % (i, i_value))
                    has_diff = False
                    # Compare against every dictionary after the current one.
                    for j in range(i + 1, len(dict_list)):
                        j_dict = dict_list[j]
                        if key in j_dict:
                            j_value = j_dict[key]
                            if i_value == j_value:
                                if print_i:
                                    print('词典%d中相同' % j)
                            else:
                                has_diff = True
                                if key in diff_translation.keys():
                                    pre_translation = diff_translation[key]
                                    # Append only values not recorded yet.
                                    if j_value not in pre_translation.split(
                                            separator):
                                        diff_translation[
                                            key] = pre_translation + separator + j_value.replace(
                                                '\n', '')
                                else:
                                    diff_translation[key] = (i_value +
                                                             separator +
                                                             j_value).replace(
                                                                 '\n', '')
                                if print_i:
                                    print('词典%d中是%s' % (j, j_value))
                            # Remove after processing, so the key is not
                            # handled again when dictionary j is iterated.
                            j_dict.pop(key)
                        else:
                            if print_i:
                                print('词典%d中缺少' % j)
                    if not has_diff:
                        if print_i:
                            print('统一翻译')
                        if i_value:
                            # Only add non-empty translations.
                            all_translation[key] = i_value
                print('%d中处理%d条,其中%d条翻译相同,%d条不同' %
                      (i, len(sorted(
                          i_dict.keys())), len(sorted(all_translation.keys())),
                       len(sorted(diff_translation.keys()))))

        print('size is %d' % len(sorted(all_translation.keys())))
        if all_translation:
            if dict_file.endswith('.tmx') or dict_file.endswith('.tmx.xml'):
                Tools.save_omegat_dict(all_translation, dict_file)
            else:
                result = list()
                for key, value in all_translation.items():
                    result.append('%s=%s\n' % (key, value))
                filex.write_lines(dict_file, result)

        print('diff size is %d' % len(sorted(diff_translation.keys())))
        if diff_translation:
            result = list()
            for key, value in diff_translation.items():
                result.append('%s=%s\n' % (key, value))
            filex.write_lines(dict_diff_file, result)
Esempio n. 12
0
 def check_translation_complete(en_dir, cn_dir, out_put=None, suffix=''):
     """Check whether the translation of every English file is complete.

     Each listed English file is paired with its Chinese counterpart
     (resolved by ``Translator.get_cn_file_name``) and classified as:

     * missing - the Chinese file does not exist;
     * same - the files are equal according to ``filecmp.cmp``, or no entry
       differs from the English text although every key is present;
     * incomplete - at least one key is absent or still equals the English
       text;
     * complete - every entry is translated.

     A summary is printed. The untranslated entries of all files are written
     to ``out_put`` as 'key=value' lines separated by blank lines.

     :param en_dir: directory holding the English files
     :param cn_dir: directory holding the Chinese files
     :param out_put: optional file receiving the untranslated entries
     :param suffix: forwarded to ``Translator.get_cn_file_name``
     """
     incomplete_dict = dict()
     # Raw string: '\.' inside a plain literal is an invalid escape sequence.
     en_file_list = filex.list_file(en_dir, r'\.(?!png|gif)')
     incomplete_file = []
     miss_file = []
     complete_count = 0
     same_file = []
     complete_file = []
     for en_file in en_file_list:
         cn_file = Translator.get_cn_file_name(en_dir, cn_dir, en_file,
                                               suffix)
         if not os.path.exists(cn_file):
             miss_file.append(cn_file)
             continue
         if filecmp.cmp(en_file, cn_file):
             same_file.append(en_file)
             continue
         en_dict = Tools.get_dict_from_file(en_file)
         cn_dict = Tools.get_dict_from_file(cn_file, trans_unicode=True)
         is_complete = True
         translation_count_in_file = 0
         for key, en_value in en_dict.items():
             if key in cn_dict and cn_dict[key] != en_value:
                 translation_count_in_file += 1
                 complete_count += 1
             else:
                 # Key missing, or the value is still the English text.
                 is_complete = False
                 incomplete_dict[key] = en_value
         if not is_complete:
             print('文件未完全翻译' + en_file)
             incomplete_file.append(en_file)
         elif translation_count_in_file == 0:
             # Not a single entry was translated: treat the file as unchanged.
             same_file.append(en_file)
         else:
             complete_file.append(en_file)
             print('文件翻译完整' + en_file)
     print('缺少%d个文件' % len(miss_file))
     print(miss_file)
     print('有%d个文件完全相同' % len(same_file))
     print('有%d个文件未翻译完整' % len(incomplete_file))
     print(incomplete_file)
     print('有%d个文件完整翻译,共%d条翻译' % (len(complete_file), complete_count))
     if out_put is not None:
         result = [
             '%s=%s\n\n' % (key, value)
             for key, value in incomplete_dict.items()
         ]
         print('incomplete size is %d' % len(incomplete_dict))
         filex.write_lines(out_put, result)
Esempio n. 13
0
    def translate_file(cls,
                       file_path,
                       result_file=None,
                       ignore_reg_list=None,
                       ignore_file_path=None,
                       delete_empty_translation=False):
        """
        Machine-translate the untranslated entries of an OmegaT dictionary.

        The keys are visited in sorted order. Every entry whose value is None
        is translated with ``cls.translate``, stored in the dictionary and
        saved to the result file right away.

        :param cls: the class; must be a subclass of MachineTranslator
        :param file_path: dictionary file to translate
        :param result_file: file the translations are saved to; defaults to
            ``file_path``
        :param ignore_reg_list: regular expressions; an entry matching any of
            them is skipped (patterns starting with '^' are matched from the
            start, the others are searched)
        :param ignore_file_path: when set, skipped entries are saved there
            translated as themselves, so they need not be marked by hand
        :param delete_empty_translation: clean up empty or unchanged
            translations in the result file afterwards
        :return: None
        """

        if result_file is None:
            result_file = file_path

        if not issubclass(cls, MachineTranslator):
            print('%s 不是 MachineTranslator 的子类' % cls)
            return

        en_dict = Tools.get_dict_from_omegat(file_path)
        if not en_dict:
            print('翻译文件字典为空')
            return

        ordered_keys = sorted(en_dict.keys())
        total = len(ordered_keys)
        for position, en in enumerate(ordered_keys, start=1):
            if en_dict[en] is not None:
                # Already translated.
                continue
            # Stops at the first matching pattern.
            ignored = any(
                (re.match if pattern.startswith('^') else re.search)(pattern,
                                                                     en)
                for pattern in (ignore_reg_list or ()))
            if ignored:
                print('\n跳过 %d/%d 个:【%s】' % (position, total, en))
                if ignore_file_path:
                    MachineTranslation.save_translation(
                        ignore_file_path, en, en)
                continue
            print('\n翻译 %d/%d 个:【%s】' % (position, total, en))
            cn = cls.translate(en)
            print('翻译结果 %d/%d 个:【%s】' % (position, total, cn))
            # Keep the dictionary in sync, then persist immediately.
            en_dict[en] = cn
            MachineTranslation.save_translation(result_file, en, cn)
        if delete_empty_translation:
            MachineTranslation.delete_empty_or_same_translation(
                result_file, True, True)