Exemple #1
0
 def update_omegat_dict_by_pseudo_dict(omegat_dict_file,
                                       pseudo_dict_file,
                                       result_file=None):
     """Update a translation memory using a pseudo-translation memory.

     Both files are loaded with ``Tools.get_dict_from_omegat``, combined by
     ``ActionsBundle.update_dict_add_extra_info`` and the outcome is written
     with ``Tools.save_omegat_dict``. Record counts are printed before and
     after the merge.

     :param omegat_dict_file: path of the translation memory to update
     :param pseudo_dict_file: path of the pseudo-translation memory
     :param result_file: output path; when ``None`` it is derived from
         ``pseudo_dict_file`` by ``filex.get_result_file_name`` with the
         ``'_update_by_pseudo'`` suffix argument
     :return: None
     """
     if result_file is None:
         result_file = filex.get_result_file_name(
             pseudo_dict_file, '_update_by_pseudo')

     memory = Tools.get_dict_from_omegat(omegat_dict_file)
     pseudo = Tools.get_dict_from_omegat(pseudo_dict_file)
     record_counts = (len(memory), len(pseudo))
     print('记忆库共%d条记录,伪翻译共%d条记录' % record_counts)

     merged = ActionsBundle.update_dict_add_extra_info(memory, pseudo)
     print('处理结果共%s条记录' % len(merged))
     Tools.save_omegat_dict(merged, result_file)
Exemple #2
0
    def translate_file(cls,
                       file_path,
                       result_file=None,
                       ignore_reg_list=None,
                       ignore_file_path=None,
                       delete_empty_translation=False):
        """
        Machine-translate the untranslated entries of an OmegaT dictionary file.

        The dictionary is loaded with ``Tools.get_dict_from_omegat`` and its
        source strings are visited in sorted order. Entries whose value is
        not ``None`` are left alone. Each remaining entry is either skipped
        (when it matches an ignore pattern) or translated with
        ``cls.translate``; every translation is stored in the in-memory
        dictionary and saved to ``result_file`` right away, one entry at a
        time.

        :param cls: translator class; must be a subclass of
            ``MachineTranslator``, otherwise a message is printed and
            nothing is done
        :param file_path: path of the file to translate
        :param result_file: path the translations are saved to; defaults to
            ``file_path``
        :param ignore_reg_list: regular expression strings; an entry matching
            any of them is skipped. Patterns starting with ``^`` are applied
            with ``re.match``, all others with ``re.search``
        :param ignore_file_path: when set, each skipped entry is saved to
            this file with the source text as its own translation (used to
            build the tmx in "auto" so identical translations need not be
            set by hand each time)
        :param delete_empty_translation: when true,
            ``MachineTranslation.delete_empty_or_same_translation`` is run on
            ``result_file`` after the loop
        :return: None
        """
        if result_file is None:
            result_file = file_path

        if not issubclass(cls, MachineTranslator):
            print('%s 不是 MachineTranslator 的子类' % cls)
            return

        en_dict = Tools.get_dict_from_omegat(file_path)
        if not en_dict:
            print('翻译文件字典为空')
            return

        sources = sorted(en_dict)
        total = len(sources)
        for position, source in enumerate(sources, start=1):
            # Only entries without a translation yet are processed.
            if en_dict[source] is not None:
                continue

            # Anchored patterns ("^...") are matched from the start of the
            # text; every other pattern is searched anywhere in it. The scan
            # stops at the first pattern that hits.
            ignored = any(
                (re.match if pattern.startswith('^') else re.search)(
                    pattern, source)
                for pattern in ignore_reg_list or ()
            )
            if ignored:
                print('\n跳过 %d/%d 个:【%s】' % (position, total, source))
                if ignore_file_path:
                    MachineTranslation.save_translation(
                        ignore_file_path, source, source)
                continue

            print('\n翻译 %d/%d 个:【%s】' % (position, total, source))
            translated = cls.translate(source)
            print('翻译结果 %d/%d 个:【%s】' % (position, total, translated))
            # Update the in-memory dictionary.
            en_dict[source] = translated
            # Save this entry to the result file immediately.
            MachineTranslation.save_translation(
                result_file, source, translated)

        if delete_empty_translation:
            MachineTranslation.delete_empty_or_same_translation(
                result_file, True, True)