コード例 #1
0
    def pretranslatexliff(self, input_source, template_source=None):
        """Helper that pretranslates an in-memory XLIFF document without
        requiring files on disk.

        Wraps the string inputs in wStringIO buffers, runs
        pretranslate.pretranslate_file, and parses the output buffer as XLIFF.

        NOTE(review): the original docstring said "po source", but this helper
        returns an xliff.xlifffile — likely copy-pasted from pretranslatepo.

        :param input_source: XLIFF document contents as a string.
        :param template_source: optional translation template contents;
            when falsy, no template file is passed to pretranslate.
        :return: an ``xliff.xlifffile`` parsed from the pretranslated output.
        """
        input_file = wStringIO.StringIO(input_source)
        if template_source:
            template_file = wStringIO.StringIO(template_source)
        else:
            template_file = None
        output_file = wStringIO.StringIO()

        pretranslate.pretranslate_file(input_file, output_file, template_file)
        output_file.seek(0)
        return xliff.xlifffile(output_file.read())
コード例 #2
0
    def pretranslatepo(self, input_source, template_source=None):
        """Pretranslate a PO document held entirely in memory.

        Encodes the string inputs into BytesIO buffers, feeds them through
        ``pretranslate.pretranslate_file``, and parses the result back into a
        ``po.pofile`` — no files on disk are required.

        :param input_source: PO document contents as a string.
        :param template_source: optional translation template contents; when
            falsy, no template is supplied to pretranslate.
        :return: a ``po.pofile`` built from the pretranslated output.
        """
        source_stream = BytesIO(input_source.encode())
        template_stream = BytesIO(template_source.encode()) if template_source else None
        result_stream = BytesIO()

        pretranslate.pretranslate_file(source_stream, result_stream, template_stream)
        result_stream.seek(0)
        return po.pofile(result_stream.read())
コード例 #3
0
    def pretranslatexliff(self, input_source, template_source=None):
        """Run pretranslation over an in-memory XLIFF document.

        Builds wStringIO buffers for the input (and, when given, the
        template), invokes ``pretranslate.pretranslate_file``, then parses
        the output buffer into an ``xliff.xlifffile``.

        :param input_source: XLIFF document contents as a string.
        :param template_source: optional template contents; ``None``/empty
            means pretranslate runs without a template.
        :return: an ``xliff.xlifffile`` parsed from the pretranslated output.
        """
        src_buf = wStringIO.StringIO(input_source)
        tmpl_buf = wStringIO.StringIO(template_source) if template_source else None
        out_buf = wStringIO.StringIO()

        pretranslate.pretranslate_file(src_buf, out_buf, tmpl_buf)
        out_buf.seek(0)
        return xliff.xlifffile(out_buf.read())
コード例 #4
0
def merge_translation(old_path, new_path, output_path, tmp_path):
    """Merge an old translation CSV into a new source CSV via pretranslate.

    Runs two passes of ``pretranslate_file`` — exact matching first, then
    fuzzy matching (min_similarity=75) with markup tags stripped — and marks
    rows that were only filled by the fuzzy pass with an in-band Japanese
    keyword such as "(あいまい一致)" so reviewers can spot them.

    :param old_path: path to the previous translation CSV (may not exist).
    :param new_path: path to the newly extracted source CSV.
    :param output_path: destination for the merged CSV (parent dir created).
    :param tmp_path: directory for intermediate normalized CSVs.
    :return: the merged CSV contents as a string, or '' when *old_path*
        does not exist.

    NOTE(review): relies on module-level ``comma_replacer`` and ``isJp``
    regexes defined elsewhere in this file — presumably swapping CSV field
    commas for tabs and detecting Japanese text; confirm their definitions.
    """

    tmp_old_path = os.path.join(tmp_path, 'old.csv')
    tmp_new_path = os.path.join(tmp_path, 'new.csv')
    lines = ''

    if os.path.isfile(old_path):

        # Align columns and quoting so rows compare exactly, then write the
        # normalized data to temporary files used by the matching passes.
        with open(old_path, "r", encoding="utf_8_sig", newline="\n") as f_csv:
            old_lines = f_csv.read()

        # Commas -> tabs, force double quotes around every field, pad
        # two-column rows with an empty third column, then tabs -> commas.
        old_lines = comma_replacer.sub(r'\t', old_lines)
        old_lines = re.sub(r'(^[^"\r\n])', r'"\1', old_lines, flags=re.MULTILINE)
        old_lines = re.sub(r'([^"\r\n]$)', r'\1"', old_lines, flags=re.MULTILINE)
        old_lines = re.sub(r'(["]{0,1})\t(["]{0,1})', r'"\t"', old_lines, flags=re.MULTILINE)
        old_lines = re.sub(r'^(([^\t\r\n]+\t?){2,2})$', r'\1\t""', old_lines, flags=re.MULTILINE)
        old_lines = re.sub(r'\t', r',', old_lines)

        with open(new_path, "r", encoding="utf_8_sig", newline="\n") as f_csv:
            new_lines = f_csv.read()

        # Keep only the first three columns of each row in the new file.
        new_lines = comma_replacer.sub(r'\t', new_lines)
        new_lines = re.sub(r'^(([^\t\r\n]+\t?){0,3})\t[^\r\n]+', r'\1', new_lines, flags=re.MULTILINE)
        new_lines = re.sub(r'\t', r',', new_lines)

        with open(tmp_old_path, "w+", encoding="utf_8_sig", newline="\n") as f_csv:
            f_csv.write(old_lines)

        with open(tmp_new_path, "w+", encoding="utf_8_sig", newline="\n") as f_csv:
            f_csv.write(new_lines)

        os.makedirs(os.path.split(output_path)[0], exist_ok=True)

        # Exact (full-string) matching pass.
        # NOTE(review): these handles are opened without `with`; an exception
        # inside pretranslate_file would leak them.
        f_input_csv = open(tmp_new_path, 'rb')
        f_output_csv = open(output_path, 'wb+')
        f_tm = open(tmp_old_path, 'rb')

        pretranslate_file(f_input_csv, f_output_csv, f_tm, fuzzymatching=False)

        f_input_csv.close()
        f_output_csv.close()
        f_tm.close()

        with open(output_path, "r", encoding="utf_8_sig") as f:
            no_fuzzy_lines = f.read()


        # Strip markup tags from the source column for the fuzzy-match pass.
        new_lines = comma_replacer.sub(r'\t', new_lines)
        notags_lines = []
        for line in new_lines.splitlines(False):
            if line.count('"') % 2 != 0: # unbalanced quotes = corrupted entry
                # NOTE(review): the corrupted line is appended here AND again
                # below after processing, doubling it — a `continue` looks to
                # be missing; confirm intended behavior.
                notags_lines.append(line)
            s = line.split('\t')
            if len(s) >= 2:
                # Remove <tag> and {placeholder} markup from the source field.
                s[1] = re.sub(r'(<[^>]+>)|(\{[^\}]+\})', '', s[1])
            notags_lines.append(','.join(s))

        with open(tmp_new_path, "w+", encoding="utf_8_sig", newline="\n") as f_csv:
            f_csv.write('\n'.join(notags_lines))

        # Same tag-stripping for the old (translation-memory) file.
        old_lines = comma_replacer.sub(r'\t', old_lines)
        notags_lines = []
        for line in old_lines.splitlines(False):
            if line.count('"') % 2 != 0:
                # NOTE(review): same suspected missing `continue` as above.
                notags_lines.append(line)
            s = line.split('\t')
            if len(s) >= 2:
                s[1] = re.sub(r'(<[^>]+>)|(\{[^\}]+\})', '', s[1])
            notags_lines.append(','.join(s))

        with open(tmp_old_path, "w+", encoding="utf_8_sig", newline="\n") as f_csv:
            f_csv.write('\n'.join(notags_lines))

        # Fuzzy-matching pass (overwrites the exact-match output file).
        f_input_csv = open(tmp_new_path, 'rb')
        f_output_csv = open(output_path, 'wb+')
        f_tm = open(tmp_old_path, 'rb')

        pretranslate_file(f_input_csv, f_output_csv, f_tm, min_similarity=75, fuzzymatching=True)

        f_input_csv.close()
        f_output_csv.close()
        f_tm.close()

        with open(output_path, "r", encoding="utf_8_sig") as f:
            fuzzy_lines = f.read()


        # Compare exact-match output against fuzzy output and insert a marker
        # keyword into rows that were only filled by the fuzzy pass.
        no_fuzzy_lines = comma_replacer.sub('\t', no_fuzzy_lines)
        fuzzy_lines = comma_replacer.sub('\t', fuzzy_lines)
        no_fuzzy_lines = no_fuzzy_lines.splitlines(False)
        fuzzy_lines = fuzzy_lines.splitlines(False)
        # Drop corrupted entries (rows without a tab separator).
        no_fuzzy_lines = [line for line in no_fuzzy_lines if '\t' in line]
        fuzzy_lines = [line for line in fuzzy_lines if '\t' in line] 


        # Only annotate when the two outputs align line-for-line; otherwise
        # fall back to the exact-match result unannotated.
        if len(no_fuzzy_lines) == len(fuzzy_lines):

            # Map translation text (column 2) -> source text (column 1) from
            # the old file. NOTE(review): confirm column layout — s[2] is used
            # as the key here.
            old_dict = {}
            for line in old_lines.splitlines(False):
                s = line.split('\t')
                if len(s) > 2 and s[2] != '""':
                    old_dict[s[2]] = s[1]

            for idx, line in enumerate(no_fuzzy_lines):
                s = line.split('\t')
                nt_s = fuzzy_lines[idx].split('\t')

                # Exact pass left column 2 empty but the fuzzy pass filled it
                # with Japanese text: annotate with the reason for fuzziness.
                if len(s) > 2 and s[2] == '""':
                    if isJp.findall(nt_s[2]) and not ('(あいまい一致_') in nt_s[2]:

                        # Did the source text (sans tags) differ, or only the tags?
                        isdiff_source = (re.sub(r'<[^>]+>', '', s[1]) != re.sub(r'<[^>]+>', '', old_dict.get(nt_s[2], '')))
                        isdiff_tag = (''.join(re.findall(r'<[^>]+>', s[1])) != ''.join(re.findall(r'<[^>]+>', old_dict.get(nt_s[2], ''))))

                        # Marker literals mean: "(fuzzy match_tag difference)",
                        # "(fuzzy match_source difference)", "(fuzzy match)".
                        if isdiff_tag and not isdiff_source:
                            s[2] = re.sub(r'^("?)', r'\1(あいまい一致_タグに差異)', nt_s[2])
                        elif isdiff_source and not isdiff_tag:
                            s[2] = re.sub(r'^("?)', r'\1(あいまい一致_原文に差異)', nt_s[2])
                        else:
                            s[2] = re.sub(r'^("?)', r'\1(あいまい一致)', nt_s[2])
                    else:
                        nt_s[2] = '""'
                no_fuzzy_lines[idx] = '\t'.join(s)

            no_fuzzy_lines.append('')
            lines = '\n'.join(no_fuzzy_lines)
        else:
            no_fuzzy_lines.append('')
            lines = '\n'.join(no_fuzzy_lines)

        # Restore commas as the field separator for the returned CSV text.
        lines = lines.replace('\t', ',')

    return lines