Exemple #1
0
 def write_byte2_gb2312(self, b2):
     """@reimp

     Decode b2 as GB2312, dropping undecodable bytes, and pass the text to
     write_text. Nothing is written when the decoded text is empty.

     @param  b2  str  GB2312-encoded bytes
     """
     decoded = b2.decode('gb2312', errors='ignore')
     if not decoded:
         return
     # The text is run through zhs2zht first when self.zht is set
     self.write_text(zhs2zht(decoded) if self.zht else decoded)
Exemple #2
0
    def translateJapanese(self, t):
        """Look up t in the enabled dictionaries and return the first accepted hit.

        Dictionaries are tried in a fixed order: Chinese, Korean, Vietnamese,
        German, the languages in config.JMDICT_LANGS, and finally English.
        A hit is accepted only when it is non-empty and shorter than a limit
        proportional to len(t).

        @param  t  unicode
        @return  unicode or None
        """
        langs = self.japaneseTranslateLanguages
        limit = len(t) * 20
        half_limit = limit / 2  # stricter bound applied to the Lingoes lookups

        found = None
        if 'zh' in langs:
            for name in ('ja-zh', 'ja-zh-gbk'):
                found = dicts.lingoes(name).translate(t)
                if not found or len(found) >= half_limit:
                    continue
                if self.userLanguage == 'zht':
                    # The hit is passed through zhs2zht for 'zht' users
                    return convutil.zhs2zht(found)
                return found

        if 'ko' in langs:
            found = dicts.lingoes('ja-ko').translate(t)
            if found and len(found) < half_limit:
                return found

        if 'vi' in langs:
            found = dicts.stardict('ja-vi').translate(t)
            if found and len(found) < limit:
                return found

        if 'de' in langs:
            found = ebdict.wadoku().translate(t)
            if found and len(found) < limit:
                return found

        for lang in config.JMDICT_LANGS:
            if lang not in langs:
                continue
            found = ebdict.jmdict(lang).translate(t)
            if found and len(found) < limit:
                return found

        # NOTE(review): `found` may still hold a hit rejected above as too
        # long, in which case the English lookup is skipped -- confirm that
        # this is intended.
        if 'en' in langs and not found:
            # English is served by the 'ja-vi' StarDict with english=True
            found = dicts.stardict('ja-vi').translate(t, english=True)
            if found and len(found) < limit:
                return found

        return None
Exemple #3
0
    def saveTerms(self, path, type, to, fr, macros):
        """Render the matching terms into a script file at path.

        This method is invoked from a different thread.

        Any exception raised while saving (including the cancellation raised
        when isOutdated() becomes true) is reported through dwarn and turns
        into a False result; the file may be left partially written.

        @param  path  unicode  output file, opened for writing and truncated
        @param  type  str  term type, optionally suffixed with "_syntax" or "_nosyntax"
        @param  to  str  target text language
        @param  fr  str  source text language
        @param  macros  {unicode pattern:unicode repl}
        @return  bool  True if at least one term was written, otherwise False
        """
        type, _, attr = type.partition('_')
        # Tri-state filter: True/False when the suffix asks for it, else None
        syntax = True if attr == 'syntax' else False if attr == 'nosyntax' else None

        type_trans = type == 'trans'
        type_output = type == 'output'

        fr2 = fr[:2]
        to2 = to[:2]

        fr_ja = fr == 'ja'
        fr_zh = fr2 == 'zh'
        # fr_zhs was previously never defined (fr_zht was assigned twice),
        # which raised NameError for 'trans' name terms with a Chinese source.
        fr_zhs = fr == 'zhs'
        fr_zht = fr == 'zht'

        to_zh = to2 == 'zh'
        to_zhs = to == 'zhs'
        to_zht = to == 'zht'
        to_ko = to == 'ko'
        to_vi = to == 'vi'

        frSpaceLanguage = config.language_word_has_space(fr)
        toSpaceLanguage = config.language_word_has_space(to)

        convertsSimplifiedChinese = to_zhs and type in ('output', 'trans')
        convertsTraditionalChinese = to_zht and type in ('output', 'trans')

        RUBY_TYPES = dataman.Term.RUBY_TYPES

        empty = True
        try:
            with open(path, 'w') as f:
                f.write(self._renderHeader(type, to, fr))
                for td in self.iterTermData(type, to, fr, syntax=syntax):
                    if self.isOutdated():
                        # Abort through the handler below
                        raise Exception("cancel saving out-of-date terms")
                    # Whether this term's text needs zht->zhs / zhs->zht conversion
                    zs = convertsSimplifiedChinese and td.language == 'zht'
                    zt = convertsTraditionalChinese and td.language == 'zhs'

                    regex = td.regex

                    role = td.role or _td_default_role(td)

                    pattern = _unescape_term_text(td.pattern)
                    pattern = self._applyMacros(pattern, macros)
                    if type_output:
                        if zs:
                            pattern = zht2zhs(pattern)
                        elif zt:
                            pattern = zhs2zht(pattern)

                    elif type_trans and role == defs.TERM_NAME_ROLE and fr_zh:
                        if fr_zhs:
                            pattern = opencc.ja2zhs(pattern)
                        elif fr_zht:
                            pattern = opencc.ja2zht(pattern)

                    if td.type == 'yomi' and to_zh:
                        # For yomi terms into Chinese, the replacement is derived from the pattern
                        repl = ja2zhs_name(pattern) if to_zhs else ja2zht_name(
                            pattern)
                        if not repl:  # this should never happen
                            continue
                        ruby = td.ruby
                        if not ruby and self.chineseRubyEnabled and fr_ja and td.text:
                            t = kana2name(td.text, 'en')
                            if t != td.text:
                                ruby = t
                        if ruby and self.rubyEnabled and repl != ruby:
                            repl = richutil.createRuby(repl, ruby)
                    else:
                        repl = td.text
                        if repl:
                            ruby = td.ruby

                            repl = _unescape_term_text(td.text)
                            repl = self._applyMacros(repl, macros)

                            if td.type == 'yomi':
                                repl = kana2name(repl, to) or repl
                            elif td.type == 'name' and td.language != to and to != 'el':  # temporarily skip Greek
                                if not ruby:
                                    ruby = repl
                                repl = toalphabet(repl, to=to, fr=td.language)

                            if zs:
                                repl = zht2zhs(repl)
                            elif zt:
                                repl = zhs2zht(repl)
                                if role == defs.TERM_NAME_ROLE:
                                    repl = ja2zht_name_fix(repl)

                            if repl and td.type in RUBY_TYPES:
                                if not ruby:
                                    if self.koreanRubyEnabled and fr_ja and to_ko and td.type == 'yomi' and td.pattern:
                                        t = td.pattern
                                        t = hanjaconv.to_hangul(t)
                                        # Per the original author: allhangul rejects ASCII
                                        # characters, so regex patterns fail this test too
                                        if not kochars.allhangul(t):
                                            # Retry via the traditional Chinese name form
                                            t = td.pattern
                                            t = ja2zht_name(t)
                                            t = ja2zht_name_fix(t)
                                            t = hanjaconv.to_hangul(t)
                                        if t and kochars.allhangul(t):
                                            ruby = t
                                    if self.vietnameseRubyEnabled and to_vi and td.type == 'yomi' and td.pattern:
                                        t = td.pattern
                                        t = ja2zhs_name(t)
                                        t = hanviet.toreading(t)
                                        if t and vichars.allviet(t):
                                            ruby = t
                                if ruby:
                                    if self.rubyEnabled and repl != ruby:
                                        repl = richutil.createRuby(repl, ruby)
                                elif not td.ruby and not self.rubyEnabled:
                                    repl = richutil.removeRuby(repl)

                    if td.phrase:
                        # Boundaries are chosen from the first and last characters
                        # of the pattern before it is escaped
                        left = pattern[0]
                        right = pattern[-1]
                        if not regex:
                            regex = True
                            pattern = re.escape(pattern)
                        pattern = _phrase_lbound(
                            left, fr) + pattern + _phrase_rbound(right, fr)

                    if type_trans:
                        if role and to_ko:
                            role = _mutate_ko_role(role, repl)

                        # Suffix/prefix terms get a [[...]] placeholder added
                        # unless they already contain syntax symbols
                        if td.type == 'suffix':
                            if not _contains_syntax_symbol(pattern):
                                pattern = "[[%s]]%s" % (defs.TERM_NAME_ROLE,
                                                        pattern)
                            if not _contains_syntax_symbol(repl):
                                if to_ko:
                                    repl = "[[]] %s" % repl
                                else:
                                    repl = "[[]]%s" % repl
                        elif td.type == 'prefix':
                            if not _contains_syntax_symbol(pattern):
                                if frSpaceLanguage:
                                    pattern = "%s [[%s]]" % (
                                        pattern, defs.TERM_NAME_ROLE)
                                else:
                                    pattern = "%s[[%s]]" % (
                                        pattern, defs.TERM_NAME_ROLE)
                            if not _contains_syntax_symbol(repl):
                                if toSpaceLanguage:
                                    repl = "%s [[]]" % repl
                                else:
                                    repl = "%s[[]]" % repl

                    if type_trans:
                        self._writeCodecLine(f, td.id, pattern, repl, regex,
                                             td.icase, td.context, td.host,
                                             role)
                    else:
                        self._writeTransformLine(f, td.id, pattern, repl,
                                                 regex, td.icase, td.context,
                                                 td.host)

                    empty = False

            return not empty

        except Exception as e:
            dwarn(e)
            return False
Exemple #4
0
 def convertChinese(self, text):
     """Return text passed through zhs2zht.

     @param  text  unicode
     @return  the value returned by zhs2zht
     """
     converted = zhs2zht(text)
     return converted