def write_byte2_gb2312(self, b2):
    """@reimp

    Decode a GB2312-encoded byte string and forward the text to write_text.

    Undecodable bytes are dropped (errors='ignore'); when nothing is left
    after decoding, write_text is not called at all.

    @param  b2  str  GB2312-encoded bytes
    """
    decoded = b2.decode('gb2312', errors='ignore')
    if not decoded:
        return
    # self.zht requests that the decoded text be passed through zhs2zht first
    text = zhs2zht(decoded) if self.zht else decoded
    self.write_text(text)
def translateJapanese(self, t):
    """Translate Japanese text using the dictionaries of the enabled languages.

    The dictionaries are tried in a fixed order (zh, ko, vi, de, every
    language in config.JMDICT_LANGS, en). The first non-empty translation
    that is shorter than the length limit is returned. A translation that
    is rejected by one dictionary never prevents later dictionaries from
    being tried.

    @param  t  unicode
    @return  unicode or None
    """
    langs = self.japaneseTranslateLanguages

    # Translations that are disproportionately longer than the input are rejected.
    max_length = len(t) * 20
    # The Lingoes dictionaries (zh, ko) use half of that limit.
    half_length = max_length // 2

    if 'zh' in langs:
        for dic in 'ja-zh', 'ja-zh-gbk':
            ret = dicts.lingoes(dic).translate(t)
            if ret and len(ret) < half_length:
                if self.userLanguage == 'zht':
                    ret = convutil.zhs2zht(ret)
                return ret

    if 'ko' in langs:
        ret = dicts.lingoes('ja-ko').translate(t)
        if ret and len(ret) < half_length:
            return ret

    if 'vi' in langs:
        ret = dicts.stardict('ja-vi').translate(t)
        if ret and len(ret) < max_length:
            return ret

    if 'de' in langs:
        ret = ebdict.wadoku().translate(t)
        if ret and len(ret) < max_length:
            return ret

    for lang in config.JMDICT_LANGS:
        if lang in langs:
            ret = ebdict.jmdict(lang).translate(t)
            if ret and len(ret) < max_length:
                return ret

    # The English lookup goes through the 'ja-vi' StarDict with english=True.
    # It must not depend on the leftover result of an earlier dictionary:
    # reaching this point means every earlier result was already rejected.
    if 'en' in langs:
        ret = dicts.stardict('ja-vi').translate(t, english=True)
        if ret and len(ret) < max_length:
            return ret

    return None
def saveTerms(self, path, type, to, fr, macros):
    """Render the terms for one type/language pair into a script file.

    This method is invoked from a different thread.

    The header is written first, then one line per term yielded by
    iterTermData. Saving is cancelled as soon as isOutdated() reports true.
    Any exception is logged with dwarn and turned into a False result; in
    that case the file may be left with partial content.

    @param  path  unicode  output file path
    @param  type  str  term type, optionally followed by "_syntax" or "_nosyntax"
    @param  to  str  target text language
    @param  fr  str  source text language
    @param  macros  {unicode pattern:unicode repl}
    @return  bool  True if at least one term was written, False otherwise
    """
    type, _, attr = type.partition('_')
    # Tri-state filter handed to iterTermData: True, False, or None when unspecified
    syntax = True if attr == 'syntax' else False if attr == 'nosyntax' else None

    type_trans = type == 'trans'
    type_output = type == 'output'

    fr2 = fr[:2]
    to2 = to[:2]

    fr_ja = fr == 'ja'
    # fr_zhs and fr_zht select the opencc conversion applied to name patterns
    # when the source language is Chinese.
    fr_zhs = fr == 'zhs'
    fr_zht = fr == 'zht'

    to_zh = to2 == 'zh'
    to_zhs = to == 'zhs'
    to_zht = to == 'zht'
    to_ko = to == 'ko'
    to_vi = to == 'vi'

    frSpaceLanguage = config.language_word_has_space(fr)
    toSpaceLanguage = config.language_word_has_space(to)

    convertsSimplifiedChinese = to_zhs and type in ('output', 'trans')
    convertsTraditionalChinese = to_zht and type in ('output', 'trans')

    RUBY_TYPES = dataman.Term.RUBY_TYPES

    empty = True
    try:
        with open(path, 'w') as f:
            f.write(self._renderHeader(type, to, fr))
            for td in self.iterTermData(type, to, fr, syntax=syntax):
                if self.isOutdated():
                    raise Exception("cancel saving out-of-date terms")

                # The term is written in the other Chinese variant than the target
                zs = convertsSimplifiedChinese and td.language == 'zht'
                zt = convertsTraditionalChinese and td.language == 'zhs'

                regex = td.regex
                role = td.role or _td_default_role(td)

                # --- pattern ---
                pattern = _unescape_term_text(td.pattern)
                pattern = self._applyMacros(pattern, macros)
                if type_output:
                    if zs:
                        pattern = zht2zhs(pattern)
                    elif zt:
                        pattern = zhs2zht(pattern)
                elif type_trans and role == defs.TERM_NAME_ROLE and fr2 == 'zh':
                    if fr_zhs:
                        pattern = opencc.ja2zhs(pattern)
                    elif fr_zht:
                        pattern = opencc.ja2zht(pattern)

                # --- replacement ---
                if td.type == 'yomi' and to_zh:
                    # For Chinese targets the replacement of a yomi term is
                    # derived from the pattern instead of td.text.
                    repl = ja2zhs_name(pattern) if to_zhs else ja2zht_name(pattern)
                    if not repl:  # this should never happen
                        continue
                    ruby = td.ruby
                    if not ruby and self.chineseRubyEnabled and fr_ja and td.text:
                        t = kana2name(td.text, 'en')
                        if t != td.text:
                            ruby = t
                    if ruby and self.rubyEnabled and repl != ruby:
                        repl = richutil.createRuby(repl, ruby)
                else:
                    repl = td.text
                    if repl:
                        ruby = td.ruby
                        repl = _unescape_term_text(td.text)
                        repl = self._applyMacros(repl, macros)
                        if td.type == 'yomi':
                            repl = kana2name(repl, to) or repl
                        elif td.type == 'name' and td.language != to and to != 'el':
                            # temporarily skip Greek
                            if not ruby:
                                ruby = repl
                            repl = toalphabet(repl, to=to, fr=td.language)

                        if zs:
                            repl = zht2zhs(repl)
                        elif zt:
                            repl = zhs2zht(repl)
                            if role == defs.TERM_NAME_ROLE:
                                repl = ja2zht_name_fix(repl)

                        if repl and td.type in RUBY_TYPES:
                            if not ruby:
                                if self.koreanRubyEnabled and fr_ja and to_ko and td.type == 'yomi' and td.pattern:
                                    t = hanjaconv.to_hangul(td.pattern)
                                    # allhangul excludes ASCII characters, so it
                                    # automatically rejects regex expressions as well.
                                    if not kochars.allhangul(t):
                                        t = ja2zht_name(td.pattern)
                                        t = ja2zht_name_fix(t)
                                        t = hanjaconv.to_hangul(t)
                                    if t and kochars.allhangul(t):
                                        ruby = t
                                if self.vietnameseRubyEnabled and to_vi and td.type == 'yomi' and td.pattern:
                                    t = ja2zhs_name(td.pattern)
                                    t = hanviet.toreading(t)
                                    if t and vichars.allviet(t):
                                        ruby = t
                            if ruby:
                                if self.rubyEnabled and repl != ruby:
                                    repl = richutil.createRuby(repl, ruby)
                            elif not td.ruby and not self.rubyEnabled:
                                repl = richutil.removeRuby(repl)

                # --- phrase boundaries ---
                if td.phrase:
                    # Capture the edge characters before the pattern is escaped
                    left = pattern[0]
                    right = pattern[-1]
                    if not regex:
                        regex = True
                        pattern = re.escape(pattern)
                    pattern = _phrase_lbound(left, fr) + pattern + _phrase_rbound(right, fr)

                # --- output ---
                if type_trans:
                    if role and to_ko:
                        role = _mutate_ko_role(role, repl)

                    # Suffix and prefix terms get a name-role placeholder on the
                    # side they attach to, unless they already contain syntax symbols.
                    if td.type == 'suffix':
                        if not _contains_syntax_symbol(pattern):
                            pattern = "[[%s]]%s" % (defs.TERM_NAME_ROLE, pattern)
                        if not _contains_syntax_symbol(repl):
                            if to_ko:
                                repl = "[[]] %s" % repl
                            else:
                                repl = "[[]]%s" % repl
                    elif td.type == 'prefix':
                        if not _contains_syntax_symbol(pattern):
                            if frSpaceLanguage:
                                pattern = "%s [[%s]]" % (pattern, defs.TERM_NAME_ROLE)
                            else:
                                pattern = "%s[[%s]]" % (pattern, defs.TERM_NAME_ROLE)
                        if not _contains_syntax_symbol(repl):
                            if toSpaceLanguage:
                                repl = "%s [[]]" % repl
                            else:
                                repl = "%s[[]]" % repl

                    self._writeCodecLine(f, td.id, pattern, repl, regex,
                                         td.icase, td.context, td.host, role)
                else:
                    self._writeTransformLine(f, td.id, pattern, repl, regex,
                                             td.icase, td.context, td.host)

                empty = False

        if not empty:
            return True
    except Exception as e:
        dwarn(e)
    return False
def convertChinese(self, text):
    """Run text through zhs2zht and return the result.

    Going by the helper's name this presumably converts Simplified Chinese
    to Traditional Chinese -- confirm against zhs2zht.

    @param  text  value handed to zhs2zht unchanged
    @return  whatever zhs2zht returns for text
    """
    converted = zhs2zht(text)
    return converted