def to_utf8(text):
    """Re-encode ``text`` as UTF-8 using the encoding detected by pykf.

    The value returned by ``pykf.guess`` is compared against the pykf
    attribute named by each entry of the module-level ``ENCODES``, in order.
    The first entry whose attribute equals the guess is lower-cased and used
    as the codec name to decode ``text``; the result is encoded as UTF-8.

    Returns the UTF-8 encoded byte string, or ``None`` when no entry of
    ``ENCODES`` matches the guessed encoding.
    """
    import pykf

    guessed = pykf.guess(text)
    # Lazy generator: attributes are looked up one at a time and the search
    # stops at the first match.
    matches = (name for name in ENCODES if getattr(pykf, name) == guessed)
    codec_name = next(matches, None)
    if codec_name is None:
        return None
    decoded = unicode(text, codec_name.lower())
    return decoded.encode('utf8')
def unify(text):
    """Decode Japanese-encoded byte text into a unicode string.

    The encoding is detected with ``pykf.guess``. EUC, SJIS and JIS input is
    decoded with the matching codec, dropping undecodable bytes
    (``"ignore"``). Any other guess leaves ``text`` untouched.

    Returns the decoded unicode string, or ``text`` unchanged when the
    encoding is not one of the three handled here or decoding fails.
    """
    guessed = pykf.guess(text)
    if guessed == pykf.EUC:
        codec = 'euc-jp'
    elif guessed == pykf.SJIS:
        codec = 'sjis'
    elif guessed == pykf.JIS:
        # JIS is the 7-bit, escape-sequence based ISO-2022-JP encoding;
        # decoding it as Shift_JIS yields garbage instead of the real text.
        codec = 'iso2022_jp'
    else:
        return text
    try:
        return unicode(text, codec, "ignore")
    except Exception:
        # Best effort: hand back the input rather than fail the caller, but
        # let KeyboardInterrupt/SystemExit propagate.
        return text
def toutf8(txt):
    """Re-encode Japanese byte text as UTF-8 on a best-effort basis.

    ``pykf.guess`` cannot recognise UTF-8 input (it misreports it as
    Shift_JIS), so pykf is only trusted to detect JIS. Everything else is
    handled by trying the candidate codecs in order: UTF-8, EUC-JP, then
    Shift_JIS.

    Returns the UTF-8 encoded byte string. ``txt`` is returned unchanged when
    it is empty/falsy or when it cannot be decoded with any candidate codec.
    """
    if not txt:
        return txt
    if pykf.guess(txt) == pykf.JIS:
        try:
            decoded = unicode(txt, 'iso2022_jp')
        except Exception:
            # Best effort: keep the input as-is instead of failing, while
            # still letting KeyboardInterrupt/SystemExit propagate.
            return txt
        return decoded.encode('utf-8')
    for enc in ('utf-8', 'euc_jp', 'shift_jis'):
        try:
            return unicode(txt, enc).encode('utf-8')
        except UnicodeError:
            # Not valid in this encoding; try the next candidate.
            continue
    return txt