Esempio n. 1
0
def to_utf8(text):
    """Re-encode ``text`` as UTF-8 using the encoding pykf guesses for it.

    Each name in the module-level ``ENCODES`` sequence is looked up as an
    attribute of ``pykf``; the first one whose value equals the guessed
    encoding is lower-cased and used as the codec name for decoding.

    Returns the UTF-8 encoded byte string, or ``None`` when none of the
    names in ``ENCODES`` corresponds to the guessed encoding.
    """
    import pykf

    guessed = pykf.guess(text)
    for candidate_name in ENCODES:
        candidate = getattr(pykf, candidate_name)
        if candidate == guessed:
            decoded = unicode(text, candidate_name.lower())
            return decoded.encode('utf8')
    # No entry of ENCODES matched the guess.
    return None
Esempio n. 2
0
def unify(text):
    """Decode a Japanese-encoded byte string to ``unicode`` where possible.

    The encoding is guessed with ``pykf.guess``.  EUC-JP, Shift_JIS and
    JIS (ISO-2022-JP) input is decoded with the matching codec, silently
    dropping undecodable bytes (``"ignore"``).

    Returns the decoded ``unicode`` object, or ``text`` unchanged when the
    guessed encoding is none of the above or decoding fails.
    """
    guessed = pykf.guess(text)
    # Compare with ``==``: the pykf encoding constants are plain values, so
    # identity checks (``is``) only work by accident of interning.
    if guessed == pykf.EUC:
        codec = 'euc-jp'
    elif guessed == pykf.SJIS:
        codec = 'sjis'
    elif guessed == pykf.JIS:
        # JIS is the 7-bit, escape-sequence based ISO-2022-JP; the Shift_JIS
        # codec cannot interpret it and would yield garbage.
        codec = 'iso2022_jp'
    else:
        return text
    try:
        return unicode(text, codec, "ignore")
    except (UnicodeError, TypeError):
        # Best effort: hand back the input untouched rather than failing.
        # TypeError covers ``text`` already being a unicode object.
        return text
Esempio n. 3
0
def toutf8(txt):
    """Best-effort conversion of a Japanese-encoded byte string to UTF-8.

    ``pykf.guess`` is consulted only to detect JIS (ISO-2022-JP) input.
    Per the original author's note it cannot recognise UTF-8 strings
    (it misreports them as Shift_JIS), so every other input is decoded by
    trying UTF-8, EUC-JP and Shift_JIS in that order.

    Returns the UTF-8 encoded byte string.  ``txt`` is returned unchanged
    when it is empty/falsy or when no decoding attempt succeeds.
    """
    if not txt:
        return txt
    # ``==`` rather than ``is``: the pykf constants are plain values.
    if pykf.guess(txt) == pykf.JIS:
        try:
            decoded = unicode(txt, 'iso2022_jp')
        except (UnicodeError, TypeError):
            # Keep the best-effort contract without swallowing
            # KeyboardInterrupt/SystemExit as a bare ``except`` would.
            return txt
        return decoded.encode('utf-8')
    # Order matters: UTF-8 is the strictest codec, so it is tried first.
    for enc in ('utf-8', 'euc_jp', 'shift_jis'):
        try:
            return unicode(txt, enc).encode('utf-8')
        except UnicodeError:
            continue
    return txt