def cmmseg2_seg(w, encoding='utf8'):
    """Segment text with ``cmmseg`` and return the pieces as unicode strings.

    Args:
        w: Text to segment. A ``unicode`` object (or subclass) is used as
            is; any other value is decoded with ``encoding`` first.
        encoding: Codec used to decode ``w`` when it is not already
            ``unicode``. Defaults to ``'utf8'``.

    Returns:
        A list of ``unicode`` segments, in the order ``cmmseg.segment``
        produces them. Segments that cannot be decoded as UTF-8 are
        skipped.
    """
    # isinstance() also accepts unicode subclasses, which an exact type
    # comparison would wrongly send through decode() a second time.
    if not isinstance(w, unicode):
        w = w.decode(encoding)

    segs = []
    # The segmenter is always handed UTF-8 bytes, whatever the input encoding.
    for x in cmmseg.segment(w.encode('utf8')):
        try:
            segs.append(x.decode('utf8'))
        except UnicodeError:
            # Best effort: drop a segment that is not valid UTF-8, but let
            # unrelated errors (and KeyboardInterrupt) propagate.
            continue
    return segs
def cmmseg2_seg(w, encoding="utf8"):
    """Split ``w`` into word segments using ``cmmseg``.

    Args:
        w: Input text. If it is not a ``unicode`` instance it is decoded
            using ``encoding`` before segmentation.
        encoding: Name of the codec for decoding a non-unicode ``w``.
            Defaults to ``"utf8"``.

    Returns:
        A list of ``unicode`` strings, one per segment returned by
        ``cmmseg.segment``; segments that fail UTF-8 decoding are left out.
    """
    # Use isinstance() rather than comparing type() so that subclasses of
    # unicode are recognised as already-decoded text.
    if not isinstance(w, unicode):
        w = w.decode(encoding)

    segs = []
    # Re-encode to UTF-8: that is the byte form passed to the segmenter.
    for x in cmmseg.segment(w.encode("utf8")):
        try:
            decoded = x.decode("utf8")
        except UnicodeError:
            # Skip undecodable segments only; a bare except here would
            # also hide programming errors and swallow KeyboardInterrupt.
            continue
        segs.append(decoded)
    return segs
# -*- coding: utf-8 -*-
# Small demo: segment a fixed Chinese sample string with cmmseg and print
# every resulting segment on its own line.
import cmmseg

# Alternative initialisation, left disabled as in the original script:
# cmmseg.init('F:\\deps\\mmseg\\src\\win32')

# NOTE(review): `seg` is never referenced below; presumably constructing
# MMSeg loads the dictionary data that cmmseg.segment relies on -- confirm.
seg = cmmseg.MMSeg('\\deps\\mmseg\\src\\win32')

# The segmenter receives UTF-8 bytes, so encode the unicode sample first.
text = u'中文分词'
rs = cmmseg.segment(text.encode('utf-8'))

for piece in rs:
    # Each segment is decoded from UTF-8 before being printed.
    print(piece.decode('utf-8'))