def api_segment(string):
    '''Segment ``string`` and wrap the outcome in a status envelope.

    Returns a dict:

    * ``{"code": 200, "data": tokens}`` on success, where ``tokens`` is the
      value returned by ``_scws.get_res(string)``. Per the original notes
      this is a list whose items are themselves (word, part-of-speech, idf).
    * ``{"code": 510, "msg": "scws internal error"}`` when the segmenter
      call raises; the error is reported through the envelope instead of
      propagating to the caller.

    **example**:
    @startcode
        res = api_segment("我们是好朋友")
        if res["code"] == 200:
            for c in res["data"]:
                print c[0],":",c[1],":",c[2]
    @endcode

    **result**:
    @startcode
        我们 : r : 4.42000007629
        是 : v : 0.0
        好朋友 : nz : 7.96999979019
    @endcode
    '''
    try:
        # Only the segmenter call is guarded, so the handler cannot mask
        # mistakes made while building the response.
        tokens = _scws.get_res(string)
    except Exception:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit still reach the caller instead of being reported
        # as a segmenter failure.
        return {"code": 510, "msg": "scws internal error"}
    return {"code": 200, "data": tokens}
# coding:utf8
import _scws

# Single-argument print(...) calls produce the same output as the Python 2
# print statement, so the debug trace below is unchanged.
print("hello")

# scws setup, in the original order: new handle, charset, dictionary, rules.
_scws.scws_new()
_scws.scws_set_charset("UTF8")
_scws.scws_set_xdb("/etc/scws/dict.utf8.xdb")
_scws.scws_set_rule("/etc/scws/rules.utf8.ini")

print("hid")
print("hi")

# Smoke run: segment one sample sentence and dump the first three fields of
# every returned item, then the whole result.
d = _scws.get_res("我们是很好很好的好朋友啊,列宁说")
for c in d:
    print(c[0])
    print(c[1])
    print(c[2])
print("exit...")
print(d)
def segment(string):
    '''Segment ``string`` and return the segmenter output as-is.

    The value comes straight from ``_scws.get_res``; per the original notes
    it is a list whose items are themselves (word, part-of-speech, idf).
    '''
    tokens = _scws.get_res(string)
    return tokens
def cnseg(string):
    """Segment ``string`` and return the words joined by single spaces."""
    words = []
    for token in _scws.get_res(string):
        # Keep only the first field of each item (the word, per the
        # original notes); the remaining fields are dropped.
        words.append(token[0])
    return " ".join(words)
def get_text_fc(self, text):
    """Return the result of ``_scws.get_res(text)`` unchanged.

    ``self`` is accepted but not used by this function.
    NOTE(review): "fc" presumably abbreviates "fenci" (word segmentation) --
    confirm with the owning class.
    """
    return _scws.get_res(text)
def get_text_fc(self, text):
    """Run ``text`` through ``_scws.get_res`` and hand back its result.

    No instance state is read; ``self`` is only part of the signature.
    NOTE(review): "fc" presumably abbreviates "fenci" (word segmentation) --
    confirm with the owning class.
    """
    return _scws.get_res(text)