def init_fenci(self):
    """Configure the ``_scws`` segmenter from this object's settings.

    Reads ``self.charset``, ``self.xdb_dict_path``, ``self.rule_path`` and
    ``self.txt_dict_path`` and hands each one to the matching ``_scws``
    setter. None of the ``_scws`` calls receives a handle, so the
    configuration applies to whatever state the ``_scws`` module keeps
    internally.
    """
    # Trace which XDB dictionary is about to be loaded. The parenthesised
    # single-argument form parses on Python 3 as well as Python 2 and prints
    # exactly the same text on Python 2.
    print(self.xdb_dict_path)

    _scws.scws_new()
    _scws.scws_set_charset(self.charset)
    _scws.scws_set_xdb(self.xdb_dict_path)
    _scws.scws_set_rule(self.rule_path)
    _scws.scws_add_dict(self.txt_dict_path)
    # NOTE(review): 8 is presumably an SCWS multi-segmentation mode flag;
    # confirm its meaning against the _scws binding.
    _scws.scws_set_multi(8)
def get_scws(cls, xdb_handle=None):
    """Set up ``_scws`` with the dictionary registered under *xdb_handle*.

    Args:
        xdb_handle: Key into ``xdb_handle_dict`` selecting the XDB
            dictionary path to load.

    Returns:
        The ``_scws`` module itself, after it has been configured.

    Raises:
        Exception: If *xdb_handle* is ``None`` or is not a key of
            ``xdb_handle_dict``.
    """
    handle_is_known = (
        xdb_handle is not None and xdb_handle in xdb_handle_dict
    )
    if not handle_is_known:
        raise Exception("must a meaningful xdb handle to init.")

    dictionary_path = xdb_handle_dict[xdb_handle]

    _scws.scws_new()
    _scws.scws_set_charset("UTF8")
    _scws.scws_set_xdb(dictionary_path)
    # NOTE(review): 8 is presumably an SCWS multi-segmentation mode flag;
    # confirm its meaning against the _scws binding.
    _scws.scws_set_multi(8)
    return _scws
def init_fenci(self):
    """Configure the ``_scws`` segmenter from this object's settings.

    Passes ``self.charset``, ``self.xdb_dict_path``, ``self.rule_path`` and
    ``self.txt_dict_path`` to the matching ``_scws`` setters, then applies
    fixed values for the multi, ignore and duality options.
    """
    segmenter = _scws

    segmenter.scws_new()
    segmenter.scws_set_charset(self.charset)

    # Dictionary and rule sources, all taken from the instance.
    segmenter.scws_set_xdb(self.xdb_dict_path)
    segmenter.scws_set_rule(self.rule_path)
    segmenter.scws_add_dict(self.txt_dict_path)

    # NOTE(review): presumably multi-segmentation off, ignore-symbols on and
    # duality off -- confirm these meanings against the _scws binding.
    segmenter.scws_set_multi(0)
    segmenter.scws_set_ignore(1)
    segmenter.scws_set_duality(0)
# coding:utf8
# Smoke-test script for the _scws binding: configure it with the system-wide
# UTF-8 dictionary and rule file, segment one sample sentence and dump the
# result. Every print uses the parenthesised single-argument form, which
# parses on Python 3 and produces the same output on Python 2.
import _scws

print("hello")

_scws.scws_new()
_scws.scws_set_charset("UTF8")
_scws.scws_set_xdb("/etc/scws/dict.utf8.xdb")
_scws.scws_set_rule("/etc/scws/rules.utf8.ini")

print("hid")
print("hi")

d = _scws.get_res("我们是很好很好的好朋友啊,列宁说")
# Each result item is indexed at positions 0, 1 and 2.
# NOTE(review): presumably these are the token text plus two attributes of
# it -- confirm the tuple layout against the _scws binding.
for c in d:
    print(c[0])
    print(c[1])
    print(c[2])
print("exit...")
print(d)
def _prepare_scws(xdb_path="/etc/scws/dict.utf8.xdb",
                  rule_path="/etc/scws/rules.utf8.ini",
                  charset="UTF8"):
    """Prepare the dictionary and rule file needed for word segmentation.

    Called with no arguments this behaves exactly as before, loading the
    system-wide UTF-8 dictionary and rule file. The parameters exist so
    callers can point the segmenter at other resources without editing
    this function.

    Args:
        xdb_path: Path passed to ``_scws.scws_set_xdb``.
        rule_path: Path passed to ``_scws.scws_set_rule``.
        charset: Charset name passed to ``_scws.scws_set_charset``.
    """
    _scws.scws_new()
    _scws.scws_set_charset(charset)
    _scws.scws_set_xdb(xdb_path)
    _scws.scws_set_rule(rule_path)