def wsplit(self, s):
    """Segment ``s`` with ictclas and return the pieces, each followed by a space.

    ``s`` is passed to ``ictclas.process_str_ret_list`` together with its
    length and ``ictclas.eCodeType.UNKNOW``. Every returned item is read for
    ``iStartPos`` and ``iLength``, which are used to slice the piece back out
    of ``s``. ``self`` is not used.

    Returns the concatenation of all pieces, each followed by one space
    (so a non-empty result ends with a space); an empty result list yields ''.
    """
    # NOTE(review): UNKNOW presumably asks ICTCLAS to detect the encoding
    # itself -- confirm against the binding's documentation.
    tokens = ictclas.process_str_ret_list(s, len(s), ictclas.eCodeType.UNKNOW)
    # Join once instead of growing a string in a loop; this also avoids
    # shadowing the builtin ``str`` with a local accumulator.
    return ''.join(
        s[tok.iStartPos:(tok.iStartPos + tok.iLength)] + ' ' for tok in tokens
    )
def wsplit(self, s):
    """Segment ``s`` with ictclas and return the pieces, each followed by a space.

    ``s`` is passed to ``ictclas.process_str_ret_list`` together with its
    length and ``ictclas.eCodeType.UNKNOW``. Every returned item is read for
    ``iStartPos`` and ``iLength``, which are used to slice the piece back out
    of ``s``. ``self`` is not used.

    Returns the concatenation of all pieces, each followed by one space
    (so a non-empty result ends with a space); an empty result list yields ''.
    """
    # NOTE(review): UNKNOW presumably asks ICTCLAS to detect the encoding
    # itself -- confirm against the binding's documentation.
    tokens = ictclas.process_str_ret_list(s, len(s), ictclas.eCodeType.UNKNOW)
    # Join once instead of growing a string in a loop; this also avoids
    # shadowing the builtin ``str`` with a local accumulator.
    return ''.join(
        s[tok.iStartPos:(tok.iStartPos + tok.iLength)] + ' ' for tok in tokens
    )
def split(self, s):
    """Segment ``s`` with ictclas (UTF8 code type) and space-terminate each piece.

    ``s`` is passed to ``ictclas.process_str_ret_list`` together with its
    length and ``ictclas.eCodeType.UTF8``. Every returned item is read for
    ``iStartPos`` and ``iLength``, which are used to slice the piece back out
    of ``s``. ``self`` is not used.

    Returns the concatenation of all pieces, each followed by one space
    (so a non-empty result ends with a space); an empty result list yields ''.
    """
    # NOTE(review): the offsets are applied directly as indices into ``s``;
    # presumably ``s`` is a UTF-8 byte string so they line up -- confirm.
    tokens = ictclas.process_str_ret_list(s, len(s), ictclas.eCodeType.UTF8)
    # Join once instead of growing a string in a loop; this also avoids
    # shadowing the builtin ``str`` with a local accumulator.
    return ''.join(
        s[tok.iStartPos:(tok.iStartPos + tok.iLength)] + ' ' for tok in tokens
    )
def split(self, s):
    """Segment ``s`` with ictclas (UTF8 code type) and space-terminate each piece.

    ``s`` is passed to ``ictclas.process_str_ret_list`` together with its
    length and ``ictclas.eCodeType.UTF8``. Every returned item is read for
    ``iStartPos`` and ``iLength``, which are used to slice the piece back out
    of ``s``. ``self`` is not used.

    Returns the concatenation of all pieces, each followed by one space
    (so a non-empty result ends with a space); an empty result list yields ''.
    """
    # NOTE(review): the offsets are applied directly as indices into ``s``;
    # presumably ``s`` is a UTF-8 byte string so they line up -- confirm.
    tokens = ictclas.process_str_ret_list(s, len(s), ictclas.eCodeType.UTF8)
    # Join once instead of growing a string in a loop; this also avoids
    # shadowing the builtin ``str`` with a local accumulator.
    return ''.join(
        s[tok.iStartPos:(tok.iStartPos + tok.iLength)] + ' ' for tok in tokens
    )
def wordsplit(s):
    """Return the segments of ``s`` as one string, each followed by a space.

    On every call this imports the user dictionary ``./user.txt``, initialises
    ictclas from ``./``, segments ``s`` with
    ``ictclas.process_str_ret_list`` and finally calls ``ictclas.ict_exit()``.
    Each returned item is read for ``start`` and ``length``, which are used to
    slice the segment out of ``s``.

    Returns the concatenation of all segments, each followed by one space;
    an empty result list yields ''.
    """
    ictclas.import_dict('./user.txt')
    ictclas.ict_init("./")
    try:
        segments = ictclas.process_str_ret_list(s)
        # Join once instead of growing a string in a loop; this also avoids
        # shadowing the builtin ``str`` with a local accumulator.
        return ''.join(
            s[seg.start:(seg.start + seg.length)] + ' ' for seg in segments
        )
    finally:
        # Always pair ict_init() with ict_exit(), even when segmentation or
        # slicing raises, so the library is not left initialised.
        ictclas.ict_exit()
#!/usr/bin/env python
#-*- coding:utf8 -*-
# Demo of the ictclas binding: initialise, count and segment a sample
# sentence, print the segment and keyword records, then add a user word,
# switch the POS map and print the keyword records again before exiting.
import ictclas


def _show_records(records, text):
    """Print one line per record: its fields, then the slice of ``text`` it covers."""
    for rec in records:
        fields = (
            rec.start, rec.length, rec.ipos, rec.spos,
            rec.word_id, rec.word_type, rec.weight,
            text[rec.start:(rec.start + rec.length)],
        )
        # Single-argument print keeps the space-separated output while
        # parsing under both the print statement and the print function.
        print(" ".join(str(field) for field in fields))


print(ictclas.ict_init("./", 0))

s = "我们都是好孩子,异想天开的孩子。written by 爱思客"

# The word count is taken on a gb2312 re-encoding of the sample text,
# whereas segmentation below receives the original string.
gb_text = s.decode("utf8").encode("gb2312")
count = ictclas.process_str_ret_word_count(gb_text)
print("Count:%s" % (count))

li = ictclas.process_str_ret_list(s)
print("-" * 8)
_show_records(li, s)

print("-" * 8)
kw = ictclas.keyword(len(li))
_show_records(kw, s)

print("-" * 8)
print("set pos map:%s" % (ictclas.POSMAP.ICT_SECOND))
print("after add user word")
ictclas.add_user_word("爱思客 n")
ictclas.set_pos_map(ictclas.POSMAP.ICT_SECOND)

# Segment again now that the user word and POS map are in place.
li = ictclas.process_str_ret_list(s)
fingerprint = ictclas.fingerprint()
print("fingerprint:%s" % (fingerprint))
kw = ictclas.keyword(len(li))
_show_records(kw, s)

print(ictclas.ict_exit())