def wordsplit(s): '返回一个由 分割开的自符' str='' #print s ictclas.import_dict('./user.txt') ictclas.ict_init("./") li = ictclas.process_str_ret_list(s) for i in li: #print i.start, i.length, i.word_id, s[i.start:(i.start+i.length)] # print s[i.start:(i.start+i.length)],hash(s[i.start:(i.start+i.length)]) str=str+s[i.start:(i.start+i.length)]+' ' ictclas.ict_exit() return str
def __del__ (self): ictclas.ict_exit()
def __del__(self): ictclas.ict_exit()
def filesplit(f,n,tag): ictclas.import_dict('./user.txt') ictclas.ict_init("./") li=ictclas.process_file(f,n,tag) ictclas.ict_exit()
#!/usr/bin/env python #-*- coding:utf8 -*- import ictclas print ictclas.ict_init("./",0) s = "我们都是好孩子,异想天开的孩子。written by 爱思客" count = ictclas.process_str_ret_word_count(s.decode("utf8").encode("gb2312")) print "Count:%s"%(count) li = ictclas.process_str_ret_list(s) print "-"*8 for i in li: print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)] print "-"*8 kw = ictclas.keyword(len(li)) for i in kw: print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)] print "-"*8 print "set pos map:%s"%(ictclas.POSMAP.ICT_SECOND) print "after add user word" ictclas.add_user_word("爱思客 n") ictclas.set_pos_map(ictclas.POSMAP.ICT_SECOND) li = ictclas.process_str_ret_list(s) fingerprint = ictclas.fingerprint() print "fingerprint:%s"%(fingerprint) kw = ictclas.keyword(len(li)) for i in kw: print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)] print ictclas.ict_exit()