def wordsplit(s):
    """Return the words of *s* joined into one space-separated string.

    Loads './user.txt' through ``ictclas.import_dict``, initialises ICTCLAS
    from './', segments *s* with ``ictclas.process_str_ret_list`` and shuts
    ICTCLAS down again before returning.

    s -- the text to segment; each token's ``start``/``length`` are used
        as slice offsets into this same object.

    Returns the sliced words in order, each followed by a single space
    (so a non-empty result ends with a trailing space; no tokens gives '').
    """
    # NOTE(review): import_dict is called before ict_init, as in the
    # original -- confirm that this wrapper accepts that order.
    ictclas.import_dict('./user.txt')
    ictclas.ict_init("./")
    try:
        tokens = ictclas.process_str_ret_list(s)
        # Slice while ICTCLAS is still initialised, then join once instead
        # of growing a string inside the loop.
        return ''.join(
            s[tok.start:tok.start + tok.length] + ' ' for tok in tokens
        )
    finally:
        # Always release ICTCLAS, even if segmentation raised.
        ictclas.ict_exit()
def __init__(self,basepath='./'):
    """Call ``ictclas.ict_init`` with *basepath*.

    basepath -- forwarded unchanged to ``ictclas.ict_init``; defaults to
        './' (presumably the directory holding the ICTCLAS data files --
        TODO confirm).
    """
    # NOTE(review): the value returned by ict_init is discarded, so a
    # failed initialisation is not reported here.
    ictclas.ict_init(basepath)
def filesplit(f, n, tag):
    """Run ``ictclas.process_file(f, n, tag)`` inside an init/exit pair.

    Loads './user.txt' through ``ictclas.import_dict`` and initialises
    ICTCLAS from './' before the call, then shuts ICTCLAS down afterwards.

    f, n, tag -- forwarded unchanged to ``ictclas.process_file``
        (presumably input file, output file and a tagging flag --
        TODO confirm against the ictclas wrapper).

    Returns whatever ``ictclas.process_file`` returns. The value used to be
    stored in an unused local and dropped, so the function returned None.
    """
    # NOTE(review): import_dict is called before ict_init, as in the
    # original -- confirm that this wrapper accepts that order.
    ictclas.import_dict('./user.txt')
    ictclas.ict_init("./")
    try:
        return ictclas.process_file(f, n, tag)
    finally:
        # Always release ICTCLAS, even if process_file raised.
        ictclas.ict_exit()
def __init__(self,basepath='/home/chunwei/swin2/_parser/ICTCLAS50'):
    """Call ``ictclas.ict_init`` with *basepath*.

    basepath -- forwarded unchanged to ``ictclas.ict_init``.
    """
    # NOTE(review): the default is an absolute path under one user's home
    # directory; callers on other machines presumably need to pass
    # basepath explicitly -- confirm.
    # NOTE(review): the value returned by ict_init is discarded, so a
    # failed initialisation is not reported here.
    ictclas.ict_init(basepath)
def __init__(self, basepath='./'):
    """Call ``ictclas.ict_init`` with *basepath*.

    basepath -- forwarded unchanged to ``ictclas.ict_init``; defaults to
        './' (presumably the directory holding the ICTCLAS data files --
        TODO confirm).
    """
    # NOTE(review): the value returned by ict_init is discarded, so a
    # failed initialisation is not reported here.
    ictclas.ict_init(basepath)
#!/usr/bin/env python #-*- coding:utf8 -*- import ictclas print ictclas.ict_init("./",0) s = "我们都是好孩子,异想天开的孩子。written by 爱思客" count = ictclas.process_str_ret_word_count(s.decode("utf8").encode("gb2312")) print "Count:%s"%(count) li = ictclas.process_str_ret_list(s) print "-"*8 for i in li: print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)] print "-"*8 kw = ictclas.keyword(len(li)) for i in kw: print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)] print "-"*8 print "set pos map:%s"%(ictclas.POSMAP.ICT_SECOND) print "after add user word" ictclas.add_user_word("爱思客 n") ictclas.set_pos_map(ictclas.POSMAP.ICT_SECOND) li = ictclas.process_str_ret_list(s) fingerprint = ictclas.fingerprint() print "fingerprint:%s"%(fingerprint) kw = ictclas.keyword(len(li)) for i in kw: print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)] print ictclas.ict_exit()
#!/usr/bin/env python #-*- coding:gb2312 -*- import ictclas print ictclas.ict_init(".") s = "我们都是好孩子,异想天开的孩子。written by 爱思客" print s count = ictclas.process_str_ret_word_count(s) print "Count:%s"%(count) #li = ictclas.process_str_ret_list(s) #print "-"*8 #for i in li: # print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)] #print "-"*8 #kw = ictclas.keyword(len(li)) #for i in kw: # print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)] # #print "-"*8 #print "set pos map:%s"%(ictclas.POSMAP.ICT_SECOND) #print "after add user word" #ictclas.add_user_word("爱思客 n") #ictclas.set_pos_map(ictclas.POSMAP.ICT_SECOND) #li = ictclas.process_str_ret_list(s) #fingerprint = ictclas.fingerprint() #print "fingerprint:%s"%(fingerprint) #kw = ictclas.keyword(len(li)) #for i in kw: # print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)] # #print ictclas.ict_exit() #
def ict_init():
    """Initialise ICTCLAS from the directory that contains this module.

    Passes the absolute directory of ``__file__`` to ``ictclas.ict_init``
    and returns that call's result unchanged.
    """
    import os

    # os.path.abspath resolves a relative __file__ against the current
    # working directory, and os.path.dirname splits on the platform's own
    # separator. The earlier manual slice at the last "/" dropped the final
    # character when no "/" was present and yielded an empty string (hence
    # the working directory) for a file in the root directory.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return ictclas.ict_init(module_dir)