Example #1
0
 def wsplit(self,s):
     """Segment *s* with ICTCLAS and return the tokens separated by spaces.

     *s* is handed to ``ictclas.process_str_ret_list`` together with its
     length and the ``ictclas.eCodeType.UNKNOW`` code type.  Each returned
     record supplies ``iStartPos`` and ``iLength``, which are used to slice
     the token text back out of *s*.

     Returns a string in which every token, including the last one, is
     followed by a single space; the result is empty when no tokens are
     returned.
     """
     tokens = ictclas.process_str_ret_list(s, len(s), ictclas.eCodeType.UNKNOW)
     # join() avoids shadowing the builtin ``str`` with an accumulator and
     # avoids rebuilding the whole result string on every iteration.
     return ''.join(
         s[tok.iStartPos:tok.iStartPos + tok.iLength] + ' ' for tok in tokens
     )
Example #2
0
 def wsplit(self, s):
     """Return the ICTCLAS segmentation of *s* as space-terminated tokens.

     Calls ``ictclas.process_str_ret_list(s, len(s), eCodeType.UNKNOW)`` and
     uses each record's ``iStartPos`` / ``iLength`` to cut the corresponding
     piece out of *s*.

     Returns the pieces concatenated in the order the library reports them,
     each one followed by a single space (so a non-empty result ends with a
     space).
     """
     records = ictclas.process_str_ret_list(
         s, len(s), ictclas.eCodeType.UNKNOW)
     pieces = []
     for rec in records:
         begin = rec.iStartPos
         pieces.append(s[begin:begin + rec.iLength] + ' ')
     # A single join replaces the former ``str = str + ...`` accumulator,
     # which shadowed the builtin ``str`` and copied the result each pass.
     return ''.join(pieces)
Example #3
0
 def split(self,s):
     """Segment *s* with ICTCLAS (UTF-8 code type) into space-terminated tokens.

     *s* is passed to ``ictclas.process_str_ret_list`` with its length and
     ``ictclas.eCodeType.UTF8``.  Each record's ``iStartPos`` and ``iLength``
     are used to slice the token out of *s*.
     NOTE(review): the offsets are presumably byte offsets, so *s* should be a
     UTF-8 encoded byte string -- confirm against the ictclas binding.

     Returns the tokens concatenated in order, each followed by one space.
     """
     tokens = ictclas.process_str_ret_list(s, len(s), ictclas.eCodeType.UTF8)
     # Built with join() rather than an accumulator named ``str`` (which hid
     # the builtin) and repeated string concatenation.
     return ''.join(
         s[tok.iStartPos:tok.iStartPos + tok.iLength] + ' ' for tok in tokens
     )
Example #4
0
 def split(self, s):
     """Return the tokens ICTCLAS finds in *s*, each followed by a space.

     The text is processed by ``ictclas.process_str_ret_list`` using the
     ``ictclas.eCodeType.UTF8`` code type; the ``iStartPos`` / ``iLength``
     fields of every returned record select the matching slice of *s*.
     NOTE(review): presumably *s* must be UTF-8 bytes for those offsets to
     line up -- verify with the callers.

     Returns a single string; it is empty when the library returns no
     records and otherwise ends with a trailing space.
     """
     records = ictclas.process_str_ret_list(
         s, len(s), ictclas.eCodeType.UTF8)
     pieces = []
     for rec in records:
         begin = rec.iStartPos
         pieces.append(s[begin:begin + rec.iLength] + ' ')
     # One join instead of ``str = str + ...``: no shadowing of the builtin
     # ``str`` and no repeated copying of the partial result.
     return ''.join(pieces)
Example #5
0
def wordsplit(s):
    """Return the words of *s* as one string, each word followed by a space.

    On every call this imports the user dictionary ``./user.txt``,
    initialises ICTCLAS from the current directory, segments *s*, and shuts
    the library down again.  Each record returned by
    ``ictclas.process_str_ret_list`` provides ``start`` and ``length``, which
    are used to slice the word out of *s*.

    The shutdown call is made even when segmentation raises, so the library
    is not left initialised after a failure.
    """
    ictclas.import_dict('./user.txt')
    ictclas.ict_init("./")
    try:
        words = ictclas.process_str_ret_list(s)
        # join() replaces the accumulator that shadowed the builtin ``str``
        # and re-copied the partial result on every iteration.
        return ''.join(s[w.start:w.start + w.length] + ' ' for w in words)
    finally:
        # Pair every successful ict_init() with ict_exit(), error or not.
        ictclas.ict_exit()
Example #6
0
File: test.py Project: mlzboy/resys
#!/usr/bin/env python
#-*- coding:utf8 -*-
import ictclas
print ictclas.ict_init("./",0)
s = "我们都是好孩子,异想天开的孩子。written by 爱思客"
count = ictclas.process_str_ret_word_count(s.decode("utf8").encode("gb2312"))
print "Count:%s"%(count)
li = ictclas.process_str_ret_list(s)
print "-"*8
for i in li:
	print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)]
print "-"*8
kw = ictclas.keyword(len(li))
for i in kw:
	print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)]

print "-"*8
print "set pos map:%s"%(ictclas.POSMAP.ICT_SECOND)
print "after add user word"
ictclas.add_user_word("爱思客	n")
ictclas.set_pos_map(ictclas.POSMAP.ICT_SECOND)
li = ictclas.process_str_ret_list(s)
fingerprint = ictclas.fingerprint()
print "fingerprint:%s"%(fingerprint)
kw = ictclas.keyword(len(li))
for i in kw:
	print i.start, i.length, i.ipos, i.spos, i.word_id, i.word_type, i.weight, s[i.start:(i.start+i.length)]

print ictclas.ict_exit()