def __cut_DAG(sentence):
    """Yield the segments of ``sentence`` selected by the computed route.

    ``get_DAG`` and ``calc`` (both defined elsewhere) populate ``route``;
    for a start index ``x`` the segment taken is
    ``sentence[x:route[x][1] + 1]``.

    Segments longer than one character are yielded as they are.
    Consecutive one-character segments are collected into a pending run
    instead: a run of exactly one character is yielded unchanged, while a
    longer run is handed to ``finalseg.__cut`` and every item that call
    produces is passed through.
    """

    def emit_run(run):
        # Flush a (possibly empty) run of collected single characters.
        if len(run) == 1:
            yield run
        elif run:
            for piece in finalseg.__cut(run):
                yield piece

    dag = get_DAG(sentence)
    route = {}
    calc(sentence, dag, 0, route=route)

    total = len(sentence)
    pending = u''
    start = 0
    while start < total:
        stop = route[start][1] + 1
        segment = sentence[start:stop]
        if stop - start == 1:
            # Single character: keep collecting, decide later.
            pending += segment
        else:
            # The pending run must come out before the segment after it.
            for piece in emit_run(pending):
                yield piece
            pending = u''
            yield segment
        start = stop

    # Whatever was still being collected when the sentence ended.
    for piece in emit_run(pending):
        yield piece
def __cut_DAG(sentence):
    """Yield ``(segment, start_index)`` pairs for ``sentence``.

    ``get_DAG`` and ``calc`` (both defined elsewhere) populate ``route``;
    for a start index ``pos`` the segment taken is
    ``sentence[pos:route[pos][1] + 1]``.

    Segments longer than one character are yielded together with their
    start index.  Consecutive one-character segments are collected into a
    pending run: a run of one character is yielded with the index just
    before the current position, and a longer run is passed to
    ``finalseg.__cut``, which is iterated as ``(token, offset)`` pairs
    (offset presumably relative to the start of the run -- confirm against
    ``finalseg``); each offset is shifted by the run's start index.
    """
    dag = get_DAG(sentence)
    route = {}
    calc(sentence, dag, 0, route=route)

    pos = 0
    pending = u''
    text_len = len(sentence)
    while pos < text_len:
        nxt = route[pos][1] + 1
        word = sentence[pos:nxt]

        if nxt - pos == 1:
            # Single character: collect it and move on.
            pending += word
            pos = nxt
            continue

        # A multi-character segment ends the pending run, which finishes
        # right before ``pos``.
        run_len = len(pending)
        if run_len == 1:
            yield (pending, pos - 1)
        elif run_len > 1:
            for token, offset in finalseg.__cut(pending):
                yield (token, pos - run_len + offset)
        pending = u''

        yield (word, pos)
        pos = nxt

    # Trailing run: ``pos`` now equals the position where the run ends.
    run_len = len(pending)
    if run_len == 1:
        yield (pending, pos - 1)
    elif run_len > 1:
        for token, offset in finalseg.__cut(pending):
            yield (token, pos - run_len + offset)
def __cut_DAG(sentence):
    """Generate ``(segment, start_index)`` tuples covering ``sentence``.

    The route filled in by ``calc`` (defined elsewhere, fed with the
    result of ``get_DAG``) gives, for each start index ``x``, the segment
    ``sentence[x:route[x][1] + 1]``.

    A segment of two or more characters is emitted with its own start
    index.  One-character segments are accumulated; when the accumulated
    run ends it is emitted either unchanged (length one) or as the
    ``(token, offset)`` pairs obtained from ``finalseg.__cut``, with each
    offset rebased onto the run's start index (offsets are presumably
    relative to the run -- verify against ``finalseg``).
    """

    def flush(run, end):
        # ``end`` is the index just past the last character of ``run``.
        size = len(run)
        if size == 1:
            yield (run, end - 1)
        elif size > 1:
            base = end - size
            for token, offset in finalseg.__cut(run):
                yield (token, base + offset)

    graph = get_DAG(sentence)
    route = {}
    calc(sentence, graph, 0, route=route)

    cursor = 0
    run = u''
    limit = len(sentence)
    while cursor < limit:
        upto = route[cursor][1] + 1
        chunk = sentence[cursor:upto]
        if upto - cursor != 1:
            # Emit the accumulated run first, then the segment itself.
            for item in flush(run, cursor):
                yield item
            run = u''
            yield (chunk, cursor)
        else:
            run += chunk
        cursor = upto

    # Emit the run that was still open at the end of the sentence.
    for item in flush(run, cursor):
        yield item
def __cut_DAG(sentence):
    """Yield the segments of ``sentence`` selected by the computed route.

    The function first builds ``dag``, a dict mapping each start index to
    a list of end indices, by walking the module-level ``trie`` from every
    start position: each time the node reached contains the key ``''``
    the current end index is recorded (presumably ``''`` marks a complete
    dictionary entry -- confirm against the trie builder).  Start indices
    with no recorded end get themselves as their only end.

    ``calc`` (defined elsewhere) then fills ``route``; for a start index
    ``x`` the segment taken is ``sentence[x:route[x][1] + 1]``.  Segments
    longer than one character are yielded as they are.  Consecutive
    one-character segments are collected into a pending run: a run of one
    character is yielded unchanged, a longer run is handed to
    ``finalseg.__cut`` and its results are passed through.
    """
    length = len(sentence)

    # --- build the DAG by scanning the trie from every start index ---
    dag = {}
    node = trie
    begin = end = 0
    while begin < length:
        char = sentence[end]
        if char in node:
            node = node[char]
            if '' in node:
                dag.setdefault(begin, []).append(end)
            end += 1
            if end < length:
                continue
        # Either the trie has no branch for this character or the end of
        # the sentence was reached: restart from the next start index.
        node = trie
        begin += 1
        end = begin

    for idx in xrange(length):
        if idx not in dag:
            dag[idx] = [idx]

    # --- walk the route and emit segments ---
    route = {}
    calc(sentence, dag, 0, route=route)

    pos = 0
    pending = u''
    while pos < length:
        nxt = route[pos][1] + 1
        word = sentence[pos:nxt]

        if nxt - pos == 1:
            # Single character: collect it and move on.
            pending += word
            pos = nxt
            continue

        # A multi-character segment ends the pending run.
        if len(pending) == 1:
            yield pending
        elif pending:
            for token in finalseg.__cut(pending):
                yield token
        pending = u''

        yield word
        pos = nxt

    # Flush whatever was still being collected at the end.
    if len(pending) == 1:
        yield pending
    elif pending:
        for token in finalseg.__cut(pending):
            yield token