def viterbi(self, pylist, top=15, words=None):
    """Return the best-scoring state paths for the observations in `pylist`.

    Only two trellis columns are kept; the column for position ``t`` lives
    in ``V[t % 2]``.  When the joined prefix ``pylist[:-1]`` is already a key
    of ``self.memo``, the cached column for that prefix is reused and only
    the last observation is processed.  The final column is always written
    to ``self.memo`` under the joined full sequence.

    Timing information for each phase is printed to stdout.

    Args:
        pylist: Sequence of observation strings.  They are joined with ""
            to build memo keys and are used as keys into ``self.py2ch``
            whenever `words` does not supply the candidates.
        top: Argument handed to every ``PrioritySet`` constructor
            (presumably the number of paths kept -- confirm against
            ``PrioritySet``).
        words: Optional candidate states.  When non-empty, it replaces
            ``self.py2ch[obs]`` as the candidate list for every position
            handled by the main loop.

    Returns:
        A list of the items held by the merged ``PrioritySet``, sorted by
        descending ``score``.  An empty `pylist` yields ``[]``.
    """
    if not pylist:
        return []

    pylislen = len(pylist)
    prepyseq = "".join(pylist[:-1])
    memo_key = "".join(pylist)
    V = [{}, {}]
    idx = 0

    if prepyseq in self.memo:
        start = time.time()
        cached = self.memo[prepyseq]
        cur_cand_states = list(cached)
        # Only the last position still has to be computed.
        START = pylislen - 1
        # Position START reads its predecessor column from
        # V[(START + 1) % 2], which is V[pylislen % 2].
        seeded_column = V[pylislen % 2]
        for state in cur_cand_states:
            seeded_column[state] = cached[state]
        end = time.time()
        print("READ MEMORY COST {}".format(end-start))
    else:
        START = 1
        cur_obs = pylist[0]  # current observation
        cur_cand_states = self.py2ch[cur_obs]  # candidate states
        for state in cur_cand_states:
            tao = Pi_state(self.Pi, state) + emit_a_b(self.emit, state, cur_obs)
            V[0][state] = PrioritySet(top)
            V[0][state].put(tao, [state])

    # Iteration over positions t > 0 (or only the last one on a memo hit).
    start = time.time()
    for t in range(START, pylislen):
        cur_obs = pylist[t]
        idx = t % 2
        V[idx] = {}
        prev_column = V[(idx + 1) % 2]
        prev_states = cur_cand_states
        cur_cand_states = words if words else self.py2ch[cur_obs]
        for state in cur_cand_states:  # state at position t
            if state not in V[idx]:
                V[idx][state] = PrioritySet(top)
            bucket = V[idx][state]
            # The emission term does not depend on the previous state, and
            # the transition term does not depend on the cached candidate,
            # so each is evaluated once per loop level.
            # NOTE(review): assumes emit_a_b / trans_a_b are pure lookups.
            emission = emit_a_b(self.emit, state, cur_obs)
            for prev in prev_states:  # state at position t - 1
                tao = trans_a_b(self.trans, prev, state) + emission
                for cand in prev_column[prev]:
                    bucket.put(tao + cand.score, cand.path + [state])
    end = time.time()
    print("RUN VITERBI COST: {}".format(end-start))

    start = time.time()
    results = PrioritySet(top)
    for last_state, candidates in V[idx].items():
        # Remember the final column for this sequence.  The memo is indexed
        # inside the loop so nothing is touched when the column is empty.
        self.memo[memo_key][last_state] = candidates
        for item in candidates:
            results.put(item.score, item.path)
    results = list(results)
    end = time.time()
    print("LAST PROCESSING: {}".format(end-start))
    return sorted(results, key=lambda x: x.score, reverse=True)
def newviterbi(self, pylist, top=15):
    """Return the best-scoring state paths, drawing candidates from ``self.pt``.

    For every observation the candidate states are obtained by calling
    ``self.pt.get_totalwords_of_prefix`` and keeping the first `topp`
    entries after sorting by descending value.  Scores use
    ``emit_a_b_many`` for the emission term.  Only two trellis columns are
    kept; the column for position ``t`` lives in ``V[t % 2]``.

    The candidates of every position and a per-step trace line are printed
    to stdout.

    Args:
        pylist: Sequence of observations; each is passed to
            ``get_totalwords_of_prefix`` as the prefix.
        top: Argument handed to every ``PrioritySet`` constructor
            (presumably the number of paths kept -- confirm against
            ``PrioritySet``).

    Returns:
        A list of the items held by the merged ``PrioritySet``, sorted by
        descending ``score``.  An empty `pylist` yields ``[]``.
    """
    if not pylist:
        return []

    def candidates_for(obs):
        # Collect the words for `obs` and keep the highest-valued ones.
        prefix_ans = {}
        self.pt.get_totalwords_of_prefix(self.pt.root, obs, prefix_ans)
        ranked = sorted(prefix_ans.items(), key=lambda x: x[1], reverse=True)
        # NOTE(review): `topp` is not bound anywhere in this function (its
        # local assignment was commented out as "topp =100"); it has to be
        # a module-level name -- confirm it exists.
        return [hz_freq[0] for hz_freq in ranked[:topp]]

    V = [{}, {}]
    idx = 0

    cur_obs = pylist[0]
    cur_cand_states = candidates_for(cur_obs)  # candidate states
    for i in cur_cand_states:
        print(i)

    for state in cur_cand_states:
        tao = Pi_state(self.Pi, state) + emit_a_b_many(self.emit, state, cur_obs)
        V[0][state] = PrioritySet(top)
        V[0][state].put(tao, [state])

    for t in range(1, len(pylist)):
        cur_obs = pylist[t]
        print("---------------")
        # Formatted into a single string so the line is written the same
        # way by the Python 2 print statement and the print() function.
        print("%s %s %s" % (pylist, t, pylist[t]))
        idx = t % 2
        V[idx] = {}
        prev_column = V[(idx + 1) % 2]
        prev_states = cur_cand_states
        cur_cand_states = candidates_for(cur_obs)  # candidate states
        for i in cur_cand_states:
            print(i)

        for state in cur_cand_states:  # state at position t
            if state not in V[idx]:
                V[idx][state] = PrioritySet(top)
            bucket = V[idx][state]
            # The emission term does not depend on the previous state, and
            # the transition term does not depend on the cached candidate,
            # so each is evaluated once per loop level.
            # NOTE(review): assumes emit_a_b_many / trans_a_b are pure lookups.
            emission = emit_a_b_many(self.emit, state, cur_obs)
            for prev in prev_states:  # state at position t - 1
                tao = trans_a_b(self.trans, prev, state) + emission
                for cand in prev_column[prev]:
                    bucket.put(tao + cand.score, cand.path + [state])

    results = PrioritySet(top)
    for candidates in V[idx].values():
        for item in candidates:
            results.put(item.score, item.path)
    return sorted(results, key=lambda x: x.score, reverse=True)
def serch_in_dict(pyl, dict):
    """Look up the entries stored for a token sequence in `dict`.

    The lookup key is every token of `pyl` other than ``" "``, each followed
    by a single space (so the key ends with a trailing space).

    Args:
        pyl: Iterable of string tokens.
        dict: Mapping from such keys to an inner mapping.  The inner
            mapping's keys are iterables that get expanded into lists and
            its values are used for ranking.  (The parameter name shadows
            the builtin; it is kept because it is part of the signature.)

    Returns:
        A ``PrioritySet(15)`` filled with ``(value, list(inner_key))`` pairs,
        inserted in order of descending value, when the key is present;
        otherwise an empty list.
    """
    key = "".join(token + " " for token in pyl if token != " ")
    if key not in dict:
        return []

    ranked = PrioritySet(15)
    # .items() behaves the same as iteritems() here and also exists on
    # Python 3 mappings.
    entries = sorted(dict[key].items(), key=lambda d: d[1], reverse=True)
    for phrase, value in entries:
        ranked.put(value, list(phrase))
    return ranked