Esempio n. 1
0
    def viterbi(self, pylist, top=15, words=None):
        """Decode a pinyin sequence into its best-scoring state paths.

        A two-slot rolling table is used: slot ``t % 2`` maps every candidate
        state at step ``t`` to a ``PrioritySet`` of scored partial paths that
        end in that state.  The final layer is stored in ``self.memo`` under
        the joined pinyin string; a later call whose prefix (every syllable
        but the last) is already in ``self.memo`` resumes from that stored
        layer and only processes the last syllable.

        Args:
            pylist: sequence of pinyin syllables (the observations).
            top: capacity argument passed to every ``PrioritySet``.
            words: optional candidate states.  When non-empty it is used in
                place of the ``self.py2ch`` lookup for every step after the
                first.  Defaults to ``None`` (no override).

        Returns:
            The items of the final ``PrioritySet`` sorted by ``score``,
            highest first.  An empty list is returned for an empty
            ``pylist``.

        NOTE(review): the memo key is the pinyin string only, so layers
        computed with a different ``top`` or ``words`` are reused as-is --
        confirm that this is intended.
        """
        if not pylist:
            # Nothing to decode; indexing pylist[0] below would raise.
            return []

        V = [{}, {}]
        idx = 0
        pylislen = len(pylist)
        full_key = "".join(pylist)
        prepyseq = "".join(pylist[:-1])
        START = 1
        if prepyseq in self.memo:
            start = time.time()
            cached_layer = self.memo[prepyseq]
            cur_cand_states = list(cached_layer)
            # Seed the slot that the single remaining step (t = pylislen - 1)
            # will read as its "previous" layer: (t + 1) % 2 == pylislen % 2.
            V[pylislen % 2].update(cached_layer)
            START = pylislen - 1
            end = time.time()
            print("READ MEMORY COST {}".format(end-start))
        else:
            # Step 0: initial score plus emission score for each candidate.
            cur_obs = pylist[0]
            cur_cand_states = self.py2ch[cur_obs]
            for state in cur_cand_states:
                tao = Pi_state(self.Pi, state) + emit_a_b(self.emit, state, cur_obs)
                V[0][state] = PrioritySet(top)
                V[0][state].put(tao, [state])

        # Steps t > 0: extend every stored candidate path by one state.
        start = time.time()
        for t in range(START, pylislen):
            cur_obs = pylist[t]
            idx = t % 2
            prev_layer = V[(idx + 1) % 2]
            V[idx] = {}
            prev_states = cur_cand_states
            cur_cand_states = words if words else self.py2ch[cur_obs]
            for state in cur_cand_states:  # state at the current step
                V[idx].setdefault(state, PrioritySet(top))
                for prev in prev_states:  # state at the previous step
                    # The step score depends only on (prev, state, cur_obs),
                    # so compute it once per pair instead of once per
                    # candidate (assumes the two helpers are pure lookups).
                    tao = trans_a_b(self.trans, prev, state) + emit_a_b(self.emit, state, cur_obs)
                    for cand in prev_layer[prev]:  # paths ending in prev
                        V[idx][state].put(tao + cand.score, cand.path + [state])
        end = time.time()
        print("RUN VITERBI COST: {}".format(end-start))

        start = time.time()
        results = PrioritySet(top)
        for last_state, cands in V[idx].items():
            # Remember the PrioritySet of every final state for this pinyin
            # string.  self.memo[full_key] must already exist or be created
            # on access -- presumably self.memo is a defaultdict(dict).
            self.memo[full_key][last_state] = cands
            for item in cands:
                results.put(item.score, item.path)
        results = list(results)
        end = time.time()
        print("LAST PROCESSING: {}".format(end-start))
        return sorted(results, key=lambda x: x.score, reverse=True)
Esempio n. 2
0
    def newviterbi(self, pylist, top=15, max_candidates=100):
        """Top-k Viterbi decode whose candidates come from the prefix trie.

        For every entry of ``pylist`` the candidate states are the
        ``max_candidates`` highest-valued words that
        ``self.pt.get_totalwords_of_prefix`` collects for that entry.  A
        two-slot rolling table is used: slot ``t % 2`` maps each candidate
        state at step ``t`` to a ``PrioritySet`` of scored partial paths.

        Args:
            pylist: sequence of pinyin strings (the observations).
            top: capacity argument passed to every ``PrioritySet``.
            max_candidates: how many of the best-ranked trie words are kept
                as candidate states per step (previously a hard-coded 100).

        Returns:
            The items of the final ``PrioritySet`` sorted by ``score``,
            highest first.  An empty list is returned for an empty
            ``pylist``.

        The debug output of the original (candidate words, a separator line
        and the current step) is still printed; the print calls are written
        so that they emit the same text on Python 2 and also parse on
        Python 3.
        """
        if not pylist:
            # Nothing to decode; indexing pylist[0] below would raise.
            return []

        def candidates_for(prefix):
            # Collect the words for `prefix` from the trie, rank them by the
            # value the trie stored for each (presumably a frequency -- TODO
            # confirm), keep the best `max_candidates`, and print each one.
            prefix_ans = {}
            self.pt.get_totalwords_of_prefix(self.pt.root, prefix, prefix_ans)
            ranked = sorted(prefix_ans.items(), key=lambda x: x[1], reverse=True)
            found = [hz_freq[0] for hz_freq in ranked[:max_candidates]]
            for word in found:
                print(word)
            return found

        V = [{}, {}]
        idx = 0
        pylislen = len(pylist)

        # Step 0: initial score plus emission score for each candidate.
        cur_obs = pylist[0]
        cur_cand_states = candidates_for(cur_obs)
        for state in cur_cand_states:
            tao = Pi_state(self.Pi, state) + emit_a_b_many(self.emit, state, cur_obs)
            V[0][state] = PrioritySet(top)
            V[0][state].put(tao, [state])

        # Steps t > 0: extend every stored candidate path by one state.
        for t in range(1, pylislen):
            cur_obs = pylist[t]
            print("---------------")
            print("%s %s %s" % (pylist, t, pylist[t]))
            idx = t % 2
            prev_layer = V[(idx + 1) % 2]
            V[idx] = {}
            prev_states = cur_cand_states
            cur_cand_states = candidates_for(cur_obs)

            for state in cur_cand_states:  # state at the current step
                V[idx].setdefault(state, PrioritySet(top))
                for prev in prev_states:  # state at the previous step
                    # The step score depends only on (prev, state, cur_obs),
                    # so compute it once per pair instead of once per
                    # candidate (assumes the two helpers are pure lookups).
                    tao = trans_a_b(self.trans, prev, state) + emit_a_b_many(self.emit, state, cur_obs)
                    for cand in prev_layer[prev]:  # paths ending in prev
                        V[idx][state].put(tao + cand.score, cand.path + [state])

        # Merge the best paths of every final state into one ranked list.
        results = PrioritySet(top)
        for cands in V[idx].values():
            for item in cands:
                results.put(item.score, item.path)
        return sorted(list(results), key=lambda x: x.score, reverse=True)
Esempio n. 3
0
def serch_in_dict(pyl, dict, top=15):
    """Look up a pinyin sequence in a phrase dictionary.

    The lookup key is built by concatenating every element of ``pyl`` that
    is not a single space and then appending two spaces.

    Args:
        pyl: iterable of strings (a string or a list of strings).
        dict: mapping from such keys to an inner mapping of
            ``phrase -> score``.  The name shadows the builtin but is kept
            because it is part of the existing call interface.
        top: capacity argument passed to the ``PrioritySet`` (previously a
            hard-coded 15).

    Returns:
        A ``PrioritySet`` into which every inner entry was put as
        ``(score, list(phrase))``, inserted in descending score order; or an
        empty list when the key is not present in ``dict``.
    """
    key = "".join(piece for piece in pyl if piece != " ") + "  "
    if key not in dict:
        return []

    ranked = PrioritySet(top)
    # items() works on both Python 2 and 3, unlike iteritems().
    by_score = sorted(dict[key].items(), key=lambda d: d[1], reverse=True)
    for phrase, score in by_score:
        # Each phrase is stored as a list of its elements (characters).
        ranked.put(score, list(phrase))
    return ranked