예제 #1
0
 def setUp(self):
     """Create the model under test and a reference model, then fit the former.

     Both models are built with 4 hidden states and 10 observation
     categories. Data sampled from the reference model is stored on the
     fixture (``self.X``, ``self.Z``) and used to train ``self.test_hmm``.
     """
     hidden_states, categories = 4, 10
     self.n_batch = 100  # number of batches
     self.X_length = 1000
     self.test_hmm = hmm.DiscreteHMM(hidden_states, categories)
     self.comp_hmm = ContrastHMM(hidden_states, categories)
     # Request ten times X_length samples from the reference model.
     sample_size = self.X_length * 10
     self.X, self.Z = self.comp_hmm.module.sample(sample_size)
     self.test_hmm.train(self.X, self.Z)
예제 #2
0
    def test_train_batch(self):
        """Fit a fresh model with ``train_batch`` and compare it to the reference.

        Draws ``self.n_batch`` samples (each requested with ``self.X_length``)
        from the reference model, trains a new ``DiscreteHMM`` on them, and
        asserts that ``s_error`` between each learned probability table and
        the reference table is zero to one decimal place.
        """
        reference = self.comp_hmm.module
        draws = [reference.sample(self.X_length) for _ in range(self.n_batch)]
        X = [observed for observed, _ in draws]
        Z = [hidden for _, hidden in draws]

        batch_hmm = hmm.DiscreteHMM(self.test_hmm.n_state, self.test_hmm.x_num)
        batch_hmm.train_batch(X, Z)

        # Check that the learned probabilities are close to the reference.
        # NOTE: the original author reported that the start-probability
        # check did not pass.
        start_err = s_error(batch_hmm.start_prob, reference.startprob_)
        self.assertAlmostEqual(start_err, 0, places=1)
        trans_err = s_error(batch_hmm.transmat_prob, reference.transmat_)
        self.assertAlmostEqual(trans_err, 0, places=1)
        emit_err = s_error(batch_hmm.emission_prob, reference.emissionprob_)
        self.assertAlmostEqual(emit_err, 0, places=1)
예제 #3
0
# Accumulate start, transition and emission counts from the labelled
# sequences: Z holds the state sequences, X the matching observations.
for seq_idx in range(len(X)):
    states = Z[seq_idx]
    start_prob[states[0]] += 1.0
    for pos in range(1, len(states)):
        transmat_prob[states[pos - 1]][states[pos]] += 1.0
    observations = X[seq_idx]
    for pos, state in enumerate(states):
        emission_prob[state][observations[pos]] += 1.0

# Normalise the counts: the start vector by its total, and every row of the
# transition and emission tables by that row's sum (broadcast via keepdims).
start_prob = start_prob / start_prob.sum()
transmat_prob = transmat_prob / np.sum(transmat_prob, axis=1, keepdims=True)
emission_prob = emission_prob / np.sum(emission_prob, axis=1, keepdims=True)

wordseg_hmm = hmm.DiscreteHMM(
    start_prob, transmat_prob, emission_prob, 4, len(word_dic)
)

print("startprob_prior: ", wordseg_hmm.start_prob)
print("transmit: ", wordseg_hmm.transmat_prob)

sentence_1 = "我要回家吃饭"
sentence_2 = "中国人民从此站起来了"
sentence_3 = "经党中央研究决定"
sentence_4 = "江主席发表重要讲话"

# Convert each sentence with word_trans and decode it, in order.
Z_1, Z_2, Z_3, Z_4 = [
    wordseg_hmm.decode(word_trans(sentence, word_dic))
    for sentence in (sentence_1, sentence_2, sentence_3, sentence_4)
]

print(u"我要回家吃饭: ", Z_1)
예제 #4
0
# -*- coding:utf-8 -*-
# By tostq <*****@*****.**>
# 博客: blog.csdn.net/tostq
from hmmlearn.hmm import MultinomialHMM
import numpy as np
import hmm

# Model dimensions: number of hidden states (dice) and of observation symbols.
dice_num = 3
x_num = 8
dice_hmm = hmm.DiscreteHMM(dice_num, x_num)

# Uniform start and transition probabilities over the dice.
dice_hmm.start_prob = np.ones(dice_num) / dice_num
dice_hmm.transmat_prob = np.ones((dice_num, dice_num)) / dice_num

# One row per die; 1.0 marks a symbol that die can emit (6, 4 and 8 symbols
# respectively). Rows are normalised below.
dice_hmm.emission_prob = np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
                                   [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0],
                                   [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]])
# Normalise each row by its sum; keepdims makes the row sums broadcast
# against the (dice_num, x_num) table.
dice_hmm.emission_prob = dice_hmm.emission_prob / np.sum(
    dice_hmm.emission_prob, axis=1, keepdims=True)

# The parameters were set by hand, so flag the model as trained
# (presumably this stops the model from re-fitting -- confirm in hmm.py).
dice_hmm.trained = True

# Observed sequence, one symbol per row.
X = np.array([[1], [6], [3], [5], [2], [7], [3], [5], [2], [4], [3], [6], [1],
              [5], [4]])
Z = dice_hmm.decode(X)  # Question A
logprob = dice_hmm.X_prob(X)  # Question B

# Question C: call predict once per candidate next symbol and store the
# per-die result in the matching row of x_next.
x_next = np.zeros((x_num, dice_num))
for i in range(x_num):
    x_next[i] = dice_hmm.predict(X, i)
예제 #5
0
    word_inc = []
    line = wordline.strip()
    line = line.decode("utf-8", "ignore")
    for n in range(len(line)):
        word_inc.append([word_dic[line[n]]])

    return np.array(word_inc)


# Load the training sequences and the character dictionary, then show a
# quick summary of what was loaded.
X, Z, word_dic = precess_data()
# Single-argument print(...) calls behave identically on Python 2 and 3.
print(type(X))
print(type(Z))
print(X[10])
print(Z[10])
# Number of observation categories, i.e. how many distinct characters.
print(len(word_dic))
wordseg_hmm = hmm.DiscreteHMM(4, len(word_dic), 2)
wordseg_hmm.train_batch(X, Z)

# The label is followed by two spaces to reproduce the output of the
# original two-item print statement (trailing space + item separator).
print("startprob_prior:  %s" % (wordseg_hmm.start_prob,))
print("transmit:  %s" % (wordseg_hmm.transmat_prob,))

sentence_1 = "我要回家吃饭"
sentence_2 = "中国人民从此站起来了"
sentence_3 = "经党中央研究决定"
sentence_4 = "江主席发表重要讲话"

# Convert each sentence with word_trans and decode it with the trained model.
Z_1 = wordseg_hmm.decode(word_trans(sentence_1, word_dic))
Z_2 = wordseg_hmm.decode(word_trans(sentence_2, word_dic))
Z_3 = wordseg_hmm.decode(word_trans(sentence_3, word_dic))
Z_4 = wordseg_hmm.decode(word_trans(sentence_4, word_dic))