def hmmTrain(self, poiList, lengthList, lenTrainData):
    """Train a 6-state MultinomialHMM on a user's POI visit sequences and
    evaluate category prediction on a held-out sequence.

    poiList    -- concatenation of all POI sequences (recategorized below)
    lengthList -- length of each individual sequence inside poiList
    lenTrainData -- unused (the train/test split below is length-based;
                    the old split using it is kept commented out)
    Returns the prediction score from self.catePredict, or -1 when the
    user's data is unusable (counted in self.userState).
    """
    # Map raw POIs onto the coarser category set.
    poiList = self.mypt.poi2newCate(poiList, self.dictgt)
    if (len(poiList) < sum(lengthList)):
        self.userState[1] += 1
        return -1
    # Pick the sequence of median length to serve as the test set.
    # NOTE(review): for a single-element lengthList, ceil(1/2) == 1 indexes
    # past the end of sortedlength — confirm callers guarantee >= 2 sequences.
    sortedlength = sorted(lengthList)
    idxmedian = sortedlength[int(np.ceil(len(sortedlength) / 2))]
    poiTest = []
    poiTrain = []
    lengthNew = []
    idxprev = 0
    for i, length in enumerate(lengthList):
        if (length != idxmedian):
            # every sequence whose length differs from the median -> training
            poiTrain.extend(poiList[idxprev:idxprev + length])
            lengthNew.append(length)
            idxprev = idxprev + length
        else:
            # median-length sequence(s) -> test set
            poiTest.extend(poiList[idxprev:idxprev + length])
            idxprev = idxprev + length
    # poiTrain = poiList[0:lenTrainData]
    # poiTest = poiList[lenTrainData:]
    setTrain = set(poiTrain)
    setTest = set(poiTest)
    # The test symbols must be a subset of the training symbols, otherwise the
    # LabelEncoder below cannot transform them.
    # NOTE(review): `>` is a *strict* superset test, so the case
    # setTest == setTrain is also rejected — confirm that is intended.
    if (not setTrain > setTest):
        self.userState[2] += 1
        return -1
    # LabelEncoder: encode the training symbols into 0..K-1 as required by
    # hmmlearn's fit().
    le = preprocessing.LabelEncoder()
    le.fit(list(setTrain))
    trainEncode = le.transform(poiTrain)
    testEncode = le.transform(poiTest)
    X = np.atleast_2d(trainEncode)
    Xtest = np.atleast_2d(testEncode)
    # ohe = OneHotEncoder()
    # ohe.fit(np.array(range(8)).reshape(8,1))
    # poiList = np.array(poiList).reshape(len(poiList),1)
    # poiArray = ohe.transform(poiList).toarray()
    # poiArray = np.atleast_2d(poiArray)
    # #Y = np.atleast_2d(np.array([0,1,2,3,4,5,6,7,8,9,10,11,12]))
    lengthList = np.array(lengthList)
    remodel = hmm.MultinomialHMM(n_components=6)
    modelBest = []
    scoreHighest = -1000000
    # Refit 15 times (random restarts) and keep the highest-scoring model.
    # NOTE(review): `remodel` is a single instance re-fit in place, so
    # modelBest always aliases the *last* fit — confirm this is intended.
    for i in range(15):
        # print len(X[0])
        # print sum(lengthNew)
        remodel.fit(X.T, lengthNew)
        modelScore = remodel.score(X.T, lengthNew)
        if (modelScore > scoreHighest):
            scoreHighest = modelScore
            modelBest = remodel
        #print modelScore
    # The evaluation here is not rigorous: it does not handle test samples
    # that never appeared in the training set.
    hstate = modelBest.predict(Xtest.T)
    predictTrue = self.catePredict(modelBest.emissionprob_, hstate, Xtest)
    print predictTrue
    return predictTrue
Xgenes = data.get("genes") #Les genes, une array de arrays Genome = data.get("genome") #le premier million de bp de Coli Annotation = data.get("annotation") ##l'annotation sur le genome ##0 = non codant, 1 = gene sur le brin positif ### Quelques constantes DNA = ["A", "C", "G", "T"] stop_codons = ["TAA", "TAG", "TGA"] n_states_m1 = 4 # syntaxe objet python: créer un objet HMM model1 = hmm.MultinomialHMM(n_components = n_states_m1,init_params='ste') #Sous question 1 #l'esperance de la loi géométrique est 1 / p donc pour p = a, on obtient que a = 1 / 200 bp #Pour calculer b, on prend la longueur moyenne d'un gène a priori a = 1.0 / 200.0 s = 0 for gene in Xgenes: s += len(gene) s = s / len(Xgenes) #la taille moyenne d'un gène est de taille s. Par un raisonnement similaire que pour le paramètre a, on a b = 1 /s b = 3.0 / s print('a : ', a) print('b : ', b) #print('Xgenes : ', Xgenes[0])
def train(self):
    """Fit a MultinomialHMM to the pooled observation sequences in self.p_state.

    For each sequence in self.p_state the number of distinct observations
    (ignoring the four trailing sentinel values) and the sequence length are
    recorded; the hidden-state count is the rounded mean of the former.

    Side effects: sets self.Ostate, self.Hstate_num, self.n and self.model.
    """
    # BUG FIX: range() objects are not item-assignable in Python 3, but both
    # buffers are written by index inside the loop below — materialize them
    # as lists (also a no-op copy on Python 2).
    Hstate_num = list(range(len(self.p_state)))  # distinct observations per sequence
    Ostate_num = list(range(len(self.p_state)))  # length of each sequence
    Ostate = []  # collected observation sequences
    print("my_test********************self.p_state")
    print(self.p_state)
    for (index, value) in enumerate(self.p_state):
        # value is one observation sequence, e.g. [[78], [46], [78]]
        Ostate.append(value)
        tmp_value = copy.deepcopy(value)
        # drop the four trailing sentinel observations (0 1 2 3)
        tmp_value.pop()
        tmp_value.pop()
        tmp_value.pop()
        tmp_value.pop()
        Hstate_num[index] = len(
            set(np.array(tmp_value).reshape(
                1, len(tmp_value))[0]))
        Ostate_num[index] = len(value)
    self.Ostate = Ostate
    self.Hstate_num = Hstate_num
    # hidden-state count = rounded mean of the per-sequence distinct counts
    self.n = int(round(np.array(Hstate_num).mean()))
    print("my_test******************隐藏状态数")
    print(self.n)
    model = hmm.MultinomialHMM(n_components=self.n, n_iter=1000, tol=0.01)
    print("my_test************************self.Ostate")
    print(self.Ostate)
    print("my_test************************Ostate_num")
    print(Ostate_num)
    # Concatenate all sequences; `lengths` tells hmmlearn where each one ends.
    t_list = []
    for item in self.Ostate:
        t_list = t_list + item
    model.fit(np.array(t_list).reshape(-1, 1), lengths=Ostate_num)
    self.model = model
    print("my_test**************************m.startprob_")
    print(model.startprob_)
    print("my_test**************************m.transmat_")
    print(model.transmat_)
    print("my_test**************************m.emissionprob_")
    print(model.emissionprob_)
# Toy HMM: sample an observation sequence (Dribble/Pass/Shoot) from a
# hand-specified 2-state model (Healthy/Injured), then refit a fresh model
# to the samples and compare the recovered parameters.
from hmmlearn import hmm
import numpy as np

startprob = np.array([1.0, 0.0])                 # always start Healthy
transmat = np.array([[0.7, 0.3], [0.5, 0.5]])
emission_probs = np.array([[0.2, 0.1, 0.7], [0.3, 0.6, 0.1]])

model = hmm.MultinomialHMM(n_components=2)
model.startprob_ = startprob
model.transmat_ = transmat
model.emissionprob_ = emission_probs

# sample the model - X is the observed values (Dribble, Pass & Shoot sequence)
# and Z is the "hidden" states (Healthy & Injured sequence)
samples = 300
iters = 10000
print("With %d samples and %d iterations:" % (samples, iters))
X, Z = model.sample(samples)

# Make an HMM instance and execute fit
newModel = hmm.MultinomialHMM(n_components=2, n_iter=iters).fit(X)
print("Original Model:")
print("Transition matrix")
print(transmat)
print("Emission probabilities")
print(emission_probs)
print("---------------------------------")
print("Fitted Model:")
print("Transition matrix")
def main(): ## READ FILE ## string = readFile() ## CLEAN TEXT ## string = cleanString(string) ## Keys definition and keyboard arrangement ## KEYS, keyboardArrangement = keysAndArrange() ## NOISY FILE ## cEmissions = noisyFileCreator(string, keyboardArrangement, KEYS) ## SPLIT DATA ## clean = string.split() noisy = open("noisy.txt", "r").read().split() X_train, X_test, y_train, y_test = split_data(clean, noisy) ## CREATING HMM MODEL## states = KEYS symbols = KEYS initial = np.array([1 / 26 for i in states]) model = hmm.MultinomialHMM(n_components=len(KEYS)) model.startprob_ = initial model.transmat_ = vTransitions(X_train, KEYS) model.emissionprob_ = vEmissions(cEmissions) ## VITERBIE ## vResult = viterbie(KEYS, model, y_test) ## TRUE NEGATIVE ## TN = 0 for i in range(len(X_test)): for j in range(len(X_test[i])): if X_test[i][j] == vResult[i][j] and X_test[i][j] == y_test[i][j]: TN += 1 print("TRUE NEGATIVE = " + str(TN)) ## FALSE POSITIVE ## FP = 0 for i in range(len(X_test)): for j in range(len(X_test[i])): if X_test[i][j] != vResult[i][j] and X_test[i][j] == y_test[i][j]: FP += 1 print("FALSE POSITIVE = " + str(FP)) ## TRUE POSITIVE ## TP = 0 for i in range(len(X_test)): for j in range(len(X_test[i])): if X_test[i][j] == vResult[i][j] and X_test[i][j] != y_test[i][j]: TP += 1 print("TRUE POSITIVE = " + str(TP)) ## FALSE NEGATIVE ## FN = 0 for i in range(len(X_test)): for j in range(len(X_test[i])): if X_test[i][j] != vResult[i][j] and X_test[i][j] != y_test[i][j]: FN += 1 print("FALSE NEGATIVE = " + str(FN)) print() ## PRECISION RECALL ## print("PRECISION = " + str(TP / (TP + FN))) print("RECALL = " + str(TP / (TP + FP)))
# Load (or train and cache) a 10-state word HMM, then pretty-print sonnets
# as LaTeX with one color per hidden state. Python 2 file (see final print).
with warnings.catch_warnings():
    # hmmlearn emits DeprecationWarnings on import; silence them
    warnings.filterwarnings("ignore",category=DeprecationWarning)
    from hmmlearn import hmm

import lang
import utils

dictionary, X, lengths = utils.parseInput()
trainData = np.array([X]).reshape(-1, 1)

# NOTE(review): bare `except:` swallows every failure (including KeyboardInterrupt)
# and silently retrains — narrowing to the load errors would be safer.
try:
    model = joblib.load("model.pkl")
except:
    model = hmm.MultinomialHMM(n_components=10, n_iter=3, verbose=True)
    model.fit(trainData, lengths=lengths)
    joblib.dump(model, "model.pkl")


def printSonnet(sonnet):
    """Print one sonnet; each word is wrapped in a \\textcolor{state}{word}
    LaTeX macro, and the closing couplet (lines 13-14) is indented."""
    for i, line in enumerate(sonnet):
        padding = ''
        if i > 11:
            padding = ' '
        words = ["\\textcolor{" + str(s) + "}{" + w + "}" for w, s in line]
        # words = [w for (w, _) in line]
        # words[0] = words[0].capitalize()
        print(padding + ' '.join(words))
        # print ' '.join([w for (w, _) in currLine]), nextWord


print ' '
# Fit a 2-state MultinomialHMM to a short binary observation sequence and
# inspect convergence and the learned transition matrix.
import numpy as np
from hmmlearn import hmm

# Set random seed for reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Create a Multinomial HMM
    hmm_model = hmm.MultinomialHMM(n_components=2, n_iter=100, random_state=1000)

    # Define a list of observations (column vector of 0/1 symbols)
    observations = np.array([[0], [1], [1], [0], [1], [1], [1], [0], [1], [0],
                             [0], [0], [1], [0], [1], [1], [0], [1], [0], [0],
                             [1], [0], [1], [0], [0], [0], [1], [0], [1], [0],
                             [1], [0], [0], [0], [0], [0]], dtype=np.int32)

    # Fit the model using the Forward-Backward algorithm
    hmm_model.fit(observations)

    # Check the convergence
    print('Converged: {}'.format(hmm_model.monitor_.converged))

    # Print the transition probability matrix
    print('\nTransition probability matrix:')
    print(hmm_model.transmat_)

    # Create a test sequence (truncated here: the array literal is cut off
    # at the end of this chunk)
    sequence = np.array([[1], [1], [1], [0], [1], [1], [1], [0], [1], [0],
                         [1], [0], [1], [0], [1], [1], [0], [1],
nearestL = 99 # Iterate over number_k_train on the right side for t in range(0, centerL.__len__()): # Calculate distance between points, test and train distL = math.sqrt((ZL[i, 0] - centerL[t, 0])**2 + (ZL[i, 1] - centerL[t, 1])**2) # Get the nearest distance if distL < nearestL: nearestL = distL aux = t secuenceL.append(aux) full_secuenceL[s] = secuenceL # Right model modelR = hmm.MultinomialHMM(2, verbose=True, n_iter=20) modelR.start_probability = np.array([0.6, 0.4]) #Numeros aleatorios modelR.transition_probability = np.array([[0.5, 0.5], [0.5, 0.5]]) #Poner numeros aleatorios modelR.emissionprob = np.array([[0.1, 0.5, 0.4], [0.5, 0.3, 0.2]]) #Numeros aleatorios X = np.asarray(full_secuenceL) #lengths = list(map(lambda x : len(x), X)) X = np.hstack(X) X = X.reshape(len(X), 1) modelR.fit(X, lengthsL)
def Question3():
    """Train two 2-state, 2-symbol HMMs from identical initial parameters on
    two training corpora (the second is the first with a/b swapped), then
    score the test strings aababbb and bbabaaa under both models.
    Symbols: 0 = 'a', 1 = 'b'.
    """
    ########################################################
    # 1. Create HMM with the library
    ########################################################
    states = ["State_1", "State_2"]
    n_states = len(states)
    observations = ["O1", "O2"]
    n_observations = len(observations)
    # init_params="" keeps the hand-set parameters as the starting point
    model = hmm.MultinomialHMM(n_components=n_states, init_params="",
                               n_iter=50, algorithm='map', tol=0.00001)
    model.startprob_ = np.array([0.31, 0.69])
    model.transmat_ = np.array([[0.40, 0.60], [0.52, 0.48]])
    model.emissionprob_ = np.array([[0.49, 0.51], [0.40, 0.60]])

    ########################################################
    # 2. Learn the HMM with the following sample
    #    L1 = {aaabb; abaabbb; aaababb; aabab; ab}
    ########################################################
    sequence1 = np.array([[0, 0, 0, 1, 1]]).T
    sequence2 = np.array([[0, 1, 0, 0, 1, 1, 1]]).T
    sequence3 = np.array([[0, 0, 0, 1, 0, 1, 1]]).T
    sequence4 = np.array([[0, 0, 1, 0, 1]]).T
    sequence5 = np.array([[0, 1]]).T
    sample = np.concatenate(
        [sequence1, sequence2, sequence3, sequence4, sequence5])
    print("sample: ", sample)
    lengths = [
        len(sequence1), len(sequence2), len(sequence3), len(sequence4),
        len(sequence5)
    ]
    model.fit(sample, lengths)
    # Model obtained after training:
    print(model.transmat_)
    print(model.startprob_)
    print(model.emissionprob_)

    #######################################################
    # 3. Same initialization, trained on the a/b-swapped corpus
    #######################################################
    states = ["State_1", "State_2"]
    n_states = len(states)
    observations = ["O1", "O2"]
    n_observations = len(observations)
    model2 = hmm.MultinomialHMM(n_components=n_states, init_params="",
                                n_iter=50, algorithm='map', tol=0.00001)
    model2.startprob_ = np.array([0.31, 0.69])
    model2.transmat_ = np.array([[0.40, 0.60], [0.52, 0.48]])
    model2.emissionprob_ = np.array([[0.49, 0.51], [0.40, 0.60]])
    # NOTE: these rebind sequence1..5 — from here on they hold the SECOND corpus.
    sequence1 = np.array([[1, 1, 1, 0, 0]]).T
    sequence2 = np.array([[1, 0, 1, 1, 0, 0]]).T
    sequence3 = np.array([[1, 1, 1, 0, 1, 0, 0]]).T
    sequence4 = np.array([[1, 1, 0, 1, 1, 0]]).T
    sequence5 = np.array([[1, 1, 0, 0]]).T
    sample = np.concatenate(
        [sequence1, sequence2, sequence3, sequence4, sequence5])
    lengths = [
        len(sequence1), len(sequence2), len(sequence3), len(sequence4),
        len(sequence5)
    ]
    model2.fit(sample, lengths)
    print(model2.transmat_)
    print(model2.startprob_)
    print(model2.emissionprob_)

    #######################################################
    # 5. Compute the probabilities of the strings aababbb and bbabaaa.
    #    Are the results intuitive?
    #######################################################
    sequence_1 = np.array([[0, 0, 1, 0, 1, 1, 1]]).T  # aababbb
    sequence_2 = np.array([[1, 1, 0, 1, 0, 0, 0]]).T  # bbabaaa
    # BUG FIX: score the test strings sequence_1/sequence_2 defined above.
    # The original scored sequence1/sequence2, which by this point hold the
    # second training corpus, not the test strings.
    p_extend = model.score(sequence_1)
    # print("log prob for first model first sequence: ", (p_extend))
    print("prob for first model first sequence: ", np.exp(p_extend))
    p_extend = model.score(sequence_2)
    # print("log prob for first model second sequence: ", (p_extend))
    print("prob for first model second sequence: ", np.exp(p_extend))
    p_extend = model2.score(sequence_1)
    # print("log prob for second model first sequence: ", (p_extend))
    print("prob for second model fist sequence: ", np.exp(p_extend))
    p_extend = model2.score(sequence_2)
    # print("log prob for second model second sequence: ", (p_extend))
    print("prob for second model second sequence: ", np.exp(p_extend))
# Three-dice example:
#   n_components = 3  -> one hidden state per die
#   the 8 emission symbols are the die faces (feature != symbol here)

# Initial distribution: each die equally likely to be picked first.
startprob = np.full(3, 1.0 / 3.0)
# Transition matrix: uniform — every die equally likely next.
transmat = np.full((3, 3), 1.0 / 3.0)
# Emission matrix: row support reflects the face count of each die (6, 4, 8).
faces = np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
                  [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0],
                  [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]])
emissionprob = faces / faces.sum(axis=1, keepdims=True)

hmmdice = hmm.MultinomialHMM(n_components=3, algorithm="map")
hmmdice.startprob_ = startprob
hmmdice.transmat_ = transmat
hmmdice.emissionprob_ = emissionprob

# Observed face sequence as a column vector (np.array([[...]]).T is equivalent).
rolls = [1, 6, 3, 5, 2, 7, 3, 5, 2, 4, 3, 6, 1, 5, 4]
X = np.array(rolls).reshape(-1, 1)

# Question A: most likely hidden die sequence via Viterbi.
prob, rst = hmmdice.decode(X)
logger.info("\n%s" % hmmdice.startprob_)
logger.info("\n%s" % hmmdice.transmat_)
logger.info("\n%s" % hmmdice.emissionprob_)
logger.info(hmmdice.predict(X))
# Question B: decoding log-probability and state path.
logger.info(prob)
logger.info(rst)
#-*-coding: utf-8 -*- #@author:tyhj from __future__ import division import numpy as np from hmmlearn import hmm states = ['Rainy', 'Sunny'] n_states = len(states) observations = ['walk', 'shop', 'clean'] n_observations = len(observations) start_probability = np.array([0.6, 0.4]) transition_probability = np.array([[0.7, 0.3], [0.4, 0.6]]) emission_probability = np.array([[0.1, 0.4, 0.5], [0.6, 0.3, 0.1]]) model = hmm.MultinomialHMM(n_components=n_states, init_params='') model.startprob_ = start_probability model.transmat_ = transition_probability model.emissionprob_ = emission_probability #predict a sequence of hidden states based on visible states bob_says = np.array([[0, 2, 1, 1, 2, 0]]).T model = model.fit(bob_says) logprob, alice_hears = model.decode(bob_says, algorithm='viterbi') print 'Bob says:', ','.join(map(lambda x: observations[x], bob_says)) print 'Alice hears:', ','.join(map(lambda x: states[x], alice_hears))
# Train one MultinomialHMM per malware family and accumulate a shared
# train/test split across families. (Enclosing definitions of `families`,
# `dataset`, `dataSelect2`, `count`, testX/testY/trainX/trainY and
# `all_models` are outside this chunk; nesting reconstructed.)
for i in families:
    # columns 34.. are the 1000 feature columns; column 1 is the label
    X = dataset.iloc[dataSelect2[i][0]:dataSelect2[i][1], 34:]
    Y = dataset.iloc[dataSelect2[i][0]:dataSelect2[i][1], 1]
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                        test_size=0.25,
                                                        random_state=23)
    count += X_test.shape[0]
    testX = np.append(testX, X_test).reshape(count, 1000)
    testY = np.append(testY, Y_test)
    trainX = np.append(trainX, X_train).reshape(-1, 1000)
    trainY = np.append(trainY, Y_train)
    # NOTE(review): MultinomialHMM.fit expects a column of integer symbol
    # indices plus per-sequence lengths; fitting a raw (n_samples, 1000)
    # feature frame is suspicious — confirm the intended encoding.
    model = hmm.MultinomialHMM(n_components=10, n_iter=200, tol=0.5)
    model.fit(X_train)
    all_models.append(model)
    print("done")

le = LabelEncoder()
testY = le.fit_transform(testY)

filename = 'finalized_model.sav'
pickle.dump(all_models, open(filename, 'wb'))
filename = 'X_test.sav'
pickle.dump(testX, open(filename, 'wb'))
filename = 'Y_test.sav'
pickle.dump(testY, open(filename, 'wb'))
# X_d = np.asarray(list(map(int, X_d))) X_d = X_d.reshape((len(X_d), 1)) #For the very moment we are creating a hidden state for every possible location within the shop. Later we can think of narrowing it down hidden_states = {} count = 0 for x in range(-46, 31): #XRange for y in range(-10, 45): #YRange hidden_states[count] = (x, y) count += 1 n_hidden_states = len(hidden_states) # model = hmm.MultinomialHMM(n_components=n_hidden_states).fit(np.atleast_2d(X_d).T, len_samples) print(len_samples) print('Starting Training...') model = hmm.MultinomialHMM(n_components=n_hidden_states).fit(X_d, len_samples) print('Finished Training!') model.monitor_ model.monitor_.converged filename = 'model_all.pkl' joblib.dump(model, filename) stop = timeit.default_timer() print(stop-start) df_test = pd.read_csv('modified_training_day_log.csv') for i in range(len(macs)): # df1 = df_test[(df_test['controllerid'] == int(cid)) & (df_test['mac'] == macs[i])] df1 = df_test[(df_test['mac'] == macs[i])] df1.reset_index(inplace=True, drop=True) X = np.asarray(df1['pwr']) X_d = le.transform(X)
# Build (or load from cache) the sequence model selected by TYPE_GENERATOR.
if TYPE_GENERATOR == 'hmm':
    if not os.path.exists(HMM_PATH):
        # map each vocabulary word to its cluster label
        word_to_label = {}
        vocab = score_word_to_vec.vocab()
        for word in vocab:
            idx = vocab[word].index
            word_to_label[word] = labels[idx]

        def _text_to_seq(text):
            # one-column array of cluster labels, one row per word
            return np.array([[word_to_label[word]] for word in text])

        sequences = [_text_to_seq(text) for text in myScoreToWord.scores]
        # Now actually train
        type_gen_model = hmm.MultinomialHMM(n_components=16)
        lengths = [len(seq) for seq in sequences]
        sequences = np.concatenate(sequences)
        type_gen_model.fit(sequences, lengths=lengths)
        print(type_gen_model.transmat_)
        with open(HMM_PATH, "wb") as file:
            pickle.dump(type_gen_model, file)
    else:
        type_gen_model = None
        with open(HMM_PATH, "rb") as file:
            type_gen_model = pickle.load(file)
elif TYPE_GENERATOR == 'gru':
    if not os.path.exists(GRU_PATH):
        word_to_label = {}
        vocab = score_word_to_vec.vocab()
        for word in vocab:
#coding=utf-8 ''' Created on 2018-1-22 @author: 10205025 ''' import numpy as np from hmmlearn import hmm # 这里假设隐藏层数量为5个 model = hmm.MultinomialHMM(n_components=3, verbose=True, n_iter=1000, tol=0.001) # model = hmm.GaussianHMM(n_components=3, n_iter=1000, tol=0.1,covariance_type="full", verbose=True) X1 = np.array([[2], [1], [0]]) X2 = np.array([[2], [1], [0], [2]]) X3 = np.array([[2], [1], [1]]) X4 = np.array([[2], [1], [0]]) X5 = np.array([[1], [2], [0]]) X = np.vstack((X1, X2, X3, X4, X5)) print(X) # [[2] # [1] # [0] # [2] # [1] # [0] # [2]
def Question2():
    """Score the string abbaa under a fixed 3-state HMM, then re-estimate the
    model with Baum-Welch for 1, 15 and ~convergence iterations, and finally
    with a 5-state model, reporting the string probability each time.
    Symbols: 0 = 'a', 1 = 'b'.
    """
    # 1. Compute the probability of the string abbaa
    states = ["State_1", "State_2", "State_3"]
    n_states = len(states)
    # CONSISTENCY FIX: the emission matrices below have two columns (symbols
    # a and b), so there are two observations, not three as originally listed.
    observations = ["O1", "O2"]
    n_observations = len(observations)
    model = hmm.MultinomialHMM(n_components=n_states, n_iter=10,
                               algorithm='map', tol=0.00001)
    model.startprob_ = np.array([0.5, 0.3, 0.2])
    model.transmat_ = np.array([[0.45, 0.35, 0.20],
                                [0.10, 0.50, 0.40],
                                [0.15, 0.25, 0.60]])
    model.emissionprob_ = np.array([[1, 0], [0.5, 0.5], [0, 1]])
    sequence1 = np.array([[0, 1, 1, 0, 0]]).T  # a b b a a
    logproba = model.score(sequence1)
    print("log probability of the string abbaa: ", logproba)
    print("probability of the string abbaa: ", np.exp(logproba))

    #############################################################################
    # 2. Apply Baum-Welch with only one iteration and check the probability
    #    of the string. init_params="" keeps the hand-set parameters as the
    #    starting point of the re-estimation.
    model = hmm.MultinomialHMM(n_components=n_states, init_params="",
                               n_iter=1, algorithm='map', tol=0.00001)
    model.startprob_ = np.array([0.5, 0.3, 0.2])
    model.transmat_ = np.array([[0.45, 0.35, 0.20],
                                [0.10, 0.50, 0.40],
                                [0.15, 0.25, 0.60]])
    model.emissionprob_ = np.array([[1, 0], [0.5, 0.5], [0, 1]])
    model.fit(sequence1)
    p_extend = model.score(sequence1)
    print("log One Iteration BaumWelch: ", p_extend)
    print("One Iteration BaumWelch: ", np.exp(p_extend))

    ###############################################################################
    # 3. Do the same thing after 15 iterations
    ###############################################################################
    model = hmm.MultinomialHMM(n_components=n_states, init_params="",
                               n_iter=15, algorithm='map', tol=0.00001)
    model.startprob_ = np.array([0.5, 0.3, 0.2])
    model.transmat_ = np.array([[0.45, 0.35, 0.20],
                                [0.10, 0.50, 0.40],
                                [0.15, 0.25, 0.60]])
    model.emissionprob_ = np.array([[1, 0], [0.5, 0.5], [0, 1]])
    model.fit(sequence1)
    p_extend = model.score(sequence1)
    print("log 15 Iterations BaumWelch: ", (p_extend))
    print("15 Iterations BaumWelch: ", np.exp(p_extend))

    ###############################################################################
    # 4. Try to obtain the result at convergence
    ###############################################################################
    model4 = hmm.MultinomialHMM(n_components=n_states, init_params="",
                                n_iter=150, algorithm='map', tol=0.00000001)
    model4.startprob_ = np.array([0.5, 0.3, 0.2])
    model4.transmat_ = np.array([[0.45, 0.35, 0.20],
                                 [0.10, 0.50, 0.40],
                                 [0.15, 0.25, 0.60]])
    model4.emissionprob_ = np.array([[1, 0], [0.5, 0.5], [0, 1]])
    model4.fit(sequence1)
    p_extend = model4.score(sequence1)
    print("log At convergence BaumWelch: ", (p_extend))
    print("At convergence BaumWelch: ", np.exp(p_extend))

    ###############################################################################
    # 5. Now create an HMM with 5 states with parameters initialized at any
    #    non-zero correct values.
    ###############################################################################
    model = hmm.MultinomialHMM(n_components=5, init_params="",
                               n_iter=120, algorithm='map', tol=0.00000001)
    model.startprob_ = np.array([0.5, 0.2, 0.1, 0.1, 0.1])
    model.transmat_ = np.array([[0.40, 0.30, 0.10, 0.10, 0.10],
                                [0.5, 0.10, 0.30, 0.05, 0.05],
                                [0.10, 0.20, 0.60, 0.05, 0.05],
                                [0.30, 0.40, 0.10, 0.10, 0.10],
                                [0.25, 0.25, 0.25, 0.10, 0.15]])
    model.emissionprob_ = np.array([[1, 0], [0.5, 0.5], [0.5, 0.5],
                                    [0.5, 0.5], [0, 1]])
    model.fit(sequence1)
    p_extend = model.score(sequence1)
    print("log At 5 states BaumWelch: ", (p_extend))
    print("At 5 states BaumWelch: ", np.exp(p_extend))
# Split each ciphertext sample into the first 800 symbols (train) and the
# rest (test). (The enclosing loop over samples and the definitions of
# `train`, `test`, `trainsample` are outside this chunk; nesting reconstructed.)
    train.append(trainsample[:800])
    test.append(trainsample[800:])

train = np.asarray(train)
test = np.asarray(test)
test = np.concatenate(test, axis=0)
#test = np.ravel(test)
print(train.shape, test.shape)

hidden_state = 2
# symbols = 26
# pi = np.random.dirichlet(np.ones(hidden_state), size=1)
# A = np.random.dirichlet(np.ones(hidden_state), size=hidden_state)
# B = np.random.dirichlet(np.ones(symbols), size=hidden_state)
model = hmm.MultinomialHMM(n_components=hidden_state, init_params='ste',
                           n_iter=10)
# model.startprob_ = pi
# model.transmat_ = A
# model.emissionprob_ = B

score = []
counter = 0
for cipher in train:
    input = np.concatenate(cipher, axis=0)  # flatten one ciphertext
    print("number of ciphertxt: ", counter)
    # NOTE(review): rebinding hidden_state to 26 inside the loop does not
    # affect `model`, which was built with 2 components — confirm intent.
    hidden_state = 26
    # symbols = len(input)
    # pi = np.random.dirichlet(np.ones(hidden_state), size=1)
    # A = np.random.dirichlet(np.ones(hidden_state), size=hidden_state)
    # B = np.random.dirichlet(np.ones(symbols), size=hidden_state)
def repeat_hmm_cv_simulation(X_fit, par_rep, n_cell, n_bin, n_trial,
                             n_components=4, n_folds=5, n_iter=500, tol=1e-05,
                             matlab_=False, eng=None, bin_size=1,
                             time_state=np.array([1,1,1]), options=None,
                             path_matlab=None):
    """Repeated cross-validated HMM fitting on binned spike symbols.

    For each random restart in par_rep (used as random_state), fit a
    multinomial HMM per CV fold — either with hmmlearn (matlab_=False) or
    MATLAB's hmmtrain via `eng` (matlab_=True) — decode the held-out trials,
    and keep the restart with the best mean BIC.

    X_fit    -- symbol sequence, n_trial*n_bin rows (trials concatenated)
    par_rep  -- iterable of random seeds, one per restart
    Returns (model, Z, state_prob, sum_stable_state, logprob, bic,
             score_rep, last_score) for the best restart.
    Python 2 module (print statements). Relies on module-level helpers
    summingto1, KFold, hmmtrain_matlab, hmmdecode_viterbi_matlab,
    stability_matrix, squeeze_stable_state.
    """
    n_rep = len(par_rep)
    n_symbols = len(np.unique(X_fit))
    symbols = np.unique(X_fit)
    score_rep = np.zeros(n_rep)  #BIC or loglikelihood
    last_score = -np.inf
    for i_rep in range(n_rep):
        random_state = par_rep[i_rep]
        init_params = 'e'
        # start in state 0; priors = near-identity/near-delta plus noise,
        # rows renormalized to sum to 1
        startprob_prior = np.concatenate([[1], np.zeros(n_components-1)])
        emissionprob_prior = np.concatenate([np.ones([n_components,1]), np.zeros([n_components, n_symbols-1])], axis=1) + np.abs(np.random.randn(n_components, n_symbols)*.2)
        transmat_prior = np.identity(n_components) + np.abs(np.random.randn(n_components, n_components)*.2)
        for i in range(n_components):
            emissionprob_prior[i] = summingto1(emissionprob_prior[i])
            transmat_prior[i] = summingto1(transmat_prior[i])
        # n_folds == 1 means train and test on all trials
        if n_folds == 1:
            kf = [(range(n_trial), range(n_trial))]
        else:
            kf = KFold(n_trial, n_folds=n_folds, shuffle=True, random_state=random_state)
        if not matlab_:
            model_ = hmm.MultinomialHMM(n_components=n_components, n_iter=n_iter, random_state=random_state, startprob_prior=startprob_prior, transmat_prior=transmat_prior, tol=tol, init_params=init_params, algorithm='viterbi')
        else:
            model_ = None
        logprob_pertrial = []
        logprob_pertrial += [[] for _ in range(n_folds)]
        logprob_ = np.zeros(n_folds)
        bic_ = np.zeros(n_folds)
        score_fr_ = np.zeros(n_folds)
        sum_stable_state_ = np.zeros([n_folds, n_components, time_state.shape[0]])
        Z_ = np.zeros(n_trial*n_bin) - 1           # -1 marks not-yet-decoded bins
        state_prob_ = np.zeros([n_trial*n_bin, n_components])
        for i_fold, (train_index, test_index) in enumerate(kf):
            ######## fit
            # row ranges of X_fit belonging to the training trials
            trial_train_index = []
            trial_train_index += [range(i_trial*n_bin,(i_trial+1)*n_bin) for i_trial in train_index]
            if not matlab_:
                #------------- hmmlearn
                trial_train_index = np.hstack(trial_train_index)
                length = np.repeat(n_bin, len(train_index))
                model_.fit(X_fit[trial_train_index], length)
                # emission columns 1.. are spike symbols; col 0 is "no spike"
                pred_fr = model_.emissionprob_[:,1:] / bin_size
            else:
                #----------- hmmtrain (MATLAB)
                X_fit_matrix = []
                X_fit_matrix += [X_fit[i] for i in trial_train_index]
                X_fit_matrix = np.array(X_fit_matrix)
                print "Spoken cell i_fold: ", np.unique(X_fit_matrix)
                transmat, emissionprob = hmmtrain_matlab(X_fit_matrix, transmat_prior, emissionprob_prior, symbols, tol, n_iter, eng, path_matlab)
                pred_fr = emissionprob[:,1:] / bin_size
                trial_train_index = np.hstack(trial_train_index)
            ######## predict
            trial_test_index = []
            trial_test_index += [range(i_trial*n_bin,(i_trial+1)*n_bin) for i_trial in test_index]
            print "rep:%d --- fold:%d" % (i_rep, i_fold)
            #print "Train trial: ", train_index
            #print "Test trial: ", test_index
            if not matlab_:
                #------------hmmlearn
                #trial_test_index = np.hstack(trial_test_index)
                #length_test = np.repeat(n_bin, len(test_index))
                #X_predict = X_fit[trial_test_index]
                #Z_[trial_test_index] = model_.predict(X_predict, length_test)
                #logprob_[i_fold], state_prob_[trial_test_index,:] = model_.score_samples(X_predict, length_test)
                ####### One trial per time
                for i, i_trial in enumerate(test_index):
                    Z_[trial_test_index[i]] = model_.predict(X_fit[trial_test_index[i]])
                    tmp, state_prob_[trial_test_index[i],:] = model_.score_samples(X_fit[trial_test_index[i]])
                    logprob_pertrial[i_fold] += [tmp]
                logprob_[i_fold] = np.mean(logprob_pertrial[i_fold])
                trial_test_index = np.hstack(trial_test_index)
            else:
                #-----------hmmtrain (MATLAB): decode one trial at a time
                for i, i_trial in enumerate(test_index):
                    state_prob_[trial_test_index[i],:], tmp, Z_[trial_test_index[i]] = hmmdecode_viterbi_matlab(X_fit[trial_test_index[i]], transmat, emissionprob, symbols, eng, path_matlab)
                    logprob_pertrial[i_fold] += [tmp]
                    #Z_[trial_test_index[i]] = hmmviterbi_matlab(X_fit[trial_test_index[i]], transmat, emissionprob, symbols, eng)
                logprob_[i_fold] = np.mean(logprob_pertrial[i_fold])
                trial_test_index = np.hstack(trial_test_index)
            print "Still to test: ", np.sum(Z_ == -1)
            # sanity check: train+test indices should tile 0..n_trial*n_bin-1
            print "check train/test: ", np.unique(np.diff(np.sort(np.concatenate([trial_test_index,trial_train_index]))))
            #score_fr_[i_fold] = score_firing_rate(options, pred_fr)
            # BIC penalty: free transition + emission parameters
            bic_[i_fold] = logprob_[i_fold] - ((n_components**2 + n_components*(n_symbols-2)) / 2 * np.log(n_bin*len(test_index)))
            sum_stable_state_[i_fold,:,:] = stability_matrix(state_prob_[trial_test_index,:], Z_[trial_test_index], n_bin, len(test_index), bin_size, time_state)
        score_rep[i_rep] = np.mean(bic_)
        print "Score BIC: ", score_rep[i_rep]
        print "Score firing rate: ", score_fr_.mean()
        print "Stable state: ", squeeze_stable_state(sum_stable_state_)*100
        # keep the best restart so far (higher BIC score = better here)
        if score_rep[i_rep] > last_score:
            #change variable to mean according to the score measure
            if matlab_:
                Z_ = Z_ - 1  #matlab starts from 1
            Z = Z_
            logprob = logprob_
            state_prob = state_prob_
            bic = bic_
            model = model_
            sum_stable_state = sum_stable_state_
            score_fr = score_fr_
            last_score = score_rep[i_rep]
    print "-----------------------------------"
    print "Mean BIC: ", last_score
    print "Mean score firing rate: ", np.mean(score_fr)
    print "std score firing rate: ", np.std(score_fr)
    print "logLik: ", logprob
    print "BIC: ", bic
    #print "stable state sum: ", sum_stable_state
    return model, Z, state_prob, sum_stable_state, logprob, bic, score_rep, last_score
# Ball-and-box HMM: decode the most likely box sequence behind the observed
# ball colors. (`n_states`, `states`, `observations` are defined above this
# chunk.)
start_probability = np.array([0.2, 0.4, 0.4])

transition_probability = np.array([
    [0.5, 0.2, 0.3],
    [0.3, 0.5, 0.2],
    [0.2, 0.3, 0.5]
])

emission_probability = np.array([
    [0.5, 0.5],
    [0.4, 0.6],
    [0.7, 0.3]
])

model = hmm.MultinomialHMM(n_components=n_states)
model.startprob_=start_probability
model.transmat_=transition_probability
model.emissionprob_=emission_probability

# Decoding problem: given the model parameters and an observation sequence,
# find the hidden state sequence.
# Method 1: decode (Viterbi)
seen = np.array([[0,1,0]]).T
logprob, box = model.decode(seen, algorithm="viterbi")
print("The ball picked:", ", ".join(map(lambda x: observations[x], seen)))  # the given observations
print("The hidden box:", ", ".join(map(lambda x: states[x], box)))  # most likely hidden sequence
# Method 2: predict
box2 = model.predict(seen)
# (Tail of a sequence-encoding function whose start is outside this chunk;
# indentation reconstructed.)
        if seq[i] == "D":
            seq_data.append([2])
    return seq_data


states = ["a", "b", "d"]
n_states = 3
obs = ["A", "B", "D"]
n_obs = 3
start_arr = numpy.array([0.3333,0.3333,0.3334])  # near-uniform start
trans_mat = numpy.array([[AA,AB,AD],[BA,BB,BD],[DA,DB,DD]])
# NOTE(review): emiss_mat reuses the transition variables AA..DD verbatim —
# looks like a copy-paste; confirm the emissions are really meant to equal
# the transitions.
emiss_mat = numpy.array([[AA,AB,AD],[BA,BB,BD],[DA,DB,DD]])
#print(trans_mat)
# init_params="" keeps the hand-set matrices as the training start point
model = hmm.MultinomialHMM(n_components = n_states, verbose = True, n_iter = int(n_itera), tol = 0.001, init_params = "")
model.startprob_ = start_arr
model.transmat_ = trans_mat
model.emissionprob_ = emiss_mat
# renormalize each row to sum to exactly 1
model.transmat_ = model.transmat_ / model.transmat_.sum(axis=1)[:, numpy.newaxis]
model.emissionprob_ = model.emissionprob_ / model.emissionprob_.sum(axis=1)[:, numpy.newaxis]
#path = opath + "/" + timetag + "/strings-raw.txt"
#print(path)
#print(type(emiss_mat))
#f = open(path)
lines = all_lines
# Build annotation timelines for two actors and fit three HMMs: one on the
# combined data and one per actor.
listA = getTimelineAnnotation(label_person_A, stepsize, timestamp_first, timestamp_last)
listB = getTimelineAnnotation(label_person_B, stepsize, timestamp_first, timestamp_last)

# X = sequence from actors
# X[actor] = sequence for each
# X = listA + listB
# lengths_list = [len(listA), len(listB)]
X = listA + listB
# NOTE(review): lengths_list is never passed to fit(), so the two actors'
# sequences are treated as one continuous sequence — confirm intent.
lengths_list = [len(listA), len(listB)]

# make a new HMM on the combined sequence
hmm_all = hmm.MultinomialHMM(n_components=num_components)
model_all = hmm_all.fit(X)
prediction_all = hmm_all.predict(X)
decode_all = hmm_all.decode(X)

hmm_a = hmm.MultinomialHMM(n_components=num_components)
model_a = hmm_a.fit(listA)
prediction_a = hmm_a.predict(listA)

# NOTE(review): hmm_b is fitted and evaluated on X (both actors), not listB —
# likely a copy-paste slip from the hmm_all block; confirm.
hmm_b = hmm.MultinomialHMM(n_components=num_components)
model_b = hmm_b.fit(X)
prediction_b = hmm_b.predict(X)

customers_same = []
all_same = []
overall_graph = []
# Example of fitted HMM and sampling # 1- we create an HMM fitting it from data # 2- we extract some samples from the fitted model import numpy as np from hmmlearn import hmm np.random.seed(42) states = ["Rainy", "Sunny"] n_states = len(states) observations = ["walk", "shop", "clean"] n_observations = len(observations) train_data = [0, 2, 1, 1, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0] model = hmm.MultinomialHMM(n_components=n_states, n_iter=100) model.fit(np.array([train_data]).T) print "start probs: ", model.startprob_ print "transmat: ", model.transmat_ print "emissionprob_", model.emissionprob_ X, Z = model.sample(5) print "X:", X print "Z:", Z print "States:", ", ".join(map(lambda x: states[x], Z)) print "Performed actions:", ", ".join( map(lambda x: observations[x], np.squeeze(np.asarray(X))))
# Fit a 10-state HMM to a morphologically-analyzed livedoor article, then
# sample 100 word ids from it and map them back to words via the dictionary.
from hmmlearn import hmm
import pickle
from text_analysis import TextAnalysis

input_dir = "./datasets/livedoor/dokujo-tsushin/"
input_data = input_dir + "dokujo-tsushin-4778030.txt"
X = TextAnalysis.mecab_analysis(input_data)

# verbose=True prints each EM iteration.
# model = hmm.MultinomialHMM(n_components=10, n_iter=1000, verbose=True)
model = hmm.MultinomialHMM(n_components=10, n_iter=1000)
model.fit(X)
L, Z = model.decode(X)
# print(model.transmat_)  # transition probabilities
# print(model.monitor_)   # history holds the last two log-likelihoods
sample = model.sample(n_samples=100)

# load the word-id dictionary
with open('./datasets/livedoor/livedoor_dict.pkl', 'rb') as f:
    livedoor_dict = pickle.load(f)

# generate text from the sampled ids by reverse dictionary lookup
# (chunk is cut off inside this loop)
sample_id = sample[0].flatten()
sample_text = ""
for id in sample_id:
    for key in livedoor_dict:
        if id == livedoor_dict[key]:
import numpy as np
import hmmlearn.hmm as hmm
import math

# Toy HMM: hidden states are the baby's activities, observations its behaviour.
status = ['吃', '睡']
observation = ['哭', '没精神', '找妈妈']
n_status = len(status)
n_observation = len(observation)

# Fixed model parameters: initial distribution, transition matrix, emission matrix.
start_probability = np.array([0.3, 0.7])
transition_probability = np.array([[0.1, 0.9],
                                   [0.8, 0.2]])
emission_probability = np.array([[0.7, 0.1, 0.2],
                                 [0.3, 0.5, 0.2]])

# Build the HMM directly from the known parameters — no fitting involved.
model = hmm.MultinomialHMM(n_components=n_status)
model.startprob_ = start_probability
model.transmat_ = transition_probability
model.emissionprob_ = emission_probability

# Observed behaviour sequence, reshaped to the (n_samples, 1) column hmmlearn expects.
Actions = np.array([[0, 1, 2]])
Action_model = Actions.T

# score() returns a log-likelihood; exponentiate it to report a probability.
score = model.score(Action_model, lengths=None)
Action = ','.join(map(lambda x: observation[x], Actions[0]))
print("\t\"", Action, "\"的概率为:", end='')
print('\t', math.exp(score) * 100, '%')

# Posterior state probabilities for every position in the sequence.
predict_proba = model.predict_proba(Action_model, lengths=None)

# Viterbi decoding of the most likely hidden-state sequence.
def main():
    """Train a MultinomialHMM on symbol sequences read from `train_file`.

    Reads the training file in parallel chunks, label-encodes the symbols,
    fits an HMM, and pickles both the encoder and the model to MODEL_PATH.
    Relies on module-level names: train_file, chunk_lines, MODEL_PATH,
    read_distributed, t (a timing helper), preprocessing, hmm, mp, linecache,
    pickle, np.
    """
    le = preprocessing.LabelEncoder()
    x = np.array([])
    x_len = np.array([])

    line_cache = linecache.getlines(train_file)
    count = len(line_cache)
    number = int(count / chunk_lines)
    print(count)
    print(number)
    t()

    pool = mp.Pool(processes=10)
    jobs = []
    for i in range(10):
        # NOTE(review): apply_async's second argument is the *args* tuple, so
        # each line of the chunk becomes a separate positional argument of
        # read_distributed. Confirm read_distributed is declared as
        # read_distributed(*lines); otherwise this should be
        # apply_async(read_distributed, (chunk,)).
        jobs.append(
            pool.apply_async(
                read_distributed,
                line_cache[i * chunk_lines:i * chunk_lines + chunk_lines]))
    for job in jobs:
        x = np.append(x, job.get()[0])
        x_len = np.append(x_len, job.get()[1])
    pool.close()

    # Collect the distinct symbols. The original scanned a growing list with
    # `in` (O(n^2) overall); a set yields the same distinct values in O(n).
    # LabelEncoder.fit sorts its input internally, so ordering is irrelevant.
    labels = list(set(x))
    le.fit(labels)
    print('**************************************')
    t()
    print(le.classes_)

    # Persist the fitted encoder so inference can reuse the same mapping.
    model_le_name = MODEL_PATH + 'le.pkl'
    with open(model_le_name, 'wb') as model_file:
        pickle.dump(le, model_file)
    print("le saved")

    # NOTE(review): le.transform is given a 2-D column vector here; newer
    # scikit-learn versions expect a 1-D array — confirm against the
    # installed sklearn version.
    x = x[:, np.newaxis]
    new_x = le.transform(x)
    X = np.array(new_x).astype('int32')
    X = X.reshape(-1, 1)
    print(len(X))

    # Per-sequence lengths, as required by hmmlearn's fit(X, lengths).
    X_len = np.array(x_len).astype('int32')
    print(sum(X_len))

    number_of_status = 100
    print('¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥')
    t()
    print('Start Training')
    model = hmm.MultinomialHMM(n_components=number_of_status,
                               n_iter=10000,
                               tol=0.01,
                               verbose=True)
    model.fit(X, X_len)
    print('**************************************')
    print(model.transmat_)

    # Persist the trained HMM alongside the encoder.
    model_name = MODEL_PATH + 'hmm.pkl'
    with open(model_name, 'wb') as model_file:
        pickle.dump(model, model_file)
    print("hmm saved")
if __name__ == "__main__":
    n_states = 4
    data_filename = [
        'data/LBG_VQ/train0.txt', 'data/LBG_VQ/train1.txt',
        'data/LBG_VQ/train2.txt', 'data/LBG_VQ/train3.txt',
        'data/LBG_VQ/train4.txt', 'data/LBG_VQ/train5.txt',
        'data/LBG_VQ/train6.txt', 'data/LBG_VQ/train7.txt',
        'data/LBG_VQ/train8.txt', 'data/LBG_VQ/train9.txt'
    ]
    best_models_set = []
    # Train one HMM per digit on its multi-dimensional observation sequences.
    for digit in range(10):
        scores_set = []
        onemodel_parms_set = []
        model = hmm.MultinomialHMM(n_components=n_states, n_iter=500, tol=0.01)
        O = train_data_create(data_filename[digit])  # observation sequence
        print("##########", digit, digit, digit, "##########")
        print(O.shape)
        # Fit ten times and keep the best-scoring run, to reduce the impact
        # of converging to a poor local maximum.
        for _ in range(10):
            model.fit(O)
            onemodel_parms_set.append({
                'pi': model.startprob_,
                'A': model.transmat_,
                'B': model.emissionprob_,
            })
            scores_set.append(model.score(O))
        max_index = scores_set.index(max(scores_set))
        print("数字%d的最佳模型索引%d" % (digit, max_index))
transm = np.array([[0.8, 0.2],
                   [0.4, 0.6]])

# Human-readable emission table; kept for documentation — the model itself
# consumes the numeric matrix `emism` below.
emission_probability = {
    'healthy': {'no-symptoms': 0.6, 'cold': 0.3, 'dizzy': 0.1},
    'sick': {'no-symptoms': 0.1, 'cold': 0.3, 'dizzy': 0.6},
}
emism = np.array([[0.6, 0.3, 0.1],
                  [0.1, 0.3, 0.6]])

# Assemble the HMM from the fixed parameters (Viterbi decoding algorithm).
hmm_model = hmm.MultinomialHMM(n_components=len(states), algorithm='viterbi')
hmm_model.startprob_ = startm
hmm_model.transmat_ = transm
hmm_model.emissionprob_ = emism

# Evaluation: for each single-symbol sequence, what is its probability under
# the model? score() yields a log-likelihood, so exponentiate to get the
# probability. Looping over the three symbols prints exactly what the three
# original copy-pasted blocks printed.
for symbol in range(3):
    y = np.array([[symbol]])
    print('Probability of first observation in a sequence being',
          observations[symbol], 'regardless of state is',
          math.exp(hmm_model.score(y)))
[0.612, 0, 0, 0, 0.081, 0, 0.307], #5 [0.697, 0, 0, 0, 0.089, 0, 0.214] ]) #6 #emission states healthy(1), hospitilization(2), death(3) emission_prob = np.array([ [0, 0, 0], #1 [0, 0, 0], #2 [0, 0, 0], #3 [0, 0, 0], #4 [0, 0, 0.14], #5 [0.279, 0.325, 0.091], #6 [0.35, 0.357, 0.089] ]) #7 #create an instance of the model model = hmm.MultinomialHMM(n_components=7) model.startprob_ = startprob model.transmat_ = transmat model.emissionprob_ = emission_prob #draw samples given the startprob, transmat and emissionprob X, Z = model.sample(50) X_array = np.array(X) print('sampled states:', list(Z)) print() print('sampled outcomes:', list(X_array.flatten())) #plt the emmission data plt.plot(X, '.-', label='observations', ms=6, mfc='orange', alpha=0.7) #indicate emission emit = ['healthy', 'hospitilization', 'dead'] plt.xlabel('Iteration')
# Toy HMM: three hidden states ("medals") emitting four observable gems.
states = ["Gold", "Silver", "Bronze"]
n_states = len(states)

observations = ["Ruby", "Pearl", "Coral", "Sapphire"]
n_observations = len(observations)

start_probability = np.array([0.3, 0.3, 0.4])
transition_probability = np.array([[0.1, 0.5, 0.4],
                                   [0.4, 0.2, 0.4],
                                   [0.5, 0.3, 0.2]])
emission_probability = np.array([[0.4, 0.2, 0.2, 0.2],
                                 [0.25, 0.25, 0.25, 0.25],
                                 [0.33, 0.33, 0.33, 0]])

# Assign pi (start), A (transition) and B (emission) directly — no fitting.
model = hmm.MultinomialHMM(n_components=3)
model.startprob_ = start_probability
model.transmat_ = transition_probability
model.emissionprob_ = emission_probability

# Two observation sequences to evaluate and decode.
X1 = [0, 1, 2]
X2 = [0, 0, 0]

if __name__ == '__main__':
    # Same call order as before: likelihood then decoding, for X1 then X2.
    for seq in (X1, X2):
        calculateLikelyHood(model, seq)
        optimizeStates(model, seq)
from hmmlearn import hmm
import numpy as np

from hmm_classifier import HMM_classifier

# Synthetic data: 300 sequences, each 10 steps long with 2 features;
# labels are integers in [0, 10).
x = np.random.randint(0, 10, size=(300, 10, 2))
y = np.random.randint(0, 10, size=(300))

# Wrap a MultinomialHMM in the project's classifier and train it
# (classifier semantics live in hmm_classifier).
model = HMM_classifier(hmm.MultinomialHMM())
model.fit(x, y)

# Probability per label for one unseen sequence.
pred = model.predict_proba(np.random.randint(0, 10, size=(10, 2)))

# Label with the highest probability for another sequence.
pred = model.predict(np.random.randint(0, 10, size=(100, 2)))