def qz6():
    """Train an HMM on qz6.txt, round-trip its counts through a file,
    then Viterbi-decode a sample sentence.

    Side effects: reads 'qz6.txt', writes 'qz6.counts.txt'.
    """
    # Initialize a 3-state model and train it from the corpus file.
    model = hmm.Hmm(3)
    with open('qz6.txt', 'r') as f:
        model.train(f)
    # Persist the counts, then reload them (exercises the file round-trip).
    with open('qz6.counts.txt', 'w') as f:
        model.write_counts(f)
    model.read_counts_from_file("qz6.counts.txt")
    model.processing()
    # Solve the problem: decode the sample sentence.
    # (Bug fix: local was named `str`, shadowing the builtin.)
    words = 'the cat saw the saw'.split(" ")
    hmm.viterbi(words, model)
def test_dishonest_casino(self): '''Dishonest Casino Example.''' # Create transition probability matrix A = np.array([[0.99, 0.01], [0.01, 0.99]]) # Create observable probability distribution matrix. Casino biased toward "6" in state "1". B = statutil.scale_row_sums(np.array([[ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ], [ 1.0, 1.0, 1.0, 1.0, 1.0, 5.0 ]])) # Create set of all observable symbols V = [1, 2, 3, 4, 5, 6] # Instantiate an HMM, note Pi is uniform probability distribution by default m = hmm.HMM(2, A=A, B=B, V=V) Obs = [ 1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6 ] log_prob_Obs, Alpha, c = hmm.forward(m, Obs, scaling=1) assert_almost_equal(log_prob_Obs, -20.9468006, decimal=5, err_msg='Wrong observation probability') Q_star, _, _ = hmm.viterbi(m, Obs, scaling=1) assert_equal(Q_star, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'Wrong Viterbi path') Beta = hmm.backward(m, Obs, c) Gamma, Q_star = hmm.individually_optimal_states(Alpha, Beta) assert_almost_equal(Gamma, [[0.63711364302936, 0.6348934929050587, 0.6271179131667495, 0.6117100305977996, 0.5845543683193845, 0.5383975935172204, 0.46091113744414974, 0.3313982095474306, 0.28864618346708165, 0.27562909135388625, 0.27498372625848855, 0.26932891011973825], [0.36288635697064003, 0.3651065070949412, 0.3728820868332506, 0.38828996940220045, 0.4154456316806155, 0.4616024064827796, 0.5390888625558502, 0.6686017904525694, 0.7113538165329184, 0.7243709086461138, 0.7250162737415115, 0.7306710898802617]], decimal=5, err_msg='Wrong state probabilities') assert_equal(Q_star, [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], 'Wrong individually-optimal states')
def test_multi():
    """
    To test, A & B are designed in a specific way so we can be more assured
    the result is correct.

    in this test scenario:
    - observation 0 can only be produced by state 0
    - observation 1 can be produced by state 1 and 2
    - state 0 is highly likely to stay within state 0
    - state 1 is highly likely to jump to state 2
    - state 2 cannot transition back to state 0

    with the above in mind, so given the particular observation in the test
    y: [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0]
    we can expect a state sequence like
    [0, 0, 0, 0, 2, 2, 2, 2, 1, 0, 0, 0]
    """
    # Transition matrix: rows are from-states, columns are to-states.
    A = np.array([
        [0.5, 0.2, 0.3],
        [0.2, 0.1, 0.7],
        [0.0, 0.3, 0.7],
    ])
    # Emission matrix: state 0 emits only symbol 0; states 1/2 only symbol 1.
    B = np.array([
        [1., 0.],
        [0., 1.],
        [0., 1.]]
    )
    # Initial state distribution.
    pi = np.array([0.3, 0.3, 0.4])
    y = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0])
    x_seq_opt = viterbi(y, A, B, pi)
    np.testing.assert_array_equal(
        x_seq_opt, np.array([0, 0, 0, 0, 2, 2, 2, 2, 1, 0, 0, 0]))
def guessWord(obs, guessed_set): obs_num = 27 state_num = 26 states = ('a','b','c','d','e','f','g','h','i','j','k','l','m','n',\ 'o','p','q','r','s','t','u','v','w','x','y','z') remain_p = [[0 for c in range(len(obs))] for r in range(state_num)] for o_num in range(len(obs)): for s_num in range(state_num): if states[s_num] == obs[o_num] or states[s_num] not in guessed_set: remain_p[s_num][o_num] = 1 (letter_f, first_p, bi_p, tri_p) = initGame() emit_p = [[0 for c in range(obs_num)] for r in range(state_num)] for diagnol in range(state_num): emit_p[diagnol][diagnol] = 1 emit_p[diagnol][obs_num - 1] = letter_f[diagnol] predictions = hmm.viterbi(obs, states, remain_p, first_p, bi_p, tri_p, emit_p) print predictions return optimize(predictions)
def test(data_folder):
    """Run forward/Viterbi regression checks against every pickled case.

    Each pickle in data_folder holds A, B, pi, O and the expected
    forward/Viterbi answers. Prints a per-file verdict and a summary.
    """
    everything_passed = True
    for filename in sorted(os.listdir(data_folder)):
        with open(os.path.join(data_folder, filename), 'rb') as handle:
            case = pickle.load(handle)

        A, B, pi, O = case['A'], case['B'], case['pi'], case['O']
        expected_prob = case['forward_answer']
        expected_path = case['viterbi_answer']

        # Forward likelihood within a 1e-4 relative tolerance.
        forward_valid = abs(forward(A, B, pi, O) - expected_prob) <= expected_prob * 10**-4
        # Viterbi path must match the expected path element-for-element.
        matches = (expected_path == viterbi(A, B, pi, O)).astype(int).sum()
        viterbi_valid = matches == expected_path.shape[0]

        everything_passed = everything_passed and forward_valid and viterbi_valid
        print(filename)
        print('Forward:', forward_valid)
        print('Viterbi:', viterbi_valid)
        print()

    if everything_passed:
        print('PASSED all tests')
    else:
        print('Some of the tests FAILED')
def train_viterbi(X, A, E):
    """Viterbi-training skeleton: re-estimate A and E from decoded paths.

    X -- iterable of (sequence, label) pairs
    A -- transition probabilities as nested dicts {state: {state: p}}
    E -- emission probabilities as nested dicts {state: {symbol: p}}
    Returns (new_A, new_E); the counting/normalisation steps are still
    TODOs, so the returned matrices are zero-filled for now.
    """
    # Posterior count matrices, zero-initialised with the same structure.
    new_A = {k: {l: 0 for l in A[k]} for k in A}
    new_E = {k: {s: 0 for s in E[k]} for k in E}

    # Decode the state path of every sequence in X with viterbi() from hmm.py.
    # Bug fix: the original passed the whole dataset X to viterbi() instead
    # of the current sequence; it also left a debug print in place.
    for seq, label in X:
        # pi = state path, P = Viterbi probability, V = Viterbi trellis
        pi, P, V = viterbi(seq, A, E)
        # TODO: count the transitions and emissions along pi
        # TODO: normalize the row sums of new_A and new_E

    return new_A, new_E
def test_dishonest_casino_larger_transition_p(self): '''Dishonest Casino Example.''' # Create transition probability matrix A = np.array([[0.9, 0.1], [0.1, 0.9]]) # Create observable probability distribution matrix. Casino biased toward "6" in state "1" B = statutil.scale_row_sums(np.array([[ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ], [ 1.0, 1.0, 1.0, 1.0, 1.0, 5.0 ]])) # Create set of all observable symbols V = [1, 2, 3, 4, 5, 6] # Instantiate an HMM, note Pi is uniform probability distribution by default m = hmm.HMM(2, A=A, B=B, V=V) Obs = [ 1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6 ] log_prob_Obs, Alpha, c = hmm.forward(m, Obs, scaling=1) assert_almost_equal(log_prob_Obs, -20.124, decimal=3, err_msg='Wrong observation probability') Q_star, _, _ = hmm.viterbi(m, Obs, scaling=1) assert_equal(Q_star, [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1], err_msg='Wrong Viterbi path') Beta = hmm.backward(m, Obs, c) Gamma, Q_star = hmm.individually_optimal_states(Alpha, Beta) assert_almost_equal(Gamma, [[0.8189770516168013, 0.8482906260695058, 0.8525027084764197, 0.8329611652077556, 0.7834127024175411, 0.6880018120129073, 0.5161970090643716, 0.2130207566284025, 0.12024202874950358, 0.10797060639721641, 0.15902649827833876, 0.14930464162738483], [0.18102294838319855, 0.15170937393049422, 0.14749729152358024, 0.16703883479224435, 0.21658729758245884, 0.31199818798709256, 0.4838029909356284, 0.7869792433715975, 0.8797579712504964, 0.8920293936027837, 0.8409735017216613, 0.8506953583726152]], decimal=5, err_msg='Wrong state probabilities') assert_equal(Q_star, [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1], 'Wrong individually-optimal states')
def get_estimates(state_space, gps_measurements_list, signal_measurements_list,
                  emission_variance, transition_decay, maximum_route_length,
                  base_locations, base_max_range):
    """Decode each route's state sequence with an HMM, plus a naive baseline.

    For every (gps, signal) measurement pair, builds transition and
    emission probabilities, Viterbi-decodes the route, and also records
    the spatially-closest-state estimate for comparison.
    Returns (estimated_states_list, naive_estimates_list).
    """
    hmm_estimates = list()
    baseline_estimates = list()
    route_pairs = zip(gps_measurements_list, signal_measurements_list)
    for route_idx, (gps_measurements, signal_measurements) in enumerate(route_pairs):
        print("Route #{}".format(route_idx + 1))

        print("Transition probabilities..")
        tp = transition_probabilties_by_weighting_route_length(
            state_space, transition_decay, maximum_route_length)

        print("Emission probabilities..")
        ranges = np.array([base_max_range] * base_locations.shape[0])
        ep = emission_probabilities(gps_measurements, emission_variance,
                                    signal_measurements, base_locations,
                                    ranges, state_space)

        print("Viterbi..")
        # Uniform prior over all states.
        pi = np.ones((len(state_space), )) / len(state_space)
        hmm_estimates.append(viterbi(tp, ep, pi))

        # Naive baseline: pick the state closest to each GPS fix.
        baseline_estimates.append(
            spatially_closest_states(gps_measurements, state_space))

    return hmm_estimates, baseline_estimates
def test_viterbi():
    """Check hmm.viterbi against known decodings for each test fixture."""
    # (fixture file, expected hidden-state path) pairs.
    expected_paths = [
        ('test/testcase46.txt', 'FFFFF'),
        ('test/testcase01.txt', 'AAABBAAAAA'),
        ('test/testcase02.txt',
         'AAAAAAAAAAAAAABBBBBBBBBBBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBAAA'),
        ('test/testcase03.txt',
         'ABACCBABBABABACCABCCBABAABBBAABABCCBABBABABACCCCCCCCCCBBBBBABACCBABBACCCCCCCCCCCCCCCCBABABACBABAACCC'),
        ('test/testcase04.txt',
         'CCCCCAAAAAAAAABABCAAAAAAABCCCAABAAAAAAAAAAABABAAABAAAAAAAAAAAAABABAAABAAAABAAABCABAAAABCAAABAAABCCCC'),
        ('test/testcase21.txt', 'AAABBAAAAA'),
        ('test/testcase23.txt',
         'CCCDABBBBBBBBBBBBBBBBBBBBBBCDACDACCCDABBBBBDACDACDABBBBBBBBBBBBBBBBBBBBBBBBBBBBBDADACCDADACCDADADADA'),
    ]
    for fixture, expected in expected_paths:
        emissions, model = fetch_hmm(Path(fixture))
        path = hmm.viterbi(emissions=emissions, model=model)
        assert path == expected
def smarthouse(
    dataset=("A", "B"),
    train_days=5,
    train_offset=0,
    test_days=None,
    test_offset=0,
    use_day_period=False,
    n_samples=None,
):
    """Train an HMM per dataset, Viterbi-decode the test set, and score it.

    dataset       -- dataset name or sequence of names (default: both A and B;
                     default changed from a mutable list to a tuple — it is
                     only iterated, so behaviour is unchanged)
    n_samples     -- if set, replace the test set with samples drawn from the
                     learned model
    Returns (truth, predictions, accuracy) per dataset; scalars when a
    single dataset was requested.
    """
    # Idiom fix: isinstance instead of type() == comparisons.
    if not isinstance(dataset, (tuple, list)):
        dataset = [dataset]

    truths = []
    predicts = []
    accs = []
    for name in dataset:
        df = load_dataset(name, use_day_period=use_day_period)
        train_s, train_o, test_s, test_o = trainset_testset(
            df,
            train_days=train_days,
            train_offset=train_offset,
            test_days=test_days,
            test_offset=test_offset,
        )

        # Estimate the HMM distributions (initial P, transition T, emission O)
        # from the training sequences.
        n = max(df['activity'] + 1)
        m = max(df['sensors'] + 1)
        P, T, O = hmm(train_s, train_o, n=n, m=m)

        if n_samples:
            test_s, test_o = random_sample(P, T, O, n_samples)

        # Run Viterbi on the test set and compute the fraction of correctly
        # predicted states (reduce+lambda replaced with a generator sum).
        predicted, p = viterbi(P, T, O, test_o)
        accuracy = sum(
            1 for truth, guess in zip(test_s, predicted) if truth == guess
        ) / len(predicted)

        accs.append(accuracy)
        truths.append(test_s)
        predicts.append(predicted)

    # Unwrap the single-dataset case for convenience.
    if len(accs) == 1:
        return truths[0], predicts[0], accs[0]
    return truths, predicts, accs
def show_viterbi(grids):
    """Demo: wander on a random grid, Viterbi-decode the observations, and
    plot the true vs. decoded paths side by side."""
    # Pick a random grid world and a short random walk length T in [3, 5].
    grid = np.random.choice(grids)
    H, W = grid.shape
    T = np.random.randint(3, 6)
    observations, states = grid.get_sequence(T)
    decoded, _ = viterbi(observations, grid.get_hmm())
    # Convert flat state indices to (row, col) cells.
    # NOTE(review): uses s // H for the row and s % W for the column — these
    # only agree for square grids; confirm the intended convention.
    decoded = [(s // H, s % W) for s in decoded]
    print(colored("Viterbi algorithm", "cyan"))
    print("Agent wandered on map \033[1m" + grid.name + "\033[0m")
    print("... going thorugh states", states)
    print("... observing", ", ".join([Grid.COLORS[o] for o in observations]))
    print("\nThe decoded sequence of states is", decoded)

    # Two identical heatmaps of the grid; paths are drawn on top of them.
    fig, axs = plt.subplots(1, 2, figsize=(10, 4), sharey="row")
    cm = LinearSegmentedColormap.from_list("cm", Grid.COLORS)
    sns.heatmap(
        grid.color,
        annot=grid.elevation,
        cmap=cm,
        square=True,
        cbar=False,
        annot_kws={"size": 20},
        ax=axs[0],
    )
    sns.heatmap(
        grid.color,
        annot=grid.elevation,
        cmap=cm,
        square=True,
        cbar=False,
        annot_kws={"size": 20},
        ax=axs[1],
    )
    axs[0].set_title(grid.name + " - original path")
    axs[1].set_title(grid.name + " - decoded path")
    # Draw one arrow per transition; +0.5 centres arrows inside the cells.
    for t in range(T - 1):
        (y0, x0), (y1, x1) = states[t], states[t + 1]
        y0, x0, y1, x1 = y0 + 0.5, x0 + 0.5, y1 + 0.5, x1 + 0.5
        axs[0].annotate("", xy=(x1, y1), xytext=(x0, y0),
                        arrowprops=dict(color="y", width=5.0))
        (y0, x0), (y1, x1) = decoded[t], decoded[t + 1]
        y0, x0, y1, x1 = y0 + 0.5, x0 + 0.5, y1 + 0.5, x1 + 0.5
        axs[1].annotate("", xy=(x1, y1), xytext=(x0, y0),
                        arrowprops=dict(color="y", width=5.0))
def test_viterbi(self):
    """With identity transitions the chain can never leave its first state,
    so the decoded path must be constant. (Uses the module-level `pi`.)"""
    transitions = np.identity(5)
    emissions = np.array([[0.600, 0.175, 0.175, 0.050],
                          [0.050, 0.600, 0.175, 0.175],
                          [0.050, 0.175, 0.600, 0.175],
                          [0.050, 0.175, 0.175, 0.600],
                          [0.600, 0.050, 0.175, 0.175]])
    observations = np.array([0, 0, 1, 0, 0, 3])
    expected_path = np.array([0, 0, 0, 0, 0, 0])
    decoded = viterbi(pi, transitions, emissions, observations)[0]
    np.testing.assert_array_equal(expected_path, decoded)
def test_viterbi(grid, observations, test_states, test_values):
    """Decode `observations` on `grid` and compare the path and trellis
    against the provided expectations."""
    print("Testing viterbi...")
    H, W = grid.shape
    states, delta = viterbi(observations, grid.get_hmm())
    # Flat state index -> (row, col) cell.
    states = [(s // H, s % W) for s in states]
    print("States:", states)
    print("TStates:", test_states)
    assert len(states) == len(test_states)
    assert all([got == want for (got, want) in zip(states, test_states)])
    assert np.allclose(delta, test_values)
    print(colored(">>> Viterbi looks right!", "green"))
    print("\n")
def test_decoded_by_sequence_length(grids, runs_no=1000):
    """Measure per-position decoding accuracy for sequence lengths 1..10."""
    print("Evaluate how good the decoded paths are...")
    for T in range(1, 11):
        correct = 0
        for _ in range(runs_no):
            grid = np.random.choice(grids)
            H, W = grid.shape
            observations, states = grid.get_sequence(T)
            decoded, _ = viterbi(observations, grid.get_hmm())
            # Flat state index -> (row, col) cell.
            decoded = [(s // H, s % W) for s in decoded]
            # Count positions where the decoded cell matches the true one.
            correct += sum([truth == guess
                            for truth, guess in zip(states, decoded)])
        perc = float(correct * 100) / (runs_no * T)
        print("%5d / %5d (%5.2f%%) for T =%2d" % (correct, runs_no * T, perc, T))
    print("\n")
def test_viterbi2(self):
    """Observation 3 is only emitted by state 4 (with certainty), so a
    run of 3s forces the decoded path to sit in state 4 throughout.
    (Uses the module-level `pi`.)"""
    transitions = np.array([[0.250, 0.500, 0.025, 0.200, 0.025],
                            [0.250, 0.150, 0.075, 0.500, 0.025],
                            [0.050, 0.025, 0.050, 0.850, 0.025],
                            [0.025, 0.075, 0.150, 0.125, 0.625],
                            [0.050, 0.075, 0.475, 0.025, 0.375]])
    emissions = np.array([[0.25, 0.25, 0.25, 0],
                          [0.25, 0.25, 0.25, 0],
                          [0.25, 0.25, 0.25, 0],
                          [0.25, 0.25, 0.25, 0],
                          [0, 0, 0, 1]])
    observations = np.array([3, 3, 3, 3, 3, 3])
    expected_path = np.array([4, 4, 4, 4, 4, 4])
    decoded = viterbi(pi, transitions, emissions, observations)[0]
    np.testing.assert_array_equal(expected_path, decoded)
def validation(hmm, filename): p = 1. / 6. # backround, prefix states and first target address_initial_dist = np.array( [[p, p, p, p, p, p, 0, 0, 0, 0, 0, 0, 0, 0]]) target_states = [ v for k, v in address_states.items() if k.startswith('target') ] for emissions, orig, pos, adr in text_emissions(filename, address_emissions): states = viterbi(hmm, address_initial_dist, emissions) #print states address, addresses = list(), list() for i in range(len(states)): if states[i] in target_states: address.append(orig[i]) else: if len(address) > 1: addresses.append(' '.join([a for a in address if a])) address = list() yield ' '.join(adr), addresses
# Forward/backward demo. NOTE(review): `obs` and `hmm` come from earlier,
# unseen script lines. p and q are both P(obs) and should agree.
p, alpha = forward(obs, hmm)
q, beta = backward(obs, hmm)
print("p = %f, q = %f" % (p, q))
print("alpha")
for l in alpha:
    print("%f %f" % (l[0], l[1]))
print()
print("beta")
for l in beta:
    print("%f %f" % (l[0], l[1]))
print()
# Viterbi decoding: best state path plus its trellis.
states, delta = viterbi(obs, hmm)
print("states:", states)
print("delta")
for l in delta:
    print("%f %f" % (l[0], l[1]))
print()
# Probability of the single best path = max over the last trellis column.
print("most prob = ", np.max(delta[-1, :]))
# Posterior state marginals: gamma = alpha * beta / P(obs).
gamma = alpha * beta / p
print("gamma")
for l in gamma:
    print("%f %f" % (l[0], l[1]))
print()
'''
Created on Nov 11, 2014

@author: oropivan
'''
import numpy
from hmm import HMM, viterbi

# Two hidden states.
stateNum = 2
# Row-stochastic transition matrix.
transition_probabilities = numpy.array( [ [.5,.5], [.4,.6] ] )
# Emissions over the four symbols; state 1 never emits "A".
emission_probabilities = numpy.array( [ [0.2, 0.3, 0.3, 0.2], \
                                        [0, 0.5, 0.2, 0.3] ] )
#symbols
symbolList = ["A","C", "G", "T"]
# Uniform initial distribution.
Pi = [0.5,0.5]
model = HMM(stateNum, A=transition_probabilities, B=emission_probabilities,
            V=symbolList, Pi=Pi)
# Decode the most likely state path (Python 2 print statement).
print viterbi(model, [ "G", "G", "C", "A", "C", "T", "G", "A", "A"])
import numpy as np
from hmm import forward, viterbi

# Toy 2-state / 3-symbol model.
A = np.asarray([[0.3, 0.7], [0.8, 0.2]])
B = np.asarray([[0.5, 0.3, 0.2], [0.1, 0.1, 0.8]])
pi = np.asarray([0.6, 0.4])
O = np.asarray([1, 2, 1, 0])

# Hand-computed ground truth: forward trellis and total probability.
alpha_gt = np.asarray([[0.18, 0.0172, 0.027276, 0.0054306],
                       [0.04, 0.1072, 0.003348, 0.00197628]])
forward_result_gt = 0.00740688
# Ground truth: Viterbi trellis and the best state path.
delta_gt = np.asarray([[0.18, 0.0108, 0.024192, 0.0036288],
                       [0.04, 0.1008, 0.002016, 0.00169344]])
viterbi_result_gt = np.asarray([0, 1, 0, 0])

# Compare implementations against the ground truth within 1e-5.
forward_result, alpha = forward(A, B, pi, O)
print('Forward result test: {}'.format(
    abs(forward_result_gt - forward_result) < 10**-5))
print('Forward alpha test: {}'.format(
    np.all(np.abs(alpha - alpha_gt) < 10**-5)))
viterbi_result, delta = viterbi(A, B, pi, O)
print('Viterbi result test: {}'.format(
    (viterbi_result_gt == viterbi_result).all()))
print('Viterbi delta test: {}'.format(
    np.all(np.abs(delta - delta_gt) < 10**-5)))
ass_plots.append(('K-means', results[alg]['seq'])) elif alg == algos.em: ass_plots.append(('EM', results[alg]['seq'])) elif alg == algos.hmm: t = time.time() tau, A, obs_distr, pi, ll_train, _ = hmm.em_hmm( X, init_pi, init_obs_distr, n_iter=options.n_iter) print 'HMM EM: {}s, final loglikelihood: {}'.format( time.time() - t, ll_train[-1]) seq_smoothing = np.argmax(tau, axis=1) ass_plots.append(('HMM smoothing', seq_smoothing)) seq_viterbi, _ = hmm.viterbi(X, pi, A, obs_distr) ass_plots.append(('HMM viterbi', seq_viterbi)) results[alg] = { 'tau': tau, 'A': A, 'obs_distr': obs_distr, 'pi': pi, 'll_train': ll_train, 'seq_smoothing': seq_smoothing, 'seq_viterbi': seq_viterbi, } seqs[alg] = (seq_smoothing, seq_viterbi) elif alg == algos.map_hmm: t = time.time() seq, obs_distr, energies = hmm.map_em_hmm(X, init_obs_distr)
# plot( pro0a[:,0], 'b.', pro0b[:,0], 'r.', ) # now use KCPA (P,alpha,evals) = dr.kpca(x, 2, kernel.rbf1) #evals Pa = P[0:a.shape[0],:] Pb = P[a.shape[0]:,:] plot(Pa[:,0], Pa[:,1], 'b.', Pb[:,0], Pb[:,1], 'r.') plot(alpha[:, 0],'r.') #################### # HMM #################### (a,b,pi) = datasets.getHMMData() hmm.viterbi(array([0,1,1,2]), a, b, pi) #array([0, 0, 0, 1]) hmm.viterbi(array([0,2,1,2]), a, b, pi) #array([0, 1, 1, 1]) ###WU 8 # example 1 hmm.viterbi(array([0,1,1,1]), a, b, pi) # 0 0 0 0 hmm.viterbi(array([0,1,2,1]), a, b, pi) # 0 0 1 1 al = hmm.forward(array([0,1,1,2]), a, b, pi) be = hmm.backward(array([0,1,1,2]), a, b, pi) hmm.sanityCheck(al,be)
# plot( pro0a[:,0], 'b.', pro0b[:,0], 'r.', ) # now use KCPA (P, alpha, evals) = dr.kpca(x, 2, kernel.rbf1) #evals Pa = P[0:a.shape[0], :] Pb = P[a.shape[0]:, :] plot(Pa[:, 0], Pa[:, 1], 'b.', Pb[:, 0], Pb[:, 1], 'r.') plot(alpha[:, 0], 'r.') #################### # HMM #################### (a, b, pi) = datasets.getHMMData() hmm.viterbi(array([0, 1, 1, 2]), a, b, pi) #array([0, 0, 0, 1]) hmm.viterbi(array([0, 2, 1, 2]), a, b, pi) #array([0, 1, 1, 1]) ###WU 8 # example 1 hmm.viterbi(array([0, 1, 1, 1]), a, b, pi) # 0 0 0 0 hmm.viterbi(array([0, 1, 2, 1]), a, b, pi) # 0 0 1 1 al = hmm.forward(array([0, 1, 1, 2]), a, b, pi) be = hmm.backward(array([0, 1, 1, 2]), a, b, pi) hmm.sanityCheck(al, be) ##########
# NOTE(review): A, B and pi are defined earlier in this script (not shown).
O = np.asarray([1, 2, 1, 0])

# Hand-computed ground truth: forward trellis and total probability.
alpha_gt = np.asarray([[0.18, 0.0172, 0.027276, 0.0054306],
                       [0.04, 0.1072, 0.003348, 0.00197628]])
forward_result_gt = 0.00740688
# Ground truth: Viterbi trellis and the best state path.
delta_gt = np.asarray([[0.18, 0.0108, 0.024192, 0.0036288],
                       [0.04, 0.1008, 0.002016, 0.00169344]])
viterbi_result_gt = np.asarray([0, 1, 0, 0])

# Compare implementations against the ground truth within 1e-5.
forward_result, alpha = forward(A, B, pi, O)
print('Forward result test: {}'.format(
    abs(forward_result_gt - forward_result) < 10**-5))
print('Forward alpha test: {}'.format(
    np.all(np.abs(alpha - alpha_gt) < 10**-5)))
viterbi_result, delta = viterbi(A, B, pi, O)
print('Viterbi result test: {}'.format(
    (viterbi_result_gt == viterbi_result).all()))
print('Viterbi delta test: {}'.format(
    np.all(np.abs(delta - delta_gt) < 10**-5)))

# Second scenario: 2 states, 4 symbols, a longer observation sequence;
# results are just printed, not asserted.
test_A = np.array([[0.5, 0.5], [0.4, 0.6]])
test_B = np.array([[0.2, 0.3, 0.3, 0.2], [0.3, 0.2, 0.2, 0.3]])
test_pi = np.array([0.5, 0.5])
test_O = np.array([2, 2, 1, 0, 1, 3, 2, 0, 0])
test_viterbi, test_delta = viterbi(test_A, test_B, test_pi, test_O)
print(test_viterbi, test_delta)
4.90660589e-23, 2.36899500e-24, 8.74828204e-26, 5.76689190e-27, 2.65176771e-28, 1.02821376e-29, 6.45525118e-31, 2.19395593e-32, 1.27209762e-33, 7.58706457e-35, 3.14381566e-36, 1.59146266e-37, 7.28862223e-39, 2.98685713e-40, 1.94326330e-41, 7.07671720e-43, 4.29072538e-44, 1.72310083e-45, 9.47392163e-47, 4.80288352e-48, 1.68916233e-49, 1.28052559e-50, 3.77500977e-52, 3.11720906e-53, 1.24641239e-54, 6.88279760e-56, 2.57904162e-57, 1.67549319e-58, 8.05105187e-60, 2.98733733e-61, 1.58657249e-62, 8.63982046e-64, 4.12736566e-65, 1.54044543e-66, 1.10042172e-67, 3.24406071e-69, 2.16853506e-70, 1.18089490e-71 ] ]) print("********************************************") print("TESTING VITERBI ALGORITHM: 1") test_viterbi, test_delta = viterbi(test_a1, test_b1, test_pi1, test_o1) #print("test delta") #print("delta1 = " + str(list(test_delta))) #print("test_viterbi") #print("viterbi1 = " + str(list(np.uint8(test_viterbi)))) print("********************************************") print('Viterbi1 result test: {}'.format((test_viterbi == viterbi1).all())) print('Viterbi1 delta test: {}'.format( np.all(np.abs(test_delta - delta1) < 10**-5))) print("********************************************") print("TESTING VITERBI ALGORITHM: 2") test_viterbi, test_delta = viterbi(test_a2, test_b2, test_pi2, test_o2) #print("test delta") #print("delta2 = " + str(list(test_delta)))
'''
Created on Nov 11, 2014

@author: oropivan
'''
import numpy
from hmm import HMM, viterbi

# Two hidden states.
stateNum = 2
# Row-stochastic transition matrix.
transition_probabilities = numpy.array([[.5, .5], [.4, .6]])
# Emissions over the four symbols; state 1 never emits "A".
emission_probabilities = numpy.array( [ [0.2, 0.3, 0.3, 0.2], \
                                        [0, 0.5, 0.2, 0.3] ] )
#symbols
symbolList = ["A", "C", "G", "T"]
# Uniform initial distribution.
Pi = [0.5, 0.5]
model = HMM(stateNum, A=transition_probabilities, B=emission_probabilities,
            V=symbolList, Pi=Pi)
# Decode the most likely state path (Python 2 print statement).
print viterbi(model, ["G", "G", "C", "A", "C", "T", "G", "A", "A"])
def coalhmm(args): """ Trains and tests a Coal-HMM @param args (argparse.Namespace) Arguments provided by user: filename, sample, rounds """ # from table 2 in Hobolth et al. # mean_fragment_length_HC1 = 1684 # mean_fragment_length_others = 65 # probability_leaving_HC1 = 3 * s = 1 / 1684 s = 1.0 / (1684 * 3) # probability_leaving_others = 1 / 65 = u + 2 * v # u + 2 * v = 1 / 65 stationary = (0.49, 0.17, 0.17, 0.17) # stationary = np.array([psi, (1 - psi) / 3, (1 - psi) / 3, (1 - psi) / 3]) psi = 0.49 # psi = 1 / (1 + 3 * s / u) # 1 + 3 * s / u = 1 / psi # u + 3 * s = u / psi # 3 * s = (1 / psi - 1) * u u = 3 * s / (1 / psi - 1) v = (1 / 65.0 - u) / 2 # Transition probability: HC1, HC2, HG, CG transition = np.array([[1 - 3 * s, s, s, s], [u, 1 - (u + 2 * v), v, v], [u, v, 1 - (u + 2 * v), v], [u, v, v, 1 - (u + 2 * v)]]) print "Reading alignments" original_alignments = [np.array(a) for a in utils.read_maf(args.filename)] print "done" for j in range(args.rounds): print "ROUND {}".format(j) print "sampling" if args.sample is not None: alignments = [ original_alignments[i] for i in np.random.choice( np.arange(len(original_alignments)), args.sample, False) ] else: alignments = original_alignments print "Number of alignments: {}".format(len(alignments)) print "done" print "felsenstein" groupings = {} emission = np.zeros((4, 5**4)) for alignment in alignments: _, len_alignment = alignment.shape for i in range(len_alignment): column = "".join(alignment[:, i]) if column not in groupings: groupings[column] = len(groupings) trees = utils.generate_trees(alignment[:, i]) # Felsenstein to get emission for i, t in enumerate(trees): emission[i, groupings[column]] = math.exp( felsenstein.felsensteins(t)) print "done" print "BW" initial = np.array([0.25, 0.25, 0.25, 0.25]) # Baum welsh to update matrices emission, transition = hmm.baum_welch(initial, emission, transition, alignments, groupings) print "done" print "viterbi" # use viterbi to see which state we are in the longest 
hidden_states = [] for alignment in alignments: hidden_states.append( hmm.viterbi(initial, emission, transition, alignment, groupings)) print "done" # calculate time spent in a state counts = Counter([s for states in hidden_states for s in states]) print "Number of bases in each state: ", counts
def main(): aa = {} bb = {} vocabulary = set([]) file_list = os.listdir(fileparser.resource_path) # training print('Training...') for file in file_list: if file.startswith(fileparser.training_prefix): training_file = open(fileparser.resource_path + file, 'r') sentence_list = fileparser.parse(training_file) train(aa, bb, sentence_list, vocabulary) print('DONE') # transform into a and b t_start = time.time() a = {} b = {} user_states = list(aa.iterkeys()) states = list(aa.iterkeys()) + [hmm.START, hmm.END] for state in aa.iterkeys(): sum_counts = sum([aa[state][next_state] for next_state in aa[state].iterkeys()]) for next_state in states: if aa[state].has_key(next_state): a[(state, next_state)] = LogProbability(aa[state][next_state]) / sum_counts else: a[(state, next_state)] = LogProbability(0.0) # Extract vocabulary vocab = {} for state in bb.iterkeys(): for output in bb[state].iterkeys(): vocab[output] = vocab.get(output, 0) + 1 # Create matrix B and apply smoothing for state in bb.iterkeys(): sum_emmited = (sum([bb[state][output] for output in bb[state].iterkeys()]) if bb.has_key(state) else 0) b[state] = {} for output in vocab.iterkeys(): b[state][output] = LogProbability(bb.get(state, {}).get(output, 0.0) + 1.0) / (sum_emmited + len(vocab)) # Calculate average of singleton words unknown_b = {} singletons = [word for word, count in vocab.iteritems() if count == 1] for s in bb.iterkeys(): sm = LogProbability(0.0) for singleton in singletons: sm += b[s].get(singleton, LogProbability(0.0)) b[s][hmm.UNKNOWN] = sm / len(singletons) print hmm.states(a, b) def unknown_b_mapper(s, word): print 'Could not find word %s in state %s' % (word, s) print 'State has: %s' % (list(b[s].iterkeys())) assert word not in vocab print '**UNKNOWN** %s' % word return unknown_b[s] # computing likelihood print('computing likelihood...') forward_file = open('forward.txt', 'w') for file in file_list: if file.startswith(fileparser.test_prefix): training_file = open(fileparser.resource_path + 
file, 'r') sentence_list = fileparser.parse(training_file) for sentence in sentence_list: words = [word for word, tag in sentence] words = [(word if word in vocab else hmm.UNKNOWN) for word in words] forward_table = {} backward_table = {} forward_p = hmm.forward_algorithm(words, a, b, forward=forward_table) backward_p = hmm.backward_algorithm(words, a, b, backward=backward_table) forward_file.write('%s\n %s\n %s\n\n' % (words, forward_p.logv, backward_p.logv)) forward_file.close() print('likelihood computed.') print 'Took %ds' % (time.time() - t_start) # computing most likely tag sequencesanc accuracy print('computing most likely tag sequence and tagger accuracy...') match_count = 0.0 total_count = 0.0 for file in file_list: if file.startswith(fileparser.test_prefix): training_file = open(fileparser.resource_path + file, 'r') sentence_list = fileparser.parse(training_file) for sentence in sentence_list: words = [word for word, tag in sentence] words = [(word if word in vocab else hmm.UNKNOWN) for word in words] tagger_sequence = hmm.viterbi(words, a, b) human_sequence = [tag for word, tag in sentence] #print tagger_sequence #print human_sequence #print '----' # update tagger accuracy information for i in range(min(len(human_sequence), len(tagger_sequence))): # because of underflow it is possible that the tag sequences are not equal in length...s if tagger_sequence[i] == human_sequence[i]: match_count = match_count + 1.0 total_count = total_count + max(len(human_sequence), len(tagger_sequence)) #print('%s\n%s\nProbability: %f\n' % (human_sequence, tagger_sequence, p)) print('most likely tag sequence computed.') print('accuracy of tagger is: %f' % (match_count / total_count, ))
import numpy as np
from gaussian import Gaussian
import hmm

# Single-feature observation sequence (one row).
signal = np.array([[1., 1.1, 0.8, 0.2, 1.6, 1.7, 3.4, 1.4, 1.1]])
# Transition matrix over 5 states.
# NOTE(review): row 0 looks like an entry state and the all-zero last row
# like an exit state — confirm against hmm.viterbi's contract.
trans = np.array([[0., 1. / 3, 1. / 3, 1. / 3, 0.],
                  [0., 0.45, 0.45, 0., 0.1],
                  [0., 0.45, 0.45, 0., 0.1],
                  [0., 0., 0., 1., 0.],
                  [0., 0., 0., 0., 0.]])
# One unit-variance 1-D Gaussian emission model per emitting state.
dists = [
    Gaussian(mean=np.array([1]), cov=np.array([[1]])),
    Gaussian(mean=np.array([2]), cov=np.array([[1]])),
    Gaussian(mean=np.array([1.5]), cov=np.array([[1]]))
]
vals, nll = hmm.viterbi(signal, trans, dists)
# Python 2 prints; the expected output is recorded in the comments below.
print 'State sequence: ', vals
#State sequence: [1 1 1 1 2 2 2 1 1]
print 'Negative log-likelihood:', nll
#Negative log-likelihood: 19.5947057502
import numpy as np
from gaussian import Gaussian
import hmm

# Single-feature observation sequence (one row).
signal = np.array([[ 1. , 1.1, 0.8, 0.2, 1.6, 1.7, 3.4, 1.4, 1.1]])
# Transition matrix over 5 states.
# NOTE(review): row 0 looks like an entry state and the all-zero last row
# like an exit state — confirm against hmm.viterbi's contract.
trans = np.array([[ 0. , 1./3 , 1./3 , 1./3, 0. ],
                  [ 0. , 0.45, 0.45, 0., 0.1 ],
                  [ 0. , 0.45, 0.45, 0., 0.1 ],
                  [ 0. , 0. , 0. , 1., 0. ],
                  [ 0. , 0. , 0. , 0., 0. ]])
# One unit-variance 1-D Gaussian emission model per emitting state.
dists = [Gaussian(mean=np.array([1]),cov=np.array([[1]])),
         Gaussian(mean=np.array([2]),cov=np.array([[1]])),
         Gaussian(mean=np.array([1.5]),cov=np.array([[1]]))]
vals, nll = hmm.viterbi(signal, trans, dists)
# Python 2 prints; the expected output is recorded in the comments below.
print 'State sequence: ', vals
#State sequence: [1 1 1 1 2 2 2 1 1]
print 'Negative log-likelihood:', nll
#Negative log-likelihood: 19.5947057502
# NOTE(review): bbox, highway_dict, starting_node, starting_highway,
# intersections, node_dict, gps_variance, polling_frequency, speed_limit,
# measurement_variance and state_space come from earlier, unseen lines.
transition_decay = 1/500
maximum_route_length = speed_limit/polling_frequency*2
no_of_bases = 50
base_max_range = 50
route_length = 200

print("Simulating route..")
base_locations = generate_base_locations(bbox, no_of_bases)
simulated_route = simulate_route(highway_dict, starting_node, starting_highway,
                                 intersections, route_length)
gps_measurements, signal_measurements, measurement_states = simulate_observations(
    simulated_route, node_dict, gps_variance, polling_frequency,
    [speed_limit]*len(simulated_route), base_locations,
    np.array([base_max_range]*no_of_bases), state_space)

print("Calculating transition probabilities..")
tp = transition_probabilties_by_weighting_route_length(
    state_space, transition_decay, maximum_route_length)

print("Calculating emission probabilities..")
# NOTE(review): the emission range is hard-coded to 500 here, not
# base_max_range (50) — confirm that is intentional.
ep = emission_probabilities(gps_measurements, measurement_variance,
                            signal_measurements, base_locations,
                            np.array([500]*no_of_bases), state_space)

N = len(state_space)
print("Running Viterbi..")
# Uniform initial distribution over the N states.
estimated_states = viterbi(tp, ep, np.array([1/N]*N))
naive_estimate = spatially_closest_states(gps_measurements, state_space)

# Compare the HMM decoding against the spatially-closest-state baseline.
print("Accuracy with naive method: {}".format(np.mean(measurement_states == naive_estimate)))
print("Accuracy with hidden markov model: {}".format(np.mean(estimated_states == measurement_states)))
Pi=initialProbabilities) # testing data iterSentencesCorrect = 0 iterSentences = 0 iterTagsCorrect = 0 iterTags = 0 for sentence in testSet: wordSeq = ['<S>'] POSSeq = ['<S>'] for word, POS in sentence: wordSeq.append(word) POSSeq.append(POS) wordSeq.append('<\S>') POSSeq.append('<\S>') resultPOS = viterbi(model, wordSeq, scaling=False) returnedSeq = [map_index_POS[x] for x in resultPOS[0]] if returnedSeq == POSSeq: numberSentencesCorrect += 1 iterSentencesCorrect += 1 numberSentences += 1 iterSentences += 1 for x, y in zip(POSSeq, returnedSeq): if x == y: numberTagsCorrect += 1 iterTagsCorrect += 1 numberTags += 1 iterTags += 1 print ','.join([
def predict_viterbi(self, test_data, verbose=1,
                    output_filename="./utter_level_result.txt"):
    """ Viterbi decoding using output probabilites from the base model,
    marginal probabilities, and transition probabilities.

    Parameters
    ----------
    test_data : MHDTestData
        An object of MHDTestData for test data.
    verbose : int
        The level of verbosity in range [0,3]
    output_filename : str
        Path to the utterance-level result file.
    """
    # Guard: transitions are produced by training/loading; bail out early.
    if self.log_transitions is None:
        print("ERROR: Train or load the model first")
        return
    self.te_data = test_data
    self.n_labels = self.te_data.n_labels
    self.model_info = "_".join(["HMM", self.base_model.model_info])
    # Nested per-session test data: (utterance lists, documents, labels).
    te_data_nested = self.te_data.get_utter_level_data_from_sids(
        sorted(self.te_data.sstt2uid.keys()))
    ulists, docs, labs = te_data_nested

    # Viterbi-decode each session using the base model's class probabilities
    # converted into log emission probabilities.
    vit_res = []
    for sidx in range(len(ulists)):
        output_prob_s = self.base_model.result.output_prob[sidx]
        log_emissions = convert_class_prob_to_log_emission_prob(
            output_prob_s, self.marginals)
        vit_res.append(
            viterbi(log_emissions, self.log_transitions,
                    self.log_start_prob, self.log_end_prob))
    # Each viterbi result is (score, predicted label sequence).
    yhats = [s[1] for s in vit_res]
    output_scores = [s[0] for s in vit_res]
    self.result = DialogResult(self.n_labels, yhats, None, self.marginals,
                               self.model_info, output_scores)

    # Score against gold labels when available; always dump per-utterance
    # results to output_filename.
    if self.te_data.has_label:
        if verbose > 0:
            print("Calculate score")
        self.result.get_scores(labs)
        if verbose > 0:
            print("Printing utterance-level results to file " + output_filename)
        self.result.print_utter_level_results(ulists, docs, labs,
                                              self.te_data.lid2name,
                                              filename=output_filename)
    else:
        if verbose > 0:
            print("Printing utterance-level results to file " + output_filename)
        self.result.print_utter_level_results_without_true_lab(
            ulists, docs, self.te_data.lid2name, filename=output_filename)
    return self.result
alpha_scaled2, scale_alpha2 = hmm.forward(data_test, states, start_proba1[i], transition_proba1[i], means1[i], covariances1[i]) logllh2.append(hmm.loglike(states, alpha_scaled2, scale_alpha2)) plt.figure() plt.plot(logllh2) # 6 print "The log-likelihood for HMM on train data is %f" % (logllh1[-1]) print "The log-likelihood for HMM on test data is %f" % (logllh2[-1]) # 7 path1 = hmm.viterbi(data_train, states, start_proba1[-1], transition_proba1[-1], means1[-1], covariances1[-1]) def plotViterbi(data, path, means): n = len(data) K = len(means) colors = ['b', 'g', 'r', 'y'] fig = plt.figure() ax = fig.add_subplot(1, 1, 1) for i in range(0, n): cluster = int(path[i]) ax.scatter(data[i, 0], data[i, 1], color=colors[cluster]) for j in range(0, K): ax.scatter(means[j, 0], means[j, 1], color="black")
model = HMM(len(map_POS_index.keys()), A=transition_probabilities, B=emission_probabilities, V=symbolList, Pi=initialProbabilities) # testing data iterSentencesCorrect = 0 iterSentences = 0 iterTagsCorrect = 0 iterTags = 0 for sentence in testSet: wordSeq = ['<S>'] POSSeq = ['<S>'] for word, POS in sentence: wordSeq.append(word) POSSeq.append(POS) wordSeq.append('<\S>') POSSeq.append('<\S>') resultPOS = viterbi(model, wordSeq, scaling=False) returnedSeq = [map_index_POS[x] for x in resultPOS[0]] if returnedSeq == POSSeq: numberSentencesCorrect += 1 iterSentencesCorrect += 1 numberSentences +=1 iterSentences += 1 for x, y in zip(POSSeq, returnedSeq): if x==y: numberTagsCorrect +=1 iterTagsCorrect += 1 numberTags += 1 iterTags += 1 print ','.join(["test", str(i), str(iterSentencesCorrect/float(iterSentences)), str(iterTagsCorrect/float(iterTags))])