# -*- coding: utf-8 -*-
from hmm import Model

# Classic "weather" HMM demo: hidden states are the weather, the
# observed symbols are the person's activities.
states = ("rainy", "sunny")
symbols = ("walk", "shop", "clean")

# Uniform prior over the hidden states.
start_prob = {"rainy": 0.5, "sunny": 0.5}

# Transition probabilities P(state_t | state_{t-1}).
trans_prob = {
    "rainy": {"rainy": 0.7, "sunny": 0.3},
    "sunny": {"rainy": 0.4, "sunny": 0.6},
}

# Emission probabilities P(symbol | state).
emit_prob = {
    "rainy": {"walk": 0.1, "shop": 0.4, "clean": 0.5},
    "sunny": {"walk": 0.6, "shop": 0.3, "clean": 0.1},
}

sequence = ["walk", "shop", "clean", "clean", "walk", "walk", "walk", "clean"]

model = Model(states, symbols, start_prob, trans_prob, emit_prob)
# print() with a single argument behaves identically on Python 2 and 3,
# whereas the old "print x" statement is a SyntaxError on Python 3.
print(model.evaluate(sequence))  # likelihood of the observation sequence
print(model.decode(sequence))    # most likely hidden-state path
# Initial state distribution for word segmentation: a word can only start
# at the Beginning of a word (B) or as a Single character (S).
phi = {'B': 0.5, 'E': 0, 'M': 0, 'S': 0.5}
# NOTE: S, observation, trans_prob, conf_prob, observations and pro are
# expected to be defined earlier in this file (not visible in this chunk).
model = Model(S, observation, phi, trans_prob, conf_prob)

o_hstate = []
for obser in observations:
    # If a sentence is too long the Viterbi betas can underflow to zero.
    # Two remedies exist: split the sentence into sub-sentences, or run
    # Viterbi in log space.  We use the first: decode each
    # punctuation-delimited chunk separately and concatenate the paths.
    length = len(obser)
    index, sub_obser, state = 0, [], []
    while index < length:
        sub_obser.append(obser[index])
        # Decode on sentence punctuation, or when the input ends without it.
        # (Merged from two identical branches in the original.)
        if obser[index] == '。' or obser[index] == ',' or index == length - 1:
            sub_state = model.decode(sub_obser)
            sub_obser = []
            state += sub_state
        index += 1
    o_hstate.append(state)

word_sequence = pro._word_sequence(observations, o_hstate)
print(word_sequence[3])
# Transition probabilities P(state_t | state_{t-1}).
# NOTE: states, symbols and start_prob are defined earlier in the file.
trans_prob = {
    'rainy': {'rainy': 0.7, 'sunny': 0.3},
    'sunny': {'rainy': 0.4, 'sunny': 0.6},
}

# Emission probabilities P(symbol | state).
emit_prob = {
    'rainy': {'walk': 0.1, 'shop': 0.4, 'clean': 0.5},
    'sunny': {'walk': 0.6, 'shop': 0.3, 'clean': 0.1},
}

sequence = ['walk', 'shop', 'clean', 'clean', 'walk', 'walk', 'walk', 'clean']

model = Model(states, symbols, start_prob, trans_prob, emit_prob)
# print() with one argument is valid on both Python 2 and 3; the bare
# "print x" statement form used originally breaks on Python 3.
print(model.evaluate(sequence))
print(model.decode(sequence))
# HMM parameters for the weather example.
# Rows are the current hidden state; values are next-state probabilities.
trans_prob = {
    'rainy': {'rainy': 0.7, 'sunny': 0.3},
    'sunny': {'rainy': 0.4, 'sunny': 0.6},
}

# Per-state emission distribution over the observable activities.
emit_prob = {
    'rainy': {'walk': 0.1, 'shop': 0.4, 'clean': 0.5},
    'sunny': {'walk': 0.6, 'shop': 0.3, 'clean': 0.1},
}

# The observation sequence we evaluate and decode below.
sequence = ['walk', 'shop', 'clean', 'clean', 'walk', 'walk', 'walk', 'clean']

model = Model(states, symbols, start_prob, trans_prob, emit_prob)
print(model.evaluate(sequence))
print(model.decode(sequence))
# The hidden states.
states = [1, 2, 3]
# The observation symbols.
observation = [1, 2]
# The initial probability of each hidden state (uniform).
phi = {1: 0.333, 2: 0.333, 3: 0.333}
# The transition probabilities between hidden states (uniform).
trans_prob = {
    1: {1: 0.333, 2: 0.333, 3: 0.333},
    2: {1: 0.333, 2: 0.333, 3: 0.333},
    3: {1: 0.333, 2: 0.333, 3: 0.333},
}
# The probability of each observation conditioned on a hidden state.
conf_prob = {
    1: {1: 0.5, 2: 0.5},
    2: {1: 0.75, 2: 0.25},
    3: {1: 0.25, 2: 0.75},
}

observations = [1, 1, 1, 1, 2, 1, 2, 2, 2, 2]

model = Model(states, observation, phi, trans_prob, conf_prob)
# print() works identically on Python 2 and 3 with a single argument;
# the original "print x" statements fail to parse on Python 3.
print(model.evaluate(observations))
print(model.decode(observations))
# NOTE(review): the original line had three unbalanced ''' markers — the
# first pair accidentally commented out the assignments and loop header,
# and the last opened an unterminated string (SyntaxError).  Reconstructed
# below with the note text as comments; `test`, `S`, `observation`,
# `trans_prob`, `conf_prob` and `pro` are assumed to come from earlier in
# the file — TODO confirm against the full source.
observations = test
phi = {'B': 0.5, 'E': 0, 'M': 0, 'S': 0.5}
model = Model(S, observation, phi, trans_prob, conf_prob)

o_hstate = []
for obser in observations:
    # If a sentence is too long the Viterbi betas can underflow to zero.
    # Two remedies: split the sentence into sub-sentences, or use log
    # probabilities in Viterbi.  We use the first method here.
    length = len(obser)
    index, sub_obser, state = 0, [], []
    while index < length:
        sub_obser.append(obser[index])
        if obser[index] == '。' or obser[index] == ',':
            sub_state = model.decode(sub_obser)
            sub_obser = []
            state += sub_state
        elif index == length - 1:
            sub_state = model.decode(sub_obser)
            sub_obser = []
            state += sub_state
        index += 1
    o_hstate.append(state)

word_sequence = pro._word_sequence(observations, o_hstate)
print(word_sequence[3])
# The probability of each observation conditioned on a hidden state.
# NOTE: states, observation, phi and trans_prob are defined earlier in
# the file (outside this chunk).
conf_prob = {
    'rainy': {'walk': 0.1, 'shop': 0.3, 'clean': 0.6},
    'sunny': {'walk': 0.4, 'shop': 0.5, 'clean': 0.1},
    'cloudy': {'walk': 0.6, 'shop': 0.25, 'clean': 0.15},
}

observations = [
    'walk', 'shop', 'clean', 'clean', 'walk', 'walk', 'walk', 'clean',
]

# iter_num is the number of iterations for the EM (Baum-Welch) algorithm.
iter_num = 50

model = Model(states, observation, phi, trans_prob, conf_prob, iter_num)
# Single-argument print() is valid on both Python 2 and 3; the original
# "print x" statement form is a SyntaxError on Python 3.
print(model.evaluate(observations))
print(model.decode(observations))