# Demo script: print the model's state names, then the results of the
# backward, forward-backward, Viterbi and MAP computations for `sequence`.
# Written with Python 3 syntax: print() calls instead of print statements, and
# the builtin zip() in place of itertools.izip (which Python 3 does not have).
print("\n".join(state.name for state in model.states))
print("Backward")
print(model.backward(sequence))

print("Forward-Backward")
trans, emissions = model.forward_backward(sequence)
print(trans)
print(emissions)

print("Viterbi")
# Each path entry is indexable; element [1] carries the state's name.
prob, states = model.viterbi(sequence)
print("Prob: {}".format(prob))
print("\n".join(state[1].name for state in states))
print()
print("MAP")
prob, states = model.maximum_a_posteriori(sequence)
print("Prob: {}".format(prob))
print("\n".join(state[1].name for state in states))

print("Showing that sampling can reproduce the original transition probs.")
print("Should produce a matrix close to the following: ")
print(" [ [ 0.60, 0.10, 0.30 ] ")
print(" [ 0.40, 0.40, 0.20 ] ")
print(" [ 0.05, 0.15, 0.80 ] ] ")
print()
print("Tranition Matrix From 100000 Samples:")
sample, path = model.sample(100000, path=True)
# Note: `trans` is rebound here from the forward-backward result to a fresh
# 3x3 zero matrix.
trans = np.zeros((3, 3))

# Walk consecutive (state, next state) pairs of the sampled path. The slices
# drop the first and last path entries (presumably the model's start and end
# states -- TODO confirm).
for state, n_state in zip(path[1:-2], path[2:-1]):
    # Numeric part of the state name, shifted down by one (presumably a
    # zero-based row for names such as "s1" -- TODO confirm).
    # NOTE(review): the loop body looks truncated in this view; `n_state` and
    # `state_name` are not used by any visible statement.
    state_name = float(state.name[1:]) - 1
# Transitions between the two weather states. Each state's two outgoing
# probabilities sum to 0.9, leaving 0.1 for the transition to the end of the
# model that is added below.
model.add_transition(rainy, rainy, 0.65)
model.add_transition(rainy, sunny, 0.25)
model.add_transition(sunny, rainy, 0.35)
model.add_transition(sunny, sunny, 0.55)

# Add transitions to the end of the model
model.add_transition(rainy, model.end, 0.1)
model.add_transition(sunny, model.end, 0.1)

# Finalize the model structure
model.bake(verbose=True)

# Draw one sample from the model and show it.
print(model.sample())

# Let's call Bob every hour and see what he's doing!
# (aka build up a sequence of observations)
sequence = ['walk', 'shop', 'clean', 'clean', 'clean', 'walk', 'clean']

# What is the probability of seeing this sequence?
# The matrix entries are exponentiated with math.e before printing, so they
# are presumably log-probabilities -- TODO confirm against the library docs.
# Python 3 print() calls replace the original print statements; the two
# arguments are still separated by a single space in the output.
print(
    "Probability of Sequence: ",
    math.e ** model.forward(sequence)[len(sequence), model.end_index],
)
# NOTE(review): the label says "Cleaning", but the value printed is the entry
# for the `rainy` state at row 2 -- confirm the intended wording.
print(
    "Probability of Cleaning at Time Step 3 Given This Sequence: ",
    math.e ** model.forward_backward(sequence)[1][2, model.states.index(rainy)],
)
print(
    "Probability of the Sequence Given It's Sunny at Time Step 4: ",
    math.e ** model.backward(sequence)[3, model.states.index(sunny)],
)

# State names along the maximum-a-posteriori path, separated by spaces.
print(" ".join(state.name for i, state in model.maximum_a_posteriori(sequence)[1]))
# Demo script: list the model's states, then show what the backward,
# forward-backward, Viterbi and MAP algorithms return for `sequence`.
# Uses Python 3 syntax throughout: print() is a function, and the builtin
# zip() stands in for itertools.izip, which does not exist on Python 3.
print("\n".join(state.name for state in model.states))
print("Backward")
print(model.backward(sequence))

print("Forward-Backward")
trans, emissions = model.forward_backward(sequence)
print(trans)
print(emissions)

print("Viterbi")
prob, states = model.viterbi(sequence)
print("Prob: {}".format(prob))
# Path entries are indexable; the state object (with `.name`) sits at [1].
print("\n".join(state[1].name for state in states))
print()
print("MAP")
prob, states = model.maximum_a_posteriori(sequence)
print("Prob: {}".format(prob))
print("\n".join(state[1].name for state in states))

print("Showing that sampling can reproduce the original transition probs.")
print("Should produce a matrix close to the following: ")
print(" [ [ 0.60, 0.10, 0.30 ] ")
print(" [ 0.40, 0.40, 0.20 ] ")
print(" [ 0.05, 0.15, 0.80 ] ] ")
print()
print("Tranition Matrix From 100000 Samples:")
sample, path = model.sample(100000, path=True)
# `trans` is reused: from here on it is a fresh 3x3 matrix of zeros.
trans = np.zeros((3, 3))

# Pair every sampled state with its successor, leaving out the first and last
# path entries (presumably the silent start/end states -- TODO confirm).
for state, n_state in zip(path[1:-2], path[2:-1]):
    # The state name minus its first character, as a number, minus one
    # (presumably turning names like "s1" into row 0 -- TODO confirm).
    # NOTE(review): the rest of this loop body is not visible here; nothing in
    # view consumes `state_name` or `n_state`.
    state_name = float(state.name[1:]) - 1
# Transition matrix, with 0.05 subtracted from each probability to add to
# the probability of exiting the hmm
model.add_transition(rainy, rainy, 0.65)
model.add_transition(rainy, sunny, 0.25)
model.add_transition(sunny, rainy, 0.35)
model.add_transition(sunny, sunny, 0.55)

# Add transitions to the end of the model (0.1 from each state, so every
# state's outgoing probabilities sum to 1.0).
model.add_transition(rainy, model.end, 0.1)
model.add_transition(sunny, model.end, 0.1)

# Finalize the model structure
model.bake(verbose=True)

# Let's sample from this model.
print(model.sample())

# Let's call Bob every hour and see what he's doing!
# (aka build up a sequence of observations)
sequence = ['walk', 'shop', 'clean', 'clean', 'clean', 'walk', 'clean']

# What is the probability of seeing this sequence?
# Each value is raised as a power of math.e before printing, which suggests
# the matrices hold log-probabilities -- TODO confirm against the library docs.
# print() is called as a function (Python 3); its default separator keeps the
# single space the old print statement put between the label and the value.
print(
    "Probability of Sequence: ",
    math.e ** model.forward(sequence)[len(sequence), model.end_index],
)
# NOTE(review): this label talks about "Cleaning", yet the lookup selects the
# `rainy` state's column at row 2 -- confirm which one is meant.
print(
    "Probability of Cleaning at Time Step 3 Given This Sequence: ",
    math.e ** model.forward_backward(sequence)[1][2, model.states.index(rainy)],
)
print(
    "Probability of the Sequence Given It's Sunny at Time Step 4: ",
    math.e ** model.backward(sequence)[3, model.states.index(sunny)],
)

# Names of the states on the maximum-a-posteriori path, space separated.
print(" ".join(state.name for i, state in model.maximum_a_posteriori(sequence)[1]))
def main():
    """Build the "Names" hidden Markov model and evaluate it on tagged names.

    Steps, in order:

    1. Load one token distribution per state from the files named in
       ``config.token_files`` (relative to ``config.input_dir``) and pass each
       one, together with the other five, through ``discrete_distribution``.
    2. Create six states (first name, last name 1, last name 2 and a
       "particle" state for each) and wire them with fixed transition
       probabilities. ``config.graph_type`` selects the graph: the first entry
       of ``config.graph_types`` gives FirstName LastName1 LastName2 order,
       anything else gives LastName1 LastName2 FirstName order.
    3. Bake the model, then parse every observation in
       ``config.test_set_file`` with the maximum-a-posteriori path and compare
       the parsed fields with the tagged values.

    Progress, per-observation results and a final summary are printed to
    standard output. Nothing is returned.
    """
    hmm = HiddenMarkovModel(name="Names")

    # (short label, key into config.token_files, state name) -- the order of
    # this table drives loading, recomputation and state creation below.
    token_sets = (
        ('fn', 'first_name', 'FirstName'),
        ('pfn', 'part_first_name', 'ParticleFirstName'),
        ('ln1', 'last_name1', 'LastName1'),
        ('pln1', 'part_last_name1', 'ParticleLastName1'),
        ('ln2', 'last_name2', 'LastName2'),
        ('pln2', 'part_last_name2', 'ParticleLastName2'),
    )
    labels = [label for label, _, _ in token_sets]

    # Load the probability distribution of each token set.
    dists = {
        label: utils.load_dict_from_file(
            config.input_dir + config.token_files[file_key])
        for label, file_key, _ in token_sets
    }

    # Calculate the discrete distributions: each set is passed first, followed
    # by the remaining five in table order.
    # NOTE(review): results are stored back immediately, so every call after
    # the first receives the already-recomputed earlier distributions rather
    # than the raw loaded ones -- confirm this is intended.
    for label in labels:
        others = [dists[other] for other in labels if other != label]
        dists[label] = discrete_distribution(dists[label], *others)

    # States of the model, one per token set.
    fn, pfn, ln1, pln1, ln2, pln2 = (
        State(DiscreteDistribution(dists[label]), name=state_name)
        for label, _, state_name in token_sets
    )

    # Transition probabilities, as (source, target, probability) rows.
    if config.graph_type == config.graph_types[0]:
        # Graph for FirstName LastName1 LastName2 sequences
        transitions = (
            (hmm.start, fn, 1),
            (fn, fn, 0.256251576),
            (fn, pfn, 0.028472397),
            (fn, ln1, 0.704144114),
            (fn, pln1, 0.011131913),
            (pfn, pfn, 0.150),
            (pfn, fn, 0.850),
            (ln1, ln1, 0.007015434),
            (ln1, pln1, 0.007017087),
            (ln1, ln2, 0.960638112),
            (ln1, pln2, 0.014719859),
            (ln1, hmm.end, 0.010609508),
            (pln1, pln1, 0.150),
            (pln1, ln1, 0.850),
            (ln2, ln2, 0.004290151),
            (ln2, pln2, 0.006801967),
            (ln2, hmm.end, 0.988907882),
            (pln2, pln2, 0.150),
            (pln2, ln2, 0.850),
        )
    else:
        # Graph for LastName1 LastName2 FirstName sequences
        transitions = (
            (hmm.start, ln1, 0.984436899),
            (hmm.start, pln1, 0.015563101),
            (ln1, ln1, 0.007015434),
            (ln1, pln1, 0.007017087),
            (ln1, ln2, 0.960638112),
            (ln1, pln2, 0.014719859),
            (ln1, fn, 0.010609508),
            (pln1, pln1, 0.150),
            (pln1, ln1, 0.850),
            (ln2, ln2, 0.004290151),
            (ln2, pln2, 0.006801967),
            (ln2, fn, 0.988907882),
            (pln2, pln2, 0.150),
            (pln2, ln2, 0.850),
            (fn, fn, 0.256251576),
            (fn, pfn, 0.028472397),
            (fn, hmm.end, 0.715276027),
            (pfn, pfn, 0.150),
            (pfn, fn, 0.850),
        )
    for source, target, probability in transitions:
        hmm.add_transition(source, target, probability)

    # "Bake" the model, finalizing its structure
    hmm.bake(verbose=True)

    # Testing the model
    parse_errors = 0
    value_errors = 0
    tagged_names = utils.load_dict_from_file(config.test_set_file)

    name_fields = ('FirstName', 'LastName1', 'LastName2')
    # A state contributes to the field whose suffix its name ends with; the
    # particle states share the suffix of the name part they belong to.
    suffix_to_field = (
        ('Name', 'FirstName'),
        ('Name1', 'LastName1'),
        ('Name2', 'LastName2'),
    )

    for _, expected in tagged_names.items():
        print('Observation: ' + expected['observation'])
        normalized = utils.normalize(expected['observation'], config.text_case)
        words = re.findall(config.word_pattern, normalized)
        token_sequence = [
            utils.to_token(word, config.token_length) for word in words
        ]

        try:
            # Assign each word to the field of the state at the same position
            # on the maximum-a-posteriori path.
            collected = {field: [] for field in name_fields}
            map_path = hmm.maximum_a_posteriori(token_sequence)[1]
            for position, (_, state) in enumerate(map_path):
                for suffix, field in suffix_to_field:
                    if state.name.endswith(suffix):
                        collected[field].append(words[position])

            # compare results with tagged names
            test_dict = {
                field: ' '.join(collected[field]).rstrip()
                for field in name_fields
            }
            print('Parsed: ' + str(test_dict))

            # Probability of this sequence
            sequence_probability = math.e ** hmm.forward(token_sequence)[
                len(token_sequence), hmm.end_index]
            print('P(sequence) = ' + str(sequence_probability))

            mismatches = [
                field + ' differs. '
                for field in name_fields
                if test_dict[field] != expected[field]
            ]
            if mismatches:
                parse_errors += 1
                result = ''.join(mismatches)
            else:
                result = 'Correct.'
            print('Result: ' + result)
        except ValueError as error:
            # Only ValueError is treated as a countable failure; it is
            # reported and the run continues with the next observation.
            print(error)
            value_errors += 1
        print('--')

    # Final statistics
    print('Summary\n=======')
    print('Number of observations: ' + str(len(tagged_names)))
    print('Parse errors:' + str(parse_errors))
    print('Value errors: ' + str(value_errors))