def build_an_hmm_example():
    """Build a small 3-state DNA HMM and run the forward algorithm on it.

    Each DiscreteDistribution below is the emission distribution for one
    state: the probability of observing each character ('A'/'C'/'G'/'T')
    while the model is in that state.
    """
    emit_1 = DiscreteDistribution({'A': 0.35, 'C': 0.20, 'G': 0.05, 'T': 0.40})
    emit_2 = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})
    emit_3 = DiscreteDistribution({'A': 0.10, 'C': 0.40, 'G': 0.40, 'T': 0.10})

    state_1 = State(emit_1, name="s1")
    state_2 = State(emit_2, name="s2")
    state_3 = State(emit_3, name="s3")

    model = HiddenMarkovModel('example')
    model.add_states([state_1, state_2, state_3])

    # Left-to-right topology: start -> {s1, s2}, s1 -> s2 -> s3 -> end,
    # each state with a self-loop.
    model.add_transition(model.start, state_1, 0.90)
    model.add_transition(model.start, state_2, 0.10)
    model.add_transition(state_1, state_1, 0.80)
    model.add_transition(state_1, state_2, 0.20)
    model.add_transition(state_2, state_2, 0.90)
    model.add_transition(state_2, state_3, 0.10)
    model.add_transition(state_3, state_3, 0.70)
    model.add_transition(state_3, model.end, 0.30)
    model.bake()

    # List every state (including the silent start/end added by bake).
    for state in model.states:
        print(state.name)
    model.plot()

    # print(model.log_probability(list('ACGACTATTCGAT')))
    # print(", ".join(state.name for i, state in model.viterbi(list('ACGACTATTCGAT'))[1]))
    print("forward:", model.forward(list('ACG')))
# Fully connect s2/s3 back into the chain, then run the standard HMM
# algorithms (forward, backward, forward-backward, Viterbi) on a fixed
# observation sequence.
# Fixed: Python 2 print statements (syntax errors under Python 3, and
# inconsistent with the Python 3 sections of this file) converted to the
# print() function; output is unchanged.
model.add_transition(s2, s1, 0.4)
model.add_transition(s2, s2, 0.4)
model.add_transition(s2, s3, 0.2)
model.add_transition(s3, s1, 0.05)
model.add_transition(s3, s2, 0.15)
model.add_transition(s3, s3, 0.8)
model.bake()

sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]
# An "infinite" model has no transition to model.end, so sequences of any
# length are possible.
print(model.is_infinite())

print("Algorithms On Infinite Model")
sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]

print("Forward")
print(model.forward(sequence))
print("\n".join(state.name for state in model.states))

print("Backward")
print(model.backward(sequence))

print("Forward-Backward")
trans, emissions = model.forward_backward(sequence)
print(trans)
print(emissions)

print("Viterbi")
prob, states = model.viterbi(sequence)
print("Prob: {}".format(prob))
print("\n".join(state[1].name for state in states))
print()
# re-order the rows/columns to match the specified column order
transitions = model.dense_transition_matrix()[:, order_index][order_index, :]
print("The state transition matrix, P(Xt|Xt-1):\n")
print(transitions)
# Fixed: the three "{:.0f}%" / "{:.2f}%" format strings below had been
# mangled by an nbconvert export (the literal '%' was misread as an
# IPython line magic and turned into get_ipython().run_line_magic(...));
# restored to plain str.format calls.
print("\nThe transition probability from Rainy to Sunny is {:.0f}%".format(100 * transitions[2, 1]))

# TODO: input a sequence of 'yes'/'no' values in the list below for testing
observations = ['yes', 'no', 'yes']
assert len(observations) > 0, "You need to choose a sequence of 'yes'/'no' observations to test"

# Use model.forward() to calculate the forward matrix of the observed sequence,
# then np.exp() to convert from log-likelihood to likelihood.
forward_matrix = np.exp(model.forward(observations))

# model.log_probability() gives the all-paths log-likelihood of the observed
# sequence; np.exp() converts it back to a probability.
probability_percentage = np.exp(model.log_probability(observations))

# Display the forward probabilities: one header row of state names, then one
# row per observation (plus the <start> row at i == 0).
print(" " + "".join(s.name.center(len(s.name) + 6) for s in model.states))
for i in range(len(observations) + 1):
    print(" <start> " if i == 0 else observations[i - 1].center(9), end="")
    print("".join("{:.0f}%".format(100 * forward_matrix[i, j]).center(len(s.name) + 6)
                  for j, s in enumerate(model.states)))

print("\nThe likelihood over all possible paths " +
      "of this model producing the sequence {} is {:.2f}%\n\n"
      .format(observations, 100 * probability_percentage))
# Wire two states into a fully connected model with explicit start/end,
# sample a sequence from it, and compare forward/backward scores and the
# forward-backward matrices before and after one round of training.
# Fixed: Python 2 print statements converted to the print() function
# (they fail to parse under Python 3 and clash with the rest of the file).
model.add_transition(state, state, 0.4)
model.add_transition(state, state2, 0.4)
model.add_transition(state2, state2, 0.4)
model.add_transition(state2, state, 0.4)
model.add_transition(model.start, state, 0.5)
model.add_transition(model.start, state2, 0.5)
model.add_transition(state, model.end, 0.2)
model.add_transition(state2, model.end, 0.2)
model.bake()

sequence = model.sample()
print(sequence)
print()

# Log-probability of the sequence: forward at (T, end) must equal
# backward at (0, start).
print(model.forward(sequence)[len(sequence), model.end_index])
print(model.backward(sequence)[0, model.start_index])
print()

trans, ems = model.forward_backward(sequence)
print(trans)
print(ems)

# train() returns the training improvement, printed for inspection.
print(model.train([sequence]))
print()

# Same diagnostics after training.
print(model.forward(sequence)[len(sequence), model.end_index])
print(model.backward(sequence)[0, model.start_index])
print()

trans, ems = model.forward_backward(sequence)
print(trans)
print(ems)
# the probability of exiting the hmm
# Fixed: Python 2 print statements converted to the print() function
# (syntax errors under Python 3, inconsistent with this file's other code).
model.add_transition(rainy, rainy, 0.65)
model.add_transition(rainy, sunny, 0.25)
model.add_transition(sunny, rainy, 0.35)
model.add_transition(sunny, sunny, 0.55)

# Add transitions to the end of the model
model.add_transition(rainy, model.end, 0.1)
model.add_transition(sunny, model.end, 0.1)

# Finalize the model structure
model.bake(verbose=True)

# Lets sample from this model.
print(model.sample())

# Lets call Bob every hour and see what he's doing!
# (aka build up a sequence of observations)
sequence = ['walk', 'shop', 'clean', 'clean', 'clean', 'walk', 'clean']

# What is the probability of seeing this sequence?
# math.e**log_prob converts the log-space scores back to probabilities.
print("Probability of Sequence: ",
      math.e**model.forward(sequence)[len(sequence), model.end_index])
print("Probability of Cleaning at Time Step 3 Given This Sequence: ",
      math.e**model.forward_backward(sequence)[1][2, model.states.index(rainy)])
print("Probability of the Sequence Given It's Sunny at Time Step 4: ",
      math.e**model.backward(sequence)[3, model.states.index(sunny)])
print(" ".join(state.name for i, state in model.maximum_a_posteriori(sequence)[1]))
# Transition matrix, with 0.05 subtracted from each probability to add to
# the probability of exiting the hmm
# Fixed: Python 2 print statements converted to the print() function
# (syntax errors under Python 3, inconsistent with this file's other code);
# call spacing normalized to PEP 8.
model.add_transition(rainy, rainy, 0.65)
model.add_transition(rainy, sunny, 0.25)
model.add_transition(sunny, rainy, 0.35)
model.add_transition(sunny, sunny, 0.55)

# Add transitions to the end of the model
model.add_transition(rainy, model.end, 0.1)
model.add_transition(sunny, model.end, 0.1)

# Finalize the model structure
model.bake(verbose=True)

# Lets sample from this model.
print(model.sample())

# Lets call Bob every hour and see what he's doing!
# (aka build up a sequence of observations)
sequence = ['walk', 'shop', 'clean', 'clean', 'clean', 'walk', 'clean']

# What is the probability of seeing this sequence?
# math.e**log_prob converts the log-space scores back to probabilities.
print("Probability of Sequence: ",
      math.e**model.forward(sequence)[len(sequence), model.end_index])
print("Probability of Cleaning at Time Step 3 Given This Sequence: ",
      math.e**model.forward_backward(sequence)[1][2, model.states.index(rainy)])
print("Probability of the Sequence Given It's Sunny at Time Step 4: ",
      math.e**model.backward(sequence)[3, model.states.index(sunny)])
print(" ".join(state.name for i, state in model.maximum_a_posteriori(sequence)[1]))
# Connect s2/s3 back into the chain, then run the standard HMM algorithms
# (forward, backward, forward-backward, Viterbi) on a fixed sequence.
# Fixed: Python 2 print statements converted to the print() function
# (syntax errors under Python 3, inconsistent with this file's other code);
# call spacing normalized to PEP 8.
model.add_transition(s2, s2, 0.4)
model.add_transition(s2, s3, 0.2)
model.add_transition(s3, s1, 0.05)
model.add_transition(s3, s2, 0.15)
model.add_transition(s3, s3, 0.8)
model.bake()

sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]
# An "infinite" model has no transition to model.end.
print(model.is_infinite())

print("Algorithms On Infinite Model")
sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]

print("Forward")
print(model.forward(sequence))
print("\n".join(state.name for state in model.states))

print("Backward")
print(model.backward(sequence))

print("Forward-Backward")
trans, emissions = model.forward_backward(sequence)
print(trans)
print(emissions)

print("Viterbi")
prob, states = model.viterbi(sequence)
print("Prob: {}".format(prob))
print("\n".join(state[1].name for state in states))
print()
def main():
    """Create a Hidden Markov Model.

    Builds a name-parsing HMM with six states (first name, two last names,
    and a "particle" state for each), wires up one of two transition graphs
    selected by config.graph_type, then evaluates the model against a
    tagged test set and prints per-example results and summary statistics.
    """
    # Name of the model
    model = HiddenMarkovModel(name="Names")

    # Load probability distributions for each token set
    fn_dist = utils.load_dict_from_file(config.input_dir + config.token_files['first_name'])
    pfn_dist = utils.load_dict_from_file(config.input_dir + config.token_files['part_first_name'])
    ln1_dist = utils.load_dict_from_file(config.input_dir + config.token_files['last_name1'])
    pln1_dist = utils.load_dict_from_file(
        config.input_dir + config.token_files['part_last_name1'])
    ln2_dist = utils.load_dict_from_file(config.input_dir + config.token_files['last_name2'])
    pln2_dist = utils.load_dict_from_file(
        config.input_dir + config.token_files['part_last_name2'])

    # Calculate discrete distributions.
    # NOTE(review): each call passes the target token set first followed by
    # the five others — presumably to normalize against competing sets;
    # confirm against discrete_distribution's definition.
    fn_dist = discrete_distribution(fn_dist, pfn_dist, ln1_dist, pln1_dist, ln2_dist, pln2_dist)
    pfn_dist = discrete_distribution(pfn_dist, fn_dist, ln1_dist, pln1_dist, ln2_dist, pln2_dist)
    ln1_dist = discrete_distribution(ln1_dist, fn_dist, pfn_dist, pln1_dist, ln2_dist, pln2_dist)
    pln1_dist = discrete_distribution(pln1_dist, fn_dist, pfn_dist, ln1_dist, ln2_dist, pln2_dist)
    ln2_dist = discrete_distribution(ln2_dist, fn_dist, pfn_dist, ln1_dist, pln1_dist, pln2_dist)
    pln2_dist = discrete_distribution(pln2_dist, fn_dist, pfn_dist, ln1_dist, pln1_dist, ln2_dist)

    # States of the model
    fn = State(DiscreteDistribution(fn_dist), name='FirstName')
    pfn = State(DiscreteDistribution(pfn_dist), name='ParticleFirstName')
    ln1 = State(DiscreteDistribution(ln1_dist), name='LastName1')
    pln1 = State(DiscreteDistribution(pln1_dist), name='ParticleLastName1')
    ln2 = State(DiscreteDistribution(ln2_dist), name='LastName2')
    pln2 = State(DiscreteDistribution(pln2_dist), name='ParticleLastName2')

    # Transition probabilities
    if config.graph_type == config.graph_types[0]:
        # Graph for FirstName LastName1 LastName2 sequences
        model.add_transition(model.start, fn, 1)
        model.add_transition(fn, fn, 0.256251576)
        model.add_transition(fn, pfn, 0.028472397)
        model.add_transition(fn, ln1, 0.704144114)
        model.add_transition(fn, pln1, 0.011131913)
        model.add_transition(pfn, pfn, 0.150)
        model.add_transition(pfn, fn, 0.850)
        model.add_transition(ln1, ln1, 0.007015434)
        model.add_transition(ln1, pln1, 0.007017087)
        model.add_transition(ln1, ln2, 0.960638112)
        model.add_transition(ln1, pln2, 0.014719859)
        model.add_transition(ln1, model.end, 0.010609508)
        model.add_transition(pln1, pln1, 0.150)
        model.add_transition(pln1, ln1, 0.850)
        model.add_transition(ln2, ln2, 0.004290151)
        model.add_transition(ln2, pln2, 0.006801967)
        model.add_transition(ln2, model.end, 0.988907882)
        model.add_transition(pln2, pln2, 0.150)
        model.add_transition(pln2, ln2, 0.850)
    else:
        # Graph for LastName1 LastName2 FirstName sequences
        model.add_transition(model.start, ln1, 0.984436899)
        model.add_transition(model.start, pln1, 0.015563101)
        model.add_transition(ln1, ln1, 0.007015434)
        model.add_transition(ln1, pln1, 0.007017087)
        model.add_transition(ln1, ln2, 0.960638112)
        model.add_transition(ln1, pln2, 0.014719859)
        model.add_transition(ln1, fn, 0.010609508)
        model.add_transition(pln1, pln1, 0.150)
        model.add_transition(pln1, ln1, 0.850)
        model.add_transition(ln2, ln2, 0.004290151)
        model.add_transition(ln2, pln2, 0.006801967)
        model.add_transition(ln2, fn, 0.988907882)
        model.add_transition(pln2, pln2, 0.150)
        model.add_transition(pln2, ln2, 0.850)
        model.add_transition(fn, fn, 0.256251576)
        model.add_transition(fn, pfn, 0.028472397)
        model.add_transition(fn, model.end, 0.715276027)
        model.add_transition(pfn, pfn, 0.150)
        model.add_transition(pfn, fn, 0.850)

    # "Bake" the model, finalizing its structure
    model.bake(verbose=True)

    # Testing the model
    parse_errors = 0
    value_errors = 0
    tagged_names = utils.load_dict_from_file(config.test_set_file)
    for key, value in tagged_names.items():
        print('Observation: ' + value['observation'])
        # Normalize the raw observation and split it into word tokens.
        norm_observation = utils.normalize(value['observation'], config.text_case)
        words = re.findall(config.word_pattern, norm_observation)
        token_sequence = []
        for word in words:
            token_sequence.append(utils.to_token(word, config.token_length))
        test_dict = {
            'FirstName': '',
            'LastName1': '',
            'LastName2': '',
        }
        try:
            # Walk the most-likely state path; the suffix of each state name
            # decides which output field the corresponding word goes to
            # ('...Name' also matches the particle states on purpose).
            # NOTE(review): j is incremented for every state on the MAP path —
            # assumes the path contains exactly one state per word (no silent
            # states); confirm against maximum_a_posteriori's output.
            j = 0
            for i, state in model.maximum_a_posteriori(token_sequence)[1]:
                if state.name[-4:] == 'Name':
                    test_dict['FirstName'] += words[j] + ' '
                if state.name[-5:] == 'Name1':
                    test_dict['LastName1'] += words[j] + ' '
                if state.name[-5:] == 'Name2':
                    test_dict['LastName2'] += words[j] + ' '
                j += 1
            # compare results with tagged names
            test_dict['FirstName'] = test_dict['FirstName'].rstrip()
            test_dict['LastName1'] = test_dict['LastName1'].rstrip()
            test_dict['LastName2'] = test_dict['LastName2'].rstrip()
            print('Parsed: ' + str(test_dict))
            # Probability of this sequence
            print('P(sequence) = ' + str(math.e**model.forward(token_sequence)[
                len(token_sequence), model.end_index]))
            result = ''
            for state in ['FirstName', 'LastName1', 'LastName2']:
                if test_dict[state] == value[state]:
                    result += ''
                else:
                    result += state + ' differs. '
            if result == '':
                result = 'Correct.'
            else:
                parse_errors += 1
            print('Result: ' + result)
        except ValueError as ve:
            # e.g. a token that no state's distribution can emit
            print(ve)
            value_errors += 1
        print('--')

    # Final statistics
    print('Summary\n=======')
    print('Number of observations: ' + str(len(tagged_names)))
    print('Parse errors:' + str(parse_errors))
    print('Value errors: ' + str(value_errors))


# NOTE(review): the triple-quote below opens a string/comment block that
# continues past this chunk; left untouched.
"""
# Wire two states into a fully connected model with explicit start/end,
# sample a sequence from it, and compare forward/backward scores and the
# forward-backward matrices before and after one round of training.
# Fixed: Python 2 print statements converted to the print() function
# (they fail to parse under Python 3 and clash with the rest of the file).
model.add_transition(state, state, 0.4)
model.add_transition(state, state2, 0.4)
model.add_transition(state2, state2, 0.4)
model.add_transition(state2, state, 0.4)
model.add_transition(model.start, state, 0.5)
model.add_transition(model.start, state2, 0.5)
model.add_transition(state, model.end, 0.2)
model.add_transition(state2, model.end, 0.2)
model.bake()

sequence = model.sample()
print(sequence)
print()

# Log-probability of the sequence: forward at (T, end) must equal
# backward at (0, start).
print(model.forward(sequence)[len(sequence), model.end_index])
print(model.backward(sequence)[0, model.start_index])
print()

trans, ems = model.forward_backward(sequence)
print(trans)
print(ems)

# train() returns the training improvement, printed for inspection.
print(model.train([sequence]))
print()

# Same diagnostics after training.
print(model.forward(sequence)[len(sequence), model.end_index])
print(model.backward(sequence)[0, model.start_index])
print()

trans, ems = model.forward_backward(sequence)
print(trans)
print(ems)