Example #1
# Assumes pomegranate's HMM API, which provides HiddenMarkovModel, DiscreteDistribution and State.
from pomegranate import HiddenMarkovModel, DiscreteDistribution, State


def build_an_hmm_example():
    # Each DiscreteDistribution defines the emission probabilities for one state:
    # the probability of observing each character while the system is in that state.
    d1 = DiscreteDistribution({'A': 0.35, 'C': 0.20, 'G': 0.05, 'T': 0.40})
    d2 = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})
    d3 = DiscreteDistribution({'A': 0.10, 'C': 0.40, 'G': 0.40, 'T': 0.10})

    s1 = State(d1, name="s1")
    s2 = State(d2, name="s2")
    s3 = State(d3, name="s3")

    model = HiddenMarkovModel('example')
    model.add_states([s1, s2, s3])
    model.add_transition(model.start, s1, 0.90)
    model.add_transition(model.start, s2, 0.10)
    model.add_transition(s1, s1, 0.80)
    model.add_transition(s1, s2, 0.20)
    model.add_transition(s2, s2, 0.90)
    model.add_transition(s2, s3, 0.10)
    model.add_transition(s3, s3, 0.70)
    model.add_transition(s3, model.end, 0.30)
    model.bake()

    for state in model.states:
        print(state.name)
    model.plot()
    #print(model.log_probability(list('ACGACTATTCGAT')))

    #print(", ".join(state.name for i, state in model.viterbi(list('ACGACTATTCGAT'))[1]))

    print("forward:", model.forward(list('ACG')))
Example #2
model.add_transition(s2, s1, 0.4)
model.add_transition(s2, s2, 0.4)
model.add_transition(s2, s3, 0.2)
model.add_transition(s3, s1, 0.05)
model.add_transition(s3, s2, 0.15)
model.add_transition(s3, s3, 0.8)
model.bake()

sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]

print model.is_infinite()

print "Algorithms On Infinite Model"
sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]
print "Forward"
print model.forward(sequence)

print "\n".join(state.name for state in model.states)
print "Backward"
print model.backward(sequence)

print "Forward-Backward"
trans, emissions = model.forward_backward(sequence)
print trans
print emissions

print "Viterbi"
prob, states = model.viterbi(sequence)
print "Prob: {}".format(prob)
print "\n".join(state[1].name for state in states)
print
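
# Note: this fragment uses the older Python 2 / yahmm-style API and begins after
# the model and the states s1, s2, s3 were created. The real-valued observations
# suggest continuous emissions; a minimal sketch of the kind of setup the fragment
# assumes (the import and every distribution and probability below are illustrative
# assumptions, not recovered from the original source):
from yahmm import Model, State, NormalDistribution  # assumed import for this API

s1 = State(NormalDistribution(5.0, 2.0), name="s1")
s2 = State(NormalDistribution(15.0, 2.0), name="s2")
s3 = State(NormalDistribution(25.0, 2.0), name="s3")

model = Model("infinite-example")
model.add_state(s1)
model.add_state(s2)
model.add_state(s3)
model.add_transition(model.start, s1, 0.6)
model.add_transition(model.start, s2, 0.3)
model.add_transition(model.start, s3, 0.1)
model.add_transition(s1, s1, 0.8)
model.add_transition(s1, s2, 0.15)
model.add_transition(s1, s3, 0.05)
# No transitions into model.end are added here, consistent with is_infinite()
# printing True in the fragment above.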
Example #3

# re-order the rows/columns to match the specified column order
transitions = model.dense_transition_matrix()[:, order_index][order_index, :]
print("The state transition matrix, P(Xt|Xt-1):\n")
print(transitions)
print("\nThe transition probability from Rainy to Sunny is {:.0f}get_ipython().run_line_magic("".format(100", " * transitions[2, 1]))")


# TODO: input a sequence of 'yes'/'no' values in the list below for testing
observations = ['yes', 'no', 'yes']

assert len(observations) > 0, "You need to choose a sequence of 'yes'/'no' observations to test"

# TODO: use model.forward() to calculate the forward matrix of the observed sequence,
# and then use np.exp() to convert from log-likelihood to likelihood
forward_matrix = np.exp(model.forward(observations))

# TODO: use model.log_probability() to calculate the all-paths likelihood of the
# observed sequence and then use np.exp() to convert log-likelihood to likelihood
probability_percentage = np.exp(model.log_probability(observations))

# Display the forward probabilities
print("         " + "".join(s.name.center(len(s.name)+6) for s in model.states))
for i in range(len(observations) + 1):
    print(" <start> " if i==0 else observations[i - 1].center(9), end="")
    print("".join("{:.0f}get_ipython().run_line_magic("".format(100", " * forward_matrix[i, j]).center(len(s.name) + 6)")
                  for j, s in enumerate(model.states)))

print("\nThe likelihood over all possible paths " + \
      "of this model producing the sequence {} is {:.2f}get_ipython().run_line_magic("\n\n"", "")
      .format(observations, 100 * probability_percentage))
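
# The fragments in this example assume a two-state Rainy/Sunny model over
# 'yes'/'no' observations built earlier in the source. A minimal illustrative
# setup (every probability below is an assumption, not the original's values):
import numpy as np
from pomegranate import HiddenMarkovModel, DiscreteDistribution, State

sunny = State(DiscreteDistribution({'yes': 0.1, 'no': 0.9}), name="Sunny")
rainy = State(DiscreteDistribution({'yes': 0.8, 'no': 0.2}), name="Rainy")

model = HiddenMarkovModel("weather")
model.add_states([sunny, rainy])
model.add_transition(model.start, sunny, 0.5)
model.add_transition(model.start, rainy, 0.5)
model.add_transition(sunny, sunny, 0.8)
model.add_transition(sunny, rainy, 0.2)
model.add_transition(rainy, rainy, 0.6)
model.add_transition(rainy, sunny, 0.4)
model.bake()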
Example #4
model.add_transition(state, state, 0.4)
model.add_transition(state, state2, 0.4)
model.add_transition(state2, state2, 0.4)
model.add_transition(state2, state, 0.4)

model.add_transition(model.start, state, 0.5)
model.add_transition(model.start, state2, 0.5)
model.add_transition(state, model.end, 0.2)
model.add_transition(state2, model.end, 0.2)

model.bake()
sequence = model.sample()

print sequence
print
print model.forward(sequence)[ len(sequence), model.end_index ]
print model.backward(sequence)[0,model.start_index]
print
trans, ems =  model.forward_backward(sequence)
print trans
print ems
print
model.train( [ sequence ] )

print
print model.forward(sequence)[ len(sequence), model.end_index ]
print model.backward(sequence)[0,model.start_index]
print
trans, ems = model.forward_backward(sequence)
print trans
print ems
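
# A quick sanity-check sketch using the same calls as above: the forward entry at
# [len(sequence), end_index] and the backward entry at [0, start_index] are both
# log P(sequence), so the two values printed above should agree.
logp_forward = model.forward(sequence)[len(sequence), model.end_index]
logp_backward = model.backward(sequence)[0, model.start_index]
print(logp_forward - logp_backward)  # expected to be ~0.0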
Example #5
# Transition matrix, with 0.05 subtracted from each probability to add to
# the probability of exiting the hmm
model.add_transition(rainy, rainy, 0.65)
model.add_transition(rainy, sunny, 0.25)
model.add_transition(sunny, rainy, 0.35)
model.add_transition(sunny, sunny, 0.55)

# Add transitions to the end of the model
model.add_transition(rainy, model.end, 0.1)
model.add_transition(sunny, model.end, 0.1)

# Finalize the model structure
model.bake(verbose=True)

# Let's sample from this model.
print model.sample()

# Let's call Bob every hour and see what he's doing!
# (aka build up a sequence of observations)
sequence = ['walk', 'shop', 'clean', 'clean', 'clean', 'walk', 'clean']

# What is the probability of seeing this sequence?
print "Probability of Sequence: ", \
 math.e**model.forward( sequence )[ len(sequence), model.end_index ]
print "Probability of Cleaning at Time Step 3 Given This Sequence: ", \
 math.e**model.forward_backward( sequence )[1][ 2, model.states.index( rainy ) ]
print "Probability of the Sequence Given It's Sunny at Time Step 4: ", \
 math.e**model.backward( sequence )[ 3, model.states.index( sunny ) ]

print " ".join(state.name
               for i, state in model.maximum_a_posteriori(sequence)[1])
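
# This fragment also begins mid-model: the `rainy` and `sunny` states must exist
# already, and the comment above explains that 0.05 was shaved off each transition
# to leave room for the end transitions. A minimal sketch of such a setup (the
# import, emission probabilities and start transitions are illustrative assumptions):
from yahmm import Model, State, DiscreteDistribution  # assumed import for this Python 2-era API

rainy = State(DiscreteDistribution({'walk': 0.1, 'shop': 0.4, 'clean': 0.5}), name="Rainy")
sunny = State(DiscreteDistribution({'walk': 0.6, 'shop': 0.3, 'clean': 0.1}), name="Sunny")

model = Model("bob")
model.add_state(rainy)
model.add_state(sunny)
model.add_transition(model.start, rainy, 0.6)
model.add_transition(model.start, sunny, 0.4)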
Example #6
# Transition matrix, with 0.05 subtracted from each probability to add to
# the probability of exiting the hmm
model.add_transition( rainy, rainy, 0.65 )
model.add_transition( rainy, sunny, 0.25 )
model.add_transition( sunny, rainy, 0.35 )
model.add_transition( sunny, sunny, 0.55 )

# Add transitions to the end of the model
model.add_transition( rainy, model.end, 0.1 )
model.add_transition( sunny, model.end, 0.1 )

# Finalize the model structure
model.bake( verbose=True )

# Let's sample from this model.
print model.sample()

# Let's call Bob every hour and see what he's doing!
# (aka build up a sequence of observations)
sequence = [ 'walk', 'shop', 'clean', 'clean', 'clean', 'walk', 'clean' ]

# What is the probability of seeing this sequence?
print "Probability of Sequence: ", \
	math.e**model.forward( sequence )[ len(sequence), model.end_index ]
print "Probability of Cleaning at Time Step 3 Given This Sequence: ", \
	math.e**model.forward_backward( sequence )[1][ 2, model.states.index( rainy ) ]
print "Probability of the Sequence Given It's Sunny at Time Step 4: ", \
	math.e**model.backward( sequence )[ 3, model.states.index( sunny ) ]

print " ".join( state.name for i, state in model.maximum_a_posteriori( sequence )[1] )
Example #7
model.add_transition( s2, s2, 0.4 )
model.add_transition( s2, s3, 0.2 )
model.add_transition( s3, s1, 0.05 )
model.add_transition( s3, s2, 0.15 )
model.add_transition( s3, s3, 0.8 )
model.bake()

sequence = [ 4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1 ]


print model.is_infinite()

print "Algorithms On Infinite Model"
sequence = [ 4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1 ]
print "Forward"
print model.forward( sequence )

print "\n".join( state.name for state in model.states )
print "Backward"
print model.backward( sequence )

print "Forward-Backward"
trans, emissions = model.forward_backward( sequence )
print trans
print emissions

print "Viterbi"
prob, states = model.viterbi( sequence )
print "Prob: {}".format( prob )
print "\n".join( state[1].name for state in states )
print
Example #8
def main():
    """Create a Hidden Markov Model."""
    # Name of the model
    model = HiddenMarkovModel(name="Names")

    # Load probability distributions for each token set
    fn_dist = utils.load_dict_from_file(config.input_dir +
                                        config.token_files['first_name'])
    pfn_dist = utils.load_dict_from_file(config.input_dir +
                                         config.token_files['part_first_name'])
    ln1_dist = utils.load_dict_from_file(config.input_dir +
                                         config.token_files['last_name1'])
    pln1_dist = utils.load_dict_from_file(
        config.input_dir + config.token_files['part_last_name1'])
    ln2_dist = utils.load_dict_from_file(config.input_dir +
                                         config.token_files['last_name2'])
    pln2_dist = utils.load_dict_from_file(
        config.input_dir + config.token_files['part_last_name2'])

    # Calculate discrete distributions
    fn_dist = discrete_distribution(fn_dist, pfn_dist, ln1_dist, pln1_dist,
                                    ln2_dist, pln2_dist)
    pfn_dist = discrete_distribution(pfn_dist, fn_dist, ln1_dist, pln1_dist,
                                     ln2_dist, pln2_dist)
    ln1_dist = discrete_distribution(ln1_dist, fn_dist, pfn_dist, pln1_dist,
                                     ln2_dist, pln2_dist)
    pln1_dist = discrete_distribution(pln1_dist, fn_dist, pfn_dist, ln1_dist,
                                      ln2_dist, pln2_dist)
    ln2_dist = discrete_distribution(ln2_dist, fn_dist, pfn_dist, ln1_dist,
                                     pln1_dist, pln2_dist)
    pln2_dist = discrete_distribution(pln2_dist, fn_dist, pfn_dist, ln1_dist,
                                      pln1_dist, ln2_dist)

    # States of the model
    fn = State(DiscreteDistribution(fn_dist), name='FirstName')
    pfn = State(DiscreteDistribution(pfn_dist), name='ParticleFirstName')
    ln1 = State(DiscreteDistribution(ln1_dist), name='LastName1')
    pln1 = State(DiscreteDistribution(pln1_dist), name='ParticleLastName1')
    ln2 = State(DiscreteDistribution(ln2_dist), name='LastName2')
    pln2 = State(DiscreteDistribution(pln2_dist), name='ParticleLastName2')

    # Transition probabilities
    if config.graph_type == config.graph_types[0]:
        # Graph for FirstName LastName1 LastName2 sequences
        model.add_transition(model.start, fn, 1)
        model.add_transition(fn, fn, 0.256251576)
        model.add_transition(fn, pfn, 0.028472397)
        model.add_transition(fn, ln1, 0.704144114)
        model.add_transition(fn, pln1, 0.011131913)
        model.add_transition(pfn, pfn, 0.150)
        model.add_transition(pfn, fn, 0.850)
        model.add_transition(ln1, ln1, 0.007015434)
        model.add_transition(ln1, pln1, 0.007017087)
        model.add_transition(ln1, ln2, 0.960638112)
        model.add_transition(ln1, pln2, 0.014719859)
        model.add_transition(ln1, model.end, 0.010609508)
        model.add_transition(pln1, pln1, 0.150)
        model.add_transition(pln1, ln1, 0.850)
        model.add_transition(ln2, ln2, 0.004290151)
        model.add_transition(ln2, pln2, 0.006801967)
        model.add_transition(ln2, model.end, 0.988907882)
        model.add_transition(pln2, pln2, 0.150)
        model.add_transition(pln2, ln2, 0.850)
    else:
        # Graph for LastName1 LastName2 FirstName sequences
        model.add_transition(model.start, ln1, 0.984436899)
        model.add_transition(model.start, pln1, 0.015563101)
        model.add_transition(ln1, ln1, 0.007015434)
        model.add_transition(ln1, pln1, 0.007017087)
        model.add_transition(ln1, ln2, 0.960638112)
        model.add_transition(ln1, pln2, 0.014719859)
        model.add_transition(ln1, fn, 0.010609508)
        model.add_transition(pln1, pln1, 0.150)
        model.add_transition(pln1, ln1, 0.850)
        model.add_transition(ln2, ln2, 0.004290151)
        model.add_transition(ln2, pln2, 0.006801967)
        model.add_transition(ln2, fn, 0.988907882)
        model.add_transition(pln2, pln2, 0.150)
        model.add_transition(pln2, ln2, 0.850)
        model.add_transition(fn, fn, 0.256251576)
        model.add_transition(fn, pfn, 0.028472397)
        model.add_transition(fn, model.end, 0.715276027)
        model.add_transition(pfn, pfn, 0.150)
        model.add_transition(pfn, fn, 0.850)
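
    # For intuition about the first graph's numbers: the most common path
    # start -> FirstName -> LastName1 -> LastName2 -> end has prior probability
    # 1 * 0.704144114 * 0.960638112 * 0.988907882, roughly 0.669, before emissions.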

    # "Bake" the model, finalizing its structure
    model.bake(verbose=True)

    # Testing the model
    parse_errors = 0
    value_errors = 0
    tagged_names = utils.load_dict_from_file(config.test_set_file)
    for key, value in tagged_names.items():
        print('Observation: ' + value['observation'])
        norm_observation = utils.normalize(value['observation'],
                                           config.text_case)
        words = re.findall(config.word_pattern, norm_observation)
        token_sequence = []
        for word in words:
            token_sequence.append(utils.to_token(word, config.token_length))

        test_dict = {
            'FirstName': '',
            'LastName1': '',
            'LastName2': '',
        }
        try:
            j = 0
            for i, state in model.maximum_a_posteriori(token_sequence)[1]:
                if state.name[-4:] == 'Name':
                    test_dict['FirstName'] += words[j] + ' '
                if state.name[-5:] == 'Name1':
                    test_dict['LastName1'] += words[j] + ' '
                if state.name[-5:] == 'Name2':
                    test_dict['LastName2'] += words[j] + ' '
                j += 1

            # compare results with tagged names
            test_dict['FirstName'] = test_dict['FirstName'].rstrip()
            test_dict['LastName1'] = test_dict['LastName1'].rstrip()
            test_dict['LastName2'] = test_dict['LastName2'].rstrip()
            print('Parsed: ' + str(test_dict))

            # Probability of this sequence
            print('P(sequence) = ' + str(math.e**model.forward(token_sequence)[
                len(token_sequence), model.end_index]))

            result = ''
            for state in ['FirstName', 'LastName1', 'LastName2']:
                if test_dict[state] == value[state]:
                    result += ''
                else:
                    result += state + ' differs. '
            if result == '':
                result = 'Correct.'
            else:
                parse_errors += 1
            print('Result: ' + result)
        except ValueError as ve:
            print(ve)
            value_errors += 1

        print('--')

    # Final statistics
    print('Summary\n=======')
    print('Number of observations: ' + str(len(tagged_names)))
    print('Parse errors: ' + str(parse_errors))
    print('Value errors: ' + str(value_errors))
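
# A minimal entry-point sketch (an assumption; the original excerpt does not show
# one). Running main() also requires the project-specific `utils` and `config`
# modules and the `re`/`math` imports used above.
if __name__ == '__main__':
    main()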
    """
Example #9
model.add_transition(state, state, 0.4)
model.add_transition(state, state2, 0.4)
model.add_transition(state2, state2, 0.4)
model.add_transition(state2, state, 0.4)

model.add_transition(model.start, state, 0.5)
model.add_transition(model.start, state2, 0.5)
model.add_transition(state, model.end, 0.2)
model.add_transition(state2, model.end, 0.2)

model.bake()
sequence = model.sample()

print sequence
print
print model.forward(sequence)[len(sequence), model.end_index]
print model.backward(sequence)[0, model.start_index]
print
trans, ems = model.forward_backward(sequence)
print trans
print ems
print
model.train([sequence])

print
print model.forward(sequence)[len(sequence), model.end_index]
print model.backward(sequence)[0, model.start_index]
print
trans, ems = model.forward_backward(sequence)
print trans
print ems