Example 1
print "\n".join(state.name for state in model.states)
print "Backward"
print model.backward(sequence)

print "Forward-Backward"
trans, emissions = model.forward_backward(sequence)
print trans
print emissions

print "Viterbi"
prob, states = model.viterbi(sequence)
print "Prob: {}".format(prob)
print "\n".join(state[1].name for state in states)
print
print "MAP"
prob, states = model.maximum_a_posteriori(sequence)
print "Prob: {}".format(prob)
print "\n".join(state[1].name for state in states)

print "Showing that sampling can reproduce the original transition probs."
print "Should produce a matrix close to the following: "
print " [ [ 0.60, 0.10, 0.30 ] "
print "   [ 0.40, 0.40, 0.20 ] "
print "   [ 0.05, 0.15, 0.80 ] ] "
print
print "Tranition Matrix From 100000 Samples:"
sample, path = model.sample(100000, path=True)
trans = np.zeros((3, 3))

# `it` is assumed to be itertools (Python 2); izip pairs each state in the
# sampled path with its successor, skipping the silent start and end states
for state, n_state in it.izip(path[1:-2], path[2:-1]):
    state_name = int(state.name[1:]) - 1
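    # The listing is truncated here; a plausible completion (assuming the
    # hidden states are named "S1".."S3", as the name-to-index logic implies):
    n_state_name = int(n_state.name[1:]) - 1
    trans[state_name, n_state_name] += 1

# Row-normalize the counts into estimated transition probabilities
print trans / trans.sum(axis=1, keepdims=True)
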
Example 2
# Transition matrix, with 0.05 subtracted from each probability to add to
# the probability of exiting the HMM
model.add_transition(rainy, rainy, 0.65)
model.add_transition(rainy, sunny, 0.25)
model.add_transition(sunny, rainy, 0.35)
model.add_transition(sunny, sunny, 0.55)

# Add transitions to the end of the model
model.add_transition(rainy, model.end, 0.1)
model.add_transition(sunny, model.end, 0.1)

# Finalize the model structure
model.bake(verbose=True)

# Let's sample from this model.
print model.sample()

# Let's call Bob every hour and see what he's doing!
# (aka build up a sequence of observations)
sequence = ['walk', 'shop', 'clean', 'clean', 'clean', 'walk', 'clean']

# What is the probability of seeing this sequence?
print "Probability of Sequence: ", \
 math.e**model.forward( sequence )[ len(sequence), model.end_index ]
print "Probability of Cleaning at Time Step 3 Given This Sequence: ", \
 math.e**model.forward_backward( sequence )[1][ 2, model.states.index( rainy ) ]
print "Probability of the Sequence Given It's Sunny at Time Step 4: ", \
 math.e**model.backward( sequence )[ 3, model.states.index( sunny ) ]

print " ".join(state.name
               for i, state in model.maximum_a_posteriori(sequence)[1])
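
The excerpt above starts after the model and its two states were created; a minimal sketch of that missing setup, assuming the pomegranate/yahmm-style API used elsewhere in these examples and purely illustrative emission probabilities, might look like this:

from pomegranate import HiddenMarkovModel, DiscreteDistribution, State

# Two hidden states with discrete emission distributions over Bob's activities
# (the probability values here are assumptions, not from the original example)
rainy = State(DiscreteDistribution({'walk': 0.1, 'shop': 0.3, 'clean': 0.6}),
              name='Rainy')
sunny = State(DiscreteDistribution({'walk': 0.6, 'shop': 0.3, 'clean': 0.1}),
              name='Sunny')

model = HiddenMarkovModel(name='Rainy-Sunny')
model.add_states(rainy, sunny)

# Start transitions (assumed values); the excerpt above adds the remaining edges
model.add_transition(model.start, rainy, 0.5)
model.add_transition(model.start, sunny, 0.5)
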
Example 3
def main():
    """Create a Hidden Markov Model."""
    # Name of the model
    model = HiddenMarkovModel(name="Names")

    # Load probability distributions for each token set
    fn_dist = utils.load_dict_from_file(config.input_dir +
                                        config.token_files['first_name'])
    pfn_dist = utils.load_dict_from_file(config.input_dir +
                                         config.token_files['part_first_name'])
    ln1_dist = utils.load_dict_from_file(config.input_dir +
                                         config.token_files['last_name1'])
    pln1_dist = utils.load_dict_from_file(
        config.input_dir + config.token_files['part_last_name1'])
    ln2_dist = utils.load_dict_from_file(config.input_dir +
                                         config.token_files['last_name2'])
    pln2_dist = utils.load_dict_from_file(
        config.input_dir + config.token_files['part_last_name2'])

    # Calculate discrete distributions
    fn_dist = discrete_distribution(fn_dist, pfn_dist, ln1_dist, pln1_dist,
                                    ln2_dist, pln2_dist)
    pfn_dist = discrete_distribution(pfn_dist, fn_dist, ln1_dist, pln1_dist,
                                     ln2_dist, pln2_dist)
    ln1_dist = discrete_distribution(ln1_dist, fn_dist, pfn_dist, pln1_dist,
                                     ln2_dist, pln2_dist)
    pln1_dist = discrete_distribution(pln1_dist, fn_dist, pfn_dist, ln1_dist,
                                      ln2_dist, pln2_dist)
    ln2_dist = discrete_distribution(ln2_dist, fn_dist, pfn_dist, ln1_dist,
                                     pln1_dist, pln2_dist)
    pln2_dist = discrete_distribution(pln2_dist, fn_dist, pfn_dist, ln1_dist,
                                      pln1_dist, ln2_dist)

    # States of the model
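    # Each *_dist is a dict mapping name tokens to their probabilities, which
    # is the form DiscreteDistribution accepts directly, e.g. (hypothetical
    # tokens and values): DiscreteDistribution({'mar': 0.012, 'jos': 0.009})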
    fn = State(DiscreteDistribution(fn_dist), name='FirstName')
    pfn = State(DiscreteDistribution(pfn_dist), name='ParticleFirstName')
    ln1 = State(DiscreteDistribution(ln1_dist), name='LastName1')
    pln1 = State(DiscreteDistribution(pln1_dist), name='ParticleLastName1')
    ln2 = State(DiscreteDistribution(ln2_dist), name='LastName2')
    pln2 = State(DiscreteDistribution(pln2_dist), name='ParticleLastName2')

    # Transition probabilities
    if config.graph_type == config.graph_types[0]:
        # Graph for FirstName LastName1 LastName2 sequences
        model.add_transition(model.start, fn, 1)
        model.add_transition(fn, fn, 0.256251576)
        model.add_transition(fn, pfn, 0.028472397)
        model.add_transition(fn, ln1, 0.704144114)
        model.add_transition(fn, pln1, 0.011131913)
        model.add_transition(pfn, pfn, 0.150)
        model.add_transition(pfn, fn, 0.850)
        model.add_transition(ln1, ln1, 0.007015434)
        model.add_transition(ln1, pln1, 0.007017087)
        model.add_transition(ln1, ln2, 0.960638112)
        model.add_transition(ln1, pln2, 0.014719859)
        model.add_transition(ln1, model.end, 0.010609508)
        model.add_transition(pln1, pln1, 0.150)
        model.add_transition(pln1, ln1, 0.850)
        model.add_transition(ln2, ln2, 0.004290151)
        model.add_transition(ln2, pln2, 0.006801967)
        model.add_transition(ln2, model.end, 0.988907882)
        model.add_transition(pln2, pln2, 0.150)
        model.add_transition(pln2, ln2, 0.850)
    else:
        # Graph for LastName1 LastName2 FirstName sequences
        model.add_transition(model.start, ln1, 0.984436899)
        model.add_transition(model.start, pln1, 0.015563101)
        model.add_transition(ln1, ln1, 0.007015434)
        model.add_transition(ln1, pln1, 0.007017087)
        model.add_transition(ln1, ln2, 0.960638112)
        model.add_transition(ln1, pln2, 0.014719859)
        model.add_transition(ln1, fn, 0.010609508)
        model.add_transition(pln1, pln1, 0.150)
        model.add_transition(pln1, ln1, 0.850)
        model.add_transition(ln2, ln2, 0.004290151)
        model.add_transition(ln2, pln2, 0.006801967)
        model.add_transition(ln2, fn, 0.988907882)
        model.add_transition(pln2, pln2, 0.150)
        model.add_transition(pln2, ln2, 0.850)
        model.add_transition(fn, fn, 0.256251576)
        model.add_transition(fn, pfn, 0.028472397)
        model.add_transition(fn, model.end, 0.715276027)
        model.add_transition(pfn, pfn, 0.150)
        model.add_transition(pfn, fn, 0.850)
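
    # Note added for clarity: in both graphs the outgoing probabilities of
    # every state sum to 1 (e.g. for ln1: 0.007015434 + 0.007017087
    # + 0.960638112 + 0.014719859 + 0.010609508 = 1.0), so no renormalization
    # is expected when the model is baked.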

    # "Bake" the model, finalizing its structure
    model.bake(verbose=True)

    # Testing the model
    parse_errors = 0
    value_errors = 0
    tagged_names = utils.load_dict_from_file(config.test_set_file)
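    # Each entry of tagged_names is expected to map an id to a dict with the
    # keys used below, e.g. (hypothetical values):
    #   {'observation': 'Juan Garcia Lopez',
    #    'FirstName': 'juan', 'LastName1': 'garcia', 'LastName2': 'lopez'}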
    for key, value in tagged_names.items():
        print('Observation: ' + value['observation'])
        norm_observation = utils.normalize(value['observation'],
                                           config.text_case)
        words = re.findall(config.word_pattern, norm_observation)
        token_sequence = [utils.to_token(word, config.token_length)
                          for word in words]

        test_dict = {
            'FirstName': '',
            'LastName1': '',
            'LastName2': '',
        }
        try:
            for j, (i, state) in enumerate(
                    model.maximum_a_posteriori(token_sequence)[1]):
                if state.name.endswith('Name'):
                    test_dict['FirstName'] += words[j] + ' '
                elif state.name.endswith('Name1'):
                    test_dict['LastName1'] += words[j] + ' '
                elif state.name.endswith('Name2'):
                    test_dict['LastName2'] += words[j] + ' '

            # compare results with tagged names
            test_dict['FirstName'] = test_dict['FirstName'].rstrip()
            test_dict['LastName1'] = test_dict['LastName1'].rstrip()
            test_dict['LastName2'] = test_dict['LastName2'].rstrip()
            print('Parsed: ' + str(test_dict))

            # Probability of this sequence
            print('P(sequence) = ' + str(math.e**model.forward(token_sequence)[
                len(token_sequence), model.end_index]))

            result = ''
            for state in ['FirstName', 'LastName1', 'LastName2']:
                if test_dict[state] != value[state]:
                    result += state + ' differs. '
            if result == '':
                result = 'Correct.'
            else:
                parse_errors += 1
            print('Result: ' + result)
        except ValueError as ve:
            print(ve)
            value_errors += 1

        print('--')

    # Final statistics
    print('Summary\n=======')
    print('Number of observations: ' + str(len(tagged_names)))
    print('Parse errors: ' + str(parse_errors))
    print('Value errors: ' + str(value_errors))
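
# The excerpt ends here; presumably the script is executed directly via the
# usual entry point:
if __name__ == '__main__':
    main()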
    """