def test_random_mat():
    """Check that ``hmm.randomize`` produces normalised, well-shaped matrices.

    The model is re-randomised ``iters`` times over the symbols ``'b'`` and
    ``'a'`` with ``num_states`` states while its transition, emission and
    initial matrices are accumulated.  Each accumulated sum, divided by
    ``iters`` and averaged, must be within ``atol=0.01`` of 1.0.  The shapes
    of the matrices left by the final randomisation are checked as well.
    """
    A = nlp.hmm()
    iters = 10000
    num_states = 5

    # Running totals of every matrix produced by randomize().
    sum_mat = np.matrix(np.zeros((num_states, num_states)))
    sum_mat2 = np.matrix(np.zeros((2, num_states)))
    sum_mat3 = np.matrix(np.zeros(num_states))

    for _ in range(iters):
        A.randomize(['b', 'a'], num_states)
        sum_mat += A.transition_matrix

        sum_mat2[0] += A.emission_matrix['a']
        sum_mat2[1] += A.emission_matrix['b']

        sum_mat3 += A.initial_matrix

    # Transition matrix: sums taken along axis 1.
    assert (np.isclose(np.average(np.sum(sum_mat, 1) / iters), 1.0, atol=0.01))
    # Emission rows for 'a' and 'b' stacked: sums taken along axis 0, i.e.
    # across the two symbols.
    assert (np.isclose(np.average(np.sum(sum_mat2, 0) / iters), 1.0,
                       atol=0.01))
    # Initial matrix: sum taken along axis 1.
    assert (np.isclose(np.average(np.sum(sum_mat3, 1) / iters), 1.0,
                       atol=0.01))

    assert (A.transition_matrix.shape == (num_states, num_states))
    assert (A.emission_matrix['a'].shape == (1, num_states))
    assert (A.emission_matrix['b'].shape == (1, num_states))
    assert (A.initial_matrix.shape == (1, num_states))
def get_test_model0():
    """Return an ``nlp.hmm`` fixture populated with fixed, hand-written values.

    The model gets a 1x2 initial matrix, a 2x2 transition matrix and one
    1x2 emission matrix for each of the symbols ``'a'`` and ``'b'``.
    """
    emissions = {}
    emissions['a'] = np.matrix([0.7, 0.4])
    emissions['b'] = np.matrix([0.3, 0.6])

    model = nlp.hmm()
    model.initial_matrix = np.matrix([0.4, 0.6])
    model.transition_matrix = np.matrix([[0.1, 0.9], [0.5, 0.5]])
    model.emission_matrix = emissions
    return model
def test_forward_backward_alg():
    """Check the forward/backward arrays against every two-symbol sequence.

    For each of ``'ab'``, ``'aa'``, ``'bb'`` and ``'ba'`` the backward and
    forward arrays of a randomised two-state model are multiplied
    element-wise and summed along axis 1.  The four results added together
    must be within ``atol=0.001`` of 1 in every entry.
    """
    model = nlp.hmm()
    model.randomize(['a', 'b'], 2)

    total = None
    for observation in ('ab', 'aa', 'bb', 'ba'):
        # Backward is evaluated before forward, each on its own fresh list.
        backward = model._backward_algorithm(list(observation))
        forward = model._forward_algorithm(list(observation))
        contribution = np.multiply(backward, forward).sum(1)
        total = contribution if total is None else total + contribution

    expected = np.ones(total.shape)
    assert (np.all(np.isclose(total, expected, atol=0.001)))
import nlp
import matplotlib.pylab as plb
import numpy as np
import pickle
from functools import reduce

# Compare the tags returned by h.viterbi() with the tags stored next to each
# word in 'brown_tags.dat', then draw a bar chart with one bar per tag
# (fraction of matching tags) and a final bar holding the mean of those
# fractions.

# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# run this against a data file from a trusted source.
with open('brown_tags.dat', 'rb') as tagged_file:
    tagged_sents = pickle.load(tagged_file)

h = nlp.hmm()

h.load_from_file('hmm2.dat')

# Flatten the sentences in a single linear pass (repeated list concatenation
# copies the growing list on every step).
tagged_words = [wt for sent in tagged_sents for wt in sent]
tags = list({t for w, t in tagged_words})
# Position of each tag in `tags`, so the counting loop needs no list scans.
tag_index = {tag: idx for idx, tag in enumerate(tags)}
tag_hit_freqs = np.zeros(len(tags), dtype=np.float32)
tag_total_freqs = np.zeros(len(tags), dtype=np.float32)

for sent in tagged_sents:
    our_tags = h.viterbi([w for w, t in sent])
    for i, wt in enumerate(sent):
        slot = tag_index[wt[1]]
        if wt[1] == our_tags[i]:
            tag_hit_freqs[slot] += 1
        tag_total_freqs[slot] += 1

# Every tag in `tags` was collected from the words counted above, so each
# total is at least 1 once the loop has finished.
tag_accuracy = tag_hit_freqs / tag_total_freqs

output_array = np.zeros(len(tag_total_freqs) + 1)
output_array[:-1] = tag_accuracy
output_array[-1] = tag_accuracy.mean()

bar_positions = np.arange(len(tags) + 1)
plb.bar(bar_positions, output_array)
plb.xticks(bar_positions, tags + ['average'], rotation='vertical')