Exemple #1
0
def qz6():
    """Train a 3-state HMM on qz6.txt, round-trip its counts through a
    file, and Viterbi-decode a sample sentence.

    Relies on the module-level ``hmm`` package (``hmm.Hmm``,
    ``hmm.viterbi``); result of the decode is discarded.
    """
    # Initialize and train the model from the raw training file.
    model = hmm.Hmm(3)
    with open('qz6.txt', 'r') as f:
        model.train(f)
    # Persist the counts, then reload them to exercise the I/O round trip.
    with open('qz6.counts.txt', 'w') as f:
        model.write_counts(f)
    model.read_counts_from_file("qz6.counts.txt")
    model.processing()
    # Decode the most likely state sequence for a sample observation.
    # Renamed from `str`, which shadowed the builtin.
    words = 'the cat saw the saw'.split(" ")
    hmm.viterbi(words, model)
Exemple #2
0
    def test_dishonest_casino(self):
        '''Dishonest Casino Example: 2-state HMM (fair vs. loaded die)
        with very sticky transitions (0.99 self-loop). Checks the forward
        log-probability, the Viterbi path, and the posterior state
        marginals against precomputed reference values.'''
        # Create transition probability matrix
        A = np.array([[0.99, 0.01],
                      [0.01, 0.99]])
        # Create observable probability distribution matrix. Casino biased toward "6" in state "1".        
        B = statutil.scale_row_sums(np.array([[ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
                                              [ 1.0, 1.0, 1.0, 1.0, 1.0, 5.0 ]]))
        # Create set of all observable symbols
        V = [1, 2, 3, 4, 5, 6]
    
        # Instantiate an HMM, note Pi is uniform probability distribution by default
        m = hmm.HMM(2, A=A, B=B, V=V)
        
        Obs = [ 1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6 ]
        # Forward pass with scaling; c holds the per-step scaling factors
        # reused by the backward pass below.
        log_prob_Obs, Alpha, c = hmm.forward(m, Obs, scaling=1)
        assert_almost_equal(log_prob_Obs, -20.9468006, decimal=5, err_msg='Wrong observation probability')
        
        # Most-likely joint state path.
        Q_star, _, _ = hmm.viterbi(m, Obs, scaling=1)
        assert_equal(Q_star, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'Wrong Viterbi path')

        # Posterior marginals Gamma and per-position (individually
        # optimal) states from the forward/backward quantities.
        Beta = hmm.backward(m, Obs, c)
        Gamma, Q_star = hmm.individually_optimal_states(Alpha, Beta)
        assert_almost_equal(Gamma,
                            [[0.63711364302936, 0.6348934929050587, 0.6271179131667495, 0.6117100305977996, 0.5845543683193845, 0.5383975935172204, 0.46091113744414974, 0.3313982095474306, 0.28864618346708165, 0.27562909135388625, 0.27498372625848855, 0.26932891011973825], [0.36288635697064003, 0.3651065070949412, 0.3728820868332506, 0.38828996940220045, 0.4154456316806155, 0.4616024064827796, 0.5390888625558502, 0.6686017904525694, 0.7113538165329184, 0.7243709086461138, 0.7250162737415115, 0.7306710898802617]],
                            decimal=5, err_msg='Wrong state probabilities')        
        assert_equal(Q_star, [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], 'Wrong individually-optimal states')
Exemple #3
0
def test_multi():
    """
    Sanity-check viterbi() on a hand-crafted 3-state model.

    The model is designed so the correct decoding is unambiguous:
      - symbol 0 is emitted only by state 0,
      - symbol 1 is emitted by states 1 and 2,
      - state 0 mostly stays put, state 1 mostly hops to state 2,
      - state 2 can never transition back to state 0.
    Given the observation sequence [0,0,0,0,1,1,1,1,1,0,0,0] the decoder
    must therefore return [0,0,0,0,2,2,2,2,1,0,0,0].
    """
    transition = np.array([[0.5, 0.2, 0.3],
                           [0.2, 0.1, 0.7],
                           [0.0, 0.3, 0.7]])

    emission = np.array([[1., 0.],
                         [0., 1.],
                         [0., 1.]])

    start = np.array([0.3, 0.3, 0.4])
    observed = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0])

    best_path = viterbi(observed, transition, emission, start)
    expected = np.array([0, 0, 0, 0, 2, 2, 2, 2, 1, 0, 0, 0])
    np.testing.assert_array_equal(best_path, expected)
Exemple #4
0
def guessWord(obs, guessed_set):
    """Predict the hidden letters of a partially revealed word via Viterbi.

    obs         -- per-position observations (revealed letter or the blank
                   symbol); length equals the word length
    guessed_set -- letters already guessed, hence excluded from hidden
                   positions unless revealed
    Returns the post-processed prediction from optimize().
    """
    obs_num = 27    # 26 letters + 1 trailing "blank" observation symbol
    state_num = 26  # hidden states: the letters a..z

    states = ('a','b','c','d','e','f','g','h','i','j','k','l','m','n',\
    'o','p','q','r','s','t','u','v','w','x','y','z')

    # A letter is still possible at a position iff it matches the revealed
    # observation there, or it has not been guessed (eliminated) yet.
    remain_p = [[0 for c in range(len(obs))] for r in range(state_num)]
    for o_num in range(len(obs)):
        for s_num in range(state_num):
            if states[s_num] == obs[o_num] or states[s_num] not in guessed_set:
                remain_p[s_num][o_num] = 1

    (letter_f, first_p, bi_p, tri_p) = initGame()

    # Emission matrix: each letter emits itself with certainty, and emits
    # the blank symbol (last column) with its overall letter frequency.
    emit_p = [[0 for c in range(obs_num)] for r in range(state_num)]
    for diagonal in range(state_num):  # fixed typo: was "diagnol"
        emit_p[diagonal][diagonal] = 1
        emit_p[diagonal][obs_num - 1] = letter_f[diagonal]

    predictions = hmm.viterbi(obs, states, remain_p, first_p, bi_p, tri_p,
                              emit_p)
    # Parenthesized single-argument print works in both Python 2 and 3.
    print(predictions)

    return optimize(predictions)
Exemple #5
0
def test(data_folder):
    """Run every pickled test case found in *data_folder* against
    forward() and viterbi(), printing a per-file verdict and a summary."""
    everything_ok = True
    for filename in sorted(list(os.listdir(data_folder))):
        with open(os.path.join(data_folder, filename), 'rb') as pickle_file:
            case = pickle.load(pickle_file)

        A, B = case['A'], case['B']
        pi, O = case['pi'], case['O']
        forward_answer = case['forward_answer']
        viterbi_answer = case['viterbi_answer']

        # Forward probability must match within a relative error of 1e-4.
        forward_valid = abs(forward(A, B, pi, O) -
                            forward_answer) <= forward_answer * 10**-4

        # Every decoded state must agree with the reference path.
        matches = (viterbi_answer == viterbi(
            A, B, pi, O)).astype(int).sum()
        viterbi_valid = matches == viterbi_answer.shape[0]

        everything_ok = everything_ok and forward_valid and viterbi_valid

        print(filename)
        print('Forward:', forward_valid)
        print('Viterbi:', viterbi_valid)
        print()

    if everything_ok:
        print('PASSED all tests')
    else:
        print('Some of the tests FAILED')
Exemple #6
0
def train_viterbi(X, A, E):
    """One Viterbi-training (hard-EM) pass skeleton.

    X -- iterable of (sequence, label) training pairs
    A -- transition probabilities as {state: {next_state: p}}
    E -- emission probabilities as {state: {symbol: p}}
    Returns (new_A, new_E): the re-estimated matrices. Counting and
    normalisation are intentionally left as exercises ("CODING HERE").
    """
    #####################
    # START CODING HERE #
    #####################
    # Initialize your posterior matrices: zeroed copies of A and E.
    new_A = {}
    for k in A:
        new_A[k] = {l: 0 for l in A[k]}

    new_E = {}
    for k in E:
        new_E[k] = {s: 0 for s in E[k]}

    # Get the state path of every sequence in X,
    # using the viterbi() function imported from hmm.py
    for seq, label in X:
        # pi = state path, P = Viterbi probability, V = Viterbi trellis
        # Bug fix: decode the current sequence, not the whole dataset X.
        pi, P, V = viterbi(seq, A, E)

        pass

        # Count the transitions and emissions for every state

    # Normalize your row sums

    #####################
    #  END CODING HERE  #
    #####################

    return new_A, new_E
Exemple #7
0
    def test_dishonest_casino_larger_transition_p(self):
        '''Dishonest Casino Example with a larger switching probability
        (0.1 instead of 0.01). Checks the forward log-probability, the
        Viterbi path, and the posterior state marginals against
        precomputed reference values.'''
        # Create transition probability matrix
        A = np.array([[0.9, 0.1],
                      [0.1, 0.9]])
        # Create observable probability distribution matrix. Casino biased toward "6" in state "1"
        B = statutil.scale_row_sums(np.array([[ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
                                              [ 1.0, 1.0, 1.0, 1.0, 1.0, 5.0 ]]))
        # Create set of all observable symbols
        V = [1, 2, 3, 4, 5, 6]
    
        # Instantiate an HMM, note Pi is uniform probability distribution by default
        m = hmm.HMM(2, A=A, B=B, V=V)
        
        Obs = [ 1, 2, 3, 4, 5, 2, 1, 6, 6, 6, 5, 6 ]
        # Forward pass with scaling; c holds the per-step scaling factors
        # reused by the backward pass below.
        log_prob_Obs, Alpha, c = hmm.forward(m, Obs, scaling=1)
        assert_almost_equal(log_prob_Obs, -20.124, decimal=3, err_msg='Wrong observation probability')
        
        # Most-likely joint state path.
        Q_star, _, _ = hmm.viterbi(m, Obs, scaling=1)
        assert_equal(Q_star, [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1], err_msg='Wrong Viterbi path')

        # Posterior marginals Gamma and per-position (individually
        # optimal) states from the forward/backward quantities.
        Beta = hmm.backward(m, Obs, c)
        Gamma, Q_star = hmm.individually_optimal_states(Alpha, Beta)
        assert_almost_equal(Gamma,
                            [[0.8189770516168013, 0.8482906260695058, 0.8525027084764197, 0.8329611652077556, 0.7834127024175411, 0.6880018120129073, 0.5161970090643716, 0.2130207566284025, 0.12024202874950358, 0.10797060639721641, 0.15902649827833876, 0.14930464162738483], [0.18102294838319855, 0.15170937393049422, 0.14749729152358024, 0.16703883479224435, 0.21658729758245884, 0.31199818798709256, 0.4838029909356284, 0.7869792433715975, 0.8797579712504964, 0.8920293936027837, 0.8409735017216613, 0.8506953583726152]],
                            decimal=5, err_msg='Wrong state probabilities')        
        assert_equal(Q_star, [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1], 'Wrong individually-optimal states')
Exemple #8
0
def get_estimates(state_space, gps_measurements_list, signal_measurements_list,
                  emission_variance, transition_decay, maximum_route_length,
                  base_locations, base_max_range):
    """Viterbi-decode each route and compute a naive nearest-state baseline.

    For every (gps, signal) measurement pair the transition and emission
    probabilities are built, the most likely state sequence is decoded
    with a uniform prior, and a purely spatial nearest-state estimate is
    produced for comparison.

    Returns (estimated_states_list, naive_estimates_list), one entry per
    route.
    """
    estimated_states_list = list()
    naive_estimates_list = list()

    # enumerate() replaces the hand-maintained counter `i`.
    for i, (gps_measurements, signal_measurements) in enumerate(
            zip(gps_measurements_list, signal_measurements_list)):
        print("Route #{}".format(i + 1))

        print("Transition probabilities..")
        tp = transition_probabilties_by_weighting_route_length(
            state_space, transition_decay, maximum_route_length)

        print("Emission probabilities..")
        ep = emission_probabilities(
            gps_measurements, emission_variance, signal_measurements,
            base_locations,
            np.array([base_max_range] * base_locations.shape[0]), state_space)

        print("Viterbi..")
        # Uniform prior over all states.
        pi = np.ones((len(state_space), )) / len(state_space)
        estimated_states = viterbi(tp, ep, pi)
        estimated_states_list.append(estimated_states)

        # Baseline: snap each GPS fix to the spatially closest state.
        naive_estimate = spatially_closest_states(gps_measurements,
                                                  state_space)
        naive_estimates_list.append(naive_estimate)

    return estimated_states_list, naive_estimates_list
Exemple #9
0
def test_viterbi():
    """hmm.viterbi must reproduce the reference decoding for every test
    case file."""
    cases = [
        ('test/testcase46.txt', 'FFFFF'),
        ('test/testcase01.txt', 'AAABBAAAAA'),
        ('test/testcase02.txt', 'AAAAAAAAAAAAAABBBBBBBBBBBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBAAA'),
        ('test/testcase03.txt', 'ABACCBABBABABACCABCCBABAABBBAABABCCBABBABABACCCCCCCCCCBBBBBABACCBABBACCCCCCCCCCCCCCCCBABABACBABAACCC'),
        ('test/testcase04.txt', 'CCCCCAAAAAAAAABABCAAAAAAABCCCAABAAAAAAAAAAABABAAABAAAAAAAAAAAAABABAAABAAAABAAABCABAAAABCAAABAAABCCCC'),
        ('test/testcase21.txt', 'AAABBAAAAA'),
        ('test/testcase23.txt', 'CCCDABBBBBBBBBBBBBBBBBBBBBBCDACDACCCDABBBBBDACDACDABBBBBBBBBBBBBBBBBBBBBBBBBBBBBDADACCDADACCDADADADA'),
    ]
    # One assertion per reference file, in the original order.
    for case_file, expected_path in cases:
        emissions, model = fetch_hmm(Path(case_file))
        path = hmm.viterbi(emissions=emissions, model=model)
        assert path == expected_path
Exemple #10
0
def smarthouse(
    dataset=("A", "B"),
    train_days=5,
    train_offset=0,
    test_days=None,
    test_offset=0,
    use_day_period=False,
    n_samples=None,
):
    """Train an HMM per dataset and score Viterbi decoding accuracy.

    dataset        -- dataset name or sequence of names (default both "A"
                      and "B"; default is now a tuple to avoid a mutable
                      default argument)
    train_days, train_offset, test_days, test_offset -- split parameters
                      forwarded to trainset_testset()
    use_day_period -- forwarded to load_dataset()
    n_samples      -- if set, replace the test set with a random sample
                      drawn from the fitted model

    Returns (truths, predicts, accs); scalars instead of lists when a
    single dataset was given.
    """
    # Accept a bare dataset name as well as a sequence of names.
    if not isinstance(dataset, (tuple, list)):
        dataset = [dataset]

    truths = []
    predicts = []
    accs = []
    for f in dataset:
        df = load_dataset(f, use_day_period=use_day_period)

        train_s, train_o, test_s, test_o = trainset_testset(
            df,
            train_days=train_days,
            train_offset=train_offset,
            test_days=test_days,
            test_offset=test_offset,
        )

        # Estimate the HMM distributions (prior P, transition T, emission O).
        n = max(df['activity'] + 1)
        m = max(df['sensors'] + 1)
        P, T, O = hmm(train_s, train_o, n=n, m=m)

        if n_samples:
            test_s, test_o = random_sample(P, T, O, n_samples)

        # Viterbi-decode the test set and compute the fraction of
        # correctly predicted states (sum() replaces reduce()+lambda).
        predicted, p = viterbi(P, T, O, test_o)
        accuracy = sum(
            1 for truth, guess in zip(test_s, predicted) if truth == guess
        ) / len(predicted)

        accs.append(accuracy)
        truths.append(test_s)
        predicts.append(predicted)

    if len(accs) == 1:
        return truths[0], predicts[0], accs[0]

    return truths, predicts, accs
Exemple #11
0
def show_viterbi(grids):
    """Demo: sample a short walk on a random grid, Viterbi-decode it from
    the observations, and plot the true vs. decoded paths side by side."""
    grid = np.random.choice(grids)
    H, W = grid.shape
    # Random sequence length between 3 and 5.
    T = np.random.randint(3, 6)
    observations, states = grid.get_sequence(T)
    decoded, _ = viterbi(observations, grid.get_hmm())
    # NOTE(review): maps flat state index s to cell (s // H, s % W);
    # for non-square grids row = s // W would be expected — confirm the
    # grid's state encoding.
    decoded = [(s // H, s % W) for s in decoded]

    print(colored("Viterbi algorithm", "cyan"))
    print("Agent wandered on map \033[1m" + grid.name + "\033[0m")
    print("... going thorugh states", states)
    print("... observing", ", ".join([Grid.COLORS[o] for o in observations]))
    print("\nThe decoded sequence of states is", decoded)

    # Two identical heatmaps of the grid; the paths are drawn on top.
    fig, axs = plt.subplots(1, 2, figsize=(10, 4), sharey="row")
    cm = LinearSegmentedColormap.from_list("cm", Grid.COLORS)
    sns.heatmap(
        grid.color,
        annot=grid.elevation,
        cmap=cm,
        square=True,
        cbar=False,
        annot_kws={"size": 20},
        ax=axs[0],
    )
    sns.heatmap(
        grid.color,
        annot=grid.elevation,
        cmap=cm,
        square=True,
        cbar=False,
        annot_kws={"size": 20},
        ax=axs[1],
    )
    axs[0].set_title(grid.name + " - original path")
    axs[1].set_title(grid.name + " - decoded path")

    # Draw an arrow between consecutive cells of each path; +0.5 centers
    # the arrow endpoints inside the heatmap cells.
    for t in range(T - 1):
        (y0, x0), (y1, x1) = states[t], states[t + 1]
        y0, x0, y1, x1 = y0 + 0.5, x0 + 0.5, y1 + 0.5, x1 + 0.5
        axs[0].annotate("",
                        xy=(x1, y1),
                        xytext=(x0, y0),
                        arrowprops=dict(color="y", width=5.0))
        (y0, x0), (y1, x1) = decoded[t], decoded[t + 1]
        y0, x0, y1, x1 = y0 + 0.5, x0 + 0.5, y1 + 0.5, x1 + 0.5
        axs[1].annotate("",
                        xy=(x1, y1),
                        xytext=(x0, y0),
                        arrowprops=dict(color="y", width=5.0))
    def test_viterbi(self):
        """With an identity transition matrix the chain can never leave
        its initial state, so the decoded path must stay in state 0."""
        # Identity transitions: each state only loops onto itself.
        t=np.identity(5)

        # Emission probabilities: 5 states x 4 observable symbols.
        e= np.array([[0.600, 0.175, 0.175, 0.050],
                 [0.050, 0.600, 0.175, 0.175],
                 [0.050, 0.175, 0.600, 0.175], 
                 [0.050, 0.175, 0.175, 0.600], 
                 [0.600, 0.050, 0.175, 0.175]])


        obs = np.array([0,0,1,0,0,3])
        result = np.array([0,0,0,0,0,0])

        # NOTE(review): `pi` is not defined in this method — it relies on
        # a module-level initial distribution; verify it exists.
        np.testing.assert_array_equal(result, viterbi(pi, t, e, obs)[0])
Exemple #13
0
def test_viterbi(grid, observations, test_states, test_values):
    """Check viterbi() against a reference path and trellis for one grid."""
    print("Testing viterbi...")

    H, W = grid.shape
    raw_states, delta = viterbi(observations, grid.get_hmm())
    # Convert flat state indices to (row, col) grid cells.
    states = [(idx // H, idx % W) for idx in raw_states]

    print("States:", states)
    print("TStates:", test_states)

    # Decoded path must match the expected one cell for cell, and the
    # trellis values must agree numerically.
    assert len(states) == len(test_states)
    assert all(got == want for (got, want) in zip(states, test_states))
    assert np.allclose(delta, test_values)

    print(colored(">>> Viterbi looks right!", "green"))
    print("\n")
Exemple #14
0
def test_decoded_by_sequence_length(grids, runs_no=1000):
    """Measure per-state decoding accuracy of viterbi() for sequence
    lengths T = 1..10, averaged over *runs_no* random walks each."""
    print("Evaluate how good the decoded paths are...")

    for T in range(1, 11):
        correct = 0

        for _ in range(runs_no):
            grid = np.random.choice(grids)
            H, W = grid.shape
            observations, states = grid.get_sequence(T)
            decoded, _ = viterbi(observations, grid.get_hmm())
            # Flat state index -> (row, col) cell.
            decoded = [(s // H, s % W) for s in decoded]
            # Count positions where the decoded cell equals the true one.
            correct += sum(a == b for a, b in zip(states, decoded))
        perc = float(correct * 100) / (runs_no * T)

        print("%5d / %5d (%5.2f%%) for T =%2d" % (correct, runs_no * T, perc, T))
    print("\n")
    def test_viterbi2(self):
        """Observation symbol 3 is emitted only by state 4 (probability 1
        everywhere else is 0), so an all-3 observation sequence must
        decode to an all-4 state path."""
        # 5x5 transition matrix.
        t2 = np.array([[0.250, 0.500, 0.025, 0.200, 0.025],
                       [0.250, 0.150, 0.075, 0.500, 0.025], 
                       [0.050, 0.025, 0.050, 0.850, 0.025], 
                       [0.025, 0.075, 0.150, 0.125, 0.625], 
                       [0.050, 0.075, 0.475, 0.025, 0.375]])



        # Emissions: only state 4 (last row) can emit symbol 3.
        e2=np.array([[0.25, 0.25, 0.25, 0], 
                     [0.25, 0.25, 0.25, 0], 
                     [0.25, 0.25, 0.25, 0], 
                     [0.25, 0.25, 0.25, 0], 
                     [0, 0, 0, 1]])

        obs2 = np.array([3,3,3,3,3,3])

        result2 = np.array([4,4,4,4,4,4])
        # NOTE(review): `pi` is not defined in this method — it relies on
        # a module-level initial distribution; verify it exists.
        np.testing.assert_array_equal(result2, viterbi(pi, t2, e2, obs2)[0])
Exemple #16
0
def validation(hmm, filename):
    """Generator: Viterbi-decode each text in *filename* and extract the
    address-like token runs.

    Yields (reference_address_string, extracted_addresses) pairs, where
    extracted_addresses is the list of token runs whose states are
    'target*' states.
    """
    # Uniform initial mass over the first 6 states (background, prefix
    # states and first target); the remaining 8 states start at 0.
    p = 1. / 6.  # backround, prefix states and first target
    address_initial_dist = np.array(
        [[p, p, p, p, p, p, 0, 0, 0, 0, 0, 0, 0, 0]])

    # States whose name starts with 'target' mark address tokens.
    target_states = [
        v for k, v in address_states.items() if k.startswith('target')
    ]
    for emissions, orig, pos, adr in text_emissions(filename,
                                                    address_emissions):
        states = viterbi(hmm, address_initial_dist, emissions)
        #print states

        # Collect consecutive target-state tokens into one address;
        # flush the run when a non-target state is reached.
        address, addresses = list(), list()
        for i in range(len(states)):
            if states[i] in target_states:
                address.append(orig[i])
            else:
                # NOTE(review): runs of length 1 are discarded but the
                # buffer is not cleared, and a run that reaches the end
                # of the sequence is never flushed — confirm both are
                # intentional.
                if len(address) > 1:
                    addresses.append(' '.join([a for a in address if a]))
                    address = list()

        yield ' '.join(adr), addresses
Exemple #17
0
# Demo script: run forward, backward and Viterbi on the module-level
# `obs`/`hmm` and print the resulting trellises (2-state model assumed by
# the "%f %f" row format).
p, alpha = forward(obs, hmm)
q, beta = backward(obs, hmm)

# p and q are both the observation likelihood and should agree.
print("p = %f, q = %f" % (p, q))

print("alpha")
for l in alpha:
    print("%f %f" % (l[0], l[1]))
print()

print("beta")
for l in beta:
    print("%f %f" % (l[0], l[1]))
print()

states, delta = viterbi(obs, hmm)

print("states:", states)

print("delta")
for l in delta:
    print("%f %f" % (l[0], l[1]))
print()

# Probability of the single most likely path.
print("most prob = ", np.max(delta[-1, :]))

# Posterior state marginals from the forward/backward quantities.
gamma = alpha * beta / p
print("gamma")
for l in gamma:
    print("%f %f" % (l[0], l[1]))
print()
Exemple #18
0
'''
Created on Nov 11, 2014

@author: oropivan

Demo: decode a DNA observation sequence with a 2-state HMM.
'''
import numpy

from hmm import HMM, viterbi

# Two hidden states; row i gives transition probabilities out of state i.
stateNum = 2
transition_probabilities = numpy.array([[.5, .5],
                                        [.4, .6]])
# Emission probabilities over the four DNA symbols per state.
emission_probabilities = numpy.array([[0.2, 0.3, 0.3, 0.2],
                                      [0, 0.5, 0.2, 0.3]])
#symbols
symbolList = ["A","C", "G", "T"]
# Uniform initial state distribution.
Pi = [0.5,0.5]

model = HMM(stateNum, A=transition_probabilities, B=emission_probabilities, V=symbolList, Pi=Pi)
# Parenthesized single-argument print works in both Python 2 and 3.
print(viterbi(model, [ "G", "G", "C", "A", "C", "T", "G", "A", "A"]))
Exemple #19
0
# Self-check script: compare forward() and viterbi() against hand-computed
# ground-truth trellises for a small 2-state, 3-symbol model.
import numpy as np
from hmm import forward, viterbi

# Transition matrix (2 states).
A = np.asarray([[0.3, 0.7], [0.8, 0.2]])

# Emission matrix (2 states x 3 symbols).
B = np.asarray([[0.5, 0.3, 0.2], [0.1, 0.1, 0.8]])

# Initial state distribution.
pi = np.asarray([0.6, 0.4])

# Observation sequence (symbol indices).
O = np.asarray([1, 2, 1, 0])

# Ground truth: forward trellis and total observation probability.
alpha_gt = np.asarray([[0.18, 0.0172, 0.027276, 0.0054306],
                       [0.04, 0.1072, 0.003348, 0.00197628]])
forward_result_gt = 0.00740688

# Ground truth: Viterbi trellis and best state path.
delta_gt = np.asarray([[0.18, 0.0108, 0.024192, 0.0036288],
                       [0.04, 0.1008, 0.002016, 0.00169344]])
viterbi_result_gt = np.asarray([0, 1, 0, 0])

forward_result, alpha = forward(A, B, pi, O)
print('Forward result test: {}'.format(
    abs(forward_result_gt - forward_result) < 10**-5))
print('Forward alpha test: {}'.format(
    np.all(np.abs(alpha - alpha_gt) < 10**-5)))

viterbi_result, delta = viterbi(A, B, pi, O)
print('Viterbi result test: {}'.format(
    (viterbi_result_gt == viterbi_result).all()))
print('Viterbi delta test: {}'.format(
    np.all(np.abs(delta - delta_gt) < 10**-5)))
Exemple #20
0
            ass_plots.append(('K-means', results[alg]['seq']))

        elif alg == algos.em:
            ass_plots.append(('EM', results[alg]['seq']))

        elif alg == algos.hmm:
            t = time.time()
            tau, A, obs_distr, pi, ll_train, _ = hmm.em_hmm(
                X, init_pi, init_obs_distr, n_iter=options.n_iter)
            print 'HMM EM: {}s, final loglikelihood: {}'.format(
                time.time() - t, ll_train[-1])

            seq_smoothing = np.argmax(tau, axis=1)
            ass_plots.append(('HMM smoothing', seq_smoothing))

            seq_viterbi, _ = hmm.viterbi(X, pi, A, obs_distr)
            ass_plots.append(('HMM viterbi', seq_viterbi))
            results[alg] = {
                'tau': tau,
                'A': A,
                'obs_distr': obs_distr,
                'pi': pi,
                'll_train': ll_train,
                'seq_smoothing': seq_smoothing,
                'seq_viterbi': seq_viterbi,
            }
            seqs[alg] = (seq_smoothing, seq_viterbi)

        elif alg == algos.map_hmm:
            t = time.time()
            seq, obs_distr, energies = hmm.map_em_hmm(X, init_obs_distr)
# plot( pro0a[:,0], 'b.', pro0b[:,0], 'r.', )

# now use KCPA
# Kernel PCA projection onto 2 components with an RBF kernel.
(P,alpha,evals) = dr.kpca(x, 2, kernel.rbf1)
#evals

# Split projected points back into the two original groups and plot them.
Pa = P[0:a.shape[0],:]
Pb = P[a.shape[0]:,:]
plot(Pa[:,0], Pa[:,1], 'b.', Pb[:,0], Pb[:,1], 'r.')

plot(alpha[:, 0],'r.')
####################
# HMM
####################
# Decode several short observation sequences; expected outputs are given
# in the trailing comments.
(a,b,pi) = datasets.getHMMData()
hmm.viterbi(array([0,1,1,2]), a, b, pi)
#array([0, 0, 0, 1])

hmm.viterbi(array([0,2,1,2]), a, b, pi)
#array([0, 1, 1, 1])

###WU 8
# example 1
hmm.viterbi(array([0,1,1,1]), a, b, pi) # 0 0 0 0
hmm.viterbi(array([0,1,2,1]), a, b, pi) # 0 0 1 1


# Forward/backward passes should produce consistent likelihoods.
al = hmm.forward(array([0,1,1,2]), a, b, pi)
be = hmm.backward(array([0,1,1,2]), a, b, pi)
hmm.sanityCheck(al,be)
# plot( pro0a[:,0], 'b.', pro0b[:,0], 'r.', )

# now use KCPA
# Kernel PCA projection onto 2 components with an RBF kernel.
(P, alpha, evals) = dr.kpca(x, 2, kernel.rbf1)
#evals

# Split projected points back into the two original groups and plot them.
Pa = P[0:a.shape[0], :]
Pb = P[a.shape[0]:, :]
plot(Pa[:, 0], Pa[:, 1], 'b.', Pb[:, 0], Pb[:, 1], 'r.')

plot(alpha[:, 0], 'r.')
####################
# HMM
####################
# Decode several short observation sequences; expected outputs are given
# in the trailing comments.
(a, b, pi) = datasets.getHMMData()
hmm.viterbi(array([0, 1, 1, 2]), a, b, pi)
#array([0, 0, 0, 1])

hmm.viterbi(array([0, 2, 1, 2]), a, b, pi)
#array([0, 1, 1, 1])

###WU 8
# example 1
hmm.viterbi(array([0, 1, 1, 1]), a, b, pi)  # 0 0 0 0
hmm.viterbi(array([0, 1, 2, 1]), a, b, pi)  # 0 0 1 1

# Forward/backward passes should produce consistent likelihoods.
al = hmm.forward(array([0, 1, 1, 2]), a, b, pi)
be = hmm.backward(array([0, 1, 1, 2]), a, b, pi)
hmm.sanityCheck(al, be)

##########
Exemple #23
0
# Self-check script: compares forward() and viterbi() against precomputed
# trellises; relies on A, B and pi defined earlier in the script.
O = np.asarray([1, 2, 1, 0])

# Ground truth: forward trellis and total observation probability.
alpha_gt = np.asarray([[0.18, 0.0172, 0.027276, 0.0054306],
                       [0.04, 0.1072, 0.003348, 0.00197628]])
forward_result_gt = 0.00740688

# Ground truth: Viterbi trellis and best state path.
delta_gt = np.asarray([[0.18, 0.0108, 0.024192, 0.0036288],
                       [0.04, 0.1008, 0.002016, 0.00169344]])
viterbi_result_gt = np.asarray([0, 1, 0, 0])

forward_result, alpha = forward(A, B, pi, O)
print('Forward result test: {}'.format(
    abs(forward_result_gt - forward_result) < 10**-5))
print('Forward alpha test: {}'.format(
    np.all(np.abs(alpha - alpha_gt) < 10**-5)))

viterbi_result, delta = viterbi(A, B, pi, O)
print('Viterbi result test: {}'.format(
    (viterbi_result_gt == viterbi_result).all()))
print('Viterbi delta test: {}'.format(
    np.all(np.abs(delta - delta_gt) < 10**-5)))

# Additional decode on a second hand-built model (no ground truth; just
# printed for inspection).
test_A = np.array([[0.5, 0.5], [0.4, 0.6]])
test_B = np.array([[0.2, 0.3, 0.3, 0.2], [0.3, 0.2, 0.2, 0.3]])
test_pi = np.array([0.5, 0.5])
test_O = np.array([2, 2, 1, 0, 1, 3, 2, 0, 0])

test_viterbi, test_delta = viterbi(test_A, test_B, test_pi, test_O)
print(test_viterbi, test_delta)
Exemple #24
0
                4.90660589e-23, 2.36899500e-24, 8.74828204e-26, 5.76689190e-27,
                2.65176771e-28, 1.02821376e-29, 6.45525118e-31, 2.19395593e-32,
                1.27209762e-33, 7.58706457e-35, 3.14381566e-36, 1.59146266e-37,
                7.28862223e-39, 2.98685713e-40, 1.94326330e-41, 7.07671720e-43,
                4.29072538e-44, 1.72310083e-45, 9.47392163e-47, 4.80288352e-48,
                1.68916233e-49, 1.28052559e-50, 3.77500977e-52, 3.11720906e-53,
                1.24641239e-54, 6.88279760e-56, 2.57904162e-57, 1.67549319e-58,
                8.05105187e-60, 2.98733733e-61, 1.58657249e-62, 8.63982046e-64,
                4.12736566e-65, 1.54044543e-66, 1.10042172e-67, 3.24406071e-69,
                2.16853506e-70, 1.18089490e-71
            ]
        ])
    print("********************************************")

    print("TESTING VITERBI ALGORITHM: 1")
    test_viterbi, test_delta = viterbi(test_a1, test_b1, test_pi1, test_o1)
    #print("test delta")
    #print("delta1 = " + str(list(test_delta)))
    #print("test_viterbi")
    #print("viterbi1 = " + str(list(np.uint8(test_viterbi))))

    print("********************************************")
    print('Viterbi1 result test: {}'.format((test_viterbi == viterbi1).all()))
    print('Viterbi1 delta test: {}'.format(
        np.all(np.abs(test_delta - delta1) < 10**-5)))
    print("********************************************")

    print("TESTING VITERBI ALGORITHM: 2")
    test_viterbi, test_delta = viterbi(test_a2, test_b2, test_pi2, test_o2)
    #print("test delta")
    #print("delta2 = " + str(list(test_delta)))
Exemple #25
0
'''
Created on Nov 11, 2014

@author: oropivan

Demo: decode a DNA observation sequence with a 2-state HMM.
'''
import numpy

from hmm import HMM, viterbi

# Two hidden states; row i gives transition probabilities out of state i.
stateNum = 2
transition_probabilities = numpy.array([[.5, .5], [.4, .6]])
# Emission probabilities over the four DNA symbols per state.
emission_probabilities = numpy.array([[0.2, 0.3, 0.3, 0.2],
                                      [0, 0.5, 0.2, 0.3]])
#symbols
symbolList = ["A", "C", "G", "T"]
# Uniform initial state distribution.
Pi = [0.5, 0.5]

model = HMM(stateNum,
            A=transition_probabilities,
            B=emission_probabilities,
            V=symbolList,
            Pi=Pi)
# Parenthesized single-argument print works in both Python 2 and 3.
print(viterbi(model, ["G", "G", "C", "A", "C", "T", "G", "A", "A"]))
Exemple #26
0
def coalhmm(args):
    """Train and test a Coal-HMM (Python 2 code).

    args -- argparse.Namespace with: filename (MAF alignment file),
            sample (number of alignments to subsample, or None),
            rounds (number of training rounds)

    Each round optionally subsamples the alignments, builds emission
    probabilities per alignment column via Felsenstein's algorithm,
    refines the matrices with Baum-Welch, Viterbi-decodes the hidden
    states, and prints per-state base counts.
    """
    # from table 2 in Hobolth et al.
    # mean_fragment_length_HC1 = 1684
    # mean_fragment_length_others = 65
    # probability_leaving_HC1 = 3 * s = 1 / 1684
    s = 1.0 / (1684 * 3)
    # probability_leaving_others = 1 / 65 = u + 2 * v
    # u + 2 * v = 1 / 65
    stationary = (0.49, 0.17, 0.17, 0.17)
    # stationary = np.array([psi, (1 - psi) / 3, (1 - psi) / 3, (1 - psi) / 3])
    psi = 0.49
    # psi = 1 / (1 + 3 * s / u)
    # 1 + 3 * s / u = 1 / psi
    # u + 3 * s = u / psi
    # 3 * s = (1 / psi - 1) * u
    u = 3 * s / (1 / psi - 1)
    v = (1 / 65.0 - u) / 2

    # Transition probability: HC1, HC2, HG, CG
    transition = np.array([[1 - 3 * s, s, s, s], [u, 1 - (u + 2 * v), v, v],
                           [u, v, 1 - (u + 2 * v), v],
                           [u, v, v, 1 - (u + 2 * v)]])

    print "Reading alignments"
    original_alignments = [np.array(a) for a in utils.read_maf(args.filename)]
    print "done"
    for j in range(args.rounds):
        print "ROUND {}".format(j)

        print "sampling"
        # Optionally subsample alignments without replacement.
        if args.sample is not None:
            alignments = [
                original_alignments[i] for i in np.random.choice(
                    np.arange(len(original_alignments)), args.sample, False)
            ]
        else:
            alignments = original_alignments
        print "Number of alignments: {}".format(len(alignments))
        print "done"

        print "felsenstein"
        # groupings maps each distinct alignment column to an emission
        # symbol index; emission is 4 states x 5**4 possible columns.
        groupings = {}
        emission = np.zeros((4, 5**4))
        for alignment in alignments:
            _, len_alignment = alignment.shape
            for i in range(len_alignment):
                column = "".join(alignment[:, i])
                if column not in groupings:
                    groupings[column] = len(groupings)

                    trees = utils.generate_trees(alignment[:, i])

                    # Felsenstein to get emission
                    # NOTE(review): this inner `i` shadows the column
                    # index above (harmless here, since `i` is only used
                    # again at the top of the next outer iteration, but
                    # confusing — worth renaming).
                    for i, t in enumerate(trees):
                        emission[i, groupings[column]] = math.exp(
                            felsenstein.felsensteins(t))
        print "done"

        print "BW"
        initial = np.array([0.25, 0.25, 0.25, 0.25])
        # Baum welsh to update matrices
        emission, transition = hmm.baum_welch(initial, emission, transition,
                                              alignments, groupings)
        print "done"

        print "viterbi"
        # use viterbi to see which state we are in the longest
        hidden_states = []
        for alignment in alignments:
            hidden_states.append(
                hmm.viterbi(initial, emission, transition, alignment,
                            groupings))
        print "done"

        # calculate time spent in a state
        counts = Counter([s for states in hidden_states for s in states])
        print "Number of bases in each state: ", counts
Exemple #27
0
def main():
    """HMM POS-tagger driver (Python 2 code).

    Trains transition/emission counts from the training files, converts
    them to smoothed log-probability matrices, computes sentence
    likelihoods with forward/backward, then Viterbi-tags the test files
    and reports tagger accuracy against the human annotations.
    """
    # Raw transition (aa) and emission (bb) counts filled in by train().
    aa = {}
    bb = {}
    vocabulary = set([])

    file_list = os.listdir(fileparser.resource_path)
    # training
    print('Training...')
    for file in file_list:
        if file.startswith(fileparser.training_prefix):
            training_file = open(fileparser.resource_path + file, 'r')
            sentence_list = fileparser.parse(training_file)
            train(aa, bb, sentence_list, vocabulary)
    print('DONE')
    
    # transform into a and b
    t_start = time.time()
    a = {}
    b = {}
    user_states = list(aa.iterkeys())
    states = list(aa.iterkeys()) + [hmm.START, hmm.END]
    # Normalize transition counts into probabilities (log domain);
    # unseen transitions get probability 0.
    for state in aa.iterkeys():
        sum_counts = sum([aa[state][next_state] for next_state in aa[state].iterkeys()])
        for next_state in states:
            if aa[state].has_key(next_state):
                a[(state, next_state)] = LogProbability(aa[state][next_state]) / sum_counts            
            else:
                a[(state, next_state)] = LogProbability(0.0)
    # Extract vocabulary
    vocab = {}
    for state in bb.iterkeys():
        for output in bb[state].iterkeys():
            vocab[output] = vocab.get(output, 0) + 1
    
    # Create matrix B and apply smoothing
    # Add-one (Laplace) smoothing over the vocabulary.
    for state in bb.iterkeys():
        sum_emmited = (sum([bb[state][output] for output in bb[state].iterkeys()]) if bb.has_key(state) else 0)
        b[state] = {}
        for output in vocab.iterkeys():
            b[state][output] = LogProbability(bb.get(state, {}).get(output, 0.0) + 1.0) / (sum_emmited + len(vocab))

    # Calculate average of singleton words
    # Unknown-word emission probability: average over words seen once.
    unknown_b = {}
    singletons = [word for word, count in vocab.iteritems() if count == 1]
    for s in bb.iterkeys():
        sm = LogProbability(0.0)
        for singleton in singletons:
            sm += b[s].get(singleton, LogProbability(0.0))
        b[s][hmm.UNKNOWN] = sm / len(singletons)
    
    print hmm.states(a, b)
    
    def unknown_b_mapper(s, word):
        # Fallback emission lookup for out-of-vocabulary words.
        print 'Could not find word %s in state %s' % (word, s)
        print 'State has: %s' % (list(b[s].iterkeys()))
        assert word not in vocab
        print '**UNKNOWN** %s' % word
        return unknown_b[s]
    
    # computing likelihood
    print('computing likelihood...')
    forward_file = open('forward.txt', 'w')
    for file in file_list:
        if file.startswith(fileparser.test_prefix):
            training_file = open(fileparser.resource_path + file, 'r')
            sentence_list = fileparser.parse(training_file)
            for sentence in sentence_list:
                words = [word for word, tag in sentence]
                # Map out-of-vocabulary words to the UNKNOWN token.
                words = [(word if word in vocab else hmm.UNKNOWN) for word in words]
                
                forward_table = {}
                backward_table = {}
                forward_p = hmm.forward_algorithm(words, a, b, forward=forward_table)
                backward_p = hmm.backward_algorithm(words, a, b, backward=backward_table)
                forward_file.write('%s\n %s\n %s\n\n' % (words, forward_p.logv, backward_p.logv))
    forward_file.close()
    print('likelihood computed.')
    print 'Took %ds' % (time.time() - t_start)
    
    # computing most likely tag sequencesanc accuracy
    print('computing most likely tag sequence and tagger accuracy...')
    match_count = 0.0
    total_count = 0.0
    for file in file_list:
        if file.startswith(fileparser.test_prefix):
            training_file = open(fileparser.resource_path + file, 'r')
            sentence_list = fileparser.parse(training_file)
            for sentence in sentence_list:
                words = [word for word, tag in sentence]
                words = [(word if word in vocab else hmm.UNKNOWN) for word in words]            
                
                tagger_sequence = hmm.viterbi(words, a, b)
                human_sequence = [tag for word, tag in sentence]
                
                #print tagger_sequence
                #print human_sequence
                #print '----'
                
                # update tagger accuracy information
                for i in range(min(len(human_sequence), len(tagger_sequence))): # because of underflow it is possible that the tag sequences are not equal in length...s
                    if tagger_sequence[i] == human_sequence[i]:
                        match_count = match_count + 1.0
                total_count = total_count + max(len(human_sequence), len(tagger_sequence))
                #print('%s\n%s\nProbability: %f\n' % (human_sequence, tagger_sequence, p))
    print('most likely tag sequence computed.')
    print('accuracy of tagger is: %f' % (match_count / total_count, ))
import numpy as np
from gaussian import Gaussian
import hmm

# Demo: decode the most likely state sequence of a 3-state Gaussian HMM
# for a short 1-D signal. States 0 and 4 of `trans` are the entry/exit
# states; the three Gaussians are the emission models of states 1-3.
signal = np.array([[1., 1.1, 0.8, 0.2, 1.6, 1.7, 3.4, 1.4, 1.1]])
trans = np.array([[0., 1. / 3, 1. / 3, 1. / 3, 0.], [0., 0.45, 0.45, 0., 0.1],
                  [0., 0.45, 0.45, 0., 0.1], [0., 0., 0., 1., 0.],
                  [0., 0., 0., 0., 0.]])
dists = [
    Gaussian(mean=np.array([1]), cov=np.array([[1]])),
    Gaussian(mean=np.array([2]), cov=np.array([[1]])),
    Gaussian(mean=np.array([1.5]), cov=np.array([[1]]))
]
vals, nll = hmm.viterbi(signal, trans, dists)
# Fixed: Python 2 print statements replaced with the print() function used
# elsewhere in this file; the comma-separated form prints identically.
print('State sequence: ', vals)
# State sequence:  [1 1 1 1 2 2 2 1 1]
print('Negative log-likelihood:', nll)
# Negative log-likelihood: 19.5947057502
import numpy as np
from gaussian import Gaussian
import hmm

# Duplicate of the Gaussian-HMM Viterbi demo above (same signal, transition
# matrix, and emission Gaussians; same expected output).
signal = np.array([[1., 1.1, 0.8, 0.2, 1.6, 1.7, 3.4, 1.4, 1.1]])
trans = np.array([[0., 1./3, 1./3, 1./3, 0.],
                  [0., 0.45, 0.45, 0., 0.1],
                  [0., 0.45, 0.45, 0., 0.1],
                  [0., 0., 0., 1., 0.],
                  [0., 0., 0., 0., 0.]])
dists = [Gaussian(mean=np.array([1]), cov=np.array([[1]])),
         Gaussian(mean=np.array([2]), cov=np.array([[1]])),
         Gaussian(mean=np.array([1.5]), cov=np.array([[1]]))]
vals, nll = hmm.viterbi(signal, trans, dists)
# Fixed: Python 2 print statements -> print() function (same output).
print('State sequence: ', vals)
# State sequence:  [1 1 1 1 2 2 2 1 1]
print('Negative log-likelihood:', nll)
# Negative log-likelihood: 19.5947057502

Example #30
0
# --- Map-matching HMM experiment -------------------------------------------
# NOTE(review): bbox, speed_limit, polling_frequency, highway_dict,
# starting_node, starting_highway, intersections, node_dict, gps_variance,
# measurement_variance and state_space are defined earlier in the file,
# outside this view -- confirm before reuse.

# Decay rate used when weighting candidate routes by their length.
transition_decay = 1/500
# Upper bound on route length between consecutive measurements
# (presumably speed * time, doubled as a margin -- TODO confirm units).
maximum_route_length = speed_limit/polling_frequency*2
# Number of simulated signal base stations and their maximum range.
no_of_bases = 50
base_max_range = 50
# Number of nodes in the simulated ground-truth route.
route_length = 200

print("Simulating route..")

# Random base-station positions inside the bounding box.
base_locations = generate_base_locations(bbox, no_of_bases)

# Simulate a ground-truth route, then noisy GPS/signal observations of it;
# measurement_states holds the true hidden state of each observation.
simulated_route = simulate_route(highway_dict, starting_node, starting_highway, intersections, route_length)
gps_measurements, signal_measurements, measurement_states = simulate_observations(simulated_route, node_dict, gps_variance, polling_frequency,\
 [speed_limit]*len(simulated_route), base_locations, np.array([base_max_range]*no_of_bases), state_space)


print("Calculating transition probabilities..")
tp = transition_probabilties_by_weighting_route_length(state_space, transition_decay, maximum_route_length)

print("Calculating emission probabilities..")
# NOTE(review): the emission model uses a fixed range of 500 here rather than
# base_max_range -- verify whether this mismatch is intentional.
ep = emission_probabilities(gps_measurements, measurement_variance, signal_measurements, base_locations, np.array([500]*no_of_bases), state_space)

N = len(state_space)

print("Running Viterbi..")
# Uniform initial distribution over the N hidden states.
estimated_states = viterbi(tp, ep, np.array([1/N]*N))


# Baseline: assign each GPS fix to the spatially nearest state.
naive_estimate = spatially_closest_states(gps_measurements, state_space)

print("Accuracy with naive method: {}".format(np.mean(measurement_states == naive_estimate)))
print("Accuracy with hidden markov model: {}".format(np.mean(estimated_states == measurement_states)))
                Pi=initialProbabilities)

    # testing data
    # Per-iteration accuracy counters (sentence-level and tag-level).
    iterSentencesCorrect = 0
    iterSentences = 0
    iterTagsCorrect = 0
    iterTags = 0
    for sentence in testSet:
        # Wrap each test sentence in start/end markers so it matches the
        # word/tag sequences the HMM was trained on.
        wordSeq = ['<S>']
        POSSeq = ['<S>']
        for word, POS in sentence:
            wordSeq.append(word)
            POSSeq.append(POS)
        wordSeq.append('<\S>')
        POSSeq.append('<\S>')
        # Viterbi decoding; resultPOS[0] is the state-index sequence,
        # mapped back to POS tag names via map_index_POS.
        resultPOS = viterbi(model, wordSeq, scaling=False)
        returnedSeq = [map_index_POS[x] for x in resultPOS[0]]

        # Sentence counts as correct only when every tag matches.
        # NOTE(review): numberSentencesCorrect / numberSentences /
        # numberTagsCorrect / numberTags are cumulative counters defined
        # outside this view -- confirm.
        if returnedSeq == POSSeq:
            numberSentencesCorrect += 1
            iterSentencesCorrect += 1
        numberSentences += 1
        iterSentences += 1

        # Tag-level accuracy over aligned positions.
        for x, y in zip(POSSeq, returnedSeq):
            if x == y:
                numberTagsCorrect += 1
                iterTagsCorrect += 1
            numberTags += 1
            iterTags += 1
    print ','.join([
Example #32
0
    def predict_viterbi(self,
                        test_data,
                        verbose=1,
                        output_filename="./utter_level_result.txt"):
        """
        Decode utterance labels with Viterbi over the base model's outputs.

        Combines the base model's per-utterance output probabilities with
        the marginal and transition probabilities learned at training time,
        stores a DialogResult in ``self.result`` and writes utterance-level
        results to ``output_filename``.

        Parameters
        ----------
        test_data : MHDTestData
            Test-data container.
        verbose : int
            Verbosity level in range [0,3].
        output_filename : str
            Path to the utterance-level result file.
        """
        # Transitions are only available after training/loading the model.
        if self.log_transitions is None:
            print("ERROR: Train or load the model first")
            return

        self.te_data = test_data
        self.n_labels = self.te_data.n_labels

        self.model_info = "_".join(["HMM", self.base_model.model_info])

        # Session-sorted, utterance-level test data: id lists, documents,
        # and (possibly absent) labels.
        ulists, docs, labs = self.te_data.get_utter_level_data_from_sids(
            sorted(self.te_data.sstt2uid.keys()))

        # One Viterbi decode per session: convert the base model's class
        # probabilities into log emissions, then run the decoder.
        vit_res = [
            viterbi(
                convert_class_prob_to_log_emission_prob(
                    self.base_model.result.output_prob[idx], self.marginals),
                self.log_transitions, self.log_start_prob, self.log_end_prob)
            for idx in range(len(ulists))
        ]

        # Each decode returns (score, label_sequence).
        output_scores = [res[0] for res in vit_res]
        yhats = [res[1] for res in vit_res]
        self.result = DialogResult(self.n_labels, yhats, None, self.marginals,
                                   self.model_info, output_scores)

        if self.te_data.has_label:
            if verbose > 0:
                print("Calculate score")
            self.result.get_scores(labs)
            if verbose > 0:
                print("Printing utterance-level results to file " +
                      output_filename)
            self.result.print_utter_level_results(ulists,
                                                  docs,
                                                  labs,
                                                  self.te_data.lid2name,
                                                  filename=output_filename)
        else:
            if verbose > 0:
                print("Printing utterance-level results to file " +
                      output_filename)
            self.result.print_utter_level_results_without_true_lab(
                ulists, docs, self.te_data.lid2name, filename=output_filename)
        return self.result
    alpha_scaled2, scale_alpha2 = hmm.forward(data_test, states,
                                              start_proba1[i],
                                              transition_proba1[i],
                                              means1[i],
                                              covariances1[i])
    logllh2.append(hmm.loglike(states, alpha_scaled2, scale_alpha2))
plt.figure()
plt.plot(logllh2)

# 6
print "The log-likelihood for HMM on train data is %f" % (logllh1[-1])
print "The log-likelihood for HMM on test data is %f" % (logllh2[-1])

# 7
path1 = hmm.viterbi(data_train, states,
                    start_proba1[-1], transition_proba1[-1],
                    means1[-1], covariances1[-1])


def plotViterbi(data, path, means):
    """Scatter-plot 2-D data points colored by their decoded Viterbi state.

    Parameters
    ----------
    data : array of shape (n, 2)
        Observations to plot (column 0 = x, column 1 = y).
    path : sequence of length n
        Decoded state index for each observation (values castable to int).
    means : array of shape (K, 2)
        Cluster/state means, plotted in black.
    """
    colors = ['b', 'g', 'r', 'y']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # Modulo guards against IndexError when there are more than
    # len(colors) states (the original hard-coded exactly 4 colors).
    for point, state in zip(data, path):
        ax.scatter(point[0], point[1], color=colors[int(state) % len(colors)])
    for mean in means:
        ax.scatter(mean[0], mean[1], color="black")
Example #34
0
 # Build the HMM: one hidden state per POS tag, with transition matrix A,
 # emission matrix B, symbol list V and initial distribution Pi estimated
 # earlier in this (unseen) function.
 # NOTE(review): HMM, map_POS_index, map_index_POS, testSet, i and the
 # cumulative number* counters are defined outside this view -- confirm.
 model = HMM(len(map_POS_index.keys()), A=transition_probabilities, B=emission_probabilities, V=symbolList, Pi=initialProbabilities)
 
 # testing data
 iterSentencesCorrect = 0
 iterSentences = 0
 iterTagsCorrect = 0
 iterTags = 0
 for sentence in testSet:
     # Wrap the sentence in start/end markers to match the training format.
     wordSeq = ['<S>']
     POSSeq = ['<S>']
     for word, POS in sentence:
         wordSeq.append(word)
         POSSeq.append(POS)
     wordSeq.append('<\S>')
     POSSeq.append('<\S>')
     # Viterbi decoding; map state indices back to POS tag names.
     resultPOS = viterbi(model, wordSeq, scaling=False)
     returnedSeq = [map_index_POS[x] for x in resultPOS[0]]
     
     # Sentence counts as correct only when every tag matches.
     if returnedSeq == POSSeq:
         numberSentencesCorrect += 1
         iterSentencesCorrect += 1
     numberSentences +=1
     iterSentences += 1
     
     # Tag-level accuracy over aligned positions.
     for x, y in zip(POSSeq, returnedSeq):
         if x==y:
             numberTagsCorrect +=1
             iterTagsCorrect += 1
         numberTags += 1
         iterTags += 1
 # Python 2 print statement: emit per-iteration accuracy as a CSV row.
 print ','.join(["test", str(i), str(iterSentencesCorrect/float(iterSentences)), str(iterTagsCorrect/float(iterTags))])