def calculate_viterbi_matrix(N, states, sents, ngram_counts, l, vectors,
                             sentiment_counts, word_lists, popular_words,
                             tags):
    """Build the Viterbi trellis for a sequence of sentences.

    The result is a ``len(states) x len(sents)`` matrix ``M``.  Each cell
    ``M[s][t]`` is a dict with three keys:

    * ``'prob'``  -- best score of any path ending in ``states[s]`` at
      sentence ``t``.  Scores are combined by addition (presumably they are
      log-probabilities -- confirm against ``prior_prob`` / ``likelihood``).
    * ``'ngram'`` -- the N-gram of state labels ending in ``states[s]`` that
      produced that score.
    * ``'prev'``  -- label of the predecessor state on the best path, or
      ``"<p>"`` in the first column.

    Args:
        N: Order of the state N-gram model.
        states: Sequence of state labels.
        sents: Sequence of sentences, one trellis column each.
        ngram_counts: Passed through to ``prior_prob.calc_prob``.
        l: Passed through to ``prior_prob.calc_prob``.
        vectors, sentiment_counts, word_lists, popular_words: Passed through
            to ``likelihood.calc_sentiment_prob``.
        tags: Per-sentence tags; ``tags[t]`` is passed along with
            ``sents[t]``.

    Returns:
        The trellis ``M``.  If ``sents`` is empty, every row is an empty list.
    """
    num_states = len(states)
    num_sents = len(sents)
    M = [[0] * num_sents for _ in range(num_states)]

    # Nothing to score: return the (empty-column) matrix instead of failing
    # on sents[0] below.
    if num_sents == 0:
        return M

    # Populate the first column: the history is N-1 start-of-sequence pads.
    start_context = ("<p>",) * (N - 1)
    for i, state in enumerate(states):
        ngram = start_context + (state,)
        cell_prob = prior_prob.calc_prob(ngram_counts, l, ngram)
        cell_prob += likelihood.calc_sentiment_prob(
            vectors, sentiment_counts, sents[0], state, word_lists,
            popular_words, tags[0])
        M[i][0] = {'prob': cell_prob, 'ngram': ngram, 'prev': "<p>"}

    # Populate the rest of the columns.
    for cur_sent in range(1, num_sents):
        prev_column = [M[s][cur_sent - 1] for s in range(num_states)]

        # Label of the highest-scoring state in the previous column.  It does
        # not depend on the current state, so compute it once per column.
        max_prev_prob = float('-inf')
        max_prev_prob_sent = None
        for s, cell in enumerate(prev_column):
            if cell['prob'] > max_prev_prob:
                max_prev_prob = cell['prob']
                max_prev_prob_sent = states[s]

        for cur_state, state in enumerate(states):
            max_prob = float('-inf')
            max_ngram = ()
            best_prev = None
            for test_state, cell in enumerate(prev_column):
                # Slide the predecessor's N-gram window forward by one state.
                test_ngram = cell['ngram'][1:] + (state,)
                test_prob = cell['prob'] + prior_prob.calc_prob(
                    ngram_counts, l, test_ngram)
                if test_prob > max_prob:
                    max_prob = test_prob
                    max_ngram = test_ngram
                    best_prev = test_state

            if best_prev is None:
                # No candidate scored above -inf (all paths impossible).
                # Fall back to the first predecessor so the cell still has a
                # well-formed n-gram and back-pointer.
                best_prev = 0
                max_ngram = prev_column[0]['ngram'][1:] + (state,)

            cell_prob = max_prob + likelihood.calc_sentiment_prob(
                vectors, sentiment_counts, sents[cur_sent], state,
                word_lists, popular_words, tags[cur_sent],
                max_prev_prob_sent)
            M[cur_state][cur_sent] = {
                'prob': cell_prob,
                'ngram': max_ngram,
                # Record the predecessor explicitly.  For N >= 2 this equals
                # max_ngram[N-2]; for N == 1 the n-gram holds only the
                # current state, so indexing it would give the wrong label.
                'prev': states[best_prev],
            }
    return M
def backtrace_viterbi(M, states, ngram_counts, l):
    """Recover the best state sequence from a Viterbi trellis.

    The final state is chosen by adding, to each last-column score, the
    score of that cell's N-gram shifted by one and closed with the
    end-of-sequence marker ``"</p>"``.  The path is then followed backwards
    through the ``'prev'`` labels stored in each cell.

    Args:
        M: Trellis indexed as ``M[state_index][sentence_index]``; each cell
            is a dict with ``'prob'``, ``'ngram'`` and ``'prev'`` keys.
        states: Sequence of state labels; ``states[j]`` labels row ``j``.
        ngram_counts: Passed through to ``prior_prob.calc_prob``.
        l: Passed through to ``prior_prob.calc_prob``.

    Returns:
        List of state labels in sentence order.  Empty when the trellis has
        no rows or no columns.
    """
    if not M or not M[0]:
        return []

    num_states = len(M)
    last = len(M[0]) - 1

    # Pick the best final state, including the transition into "</p>".
    max_prob = float('-inf')
    best = None
    for j in range(num_states):
        cell = M[j][last]
        closing_ngram = cell['ngram'][1:] + ("</p>",)
        test_prob = cell['prob'] + prior_prob.calc_prob(
            ngram_counts, l, closing_ngram)
        if test_prob > max_prob:
            max_prob = test_prob
            best = j
    if best is None:
        # Every final score was -inf (or not comparable); fall back to the
        # first state rather than failing on an unbound variable.
        best = 0

    result = [states[best]]
    prev_label = M[best][last]['prev']

    # Label -> row lookup, so each back-trace step is O(1) and a label is
    # appended at most once per column.
    row_of = {states[j]: j for j in range(num_states)}

    for i in range(last - 1, -1, -1):
        j = row_of.get(prev_label)
        if j is None:
            # Back-pointer names no known state: the path cannot be followed
            # any further, so the result covers only the trailing sentences.
            break
        result.append(prev_label)
        prev_label = M[j][i]['prev']

    result.reverse()
    return result