Example #1
import prior_prob
import likelihood

def calculate_viterbi_matrix(N, states, sents, ngram_counts, l, vectors, sentiment_counts, word_lists, popular_words, tags):
	# M[state][sentence]: each cell stores the best log-probability of any path
	# ending in that state ('prob'), the state n-gram that produced it ('ngram'),
	# and the previous state on that path ('prev')
	M = [[0 for _ in range(len(sents))] for _ in range(len(states))]

	# Populate the first column for the first sentence
	for i in range(len(states)):
		state = states[i]

		# Pad with N-1 start-of-paragraph markers, then append the candidate state
		ngram = ("<p>", ) * (N - 1) + (state, )

		# Log prior of the padded n-gram that starts the document in this state
		cell_prob = prior_prob.calc_prob(ngram_counts, l, ngram)

		# Add the log sentiment likelihood of the first sentence under this state
		cell_prob += likelihood.calc_sentiment_prob(vectors, sentiment_counts, sents[0], state, word_lists, popular_words, tags[0])

		M[i][0] = dict()
		M[i][0]['prob'] = cell_prob
		M[i][0]['ngram'] = ngram
		M[i][0]['prev'] = "<p>"

	# Populate the rest of the columns
	for cur_sent in range(1, len(sents)):
		# Track the highest-probability state from the previous column; it is
		# passed to the sentiment likelihood as context for the current sentence
		max_prev_prob = float('-infinity')
		max_prev_prob_sent = None

		for cur_state in range(0, len(states)):
			state = states[cur_state]
			# Best-scoring transition into the current state
			max_prob = float('-infinity')
			max_ngram = ()
			
			for test_state in range(0, len(states)):
				# Extend the best n-gram ending in test_state with the candidate state
				test_ngram = M[test_state][cur_sent-1]['ngram']
				test_ngram = test_ngram[1:] + (state, )
				test_prev_prob = M[test_state][cur_sent-1]['prob']

				# Path score = best path into test_state + transition prior
				test_prob = test_prev_prob + prior_prob.calc_prob(ngram_counts, l, test_ngram)

				# Remember the highest-probability state in the previous column
				if test_prev_prob > max_prev_prob:
					max_prev_prob = test_prev_prob
					max_prev_prob_sent = states[test_state]

				# Remember the best transition into the current state
				if test_prob > max_prob:
					max_prob = test_prob
					max_ngram = test_ngram

			# Add the sentiment likelihood of the current sentence given this state,
			# conditioned on the most probable previous state
			cell_prob = max_prob + likelihood.calc_sentiment_prob(vectors, sentiment_counts, sents[cur_sent], state, word_lists, popular_words, tags[cur_sent], max_prev_prob_sent)

			M[cur_state][cur_sent] = dict()
			M[cur_state][cur_sent]['prob'] = cell_prob
			M[cur_state][cur_sent]['ngram'] = max_ngram
			M[cur_state][cur_sent]['prev'] = max_ngram[N-2]   # previous state on the best path

	return M
Example #2
import prior_prob

def backtrace_viterbi(M, states, ngram_counts, l):
	# Recover the most probable state sequence by starting from the best
	# final-column cell and following the stored 'prev' states backwards.
	num_states = len(M)
	num_sents = len(M[0])
		
	max_prob = float('-infinity')
	max_prev = None
	max_state = None

	result = []
	
	# Find the best final state, scoring the transition into the closing "</p>" marker
	for j in range(num_states):
		prob = M[j][num_sents-1]['prob']
		prev = M[j][num_sents-1]['prev']

		test_ngram = M[j][num_sents-1]['ngram']
		test_ngram = test_ngram[1:] + ("</p>", )

		test_prob = prob + prior_prob.calc_prob(ngram_counts, l, test_ngram)

		if test_prob > max_prob:
			max_state = states[j]
			max_prob = test_prob
			max_prev = prev

	result.append(max_state)

	# Walk backwards: each cell's 'prev' is the state chosen for the previous sentence
	next_prev = max_prev
	for i in range(num_sents-2, -1, -1):
		max_prev = next_prev
		for j in range(num_states):
			if states[j] == max_prev:
				result.append(max_prev)
				next_prev = M[j][i]['prev']
				break

	result.reverse()
	return result
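The two functions above compose into a single decoding pass. The sketch below only illustrates that flow and is not part of the original examples: N and the state labels are hypothetical, and sents, ngram_counts, l, vectors, sentiment_counts, word_lists, popular_words, and tags are placeholders assumed to have been built earlier in the pipeline, with the project's prior_prob and likelihood modules importable.

# Usage sketch (hypothetical inputs): run the forward pass over all sentences,
# then backtrace to get one sentiment label per sentence in document order.
N = 3                                          # trigram transition model
states = ["positive", "neutral", "negative"]   # hypothetical label set

M = calculate_viterbi_matrix(N, states, sents, ngram_counts, l, vectors,
                             sentiment_counts, word_lists, popular_words, tags)
best_path = backtrace_viterbi(M, states, ngram_counts, l)
print(best_path)   # one label per sentence, e.g. ["neutral", "positive", ...]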