Beispiel #1
0
def main():
	""" Trains one Classifier per sentiment and scores every test entry.

		Loads the negative, neutral and positive training files and the
		test set from the "resources" directory under sys.path[0], then
		prints, for each test entry, the absolute value of the score that
		each classifier assigns to it. Stops at the first empty word list
		returned by the test Parser.
	"""
	# Build every path with os.path.join so the separator is correct on any
	# platform; a hard-coded "resources\\" suffix only resolves on Windows.
	res_dir = os.path.join(sys.path[0], "resources")
	neg_classif = Classifier(os.path.join(res_dir, "training_negative.txt"))
	neu_classif = Classifier(os.path.join(res_dir, "training_neutral.txt"))
	pos_classif = Classifier(os.path.join(res_dir, "training_positive.txt"))

	# Entry count over all categories; each classifier divides by it.
	total_entries = neu_classif.get_entries() + neg_classif.get_entries() + pos_classif.get_entries()

	# load test files
	test_parser = Parser(os.path.join(res_dir, "test_set.txt"))

	counter = 1
	while True:
		word_list = test_parser.giveWordList()
		# An empty word list is treated as the end of the test set.
		if len(word_list) == 0:
			break
		neg_p = neg_classif.classification_probability(word_list, total_entries)
		neu_p = neu_classif.classification_probability(word_list, total_entries)
		pos_p = pos_classif.classification_probability(word_list, total_entries)
		# Scores can be negative (they include sums of logarithms), so the
		# magnitude is what gets printed.
		print("Test " + str(counter) + ":\n")
		print("\tNegative: " + str(math.fabs(neg_p)) + "%\n")
		print("\tNeutral: " + str(math.fabs(neu_p)) + "%\n")
		print("\tPositve: " + str(math.fabs(pos_p)) + "%\n")
		counter += 1
class Classifier(object):
	""" Contains statistical data learned from one training text.
		One Classifier object per classifiable category.
	"""

	def __init__(self, training_file_path):
		""" Creates a Parser for training_file_path and learns from it
			immediately, so the object is ready to score once constructed.
		"""
		# word -> number of times it was added during training
		self._dict = {}
		# number of non-empty word lists consumed from the parser
		self._entries = 0
		# total number of words added, repeats included (not read by any
		# method of this class)
		self._total_words = 0
		self.parser = Parser(training_file_path)
		self._learn()

	def _learn(self):
		""" Uses the Parser to get entries and build the dictionary.

			Keeps requesting word lists until an empty one comes back; every
			non-empty list counts as one entry.
		"""
		while True:
			word_list = self.parser.giveWordList()
			if len(word_list) == 0:
				return
			for word in word_list:
				self._add_word(word)
			self._entries += 1

	def _add_word(self, word):
		""" Adds a word to the dictionary, or increments its count if it is
			already inside. The running word total grows by one either way.
		"""
		self._dict[word] = self._dict.get(word, 0) + 1
		self._total_words += 1

	def likelihood(self, word):
		""" The likelihood of a word xi occurring conditionally, p(xi | Ck).

			Computed as (occurrences of word) / (number of entries).
			Returns 0 for a word that never appeared in the training text.
		"""
		if word in self._dict:
			return self._dict[word] / self._entries
		return 0

	def classification_probability(self, word_list, total_entries):
		""" Returns this category's score for the given entry.

			word_list: List, entry from test file Parser's giveWordList()
			total_entries: int, sum of all Classifiers' entries

			The value computed is
				p(Ck) * (1 + SIGMA(for all i)[ log p(xi | Ck) ])
			where p(Ck) = entries / total_entries and words whose
			likelihood is 0 are left out of the sum.

			Returns 0.0 when total_entries is 0, since no prior can be
			formed without training data.

			NOTE(review): this is not the usual log-space Naive Bayes score
			(log p(Ck) + SIGMA log p(xi | Ck), with smoothing for unseen
			words). The formula is kept as is because callers print the
			result; confirm the intent before changing it.
		"""
		# Guard against dividing by zero when nothing was trained at all.
		if total_entries == 0:
			return 0.0
		prob_c = self._entries / total_entries
		total_sum = 1
		for word in word_list:
			likelihood = self.likelihood(word)
			# log(0) is undefined, so unseen words contribute nothing.
			if likelihood != 0:
				total_sum += math.log(likelihood)
		return prob_c * total_sum

	def get_entries(self):
		""" Returns the number of entries """
		return self._entries