def test_collect_counts(self):
    """Check the counts gathered by ``NaiveBayes._collect_counts``.

    Runs ``_collect_counts`` on ``self.training_set`` (fixture defined
    outside this method -- presumably in ``setUp``; confirm) and compares
    ``count_x_y_table`` and ``count_y_table`` with hand-computed values.
    Per the hint printed below, the expected values are unsmoothed counts.
    """
    classifier = NaiveBayes()
    classifier._collect_counts(self.training_set)

    # Translate label / feature names into the indices used by the tables.
    cat_index = classifier.label_codebook.get_index('cat')
    dog_index = classifier.label_codebook.get_index('dog')
    purr_index = classifier.feature_codebook.get_index('purr')
    meow_index = classifier.feature_codebook.get_index('meow')
    bark_index = classifier.feature_codebook.get_index('bark')
    woof_index = classifier.feature_codebook.get_index('woof')

    # A parenthesized single argument prints identically under the Python 2
    # print statement and the Python 3 print function; the bare
    # ``print "..."`` form is a SyntaxError on Python 3.
    print("Test collecting counts "
          "If any of these fails, check if you have updated the codebooks "
          "and check if the counts have been collected correctly "
          "without smoothing")

    # (feature name, feature index, label name, label index, expected count)
    expected_x_y = [
        ('purr', purr_index, 'cat', cat_index, 2),
        ('meow', meow_index, 'cat', cat_index, 2),
        ('bark', bark_index, 'cat', cat_index, 0),
        ('woof', woof_index, 'cat', cat_index, 1),
        ('purr', purr_index, 'dog', dog_index, 0),
        ('meow', meow_index, 'dog', dog_index, 1),
        ('bark', bark_index, 'dog', dog_index, 1),
        ('woof', woof_index, 'dog', dog_index, 2),
    ]
    count_x_y = classifier.count_x_y_table
    for feature, feature_index, label, label_index, expected in expected_x_y:
        self.assertEqual(
            count_x_y[feature_index, label_index], expected,
            'unexpected count for feature %r with label %r' % (feature, label))

    # (label name, label index, expected count)
    expected_y = [
        ('cat', cat_index, 3),
        ('dog', dog_index, 2),
    ]
    count_y = classifier.count_y_table
    for label, label_index, expected in expected_y:
        self.assertEqual(
            count_y[label_index], expected,
            'unexpected count for label %r' % (label,))
from helper import Instance
from naive_bayes import NaiveBayes

# Toy corpus: three 'cat' instances and two 'dog' instances.
cat1 = Instance(label='cat', data=[1, 0], raw_data=['purr', 'purr', 'meow'])
cat2 = Instance(label='cat', data=[0, 2], raw_data=['meow', 'woof'])
cat3 = Instance(label='cat', data=[1], raw_data=['purr'])
dog1 = Instance(label='dog', data=[3, 2], raw_data=['bark', 'woof'])
dog2 = Instance(label='dog', data=[2, 0], raw_data=['woof', 'meow'])
training_set = [cat1, cat2, cat3, dog1, dog2]

classifier = NaiveBayes()
classifier._collect_counts(training_set)

# Translate label / feature names into the indices used by the count table.
cat_index = classifier.label_codebook.get_index('cat')
dog_index = classifier.label_codebook.get_index('dog')
purr_index = classifier.feature_codebook.get_index('purr')
meow_index = classifier.feature_codebook.get_index('meow')
bark_index = classifier.feature_codebook.get_index('bark')
woof_index = classifier.feature_codebook.get_index('woof')

# Test counting.
# NOTE(review): this script reads `classifier.count_table`, while
# test_collect_counts reads `count_x_y_table` -- confirm which attribute
# NaiveBayes actually exposes.
count_x_y = classifier.count_table

# Each expected value is written as (count + 1); presumably the "+ 1" is an
# add-one smoothing term -- confirm against NaiveBayes._collect_counts.
# These were bare `==` expressions whose results were discarded, so a wrong
# count could never be detected; `assert` makes each check effective.
assert count_x_y[purr_index, cat_index] == (2 + 1)
assert count_x_y[meow_index, cat_index] == (2 + 1)
assert count_x_y[bark_index, cat_index] == (0 + 1)
assert count_x_y[woof_index, cat_index] == (1 + 1)
assert count_x_y[purr_index, dog_index] == (0 + 1)
assert count_x_y[meow_index, dog_index] == (1 + 1)
assert count_x_y[bark_index, dog_index] == (1 + 1)