def test_collect_counts(self):
     """Check the counts gathered by ``NaiveBayes._collect_counts``.

     Runs ``_collect_counts`` on ``self.training_set`` (a fixture defined
     outside this method -- presumably in ``setUp``; confirm) and compares
     ``count_x_y_table`` and ``count_y_table`` with hand-computed values
     that include no smoothing.
     """
     classifier = NaiveBayes()
     classifier._collect_counts(self.training_set)

     # The codebooks map label / feature strings to the indices used to
     # address the count tables below.
     cat_index = classifier.label_codebook.get_index('cat')
     dog_index = classifier.label_codebook.get_index('dog')
     purr_index = classifier.feature_codebook.get_index('purr')
     meow_index = classifier.feature_codebook.get_index('meow')
     bark_index = classifier.feature_codebook.get_index('bark')
     woof_index = classifier.feature_codebook.get_index('woof')

     # Call form with a single argument: prints the same text under
     # Python 2 and Python 3, whereas the print statement is Python 2 only.
     print("""Test collecting counts
     If any of these fails, check if you have updated the codebooks
     and check if the counts have been collected correctly without smoothing""")
     count_x_y = classifier.count_x_y_table

     # Feature counts for the 'cat' label, indexed as [feature, label].
     self.assertEqual(count_x_y[purr_index, cat_index], 2)
     self.assertEqual(count_x_y[meow_index, cat_index], 2)
     self.assertEqual(count_x_y[bark_index, cat_index], 0)
     self.assertEqual(count_x_y[woof_index, cat_index], 1)

     # Feature counts for the 'dog' label.
     self.assertEqual(count_x_y[purr_index, dog_index], 0)
     self.assertEqual(count_x_y[meow_index, dog_index], 1)
     self.assertEqual(count_x_y[bark_index, dog_index], 1)
     self.assertEqual(count_x_y[woof_index, dog_index], 2)

     # Per-label counts.
     count_y = classifier.count_y_table
     self.assertEqual(count_y[cat_index], 3)
     self.assertEqual(count_y[dog_index], 2)
Example #2
0
from helper import Instance
from naive_bayes import NaiveBayes

# Toy training set: three 'cat' instances and two 'dog' instances.
# NOTE(review): whether _collect_counts reads `data` or `raw_data` is not
# visible here -- confirm against NaiveBayes.
cat1 = Instance(label='cat', data=[1, 0], raw_data=['purr', 'purr', 'meow'])
cat2 = Instance(label='cat', data=[0, 2], raw_data=['meow', 'woof'])
cat3 = Instance(label='cat', data=[1], raw_data=['purr'])
dog1 = Instance(label='dog', data=[3, 2], raw_data=['bark', 'woof'])
dog2 = Instance(label='dog', data=[2, 0], raw_data=['woof', 'meow'])

training_set = [cat1, cat2, cat3, dog1, dog2]

classifier = NaiveBayes()
classifier._collect_counts(training_set)

# The codebooks map label / feature strings to the indices used to address
# the count table.
cat_index = classifier.label_codebook.get_index('cat')
dog_index = classifier.label_codebook.get_index('dog')
purr_index = classifier.feature_codebook.get_index('purr')
meow_index = classifier.feature_codebook.get_index('meow')
bark_index = classifier.feature_codebook.get_index('bark')
woof_index = classifier.feature_codebook.get_index('woof')

# Test counting.
# Each check is an `assert` so that a mismatch actually fails the script; a
# bare comparison expression is evaluated and its result thrown away.
# Expected values are written as (count + 1); the +1 is presumably add-one
# smoothing applied while collecting counts -- confirm against NaiveBayes.
count_x_y = classifier.count_table
assert count_x_y[purr_index, cat_index] == (2 + 1)
assert count_x_y[meow_index, cat_index] == (2 + 1)
assert count_x_y[bark_index, cat_index] == (0 + 1)
assert count_x_y[woof_index, cat_index] == (1 + 1)

assert count_x_y[purr_index, dog_index] == (0 + 1)
assert count_x_y[meow_index, dog_index] == (1 + 1)
assert count_x_y[bark_index, dog_index] == (1 + 1)