/
nb.py
96 lines (75 loc) · 3.08 KB
/
nb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""
The Naive Bayes classifier.
"""
import numpy as np
import matplotlib.pyplot as plt
from plot_digits import *
from utils import *
_SMALL_CONSTANT = 1e-10
class NaiveBayesClassifier(object):
    """
    A simple naive Bayes classifier implementation for binary classification.

    All class-conditional distributions are univariate Gaussians, i.e. the
    features are assumed independent given the class label (diagonal
    covariance). Parameters are fit by maximum likelihood in train().
    """

    # Numerical floor added to priors and variances so that log(0) and
    # division by zero cannot occur (e.g. for constant features or an
    # empty class).
    _SMALL_CONSTANT = 1e-10

    def __init__(self):
        # Becomes True once train() has populated log_prior / mean / var.
        self.model_learned = False

    def train(self, training_data, training_label):
        """
        Train the naive Bayes classifier on the given training set.

        training_data: n_examples x n_dimensions data matrix.
        training_label: n_examples x 1 dimensional binary (0/1) label vector.
        """
        n_examples, n_dims = training_data.shape
        training_label = training_label.squeeze()
        K = 2  # binary classification: labels are 0 and 1
        # NOTE: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin float is the documented replacement.
        prior = np.empty(K, dtype=float)
        mean = np.empty((K, n_dims), dtype=float)
        var = np.empty((K, n_dims), dtype=float)
        for k in range(K):
            mask = (training_label == k)  # compute the class mask once
            prior[k] = mask.mean()
            mean[k, :] = training_data[mask, :].mean(axis=0)
            var[k, :] = training_data[mask, :].var(axis=0)
        self.log_prior = np.log(prior + self._SMALL_CONSTANT)
        self.mean = mean
        # Smooth the variances so the Gaussian log-density below is finite.
        self.var = var + self._SMALL_CONSTANT
        self.model_learned = True

    def predict(self, test_data):
        """
        Generate predictions using the learned model on test data.

        test_data: n_examples x n_dimensions data matrix.
        Return: n_examples x 1 dimensional binary label vector, which is the
        predictions for the test data.
        Raises RuntimeError if train() has not been called yet.
        """
        if not self.model_learned:
            # RuntimeError is still caught by callers handling Exception.
            raise RuntimeError('You should learn a model first!')
        K = self.log_prior.size
        n_examples = test_data.shape[0]
        log_prob = np.zeros((n_examples, K), dtype=float)
        for k in range(K):
            # Per-class Gaussian log-density summed over dimensions, plus the
            # log prior. The constant -0.5*log(2*pi) term is dropped because
            # it is identical across classes and cannot change the argmax.
            diff = test_data - self.mean[k, :][np.newaxis, :]
            log_prob[:, k] = (-0.5 * diff ** 2 / self.var[k, :][np.newaxis, :]
                              - 0.5 * np.log(self.var[k, :][np.newaxis, :])
                              ).sum(axis=1) + self.log_prior[k]
        return log_prob.argmax(axis=1)[:, np.newaxis]

    def compute_accuracy(self, test_data, test_label):
        """Return the fraction of test examples whose prediction matches
        test_label (n_examples x 1 binary vector)."""
        return (self.predict(test_data) == test_label).mean()
def main():
    """
    Learn a Naive Bayes classifier on the digit dataset, evaluate its
    performance on training and test sets, then visualize the mean and
    variance for each class.
    """
    # Load each split once instead of re-reading the dataset per call.
    train_data, train_label = load_train()
    test_data, test_label = load_test()
    nbc = NaiveBayesClassifier()
    nbc.train(train_data, train_label)
    # Python 3 print function (the Python 2 print statement is a syntax
    # error under the interpreter this code now requires).
    print('Training accuracy: %.4f Test accuracy: %.4f' % (
        nbc.compute_accuracy(train_data, train_label),
        nbc.compute_accuracy(test_data, test_label)))
    plot_digits(nbc.mean)
    plot_digits(nbc.var)
    # add your code here (it should be less than 10 lines)
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()