-
Notifications
You must be signed in to change notification settings - Fork 0
/
classifier.py
111 lines (84 loc) · 3.35 KB
/
classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""
Class for a classification algorithm.
"""
import numpy as np
import collections
class Classifier:
    """
    Thin front end that forwards to one of several algorithm modules.

    The algorithm is selected by the ``classifier_type`` string given to the
    initializer. The implementation lives in a separate module
    (``decision_tree``, ``naive_bayes`` or ``neural_nets``) which is imported
    lazily, the first time it is needed, so only the module for the selected
    algorithm has to be importable.
    """

    # classifier_type -> (module name, name of that module's training function).
    # Every backend module is also expected to expose ``predict`` and ``test``
    # functions taking ``(params, data)``.
    _BACKENDS = {
        'decision_tree': ('decision_tree', 'train_decision_tree'),
        'naive_bayes': ('naive_bayes', 'train_naive_bayes'),
        'neural_net': ('neural_nets', 'train_neural_net'),
    }

    def __init__(self, classifier_type, **kwargs):
        """
        Initializer. classifier_type should be a string which refers
        to the specific algorithm the current classifier is using
        ('decision_tree', 'naive_bayes' or 'neural_net').
        Use keyword arguments to store parameters
        specific to the algorithm being used. E.g. if you were
        making a neural net with 30 input nodes, hidden layer with
        10 units, and 3 output nodes your initialization might look
        something like this:

            neural_net = Classifier('neural_net', weights=[], num_input=30,
                                    num_hidden=10, num_output=3)

        Here the weight matrices are stored in a list called weights
        (initially empty).
        """
        self.classifier_type = classifier_type
        # The keyword arguments arrive as a plain dictionary; keep it so the
        # other methods can hand it to the algorithm module.
        self.params = kwargs

    def _load_backend(self):
        """
        Import and return the module implementing ``self.classifier_type``.

        Returns a ``(module, train_function_name)`` tuple.
        Raises ValueError if ``self.classifier_type`` is not one of the
        supported names, instead of silently doing nothing.
        """
        import importlib

        backend = self._BACKENDS.get(self.classifier_type)
        if backend is None:
            raise ValueError(
                'Unknown classifier_type {!r}; expected one of {}'.format(
                    self.classifier_type, sorted(self._BACKENDS)))
        module_name, train_name = backend
        return importlib.import_module(module_name), train_name

    def train(self, training_data):
        """
        Train the selected algorithm on training_data.

        Data should be nx(m+1) numpy matrix where n is the
        number of examples and m is the number of features
        (recall that the first element of the vector is the label).

        The work is delegated to the algorithm module's training function,
        which receives self.params and is expected to modify it in place
        and to print the accuracy, precision, and recall on the training
        data.

        Raises ValueError if classifier_type is not supported.
        """
        module, train_name = self._load_backend()
        getattr(module, train_name)(self.params, training_data)

    def predict(self, data):
        """
        Predict class of a single data vector.

        Data should be 1x(m+1) numpy matrix where m is the number of features
        (recall that the first element of the vector is the label).

        Returns the predicted label, i.e. whatever the algorithm module's
        ``predict(params, data)`` returns.

        Raises ValueError if classifier_type is not supported.
        """
        module, _ = self._load_backend()
        # Hand the backend's answer back to the caller; dropping it would
        # make this method always return None.
        return module.predict(self.params, data)

    def test(self, test_data):
        """
        Evaluate the selected algorithm on test_data.

        Data should be nx(m+1) numpy matrix where n is the
        number of examples and m is the number of features
        (recall that the first element of the vector is the label).

        The work is delegated to the algorithm module's
        ``test(params, test_data)``, which is expected to print the
        accuracy, precision, and recall on the test data.

        Raises ValueError if classifier_type is not supported.
        """
        module, _ = self._load_backend()
        module.test(self.params, test_data)