NeuralNetwork.py

import numpy as np
from scipy.special import expit
from scipy.optimize import fmin_cg

__all__ = ['NeuralNetModel', 'train_network']


class NeuralNetModel():
    """ Output of a neural network training algorithm."""

    def __init__(self, neural_network):
        self._network = neural_network

    def get_weights(self):
        """ Return the weights used by the model to make predictions."""
        return self._network.weights

    def predict(self, X):
        """ Classify the observations in X.

        Args:
            X (array): data to be classified.

        Returns:
            array: each entry is the class assigned to the corresponding
                row in X.

        Raises:
            ValueError: if the features in X do not match the model.
        """
        num_features = self._network.layers[0]
        if not X.shape[1] == num_features:
            raise ValueError("Number of features in data must be {0}."
                             .format(num_features))
        forward_propogate(self.get_weights(), X, self._network)
        class_predictions = self._network.get_final_activation()
        return np.argmax(class_predictions, axis = 1)

    def __repr__(self):
        return("Trained neural network with {0} hidden layers of size(s) {1}."
               .format(len(self._network.layers)-2, self._network.layers[1:-1]))

class NeuralNetwork():
    """ Internal network class to keep track of its properties."""

    def __init__(self, layer_sizes):
        self.layers = layer_sizes
        self._shapes = []
        self.activations = []
        self.raw_outputs = []
        self._initialize_shapes()
        self.weights = None

    def num_transitions(self):
        """ Return the number of transitions between layers."""
        return len(self.layers) - 1

    def get_final_activation(self):
        """ Return the last activation, giving the current probabilities."""
        return self.activations[self.num_transitions()]

    def _initialize_shapes(self):
        """ Determine the shapes for the weights in the network."""
        for index in range(self.num_transitions()):
            input_size = self.layers[index]
            output_size = self.layers[index+1]
            shape = (output_size, 1 + input_size)
            self._shapes.append(shape)

    def initialize_weights(self):
        """ Return random initial weights according to this network's shapes."""
        weights = []
        for index in range(self.num_transitions()):
            init_epsilon = self._get_initial_epsilon(index)
            shape_x, shape_y = self._shapes[index]
            weight = init_epsilon * (np.random.rand(shape_x, shape_y) * 2 - 1)
            weights.append(weight)
        return weights

    def _get_initial_epsilon(self, index):
        """ Return scaling factor for initialized weights."""
        input_size = self.layers[index]
        output_size = self.layers[index+1]
        return (6 / (input_size + output_size))**(1/2)

    def reshape_weights(self, flat_weights):
        """ Restore shape of weights according to this network's shapes."""
        start_index = 0
        shaped_weights = []
        for shape in self._shapes:
            matrix_size = shape[0] * shape[1]
            end_index = start_index + matrix_size
            shaped_weights.append(flat_weights[start_index:end_index].reshape(shape))
            start_index = end_index
        return shaped_weights

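# Illustration (not part of the original module): for layers = [4, 5, 3] the
# shapes recorded above are [(5, 1 + 4), (3, 1 + 5)] = [(5, 5), (3, 6)] --
# one row per output unit and one column per input unit plus a bias column --
# so the flattened weight vector handled by reshape_weights has
# 5*5 + 3*6 = 43 entries.
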
def train_network(X, Y, layers, regularization = 0, max_iters = 200):
    """ Train a neural network and return the model.

    Args:
        X (array): data consisting of rows of features.
        Y (array): array of labels corresponding to each row in X.
            Must consist of integers from 0 to n - 1, where n is the
            number of classes (the final entry of layers).
        layers (list): the number of features in each layer. The first
            entry must be the number of features (columns) in X, the
            last must be the number of classes, and those in between
            determine the size of each hidden layer.
        regularization (float): penalty factor for having larger weights
            (default: 0).
        max_iters (int): the max number of iterations used by the algorithm
            when searching for optimal weights. A higher number will produce
            a better fit but extends run time (default: 200).

    Returns:
        NeuralNetModel: the trained model.
    """
    check_input_validity(X, Y, layers)
    num_classes = layers[-1]
    network = NeuralNetwork(layers)
    initial_weights = flatten_weights(network.initialize_weights())
    Y = process_labels(Y, num_classes)
    optimal = fmin_cg(compute_cost,
                      initial_weights,
                      back_propogate,
                      args = (X, Y, network, regularization),
                      maxiter = max_iters)
    optimal_weights = network.reshape_weights(optimal)
    forward_propogate(optimal_weights, X, network)
    network.weights = optimal_weights
    return NeuralNetModel(network)

def compute_cost(flat_weights, X, label_matrix, network, regularization):
    """ Propagate weights through the network and compute the cost function."""
    weights = network.reshape_weights(flat_weights)
    forward_propogate(weights, X, network)
    return cost_function(weights, label_matrix, network, regularization)

def forward_propogate(weights, X, network):
    """ Perform forward propagation on the given network and dataset."""
    raw_outputs = [X]
    activations = [X]
    for i in range(network.num_transitions()):
        activations[i] = insert_ones(activations[i])
        weight = weights[i]
        raw_output = activations[i].dot(weight.transpose())
        activation = sigmoid(raw_output)
        raw_outputs.append(raw_output)
        activations.append(activation)
    network.activations = activations
    network.raw_outputs = raw_outputs

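# In each loop iteration above, the update is (illustrative notation, not
# from the original source):
#     a_next = sigmoid([1 | a] . W.T)
# where [1 | a] is the current activation with a prepended bias column of
# ones and W has shape (next_layer_size, 1 + current_layer_size).
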
def cost_function(weights, label_matrix, network, regularization):
    """ Compute the cost function for the network's current state."""
    a = network.get_final_activation()
    Y = label_matrix
    m = len(label_matrix)
    weight_sum = 0
    for weight in weights:
        weight_sum += (weight[:,1:]**2).sum()
    reg_term = (regularization / (2*m)) * weight_sum
    return (-Y * log(a) - (1-Y) * log(1-a)).sum() / m + reg_term

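# The value returned above is the regularized cross-entropy cost
# (illustrative notation):
#     J = (1/m) * sum( -Y*log(A) - (1 - Y)*log(1 - A) )
#         + (regularization / (2*m)) * sum of squared weights
# with the bias columns excluded from the penalty; A is the final activation
# and m is the number of observations.
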
def back_propogate(flat_weights, X, label_matrix, network, regularization):
    """ Use back propagation to get the gradient of the cost function."""
    weights = network.reshape_weights(flat_weights)
    # TODO: clean up algorithm so this step isn't necessary
    if len(network.activations) == 0:
        forward_propogate(weights, X, network)
    deltas = get_deltas(weights, label_matrix, network)
    weight_gradients = get_weight_gradients(weights, deltas, network, regularization)
    return weight_gradients

def get_deltas(weights, label_matrix, network):
    """ Return a list of the deltas needed for the gradient computation."""
    deltas = []
    delta = network.get_final_activation() - label_matrix
    deltas.append(delta)
    for index in reversed(range(1, network.num_transitions())):
        weight = weights[index][:,1:]
        sigmoid_grad = sigmoid_gradient(network.raw_outputs[index])
        delta = delta.dot(weight) * sigmoid_grad
        deltas.insert(0, delta)
    return deltas

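# The deltas above follow the standard backpropagation recursion
# (illustrative notation): the output-layer delta is A - Y, and each earlier
# delta is
#     delta_l = (delta_{l+1} . W_l[:, 1:]) * sigmoid'(z_l)
# where W_l maps layer l to layer l + 1 (bias column dropped) and z_l is the
# raw output stored during forward propagation.
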
def get_weight_gradients(weights, deltas, network, regularization):
    """ Return a flat array of the gradients of the weights."""
    activations = network.activations
    weight_gradients = []
    m = activations[0].shape[0]
    for index, weight in enumerate(weights):
        weight[:,0] = 0  # zero the bias column so it is not regularized
        delta, activation = deltas[index], activations[index]
        base_term = delta.transpose().dot(activation) / m
        reg_term = regularization * weight / m
        weight_gradients.append(base_term + reg_term)
    return flatten_weights(weight_gradients)

def flatten_weights(weights):
    """ Return a flat array of the weights."""
    return np.concatenate([weight.flatten() for weight in weights])

def process_labels(Y, num_labels):
    """ Given a sequence of labels 0 to n, produce a 0-1 matrix where entry
    i, j is 1 if and only if the ith label is j."""
    label_matrix = np.zeros((len(Y), num_labels))
    for i in range(num_labels):
        label_matrix[:,i] = 1 * (Y == i)
    return label_matrix

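# For example (not part of the original module), process_labels applied to
# Y = np.array([0, 2, 1]) with num_labels = 3 produces:
#     [[1., 0., 0.],
#      [0., 0., 1.],
#      [0., 1., 0.]]
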
def sigmoid_gradient(z):
    """ Gradient of the sigmoid function."""
    return sigmoid(z) * (1 - sigmoid(z))

def sigmoid(z):
    """ Vectorized sigmoid/logistic function."""
    return expit(z)

def insert_ones(X):
    """ Insert a column of ones in front of the dataset X and return it."""
    X = array_to_ndarray(X)
    num_rows = X.shape[0]
    return np.hstack((np.ones((num_rows, 1)), X))

def array_to_ndarray(X):
    """ Return a multidimensional version of X if it isn't already one."""
    if len(X.shape) == 1:
        X = X.reshape(X.shape[0], 1)
    return X

def log(num_array):
    """ Logarithm with a small offset added so that entries equal to 0 do not
    produce -inf or rounding errors."""
    offset = 1e-20
    return np.log(num_array + offset)

def check_input_validity(X, Y, layers):
    """ Raise an error if invalid input is passed to the network training method."""
    try:
        observations, features = X.shape
        label_size = Y.size
    except AttributeError:
        raise AttributeError("X and Y must be numpy arrays, "
                             "or pandas data frames/series.")
    if not observations == label_size:
        raise ValueError("Number of rows in X does not match "
                         "number of labels.")
    if not features == layers[0]:
        raise ValueError("Number of features in X does not match "
                         "first entry of layers.")
    unique_labels = np.unique(Y)
    if not set(unique_labels) <= set(range(layers[-1])):
        raise ValueError("Labels in Y must be integers from 0 to n - 1, "
                         "where n is the final entry of layers.")
