import numpy as np

from logistic_regression import sigmoid
from utils import roll_vector_to_list_of_matrices, unroll_list_of_matrices_to_vector


def forward_propagation(layer_coefficients, input_data):
    """
    Calculate neural network output based on input data and layer coefficients.
    Forward propagation algorithm.
    :param layer_coefficients: list of L - 1 layer coefficient matrices, where L - layers count;
        the l'th matrix has shape Sl x (S[l-1] + 1), where Sl - l'th layer units count
    :param input_data: S0 x m input layer matrix, where S0 - input layer units count, m - experiments count
    :return: list of L layer activation matrices Sl x m (input layer included), where m - experiments count
    """
data = [input_data] # S0 x m
for theta in layer_coefficients:
data.append(
sigmoid(np.dot(
theta, # Sl x (S[l-1] + 1)
np.vstack(([np.ones(data[-1].shape[1])], data[-1])) # (S[l-1] + 1) x m
)) # Sl x m
)
return data
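
# A minimal shape sketch (hypothetical 2-4-1 network on 3 examples; the
# coefficients below are arbitrary random values, not a trained model):
# >>> rng = np.random.default_rng(0)
# >>> thetas = [rng.standard_normal((4, 3)), rng.standard_normal((1, 5))]
# >>> activations = forward_propagation(thetas, rng.standard_normal((2, 3)))
# >>> [a.shape for a in activations]
# [(2, 3), (4, 3), (1, 3)]
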
def nn_cost_function(layer_coefficients, x, y):
    """
    Calculate cost function for neural network
    :param layer_coefficients: list of L - 1 layer coefficient matrices, where L - layers count
    :param x: S0 x m input layer matrix, where S0 - input layer units count, m - experiments count
    :param y: SL x m expected results matrix, where SL - output layer units count, m - experiments count
    :return: total cross-entropy cost (scalar)
    """
    hypothesis = forward_propagation(layer_coefficients, x)[-1]  # SL x m
    return -1 / y.shape[1] * np.sum(
        np.multiply(y, np.log(hypothesis))  # SL x m
        + np.multiply(1 - y, np.log(1 - hypothesis))  # SL x m
    )
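
# In formula form (m = y.shape[1], h = final layer activations):
#   J = -(1 / m) * sum(y * log(h) + (1 - y) * log(1 - h))
# i.e. the logistic cross-entropy cost summed over all output units and examples.
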
def nn_regularized_cost_function(unrolled_layer_coefficients, x, y, regularization_rate, shape):
    """
    Regularized neural network cost function.
    See nn_cost_function description.
    """
    layer_coefficients = roll_vector_to_list_of_matrices(unrolled_layer_coefficients, shape)
    cost = nn_cost_function(layer_coefficients, x, y)
    for theta in layer_coefficients:
        # skip the bias column (first column), matching nn_regularized_gradient below
        cost += regularization_rate / (2 * y.shape[1]) * np.sum(np.square(theta[:, 1:]))
    return cost
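
# Full regularized cost (a restatement of the two functions above):
#   J_reg = J + lambda / (2 * m) * sum_l sum(theta_l[:, 1:] ** 2)
# where lambda = regularization_rate and m = y.shape[1].
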
def sigmoid_derivative(sigmoid_value):
    """
    Calculate the sigmoid derivative dg/dz expressed through the sigmoid value
    itself: g'(z) = g(z) * (1 - g(z))
    :param sigmoid_value: precomputed sigmoid activation g(z), not the raw input z
    """
    return np.multiply(sigmoid_value, 1 - sigmoid_value)


def back_propagation(layer_coefficients, y, output):
    """
    Calculate error delta values for each layer and unit
    :param layer_coefficients: list of L - 1 layer coefficient matrices, where L - layers count
    :param y: SL x m expected results matrix, where SL - output layer units count, m - experiments count
    :param output: list of L layer activation matrices Sl x m, where Sl - l'th layer units count,
        m - experiments count
    :return: list of L - 1 delta matrices Sl x m, one per non-input layer
    """
delta = [output[-1] - y]
for l in reversed(range(1, len(layer_coefficients))):
delta.insert(
0,
np.multiply(
np.dot(
layer_coefficients[l].transpose(), # (Sl + 1) x S[l + 1]
delta[0] # S[l + 1] x m
)[1:, :], # Sl x m
sigmoid_derivative(output[l]) # Sl x m
)
)
return delta
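
# Shape sketch for the hypothetical 2-4-1 network above: with activations from
# forward_propagation and y of shape (1, 3), back_propagation returns deltas
# shaped [(4, 3), (1, 3)] - one matrix per non-input layer.
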
def nn_gradient(layer_coefficients, x, y):
"""
Neural network gradient (derivative) function
:param layer_coefficients: 1 x (L - 1) array of layer coefficients vectors, where L - layers count
:param x: S0 x m input layer vector, where S0 - input layer units count, m - experiments count
:param y: SL x m expected results matrix, where SL - output layer units count, m - experiments count
:return: 1 x (L - 1) vector of S[l + 1] x (Sl + 1) gradient values
"""
output = forward_propagation(layer_coefficients, x) # l, Sl x m
deltas = back_propagation(layer_coefficients, y, output) # l, Sl x m
grad = []
for l in range(len(deltas)):
grad.append(
1 / y.shape[1] * np.dot(
deltas[l], # S[l + 1] x m
np.vstack([np.ones(output[l].shape[1]), output[l]]).transpose() # m x (Sl + 1)
) # S[l + 1] x (Sl + 1)
)
return grad
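
# Shape sketch (same hypothetical 2-4-1 network as above): the returned
# gradients match the coefficient matrices element for element:
# >>> grads = nn_gradient(thetas, x, y)
# >>> [g.shape for g in grads]
# [(4, 3), (1, 5)]
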
def nn_regularized_gradient(unrolled_layer_coefficients, x, y, regularization_rate, shape):
    """
    Regularized neural network gradient.
    See nn_gradient description.
    """
    layer_coefficients = roll_vector_to_list_of_matrices(unrolled_layer_coefficients, shape)
    gradients = nn_gradient(layer_coefficients, x, y)
    reg_gradients = []
    for l in range(len(layer_coefficients)):
        reg_gradients.append(
            gradients[l]  # S[l + 1] x (Sl + 1)
            + regularization_rate / y.shape[1]
            * np.hstack([
                np.zeros((layer_coefficients[l].shape[0], 1)),  # bias column stays unregularized
                layer_coefficients[l][:, 1:]
            ])  # S[l + 1] x (Sl + 1)
        )
    return unroll_list_of_matrices_to_vector(reg_gradients)[1]
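
# A central-difference gradient check is the usual way to validate the analytic
# gradient above. The helper below is an added sketch, not part of the original
# module; it assumes unrolled_layer_coefficients is a numpy vector and reuses
# nn_regularized_cost_function. For a correct gradient the two results should
# agree to within roughly epsilon ** 2.
def numerical_gradient(unrolled_layer_coefficients, x, y, regularization_rate, shape, epsilon=1e-4):
    """
    Approximate the regularized cost gradient by central differences:
    dJ/dtheta_i ~ (J(theta + e_i * eps) - J(theta - e_i * eps)) / (2 * eps)
    """
    gradient = np.zeros_like(unrolled_layer_coefficients, dtype=float)
    for i in range(unrolled_layer_coefficients.size):
        step = np.zeros_like(unrolled_layer_coefficients, dtype=float)
        step[i] = epsilon
        gradient[i] = (
            nn_regularized_cost_function(unrolled_layer_coefficients + step, x, y, regularization_rate, shape)
            - nn_regularized_cost_function(unrolled_layer_coefficients - step, x, y, regularization_rate, shape)
        ) / (2 * epsilon)
    return gradient
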
def predict_digit(nn_coefficients, image):
    """
    Find the index of the output unit with the max value
    :param nn_coefficients: list of L - 1 layer coefficient matrices, where L - layers count
    :param image: n^2 x 1 image vector (flattened n x n image), where n^2 - input layer units count
    :return: predicted digit, 0-9
    """
    output_data = forward_propagation(nn_coefficients, image)[-1]
    max_index = np.argmax(output_data) + 1
    # the data set labels digit 0 as class 10, so map it back
    return max_index if max_index < 10 else 0
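
# Example (sketch): if the final activations peak at the 3rd output unit,
# np.argmax returns 2, max_index becomes 3 and digit 3 is returned; a peak at
# the 10th unit gives max_index 10, which is mapped back to digit 0.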