# Model.py (forked from schaul/nnsandbox)
from BigMat import *
from Util import TempMatrix
import numpy as np
import numpy.random as random
##############################################################
class Model(object):
    '''
    A trainable model, with parameters, inputs X, and targets Y.
    Let Z = M(X) be the final outputs, and let H be the hidden activities incurred by X.
    A cost is defined as the sum of three additive components:
      - loss(Z,Y) depends on the final outputs and the targets
      - regularizer(H) depends only on the hidden activations
      - penalty(M) depends only on the model and its own parameters
    '''
    def __init__(self,loss=None):
        if isinstance(loss,str):
            self._loss_type = loss
            self._loss_fn = None
            self._loss_delta_fn = None
            if   loss == 'mse': self._loss_fn = self._loss_mse; self._loss_delta_fn = self._loss_delta_mse
            elif loss == 'nll': self._loss_fn = self._loss_nll; self._loss_delta_fn = self._loss_delta_nll
            else: raise ValueError("unrecognized loss '%s' requested" % loss)
        else:
            # otherwise 'loss' must be a (loss_fn,loss_delta_fn) pair of callables
            self._loss_type = 'callback'
            self._loss_fn = loss[0]
            self._loss_delta_fn = loss[1]
        self._tmp_E = TempMatrix()  # scratch memory for computing loss
        self._tmp_e = TempMatrix()  # scratch memory for computing loss

    def cost(self,data):
        '''
        Computes the cost = loss(output(X),Y) + regularizer(hidden(X)) + penalty(model)
        '''
        X,Y = data
        H = self.eval(X,want_hidden=True)
        l = self.loss(H[-1],Y)
        r = self.regularizer(H)
        p = self.penalty()
        c = l+r+p
        return c,l,r,p
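
    # Usage sketch (hypothetical; 'net' is a concrete subclass implementing
    # eval/weights/grad, and (X,Y) is a minibatch):
    #   c,l,r,p = net.cost((X,Y))   # total cost plus its loss/regularizer/penalty parts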
    def apply_constraints(self):
        pass

    ############### LOSS ###############

    def loss(self,Z,Y):
        '''Loss of outputs Z with respect to targets Y.'''
        return float(self._loss_fn(Z,Y))

    def _loss_delta(self,Z,Y,df,out=None):
        '''Computes the gradient error signal to backpropagate up the network.
        Here df is the derivative f'(A) of the output activation f(A).'''
        return self._loss_delta_fn(Z,Y,df,out)

    def _loss_mse(self,Z,Y):
        """Mean squared error (MSE) of outputs Z with respect to targets Y."""
        E = self._tmp_E.get_capacity(*Z.shape)
        e = self._tmp_e.get_capacity(Z.shape[0],1)
        subtract(Z,Y,out=E)
        square(E,out=E)
        sum(E,axis=1,out=e)
        return 0.5*as_numpy(mean(e))  # = 0.5*mean(sum(square(Z-Y),axis=1))
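
    # Reference check (hypothetical, plain-NumPy equivalent of _loss_mse):
    #   Z = np.array([[0.,1.],[1.,0.]]); Y = np.array([[0.,0.],[1.,1.]])
    #   0.5*np.mean(np.sum((Z-Y)**2,axis=1))   # -> 0.5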
    def _loss_delta_mse(self,Z,Y,df,out=None):
        """Computes the MSE gradient error signal to backpropagate up the network.
        Here df is the derivative f'(A) of the output activation f(A)."""
        out = self._loss_delta_nll(Z,Y,df,out)  # reuse: fills out with 1/m * (Z-Y)
        imul(out,df)                            # = 1/m * (Z-Y) * df for mse
        return out

    def _loss_nll(self,Z,Y):
        '''Negative log-likelihood of outputs Z with respect to targets Y.'''
        E = self._tmp_E.get_capacity(*Z.shape)
        e = self._tmp_e.get_capacity(Z.shape[0],1)
        multiply(Z,Y,out=E)
        sum(E,axis=1,out=e)
        log(e,out=e)
        return -as_numpy(mean(e))  # = -mean(log(sum(Z*Y,axis=1)))
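
    # Reference check (hypothetical, plain NumPy): with row-normalized outputs
    # Z (e.g. softmax) and one-hot targets Y, sum(Z*Y,axis=1) picks out the
    # predicted probability of each true class:
    #   Z = np.array([[0.9,0.1],[0.2,0.8]]); Y = np.array([[1.,0.],[0.,1.]])
    #   -np.mean(np.log(np.sum(Z*Y,axis=1)))   # -> -(log(0.9)+log(0.8))/2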
    def _loss_delta_nll(self,Z,Y,df,out=None):
        """Computes the NLL gradient error signal to backpropagate up the network."""
        out = subtract(Z,Y,out=out)  # assign back so out=None (numpy-style allocation) also works
        imul(out,1./Z.shape[0])      # = 1/m * (Z-Y) for nll
        return out
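
    # Note: df is accepted but unused above; for softmax outputs trained with
    # NLL, the softmax Jacobian cancels against the loss derivative, leaving
    # the familiar shortcut delta = 1/m * (Z-Y).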
    ############### REGULARIZER ###############

    def regularizer(self,H):
        return 0.0

    ############### PENALTY ###############

    def penalty(self):
        return 0.0

    ################################# UTILITY FUNCTIONS #########################

    def relative_error(self,A,B,abs_eps):
        absA = np.abs(A)
        absB = np.abs(B)
        I = np.logical_not(np.logical_or(A==B,np.logical_or(absA < abs_eps, absB < abs_eps)))
        E = np.zeros(A.shape,dtype=A.dtype)
        E[I] = np.abs(A[I]-B[I]) / (absA[I] + absB[I])  # elementwise symmetric relative error
        return E
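
    # Example (hypothetical): relative_error(np.array([1.0,2.0]),
    # np.array([1.1,2.0]), 1e-20) -> approximately [0.0476, 0.0]
    # (the second entry is masked out because A==B there)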
    def gradcheck(self,data):
        # Only use a tiny subset of the data, both for speed and to avoid
        # averaging the gradient over many inputs.
        data_subset = data[:min(4,data.size)]
        # Swap a new weights object into the model, so that we can perturb the model's weights from outside
        weights0 = self.weights
        self.weights = weights1 = weights0.copy()
        # Compute the gradient by central differences, looping over each individual weight
        neps = 1e-7
        ngrad = self.make_weights()
        for k in range(len(weights1)):
            w1 = weights1[k]; wg = ngrad[k]
            for i in range(len(w1)):
                # temporarily perturb parameter w[i] by 'neps' and evaluate the new cost
                temp = w1[i]
                w1[i] -= neps; c0,l0,r0,p0 = self.cost(data_subset); w1[i] = temp
                w1[i] += neps; c1,l1,r1,p1 = self.cost(data_subset); w1[i] = temp
                wg[i] = (c1-c0)/(2*neps)
        self.weights = weights0  # restore the original weights object for the model
        # Compute backprop's gradient (bgrad), keeping loss/regularizer/penalty separate
        bgrad = self.grad(data_subset)
        A = ngrad.ravel()
        B = bgrad.ravel()
        aerr = np.abs(A-B)
        rerr = self.relative_error(A,B,1e-20)
        print('absolute_error in gradient: min = %g, max = %g' % (np.min(aerr),np.max(aerr)))
        print('relative_error in gradient: min = %g, max = %g' % (np.min(rerr),np.max(rerr)))
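
##############################################################
# Standalone sanity sketch (hypothetical demo, not part of the original
# module): it re-derives the central-difference check that gradcheck()
# performs, but on a tiny plain-NumPy linear model with the same MSE loss
# as above, so it runs without BigMat or a concrete Model subclass.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    m, n, k = 4, 3, 2                        # samples, inputs, outputs
    X = rng.randn(m, n)
    Y = rng.randn(m, k)
    W = rng.randn(n, k)

    def mse_cost(W):
        Z = X.dot(W)                         # linear "network" with identity output
        return 0.5*np.mean(np.sum((Z-Y)**2, axis=1))

    # Backprop gradient: delta = 1/m * (Z-Y) (df == 1 here), so dW = X^T.dot(delta)
    bgrad = X.T.dot((X.dot(W)-Y)/m)

    # Central-difference gradient, one weight at a time (cf. gradcheck above)
    neps = 1e-7
    ngrad = np.zeros_like(W)
    for i in range(W.size):
        temp = W.flat[i]
        W.flat[i] = temp - neps; c0 = mse_cost(W)
        W.flat[i] = temp + neps; c1 = mse_cost(W)
        W.flat[i] = temp
        ngrad.flat[i] = (c1-c0)/(2*neps)

    print('max absolute error in gradient: %g' % np.max(np.abs(bgrad-ngrad)))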