def loss(self, X, y=None):
    """
    Evaluate loss and gradient for the three-layer convolutional network.

    If y is None, run a test-time forward pass and return class scores;
    otherwise return (loss, grads).
    """
    W1 = self.params['W1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']

    # pass pool_param to the forward pass for the max-pooling layer
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

    # Forward pass: conv - relu - 2x2 max pool - fc - relu - fc.
    conv, cache1 = layers.conv_forward(X, W1)
    relu1, cache2 = layers.relu_forward(conv)
    maxp, cache3 = layers.max_pool_forward(relu1, pool_param)
    fc1, cache4 = layers.fc_forward(maxp, W2, b2)
    relu2, cache5 = layers.relu_forward(fc1)
    scores, cache6 = layers.fc_forward(relu2, W3, b3)

    if y is None:
        return scores

    # Backward pass: softmax loss, then propagate back through each layer.
    loss, dscores = layers.softmax_loss(scores, y)
    dx3, dW3, db3 = layers.fc_backward(dscores, cache6)
    dRelu2 = layers.relu_backward(dx3, cache5)
    dx2, dW2, db2 = layers.fc_backward(dRelu2, cache4)
    dmaxp = layers.max_pool_backward(dx2.reshape(maxp.shape), cache3)
    dRelu1 = layers.relu_backward(dmaxp, cache2)
    dx, dW1 = layers.conv_backward(dRelu1, cache1)

    grads = {'W1': dW1, 'W2': dW2, 'b2': db2, 'W3': dW3, 'b3': db3}
    return loss, grads
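# Since the backward pass in loss() is hand-derived, it is worth checking it
# against numeric gradients. A minimal sketch, assuming a ThreeLayerConvNet
# class that wraps loss() and the eval_numerical_gradient / rel_error helpers
# used by the tests below; the constructor call and input sizes here are
# placeholders, not the repo's actual API.
import numpy as np

np.random.seed(498)
model = ThreeLayerConvNet()  # hypothetical constructor
X = np.random.randn(2, 1, 16, 16)
y = np.random.randint(10, size=2)

loss, grads = model.loss(X, y)
for name in sorted(grads):
    # eval_numerical_gradient perturbs params[name] in place, so the lambda
    # can ignore its argument and simply re-evaluate the loss.
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False)
    print('%s max relative error: %e' % (name, rel_error(grad_num, grads[name])))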
def forward(data, model):
    """
    Run the network forward and return raw class scores (logits).

    Input:
        data  : (N, C, H, W)
        model : dict of parameters keyed by "w1", "b1", ..., "wo", "bo"
    Output:
        out : (N, K) unnormalized class scores
    """
    w1, b1 = "w1", "b1"
    w3, b3 = "w3", "b3"
    w5, b5 = "w5", "b5"
    w6, b6 = "w6", "b6"
    wo, bo = "wo", "bo"

    # Forward pass: three conv-ReLU stages (the first two followed by
    # 2x2 max pooling), then two fully connected layers.
    h1_pre = layers.conv_forward(data, model[w1], model[b1])
    h1 = layers.ReLu_forward(h1_pre)
    h2 = layers.max_pool(h1, 2)
    h3_pre = layers.conv_forward(h2, model[w3], model[b3])
    h3 = layers.ReLu_forward(h3_pre)
    h4 = layers.max_pool(h3, 2)
    h5_pre = layers.conv_forward(h4, model[w5], model[b5])
    h5 = layers.ReLu_forward(h5_pre)
    h6 = layers.full_forward(h5, model[w6], model[b6])
    out = layers.full_forward(h6, model[wo], model[bo])

    # No softmax here: softmax is monotonic, so the argmax of the logits
    # gives the same prediction as the argmax of the probabilities.
    return out
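# Because forward() returns raw logits and softmax is monotonic, prediction
# is just an argmax over the class axis; no normalization step is needed.
# A minimal usage sketch, assuming `data` is an (N, C, H, W) batch and
# `model` is a parameter dict with the keys listed above.
import numpy as np

scores = forward(data, model)      # (N, K) unnormalized class scores
preds = np.argmax(scores, axis=1)  # same argmax as the softmax probabilities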
def test_conv_backward(self):
    # Conv backward
    np.random.seed(498)
    x = np.random.randn(3, 2, 7, 7)
    w = np.random.randn(4, 2, 3, 3)
    dout = np.random.randn(3, 4, 5, 5)

    dx_num = eval_numerical_gradient_array(
        lambda x: layers.conv_forward(x, w)[0], x, dout)
    dw_num = eval_numerical_gradient_array(
        lambda w: layers.conv_forward(x, w)[0], w, dout)

    _, cache = layers.conv_forward(x, w)
    dx, dw = layers.conv_backward(dout, cache)

    print('\nTesting conv_backward function:')
    # The errors should be around 3e-9
    print('dx error: ', rel_error(dx_num, dx))
    np.testing.assert_allclose(dx, dx_num, atol=1e-8)
    # The errors should be around 5e-10
    print('dw error: ', rel_error(dw_num, dw))
    np.testing.assert_allclose(dw, dw_num, atol=1e-8)
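# Both tests rely on a rel_error helper that is not shown in this section.
# A common definition, and a reasonable guess at what the repo uses, is the
# maximum elementwise relative error with a small floor in the denominator:
import numpy as np

def rel_error(x, y):
    """Max relative error between x and y, floored to avoid division by zero."""
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))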
def test_conv_forward(self):
    # Conv forward
    x = np.linspace(-0.1, 2.5, num=36).reshape(1, 1, 6, 6)
    w = np.linspace(-0.9, 0.6, num=9).reshape(1, 1, 3, 3)
    out, _ = layers.conv_forward(x, w)
    correct_out = np.array(
        [[[[1.02085714, 0.92057143, 0.82028571, 0.72],
           [0.41914286, 0.31885714, 0.21857143, 0.11828571],
           [-0.18257143, -0.28285714, -0.38314286, -0.48342857],
           [-0.78428571, -0.88457143, -0.98485714, -1.08514286]]]])

    # Compare your output with ours. The error might be around 2e-8.
    # As long as your error is small enough, your implementation should
    # pass this test.
    print('\nTesting conv_forward function:')
    print('difference: ', rel_error(out, correct_out))
    np.testing.assert_allclose(out, correct_out, atol=1e-7)
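# The fixtures in these two tests pin down the convolution geometry: a 6x6
# input with a 3x3 filter yields a 4x4 output, and in the backward test a
# 7x7 input yields 5x5, consistent with stride 1 and no padding, i.e.
# H_out = H - HH + 1. A quick shape check against the same layers module:
import numpy as np

x = np.zeros((1, 1, 6, 6))
w = np.zeros((1, 1, 3, 3))
out, _ = layers.conv_forward(x, w)
assert out.shape == (1, 1, 6 - 3 + 1, 6 - 3 + 1)  # (1, 1, 4, 4)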
def train(data, label, y, model, alpha=0.001):
    """
    Run one forward/backward pass and update the model in place with SGD.

    Input:
        data  : (N, C, H, W)
        label : (N, K) one-hot labels
        y     : (N,) integer class labels
        model : dict of parameters keyed by "w1", "b1", ..., "wo", "bo"
        alpha : learning rate
    Output:
        cost : softmax cross-entropy loss on this batch
        rate : classification rate on this batch
    """
    w1, b1 = "w1", "b1"
    w3, b3 = "w3", "b3"
    w5, b5 = "w5", "b5"
    w6, b6 = "w6", "b6"
    wo, bo = "wo", "bo"

    # Forward pass.
    h1_pre = layers.conv_forward(data, model[w1], model[b1])
    h1 = layers.ReLu_forward(h1_pre)
    h2 = layers.max_pool(h1, 2)
    h3_pre = layers.conv_forward(h2, model[w3], model[b3])
    h3 = layers.ReLu_forward(h3_pre)
    h4 = layers.max_pool(h3, 2)
    h5_pre = layers.conv_forward(h4, model[w5], model[b5])
    h5 = layers.ReLu_forward(h5_pre)
    h6 = layers.full_forward(h5, model[w6], model[b6])
    out = layers.full_forward(h6, model[wo], model[bo])

    # Softmax; the gradient of the loss w.r.t. the logits is y_hat - label.
    y_hat = layers.softmax(out)
    y_hat_arg = np.argmax(y_hat, axis=1)
    dout = y_hat - label
    cost = layers.cost(y_hat, label)
    rate = layers.classification_rate(y, y_hat_arg)

    # Gradient descent: backpropagate and update each parameter in place.
    print("------")
    print(y_hat)
    print("gradient updates : ")
    print("------")

    dout_h6, dwo_gradient, dbo_gradient = layers.full_backward(
        dout, h6, model[wo], model[bo])
    model[wo] = model[wo] - alpha * dwo_gradient
    model[bo] = model[bo] - alpha * dbo_gradient

    dout_h5, dw6_gradient, db6_gradient = layers.full_backward(
        dout_h6, h5, model[w6], model[b6])
    model[w6] = model[w6] - alpha * dw6_gradient
    model[b6] = model[b6] - alpha * db6_gradient

    dout_h4, dw5_gradient, db5_gradient = layers.conv_backward(
        layers.ReLu_backward(h5_pre, dout_h5), h4, model[w5], model[b5])
    model[w5] = model[w5] - alpha * dw5_gradient
    model[b5] = model[b5] - alpha * db5_gradient

    dout_h3 = layers.max_pool_back(h3, dout_h4, 2)
    dout_h2, dw3_gradient, db3_gradient = layers.conv_backward(
        layers.ReLu_backward(h3_pre, dout_h3), h2, model[w3], model[b3])
    model[w3] = model[w3] - alpha * dw3_gradient
    model[b3] = model[b3] - alpha * db3_gradient

    dout_h1 = layers.max_pool_back(h1, dout_h2, 2)
    d_data, dw1_gradient, db1_gradient = layers.conv_backward(
        layers.ReLu_backward(h1_pre, dout_h1), data, model[w1], model[b1])
    model[w1] = model[w1] - alpha * dw1_gradient
    model[b1] = model[b1] - alpha * db1_gradient

    return cost, rate
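# This train() updates `model` in place and returns (cost, rate), so a
# driver only has to feed it batches. A hypothetical outer loop, assuming
# X_train / y_train placeholders and a small one-hot encoder; none of these
# names come from the repo.
import numpy as np

def to_one_hot(y, num_classes=10):
    onehot = np.zeros((y.shape[0], num_classes))
    onehot[np.arange(y.shape[0]), y.astype(int)] = 1
    return onehot

label = to_one_hot(y_train)  # y_train: (N,) integer labels, placeholder
for epoch in range(10):
    cost, rate = train(X_train, label, y_train, model, alpha=0.001)
    print("epoch %d: cost %.4f, rate %.4f" % (epoch, cost, rate))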
def train(data, i, model, alpha=0.0001):
    """
    Run one dropout-regularized forward/backward pass on a 5-example
    minibatch and return the gradients.

    Input:
        data  : (N, 28*28 + 1) rows of flattened MNIST pixels with the
                class label in the last column
        i     : iteration index; selects which minibatch to slice out
        model : dict of parameters keyed by "w1", "b1", ..., "wo", "bo"
        alpha : learning rate (not used here; gradients are returned
                for the caller to apply)
    Output:
        [gradients, cost, rate]
    """
    w1, b1 = "w1", "b1"
    w3, b3 = "w3", "b3"
    w5, b5 = "w5", "b5"
    w6, b6 = "w6", "b6"
    wo, bo = "wo", "bo"

    # Slice out a minibatch of t examples, wrapping around the dataset.
    n = data.shape[0]
    t = 5
    start = (i * t) % n
    end = start + t
    data = data[start:end, ]

    # Split the batch into pixels and labels; build one-hot targets.
    y = np.array(data[:, -1])
    y.resize(t, 1)
    label = np.zeros((t, 10))
    for j in range(t):
        label[j][int(y[j])] = 1
    data = np.array(data[:, 0:28 * 28])
    data.resize(t, 1, 28, 28)
    data = data * 1.01

    # Inverted-dropout keep probability. The masks are built inline here,
    # which is ugly; dropout should move into layers as its own operation
    # so it is transparent to the network definition.
    p = 0.95

    # Forward pass, dropping activations after each pooled/hidden layer.
    h1_pre = layers.conv_forward(data, model[w1], model[b1])
    h1 = layers.ReLu_forward(h1_pre)
    h2 = layers.max_pool(h1, 2)
    U2 = (np.random.rand(*h2.shape) < p) / p
    h2 *= U2  # drop!
    h3_pre = layers.conv_forward(h2, model[w3], model[b3])
    h3 = layers.ReLu_forward(h3_pre)
    h4 = layers.max_pool(h3, 2)
    U4 = (np.random.rand(*h4.shape) < p) / p
    h4 *= U4  # drop!
    h5_pre = layers.conv_forward(h4, model[w5], model[b5])
    h5 = layers.ReLu_forward(h5_pre)
    U5 = (np.random.rand(*h5.shape) < p) / p
    h5 *= U5  # drop!
    h6 = layers.full_forward(h5, model[w6], model[b6])
    U6 = (np.random.rand(*h6.shape) < p) / p
    h6 *= U6  # drop!
    out = layers.full_forward(h6, model[wo], model[bo])

    y_hat = layers.softmax(out)
    y_hat_arg = np.argmax(y_hat, axis=1)
    dout = y_hat - label
    cost = layers.cost(y_hat, label)
    rate = layers.classification_rate(y, y_hat_arg)

    print("------")
    print("gradient updates : ")
    print("cost : ", cost)
    print("rate : ", rate)

    # Backward pass; each dropout mask is reapplied to its layer's gradient.
    dout_h6, dwo_gradient, dbo_gradient = layers.full_backward(
        dout, h6, model[wo], model[bo])
    dout_h6 *= U6
    dout_h5, dw6_gradient, db6_gradient = layers.full_backward(
        dout_h6, h5, model[w6], model[b6])
    dout_h5 *= U5
    dout_h4, dw5_gradient, db5_gradient = layers.conv_backward(
        layers.ReLu_backward(h5_pre, dout_h5), h4, model[w5], model[b5])
    dout_h4 *= U4
    dout_h3 = layers.max_pool_back(h3, dout_h4, 2)
    dout_h2, dw3_gradient, db3_gradient = layers.conv_backward(
        layers.ReLu_backward(h3_pre, dout_h3), h2, model[w3], model[b3])
    dout_h2 *= U2
    dout_h1 = layers.max_pool_back(h1, dout_h2, 2)
    d_data, dw1_gradient, db1_gradient = layers.conv_backward(
        layers.ReLu_backward(h1_pre, dout_h1), data, model[w1], model[b1])

    gradients = {
        wo: dwo_gradient, bo: dbo_gradient,
        w6: dw6_gradient, b6: db6_gradient,
        w5: dw5_gradient, b5: db5_gradient,
        w3: dw3_gradient, b3: db3_gradient,
        w1: dw1_gradient, b1: db1_gradient,
    }
    return [gradients, cost, rate]
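# Unlike the variant above, this train() returns the gradients instead of
# applying them, so the SGD step lives with the caller (which also makes it
# easy to accumulate gradients over several minibatches before updating).
# A minimal caller-side sketch under that assumption; `raw_data`, `model`,
# and `num_steps` are placeholders, not names from the repo.
alpha = 0.0001
for step in range(num_steps):
    gradients, cost, rate = train(raw_data, step, model, alpha)
    for name, grad in gradients.items():
        model[name] = model[name] - alpha * grad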