def loss(self, X, y=None):
    """
    Evaluate loss and gradient for the three-layer convolutional network.
    """
    W1 = self.params['W1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']

    # pass pool_param to the forward pass for the max-pooling layer
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

    scores = None
    conv, cache1 = layers.conv_forward(X, W1)
    relu1, cache2 = layers.relu_forward(conv)
    maxp, cache3 = layers.max_pool_forward(relu1, pool_param)
    fc1, cache4 = layers.fc_forward(maxp, W2, b2)
    relu2, cache5 = layers.relu_forward(fc1)
    scores, cache6 = layers.fc_forward(relu2, W3, b3)

    if y is None:
        return scores

    loss, grads = 0, {}
    loss, dscores = layers.softmax_loss(scores, y)
    dx3, dW3, db3 = layers.fc_backward(dscores, cache6)
    dRelu2 = layers.relu_backward(dx3, cache5)
    dx2, dW2, db2 = layers.fc_backward(dRelu2, cache4)
    dmaxp = layers.max_pool_backward(dx2.reshape(maxp.shape), cache3)
    dRelu1 = layers.relu_backward(dmaxp, cache2)
    dx, dW1 = layers.conv_backward(dRelu1, cache1)
    grads = {'W1': dW1, 'W2': dW2, 'b2': db2, 'W3': dW3, 'b3': db3}

    return loss, grads
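# The loss above delegates to layers.softmax_loss(scores, y), which returns the
# scalar cross-entropy loss and the gradient with respect to the scores. A
# minimal sketch of such a function is given below; its internals are an
# assumption here -- the actual layers.py implementation may differ in detail.
def softmax_loss_sketch(scores, y):
    # Shift scores for numerical stability before exponentiating.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    N = scores.shape[0]
    # Average negative log-likelihood of the correct classes.
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    # Gradient: softmax probabilities minus one-hot targets, averaged over N.
    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1
    dscores /= N
    return loss, dscores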
def affine_relu_backward(dout, cache):
    '''
    Backward pass for the affine-relu convenience layer.
    '''
    fc_cache, relu_cache = cache
    da = layers.relu_backward(dout, relu_cache)
    dx, dw, db = layers.affine_backward(da, fc_cache)
    return dx, dw, db
def test_relulayer():
    x = np.random.randn(10, 10)
    dout = np.random.randn(*x.shape)

    dx_num = eval_numerical_gradient_array(lambda x: layers.relu_forward(x)[0], x, dout)

    _, cache = layers.relu_forward(x)
    dx = layers.relu_backward(dout, cache)

    # The error should be around 1e-12
    print('Testing relu layers:')
    print('dx error: ', rel_error(dx_num, dx))
def test_relu_backward(self):
    # ReLU layer: backward
    np.random.seed(498)
    x = np.random.randn(10, 10)
    dout = np.random.randn(*x.shape)

    dx_num = eval_numerical_gradient_array(
        lambda x: layers.relu_forward(x)[0], x, dout)

    _, cache = layers.relu_forward(x)
    dx = layers.relu_backward(dout, cache)

    # The error should be around 3e-12
    print('\nTesting relu_backward function:')
    print('dx error: ', rel_error(dx_num, dx))
    np.testing.assert_allclose(dx, dx_num, atol=1e-9)
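# The tests above exercise layers.relu_forward / layers.relu_backward using the
# (out, cache) convention. For reference, a minimal sketch consistent with that
# interface is shown here; treat it as an assumption about layers.py, not the
# graded implementation itself.
def relu_forward_sketch(x):
    out = np.maximum(0, x)
    cache = x  # keep the input so the backward pass can mask the gradient
    return out, cache


def relu_backward_sketch(dout, cache):
    x = cache
    # Gradient flows only where the input was positive.
    dx = dout * (x > 0)
    return dx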
def backward(self, grad_scores, cache):
    grads = None
    #######################################################################
    # TODO: Implement the backward pass to compute gradients for all      #
    # learnable parameters of the model, storing them in the grads dict   #
    # above. The grads dict should give gradients for all parameters in   #
    # the dict returned by model.parameters().                            #
    #######################################################################
    cache11, cache12, cache2 = cache
    grad_out12, grad_W2, grad_b2 = fc_backward(grad_scores, cache2)
    grad_out11 = relu_backward(grad_out12, cache12)
    grad_X, grad_W1, grad_b1 = fc_backward(grad_out11, cache11)
    grads = {
        'W1': grad_W1,
        'b1': grad_b1,
        'W2': grad_W2,
        'b2': grad_b2,
    }
    #######################################################################
    #                          END OF YOUR CODE                           #
    #######################################################################
    return grads
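# The backward pass above unpacks cache as (cache11, cache12, cache2), which
# implies a forward pass of fc -> relu -> fc that stashes its caches in that
# order. A plausible forward counterpart is sketched below; the W1/b1/W2/b2
# names follow the grads dict above, everything else (method name, exact
# return convention) is an assumption for illustration.
def forward_sketch(self, X):
    out11, cache11 = fc_forward(X, self.params['W1'], self.params['b1'])
    out12, cache12 = relu_forward(out11)
    scores, cache2 = fc_forward(out12, self.params['W2'], self.params['b2'])
    cache = (cache11, cache12, cache2)
    return scores, cache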
def loss(self, X, y=None):
    """
    Compute loss and gradient for a minibatch of data.

    Inputs:
    - X: Array of input data of shape (N, d_in)
    - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

    Returns:
    If y is None, then run a test-time forward pass of the model and return:
    - scores: Array of shape (N, C) giving classification scores, where
      scores[i, c] is the classification score for X[i] and class c.

    If y is not None, then run a training-time forward and backward pass and
    return a tuple of:
    - loss: Scalar value giving the loss
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
    """
    W1, b1 = self.params['W1'], self.params['b1']
    W3, b3 = self.params['W3'], self.params['b3']
    N, d_in = X.shape

    scores = None
    f, cache1 = layers.fc_forward(X, W1, b1)       # fc
    h, cache2 = layers.relu_forward(f)             # relu
    scores, cache3 = layers.fc_forward(h, W3, b3)  # fc

    # If y is None then we are in test mode so just return scores
    if y is None:
        return scores

    loss, grads = 0, {}
    loss, dscores = layers.softmax_loss(scores, y)
    dx2, dW3, db3 = layers.fc_backward(dscores, cache3)
    dx1 = layers.relu_backward(dx2, cache2)
    dx, dW1, db1 = layers.fc_backward(dx1, cache1)
    grads = {'W1': dW1, 'b1': db1, 'W3': dW3, 'b3': db3}

    return loss, grads
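# The two-layer network above relies on layers.fc_forward / layers.fc_backward.
# A common way to implement that pair is sketched below, assuming the cache is
# simply (x, w, b) and that the input is flattened to (N, D) before the matrix
# multiply; the real layers.py may store its cache differently.
def fc_forward_sketch(x, w, b):
    N = x.shape[0]
    out = x.reshape(N, -1).dot(w) + b
    cache = (x, w, b)
    return out, cache


def fc_backward_sketch(dout, cache):
    x, w, b = cache
    N = x.shape[0]
    dx = dout.dot(w.T).reshape(x.shape)  # back to the original input shape
    dw = x.reshape(N, -1).T.dot(dout)
    db = np.sum(dout, axis=0)
    return dx, dw, db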
def affine_relu_backward(dout, cache):
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db
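# affine_relu_backward above undoes the work of affine_relu_forward, the
# forward half of the sandwich layer imported from layer_utils in the test
# scripts further down. A sketch of that forward half, assuming the usual
# affine_forward / relu_forward building blocks, could look like this.
def affine_relu_forward_sketch(x, w, b):
    a, fc_cache = affine_forward(x, w, b)  # affine pre-activation
    out, relu_cache = relu_forward(a)      # elementwise ReLU
    cache = (fc_cache, relu_cache)         # caches needed by the backward pass
    return out, cache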
def loss(self, X, y=None):
    """
    Compute loss and gradient for a minibatch of data.

    Args:
    - X: Input data, numpy array of shape (N, d_1, ..., d_k)
    - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

    Returns:
    If y is None, then run a test-time forward pass of the model and return:
    - scores: Array of shape (N, C) giving classification scores, where
      scores[i, c] is the classification score for X[i] and class c.

    If y is not None, then run a training-time forward and backward pass and
    return a tuple of:
    - loss: Scalar value giving the loss
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
    """
    scores = None
    X = X.astype(self.dtype)
    linear_cache = dict()
    relu_cache = dict()
    dropout_cache = dict()

    """
    TODO: Implement the forward pass for the fully-connected neural
    network, compute the scores and store them in the scores variable.
    """
    #######################################################################
    #                         BEGIN OF YOUR CODE                          #
    #######################################################################
    VAL = X.copy()
    for i in range(1, self.num_layers):
        linear_cache['L{}'.format(i)] = linear_forward(
            VAL, self.params['W{}'.format(i)], self.params['b{}'.format(i)])
        relu_cache['R{}'.format(i)] = relu_forward(linear_cache['L{}'.format(i)])
        if self.use_dropout:
            dropout_cache['D{}'.format(i)], dropout_cache['MASK{}'.format(i)] = dropout_forward(
                relu_cache['R{}'.format(i)],
                self.dropout_params['p'],
                self.dropout_params['train'],
                self.dropout_params['seed'])
            VAL = dropout_cache['D{}'.format(i)]
        else:
            VAL = relu_cache['R{}'.format(i)]
    linear_cache['L{}'.format(self.num_layers)] = linear_forward(
        VAL,
        self.params['W{}'.format(self.num_layers)],
        self.params['b{}'.format(self.num_layers)])
    scores = linear_cache['L{}'.format(self.num_layers)]
    #######################################################################
    #                          END OF YOUR CODE                           #
    #######################################################################

    # If y is None then we are in test mode so just return scores
    if y is None:
        return scores

    loss, grads = 0, dict()
    """
    TODO: Implement the backward pass for the fully-connected net. Store
    the loss in the loss variable and all gradients in the grads
    dictionary. Compute the loss with softmax. grads[k] has the gradients
    for self.params[k]. Add L2 regularisation to the loss function.
    NOTE: To ensure that your implementation matches ours and you pass the
    automated tests, make sure that your L2 regularization includes a
    factor of 0.5 to simplify the expression for the gradient.
    """
    #######################################################################
    #                         BEGIN OF YOUR CODE                          #
    #######################################################################
    loss, grad = softmax(scores, y)
    if self.use_dropout:
        VAR = dropout_cache['D{}'.format(self.num_layers - 1)]
    else:
        VAR = relu_cache['R{}'.format(self.num_layers - 1)]
    dX, grads['W{}'.format(self.num_layers)], grads['b{}'.format(self.num_layers)] = linear_backward(
        grad, VAR,
        self.params['W{}'.format(self.num_layers)],
        self.params['b{}'.format(self.num_layers)])
    grads['W{}'.format(self.num_layers)] += self.reg * self.params['W{}'.format(self.num_layers)]
    loss += 0.5 * self.reg * np.sum(self.params['W' + str(self.num_layers)] ** 2)

    for inx in range(self.num_layers - 1, 0, -1):
        if self.use_dropout:
            dX = dropout_backward(dX, dropout_cache['MASK{}'.format(inx)],
                                  self.dropout_params['p'])
        dX = relu_backward(dX, linear_cache['L' + str(inx)])
        if inx - 1 != 0:
            if self.use_dropout:
                pre_layer = dropout_cache['D{}'.format(inx - 1)]
            else:
                pre_layer = relu_cache['R{}'.format(inx - 1)]
            dX, grads['W' + str(inx)], grads['b' + str(inx)] = linear_backward(
                dX, pre_layer,
                self.params['W{}'.format(inx)], self.params['b{}'.format(inx)])
        else:
            dX, grads['W' + str(inx)], grads['b' + str(inx)] = linear_backward(
                dX, X,
                self.params['W{}'.format(inx)], self.params['b{}'.format(inx)])
        grads['W' + str(inx)] += self.reg * self.params['W' + str(inx)]
        loss += 0.5 * self.reg * np.sum(self.params['W' + str(inx)] ** 2)
    #######################################################################
    #                          END OF YOUR CODE                           #
    #######################################################################
    return loss, grads
# In the file layers.py implement the backward pass for the ReLU activation in  #
# the relu_backward function.                                                   #
# Once you are done you can test your implementation using numeric gradient     #
# checking.                                                                      #
###################################################################################
# Test the relu_backward function
x = np.random.randn(10, 10)
dout = np.random.randn(*x.shape)

if layers.relu_forward(x)[0] is not None:
    dx_num = eval_numerical_gradient_array(lambda x: layers.relu_forward(x)[0], x, dout)

    _, cache = layers.relu_forward(x)
    dx = layers.relu_backward(dout, cache)

    # The error should be around 1e-12
    print('Testing relu_backward function:')
    print('dx error (should be around 1e-12): ', rel_error(dx_num, dx))

###################################################################################
#                                 Sandwich layers                                #
###################################################################################
# There are some common patterns of layers that are frequently used in          #
# neural nets. For example, affine layers are frequently followed by a          #
# ReLU nonlinearity. To make these common patterns easy, we define              #
# several convenience layers in the file layer_utils.py. For now                #
# take a look at the affine_relu_forward and affine_relu_backward               #
# functions, and run the following to numerically gradient check the            #
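# The gradient checks in these scripts rely on eval_numerical_gradient_array(f, x, df),
# which evaluates a numeric gradient for a function that returns an array. It is
# commonly implemented with centered differences, as sketched below; the helper
# bundled with this code may differ slightly (e.g. in the step size h).
def eval_numerical_gradient_array_sketch(f, x, df, h=1e-5):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()          # f evaluated with x[ix] nudged up
        x[ix] = oldval - h
        neg = f(x).copy()          # f evaluated with x[ix] nudged down
        x[ix] = oldval             # restore the original value
        # Chain rule: project the centered difference onto the upstream gradient df.
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad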
def loss(self, X, y=None):
    X = X.astype(self.dtype)
    mode = 'test' if y is None else 'train'

    if self.use_dropout:
        self.dropout_param['mode'] = mode
    if self.use_batchnorm:
        for bn_param in self.bn_params:
            bn_param['mode'] = mode

    scores = None
    batch_size = X.shape[0]
    X = np.reshape(X, [batch_size, -1])

    fc_cache_list = []
    relu_cache_list = []
    bn_cache_list = []
    dropout_cache_list = []

    # Forward pass through the hidden layers: affine -> [batchnorm] -> relu -> [dropout]
    for i in range(self.num_layers - 1):
        fc_act, fc_cache = affine_forward(X, self.params['W' + str(i + 1)],
                                          self.params['b' + str(i + 1)])
        fc_cache_list.append(fc_cache)
        if self.use_batchnorm:
            bn_act, bn_cache = batchnorm_forward(fc_act,
                                                 self.params['gamma' + str(i + 1)],
                                                 self.params['beta' + str(i + 1)],
                                                 self.bn_params[i])
            bn_cache_list.append(bn_cache)
            relu_act, relu_cache = relu_forward(bn_act)
            relu_cache_list.append(relu_cache)
        else:
            relu_act, relu_cache = relu_forward(fc_act)
            relu_cache_list.append(relu_cache)
        if self.use_dropout:
            relu_act, dropout_cache = dropout_forward(relu_act, self.dropout_param)
            dropout_cache_list.append(dropout_cache)
        X = relu_act.copy()

    # Last layer: plain affine to produce the class scores
    scores, final_cache = affine_forward(X, self.params['W' + str(self.num_layers)],
                                         self.params['b' + str(self.num_layers)])

    if mode == 'test':
        return scores

    loss, grads = 0.0, {}
    loss, dsoft = softmax_loss(scores, y)
    loss += 0.5 * self.reg * np.sum(np.square(self.params['W' + str(self.num_layers)]))

    # Backward pass through the last layer
    dx_last, dw_last, db_last = affine_backward(dsoft, final_cache)
    grads['W' + str(self.num_layers)] = dw_last + self.reg * self.params['W' + str(self.num_layers)]
    grads['b' + str(self.num_layers)] = db_last

    for i in range(self.num_layers - 1, 0, -1):
        if self.use_dropout:
            dx_last = dropout_backward(dx_last, dropout_cache_list[i - 1])
        drelu = relu_backward(dx_last, relu_cache_list[i - 1])
        if self.use_batchnorm:
            dbatchnorm, dgamma, dbeta = batchnorm_backward(drelu, bn_cache_list[i - 1])
            dx_last, dw_last, db_last = affine_backward(dbatchnorm, fc_cache_list[i - 1])
            grads['beta' + str(i)] = dbeta
            grads['gamma' + str(i)] = dgamma
        else:
            dx_last, dw_last, db_last = affine_backward(drelu, fc_cache_list[i - 1])
        grads['W' + str(i)] = dw_last + self.reg * self.params['W' + str(i)]
        grads['b' + str(i)] = db_last
        loss += 0.5 * self.reg * np.sum(np.square(self.params['W' + str(i)]))

    return loss, grads
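# The batchnorm branch above expects batchnorm_forward(x, gamma, beta, bn_param)
# -> (out, cache) and batchnorm_backward(dout, cache) -> (dx, dgamma, dbeta).
# A minimal sketch of that pair is given below; the cache layout and the eps /
# momentum defaults are assumptions, not necessarily what this codebase uses.
def batchnorm_forward_sketch(x, gamma, beta, bn_param):
    mode = bn_param['mode']
    eps = bn_param.get('eps', 1e-5)
    momentum = bn_param.get('momentum', 0.9)
    N, D = x.shape
    running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
    running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))

    if mode == 'train':
        mu = x.mean(axis=0)
        var = x.var(axis=0)
        x_hat = (x - mu) / np.sqrt(var + eps)
        out = gamma * x_hat + beta
        cache = (x_hat, gamma, var, eps)
        # Exponential moving averages used at test time.
        running_mean = momentum * running_mean + (1 - momentum) * mu
        running_var = momentum * running_var + (1 - momentum) * var
    else:
        x_hat = (x - running_mean) / np.sqrt(running_var + eps)
        out = gamma * x_hat + beta
        cache = (x_hat, gamma, running_var, eps)

    bn_param['running_mean'] = running_mean
    bn_param['running_var'] = running_var
    return out, cache


def batchnorm_backward_sketch(dout, cache):
    x_hat, gamma, var, eps = cache
    dbeta = dout.sum(axis=0)
    dgamma = (dout * x_hat).sum(axis=0)
    dx_hat = dout * gamma
    # Closed-form gradient through the normalisation (means taken over the batch axis).
    dx = (dx_hat - dx_hat.mean(axis=0)
          - x_hat * (dx_hat * x_hat).mean(axis=0)) / np.sqrt(var + eps)
    return dx, dgamma, dbeta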
print('Testing relu_forward function:')
print('difference: ', rel_error(out, correct_out))
#######################################################################################

#######################################################################################
# Test the relu_backward function
#######################################################################################
from layers import relu_backward

x = np.random.randn(10, 10)
dout = np.random.randn(*x.shape)

dx_num = eval_numerical_gradient_array(lambda x: relu_forward(x)[0], x, dout)

_, cache = relu_forward(x)
dx = relu_backward(dout, cache)

# The error should be around 1e-12
print('Testing relu_backward function:')
print('dx error: ', rel_error(dx_num, dx))
#######################################################################################

#######################################################################################
# Test the sandwich layers
# - If you implemented the affine and relu functions properly,
#   you will have no problem with this code
#######################################################################################
from layer_utils import affine_relu_forward
from layer_utils import affine_relu_backward

np.random.seed(231)
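# rel_error, used throughout these checks, is the usual relative-error metric
# from the gradient-checking utilities. It is commonly defined as below; the
# helper bundled with this code may differ in the small constant used to avoid
# division by zero.
def rel_error_sketch(x, y):
    """Maximum elementwise relative error between x and y."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))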
def loss(self, X, y=None):
    """
    Compute loss and gradient for a minibatch of data.

    Args:
    - X: Input data, numpy array of shape (N, d_1, ..., d_k)
    - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

    Returns:
    If y is None, then run a test-time forward pass of the model and return:
    - scores: Array of shape (N, C) giving classification scores, where
      scores[i, c] is the classification score for X[i] and class c.

    If y is not None, then run a training-time forward and backward pass and
    return a tuple of:
    - loss: Scalar value giving the loss
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
    """
    scores = None
    X = X.astype(self.dtype)
    linear_cache = dict()
    relu_cache = dict()
    dropout_cache = dict()

    """
    TODO: Implement the forward pass for the fully-connected neural
    network, compute the scores and store them in the scores variable.
    """
    #######################################################################
    #                         BEGIN OF YOUR CODE                          #
    #######################################################################
    # input -> first hidden layer
    linear_cache[1] = linear_forward(X, self.params["W1"], self.params["b1"])
    input_next = relu_forward(linear_cache[1])
    relu_cache[1] = input_next.copy()

    # hidden layer i -> hidden layer i+1
    for l in range(2, self.num_layers):
        if self.use_dropout:
            input_next, dropout_cache[l - 1] = self.apply_forward_dropout(input_next)
        linear_cache[l] = linear_forward(input_next, self.params["W%d" % l],
                                         self.params["b%d" % l])
        input_next = relu_forward(linear_cache[l])
        relu_cache[l] = input_next.copy()

    # last hidden layer -> output layer
    if self.use_dropout:
        input_next, dropout_cache[self.num_layers - 1] = self.apply_forward_dropout(input_next)
    linear_cache[self.num_layers] = linear_forward(input_next,
                                                   self.params["W%d" % self.num_layers],
                                                   self.params["b%d" % self.num_layers])
    scores = linear_cache[self.num_layers].copy()
    #######################################################################
    #                          END OF YOUR CODE                           #
    #######################################################################

    # If y is None then we are in test mode so just return scores
    if y is None:
        return scores

    loss, grads = 0, dict()
    """
    TODO: Implement the backward pass for the fully-connected net. Store
    the loss in the loss variable and all gradients in the grads
    dictionary. Compute the loss with softmax. grads[k] has the gradients
    for self.params[k]. Add L2 regularisation to the loss function.
    NOTE: To ensure that your implementation matches ours and you pass the
    automated tests, make sure that your L2 regularization includes a
    factor of 0.5 to simplify the expression for the gradient.
    """
    #######################################################################
    #                         BEGIN OF YOUR CODE                          #
    #######################################################################
    loss, dlogits = softmax(scores, y)
    # add L2 regularization
    loss += 0.5 * self.reg * np.sum([
        np.sum(self.params["W%d" % l] ** 2)
        for l in range(1, self.num_layers + 1)
    ])

    # last hidden layer <- output layer
    dX_, dW_, db_ = linear_backward(dlogits, relu_cache[self.num_layers - 1],
                                    self.params["W%d" % self.num_layers],
                                    self.params["b%d" % self.num_layers])
    # add regularization effect to W
    grads["W%d" % self.num_layers] = dW_ + self.reg * self.params["W%d" % self.num_layers]
    grads["b%d" % self.num_layers] = db_.copy()

    # hidden layer i <- hidden layer i+1
    for l in reversed(range(2, self.num_layers)):
        if self.use_dropout:
            dX_ = self.apply_backward_dropout(dX_, dropout_cache[l])
        dX_ = relu_backward(dX_, linear_cache[l])
        dX_, dW_, db_ = linear_backward(dX_, relu_cache[l - 1],
                                        self.params["W%d" % l], self.params["b%d" % l])
        # add regularization effect to W
        grads["W%d" % l] = dW_ + self.reg * self.params["W%d" % l]
        grads["b%d" % l] = db_

    # input layer <- first hidden layer
    if self.use_dropout:
        dX_ = self.apply_backward_dropout(dX_, dropout_cache[1])
    dX_ = relu_backward(dX_, linear_cache[1])
    dX_, dW_, db_ = linear_backward(dX_, X, self.params["W1"], self.params["b1"])
    # add regularization effect to W
    grads["W1"] = dW_ + self.reg * self.params["W1"]
    grads["b1"] = db_
    #######################################################################
    #                          END OF YOUR CODE                           #
    #######################################################################
    return loss, grads