def loss(self, X, y=None): """ Evaluate loss and gradient for the three-layer convolutional network. """ W1 = self.params['W1'] W2, b2 = self.params['W2'], self.params['b2'] W3, b3 = self.params['W3'], self.params['b3'] # pass pool_param to the forward pass for the max-pooling layer pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2} scores = None conv, cache1 = layers.conv_forward(X,W1) relu1, cache2 = layers.relu_forward(conv) maxp, cache3 = layers.max_pool_forward(relu1,pool_param) fc1, cache4 = layers.fc_forward(maxp,W2,b2) relu2, cache5 = layers.relu_forward(fc1) scores, cache6 = layers.fc_forward(relu2,W3,b3) if y is None: return scores loss, grads = 0, {} loss, dscores = layers.softmax_loss(scores,y) dx3, dW3, db3 = layers.fc_backward(dscores,cache6) dRelu2 = layers.relu_backward(dx3,cache5) dx2, dW2, db2 = layers.fc_backward(dRelu2,cache4) dmaxp = layers.max_pool_backward(dx2.reshape(maxp.shape),cache3) dRelu1 = layers.relu_backward(dmaxp,cache2) dx,dW1 = layers.conv_backward(dRelu1,cache1) grads = {'W1':dW1,'W2':dW2,'b2':db2,'W3':dW3,'b3':db3} return loss, grads
def loss(self, X, y=None, reg=1e-5):
    print('start computing loss and grad.............')
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']

    # pass conv_param to the forward pass for the convolutional layer
    filter_size = W1.shape[2]
    conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}

    # pass pool_param to the forward pass for the max-pooling layer
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

    # compute the forward pass
    print('compute the forward pass......')
    print('compute the w1 conv_relu_pool_forward forward pass......')
    a1, cache1 = layers.conv_relu_pool_forward(X, W1, b1, conv_param, pool_param)
    print('compute the w2 affine_relu_forward forward pass......')
    a2, cache2 = layers.affine_relu_forward(a1, W2, b2)
    print('compute the w3 affine_forward forward pass......')
    scores, cache3 = layers.affine_forward(a2, W3, b3)

    if y is None:
        return scores

    # compute the backward pass
    print('compute the backward pass......')
    print('compute the softmax_loss backward pass......')
    data_loss, dscores = layers.softmax_loss(scores, y)
    print('compute the dw3 affine_backward backward pass......')
    da2, dW3, db3 = layers.affine_backward(dscores, cache3)
    print('compute the dw2 affine_relu_backward backward pass......')
    da1, dW2, db2 = layers.affine_relu_backward(da2, cache2)
    print('compute the dw1 conv_relu_pool_backward backward pass......')
    dX, dW1, db1 = layers.conv_relu_pool_backward(da1, cache1)

    # Add regularization (note: this method uses self.reg, not the reg argument)
    dW1 += self.reg * W1
    dW2 += self.reg * W2
    dW3 += self.reg * W3
    reg_loss = 0.5 * self.reg * sum(np.sum(W * W) for W in [W1, W2, W3])

    loss = data_loss + reg_loss
    grads = {'W1': dW1, 'b1': db1, 'W2': dW2, 'b2': db2, 'W3': dW3, 'b3': db3}

    print('computing loss and grad end !!!!!!!!!!!!!!!!!')
    print('loss is :', loss)
    return loss, grads
def test_softmax_loss(self):
    # Softmax loss
    np.random.seed(498)
    num_classes, num_inputs = 10, 50
    x = 0.001 * np.random.randn(num_inputs, num_classes)
    y = np.random.randint(num_classes, size=num_inputs)

    dx_num = eval_numerical_gradient(
        lambda x: layers.softmax_loss(x, y)[0], x, verbose=False)
    loss, dx = layers.softmax_loss(x, y)

    # Test softmax_loss function. Loss should be 2.3 and dx error might be 1e-8.
    # As long as your error is small enough, your implementation should pass this test.
    print('\nTesting softmax_loss:')
    print('loss: ', loss)
    print('dx error: ', rel_error(dx_num, dx))
    np.testing.assert_allclose(loss, 2.3, atol=0.05)
    np.testing.assert_allclose(dx, dx_num, atol=1e-7)
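# The tests above exercise layers.softmax_loss, but its body is not shown in this
# file. Below is a minimal sketch of such a function, assuming the usual
# (scores x of shape (N, C), labels y of shape (N,)) -> (loss, dx) interface;
# the actual layers.softmax_loss in this codebase may differ in detail.
import numpy as np

def softmax_loss_sketch(x, y):
    # Shift scores for numerical stability before exponentiating.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    N = x.shape[0]
    # Mean cross-entropy over the minibatch.
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    # dL/dx_k = p_k - 1(y_i == k), averaged over the minibatch.
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N
    return loss, dx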
def loss(self, X, y=None): """ Compute loss and gradient for a minibatch of data. Inputs: - X: Array of input data of shape (N, d_1, ..., d_k) - y: Array of labels, of shape (N,). y[i] gives the label for X[i]. Returns: If y is None, then run a test-time forward pass of the model and return: - scores: Array of shape (N, C) giving classification scores, where scores[i, c] is the classification score for X[i] and class c. If y is not None, then run a training-time forward and backward pass and return a tuple of: - loss: Scalar value giving the loss - grads: Dictionary with the same keys as self.params, mapping parameter names to gradients of the loss with respect to those parameters. """ out1, cache1 = layer_utilities.affine_relu_forward( X, self.params['W1'], self.params['b1']) out2, cache2 = layers.affine_forward( out1, self.params['W2'], self.params['b2']) # last layer no need to use relu scores = out2 if y is None: return scores # backward loss, grads = 0, {} loss, d_scores = layers.softmax_loss(scores, y) loss = loss + 0.5 * self.reg * ( np.sum(self.params['W1'] * self.params['W1']) + np.sum(self.params['W2'] * self.params['W2'])) dout1, dW2, db2 = layers.affine_backward(d_scores, cache2) dx, dW1, db1 = layer_utilities.affine_relu_backward(dout1, cache1) grads['W2'] = dW2 + self.reg * self.params['W2'] grads['b2'] = db2 grads['W1'] = dW1 + self.reg * self.params['W1'] grads['b1'] = db1 return loss, grads
def training_step(model, X_batch, y_batch, reg):
    """
    Compute the loss and gradients for a single training iteration of a model
    given a minibatch of data. The loss should be a sum of a cross-entropy loss
    between the model predictions and the ground-truth image labels, and an L2
    regularization term on all weight matrices in the fully-connected layers of
    the model. You should not regularize the bias vectors.

    Inputs:
    - model: A Classifier instance
    - X_batch: A numpy array of shape (N, D) giving a minibatch of images
    - y_batch: A numpy array of shape (N,) where 0 <= y_batch[i] < C is the
      ground-truth label for the image X_batch[i]
    - reg: A float giving the strength of L2 regularization to use.

    Returns a tuple of:
    - loss: A float giving the loss (data loss + regularization loss) for the
      model on this minibatch of data
    - grads: A dictionary giving gradients of the loss with respect to the
      parameters of the model. In particular grads[k] should be the gradient of
      the loss with respect to model.parameters()[k].
    """
    loss, grads = None, None
    ###########################################################################
    # TODO: Compute the loss and gradient for one training iteration.         #
    ###########################################################################
    scores, cache = model.forward(X_batch)
    data_loss, grad_scores = softmax_loss(scores, y_batch)
    grads = model.backward(grad_scores, cache)

    # Regularization: only the weight matrices are regularized, not the biases.
    W1_loss, grad_W1 = l2_regularization(model.W1, reg)
    W2_loss, grad_W2 = l2_regularization(model.W2, reg)
    loss = data_loss + W1_loss + W2_loss
    grads['W1'] += grad_W1
    grads['W2'] += grad_W2
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return loss, grads
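# training_step above assumes an l2_regularization(W, reg) helper that returns
# both the regularization loss and its gradient with respect to W. A minimal
# sketch of such a helper (hedged: the real helper in this codebase may differ,
# e.g. in whether it includes the 0.5 factor):
import numpy as np

def l2_regularization_sketch(W, reg):
    # 0.5 * reg * ||W||^2 and its gradient reg * W.
    loss = 0.5 * reg * np.sum(W * W)
    grad = reg * W
    return loss, grad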
def loss(self, X, y=None):
    """
    Compute loss and gradient for a minibatch of data.

    Inputs:
    - X: Array of input data of shape (N, d_1, ..., d_k)
    - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

    Returns:
    If y is None, then run a test-time forward pass of the model and return:
    - scores: Array of shape (N, C) giving classification scores, where
      scores[i, c] is the classification score for X[i] and class c.

    If y is not None, then run a training-time forward and backward pass and
    return a tuple of:
    - loss: Scalar value giving the loss
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
    """
    scores = None
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']

    ar1_out, ar1_cache = affine_relu_forward(X, W1, b1)
    ar2_out, ar2_cache = affine_forward(ar1_out, W2, b2)
    scores = ar2_out

    if y is None:
        return scores

    loss, grads = 0, {}
    loss, dout = softmax_loss(scores, y)
    loss = loss + 0.5 * self.reg * np.sum(W1 * W1) + 0.5 * self.reg * np.sum(W2 * W2)
    dx2, dw2, db2 = affine_backward(dout, ar2_cache)
    grads['W2'] = dw2 + self.reg * W2
    grads['b2'] = db2
    dx1, dw1, db1 = affine_relu_backward(dx2, ar1_cache)
    grads['W1'] = dw1 + self.reg * W1
    grads['b1'] = db1

    return loss, grads
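# Several of the loss functions above rely on affine_relu_forward /
# affine_relu_backward sandwich layers. A minimal sketch, assuming
# affine_forward/affine_backward and relu_forward/relu_backward exist with the
# usual (out, cache) / gradient signatures used throughout this file:
def affine_relu_forward_sketch(x, w, b):
    # Affine transform followed by ReLU; cache both layer caches for backward.
    a, fc_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(a)
    return out, (fc_cache, relu_cache)

def affine_relu_backward_sketch(dout, cache):
    # Undo ReLU first, then the affine transform.
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db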
def loss(self, X, y=None): """ Compute loss and gradient for a minibatch of data. Inputs: - X: Array of input data of shape (N, d_in) - y: Array of labels, of shape (N,). y[i] gives the label for X[i]. Returns: If y is None, then run a test-time forward pass of the model and return: - scores: Array of shape (N, C) giving classification scores, where scores[i, c] is the classification score for X[i] and class c. If y is not None, then run a training-time forward and backward pass and return a tuple of: - loss: Scalar value giving the loss - grads: Dictionary with the same keys as self.params, mapping parameter names to gradients of the loss with respect to those parameters. """ W1, b1 = self.params['W1'], self.params['b1'] W3, b3 = self.params['W3'], self.params['b3'] N, d_in = X.shape scores = None f, cache1 = layers.fc_forward(X, W1, b1) #fc h, cache2 = layers.relu_forward(f) #relu scores, cache3 = layers.fc_forward(h, W3, b3) #fc # If y is None then we are in test mode so just return scores if y is None: return scores loss, grads = 0, {} loss, dscores = layers.softmax_loss(scores, y) dx2, dW3, db3 = layers.fc_backward(dscores, cache3) dx1 = layers.relu_backward(dx2, cache2) dx, dW1, db1 = layers.fc_backward(dx1, cache1) grads = {'W1': dW1, 'b1': db1, 'W3': dW3, 'b3': db3} return loss, grads
def loss(self, X, L, Y=None):
    # Forward pass: GCN + ReLU, then a sum/readout layer
    out_gcnrelu1, cache_gcnrelu1 = gcn_relu_fw(L, X, self.params['Theta1'])
    out_sum2, cache_sum2 = sum_out_fw(self.params['W2'], out_gcnrelu1)
    scores = out_sum2

    if Y is None:
        return scores

    loss, dout = softmax_loss(scores, Y)
    loss += self.reg * .5 * (
        LA.norm(self.params['Theta1'])**2 + LA.norm(self.params['W2'])**2)

    # Backward pass with L2 regularization on Theta1 and W2
    dx2, dw2 = sum_out_bw(dout, cache_sum2)
    dw2 += self.reg * self.params['W2']
    dx1, dtheta1 = gcn_relu_bw(dx2, cache_gcnrelu1)
    dtheta1 += self.reg * self.params['Theta1']
    grads = {'Theta1': dtheta1, 'W2': dw2}

    return loss, grads
def test_onelayer_gcn():
    # Test loss function
    N, n, l, K = 10, 5, 3, 2
    X = np.random.rand(N, n)
    y = np.random.randint(l, size=N)
    L = [sp.rand(n, n, density=1, format='csr') for i in range(N)]
    model = OneLayer(N, K, l, weight_scale=1e-3)
    loss, grads = model.loss(X, L, y)

    Theta1 = model.params['Theta1']
    W2 = model.params['W2']
    out1 = np.array([expmulit(L[i], X[i], Theta1) for i in range(N)])
    out1 = np.maximum(out1, 0)
    out2 = np.dot(out1, np.ones(n)).reshape(-1, 1).dot(W2.reshape(1, -1))
    correct_loss, _ = softmax_loss(out2, y)
    print('check loss diff')
    assert_diff(loss, correct_loss)

    # Test gradients against numerical gradients
    _, grads = model.loss(X, L, y)
    for name in ['Theta1', 'W2']:
        grad = grads[name]
        f = lambda _: model.loss(X, L, y)[0]
        grad_num = eval_numerical_gradient(f, model.params[name], verbose=False)
        print('Check grad', name)
        assert_diff(grad, grad_num, 1e-8)
def loss(self, X, y=None): """ Compute loss and gradient for the fully-connected net. Input / output: Same as TwoLayerNet above. """ X = X.astype(self.dtype) mode = 'test' if y is None else 'train' # Set train/test mode for batchnorm params and dropout param since they # behave differently during training and testing. if self.dropout_param is not None: self.dropout_param['mode'] = mode if self.use_batchnorm: for bn_param in self.bn_params: bn_param['mode'] = mode scores = None ############################################################################ # TODO: Implement the forward pass for the fully-connected net, computing # # the class scores for X and storing them in the scores variable. # # # # When using dropout, you'll need to pass self.dropout_param to each # # dropout forward pass. # # # # When using batch normalization, you'll need to pass self.bn_params[0] to # # the forward pass for the first batch normalization layer, pass # # self.bn_params[1] to the forward pass for the second batch normalization # # layer, etc. # ############################################################################ layer_input = X ar_cache = {} dp_cache = {} for lay in xrange(self.num_layers - 1): if self.use_batchnorm: layer_input, ar_cache[lay] = affine_bn_relu_forward(layer_input, self.params['W%d' % (lay + 1)], self.params['b%d' % (lay + 1)], self.params['gamma%d' % (lay + 1)], self.params['beta%d' % (lay + 1)], self.bn_params[lay]) else: layer_input, ar_cache[lay] = affine_relu_forward(layer_input, self.params['W%d' % (lay + 1)], self.params['b%d' % (lay + 1)]) if self.use_dropout: layer_input, dp_cache[lay] = dropout_forward(layer_input, self.dropout_param) ar_out, ar_cache[self.num_layers] = affine_forward(layer_input, self.params['W%d' % (self.num_layers)], self.params['b%d' % (self.num_layers)]) scores = ar_out # pass ############################################################################ # END OF YOUR CODE # ############################################################################ # If test mode return early if mode == 'test': return scores loss, grads = 0.0, {} ############################################################################ # TODO: Implement the backward pass for the fully-connected net. Store the # # loss in the loss variable and gradients in the grads dictionary. Compute # # data loss using softmax, and make sure that grads[k] holds the gradients # # for self.params[k]. Don't forget to add L2 regularization! # # # # When using batch normalization, you don't need to regularize the scale # # and shift parameters. # # # # NOTE: To ensure that your implementation matches ours and you pass the # # automated tests, make sure that your L2 regularization includes a factor # # of 0.5 to simplify the expression for the gradient. 
# ############################################################################ loss, dscores = softmax_loss(scores, y) dhout = dscores loss = loss + 0.5 * self.reg * np.sum( self.params['W%d' % (self.num_layers)] * self.params['W%d' % (self.num_layers)]) dx, dw, db = affine_backward(dhout, ar_cache[self.num_layers]) grads['W%d' % (self.num_layers)] = dw + self.reg * self.params['W%d' % (self.num_layers)] grads['b%d' % (self.num_layers)] = db dhout = dx for idx in xrange(self.num_layers - 1): lay = self.num_layers - 1 - idx - 1 loss = loss + 0.5 * self.reg * np.sum(self.params['W%d' % (lay + 1)] * self.params['W%d' % (lay + 1)]) if self.use_dropout: dhout = dropout_backward(dhout, dp_cache[lay]) if self.use_batchnorm: dx, dw, db, dgamma, dbeta = affine_bn_relu_backward(dhout, ar_cache[lay]) else: dx, dw, db = affine_relu_backward(dhout, ar_cache[lay]) grads['W%d' % (lay + 1)] = dw + self.reg * self.params['W%d' % (lay + 1)] grads['b%d' % (lay + 1)] = db if self.use_batchnorm: grads['gamma%d' % (lay + 1)] = dgamma grads['beta%d' % (lay + 1)] = dbeta dhout = dx # pass ############################################################################ # END OF YOUR CODE # ############################################################################ return loss, grads
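# The fully-connected net above calls affine_bn_relu_forward /
# affine_bn_relu_backward, which are not shown in this file. A minimal sketch,
# assuming they simply chain affine, batchnorm and ReLU with the usual
# cache-passing convention; the actual helpers may differ:
def affine_bn_relu_forward_sketch(x, w, b, gamma, beta, bn_param):
    # affine -> batchnorm -> relu, caching each layer's cache for backward.
    a, fc_cache = affine_forward(x, w, b)
    bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
    out, relu_cache = relu_forward(bn)
    return out, (fc_cache, bn_cache, relu_cache)

def affine_bn_relu_backward_sketch(dout, cache):
    # Reverse order: relu -> batchnorm -> affine.
    fc_cache, bn_cache, relu_cache = cache
    dbn = relu_backward(dout, relu_cache)
    da, dgamma, dbeta = batchnorm_backward(dbn, bn_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta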
def loss(self, X, y=None):
    X = X.astype(self.dtype)
    mode = 'test' if y is None else 'train'
    if self.use_dropout:
        self.dropout_param['mode'] = mode
    if self.use_batchnorm:
        for bn_param in self.bn_params:
            bn_param['mode'] = mode

    scores = None
    batch_size = X.shape[0]
    X = np.reshape(X, [batch_size, -1])

    fc_cache_list = []
    relu_cache_list = []
    bn_cache_list = []
    dropout_cache_list = []

    # Hidden layers: affine - [batchnorm] - relu - [dropout]
    for i in range(self.num_layers - 1):
        fc_act, fc_cache = affine_forward(
            X, self.params['W' + str(i + 1)], self.params['b' + str(i + 1)])
        fc_cache_list.append(fc_cache)
        if self.use_batchnorm:
            bn_act, bn_cache = batchnorm_forward(
                fc_act, self.params['gamma' + str(i + 1)],
                self.params['beta' + str(i + 1)], self.bn_params[i])
            bn_cache_list.append(bn_cache)
            relu_act, relu_cache = relu_forward(bn_act)
            relu_cache_list.append(relu_cache)
        else:
            relu_act, relu_cache = relu_forward(fc_act)
            relu_cache_list.append(relu_cache)
        if self.use_dropout:
            relu_act, dropout_cache = dropout_forward(relu_act, self.dropout_param)
            dropout_cache_list.append(dropout_cache)
        X = relu_act.copy()

    # Last layer: affine only, producing the class scores
    scores, final_cache = affine_forward(
        X, self.params['W' + str(self.num_layers)],
        self.params['b' + str(self.num_layers)])

    if mode == 'test':
        return scores

    loss, grads = 0.0, {}
    loss, dsoft = softmax_loss(scores, y)
    loss += 0.5 * self.reg * np.sum(np.square(self.params['W' + str(self.num_layers)]))

    # Backward pass through the last affine layer
    dx_last, dw_last, db_last = affine_backward(dsoft, final_cache)
    grads['W' + str(self.num_layers)] = dw_last + self.reg * self.params['W' + str(self.num_layers)]
    grads['b' + str(self.num_layers)] = db_last

    # Backward pass through the hidden layers, in reverse order
    for i in range(self.num_layers - 1, 0, -1):
        if self.use_dropout:
            dx_last = dropout_backward(dx_last, dropout_cache_list[i - 1])
        drelu = relu_backward(dx_last, relu_cache_list[i - 1])
        if self.use_batchnorm:
            dbatchnorm, dgamma, dbeta = batchnorm_backward(drelu, bn_cache_list[i - 1])
            dx_last, dw_last, db_last = affine_backward(dbatchnorm, fc_cache_list[i - 1])
            grads['beta' + str(i)] = dbeta
            grads['gamma' + str(i)] = dgamma
        else:
            dx_last, dw_last, db_last = affine_backward(drelu, fc_cache_list[i - 1])
        grads['W' + str(i)] = dw_last + self.reg * self.params['W' + str(i)]
        grads['b' + str(i)] = db_last
        loss += 0.5 * self.reg * np.sum(np.square(self.params['W' + str(i)]))

    return loss, grads
dw_num = eval_numerical_gradient_array(
    lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: affine_relu_forward(x, w, b)[0], b, dout)

print('Testing affine_relu_forward:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

#######################################################################################
#######################################################################################
# Test the softmax_loss function
#######################################################################################
from layers import softmax_loss

np.random.seed(231)
num_classes, num_inputs = 10, 50
x = 0.001 * np.random.randn(num_inputs, num_classes)
y = np.random.randint(num_classes, size=num_inputs)

dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
loss, dx = softmax_loss(x, y)

# Test softmax_loss function. Loss should be 2.3 and dx error should be 1e-8
print('\nTesting softmax_loss:')
print('loss: ', loss)
print('dx error: ', rel_error(dx_num, dx))
#######################################################################################
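# The checks above report errors via rel_error, which is not defined in this
# file. A common definition is assumed below; the local helper may differ.
import numpy as np

def rel_error_sketch(x, y):
    # Maximum relative error, guarded against division by zero.
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))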
# Convolutional network classifier
###########################
# Initialise parameters
cnn1 = cnn2d(input_shape=(Xh, Xw, Xd), filter_shape=(f1_field, f1_field),
             num_filters=h1_units)

def flatten(x, n_examples):
    return np.reshape(x, (n_examples, -1))

relu1 = relu()
fc2 = fc2d(cnn1.yw * cnn1.yh * Xd * h1_units, K)
data_loss_fn = softmax_loss(y_train)

for i in range(n_epochs):
    # Forward pass
    conv1 = cnn1.forward(X_train)
    print("conv1:", conv1.shape)
    flatten1 = flatten(conv1, num_examples)
    h1 = relu1.forward(flatten1)
    scores = fc2.forward(h1)
    data_loss = data_loss_fn.forward(scores)
    reg_loss = 0.5 * reg * (np.sum(cnn1.W * cnn1.W) + np.sum(fc2.W * fc2.W))
    loss = data_loss + reg_loss
    if i % 1 == 0:
        print("Epoch: %d, Loss: %f" % (i, loss))

    # Backprop
# Number of dimensions of input
D = 2
# Number of classes
K = 3
X, y = generate_spiral_data(N, D, K, plot=False)

###########################
# Multilayer Perceptron classifier
###########################
# Initialise parameters
fc1 = fc2d(D, h1_units)
relu1 = relu()
fc2 = fc2d(h1_units, K)
data_loss_fn = softmax_loss(y)

for i in range(n_epochs):
    # Forward pass
    h1_prod = fc1.forward(X)
    h1 = relu1.forward(h1_prod)
    scores = fc2.forward(h1)
    data_loss = data_loss_fn.forward(scores)
    reg_loss = 0.5 * reg * (np.sum(fc1.W * fc1.W) + np.sum(fc2.W * fc2.W))
    loss = data_loss + reg_loss
    if i % 1000 == 0:
        print("Epoch: %d, Loss: %f" % (i, loss))

    # Backprop
    # dLi/dfk = pk - 1(yi = k)
    dscores = data_loss_fn.backward()
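# The training loops above use a class-style softmax_loss: it is constructed
# with the labels, forward(scores) returns the data loss, and backward()
# returns dscores = (p_k - 1(y_i == k)) / N. A minimal sketch under those
# assumptions; the real class in this codebase may differ in detail.
import numpy as np

class softmax_loss_sketch(object):
    def __init__(self, y):
        self.y = y
        self.probs = None

    def forward(self, scores):
        # Numerically stable softmax probabilities, cached for backward().
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        self.probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        N = scores.shape[0]
        return -np.mean(np.log(self.probs[np.arange(N), self.y]))

    def backward(self):
        # Gradient of the mean cross-entropy with respect to the scores.
        N = self.probs.shape[0]
        dscores = self.probs.copy()
        dscores[np.arange(N), self.y] -= 1
        return dscores / N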