def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    params = self.params()
    for param in params.values():
        param.grad = np.zeros_like(param.value)

    # Forward pass and loss
    h_fc = self.forward_pass(X)
    simp_loss, dfinal_mat = lc.softmax_with_cross_entropy(h_fc, y)

    # Backward pass through the layers in reverse order
    d_hf = self.hidden_fc.backward(dfinal_mat)
    d_ir = self.input_re.backward(d_hf)
    d_if = self.input_fc.backward(d_ir)

    # L2 regularization on the weight matrices
    i_loss, iL2_Wgrad = l2_regularization(params['iW'].value, self.reg)
    h_loss, hL2_Wgrad = l2_regularization(params['hW'].value, self.reg)
    loss = simp_loss + i_loss + h_loss
    params['iW'].grad += iL2_Wgrad
    params['hW'].grad += hL2_Wgrad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for param in self.params().values():
        param.grad = np.zeros_like(param.grad)

    # Forward pass
    x = self.FC1.forward(X)
    x = self.RL.forward(x)
    pred = self.FC2.forward(x)
    loss, dpred = softmax_with_cross_entropy(pred, target_index=y)

    # Backward pass
    d_out = self.FC2.backward(dpred)
    d_out = self.RL.backward(d_out)
    self.FC1.backward(d_out)

    # L2 regularization on all params
    if self.reg > 0:
        rloss_fc1, dW_rfc1 = l2_regularization(self.FC1.W.value, self.reg)
        rloss_fc2, dW_rfc2 = l2_regularization(self.FC2.W.value, self.reg)
        rloss_fc1B, dB_rfc1 = l2_regularization(self.FC1.B.value, self.reg)
        rloss_fc2B, dB_rfc2 = l2_regularization(self.FC2.B.value, self.reg)

        loss = loss + rloss_fc1 + rloss_fc2 + rloss_fc1B + rloss_fc2B
        self.FC1.W.grad += dW_rfc1
        self.FC2.W.grad += dW_rfc2
        self.FC1.B.grad += dB_rfc1
        self.FC2.B.grad += dB_rfc2

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    params = self.params()
    W1 = params["W1"]
    B1 = params["B1"]
    W2 = params["W2"]
    B2 = params["B2"]

    # Clear parameter gradients aggregated from the previous pass
    W1.grad = np.zeros_like(W1.value)
    B1.grad = np.zeros_like(B1.value)
    W2.grad = np.zeros_like(W2.value)
    B2.grad = np.zeros_like(B2.value)

    # Forward pass
    X_dense1 = self.Dense1.forward(X)
    X_relu = self.Relu.forward(X_dense1)
    X_dense2 = self.Dense2.forward(X_relu)
    loss, dpred = softmax_with_cross_entropy(X_dense2, y)

    # Backward pass
    dX_dense2 = self.Dense2.backward(dpred)
    dX_relu = self.Relu.backward(dX_dense2)
    dX_dense1 = self.Dense1.backward(dX_relu)

    # L2 regularization on all params
    l2_W1_loss, l2_W1_grad = l2_regularization(W1.value, self.reg)
    l2_B1_loss, l2_B1_grad = l2_regularization(B1.value, self.reg)
    l2_W2_loss, l2_W2_grad = l2_regularization(W2.value, self.reg)
    l2_B2_loss, l2_B2_grad = l2_regularization(B2.value, self.reg)

    loss += l2_W1_loss + l2_B1_loss + l2_W2_loss + l2_B2_loss
    W1.grad += l2_W1_grad
    B1.grad += l2_B1_grad
    W2.grad += l2_W2_grad
    B2.grad += l2_B2_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    params = self.params()

    # Clear parameter gradients aggregated from the previous pass
    for param_name, param in params.items():
        param.grad = np.zeros_like(param.value)

    # Forward pass
    out1 = self.layer1.forward(X)
    out2 = self.layer2.forward(out1)
    out3 = self.layer3.forward(out2)
    loss, grad = softmax_with_cross_entropy(out3, y)

    # Backward pass
    d_out3 = self.layer3.backward(grad)
    d_out2 = self.layer2.backward(d_out3)
    self.layer1.backward(d_out2)

    # L2 regularization on the parameters of both fully connected layers
    for name in ('W1', 'B1', 'W3', 'B3'):
        l2_loss, l2_grad = l2_regularization(params[name].value, self.reg)
        loss += l2_loss
        params[name].grad += l2_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    hidden_layer_params = self.hidden_layer.params()
    output_layer_params = self.output_layer.params()
    hidden_layer_params['W'].grad = np.zeros_like(hidden_layer_params['W'].grad)
    hidden_layer_params['B'].grad = np.zeros_like(hidden_layer_params['B'].grad)
    output_layer_params['W'].grad = np.zeros_like(output_layer_params['W'].grad)
    output_layer_params['B'].grad = np.zeros_like(output_layer_params['B'].grad)

    # Forward pass
    hidden_l_out = self.hidden_layer.forward(X)
    relu_l_out = self.relu_layer.forward(hidden_l_out)
    output_l_out = self.output_layer.forward(relu_l_out)

    # Loss: cross-entropy plus L2 penalty on both weight matrices
    ce_loss, d_pred = softmax_with_cross_entropy(output_l_out, y)
    reg_loss_first, d_R_first = l2_regularization(
        hidden_layer_params['W'].value, self.reg)
    reg_loss_second, d_R_second = l2_regularization(
        output_layer_params['W'].value, self.reg)
    loss = ce_loss + reg_loss_first + reg_loss_second

    # Backward pass, adding the regularization gradients to the weights
    d_input_out_layer = self.output_layer.backward(d_pred)
    output_layer_params['W'].grad += d_R_second
    d_input_relu_layer = self.relu_layer.backward(d_input_out_layer)
    d_input_hidden_layer = self.hidden_layer.backward(d_input_relu_layer)
    hidden_layer_params['W'].grad += d_R_first

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for param in self.params().values():
        param.grad = np.zeros_like(param.value)

    # Forward pass
    fc1_out = self.fc1.forward(X)
    relu1_out = self.relu1.forward(fc1_out)
    fc2_out = self.fc2.forward(relu1_out)
    loss, d_preds = softmax_with_cross_entropy(fc2_out, y)

    # L2 regularization on the weight matrices
    loss_fc1_l2, grad_fc1_l2 = l2_regularization(self.fc1.W.value, self.reg)
    loss_fc2_l2, grad_fc2_l2 = l2_regularization(self.fc2.W.value, self.reg)
    loss_total = loss + loss_fc1_l2 + loss_fc2_l2

    # Backward pass
    fc2_dX = self.fc2.backward(d_preds)
    relu1_dX = self.relu1.backward(fc2_dX)
    fc1_dX = self.fc1.backward(relu1_dX)

    self.fc1.W.grad += grad_fc1_l2
    self.fc2.W.grad += grad_fc2_l2

    return loss_total
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    params = self.params()
    W1 = params['W1']
    B1 = params['B1']
    W2 = params['W2']
    B2 = params['B2']
    W1.grad = np.zeros_like(W1.value)
    B1.grad = np.zeros_like(B1.value)
    W2.grad = np.zeros_like(W2.value)
    B2.grad = np.zeros_like(B2.value)

    # Forward pass
    forward_linear1 = self.Linear1.forward(X)
    forward_relu = self.ReLU.forward(forward_linear1)
    predictions = self.Linear2.forward(forward_relu)
    loss, d_predictions = softmax_with_cross_entropy(predictions, y)

    # Backward pass
    backward_linear2 = self.Linear2.backward(d_predictions)
    backward_relu = self.ReLU.backward(backward_linear2)
    backward_linear1 = self.Linear1.backward(backward_relu)

    # L2 regularization on all params
    l2_W1_loss, l2_W1_grad = l2_regularization(W1.value, self.reg)
    l2_B1_loss, l2_B1_grad = l2_regularization(B1.value, self.reg)
    l2_W2_loss, l2_W2_grad = l2_regularization(W2.value, self.reg)
    l2_B2_loss, l2_B2_grad = l2_regularization(B2.value, self.reg)

    loss += l2_W1_loss + l2_W2_loss + l2_B1_loss + l2_B2_loss
    W1.grad += l2_W1_grad
    B1.grad += l2_B1_grad
    W2.grad += l2_W2_grad
    B2.grad += l2_B2_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for param in self.params().values():
        param.grad = np.zeros_like(param.value)

    # Forward pass
    y1 = self.FullyConnected_1.forward(X)
    y2 = self.RELU_1.forward(y1)
    y3 = self.FullyConnected_2.forward(y2)
    y_result = self.RELU_2.forward(y3)
    loss, d_out1 = softmax_with_cross_entropy(y_result, y)

    # Backward pass
    d_out2 = self.RELU_2.backward(d_out1)
    d_out3 = self.FullyConnected_2.backward(d_out2)
    dW2 = self.FullyConnected_2.params()['W'].grad
    dB2 = self.FullyConnected_2.params()['B'].grad
    d_out4 = self.RELU_1.backward(d_out3)
    d_out_result = self.FullyConnected_1.backward(d_out4)
    dW1 = self.FullyConnected_1.params()['W'].grad
    dB1 = self.FullyConnected_1.params()['B'].grad

    # L2 regularization on all params
    loss_l1, dW1_l = l2_regularization(
        self.FullyConnected_1.params()['W'].value, self.reg)
    loss_l2, dW2_l = l2_regularization(
        self.FullyConnected_2.params()['W'].value, self.reg)
    loss_l3, dB1_l = l2_regularization(
        self.FullyConnected_1.params()['B'].value, self.reg)
    loss_l4, dB2_l = l2_regularization(
        self.FullyConnected_2.params()['B'].value, self.reg)

    self.FullyConnected_1.params()['W'].grad = dW1 + dW1_l
    self.FullyConnected_2.params()['W'].grad = dW2 + dW2_l
    self.FullyConnected_1.params()['B'].grad = dB1 + dB1_l
    self.FullyConnected_2.params()['B'].grad = dB2 + dB2_l

    return loss + loss_l1 + loss_l2 + loss_l3 + loss_l4
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    params = self.params()
    for param in params:
        params[param].clear_grad()

    # Forward pass
    res = self.input_layer.forward(X)
    res = self.relu.forward(res)
    res = self.output_layer.forward(res)
    loss, dpred = softmax_with_cross_entropy(res, y)

    # Backward pass
    grad = self.output_layer.backward(dpred)
    grad = self.relu.backward(grad)
    grad = self.input_layer.backward(grad)

    # L2 regularization on all params
    for param in params:
        loss_l2, grad_l2 = l2_regularization(params[param].value, self.reg)
        loss += loss_l2
        params[param].grad += grad_l2

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for _, param in self.params().items():
        param.grad.fill(0)

    # Forward pass
    to_relu = self.input_layer.forward(X)
    to_output_layer = self.relu.forward(to_relu)
    pred = self.output_layer.forward(to_output_layer)
    loss, dprediction = softmax_with_cross_entropy(pred, y)

    # Backward pass
    grad_output_layer = self.output_layer.backward(dprediction)
    grad_relu_layer = self.relu.backward(grad_output_layer)
    grad_input_layer = self.input_layer.backward(grad_relu_layer)

    # L2 regularization on all params
    for key, param in self.params().items():
        loss_l2, grad_l2 = l2_regularization(param.value, self.reg)
        loss += loss_l2
        param.grad += grad_l2

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Zero out the gradients
    for param in self.get_params().values():
        param.grad = np.zeros_like(param.value)

    # Forward pass
    layer_out = X
    for layer in self.layers:
        layer_out = layer.forward(layer_out)
    nn_out = layer_out
    loss, d_output = softmax_with_cross_entropy(nn_out, y)

    # Backward pass
    d_layer = d_output
    for layer in reversed(self.layers):
        d_layer = layer.backward(d_layer)

    # L2 regularization
    for param in self.get_params().values():
        reg_loss, reg_grad = l2_regularization(param.value, self.reg)
        loss += reg_loss
        param.grad += reg_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for i, w in self.params().items():
        w.grad = np.zeros(w.grad.shape)

    # Forward pass
    value = self.first_layer.forward(X)
    value = self.first_relu.forward(value)
    value = self.second_layer.forward(value)
    loss, grads = softmax_with_cross_entropy(value, y)

    # Backward pass
    value = self.second_layer.backward(grads)
    value = self.first_relu.backward(value)
    value = self.first_layer.backward(value)

    # L2 regularization on all params
    for i, w in self.params().items():
        loss_delta, grad_delta = l2_regularization(w.value, self.reg)
        w.grad += grad_delta
        loss += loss_delta

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for _, param in self.params().items():
        param.grad = np.zeros_like(param.grad)

    # Forward pass
    for layer in self.layers:
        X = layer.forward(X)
    loss, grad = softmax_with_cross_entropy(X, y)

    # Backward pass
    for layer in reversed(self.layers):
        grad = layer.backward(grad)

    # L2 regularization on all params
    for _, param in self.params().items():
        r_loss, r_grad = l2_regularization(param.value, self.reg)
        loss += r_loss
        param.grad += r_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    params = self.params()
    for key in params.keys():
        params[key].grad = np.zeros_like(params[key].value)

    # Forward pass
    out1 = self.ReLU1.forward(self.fc1.forward(X))
    out2 = self.fc2.forward(out1)
    loss, grad = softmax_with_cross_entropy(out2, y)

    # Backward pass
    d_out2 = self.fc2.backward(grad)
    self.fc1.backward(self.ReLU1.backward(d_out2))

    # L2 regularization on all params, applied after the backward pass so
    # the regularization gradients are not overwritten by the layers
    for key in params.keys():
        l2_loss, l2_grad = l2_regularization(params[key].value, self.reg)
        loss += l2_loss
        params[key].grad += l2_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Forward pass, clearing each layer's parameter gradients along the way
    val = X
    for i in range(len(self.layers)):
        val = self.layers[i].forward(val)
        for p in self.layers[i].params().keys():
            self.layers[i].params()[p].grad = np.zeros_like(
                self.layers[i].params()[p].value)

    loss, grad = softmax_with_cross_entropy(val, y)

    # Backward pass
    for i in reversed(range(len(self.layers))):
        grad = self.layers[i].backward(grad)

    # L2 regularization on all params
    for i in range(len(self.layers)):
        for p in self.layers[i].params().keys():
            l2_val, l2_grad = l2_regularization(
                self.layers[i].params()[p].value, self.reg)
            loss += l2_val
            self.layers[i].params()[p].grad += l2_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for p in self.params().values():
        p.grad = np.zeros_like(p.value)

    # Forward pass
    h0 = self.hidden_layer[0].forward(X)
    h1 = self.hidden_layer[1].forward(h0)
    o = self.output_layer.forward(h1)
    loss_unreg, loss_unreg_grad = softmax_with_cross_entropy(o, y)

    # Backward pass
    o_grad = self.output_layer.backward(loss_unreg_grad)
    h1_grad = self.hidden_layer[1].backward(o_grad)
    h0_grad = self.hidden_layer[0].backward(h1_grad)

    # L2 regularization on all params
    loss_reg = 0
    for p in self.params().values():
        p_reg, p_reg_grad = l2_regularization(p.value, self.reg)
        p.grad += p_reg_grad
        loss_reg += p_reg

    return loss_unreg + loss_reg
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for param in self.params().values():
        param.grad = np.zeros_like(param.value)

    # Forward pass
    pred = X.copy()
    for layer in self.hidden_layers:
        pred = layer.forward(pred)
    loss, grad = softmax_with_cross_entropy(pred, y)

    # Backward pass
    for layer in self.hidden_layers[::-1]:
        grad = layer.backward(grad)

    # L2 regularization on all params
    if self.reg:
        for param in self.params().values():
            l2_loss, l2_grad = l2_regularization(param.value, self.reg)
            param.grad += l2_grad
            loss += l2_loss

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    params = self.params()
    for ix in params:
        params[ix].grad = np.zeros_like(params[ix].value)

    # Forward pass
    out1 = self.ReLU1.forward(self.fc1.forward(X))
    out2 = self.fc2.forward(out1)
    loss, grad = softmax_with_cross_entropy(out2, y)

    # Backward pass
    d_out2 = self.fc2.backward(grad)
    self.fc1.backward(self.ReLU1.backward(d_out2))

    # L2 regularization on all params
    for param_ix in params:
        l2_loss, l2_grad = l2_regularization(params[param_ix].value, self.reg)
        loss += l2_loss
        params[param_ix].grad += l2_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    params = self.params()
    for key in params:
        params[key].grad = np.zeros_like(params[key].value)

    # Forward pass
    data = X
    for layer in self.layers:
        data = layer.forward(data)
    loss, d_out = softmax_with_cross_entropy(data, y)

    # Backward pass
    for layer in reversed(self.layers):
        d_out = layer.backward(d_out)

    # L2 regularization on all params
    for key in params:
        loss_l2, grad_l2 = l2_regularization(params[key].value, self.reg)
        params[key].grad += grad_l2
        loss += loss_l2

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    params = self.params()
    for p in params:
        param = params[p]
        param.grad = np.zeros_like(param.grad)

    # Forward pass
    step_1f = self.layer_1.forward(X)
    step_2f = self.layer_2.forward(step_1f)
    step_3f = self.layer_3.forward(step_2f)
    loss, dpred = softmax_with_cross_entropy(step_3f, y)

    # Backward pass
    step_3b = self.layer_3.backward(dpred)
    step_2b = self.layer_2.backward(step_3b)
    step_1b = self.layer_1.backward(step_2b)

    # L2 regularization on all params
    for p in params:
        param = params[p]
        loss_l2, grad_l2 = l2_regularization(param.value, self.reg)
        param.grad += grad_l2
        loss += loss_l2

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, height, width, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for layer in self.layers:
        if {'W', 'B'} <= set(layer.params()):
            layer.W.grad = np.zeros(layer.W.value.shape)
            layer.B.grad = np.zeros(layer.B.value.shape)

    # Forward pass
    forward_val = X
    for layer in self.layers:
        forward_val = layer.forward(forward_val)
    loss, backward_val = softmax_with_cross_entropy(forward_val, y)

    # Backward pass
    for layer in self.layers[::-1]:
        backward_val = layer.backward(backward_val)

    # L2 regularization on all params
    for layer in self.layers:
        for param_name, param in layer.params().items():
            loss_reg, grad_reg = l2_regularization(param.value, self.reg)
            loss += loss_reg
            param.grad += grad_reg

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for param in self.params().values():
        param.grad.fill(0)

    # Forward pass
    hidden_res_forward = self.fcl1.forward(X)
    hidden_res_forward = self.relu.forward(hidden_res_forward)
    output = self.fcl2.forward(hidden_res_forward)
    loss, dprediction = softmax_with_cross_entropy(output, y)

    # Backward pass
    hidden_res_backward = self.fcl2.backward(dprediction)
    hidden_res_backward = self.relu.backward(hidden_res_backward)
    self.fcl1.backward(hidden_res_backward)

    # L2 regularization on all params
    for param in self.params().values():
        reg_loss, reg_grad = l2_regularization(param.value, self.reg)
        loss += reg_loss
        param.grad += reg_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for layer in self.layers:
        layer.gradients_reset()

    # Forward pass
    for layer in self.layers:
        X = layer.forward(X)
    loss, dprediction = softmax_with_cross_entropy(X, y)

    # Backward pass
    for layer in reversed(self.layers):
        dprediction = layer.backward(dprediction)

    # L2 regularization on the weights of the fully connected layers
    if self.reg:
        for layer in self.layers:
            if isinstance(layer, FullyConnectedLayer):
                loss_by_l2, d_reg = l2_regularization(layer.W.value, self.reg)
                loss += loss_by_l2
                layer.W.grad += d_reg

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for param in self.params().values():
        param.grad = np.zeros(param.grad.shape)

    # Forward pass
    prediction = X
    for layer in self.layers:
        prediction = layer.forward(prediction)
    loss, dprediction = softmax_with_cross_entropy(prediction, y)

    # Backward pass
    prev_grad = dprediction
    for layer in reversed(self.layers):
        prev_grad = layer.backward(prev_grad)

    # L2 regularization on all params
    for param in self.params().values():
        l2_loss, l2_grad = l2_regularization(param.value, self.reg)
        loss = loss + l2_loss
        param.grad = param.grad + l2_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    params = self.params()
    for p in params.keys():
        params[p].grad = np.zeros_like(params[p].value)

    # Forward pass
    out1 = self.act.forward(self.fcl1.forward(X))
    out2 = self.fcl2.forward(out1)
    loss, grad = softmax_with_cross_entropy(out2, y)

    # Backward pass
    self.fcl1.backward(self.act.backward(self.fcl2.backward(grad)))

    # L2 regularization on all params
    for p in params.keys():
        l2_loss, l2_grad = l2_regularization(params[p].value, self.reg)
        loss += l2_loss
        params[p].grad += l2_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for layer in self.layers:
        if {'W', 'B'} <= set(layer.params()):
            layer.W.grad = np.zeros(layer.W.value.shape)
            layer.B.grad = np.zeros(layer.B.value.shape)

    # Forward pass
    forward_val = X
    for layer in self.layers:
        forward_val = layer.forward(forward_val)
    loss, backward_val = softmax_with_cross_entropy(forward_val, y)

    # Backward pass
    for layer in self.layers[::-1]:
        backward_val = layer.backward(backward_val)

    # L2 regularization on all params
    for layer in self.layers:
        for param_name, param in layer.params().items():
            loss_reg, grad_reg = l2_regularization(param.value, self.reg)
            loss += loss_reg
            param.grad += grad_reg

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    params = self.params()
    for p in params:
        params[p].grad = np.zeros_like(params[p].value)

    # Forward pass
    out = self.fc1.forward(X)
    out = self.act1.forward(out)
    out = self.fc2.forward(out)
    loss, d_pred = softmax_with_cross_entropy(out, y)

    # Backward pass
    d_fc2 = self.fc2.backward(d_pred)
    d_act1 = self.act1.backward(d_fc2)
    d_fc1 = self.fc1.backward(d_act1)

    # L2 regularization on all params
    for p in params:
        regular_loss, regular_grad = l2_regularization(
            params[p].value, self.reg)
        loss += regular_loss
        params[p].grad += regular_grad

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for param_name, param in self.params().items():
        param.grad = np.zeros_like(param.value)

    # Forward pass
    result = self.layers[0].forward(X)
    for layer in self.layers[1:]:
        result = layer.forward(result)
    loss, grad = softmax_with_cross_entropy(result, y)

    # Backward pass
    for layer in reversed(self.layers):
        grad = layer.backward(grad)

    # L2 regularization on all params
    for param_name, param in self.params().items():
        loss_r, grad_r = l2_regularization(param.value, self.reg)
        param.grad += grad_r
        loss += loss_r

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for index, param in self.params().items():
        param.grad = np.zeros_like(param.value)

    # Forward pass
    forward_input = X.copy()
    for layer in self.L:
        forward_input = layer.forward(forward_input)
    loss, backward_propagation = softmax_with_cross_entropy(forward_input, y)

    # Backward pass, adding L2 regularization layer by layer
    for layer in reversed(self.L):
        backward_propagation = layer.backward(backward_propagation)
        for reg_param in ['W', 'B']:
            if reg_param in layer.params():
                loss_l2, dp_l2 = l2_regularization(
                    layer.params()[reg_param].value, self.reg)
                loss += loss_l2
                layer.params()[reg_param].grad += dp_l2

    return loss
def compute_loss_and_gradients(self, X, y):
    """
    Computes total loss and updates parameter gradients
    on a batch of training examples

    Arguments:
    X, np array (batch_size, input_features) - input data
    y, np array of int (batch_size) - classes
    """
    # Clear parameter gradients aggregated from the previous pass
    for _, param in self.params().items():
        param.reset_grad()

    # Forward pass
    out = X.copy()
    for layer in self.layers:
        out = layer.forward(out)
    loss, d_loss = softmax_with_cross_entropy(out, y)

    # Backward pass
    for layer in reversed(self.layers):
        d_loss = layer.backward(d_loss)

    # L2 regularization on all params
    for _, param in self.params().items():
        reg_loss, reg_grad = l2_regularization(param.value, self.reg)
        loss += reg_loss
        param.grad += reg_grad

    return loss
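# -----------------------------------------------------------------------------
# A minimal, hedged sketch of the helpers every implementation above relies on
# (a Param container, l2_regularization, softmax_with_cross_entropy). These are
# illustrative definitions consistent with how the snippets call them, not the
# assignment's exact code; the names and signatures here are assumptions.
# -----------------------------------------------------------------------------
import numpy as np


class Param:
    """Trainable parameter: stores the value and the accumulated gradient."""

    def __init__(self, value):
        self.value = value
        self.grad = np.zeros_like(value)


def l2_regularization(W, reg_strength):
    """L2 penalty on a single parameter array.

    loss = reg_strength * sum(W ** 2), gradient = 2 * reg_strength * W
    """
    loss = reg_strength * np.sum(W ** 2)
    grad = 2 * reg_strength * W
    return loss, grad


def softmax_with_cross_entropy(predictions, target_index):
    """Softmax followed by cross-entropy loss, averaged over the batch.

    Returns the scalar loss and the gradient with respect to `predictions`,
    shaped (batch_size, n_classes).
    """
    # Shift logits for numerical stability before exponentiating
    logits = predictions - np.max(predictions, axis=1, keepdims=True)
    probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)

    batch_size = predictions.shape[0]
    loss = -np.mean(np.log(probs[np.arange(batch_size), target_index]))

    # Gradient of the averaged loss w.r.t. the logits: (probs - one_hot) / N
    dprediction = probs.copy()
    dprediction[np.arange(batch_size), target_index] -= 1
    dprediction /= batch_size
    return loss, dprediction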