class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes,
                 conv1_channels, conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        width, height, n_channels = input_shape

        self.conv1 = ConvolutionalLayer(n_channels, conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(4, 4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(4, 4)
        self.flatten = Flattener()
        # 3x3 convs with padding 1 preserve spatial size; each 4x4 pool with
        # stride 4 divides it by 4, so 32 -> 8 -> 2
        self.fc = FullyConnectedLayer(
            (height // 4 // 4) * (width // 4 // 4) * conv2_channels,
            n_output_classes)

        self.conv1_params = self.conv1.params()
        self.conv2_params = self.conv2.params()
        self.fc_params = self.fc.params()

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad.fill(0)

        # Forward pass (no L2 regularization is needed in this assignment)
        out = self.conv1.forward(X)
        out = self.relu1.forward(out)
        out = self.maxpool1.forward(out)
        out = self.conv2.forward(out)
        out = self.relu2.forward(out)
        out = self.maxpool2.forward(out)
        out = self.flatten.forward(out)
        out = self.fc.forward(out)

        loss, d_preds = softmax_with_cross_entropy(out, y)

        # Backward pass in reverse layer order
        d_out = self.fc.backward(d_preds)
        d_out = self.flatten.backward(d_out)
        d_out = self.maxpool2.backward(d_out)
        d_out = self.relu2.backward(d_out)
        d_out = self.conv2.backward(d_out)
        d_out = self.maxpool1.backward(d_out)
        d_out = self.relu1.backward(d_out)
        self.conv1.backward(d_out)

        return loss

    def predict(self, X):
        out = self.conv1.forward(X)
        out = self.relu1.forward(out)
        out = self.maxpool1.forward(out)
        out = self.conv2.forward(out)
        out = self.relu2.forward(out)
        out = self.maxpool2.forward(out)
        out = self.flatten.forward(out)
        out = self.fc.forward(out)
        return np.argmax(out, axis=1)

    def params(self):
        # Aggregate the params from all the layers which have parameters
        d1 = {k + '1': v for k, v in self.conv1_params.items()}
        d2 = {k + '2': v for k, v in self.conv2_params.items()}
        d3 = {k + '3': v for k, v in self.fc_params.items()}
        return {**d1, **d2, **d3}
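# These solutions all assume the assignment's Param container with a `value`
# array and a `grad` array accumulated by the layers' backward(). A minimal
# sketch of that assumed contract (the course's actual class may differ):
import numpy as np

class Param:
    """Trainable parameter: a value plus a gradient accumulated by backward()."""

    def __init__(self, value):
        self.value = value
        self.grad = np.zeros_like(value)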
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.result = {}
        self.reg = reg
        self.n_input = n_input
        self.n_output = n_output
        self.hidden_layer_size = hidden_layer_size

        self.first_layer = FullyConnectedLayer(self.n_input, self.hidden_layer_size)
        self.first_relu = ReLULayer()
        self.second_layer = FullyConnectedLayer(self.hidden_layer_size, self.n_output)
        self.second_relu = ReLULayer()

        first_layer_param = self.first_layer.params()
        self.result['first_l_param_W'] = first_layer_param['W']
        self.result['first_l_param_B'] = first_layer_param['B']
        second_layer_param = self.second_layer.params()
        self.result['second_l_param_W'] = second_layer_param['W']
        self.result['second_l_param_B'] = second_layer_param['B']

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        self.X = X

        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for param in self.result.values():
            param.grad.fill(0)

        # Forward and backward passes through the model
        first_layer_f_out = self.first_layer.forward(self.X)
        first_relu_f_out = self.first_relu.forward(first_layer_f_out)
        second_layer_f_out = self.second_layer.forward(first_relu_f_out)
        second_relu_f_out = self.second_relu.forward(second_layer_f_out)

        loss, grad = softmax_with_cross_entropy(second_relu_f_out, y)

        second_relu_b_out = self.second_relu.backward(grad)
        second_layer_b_out = self.second_layer.backward(second_relu_b_out)
        first_relu_b_out = self.first_relu.backward(second_layer_b_out)
        self.first_layer.backward(first_relu_b_out)

        # L2 regularization on all params
        for param in self.result.values():
            l2_loss, l2_grad = l2_regularization(param.value, self.reg)
            param.grad += l2_grad
            loss += l2_loss

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        first_layer_f_out = self.first_layer.forward(X)
        first_relu_f_out = self.first_relu.forward(first_layer_f_out)
        second_layer_f_out = self.second_layer.forward(first_relu_f_out)
        second_relu_f_out = self.second_relu.forward(second_layer_f_out)
        return second_relu_f_out.argmax(axis=1)

    def params(self):
        return self.result
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fc1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.act1 = ReLULayer()
        self.fc2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear gradients
        params = self.params()
        for p in params:
            params[p].grad = np.zeros_like(params[p].value)

        X = self.fc1.forward(X)
        X = self.act1.forward(X)
        X = self.fc2.forward(X)

        loss, d_pred = softmax_with_cross_entropy(X, y)

        d_fc2 = self.fc2.backward(d_pred)
        d_act1 = self.act1.backward(d_fc2)
        d_fc1 = self.fc1.backward(d_act1)

        for p in params:
            regular_loss, regular_grad = l2_regularization(
                params[p].value, self.reg)
            loss += regular_loss
            params[p].grad += regular_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        X = self.fc1.forward(X)
        X = self.act1.forward(X)
        X = self.fc2.forward(X)
        return np.argmax(X, axis=1)

    def params(self):
        result = {}
        for name, param in self.fc1.params().items():
            result[name + '_fc1'] = param
        for name, param in self.fc2.params().items():
            result[name + '_fc2'] = param
        return result
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.input_layer = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu = ReLULayer()
        self.output_layer = FullyConnectedLayer(hidden_layer_size, n_output)

        self.W_in = self.input_layer.params()['W']
        self.W_out = self.output_layer.params()['W']
        self.B_in = self.input_layer.params()['B']
        self.B_out = self.output_layer.params()['B']

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for _, param in self.params().items():
            param.grad.fill(0)

        # Forward and backward passes through the model
        to_relu = self.input_layer.forward(X)
        to_output_layer = self.relu.forward(to_relu)
        pred = self.output_layer.forward(to_output_layer)

        loss, dprediction = softmax_with_cross_entropy(pred, y)

        grad_output_layer = self.output_layer.backward(dprediction)
        grad_relu_layer = self.relu.backward(grad_output_layer)
        self.input_layer.backward(grad_relu_layer)

        # L2 regularization on all params
        for key, param in self.params().items():
            loss_l2, grad_l2 = l2_regularization(param.value, self.reg)
            loss += loss_l2
            param.grad += grad_l2

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        to_relu = self.input_layer.forward(X)
        to_output_layer = self.relu.forward(to_relu)
        scores = self.output_layer.forward(to_output_layer)
        return np.argmax(scores, axis=-1)

    def params(self):
        return {
            'W_out': self.W_out,
            'W_in': self.W_in,
            'B_out': self.B_out,
            'B_in': self.B_in
        }
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.hidden_layer = (FullyConnectedLayer(n_input, hidden_layer_size),
                             ReLULayer())
        self.output_layer = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        for p in self.params().values():
            p.grad = np.zeros_like(p.value)

        h0 = self.hidden_layer[0].forward(X)
        h1 = self.hidden_layer[1].forward(h0)
        o = self.output_layer.forward(h1)

        loss_unreg, loss_unreg_grad = softmax_with_cross_entropy(o, y)

        o_grad = self.output_layer.backward(loss_unreg_grad)
        h1_grad = self.hidden_layer[1].backward(o_grad)
        h0_grad = self.hidden_layer[0].backward(h1_grad)

        loss_reg = 0
        for p in self.params().values():
            p_reg, p_reg_grad = l2_regularization(p.value, self.reg)
            p.grad += p_reg_grad
            loss_reg += p_reg

        return loss_unreg + loss_reg

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        h0 = self.hidden_layer[0].forward(X)
        h1 = self.hidden_layer[1].forward(h0)
        o = self.output_layer.forward(h1)
        return np.argmax(o, axis=1)

    def params(self):
        hidden_params = self.hidden_layer[0].params()
        output_params = self.output_layer.params()
        return {
            'hW': hidden_params['W'],
            'hB': hidden_params['B'],
            'oW': output_params['W'],
            'oB': output_params['B']
        }
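# Every solution here calls l2_regularization(value, reg) and expects back a
# (loss, grad) pair. A sketch consistent with that contract, assuming the
# usual penalty loss = reg * sum(W^2) with gradient 2 * reg * W:
import numpy as np

def l2_regularization(W, reg_strength):
    # L2 penalty on W and its gradient with respect to W
    loss = reg_strength * np.sum(W ** 2)
    grad = 2 * reg_strength * W
    return loss, grad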
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fc1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu1 = ReLULayer()
        self.fc2 = FullyConnectedLayer(hidden_layer_size, n_output)
        self.layers = (self.fc1, self.fc2)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        params1 = self.fc1.params()
        params2 = self.fc2.params()
        for key in ['W', 'B']:
            params1[key].grad.fill(0)
            params2[key].grad.fill(0)

        # Forward and backward passes through the model
        x = self.relu1.forward(self.fc1.forward(X))
        y_pred = self.fc2.forward(x)

        loss, dpred = softmax_with_cross_entropy(y_pred, y)

        dout = self.fc2.backward(dpred)
        dout = self.relu1.backward(dout)
        dout = self.fc1.backward(dout)

        # L2 regularization on all params
        loss_fc1_W_reg, grad_fc1_W_reg = l2_regularization(
            params1['W'].value, self.reg)
        loss_fc1_B_reg, grad_fc1_B_reg = l2_regularization(
            params1['B'].value, self.reg)
        loss_fc2_W_reg, grad_fc2_W_reg = l2_regularization(
            params2['W'].value, self.reg)
        loss_fc2_B_reg, grad_fc2_B_reg = l2_regularization(
            params2['B'].value, self.reg)

        self.fc1.W.grad += grad_fc1_W_reg
        self.fc1.B.grad += grad_fc1_B_reg
        self.fc2.W.grad += grad_fc2_W_reg
        self.fc2.B.grad += grad_fc2_B_reg

        return loss + (loss_fc1_W_reg + loss_fc1_B_reg +
                       loss_fc2_W_reg + loss_fc2_B_reg)

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        x = self.relu1.forward(self.fc1.forward(X))
        scores = self.fc2.forward(x)
        return np.argmax(scores, axis=1)

    def params(self):
        result = {}
        for layer in self.layers:
            for k, param in layer.params().items():
                result[' '.join([str(id(layer)), k])] = param
        return result
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.hidden_layer1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.hidden_layer2 = FullyConnectedLayer(hidden_layer_size, n_output)
        self.relu_layer1 = ReLULayer()

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        params = self.params()
        for param_key in params:
            params[param_key].grad = np.zeros_like(params[param_key].value)

        # Forward and backward passes through the model
        z1 = self.hidden_layer1.forward(X)
        a1 = self.relu_layer1.forward(z1)
        a2 = self.hidden_layer2.forward(a1)

        loss, dprediction = softmax_with_cross_entropy(a2, y)

        d_out_hidden1 = self.hidden_layer2.backward(dprediction)
        d_out_relu1 = self.relu_layer1.backward(d_out_hidden1)
        self.hidden_layer1.backward(d_out_relu1)

        # L2 regularization on all params
        for param_key in params:
            reg_loss, reg_grad = l2_regularization(params[param_key].value,
                                                   self.reg)
            loss += reg_loss
            params[param_key].grad += reg_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        z1 = self.hidden_layer1.forward(X)
        a1 = self.relu_layer1.forward(z1)
        a2 = self.hidden_layer2.forward(a1)
        return np.argmax(a2, axis=1)

    def params(self):
        result = {}
        hidden_layer_params1 = self.hidden_layer1.params()
        for param_key in hidden_layer_params1:
            result[param_key + '-1'] = hidden_layer_params1[param_key]
        hidden_layer_params2 = self.hidden_layer2.params()
        for param_key in hidden_layer_params2:
            result[param_key + '-2'] = hidden_layer_params2[param_key]
        return result
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.Linear1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.ReLU = ReLULayer()
        self.Linear2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        params = self.params()
        W1, B1 = params['W1'], params['B1']
        W2, B2 = params['W2'], params['B2']
        for param in (W1, B1, W2, B2):
            param.grad = np.zeros_like(param.value)

        # Forward and backward passes through the model
        forward_linear1 = self.Linear1.forward(X)
        forward_relu = self.ReLU.forward(forward_linear1)
        predictions = self.Linear2.forward(forward_relu)

        loss, d_predictions = softmax_with_cross_entropy(predictions, y)

        backward_linear2 = self.Linear2.backward(d_predictions)
        backward_relu = self.ReLU.backward(backward_linear2)
        self.Linear1.backward(backward_relu)

        # L2 regularization on all params
        l2_W1_loss, l2_W1_grad = l2_regularization(W1.value, self.reg)
        l2_B1_loss, l2_B1_grad = l2_regularization(B1.value, self.reg)
        l2_W2_loss, l2_W2_grad = l2_regularization(W2.value, self.reg)
        l2_B2_loss, l2_B2_grad = l2_regularization(B2.value, self.reg)

        loss += l2_W1_loss + l2_W2_loss + l2_B1_loss + l2_B2_loss
        W1.grad += l2_W1_grad
        B1.grad += l2_B1_grad
        W2.grad += l2_W2_grad
        B2.grad += l2_B2_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        output = self.Linear1.forward(X)
        output = self.ReLU.forward(output)
        output = self.Linear2.forward(output)
        probs = softmax(output)
        return np.argmax(probs, axis=1)

    def params(self):
        return {
            'W1': self.Linear1.params()['W'],
            'B1': self.Linear1.params()['B'],
            'W2': self.Linear2.params()['W'],
            'B2': self.Linear2.params()['B']
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes,
                 conv1_channels, conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        image_width, image_height, n_channels = input_shape

        conv_padding = 0
        conv_filter_size = 3
        max_pool_size = 4
        max_pool_stride = 1

        conv1_output_size = image_width - conv_filter_size + 1
        maxpool1_output_size = (conv1_output_size - max_pool_size) // max_pool_stride + 1
        conv2_output_size = maxpool1_output_size - conv_filter_size + 1
        maxpool2_output_size = (conv2_output_size - max_pool_size) // max_pool_stride + 1

        # correct only while height == width
        fc_input_size = maxpool2_output_size * maxpool2_output_size * conv2_channels

        self.conv1 = ConvolutionalLayer(n_channels, conv1_channels,
                                        conv_filter_size, conv_padding)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(max_pool_size, max_pool_stride)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        conv_filter_size, conv_padding)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(max_pool_size, max_pool_stride)
        self.flattener = Flattener()
        self.fc = FullyConnectedLayer(fc_input_size, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # No L2 regularization is needed in this assignment
        self._zero_grad()
        predictions = self._forward(X)
        loss, dprediction = softmax_with_cross_entropy(predictions, y)
        self._backward(dprediction)
        return loss

    def _zero_grad(self):
        # Clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

    def _forward(self, X):
        output = self.conv1.forward(X)
        output = self.relu1.forward(output)
        output = self.maxpool1.forward(output)
        output = self.conv2.forward(output)
        output = self.relu2.forward(output)
        output = self.maxpool2.forward(output)
        output = self.flattener.forward(output)
        output = self.fc.forward(output)
        return output

    def _backward(self, dprediction):
        grad = self.fc.backward(dprediction)
        grad = self.flattener.backward(grad)
        grad = self.maxpool2.backward(grad)
        grad = self.relu2.backward(grad)
        grad = self.conv2.backward(grad)
        grad = self.maxpool1.backward(grad)
        grad = self.relu1.backward(grad)
        self.conv1.backward(grad)

    def predict(self, X):
        predictions = self._forward(X)
        return np.argmax(softmax(predictions), axis=1)

    def params(self):
        # Aggregate the params from all the layers which have parameters
        result = {}
        for k, v in self.conv1.params().items():
            result['conv1_' + k] = v
        for k, v in self.conv2.params().items():
            result['conv2_' + k] = v
        for k, v in self.fc.params().items():
            result['fc_' + k] = v
        return result
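# The models above rely on softmax() and softmax_with_cross_entropy() helpers.
# A hedged sketch of both, assuming 0-based integer targets and the
# (loss, d_predictions) return convention these classes use:
import numpy as np

def softmax(predictions):
    # numerically stable softmax over the last axis
    shifted = predictions - np.max(predictions, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

def softmax_with_cross_entropy(predictions, target_index):
    # mean cross-entropy over the batch and its gradient w.r.t. predictions
    probs = softmax(predictions)
    batch = np.arange(predictions.shape[0])
    loss = -np.mean(np.log(probs[batch, target_index]))
    d_predictions = probs.copy()
    d_predictions[batch, target_index] -= 1
    d_predictions /= predictions.shape[0]
    return loss, d_predictions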
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.layer1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.ReLU = ReLULayer()
        self.layer2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        for value in self.params().values():
            value.grad[...] = 0

        X_out_layer1 = self.layer1.forward(X)
        X_out_ReLU = self.ReLU.forward(X_out_layer1)
        X_out_layer2 = self.layer2.forward(X_out_ReLU)

        # Only the weight matrices are regularized here
        loss1_reg, grad_loss1_reg = l2_regularization(self.layer1.W.value,
                                                      self.reg)
        loss2_reg, grad_loss2_reg = l2_regularization(self.layer2.W.value,
                                                      self.reg)

        loss_pred, dloss = softmax_with_cross_entropy(X_out_layer2, y)
        loss = loss_pred + loss1_reg + loss2_reg

        dlayer2 = self.layer2.backward(dloss)
        self.layer2.W.grad += grad_loss2_reg
        dReLU = self.ReLU.backward(dlayer2)
        dlayer1 = self.layer1.backward(dReLU)
        self.layer1.W.grad += grad_loss1_reg

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        X_out_layer1 = self.layer1.forward(X)
        X_out_ReLU = self.ReLU.forward(X_out_layer1)
        X_out_layer2 = self.layer2.forward(X_out_ReLU)
        return np.argmax(softmax(X_out_layer2), axis=1)

    def params(self):
        return {
            "W1": self.layer1.params()["W"],
            "B1": self.layer1.params()["B"],
            "W2": self.layer2.params()["W"],
            "B2": self.layer2.params()["B"]
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes,
                 conv1_channels, conv2_channels, reg=0):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        image_width, image_height, n_channels = input_shape

        padding_1 = 1
        padding_2 = 1
        filter_size_1 = 3
        filter_size_2 = 3
        pooling_size_1 = 4
        pooling_size_2 = 4
        stride_1 = 4
        stride_2 = 4

        # Shape arithmetic for the first conv -> pool stage
        height = image_height + 2 * padding_1
        width = image_width + 2 * padding_1
        out_height = height - filter_size_1 + 1
        out_width = width - filter_size_1 + 1
        assert (out_height - pooling_size_1) % stride_1 == 0
        assert (out_width - pooling_size_1) % stride_1 == 0
        height = out_height
        width = out_width
        out_height = (height - pooling_size_1) // stride_1 + 1
        out_width = (width - pooling_size_1) // stride_1 + 1

        # ... and for the second stage
        height = out_height + 2 * padding_2
        width = out_width + 2 * padding_2
        out_height = height - filter_size_2 + 1
        out_width = width - filter_size_2 + 1
        assert (out_height - pooling_size_2) % stride_2 == 0
        assert (out_width - pooling_size_2) % stride_2 == 0
        height = out_height
        width = out_width
        out_height = (height - pooling_size_2) // stride_2 + 1
        out_width = (width - pooling_size_2) // stride_2 + 1

        self.Conv_first = ConvolutionalLayer(n_channels, conv1_channels,
                                             filter_size_1, padding_1)
        self.Relu_first = ReLULayer()
        self.Maxpool_first = MaxPoolingLayer(pooling_size_1, stride_1)
        self.Conv_second = ConvolutionalLayer(conv1_channels, conv2_channels,
                                              filter_size_2, padding_2)
        self.Relu_second = ReLULayer()
        self.Maxpool_second = MaxPoolingLayer(pooling_size_2, stride_2)
        self.Flattener = Flattener()
        self.FC = FullyConnectedLayer(out_height * out_width * conv2_channels,
                                      n_output_classes)
        self.n_output = n_output_classes
        self.reg = reg

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad = np.zeros_like(param.grad)

        # Forward pass
        X1 = self.Conv_first.forward(X)
        X1_Relu = self.Relu_first.forward(X1)
        X1_Max = self.Maxpool_first.forward(X1_Relu)
        X2 = self.Conv_second.forward(X1_Max)
        X2_Relu = self.Relu_second.forward(X2)
        X2_Max = self.Maxpool_second.forward(X2_Relu)
        X3 = self.Flattener.forward(X2_Max)
        X3_FC = self.FC.forward(X3)

        # note: this solution's softmax helper apparently expects
        # 1-based class indices, hence the y + 1
        loss, dX3_FC = softmax_with_cross_entropy(X3_FC, y + 1)

        # Backward pass
        dX3 = self.FC.backward(dX3_FC)
        dX2_Max = self.Flattener.backward(dX3)
        dX2_Relu = self.Maxpool_second.backward(dX2_Max)
        dX2 = self.Relu_second.backward(dX2_Relu)
        dX1_Max = self.Conv_second.backward(dX2)
        dX1_Relu = self.Maxpool_first.backward(dX1_Max)
        dX1 = self.Relu_first.backward(dX1_Relu)
        self.Conv_first.backward(dX1)

        # L2 regularization is applied to the FC layer only; keep both W and B
        # gradients consistent with the regularized loss
        reg_loss_w, reg_grad_w = l2_regularization(self.FC.W.value, self.reg)
        reg_loss_b, reg_grad_b = l2_regularization(self.FC.B.value, self.reg)
        loss += reg_loss_w + reg_loss_b
        self.FC.W.grad += reg_grad_w
        self.FC.B.grad += reg_grad_b

        return loss

    def predict(self, X):
        predictions = self.FC.forward(
            self.Flattener.forward(
                self.Maxpool_second.forward(
                    self.Relu_second.forward(
                        self.Conv_second.forward(
                            self.Maxpool_first.forward(
                                self.Relu_first.forward(
                                    self.Conv_first.forward(X))))))))
        # The class with the smallest cross-entropy is the one with the
        # largest logit, so a plain argmax replaces a per-class loss loop
        return np.argmax(predictions, axis=1)

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        result = {}
        dict_first = self.Conv_first.params()
        dict_second = self.Conv_second.params()
        dict_FC = self.FC.params()
        for key in dict_first.keys():
            result[key + 'C1'] = dict_first[key]
        for key in dict_second.keys():
            result[key + 'C2'] = dict_second[key]
        for key in dict_FC.keys():
            result[key + 'F1'] = dict_FC[key]
        return result
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fc_layer_1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu_layer_1 = ReLULayer()
        self.fc_layer_2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def clear_gradients(self):
        for param in self.params().values():
            param.grad[:] = 0.0

    def forward(self, X):
        out1 = self.fc_layer_1.forward(X)
        out2 = self.relu_layer_1.forward(out1)
        preds = self.fc_layer_2.forward(out2)
        return preds

    def backward(self, d_preds):
        d_fc2 = self.fc_layer_2.backward(d_preds)
        d_relu1 = self.relu_layer_1.backward(d_fc2)
        d_fc1 = self.fc_layer_1.backward(d_relu1)
        return d_fc1

    def l2_regularization(self):
        l2_loss = 0
        for param in self.params().values():
            loss, grad = l2_regularization(param.value, self.reg)
            param.grad += grad
            l2_loss += loss
        return l2_loss

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        self.clear_gradients()

        # Forward and backward passes through the model
        preds = self.forward(X)
        softmax_loss, d_preds = softmax_with_cross_entropy(preds, y)
        self.backward(d_preds)

        # L2 regularization on all params
        l2_loss = self.l2_regularization()
        return softmax_loss + l2_loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        preds = self.forward(X)
        return np.argmax(preds, axis=1)

    def params(self):
        result = {}

        def rename_keys(input_dict: dict, key_prefix: str) -> dict:
            return {key_prefix + key: value
                    for key, value in input_dict.items()}

        result.update(rename_keys(self.fc_layer_1.params(), 'fc1_'))
        result.update(rename_keys(self.fc_layer_2.params(), 'fc2_'))
        result.update(rename_keys(self.relu_layer_1.params(), 'relu1_'))
        return result
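# How any of these models plugs into plain SGD via the shared
# params()/compute_loss_and_gradients() contract. The batch data and
# hyperparameters below are made up purely for illustration:
import numpy as np

X_batch = np.random.randn(64, 3072)
y_batch = np.random.randint(0, 10, size=64)
model = TwoLayerNet(n_input=3072, n_output=10, hidden_layer_size=100, reg=1e-4)
learning_rate = 1e-2

for step in range(100):
    loss = model.compute_loss_and_gradients(X_batch, y_batch)
    for param in model.params().values():
        # vanilla gradient descent step on every trainable parameter
        param.value -= learning_rate * param.grad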
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes,
                 conv1_channels, conv2_channels, reg):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.reg = reg
        self.conv1 = ConvolutionalLayer(in_channels=input_shape[-1],
                                        out_channels=conv1_channels,
                                        filter_size=3, padding=1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(pool_size=4, stride=4)
        self.conv2 = ConvolutionalLayer(in_channels=conv1_channels,
                                        out_channels=conv2_channels,
                                        filter_size=3, padding=1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(pool_size=4, stride=4)
        self.flattener = Flattener()
        # n_input = 4 * conv2_channels is hard-coded for the fixed 32x32x3
        # input: two pool-by-4 stages leave a 2x2 spatial map
        self.fullyconlayer = FullyConnectedLayer(n_input=4 * conv2_channels,
                                                 n_output=n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        to_relu = self.conv1.forward(X)
        to_maxpool1 = self.relu1.forward(to_relu)
        to_conv2 = self.maxpool1.forward(to_maxpool1)
        to_relu2 = self.conv2.forward(to_conv2)
        to_maxpool2 = self.relu2.forward(to_relu2)
        to_flat = self.maxpool2.forward(to_maxpool2)
        to_fc_layer = self.flattener.forward(to_flat)
        preds = self.fullyconlayer.forward(to_fc_layer)

        loss, dprediction = softmax_with_cross_entropy(preds, y)

        grad_from_fc_layer = self.fullyconlayer.backward(dprediction)
        grad_from_flatten = self.flattener.backward(grad_from_fc_layer)
        grad_from_maxpool2 = self.maxpool2.backward(grad_from_flatten)
        grad_from_relu2 = self.relu2.backward(grad_from_maxpool2)
        grad_from_conv2 = self.conv2.backward(grad_from_relu2)
        grad_from_maxpool1 = self.maxpool1.backward(grad_from_conv2)
        grad_from_relu1 = self.relu1.backward(grad_from_maxpool1)
        self.conv1.backward(grad_from_relu1)

        # L2 regularization on the FC weights only
        W_fc = self.fullyconlayer.params()['W']
        loss_fc, grad_fc = l2_regularization(W_fc.value, self.reg)
        loss += loss_fc
        W_fc.grad += grad_fc

        return loss

    def predict(self, X):
        to_relu = self.conv1.forward(X)
        to_maxpool1 = self.relu1.forward(to_relu)
        to_conv2 = self.maxpool1.forward(to_maxpool1)
        to_relu2 = self.conv2.forward(to_conv2)
        to_maxpool2 = self.relu2.forward(to_relu2)
        to_flat = self.maxpool2.forward(to_maxpool2)
        to_fc_layer = self.flattener.forward(to_flat)
        preds = self.fullyconlayer.forward(to_fc_layer)
        probs = softmax(preds)
        return np.argmax(probs, axis=-1)

    def params(self):
        # Pull params straight from the layers so the dictionary is valid
        # even before the first forward/backward pass
        result = {}
        for name, param in self.conv1.params().items():
            result[name + '_con1_layer'] = param
        for name, param in self.conv2.params().items():
            result[name + '_con2_layer'] = param
        for name, param in self.fullyconlayer.params().items():
            result[name + '_fc_layer'] = param
        return result
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.RELU_1 = ReLULayer()
        self.RELU_2 = ReLULayer()
        self.FullyConnected_1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.FullyConnected_2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        # Forward and backward passes through the model
        y1 = self.FullyConnected_1.forward(X)
        y2 = self.RELU_1.forward(y1)
        y3 = self.FullyConnected_2.forward(y2)
        y_result = self.RELU_2.forward(y3)

        loss, d_out1 = softmax_with_cross_entropy(y_result, y)

        d_out2 = self.RELU_2.backward(d_out1)
        d_out3 = self.FullyConnected_2.backward(d_out2)
        d_out4 = self.RELU_1.backward(d_out3)
        self.FullyConnected_1.backward(d_out4)

        # L2 regularization on all params
        loss_l1, dW1_l = l2_regularization(
            self.FullyConnected_1.params()['W'].value, self.reg)
        loss_l2, dW2_l = l2_regularization(
            self.FullyConnected_2.params()['W'].value, self.reg)
        loss_l3, dB1_l = l2_regularization(
            self.FullyConnected_1.params()['B'].value, self.reg)
        loss_l4, dB2_l = l2_regularization(
            self.FullyConnected_2.params()['B'].value, self.reg)

        self.FullyConnected_1.params()['W'].grad += dW1_l
        self.FullyConnected_2.params()['W'].grad += dW2_l
        self.FullyConnected_1.params()['B'].grad += dB1_l
        self.FullyConnected_2.params()['B'].grad += dB2_l

        return loss + loss_l1 + loss_l2 + loss_l3 + loss_l4

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        y1 = self.FullyConnected_1.forward(X)
        y2 = self.RELU_1.forward(y1)
        y3 = self.FullyConnected_2.forward(y2)
        predictions = self.RELU_2.forward(y3)
        # softmax is monotonic, so argmax over the raw outputs suffices
        return np.argmax(predictions, axis=1)

    def params(self):
        return {
            'W1': self.FullyConnected_1.params()['W'],
            'W2': self.FullyConnected_2.params()['W'],
            'B1': self.FullyConnected_1.params()['B'],
            'B2': self.FullyConnected_2.params()['B']
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes,
                 conv1_channels, conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        width, height, channels = input_shape
        filter_size = 3
        pool_size = 4
        padding = 1
        stride = pool_size

        self.conv1 = ConvolutionalLayer(channels, conv1_channels,
                                        filter_size, padding)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(pool_size, stride)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        filter_size, padding)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(pool_size, stride)
        self.flatten = Flattener()
        # Convs preserve spatial size; each pooling divides it by pool_size
        n_fc_input = int(height / pool_size / pool_size *
                         width / pool_size / pool_size *
                         conv2_channels)
        self.fc = FullyConnectedLayer(n_fc_input, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        # (no L2 regularization is needed in this assignment)
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        # forward pass
        out = self.conv1.forward(X)
        out = self.relu1.forward(out)
        out = self.maxpool1.forward(out)
        out = self.conv2.forward(out)
        out = self.relu2.forward(out)
        out = self.maxpool2.forward(out)
        out = self.flatten.forward(out)
        out = self.fc.forward(out)

        loss, d_preds = softmax_with_cross_entropy(out, y)

        # backward pass
        d_out = self.fc.backward(d_preds)
        d_out = self.flatten.backward(d_out)
        d_out = self.maxpool2.backward(d_out)
        d_out = self.relu2.backward(d_out)
        d_out = self.conv2.backward(d_out)
        d_out = self.maxpool1.backward(d_out)
        d_out = self.relu1.backward(d_out)
        self.conv1.backward(d_out)

        return loss

    def predict(self, X):
        out = self.conv1.forward(X)
        out = self.relu1.forward(out)
        out = self.maxpool1.forward(out)
        out = self.conv2.forward(out)
        out = self.relu2.forward(out)
        out = self.maxpool2.forward(out)
        out = self.flatten.forward(out)
        out = self.fc.forward(out)
        probs = softmax(out)
        return np.argmax(probs, axis=1)

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        return {
            "W1": self.conv1.params()["W"],
            "B1": self.conv1.params()["B"],
            "W2": self.conv2.params()["W"],
            "B2": self.conv2.params()["B"],
            "W3": self.fc.params()["W"],
            "B3": self.fc.params()["B"]
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes,
                 conv1_channels, conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.height = input_shape[0]
        self.width = input_shape[1]
        self.input_channels = input_shape[2]
        self.n_output_classes = n_output_classes
        self.conv1_channels = conv1_channels
        self.conv2_channels = conv2_channels

        self.conv1_layer = ConvolutionalLayer(self.input_channels,
                                              self.conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(4, 4)
        self.conv2_layer = ConvolutionalLayer(self.conv1_channels,
                                              self.conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(4, 4)
        self.flattener = Flattener()
        # a 32x32 input shrinks to 2x2 after two pool-by-4 stages
        self.fc_layer = FullyConnectedLayer(2 * 2 * self.conv2_channels,
                                            self.n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # nullify layer gradients from the previous pass
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        # forward pass
        conv_forward1 = self.conv1_layer.forward(X)
        relu_forward1 = self.relu1.forward(conv_forward1)
        maxpool_forward1 = self.maxpool1.forward(relu_forward1)
        conv_forward2 = self.conv2_layer.forward(maxpool_forward1)
        relu_forward2 = self.relu2.forward(conv_forward2)
        maxpool_forward2 = self.maxpool2.forward(relu_forward2)
        flattener_forward = self.flattener.forward(maxpool_forward2)
        fc_forward = self.fc_layer.forward(flattener_forward)

        # calculate loss and gradient
        loss, grad = softmax_with_cross_entropy(fc_forward, y)

        # backward pass
        fc_backward = self.fc_layer.backward(grad)
        flattener_backward = self.flattener.backward(fc_backward)
        maxpool_backward2 = self.maxpool2.backward(flattener_backward)
        relu_backward2 = self.relu2.backward(maxpool_backward2)
        conv_backward2 = self.conv2_layer.backward(relu_backward2)
        maxpool_backward1 = self.maxpool1.backward(conv_backward2)
        relu_backward1 = self.relu1.backward(maxpool_backward1)
        self.conv1_layer.backward(relu_backward1)

        return loss

    def predict(self, X):
        conv_forward1 = self.conv1_layer.forward(X)
        relu_forward1 = self.relu1.forward(conv_forward1)
        maxpool_forward1 = self.maxpool1.forward(relu_forward1)
        conv_forward2 = self.conv2_layer.forward(maxpool_forward1)
        relu_forward2 = self.relu2.forward(conv_forward2)
        maxpool_forward2 = self.maxpool2.forward(relu_forward2)
        flattener_forward = self.flattener.forward(maxpool_forward2)
        fc_forward = self.fc_layer.forward(flattener_forward)
        prediction = fc_forward.argmax(axis=1)
        return prediction

    def params(self):
        return {'W1': self.conv1_layer.params()['W'],
                'B1': self.conv1_layer.params()['B'],
                'W2': self.conv2_layer.params()['W'],
                'B2': self.conv2_layer.params()['B'],
                'W3': self.fc_layer.params()['W'],
                'B3': self.fc_layer.params()['B']}
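# Sanity check for the hard-coded 2 * 2 * conv2_channels FC input used above:
# a 3x3 conv with padding 1 preserves spatial size, and each 4x4 maxpool with
# stride 4 divides it by 4, so 32 -> 8 -> 2 across the two stages.
size = 32
for _ in range(2):
    size = (size - 4) // 4 + 1  # maxpool 4x4, stride 4 (the conv keeps size)
assert size == 2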
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.first = FullyConnectedLayer(n_input, hidden_layer_size)
        self.ReLU = ReLULayer()
        self.second = FullyConnectedLayer(hidden_layer_size, n_output)
        self.n_output = n_output

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        params = self.params()
        for param in params.values():
            param.grad = np.zeros_like(param.grad)

        # Forward and backward passes through the model
        X1 = self.first.forward(X)
        X1_ReLU = self.ReLU.forward(X1)
        X2 = self.second.forward(X1_ReLU)

        # note: this solution's softmax helper apparently expects
        # 1-based class indices, hence the y + 1
        loss, dX2 = softmax_with_cross_entropy(X2, y + 1)

        dX1_ReLU = self.second.backward(dX2)
        dX1 = self.ReLU.backward(dX1_ReLU)
        self.first.backward(dX1)

        # L2 regularization on the weight matrices only
        for name, param in params.items():
            if name[0] == 'W':
                loss_, grad_ = l2_regularization(param.value, self.reg)
                param.grad += grad_
                loss += loss_

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        predictions = self.second.forward(
            self.ReLU.forward(self.first.forward(X)))
        # The class with the smallest cross-entropy is the one with the
        # largest logit, so a plain argmax replaces a per-class loss loop
        return np.argmax(predictions, axis=1)

    def params(self):
        result = {}
        dict_first = self.first.params()
        dict_second = self.second.params()
        for key in dict_first.keys():
            result[key + '1'] = dict_first[key]
        for key in dict_second.keys():
            result[key + '2'] = dict_second[key]
        return result
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.layer1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu_layer = ReLULayer()
        self.layer2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        params = self.params()
        W1, B1 = params["W1"], params["B1"]
        W2, B2 = params["W2"], params["B2"]

        # Clear parameter gradients aggregated from the previous pass
        W1.grad = np.zeros_like(W1.value)
        B1.grad = np.zeros_like(B1.value)
        W2.grad = np.zeros_like(W2.value)
        B2.grad = np.zeros_like(B2.value)

        # forward pass
        out1 = self.layer1.forward(X)
        out_relu = self.relu_layer.forward(out1)
        out2 = self.layer2.forward(out_relu)

        loss, d_preds = softmax_with_cross_entropy(out2, y)

        # backward pass
        d_out2 = self.layer2.backward(d_preds)
        d_out_relu = self.relu_layer.backward(d_out2)
        self.layer1.backward(d_out_relu)

        # add regularization
        l2_W1_loss, l2_W1_grad = l2_regularization(W1.value, self.reg)
        l2_B1_loss, l2_B1_grad = l2_regularization(B1.value, self.reg)
        l2_W2_loss, l2_W2_grad = l2_regularization(W2.value, self.reg)
        l2_B2_loss, l2_B2_grad = l2_regularization(B2.value, self.reg)

        loss += l2_W1_loss + l2_B1_loss + l2_W2_loss + l2_B2_loss
        W1.grad += l2_W1_grad
        B1.grad += l2_B1_grad
        W2.grad += l2_W2_grad
        B2.grad += l2_B2_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        out1 = self.layer1.forward(X)
        out_relu = self.relu_layer.forward(out1)
        predictions = self.layer2.forward(out_relu)
        probs = softmax(predictions)
        return np.argmax(probs, axis=1)

    def params(self):
        return {
            "W1": self.layer1.params()["W"],
            "B1": self.layer1.params()["B"],
            "W2": self.layer2.params()["W"],
            "B2": self.layer2.params()["B"]
        }
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fully_connected_layer_1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu_layer_1 = ReLULayer()
        self.fully_connected_layer_2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        params = self.params()
        params["W1"].grad.fill(0)
        params["B1"].grad.fill(0)
        params["W2"].grad.fill(0)
        params["B2"].grad.fill(0)

        # Forward pass; note there is no ReLU after the last layer -
        # softmax_with_cross_entropy consumes the raw logits
        res_fc_1 = self.fully_connected_layer_1.forward(X)
        res_relu_1 = self.relu_layer_1.forward(res_fc_1)
        res_fc_2 = self.fully_connected_layer_2.forward(res_relu_1)

        loss_softmax, dprediction = softmax_with_cross_entropy(res_fc_2, y)

        # Backward pass
        d_res_fc_2 = self.fully_connected_layer_2.backward(dprediction)
        d_res_relu_1 = self.relu_layer_1.backward(d_res_fc_2)
        d_res_fc_1 = self.fully_connected_layer_1.backward(d_res_relu_1)

        # L2 regularization on all params
        l2_loss_W1, l2_grad_W1 = l2_regularization(params["W1"].value, self.reg)
        l2_loss_B1, l2_grad_B1 = l2_regularization(params["B1"].value, self.reg)
        l2_loss_W2, l2_grad_W2 = l2_regularization(params["W2"].value, self.reg)
        l2_loss_B2, l2_grad_B2 = l2_regularization(params["B2"].value, self.reg)

        params["W1"].grad += l2_grad_W1
        params["B1"].grad += l2_grad_B1
        params["W2"].grad += l2_grad_W2
        params["B2"].grad += l2_grad_B2

        loss = loss_softmax + l2_loss_W1 + l2_loss_B1 + l2_loss_W2 + l2_loss_B2
        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        res_fc_1 = self.fully_connected_layer_1.forward(X)
        res_relu_1 = self.relu_layer_1.forward(res_fc_1)
        res_fc_2 = self.fully_connected_layer_2.forward(res_relu_1)
        return np.argmax(res_fc_2, axis=1)

    def params(self):
        # Aggregate all params of both layers
        return {"W1": self.fully_connected_layer_1.params()["W"],
                "B1": self.fully_connected_layer_1.params()["B"],
                "W2": self.fully_connected_layer_2.params()["W"],
                "B2": self.fully_connected_layer_2.params()["B"]}
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.fcl1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.fcl2 = FullyConnectedLayer(hidden_layer_size, n_output)
        self.relu = ReLULayer()
        self.reg = reg
        self.w1 = self.fcl1.params()['W']
        self.b1 = self.fcl1.params()['B']
        self.w2 = self.fcl2.params()['W']
        # Bug fix: b2 must come from the second layer, not from fcl1
        self.b2 = self.fcl2.params()['B']

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad.fill(0)

        # Forward pass
        hidden_res_forward = self.fcl1.forward(X)
        hidden_res_forward = self.relu.forward(hidden_res_forward)
        output = self.fcl2.forward(hidden_res_forward)
        loss, dprediction = softmax_with_cross_entropy(output, y)

        # Backward pass
        hidden_res_backward = self.fcl2.backward(dprediction)
        hidden_res_backward = self.relu.backward(hidden_res_backward)
        self.fcl1.backward(hidden_res_backward)

        # L2 regularization on all params
        for param in self.params().values():
            reg_loss, reg_grad = l2_regularization(param.value, self.reg)
            loss += reg_loss
            param.grad += reg_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        # softmax is monotonic, so this matches argmax over the raw logits
        return np.argmax(
            softmax(self.fcl2.forward(self.relu.forward(self.fcl1.forward(X)))),
            axis=1)

    def params(self):
        return {"W1": self.w1, "W2": self.w2, "B1": self.b1, "B2": self.b2}
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.input_fc = FullyConnectedLayer(n_input, hidden_layer_size)
        self.input_re = ReLULayer()
        self.hidden_fc = FullyConnectedLayer(hidden_layer_size, n_output)

    def forward_pass(self, X):
        # Clear parameter gradients aggregated from the previous pass
        # (this solution's layers expose a clearGrad() helper)
        self.input_fc.clearGrad()
        self.hidden_fc.clearGrad()

        # Run forward through FC -> ReLU -> FC
        i_fc = self.input_fc.forward(X.copy())
        i_re = self.input_re.forward(i_fc)
        h_fc = self.hidden_fc.forward(i_re)
        return h_fc

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # This solution accesses the loss helpers through a module alias lc
        h_fc = self.forward_pass(X)
        simp_loss, dfinal_mat = lc.softmax_with_cross_entropy(h_fc, y)

        # Backward pass
        d_hf = self.hidden_fc.backward(dfinal_mat)
        d_ir = self.input_re.backward(d_hf)
        d_if = self.input_fc.backward(d_ir)

        # L2 regularization - this solution regularizes the weights only,
        # not the biases
        param = self.params()
        i_loss, iL2_Wgrad = l2_regularization(param['iW'].value, self.reg)
        h_loss, hL2_Wgrad = l2_regularization(param['hW'].value, self.reg)
        loss = simp_loss + i_loss + h_loss
        param['iW'].grad += iL2_Wgrad
        param['hW'].grad += hL2_Wgrad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        predictions = self.forward_pass(X)
        pred = lc.softmax(predictions)
        return np.argmax(pred, axis=1)

    def params(self):
        result = {}
        d = self.input_fc.params()
        result['iW'] = d['W']
        result['iB'] = d['B']
        d = self.hidden_fc.params()
        result['hW'] = d['W']
        result['hB'] = d['B']
        return result
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.hidden_layer = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu_layer = ReLULayer()
        self.output_layer = FullyConnectedLayer(hidden_layer_size, n_output)
        self.n_input = n_input
        self.n_output = n_output
        self.hidden_layer_size = hidden_layer_size

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        hidden_layer_params = self.hidden_layer.params()
        output_layer_params = self.output_layer.params()
        hidden_layer_params['W'].grad = np.zeros_like(hidden_layer_params['W'].grad)
        hidden_layer_params['B'].grad = np.zeros_like(hidden_layer_params['B'].grad)
        output_layer_params['W'].grad = np.zeros_like(output_layer_params['W'].grad)
        output_layer_params['B'].grad = np.zeros_like(output_layer_params['B'].grad)

        # Forward pass
        hidden_l_out = self.hidden_layer.forward(X)
        relu_l_out = self.relu_layer.forward(hidden_l_out)
        output_l_out = self.output_layer.forward(relu_l_out)
        ce_loss, d_pred = softmax_with_cross_entropy(output_l_out, y)

        # L2 regularization - this solution regularizes the weights only
        reg_loss_first, d_R_first = l2_regularization(
            hidden_layer_params['W'].value, self.reg)
        reg_loss_second, d_R_second = l2_regularization(
            output_layer_params['W'].value, self.reg)
        loss = ce_loss + reg_loss_first + reg_loss_second

        # Backward pass, adding the regularization gradients along the way
        d_input_out_layer = self.output_layer.backward(d_pred)
        output_layer_params['W'].grad += d_R_second
        d_input_relu_layer = self.relu_layer.backward(d_input_out_layer)
        self.hidden_layer.backward(d_input_relu_layer)
        hidden_layer_params['W'].grad += d_R_first

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        hidden_l_output = self.hidden_layer.forward(X)
        relu_output = self.relu_layer.forward(hidden_l_output)
        output_l_output = self.output_layer.forward(relu_output)
        return np.argmax(output_l_output, axis=1)

    def params(self):
        result = {}
        hidden_layer_params = self.hidden_layer.params()
        result['W1'] = hidden_layer_params['W']
        result['B1'] = hidden_layer_params['B']
        output_layer_params = self.output_layer.params()
        result['W2'] = output_layer_params['W']
        result['B2'] = output_layer_params['B']
        return result
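# Editor's sketch: how the params() dicts above are typically consumed by a
# training loop. The trainer below is hypothetical (not part of any solution
# in this file); it only relies on each Param exposing .value and .grad,
# which every implementation here provides.

def sgd_step(model, X_batch, y_batch, learning_rate=1e-2):
    """One vanilla SGD update: forward/backward, then descend the gradient."""
    loss = model.compute_loss_and_gradients(X_batch, y_batch)
    for param in model.params().values():
        param.value -= learning_rate * param.grad
    return loss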
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        filter_size = 3
        padding = 1
        pool_size = 4
        stride = 4

        width, height, n_channels = input_shape

        # Track the spatial size through the first Conv -> MaxPool stage...
        assert (height + 2 * padding - filter_size + 1) % pool_size == 0
        assert (width + 2 * padding - filter_size + 1) % pool_size == 0
        height = (height + 2 * padding - filter_size + 1) // pool_size
        width = (width + 2 * padding - filter_size + 1) // pool_size

        # ...and through the second stage, to size the FC layer below
        assert (height + 2 * padding - filter_size + 1) % pool_size == 0
        assert (width + 2 * padding - filter_size + 1) % pool_size == 0
        height = (height + 2 * padding - filter_size + 1) // pool_size
        width = (width + 2 * padding - filter_size + 1) // pool_size

        self.Conv_1 = ConvolutionalLayer(n_channels, conv1_channels,
                                         filter_size, padding)
        self.Relu_1 = ReLULayer()
        self.Maxpool_1 = MaxPoolingLayer(pool_size, stride)
        self.Conv_2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                         filter_size, padding)
        self.Relu_2 = ReLULayer()
        self.Maxpool_2 = MaxPoolingLayer(pool_size, stride)
        self.Flattener = Flattener()
        self.FC = FullyConnectedLayer(height * width * conv2_channels,
                                      n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass.
        # L2 regularization is intentionally omitted; it is not needed
        # in this assignment.
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        # Forward pass
        conv_1 = self.Conv_1.forward(X)
        relu_1 = self.Relu_1.forward(conv_1)
        maxpool_1 = self.Maxpool_1.forward(relu_1)
        conv_2 = self.Conv_2.forward(maxpool_1)
        relu_2 = self.Relu_2.forward(conv_2)
        maxpool_2 = self.Maxpool_2.forward(relu_2)
        flat = self.Flattener.forward(maxpool_2)
        fc = self.FC.forward(flat)

        # This solution's softmax_with_cross_entropy expects 1-indexed
        # targets, hence y + 1
        loss, grad = softmax_with_cross_entropy(fc, y + 1)

        # Backward pass
        d_fc = self.FC.backward(grad)
        d_flat = self.Flattener.backward(d_fc)
        d_maxpool_2 = self.Maxpool_2.backward(d_flat)
        d_relu_2 = self.Relu_2.backward(d_maxpool_2)
        d_conv_2 = self.Conv_2.backward(d_relu_2)
        d_maxpool_1 = self.Maxpool_1.backward(d_conv_2)
        d_relu_1 = self.Relu_1.backward(d_maxpool_1)
        dX = self.Conv_1.backward(d_relu_1)

        return loss

    def predict(self, X):
        predictions = self.FC.forward(
            self.Flattener.forward(
                self.Maxpool_2.forward(
                    self.Relu_2.forward(
                        self.Conv_2.forward(
                            self.Maxpool_1.forward(
                                self.Relu_1.forward(
                                    self.Conv_1.forward(X))))))))
        return np.argmax(predictions, axis=1)

    def params(self):
        # Aggregate all the params from all the layers that have parameters
        return {
            'conv1_W': self.Conv_1.params()['W'],
            'conv1_B': self.Conv_1.params()['B'],
            'conv2_W': self.Conv_2.params()['W'],
            'conv2_B': self.Conv_2.params()['B'],
            'fc_W': self.FC.params()['W'],
            'fc_B': self.FC.params()['B'],
        }
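# Editor's sketch: the shape arithmetic behind the asserts in __init__ above.
# A 3x3 convolution with padding 1 maps spatial size s to s + 2*1 - 3 + 1 = s,
# and 4x4 max-pooling with stride 4 then divides it by 4. The helper below is
# illustrative only and not used by the solutions in this file.

def conv_pool_out_size(size, filter_size=3, padding=1, pool_size=4):
    """Spatial size after one Conv[3x3, pad 1] -> MaxPool[4x4, stride 4] stage."""
    after_conv = size + 2 * padding - filter_size + 1
    assert after_conv % pool_size == 0, "pooling must tile the feature map"
    return after_conv // pool_size

# For the (32, 32, 3) inputs promised in the docstring:
# 32 -> 8 after the first stage, 8 -> 2 after the second,
# so the FC layer sees 2 * 2 * conv2_channels input features.
assert conv_pool_out_size(conv_pool_out_size(32)) == 2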
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.fc1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.ReLU = ReLULayer()
        self.fc2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        params = self.params()
        for param in params.values():
            param.grad = np.zeros_like(param.value)

        # Forward pass
        fc1_forw = self.fc1.forward(X)
        relu_forw = self.ReLU.forward(fc1_forw)
        fc2_forw = self.fc2.forward(relu_forw)
        # This solution's softmax_with_cross_entropy expects 1-indexed
        # targets, hence y + 1
        loss, grad = softmax_with_cross_entropy(fc2_forw, y + 1)

        # Backward pass
        fc2_back = self.fc2.backward(grad)
        relu_back = self.ReLU.backward(fc2_back)
        self.fc1.backward(relu_back)

        # L2 regularization - this solution regularizes the weights only
        loss_l2_fc1, grad_l2_fc1 = l2_regularization(params['fc1_W'].value, self.reg)
        loss_l2_fc2, grad_l2_fc2 = l2_regularization(params['fc2_W'].value, self.reg)
        params['fc1_W'].grad += grad_l2_fc1
        params['fc2_W'].grad += grad_l2_fc2
        loss += loss_l2_fc1 + loss_l2_fc2

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        predictions = self.fc2.forward(self.ReLU.forward(self.fc1.forward(X)))
        return np.argmax(predictions, axis=1)

    def params(self):
        # Aggregate all params of both layers
        return {
            'fc1_W': self.fc1.params()['W'],
            'fc1_B': self.fc1.params()['B'],
            'fc2_W': self.fc2.params()['W'],
            'fc2_B': self.fc2.params()['B'],
        }
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.layer_1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu = ReLULayer()
        self.layer_2 = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        params = self.params()
        for key in params:
            params[key].grad = np.zeros_like(params[key].value)

        # Forward pass
        L1 = self.layer_1.forward(X)
        R1 = self.relu.forward(L1)
        L2 = self.layer_2.forward(R1)
        loss, loss_grad = softmax_with_cross_entropy(L2, y)

        # Backward pass
        d_w2 = self.layer_2.backward(loss_grad)
        d_relu = self.relu.backward(d_w2)
        d_w1 = self.layer_1.backward(d_relu)

        # L2 regularization on all params
        for key in params:
            reg_loss, reg_grad = l2_regularization(params[key].value, self.reg)
            loss += reg_loss
            params[key].grad += reg_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        L1 = self.layer_1.forward(X)
        R1 = self.relu.forward(L1)
        L2 = self.layer_2.forward(R1)
        # argmax over raw logits; applying softmax first would not change
        # the winning class
        return np.argmax(L2, axis=1)

    def params(self):
        result = {}
        result['W1'] = self.layer_1.params()['W']
        result['B1'] = self.layer_1.params()['B']
        result['W2'] = self.layer_2.params()['W']
        result['B2'] = self.layer_2.params()['B']
        return result
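# Editor's sketch: the normalization that predict() above deliberately skips
# (the original draft had it commented out). Subtracting the row-wise max
# before exponentiating is the standard guard against overflow in softmax;
# it changes nothing because softmax is shift-invariant, and argmax is
# unaffected either way. Illustrative only.

import numpy as np

def stable_softmax(logits):
    """Row-wise softmax with the usual max-subtraction trick."""
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=1, keepdims=True)

_big = np.array([[1000.0, 1001.0], [-5.0, 3.0]])
_probs = stable_softmax(_big)  # no overflow despite the large logits
assert (np.argmax(_probs, axis=1) == np.argmax(_big, axis=1)).all()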
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.layer_1 = FullyConnectedLayer(n_input, hidden_layer_size)
        self.relu = ReLULayer()
        self.layer_2 = FullyConnectedLayer(hidden_layer_size, n_output)
        self.hidden_layer_size = hidden_layer_size
        self.n_input = n_input
        self.n_output = n_output

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        for v in self.params().values():
            v.grad.fill(0)

        # Forward pass: FC -> ReLU -> FC, then softmax loss
        l_1 = self.layer_1.forward(X)
        l_relu = self.relu.forward(l_1)
        l_2 = self.layer_2.forward(l_relu)
        loss, grad_pred = softmax_with_cross_entropy(l_2, y)

        # Backward pass
        grad_l_2 = self.layer_2.backward(grad_pred)
        grad_relu = self.relu.backward(grad_l_2)
        grad_l_1 = self.layer_1.backward(grad_relu)

        # L2 regularization on all params (moved after the backward pass so
        # the result is correct whether layer backward accumulates into or
        # assigns the gradients)
        for v in self.params().values():
            l2_loss, l2_grad = l2_regularization(v.value, self.reg)
            loss += l2_loss
            v.grad += l2_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        l_1 = self.layer_1.forward(X)
        l_relu = self.relu.forward(l_1)
        l_2 = self.layer_2.forward(l_relu)
        return np.argmax(l_2, axis=1)

    def params(self):
        p1 = self.layer_1.params()
        p2 = self.layer_2.params()
        return {"W1": p1["W"], "B1": p1["B"], "W2": p2["W"], "B2": p2["B"]}
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        # Create layers
        self.first_layer = FullyConnectedLayer(n_input, hidden_layer_size)
        self.activation = ReLULayer()
        self.second_layer = FullyConnectedLayer(hidden_layer_size, n_output)
        # Keep references to the layers' params for aggregation below
        self.first_layer_params = self.first_layer.params()
        self.second_layer_params = self.second_layer.params()

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        for key, value in self.params().items():
            value.grad.fill(0)

        # Forward pass
        first_layer_res = self.first_layer.forward(X)
        activation_layer_res = self.activation.forward(first_layer_res)
        second_layer_res = self.second_layer.forward(activation_layer_res)
        loss, d_preds = softmax_with_cross_entropy(second_layer_res, y)

        # Backward pass
        second_layer_grad = self.second_layer.backward(d_preds)
        activation_layer_grad = self.activation.backward(second_layer_grad)
        first_layer_grad = self.first_layer.backward(activation_layer_grad)

        # L2 regularization on all params
        for key, param in self.params().items():
            l2_loss, l2_grad = l2_regularization(param.value, self.reg)
            loss += l2_loss
            param.grad += l2_grad

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        first_layer_res = self.first_layer.forward(X)
        activation_layer_res = self.activation.forward(first_layer_res)
        second_layer_res = self.second_layer.forward(activation_layer_res)
        return np.argmax(second_layer_res, axis=-1)

    def params(self):
        # Aggregate params of both layers, suffixing keys with the layer index
        d1 = {k + '1': v for k, v in self.first_layer_params.items()}
        d2 = {k + '2': v for k, v in self.second_layer_params.items()}
        return {**d1, **d2}
class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
        n_input, int - dimension of the model input
        n_output, int - number of classes to predict
        hidden_layer_size, int - number of neurons in the hidden layer
        reg, float - L2 regularization strength
        """
        self.reg = reg
        self.first_layer = FullyConnectedLayer(n_input, hidden_layer_size)
        self.first_relu = ReLULayer()
        self.second_layer = FullyConnectedLayer(hidden_layer_size, n_output)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        for name, w in self.params().items():
            w.grad = np.zeros_like(w.grad)

        # Forward pass
        value = self.first_layer.forward(X)
        value = self.first_relu.forward(value)
        value = self.second_layer.forward(value)
        loss, grads = softmax_with_cross_entropy(value, y)

        # Backward pass
        value = self.second_layer.backward(grads)
        value = self.first_relu.backward(value)
        value = self.first_layer.backward(value)

        # L2 regularization on all params
        for name, w in self.params().items():
            loss_delta, grad_delta = l2_regularization(w.value, self.reg)
            w.grad += grad_delta
            loss += loss_delta

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        pred = self.first_layer.forward(X)
        pred = self.first_relu.forward(pred)
        pred = self.second_layer.forward(pred)
        return np.argmax(pred, axis=1)

    def params(self):
        result = {}
        result['first_layer_W'] = self.first_layer.params()['W']
        result['first_layer_B'] = self.first_layer.params()['B']
        result['second_layer_W'] = self.second_layer.params()['W']
        result['second_layer_B'] = self.second_layer.params()['B']
        return result
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        width, height, n_channels = input_shape
        self.conv1 = ConvolutionalLayer(n_channels, conv1_channels, 3, 1)
        self.reLu1 = ReLULayer()
        self.mxPl1 = MaxPoolingLayer(4, 4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.reLu2 = ReLULayer()
        self.mxPl2 = MaxPoolingLayer(4, 4)
        self.flat = Flattener()
        # Each Conv[3x3, pad 1] preserves spatial size and each
        # MaxPool[4x4, stride 4] divides it by 4, so two stages divide by 16;
        # for the promised 32x32 inputs this equals the previously hardcoded
        # 4 * conv2_channels (= 2 * 2 * conv2_channels)
        self.fCL = FullyConnectedLayer(
            (width // 16) * (height // 16) * conv2_channels, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass.
        # L2 regularization is intentionally omitted; it is not needed
        # in this assignment.
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        # Forward pass
        pred = self.conv1.forward(X)
        pred = self.reLu1.forward(pred)
        pred = self.mxPl1.forward(pred)
        pred = self.conv2.forward(pred)
        pred = self.reLu2.forward(pred)
        pred = self.mxPl2.forward(pred)
        pred = self.flat.forward(pred)
        pred = self.fCL.forward(pred)

        loss, loss_grad = softmax_with_cross_entropy(pred, y)

        # Backward pass
        grad = self.fCL.backward(loss_grad)
        grad = self.flat.backward(grad)
        grad = self.mxPl2.backward(grad)
        grad = self.reLu2.backward(grad)
        grad = self.conv2.backward(grad)
        grad = self.mxPl1.backward(grad)
        grad = self.reLu1.backward(grad)
        grad = self.conv1.backward(grad)

        return loss

    def predict(self, X):
        pred = self.conv1.forward(X)
        pred = self.reLu1.forward(pred)
        pred = self.mxPl1.forward(pred)
        pred = self.conv2.forward(pred)
        pred = self.reLu2.forward(pred)
        pred = self.mxPl2.forward(pred)
        pred = self.flat.forward(pred)
        pred = self.fCL.forward(pred)
        return np.argmax(pred, axis=1)

    def params(self):
        result = {}
        result['Conv1W'] = self.conv1.params()['W']
        result['Conv1B'] = self.conv1.params()['B']
        result['Conv2W'] = self.conv2.params()['W']
        result['Conv2B'] = self.conv2.params()['B']
        result['FC_W'] = self.fCL.params()['W']
        result['FC_B'] = self.fCL.params()['B']
        return result
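# Editor's sketch: a quick smoke test for a ConvNet implementation like the
# ones above, checking that a forward/backward pass runs and predictions have
# the right shape. The function and its random batch are hypothetical,
# illustrative code only; pass in whichever ConvNet class is in scope.

import numpy as np

def smoke_test(model_cls, n_classes=10, conv1=2, conv2=2, batch_size=4):
    model = model_cls((32, 32, 3), n_classes, conv1, conv2)
    X = np.random.randn(batch_size, 32, 32, 3)
    y = np.random.randint(n_classes, size=batch_size)
    loss = model.compute_loss_and_gradients(X, y)
    assert np.isfinite(loss)
    assert model.predict(X).shape == (batch_size,)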