# Shared imports for all ConvNet variants below (assumes the assignment's
# layers module with these names)
import numpy as np

from layers import (
    FullyConnectedLayer, ReLULayer, ConvolutionalLayer, Flattener,
    MaxPoolingLayer, softmax, softmax_with_cross_entropy, l2_regularization)


class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        input_width, input_height, input_channels = input_shape

        self.conv1 = ConvolutionalLayer(input_channels, conv1_channels,
                                        filter_size=3, padding=1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(pool_size=4, stride=4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        filter_size=3, padding=1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(pool_size=4, stride=4)
        self.flattener = Flattener()
        # Two 4x4/4 poolings shrink each spatial dimension by 16,
        # so the flattened size is width * height * channels // 4**4
        self.fc = FullyConnectedLayer(
            input_width * input_height * conv2_channels // (4**4),
            n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for layer in (self.conv1, self.conv2, self.fc):
            for param in layer.params().values():
                param.grad = np.zeros_like(param.value)

        # Compute loss and fill param gradients; L2 regularization is
        # not needed in this assignment
        conv1_fwd = self.conv1.forward(X)
        relu1_fwd = self.relu1.forward(conv1_fwd)
        maxpool1_fwd = self.maxpool1.forward(relu1_fwd)
        conv2_fwd = self.conv2.forward(maxpool1_fwd)
        relu2_fwd = self.relu2.forward(conv2_fwd)
        maxpool2_fwd = self.maxpool2.forward(relu2_fwd)
        flattener_fwd = self.flattener.forward(maxpool2_fwd)
        fc_fwd = self.fc.forward(flattener_fwd)

        loss, dprediction = softmax_with_cross_entropy(fc_fwd, y)

        fc_bwd = self.fc.backward(dprediction)
        flattener_bwd = self.flattener.backward(fc_bwd)
        maxpool2_bwd = self.maxpool2.backward(flattener_bwd)
        relu2_bwd = self.relu2.backward(maxpool2_bwd)
        conv2_bwd = self.conv2.backward(relu2_bwd)
        maxpool1_bwd = self.maxpool1.backward(conv2_bwd)
        relu1_bwd = self.relu1.backward(maxpool1_bwd)
        conv1_bwd = self.conv1.backward(relu1_bwd)

        return loss

    def predict(self, X):
        conv1_fwd = self.conv1.forward(X)
        relu1_fwd = self.relu1.forward(conv1_fwd)
        maxpool1_fwd = self.maxpool1.forward(relu1_fwd)
        conv2_fwd = self.conv2.forward(maxpool1_fwd)
        relu2_fwd = self.relu2.forward(conv2_fwd)
        maxpool2_fwd = self.maxpool2.forward(relu2_fwd)
        flattener_fwd = self.flattener.forward(maxpool2_fwd)
        fc_fwd = self.fc.forward(flattener_fwd)
        pred = np.argmax(fc_fwd, axis=1)
        return pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        result = {}
        for layer_name, layer in (('conv1', self.conv1),
                                  ('conv2', self.conv2),
                                  ('fc', self.fc)):
            for param_name, param in layer.params().items():
                result[f'{layer_name}.{param_name}'] = param
        return result
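# Usage sketch (not part of the assignment code): one SGD step driven by the
# params() dictionary that every variant here exposes. It assumes the course's
# Param container with .value and .grad fields; the batch data and
# hyperparameters are purely illustrative.
model = ConvNet(input_shape=(32, 32, 3), n_output_classes=10,
                conv1_channels=4, conv2_channels=8)
X_batch = np.random.randn(16, 32, 32, 3)     # (batch, height, width, channels)
y_batch = np.random.randint(0, 10, size=16)  # integer class labels

loss = model.compute_loss_and_gradients(X_batch, y_batch)
for param in model.params().values():
    param.value -= 1e-2 * param.grad         # plain SGD update
predictions = model.predict(X_batch)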
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        filter_size = 3
        pool_size = 4

        self.conv1 = ConvolutionalLayer(input_shape[2], conv1_channels,
                                        filter_size, padding=1)
        self.relu1 = ReLULayer()
        self.max_pool1 = MaxPoolingLayer(pool_size, stride=pool_size)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        filter_size, padding=1)
        self.relu2 = ReLULayer()
        self.max_pool2 = MaxPoolingLayer(pool_size, stride=pool_size)
        self.flatten = Flattener()
        self.fc = FullyConnectedLayer(n_input=4 * conv2_channels,
                                      n_output=n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for param_value in self.params().values():
            param_value.grad = np.zeros_like(param_value.value)

        # Compute loss and fill param gradients
        # Don't worry about implementing L2 regularization, we will not
        # need it in this assignment
        X = self.conv1.forward(X)
        X = self.relu1.forward(X)
        X = self.max_pool1.forward(X)
        X = self.conv2.forward(X)
        X = self.relu2.forward(X)
        X = self.max_pool2.forward(X)
        X = self.flatten.forward(X)
        X = self.fc.forward(X)

        loss, grad = softmax_with_cross_entropy(X, y)

        grad = self.fc.backward(grad)
        grad = self.flatten.backward(grad)
        grad = self.max_pool2.backward(grad)
        grad = self.relu2.backward(grad)
        grad = self.conv2.backward(grad)
        grad = self.max_pool1.backward(grad)
        grad = self.relu1.backward(grad)
        grad = self.conv1.backward(grad)

        return loss

    def predict(self, X):
        X = self.conv1.forward(X)
        X = self.relu1.forward(X)
        X = self.max_pool1.forward(X)
        X = self.conv2.forward(X)
        X = self.relu2.forward(X)
        X = self.max_pool2.forward(X)
        X = self.flatten.forward(X)
        X = self.fc.forward(X)
        X = softmax(X)
        return np.argmax(X, axis=1)

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        result = {}
        layers_with_params = [self.conv1, self.conv2, self.fc]
        for i, layer in enumerate(layers_with_params):
            for param_key, param_value in layer.params().items():
                result[param_key + str(i)] = param_value
        return result
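# Why 4 * conv2_channels above: 3x3 convolutions with padding 1 preserve the
# spatial size, so only the two 4x4/4 poolings shrink the 32x32 input,
# 32 -> 8 -> 2, giving 2 * 2 * conv2_channels flattened features. A tiny
# helper (illustrative, not part of the assignment) makes that explicit:
def fc_input_size(width, height, conv2_channels, pool_size=4, n_pools=2):
    # assumes 'same' convolutions; each pooling divides both dimensions
    for _ in range(n_pools):
        width //= pool_size
        height //= pool_size
    return width * height * conv2_channels

assert fc_input_size(32, 32, conv2_channels=8) == 4 * 8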
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.height = input_shape[0]
        self.width = input_shape[1]
        self.input_channels = input_shape[2]
        self.n_output_classes = n_output_classes
        self.conv1_channels = conv1_channels
        self.conv2_channels = conv2_channels

        self.conv1_layer = ConvolutionalLayer(self.input_channels,
                                              self.conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(4, 4)
        self.conv2_layer = ConvolutionalLayer(self.conv1_channels,
                                              self.conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(4, 4)
        self.flattener = Flattener()
        self.fc_layer = FullyConnectedLayer(2 * 2 * self.conv2_channels,
                                            self.n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # nullify layers gradients
        params = self.params()
        # Conv1 layer
        params['W1'].grad = np.zeros((3, 3, self.input_channels,
                                      self.conv1_channels))
        params['B1'].grad = np.zeros(self.conv1_channels)
        # Conv2 layer
        params['W2'].grad = np.zeros((3, 3, self.conv1_channels,
                                      self.conv2_channels))
        params['B2'].grad = np.zeros(self.conv2_channels)
        # FC layer
        params['W3'].grad = np.zeros((2 * 2 * self.conv2_channels,
                                      self.n_output_classes))
        params['B3'].grad = np.zeros((1, self.n_output_classes))

        # forward pass
        conv_forward1 = self.conv1_layer.forward(X)
        relu_forward1 = self.relu1.forward(conv_forward1)
        maxpool_forward1 = self.maxpool1.forward(relu_forward1)
        conv_forward2 = self.conv2_layer.forward(maxpool_forward1)
        relu_forward2 = self.relu2.forward(conv_forward2)
        maxpool_forward2 = self.maxpool2.forward(relu_forward2)
        flattener_forward = self.flattener.forward(maxpool_forward2)
        fc_forward = self.fc_layer.forward(flattener_forward)

        # calculate loss and grad
        loss, grad = softmax_with_cross_entropy(fc_forward, y)

        # backward pass
        fc_backward = self.fc_layer.backward(grad)
        flattener_backward = self.flattener.backward(fc_backward)
        maxpool_backward2 = self.maxpool2.backward(flattener_backward)
        relu_backward2 = self.relu2.backward(maxpool_backward2)
        conv_backward2 = self.conv2_layer.backward(relu_backward2)
        maxpool_backward1 = self.maxpool1.backward(conv_backward2)
        relu_backward1 = self.relu1.backward(maxpool_backward1)
        conv_backward1 = self.conv1_layer.backward(relu_backward1)

        return loss

    def predict(self, X):
        # forward pass through all layers
        conv_forward1 = self.conv1_layer.forward(X)
        relu_forward1 = self.relu1.forward(conv_forward1)
        maxpool_forward1 = self.maxpool1.forward(relu_forward1)
        conv_forward2 = self.conv2_layer.forward(maxpool_forward1)
        relu_forward2 = self.relu2.forward(conv_forward2)
        maxpool_forward2 = self.maxpool2.forward(relu_forward2)
        flattener_forward = self.flattener.forward(maxpool_forward2)
        fc_forward = self.fc_layer.forward(flattener_forward)
        # make prediction
        prediction = fc_forward.argmax(axis=1)
        return prediction

    def params(self):
        result = {'W1': self.conv1_layer.params()['W'],
                  'B1': self.conv1_layer.params()['B'],
                  'W2': self.conv2_layer.params()['W'],
                  'B2': self.conv2_layer.params()['B'],
                  'W3': self.fc_layer.params()['W'],
                  'B3': self.fc_layer.params()['B']}
        return result
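# The implementations here read and overwrite .grad on the objects returned by
# params(). A minimal container consistent with that usage (a sketch of the
# course's Param class, assumed rather than copied from it):
class Param:
    """Trainable parameter: its value plus the gradient accumulated
    for it during the backward pass."""

    def __init__(self, value):
        self.value = value
        self.grad = np.zeros_like(value)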
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        width, height, channels = input_shape
        filter_size = 3
        padding = 1
        pool_size = 4
        pool_stride = 4

        self.Conv1 = ConvolutionalLayer(channels, conv1_channels,
                                        filter_size, padding)
        self.ReLU1 = ReLULayer()
        self.MaxPool1 = MaxPoolingLayer(pool_size, pool_stride)
        self.Conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        filter_size, padding)
        self.ReLU2 = ReLULayer()
        self.MaxPool2 = MaxPoolingLayer(pool_size, pool_stride)

        left_width = width // pool_stride // pool_stride
        left_height = height // pool_stride // pool_stride
        self.Flat = Flattener()
        self.FullyConnected = FullyConnectedLayer(
            left_width * left_height * conv2_channels, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        # Don't worry about implementing L2 regularization, we will not
        # need it in this assignment
        for v in self.params().values():
            v.grad = np.zeros(v.grad.shape)

        out = self.Conv1.forward(X)
        out = self.ReLU1.forward(out)
        out = self.MaxPool1.forward(out)
        out = self.Conv2.forward(out)
        out = self.ReLU2.forward(out)
        out = self.MaxPool2.forward(out)
        out = self.Flat.forward(out)
        out = self.FullyConnected.forward(out)

        loss, d_out = softmax_with_cross_entropy(out, y)

        d_out = self.FullyConnected.backward(d_out)
        d_out = self.Flat.backward(d_out)
        d_out = self.MaxPool2.backward(d_out)
        d_out = self.ReLU2.backward(d_out)
        d_out = self.Conv2.backward(d_out)
        d_out = self.MaxPool1.backward(d_out)
        d_out = self.ReLU1.backward(d_out)
        d_out = self.Conv1.backward(d_out)

        return loss

    def predict(self, X):
        out = self.Conv1.forward(X)
        out = self.ReLU1.forward(out)
        out = self.MaxPool1.forward(out)
        out = self.Conv2.forward(out)
        out = self.ReLU2.forward(out)
        out = self.MaxPool2.forward(out)
        out = self.Flat.forward(out)
        out = self.FullyConnected.forward(out)
        pred = np.argmax(out, axis=1)
        return pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        result = {}
        name2layer = {
            "Conv1": self.Conv1,
            "Conv2": self.Conv2,
            "Fully": self.FullyConnected
        }
        for name, layer in name2layer.items():
            for k, v in layer.params().items():
                result['{}_{}'.format(name, k)] = v
        return result
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.out_classes = n_output_classes
        image_width, image_height, in_channels = input_shape

        self.Conv1 = ConvolutionalLayer(in_channels, conv1_channels, 3, 1)
        self.ReLU1 = ReLULayer()
        self.MaxPool1 = MaxPoolingLayer(4, 4)
        self.Conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.ReLU2 = ReLULayer()
        self.MaxPool2 = MaxPoolingLayer(4, 4)
        self.Flatten = Flattener()
        self.FC = FullyConnectedLayer(4 * conv2_channels, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        # Don't worry about implementing L2 regularization, we will not
        # need it in this assignment
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        preds = self.Conv1.forward(X)
        preds = self.ReLU1.forward(preds)
        preds = self.MaxPool1.forward(preds)
        preds = self.Conv2.forward(preds)
        preds = self.ReLU2.forward(preds)
        preds = self.MaxPool2.forward(preds)
        preds = self.Flatten.forward(preds)
        preds = self.FC.forward(preds)

        loss, grad = softmax_with_cross_entropy(preds, y)

        grad = self.FC.backward(grad)
        grad = self.Flatten.backward(grad)
        grad = self.MaxPool2.backward(grad)
        grad = self.ReLU2.backward(grad)
        grad = self.Conv2.backward(grad)
        grad = self.MaxPool1.backward(grad)
        grad = self.ReLU1.backward(grad)
        grad = self.Conv1.backward(grad)

        return loss

    def predict(self, X):
        preds = self.Conv1.forward(X)
        preds = self.ReLU1.forward(preds)
        preds = self.MaxPool1.forward(preds)
        preds = self.Conv2.forward(preds)
        preds = self.ReLU2.forward(preds)
        preds = self.MaxPool2.forward(preds)
        preds = self.Flatten.forward(preds)
        preds = self.FC.forward(preds)
        probs = softmax(preds)
        return np.argmax(probs, axis=1)

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        return {
            'Conv1.W': self.Conv1.W,
            'Conv1.B': self.Conv1.B,
            'Conv2.W': self.Conv2.W,
            'Conv2.B': self.Conv2.B,
            'FC.W': self.FC.W,
            'FC.B': self.FC.B
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels, reg):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        reg, float - L2 regularization strength for the FC layer
        """
        self.reg = reg
        self.conv1 = ConvolutionalLayer(in_channels=input_shape[-1],
                                        out_channels=conv1_channels,
                                        filter_size=3, padding=1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(pool_size=4, stride=4)
        self.conv2 = ConvolutionalLayer(in_channels=conv1_channels,
                                        out_channels=conv2_channels,
                                        filter_size=3, padding=1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(pool_size=4, stride=4)
        self.flattener = Flattener()
        # n_input = 4 * conv2_channels - hard-coded because of the
        # constant picture size (32, 32, 3)
        self.fullyconlayer = FullyConnectedLayer(n_input=4 * conv2_channels,
                                                 n_output=n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        to_relu = self.conv1.forward(X)
        to_maxpool1 = self.relu1.forward(to_relu)
        to_conv2 = self.maxpool1.forward(to_maxpool1)
        to_relu2 = self.conv2.forward(to_conv2)
        to_maxpool2 = self.relu2.forward(to_relu2)
        to_flat = self.maxpool2.forward(to_maxpool2)
        to_fc_layer = self.flattener.forward(to_flat)
        preds = self.fullyconlayer.forward(to_fc_layer)

        loss, dprediction = softmax_with_cross_entropy(preds, y)

        grad_from_fc_layer = self.fullyconlayer.backward(dprediction)
        grad_from_flatten = self.flattener.backward(grad_from_fc_layer)
        grad_from_maxpool2 = self.maxpool2.backward(grad_from_flatten)
        grad_from_relu2 = self.relu2.backward(grad_from_maxpool2)
        grad_from_conv2 = self.conv2.backward(grad_from_relu2)
        grad_from_maxpool1 = self.maxpool1.backward(grad_from_conv2)
        grad_from_relu1 = self.relu1.backward(grad_from_maxpool1)
        grad_from_conv1 = self.conv1.backward(grad_from_relu1)

        # L2 regularization is applied to the FC layer weights only
        W_fc = self.fullyconlayer.params()['W']
        loss_fc, grad_fc = l2_regularization(W_fc.value, self.reg)
        loss += loss_fc
        W_fc.grad += grad_fc

        return loss

    def predict(self, X):
        to_relu = self.conv1.forward(X)
        to_maxpool1 = self.relu1.forward(to_relu)
        to_conv2 = self.maxpool1.forward(to_maxpool1)
        to_relu2 = self.conv2.forward(to_conv2)
        to_maxpool2 = self.relu2.forward(to_relu2)
        to_flat = self.maxpool2.forward(to_maxpool2)
        to_fc_layer = self.flattener.forward(to_flat)
        preds = self.fullyconlayer.forward(to_fc_layer)
        probs = softmax(preds)
        pred = np.argmax(probs, axis=-1)
        return pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        return {
            'W_fc_layer': self.fullyconlayer.params()['W'],
            'B_fc_layer': self.fullyconlayer.params()['B'],
            'W_con1_layer': self.conv1.params()['W'],
            'B_con1_layer': self.conv1.params()['B'],
            'W_con2_layer': self.conv2.params()['W'],
            'B_con2_layer': self.conv2.params()['B']
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        image_width, image_height, n_channels = input_shape

        self.conv1 = ConvolutionalLayer(n_channels, conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.maxp1 = MaxPoolingLayer(4, 4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.maxp2 = MaxPoolingLayer(4, 4)
        self.flatn = Flattener()
        fc_input = int(image_width * image_height * conv2_channels / pow(4, 4))
        self.fc = FullyConnectedLayer(fc_input, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        self.conv1.W.grad = np.zeros_like(self.conv1.W.grad)
        self.conv1.B.grad = np.zeros_like(self.conv1.B.grad)
        self.conv2.W.grad = np.zeros_like(self.conv2.W.grad)
        self.conv2.B.grad = np.zeros_like(self.conv2.B.grad)
        self.fc.W.grad = np.zeros_like(self.fc.W.grad)
        self.fc.B.grad = np.zeros_like(self.fc.B.grad)

        # Compute loss and fill param gradients
        # Don't worry about implementing L2 regularization, we will not
        # need it in this assignment
        fconv1 = self.conv1.forward(X)
        frelu1 = self.relu1.forward(fconv1)
        fmaxp1 = self.maxp1.forward(frelu1)
        fconv2 = self.conv2.forward(fmaxp1)
        frelu2 = self.relu2.forward(fconv2)
        fmaxp2 = self.maxp2.forward(frelu2)
        fflatn = self.flatn.forward(fmaxp2)
        ffc = self.fc.forward(fflatn)

        loss, d_preds = softmax_with_cross_entropy(ffc, y)

        bfc = self.fc.backward(d_preds)
        bflatn = self.flatn.backward(bfc)
        bmaxp2 = self.maxp2.backward(bflatn)
        brelu2 = self.relu2.backward(bmaxp2)
        bconv2 = self.conv2.backward(brelu2)
        bmaxp1 = self.maxp1.backward(bconv2)
        brelu1 = self.relu1.backward(bmaxp1)
        bconv1 = self.conv1.backward(brelu1)

        return loss

    def predict(self, X):
        fconv1 = self.conv1.forward(X)
        frelu1 = self.relu1.forward(fconv1)
        fmaxp1 = self.maxp1.forward(frelu1)
        fconv2 = self.conv2.forward(fmaxp1)
        frelu2 = self.relu2.forward(fconv2)
        fmaxp2 = self.maxp2.forward(frelu2)
        fflatn = self.flatn.forward(fmaxp2)
        ffc = self.fc.forward(fflatn)
        prob = softmax(ffc)
        pred = np.argmax(prob, axis=1)
        return pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        return {
            'Wc1': self.conv1.W,
            'Bc1': self.conv1.B,
            'Wc2': self.conv2.W,
            'Bc2': self.conv2.B,
            'Wfc': self.fc.W,
            'Bfc': self.fc.B
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        _, _, input_channels = input_shape

        self.conv1 = ConvolutionalLayer(input_channels, conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.pool1 = MaxPoolingLayer(4, 4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.pool2 = MaxPoolingLayer(4, 4)
        self.flattener = Flattener()
        self.fc = FullyConnectedLayer(4 * conv2_channels, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        out = self.pool1.forward(self.relu1.forward(self.conv1.forward(X)))
        out = self.pool2.forward(self.relu2.forward(self.conv2.forward(out)))
        out = self.fc.forward(self.flattener.forward(out))

        loss, grad = softmax_with_cross_entropy(out, y)

        grad = self.flattener.backward(self.fc.backward(grad))
        grad = self.conv2.backward(
            self.relu2.backward(self.pool2.backward(grad)))
        grad = self.conv1.backward(
            self.relu1.backward(self.pool1.backward(grad)))

        return loss

    def predict(self, X):
        out = self.pool1.forward(self.relu1.forward(self.conv1.forward(X)))
        out = self.pool2.forward(self.relu2.forward(self.conv2.forward(out)))
        out = self.fc.forward(self.flattener.forward(out))
        predictions = softmax(out)
        return np.argmax(predictions, axis=1)

    def params(self):
        # note: the conv2 entries must point at conv2's parameters,
        # not conv1's
        return {
            'conv1.W': self.conv1.W,
            'conv1.B': self.conv1.B,
            'conv2.W': self.conv2.W,
            'conv2.B': self.conv2.B,
            'fc.W': self.fc.W,
            'fc.B': self.fc.B
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels, filter_size=3):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.input_shape = input_shape
        self.n_output_classes = n_output_classes
        self.conv1_channels = conv1_channels
        self.conv2_channels = conv2_channels
        self.filter_size = filter_size
        self.padding = 1

        # spatial sizes after each conv (stride 1) and each 4x4/4 pooling
        c1 = int((input_shape[0] - self.filter_size + 2 * self.padding) / 1) + 1
        mp1 = int((c1 - 4) / 4) + 1
        c2 = int((mp1 - self.filter_size + 2 * self.padding) / 1) + 1
        self.size_after_2maxpool = int((c2 - 4) / 4) + 1

        self.RL1 = ReLULayer()
        self.RL2 = ReLULayer()
        self.MaxPool1 = MaxPoolingLayer(pool_size=4, stride=4)
        self.MaxPool2 = MaxPoolingLayer(pool_size=4, stride=4)
        self.Flatten = Flattener()
        self.Conv1 = ConvolutionalLayer(in_channels=self.input_shape[-1],
                                        out_channels=conv1_channels,
                                        filter_size=self.filter_size,
                                        padding=self.padding)
        self.Conv2 = ConvolutionalLayer(in_channels=conv1_channels,
                                        out_channels=conv2_channels,
                                        filter_size=self.filter_size,
                                        padding=self.padding)
        self.FC = FullyConnectedLayer(
            n_input=conv2_channels * self.size_after_2maxpool**2,
            n_output=self.n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        x = self.Conv1.forward(X)
        x = self.RL1.forward(x)
        x = self.MaxPool1.forward(x)
        x = self.Conv2.forward(x)
        x = self.RL2.forward(x)
        x = self.MaxPool2.forward(x)
        x = self.Flatten.forward(x)
        pred = self.FC.forward(x)

        loss, dpred = softmax_with_cross_entropy(pred, target_index=y)

        d_out = self.FC.backward(dpred)
        d_out = self.Flatten.backward(d_out)
        d_out = self.MaxPool2.backward(d_out)
        d_out = self.RL2.backward(d_out)
        d_out = self.Conv2.backward(d_out)
        d_out = self.MaxPool1.backward(d_out)
        d_out = self.RL1.backward(d_out)
        _ = self.Conv1.backward(d_out)

        return loss

    def predict(self, X):
        x = self.Conv1.forward(X)
        x = self.RL1.forward(x)
        x = self.MaxPool1.forward(x)
        x = self.Conv2.forward(x)
        x = self.RL2.forward(x)
        x = self.MaxPool2.forward(x)
        x = self.Flatten.forward(x)
        x = self.FC.forward(x)
        y_hat = softmax(predictions=x)
        y_hat = np.argmax(y_hat, axis=1)
        return y_hat

    def params(self):
        return {
            'Conv1.W': self.Conv1.W,
            'Conv1.B': self.Conv1.B,
            'Conv2.W': self.Conv2.W,
            'Conv2.B': self.Conv2.B,
            'FC.W': self.FC.W,
            'FC.B': self.FC.B
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        filter_size = 3
        padding = 1
        pool_size = 4
        stride = 4
        width, height, n_channels = input_shape

        # first conv + pool stage must produce an integer spatial size
        assert (height + 2 * padding - filter_size + 1) % pool_size == 0
        assert (width + 2 * padding - filter_size + 1) % pool_size == 0
        height = (height + 2 * padding - filter_size + 1) // pool_size
        width = (width + 2 * padding - filter_size + 1) // pool_size

        # same check for the second conv + pool stage
        assert (height + 2 * padding - filter_size + 1) % pool_size == 0
        assert (width + 2 * padding - filter_size + 1) % pool_size == 0
        height = (height + 2 * padding - filter_size + 1) // pool_size
        width = (width + 2 * padding - filter_size + 1) // pool_size

        self.Conv_1 = ConvolutionalLayer(n_channels, conv1_channels,
                                         filter_size, padding)
        self.Relu_1 = ReLULayer()
        self.Maxpool_1 = MaxPoolingLayer(pool_size, stride)
        self.Conv_2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                         filter_size, padding)
        self.Relu_2 = ReLULayer()
        self.Maxpool_2 = MaxPoolingLayer(pool_size, stride)
        self.Flattener = Flattener()
        self.FC = FullyConnectedLayer(height * width * conv2_channels,
                                      n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        # Compute loss and fill param gradients
        # Don't worry about implementing L2 regularization, we will not
        # need it in this assignment
        conv_1 = self.Conv_1.forward(X)
        relu_1 = self.Relu_1.forward(conv_1)
        maxpool_1 = self.Maxpool_1.forward(relu_1)
        conv_2 = self.Conv_2.forward(maxpool_1)
        relu_2 = self.Relu_2.forward(conv_2)
        maxpool_2 = self.Maxpool_2.forward(relu_2)
        flat = self.Flattener.forward(maxpool_2)
        fc = self.FC.forward(flat)

        loss, grad = softmax_with_cross_entropy(fc, y)

        d_fc = self.FC.backward(grad)
        d_flat = self.Flattener.backward(d_fc)
        d_maxpool_2 = self.Maxpool_2.backward(d_flat)
        d_relu_2 = self.Relu_2.backward(d_maxpool_2)
        d_conv_2 = self.Conv_2.backward(d_relu_2)
        d_maxpool_1 = self.Maxpool_1.backward(d_conv_2)
        d_relu_1 = self.Relu_1.backward(d_maxpool_1)
        dX = self.Conv_1.backward(d_relu_1)

        return loss

    def predict(self, X):
        predictions = self.FC.forward(
            self.Flattener.forward(
                self.Maxpool_2.forward(
                    self.Relu_2.forward(
                        self.Conv_2.forward(
                            self.Maxpool_1.forward(
                                self.Relu_1.forward(
                                    self.Conv_1.forward(X))))))))
        pred = np.argmax(predictions, axis=1)
        return pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        return {
            'conv1_W': self.Conv_1.params()['W'],
            'conv1_B': self.Conv_1.params()['B'],
            'conv2_W': self.Conv_2.params()['W'],
            'conv2_B': self.Conv_2.params()['B'],
            'fc_W': self.FC.params()['W'],
            'fc_B': self.FC.params()['B'],
        }
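# The dimension asserts in the variant above are instances of the standard
# output-size formula for convolution and pooling. A small helper
# (illustrative only, not from the assignment) states it once:
def conv_output_size(size, filter_size, padding=0, stride=1):
    # (size + 2 * padding - filter_size) // stride + 1; use padding=0 and
    # filter_size=pool_size for a pooling layer
    return (size + 2 * padding - filter_size) // stride + 1

# 32x32 input, 3x3 conv with padding 1, then 4x4 pooling with stride 4:
assert conv_output_size(32, 3, padding=1) == 32
assert conv_output_size(32, 4, stride=4) == 8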
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        width, height, channels = input_shape
        filter_size = 3
        pool_size = 4
        padding = 1
        stride = pool_size

        self.conv1 = ConvolutionalLayer(channels, conv1_channels,
                                        filter_size, padding)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(pool_size, stride)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        filter_size, padding)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(pool_size, stride)
        self.flatten = Flattener()
        n_fc_input = int(height / pool_size / pool_size *
                         width / pool_size / pool_size * conv2_channels)
        self.fc = FullyConnectedLayer(n_fc_input, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        # Don't worry about implementing L2 regularization, we will not
        # need it in this assignment

        # the cleaning of gradients
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        # forward pass
        out = self.conv1.forward(X)
        out = self.relu1.forward(out)
        out = self.maxpool1.forward(out)
        out = self.conv2.forward(out)
        out = self.relu2.forward(out)
        out = self.maxpool2.forward(out)
        out = self.flatten.forward(out)
        out = self.fc.forward(out)

        loss, d_preds = softmax_with_cross_entropy(out, y)

        # backward pass
        d_out = self.fc.backward(d_preds)
        d_out = self.flatten.backward(d_out)
        d_out = self.maxpool2.backward(d_out)
        d_out = self.relu2.backward(d_out)
        d_out = self.conv2.backward(d_out)
        d_out = self.maxpool1.backward(d_out)
        d_out = self.relu1.backward(d_out)
        d_out = self.conv1.backward(d_out)

        return loss

    def predict(self, X):
        out = self.conv1.forward(X)
        out = self.relu1.forward(out)
        out = self.maxpool1.forward(out)
        out = self.conv2.forward(out)
        out = self.relu2.forward(out)
        out = self.maxpool2.forward(out)
        out = self.flatten.forward(out)
        out = self.fc.forward(out)
        probs = softmax(out)
        y_pred = np.argmax(probs, axis=1)
        return y_pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        return {
            "W1": self.conv1.params()["W"],
            "B1": self.conv1.params()["B"],
            "W2": self.conv2.params()["W"],
            "B2": self.conv2.params()["B"],
            "W3": self.fc.params()["W"],
            "B3": self.fc.params()["B"]
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        image_width, image_height, n_channels = input_shape
        conv_padding = 0
        conv_filter_size = 3
        max_pool_size = 4
        max_pool_stride = 1

        conv1_output_size = image_width - conv_filter_size + 1
        maxpool1_output_size = int(
            (conv1_output_size - max_pool_size) / max_pool_stride) + 1
        conv2_output_size = maxpool1_output_size - conv_filter_size + 1
        maxpool2_output_size = int(
            (conv2_output_size - max_pool_size) / max_pool_stride) + 1
        # correct if height == width !!!
        fc_input_size = (maxpool2_output_size * maxpool2_output_size *
                         conv2_channels)

        self.conv1 = ConvolutionalLayer(n_channels, conv1_channels,
                                        conv_filter_size, conv_padding)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(max_pool_size, max_pool_stride)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                        conv_filter_size, conv_padding)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(max_pool_size, max_pool_stride)
        self.flattener = Flattener()
        self.fc = FullyConnectedLayer(fc_input_size, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        # Don't worry about implementing L2 regularization, we will not
        # need it in this assignment
        self._zero_grad()
        predictions = self._forward(X)
        loss, dprediction = softmax_with_cross_entropy(predictions, y)
        self._backward(dprediction)
        return loss

    def _zero_grad(self):
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

    def _forward(self, X):
        output = self.conv1.forward(X)
        output = self.relu1.forward(output)
        output = self.maxpool1.forward(output)
        output = self.conv2.forward(output)
        output = self.relu2.forward(output)
        output = self.maxpool2.forward(output)
        output = self.flattener.forward(output)
        output = self.fc.forward(output)
        return output

    def _backward(self, dprediction):
        grad = self.fc.backward(dprediction)
        grad = self.flattener.backward(grad)
        grad = self.maxpool2.backward(grad)
        grad = self.relu2.backward(grad)
        grad = self.conv2.backward(grad)
        grad = self.maxpool1.backward(grad)
        grad = self.relu1.backward(grad)
        self.conv1.backward(grad)

    def predict(self, X):
        predictions = self._forward(X)
        y_pred = np.argmax(softmax(predictions), axis=1)
        return y_pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        result = {}
        for k, v in self.conv1.params().items():
            result["".join(["conv1_", k])] = v
        for k, v in self.conv2.params().items():
            result["".join(["conv2_", k])] = v
        for k, v in self.fc.params().items():
            result["".join(["fc_", k])] = v
        return result
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels, reg=0):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        image_width, image_height, n_channels = input_shape
        padding_1 = 1
        padding_2 = 1
        filter_size_1 = 3
        filter_size_2 = 3
        pooling_size_1 = 4
        pooling_size_2 = 4
        stride_1 = 4
        stride_2 = 4

        # first conv stage
        height = image_height + 2 * padding_1
        width = image_width + 2 * padding_1
        out_height = height - filter_size_1 + 1
        out_width = width - filter_size_1 + 1

        # first pooling stage
        assert (out_height - pooling_size_1) % stride_1 == 0
        assert (out_width - pooling_size_1) % stride_1 == 0
        height = out_height
        width = out_width
        out_height = int((height - pooling_size_1) / stride_1 + 1)
        out_width = int((width - pooling_size_1) / stride_1 + 1)

        # second conv stage
        height = out_height + 2 * padding_2
        width = out_width + 2 * padding_2
        out_height = height - filter_size_2 + 1
        out_width = width - filter_size_2 + 1

        # second pooling stage
        assert (out_height - pooling_size_2) % stride_2 == 0
        assert (out_width - pooling_size_2) % stride_2 == 0
        height = out_height
        width = out_width
        out_height = int((height - pooling_size_2) / stride_2 + 1)
        out_width = int((width - pooling_size_2) / stride_2 + 1)

        self.Conv_first = ConvolutionalLayer(n_channels, conv1_channels,
                                             filter_size_1, padding_1)
        self.Relu_first = ReLULayer()
        self.Maxpool_first = MaxPoolingLayer(pooling_size_1, stride_1)
        self.Conv_second = ConvolutionalLayer(conv1_channels, conv2_channels,
                                              filter_size_2, padding_2)
        self.Relu_second = ReLULayer()
        self.Maxpool_second = MaxPoolingLayer(pooling_size_2, stride_2)
        self.Flattener = Flattener()
        self.FC = FullyConnectedLayer(out_height * out_width * conv2_channels,
                                      n_output_classes)
        self.n_output = n_output_classes
        self.reg = reg

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad = np.zeros_like(param.grad)

        X1 = self.Conv_first.forward(X)
        X1_Relu = self.Relu_first.forward(X1)
        X1_Max = self.Maxpool_first.forward(X1_Relu)
        X2 = self.Conv_second.forward(X1_Max)
        X2_Relu = self.Relu_second.forward(X2)
        X2_Max = self.Maxpool_second.forward(X2_Relu)
        X3 = self.Flattener.forward(X2_Max)
        X3_FC = self.FC.forward(X3)

        loss, dX3_FC = softmax_with_cross_entropy(X3_FC, y + 1)

        dX3 = self.FC.backward(dX3_FC)
        dX2_Max = self.Flattener.backward(dX3)
        dX2_Relu = self.Maxpool_second.backward(dX2_Max)
        dX2 = self.Relu_second.backward(dX2_Relu)
        dX1_Max = self.Conv_second.backward(dX2)
        dX1_Relu = self.Maxpool_first.backward(dX1_Max)
        dX1 = self.Relu_first.backward(dX1_Relu)
        dX = self.Conv_first.backward(dX1)

        # L2 regularization on the FC layer; both the weight and bias
        # gradients must get the corresponding regularization gradient
        reg_loss_w, reg_grad_w = l2_regularization(self.FC.W.value, self.reg)
        reg_loss_b, reg_grad_b = l2_regularization(self.FC.B.value, self.reg)
        loss += reg_loss_w + reg_loss_b
        self.FC.W.grad += reg_grad_w
        self.FC.B.grad += reg_grad_b

        return loss

    def predict(self, X):
        pred = np.zeros(X.shape[0], int)
        predictions = self.FC.forward(
            self.Flattener.forward(
                self.Maxpool_second.forward(
                    self.Relu_second.forward(
                        self.Conv_second.forward(
                            self.Maxpool_first.forward(
                                self.Relu_first.forward(
                                    self.Conv_first.forward(X))))))))
        # pick the class whose cross-entropy loss is smallest, i.e. the
        # class with the highest predicted score
        for i, prediction in enumerate(predictions):
            values = [softmax_with_cross_entropy(prediction,
                                                 target_index + 1)[0]
                      for target_index in range(self.n_output)]
            pred[i] = min(range(len(values)), key=values.__getitem__)
        return pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        result = {}
        dict_first = self.Conv_first.params()
        dict_second = self.Conv_second.params()
        dict_FC = self.FC.params()
        for key in dict_first:
            result[key + 'C1'] = dict_first[key]
        for key in dict_second:
            result[key + 'C2'] = dict_second[key]
        for key in dict_FC:
            result[key + 'F1'] = dict_FC[key]
        return result
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.input_shape = input_shape
        self.n_output_classes = n_output_classes

        self.layer1 = ConvolutionalLayer(3, conv1_channels, 3, 1)
        # -> 32x32xconv1_channels
        self.layer2 = ReLULayer()
        self.layer3 = MaxPoolingLayer(4, 4)  # -> 8x8xconv1_channels
        self.layer4 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        # -> 8x8xconv2_channels
        self.layer5 = ReLULayer()
        self.layer6 = MaxPoolingLayer(4, 4)  # -> 2x2xconv2_channels
        self.layer7 = Flattener()
        self.layer8 = FullyConnectedLayer(2 * 2 * conv2_channels,
                                          n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        # Don't worry about implementing L2 regularization, we will not
        # need it in this assignment
        self.layer1.W.grad = np.zeros_like(self.layer1.W.grad)
        self.layer1.B.grad = np.zeros_like(self.layer1.B.grad)
        self.layer4.W.grad = np.zeros_like(self.layer4.W.grad)
        self.layer4.B.grad = np.zeros_like(self.layer4.B.grad)
        self.layer8.W.grad = np.zeros_like(self.layer8.W.grad)
        self.layer8.B.grad = np.zeros_like(self.layer8.B.grad)

        out1 = self.layer1.forward(X)
        out2 = self.layer2.forward(out1)
        out3 = self.layer3.forward(out2)
        out4 = self.layer4.forward(out3)
        out5 = self.layer5.forward(out4)
        out6 = self.layer6.forward(out5)
        out7 = self.layer7.forward(out6)
        out8 = self.layer8.forward(out7)

        loss, grad = softmax_with_cross_entropy(out8, y)

        back8 = self.layer8.backward(grad)
        back7 = self.layer7.backward(back8)
        back6 = self.layer6.backward(back7)
        back5 = self.layer5.backward(back6)
        back4 = self.layer4.backward(back5)
        back3 = self.layer3.backward(back4)
        back2 = self.layer2.backward(back3)
        back1 = self.layer1.backward(back2)

        return loss

    def predict(self, X):
        out1 = self.layer1.forward(X)
        out2 = self.layer2.forward(out1)
        out3 = self.layer3.forward(out2)
        out4 = self.layer4.forward(out3)
        out5 = self.layer5.forward(out4)
        out6 = self.layer6.forward(out5)
        out7 = self.layer7.forward(out6)
        out8 = self.layer8.forward(out7)
        pred = np.argmax(out8, axis=1)
        return pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        # (note: layer4's bias needs its own key, not a duplicate of
        # layer1's)
        return {
            'layer1.W': self.layer1.W,
            'layer1.B': self.layer1.B,
            'layer4.W': self.layer4.W,
            'layer4.B': self.layer4.B,
            'layer8.W': self.layer8.W,
            'layer8.B': self.layer8.B,
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        width, height, channels = input_shape

        # note: conv1_channels/conv2_channels are the output channel counts,
        # not the filter sizes; the filters are 3x3 with padding 1
        self.conv1 = ConvolutionalLayer(in_channels=channels,
                                        out_channels=conv1_channels,
                                        filter_size=3, padding=1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(4, 4)
        self.conv2 = ConvolutionalLayer(in_channels=conv1_channels,
                                        out_channels=conv2_channels,
                                        filter_size=3, padding=1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(4, 4)
        self.flatten = Flattener()
        # 32x32 shrinks to 2x2 after two 4x4 poolings
        self.f_connected = FullyConnectedLayer(2 * 2 * conv2_channels,
                                               n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        Z1 = self.conv1.forward(X)
        A1 = self.relu1.forward(Z1)
        M1 = self.maxpool1.forward(A1)
        Z2 = self.conv2.forward(M1)
        A2 = self.relu2.forward(Z2)
        M2 = self.maxpool2.forward(A2)
        F = self.flatten.forward(M2)
        Out = self.f_connected.forward(F)

        loss, grad = softmax_with_cross_entropy(Out, y)

        grad_f_con = self.f_connected.backward(grad)
        grad_flatten = self.flatten.backward(grad_f_con)
        grad_maxpool2 = self.maxpool2.backward(grad_flatten)
        grad_relu2 = self.relu2.backward(grad_maxpool2)
        grad_conv2 = self.conv2.backward(grad_relu2)
        grad_maxpool1 = self.maxpool1.backward(grad_conv2)
        grad_relu1 = self.relu1.backward(grad_maxpool1)
        self.conv1.backward(grad_relu1)

        return loss

    def predict(self, X):
        Z1 = self.conv1.forward(X)
        A1 = self.relu1.forward(Z1)
        M1 = self.maxpool1.forward(A1)
        Z2 = self.conv2.forward(M1)
        A2 = self.relu2.forward(Z2)
        M2 = self.maxpool2.forward(A2)
        F = self.flatten.forward(M2)
        Out = np.argmax(self.f_connected.forward(F), axis=1)
        return Out

    def params(self):
        return {
            'W1': self.conv1.W,
            'B1': self.conv1.B,
            'W2': self.conv2.W,
            'B2': self.conv2.B,
            'W3': self.f_connected.W,
            'B3': self.f_connected.B
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        width, height, n_channels = input_shape

        self.conv1 = ConvolutionalLayer(n_channels, conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.maxpool1 = MaxPoolingLayer(4, 4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.maxpool2 = MaxPoolingLayer(4, 4)
        self.flatten = Flattener()
        self.fc = FullyConnectedLayer(
            (height // 4 // 4) * (width // 4 // 4) * conv2_channels,
            n_output_classes)

        self.conv1_params = self.conv1.params()
        self.conv2_params = self.conv2.params()
        self.fc_params = self.fc.params()

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        for value in self.params().values():
            value.grad.fill(0)

        # Compute loss and fill param gradients
        # Don't worry about implementing L2 regularization, we will not
        # need it in this assignment
        conv1 = self.conv1.forward(X)
        relu1 = self.relu1.forward(conv1)
        maxpool1 = self.maxpool1.forward(relu1)
        conv2 = self.conv2.forward(maxpool1)
        relu2 = self.relu2.forward(conv2)
        maxpool2 = self.maxpool2.forward(relu2)
        flatten = self.flatten.forward(maxpool2)
        fc = self.fc.forward(flatten)

        loss, d_preds = softmax_with_cross_entropy(fc, y)

        fc = self.fc.backward(d_preds)
        flatten = self.flatten.backward(fc)
        maxpool2 = self.maxpool2.backward(flatten)
        relu2 = self.relu2.backward(maxpool2)
        conv2 = self.conv2.backward(relu2)
        maxpool1 = self.maxpool1.backward(conv2)
        relu1 = self.relu1.backward(maxpool1)
        conv1 = self.conv1.backward(relu1)

        return loss

    def predict(self, X):
        conv1 = self.conv1.forward(X)
        relu1 = self.relu1.forward(conv1)
        maxpool1 = self.maxpool1.forward(relu1)
        conv2 = self.conv2.forward(maxpool1)
        relu2 = self.relu2.forward(conv2)
        maxpool2 = self.maxpool2.forward(relu2)
        flatten = self.flatten.forward(maxpool2)
        fc = self.fc.forward(flatten)
        return np.argmax(fc, axis=1)

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        d1 = {k + '1': v for k, v in self.conv1_params.items()}
        d2 = {k + '2': v for k, v in self.conv2_params.items()}
        d3 = {k + '3': v for k, v in self.fc_params.items()}
        return {**d1, **d2, **d3}
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> ReLU -> Maxpool[4x4] ->
    Conv[3x3] -> ReLU -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes, conv1_channels,
                 conv2_channels):
        """
        Initializes the neural network

        :param input_shape: tuple of 3 ints - image_width, image_height,
            n_channels
        :param n_output_classes: int - number of classes to predict
        :param conv1_channels: int - number of filters in the 1st conv layer
        :param conv2_channels: int - number of filters in the 2nd conv layer
        """
        self.convolution_one = ConvolutionalLayer(input_shape[2],
                                                  conv1_channels, 3, 1)
        self.relu_one = ReLULayer()
        self.maxpool_one = MaxPoolingLayer(4, 4)
        self.convolution_two = ConvolutionalLayer(conv1_channels,
                                                  conv2_channels, 3, 1)
        self.relu_two = ReLULayer()
        self.maxpool_two = MaxPoolingLayer(4, 4)
        self.flattener = Flattener()
        height = ((input_shape[0] + 2 * 1 - 3 + 1) // 4 + 2 * 1 - 3 + 1) // 4
        width = ((input_shape[1] + 2 * 1 - 3 + 1) // 4 + 2 * 1 - 3 + 1) // 4
        self.fc = FullyConnectedLayer(width * height * conv2_channels,
                                      n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        :param X: np array (batch_size, height, width, input_features) -
            input data
        :param y: np array of int (batch_size) - classes
        :return: cross-entropy loss using soft-max
        """
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        loss, grad = softmax_with_cross_entropy(
            self.fc.forward(
                self.flattener.forward(
                    self.maxpool_two.forward(
                        self.relu_two.forward(
                            self.convolution_two.forward(
                                self.maxpool_one.forward(
                                    self.relu_one.forward(
                                        self.convolution_one.forward(X)))))))),
            y)

        self.convolution_one.backward(
            self.relu_one.backward(
                self.maxpool_one.backward(
                    self.convolution_two.backward(
                        self.relu_two.backward(
                            self.maxpool_two.backward(
                                self.flattener.backward(
                                    self.fc.backward(grad))))))))

        return loss

    def predict(self, X):
        preds = self.fc.forward(
            self.flattener.forward(
                self.maxpool_two.forward(
                    self.relu_two.forward(
                        self.convolution_two.forward(
                            self.maxpool_one.forward(
                                self.relu_one.forward(
                                    self.convolution_one.forward(X))))))))
        y_pred = np.argmax(preds, axis=1)
        return y_pred

    def params(self):
        return {
            'conv1_W': self.convolution_one.W,
            'conv1_B': self.convolution_one.B,
            'conv2_W': self.convolution_two.W,
            'conv2_B': self.convolution_two.B,
            'fc_W': self.fc.W,
            'fc_B': self.fc.B
        }
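# Because params() exposes every Param of the model, a numeric gradient check
# is straightforward. A sketch with illustrative names (not the assignment's
# own checker), comparing analytic gradients against central differences:
def check_model_gradient(model, X, y, delta=1e-5, tol=1e-4):
    for name, param in model.params().items():
        model.compute_loss_and_gradients(X, y)
        analytic = param.grad.copy()
        it = np.nditer(param.value, flags=['multi_index'])
        while not it.finished:
            ix = it.multi_index
            orig = param.value[ix]
            param.value[ix] = orig + delta
            loss_plus = model.compute_loss_and_gradients(X, y)
            param.value[ix] = orig - delta
            loss_minus = model.compute_loss_and_gradients(X, y)
            param.value[ix] = orig
            numeric = (loss_plus - loss_minus) / (2 * delta)
            if abs(numeric - analytic[ix]) > tol:
                print(f"{name}{ix}: analytic {analytic[ix]:.6f}, "
                      f"numeric {numeric:.6f}")
                return False
            it.iternext()
    return True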
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes,
                 conv1_channels, conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.layer_1 = ConvolutionalLayer(input_shape[2], conv1_channels, 3, 1)
        self.layer_2 = ReLULayer()
        self.layer_3 = MaxPoolingLayer(4, 4)
        self.layer_4 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.layer_5 = ReLULayer()
        self.layer_6 = MaxPoolingLayer(4, 4)
        self.layer_7 = Flattener()
        # The two 4x poolings shrink each spatial dimension 16x in total,
        # so the flattened area shrinks by 16**2
        self.layer_8 = FullyConnectedLayer(
            (input_shape[0] * input_shape[1] * conv2_channels) // (16**2),
            n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Clear parameter gradients aggregated from the previous pass
        for param in self.params().values():
            param.grad = np.zeros_like(param.grad)

        step_1f = self.layer_1.forward(X)
        step_2f = self.layer_2.forward(step_1f)
        step_3f = self.layer_3.forward(step_2f)
        step_4f = self.layer_4.forward(step_3f)
        step_5f = self.layer_5.forward(step_4f)
        step_6f = self.layer_6.forward(step_5f)
        step_7f = self.layer_7.forward(step_6f)
        step_8f = self.layer_8.forward(step_7f)

        loss, dpred = softmax_with_cross_entropy(step_8f, y)

        step_8b = self.layer_8.backward(dpred)
        step_7b = self.layer_7.backward(step_8b)
        step_6b = self.layer_6.backward(step_7b)
        step_5b = self.layer_5.backward(step_6b)
        step_4b = self.layer_4.backward(step_5b)
        step_3b = self.layer_3.backward(step_4b)
        step_2b = self.layer_2.backward(step_3b)
        step_1b = self.layer_1.backward(step_2b)

        return loss

    def predict(self, X):
        step_1f = self.layer_1.forward(X)
        step_2f = self.layer_2.forward(step_1f)
        step_3f = self.layer_3.forward(step_2f)
        step_4f = self.layer_4.forward(step_3f)
        step_5f = self.layer_5.forward(step_4f)
        step_6f = self.layer_6.forward(step_5f)
        step_7f = self.layer_7.forward(step_6f)
        step_8f = self.layer_8.forward(step_7f)
        probs = softmax(step_8f)
        pred = np.argmax(probs, axis=1)
        return pred

    def params(self):
        result = {}
        result['W1'] = self.layer_1.W
        result['W2'] = self.layer_4.W
        result['W3'] = self.layer_8.W
        result['B1'] = self.layer_1.B
        result['B2'] = self.layer_4.B
        result['B3'] = self.layer_8.B
        return result
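# Note on predict() above: softmax is strictly monotonic within each row, so
# argmax over the probabilities always equals argmax over the raw logits -
# the softmax call is only needed when the probabilities themselves are
# wanted. A quick standalone demonstration:
import numpy as np

def softmax_rows(z):
    # numerically stable row-wise softmax
    z = z - z.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

logits = np.array([[2.0, -1.0, 0.5],
                   [0.1, 0.2, -3.0]])
assert np.array_equal(np.argmax(logits, axis=1),
                      np.argmax(softmax_rows(logits), axis=1))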
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes,
                 conv1_channels, conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.conv1 = ConvolutionalLayer(input_shape[2], conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.max_pl1 = MaxPoolingLayer(4, 4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.max_pl2 = MaxPoolingLayer(4, 4)
        self.flat = Flattener()
        # Hard-coded for the (32, 32, 3) input from the docstring:
        # each spatial dimension ends up 32 // 4 // 4 = 2, so 2 * 2 = 4
        self.fc = FullyConnectedLayer(4 * conv2_channels, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass
        # (calling params() once instead of on every iteration)
        for param in self.params().values():
            param.grad = np.zeros_like(param.grad)

        # No L2 regularization is needed in this assignment
        step1 = self.conv1.forward(X)
        step2 = self.relu1.forward(step1)
        step3 = self.max_pl1.forward(step2)
        step4 = self.conv2.forward(step3)
        step5 = self.relu2.forward(step4)
        step6 = self.max_pl2.forward(step5)
        step7 = self.flat.forward(step6)
        step8 = self.fc.forward(step7)

        loss, loss_grad = softmax_with_cross_entropy(step8, y)

        d8 = self.fc.backward(loss_grad)
        d7 = self.flat.backward(d8)
        d6 = self.max_pl2.backward(d7)
        d5 = self.relu2.backward(d6)
        d4 = self.conv2.backward(d5)
        d3 = self.max_pl1.backward(d4)
        d2 = self.relu1.backward(d3)
        d1 = self.conv1.backward(d2)

        return loss

    def predict(self, X):
        step1 = self.conv1.forward(X)
        step2 = self.relu1.forward(step1)
        step3 = self.max_pl1.forward(step2)
        step4 = self.conv2.forward(step3)
        step5 = self.relu2.forward(step4)
        step6 = self.max_pl2.forward(step5)
        step7 = self.flat.forward(step6)
        step8 = self.fc.forward(step7)
        pred = step8.argmax(axis=1)
        return pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        result = {
            'conv1.W': self.conv1.W,
            'conv1.B': self.conv1.B,
            'conv2.W': self.conv2.W,
            'conv2.B': self.conv2.B,
            'fc.W': self.fc.W,
            'fc.B': self.fc.B
        }
        return result
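# Because all six parameters flow through params(), the whole model can be
# verified with numeric gradient checking. A sketch, assuming the assignment
# ships a check_model_gradient(model, X, y) helper with this signature
# (hypothetical here - adapt the call if yours differs):
import numpy as np

model = ConvNet(input_shape=(32, 32, 3), n_output_classes=10,
                conv1_channels=2, conv2_channels=2)
X_small = np.random.randn(2, 32, 32, 3)  # tiny batch keeps the check fast
y_small = np.array([0, 1])
check_model_gradient(model, X_small, y_small)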
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes,
                 conv1_channels, conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.layer1 = ConvolutionalLayer(input_shape[2], conv1_channels, 3, 1)
        self.layer2 = ReLULayer()
        self.layer3 = MaxPoolingLayer(4, 4)
        self.layer4 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.layer5 = ReLULayer()
        self.layer6 = MaxPoolingLayer(4, 4)
        self.layer7 = Flattener()
        # The two poolings shrink the spatial area by 16 * 16 = 256x in total
        self.layer8 = FullyConnectedLayer(
            input_shape[0] * input_shape[1] * conv2_channels // (16 * 16),
            n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass.
        # No L2 regularization is needed in this assignment.
        for param in self.params().values():
            param.grad = np.zeros_like(param.grad)

        step1 = self.layer1.forward(X)
        step2 = self.layer2.forward(step1)
        step3 = self.layer3.forward(step2)
        step4 = self.layer4.forward(step3)
        step5 = self.layer5.forward(step4)
        step6 = self.layer6.forward(step5)
        step7 = self.layer7.forward(step6)
        step8 = self.layer8.forward(step7)

        loss, dL = softmax_with_cross_entropy(step8, y)

        dstep8 = self.layer8.backward(dL)
        dstep7 = self.layer7.backward(dstep8)
        dstep6 = self.layer6.backward(dstep7)
        dstep5 = self.layer5.backward(dstep6)
        dstep4 = self.layer4.backward(dstep5)
        dstep3 = self.layer3.backward(dstep4)
        dstep2 = self.layer2.backward(dstep3)
        dstep1 = self.layer1.backward(dstep2)

        return loss

    def predict(self, X):
        step1 = self.layer1.forward(X)
        step2 = self.layer2.forward(step1)
        step3 = self.layer3.forward(step2)
        step4 = self.layer4.forward(step3)
        step5 = self.layer5.forward(step4)
        step6 = self.layer6.forward(step5)
        step7 = self.layer7.forward(step6)
        step8 = self.layer8.forward(step7)
        probs = softmax(step8)
        pred = np.argmax(probs, axis=1)
        return pred

    def params(self):
        # Aggregate all the params from all the layers which have parameters
        return {
            'W1': self.layer1.W, 'B1': self.layer1.B,
            'W2': self.layer4.W, 'B2': self.layer4.B,
            'W3': self.layer8.W, 'B3': self.layer8.B
        }
class ConvNet:
    """
    Implements a very simple conv net

    Input -> Conv[3x3] -> Relu -> Maxpool[4x4] ->
    Conv[3x3] -> Relu -> MaxPool[4x4] ->
    Flatten -> FC -> Softmax
    """

    def __init__(self, input_shape, n_output_classes,
                 conv1_channels, conv2_channels):
        """
        Initializes the neural network

        Arguments:
        input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                       Will be equal to (32, 32, 3)
        n_output_classes, int - number of classes to predict
        conv1_channels, int - number of filters in the 1st conv layer
        conv2_channels, int - number of filters in the 2nd conv layer
        """
        self.conv1 = ConvolutionalLayer(input_shape[2], conv1_channels, 3, 1)
        self.relu1 = ReLULayer()
        self.max_pool1 = MaxPoolingLayer(4, 4)
        self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, 3, 1)
        self.relu2 = ReLULayer()
        self.max_pool2 = MaxPoolingLayer(4, 4)
        self.flat = Flattener()
        # Hard-coded for the (32, 32, 3) input from the docstring:
        # each spatial dimension ends up 32 // 4 // 4 = 2, so 2 * 2 = 4
        self.fc = FullyConnectedLayer(4 * conv2_channels, n_output_classes)

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
        X, np array (batch_size, height, width, input_features) - input data
        y, np array of int (batch_size) - classes
        """
        # Before running forward and backward pass through the model,
        # clear parameter gradients aggregated from the previous pass.
        # No L2 regularization is needed in this assignment.
        for param in self.params().values():
            param.grad = np.zeros_like(param.value)

        pred = self.conv1.forward(X)
        pred = self.relu1.forward(pred)
        pred = self.max_pool1.forward(pred)
        pred = self.conv2.forward(pred)
        pred = self.relu2.forward(pred)
        pred = self.max_pool2.forward(pred)
        pred = self.flat.forward(pred)
        pred = self.fc.forward(pred)

        loss, loss_grad = softmax_with_cross_entropy(pred, y)

        grad = self.fc.backward(loss_grad)
        grad = self.flat.backward(grad)
        grad = self.max_pool2.backward(grad)
        grad = self.relu2.backward(grad)
        grad = self.conv2.backward(grad)
        grad = self.max_pool1.backward(grad)
        grad = self.relu1.backward(grad)
        grad = self.conv1.backward(grad)

        return loss

    def predict(self, X):
        pred = self.conv1.forward(X)
        pred = self.relu1.forward(pred)
        pred = self.max_pool1.forward(pred)
        pred = self.conv2.forward(pred)
        pred = self.relu2.forward(pred)
        pred = self.max_pool2.forward(pred)
        pred = self.flat.forward(pred)
        pred = self.fc.forward(pred)
        return np.argmax(pred, axis=1)

    def params(self):
        result = {}
        result['Conv1W'] = self.conv1.params()['W']
        result['Conv1B'] = self.conv1.params()['B']
        result['Conv2W'] = self.conv2.params()['W']
        result['Conv2B'] = self.conv2.params()['B']
        result['FC_W'] = self.fc.params()['W']
        result['FC_B'] = self.fc.params()['B']
        return result
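# A minimal training-step sketch using only the interface defined in this
# file: params() returning objects with .value/.grad, plus
# compute_loss_and_gradients(). The learning rate and random data below are
# illustrative placeholders, not part of the assignment:
import numpy as np

model = ConvNet(input_shape=(32, 32, 3), n_output_classes=10,
                conv1_channels=4, conv2_channels=8)
learning_rate = 1e-2
X_batch = np.random.randn(16, 32, 32, 3)
y_batch = np.random.randint(0, 10, size=16)

loss = model.compute_loss_and_gradients(X_batch, y_batch)
for param in model.params().values():
    # vanilla SGD update; real training would loop over epochs and batches
    param.value -= learning_rate * param.grad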