def forward(self, X: np.ndarray) -> np.ndarray:
    """Forward pass for convolutional layer.

    This layer convolves the input `X` with a filter of weights, adds a bias
    term, and applies an activation function to compute the output. This layer
    also supports padding and integer strides. Intermediates necessary for the
    backward pass are stored in the cache.

    Parameters
    ----------
    X
        input with shape (batch_size, in_rows, in_cols, in_channels)

    Returns
    -------
    output feature maps with shape (batch_size, out_rows, out_cols, out_channels)
    """
    if self.n_in is None:
        self._init_parameters(X.shape)

    W = self.parameters["W"]
    b = self.parameters["b"]

    kernel_height, kernel_width, in_channels, out_channels = W.shape
    n_examples, in_rows, in_cols, in_channels = X.shape
    kernel_shape = (kernel_height, kernel_width)

    ### BEGIN YOUR CODE ###

    # cache the unpadded input for the backward pass
    self.cache["X"] = X

    # implement a convolutional forward pass
    X, p = pad2d(X, self.pad, self.kernel_shape, self.stride)
    out_rows = 1 + (in_rows + p[0] + p[1] - kernel_height) // self.stride
    out_cols = 1 + (in_cols + p[2] + p[3] - kernel_width) // self.stride

    out = np.zeros((n_examples, out_rows, out_cols, out_channels))
    Z = np.zeros(out.shape)
    for n in range(n_examples):
        for o in range(out_channels):
            for hprime in range(out_rows):
                for wprime in range(out_cols):
                    starth = hprime * self.stride
                    endh = starth + kernel_height
                    startw = wprime * self.stride
                    endw = startw + kernel_width
                    x_overlap = X[n, starth:endh, startw:endw, :]
                    z = np.sum(x_overlap * W[:, :, :, o]) + b[0, o]
                    Z[n, hprime, wprime, o] = z
                    out[n, hprime, wprime, o] = self.activation.forward(z)

    # cache any values required for backprop
    self.cache["W"] = W
    self.cache["b"] = b
    self.cache["Z"] = Z
    self.cache["out"] = out

    ### END YOUR CODE ###

    return out
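# The explicit four-level loop above is easy to read but slow. Below is a
# minimal im2col-style sketch of the same NHWC cross-correlation for
# comparison; the helper name `conv2d_im2col` and its single symmetric integer
# `pad` argument are illustrative assumptions, not part of the pad2d/layer API
# used in this file.
import numpy as np

def conv2d_im2col(X, W, b, stride=1, pad=0):
    """Vectorized NHWC cross-correlation: X (N, H, W, C), W (kh, kw, C, F), b (1, F)."""
    N, H, Wi, C = X.shape
    kh, kw, _, F = W.shape
    Xp = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)))
    out_h = (H + 2 * pad - kh) // stride + 1
    out_w = (Wi + 2 * pad - kw) // stride + 1

    # gather every receptive field into one row of a 2-D matrix
    cols = np.empty((N, out_h, out_w, kh * kw * C))
    for i in range(out_h):
        for j in range(out_w):
            r, c = i * stride, j * stride
            cols[:, i, j, :] = Xp[:, r:r + kh, c:c + kw, :].reshape(N, -1)

    # a single matrix multiply replaces the per-example, per-filter loops
    Z = cols.reshape(-1, kh * kw * C) @ W.reshape(-1, F) + b
    return Z.reshape(N, out_h, out_w, F)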
def backward(self, dLdY: np.ndarray) -> np.ndarray:
    """Backward pass for conv layer. Computes the gradients of the loss with
    respect to the input feature maps as well as the filter weights and biases.

    Parameters
    ----------
    dLdY
        derivative of loss with respect to output of this layer
        shape (batch_size, out_rows, out_cols, out_channels)

    Returns
    -------
    derivative of the loss with respect to the input of this layer
    shape (batch_size, in_rows, in_cols, in_channels)
    """
    ### BEGIN YOUR CODE ###

    X = self.cache["X"]
    W = self.cache["W"]
    b = self.cache["b"]
    Z = self.cache["Z"]

    kernel_height, kernel_width, in_channels, out_channels = W.shape
    n_examples, in_rows, in_cols, in_channels = X.shape

    new_x, p = pad2d(X, self.pad, self.kernel_shape, self.stride)
    out_rows = 1 + (in_rows + p[0] + p[1] - kernel_height) // self.stride
    out_cols = 1 + (in_cols + p[2] + p[3] - kernel_width) // self.stride

    der_new_x = np.zeros(new_x.shape)
    dw = np.zeros(W.shape)
    db = np.zeros(b.shape)

    # derivative of the loss with respect to the pre-activation Z
    dLdZ = self.activation.backward(Z, dLdY)

    # perform a backward pass
    for n in range(n_examples):
        for o in range(out_channels):
            db[0, o] += np.sum(dLdZ[n, :, :, o])
            for hprime in range(out_rows):
                for wprime in range(out_cols):
                    starth = hprime * self.stride
                    endh = starth + kernel_height
                    startw = wprime * self.stride
                    endw = startw + kernel_width
                    dw_new = new_x[n, starth:endh, startw:endw, :]
                    dw[:, :, :, o] += dw_new * dLdZ[n, hprime, wprime, o]
                    der_new_x[n, starth:endh, startw:endw, :] += W[:, :, :, o] * dLdZ[n, hprime, wprime, o]

    # strip the padding to recover the gradient with respect to the unpadded input
    dX = der_new_x[:, p[0]:p[0] + in_rows, p[2]:p[2] + in_cols, :]

    self.gradients["W"] = dw
    self.gradients["b"] = db

    ### END YOUR CODE ###

    return dX
def backward(self, dLdY: np.ndarray) -> np.ndarray:
    """Backward pass for conv layer. Computes the gradients of the loss with
    respect to the input feature maps as well as the filter weights and biases.

    Parameters
    ----------
    dLdY
        derivative of loss with respect to output of this layer
        shape (batch_size, out_rows, out_cols, out_channels)

    Returns
    -------
    derivative of the loss with respect to the input of this layer
    shape (batch_size, in_rows, in_cols, in_channels)
    """
    ### BEGIN YOUR CODE ###

    W = self.parameters["W"]
    b = self.parameters["b"]
    Z = self.cache["Z"]
    X = self.cache["X"]

    kernel_height, kernel_width, in_channels, out_channels = W.shape
    n_examples, in_rows, in_cols, in_channels = X.shape
    kernel_shape = (kernel_height, kernel_width)

    X_pad = pad2d(X, self.pad, kernel_shape, self.stride)[0]
    out_rows, out_cols = dLdY.shape[1], dLdY.shape[2]

    # perform a backward pass
    dZ = self.activation.backward(Z, dLdY)
    dX = np.zeros_like(X_pad)
    dW = np.zeros_like(W)
    db = np.zeros_like(b)

    for data_point in range(n_examples):
        for n in range(out_channels):
            for d2 in range(out_cols):
                for d1 in range(out_rows):
                    X_r_start, X_c_start = self.stride * d1, self.stride * d2
                    window = X_pad[data_point,
                                   X_r_start:X_r_start + kernel_height,
                                   X_c_start:X_c_start + kernel_width, :]
                    dZ_curr = dZ[data_point, d1, d2, n]
                    db[0, n] += dZ_curr
                    dW[:, :, :, n] += dZ_curr * window
                    dX[data_point,
                       X_r_start:X_r_start + kernel_height,
                       X_c_start:X_c_start + kernel_width, :] += W[:, :, :, n] * dZ_curr

    ### END YOUR CODE ###

    self.gradients["b"] = db
    self.gradients["W"] = dW

    # strip the (assumed symmetric) padding to recover dL/dX for the unpadded input
    row_ = np.floor_divide(X_pad.shape[1] - in_rows, 2)
    col_ = np.floor_divide(X_pad.shape[2] - in_cols, 2)
    return dX[:, row_:row_ + in_rows, col_:col_ + in_cols, :]
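# A quick way to validate a backward pass like the one above is a central
# finite-difference check. This is a minimal sketch: it assumes an
# already-constructed layer exposing the forward/backward/parameters/gradients
# attributes used in this file, and it uses an arbitrary sum-of-outputs loss
# (so dL/dY is all ones); the helper name is illustrative.
import numpy as np

def check_conv_dW(layer, X, eps=1e-6):
    """Return the max abs difference between analytic and numeric dL/dW."""
    out = layer.forward(X)
    layer.backward(np.ones_like(out))      # populates layer.gradients["W"]
    analytic = layer.gradients["W"]

    W = layer.parameters["W"]
    numeric = np.zeros_like(W)
    it = np.nditer(W, flags=["multi_index"])
    while not it.finished:
        idx = it.multi_index
        orig = W[idx]
        W[idx] = orig + eps
        plus = layer.forward(X).sum()      # loss with W[idx] nudged up
        W[idx] = orig - eps
        minus = layer.forward(X).sum()     # loss with W[idx] nudged down
        W[idx] = orig
        numeric[idx] = (plus - minus) / (2 * eps)
        it.iternext()

    return np.max(np.abs(analytic - numeric))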
def forward(self, X: np.ndarray) -> np.ndarray:
    """Forward pass for convolutional layer.

    This layer convolves the input `X` with a filter of weights, adds a bias
    term, and applies an activation function to compute the output. This layer
    also supports padding and integer strides. Intermediates necessary for the
    backward pass are stored in the cache.

    Parameters
    ----------
    X
        input with shape (batch_size, in_rows, in_cols, in_channels)

    Returns
    -------
    output feature maps with shape (batch_size, out_rows, out_cols, out_channels)
    """
    if self.n_in is None:
        self._init_parameters(X.shape)

    self.cache["X"] = X

    W = self.parameters["W"]
    b = self.parameters["b"]

    kernel_height, kernel_width, in_channels, out_channels = W.shape
    n_examples, in_rows, in_cols, in_channels = X.shape
    kernel_shape = (kernel_height, kernel_width)

    X = pad2d(X, self.pad, kernel_shape, self.stride)[0]
    out_rows = (X.shape[1] - kernel_height) // self.stride + 1
    out_cols = (X.shape[2] - kernel_width) // self.stride + 1
    Z = np.zeros((n_examples, out_rows, out_cols, out_channels))

    ### BEGIN YOUR CODE ###

    # implement a convolutional forward pass
    for ex in range(n_examples):
        for n in range(out_channels):
            for d2 in range(out_cols):
                for d1 in range(out_rows):
                    X_r = d1 * self.stride
                    X_c = d2 * self.stride
                    window = X[ex, X_r:X_r + kernel_height, X_c:X_c + kernel_width, :]
                    Z[ex, d1, d2, n] = np.sum(W[:, :, :, n] * window) + b[0, n]

    # cache any values required for backprop

    ### END YOUR CODE ###

    out = self.activation(Z)
    self.cache["Z"] = Z
    return out
def forward(self, X: np.ndarray) -> np.ndarray:
    """Forward pass for convolutional layer.

    This layer convolves the input `X` with a filter of weights, adds a bias
    term, and applies an activation function to compute the output. This layer
    also supports padding and integer strides. Intermediates necessary for the
    backward pass are stored in the cache.

    Parameters
    ----------
    X
        input with shape (batch_size, in_rows, in_cols, in_channels)

    Returns
    -------
    output feature maps with shape (batch_size, out_rows, out_cols, out_channels)
    """
    if self.n_in is None:
        self._init_parameters(X.shape)

    W = self.parameters["W"]
    b = self.parameters["b"]

    kernel_height, kernel_width, in_channels, out_channels = W.shape
    n_examples, in_rows, in_cols, in_channels = X.shape
    kernel_shape = (kernel_height, kernel_width)

    ### BEGIN YOUR CODE ###

    self.cache["X"] = X
    X, _ = pad2d(X, self.pad, kernel_shape, self.stride)

    # implement a convolutional forward pass
    out_rows = (X.shape[1] - kernel_height) // self.stride + 1
    out_cols = (X.shape[2] - kernel_width) // self.stride + 1
    Z = np.zeros((n_examples, out_rows, out_cols, self.n_out))

    for example in range(n_examples):
        x = X[example]
        z = np.zeros(Z.shape[1:])
        for row in range(out_rows):
            for col in range(out_cols):
                r0, c0 = row * self.stride, col * self.stride
                x_slice = x[r0:r0 + kernel_height, c0:c0 + kernel_width, :]
                # contract the window against every filter at once
                z[row, col] = np.tensordot(x_slice, W, axes=([0, 1, 2], [0, 1, 2])) + b[0]
        Z[example] = z

    # cache any values required for backprop
    self.cache["Z"] = Z
    out = self.activation(Z)

    ### END YOUR CODE ###

    return out
def forward(self, X: np.ndarray) -> np.ndarray:
    """Forward pass for convolutional layer.

    This layer convolves the input `X` with a filter of weights, adds a bias
    term, and applies an activation function to compute the output. This layer
    also supports padding and integer strides. Intermediates necessary for the
    backward pass are stored in the cache.

    Parameters
    ----------
    X
        input with shape (batch_size, in_rows, in_cols, in_channels)

    Returns
    -------
    output feature maps with shape (batch_size, out_rows, out_cols, out_channels)
    """
    if self.n_in is None:
        self._init_parameters(X.shape)

    WEIGHT = self.parameters["W"]
    b = self.parameters["b"]

    HH, WW, C, F = WEIGHT.shape
    N, H, W, C = X.shape
    kernel_shape = (HH, WW)

    padded_x, p = pad2d(X, self.pad, kernel_shape, stride=self.stride)
    _, padH, padW, _ = p

    ### BEGIN YOUR CODE ###

    Hout = 1 + (H + 2 * padH - HH) // self.stride
    Wout = 1 + (W + 2 * padW - WW) // self.stride
    Z = np.empty((N, Hout, Wout, F))

    # implement a convolutional forward pass
    for h in range(Hout):
        for wi in range(Wout):
            # window of the padded input shared by the whole batch
            toConvolute = padded_x[:,
                                   h * self.stride:h * self.stride + HH,
                                   wi * self.stride:wi * self.stride + WW, :]
            for f in range(F):
                Z[:, h, wi, f] = np.sum(toConvolute * WEIGHT[:, :, :, f], axis=(1, 2, 3)) + b[0, f]

    out = self.activation(Z)

    # cache any values required for backprop
    self.cache["X"] = X
    self.cache["Z"] = Z

    ### END YOUR CODE ###

    return out
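# For the stride-1, no-padding case, a forward pass like the one above can be
# spot-checked against SciPy, which implements the same (unflipped)
# cross-correlation. This sketch only assumes the NHWC / (kh, kw, C, F)
# layouts used in this file; `reference_conv` is an illustrative name.
import numpy as np
from scipy.signal import correlate

def reference_conv(X, W, b):
    """NHWC cross-correlation with stride 1 and no padding, via SciPy."""
    N, H, Wi, C = X.shape
    kh, kw, _, F = W.shape
    out = np.empty((N, H - kh + 1, Wi - kw + 1, F))
    for n in range(N):
        for f in range(F):
            # correlate slides W[:, :, :, f] over X[n] without flipping it
            out[n, :, :, f] = correlate(X[n], W[:, :, :, f], mode="valid")[:, :, 0] + b[0, f]
    return out

# example usage:
#   X = np.random.randn(2, 8, 8, 3); W = np.random.randn(3, 3, 3, 4); b = np.random.randn(1, 4)
#   reference_conv(X, W, b).shape  -> (2, 6, 6, 4)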
def backward(self, dLdY: np.ndarray) -> np.ndarray:
    """Backward pass for conv layer. Computes the gradients of the loss with
    respect to the input feature maps as well as the filter weights and biases.

    Parameters
    ----------
    dLdY
        derivative of loss with respect to output of this layer
        shape (batch_size, out_rows, out_cols, out_channels)

    Returns
    -------
    derivative of the loss with respect to the input of this layer
    shape (batch_size, in_rows, in_cols, in_channels)
    """
    ### BEGIN YOUR CODE ###

    X = self.cache["X"]
    Z = self.cache["Z"]
    W = self.parameters["W"]
    b = self.parameters["b"]

    kernel_height, kernel_width, in_channels, out_channels = W.shape
    n_examples, in_rows, in_cols, in_channels = X.shape
    kernel_shape = (kernel_height, kernel_width)

    # derivative of the loss with respect to the pre-activation Z, then padded
    dldZ = self.activation.backward(Z, dLdY)
    dLdZ, p = pad2d(dldZ, self.pad, kernel_shape, self.stride)

    # gradient with respect to the input
    out = np.zeros(X.shape)
    for example in range(n_examples):
        x = dLdZ[example]
        z = np.zeros(out.shape[1:])
        for row in range(out.shape[1]):
            for col in range(out.shape[2]):
                # rotating the gradient window by 180 degrees is equivalent to
                # correlating with the flipped filter
                x_slice = np.rot90(x[row:row + kernel_height, col:col + kernel_width, :], 2, (0, 1))
                z[row, col] = np.tensordot(x_slice, W, axes=([0, 1, 2], [0, 1, 3]))
        out[example] = z

    # gradient with respect to the filter weights
    dW = np.zeros(W.shape)
    for example in range(n_examples):
        x = X[example]
        dldz = dLdZ[example]
        z = np.zeros(kernel_shape)
        for o in range(self.n_out):
            for i in range(self.n_in):
                im = x[:, :, i]
                dz = dldz[:, :, o]
                for row in range(kernel_height):
                    for col in range(kernel_width):
                        dz_slice = dz[kernel_height - row - 1:kernel_height - row - 1 + im.shape[0],
                                      kernel_width - col - 1:kernel_width - col - 1 + im.shape[1]]
                        z[row, col] = np.sum(im * dz_slice)
                dW[:, :, i, o] += z

    self.gradients['W'] = dW
    self.gradients['b'] = np.sum(dldZ, axis=(0, 1, 2))

    ### END YOUR CODE ###

    return out
def backward(self, dLdY: np.ndarray) -> np.ndarray:
    """Backward pass for conv layer. Computes the gradients of the loss with
    respect to the input feature maps as well as the filter weights and biases.

    Parameters
    ----------
    dLdY
        derivative of loss with respect to output of this layer
        shape (batch_size, out_rows, out_cols, out_channels)

    Returns
    -------
    derivative of the loss with respect to the input of this layer
    shape (batch_size, in_rows, in_cols, in_channels)
    """
    ### BEGIN YOUR CODE ###

    X = self.cache["X"]
    Z = self.cache["Z"]
    WEIGHT = self.parameters["W"]
    b = self.parameters["b"]

    # sizes
    N, H, W, C = X.shape
    HH, WW, C, F = WEIGHT.shape
    kernel_shape = (HH, WW)
    Hout = dLdY.shape[1]
    Wout = dLdY.shape[2]

    # padding x
    padded_x, p = pad2d(X, self.pad, kernel_shape, stride=self.stride)
    _, padH, padW, _ = p

    # setting up gradient matrices
    padded_dx = np.zeros(padded_x.shape)
    dw = np.zeros(WEIGHT.shape)

    # perform a backward pass
    dLdY = self.activation.backward(Z, dLdY)
    for h in range(Hout):
        for wi in range(Wout):
            for n in range(N):
                # gradient for dx
                padded_dx[n, h * self.stride:h * self.stride + HH,
                          wi * self.stride:wi * self.stride + WW, :] += \
                    (WEIGHT * dLdY[n, h, wi, :]).sum(axis=3)
            for f in range(F):
                # gradient for W
                dw[:, :, :, f] += (padded_x[:, h * self.stride:h * self.stride + HH,
                                            wi * self.stride:wi * self.stride + WW, :] *
                                   dLdY[:, h, wi, f][:, None, None, None]).sum(axis=0)

    # removing padding (slice by offsets so zero padding also works) and calculating db
    dx = padded_dx[:, padH:padH + H, padW:padW + W, :]
    db = dLdY.sum(axis=(0, 1, 2)).reshape(1, -1)

    # storing gradients
    self.gradients["W"] = dw
    self.gradients["b"] = db

    ### END YOUR CODE ###

    return dx
def backward(self, dLdY: np.ndarray) -> np.ndarray:
    """Backward pass for conv layer. Computes the gradients of the loss with
    respect to the input feature maps as well as the filter weights and biases.

    Parameters
    ----------
    dLdY
        derivative of loss with respect to output of this layer
        shape (batch_size, out_rows, out_cols, out_channels)

    Returns
    -------
    derivative of the loss with respect to the input of this layer
    shape (batch_size, in_rows, in_cols, in_channels)
    """
    ### BEGIN YOUR CODE ###

    X = self.cache["X"]
    Z = self.cache["Z"]
    W = self.parameters["W"]
    b = self.parameters["b"]

    kernel_height, kernel_width, in_channels, out_channels = W.shape
    n_examples, in_rows, in_cols, in_channels = X.shape
    kernel_shape = (kernel_height, kernel_width)
    Hout = dLdY.shape[1]
    Wout = dLdY.shape[2]

    padded_x, p = pad2d(X, self.pad, kernel_shape, stride=self.stride)
    _, padH, padW, _ = p

    padded_dx = np.zeros(padded_x.shape)
    dw = np.zeros(W.shape)

    # perform a backward pass
    dLdY = self.activation.backward(Z, dLdY)
    for i in range(Hout):
        for j in range(Wout):
            h_start = i * self.stride
            h_end = h_start + kernel_height
            w_start = j * self.stride
            w_end = w_start + kernel_width
            # accumulate dL/dX over the whole batch in one broadcasted step
            padded_dx[:, h_start:h_end, w_start:w_end, :] += \
                (W[np.newaxis, :, :, :, :] * dLdY[:, i:i + 1, j:j + 1, np.newaxis, :]).sum(axis=4)
            # accumulate dL/dW over the whole batch
            dw += np.sum(padded_x[:, h_start:h_end, w_start:w_end, :, np.newaxis] *
                         dLdY[:, i:i + 1, j:j + 1, np.newaxis, :], axis=0)

    # strip the padding (slice by offsets so zero padding also works)
    dx = padded_dx[:, padH:padH + in_rows, padW:padW + in_cols, :]
    db = dLdY.sum(axis=(0, 1, 2)).reshape(1, -1)

    # storing gradients
    self.gradients["W"] = dw
    self.gradients["b"] = db

    ### END YOUR CODE ###

    return dx