def max_pool_forward_im2col(x, pool_param): """ An implementation of the forward pass for max pooling based on im2col. This isn't much faster than the naive version, so it should be avoided if possible. """ N, C, H, W = x.shape pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width'] stride = pool_param['stride'] assert (H - pool_height) % stride == 0, 'Invalid height' assert (W - pool_width) % stride == 0, 'Invalid width' out_height = (H - pool_height) / stride + 1 out_width = (W - pool_width) / stride + 1 x_split = x.reshape(N * C, 1, H, W) x_cols = im2col(x_split, pool_height, pool_width, padding=0, stride=stride) x_cols_argmax = np.argmax(x_cols, axis=0) x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])] out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1) cache = (x, x_cols, x_cols_argmax, pool_param) return out, cache
def max_pool_forward_im2col(x, pool_param): """ An implementation of the forward pass for max pooling based on im2col. This isn't much faster than the naive version, so it should be avoided if possible. """ N, C, H, W = x.shape pool_height, pool_width = pool_param['pool_height'], pool_param[ 'pool_width'] stride = pool_param['stride'] assert (H - pool_height) % stride == 0, 'Invalid height' assert (W - pool_width) % stride == 0, 'Invalid width' out_height = (H - pool_height) // stride + 1 out_width = (W - pool_width) // stride + 1 x_split = x.reshape(N * C, 1, H, W) x_cols = im2col(x_split, pool_height, pool_width, padding=0, stride=stride) x_cols_argmax = np.argmax(x_cols, axis=0) x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])] out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1) cache = (x, x_cols, x_cols_argmax, pool_param) return out, cache
def conv_forward_naive(x, w, b, conv_param): """ A naive implementation of the forward pass for a convolutional layer. The input consists of N data points, each with C channels, height H and width W. We convolve each input with F different filters, where each filter spans all C channels and has height HH and width HH. Input: - x: Input data of shape (N, C, H, W) - w: Filter weights of shape (FN, C, FH, FW) - b: Biases, of shape (FN,) - conv_param: A dictionary with the following keys: - 'stride': The number of pixels between adjacent receptive fields in the horizontal and vertical directions. - 'pad': The number of pixels that will be used to zero-pad the input. Returns a tuple of: - out: Output data, of shape (N, F, H', W') where H' and W' are given by H' = 1 + (H + 2 * pad - HH) / stride W' = 1 + (W + 2 * pad - WW) / stride - cache: (x, w, b, conv_param) """ out = None cache = None ########################################################################### # TODO: Implement the convolutional forward pass. # # Hint: you can use the function np.pad for padding. # ########################################################################### stride = conv_param['stride'] pad = conv_param['pad'] N, C, H, W = x.shape FN, C, FH, FW = w.shape OH = int(1 + (H + 2 * pad - FH) / stride) OW = int(1 + (W + 2 * pad - FW) / stride) col = im2col(x, FH, FW, stride, pad) # (N*OH*OW, C*FH*FW) col_w = w.reshape(FN, -1).T # (C*FH*FW, FN) out = np.dot(col, col_w) + b # (N*OH*OW, FN) out = out.reshape(N, OH, OW, FN).transpose(0, 3, 1, 2) ########################################################################### # END OF YOUR CODE # ########################################################################### cache = (x, w, b, col, col_w, conv_param) return out, cache
def max_pool_forward_naive(x, pool_param): """ A naive implementation of the forward pass for a max pooling layer. Inputs: - x: Input data, of shape (N, C, H, W) - pool_param: dictionary with the following keys: - 'pool_height': The height of each pooling region - 'pool_width': The width of each pooling region - 'stride': The distance between adjacent pooling regions Returns a tuple of: - out: Output data - cache: (x, pool_param) """ out = None ########################################################################### # TODO: Implement the max pooling forward pass # ########################################################################### N, C, H, W = x.shape FH, FW = pool_param['pool_height'], pool_param['pool_width'] stride = pool_param['stride'] pad = 0 OH = int(1 + (H + 2 * pad - FH) / stride) OW = int(1 + (W + 2 * pad - FW) / stride) col = im2col(x, FH, FW, stride, pad) # (N*OH*OW, C*FH*FW) col = col.reshape(-1, FH * FW) arg_max = np.argmax(col, axis=1) out = np.max(col, axis=1) out = out.reshape(N, OH, OW, -1).transpose(0, 3, 1, 2) ########################################################################### # END OF YOUR CODE # ########################################################################### cache = (x, pool_param, arg_max) return out, cache