def conv_backward_naive(dout, cache):
    """
    Backward pass for a convolutional layer, expressed with the im2col helpers.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    x, w, conv_param = cache[0], cache[1], cache[3]
    pad, stride = conv_param['pad'], conv_param['stride']
    _, _, HH, WW = w.shape

    # Column forms of the input, the filters and the upstream gradient.
    # A 1x1 field with stride 1 is used to lay dout out column-wise.
    x_cols = im2col.im2col_indices(x, field_height=HH, field_width=WW,
                                   padding=pad, stride=stride)
    w_cols = im2col.im2col_indices(w, field_height=HH, field_width=WW,
                                   padding=0, stride=1)
    dout_cols = im2col.im2col_indices(dout, field_height=1, field_width=1,
                                      padding=0, stride=1)

    # Gradient w.r.t. the input: multiply in column space, then fold the
    # columns back to x.shape.
    dx = im2col.col2im_indices(w_cols.dot(dout_cols), x.shape,
                               field_height=HH, field_width=WW,
                               padding=pad, stride=stride)

    # Gradient w.r.t. the filters, folded back to w.shape the same way.
    dw = im2col.col2im_indices(x_cols.dot(dout_cols.T), w.shape,
                               field_height=HH, field_width=WW,
                               padding=0, stride=1)

    # Bias gradient: sum each row of the column-form upstream gradient.
    db = dout_cols.sum(axis=1)
    return dx, dw, db
def max_pool_backward_naive(dout, cache):
    """
    Backward pass for a max pooling layer, driven by cached arg-max switches.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, switches, pool_param). `switches` holds, for each
      pooling column, the row index that receives the gradient.

    Returns:
    - dx: Gradient with respect to x
    """
    x, switches, pool_param = cache[0], cache[1], cache[2]
    pool_h = pool_param['pool_height']
    pool_w = pool_param['pool_width']
    stride = pool_param['stride']
    window = pool_h * pool_w

    # Only the shapes of the column forms of x are needed below.
    x_cols = im2col.im2col_indices(x, field_height=pool_h, field_width=pool_w,
                                   padding=0, stride=stride)
    pooled_shape = x_cols.reshape(-1, window).T.shape

    # Upstream gradient laid out as columns (1x1 field, stride 1).
    dout_cols = im2col.im2col_indices(dout, field_height=1, field_width=1,
                                      padding=0, stride=1)

    # The gradient of a max flows only to the element that was selected on
    # the forward pass: one entry per pooling column, chosen by `switches`.
    grad_pools = np.zeros(pooled_shape)
    grad_pools[switches, np.arange(pooled_shape[-1])] = dout_cols.T.flatten()

    # Undo the per-pool reshape, then fold the columns back to x.shape.
    grad_cols = grad_pools.T.reshape(x_cols.shape[::-1]).T
    dx = im2col.col2im_indices(grad_cols, x.shape, field_height=pool_h,
                               field_width=pool_w, padding=0, stride=stride)
    return dx
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max-pooling layer.

    Each upstream derivative is routed to the single input element that was
    the maximum of its pooling window (first maximum in row-major order on
    ties). The arg-max is recomputed per window from x, so overlapping
    windows (stride smaller than the pool size) are handled correctly: an
    element only receives gradient from the windows in which it was selected.

    Inputs:
    - dout: Upstream derivatives, of shape (N, C, H', W')
    - cache: A tuple whose first two entries are (x, pool_param) as in the
      forward pass. Any further entries (e.g. a cached mask) are ignored.

    Returns:
    - dx: Gradient with respect to x, same shape as x
    """
    x, pool_param = cache[0], cache[1]
    pool_h = pool_param['pool_height']
    pool_w = pool_param['pool_width']
    stride = pool_param['stride']

    N, C, _, _ = x.shape
    out_h, out_w = dout.shape[2], dout.shape[3]

    dx = np.zeros(x.shape)
    # Index grids addressing every (sample, channel) pair at once.
    n_idx, c_idx = np.indices((N, C))

    for i in range(out_h):
        top = i * stride
        for j in range(out_w):
            left = j * stride
            window = x[:, :, top:top + pool_h, left:left + pool_w]
            # Flat arg-max inside the window, per (sample, channel).
            flat_arg = window.reshape(N, C, -1).argmax(axis=2)
            rows = flat_arg // pool_w
            cols = flat_arg % pool_w
            # Within one window every (n, c) target is distinct, so an
            # in-place add through fancy indexing is safe; contributions
            # from different windows accumulate across iterations.
            dx[n_idx, c_idx, top + rows, left + cols] += dout[:, :, i, j]
    return dx
def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in
        the horizontal and vertical directions.
      - 'pad': The number of pixels used to zero-pad the input, placed
        symmetrically along the height and width axes. The original input x
        is not modified here.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    N, C, H, W = x.shape
    F, C, HH, WW = w.shape
    pad, stride = conv_param['pad'], conv_param['stride']

    out_h = int(1 + (H + 2 * pad - HH) / stride)
    out_w = int(1 + (W + 2 * pad - WW) / stride)
    out = np.zeros((N, F, out_h, out_w))

    for n in range(N):
        # Column form of the receptive fields of a single sample.
        patches = im2col_indices(x[n:n + 1], HH, WW, pad, stride)
        for f in range(F):
            flat_filter = np.reshape(w[f], (-1))
            response = np.dot(flat_filter, patches)
            out[n, f] = np.reshape(response, (out_h, out_w))
            out[n, f] += b[f]

    cache = (x, w, b, conv_param)
    return out, cache
def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in
        the horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    from cs231n.im2col import im2col_indices

    pad = conv_param['pad']
    stride = conv_param['stride']
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape

    out_h = (H + 2 * pad - HH) // stride + 1
    out_w = (W + 2 * pad - WW) // stride + 1

    # Receptive fields of every sample, one per column.
    patches = im2col_indices(x, HH, WW, padding=pad, stride=stride)

    # One flattened filter per row; the bias is broadcast along the columns.
    scores = w.reshape(F, -1).dot(patches) + b.reshape(F, 1)

    # Unflatten the columns and move the sample axis to the front.
    out = scores.reshape(F, out_h, out_w, N).transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param)
    return out, cache
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data
    - cache: (x, switches, pool_param), where switches[k] is the row index
      of the maximum in the k-th pooling column
    """
    (N, C, H, W) = x.shape
    HH = pool_param['pool_height']
    WW = pool_param['pool_width']
    stride = pool_param['stride']

    # Output size. Floor division keeps these integers on Python 3 as well;
    # they are passed on as part of a shape below.
    H_ = 1 + (H - HH) // stride
    W_ = 1 + (W - WW) // stride

    # Column form of the pooling windows.
    x_col = im2col.im2col_indices(x, HH, WW, padding=0, stride=stride)

    # Regroup so that every column has HH*WW rows.
    x_col_pools = x_col.T.reshape(-1, HH * WW).T

    # Max over each column; the arg-max is kept for the backward pass.
    switches = np.argmax(x_col_pools, axis=0)
    out_maxpool = x_col_pools[switches, np.arange(x_col_pools.shape[-1])]

    # Back to C rows, then fold into (N, C, H_, W_) with a 1x1 field.
    out_maxpool_col = out_maxpool.reshape(-1, C).T
    out = im2col.col2im_indices(out_maxpool_col, (N, C, H_, W_),
                                field_height=1, field_width=1,
                                padding=0, stride=1)

    cache = (x, switches, pool_param)
    return out, cache
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    x, pool_param = cache
    ph = pool_param['pool_height']
    pw = pool_param['pool_width']
    stride = pool_param['stride']
    C = x.shape[1]
    window = ph * pw

    # Column form of x, regrouped so that every column has ph*pw rows.
    cols = im2col_indices(x, ph, pw, 0, stride)
    per_window = (cols.reshape(C, window, -1)
                      .transpose(1, 0, 2)
                      .reshape(window, -1))

    # Recompute which row is the maximum of each column.
    winners = np.argmax(per_window, axis=0)

    # Flatten the upstream gradient channel-first, one value per column.
    flat_dout = dout.transpose(1, 2, 3, 0).reshape(dout.size)

    grad = np.zeros(per_window.shape)
    grad[winners, range(len(winners))] = flat_dout

    # Undo the regrouping and fold the columns back to x.shape.
    grad = (grad.reshape(window, C, -1)
                .transpose(1, 0, 2)
                .reshape(C * window, -1))
    dx = col2im_indices(grad, x.shape, ph, pw, 0, stride)
    return dx
def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in
        the horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape
    stride = conv_param['stride']
    pad = conv_param['pad']

    H_out = 1 + (H + 2 * pad - HH) // stride
    W_out = 1 + (W + 2 * pad - WW) // stride

    # Receptive fields as columns; filters as rows.
    x_cols = im2col_indices(x, HH, WW, padding=pad, stride=stride)
    w_rows = w.reshape(F, -1)

    # Bias is reshaped to a column so it broadcasts across all positions.
    scores = w_rows.dot(x_cols) + b.reshape(-1, 1)

    out = np.reshape(scores, (F, H_out, W_out, N)).transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param)
    return out, cache
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max-pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    No padding is applied.

    Returns a tuple of:
    - out: Output data, of shape (N, C, H', W') where H' and W' are given by
      H' = 1 + (H - pool_height) / stride
      W' = 1 + (W - pool_width) / stride
    - cache: (x, pool_param, mask), where mask has the shape of x and is
      built from the one-hot arg-max of every pooling column
    """
    ph = pool_param['pool_height']
    pw = pool_param['pool_width']
    stride = pool_param['stride']

    N, C, H, W = x.shape
    out_h = int(1 + (H - ph) / stride)
    out_w = int(1 + (W - pw) / stride)

    out = np.zeros((N, C, out_h, out_w))
    mask = np.zeros(x.shape)

    # Process one (sample, channel) plane at a time.
    for n, c in np.ndindex(N, C):
        plane = np.reshape(x[n][c], (1, 1, H, W))
        cols = im2col_indices(plane, ph, pw, 0, stride)

        out[n, c, :, :] = np.reshape(np.amax(cols, axis=0),
                                     (1, 1, out_h, out_w))

        # One-hot marker of the maximum in each column, folded back to the
        # plane's shape and stored for the backward pass.
        one_hot = np.zeros(cols.shape)
        one_hot[np.argmax(cols, axis=0), range(out_h * out_w)] = 1
        mask[n, c, :, :] = col2im_indices(one_hot, (1, 1, H, W),
                                          ph, pw, 0, stride)

    cache = (x, pool_param, mask)
    return out, cache
def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in
        the horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    pad, stride = conv_param['pad'], conv_param['stride']
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape

    out_h = (H + 2 * pad - HH) // stride + 1
    out_w = (W + 2 * pad - WW) // stride + 1

    patches = im2col_indices(x, HH, WW, pad, stride)
    filters = w.reshape(F, -1)

    # Add the bias with the filter axis last, then transpose back.
    responses = np.dot(filters, patches).T
    responses = (responses + b).T

    out = responses.reshape(F, out_h, out_w, N).transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param)
    return out, cache
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions
      As a side effect, the key 'out_mask' is written into this dictionary
      (a one-hot marker of the maximum in every pooling column).

    Returns a tuple of:
    - out: Output data
    - cache: (x, pool_param)
    """
    from cs231n.im2col import im2col_indices

    N, C, H, W = x.shape
    ph = pool_param['pool_height']
    pw = pool_param['pool_width']
    stride = pool_param['stride']

    out_h = (H - ph) // stride + 1
    out_w = (W - pw) // stride + 1

    # Treat every (sample, channel) plane as its own single-channel image.
    planes = x.reshape(N * C, 1, H, W)
    cols = im2col_indices(planes, ph, pw, padding=0, stride=stride)

    # One-hot marker of the winning row in each column.
    winners = np.argmax(cols, axis=0)
    one_hot = np.zeros_like(cols)
    one_hot[winners, np.arange(out_h * out_w * N * C)] = 1

    pooled = np.max(cols, axis=0)
    out = pooled.reshape(out_h, out_w, N, C).transpose(2, 3, 0, 1)

    # NOTE: mutates the caller's dictionary; it is returned inside the cache.
    pool_param['out_mask'] = one_hot

    cache = (x, pool_param)
    return out, cache
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, 4-D with the filter axis second
      (presumably (N, F, H', W'), matching the forward output)
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive
      - x: Input data of shape (N, C, H, W)
      - w: Filter weights of shape (F, C, HH, WW)
      - b: Biases, of shape (F,)

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    import cs231n.im2col as im2col

    x, w, b, conv_param = cache
    C = x.shape[1]
    F, _, HH, WW = w.shape
    pad = conv_param['pad']
    stride = conv_param['stride']

    # Bias gradient: sum over every axis except the filter axis.
    db = np.sum(dout, axis=(0, 2, 3))

    # Filter axis first, everything else flattened into columns.
    dout_rows = dout.transpose(1, 2, 3, 0).reshape(dout.shape[1], -1)

    x_cols = im2col.im2col_indices(x, HH, WW, padding=pad, stride=stride)
    w_rows = w.reshape(F, -1)

    dw = np.dot(dout_rows, x_cols.T).reshape(F, C, HH, WW)

    dx_cols = np.dot(w_rows.T, dout_rows)
    dx = im2col.col2im_indices(dx_cols, x.shape, field_height=HH,
                               field_width=WW, padding=pad, stride=stride)
    return dx, dw, db
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    x, pool_param = cache
    N, C, H, W = x.shape
    ph = pool_param['pool_height']
    pw = pool_param['pool_width']
    stride = pool_param['stride']

    # Every (sample, channel) plane becomes its own single-channel image.
    plane_shape = (N * C, 1, H, W)
    cols = im2col_indices(x.reshape(plane_shape), ph, pw,
                          padding=0, stride=stride)

    # Send each upstream value to the row that holds its column's maximum.
    winners = np.argmax(cols, axis=0)
    grad_cols = np.zeros(cols.shape)
    flat_dout = dout.transpose(2, 3, 0, 1).ravel()
    grad_cols[winners, np.arange(dout.size)] = flat_dout

    # Fold the columns back into planes, then restore x's layout.
    grad_planes = col2im_indices(grad_cols, plane_shape, ph, pw,
                                 padding=0, stride=stride)
    dx = grad_planes.reshape(x.shape)
    return dx
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data
    - cache: (x, pool_param)
    """
    ph = pool_param['pool_height']
    pw = pool_param['pool_width']
    stride = pool_param['stride']

    N, C, H, W = x.shape
    out_h = 1 + (H - ph) // stride
    out_w = 1 + (W - pw) // stride
    window = ph * pw

    # Column form of x, regrouped so that every column has ph*pw rows.
    cols = im2col_indices(x, ph, pw, 0, stride)
    per_window = (cols.reshape(C, window, -1)
                      .transpose(1, 0, 2)
                      .reshape(window, -1))

    # Max of each column, unflattened channel-first, then sample axis first.
    maxima = np.amax(per_window, axis=0)
    out = maxima.reshape(C, out_h, out_w, N).transpose(3, 0, 1, 2)

    cache = (x, pool_param)
    return out, cache
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    x, w, b, conv_param = cache
    n_filters, _, filt_h, filt_w = w.shape
    pad = conv_param['pad']
    stride = conv_param['stride']

    # Bias gradient: sum over every axis except the filter axis.
    db = np.sum(dout, axis=(0, 2, 3))

    x_cols = im2col.im2col_indices(x, filt_h, filt_w,
                                   padding=pad, stride=stride)

    # Filter axis first, everything else flattened into columns.
    dout_rows = dout.transpose(1, 2, 3, 0).reshape(n_filters, -1)

    dw = np.dot(dout_rows, x_cols.T).reshape(w.shape)

    w_rows = w.reshape(n_filters, -1)
    dx_cols = np.dot(w_rows.T, dout_rows)
    dx = im2col.col2im_indices(dx_cols, x.shape, filt_h, filt_w,
                               padding=pad, stride=stride)
    return dx, dw, db
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    x, w, b, conv_param = cache
    stride = conv_param['stride']
    pad = conv_param['pad']
    F, _, HH, WW = w.shape

    # Filter axis first, everything else flattened into columns.
    dout_rows = dout.transpose(1, 2, 3, 0).reshape(F, -1)
    x_cols = im2col_indices(x, HH, WW, padding=pad, stride=stride)
    w_rows = w.reshape(F, -1)

    # Bias gradient: sum over every axis except the filter axis.
    db = np.sum(dout, axis=(0, 2, 3))

    dw = np.reshape(dout_rows.dot(x_cols.T), w.shape)

    dx = col2im_indices(w_rows.T.dot(dout_rows), x.shape, HH, WW,
                        padding=pad, stride=stride)
    return dx, dw, db
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data, of shape (N, C, H', W')
    - cache: (x, pool_param, x_col, switches), where x_col is the column
      form of x (every (sample, channel) plane treated as a single-channel
      image) and switches is the arg-max row of every column
    """
    HH, WW = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    N, C, H, W = x.shape

    # Floor division keeps the output size an integer on Python 3 as well.
    H_ = (H - HH) // stride + 1
    W_ = (W - WW) // stride + 1

    # Column form and arg-max are only needed for the cache.
    x_col = im2col_indices(x.reshape(N * C, 1, H, W), HH, WW,
                           padding=0, stride=stride)
    switches = np.argmax(x_col, axis=0)

    # Pool by sliding the window explicitly; each step reduces the window
    # for all samples and channels at once. (The previous np.choose-based
    # result was overwritten by this loop, and np.choose only accepts a
    # limited number of choice rows, so it has been removed.)
    out = np.zeros((N, C, H_, W_))
    for i, ii in enumerate(range(0, H - HH + 1, stride)):
        for j, jj in enumerate(range(0, W - WW + 1, stride)):
            out[:, :, i, j] = np.max(x[:, :, ii:ii + HH, jj:jj + WW],
                                     axis=(2, 3))

    cache = (x, pool_param, x_col, switches)
    return out, cache
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    x, pool_param = cache
    ph = pool_param['pool_height']
    pw = pool_param['pool_width']
    stride = pool_param['stride']
    N, C, H, W = x.shape

    # Every (sample, channel) plane becomes its own single-channel image.
    plane_shape = (N * C, 1, H, W)
    cols = im2col.im2col_indices(x.reshape(plane_shape), ph, pw,
                                 padding=0, stride=stride)

    # Arg-max row of every column decides where its gradient goes.
    winners = np.argmax(cols, axis=0)
    grad_cols = np.zeros_like(cols)
    grad_cols[winners, range(winners.size)] = \
        dout.transpose(2, 3, 0, 1).ravel()

    # Fold the columns back into planes, then restore x's layout.
    grad_planes = im2col.col2im_indices(grad_cols, plane_shape, ph, pw,
                                        padding=0, stride=stride)
    dx = grad_planes.reshape(x.shape)
    return dx
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data
    - cache: (x, pool_param)
    """
    N, C, H, W = x.shape
    ph = pool_param['pool_height']
    pw = pool_param['pool_width']
    stride = pool_param['stride']

    H_out = 1 + (H - ph) // stride
    W_out = 1 + (W - pw) // stride

    # Every (sample, channel) plane becomes its own single-channel image,
    # so each column of the result has ph*pw rows.
    planes = x.reshape(N * C, 1, H, W)
    cols = im2col_indices(planes, ph, pw, padding=0, stride=stride)

    # Max of each column, unflattened and reordered to sample-first.
    pooled = np.amax(cols, axis=0)
    out = pooled.reshape(H_out, W_out, N, C).transpose(2, 3, 0, 1)

    cache = (x, pool_param)
    return out, cache
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    x, w, b, conv_param = cache
    pad, stride = conv_param['pad'], conv_param['stride']
    F, _, HH, WW = w.shape

    patches = im2col_indices(x, HH, WW, pad, stride)
    filters = w.reshape(F, -1)

    # Filter axis first, everything else flattened into columns.
    grad_rows = dout.transpose(1, 2, 3, 0).reshape(F, -1)

    # Input gradient: back through the filters, then fold to x.shape.
    dx = col2im_indices(np.dot(filters.T, grad_rows), x.shape,
                        HH, WW, pad, stride)

    # Filter gradient, reshaped to w.shape.
    dw = np.dot(grad_rows, patches.T).reshape(w.shape)

    # Bias gradient: sum of each filter's row.
    db = np.sum(grad_rows, axis=1)
    return dx, dw, db
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data
    - cache: (x, pool_param)
    """
    ph = pool_param['pool_height']
    pw = pool_param['pool_width']
    stride = pool_param['stride']
    N, C, H, W = x.shape

    # Output size (no padding).
    out_h = int(1 + (H + 2 * 0 - ph) / stride)
    out_w = int(1 + (W + 2 * 0 - pw) / stride)

    # Every (sample, channel) plane becomes its own single-channel image.
    planes = x.reshape(N * C, 1, H, W)
    cols = im2col.im2col_indices(planes, ph, pw, padding=0, stride=stride)

    # Pick the maximum of each column through its arg-max index.
    winners = np.argmax(cols, axis=0)
    pooled = cols[winners, range(winners.size)]

    out = pooled.reshape(out_h, out_w, N, C).transpose(2, 3, 0, 1)

    cache = (x, pool_param)
    return out, cache
def conv_backward_naive(dout, cache):
    """
    Backward pass for a convolutional layer, expressed as matrix products
    over the im2col representation of the input.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    from cs231n.im2col import im2col_indices, col2im_indices

    x, w, _, conv_param = cache
    pad = conv_param['pad']
    stride = conv_param['stride']
    F, _, HH, WW = w.shape
    _, C, _, _ = x.shape

    # dout is (N, F, H_new, W_new); bring the filter axis to the front and
    # flatten the rest into a (F, H_new * W_new * N) matrix.
    dout_rows = dout.transpose(1, 2, 3, 0).reshape((F, -1))

    # Gradient w.r.t. the input: project through the flattened filters and
    # fold the resulting columns back into the shape of x.
    filters_flat = w.reshape(F, -1)
    dx_cols = filters_flat.transpose().dot(dout_rows)
    dx = col2im_indices(dx_cols, x.shape, HH, WW, padding=pad, stride=stride)

    # Gradient w.r.t. the filters: x_cols is (C * HH * WW, N * H_new * W_new).
    x_cols = im2col_indices(x, HH, WW, padding=pad, stride=stride)
    dw = dout_rows.dot(x_cols.transpose()).reshape(F, C, HH, WW)

    # Gradient w.r.t. the biases: sum over every axis except the filter axis.
    db = dout.sum(axis=(0, 2, 3))

    return dx, dw, db
def conv_forward_naive(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer, computed as a single matrix
    product over the im2col representation of the input.

    The input consists of N data points, each with C channels, height H and
    width W. We convolve each input with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in
        the horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    # Reference: https://wiseodd.github.io/techblog/2016/07/16/convnet-conv-layer/
    pad = conv_param['pad']
    stride = conv_param['stride']
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape

    # Distance the filter can travel along each axis of the padded input.
    travel_h = H + 2 * pad - HH
    travel_w = W + 2 * pad - WW
    out_h = 1 + travel_h // stride
    out_w = 1 + travel_w // stride

    # The filter must tile the padded input exactly.
    assert travel_h % stride == 0
    assert travel_w % stride == 0

    import cs231n.im2col as im2col

    # e.g. x of shape (2, 3, 4, 4) with 4x4 filters -> x_cols of shape
    # (3 * 4 * 4, 2 * out_h * out_w); w of shape (3, 3, 4, 4) -> w_rows of
    # shape (3, 3 * 4 * 4).
    x_cols = im2col.im2col_indices(x, HH, WW, padding=pad, stride=stride)
    w_rows = w.reshape(F, -1)

    # One row of scores per filter; the bias is broadcast along the columns.
    scores = w_rows.dot(x_cols) + b.reshape(len(b), 1)

    # The columns are interpreted as (out_h, out_w, N), so reshape first and
    # only then move the batch axis to the front.
    out = scores.reshape(F, out_h, out_w, N).transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param)
    return out, cache
def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. We convolve each input with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in
        the horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)

    Raises:
    - ValueError: if the filter is larger than the padded input, or if the
      stride does not tile the padded input exactly (H' or W' would not be
      an integer).
    """
    # Unpack params
    pad = conv_param['pad']
    stride = conv_param['stride']
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape

    # Distance the filter can travel along each axis of the padded input.
    span_h = H + 2 * pad - HH
    span_w = W + 2 * pad - WW
    if span_h < 0 or span_w < 0:
        raise ValueError(
            'filter of size {}x{} does not fit in the padded input of size '
            '{}x{}'.format(HH, WW, H + 2 * pad, W + 2 * pad))
    if span_h % stride != 0 or span_w % stride != 0:
        raise ValueError(
            'stride {} does not evenly tile the padded input: '
            '(H + 2 * pad - HH) = {}, (W + 2 * pad - WW) = {}'.format(
                stride, span_h, span_w))

    # Floor division keeps H' and W' as ints; true division would produce
    # floats, which cannot be used as array dimensions below.
    H_out = 1 + span_h // stride
    W_out = 1 + span_w // stride

    # Column form of the input.
    x_col = im2col.im2col_indices(x, HH, WW, padding=pad, stride=stride)

    # Column form of the filters: the field covers a whole filter, with no
    # padding and unit stride.
    # NOTE(review): presumably yields one column per filter -- confirm
    # against im2col_indices.
    w_row = im2col.im2col_indices(w, HH, WW, padding=0, stride=1)

    # Bias trick: append a row of ones to x_col and the bias row to w_row so
    # that the bias is added by the same matrix product.
    x_col_1 = np.vstack((x_col, np.ones(x_col.shape[-1])))
    w_row_1 = np.vstack((w_row, b))

    # Perform the convolution
    out_ = np.dot(w_row_1.T, x_col_1)

    # Fold the result back into (N, F, H', W') using 1x1 fields.
    out = im2col.col2im_indices(out_, (N, F, H_out, W_out),
                                field_height=1, field_width=1,
                                padding=0, stride=1)

    cache = (x, w, b, conv_param)
    return out, cache
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Every output position contributes to the gradients through the input
    window it was computed from, so the gradients are accumulated window by
    window. This works for square and non-square filters alike.

    Inputs:
    - dout: Upstream derivatives, of shape (N, F, H', W').
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x, same shape as x
    - dw: Gradient with respect to w, same shape as w
    - db: Gradient with respect to b, same shape as b
    """
    x, w, b, conv_param = cache
    _, _, H, W = x.shape
    _, _, HH, WW = w.shape
    _, _, H_out, W_out = dout.shape
    pad = conv_param['pad']
    stride = conv_param['stride']

    # Work on the zero-padded input; the padding border of the gradient is
    # cropped away at the end.
    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)),
                   mode='constant')
    dx_pad = np.zeros(x_pad.shape)
    dw = np.zeros(w.shape)

    for i in range(H_out):
        top = i * stride
        for j in range(W_out):
            left = j * stride
            # Input window that produced output position (i, j): (N, C, HH, WW)
            window = x_pad[:, :, top:top + HH, left:left + WW]
            # Upstream gradient at that position: (N, F)
            grad = dout[:, :, i, j]

            # Sum over the batch axis -> (F, C, HH, WW)
            dw += np.tensordot(grad, window, axes=([0], [0]))
            # Sum over the filter axis -> (N, C, HH, WW)
            dx_pad[:, :, top:top + HH, left:left + WW] += np.tensordot(
                grad, w, axes=([1], [0]))

    # Drop the gradient that fell on the zero padding.
    dx = dx_pad[:, :, pad:H + pad, pad:W + pad]

    # Each bias is added to every output position of its filter.
    db = np.zeros(b.shape)
    db[...] = np.sum(dout, axis=(0, 2, 3)).reshape(b.shape)

    return dx, dw, db