def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    dx, dw, db = None, None, None
    #############################################################################
    # TODO: Implement the convolutional backward pass.                          #
    #############################################################################
    from cs231n import im2col

    x, w, b, conv_param = cache
    pad = conv_param['pad']
    stride = conv_param['stride']
    (N, C, H, W) = x.shape
    (F, C, HH, WW) = w.shape

    # Calculate x_col using the im2col helper function
    x_col = im2col.im2col_indices(x, field_height=HH, field_width=WW,
                                  padding=pad, stride=stride)

    # Calculate w_row using the im2col helper function
    w_row = im2col.im2col_indices(w, field_height=HH, field_width=WW,
                                  padding=0, stride=1)

    # Reshape the output gradient into col form
    dout_col = im2col.im2col_indices(dout, field_height=1, field_width=1,
                                     padding=0, stride=1)

    # Calculate and reshape the dx gradient
    dx_col = w_row.dot(dout_col)
    dx = im2col.col2im_indices(dx_col, x.shape, field_height=HH,
                               field_width=WW, padding=pad, stride=stride)

    # Calculate and reshape the dw gradient
    dw_row = x_col.dot(dout_col.T)
    dw = im2col.col2im_indices(dw_row, w.shape, field_height=HH,
                               field_width=WW, padding=0, stride=1)

    # Calculate and reshape the db gradient
    db = np.sum(dout_col, axis=1)
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
    return dx, dw, db
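# A quick gradient-check sketch for the backward pass above. This is a
# throwaway helper (not part of the assignment): it assumes a matching
# conv_forward_naive (such as the one later in this file) is the active
# definition and that cs231n.gradient_check.eval_numerical_gradient_array is
# importable; the shapes and conv_param values are illustrative only.
def _check_conv_backward_naive():
    from cs231n.gradient_check import eval_numerical_gradient_array
    np.random.seed(0)
    x = np.random.randn(4, 3, 5, 5)
    w = np.random.randn(2, 3, 3, 3)
    b = np.random.randn(2,)
    dout = np.random.randn(4, 2, 5, 5)
    conv_param = {'stride': 1, 'pad': 1}

    out, cache = conv_forward_naive(x, w, b, conv_param)
    dx, dw, db = conv_backward_naive(dout, cache)

    # Numerically estimate each gradient and compare; relative errors around
    # 1e-8 or smaller indicate a correct backward pass.
    dx_num = eval_numerical_gradient_array(
        lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
    dw_num = eval_numerical_gradient_array(
        lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
    db_num = eval_numerical_gradient_array(
        lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

    rel_error = lambda a, n: np.max(np.abs(a - n) /
                                    np.maximum(1e-8, np.abs(a) + np.abs(n)))
    print('dx error:', rel_error(dx, dx_num))
    print('dw error:', rel_error(dw, dw_num))
    print('db error:', rel_error(db, db_num))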
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max-pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param, mask) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    dx = None
    ###########################################################################
    # TODO: Implement the max-pooling backward pass                           #
    ###########################################################################
    from cs231n.im2col import im2col_indices, col2im_indices

    x, pool_param, mask = cache
    N, C, H, W = x.shape
    dx = np.zeros(x.shape)
    for i in range(N):
        for j in range(C):
            # im2col the 0/1 mask of max locations for this image/channel
            m_temp = im2col_indices(np.reshape(mask[i][j], (1, 1, H, W)),
                                    pool_param['pool_height'],
                                    pool_param['pool_width'], 0,
                                    pool_param['stride'])
            dout_temp = np.reshape(dout[i][j], (-1))
            # Route each upstream gradient to the argmax position of its pool
            dx[i, j, :, :] = col2im_indices(m_temp * dout_temp, (1, 1, H, W),
                                            pool_param['pool_height'],
                                            pool_param['pool_width'], 0,
                                            pool_param['stride'])
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return dx
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param, x_col, switches) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    #############################################################################
    # TODO: Implement the max pooling backward pass                             #
    #############################################################################
    from cs231n.im2col import col2im_indices

    x, pool_param, x_col, switches = cache
    HH, WW = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    N, C, H, W = x.shape

    # Scatter the upstream gradient into the argmax ("switch") positions
    dx_col = np.zeros_like(x_col)
    dout_flat = dout.transpose(2, 3, 0, 1).ravel()
    dx_col[switches, np.arange(switches.size)] = dout_flat
    dx = col2im_indices(dx_col, (N * C, 1, H, W), HH, WW,
                        padding=0, stride=stride).reshape(x.shape)
    #############################################################################
    #                              END OF YOUR CODE                             #
    #############################################################################
    return dx
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data
    - cache: (x, switches, pool_param)
    """
    out = None
    #############################################################################
    # TODO: Implement the max pooling forward pass                              #
    #############################################################################
    from cs231n import im2col

    # Unpack params
    (N, C, H, W) = x.shape
    HH = pool_param['pool_height']
    WW = pool_param['pool_width']
    stride = pool_param['stride']

    # Calculate H' and W' (integer division so the shapes below are ints)
    H_ = 1 + (H - HH) // stride
    W_ = 1 + (W - WW) // stride

    # Calculate x_col using the im2col helper function
    x_col = im2col.im2col_indices(x, HH, WW, padding=0, stride=stride)

    # Reshape into pools over all channels
    x_col_pools = x_col.T.reshape(-1, HH * WW).T

    # Perform the max-pooling, remembering the argmax "switches"
    switches = np.argmax(x_col_pools, axis=0)
    out_maxpool = x_col_pools[switches, np.arange(x_col_pools.shape[-1])]

    # Reshape into columns
    out_maxpool_col = out_maxpool.reshape(-1, C).T

    # Reshape the output using the col2im helper function
    out = im2col.col2im_indices(out_maxpool_col, (N, C, H_, W_),
                                field_height=1, field_width=1,
                                padding=0, stride=1)
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
    cache = (x, switches, pool_param)
    return out, cache
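# A minimal usage sketch for the forward pass above. This is a throwaway
# helper (not part of the assignment) and the input and pool_param values are
# illustrative only: a 1x1x4x4 input with 2x2 pooling and stride 2 should
# yield a 1x1x2x2 output holding the max of each non-overlapping 2x2 window.
def _demo_max_pool_forward_naive():
    x = np.arange(16, dtype=np.float64).reshape(1, 1, 4, 4)
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
    out, _ = max_pool_forward_naive(x, pool_param)
    print(out.reshape(2, 2))  # expected: [[ 5.  7.], [13. 15.]]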
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    dx = None
    #############################################################################
    # TODO: Implement the max pooling backward pass                             #
    #############################################################################
    from cs231n.im2col import im2col_indices, col2im_indices

    x, pool_param = cache
    pool_height = pool_param['pool_height']
    pool_width = pool_param['pool_width']
    stride = pool_param['stride']
    N, C, H, W = x.shape

    # Recompute the pooling columns and regroup them per channel so that each
    # column holds exactly one pool_height * pool_width window
    pool_col = im2col_indices(x, pool_height, pool_width, 0, stride)
    pool_col = pool_col.reshape(C, pool_height * pool_width, -1).transpose(
        1, 0, 2).reshape(pool_height * pool_width, -1)
    pool_max_index = np.argmax(pool_col, axis=0)

    # Scatter the upstream gradient into the max position of each window
    dx_col = np.zeros(pool_col.shape)
    dout = dout.transpose(1, 2, 3, 0).reshape(dout.size)
    dx_col[pool_max_index, range(len(pool_max_index))] = dout

    # Undo the per-channel grouping and fold the columns back into an image
    dx_col = dx_col.reshape(pool_height * pool_width, C, -1).transpose(
        1, 0, 2).reshape(C * pool_height * pool_width, -1)
    dx = col2im_indices(dx_col, x.shape, pool_height, pool_width, 0, stride)
    #############################################################################
    #                              END OF YOUR CODE                             #
    #############################################################################
    return dx
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, switches, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    dx = None
    #############################################################################
    # TODO: Implement the max pooling backward pass                             #
    #############################################################################
    from cs231n import im2col

    x, switches, pool_param = cache
    (N, C, H, W) = x.shape
    HH = pool_param['pool_height']
    WW = pool_param['pool_width']
    stride = pool_param['stride']

    # Calculate x_col using the im2col helper function
    x_col = im2col.im2col_indices(x, field_height=HH, field_width=WW,
                                  padding=0, stride=stride)

    # Reshape into pools over all channels; this must mirror the reshape used
    # in the forward pass so the cached switches index the same columns
    x_col_pools = x_col.T.reshape(-1, HH * WW).T

    # Reshape the output gradient into col form
    dout_col = im2col.im2col_indices(dout, field_height=1, field_width=1,
                                     padding=0, stride=1)

    # Since we're taking the gradient of a max function, we route the output
    # gradient to the inputs that had the max values on the forward pass,
    # using the cached switches.
    dx_col_pools = np.zeros(x_col_pools.shape)
    dx_col_pools[switches, np.arange(dx_col_pools.shape[-1])] = \
        dout_col.T.flatten()

    # Reshape into col form
    dx_col = dx_col_pools.T.reshape(x_col.T.shape).T

    # Finally, reshape dx_col back into an image
    dx = im2col.col2im_indices(dx_col, x.shape, field_height=HH,
                               field_width=WW, padding=0, stride=stride)
    #############################################################################
    #                              END OF YOUR CODE                             #
    #############################################################################
    return dx
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max-pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    No padding is necessary here.

    Returns a tuple of:
    - out: Output data, of shape (N, C, H', W') where H' and W' are given by
      H' = 1 + (H - pool_height) / stride
      W' = 1 + (W - pool_width) / stride
    - cache: (x, pool_param, mask)
    """
    out = None
    ###########################################################################
    # TODO: Implement the max-pooling forward pass                            #
    ###########################################################################
    from cs231n.im2col import im2col_indices, col2im_indices

    N, C, H, W = x.shape
    H_d = int(1 + (H - pool_param['pool_height']) / pool_param['stride'])
    W_d = int(1 + (W - pool_param['pool_width']) / pool_param['stride'])
    out = np.zeros((N, C, H_d, W_d))
    mask = np.zeros(x.shape)
    for i in range(N):
        for j in range(C):
            # Lay out each pooling window of this image/channel as a column
            x_temp = np.reshape(x[i][j], (1, 1, H, W))
            x_temp = im2col_indices(x_temp, pool_param['pool_height'],
                                    pool_param['pool_width'], 0,
                                    pool_param['stride'])
            out[i, j, :, :] = np.reshape(np.amax(x_temp, axis=0),
                                         (1, 1, H_d, W_d))
            # Record a 0/1 mask of the argmax positions for the backward pass
            inds = np.argmax(x_temp, axis=0)
            temp = np.zeros(x_temp.shape)
            temp[inds, range(H_d * W_d)] = 1
            mask[i, j, :, :] = col2im_indices(temp, (1, 1, H, W),
                                              pool_param['pool_height'],
                                              pool_param['pool_width'], 0,
                                              pool_param['stride'])
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    cache = (x, pool_param, mask)
    return out, cache
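# A small illustration of the mask idea used above, as a throwaway helper
# with made-up values (it assumes the mask-caching forward pass above is the
# active definition): the mask holds a 1 at the argmax of each pooling window
# and 0 elsewhere, so the backward pass can route upstream gradients by a
# simple elementwise multiplication.
def _demo_max_pool_mask():
    x = np.array([[1., 9., 2., 3.],
                  [4., 5., 6., 7.],
                  [8., 0., 1., 2.],
                  [3., 4., 5., 6.]]).reshape(1, 1, 4, 4)
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
    out, (_, _, mask) = max_pool_forward_naive(x, pool_param)
    print(out.reshape(2, 2))   # expected: [[9. 7.], [8. 6.]]
    print(mask.reshape(4, 4))  # 1s at the positions of 9, 7, 8 and 6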
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, of shape (N, F, H', W')
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive
      - x: Input data of shape (N, C, H, W)
      - w: Filter weights of shape (F, C, HH, WW)
      - b: Biases, of shape (F,)

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    dx, dw, db = None, None, None
    ###########################################################################
    # TODO: Implement the convolutional backward pass.                        #
    ###########################################################################
    import cs231n.im2col as im2col

    x, w, b, conv_param = cache
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride = conv_param['stride']
    pad = conv_param['pad']

    # The bias receives one gradient per filter, summed over all images and
    # all spatial positions
    db = np.sum(dout, axis=(0, 2, 3))

    # Flatten dout to (F, H'*W'*N) so each row matches one filter
    dout = dout.transpose(1, 2, 3, 0)
    dout = dout.reshape(dout.shape[0], -1)

    X_col = im2col.im2col_indices(x, HH, WW, padding=pad, stride=stride)
    W_col = w.reshape(F, -1)

    dw = np.dot(dout, X_col.T)
    dx = np.dot(W_col.T, dout)
    dw = dw.reshape(F, C, HH, WW)
    dx = im2col.col2im_indices(dx, x.shape, field_height=HH, field_width=WW,
                               padding=pad, stride=stride)
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return dx, dw, db
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    dx = None
    ###########################################################################
    # TODO: Implement the max pooling backward pass                           #
    ###########################################################################
    from cs231n.im2col import im2col_indices, col2im_indices

    x, pool_param = cache
    num_data, num_channels, im_height, im_width = x.shape
    pool_height, pool_width, stride = (pool_param['pool_height'],
                                       pool_param['pool_width'],
                                       pool_param['stride'])

    # Treat every (image, channel) slice as its own single-channel image so
    # that each pooling window becomes exactly one column
    x_reshaped = x.reshape(num_data * num_channels, 1, im_height, im_width)
    x_col = im2col_indices(x_reshaped, pool_height, pool_width,
                           padding=0, stride=stride)

    # Route each upstream gradient to the argmax position of its window
    dx_col = np.zeros(x_col.shape)
    max_idx = np.argmax(x_col, axis=0)
    dx_col[max_idx, np.arange(dout.size)] = dout.transpose(2, 3, 0, 1).ravel()

    dx = col2im_indices(dx_col,
                        (num_data * num_channels, 1, im_height, im_width),
                        pool_height, pool_width, padding=0, stride=stride)
    dx = dx.reshape(x.shape)
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return dx
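# A quick numerical check for the pooling backward pass above, as a throwaway
# sketch: it assumes this backward (which recomputes the argmax from x, so it
# only needs a (x, pool_param) cache) and some correct max_pool_forward_naive
# are the active definitions, and that
# cs231n.gradient_check.eval_numerical_gradient_array is importable; shapes
# and pool_param values are illustrative only.
def _check_max_pool_backward_naive():
    from cs231n.gradient_check import eval_numerical_gradient_array
    np.random.seed(0)
    x = np.random.randn(3, 2, 8, 8)
    dout = np.random.randn(3, 2, 4, 4)
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

    dx = max_pool_backward_naive(dout, (x, pool_param))
    dx_num = eval_numerical_gradient_array(
        lambda x: max_pool_forward_naive(x, pool_param)[0], x, dout)

    # A small relative error (around 1e-12) indicates a correct backward pass
    print('dx error:', np.max(np.abs(dx - dx_num) /
                              np.maximum(1e-8, np.abs(dx) + np.abs(dx_num))))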
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    dx, dw, db = None, None, None
    ###########################################################################
    # TODO: Implement the convolutional backward pass.                        #
    ###########################################################################
    from cs231n.im2col import im2col_indices, col2im_indices

    x, w, b, conv_param = cache
    pad, stride = conv_param['pad'], conv_param['stride']
    num_filters, num_channels, filter_height, filter_width = w.shape

    # Flatten dout to (num_filters, H'*W'*N) and the input to im2col form
    dout_matrix = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)
    cols = im2col_indices(x, filter_height, filter_width,
                          padding=pad, stride=stride)
    filter_matrix = w.reshape(num_filters, -1)

    db = np.sum(dout, axis=(0, 2, 3))
    dw = np.reshape(dout_matrix.dot(cols.T), w.shape)
    dx_col = filter_matrix.T.dot(dout_matrix)
    dx = col2im_indices(dx_col, x.shape, filter_height, filter_width,
                        padding=pad, stride=stride)
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return dx, dw, db
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    dx, dw, db = None, None, None
    from cs231n import im2col

    x, w, b, conv_param = cache
    N, C, H, W = x.shape
    f_filter, c_filter, h_filter, w_filter = w.shape
    ###########################################################################
    # TODO: Implement the convolutional backward pass.                        #
    ###########################################################################
    db = np.sum(dout, axis=(0, 2, 3))

    x_col = im2col.im2col_indices(x, h_filter, w_filter,
                                  padding=conv_param['pad'],
                                  stride=conv_param['stride'])
    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(f_filter, -1)

    dw = np.dot(dout_reshaped, x_col.T)
    dw = dw.reshape(w.shape)

    w_shape = w.reshape(f_filter, -1)
    dX_col = np.dot(w_shape.T, dout_reshaped)
    dx = im2col.col2im_indices(dX_col, x.shape, h_filter, w_filter,
                               padding=conv_param['pad'],
                               stride=conv_param['stride'])
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return dx, dw, db
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    dx = None
    ###########################################################################
    # TODO: Implement the max pooling backward pass                           #
    ###########################################################################
    from cs231n import im2col

    x, pool_param = cache
    N, C, H, W = x.shape

    # Treat every (image, channel) slice as its own single-channel image
    x_reshaped = x.reshape(N * C, 1, H, W)
    x_col = im2col.im2col_indices(x_reshaped, pool_param['pool_height'],
                                  pool_param['pool_width'],
                                  padding=0, stride=pool_param['stride'])

    # Route each upstream gradient to the argmax position of its window
    max_idx = np.argmax(x_col, axis=0)
    dx_col = np.zeros_like(x_col)
    dout_ = dout.transpose(2, 3, 0, 1).ravel()
    dx_col[max_idx, range(max_idx.size)] = dout_

    dx = im2col.col2im_indices(dx_col, (N * C, 1, H, W),
                               pool_param['pool_height'],
                               pool_param['pool_width'],
                               padding=0, stride=pool_param['stride'])
    dx = dx.reshape(x.shape)
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return dx
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    dx, dw, db = None, None, None
    #############################################################################
    # TODO: Implement the convolutional backward pass.                          #
    #############################################################################
    from cs231n.im2col import im2col_indices, col2im_indices

    x, w, b, conv_param = cache
    stride = conv_param['stride']
    pad = conv_param['pad']
    N, C, H, W = x.shape
    F, C, HH, WW = w.shape

    # im2col form of the input and (F, C*HH*WW) form of the filters
    x_col = im2col_indices(x, HH, WW, pad, stride)
    w_transfer = w.reshape(F, -1)
    dout_t = dout.transpose(1, 2, 3, 0).reshape(F, -1)

    dx_col = np.dot(w_transfer.T, dout_t)
    dx = col2im_indices(dx_col, x.shape, HH, WW, pad, stride)

    dw_transfer = np.dot(dout_t, x_col.T)
    dw = dw_transfer.reshape(w.shape)

    db = np.sum(dout_t, axis=1)
    #############################################################################
    #                              END OF YOUR CODE                             #
    #############################################################################
    return dx, dw, db
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    from cs231n.im2col import im2col_indices, col2im_indices

    x, w, b, conv_param = cache
    stride, pad = conv_param['stride'], conv_param['pad']
    F, C, HH, WW = w.shape
    N, C, H, W = x.shape
    dx, dw, db = None, None, None
    ###########################################################################
    # TODO: Implement the convolutional backward pass.                        #
    ###########################################################################
    db = np.sum(dout, axis=(0, 2, 3))

    # dout has shape (N, F, H_new, W_new)
    # dout_reshape has shape (F, H_new*W_new*N)
    dout_reshape = dout.transpose(1, 2, 3, 0).reshape((F, -1))

    # x_col has shape (C*HH*WW, N*H_new*W_new)
    x_col = im2col_indices(x, HH, WW, padding=pad, stride=stride)

    dw_ = np.dot(dout_reshape, x_col.transpose())
    dw = dw_.reshape(F, C, HH, WW)

    w_reshape = w.reshape(F, -1)
    dx_ = np.dot(w_reshape.transpose(), dout_reshape)
    dx = col2im_indices(dx_, x.shape, HH, WW, padding=pad, stride=stride)
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return dx, dw, db
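# Why the two matrix products above give the gradients, in brief: with
# im2col, the forward pass is a plain matrix multiply, out_col = W_col . x_col.
# For any Y = A . B, backprop through a matmul gives dA = dY . B^T and
# dB = A^T . dY, so here dw_col = dout_col . x_col^T and
# dx_col = W_col^T . dout_col, which is exactly what the code computes before
# folding the columns back into an image with col2im.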
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    from cs231n.im2col import im2col_indices, col2im_indices
    dx = None
    ###########################################################################
    # TODO: Implement the max pooling backward pass                           #
    ###########################################################################
    x, pool_param = cache
    N, C, H, W = x.shape
    pool_height, pool_width, stride = (pool_param['pool_height'],
                                       pool_param['pool_width'],
                                       pool_param['stride'])
    # This version relies on the forward pass having stored a 0/1 mask of the
    # argmax positions in pool_param under the key 'out_mask'
    out_mask = pool_param['out_mask']

    N, C, H_new, W_new = dout.shape
    dout_ = dout.transpose(2, 3, 0, 1).reshape([N * C * H_new * W_new])

    # Route the upstream gradients through the cached mask, then fold the
    # columns back into image form
    dx_ = dout_ * out_mask
    dx_ = col2im_indices(dx_, [N * C, 1, H, W], pool_height, pool_width,
                         padding=0, stride=stride)
    dx = dx_.reshape(N, C, H, W)
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return dx
def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. We convolve each input with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in the
        horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    out = None
    #############################################################################
    # TODO: Implement the convolutional forward pass.                           #
    # Hint: you can use the function np.pad for padding.                        #
    #############################################################################
    from cs231n import im2col

    # Unpack params
    pad = conv_param['pad']
    stride = conv_param['stride']
    (N, C, H, W) = x.shape
    (F, C, HH, WW) = w.shape

    # Calculate H' and W', checking that the filters tile the input exactly
    assert (H + 2 * pad - HH) % stride == 0, 'filters do not tile the height'
    assert (W + 2 * pad - WW) % stride == 0, 'filters do not tile the width'
    H_ = 1 + (H + 2 * pad - HH) // stride
    W_ = 1 + (W + 2 * pad - WW) // stride

    # Calculate x_col using the im2col helper function
    x_col = im2col.im2col_indices(x, HH, WW, padding=pad, stride=stride)

    # Calculate w_row using the im2col helper function
    w_row = im2col.im2col_indices(w, HH, WW, padding=0, stride=1)

    # Pad out x_col with ones so we can use the bias trick
    x_col_1 = np.vstack((x_col, np.ones(x_col.shape[-1])))

    # Pad out w_row with the bias term b
    w_row_1 = np.vstack((w_row, b))

    # Perform the convolution
    out_ = np.dot(w_row_1.T, x_col_1)

    # Reshape the output using the col2im helper function
    out = im2col.col2im_indices(out_, (N, F, H_, W_), field_height=1,
                                field_width=1, padding=0, stride=1)
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
    cache = (x, w, b, conv_param)
    return out, cache
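# A minimal usage sketch for the forward pass above, as a throwaway helper
# with illustrative shapes only: a 4x3x5x5 batch convolved with two 3x3
# filters at stride 1 and pad 1 should produce output of shape (4, 2, 5, 5),
# since H' = 1 + (5 + 2*1 - 3) / 1 = 5 and likewise for W'.
def _demo_conv_forward_naive():
    np.random.seed(0)
    x = np.random.randn(4, 3, 5, 5)
    w = np.random.randn(2, 3, 3, 3)
    b = np.random.randn(2,)
    conv_param = {'stride': 1, 'pad': 1}
    out, _ = conv_forward_naive(x, w, b, conv_param)
    print(out.shape)  # expected: (4, 2, 5, 5)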