def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, shape (N, F, out_h, out_w).
    - cache: A tuple of (x, w, b, conv_param, X_col) as produced by the
      forward pass (note: includes the im2col column matrix X_col, not
      just the 4-tuple the forward docstring mentions).

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    dx, dw, db = None, None, None
    #############################################################################
    # TODO: Implement the convolutional backward pass.                          #
    #############################################################################
    x, w, b, conv_param, X_col = cache
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, out_h, out_w = dout.shape
    stride, pad = conv_param['stride'], conv_param['pad']
    # Bias gradient: sum upstream derivatives over every axis except filters.
    db = np.sum(dout, axis=(0, 2, 3))
    # Flatten upstream gradients to (F, N*out_h*out_w) to match X_col layout.
    dout = dout.transpose(1, 0, 2, 3).reshape(F, -1)
    # Weight gradient as one matrix product against the cached columns.
    dw = np.dot(dout, X_col.T).reshape(w.shape)
    dx = np.dot(w.reshape(F, -1).T, dout)
    # dx is currently in column form; col2im scatters it back into image
    # layout, summing the entries that were read multiple times by the
    # overlapping windows of the forward pass.
    dx.shape = (C, HH, WW, N, out_h, out_w)
    from cs231n.im2col_cython import col2im_6d_cython
    dx = col2im_6d_cython(dx, N, C, H, W, HH, WW, pad, stride)
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
    return dx, dw, db
def conv_backward_strides(dout, cache):
    """
    Backward pass for a convolutional layer using cached im2col columns.

    Inputs:
    - dout: Upstream derivatives, shape (N, F, out_h, out_w).
    - cache: Tuple of (x, w, b, conv_param, x_cols) from the forward pass.

    Returns a tuple of:
    - dx: Gradient with respect to x, shape (N, C, H, W)
    - dw: Gradient with respect to w, shape (F, C, HH, WW)
    - db: Gradient with respect to b, shape (F,)
    """
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']

    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, out_h, out_w = dout.shape

    # Bias gradient: sum upstream derivatives over every axis except filters.
    db = np.sum(dout, axis=(0, 2, 3))

    # Rebind `dout` to its flattened (F, N*out_h*out_w) form to save memory:
    # the original 4-D array is not needed again after this point.
    dout = dout.transpose(1, 0, 2, 3).reshape(F, -1)

    # Weight gradient as one matrix product against the cached columns.
    dw = dout.dot(x_cols.T).reshape(w.shape)

    # Input gradient in column form; col2im scatters it back into image
    # layout, summing entries that overlapping windows read multiple times.
    dx_cols = w.reshape(F, -1).T.dot(dout)
    dx_cols.shape = (C, HH, WW, N, out_h, out_w)
    dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride)

    return dx, dw, db
def conv_backward_strides(dout, cache):
    """
    Backward pass for a convolutional layer; tolerates 3-D weights.

    Inputs:
    - dout: Upstream derivatives, shape (N, F, out_h, out_w).
    - cache: Tuple of (x, w, b, conv_param, x_cols) from the forward pass.
      w may be (F, C, HH, WW), or (C, HH, WW) for a single filter.

    Returns a tuple of:
    - dx: Gradient with respect to x, shape (N, C, H, W)
    - dw: Gradient with respect to w, shape (F, C, HH, WW)
      (4-D even when w was passed in as 3-D, matching the promoted shape)
    - db: Gradient with respect to b, shape (F,)
    """
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']
    N, C, H, W = x.shape

    # Promote a single 3-D filter to 4-D so the reshapes below work uniformly.
    if w.ndim <= 3:
        w = w.reshape((1,) + w.shape)
    F, _, HH, WW = w.shape
    _, _, out_h, out_w = dout.shape

    # Bias gradient: sum upstream derivatives over every axis except filters.
    db = np.sum(dout, axis=(0, 2, 3))

    # Flatten upstream gradients to (F, N*out_h*out_w) to match x_cols layout.
    dout_reshaped = dout.transpose(1, 0, 2, 3).reshape(F, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    # Input gradient in column form; col2im scatters it back into image
    # layout, summing entries that overlapping windows read multiple times.
    dx_cols = w.reshape(F, -1).T.dot(dout_reshaped)
    dx_cols.shape = (C, HH, WW, N, out_h, out_w)
    dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride)
    return dx, dw, db
def conv_backward_strides(dout, cache):
    """
    Compute the gradients of a convolutional layer from cached im2col columns.

    Inputs:
    - dout: Upstream derivatives, shape (N, F, out_h, out_w).
    - cache: Tuple of (x, w, b, conv_param, x_cols) from the forward pass.

    Returns a tuple of:
    - dx: Gradient with respect to x, shape (N, C, H, W)
    - dw: Gradient with respect to w, same shape as w
    - db: Gradient with respect to b, shape (F,)
    """
    x, w, b, conv_param, x_cols = cache
    pad = conv_param['pad']
    stride = conv_param['stride']

    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    out_h, out_w = dout.shape[2], dout.shape[3]

    # Bias gradient: collapse every axis except the filter axis.
    db = dout.sum(axis=(0, 2, 3))

    # Flatten upstream gradients to (F, N*out_h*out_w) to match x_cols layout.
    dout_flat = dout.transpose(1, 0, 2, 3).reshape(F, -1)

    # Weight gradient as a single matrix product with the cached columns.
    dw = dout_flat.dot(x_cols.T).reshape(w.shape)

    # Input gradient in column form, then scatter-add back to image layout
    # (overlapping-window contributions are summed by col2im).
    dx_cols = w.reshape(F, -1).T.dot(dout_flat)
    dx_cols = dx_cols.reshape(C, HH, WW, N, out_h, out_w)
    dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride)

    return dx, dw, db