def bprop_conv(imgs, imgs_cols, convout_grad, filters, stride, pad):
    """ Back-propagate gradients of convolution.

    imgs has shape (n_imgs, n_channels_in, img_h, img_w)
    imgs_cols is the pre-computed im2col of imgs
    filters has shape (n_channels_in, n_channels_out, filter_h, filter_w)
    convout_grad has shape (n_imgs, n_channels_out, out_h, out_w)
    """
    N, C, H, W = imgs.shape
    w = np.transpose(filters, axes=(1, 0, 2, 3))
    num_filters, _, filter_height, filter_width = w.shape

    # First reshape the output gradient so that the first dimension is the
    # number of filters and the second dimension is the rest. This lets us
    # compute the gradient with respect to the weights as a simple dot
    # product (as in a fully connected layer).
    out_grad_reshaped = convout_grad.transpose(1, 2, 3, 0).reshape(num_filters, -1)
    dW = out_grad_reshaped.dot(imgs_cols.T).reshape(w.shape)
    # Flip dimensions to match the filter layout and divide by N for proper scaling.
    dFilter = np.transpose(dW, axes=(1, 0, 2, 3)) / N

    # Next compute the gradient with respect to the input, which again is a
    # simple dot product, as in a fully connected layer.
    dImg_cols = w.reshape(num_filters, -1).T.dot(out_grad_reshaped)
    # Finally, call col2im to distribute the column gradients back to their
    # image positions (overlapping contributions are summed).
    dImg = col2im_cython(dImg_cols, N, C, H, W, filter_height, filter_width,
                         pad, stride)
    return dImg, dFilter

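# --- Usage sketch (not from the original source): a hypothetical call showing
# the expected shapes. im2col_cython is assumed to take
# (imgs, filter_height, filter_width, pad, stride), as in the snippets below.
imgs = np.random.randn(8, 3, 32, 32)            # (n_imgs, n_channels_in, H, W)
filters = np.random.randn(3, 16, 3, 3)          # (n_channels_in, n_channels_out, fh, fw)
stride, pad = 1, 1
imgs_cols = im2col_cython(imgs, 3, 3, pad, stride)
convout_grad = np.random.randn(8, 16, 32, 32)   # gradient from the layer above
dImg, dFilter = bprop_conv(imgs, imgs_cols, convout_grad, filters, stride, pad)
assert dImg.shape == imgs.shape and dFilter.shape == filters.shape
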
def avg_pool_backward_im2col(dout, cache):
    '''
    An implementation of the backward pass for avg pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible. It may give incorrect results if the pooling regions are not
    square or do not tile the input.
    '''
    x_shape, x_cols, pool_param = cache
    N, C, H, W = x_shape
    pool_height = pool_param['pool_height']
    pool_width = pool_param['pool_width']
    stride = pool_param['stride']
    pool_dim = pool_height * pool_width

    # Every input element in a pooling window receives an equal 1/pool_dim
    # share of the corresponding output gradient.
    dout_reshaped = dout.transpose(2, 3, 0, 1).flatten()
    dx_cols = np.zeros_like(x_cols)
    dx_cols[:, np.arange(dx_cols.shape[1])] = 1. / pool_dim * dout_reshaped
    dx = col2im_cython(dx_cols, N * C, 1, H, W, pool_height, pool_width,
                       padding=0, stride=stride)
    dx = dx.reshape(x_shape)
    return dx

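# --- Minimal numeric illustration of the broadcast above (hypothetical
# values): with a 2x2 pool (pool_dim = 4), every row of a column receives an
# equal 1/4 share of that window's output gradient.
_dout_flat = np.array([8., 12.])                 # gradients for 2 windows
_dx_cols = np.zeros((4, 2))
_dx_cols[:, np.arange(2)] = 1. / 4 * _dout_flat
# _dx_cols -> [[2., 3.], [2., 3.], [2., 3.], [2., 3.]]
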
def backward_inputs(self, grad_out):
    # Nice trick from CS231n: the backward pass can be done with just a
    # matrix multiplication and col2im.
    grad_out = grad_out.transpose(1, 2, 3, 0).reshape(self.num_filters, -1)
    # self.weights is assumed to be stored flattened as (num_filters, C * k * k).
    grad_x_cols = self.weights.T.dot(grad_out)
    N, C, H, W = self.input_shape
    k = self.kernel_size
    grad_x = col2im_cython(grad_x_cols, N, C, H, W, k, k, self.pad, self.stride)
    return grad_x

def bprop_max_pool(imgs, imgs_cols, imgs_argmax, poolout_grad,
                   pool_height, pool_width, stride):
    N, C, H, W = imgs.shape
    # First reorder and flatten the output gradient so that we can simply
    # scatter the gradient to the maximum positions.
    poolout_grad_reshaped = poolout_grad.transpose(2, 3, 0, 1).flatten()
    dImg_cols = np.zeros_like(imgs_cols)
    dImg_cols[imgs_argmax, np.arange(imgs_cols.shape[1])] = poolout_grad_reshaped
    dImg = col2im_cython(dImg_cols, N * C, 1, H, W, pool_height, pool_width,
                         0, stride)
    return dImg.reshape(imgs.shape)

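# --- Minimal numeric illustration of the argmax scatter used above
# (hypothetical values): x_cols has one row per position inside a 2x2 window
# and one column per window; only the argmax row of each column receives the
# output gradient, all other rows stay zero.
_x_cols = np.array([[1., 5.],
                    [4., 2.],
                    [3., 8.],
                    [2., 0.]])                     # 4 window positions x 2 windows
_argmax = np.argmax(_x_cols, axis=0)               # -> [1, 2]
_d = np.zeros_like(_x_cols)
_d[_argmax, np.arange(_x_cols.shape[1])] = np.array([10., 20.])
# _d -> [[0., 0.], [10., 0.], [0., 20.], [0., 0.]]
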
def conv_backward_im2col(dout, cache):
    """
    A fast implementation of the backward pass for a convolutional layer
    based on im2col and col2im.
    """
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']

    db = np.sum(dout, axis=(0, 2, 3))

    num_filters, _, filter_height, filter_width = w.shape
    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    dx_cols = w.reshape(num_filters, -1).T.dot(dout_reshaped)
    # dx = col2im_indices(dx_cols, x.shape, filter_height, filter_width, pad, stride)
    dx = col2im_cython(dx_cols, x.shape[0], x.shape[1], x.shape[2], x.shape[3],
                       filter_height, filter_width, pad, stride)
    return dx, dw, db

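# --- The commented-out col2im_indices above is the pure-NumPy fallback for
# col2im_cython. A sketch along the lines of the well-known CS231n reference
# implementation follows; the names and the column layout (rows indexed by
# C * filter_height * filter_width, columns ordered spatially then by image)
# are assumptions made here to match the Cython version.
import numpy as np

def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
    # Index arrays mapping each column entry back to its (channel, row, col)
    # position in the padded input.
    N, C, H, W = x_shape
    out_height = (H + 2 * padding - field_height) // stride + 1
    out_width = (W + 2 * padding - field_width) // stride + 1

    i0 = np.repeat(np.arange(field_height), field_width)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j0 = np.tile(np.arange(field_width), field_height * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)
    k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)
    return (k, i, j)

def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1, stride=1):
    # Inverse of im2col: scatter-add the columns back into the padded image
    # (overlapping patches sum their contributions), then strip the padding.
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * padding, W + 2 * padding
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride)
    cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
    cols_reshaped = cols_reshaped.transpose(2, 0, 1)
    np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
    if padding == 0:
        return x_padded
    return x_padded[:, :, padding:-padding, padding:-padding]
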
def backward_netIn_to_prevLayer_netAct(self, d_upstream):
    '''Computes the fast convolution backward pass using the im2col
    algorithm, C-compiled via Cython.

    Parameters:
    -----------
    d_upstream: Upstream gradient

    Returns:
    -----------
    dprev_net_act: Gradient with respect to the layer below
    d_wts: Weight gradient of the current layer
    d_b: Bias gradient of the current layer
    '''
    batch_sz, n_chans, img_y, img_x = self.input.shape
    n_kers, n_ker_chans, ker_x, ker_y = self.wts.shape
    ker_sz = ker_x  # assumes square kernels
    stride = 1
    pad = int(np.ceil((ker_sz - 1) / 2))

    # bias gradient
    d_b = np.sum(d_upstream, axis=(0, 2, 3))

    # wt gradient
    #
    # reshape upstream grad to be compatible with im2col format
    d_upstream = d_upstream.transpose(1, 2, 3, 0)
    d_upstream = d_upstream.reshape(n_kers, -1)
    d_wts = d_upstream @ self.input_cols.T
    d_wts = d_wts.reshape(self.wts.shape)

    # prev layer net_act grad
    #
    dprev_net_act_cols = self.wts.reshape(n_kers, -1).T @ d_upstream
    dprev_net_act = im2col_cython.col2im_cython(dprev_net_act_cols, batch_sz,
                                                n_chans, img_y, img_x, ker_x,
                                                ker_y, pad, stride)
    return dprev_net_act, d_wts, d_b

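# For reference: with a 3x3 kernel the padding formula above gives
# pad = ceil((3 - 1) / 2) = 1, i.e. 'same' padding at stride 1, so
# dprev_net_act keeps the input's spatial size.
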
def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    stride, pad = self.stride, self.pad
    x = in_data[0].asnumpy()
    w_real = in_data[1].asnumpy()
    x_n, x_d, x_h, x_w = x.shape
    f_n, _, f_h, f_w = w_real.shape

    # convert to columns
    x_cols = im2col_cython(x, f_w, f_h, pad[0], stride[0])

    dout = out_grad[0].asnumpy()  # (x_n, f_n, out_h, out_w)
    dout_ = dout.transpose(1, 2, 3, 0).reshape(f_n, -1)
    dwbin = dout_.dot(x_cols.T).reshape(self.wbin.shape)
    db = np.sum(dout, axis=(0, 2, 3))  # (f_n,)
    dx_cols = self.wbin.reshape(f_n, -1).T.dot(dout_)
    dx = col2im_cython(dx_cols, x_n, x_d, x_h, x_w, f_h, f_w,
                       pad[0], stride[0])

    # propagate the binarized-weight gradient back to the real-valued weights
    dw = self.update_grad(w_real, dwbin)
    self.assign(in_grad[0], req[0], mx.nd.array(dx))
    self.assign(in_grad[1], req[1], mx.nd.array(dw))

def max_pool_backward_im2col(dout, cache):
    '''
    An implementation of the backward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.
    '''
    x_shape, x_cols, x_cols_argmax, pool_param = cache
    N, C, H, W = x_shape
    pool_height = pool_param['pool_height']
    pool_width = pool_param['pool_width']
    stride = pool_param['stride']

    dout_reshaped = dout.transpose(2, 3, 0, 1).flatten()
    dx_cols = np.zeros_like(x_cols)
    dx_cols[x_cols_argmax, np.arange(dx_cols.shape[1])] = dout_reshaped
    dx = col2im_cython(dx_cols, N * C, 1, H, W, pool_height, pool_width,
                       padding=0, stride=stride)
    dx = dx.reshape(x_shape)
    return dx

def backward(self, dout):
    x = self.x
    w = self.w
    b = self.b
    x_cols = self.x_cols
    stride, pad = self.stride, self.pad

    db = np.sum(dout, axis=(0, 2, 3))

    num_filters, _, filter_height, filter_width = w.shape
    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    dx_cols = w.reshape(num_filters, -1).T.dot(dout_reshaped)
    dx = col2im_cython(dx_cols, x.shape[0], x.shape[1], x.shape[2], x.shape[3],
                       filter_height, filter_width, pad, stride)

    self.w = self.wupdate.update(self.w, dw)
    self.b = self.bupdate.update(self.b, db)
    return dx
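
# --- All of the backward passes above can be sanity-checked against a
# centered finite-difference gradient. A minimal checker sketch follows
# (a hypothetical helper, in the style of CS231n's
# eval_numerical_gradient_array; not part of the snippets above).
def numeric_grad_array(f, x, df, h=1e-5):
    # Centered-difference gradient of array-valued f at x, contracted with
    # the upstream gradient df: grad[ix] = sum((f(x+h) - f(x-h)) * df) / (2h).
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        pos = f(x).copy()
        x[ix] = old - h
        neg = f(x).copy()
        x[ix] = old
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad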