def _im2col_gpu(img, kernel_size, stride, pad):
    """im2col function for GPU.

    This code is ported from Chainer:
    https://github.com/chainer/chainer/blob/v6.4.0/chainer/utils/conv.py
    """
    n, c, h, w = img.shape
    kh, kw = pair(kernel_size)
    sy, sx = pair(stride)
    ph, pw = pair(pad)
    out_h = get_conv_outsize(h, kh, sy, ph)
    out_w = get_conv_outsize(w, kw, sx, pw)
    dy, dx = 1, 1
    col = cuda.cupy.empty((n, c, kh, kw, out_h, out_w), dtype=img.dtype)

    cuda.cupy.ElementwiseKernel(
        'raw T img, int32 h, int32 w, int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw,'
        'int32 dy, int32 dx',
        'T col',
        '''
        int c0 = i / (kh * kw * out_h * out_w);
        int ky = i / (kw * out_h * out_w) % kh;
        int kx = i / (out_h * out_w) % kw;
        int out_y = i / out_w % out_h;
        int out_x = i % out_w;
        int in_y = ky * dy + out_y * sy - ph;
        int in_x = kx * dx + out_x * sx - pw;
        if (in_y >= 0 && in_y < h && in_x >= 0 && in_x < w) {
            col = img[in_x + w * (in_y + h * c0)];
        } else {
            col = 0;
        }
        ''',
        'im2col')(img.reduced_view(),
                  h, w, out_h, out_w, kh, kw, sy, sx, ph, pw, dy, dx, col)

    return col
def im2col_array(img, kernel_size, stride, pad, to_matrix=True):
    N, C, H, W = img.shape
    KH, KW = pair(kernel_size)
    SH, SW = pair(stride)
    PH, PW = pair(pad)
    OH = get_conv_outsize(H, KH, SH, PH)
    OW = get_conv_outsize(W, KW, SW, PW)

    img = np.pad(img,
                 ((0, 0), (0, 0), (PH, PH + SH - 1), (PW, PW + SW - 1)),
                 mode='constant', constant_values=(0,))
    col = np.ndarray((N, C, KH, KW, OH, OW), dtype=img.dtype)

    # fig 57-1, mini-batch version
    for j in range(KH):
        j_lim = j + SH * OH
        for i in range(KW):
            i_lim = i + SW * OW
            # Assign the data covered by the kernel at offset (j, i).
            # https://qiita.com/jun40vn/items/d2e8711cabc9cfb1e0d5
            # added batch dim and channel dim
            col[:, :, j, i, :, :] = img[:, :, j:j_lim:SH, i:i_lim:SW]

    # reshape to matrix form
    if to_matrix:  # fig 57-1
        col = col.transpose((0, 4, 5, 1, 2, 3)).reshape((N * OH * OW, -1))

    return col
def im2col_array(img, kernel_size, stride, pad, to_matrix=True):
    N, C, H, W = img.shape
    KH, KW = pair(kernel_size)
    SH, SW = pair(stride)
    PH, PW = pair(pad)
    OH = get_conv_outsize(H, KH, SH, PH)
    OW = get_conv_outsize(W, KW, SW, PW)

    xp = cuda.get_array_module(img)
    if xp != np:
        col = _im2col_gpu(img, kernel_size, stride, pad)
    else:
        img = np.pad(img,
                     ((0, 0), (0, 0), (PH, PH + SH - 1), (PW, PW + SW - 1)),
                     mode='constant', constant_values=(0,))
        col = np.ndarray((N, C, KH, KW, OH, OW), dtype=img.dtype)

        for j in range(KH):
            j_lim = j + SH * OH
            for i in range(KW):
                i_lim = i + SW * OW
                col[:, :, j, i, :, :] = img[:, :, j:j_lim:SH, i:i_lim:SW]

    if to_matrix:
        col = col.transpose((0, 4, 5, 1, 2, 3)).reshape((N * OH * OW, -1))

    return col
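# A minimal shape check for im2col_array above (a sketch; it assumes numpy is
# imported as np and that pair()/get_conv_outsize() from this module are in
# scope; the input values are purely illustrative).
# With N=1, C=1, H=W=4, a 2x2 kernel, stride 2 and no padding,
# OH = OW = (4 + 2*0 - 2) // 2 + 1 = 2, so the matrix form has
# N*OH*OW = 4 rows and C*KH*KW = 4 columns.
x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
col = im2col_array(x, kernel_size=2, stride=2, pad=0, to_matrix=True)
print(col.shape)  # (4, 4)
print(col[0])     # first 2x2 patch, flattened: [0. 1. 4. 5.]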
def pooling_simple(x, kernel_size, stride=1, pad=0):
    x = as_variable(x)

    N, C, H, W = x.shape
    KH, KW = pair(kernel_size)
    PH, PW = pair(pad)
    SH, SW = pair(stride)
    OH = get_conv_outsize(H, KH, SH, PH)
    OW = get_conv_outsize(W, KW, SW, PW)

    col = im2col(x, kernel_size, stride, pad, to_matrix=True)
    col = col.reshape(-1, KH * KW)
    y = col.max(axis=1)
    y = y.reshape(N, OH, OW, C).transpose(0, 3, 1, 2)
    return y
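# Usage sketch for pooling_simple (max pooling). It assumes the returned
# DeZero-style Variable exposes .shape and .data; the input values are
# illustrative. A (1, 1, 4, 4) input with a 2x2 window and stride 2 keeps
# the maximum of each non-overlapping 2x2 block, giving (1, 1, 2, 2).
x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
y = pooling_simple(x, kernel_size=2, stride=2, pad=0)
print(y.shape)  # (1, 1, 2, 2)
print(y.data)   # [[[[ 5.  7.] [13. 15.]]]]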
def conv2d_simple(x, W, b=None, stride=1, pad=0):
    x, W = as_variable(x), as_variable(W)

    Weight = W  # keep the kernel; the name W is reused below for the input width
    N, C, H, W = x.shape
    OC, C, KH, KW = Weight.shape
    SH, SW = pair(stride)
    PH, PW = pair(pad)
    OH = get_conv_outsize(H, KH, SH, PH)
    OW = get_conv_outsize(W, KW, SW, PW)

    col = im2col(x, (KH, KW), stride, pad, to_matrix=True)
    Weight = Weight.reshape(OC, -1).transpose()
    t = linear(col, Weight, b)
    y = t.reshape(N, OH, OW, OC).transpose(0, 3, 1, 2)
    return y
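# Shape sketch for conv2d_simple. The arrays here are hypothetical inputs,
# only meant to show how the output size follows from get_conv_outsize:
# with an (1, 3, 8, 8) input, 16 output channels, a 3x3 kernel, stride 1 and
# pad 1, the spatial size is preserved: OH = OW = (8 + 2*1 - 3) // 1 + 1 = 8.
x = np.random.randn(1, 3, 8, 8).astype(np.float32)
W = np.random.randn(16, 3, 3, 3).astype(np.float32)
b = np.zeros(16, dtype=np.float32)  # bias is optional (b=None also works)
y = conv2d_simple(x, W, b, stride=1, pad=1)
print(y.shape)  # (1, 16, 8, 8)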
def forward(self, x, W, b):
    xp = cuda.get_array_module(x)

    Weight = W
    SH, SW = self.stride
    PH, PW = self.pad
    C, OC, KH, KW = Weight.shape
    N, C, H, W = x.shape
    if self.outsize is None:
        out_h = get_deconv_outsize(H, KH, SH, PH)
        out_w = get_deconv_outsize(W, KW, SW, PW)
    else:
        out_h, out_w = pair(self.outsize)
    img_shape = (N, OC, out_h, out_w)

    gcol = xp.tensordot(Weight, x, (0, 1))
    gcol = xp.rollaxis(gcol, 3)
    y = col2im_array(gcol, img_shape, (KH, KW), self.stride, self.pad,
                     to_matrix=False)
    # b, k, h, w
    if b is not None:
        self.no_bias = True
        y += b.reshape((1, b.size, 1, 1))
    return y
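# Worked example for the transposed-convolution output size used above,
# assuming get_deconv_outsize implements the usual formula
# SH * (H - 1) + KH - 2 * PH. With a 4x4 feature map, a 4x4 kernel,
# stride 2 and pad 1, the input is upsampled to 8x8:
assert get_deconv_outsize(4, 4, 2, 1) == 2 * (4 - 1) + 4 - 2 * 1 == 8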
def _init_W(self, x):
    self.in_channels = x.shape[1]
    xp = cuda.get_array_module(x)

    C, OC = self.in_channels, self.out_channels
    KH, KW = pair(self.kernel_size)
    W_data = xp.random.randn(OC, C, KH, KW).astype(np.float32) * np.sqrt(
        1 / (C * KH * KW))
    self.W.data = W_data
def conv2d_simple(x, K: Variable, b: Optional[Variable] = None,
                  stride: int = 1, pad: int = 0):
    x = as_variable(x)

    N, C, H, W = x.shape
    OC, C, KH, KW = K.shape
    SH, SW = pair(stride)
    PH, PW = pair(pad)
    OH = get_conv_outsize(H, KH, SH, PH)
    OW = get_conv_outsize(W, KW, SW, PW)

    col = im2col(x, (KH, KW), stride, pad, to_matrix=True)
    K = K.reshape((OC, -1)).transpose()
    t = F.linear(col, K, b)
    y = t.reshape((N, OH, OW, OC)).transpose((0, 3, 1, 2))
    return y
def col2im_array(col, img_shape, kernel_size, stride, pad, to_matrix=True):
    N, C, H, W = img_shape
    KH, KW = pair(kernel_size)
    SH, SW = pair(stride)
    PH, PW = pair(pad)
    OH = get_conv_outsize(H, KH, SH, PH)
    OW = get_conv_outsize(W, KW, SW, PW)

    if to_matrix:
        col = col.reshape(N, OH, OW, C, KH, KW).transpose(0, 3, 4, 5, 1, 2)

    img = np.zeros((N, C, H + 2 * PH + SH - 1, W + 2 * PW + SW - 1),
                   dtype=col.dtype)
    for j in range(KH):
        j_lim = j + SH * OH
        for i in range(KW):
            i_lim = i + SW * OW
            img[:, :, j:j_lim:SH, i:i_lim:SW] += col[:, :, j, i, :, :]
    return img[:, :, PH:H + PH, PW:W + PW]
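# col2im_array scatters the patches back and sums wherever they overlap, so
# it is the adjoint of im2col_array rather than its exact inverse. A quick
# sketch (illustrative values) with non-overlapping 2x2 patches, where the
# round trip happens to be exact because each pixel lands in one patch only:
x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
col = im2col_array(x, kernel_size=2, stride=2, pad=0, to_matrix=True)
x2 = col2im_array(col, (1, 1, 4, 4), kernel_size=2, stride=2, pad=0,
                  to_matrix=True)
print(np.allclose(x, x2))  # True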
def backward(self, gy):
    # TODO(Koki): This is a simple implementation.
    N, C, OH, OW = gy.shape
    KW, KH = pair(self.kernel_size)

    gy /= (KW * KH)
    gcol = broadcast_to(gy.reshape(-1), (KH, KW, N * C * OH * OW))
    gcol = gcol.reshape(KH, KW, N, C, OH, OW).transpose(2, 3, 0, 1, 4, 5)
    gx = col2im(gcol, self.input_shape, self.kernel_size, self.stride,
                self.pad, to_matrix=False)
    return gx
def forward(self, gy):
    N, C, OH, OW = gy.shape
    N, C, H, W = self.input_shape
    KH, KW = pair(self.kernel_size)

    gcol = np.zeros((N * C * OH * OW * KH * KW), dtype=self.dtype)

    indexes = (self.indexes.ravel() +
               np.arange(0, self.indexes.size * KH * KW, KH * KW))
    gcol[indexes] = gy.ravel()
    gcol = gcol.reshape(N, C, OH, OW, KH, KW)
    gcol = np.swapaxes(gcol, 2, 4)
    gcol = np.swapaxes(gcol, 3, 5)

    gx = col2im_array(gcol, (N, C, H, W), self.kernel_size, self.stride,
                      self.pad, to_matrix=False)
    return gx
def forward(self, gy):
    xp = cuda.get_array_module(gy)

    N, C, OH, OW = gy.shape
    H, W = self.input_shape[2:]
    KH, KW = pair(self.kernel_size)

    gcol = xp.zeros((N * C * OH * OW * KH * KW), dtype=self.dtype)

    indexes = self.indexes.ravel() + xp.arange(
        0, self.indexes.size * KH * KW, KH * KW)
    gcol[indexes] = gy.ravel()
    gcol = gcol.reshape(N, C, OH, OW, KH, KW)
    gcol = xp.swapaxes(gcol, 2, 4)
    gcol = xp.swapaxes(gcol, 3, 5)

    gx = utils.col2im(gcol, (N, C, H, W), self.kernel_size, self.stride,
                      self.pad, to_matrix=False)
    return gx
def _init_W(self, xp=np):
    C, OC = self.in_channels, self.out_channels
    KH, KW = pair(self.kernel_size)
    W_data = xp.random.randn(OC, C, KH, KW).astype(self.dtype) * np.sqrt(
        1 / (C * KH * KW))
    self.W.data = W_data
def __init__(self, size):
    self.size = pair(size)
def __init__(self, size, mode=Image.BILINEAR):
    self.size = pair(size)
    self.mode = mode
def __init__(self, stride=1, pad=0, outsize=None):
    super().__init__()
    self.stride = pair(stride)
    self.pad = pair(pad)
    self.outsize = outsize
def __init__(self, stride=1, pad=0):
    super().__init__()
    self.stride = pair(stride)
    self.pad = pair(pad)
def _init_W(self):
    C, OC = self.in_channels, self.out_channels
    KH, KW = pair(self.kernel_size)
    scale = np.sqrt(1 / (C * KH * KW))
    W_data = np.random.randn(OC, C, KH, KW).astype(self.dtype) * scale
    self.W.data = W_data