def _forward_grouped_convolution(self, x, gy):
    """Compute ``gW`` for a grouped 2-D convolution via im2col + matmul.

    Shape notation used in the comments below:
        G            -- number of groups (``self.groups``)
        N            -- batch size
        kH, kW       -- kernel height / width (``self.kh`` / ``self.kw``)
        iC, iH, iW   -- input channels / height / width
        oC, oH, oW   -- output channels / height / width

    Args:
        x: 4-d array unpacked as ``(N, iC, iH, iW)``.
        gy: 4-d array unpacked as ``(N, oC, oH, oW)``; presumably the
            gradient w.r.t. the convolution output -- confirm with caller.

    Returns:
        A one-element tuple ``(gW,)`` where ``gW`` has shape
        ``(oC, iC // G, kH, kW)`` and dtype ``self.W_dtype``.
    """
    n_groups = self.groups
    batch, in_ch, _in_h, _in_w = x.shape
    _, out_ch, out_h, out_w = gy.shape  # leading axis is the batch size
    ker_h, ker_w = self.kh, self.kw
    in_ch_per_group = in_ch // n_groups
    out_ch_per_group = out_ch // n_groups

    # Sizes of the two flattened axes the batched matmul works on.
    patch_len = in_ch_per_group * ker_h * ker_w
    n_positions = batch * out_h * out_w

    # im2col yields (N, iC, kH, kW, oH, oW).
    cols = conv.im2col(
        x, ker_h, ker_w, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)

    # (N, iC, kH, kW, oH, oW) -> (iC, kH, kW, N, oH, oW)
    #                         -> (G, iCg*kH*kW, N*oH*oW)
    #                         -> (G, N*oH*oW, iCg*kH*kW)
    cols = (cols.transpose(1, 2, 3, 0, 4, 5)
                .reshape(n_groups, patch_len, n_positions)
                .transpose(0, 2, 1))

    # (N, oC, oH, oW) -> (oC, N, oH, oW) -> (G, oCg, N*oH*oW)
    grad_out = (gy.transpose(1, 0, 2, 3)
                  .reshape(n_groups, out_ch_per_group, n_positions))

    # (G, oCg, iCg*kH*kW) = (G, oCg, N*oH*oW) @ (G, N*oH*oW, iCg*kH*kW)
    grad_w = _matmul(grad_out, cols).astype(self.W_dtype, copy=False)

    return grad_w.reshape(out_ch, in_ch_per_group, ker_h, ker_w),
def _forward_grouped_convolution(self, x, W, b):
    """Run the forward pass of a grouped 2-D convolution via im2col + matmul.

    Shape notation used in the comments below:
        G            -- number of groups (``self.groups``)
        N            -- batch size
        kH, kW       -- kernel height / width
        iC, iH, iW   -- input channels / height / width
        oC, oH, oW   -- output channels / height / width

    Args:
        x: 4-d input unpacked as ``(N, iC, iH, iW)``.
        W: 4-d filter unpacked as ``(oC, iC // G, kH, kW)``.
        b: Optional bias; when not ``None`` it is reshaped to
            ``(1, b.size, 1, 1)`` and added to the output in place.

    Returns:
        A one-element tuple ``(y,)`` with ``y`` of shape ``(N, oC, oH, oW)``.
    """
    n_groups = self.groups
    batch, in_ch, _in_h, _in_w = x.shape
    out_ch, _, ker_h, ker_w = W.shape  # second axis is iC // G
    in_ch_per_group = in_ch // n_groups
    out_ch_per_group = out_ch // n_groups
    patch_len = in_ch_per_group * ker_h * ker_w

    # im2col yields (N, iC, kH, kW, oH, oW).
    cols = conv.im2col(
        x, ker_h, ker_w, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    out_h, out_w = cols.shape[-2:]
    n_positions = batch * out_h * out_w

    # (N, iC, kH, kW, oH, oW) -> (iC, kH, kW, N, oH, oW)
    #                         -> (G, iCg*kH*kW, N*oH*oW)
    cols = (cols.transpose(1, 2, 3, 0, 4, 5)
                .reshape(n_groups, patch_len, n_positions))

    # (oC, iCg, kH, kW) -> (G, oCg, iCg*kH*kW)
    filters = W.reshape(n_groups, out_ch_per_group, patch_len)

    # (G, oCg, N*oH*oW) = (G, oCg, iCg*kH*kW) @ (G, iCg*kH*kW, N*oH*oW)
    y = _matmul(filters, cols).astype(cols.dtype, copy=False)

    # (G, oCg, N*oH*oW) -> (oC, N, oH, oW) -> (N, oC, oH, oW)
    y = y.reshape(out_ch, batch, out_h, out_w).transpose(1, 0, 2, 3)

    if b is None:
        return y,

    # Add the bias along the channel axis, in place.
    y += b.reshape(1, b.size, 1, 1)
    return y,
def check_im2col(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
    """Verify ``conv.im2col`` element by element against ``self.img``.

    Args:
        kh, kw: Kernel height and width.
        sy, sx: Vertical and horizontal stride.
        ph, pw: Vertical and horizontal padding.
        dy, dx: Vertical and horizontal dilation.
        gpu: When true, the image is moved to the GPU before ``im2col``
            is called; the result is always compared on the CPU.
    """
    src = cuda.to_gpu(self.img) if gpu else self.img

    col = conv.im2col(src, kh, kw, sy, sx, ph, pw, dy=dy, dx=dx)
    out_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy)
    out_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx)
    self.assertEqual(col.shape, (2, 3, kh, kw, out_h, out_w))

    col = cuda.to_cpu(col)

    for y in moves.range(out_h):
        for x in moves.range(out_w):
            for ky in moves.range(kh):
                for kx in moves.range(kw):
                    # Source pixel that feeds this column entry.
                    oy = y * sy - ph + ky * dy
                    ox = x * sx - pw + kx * dx
                    inside = 0 <= oy < self.h and 0 <= ox < self.w
                    # Positions that fall in the padding must be zero.
                    expected = (
                        self.img[:, :, oy, ox] if inside
                        else numpy.zeros((2, 3), self.dtype))
                    testing.assert_allclose(
                        col[:, :, ky, kx, y, x], expected)
def check_im2col(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
    """Check every entry of ``conv.im2col``'s output against ``self.img``.

    Args:
        kh, kw: Kernel height and width.
        sy, sx: Vertical and horizontal stride.
        ph, pw: Vertical and horizontal padding.
        dy, dx: Vertical and horizontal dilation.
        gpu: If true, run ``im2col`` on a GPU copy of the image; the
            comparison itself is done on CPU arrays.
    """
    if gpu:
        image = cuda.to_gpu(self.img)
    else:
        image = self.img

    col = conv.im2col(image, kh, kw, sy, sx, ph, pw, dy=dy, dx=dx)
    n_rows = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy)
    n_cols = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx)
    self.assertEqual(col.shape, (2, 3, kh, kw, n_rows, n_cols))

    col = cuda.to_cpu(col)

    for y in moves.range(n_rows):
        for x in moves.range(n_cols):
            for ky in moves.range(kh):
                # The row offset depends only on (y, ky).
                oy = y * sy - ph + ky * dy
                for kx in moves.range(kw):
                    ox = x * sx - pw + kx * dx
                    actual = col[:, :, ky, kx, y, x]

                    if 0 <= oy < self.h and 0 <= ox < self.w:
                        # In-bounds tap: must equal the source pixel.
                        testing.assert_allclose(
                            actual, self.img[:, :, oy, ox])
                        continue

                    # Out-of-bounds tap lies in the padding: must be zero.
                    testing.assert_allclose(
                        actual, numpy.zeros((2, 3), self.dtype))