def test_col2im_consistency(self):
    col = conv.im2col_cpu(self.x, 3, 3, 2, 2, 2, 2, dy=2, dx=2)
    h, w = self.x.shape[2:]
    im_cpu = conv.col2im_cpu(col, 2, 2, 2, 2, h, w, dy=2, dx=2)
    im_gpu = conv.col2im_gpu(
        cuda.to_gpu(col), 2, 2, 2, 2, h, w, dy=2, dx=2)
    testing.assert_allclose(im_cpu, im_gpu.get())

def backward_cpu(self, x, gy):
    h, w = x[0].shape[2:]
    gcol = numpy.tile(gy[0][:, :, numpy.newaxis, numpy.newaxis],
                      (1, 1, self.kh, self.kw, 1, 1))
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    gx /= self.kh * self.kw
    return gx,

def forward(self, x):
    self.retain_inputs(())
    h, w = x[0].shape[2:]
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph,
                                             cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw,
                                             cover_all=self.cover_all)
    xp = cuda.get_array_module(*x)
    col = xp.tile(x[0][:, :, None, None],
                  (1, 1, self.kh, self.kw, 1, 1))
    if xp is numpy:
        y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    else:
        y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    return y,

def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    gy = grad_outputs[0]
    n, c, h, w = x.shape
    out_c, input_c, kh, kw = W.shape
    gn, gout_c, gout_h, gout_w = gy.shape
    # The MKLDNN backward path supports only float32.
    if mkld.enable_convF(inputs):
        gW = numpy.empty(shape=(out_c, input_c, kh, kw), dtype=W.dtype)
        gx = numpy.empty(shape=(n, c, h, w), dtype=W.dtype)
        if b is None:
            mkldnn.Convolution2D_F32.do_backward(
                x, W, gy, gW, gx, kh, kw, self.sy, self.sx,
                self.ph, self.pw, self.pd, self.pr, self.mkldnn_opt)
            return gx, gW
        else:
            gb = numpy.empty(shape=b.shape, dtype=W.dtype)
            mkldnn.Convolution2D_F32.do_backward(
                x, W, b, gy, gW, gx, gb, kh, kw, self.sy, self.sx,
                self.ph, self.pw, self.pd, self.pr, self.mkldnn_opt)
            return gx, gW, gb
    else:
        gW = numpy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
        gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb

def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(
        gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
    if not self.requires_x_grad:
        gx = None
    else:
        gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb

def backward_cpu(self, inputs, grad_outputs): x, W = inputs[:2] b = inputs[2] if len(inputs) == 3 else None gy = grad_outputs[0] h, w = x.shape[2:] """ N = numpy.random.uniform(-0.5, 0.5) coef = numpy.max(gy, axis=tuple([i for i in xrange(1,gy.ndim)])).astype(numpy.float32) coef = _as_mat(coef) gy = _as_mat(gy) coef_invert = 0.5*coef**(-1) gy = gy*coef_invert+0.5+N/E_g gy = quantize(gy, E_g) gy = 2*coef*(gy-0.5) gy = gy.reshape(grad_outputs[0].shape) """ gW = numpy.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False) W = numpy.clip(W * 0.5 + 5, 0, 1) Wq = 2 * quantize(W, E_w) - 1 gcol = numpy.tensordot(Wq, gy, (0, 1)).astype(x.dtype, copy=False) gcol = numpy.rollaxis(gcol, 3) gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w) if b is None: return gx, gW else: gb = gy.sum(axis=(0, 2, 3)) return gx, gW, gb
def forward_cpu(self, x):
    self.retain_inputs(())
    self._in_dtype = x[0].dtype
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
    up_y = conv.im2col_cpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    for n in six.moves.range(up_y.shape[0]):
        for c in six.moves.range(up_y.shape[1]):
            for oy in six.moves.range(up_y.shape[4]):
                for ox in six.moves.range(up_y.shape[5]):
                    ky = self.indexes[n, c, oy, ox] // up_y.shape[3]
                    kx = self.indexes[n, c, oy, ox] % up_y.shape[3]
                    up_y[n, c, ky, kx, oy, ox] = x[0][n, c, oy, ox]
    up_y = conv.col2im_cpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,

def backward_cpu(self, x, gy):
    if mkld.enable_max_poolingF((x, gy)):
        n, c, h, w = x[0].shape
        gx = numpy.empty((n, c, h, w), dtype=x[0].dtype)
        mkldnn.MaxPooling_F32.do_backward(
            gy[0], x[0], gx, self.indexes, self.sy, self.sx,
            self.ph, self.pd, self.pw, self.pr, self.kh, self.kw)
        return gx,
    else:
        n, c, out_h, out_w = gy[0].shape
        h, w = x[0].shape[2:]
        kh, kw = self.kh, self.kw
        gcol = numpy.zeros(
            (n * c * out_h * out_w * kh * kw), dtype=x[0].dtype)
        indexes = self.indexes.flatten()
        indexes += numpy.arange(0, indexes.size * kh * kw, kh * kw)
        gcol[indexes] = gy[0].ravel()
        gcol = gcol.reshape(n, c, out_h, out_w, kh, kw)
        gcol = numpy.swapaxes(gcol, 2, 4)
        gcol = numpy.swapaxes(gcol, 3, 5)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        return gx,

def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not all([isinstance(i, numpy.ndarray) for i in inputs]):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
        assert self.outh > 0, 'Height in the output should be positive.'
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
        assert self.outw > 0, 'Width in the output should be positive.'
    y = conv.col2im_cpu(
        gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,

def forward_cpu(self, x):
    self._in_dtype = x[0].dtype
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph,
                                             cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw,
                                             cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
    up_y = conv.im2col_cpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all).transpose(0, 1, 4, 5, 2, 3)
    colh, colw = up_y.shape[2:4]
    up_y = up_y.reshape(-1, self.kh * self.kw)
    indexes = self.indexes.ravel()
    up_y[numpy.arange(len(indexes)), indexes] = x[0].ravel()
    up_y = up_y.reshape(n, c, colh, colw, self.kh, self.kw)
    up_y = conv.col2im_cpu(up_y.transpose(0, 1, 4, 5, 2, 3),
                           self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,

def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}'.format(type(W), type(x)))
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(
        gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
    gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
    gcol = numpy.rollaxis(gcol, 3)
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb

def template(col_shape=(2, 5, 3, 3, 3, 4), col_order=col_chainer_order,
             im_order=OrderNHWC, ksize=(3, 3), padding=(1, 1),
             stride=(1, 1), description: str = ""):
    col = Variable(col_shape, col_order)
    op = Col2Im(None, ksize, stride, padding)
    im, = op(col)
    im = im.change_order(im_order)

    vcol = np.random.rand(*(col.shape_dict[a]
                            for a in col_chainer_order.axes)).astype(np.float32)
    h1 = get_deconv_outsize(col.shape_dict[Axis.H], op.KH, op.SH, op.PH)
    w1 = get_deconv_outsize(col.shape_dict[Axis.W], op.KW, op.SW, op.PW)
    vim = col2im_cpu(vcol, op.SH, op.SW, op.PH, op.PW, h1, w1)

    vcol = vcol.transpose(
        [col_chainer_order.axes_dict[a] for a in col_order.axes])
    vim = vim.transpose([OrderNCHW.axes_dict[a] for a in im_order.axes])

    generate_kernel_test_case(
        description=f"Col2Im {description}",
        backend=["webgpu", "webgl", "webassembly"],
        graph=Graph([col], [im]),
        inputs={col: vcol},
        expected={im: vim},
    )

def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(
        gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
    if not self.requires_x_grad:
        gx = None
    else:
        gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                             h, w, dy=self.dy, dx=self.dx)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb

def forward_cpu(self, x):
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph,
                                             cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw,
                                             cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
    up_y = conv.im2col_cpu(up_y, self.kh, self.kw, self.sy, self.sx,
                           self.ph, self.pw, cover_all=self.cover_all)
    for n in six.moves.range(up_y.shape[0]):
        for c in six.moves.range(up_y.shape[1]):
            for oy in six.moves.range(up_y.shape[4]):
                for ox in six.moves.range(up_y.shape[5]):
                    ky = self.indexes[n, c, oy, ox] // up_y.shape[3]
                    kx = self.indexes[n, c, oy, ox] % up_y.shape[3]
                    up_y[n, c, ky, kx, oy, ox] = x[0][n, c, oy, ox]
    up_y = conv.col2im_cpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,

def backward_cpu(self, x, gy):
    h, w = self._in_shape[2:]
    gcol = numpy.tile(gy[0][:, :, None, None],
                      (1, 1, self.kh, self.kw, 1, 1))
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    gx /= self.kh * self.kw
    return gx,

def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}'.format(type(W), type(x)))
    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
        assert self.outh > 0, 'Height in the output should be positive.'
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
        assert self.outw > 0, 'Width in the output should be positive.'
    y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                        self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,

def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    if self.bcoeffs is not None:
        olen, ilen, hlen, wlen = W.shape
        if self.coeffs is None:
            self.coeffs = numpy.ones(ilen)
        coeffs = numpy.copy(self.bcoeffs)
        coeffs = numpy.expand_dims(coeffs, 1)
        coeffs = numpy.expand_dims(coeffs, 1)
        coeffs = numpy.expand_dims(coeffs, 0)
        coeffs = numpy.broadcast_to(coeffs, W.shape)
        self.mW = numpy.asarray(coeffs, numpy.float32).reshape(W.shape)
    if self.ocoeffs is not None:
        coeffs = numpy.copy(self.ocoeffs)
        self.mb = numpy.asarray(coeffs, numpy.float32)
    W = self.M * W
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(
        gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
    if not self.requires_x_grad:
        gx = None
    else:
        gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    if hasattr(self, 'mW'):
        gW = self.mW * gW
    if hasattr(self, 'mb'):
        xp = cuda.get_array_module(*x)
        gW = xp.broadcast_to(
            xp.expand_dims(xp.expand_dims(xp.expand_dims(self.mb, 1), 1), 1),
            gW.shape) * gW
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        if hasattr(self, 'mb'):
            gb = self.mb * gb
        return gx, gW, gb

def backward_cpu(self, x, gy):
    if self.gb is not None:
        self.gb += gy[0].sum(axis=(0, 2, 3))
    self.gW += numpy.tensordot(gy[0], self.col, ([0, 2, 3], [0, 4, 5]))
    gcol = numpy.tensordot(self.W, gy[0], (0, 1))
    gcol = numpy.rollaxis(gcol, 3)
    h, w = x[0].shape[2:]
    return conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w),

def _forward_cpu_core(self, x, W, b):
    gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    gcol = numpy.rollaxis(gcol, 3)
    y = conv.col2im_cpu(
        gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw,
        dy=self.dy, dx=self.dx)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,

def forward_cpu(self, gy):
    if (intel64.should_use_ideep('>=auto')
            and intel64.inputs_all_ready(gy)):
        return self._forward_ideep(gy)

    h, w = self._in_shape[2:]
    gcol = numpy.tile(gy[0][:, :, None, None],
                      (1, 1, self.kh, self.kw, 1, 1))
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    gx /= self.kh * self.kw
    return gx,

def backward_cpu(self, x, gy):
    n, c, out_h, out_w = gy[0].shape
    h, w = x[0].shape[2:]
    gcol = numpy.zeros(
        (n, c, self.kh, self.kw, out_h, out_w), dtype=numpy.float32)
    # TODO(beam2d): Make it fast
    gcol_r = numpy.rollaxis(gcol.reshape(n, c, -1, out_h, out_w), 2)
    for i in numpy.ndindex(n, c, out_h, out_w):
        gcol_r[self.indexes[i]][i] = gy[0][i]
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    return gx,

def forward(self, x):
    h, w = x[0].shape[2:]
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph,
                                             cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw,
                                             cover_all=self.cover_all)
    xp = cuda.get_array_module(*x)
    col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                  (1, 1, self.kh, self.kw, 1, 1))
    if isinstance(x[0], cuda.ndarray):
        y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    else:
        y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    return (y,)

def backward_cpu(self, x, gy):
    n, c, out_h, out_w = gy[0].shape
    h, w = x[0].shape[2:]
    gcol = numpy.zeros((n, c, self.kh, self.kw, out_h, out_w),
                       dtype=numpy.float32)
    # TODO(beam2d): Make it fast
    gcol_r = numpy.rollaxis(gcol.reshape(n, c, -1, out_h, out_w), 2)
    for i in numpy.ndindex(n, c, out_h, out_w):
        gcol_r[self.indexes[i]][i] = gy[0][i]
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    return gx,

def forward(self, x):
    h, w = x[0].shape[2:]
    n = x[0].shape[0]
    c = x[0].shape[1]
    indexes = x[1]
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph,
                                             cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw,
                                             cover_all=self.cover_all)
    xp = cuda.get_array_module(*x)
    col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                  (1, 1, self.kh, self.kw, 1, 1))
    # NOTE(hvy): Take indexes (switches) into account
    # TODO(hvy): Remove the loops and make it efficient
    y = xp.zeros_like(col)
    if isinstance(x[0], cuda.ndarray):
        indexes = cuda.cupy.asnumpy(indexes)
    for n_i in range(n):
        for c_i in range(c):
            for r in range(h):
                # Use a distinct loop variable so the channel count `c`
                # is not shadowed by the column index.
                for w_i in range(w):
                    index = indexes[n_i][c_i][r][w_i]
                    if index < self.kw:
                        y[n_i][c_i].T[w_i][r][index][0] = \
                            col[n_i][c_i].T[w_i][r][index][0]
                    else:
                        y[n_i][c_i].T[w_i][r][index % self.kw][1] = \
                            col[n_i][c_i].T[w_i][r][index % self.kw][1]
    if isinstance(x[0], cuda.ndarray):
        y = conv.col2im_gpu(y, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    else:
        y = conv.col2im_cpu(y, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    return y,

def forward_cpu(self, x):
    n, c, h, w = x[0].shape
    gcol = numpy.tensordot(self.W, x[0], (0, 1))  # k, m, n, b, h, w
    gcol = numpy.rollaxis(gcol, 3)  # b, k, m, n, h, w
    h_ = get_deconv_outsize(h, self.kh, self.sy, self.ph)
    w_ = get_deconv_outsize(w, self.kw, self.sx, self.pw)
    y = conv.col2im_cpu(
        gcol, self.sy, self.sx, self.ph, self.pw, h_, w_)  # b, k, h, w
    if self.b is not None:
        y += self.b.reshape(1, self.b.size, 1, 1)
    return y,

def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 5)))
    gcol = numpy.tensordot(W, gy, (0, 1))
    gcol = numpy.rollaxis(gcol, 3)
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    if len(inputs) == 3:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb
    else:
        return gx, gW

def backward(self, inputs, grad_outputs):
    x, = inputs
    xp = cuda.get_array_module(x)
    gy, = grad_outputs
    n, _, out_h, out_w = gy.shape
    _, c, h, w = x.shape
    gy = gy.reshape(n, c, self.kh, self.kw, out_h, out_w)
    if xp == numpy:
        gx = col2im_cpu(
            gy, self.sy, self.sx, self.ph, self.pw, h, w, self.dy, self.dx)
    else:
        gx = col2im_gpu(
            gy, self.sy, self.sx, self.ph, self.pw, h, w, self.dy, self.dx)
    return gx,

def backward(self, inputs, grad_outputs):
    x, = inputs
    xp = cuda.get_array_module(x)
    gy, = grad_outputs
    n, _, out_h, out_w = gy.shape
    _, c, h, w = x.shape
    gy = gy.reshape(n, c, self.kh, self.kw, out_h, out_w)
    if xp == numpy:
        gx = col2im_cpu(gy, self.sy, self.sx, self.ph, self.pw,
                        h, w, self.dy, self.dx)
    else:
        gx = col2im_gpu(gy, self.sy, self.sx, self.ph, self.pw,
                        h, w, self.dy, self.dx)
    return gx,

def forward_cpu(self, x):
    n, c, out_h, out_w = x[0].shape
    gcol = numpy.zeros((n, c, self.kh, self.kw, out_h, out_w),
                       dtype=numpy.float32)
    # TODO(beam2d): Make it fast
    gcol_r = numpy.rollaxis(gcol.reshape(n, c, -1, out_h, out_w), 2)
    for i in numpy.ndindex(n, c, out_h, out_w):
        # gcol_r[self.indexes[i]][i] = x[0][i]
        for j in range(gcol_r.shape[0]):  # range instead of Python 2 xrange
            gcol_r[j][i] = x[0][i]
    y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                        self.h, self.w)
    return y,

def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    Wb = numpy.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)
    b = inputs[2] if len(inputs) == 3 else None
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 5)))
    gcol = numpy.tensordot(Wb, gy, (0, 1))
    gcol = numpy.rollaxis(gcol, 3)
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb

def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(
        gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
    gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
    gcol = numpy.rollaxis(gcol, 3)
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                         h, w, dy=self.dy, dx=self.dx)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb

def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    Wb = numpy.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)
    b = inputs[2] if len(inputs) == 3 else None
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 5)))
    gcol = numpy.tensordot(Wb, gy, (0, 1))
    gcol = numpy.rollaxis(gcol, 3)
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb

def backward_cpu(self, x, gy):
    n, c, out_h, out_w = gy[0].shape
    h, w = x[0].shape[2:]
    kh, kw = self.kh, self.kw
    gcol = numpy.zeros((n * c * out_h * out_w * kh * kw), dtype=x[0].dtype)
    # flatten() copies, so the in-place += below does not corrupt
    # the stored self.indexes (ravel() may return a view).
    indexes = self.indexes.flatten()
    indexes += numpy.arange(0, indexes.size * kh * kw, kh * kw)
    gcol[indexes] = gy[0].ravel()
    gcol = gcol.reshape(n, c, out_h, out_w, kh, kw)
    gcol = numpy.swapaxes(gcol, 2, 4)
    gcol = numpy.swapaxes(gcol, 3, 5)
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    return gx,

def backward(self, inputs, grad_outputs):
    x, W = inputs[:2]
    xp = cuda.get_array_module(*x)
    W = xp.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)
    W = self.M * W
    b = inputs[2] if len(inputs) == 3 else None
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    xp = cuda.get_array_module(*x)
    B, C, KY, KX, IY, IX = self.col.shape
    D = W.shape[0]
    # (B, C*D, IY, IX) -> (C, D, B*IY*IX, D)
    gy_ = gy.reshape((B, C, D, IY * IX)).transpose(1, 2, 0, 3) \
        .reshape((C, D, B * IY * IX))
    c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
        .reshape((C, B * IY * IX, KY * KX))
    # (C, D, B*IY*IX), (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
    gW_ = _matmul(gy_, c_, xp)
    gW = gW_.reshape((C, D, KY, KX)).transpose(1, 0, 2, 3)
    gW = gW.astype(W.dtype, copy=False)
    w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
    # (C, KY*KX, D), (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)
    gcol = _matmul(w_, gy_, xp).reshape((C, KY, KX, B, IY, IX))
    gcol = gcol.astype(x.dtype, copy=False)
    gcol = xp.rollaxis(gcol, 3)
    if xp is numpy:
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    else:
        gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    if b is None:
        return gx, gW
    else:
        gy = xp.rollaxis(gy, 1, 4)
        gb = gy.sum(axis=(0, 1, 2))
        return gx, gW, gb

def backward_cpu(self, x, gy):
    n, c, out_h, out_w = gy[0].shape
    h, w = self._in_shape[2:]
    kh, kw = self.kh, self.kw
    gcol = numpy.zeros(
        (n * c * out_h * out_w * kh * kw), dtype=self._in_dtype)
    indexes = self.indexes.flatten()
    indexes += numpy.arange(0, indexes.size * kh * kw, kh * kw)
    gcol[indexes] = gy[0].ravel()
    gcol = gcol.reshape(n, c, out_h, out_w, kh, kw)
    gcol = numpy.swapaxes(gcol, 2, 4)
    gcol = numpy.swapaxes(gcol, 3, 5)
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    return gx,

def backward_cpu(self, x, gy):
    if mkld.enable_avg_poolingF((x, gy)):
        n, c, h, w = x[0].shape
        gx = numpy.empty((n, c, h, w), dtype=x[0].dtype)
        mkldnn.AvgPooling_F32.do_backward(
            gy[0], x[0], gx, self.sy, self.sx, self.ph, self.pd,
            self.pw, self.pr, self.kh, self.kw)
        return gx,
    else:
        h, w = x[0].shape[2:]
        gcol = numpy.tile(gy[0][:, :, None, None],
                          (1, 1, self.kh, self.kw, 1, 1))
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        gx /= self.kh * self.kw
        return gx,

def forward(self, x):
    h, w = x[0].shape[2:]
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    xp = cuda.get_array_module(*x)
    col = xp.tile(x[0][:, :, None, None],
                  (1, 1, self.kh, self.kw, 1, 1))
    if xp is numpy:
        y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    else:
        y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    return y,

def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1))
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
    y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                        self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,

def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1))
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
    y = conv.col2im_cpu(
        gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,

def backward(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    xp = cuda.get_array_module(*x)
    B, C, KY, KX, IY, IX = self.col.shape
    D = W.shape[0]
    # (B, C*D, IY, IX) -> (C, D, B*IY*IX, D)
    gy_ = gy.reshape((B, C, D, IY * IX)).transpose(1, 2, 0, 3) \
        .reshape((C, D, B * IY * IX))
    c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
        .reshape((C, B * IY * IX, KY * KX))
    # (C, D, B*IY*IX), (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
    gW_ = _matmul(gy_, c_, xp)
    gW = gW_.reshape((C, D, KY, KX)).transpose(1, 0, 2, 3)
    gW = gW.astype(W.dtype, copy=False)
    w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
    # (C, KY*KX, D), (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)
    gcol = _matmul(w_, gy_, xp).reshape((C, KY, KX, B, IY, IX))
    gcol = gcol.astype(x.dtype, copy=False)
    gcol = xp.rollaxis(gcol, 3)
    if xp is numpy:
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    else:
        gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    if b is None:
        return gx, gW
    else:
        gy = xp.rollaxis(gy, 1, 4)
        gb = gy.sum(axis=(0, 1, 2))
        return gx, gW, gb

def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
        assert self.outh > 0, "Height in the output should be positive."
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
        assert self.outw > 0, "Width in the output should be positive."
    y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                        self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return (y,)

def forward_cpu(self, gy):
    if (intel64.should_use_ideep('>=auto')
            and intel64.inputs_all_ready(gy)):
        return self._forward_ideep(gy)

    n, c, out_h, out_w = gy[0].shape
    h, w = self._in_shape[2:]
    kh, kw = self.kh, self.kw
    gcol = numpy.zeros(
        (n * c * out_h * out_w * kh * kw), dtype=self._in_dtype)
    indexes = self.indexes.flatten()
    indexes += numpy.arange(0, indexes.size * kh * kw, kh * kw)
    gcol[indexes] = gy[0].ravel()
    gcol = gcol.reshape(n, c, out_h, out_w, kh, kw)
    gcol = numpy.swapaxes(gcol, 2, 4)
    gcol = numpy.swapaxes(gcol, 3, 5)
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    return gx,

def forward_cpu(self, gy):
    if (intel64.should_use_ideep('>=auto')
            and intel64.inputs_all_ready(gy)):
        return self._forward_ideep(gy)

    n, c, out_h, out_w = gy[0].shape
    h, w = self._in_shape[2:]
    kh, kw = self.kh, self.kw
    gcol = numpy.zeros((n * c * out_h * out_w * kh * kw),
                       dtype=self._in_dtype)
    indexes = self.indexes.ravel() + numpy.arange(
        0, self.indexes.size * kh * kw, kh * kw)
    gcol[indexes] = gy[0].ravel()
    gcol = gcol.reshape(n, c, out_h, out_w, kh, kw)
    gcol = numpy.swapaxes(gcol, 2, 4)
    gcol = numpy.swapaxes(gcol, 3, 5)
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    return gx,

def forward(self, x):
    h, w = x[0].shape[2:]
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph,
                                             cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw,
                                             cover_all=self.cover_all)
    xp = cuda.get_array_module(*x)
    col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                  (1, 1, self.kh, self.kw, 1, 1))
    if isinstance(x[0], cuda.ndarray):
        y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    else:
        y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    return y,

def forward_cpu(self, inputs):
    self.retain_inputs((0, 1))  # only retain x and W
    if len(inputs) == 2:
        (x, W), b = inputs, None
    else:
        x, W, b = inputs

    self._calc_out_size(x, W)

    gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    gcol = numpy.rollaxis(gcol, 3)
    y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                        self.outh, self.outw, dy=self.dy, dx=self.dx)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,

def forward_cpu(self, x):
    self._in_dtype = x[0].dtype
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
    up_y = conv.im2col_cpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all).transpose(0, 1, 4, 5, 2, 3)
    colh, colw = up_y.shape[2:4]
    up_y = up_y.reshape(-1, self.kh * self.kw)
    indexes = self.indexes.ravel()
    up_y[numpy.arange(len(indexes)), indexes] = x[0].ravel()
    up_y = up_y.reshape(n, c, colh, colw, self.kh, self.kw)
    up_y = conv.col2im_cpu(
        up_y.transpose(0, 1, 4, 5, 2, 3), self.sy, self.sx, self.ph,
        self.pw, self.outh, self.outw)
    return up_y,

def test_col2im_consistency(self):
    col = conv.im2col_cpu(self.x, 3, 3, 2, 2, 1, 1)
    h, w = self.x.shape[2:]
    im_cpu = conv.col2im_cpu(col, 2, 2, 1, 1, h, w)
    im_gpu = conv.col2im_gpu(cuda.to_gpu(col), 2, 2, 1, 1, h, w)
    gradient_check.assert_allclose(im_cpu, im_gpu.get())

def _col2im(x, *args, **kwargs):
    if isinstance(x, numpy.ndarray):
        return col2im_cpu(x, *args, **kwargs)
    return col2im_gpu(x, *args, **kwargs)
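
For reference, a minimal self-contained round trip through these helpers. This is a sketch only: it assumes Chainer's `chainer.utils.conv` module and mirrors the argument order `col2im_cpu(col, sy, sx, ph, pw, h, w)` used in the snippets above; the toy shapes and the kernel/stride/padding values are illustrative.

import numpy
from chainer.utils import conv

# Toy input: batch of 2, 3 channels, 8x8 spatial size (illustrative values).
x = numpy.random.rand(2, 3, 8, 8).astype(numpy.float32)
kh = kw = 3   # kernel size
sy = sx = 2   # stride
ph = pw = 1   # padding

# im2col_cpu produces patches of shape (n, c, kh, kw, out_h, out_w).
col = conv.im2col_cpu(x, kh, kw, sy, sx, ph, pw)

# col2im_cpu folds the patches back to (n, c, h, w); overlapping positions
# are summed, so it is the transpose of im2col, not its inverse.
h, w = x.shape[2:]
im = conv.col2im_cpu(col, sy, sx, ph, pw, h, w)
print(col.shape, im.shape)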