def setUp(self):
    kh, kw = _pair(self.ksize)
    sh, sw = _pair(self.stride)
    ph, pw = _pair(self.pad)
    self.W = numpy.random.normal(
        0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
        (self.in_channels, self.out_channels, kh, kw)).astype(self.W_dtype)
    self.b = None if self.nobias else numpy.random.uniform(
        -1, 1, self.out_channels).astype(self.x_dtype)

    N = 2
    inh, inw = 4, 3
    outh = conv.get_deconv_outsize(inh, kh, sh, ph)
    outw = conv.get_deconv_outsize(inw, kw, sw, pw)
    self.outsize = (outh, outw) if self.test_outsize else None
    self.x = numpy.random.uniform(
        -1, 1, (N, self.in_channels, inh, inw)).astype(self.x_dtype)
    self.gy = numpy.random.uniform(
        -1, 1, (N, self.out_channels, outh, outw)).astype(self.x_dtype)

    self.test_forward_options = {}
    self.check_backward_options = {'dtype': numpy.float64}
    if self.x_dtype == numpy.float16:
        self.test_forward_options = {'atol': 5e-3, 'rtol': 5e-2}
        self.check_backward_options = {
            'dtype': numpy.float64, 'atol': 5e-4, 'rtol': 5e-3}
    elif self.W_dtype == numpy.float16:
        self.check_backward_options = {
            'dtype': numpy.float64, 'atol': 5e-4, 'rtol': 5e-3}
def calc_unpooling_2d(func, in_data, **kwargs):
    """[Unpooling2D](https://docs.chainer.org/en/v4.3.0/reference/generated/chainer.functions.unpooling_2d.html)

    Unpooling2D only reads the data from memory and writes it to certain
    positions in the output. Unlike Upsampling2D, it does not use indices;
    every output pixel is filled from the corresponding pixel of the input
    tensor.

    | Item   | Value |
    |:-------|:------|
    | FLOPs  | $$ 0 $$ |
    | mread  | $$ \| x \| $$ |
    | mwrite | $$ \| y \| $$ |
    | params | Unpooling parameters `k`, `s`, `p`, `outsize` and `cover_all` |
    """
    x, = in_data
    n, c, h, w = x.shape

    kh, kw = int(func.kh), int(func.kw)
    sy, sx = int(func.sy), int(func.sx)
    ph, pw = int(func.ph), int(func.pw)

    outh, outw = func.outh, func.outw
    if outh is None:
        outh = get_deconv_outsize(h, kh, sy, ph, cover_all=func.cover_all)
    if outw is None:
        outw = get_deconv_outsize(w, kw, sx, pw, cover_all=func.cover_all)

    params = {
        'k': kw if kw == kh else (kh, kw),
        's': sx if sx == sy else (sy, sx),
        'p': pw if pw == ph else (ph, pw),
        'outsize': (outh, outw),
        'cover_all': func.cover_all
    }
    return (0, x.size, n * c * outh * outw, params)
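# Note (added): every snippet in this collection sizes the output of a
# transposed convolution / unpooling with chainer.utils.conv.get_deconv_outsize.
# The helper below is a minimal re-derivation of that formula, assuming
# Chainer's documented behaviour for `cover_all` and dilation `d`; the name
# `_deconv_outsize_sketch` is hypothetical and the function is illustrative
# only, not the library implementation.
def _deconv_outsize_sketch(size, k, s, p, cover_all=False, d=1):
    dk = d * (k - 1) + 1  # effective kernel size under dilation
    if cover_all:
        return s * (size - 1) + dk - s + 1 - 2 * p
    return s * (size - 1) + dk - 2 * p


# Example: a 4-pixel input with k=3, s=2, p=1 maps to 7 output pixels.
assert _deconv_outsize_sketch(4, 3, 2, 1) == 7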
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None

    if not all([isinstance(i, numpy.ndarray) for i in inputs]):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
        assert self.outh > 0, 'Height in the output should be positive.'
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
        assert self.outw > 0, 'Width in the output should be positive.'
    y = conv.col2im_cpu(
        gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,
def forward(self, x):
    self.retain_inputs(())

    h, w = x[0].shape[2:]
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    xp = cuda.get_array_module(*x)
    col = xp.tile(x[0][:, :, None, None], (1, 1, self.kh, self.kw, 1, 1))
    if xp is numpy:
        y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    else:
        y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    return y,
def forward_cpu(self, inputs):
    self.retain_inputs((0, 1))  # retain x, W
    x, W = inputs[:2]

    kh, kw = W.shape[2:]
    n, in_c, in_h, in_w = x.shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
        assert self.outh > 0, 'Height in the output should be positive.'
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
        assert self.outw > 0, 'Width in the output should be positive.'
    self._set_cover_all(x, W)

    cc = xnn.ConvolutionBackwardData(
        inputs, stride=(self.sy, self.sx), pad=(self.ph, self.pw),
        outsize=(self.outh, self.outw), cover_all=self.cover_all)
    self.hint = cc.hint

    y, = cc.execute_on()

    if len(inputs) == 3:
        b = inputs[2]
        y += b.reshape(1, b.size, 1, 1)
    return y,
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None

    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}'.format(type(W), type(x)))

    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
        assert self.outh > 0, 'Height in the output should be positive.'
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
        assert self.outw > 0, 'Width in the output should be positive.'
    y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                        self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,
def forward_cpu(self, x):
    self.retain_inputs(())
    self._in_dtype = x[0].dtype

    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
    up_y = conv.im2col_cpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    for n in six.moves.range(up_y.shape[0]):
        for c in six.moves.range(up_y.shape[1]):
            for oy in six.moves.range(up_y.shape[4]):
                for ox in six.moves.range(up_y.shape[5]):
                    ky = self.indexes[n, c, oy, ox] // up_y.shape[3]
                    kx = self.indexes[n, c, oy, ox] % up_y.shape[3]
                    up_y[n, c, ky, kx, oy, ox] = x[0][n, c, oy, ox]
    up_y = conv.col2im_cpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,
def setUp(self):
    self.N = 2
    self.inh, self.inw = 4, 3
    self.in_channels_a_group = 3
    self.out_channels_a_group = 2
    self.in_channels = self.in_channels_a_group * self.groups
    self.out_channels = self.out_channels_a_group * self.groups
    self.ksize = 3
    self.pad = 1
    self.kh, self.kw = _pair(self.ksize)
    self.sh, self.sw = _pair(self.stride)
    self.ph, self.pw = _pair(self.pad)

    outh = conv.get_deconv_outsize(
        self.inh, self.kh, self.sh, self.ph, d=self.dilate)
    outw = conv.get_deconv_outsize(
        self.inw, self.kw, self.sw, self.pw, d=self.dilate)
    self.outsize = (outh, outw) if self.test_outsize else None

    if self.x_dtype == numpy.float16:
        self.check_forward_options.update(atol=5e-3, rtol=5e-2)
        self.check_backward_options.update(atol=5e-3, rtol=5e-2)
        self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
    elif self.W_dtype == numpy.float16:
        self.check_backward_options.update(atol=5e-3, rtol=5e-2)
        self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
def forward_cpu(self, x):
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
    up_y = conv.im2col_cpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    for n in six.moves.range(up_y.shape[0]):
        for c in six.moves.range(up_y.shape[1]):
            for oy in six.moves.range(up_y.shape[4]):
                for ox in six.moves.range(up_y.shape[5]):
                    ky = self.indexes[n, c, oy, ox] // up_y.shape[3]
                    kx = self.indexes[n, c, oy, ox] % up_y.shape[3]
                    up_y[n, c, ky, kx, oy, ox] = x[0][n, c, oy, ox]
    up_y = conv.col2im_cpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,
def _calc_out_size(self, x, W):
    """Calculates and stores `outh` and `outw`."""
    kh, kw = W.shape[2:]
    _, _, in_h, in_w = x.shape
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            in_h, kh, self.sy, self.ph, d=self.dy)
        if self.outh <= 0:
            raise RuntimeError('Height in the output must be positive.')
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            in_w, kw, self.sx, self.pw, d=self.dx)
        if self.outw <= 0:
            raise RuntimeError('Width in the output must be positive.')
def forward_cpu(self, x):
    self._in_dtype = x[0].dtype

    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
    up_y = conv.im2col_cpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all).transpose(0, 1, 4, 5, 2, 3)
    colh, colw = up_y.shape[2:4]
    up_y = up_y.reshape(-1, self.kh * self.kw)
    indexes = self.indexes.ravel()
    up_y[numpy.arange(len(indexes)), indexes] = x[0].ravel()
    up_y = up_y.reshape(n, c, colh, colw, self.kh, self.kw)
    up_y = conv.col2im_cpu(
        up_y.transpose(0, 1, 4, 5, 2, 3), self.sy, self.sx, self.ph,
        self.pw, self.outh, self.outw)
    return up_y,
def forward_gpu(self, x):
    xp = cuda.cupy
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = xp.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
    up_y = conv.im2col_gpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
    n, c, oy, ox, ky, kx = up_y.shape
    indexes = xp.asarray(self.indexes, dtype=numpy.int32)
    xp.ElementwiseKernel(
        "int32 index, float32 x, int32 n, int32 c, int32 oy, int32 ox,"
        "int32 ky, int32 kx",
        "raw float32 up_y",
        """
        int yn = i / c / oy / ox;
        int yc = (i / oy / ox) % c;
        int yoy = (i / ox) % oy;
        int yox = i % ox;
        up_y[yn * c * oy * ox * ky * kx +
             yc * oy * ox * ky * kx +
             yoy * ox * ky * kx +
             yox * ky * kx +
             index] = x;
        """,
        "upsampling_2d_fwd",
    )(indexes, x[0], n, c, oy, ox, ky, kx, up_y)
    up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
    up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return (up_y,)
def setUp(self):
    in_channels = 3
    out_channels = 2
    ksize = 3
    stride = 2
    pad = 1
    self.link = L.Deconvolution2D(
        in_channels, out_channels, ksize, stride=stride, pad=pad,
        nobias=self.nobias)
    self.link.W.data[...] = numpy.random.uniform(
        -1, 1, self.link.W.data.shape).astype(numpy.float32)
    if not self.nobias:
        self.link.b.data[...] = numpy.random.uniform(
            -1, 1, self.link.b.data.shape).astype(numpy.float32)
    self.link.cleargrads()

    N = 2
    h, w = 3, 2
    kh, kw = _pair(ksize)
    out_h = conv.get_deconv_outsize(h, kh, stride, pad)
    out_w = conv.get_deconv_outsize(w, kw, stride, pad)
    self.gy = numpy.random.uniform(
        -1, 1, (N, out_channels, out_h, out_w)).astype(numpy.float32)
    self.x = numpy.random.uniform(
        -1, 1, (N, in_channels, h, w)).astype(numpy.float32)
def setUp(self, use_cudnn=True):
    kh, kw = _pair(self.ksize)
    sh, sw = _pair(self.stride)
    ph, pw = _pair(self.pad)
    self.W = numpy.random.normal(
        0, self.wscale * numpy.sqrt(1. / (kh * kw * self.in_channels)),
        (self.in_channels, self.out_channels, kh, kw)
    ).astype(self.W_dtype)
    self.b = None if self.nobias else numpy.random.uniform(
        -1, 1, self.out_channels).astype(self.x_dtype)

    N = 2
    inh, inw = 4, 3
    outh = conv.get_deconv_outsize(inh, kh, sh, ph)
    outw = conv.get_deconv_outsize(inw, kw, sw, pw)
    self.outsize = (outh, outw) if self.test_outsize else None
    self.x = numpy.random.uniform(
        -1, 1, (N, self.in_channels, inh, inw)).astype(self.x_dtype)
    self.gy = numpy.random.uniform(
        -1, 1, (N, self.out_channels, outh, outw)).astype(self.x_dtype)

    self.test_forward_options = {}
    self.check_backward_options = {'dtype': numpy.float64}
    if self.x_dtype == numpy.float16:
        self.test_forward_options = {'atol': 5e-3, 'rtol': 5e-2}
        self.check_backward_options = {
            'dtype': numpy.float64, 'atol': 5e-4, 'rtol': 5e-3}
    elif self.W_dtype == numpy.float16:
        self.check_backward_options = {
            'dtype': numpy.float64, 'atol': 5e-4, 'rtol': 5e-3}
def template(col_shape=(2, 5, 3, 3, 3, 4), col_order=col_chainer_order,
             im_order=OrderNHWC, ksize=(3, 3), padding=(1, 1),
             stride=(1, 1), description: str = ""):
    col = Variable(col_shape, col_order)

    op = Col2Im(None, ksize, stride, padding)
    im, = op(col)
    im = im.change_order(im_order)

    vcol = np.random.rand(
        *(col.shape_dict[a] for a in col_chainer_order.axes)).astype(
        np.float32)
    h1 = get_deconv_outsize(col.shape_dict[Axis.H], op.KH, op.SH, op.PH)
    w1 = get_deconv_outsize(col.shape_dict[Axis.W], op.KW, op.SW, op.PW)
    vim = col2im_cpu(vcol, op.SH, op.SW, op.PH, op.PW, h1, w1)

    vcol = vcol.transpose(
        [col_chainer_order.axes_dict[a] for a in col_order.axes])
    vim = vim.transpose([OrderNCHW.axes_dict[a] for a in im_order.axes])

    generate_kernel_test_case(
        description=f"Col2Im {description}",
        backend=["webgpu", "webgl", "webassembly"],
        graph=Graph([col], [im]),
        inputs={col: vcol},
        expected={im: vim},
    )
def _compute_outsize(self, in_h, in_w):
    if self.out_h is None:
        self.out_h = conv.get_deconv_outsize(
            in_h, self.orig_kh, self.orig_sy, self.orig_ph, d=1) // self.r
    if self.out_w is None:
        self.out_w = conv.get_deconv_outsize(
            in_w, self.orig_kw, self.orig_sx, self.orig_pw, d=1) // self.r
def setUp(self):
    N = 2
    in_channels = 3
    out_channels = 2
    ndim = len(self.dims)
    ksize = (3, ) * ndim
    stride = (2, ) * ndim
    pad = (1, ) * ndim

    if self.used_outsize == 'case1' or self.used_outsize == 'None':
        # Use output size determined with get_deconv_outsize.
        outs = tuple(
            conv.get_deconv_outsize(d, k, s, p)
            for (d, k, s, p) in zip(self.dims, ksize, stride, pad))
    elif self.used_outsize == 'case2':
        # Use possible output size other than the one determined with
        # get_deconv_outsize.
        outs = tuple(
            conv.get_deconv_outsize(d, k, s, p) + 1
            for (d, k, s, p) in zip(self.dims, ksize, stride, pad))

    if self.used_outsize != 'None':
        outsize = outs
    else:
        outsize = None

    if not self.nobias:
        initial_bias = initializers.Uniform(scale=1, dtype=self.dtype)
    else:
        initial_bias = None

    self.link = deconvolution_nd.DeconvolutionND(
        ndim, in_channels, out_channels, ksize, stride=stride, pad=pad,
        outsize=outsize, initial_bias=initial_bias, nobias=self.nobias)
    self.link.cleargrads()

    x_shape = (N, in_channels) + self.dims
    self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype)
    gy_shape = (N, out_channels) + outs
    self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

    self.check_forward_options = {}
    self.check_backward_options = {'eps': 1e-2, 'atol': 1e-4, 'rtol': 1e-3}
    if self.dtype == numpy.float16:
        self.check_forward_options = {'atol': 5e-3, 'rtol': 5e-2}
        self.check_backward_options = {
            'eps': 2 ** -3, 'atol': 1e-2, 'rtol': 1e-1}
def forward_gpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    n, in_c, in_h, in_w = x.shape
    c = W.shape[1]  # out_c
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
    if cuda.cudnn_enabled and self.use_cudnn:
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y = cuda.cupy.empty((n, c, self.outh, self.outw),
                            dtype=numpy.float32)
        y_desc = cudnn.create_tensor_descriptor(y)

        self.filter_desc = cudnn.create_filter_descriptor(W)
        self.conv_desc = cudnn.create_convolution_descriptor(
            (self.ph, self.pw), (self.sy, self.sx))
        if b is not None:
            self.bias_desc = cudnn.create_tensor_descriptor(
                b[None, :, None, None])

        one = numpy.array(1, dtype=x.dtype).ctypes
        zero = numpy.array(0, dtype=x.dtype).ctypes
        libcudnn.convolutionBackwardData_v2(
            handle, one.data, self.filter_desc.value, W.data.ptr,
            x_desc.value, x.data.ptr, self.conv_desc.value,
            zero.data, y_desc.value, y.data.ptr)
        if b is not None:
            libcudnn.addTensor_v2(
                handle, libcudnn.CUDNN_ADD_SAME_C, one.data,
                self.bias_desc.value, b.data.ptr, one.data,
                y_desc.value, y.data.ptr)
    else:
        W_mat = W.reshape(in_c, c * kh * kw)
        x_mats = x.reshape(n, in_c, in_h * in_w)
        gcol = cuda.cupy.empty(
            (n, c, kh, kw, in_h, in_w), dtype=numpy.float32)
        gcol_mats = gcol.reshape(n, c * kh * kw, in_h * in_w)
        for i in moves.range(n):
            cuda.cupy.dot(W_mat.T, x_mats[i], gcol_mats[i])
        y = conv.col2im_gpu(
            gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
        if b is not None:
            y += b.reshape(1, b.size, 1, 1)
    return y,
def setUp(self):
    N = 2
    out_channels = 2
    ndim = len(self.dims)
    ksize = (3,) * ndim
    stride = (2,) * ndim
    pad = (1,) * ndim

    if self.used_outsize == 'case1' or self.used_outsize == 'None':
        # Use output size determined with get_deconv_outsize.
        outs = tuple(
            conv.get_deconv_outsize(d, k, s, p)
            for (d, k, s, p) in zip(self.dims, ksize, stride, pad))
    elif self.used_outsize == 'case2':
        # Use possible output size other than the one determined with
        # get_deconv_outsize.
        outs = tuple(
            conv.get_deconv_outsize(d, k, s, p) + 1
            for (d, k, s, p) in zip(self.dims, ksize, stride, pad))

    if self.used_outsize != 'None':
        outsize = outs
    else:
        outsize = None

    if not self.nobias:
        initial_bias = initializers.Uniform(scale=1, dtype=self.dtype)
    else:
        initial_bias = None

    if self.in_channels == 'omit':
        self.link = deconvolution_nd.DeconvolutionND(
            ndim, out_channels, ksize, stride=stride, pad=pad,
            outsize=outsize, initial_bias=initial_bias,
            nobias=self.nobias, groups=self.groups)
    else:
        self.link = deconvolution_nd.DeconvolutionND(
            ndim, self.in_channels, out_channels, ksize, stride=stride,
            pad=pad, outsize=outsize, initial_bias=initial_bias,
            nobias=self.nobias, groups=self.groups)
    self.link.cleargrads()

    x_shape = (N, 4) + self.dims
    self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype)
    gy_shape = (N, out_channels) + outs
    self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

    self.check_forward_options = {}
    self.check_backward_options = {'eps': 1e-2, 'atol': 1e-4, 'rtol': 1e-3}
    if self.dtype == numpy.float16:
        self.check_forward_options = {'atol': 5e-3, 'rtol': 5e-2}
        self.check_backward_options = {
            'eps': 2 ** -3, 'atol': 1e-2, 'rtol': 1e-1}
def forward_gpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    n, in_c, in_h, in_w = x.shape
    c = W.shape[1]  # out_c
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
    if cuda.cudnn_enabled and self.use_cudnn:
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y = cuda.cupy.empty((n, c, self.outh, self.outw),
                            dtype=numpy.float32)
        y_desc = cudnn.create_tensor_descriptor(y)

        self.filter_desc = cudnn.create_filter_descriptor(W)
        self.conv_desc = cudnn.create_convolution_descriptor(
            (self.ph, self.pw), (self.sy, self.sx))
        if b is not None:
            self.bias_desc = cudnn.create_tensor_descriptor(
                b[None, :, None, None])

        one = numpy.array(1, dtype=x.dtype).ctypes
        zero = numpy.array(0, dtype=x.dtype).ctypes
        libcudnn.convolutionBackwardData_v2(
            handle, one.data, self.filter_desc.value, W.data.ptr,
            x_desc.value, x.data.ptr, self.conv_desc.value,
            zero.data, y_desc.value, y.data.ptr)
        if b is not None:
            libcudnn.addTensor_v2(
                handle, libcudnn.CUDNN_ADD_SAME_C, one.data,
                self.bias_desc.value, b.data.ptr, one.data,
                y_desc.value, y.data.ptr)
    else:
        W_mat = W.reshape(in_c, c * kh * kw)
        x_mats = x.reshape(n, in_c, in_h * in_w)
        gcol = cuda.cupy.empty(
            (n, c, kh, kw, in_h, in_w), dtype=numpy.float32)
        gcol_mats = gcol.reshape(n, c * kh * kw, in_h * in_w)
        for i in moves.range(n):
            cuda.cupy.dot(W_mat.T, x_mats[i], gcol_mats[i])
        y = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
        if b is not None:
            y += b.reshape(1, b.size, 1, 1)
    return y,
def forward(self, x):
    h, w = x[0].shape[2:]
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    xp = cuda.get_array_module(*x)
    col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                  (1, 1, self.kh, self.kw, 1, 1))
    if isinstance(x[0], cuda.ndarray):
        y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    else:
        y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    return (y,)
def forward(self, x):
    h, w = x[0].shape[2:]
    n = x[0].shape[0]
    c = x[0].shape[1]
    indexes = x[1]
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    xp = cuda.get_array_module(*x)
    col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                  (1, 1, self.kh, self.kw, 1, 1))

    # NOTE(hvy): Take indexes(Switches) into account
    # TODO(hvy): Remove the loops and make it efficient
    y = xp.zeros_like(col)
    if isinstance(x[0], cuda.ndarray):
        indexes = cuda.cupy.asnumpy(indexes)
    for n_i in range(n):
        for c_i in range(c):
            for r in range(h):
                for c in range(w):
                    index = indexes[n_i][c_i][r][c]
                    if index < self.kw:
                        y[n_i][c_i].T[c][r][index][0] = \
                            col[n_i][c_i].T[c][r][index][0]
                    else:
                        y[n_i][c_i].T[c][r][index % self.kw][1] = \
                            col[n_i][c_i].T[c][r][index % self.kw][1]

    if isinstance(x[0], cuda.ndarray):
        y = conv.col2im_gpu(y, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    else:
        y = conv.col2im_cpu(y, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    return y,
def convert_Unpooling2D(func, opset_version, input_names, output_names,
                        context):
    pad = [func.ph, func.pw]
    stride = [func.sy, func.sx]
    ksize = [func.kh, func.kw]
    outsize = [func.outh, func.outw]
    # TODO(hamaji): These could be implemented by `Slice` and `Pad`.
    if func.cover_all:
        raise RuntimeError('ONNX-chainer does not support `cover_all=True` '
                           'for Unpooling2D')
    h, w = func.inputs[0].shape[2:]
    expected_outsize = [
        conv.get_deconv_outsize(h, func.kh, func.sy, func.ph,
                                cover_all=func.cover_all),
        conv.get_deconv_outsize(w, func.kh, func.sy, func.ph,
                                cover_all=func.cover_all)
    ]
    if outsize != expected_outsize:
        raise RuntimeError('ONNX-chainer does not support `outsize!=None` '
                           'for Unpooling2D: expected={} actual={}'.format(
                               expected_outsize, outsize))
    if pad != [0, 0]:
        raise RuntimeError('ONNX-chainer does not support `pad!=0` '
                           'for Unpooling2D')
    # This one would require an extra 1x1 MaxPool.
    if stride != ksize:
        raise RuntimeError('ONNX-chainer does not support `stride!=ksize` '
                           'for Unpooling2D: stride={} ksize={}'.format(
                               stride, ksize))

    scales = [1.0, 1.0, float(func.kh), float(func.kw)]
    if opset_version == 7:
        return onnx_helper.make_node(
            'Upsample', input_names, output_names, scales=scales),

    scales_name = context.add_const(
        np.array(scales, dtype=np.float32), 'scales')
    if opset_version in [9, 10]:
        input_names.append(scales_name)
        op = 'Upsample' if opset_version == 9 else 'Resize'
        return onnx_helper.make_node(op, input_names, output_names),
    if opset_version == 11:
        roi_name = context.add_const(np.array([]), 'roi')
        input_names.extend([roi_name, scales_name])
        return onnx_helper.make_node('Resize', input_names, output_names),
def setUp(self):
    in_channels_a_group = 3
    out_channels_a_group = 2
    self.in_channels = in_channels_a_group * self.groups
    self.out_channels = out_channels_a_group * self.groups
    self.ksize = 3
    self.pad = 1
    kh, kw = _pair(self.ksize)
    sh, sw = _pair(self.stride)
    ph, pw = _pair(self.pad)
    W = numpy.random.normal(
        0, numpy.sqrt(1. / (kh * kw * in_channels_a_group)),
        (self.in_channels, out_channels_a_group, kh, kw)
    ).astype(self.W_dtype)
    b = None if self.nobias else numpy.random.uniform(
        -1, 1, self.out_channels).astype(self.x_dtype)

    N = 2
    inh, inw = 4, 3
    outh = conv.get_deconv_outsize(inh, kh, sh, ph, d=self.dilate)
    outw = conv.get_deconv_outsize(inw, kw, sw, pw, d=self.dilate)
    self.outsize = (outh, outw) if self.test_outsize else None
    x = numpy.random.uniform(
        -1, 1, (N, self.in_channels, inh, inw)).astype(self.x_dtype)
    gy = numpy.random.uniform(
        -1, 1, (N, self.out_channels, outh, outw)).astype(self.x_dtype)

    ggx = numpy.random.uniform(-1, 1, x.shape).astype(self.x_dtype)
    ggW = numpy.random.uniform(-1, 1, W.shape).astype(self.W_dtype)
    ggb = None if self.nobias else numpy.random.uniform(
        -1, 1, b.shape).astype(self.x_dtype)

    self.inputs = [x, W, b]
    self.grad_outputs = [gy]
    self.grad_grad_inputs = [ggx, ggW, ggb]

    self.test_forward_options = {}
    self.check_backward_options = {'dtype': numpy.float64}
    self.check_double_backward_options = {'dtype': numpy.float64}
    if self.x_dtype == numpy.float16:
        self.test_forward_options.update(atol=5e-3, rtol=5e-2)
        self.check_backward_options.update(atol=5e-4, rtol=5e-3)
        self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
    elif self.W_dtype == numpy.float16:
        self.check_backward_options.update(atol=5e-4, rtol=5e-3)
        self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
def forward_gpu(self, x):
    self.retain_inputs(())
    self._in_dtype = x[0].dtype

    xp = cuda.cupy
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = xp.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
    up_y = conv.im2col_gpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
    n, c, oy, ox, ky, kx = up_y.shape
    indexes = xp.asarray(self.indexes, dtype=numpy.int32)
    xp.ElementwiseKernel(
        'int32 index, float32 x, int32 n, int32 c, int32 oy, int32 ox,'
        'int32 ky, int32 kx',
        'raw float32 up_y',
        '''
        int yn = i / c / oy / ox;
        int yc = (i / oy / ox) % c;
        int yoy = (i / ox) % oy;
        int yox = i % ox;
        up_y[yn * c * oy * ox * ky * kx +
             yc * oy * ox * ky * kx +
             yoy * ox * ky * kx +
             yox * ky * kx +
             index] = x;
        ''',
        'upsampling_2d_fwd')(indexes, x[0], n, c, oy, ox, ky, kx, up_y)
    up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
    up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,
def setUp(self):
    in_channels = 3
    out_channels = 2
    ndim = len(self.dims)
    ksize = (3,) * ndim
    self.stride = (2,) * ndim
    self.pad = (1,) * ndim

    W_scale = numpy.sqrt(1. / functools.reduce(mul, ksize, in_channels))
    W_shape = (in_channels, out_channels) + ksize
    self.W = numpy.random.normal(0, W_scale, W_shape).astype(self.W_dtype)
    self.b = numpy.random.uniform(-1, 1, out_channels).astype(self.x_dtype)

    outs = tuple(
        conv.get_deconv_outsize(d, k, s, p)
        for (d, k, s, p) in zip(self.dims, ksize, self.stride, self.pad))
    self.outsize = outs if self.test_outsize else None
    x_shape = (2, in_channels) + self.dims
    self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.x_dtype)
    gy_shape = (2, out_channels) + outs
    self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.x_dtype)

    self.test_forward_options = {}
    self.check_backward_options = {'eps': 1e-2, 'atol': 1e-4, 'rtol': 1e-3}
    if self.x_dtype == numpy.float16:
        self.test_forward_options = {'atol': 5e-3, 'rtol': 5e-2}
        self.check_backward_options = {
            'eps': 2 ** -3, 'atol': 1e-2, 'rtol': 1e-1}
    elif self.W_dtype == numpy.float16:
        self.check_backward_options = {
            'eps': 2 ** -3, 'atol': 1e-3, 'rtol': 1e-2}
def forward(self, inputs):
    self.retain_inputs((0, 1))  # only retain x and W
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None

    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}'.format(type(W), type(x)))

    if self.outs is None:
        dims = x.shape[2:]
        ksize = W.shape[2:]
        self.outs = tuple(
            conv.get_deconv_outsize(d, k, s, p)
            for d, k, s, p in zip(dims, ksize, self.stride, self.pad))
        assert all(out > 0 for out in self.outs), \
            'Output sizes should be positive.'
    self._set_cover_all(x, W)

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        return self._forward_xp(x, W, b, numpy)
    elif self._use_cudnn(x, W):
        return self._forward_cudnn(x, W, b)
    else:
        return self._forward_xp(x, W, b, cuda.cupy)
def forward(self, x):
    self.retain_inputs(())

    dims = x[0].shape[2:]
    ndim = self.ndim
    ksize = self.ksize
    stride = self.stride
    pad = self.pad
    if self.outs is None:
        self.outs = tuple(
            conv.get_deconv_outsize(d, k, s, p, cover_all=self.cover_all)
            for (d, k, s, p) in six.moves.zip(dims, ksize, stride, pad))

    xp = backend.get_array_module(*x)

    colon = slice(None)
    # (:, :, None, None, ..., None)
    tile_index = (colon, colon) + (None,) * ndim
    # (1, 1, k_1, k_2, ..., k_n, 1, 1, ..., 1)
    tile_reps = (1, 1) + ksize + (1,) * ndim
    col = xp.tile(x[0][tile_index], tile_reps)

    if xp is numpy:
        col2im_nd = conv_nd.col2im_nd_cpu
    else:
        col2im_nd = conv_nd.col2im_nd_gpu
    y = col2im_nd(col, stride, pad, self.outs)

    return y,
def setUp(self):
    self.N = 2
    self.in_channels = 4
    self.out_channels = 2
    self.ndim = len(self.dims)
    self.ksize = (3, ) * self.ndim
    self.stride = (2, ) * self.ndim
    self.pad = (1, ) * self.ndim
    self.dilate = (self.dilate, ) * self.ndim

    self.W_scale = numpy.sqrt(
        1. / functools.reduce(mul, self.ksize, self.in_channels))
    self.W_shape = (
        self.in_channels, self.out_channels // self.groups) + self.ksize

    outs = tuple(
        conv.get_deconv_outsize(d, k, s, p, d=di)
        for (d, k, s, p, di)
        in zip(self.dims, self.ksize, self.stride, self.pad, self.dilate))
    self.outsize = outs if self.test_outsize else None
    self.x_shape = (self.N, self.in_channels) + self.dims
    self.gy_shape = (self.N, self.out_channels) + outs

    self.check_backward_options.update({'atol': 3e-5, 'rtol': 3e-4})
    self.check_double_backward_options.update({'atol': 5e-3, 'rtol': 5e-2})
    if (self.x_dtype == numpy.float16 or self.W_dtype == numpy.float16
            or self.b_dtype == numpy.float16):
        self.check_forward_options.update({'atol': 5e-3, 'rtol': 5e-3})
        self.check_backward_options.update(
            {'atol': 2 ** -4, 'rtol': 2 ** -4})
        self.check_double_backward_options.update(
            {'atol': 2 ** -4, 'rtol': 2 ** -4})
def setUp(self):
    N = 2
    c = 3
    ndim = len(self.dims)
    self.ksize = (self._ksize, ) * ndim
    self.stride = (self._stride, ) * ndim
    self.pad = (self._pad, ) * ndim

    x_shape = (N, c) + self.dims
    self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype)
    outs = tuple(
        conv.get_deconv_outsize(d, k, s, p, cover_all=self.cover_all)
        for (d, k, s, p)
        in zip(self.dims, self.ksize, self.stride, self.pad))
    gy_shape = (N, c) + outs
    self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

    if self.dtype == numpy.float16:
        self.check_forward_options = {'atol': 2 ** -4, 'rtol': 2 ** -4}
        self.check_backward_options = {
            'dtype': numpy.float64, 'atol': 2 ** -4, 'rtol': 2 ** -4}
    else:
        self.check_forward_options = {}
        self.check_backward_options = {'atol': 1e-3, 'rtol': 1e-3}
def _forward_xp(self, x, W, b, xp):
    ndim = self.ndim
    ksize = W.shape[2:]   # W: C_I, C_O, k_1, k_2, ..., k_N
    dims = x.shape[2:]    # x: n, C_I, d_1, d_2, ..., d_N
    stride = self.stride
    pad = self.pad

    # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
    gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    # Roll n, which is batch size, before the first.
    gcol = xp.rollaxis(gcol, ndim + 1)

    if self.outs is None:
        self.outs = tuple(
            conv.get_deconv_outsize(d, k, s, p)
            for d, k, s, p in zip(dims, ksize, stride, pad))
        assert all(out > 0 for out in self.outs), \
            'Output sizes should be positive.'

    # y: n, C_O, d_1, d_2, ..., d_N
    if xp is numpy:
        y = conv_nd.col2im_nd_cpu(gcol, stride, pad, self.outs)
    else:
        y = conv_nd.col2im_nd_gpu(gcol, stride, pad, self.outs)
    if b is not None:
        b_shape = (1, -1) + (1, ) * ndim
        y += b.reshape(b_shape)
    return y,
def setUp(self):
    N = 2
    c = 3
    ndim = len(self.dims)
    self.ksize = (self._ksize,) * ndim
    self.stride = (self._stride,) * ndim
    self.pad = (self._pad,) * ndim

    x_shape = (N, c) + self.dims
    self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype)
    outs = tuple(
        conv.get_deconv_outsize(d, k, s, p, cover_all=self.cover_all)
        for (d, k, s, p)
        in zip(self.dims, self.ksize, self.stride, self.pad))
    gy_shape = (N, c) + outs
    self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

    if self.dtype == numpy.float16:
        self.check_forward_options = {'atol': 2 ** -4, 'rtol': 2 ** -4}
        self.check_backward_options = {
            'dtype': numpy.float64, 'atol': 2 ** -4, 'rtol': 2 ** -4}
        self.check_double_backward_options = {}
    else:
        self.check_forward_options = {}
        self.check_backward_options = {'atol': 1e-3, 'rtol': 1e-3}
        self.check_double_backward_options = {'atol': 3e-3, 'rtol': 3e-2}

    self.ggx = numpy.random.uniform(
        -1, 1, self.x.shape).astype(self.dtype)
def forward(self, x):
    dims = x[0].shape[2:]
    ndim = self.ndim
    ksize = self.ksize
    stride = self.stride
    pad = self.pad
    if self.outs is None:
        self.outs = tuple(
            conv.get_deconv_outsize(d, k, s, p, cover_all=self.cover_all)
            for (d, k, s, p) in six.moves.zip(dims, ksize, stride, pad))

    xp = cuda.get_array_module(*x)

    colon = slice(None)
    # (:, :, None, None, ..., None)
    tile_index = (colon, colon) + (None, ) * ndim
    # (1, 1, k_1, k_2, ..., k_n, 1, 1, ..., 1)
    tile_reps = (1, 1) + ksize + (1, ) * ndim
    col = xp.tile(x[0][tile_index], tile_reps)

    if xp is numpy:
        col2im_nd = conv_nd.col2im_nd_cpu
    else:
        col2im_nd = conv_nd.col2im_nd_gpu
    y = col2im_nd(col, stride, pad, self.outs)

    return y,
def _forward_xp(self, x, W, b, xp):
    ndim = self.ndim
    ksize = W.shape[2:]   # W: C_I, C_O, k_1, k_2, ..., k_N
    dims = x.shape[2:]    # x: n, C_I, d_1, d_2, ..., d_N
    stride = self.stride
    pad = self.pad

    # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
    gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    # Roll n, which is batch size, before the first.
    gcol = xp.rollaxis(gcol, ndim + 1)

    if self.outs is None:
        self.outs = tuple(
            conv.get_deconv_outsize(d, k, s, p)
            for d, k, s, p in zip(dims, ksize, stride, pad))
        assert all(out > 0 for out in self.outs), \
            'Output sizes should be positive.'

    # y: n, C_O, d_1, d_2, ..., d_N
    if xp is numpy:
        y = conv_nd.col2im_nd_cpu(gcol, stride, pad, self.outs)
    else:
        y = conv_nd.col2im_nd_gpu(gcol, stride, pad, self.outs)
    if b is not None:
        b_shape = (1, -1) + (1,) * ndim
        y += b.reshape(b_shape)
    return y,
def _forward_cudnn(self, x, W, b):
    c = W.shape[1]          # W: C_I, C_O, k_1, k_2, ..., k_N
    ksize = W.shape[2:]
    n, in_c = x.shape[:2]   # x: n, C_I, d_1, d_2, ..., d_N
    dims = x.shape[2:]
    ndim = self.ndim
    colon = slice(None)

    # Make empty array for output.
    if self.outs is None:
        self.outs = tuple(
            conv.get_deconv_outsize(d, k, s, p)
            for d, k, s, p in zip(dims, ksize, self.stride, self.pad))
        assert all(out > 0 for out in self.outs), \
            'Output sizes should be positive.'
    y_shape = (n, c) + self.outs  # (n, c_O, out_1, out_2, ..., out_N)
    y = cuda.cupy.empty(y_shape, dtype=x.dtype)

    # Convert to C-contiguous arrays.
    x = cuda.cupy.ascontiguousarray(x)
    W = cuda.cupy.ascontiguousarray(W)
    if b is not None:
        b = cuda.cupy.ascontiguousarray(b)

    # Get cuDNN handler and descriptors.
    handle = cudnn.get_handle()
    x_desc = cudnn.create_tensor_descriptor(x)
    y_desc = cudnn.create_tensor_descriptor(y)
    self.filter_desc = cudnn.create_filter_descriptor(W)
    self.conv_desc = cudnn.create_convolution_descriptor(
        self.pad, self.stride, x.dtype)
    if b is not None:
        b_index = (None, colon) + (None,) * ndim
        self.bias_desc = cudnn.create_tensor_descriptor(b[b_index])

    # cuDNN forward computation.
    oz_dtype = 'd' if x.dtype == 'd' else 'f'
    one = numpy.array(1, dtype=oz_dtype).ctypes
    zero = numpy.array(0, dtype=oz_dtype).ctypes
    workspace_size = cuda.get_max_workspace_size()
    workspace = cuda.cupy.empty((workspace_size,), dtype='b')
    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
        handle, self.filter_desc.value, x_desc.value, self.conv_desc.value,
        y_desc.value, _bwd_data_pref, workspace_size)
    libcudnn.convolutionBackwardData_v3(
        handle, one.data, self.filter_desc.value, W.data.ptr,
        x_desc.value, x.data.ptr, self.conv_desc.value, algo,
        workspace.data.ptr, workspace_size, zero.data, y_desc.value,
        y.data.ptr)

    # Add bias if given.
    # TODO(takagi) Support unshared bias
    if b is not None:
        cudnn.add_tensor(
            handle, one.data, self.bias_desc.value, b.data.ptr,
            one.data, y_desc.value, y.data.ptr)

    return y,
def _process_deconv2d(self, function, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    batch_size, in_c, in_h, in_w = x.shape
    out_c = W.shape[1]  # out_c
    out_h = conv.get_deconv_outsize(in_h, kh, function.sy, function.ph)
    out_w = conv.get_deconv_outsize(in_w, kw, function.sx, function.pw)

    # twice because of multiply-and-add
    ops = 2 * batch_size * in_c * out_c * kw * kh * in_w * in_h
    if b is not None:
        ops += batch_size * out_c * out_w * out_h  # bias
    self._print(
        '%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%f' %
        (function.label, batch_size, in_w, in_h, in_c, out_w, out_h, out_c,
         kw, kh, function.pw, function.sx, ops / 1e9))
    self.total_ops += ops
def forward(self, x):
    h = self.features(x)
    h = self.classifier(h)
    if self.nodeconv:
        from chainer.utils import conv
        in_h, in_w = h.size()[2:4]
        out_h = conv.get_deconv_outsize(in_h, k=64, s=32, p=0)
        out_w = conv.get_deconv_outsize(in_w, k=64, s=32, p=0)
        self.upscore.size = out_h, out_w
        h = self.upscore(h)
    else:
        h = self.upscore(h)
    h = h[:, :, 19:19 + x.size()[2], 19:19 + x.size()[3]].contiguous()
    return h
def convert_Unpooling2D(func, opset_version, input_names, num_outputs,
                        parameters):
    pad = [func.ph, func.pw]
    stride = [func.sy, func.sx]
    ksize = [func.kh, func.kw]
    outsize = [func.outh, func.outw]
    # TODO(hamaji): These could be implemented by `Slice` and `Pad`.
    if func.cover_all:
        raise RuntimeError('ONNX-chainer does not support `cover_all=True` '
                           'for Unpooling2D')
    h, w = func.inputs[0].shape[2:]
    expected_outsize = [
        conv.get_deconv_outsize(h, func.kh, func.sy, func.ph,
                                cover_all=func.cover_all),
        conv.get_deconv_outsize(w, func.kh, func.sy, func.ph,
                                cover_all=func.cover_all)
    ]
    if outsize != expected_outsize:
        raise RuntimeError('ONNX-chainer does not support `outsize!=None` '
                           'for Unpooling2D: expected={} actual={}'.format(
                               expected_outsize, outsize))
    if pad != [0, 0]:
        raise RuntimeError('ONNX-chainer does not support `pad!=0` '
                           'for Unpooling2D')
    # This one would require an extra 1x1 MaxPool.
    if stride != ksize:
        raise RuntimeError('ONNX-chainer does not support `stride!=ksize` '
                           'for Unpooling2D')

    scales = [1.0, 1.0, float(func.kh), float(func.kw)]
    if opset_version == 7:
        return onnx_helper.make_node(
            'Upsample', input_names, num_outputs, scales=scales),
    if opset_version == 9:
        scales = np.array(scales, dtype=np.float32)
        scales_param = chainer.Parameter(scales)
        parameters.append(scales_param)
        input_names.append(str(id(scales_param)))
        return onnx_helper.make_node('Upsample', input_names, num_outputs),
def forward(self, x):
    h, w = x[0].shape[2:]
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    xp = cuda.get_array_module(*x)
    col = xp.tile(x[0][:, :, None, None], (1, 1, self.kh, self.kw, 1, 1))
    if xp is numpy:
        y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    else:
        y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
    return y,
def setUp(self):
    self.in_channels = 3
    self.out_channels = 2
    kh, kw = _pair(3)
    sh, sw = _pair(1)
    ph, pw = _pair(1)
    self.W = cuda.cupy.random.normal(
        0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
        (self.in_channels, self.out_channels, kh, kw)
    ).astype(numpy.float32)
    N = 2
    inh, inw = 4, 3
    outh = conv.get_deconv_outsize(inh, kh, sh, ph)
    outw = conv.get_deconv_outsize(inw, kw, sw, pw)
    self.x = cuda.cupy.random.uniform(
        -1, 1, (N, self.in_channels, inh, inw)).astype(numpy.float32)
    self.gy = cuda.cupy.random.uniform(
        -1, 1, (N, self.out_channels, outh, outw)).astype(numpy.float32)
def setUp(self):
    self.in_channels = 3
    self.out_channels = 2
    kh, kw = _pair(3)
    sh, sw = _pair(1)
    ph, pw = _pair(1)
    self.W = cuda.cupy.random.normal(
        0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
        (self.in_channels, self.out_channels, kh, kw)).astype(
        numpy.float32)
    N = 2
    inh, inw = 4, 3
    outh = conv.get_deconv_outsize(inh, kh, sh, ph)
    outw = conv.get_deconv_outsize(inw, kw, sw, pw)
    self.x = cuda.cupy.random.uniform(
        -1, 1, (N, self.in_channels, inh, inw)).astype(numpy.float32)
    self.gy = cuda.cupy.random.uniform(
        -1, 1, (N, self.out_channels, outh, outw)).astype(numpy.float32)
def setUp(self):
    self.N = 2
    self.out_channels = 2
    self.ndim = len(self.dims)
    self.ksize = (3, ) * self.ndim
    self.stride = (2, ) * self.ndim
    self.pad = (1, ) * self.ndim

    if self.nobias:
        self.param_names = ('W', )
    else:
        self.param_names = ('W', 'b')

    if self.used_outsize == 'case1' or self.used_outsize == 'None':
        # Use output size determined with get_deconv_outsize.
        outs = tuple(
            conv.get_deconv_outsize(d, k, s, p)
            for (d, k, s, p)
            in zip(self.dims, self.ksize, self.stride, self.pad))
    elif self.used_outsize == 'case2':
        # Use possible output size other than the one determined with
        # get_deconv_outsize.
        outs = tuple(
            conv.get_deconv_outsize(d, k, s, p) + 1
            for (d, k, s, p)
            in zip(self.dims, self.ksize, self.stride, self.pad))

    if self.used_outsize != 'None':
        self.outsize = outs
    else:
        self.outsize = None

    self.x_shape = (self.N, 4) + self.dims

    self.check_backward_options.update({
        'eps': 1e-2, 'atol': 1e-4, 'rtol': 1e-3})
    if self.dtype == numpy.float16:
        self.check_forward_options.update({'atol': 5e-3, 'rtol': 5e-2})
        self.check_backward_options.update({
            'eps': 2 ** -3, 'atol': 1e-2, 'rtol': 1e-1})
def _forward_cudnn(self, x, W, b):
    c = W.shape[1]          # W: C_I, C_O, k_1, k_2, ..., k_N
    ksize = W.shape[2:]
    n, in_c = x.shape[:2]   # x: n, C_I, d_1, d_2, ..., d_N
    dims = x.shape[2:]
    ndim = self.ndim
    colon = slice(None)

    # Make empty array for output.
    if self.outs is None:
        self.outs = tuple(
            conv.get_deconv_outsize(d, k, s, p)
            for d, k, s, p in zip(dims, ksize, self.stride, self.pad))
        assert all(out > 0 for out in self.outs), \
            'Output sizes should be positive.'
    y_shape = (n, c) + self.outs  # (n, c_O, out_1, out_2, ..., out_N)
    y = cuda.cupy.empty(y_shape, dtype=x.dtype)

    # Convert to C-contiguous arrays.
    x = cuda.cupy.ascontiguousarray(x)
    W = cuda.cupy.ascontiguousarray(W)
    if b is not None:
        b = cuda.cupy.ascontiguousarray(b)

    # Get cuDNN handler and descriptors.
    handle = cudnn.get_handle()
    x_desc = cudnn.create_tensor_descriptor(x)
    y_desc = cudnn.create_tensor_descriptor(y)
    self.filter_desc = cudnn.create_filter_descriptor(W)
    self.conv_desc = cudnn.create_convolution_descriptor(
        self.pad, self.stride, x.dtype)
    if b is not None:
        b_index = (None, colon) + (None, ) * ndim
        self.bias_desc = cudnn.create_tensor_descriptor(b[b_index])

    # cuDNN forward computation.
    oz_dtype = 'd' if x.dtype == 'd' else 'f'
    one = numpy.array(1, dtype=oz_dtype).ctypes
    zero = numpy.array(0, dtype=oz_dtype).ctypes
    workspace_size = cuda.get_max_workspace_size()
    workspace = cuda.cupy.empty((workspace_size, ), dtype='b')
    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
        handle, self.filter_desc.value, x_desc.value, self.conv_desc.value,
        y_desc.value, _bwd_data_pref, workspace_size)
    libcudnn.convolutionBackwardData_v3(
        handle, one.data, self.filter_desc.value, W.data.ptr,
        x_desc.value, x.data.ptr, self.conv_desc.value, algo,
        workspace.data.ptr, workspace_size, zero.data, y_desc.value,
        y.data.ptr)

    # Add bias if given.
    # TODO(takagi) Support unshared bias
    if b is not None:
        cudnn.add_tensor(handle, one.data, self.bias_desc.value,
                         b.data.ptr, one.data, y_desc.value, y.data.ptr)

    return y,
def _calc_out_size(self, x, W):
    """Calculates and stores `outh` and `outw`."""
    kh, kw = W.shape[2:]
    _, _, in_h, in_w = x.shape
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            in_h, kh, self.sy, self.ph, d=self.dy)
        if self.outh <= 0:
            raise RuntimeError('Height in the output must be positive.')
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            in_w, kw, self.sx, self.pw, d=self.dx)
        if self.outw <= 0:
            raise RuntimeError('Width in the output must be positive.')
def setUp(self):
    self.in_channels = 3
    self.out_channels = 2
    kh, kw = _pair(3)
    sh, sw = _pair(1)
    ph, pw = _pair(1)
    self.W = cuda.cupy.random.normal(
        0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
        (self.in_channels, self.out_channels, kh, kw)
    ).astype(self.dtype)
    N = 2
    inh, inw = 4, 3
    outh = conv.get_deconv_outsize(inh, kh, sh, ph)
    outw = conv.get_deconv_outsize(inw, kw, sw, pw)
    self.x = cuda.cupy.random.uniform(
        -1, 1, (N, self.in_channels, inh, inw)).astype(self.dtype)
    self.gy = cuda.cupy.random.uniform(
        -1, 1, (N, self.out_channels, outh, outw)).astype(self.dtype)

    with chainer.using_config('use_cudnn', self.use_cudnn):
        self.should_call_cudnn = chainer.should_use_cudnn('>=auto')
def setUp(self, use_cudnn=True):
    kh, kw = _pair(self.ksize)
    sh, sw = _pair(self.stride)
    ph, pw = _pair(self.pad)
    self.W = numpy.random.normal(
        0, self.wscale * numpy.sqrt(1. / (kh * kw * self.in_channels)),
        (self.in_channels, self.out_channels, kh, kw)
    ).astype(numpy.float32)
    self.b = None if self.nobias else numpy.random.uniform(
        -1, 1, self.out_channels).astype(numpy.float32)

    N = 2
    inh, inw = 4, 3
    outh = conv.get_deconv_outsize(inh, kh, sh, ph)
    outw = conv.get_deconv_outsize(inw, kw, sw, pw)
    self.outsize = (outh, outw) if self.test_outsize else None
    self.x = numpy.random.uniform(
        -1, 1, (N, self.in_channels, inh, inw)).astype(numpy.float32)
    self.gy = numpy.random.uniform(
        -1, 1, (N, self.out_channels, outh, outw)).astype(numpy.float32)
def setUp(self):
    self.in_channels = 3
    self.out_channels = 2
    kh, kw = _pair(3)
    sh, sw = _pair(1)
    ph, pw = _pair(1)
    self.W = cuda.cupy.random.normal(
        0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
        (self.in_channels, self.out_channels, kh, kw)
    ).astype(self.dtype)
    N = 2
    inh, inw = 4, 3
    outh = conv.get_deconv_outsize(inh, kh, sh, ph)
    outw = conv.get_deconv_outsize(inw, kw, sw, pw)
    self.x = cuda.cupy.random.uniform(
        -1, 1, (N, self.in_channels, inh, inw)).astype(self.dtype)
    self.gy = cuda.cupy.random.uniform(
        -1, 1, (N, self.out_channels, outh, outw)).astype(self.dtype)
    self.expect = self.use_cudnn and (
        cuda.cudnn.cudnn.getVersion() >= 3000 or
        self.dtype != numpy.float16)
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1))
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
    y = conv.col2im_cpu(
        gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,
def setUp(self):
    args, kwargs = self.deconv_args
    kwargs['nobias'] = self.nobias
    self.link = L.Deconvolution2D(*args, **kwargs)
    if not self.nobias:
        self.link.b.data[...] = numpy.random.uniform(
            -1, 1, self.link.b.data.shape).astype(numpy.float32)

    out_channels = self.link.out_channels
    ksize = self.link.ksize
    stride = self.link.stride[0]
    pad = self.link.pad[0]
    N = 2
    h, w = 3, 2
    kh, kw = _pair(ksize)
    out_h = conv.get_deconv_outsize(h, kh, stride, pad)
    out_w = conv.get_deconv_outsize(w, kw, stride, pad)
    self.gy = numpy.random.uniform(
        -1, 1, (N, out_channels, out_h, out_w)).astype(numpy.float32)
    self.x = numpy.random.uniform(
        -1, 1, (N, 3, h, w)).astype(numpy.float32)

    self.link(chainer.Variable(self.x))
    self.link.cleargrads()
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
        assert self.outh > 0, "Height in the output should be positive."
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
        assert self.outw > 0, "Width in the output should be positive."
    y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                        self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return (y,)
def setUp(self):
    kh, kw = _pair(self.ksize)
    sh, sw = _pair(self.stride)
    ph, pw = _pair(self.pad)
    self.W = numpy.random.normal(
        0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
        (self.in_channels, self.out_channels, kh, kw)
    ).astype(self.W_dtype)
    self.b = None if self.nobias else numpy.random.uniform(
        -1, 1, self.out_channels).astype(self.x_dtype)

    N = 2
    inh, inw = 4, 3
    outh = conv.get_deconv_outsize(inh, kh, sh, ph)
    outw = conv.get_deconv_outsize(inw, kw, sw, pw)
    self.outsize = (outh, outw) if self.test_outsize else None
    self.x = numpy.random.uniform(
        -1, 1, (N, self.in_channels, inh, inw)).astype(self.x_dtype)
    self.gy = numpy.random.uniform(
        -1, 1, (N, self.out_channels, outh, outw)).astype(self.x_dtype)
    self.ggx = numpy.random.uniform(-1, 1, self.x.shape).astype(
        self.x_dtype)
    self.ggW = numpy.random.uniform(-1, 1, self.W.shape).astype(
        self.W_dtype)
    self.ggb = None if self.nobias else numpy.random.uniform(
        -1, 1, self.b.shape).astype(self.x_dtype)

    self.test_forward_options = {}
    self.check_backward_options = {'dtype': numpy.float64}
    self.check_double_backward_options = {'dtype': numpy.float64}
    if self.x_dtype == numpy.float16:
        self.test_forward_options.update(atol=5e-3, rtol=5e-2)
        self.check_backward_options.update(atol=5e-4, rtol=5e-3)
        self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
    elif self.W_dtype == numpy.float16:
        self.check_backward_options.update(atol=5e-4, rtol=5e-3)
        self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
def forward_gpu(self, x):
    self.retain_inputs(())
    self._in_dtype = x[0].dtype

    xp = cuda.cupy
    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = xp.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
    up_y = conv.im2col_gpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
    n, c, oy, ox, ky, kx = up_y.shape
    indexes = xp.asarray(self.indexes, dtype=numpy.int32)
    xp.ElementwiseKernel(
        'int32 index, T x, int32 n, int32 c, int32 oy, int32 ox,'
        'int32 ky, int32 kx',
        'raw T up_y',
        '''
        int yn = i / c / oy / ox;
        int yc = (i / oy / ox) % c;
        int yoy = (i / ox) % oy;
        int yox = i % ox;
        up_y[yn * c * oy * ox * ky * kx +
             yc * oy * ox * ky * kx +
             yoy * ox * ky * kx +
             yox * ky * kx +
             index] = x;
        ''',
        'upsampling_2d_fwd')(indexes, x[0], n, c, oy, ox, ky, kx, up_y)
    up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
    up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw,
                           self.outh, self.outw)
    return up_y,
def setUp(self):
    in_channels_a_group = 3
    out_channels_a_group = 2
    self.in_channels = in_channels_a_group * self.groups
    self.out_channels = out_channels_a_group * self.groups
    kh, kw = _pair(3)
    sh, sw = _pair(1)
    ph, pw = _pair(1)
    self.W = cuda.cupy.random.normal(
        0, numpy.sqrt(1. / (kh * kw * in_channels_a_group)),
        (self.in_channels, out_channels_a_group, kh, kw)
    ).astype(self.dtype)
    N = 2
    inh, inw = 4, 3
    outh = conv.get_deconv_outsize(inh, kh, sh, ph)
    outw = conv.get_deconv_outsize(inw, kw, sw, pw)
    self.x = cuda.cupy.random.uniform(
        -1, 1, (N, self.in_channels, inh, inw)).astype(self.dtype)
    self.gy = cuda.cupy.random.uniform(
        -1, 1, (N, self.out_channels, outh, outw)).astype(self.dtype)

    with chainer.using_config('use_cudnn', self.use_cudnn):
        self.should_call_cudnn = chainer.should_use_cudnn('>=auto')
        if self.groups > 1 and cuda.cuda.cudnn.getVersion() < 7000:
            self.should_call_cudnn = False
def forward_cpu(self, x):
    self._in_dtype = x[0].dtype

    n, c, h, w = x[0].shape
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(
            h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(
            w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
    up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
    up_y = conv.im2col_cpu(
        up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all).transpose(0, 1, 4, 5, 2, 3)
    colh, colw = up_y.shape[2:4]
    up_y = up_y.reshape(-1, self.kh * self.kw)
    indexes = self.indexes.ravel()
    up_y[numpy.arange(len(indexes)), indexes] = x[0].ravel()
    up_y = up_y.reshape(n, c, colh, colw, self.kh, self.kw)
    up_y = conv.col2im_cpu(
        up_y.transpose(0, 1, 4, 5, 2, 3), self.sy, self.sx, self.ph,
        self.pw, self.outh, self.outw)
    return up_y,
def setUp(self):
    N = 2
    in_channels = 4
    out_channels = 2
    ndim = len(self.dims)
    ksize = (3,) * ndim
    self.stride = (2,) * ndim
    self.pad = (1,) * ndim
    self.dilate = (self.dilate,) * ndim

    W_scale = numpy.sqrt(1. / functools.reduce(mul, ksize, in_channels))
    W_shape = (in_channels, out_channels // self.groups) + ksize
    self.W = numpy.random.normal(0, W_scale, W_shape).astype(self.W_dtype)
    self.b = numpy.random.uniform(-1, 1, out_channels).astype(self.b_dtype)
    self.check_double_backward_options = {
        'dtype': numpy.float64, 'atol': 5e-3, 'rtol': 5e-2}

    outs = tuple(
        conv.get_deconv_outsize(d, k, s, p, d=di)
        for (d, k, s, p, di)
        in zip(self.dims, ksize, self.stride, self.pad, self.dilate))
    self.outsize = outs if self.test_outsize else None
    x_shape = (N, in_channels) + self.dims
    self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.x_dtype)
    gy_shape = (N, out_channels) + outs
    self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.x_dtype)
    self.ggx = numpy.random.uniform(
        -1, 1, self.x.shape).astype(self.x.dtype)
    self.ggW = numpy.random.uniform(
        -1, 1, self.W.shape).astype(self.W.dtype)
    self.ggb = numpy.random.uniform(
        -1, 1, self.b.shape).astype(self.b.dtype)

    self.test_forward_options = {}
    self.check_backward_options = {
        'dtype': numpy.float64, 'atol': 3e-5, 'rtol': 3e-4}
    if (self.x_dtype == numpy.float16 or self.W_dtype == numpy.float16
            or self.b_dtype == numpy.float16):
        self.test_forward_options = {'atol': 5e-4, 'rtol': 5e-3}
        self.check_backward_options = {
            'dtype': numpy.float64, 'atol': 2 ** -4, 'rtol': 2 ** -4}
def setUp(self):
    in_channels = 3
    out_channels = 2
    ndim = len(self.dims)
    ksize = (3,) * ndim
    stride = (1,) * ndim
    pad = (1,) * ndim
    W_scale = numpy.sqrt(1. / functools.reduce(mul, ksize, in_channels))
    W_shape = (in_channels, out_channels) + ksize
    self.W = cuda.cupy.random.normal(
        0, W_scale, W_shape).astype(self.dtype)

    outs = tuple(
        conv.get_deconv_outsize(d, k, s, p)
        for (d, k, s, p) in zip(self.dims, ksize, stride, pad))
    x_shape = (2, in_channels) + self.dims
    self.x = cuda.cupy.random.uniform(-1, 1, x_shape).astype(self.dtype)
    gy_shape = (2, out_channels) + outs
    self.gy = cuda.cupy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

    with chainer.using_config('use_cudnn', self.use_cudnn):
        self.expected = chainer.should_use_cudnn('>=auto') and ndim > 1
def setUp(self):
    in_channels = 3
    out_channels = 2
    ndim = len(self.dims)
    ksize = (3,) * ndim
    stride = (1,) * ndim
    pad = (1,) * ndim
    W_scale = numpy.sqrt(1. / functools.reduce(mul, ksize, in_channels))
    W_shape = (in_channels, out_channels) + ksize
    self.W = cuda.cupy.random.normal(
        0, W_scale, W_shape).astype(self.dtype)

    outs = tuple(
        conv.get_deconv_outsize(d, k, s, p)
        for (d, k, s, p) in zip(self.dims, ksize, stride, pad))
    x_shape = (2, in_channels) + self.dims
    self.x = cuda.cupy.random.uniform(-1, 1, x_shape).astype(self.dtype)
    gy_shape = (2, out_channels) + outs
    self.gy = cuda.cupy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

    self.expected = self.use_cudnn and ndim > 1 and (
        cuda.cudnn.cudnn.getVersion() >= 3000 or
        self.dtype != numpy.float16)