    def setUp(self):
        """Build random deconvolution inputs and the check tolerances.

        Reads ``ksize``, ``stride``, ``pad``, ``in_channels``,
        ``out_channels``, ``nobias``, ``x_dtype``, ``W_dtype`` and
        ``test_outsize`` from the test instance and stores ``W``, ``b``,
        ``x``, ``gy``, ``outsize``, ``test_forward_options`` and
        ``check_backward_options`` on it.
        """
        kh, kw = _pair(self.ksize)
        sh, sw = _pair(self.stride)
        ph, pw = _pair(self.pad)
        # Weights: zero-mean normal with std sqrt(1 / (kh * kw * in_channels)).
        self.W = numpy.random.normal(
            0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
            (self.in_channels, self.out_channels, kh, kw)).astype(self.W_dtype)
        # The bias, when present, is cast to the input dtype (not W_dtype).
        self.b = None if self.nobias else numpy.random.uniform(
            -1, 1, self.out_channels).astype(self.x_dtype)

        N = 2
        inh, inw = 4, 3
        outh = conv.get_deconv_outsize(inh, kh, sh, ph)
        outw = conv.get_deconv_outsize(inw, kw, sw, pw)
        # An explicit output size is only supplied when the test asks for it.
        self.outsize = (outh, outw) if self.test_outsize else None
        self.x = numpy.random.uniform(
            -1, 1, (N, self.in_channels, inh, inw)).astype(self.x_dtype)
        self.gy = numpy.random.uniform(
            -1, 1, (N, self.out_channels, outh, outw)).astype(self.x_dtype)

        # Default options; tolerances are added only when float16 is involved.
        self.test_forward_options = {}
        self.check_backward_options = {'dtype': numpy.float64}
        if self.x_dtype == numpy.float16:
            self.test_forward_options = {'atol': 5e-3, 'rtol': 5e-2}
            self.check_backward_options = {
                'dtype': numpy.float64,
                'atol': 5e-4,
                'rtol': 5e-3,
            }
        elif self.W_dtype == numpy.float16:
            self.check_backward_options = {
                'dtype': numpy.float64,
                'atol': 5e-4,
                'rtol': 5e-3,
            }
def calc_unpooling_2d(func, in_data, **kwargs):
    r"""Estimate the computational cost of an Unpooling2D call.

    Unpooling2D only reads the data from memory and writes it to specific
    positions in the output. Unlike upsampling2D, it does not use indices;
    all pixels are filled by the corresponding pixels of the input tensor.

    | Item   | Value |
    |:-------|:------|
    | FLOPs  | $$ 0 $$ |
    | mread  | $$ \| x \| $$ |
    | mwrite | $$ \| y \| $$ |
    | params | Unpooling parameter `k`, `s`, `p`, `outsize` and `cover_all` |

    Args:
        func: Object exposing ``kh``, ``kw``, ``sy``, ``sx``, ``ph``, ``pw``,
            ``outh``, ``outw`` and ``cover_all``.
        in_data: One-element sequence holding the 4-D input array ``x``.
        **kwargs: Accepted but not used by this function.

    Returns:
        tuple: ``(flops, mread, mwrite, params)`` where ``flops`` is ``0``,
        ``mread`` is ``x.size``, ``mwrite`` is ``n * c * outh * outw`` and
        ``params`` is a dict describing the unpooling configuration.
    """
    x, = in_data
    n, c, h, w = x.shape
    kh, kw = int(func.kh), int(func.kw)
    sy, sx = int(func.sy), int(func.sx)
    ph, pw = int(func.ph), int(func.pw)

    # Fall back to the computed output size when none was stored on `func`.
    outh, outw = func.outh, func.outw
    if outh is None:
        outh = get_deconv_outsize(h, kh, sy, ph, cover_all=func.cover_all)
    if outw is None:
        outw = get_deconv_outsize(w, kw, sx, pw, cover_all=func.cover_all)

    # Report a scalar when both directions agree, otherwise a (y, x) pair.
    params = {
        'k': kw if kw == kh else (kh, kw),
        's': sx if sx == sy else (sy, sx),
        'p': pw if pw == ph else (ph, pw),
        'outsize': (outh, outw),
        'cover_all': func.cover_all,
    }
    return (0, x.size, n * c * outh * outw, params)
    def forward_cpu(self, inputs):
        """Compute the 2-D deconvolution of ``x`` with ``W`` on the CPU.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``; every element must be a
                ``numpy.ndarray``.

        Returns:
            tuple: One-element tuple holding the output array.

        Raises:
            ValueError: If any input is not a ``numpy.ndarray``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not all(isinstance(i, numpy.ndarray) for i in inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                # Without this branch a type mismatch with no bias would slip
                # through and fail later with an unrelated error.
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        kh, kw = W.shape[2:]
        _, _, h, w = x.shape
        # Contract W's axis 0 with x's axis 1 (the input-channel axes).
        gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # - k, m, n: shape of out_channel
        # - b: number of inputs
        # - h, w: height and width of kernels
        # k, m, n, b, h, w -> b, k, m, n, h, w
        gcol = numpy.rollaxis(gcol, 3)
        # Output sizes are computed once and cached on the instance.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        y = conv.col2im_cpu(
            gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
        # b, k, h, w
        if b is not None:
            # Broadcast the bias over the channel axis.
            y += b.reshape(1, b.size, 1, 1)
        return y,
    def forward(self, x):
        """Unpool ``x[0]`` by tiling every pixel over a ``kh x kw`` window.

        Args:
            x: Sequence whose first element is the input array; its last two
                axes are taken as height and width.

        Returns:
            tuple: One-element tuple holding the result of ``col2im``.
        """

        h, w = x[0].shape[2:]
        # NOTE(review): the argument lists of the two size computations were
        # restored from the parallel Unpooling implementation in this file;
        # confirm that `cover_all` is intended here.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
        xp = cuda.get_array_module(*x)
        # Insert two unit axes after the channel axis and repeat the data
        # kh x kw times along them.
        col = xp.tile(x[0][:, :, None, None], (1, 1, self.kh, self.kw, 1, 1))
        if xp is numpy:
            y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
        else:
            y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
        return y,
    def forward_cpu(self, inputs):
        # CPU forward pass that hands the deconvolution to
        # `xnn.ConvolutionBackwardData` and then adds the optional bias.
        self.retain_inputs((0, 1))  # retain x, W

        x, W = inputs[:2]
        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape

        # Output sizes are computed once and cached on the instance.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'

        self._set_cover_all(x, W)

        # NOTE(review): the `xnn.ConvolutionBackwardData(` call below is never
        # closed in this copy of the file -- its trailing argument(s) and the
        # closing parenthesis appear to be missing; restore before use.
        cc = xnn.ConvolutionBackwardData(inputs,
                                         stride=(self.sy, self.sx),
                                         pad=(self.ph, self.pw),
                                         outsize=(self.outh, self.outw),

        self.hint = cc.hint
        y, = cc.execute_on()

        # A third input, when given, is the bias; broadcast it over channels.
        if len(inputs) == 3:
            b = inputs[2]
            y += b.reshape(1, b.size, 1, 1)

        return y,
    def forward_cpu(self, inputs):
        """Compute the 2-D deconvolution of ``x`` with ``W`` on the CPU.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``; all elements must pass
                ``type_check.same_types``.

        Returns:
            tuple: One-element tuple holding the output array.

        Raises:
            ValueError: If ``type_check.same_types`` rejects the inputs.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                # Without this branch a type mismatch with no bias would slip
                # through and fail later with an unrelated error.
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        kh, kw = W.shape[2:]
        _, _, h, w = x.shape
        # Contract W's axis 0 with x's axis 1 (the input-channel axes).
        gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # - k, m, n: shape of out_channel
        # - b: number of inputs
        # - h, w: height and width of kernels
        # k, m, n, b, h, w -> b, k, m, n, h, w
        gcol = numpy.rollaxis(gcol, 3)
        # Output sizes are computed once and cached on the instance.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
        # b, k, h, w
        if b is not None:
            # Broadcast the bias over the channel axis.
            y += b.reshape(1, b.size, 1, 1)
        return y,
    def forward_cpu(self, x):
        """Scatter ``x[0]`` into an upsampled map using ``self.indexes`` (CPU).

        Each input value ``x[0][n, c, oy, ox]`` is written to the kernel slot
        selected by ``self.indexes[n, c, oy, ox]`` of a zeroed column buffer,
        which is then folded back into an image with ``col2im_cpu``.

        Args:
            x: Sequence whose first element is the 4-D input array.

        Returns:
            tuple: One-element tuple holding the upsampled array.
        """
        self._in_dtype = x[0].dtype

        n, c, h, w = x[0].shape
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                w, self.kw, self.sx, self.pw, cover_all=self.cover_all)

        up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
        # NOTE(review): `cover_all` is passed to stay consistent with the size
        # computation above; confirm against the upstream implementation.
        up_y = conv.im2col_cpu(
            up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        # up_y.shape[3] is the row length used to split the flat kernel index.
        row_len = up_y.shape[3]
        # Dedicated loop names so the batch/channel sizes are not clobbered.
        for ni in six.moves.range(up_y.shape[0]):
            for ci in six.moves.range(up_y.shape[1]):
                for oy in six.moves.range(up_y.shape[4]):
                    for ox in six.moves.range(up_y.shape[5]):
                        flat = self.indexes[ni, ci, oy, ox]
                        ky = flat // row_len
                        kx = flat % row_len
                        up_y[ni, ci, ky, kx, oy, ox] = x[0][ni, ci, oy, ox]
        up_y = conv.col2im_cpu(up_y, self.sy, self.sx, self.ph,
                               self.pw, self.outh, self.outw)
        return up_y,
    def setUp(self):
        # Test fixture: derives channel counts from `self.groups`, stores the
        # kernel/stride/pad geometry on the instance and loosens the check
        # tolerances when float16 is involved.
        self.N = 2
        self.inh, self.inw = 4, 3
        self.in_channels_a_group = 3
        self.out_channels_a_group = 2
        # Total channels = per-group channels times the number of groups.
        self.in_channels = self.in_channels_a_group * self.groups
        self.out_channels = self.out_channels_a_group * self.groups
        self.ksize = 3
        self.pad = 1
        self.kh, self.kw = _pair(self.ksize)
        self.sh, self.sw = _pair(self.stride)
        self.ph, self.pw = _pair(self.pad)

        # NOTE(review): both `conv.get_deconv_outsize(` calls below are cut off
        # after their first argument in this copy (no remaining arguments or
        # closing parenthesis); restore them from the upstream source.
        outh = conv.get_deconv_outsize(self.inh,
        outw = conv.get_deconv_outsize(self.inw,

        self.outsize = (outh, outw) if self.test_outsize else None

        # The `check_*_options` dicts are updated in place, so they must
        # already exist (presumably defined on the class -- confirm).
        if self.x_dtype == numpy.float16:
            self.check_forward_options.update(atol=5e-3, rtol=5e-2)
            self.check_backward_options.update(atol=5e-3, rtol=5e-2)
            self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
        elif self.W_dtype == numpy.float16:
            # Only the backward checks are relaxed when just W is float16.
            self.check_backward_options.update(atol=5e-3, rtol=5e-2)
            self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
    def forward_cpu(self, x):
        # CPU forward pass: scatters each input value x[0][n, c, oy, ox] into
        # the kernel slot selected by self.indexes, then folds the column
        # buffer back into an image with col2im_cpu.
        n, c, h, w = x[0].shape
        # NOTE(review): the three calls below (`get_deconv_outsize` twice and
        # `im2col_cpu`) are truncated after their first argument in this copy;
        # restore the remaining arguments and closing parentheses.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h,
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w,

        # The buffer is always float32, independent of x[0].dtype.
        up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
        up_y = conv.im2col_cpu(up_y,
        # The loop variables n and c reuse (and overwrite) the batch/channel
        # sizes unpacked above; those sizes are not read again afterwards.
        for n in six.moves.range(up_y.shape[0]):
            for c in six.moves.range(up_y.shape[1]):
                for oy in six.moves.range(up_y.shape[4]):
                    for ox in six.moves.range(up_y.shape[5]):
                        # Split the flat index into (ky, kx) using
                        # up_y.shape[3] as the row length.
                        ky = self.indexes[n, c, oy, ox] // up_y.shape[3]
                        kx = self.indexes[n, c, oy, ox] % up_y.shape[3]
                        up_y[n, c, ky, kx, oy, ox] = x[0][n, c, oy, ox]
        up_y = conv.col2im_cpu(up_y, self.sy, self.sx, self.ph, self.pw,
                               self.outh, self.outw)
        return up_y,
    def _calc_out_size(self, x, W):
        """Calculates and stores `outh` and `outw`."""
        # Kernel size comes from W's trailing two axes, input size from x's.
        kh, kw = W.shape[2:]
        _, _, in_h, in_w = x.shape
        # NOTE(review): the axis-layout comment below does not correspond to
        # any statement in this method -- presumably left over from the forward
        # pass it was extracted from; confirm and remove if stale.
        # - k, m, n: shape of out_channel
        # - b: number of inputs
        # - h, w: height and width of kernels
        # k, m, n, b, h, w -> b, k, m, n, h, w
        # NOTE(review): both `conv.get_deconv_outsize(` calls below are cut off
        # after their first argument in this copy; restore the remaining
        # arguments and the closing parenthesis.
        # A size is only computed (and validated) when it is not already set.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h,
            if self.outh <= 0:
                raise RuntimeError('Height in the output must be positive.')

        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w,
            if self.outw <= 0:
                raise RuntimeError('Width in the output must be positive.')
    def forward_cpu(self, x):
        # Vectorised CPU forward pass: writes x[0] into a zeroed column buffer
        # at the positions given by self.indexes and folds it with col2im_cpu.
        self._in_dtype = x[0].dtype

        n, c, h, w = x[0].shape
        # NOTE(review): the `get_deconv_outsize` calls and the `im2col_cpu`
        # call below are truncated in this copy; the dangling
        # `0, 1, 4, 5, 2, 3)` line looks like the tail of a `.transpose(...)`
        # -- confirm against the upstream source.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h,
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w,

        up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
        up_y = conv.im2col_cpu(up_y,
                                   0, 1, 4, 5, 2, 3)
        colh, colw = up_y.shape[2:4]
        # One row per (n, c, colh, colw) position, one column per kernel slot.
        up_y = up_y.reshape(-1, self.kh * self.kw)
        indexes = self.indexes.ravel()
        # Row i receives x[0].ravel()[i] in column indexes[i].
        up_y[numpy.arange(len(indexes)), indexes] = x[0].ravel()
        up_y = up_y.reshape(n, c, colh, colw, self.kh, self.kw)
        # Move the two kernel axes in front of the two position axes before
        # handing the buffer to col2im_cpu.
        up_y = conv.col2im_cpu(up_y.transpose(0, 1, 4, 5, 2, 3), self.sy,
                               self.sx, self.ph, self.pw, self.outh, self.outw)
        return up_y,
 def forward_gpu(self, x):
     # GPU forward pass: scatters x[0] into a zeroed column buffer at the
     # positions given by self.indexes and folds it with col2im_gpu.
     xp = cuda.cupy
     n, c, h, w = x[0].shape
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
     # The buffer is always float32, independent of x[0].dtype.
     up_y = xp.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
     up_y = conv.im2col_gpu(up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, cover_all=self.cover_all)
     # Reorder so the two kernel axes come last; the kernel body below
     # addresses up_y as a flat (n, c, oy, ox, ky, kx) buffer.
     up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
     n, c, oy, ox, ky, kx = up_y.shape
     # NOTE(review): the statement following `indexes = ...` is damaged in
     # this copy: the call that opens the kernel definition is missing and the
     # kernel body lines are no longer inside a string literal. The final
     # `)(indexes, x[0], ...)` suggests an elementwise kernel object that is
     # built and immediately invoked -- confirm against the upstream source.
     indexes = xp.asarray(self.indexes, dtype=numpy.int32)
         "int32 index, float32 x, int32 n, int32 c, int32 oy, int32 ox," "int32 ky, int32 kx",
         "raw float32 up_y",
         int yn = i / c / oy / ox;
         int yc = (i / oy / ox) % c;
         int yoy = (i / ox) % oy;
         int yox = i % ox;
         up_y[yn * c * oy * ox * ky * kx +
           yc * oy * ox * ky * kx +
           yoy * ox * ky * kx +
           yox * ky * kx +
           index] = x;
     )(indexes, x[0], n, c, oy, ox, ky, kx, up_y)
     # Permute the axes again (same permutation as above) before col2im_gpu.
     up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
     up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
     return (up_y,)
    def setUp(self):
        """Create a ``Deconvolution2D`` link with random parameters and data.

        Stores the link as ``self.link`` and random float32 arrays as
        ``self.gy`` (upstream gradient) and ``self.x`` (input). The bias is
        only randomised when ``self.nobias`` is false.
        """
        n_in, n_out = 3, 2
        kernel, step, padding = 3, 2, 1

        def _uniform(shape):
            # float32 samples drawn uniformly from [-1, 1).
            return numpy.random.uniform(-1, 1, shape).astype(numpy.float32)

        self.link = L.Deconvolution2D(
            n_in, n_out, kernel,
            stride=step, pad=padding, nobias=self.nobias)
        # Overwrite the initial parameters in place; W first, then b, so the
        # random stream is consumed in a fixed order.
        self.link.W.data[...] = _uniform(self.link.W.data.shape)
        if not self.nobias:
            self.link.b.data[...] = _uniform(self.link.b.data.shape)

        batch = 2
        in_h, in_w = 3, 2
        k_h, k_w = _pair(kernel)
        out_h = conv.get_deconv_outsize(in_h, k_h, step, padding)
        out_w = conv.get_deconv_outsize(in_w, k_w, step, padding)
        self.gy = _uniform((batch, n_out, out_h, out_w))
        self.x = _uniform((batch, n_in, in_h, in_w))
    def setUp(self, use_cudnn=True):
        """Build random deconvolution inputs and the check tolerances.

        Reads ``ksize``, ``stride``, ``pad``, ``wscale``, ``in_channels``,
        ``out_channels``, ``nobias``, ``x_dtype``, ``W_dtype`` and
        ``test_outsize`` from the test instance and stores ``W``, ``b``,
        ``x``, ``gy``, ``outsize``, ``test_forward_options`` and
        ``check_backward_options`` on it.

        Args:
            use_cudnn: Accepted for interface compatibility; not read here.
        """
        kh, kw = _pair(self.ksize)
        sh, sw = _pair(self.stride)
        ph, pw = _pair(self.pad)
        # Weights: zero-mean normal with std
        # wscale * sqrt(1 / (kh * kw * in_channels)), cast to W_dtype.
        self.W = numpy.random.normal(
            0, self.wscale * numpy.sqrt(1. / (kh * kw * self.in_channels)),
            (self.in_channels, self.out_channels, kh, kw)
        ).astype(self.W_dtype)
        # The bias, when present, is cast to the input dtype (not W_dtype).
        self.b = None if self.nobias else numpy.random.uniform(
            -1, 1, self.out_channels).astype(self.x_dtype)

        N = 2
        inh, inw = 4, 3
        outh = conv.get_deconv_outsize(inh, kh, sh, ph)
        outw = conv.get_deconv_outsize(inw, kw, sw, pw)
        # An explicit output size is only supplied when the test asks for it.
        self.outsize = (outh, outw) if self.test_outsize else None
        self.x = numpy.random.uniform(
            -1, 1, (N, self.in_channels, inh, inw)).astype(self.x_dtype)
        self.gy = numpy.random.uniform(
            -1, 1, (N, self.out_channels, outh, outw)).astype(self.x_dtype)

        # Default options; tolerances are added only when float16 is involved.
        self.test_forward_options = {}
        self.check_backward_options = {'dtype': numpy.float64}
        if self.x_dtype == numpy.float16:
            self.test_forward_options = {'atol': 5e-3, 'rtol': 5e-2}
            self.check_backward_options = {
                'dtype': numpy.float64, 'atol': 5e-4, 'rtol': 5e-3}
        elif self.W_dtype == numpy.float16:
            self.check_backward_options = {
                'dtype': numpy.float64, 'atol': 5e-4, 'rtol': 5e-3}
def template(col_shape=(2, 5, 3, 3, 3, 4),
             ksize=(3, 3),
             padding=(1, 1),
             stride=(1, 1),
             description: str = ""):
    # Builds a Col2Im graph and a reference result computed with col2im_cpu.
    # NOTE(review): `col_order`, `im_order` and `col_chainer_order` are used
    # below but are neither parameters nor assigned here -- presumably they
    # are defined elsewhere or were lost from the signature; confirm.
    col = Variable(col_shape, col_order)
    op = Col2Im(None, ksize, stride, padding)
    im, = op(col)
    im = im.change_order(im_order)

    # Random column data laid out along the axes of `col_chainer_order`.
    # NOTE(review): the `.astype(` call below is cut off (no dtype argument
    # or closing parenthesis) in this copy.
    vcol = np.random.rand(*(col.shape_dict[a]
                            for a in col_chainer_order.axes)).astype(
    h1 = get_deconv_outsize(col.shape_dict[Axis.H], op.KH, op.SH, op.PH)
    w1 = get_deconv_outsize(col.shape_dict[Axis.W], op.KW, op.SW, op.PW)
    vim = col2im_cpu(vcol, op.SH, op.SW, op.PH, op.PW, h1, w1)

    # Permute the reference arrays into the axis orders of the graph
    # variables (`col_order` for the input, `im_order` for the output).
    vcol = vcol.transpose(
        [col_chainer_order.axes_dict[a] for a in col_order.axes])
    vim = vim.transpose([OrderNCHW.axes_dict[a] for a in im_order.axes])

    # NOTE(review): the keyword arguments below belong to a call whose opening
    # line and closing parenthesis are missing from this copy.
        description=f"Col2Im {description}",
        backend=["webgpu", "webgl", "webassembly"],
        graph=Graph([col], [im]),
        inputs={col: vcol},
        expected={im: vim},
    def _compute_outsize(self, in_h, in_w):
        if self.out_h is None:
            self.out_h = conv.get_deconv_outsize(
                in_h, self.orig_kh, self.orig_sy, self.orig_ph, d=1) // self.r

        if self.out_w is None:
            self.out_w = conv.get_deconv_outsize(
                in_w, self.orig_kw, self.orig_sx, self.orig_pw, d=1) // self.r
    def setUp(self):
        # Test fixture for an N-dimensional deconvolution link: picks the
        # geometry from `self.dims`, builds the link and random x / gy arrays,
        # and selects check tolerances by dtype.
        N = 2
        in_channels = 3
        out_channels = 2
        ndim = len(self.dims)
        # Same kernel size, stride and padding along every spatial axis.
        ksize = (3, ) * ndim
        stride = (2, ) * ndim
        pad = (1, ) * ndim

        if self.used_outsize == 'case1' or self.used_outsize == 'None':
            # Use output size determined with get_deconv_outsize.
            outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for (d, k, s, p) in zip(self.dims, ksize, stride, pad))
        elif self.used_outsize == 'case2':
            # Use possible output size other than the one determined with
            # get_deconv_outsize.
            outs = tuple(
                conv.get_deconv_outsize(d, k, s, p) + 1
                for (d, k, s, p) in zip(self.dims, ksize, stride, pad))

        # NOTE(review): as written, `outsize = None` always overwrites the
        # value assigned on the previous line; an `else:` appears to be
        # missing between the two assignments.
        if self.used_outsize != 'None':
            outsize = outs
            outsize = None

        # NOTE(review): same pattern here -- `initial_bias = None` always
        # wins; an `else:` appears to be missing.
        if not self.nobias:
            initial_bias = initializers.Uniform(scale=1, dtype=self.dtype)
            initial_bias = None

        # NOTE(review): the constructor call below is cut off after its first
        # argument in this copy; restore the remaining arguments.
        self.link = deconvolution_nd.DeconvolutionND(ndim,

        x_shape = (N, in_channels) + self.dims
        self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype)
        gy_shape = (N, out_channels) + outs
        self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

        self.check_forward_options = {}
        self.check_backward_options = {'eps': 1e-2, 'atol': 1e-4, 'rtol': 1e-3}
        # Looser tolerances and a larger step for half precision.
        # NOTE(review): the dict literal at the end of this block is not
        # closed in this copy.
        if self.dtype == numpy.float16:
            self.check_forward_options = {'atol': 5e-3, 'rtol': 5e-2}
            self.check_backward_options = {
                'eps': 2**-3,
                'atol': 1e-2,
                'rtol': 1e-1
    def forward_gpu(self, inputs):
        # GPU forward pass of the 2-D deconvolution. Contains a cuDNN code
        # path and a matrix-product + col2im_gpu code path.
        # NOTE(review): several lines are missing from this copy: the
        # `cuda.cupy.empty(` call is not closed, the two cuDNN calls have lost
        # their opening lines (only their argument lists remain), and there is
        # no `else:` separating the cuDNN path from the fallback path.
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        # Output sizes are computed once and cached on the instance.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
        if cuda.cudnn_enabled and self.use_cudnn:
            # Make every operand contiguous before building descriptors.
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
            y_desc = cudnn.create_tensor_descriptor(y)

            # Descriptors are stored on the instance.
            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx))
            if b is not None:
                # The bias is viewed as a 4-D array with unit axes around the
                # channel axis.
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            # Scalars 1 and 0 in the input dtype, passed by pointer below.
            one = numpy.array(1, dtype=x.dtype).ctypes
            zero = numpy.array(0, dtype=x.dtype).ctypes

                handle, one.data, self.filter_desc.value, W.data.ptr,
                x_desc.value, x.data.ptr, self.conv_desc.value,
                zero.data, y_desc.value, y.data.ptr)
            if b is not None:
                    handle, libcudnn.CUDNN_ADD_SAME_C,
                    one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
            # Fallback path: one matrix product per sample writes into the
            # column buffer, which col2im_gpu then folds into the output.
            W_mat = W.reshape(in_c, c * kh * kw)
            x_mats = x.reshape(n, in_c, in_h * in_w)
            gcol = cuda.cupy.empty(
                (n, c, kh, kw, in_h, in_w), dtype=numpy.float32)
            gcol_mats = gcol.reshape(n, c * kh * kw, in_h * in_w)
            for i in moves.range(n):
                cuda.cupy.dot(W_mat.T, x_mats[i], gcol_mats[i])
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
    def setUp(self):
        """Create a ``DeconvolutionND`` link and random test arrays.

        Reads ``dims``, ``used_outsize``, ``nobias``, ``dtype``,
        ``in_channels`` and ``groups`` from the test instance and stores
        ``link``, ``x``, ``gy``, ``check_forward_options`` and
        ``check_backward_options`` on it.
        """
        N = 2
        out_channels = 2
        ndim = len(self.dims)
        # Same kernel size, stride and padding along every spatial axis.
        ksize = (3,) * ndim
        stride = (2,) * ndim
        pad = (1,) * ndim

        if self.used_outsize == 'case1' or self.used_outsize == 'None':
            # Use output size determined with get_deconv_outsize.
            outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for (d, k, s, p) in zip(self.dims, ksize, stride, pad))
        elif self.used_outsize == 'case2':
            # Use possible output size other than the one determined with
            # get_deconv_outsize.
            outs = tuple(
                conv.get_deconv_outsize(d, k, s, p) + 1
                for (d, k, s, p) in zip(self.dims, ksize, stride, pad))

        # Only pass an explicit output size to the link when requested.
        if self.used_outsize != 'None':
            outsize = outs
        else:
            outsize = None

        if not self.nobias:
            initial_bias = initializers.Uniform(scale=1, dtype=self.dtype)
        else:
            initial_bias = None

        if self.in_channels == 'omit':
            # Construct the link without an explicit input-channel count.
            # NOTE(review): `groups=self.groups` mirrors the other branch;
            # confirm it is intended here as well.
            self.link = deconvolution_nd.DeconvolutionND(
                ndim, out_channels, ksize, stride=stride, pad=pad,
                outsize=outsize, initial_bias=initial_bias, nobias=self.nobias,
                groups=self.groups)
        else:
            self.link = deconvolution_nd.DeconvolutionND(
                ndim, self.in_channels, out_channels, ksize, stride=stride,
                pad=pad, outsize=outsize, initial_bias=initial_bias,
                nobias=self.nobias, groups=self.groups)

        # The input always has 4 channels, whatever `self.in_channels` is.
        x_shape = (N, 4) + self.dims
        self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype)
        gy_shape = (N, out_channels) + outs
        self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

        self.check_forward_options = {}
        self.check_backward_options = {
            'eps': 1e-2, 'atol': 1e-4, 'rtol': 1e-3}
        # Looser tolerances and a larger step for half precision.
        if self.dtype == numpy.float16:
            self.check_forward_options = {'atol': 5e-3, 'rtol': 5e-2}
            self.check_backward_options = {
                'eps': 2 ** -3, 'atol': 1e-2, 'rtol': 1e-1}
    def forward_gpu(self, inputs):
        # GPU forward pass of the 2-D deconvolution. Contains a cuDNN code
        # path and a matrix-product + col2im_gpu code path.
        # NOTE(review): several lines are missing from this copy: the
        # `cuda.cupy.empty(` calls are not closed, the first cuDNN call has
        # lost its opening line, the `addTensor_v2(` call is not closed, and
        # there is no `else:` separating the cuDNN path from the fallback.
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        # Output sizes are computed once and cached on the instance.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
        if cuda.cudnn_enabled and self.use_cudnn:
            # Make every operand contiguous before building descriptors.
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
            y_desc = cudnn.create_tensor_descriptor(y)

            # Descriptors are stored on the instance.
            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx))
            if b is not None:
                # The bias is viewed as a 4-D array with unit axes around the
                # channel axis.
                self.bias_desc = cudnn.create_tensor_descriptor(b[None, :,
                                                                  None, None])

            # Scalars 1 and 0 in the input dtype, passed by pointer below.
            one = numpy.array(1, dtype=x.dtype).ctypes
            zero = numpy.array(0, dtype=x.dtype).ctypes

                handle, one.data, self.filter_desc.value, W.data.ptr,
                x_desc.value, x.data.ptr, self.conv_desc.value, zero.data,
                y_desc.value, y.data.ptr)
            if b is not None:
                libcudnn.addTensor_v2(handle, libcudnn.CUDNN_ADD_SAME_C,
                                      one.data, self.bias_desc.value,
                                      b.data.ptr, one.data, y_desc.value,
            W_mat = W.reshape(in_c, c * kh * kw)
            x_mats = x.reshape(n, in_c, in_h * in_w)
            gcol = cuda.cupy.empty((n, c, kh, kw, in_h, in_w),
            gcol_mats = gcol.reshape(n, c * kh * kw, in_h * in_w)
            # One matrix product per sample writes into the column buffer.
            for i in moves.range(n):
                cuda.cupy.dot(W_mat.T, x_mats[i], gcol_mats[i])
            y = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
 def forward(self, x):
     """Unpool ``x[0]`` by tiling every pixel over a ``kh x kw`` window.

     Args:
         x: Sequence whose first element is the input array; its last two
             axes are taken as height and width.

     Returns:
         tuple: One-element tuple holding the result of ``col2im``.
     """
     h, w = x[0].shape[2:]
     # Output sizes are computed once and cached on the instance.
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(
             h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(
             w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
     xp = cuda.get_array_module(*x)
     # Insert two unit axes after the channel axis and repeat the data
     # kh x kw times along them.
     col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                   (1, 1, self.kh, self.kw, 1, 1))
     # Use the col2im variant that matches the array type; previously the
     # CPU variant ran unconditionally and overwrote the GPU result.
     if isinstance(x[0], cuda.ndarray):
         y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     else:
         y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     return (y,)
    def forward(self, x):
        # Unpooling forward pass that takes the input in x[0] and per-pixel
        # indexes ("switches") in x[1].
        h, w = x[0].shape[2:]
        n = x[0].shape[0]
        c = x[0].shape[1]
        indexes = x[1]

        # NOTE(review): both `conv.get_deconv_outsize(` calls below are cut off
        # after their first argument in this copy.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h,
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w,
        xp = cuda.get_array_module(*x)

        # Insert two unit axes after the channel axis and repeat the data
        # kh x kw times along them.
        col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                      (1, 1, self.kh, self.kw, 1, 1))

        # NOTE(hvy): Take indexes(Switches) into account
        # TODO(hvy): Remove the loops and make it efficient
        y = xp.zeros_like(col)
        # The indexes are copied to host memory for the Python loops below.
        if isinstance(x[0], cuda.ndarray):
            indexes = cuda.cupy.asnumpy(indexes)

        # NOTE(review): the innermost loop variable `c` overwrites the channel
        # count `c` read by `range(c)` one level up, so from the second sample
        # (n_i >= 1) onwards the channel loop runs over the wrong range.
        # NOTE(review): the assignment inside the loop is garbled/truncated in
        # this copy (unbalanced brackets); restore it from the upstream source.
        for n_i in range(n):
            for c_i in range(c):
                for r in range(h):
                    for c in range(w):
                        index = indexes[n_i][c_i][r][c]
                        if index < self.kw:
                            y[n_i][c_i].T[c][r][index][0] = col[n_i][c_i].T[c][
                                index %
                                self.kw][1] = col[n_i][c_i].T[c][r][index %

        # NOTE(review): as written the CPU col2im always runs after the GPU
        # one; an `else:` appears to be missing between the two calls.
        if isinstance(x[0], cuda.ndarray):
            y = conv.col2im_gpu(y, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
            y = conv.col2im_cpu(y, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)

        return y,
# Converts an Unpooling2D function node into an ONNX node ('Upsample' or
# 'Resize', chosen by opset_version). Configurations that cannot be expressed
# that way (cover_all, an outsize other than the expected one, non-zero pad,
# stride != ksize) raise RuntimeError.
# NOTE(review): this fragment looks truncated -- the signature, the
# `expected_outsize` list and the opset-7 make_node / add_const calls are
# never closed. Confirm against the complete source.
def convert_Unpooling2D(func, opset_version, input_names, output_names,
    pad = [func.ph, func.pw]
    stride = [func.sy, func.sx]
    ksize = [func.kh, func.kw]
    outsize = [func.outh, func.outw]
    # TODO(hamaji): These could be implemented by `Slice` and `Pad`.
    if func.cover_all:
        raise RuntimeError('ONNX-chainer does not support `cover_all=True` '
                           'for Unpooling2D')
    h, w = func.inputs[0].shape[2:]
    expected_outsize = [
    if outsize != expected_outsize:
        raise RuntimeError('ONNX-chainer does not support `outsize!=None` '
                           'for Unpooling2D: expected={} actual={}'.format(
                               expected_outsize, outsize))
    if pad != [0, 0]:
        raise RuntimeError('ONNX-chainer does not support `pad!=0` '
                           'for Unpooling2D')
    # This one would require an extra 1x1 MaxPool.
    if stride != ksize:
        raise RuntimeError('ONNX-chainer does not support `stride!=ksize` '
                           'for Unpooling2D: stride={} ksize={}'.format(
                               stride, ksize))
    # Batch and channel axes keep their size; the spatial axes grow by the
    # kernel size.
    scales = [1.0, 1.0, float(func.kh), float(func.kw)]
    if opset_version == 7:
        return onnx_helper.make_node('Upsample',
    scales_name = context.add_const(np.array(scales, dtype=np.float32),
    if opset_version in [9, 10]:
        op = 'Upsample' if opset_version == 9 else 'Resize'
        return onnx_helper.make_node(op, input_names, output_names),
    if opset_version == 11:
        # For opset 11 an empty `roi` constant is appended ahead of the
        # scales.
        roi_name = context.add_const(np.array([]), 'roi')
        input_names.extend([roi_name, scales_name])
        return onnx_helper.make_node('Resize', input_names, output_names),
    def setUp(self):
        # Builds random fixtures for a grouped, dilated 2D deconvolution
        # test: inputs (x, W, b), the upstream gradient gy, second-order
        # gradients (ggx, ggW, ggb) and the numerical tolerances.
        # NOTE(review): this fragment looks truncated -- the
        # numpy.random.normal(...) call for W and the astype(...) calls for
        # ggx / ggW are never closed. Confirm against the complete source.
        in_channels_a_group = 3
        out_channels_a_group = 2
        self.in_channels = in_channels_a_group * self.groups
        self.out_channels = out_channels_a_group * self.groups
        self.ksize = 3
        self.pad = 1
        kh, kw = _pair(self.ksize)
        sh, sw = _pair(self.stride)
        ph, pw = _pair(self.pad)

        W = numpy.random.normal(
            0, numpy.sqrt(1. / (kh * kw * in_channels_a_group)),
            (self.in_channels, out_channels_a_group, kh, kw)
        b = None if self.nobias else numpy.random.uniform(
            -1, 1, self.out_channels).astype(self.x_dtype)

        N = 2
        inh, inw = 4, 3
        outh = conv.get_deconv_outsize(inh, kh, sh, ph, d=self.dilate)
        outw = conv.get_deconv_outsize(inw, kw, sw, pw, d=self.dilate)
        # The explicit output size is only supplied when test_outsize is set.
        self.outsize = (outh, outw) if self.test_outsize else None
        x = numpy.random.uniform(
            -1, 1, (N, self.in_channels, inh, inw)).astype(self.x_dtype)
        gy = numpy.random.uniform(
            -1, 1, (N, self.out_channels, outh, outw)).astype(self.x_dtype)

        ggx = numpy.random.uniform(-1, 1, x.shape).astype(
        ggW = numpy.random.uniform(-1, 1, W.shape).astype(
        ggb = None if self.nobias else numpy.random.uniform(
            -1, 1, b.shape).astype(self.x_dtype)

        self.inputs = [x, W, b]
        self.grad_outputs = [gy]
        self.grad_grad_inputs = [ggx, ggW, ggb]

        # Tolerances are loosened when either x or W is float16.
        self.test_forward_options = {}
        self.check_backward_options = {'dtype': numpy.float64}
        self.check_double_backward_options = {'dtype': numpy.float64}
        if self.x_dtype == numpy.float16:
            self.test_forward_options.update(atol=5e-3, rtol=5e-2)
            self.check_backward_options.update(atol=5e-4, rtol=5e-3)
            self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
        elif self.W_dtype == numpy.float16:
            self.check_backward_options.update(atol=5e-4, rtol=5e-3)
            self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
    def forward_gpu(self, x):
        # GPU forward pass: the element-wise kernel text below writes each
        # value of x[0] into an im2col-shaped buffer at the offset given by
        # the matching entry of self.indexes, and col2im_gpu then folds that
        # buffer back into the output. Returns a one-element tuple.
        # NOTE(review): this fragment looks truncated -- the
        # get_deconv_outsize and im2col_gpu calls are never closed and the
        # line that opens the kernel definition is missing. Confirm against
        # the complete source.
        self._in_dtype = x[0].dtype

        xp = cuda.cupy
        n, c, h, w = x[0].shape
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h,
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w,
        up_y = xp.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
        up_y = conv.im2col_gpu(up_y,
        up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
        n, c, oy, ox, ky, kx = up_y.shape
        indexes = xp.asarray(self.indexes, dtype=numpy.int32)
            'int32 index, float32 x, int32 n, int32 c, int32 oy, int32 ox,'
            'int32 ky, int32 kx', 'raw float32 up_y', '''
            int yn = i / c / oy / ox;
            int yc = (i / oy / ox) % c;
            int yoy = (i / ox) % oy;
            int yox = i % ox;
            up_y[yn * c * oy * ox * ky * kx +
              yc * oy * ox * ky * kx +
              yoy * ox * ky * kx +
              yox * ky * kx +
              index] = x;
            ''', 'upsampling_2d_fwd')(indexes, x[0], n, c, oy, ox, ky, kx,
        # Same axis permutation as before the kernel, applied again ahead of
        # col2im.
        up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
        up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw,
                               self.outh, self.outw)
        return up_y,
    def setUp(self):
        """Create random N-d deconvolution fixtures and check tolerances.

        Sets ``stride``, ``pad``, ``W``, ``b``, ``outsize``, ``x``, ``gy``,
        ``test_forward_options`` and ``check_backward_options`` on the test
        instance. The number of spatial axes follows ``len(self.dims)``.
        """
        batch = 2
        n_in, n_out = 3, 2
        rank = len(self.dims)
        kernel = (3,) * rank
        self.stride = (2,) * rank
        self.pad = (1,) * rank

        # Standard deviation of the filter: sqrt(1 / (n_in * prod(kernel))).
        fan = functools.reduce(mul, kernel, n_in)
        self.W = numpy.random.normal(
            0, numpy.sqrt(1. / fan), (n_in, n_out) + kernel
        ).astype(self.W_dtype)
        self.b = numpy.random.uniform(-1, 1, n_out).astype(self.x_dtype)

        out_dims = tuple([
            conv.get_deconv_outsize(size, k, s, p)
            for size, k, s, p in zip(
                self.dims, kernel, self.stride, self.pad)
        ])
        # The explicit output size is only supplied when test_outsize is set.
        self.outsize = out_dims if self.test_outsize else None
        self.x = numpy.random.uniform(
            -1, 1, (batch, n_in) + self.dims).astype(self.x_dtype)
        self.gy = numpy.random.uniform(
            -1, 1, (batch, n_out) + out_dims).astype(self.x_dtype)

        # Default tolerances, loosened when x or W is half precision.
        forward_opts = {}
        backward_opts = {'eps': 1e-2, 'atol': 1e-4, 'rtol': 1e-3}
        if self.x_dtype == numpy.float16:
            forward_opts = {'atol': 5e-3, 'rtol': 5e-2}
            backward_opts = {'eps': 2 ** -3, 'atol': 1e-2, 'rtol': 1e-1}
        elif self.W_dtype == numpy.float16:
            backward_opts = {'eps': 2 ** -3, 'atol': 1e-3, 'rtol': 1e-2}
        self.test_forward_options = forward_opts
        self.check_backward_options = backward_opts
    def forward(self, inputs):
        """Run the deconvolution forward pass on the matching backend.

        Args:
            inputs: Sequence ``(x, W)`` or ``(x, W, b)``; a third element,
                when present, is used as the bias.

        Returns:
            The value returned by ``_forward_xp`` (NumPy, or CuPy without
            cuDNN) or by ``_forward_cudnn``.

        Raises:
            ValueError: If ``type_check.same_types`` reports that the inputs
                are not all of the same array type.
        """
        self.retain_inputs((0, 1))  # only retain x and W
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            # Without the `else` the two-argument message was unreachable and
            # a mismatch with no bias went unreported.
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        if self.outs is None:
            dims = x.shape[2:]
            ksize = W.shape[2:]
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for d, k, s, p in zip(dims, ksize, self.stride, self.pad))
            assert all(out > 0 for out in self.outs), \
                'Output sizes should be positive.'
        self._set_cover_all(x, W)

        xp = cuda.get_array_module(*inputs)
        if xp is numpy:
            return self._forward_xp(x, W, b, numpy)
        elif self._use_cudnn(x, W):
            return self._forward_cudnn(x, W, b)
        else:
            # GPU arrays without cuDNN: generic implementation on CuPy.
            return self._forward_xp(x, W, b, cuda.cupy)
    def forward(self, x):
        """Unpool ``x[0]`` by tiling it over the kernel and folding it back.

        Args:
            x: Sequence whose first element is the input array; its axes
                from index 2 onward are treated as the spatial dimensions.

        Returns:
            One-element tuple holding the output array.
        """
        dims = x[0].shape[2:]
        ndim = self.ndim
        ksize = self.ksize
        stride = self.stride
        pad = self.pad
        if self.outs is None:
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p, cover_all=self.cover_all)
                for (d, k, s, p) in six.moves.zip(dims, ksize, stride, pad))

        xp = backend.get_array_module(*x)

        colon = slice(None)
        # (:, :, None, None, ..., None)
        tile_index = (colon, colon) + (None,) * ndim
        # (1, 1, k_1, k_2, ..., k_n, 1, 1, ..., 1)
        tile_reps = (1, 1) + ksize + (1,) * ndim
        col = xp.tile(x[0][tile_index], tile_reps)

        # Pick the col2im implementation for the array module in use.
        # Without the `else` the GPU routine replaced the CPU one on NumPy
        # input and the name was unbound for GPU input.
        if xp is numpy:
            col2im_nd = conv_nd.col2im_nd_cpu
        else:
            col2im_nd = conv_nd.col2im_nd_gpu
        y = col2im_nd(col, stride, pad, self.outs)

        return y,
    def setUp(self):
        # Derives the kernel/stride/pad/dilate tuples, the W, x and gy
        # shapes and the optional explicit output size from self.dims, then
        # adjusts the check tolerances.
        # NOTE(review): this fragment looks truncated -- the trailing
        # 'atol'/'rtol' entries belong to a call whose opening line is
        # missing. Confirm against the complete source.
        self.N = 2
        self.in_channels = 4
        self.out_channels = 2
        self.ndim = len(self.dims)
        self.ksize = (3, ) * self.ndim
        self.stride = (2, ) * self.ndim
        self.pad = (1, ) * self.ndim
        # The scalar dilation is repeated once per spatial axis.
        self.dilate = (self.dilate, ) * self.ndim

        self.W_scale = numpy.sqrt(
            1. / functools.reduce(mul, self.ksize, self.in_channels))
        # With groups, the filter's second axis is out_channels // groups.
        self.W_shape = (self.in_channels,
                        self.out_channels // self.groups) + self.ksize

        outs = tuple(
            conv.get_deconv_outsize(d, k, s, p, d=di) for (d, k, s, p, di) in
            zip(self.dims, self.ksize, self.stride, self.pad, self.dilate))
        self.outsize = outs if self.test_outsize else None
        self.x_shape = (self.N, self.in_channels) + self.dims
        self.gy_shape = (self.N, self.out_channels) + outs

        self.check_backward_options.update({'atol': 3e-5, 'rtol': 3e-4})
        self.check_double_backward_options.update({'atol': 5e-3, 'rtol': 5e-2})
        # Any float16 operand loosens the tolerances.
        if (self.x_dtype == numpy.float16 or self.W_dtype == numpy.float16
                or self.b_dtype == numpy.float16):
            self.check_forward_options.update({'atol': 5e-3, 'rtol': 5e-3})
            self.check_backward_options.update({'atol': 2**-4, 'rtol': 2**-4})
                'atol': 2**-4,
                'rtol': 2**-4
    def setUp(self):
        # Builds a random input x and upstream gradient gy for an N-d
        # unpooling test, plus forward/backward tolerances.
        # NOTE(review): this fragment looks truncated -- the float16
        # check_backward_options dict is never closed and no `else:`
        # separates it from the default options. Confirm against the
        # complete source.
        N = 2
        c = 3
        ndim = len(self.dims)
        # The scalar parameters are repeated once per spatial axis.
        self.ksize = (self._ksize, ) * ndim
        self.stride = (self._stride, ) * ndim
        self.pad = (self._pad, ) * ndim

        x_shape = (N, c) + self.dims
        self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype)

        outs = tuple(
            conv.get_deconv_outsize(d, k, s, p, cover_all=self.cover_all)
            for (d, k, s,
                 p) in zip(self.dims, self.ksize, self.stride, self.pad))
        gy_shape = (N, c) + outs
        self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

        if self.dtype == numpy.float16:
            self.check_forward_options = {'atol': 2**-4, 'rtol': 2**-4}
            self.check_backward_options = {
                'dtype': numpy.float64,
                'atol': 2**-4,
                'rtol': 2**-4
            self.check_forward_options = {}
            self.check_backward_options = {'atol': 1e-3, 'rtol': 1e-3}
    def _forward_xp(self, x, W, b, xp):
        """Compute the deconvolution with the generic ``xp`` array module.

        Args:
            x: Input array (n, C_I, d_1, ..., d_N).
            W: Filter array (C_I, C_O, k_1, ..., k_N).
            b: Bias array, or ``None`` to skip the bias.
            xp: Array module to compute with; ``numpy`` selects the CPU
                col2im routine, anything else the GPU one.

        Returns:
            One-element tuple holding the output array.
        """
        ndim = self.ndim
        ksize = W.shape[2:]  # W: C_I, C_O, k_1, k_2, ..., k_N
        dims = x.shape[2:]  # x: n, C_I, d_1, d_2, ..., d_N
        stride = self.stride
        pad = self.pad

        # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
        gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # Roll n, which is batch size, before the first.
        gcol = xp.rollaxis(gcol, ndim + 1)

        if self.outs is None:
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for d, k, s, p in zip(dims, ksize, stride, pad))
            assert all(out > 0 for out in self.outs), \
                'Output sizes should be positive.'
        # y: n, C_O, d_1, d_2, ..., d_N
        # Without the `else` the GPU routine always ran after the CPU one
        # and `y` was unbound for GPU input.
        if xp is numpy:
            y = conv_nd.col2im_nd_cpu(gcol, stride, pad, self.outs)
        else:
            y = conv_nd.col2im_nd_gpu(gcol, stride, pad, self.outs)
        if b is not None:
            # Broadcast the bias over the batch and spatial axes.
            b_shape = (1, -1) + (1, ) * ndim
            y += b.reshape(b_shape)

        return y,
    def setUp(self):
        """Create random N-d unpooling fixtures and dtype-specific options.

        Sets ``ksize``, ``stride``, ``pad``, ``x``, ``gy``, ``ggx`` and the
        forward / backward / double-backward check options on the test
        instance.
        """
        N = 2
        c = 3
        ndim = len(self.dims)
        # The scalar parameters are repeated once per spatial axis.
        self.ksize = (self._ksize,) * ndim
        self.stride = (self._stride,) * ndim
        self.pad = (self._pad,) * ndim

        x_shape = (N, c) + self.dims
        self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype)

        outs = tuple(
            conv.get_deconv_outsize(d, k, s, p, cover_all=self.cover_all)
            for (d, k, s, p)
            in zip(self.dims, self.ksize, self.stride, self.pad))
        gy_shape = (N, c) + outs
        self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype)

        # Without the `else` the float16 options were immediately
        # overwritten and the other dtypes got no options at all.
        if self.dtype == numpy.float16:
            self.check_forward_options = {'atol': 2 ** -4, 'rtol': 2 ** -4}
            self.check_backward_options = {
                'dtype': numpy.float64, 'atol': 2 ** -4, 'rtol': 2 ** -4}
            self.check_double_backward_options = {}
        else:
            self.check_forward_options = {}
            self.check_backward_options = {'atol': 1e-3, 'rtol': 1e-3}
            self.check_double_backward_options = {'atol': 3e-3, 'rtol': 3e-2}
        self.ggx = numpy.random.uniform(
            -1, 1, self.x.shape).astype(self.dtype)
    def forward(self, x):
        """Unpool ``x[0]`` by tiling it over the kernel and folding it back.

        Args:
            x: Sequence whose first element is the input array; its axes
                from index 2 onward are treated as the spatial dimensions.

        Returns:
            One-element tuple holding the output array.
        """
        dims = x[0].shape[2:]
        ndim = self.ndim
        ksize = self.ksize
        stride = self.stride
        pad = self.pad
        if self.outs is None:
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p, cover_all=self.cover_all)
                for (d, k, s, p) in six.moves.zip(dims, ksize, stride, pad))

        xp = cuda.get_array_module(*x)

        colon = slice(None)
        # (:, :, None, None, ..., None)
        tile_index = (colon, colon) + (None, ) * ndim
        # (1, 1, k_1, k_2, ..., k_n, 1, 1, ..., 1)
        tile_reps = (1, 1) + ksize + (1, ) * ndim
        col = xp.tile(x[0][tile_index], tile_reps)

        # Pick the col2im implementation for the array module in use.
        # Without the `else` the GPU routine replaced the CPU one on NumPy
        # input and the name was unbound for GPU input.
        if xp is numpy:
            col2im_nd = conv_nd.col2im_nd_cpu
        else:
            col2im_nd = conv_nd.col2im_nd_gpu
        y = col2im_nd(col, stride, pad, self.outs)

        return y,
    def _forward_xp(self, x, W, b, xp):
        """Compute the deconvolution with the generic ``xp`` array module.

        Args:
            x: Input array (n, C_I, d_1, ..., d_N).
            W: Filter array (C_I, C_O, k_1, ..., k_N).
            b: Bias array, or ``None`` to skip the bias.
            xp: Array module to compute with; ``numpy`` selects the CPU
                col2im routine, anything else the GPU one.

        Returns:
            One-element tuple holding the output array.
        """
        ndim = self.ndim
        ksize = W.shape[2:]     # W: C_I, C_O, k_1, k_2, ..., k_N
        dims = x.shape[2:]      # x: n, C_I, d_1, d_2, ..., d_N
        stride = self.stride
        pad = self.pad

        # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
        gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # Roll n, which is batch size, before the first.
        gcol = xp.rollaxis(gcol, ndim + 1)

        if self.outs is None:
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for d, k, s, p in zip(dims, ksize, stride, pad))
            assert all(out > 0 for out in self.outs), \
                'Output sizes should be positive.'
        # y: n, C_O, d_1, d_2, ..., d_N
        # Without the `else` the GPU routine always ran after the CPU one
        # and `y` was unbound for GPU input.
        if xp is numpy:
            y = conv_nd.col2im_nd_cpu(gcol, stride, pad, self.outs)
        else:
            y = conv_nd.col2im_nd_gpu(gcol, stride, pad, self.outs)
        if b is not None:
            # Broadcast the bias over the batch and spatial axes.
            b_shape = (1, -1) + (1,) * ndim
            y += b.reshape(b_shape)

        return y,
    def _forward_cudnn(self, x, W, b):
        # cuDNN forward path: allocates the output, builds the tensor /
        # filter / convolution descriptors, asks cuDNN for a backward-data
        # algorithm and returns the output as a one-element tuple.
        # NOTE(review): this fragment looks truncated -- the algorithm query
        # runs straight into the argument list of a second call, and the
        # bias branch holds only an orphaned argument list. Confirm against
        # the complete source.
        c = W.shape[1]          # W: C_I, C_O, k_1, k_2, ..., k_N
        ksize = W.shape[2:]
        n, in_c = x.shape[:2]   # x: n, C_I, d_1, d_2, ..., d_N
        dims = x.shape[2:]
        ndim = self.ndim
        colon = slice(None)

        # Make empty array for output.
        if self.outs is None:
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for d, k, s, p in zip(dims, ksize, self.stride, self.pad))
            assert all(out > 0 for out in self.outs), \
                'Output sizes should be positive.'
        y_shape = (n, c) + self.outs  # (n, c_O, out_1, out_2, ..., out_N)
        y = cuda.cupy.empty(y_shape, dtype=x.dtype)

        # Convert to C-contiguous arrays.
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        # Get cuDNN handler and descriptors.
        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)
        self.filter_desc = cudnn.create_filter_descriptor(W)
        self.conv_desc = cudnn.create_convolution_descriptor(
            self.pad, self.stride, x.dtype)
        if b is not None:
            # Index the bias with (None, :, None, ..., None) so it has one
            # axis per output axis.
            b_index = (None, colon) + (None,) * ndim
            self.bias_desc = cudnn.create_tensor_descriptor(b[b_index])

        # cuDNN forward computation.
        # The scaling constants are double only for double input, float
        # otherwise.
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')
        algo = libcudnn.getConvolutionBackwardDataAlgorithm(
            handle, self.filter_desc.value, x_desc.value,
            self.conv_desc.value, y_desc.value, _bwd_data_pref,
            handle, one.data, self.filter_desc.value, W.data.ptr,
            x_desc.value, x.data.ptr, self.conv_desc.value,
            algo, workspace.data.ptr, workspace_size,
            zero.data, y_desc.value, y.data.ptr)

        # Add bias if given.
        # TODO(takagi) Support unshared bias
        if b is not None:
                handle, one.data, self.bias_desc.value, b.data.ptr,
                one.data, y_desc.value, y.data.ptr)

        return y,
    def _process_deconv2d(self, function, inputs):
        # Estimates the operation count of one 2D deconvolution call and adds
        # it to self.total_ops.
        # NOTE(review): this fragment looks truncated -- the formatted report
        # line has lost the call that consumed it (note the unmatched closing
        # parenthesis). Confirm against the complete source.
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        batch_size, in_c, in_h, in_w = x.shape
        out_c = W.shape[1]  # out_c
        out_h = conv.get_deconv_outsize(in_h, kh, function.sy, function.ph)
        out_w = conv.get_deconv_outsize(in_w, kw, function.sx, function.pw)

        ops = 2 * batch_size * in_c * out_c * kw * kh * in_w * in_h  # twice because of multiply-and-add
        if b is not None:
            ops += batch_size * out_c * out_w * out_h  # bias
            # The last format field is the operation count divided by 1e9.
            '%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%f' %
            (function.label, batch_size, in_w, in_h, in_c, out_w, out_h, out_c,
             kw, kh, function.pw, function.sx, ops / 1e9))
        self.total_ops += ops
    def forward(self, x):
        """Run features, classifier and upscoring, then crop to the input.

        Args:
            x: Input tensor; ``x.size()[2]`` and ``x.size()[3]`` give the
                height and width of the final crop.

        Returns:
            The upscored result cropped from offset 19 on both spatial axes
            to the input's height and width, made contiguous.
        """
        h = self.features(x)

        h = self.classifier(h)

        if self.nodeconv:
            # Without a learned deconvolution the upscore layer needs an
            # explicit target size, computed for k=64, s=32, p=0.
            from chainer.utils import conv
            in_h, in_w = h.size()[2:4]
            out_h = conv.get_deconv_outsize(in_h, k=64, s=32, p=0)
            out_w = conv.get_deconv_outsize(in_w, k=64, s=32, p=0)
            self.upscore.size = out_h, out_w
        # Upscore exactly once on both paths. Previously it ran twice when
        # nodeconv was set and not at all otherwise.
        h = self.upscore(h)
        h = h[:, :, 19:19 + x.size()[2], 19:19 + x.size()[3]].contiguous()

        return h
# Converts an Unpooling2D function node into an ONNX 'Upsample' node for
# opset 7 or 9. Configurations that cannot be expressed that way (cover_all,
# an outsize other than the expected one, non-zero pad, stride != ksize)
# raise RuntimeError.
# NOTE(review): this fragment looks truncated -- the signature, the
# `expected_outsize` list and the opset-7 make_node call are never closed.
# Confirm against the complete source.
def convert_Unpooling2D(func, opset_version, input_names, num_outputs,
    pad = [func.ph, func.pw]
    stride = [func.sy, func.sx]
    ksize = [func.kh, func.kw]
    outsize = [func.outh, func.outw]
    # TODO(hamaji): These could be implemented by `Slice` and `Pad`.
    if func.cover_all:
        raise RuntimeError('ONNX-chainer does not support `cover_all=True` '
                           'for Unpooling2D')
    h, w = func.inputs[0].shape[2:]
    expected_outsize = [
    if outsize != expected_outsize:
        raise RuntimeError('ONNX-chainer does not support `outsize!=None` '
                           'for Unpooling2D: expected={} actual={}'.format(
                               expected_outsize, outsize))
    if pad != [0, 0]:
        raise RuntimeError('ONNX-chainer does not support `pad!=0` '
                           'for Unpooling2D')
    # This one would require an extra 1x1 MaxPool.
    # NOTE(review): the condition compares stride with ksize, while the
    # message below reports `stride!=1` -- confirm which one is intended.
    if stride != ksize:
        raise RuntimeError('ONNX-chainer does not support `stride!=1` '
                           'for Unpooling2D')
    # Batch and channel axes keep their size; the spatial axes grow by the
    # kernel size.
    scales = [1.0, 1.0, float(func.kh), float(func.kw)]
    if opset_version == 7:
        return onnx_helper.make_node('Upsample',
    if opset_version == 9:
        scales = np.array(scales, dtype=np.float32)
        scales_param = chainer.Parameter(scales)
        return onnx_helper.make_node('Upsample', input_names, num_outputs),
 def forward(self, x):
     """Unpool ``x[0]`` by tiling it over the kernel and folding it back.

     Args:
         x: Sequence whose first element is the input array; its axes from
             index 2 onward are read as height and width.

     Returns:
         One-element tuple holding the output array.
     """
     h, w = x[0].shape[2:]
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(
             h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(
             w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
     xp = cuda.get_array_module(*x)
     # Repeat every input element kh * kw times along two new kernel axes.
     col = xp.tile(x[0][:, :, None, None],
                   (1, 1, self.kh, self.kw, 1, 1))
     # Without the `else` the GPU routine always ran after the CPU one and
     # `y` was unbound for GPU input.
     if xp is numpy:
         y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     else:
         y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     return y,
 def setUp(self):
     # Builds CuPy-backed float32 fixtures (W, x, gy) for a 3x3, stride-1,
     # pad-1 2D deconvolution test.
     # NOTE(review): this fragment looks truncated -- the
     # cuda.cupy.random.normal(...) call for W is never closed. Confirm
     # against the complete source.
     self.in_channels = 3
     self.out_channels = 2
     kh, kw = _pair(3)
     sh, sw = _pair(1)
     ph, pw = _pair(1)
     self.W = cuda.cupy.random.normal(
         0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
         (self.in_channels, self.out_channels, kh, kw)
     N = 2
     inh, inw = 4, 3
     outh = conv.get_deconv_outsize(inh, kh, sh, ph)
     outw = conv.get_deconv_outsize(inw, kw, sw, pw)
     self.x = cuda.cupy.random.uniform(
         -1, 1, (N, self.in_channels, inh, inw)).astype(numpy.float32)
     self.gy = cuda.cupy.random.uniform(
         -1, 1, (N, self.out_channels, outh, outw)).astype(numpy.float32)
 def setUp(self):
     # Builds CuPy-backed float32 fixtures (W, x, gy) for a 3x3, stride-1,
     # pad-1 2D deconvolution test.
     # NOTE(review): this fragment looks truncated -- the astype(...) call
     # that ends the W initialisation is never closed. Confirm against the
     # complete source.
     self.in_channels = 3
     self.out_channels = 2
     kh, kw = _pair(3)
     sh, sw = _pair(1)
     ph, pw = _pair(1)
     self.W = cuda.cupy.random.normal(
         0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
         (self.in_channels, self.out_channels, kh, kw)).astype(
     N = 2
     inh, inw = 4, 3
     outh = conv.get_deconv_outsize(inh, kh, sh, ph)
     outw = conv.get_deconv_outsize(inw, kw, sw, pw)
     self.x = cuda.cupy.random.uniform(
         -1, 1, (N, self.in_channels, inh, inw)).astype(numpy.float32)
     self.gy = cuda.cupy.random.uniform(
         -1, 1, (N, self.out_channels, outh, outw)).astype(numpy.float32)
    def setUp(self):
        # Configures an N-d deconvolution link test: kernel/stride/pad
        # tuples, the parameter names, the optional explicit output size
        # and the input shape.
        # NOTE(review): this fragment looks truncated -- no `else:` separates
        # the two param_names assignments or the two outsize assignments, and
        # the 'eps'/'atol'/'rtol' entries belong to calls whose opening lines
        # are missing. Confirm against the complete source.
        self.N = 2
        self.out_channels = 2
        self.ndim = len(self.dims)
        self.ksize = (3, ) * self.ndim
        self.stride = (2, ) * self.ndim
        self.pad = (1, ) * self.ndim
        if self.nobias:
            self.param_names = ('W', )
            self.param_names = ('W', 'b')

        if self.used_outsize == 'case1' or self.used_outsize == 'None':
            # Use output size determined with get_deconv_outsize.
            outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for (d, k, s,
                     p) in zip(self.dims, self.ksize, self.stride, self.pad))
        elif self.used_outsize == 'case2':
            # Use possible output size other than the one determined with
            # get_deconv_outsize.
            outs = tuple(
                conv.get_deconv_outsize(d, k, s, p) + 1
                for (d, k, s,
                     p) in zip(self.dims, self.ksize, self.stride, self.pad))
        if self.used_outsize != 'None':
            self.outsize = outs
            self.outsize = None

        self.x_shape = (self.N, 4) + self.dims

            'eps': 1e-2,
            'atol': 1e-4,
            'rtol': 1e-3
        if self.dtype == numpy.float16:
            self.check_forward_options.update({'atol': 5e-3, 'rtol': 5e-2})
                'eps': 2**-3,
                'atol': 1e-2,
                'rtol': 1e-1
    def _forward_cudnn(self, x, W, b):
        # cuDNN forward path: allocates the output, builds the tensor /
        # filter / convolution descriptors, asks cuDNN for a backward-data
        # algorithm, adds the bias when given and returns the output as a
        # one-element tuple.
        # NOTE(review): this fragment looks truncated -- the argument list
        # that follows the algorithm query has lost the call it belongs to.
        # Confirm against the complete source.
        c = W.shape[1]  # W: C_I, C_O, k_1, k_2, ..., k_N
        ksize = W.shape[2:]
        n, in_c = x.shape[:2]  # x: n, C_I, d_1, d_2, ..., d_N
        dims = x.shape[2:]
        ndim = self.ndim
        colon = slice(None)

        # Make empty array for output.
        if self.outs is None:
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for d, k, s, p in zip(dims, ksize, self.stride, self.pad))
            assert all(out > 0 for out in self.outs), \
                'Output sizes should be positive.'
        y_shape = (n, c) + self.outs  # (n, c_O, out_1, out_2, ..., out_N)
        y = cuda.cupy.empty(y_shape, dtype=x.dtype)

        # Convert to C-contiguous arrays.
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        # Get cuDNN handler and descriptors.
        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)
        self.filter_desc = cudnn.create_filter_descriptor(W)
        self.conv_desc = cudnn.create_convolution_descriptor(
            self.pad, self.stride, x.dtype)
        if b is not None:
            # Index the bias with (None, :, None, ..., None) so it has one
            # axis per output axis.
            b_index = (None, colon) + (None, ) * ndim
            self.bias_desc = cudnn.create_tensor_descriptor(b[b_index])

        # cuDNN forward computation.
        # The scaling constants are double only for double input, float
        # otherwise.
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size, ), dtype='b')
        algo = libcudnn.getConvolutionBackwardDataAlgorithm(
            handle, self.filter_desc.value, x_desc.value, self.conv_desc.value,
            y_desc.value, _bwd_data_pref, workspace_size)
            handle, one.data, self.filter_desc.value, W.data.ptr, x_desc.value,
            x.data.ptr, self.conv_desc.value, algo, workspace.data.ptr,
            workspace_size, zero.data, y_desc.value, y.data.ptr)

        # Add bias if given.
        # TODO(takagi) Support unshared bias
        if b is not None:
            cudnn.add_tensor(handle, one.data, self.bias_desc.value,
                             b.data.ptr, one.data, y_desc.value, y.data.ptr)

        return y,
    def _calc_out_size(self, x, W):
        """Fill in ``outh`` / ``outw`` when they have not been set yet.

        The kernel size is read from ``W.shape[2:]`` and the input size from
        the last two axes of the four-axis ``x``. A size that is already set
        is left untouched.

        Raises:
            RuntimeError: If a newly computed height or width is not
                positive.
        """
        kernel_h, kernel_w = W.shape[2:]
        _, _, height, width = x.shape

        if self.outh is None:
            # Stored before it is validated, as callers may inspect it.
            self.outh = conv.get_deconv_outsize(
                height, kernel_h, self.sy, self.ph, d=self.dy)
            if self.outh <= 0:
                raise RuntimeError('Height in the output must be positive.')

        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                width, kernel_w, self.sx, self.pw, d=self.dx)
            if self.outw <= 0:
                raise RuntimeError('Width in the output must be positive.')
 def setUp(self):
     # Builds CuPy-backed fixtures (W, x, gy) for a 3x3, stride-1, pad-1 2D
     # deconvolution test and records whether cuDNN is expected to be used
     # under the configured `use_cudnn` setting.
     # NOTE(review): this fragment looks truncated -- the
     # cuda.cupy.random.normal(...) call for W is never closed. Confirm
     # against the complete source.
     self.in_channels = 3
     self.out_channels = 2
     kh, kw = _pair(3)
     sh, sw = _pair(1)
     ph, pw = _pair(1)
     self.W = cuda.cupy.random.normal(
         0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
         (self.in_channels, self.out_channels, kh, kw)
     N = 2
     inh, inw = 4, 3
     outh = conv.get_deconv_outsize(inh, kh, sh, ph)
     outw = conv.get_deconv_outsize(inw, kw, sw, pw)
     self.x = cuda.cupy.random.uniform(
         -1, 1, (N, self.in_channels, inh, inw)).astype(self.dtype)
     self.gy = cuda.cupy.random.uniform(
         -1, 1, (N, self.out_channels, outh, outw)).astype(self.dtype)
     # Evaluate the cuDNN policy with the test's use_cudnn value in effect.
     with chainer.using_config('use_cudnn', self.use_cudnn):
         self.should_call_cudnn = chainer.should_use_cudnn('>=auto')
    def setUp(self, use_cudnn=True):
        # Builds random NumPy float32 fixtures (W, b, x, gy) and the optional
        # explicit output size for a 2D deconvolution test.
        # NOTE(review): `use_cudnn` is not read anywhere in the visible body,
        # and the fragment looks truncated -- the numpy.random.normal(...)
        # call for W is never closed. Confirm against the complete source.
        kh, kw = _pair(self.ksize)
        sh, sw = _pair(self.stride)
        ph, pw = _pair(self.pad)
        self.W = numpy.random.normal(
            0, self.wscale * numpy.sqrt(1. / (kh * kw * self.in_channels)),
            (self.in_channels, self.out_channels, kh, kw)
        self.b = None if self.nobias else numpy.random.uniform(
            -1, 1, self.out_channels).astype(numpy.float32)

        N = 2
        inh, inw = 4, 3
        outh = conv.get_deconv_outsize(inh, kh, sh, ph)
        outw = conv.get_deconv_outsize(inw, kw, sw, pw)
        # The explicit output size is only supplied when test_outsize is set.
        self.outsize = (outh, outw) if self.test_outsize else None
        self.x = numpy.random.uniform(
            -1, 1, (N, self.in_channels, inh, inw)).astype(numpy.float32)
        self.gy = numpy.random.uniform(
            -1, 1, (N, self.out_channels, outh, outw)).astype(numpy.float32)
 def setUp(self):
     # Builds CuPy-backed fixtures (W, x, gy) for a 3x3, stride-1, pad-1 2D
     # deconvolution test and computes the `expect` flag from use_cudnn, the
     # cuDNN version and the dtype.
     # NOTE(review): this fragment looks truncated -- the
     # cuda.cupy.random.normal(...) call for W is never closed. Confirm
     # against the complete source.
     self.in_channels = 3
     self.out_channels = 2
     kh, kw = _pair(3)
     sh, sw = _pair(1)
     ph, pw = _pair(1)
     self.W = cuda.cupy.random.normal(
         0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
         (self.in_channels, self.out_channels, kh, kw)
     N = 2
     inh, inw = 4, 3
     outh = conv.get_deconv_outsize(inh, kh, sh, ph)
     outw = conv.get_deconv_outsize(inw, kw, sw, pw)
     self.x = cuda.cupy.random.uniform(
         -1, 1, (N, self.in_channels, inh, inw)).astype(self.dtype)
     self.gy = cuda.cupy.random.uniform(
         -1, 1, (N, self.out_channels, outh, outw)).astype(self.dtype)
     # True only when use_cudnn is set and either getVersion() reports at
     # least 3000 or the dtype is not float16.
     self.expect = self.use_cudnn and (
         cuda.cudnn.cudnn.getVersion() >= 3000 or
         self.dtype != numpy.float16)
 def forward_cpu(self, inputs):
     """CPU forward pass of the 2D deconvolution.

     Args:
         inputs: Sequence ``(x, W)`` or ``(x, W, b)``; a third element, when
             present, is added as a per-channel bias.

     Returns:
         One-element tuple holding the output array.
     """
     x, W = inputs[:2]
     b = None
     if len(inputs) == 3:
         b = inputs[2]
     kernel_h, kernel_w = W.shape[2:]
     _, _, in_h, in_w = x.shape
     # Contract axis 0 of W with axis 1 of x, then move the resulting axis 3
     # (the batch axis of x) to the front.
     gcol = numpy.rollaxis(numpy.tensordot(W, x, (0, 1)), 3)
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(
             in_h, kernel_h, self.sy, self.ph)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(
             in_w, kernel_w, self.sx, self.pw)
     y = conv.col2im_cpu(
         gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
     if b is None:
         return y,
     # Broadcast the bias across batch, height and width.
     y += b.reshape(1, b.size, 1, 1)
     return y,
 def setUp(self):
     """Create the Deconvolution2D link under test and random x / gy.

     The link is built from ``self.deconv_args`` with ``nobias`` forced to
     ``self.nobias``; when a bias exists it is filled with random values.
     """
     link_args, link_kwargs = self.deconv_args
     link_kwargs['nobias'] = self.nobias
     self.link = L.Deconvolution2D(*link_args, **link_kwargs)
     if not self.nobias:
         bias = self.link.b.data
         bias[...] = numpy.random.uniform(
             -1, 1, bias.shape).astype(numpy.float32)

     n_out = self.link.out_channels
     kernel = self.link.ksize
     # Only the first entry of stride and pad is used, for both axes.
     step = self.link.stride[0]
     margin = self.link.pad[0]

     batch = 2
     in_h, in_w = 3, 2
     kernel_h, kernel_w = _pair(kernel)
     gy_shape = (
         batch,
         n_out,
         conv.get_deconv_outsize(in_h, kernel_h, step, margin),
         conv.get_deconv_outsize(in_w, kernel_w, step, margin),
     )
     self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(numpy.float32)
     self.x = numpy.random.uniform(
         -1, 1, (batch, 3, in_h, in_w)).astype(numpy.float32)
 def forward_cpu(self, inputs):
     """CPU forward pass of the 2D deconvolution, keeping the dtype of x.

     Args:
         inputs: Sequence ``(x, W)`` or ``(x, W, b)``; a third element, when
             present, is added as a per-channel bias.

     Returns:
         One-element tuple holding the output array.
     """
     x, W = inputs[:2]
     b = None
     if len(inputs) == 3:
         b = inputs[2]
     kernel_h, kernel_w = W.shape[2:]
     _, _, in_h, in_w = x.shape
     # Contract axis 0 of W with axis 1 of x, cast to the dtype of x, then
     # move the resulting axis 3 (the batch axis of x) to the front.
     contracted = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
     gcol = numpy.rollaxis(contracted, 3)
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(
             in_h, kernel_h, self.sy, self.ph)
         assert self.outh > 0, "Height in the output should be positive."
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(
             in_w, kernel_w, self.sx, self.pw)
         assert self.outw > 0, "Width in the output should be positive."
     y = conv.col2im_cpu(
         gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
     if b is None:
         return (y,)
     # Broadcast the bias across batch, height and width.
     y += b.reshape(1, b.size, 1, 1)
     return (y,)
    def setUp(self):
        """Create random inputs, gradients and tolerances for the test.

        Reads ``ksize``, ``stride``, ``pad``, ``in_channels``,
        ``out_channels``, ``nobias``, ``test_outsize``, ``x_dtype`` and
        ``W_dtype`` from ``self`` and populates:

        * ``W``, ``b``, ``x``: forward inputs (``b`` is ``None`` when
          ``nobias`` is set),
        * ``gy``: upstream gradient shaped like the deconvolution output,
        * ``ggx``, ``ggW``, ``ggb``: second-order gradients shaped like the
          corresponding inputs,
        * ``outsize``: explicit output size, or ``None`` when
          ``test_outsize`` is false,
        * ``test_forward_options``, ``check_backward_options`` and
          ``check_double_backward_options``: tolerance settings, loosened
          when float16 is involved.
        """
        kh, kw = _pair(self.ksize)
        sh, sw = _pair(self.stride)
        ph, pw = _pair(self.pad)
        self.W = numpy.random.normal(
            0, numpy.sqrt(1. / (kh * kw * self.in_channels)),
            (self.in_channels, self.out_channels, kh, kw)
        ).astype(self.W_dtype)
        self.b = None if self.nobias else numpy.random.uniform(
            -1, 1, self.out_channels).astype(self.x_dtype)

        N = 2
        inh, inw = 4, 3
        outh = conv.get_deconv_outsize(inh, kh, sh, ph)
        outw = conv.get_deconv_outsize(inw, kw, sw, pw)
        self.outsize = (outh, outw) if self.test_outsize else None
        self.x = numpy.random.uniform(
            -1, 1, (N, self.in_channels, inh, inw)).astype(self.x_dtype)
        self.gy = numpy.random.uniform(
            -1, 1, (N, self.out_channels, outh, outw)).astype(self.x_dtype)

        # Second-order gradients share shape and dtype with their inputs.
        self.ggx = numpy.random.uniform(-1, 1, self.x.shape).astype(
            self.x_dtype)
        self.ggW = numpy.random.uniform(-1, 1, self.W.shape).astype(
            self.W_dtype)
        self.ggb = None if self.nobias else numpy.random.uniform(
            -1, 1, self.b.shape).astype(self.x_dtype)

        self.test_forward_options = {}
        self.check_backward_options = {'dtype': numpy.float64}
        self.check_double_backward_options = {'dtype': numpy.float64}
        if self.x_dtype == numpy.float16:
            # Half-precision inputs: loosen forward and gradient tolerances.
            self.test_forward_options.update(atol=5e-3, rtol=5e-2)
            self.check_backward_options.update(atol=5e-4, rtol=5e-3)
            self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
        elif self.W_dtype == numpy.float16:
            # Only the weights are half precision: the forward tolerance is
            # left at its default, gradient tolerances are loosened.
            self.check_backward_options.update(atol=5e-4, rtol=5e-3)
            self.check_double_backward_options.update(atol=5e-3, rtol=5e-2)
    def forward_gpu(self, x):
        """Upsample ``x[0]`` on the GPU using the stored window indexes.

        A zero-filled output canvas is expanded into its sliding-window
        column form, every input value is written into the window slot
        selected by the matching entry of ``self.indexes``, and the columns
        are folded back into an image with ``col2im_gpu``.

        Args:
            x: Sequence whose first element is the 4-axis input array.

        Returns:
            A 1-tuple holding the upsampled array of shape
            ``(n, c, self.outh, self.outw)``.

        Side effects:
            Stores the input dtype in ``self._in_dtype`` and fills in
            ``self.outh`` / ``self.outw`` when they are ``None``.
        """
        self._in_dtype = x[0].dtype

        xp = cuda.cupy
        n, c, h, w = x[0].shape
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
        up_y = xp.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
        up_y = conv.im2col_gpu(
            up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        # Move the two kernel axes last so each window is contiguous in the
        # index arithmetic of the kernel below.
        up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
        n, c, oy, ox, ky, kx = up_y.shape
        indexes = xp.asarray(self.indexes, dtype=numpy.int32)
        # For flat element i, recover (yn, yc, yoy, yox) and write x at
        # offset `index` inside that element's ky * kx window.
        cuda.elementwise(
            'int32 index, T x, int32 n, int32 c, int32 oy, int32 ox,'
            'int32 ky, int32 kx', 'raw T up_y',
            '''
            int yn = i / c / oy / ox;
            int yc = (i / oy / ox) % c;
            int yoy = (i / ox) % oy;
            int yox = i % ox;
            up_y[yn * c * oy * ox * ky * kx +
              yc * oy * ox * ky * kx +
              yoy * ox * ky * kx +
              yox * ky * kx +
              index] = x;
            ''',
            'upsampling_2d_fwd')(indexes, x[0], n, c, oy, ox, ky, kx, up_y)
        # Restore the axis order produced by im2col before folding back.
        up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
        up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw,
                               self.outh, self.outw)
        return up_y,
 def setUp(self):
     """Create GPU fixtures for the grouped-deconvolution cuDNN-call test.

     Channel counts scale with ``self.groups`` (3 input and 2 output
     channels per group).  ``self.W``, ``self.x`` and ``self.gy`` are random
     CuPy arrays cast to ``self.dtype``.  ``self.should_call_cudnn`` records
     whether cuDNN is expected to be used under the ``self.use_cudnn``
     configuration.
     """
     in_channels_a_group = 3
     out_channels_a_group = 2
     self.in_channels = in_channels_a_group * self.groups
     self.out_channels = out_channels_a_group * self.groups
     kh, kw = _pair(3)
     sh, sw = _pair(1)
     ph, pw = _pair(1)
     self.W = cuda.cupy.random.normal(
         0, numpy.sqrt(1. / (kh * kw * in_channels_a_group)),
         (self.in_channels, out_channels_a_group, kh, kw)
     ).astype(self.dtype)
     N = 2
     inh, inw = 4, 3
     outh = conv.get_deconv_outsize(inh, kh, sh, ph)
     outw = conv.get_deconv_outsize(inw, kw, sw, pw)
     self.x = cuda.cupy.random.uniform(
         -1, 1, (N, self.in_channels, inh, inw)).astype(self.dtype)
     self.gy = cuda.cupy.random.uniform(
         -1, 1, (N, self.out_channels, outh, outw)).astype(self.dtype)
     with chainer.using_config('use_cudnn', self.use_cudnn):
         self.should_call_cudnn = chainer.should_use_cudnn('>=auto')
         # With more than one group, cuDNN is only expected from
         # version 7000 onwards.
         if self.groups > 1 and cuda.cuda.cudnn.getVersion() < 7000:
             self.should_call_cudnn = False
    def forward_cpu(self, x):
        """Upsample ``x[0]`` with NumPy using the stored window indexes.

        A zero-filled output canvas is expanded into its sliding-window
        column form, every input value is written into the window slot
        selected by the matching entry of ``self.indexes``, and the columns
        are folded back into an image with ``col2im_cpu``.

        Args:
            x: Sequence whose first element is the 4-axis input array.

        Returns:
            A 1-tuple holding the upsampled array.

        Side effects:
            Stores the input dtype in ``self._in_dtype`` and fills in
            ``self.outh`` / ``self.outw`` when they are ``None``.
        """
        src = x[0]
        self._in_dtype = src.dtype

        batch, channels, in_h, in_w = src.shape
        kh, kw = self.kh, self.kw
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                in_h, kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                in_w, kw, self.sx, self.pw, cover_all=self.cover_all)

        canvas = numpy.zeros(
            (batch, channels, self.outh, self.outw), dtype=self._in_dtype)
        cols = conv.im2col_cpu(
            canvas, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        # Put the two kernel axes last so that each row of the flattened
        # view below is one kh * kw window.
        cols = cols.transpose(0, 1, 4, 5, 2, 3)
        col_h, col_w = cols.shape[2:4]

        windows = cols.reshape(-1, kh * kw)
        slots = self.indexes.ravel()
        # Row r receives the r-th input value at window offset slots[r].
        rows = numpy.arange(len(slots))
        windows[rows, slots] = src.ravel()

        cols = windows.reshape(batch, channels, col_h, col_w, kh, kw)
        result = conv.col2im_cpu(
            cols.transpose(0, 1, 4, 5, 2, 3), self.sy, self.sx, self.ph,
            self.pw, self.outh, self.outw)
        return result,
    def setUp(self):
        """Create random fixtures for the N-dimensional deconvolution test.

        ``self.dims`` gives the spatial input size; kernel size 3, stride 2
        and padding 1 are used along every axis, and the scalar
        ``self.dilate`` is expanded to one entry per axis.  The method
        populates the forward inputs (``W``, ``b``, ``x``), the upstream
        gradient ``gy``, the second-order gradients (``ggx``, ``ggW``,
        ``ggb``), ``outsize`` and the tolerance option dictionaries.
        """
        batch = 2
        n_in = 4
        n_out = 2
        ndim = len(self.dims)
        ksize = (3,) * ndim
        self.stride = (2,) * ndim
        self.pad = (1,) * ndim
        self.dilate = (self.dilate,) * ndim

        uniform = numpy.random.uniform

        # The weight scale is derived from in_channels times kernel volume.
        fan_in = functools.reduce(mul, ksize, n_in)
        W_scale = numpy.sqrt(1. / fan_in)
        W_shape = (n_in, n_out // self.groups) + ksize
        self.W = numpy.random.normal(0, W_scale, W_shape).astype(self.W_dtype)
        self.b = uniform(-1, 1, n_out).astype(self.b_dtype)
        self.check_double_backward_options = {
            'dtype': numpy.float64, 'atol': 5e-3, 'rtol': 5e-2}

        per_axis = zip(self.dims, ksize, self.stride, self.pad, self.dilate)
        outs = tuple(
            conv.get_deconv_outsize(size, k, s, p, d=di)
            for size, k, s, p, di in per_axis)
        if self.test_outsize:
            self.outsize = outs
        else:
            self.outsize = None

        self.x = uniform(
            -1, 1, (batch, n_in) + self.dims).astype(self.x_dtype)
        self.gy = uniform(
            -1, 1, (batch, n_out) + outs).astype(self.x_dtype)

        # Second-order gradients share shape and dtype with their inputs.
        self.ggx = uniform(-1, 1, self.x.shape).astype(self.x.dtype)
        self.ggW = uniform(-1, 1, self.W.shape).astype(self.W.dtype)
        self.ggb = uniform(-1, 1, self.b.shape).astype(self.b.dtype)

        uses_half = any(
            dt == numpy.float16
            for dt in (self.x_dtype, self.W_dtype, self.b_dtype))
        if uses_half:
            # Any half-precision operand gets looser tolerances.
            self.test_forward_options = {'atol': 5e-4, 'rtol': 5e-3}
            self.check_backward_options = {
                'dtype': numpy.float64, 'atol': 2 ** -4, 'rtol': 2 ** -4}
        else:
            self.test_forward_options = {}
            self.check_backward_options = {
                'dtype': numpy.float64, 'atol': 3e-5, 'rtol': 3e-4}
    def setUp(self):
        """Create GPU fixtures for the N-dimensional cuDNN-call test.

        Kernel size 3, stride 1 and padding 1 are used along every axis of
        ``self.dims``.  ``self.W``, ``self.x`` and ``self.gy`` are random
        CuPy arrays cast to ``self.dtype``.  ``self.expected`` records
        whether cuDNN is expected to be used: the ``self.use_cudnn``
        configuration must allow it and there must be more than one spatial
        dimension.
        """
        n_in = 3
        n_out = 2
        batch = 2
        ndim = len(self.dims)
        ksize = (3,) * ndim
        stride = (1,) * ndim
        pad = (1,) * ndim
        rng = cuda.cupy.random

        # The weight scale is derived from in_channels times kernel volume.
        fan_in = functools.reduce(mul, ksize, n_in)
        W_scale = numpy.sqrt(1. / fan_in)
        W_shape = (n_in, n_out) + ksize
        self.W = rng.normal(0, W_scale, W_shape).astype(self.dtype)

        outs = tuple(
            conv.get_deconv_outsize(size, k, s, p)
            for size, k, s, p in zip(self.dims, ksize, stride, pad))

        self.x = rng.uniform(
            -1, 1, (batch, n_in) + self.dims).astype(self.dtype)
        self.gy = rng.uniform(
            -1, 1, (batch, n_out) + outs).astype(self.dtype)

        with chainer.using_config('use_cudnn', self.use_cudnn):
            self.expected = chainer.should_use_cudnn('>=auto') and ndim > 1
    def setUp(self):
        in_channels = 3
        out_channels = 2
        ndim = len(self.dims)
        ksize = (3,) * ndim
        stride = (1,) * ndim
        pad = (1,) * ndim

        W_scale = numpy.sqrt(1. / functools.reduce(mul, ksize, in_channels))
        W_shape = (in_channels, out_channels) + ksize
        self.W = cuda.cupy.random.normal(
            0, W_scale, W_shape).astype(self.dtype)
        outs = tuple(
            conv.get_deconv_outsize(d, k, s, p)
            for (d, k, s, p) in zip(self.dims, ksize, stride, pad))
        x_shape = (2, in_channels) + self.dims
        self.x = cuda.cupy.random.uniform(-1, 1, x_shape).astype(self.dtype)
        gy_shape = (2, out_channels) + outs
        self.gy = cuda.cupy.random.uniform(-1, 1, gy_shape).astype(self.dtype)
        self.expected = self.use_cudnn and ndim > 1 and (
            cuda.cudnn.cudnn.getVersion() >= 3000 or
