Example #1
 def test_col2im_consistency(self):
     col = conv.im2col_cpu(self.x, 3, 3, 2, 2, 2, 2, dy=2, dx=2)
     h, w = self.x.shape[2:]
     im_cpu = conv.col2im_cpu(col, 2, 2, 2, 2, h, w, dy=2, dx=2)
     im_gpu = conv.col2im_gpu(
         cuda.to_gpu(col), 2, 2, 2, 2, h, w, dy=2, dx=2)
     testing.assert_allclose(im_cpu, im_gpu.get())
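For reference, here is a minimal NumPy sketch of the scatter-add that col2im performs, assuming Chainer's (n, c, kh, kw, out_h, out_w) column layout; the function name and structure are illustrative, not the library's code:

    # Minimal sketch: col2im scatter-adds each kernel offset back onto
    # the padded image, then crops the padding away.
    import numpy

    def col2im_reference(col, sy, sx, ph, pw, h, w, dy=1, dx=1):
        n, c, kh, kw, out_h, out_w = col.shape
        img = numpy.zeros((n, c, h + 2 * ph + sy - 1, w + 2 * pw + sx - 1),
                          dtype=col.dtype)
        for j in range(kh):
            jdy = j * dy
            for i in range(kw):
                idx = i * dx
                img[:, :, jdy:jdy + sy * out_h:sy,
                    idx:idx + sx * out_w:sx] += col[:, :, j, i]
        return img[:, :, ph:h + ph, pw:w + pw]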
Example #2
 def forward_gpu(self, x):
     xp = cuda.cupy
     n, c, h, w = x[0].shape
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
     up_y = xp.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
     up_y = conv.im2col_gpu(up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, cover_all=self.cover_all)
     up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
     n, c, oy, ox, ky, kx = up_y.shape
     indexes = xp.asarray(self.indexes, dtype=numpy.int32)
     xp.ElementwiseKernel(
         "int32 index, float32 x, int32 n, int32 c, int32 oy, int32 ox," "int32 ky, int32 kx",
         "raw float32 up_y",
         """
         int yn = i / c / oy / ox;
         int yc = (i / oy / ox) % c;
         int yoy = (i / ox) % oy;
         int yox = i % ox;
         up_y[yn * c * oy * ox * ky * kx +
           yc * oy * ox * ky * kx +
           yoy * ox * ky * kx +
           yox * ky * kx +
           index] = x;
         """,
         "upsampling_2d_fwd",
     )(indexes, x[0], n, c, oy, ox, ky, kx, up_y)
     up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
     up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
     return (up_y,)
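The ElementwiseKernel above scatters each pooled value into the (n, c, oy, ox, ky, kx) column buffer at its recorded argmax offset. A rough NumPy equivalent of that scatter, as a sketch that assumes indexes has the same (n, c, oy, ox) shape as x[0] (the im2col of a zero array is itself zero, so building the buffer fresh is equivalent):

    # up_y's flat offset is i * ky * kx + index for the i-th (n, c, oy, ox) cell.
    flat = numpy.zeros((n * c * oy * ox, ky * kx), dtype=numpy.float32)
    flat[numpy.arange(flat.shape[0]), indexes.ravel()] = x[0].ravel()
    up_y = flat.reshape(n, c, oy, ox, ky, kx)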
Example #3
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        if cuda.cudnn_enabled and self.use_cudnn:
            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            if not gy.flags.c_contiguous:
                gy = cuda.cupy.ascontiguousarray(gy)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            dtype = x.dtype
            one = numpy.array(1, dtype=dtype).ctypes
            zero = numpy.array(0, dtype=dtype).ctypes

            libcudnn.convolutionBackwardFilter(
                handle, one.data, x_desc.value, x.data.ptr,
                gy_desc.value, gy.data.ptr, self.conv_desc.value,
                zero.data, self.filter_desc.value, gW.data.ptr)

            gx = cuda.cupy.empty_like(x)
            libcudnn.convolutionBackwardData(
                handle, one.data, self.filter_desc.value, W.data.ptr,
                gy_desc.value, gy.data.ptr, self.conv_desc.value,
                zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(inputs[2])
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
        else:
            gW_mat = gW.reshape(out_c, c * kh * kw)
            col_mats = self.col.reshape(n, c * kh * kw, out_h * out_w)
            gy_mats = gy.reshape(n, out_c, out_h * out_w)
            # TODO(beam2d): Use streams or batch gemm
            gW_mat[...] = 0
            for i in moves.range(n):
                gW_mat += cuda.cupy.dot(gy_mats[i], col_mats[i].T)

            W_mat = W.reshape(out_c, -1)
            gcol = cuda.cupy.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * kh * kw, out_h * out_w)
            for i in moves.range(n):
                cuda.cupy.dot(W_mat.T, gy_mats[i], gcol_mats[i])

            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
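The TODO in the non-cuDNN branch can be addressed with batched matmul; a loop-free sketch, assuming cupy.matmul's broadcasting over the leading batch axis:

    # Batched replacement for the two per-sample loops above.
    gW_mat[...] = cuda.cupy.matmul(
        gy_mats, col_mats.transpose(0, 2, 1)).sum(axis=0)
    gcol_mats[...] = cuda.cupy.matmul(W_mat.T[None], gy_mats)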
Example #5
    def forward(self, x):
        self.retain_inputs(())

        h, w = x[0].shape[2:]
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h,
                                                self.kh,
                                                self.sy,
                                                self.ph,
                                                cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w,
                                                self.kw,
                                                self.sx,
                                                self.pw,
                                                cover_all=self.cover_all)
        xp = cuda.get_array_module(*x)
        col = xp.tile(x[0][:, :, None, None], (1, 1, self.kh, self.kw, 1, 1))
        if xp is numpy:
            y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
        else:
            y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
        return y,
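For reference, get_deconv_outsize inverts the convolution output-size formula; a sketch of the computation, assuming Chainer's definition (without dilation):

    # Output size of a stride-s, kernel-k, pad-p deconvolution.
    def deconv_outsize(size, k, s, p, cover_all=False):
        if cover_all:
            return s * (size - 1) + k - s + 1 - 2 * p
        return s * (size - 1) + k - 2 * p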
Example #6
    def backward_gpu(self, x, gy):
        out_c, out_h, out_w = gy[0].shape[1:]
        n, c, h, w = x[0].shape

        if cuda.cudnn_enabled and self.use_cudnn:
            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x[0])
            gy_arr = gy[0]
            if not gy_arr.flags.c_contiguous:
                gy_arr = cuda.cupy.ascontiguousarray(gy_arr)
            gy_desc = cudnn.create_tensor_descriptor(gy_arr)
            one = ctypes.c_float(1)
            zero = ctypes.c_float(0)
            if self.b is not None:
                libcudnn.convolutionBackwardBias(handle, one, gy_desc.value,
                                                 gy_arr.data.ptr, one,
                                                 self.bias_desc.value,
                                                 self.gb.data.ptr)

            libcudnn.convolutionBackwardFilter(handle, one, x_desc.value,
                                               x[0].data.ptr, gy_desc.value,
                                               gy_arr.data.ptr,
                                               self.conv_desc.value, one,
                                               self.filter_desc.value,
                                               self.gW.data.ptr)

            gx = cuda.empty_like(x[0])
            libcudnn.convolutionBackwardData(handle, one,
                                             self.filter_desc.value,
                                             self.W.data.ptr, gy_desc.value,
                                             gy_arr.data.ptr,
                                             self.conv_desc.value, zero,
                                             x_desc.value, gx.data.ptr)
        else:
            if self.gb is not None:
                self.gb += gy[0].sum(axis=(0, 2, 3))

            # TODO(beam2d): Use streams
            gW_mat = self.gW.reshape(out_c, c * self.kh * self.kw)
            col_mats = self.col.reshape(n, c * self.kh * self.kw,
                                        out_h * out_w)
            gy_mats = gy[0].reshape(n, out_c, out_h * out_w)
            for i in moves.range(n):
                gW_mat += cuda.cupy.dot(gy_mats[i], col_mats[i].T)

            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            gcol = cuda.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * self.kh * self.kw, out_h * out_w)
            for i in moves.range(n):
                cuda.cupy.dot(W_mat.T, gy_mats[i], gcol_mats[i])

            gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)

        return gx,
Example #7
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
        if cuda.cudnn_enabled and self.use_cudnn:
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
                                dtype=numpy.float32)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx))
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            one = numpy.array(1, dtype=x.dtype).ctypes
            zero = numpy.array(0, dtype=x.dtype).ctypes

            libcudnn.convolutionBackwardData_v2(
                handle, one.data, self.filter_desc.value, W.data.ptr,
                x_desc.value, x.data.ptr, self.conv_desc.value,
                zero.data, y_desc.value, y.data.ptr)
            if b is not None:
                libcudnn.addTensor_v2(
                    handle, libcudnn.CUDNN_ADD_SAME_C,
                    one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            W_mat = W.reshape(in_c, c * kh * kw)
            x_mats = x.reshape(n, in_c, in_h * in_w)
            gcol = cuda.cupy.empty(
                (n, c, kh, kw, in_h, in_w), dtype=numpy.float32)
            gcol_mats = gcol.reshape(n, c * kh * kw, in_h * in_w)
            for i in moves.range(n):
                cuda.cupy.dot(W_mat.T, x_mats[i], gcol_mats[i])
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
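The per-sample dot loop in the fallback branch is equivalent to a single tensordot followed by col2im, as the later examples (#13, #20, #26) do; a sketch:

    # Loop-free form of the fallback branch.
    gcol = cuda.cupy.tensordot(W, x, (0, 1))   # -> (c, kh, kw, n, in_h, in_w)
    gcol = cuda.cupy.rollaxis(gcol, 3)         # -> (n, c, kh, kw, in_h, in_w)
    y = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw,
                        self.outh, self.outw)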
Example #8
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
        if cuda.cudnn_enabled and self.use_cudnn:
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
                                dtype=numpy.float32)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx))
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(b[None, :,
                                                                  None, None])

            one = numpy.array(1, dtype=x.dtype).ctypes
            zero = numpy.array(0, dtype=x.dtype).ctypes

            libcudnn.convolutionBackwardData_v2(
                handle, one.data, self.filter_desc.value, W.data.ptr,
                x_desc.value, x.data.ptr, self.conv_desc.value, zero.data,
                y_desc.value, y.data.ptr)
            if b is not None:
                libcudnn.addTensor_v2(handle, libcudnn.CUDNN_ADD_SAME_C,
                                      one.data, self.bias_desc.value,
                                      b.data.ptr, one.data, y_desc.value,
                                      y.data.ptr)
        else:
            W_mat = W.reshape(in_c, c * kh * kw)
            x_mats = x.reshape(n, in_c, in_h * in_w)
            gcol = cuda.cupy.empty((n, c, kh, kw, in_h, in_w),
                                   dtype=numpy.float32)
            gcol_mats = gcol.reshape(n, c * kh * kw, in_h * in_w)
            for i in moves.range(n):
                cuda.cupy.dot(W_mat.T, x_mats[i], gcol_mats[i])
            y = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
Example #9
    def backward_gpu(self, x, gy):
        out_c, out_h, out_w = gy[0].shape[1:]
        n, c, h, w = x[0].shape

        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            x_desc = cudnn.get_tensor_desc(x[0], h, w)
            gy_desc = cudnn.get_tensor_desc(gy[0], out_h, out_w)
            if self.b is not None:
                libcudnn.cudnnConvolutionBackwardBias(
                    handle, 1, gy_desc.value, cudnn.get_ptr(gy[0]),
                    1, self.bias_desc.value, cudnn.get_ptr(self.gb))

            libcudnn.cudnnConvolutionBackwardFilter(
                handle, 1, x_desc.value, cudnn.get_ptr(x[0]),
                gy_desc.value, cudnn.get_ptr(gy[0]), self.conv_desc.value,
                1, self.filter_desc.value, cudnn.get_ptr(self.gW))

            gx = cuda.empty_like(x[0])
            libcudnn.cudnnConvolutionBackwardData(
                handle, 1, self.filter_desc.value, cudnn.get_ptr(self.W),
                gy_desc.value, cudnn.get_ptr(gy[0]), self.conv_desc.value,
                0, x_desc.value, cudnn.get_ptr(gx))
        else:
            handle = cuda.get_cublas_handle()
            if self.gb is not None:
                # TODO(beam2d): Unify kernels
                with cuda.using_cumisc(handle):
                    tmp = cuda.cumisc.sum(
                        gy[0].reshape(n * out_c, out_h * out_w), axis=1)
                    tmp = cuda.cumisc.sum(tmp.reshape(n, out_c), axis=0)
                    self.gb += tmp

            # TODO(beam2d): Use streams
            gW_mat = self.gW.reshape(out_c, c * self.kh * self.kw)
            col_mats = self.col.reshape(
                n, c * self.kh * self.kw, out_h * out_w)
            gy_mats = gy[0].reshape(n, out_c, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.add_dot(
                    gy_mats[i], col_mats[i], gW_mat, transb='T', handle=handle)

            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            gcol = cuda.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * self.kh * self.kw, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.dot(W_mat, gy_mats[i], transa='T', handle=handle,
                                  out=gcol_mats[i])

            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        return gx,
Example #10
    def backward_gpu(self, x, gy):
        out_c, out_h, out_w = gy[0].shape[1:]
        n, c, h, w = x[0].shape

        if cuda.cudnn_enabled and self.use_cudnn:
            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x[0])
            gy_arr = gy[0]
            if not gy_arr.flags.c_contiguous:
                gy_arr = cuda.cupy.ascontiguousarray(gy_arr)
            gy_desc = cudnn.create_tensor_descriptor(gy_arr)
            one = ctypes.c_float(1)
            zero = ctypes.c_float(0)
            if self.b is not None:
                libcudnn.convolutionBackwardBias(
                    handle, one, gy_desc.value, gy_arr.data.ptr,
                    one, self.bias_desc.value, self.gb.data.ptr)

            libcudnn.convolutionBackwardFilter(
                handle, one, x_desc.value, x[0].data.ptr,
                gy_desc.value, gy_arr.data.ptr, self.conv_desc.value,
                one, self.filter_desc.value, self.gW.data.ptr)

            gx = cuda.empty_like(x[0])
            libcudnn.convolutionBackwardData(
                handle, one, self.filter_desc.value, self.W.data.ptr,
                gy_desc.value, gy_arr.data.ptr, self.conv_desc.value,
                zero, x_desc.value, gx.data.ptr)
        else:
            handle = cuda.get_cublas_handle()
            if self.gb is not None:
                self.gb += gy[0].sum(axis=(0, 2, 3))

            # TODO(beam2d): Use streams
            gW_mat = self.gW.reshape(out_c, c * self.kh * self.kw)
            col_mats = self.col.reshape(
                n, c * self.kh * self.kw, out_h * out_w)
            gy_mats = gy[0].reshape(n, out_c, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.add_dot(
                    gy_mats[i], col_mats[i], gW_mat, transb='T', handle=handle)

            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            gcol = cuda.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * self.kh * self.kw, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.dot(W_mat, gy_mats[i], transa='T', handle=handle,
                                  out=gcol_mats[i])

            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        return gx,
Example #11
 def forward(self, x):
     h, w = x[0].shape[2:]
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
     xp = cuda.get_array_module(*x)
     col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis], (1, 1, self.kh, self.kw, 1, 1))
     if isinstance(x[0], cuda.ndarray):
         y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
     else:
         y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
     return (y,)
Example #12
    def forward(self, x):
        h, w = x[0].shape[2:]
        n = x[0].shape[0]
        c = x[0].shape[1]
        indexes = x[1]

        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h,
                                                self.kh,
                                                self.sy,
                                                self.ph,
                                                cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w,
                                                self.kw,
                                                self.sx,
                                                self.pw,
                                                cover_all=self.cover_all)
        xp = cuda.get_array_module(*x)

        col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                      (1, 1, self.kh, self.kw, 1, 1))

        # NOTE(hvy): Take indexes(Switches) into account
        # TODO(hvy): Remove the loops and make it efficient
        y = xp.zeros_like(col)
        if isinstance(x[0], cuda.ndarray):
            indexes = cuda.cupy.asnumpy(indexes)

        for n_i in range(n):
            for c_i in range(c):
                for r in range(h):
                    for w_i in range(w):  # was `c`, which shadowed the channel count
                        index = indexes[n_i][c_i][r][w_i]
                        ky, kx = index // self.kw, index % self.kw
                        y[n_i, c_i, ky, kx, r, w_i] = col[n_i, c_i, ky, kx, r, w_i]

        if isinstance(x[0], cuda.ndarray):
            y = conv.col2im_gpu(y, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
        else:
            y = conv.col2im_cpu(y, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)

        return y,
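The quadruple loop flagged by the TODO can be vectorized with fancy indexing; a sketch, assuming each switch encodes index = ky * self.kw + kx and keeping indexes as an xp array instead of converting it to NumPy:

    # Vectorized form of the switch scatter.
    ky = indexes // self.kw
    kx = indexes % self.kw
    ni, ci, ri, wi = xp.indices((n, c, h, w))
    y = xp.zeros_like(col)
    y[ni, ci, ky, kx, ri, wi] = col[ni, ci, ky, kx, ri, wi]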
Example #13
 def _forward_gpu_core(self, x, W, b):
     # Implementation using col2im
     gcol = cuda.cupy.tensordot(W, x, (0, 1)).astype(x.dtype,
                                                     copy=False)
     # - k, m, n: shape of out_channel
     # - b: number of inputs
     # - h, w: height and width of kernels
     # k, m, n, b, h, w -> b, k, m, n, h, w
     gcol = cuda.cupy.rollaxis(gcol, 3)
     y = conv.col2im_gpu(
         gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw,
         dy=self.dy, dx=self.dx)
     if b is not None:
         y += b.reshape(1, b.size, 1, 1)
     return y,
Example #14
    def backward(self, inputs, grad_outputs):
        x, = inputs
        xp = cuda.get_array_module(x)
        gy, = grad_outputs

        n, _, out_h, out_w = gy.shape
        _, c, h, w = x.shape
        gy = gy.reshape(n, c, self.kh, self.kw, out_h, out_w)
        if xp == numpy:
            gx = col2im_cpu(
                gy, self.sy, self.sx, self.ph, self.pw, h, w, self.dy, self.dx)
        else:
            gx = col2im_gpu(
                gy, self.sy, self.sx, self.ph, self.pw, h, w, self.dy, self.dx)
        return gx,
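col2im serves as the gradient of im2col here because the two are adjoint linear maps; a quick numerical check of that identity, sketched with the conv utilities seen above:

    # <im2col(x), r> == <x, col2im(r)> for matching parameters.
    x = numpy.random.randn(2, 3, 6, 6).astype(numpy.float32)
    col = conv.im2col_cpu(x, 3, 3, 2, 2, 1, 1)
    r = numpy.random.randn(*col.shape).astype(numpy.float32)
    lhs = (col * r).sum()
    rhs = (x * conv.col2im_cpu(r, 2, 2, 1, 1, 6, 6)).sum()
    assert numpy.allclose(lhs, rhs, atol=1e-4)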
Example #15
    def backward(self, inputs, grad_outputs):
        x, = inputs
        xp = cuda.get_array_module(x)
        gy, = grad_outputs

        n, _, out_h, out_w = gy.shape
        _, c, h, w = x.shape
        gy = gy.reshape(n, c, self.kh, self.kw, out_h, out_w)
        if xp == numpy:
            gx = col2im_cpu(gy, self.sy, self.sx, self.ph, self.pw, h, w,
                            self.dy, self.dx)
        else:
            gx = col2im_gpu(gy, self.sy, self.sx, self.ph, self.pw, h, w,
                            self.dy, self.dx)
        return gx,
Example #16
    def forward_gpu(self, x):
        self.retain_inputs(())
        self._in_dtype = x[0].dtype

        xp = cuda.cupy
        n, c, h, w = x[0].shape
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h,
                                                self.kh,
                                                self.sy,
                                                self.ph,
                                                cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w,
                                                self.kw,
                                                self.sx,
                                                self.pw,
                                                cover_all=self.cover_all)
        up_y = xp.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
        up_y = conv.im2col_gpu(up_y,
                               self.kh,
                               self.kw,
                               self.sy,
                               self.sx,
                               self.ph,
                               self.pw,
                               cover_all=self.cover_all)
        up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
        n, c, oy, ox, ky, kx = up_y.shape
        indexes = xp.asarray(self.indexes, dtype=numpy.int32)
        xp.ElementwiseKernel(
            'int32 index, float32 x, int32 n, int32 c, int32 oy, int32 ox,'
            'int32 ky, int32 kx', 'raw float32 up_y', '''
            int yn = i / c / oy / ox;
            int yc = (i / oy / ox) % c;
            int yoy = (i / ox) % oy;
            int yox = i % ox;
            up_y[yn * c * oy * ox * ky * kx +
              yc * oy * ox * ky * kx +
              yoy * ox * ky * kx +
              yox * ky * kx +
              index] = x;
            ''', 'upsampling_2d_fwd')(indexes, x[0], n, c, oy, ox, ky, kx,
                                      up_y)
        up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
        up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw,
                               self.outh, self.outw)
        return up_y,
Example #17
    def forward_gpu(self, x):
        n, out_c, out_h, out_w = x[0].shape
        c = self.W.shape[1]
        h = get_deconv_outsize(out_h, self.kh, self.sy, self.ph)
        w = get_deconv_outsize(out_w, self.kw, self.sx, self.pw)
        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            x_desc = cudnn.get_tensor_desc(x[0], out_h, out_w)
            y = cuda.empty((n, c, h, w), dtype=numpy.float32)
            y_desc = cudnn.get_tensor_desc(y, h, w)

            self.filter_desc = cudnn.get_filter4d_desc(self.W)
            self.conv_desc = cudnn.get_conv2d_desc(
                (self.ph, self.pw), (self.sy, self.sx))
            if self.b is not None:
                self.bias_desc = cudnn.get_conv_bias_desc(self.b)

            libcudnn.cudnnConvolutionBackwardData(
                handle, 1, self.filter_desc.value, cudnn.get_ptr(self.W),
                x_desc.value, cudnn.get_ptr(x[0]), self.conv_desc.value,
                0, y_desc.value, cudnn.get_ptr(y))
            if self.b is not None:
                libcudnn.cudnnAddTensor(
                    handle, libcudnn.cudnnAddMode['CUDNN_ADD_SAME_C'],
                    1, self.bias_desc.value, cudnn.get_ptr(self.b),
                    1, y_desc.value, cudnn.get_ptr(y))
        else:
            handle = cuda.get_cublas_handle()
            # TODO(beam2d): Use streams
            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            x_mats = x[0].reshape(n, out_c, out_h * out_w)
            gcol = cuda.empty((n, c, self.kh, self.kw, out_h, out_w),
                              dtype=numpy.float32)
            gcol_mats = gcol.reshape(n, c * self.kh * self.kw, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.dot(W_mat, x_mats[i], transa='T', handle=handle,
                                  out=gcol_mats[i])
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)
            # TODO(beam2d): Support unshared bias
            if self.b is not None:
                cuda.elementwise(
                    'float* y, const float* b, int c, int hw',
                    'y[i] += b[i / hw % c]',
                    'conv_bias_fwd')(y, self.b, c, h * w)
        return y,
Example #18
    def backward(self, inputs, grad_outputs):
        x, W = inputs[:2]

        xp = cuda.get_array_module(*x)
        W = xp.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)

        W = self.M * W
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        B, C, KY, KX, IY, IX = self.col.shape
        D = W.shape[0]

        # (B, C*D, IY, IX) -> (C, D, B*IY*IX)
        gy_ = gy.reshape((B, C, D, IY * IX)).transpose(1, 2, 0, 3) \
            .reshape((C, D, B * IY * IX))
        c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
            .reshape((C, B * IY * IX, KY * KX))
        # (C, D, B*IY*IX), (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
        gW_ = _matmul(gy_, c_, xp)
        gW = gW_.reshape((C, D, KY, KX)).transpose(1, 0, 2, 3)
        gW = gW.astype(W.dtype, copy=False)

        w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
        # (C, KY*KX, D), (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)
        gcol = _matmul(w_, gy_, xp).reshape((C, KY, KX, B, IY, IX))
        gcol = gcol.astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, 3)

        if xp is numpy:
            gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)
        else:
            gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)

        if b is None:
            return gx, gW
        else:
            gy = xp.rollaxis(gy, 1, 4)
            gb = gy.sum(axis=(0, 1, 2))
            return gx, gW, gb
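The _matmul helper used here is a stacked matrix product over the leading C axis; with modern NumPy/CuPy it corresponds to xp.matmul (a sketch; _matmul itself is defined elsewhere in the source repository):

    # Stacked products over C with xp.matmul.
    gW_ = xp.matmul(gy_, c_)   # (C, D, B*IY*IX) @ (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
    gcol = xp.matmul(w_, gy_)  # (C, KY*KX, D) @ (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)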
Example #19
 def forward(self, x):
     h, w = x[0].shape[2:]
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(
             h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(
             w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
     xp = cuda.get_array_module(*x)
     col = xp.tile(x[0][:, :, None, None],
                   (1, 1, self.kh, self.kw, 1, 1))
     if xp is numpy:
         y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     else:
         y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     return y,
Example #20
    def forward_gpu(self, inputs):
        self.retain_inputs((0, 1))  # only retain x and W
        if len(inputs) == 2:
            (x, W), b = inputs, None
        else:
            x, W, b = inputs

        self._calc_out_size(x, W)
        self._set_cover_all(x, W)

        if (not self.cover_all and chainer.should_use_cudnn('>=auto')
                and x.dtype == W.dtype
                and ((self.dy == 1 and self.dx == 1) or
                     (_cudnn_version_ >= 6000
                      and not configuration.config.cudnn_deterministic))):

            # cuDNN implementation
            return self._forward_cudnn(x, W, b)

        else:
            # Implementation using col2im
            gcol = cuda.cupy.tensordot(W, x, (0, 1)).astype(x.dtype,
                                                            copy=False)
            # - k, m, n: shape of out_channel
            # - b: number of inputs
            # - h, w: height and width of kernels
            # k, m, n, b, h, w -> b, k, m, n, h, w
            gcol = cuda.cupy.rollaxis(gcol, 3)
            y = conv.col2im_gpu(gcol,
                                self.sy,
                                self.sx,
                                self.ph,
                                self.pw,
                                self.outh,
                                self.outw,
                                dy=self.dy,
                                dx=self.dx)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
            return y,
Example #21
    def backward(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        xp = cuda.get_array_module(*x)

        B, C, KY, KX, IY, IX = self.col.shape
        D = W.shape[0]

        # (B, C*D, IY, IX) -> (C, D, B*IY*IX)
        gy_ = gy.reshape((B, C, D, IY * IX)).transpose(1, 2, 0, 3) \
            .reshape((C, D, B * IY * IX))
        c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
            .reshape((C, B * IY * IX, KY * KX))
        # (C, D, B*IY*IX), (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
        gW_ = _matmul(gy_, c_, xp)
        gW = gW_.reshape((C, D, KY, KX)).transpose(1, 0, 2, 3)
        gW = gW.astype(W.dtype, copy=False)

        w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
        # (C, KY*KX, D), (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)
        gcol = _matmul(w_, gy_, xp).reshape((C, KY, KX, B, IY, IX))
        gcol = gcol.astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, 3)

        if xp is numpy:
            gx = conv.col2im_cpu(gcol, self.sy, self.sx,
                                 self.ph, self.pw, h, w)
        else:
            gx = conv.col2im_gpu(gcol, self.sy, self.sx,
                                 self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gy = xp.rollaxis(gy, 1, 4)
            gb = gy.sum(axis=(0, 1, 2))
            return gx, gW, gb
Example #22
    def forward_gpu(self, x):
        self.retain_inputs(())
        self._in_dtype = x[0].dtype

        xp = cuda.cupy
        n, c, h, w = x[0].shape
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
        up_y = xp.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
        up_y = conv.im2col_gpu(
            up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
        n, c, oy, ox, ky, kx = up_y.shape
        indexes = xp.asarray(self.indexes, dtype=numpy.int32)
        xp.ElementwiseKernel(
            'int32 index, T x, int32 n, int32 c, int32 oy, int32 ox,'
            'int32 ky, int32 kx', 'raw T up_y',
            '''
            int yn = i / c / oy / ox;
            int yc = (i / oy / ox) % c;
            int yoy = (i / ox) % oy;
            int yox = i % ox;
            up_y[yn * c * oy * ox * ky * kx +
              yc * oy * ox * ky * kx +
              yoy * ox * ky * kx +
              yox * ky * kx +
              index] = x;
            ''',
            'upsampling_2d_fwd')(indexes, x[0], n, c, oy, ox, ky, kx, up_y)
        up_y = up_y.transpose(0, 1, 4, 5, 2, 3)
        up_y = conv.col2im_gpu(up_y, self.sy, self.sx, self.ph, self.pw,
                               self.outh, self.outw)
        return up_y,
Example #23
 def forward(self, x):
     h, w = x[0].shape[2:]
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(h,
                                             self.kh,
                                             self.sy,
                                             self.ph,
                                             cover_all=self.cover_all)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(w,
                                             self.kw,
                                             self.sx,
                                             self.pw,
                                             cover_all=self.cover_all)
     xp = cuda.get_array_module(*x)
     col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                   (1, 1, self.kh, self.kw, 1, 1))
     if isinstance(x[0], cuda.ndarray):
         y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     else:
         y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     return y,
Example #24
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            gx = cuda.cupy.empty_like(x)

            if _cudnn_version >= 4000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')

                algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                    handle, x_desc.value, gy_desc.value,
                    self.conv_desc.value, self.filter_desc.value,
                    _bwd_filter_pref, workspace_size)
                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, self.filter_desc.value, gW.data.ptr)

                algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                    handle, self.filter_desc.value, gy_desc.value,
                    self.conv_desc.value, x_desc.value, _bwd_data_pref,
                    workspace_size)
                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, x_desc.value, gx.data.ptr)
            else:
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, self.filter_desc.value, gW.data.ptr)
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
        else:
            gW_mat = gW.reshape(out_c, c * kh * kw)
            col_mats = self.col.reshape(n, c * kh * kw, out_h * out_w)
            gy_mats = gy.reshape(n, out_c, out_h * out_w)
            # TODO(beam2d): Use streams or batch gemm
            gW_mat[...] = 0
            for i in moves.range(n):
                gW_mat += cuda.cupy.dot(gy_mats[i], col_mats[i].T)

            W_mat = W.reshape(out_c, -1)
            gcol = cuda.cupy.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * kh * kw, out_h * out_w)

            for i in moves.range(n):
                gcol_mats[i] = cuda.cupy.dot(W_mat.T, gy_mats[i])

            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
Example #25
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        if (cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
                                dtype=x.dtype)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx), x.dtype)
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            if _cudnn_version >= 4000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')
                algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                    handle, self.filter_desc.value, x_desc.value,
                    self.conv_desc.value, y_desc.value, _bwd_data_pref,
                    workspace_size)
                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    x_desc.value, x.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, y_desc.value, y.data.ptr)
            else:
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    x_desc.value, x.data.ptr, self.conv_desc.value,
                    zero.data, y_desc.value, y.data.ptr)

            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            W_mat = W.reshape(in_c, c * kh * kw)
            x_mats = x.reshape(n, in_c, in_h * in_w)
            gcol = cuda.cupy.empty(
                (n, c, kh, kw, in_h, in_w), dtype=x.dtype)
            gcol_mats = gcol.reshape(n, c * kh * kw, in_h * in_w)
            for i in moves.range(n):
                gcol_mats[i] = cuda.cupy.dot(W_mat.T, x_mats[i])
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
Example #26
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        if (chainer.should_use_cudnn('>=auto')
                and _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw), dtype=x.dtype)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx), x.dtype)
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(b[None, :,
                                                                  None, None])

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            if _cudnn_version >= 3000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size, ), dtype='b')
                if configuration.config.cudnn_deterministic:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA
                else:
                    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                        handle, self.filter_desc.value, x_desc.value,
                        self.conv_desc.value, y_desc.value, _bwd_data_pref,
                        workspace_size)

                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    x_desc.value, x.data.ptr, self.conv_desc.value, algo,
                    workspace.data.ptr, workspace_size, zero.data,
                    y_desc.value, y.data.ptr)
            else:
                if configuration.config.cudnn_deterministic:
                    raise ValueError(
                        "`cudnn_deterministic` option must be False "
                        "if the forward propagation of "
                        "chainer.functions.Deconvolution2D "
                        "uses cuDNN and cuDNN versions < v3. "
                        "Turn off cudnn_deterministic option with "
                        "`chainer.using_config('cudnn_deterministic', False)` "
                        "context.")
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    x_desc.value, x.data.ptr, self.conv_desc.value, zero.data,
                    y_desc.value, y.data.ptr)

            if b is not None:
                cudnn.add_tensor(handle, one.data, self.bias_desc.value,
                                 b.data.ptr, one.data, y_desc.value,
                                 y.data.ptr)
        else:
            gcol = cuda.cupy.tensordot(W, x, (0, 1)).astype(x.dtype,
                                                            copy=False)
            # - k, m, n: shape of out_channel
            # - b: number of inputs
            # - h, w: height and width of kernels
            # k, m, n, b, h, w -> b, k, m, n, h, w
            gcol = cuda.cupy.rollaxis(gcol, 3)
            y = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
Example #27
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        gx = None

        if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            if _cudnn_version >= 3000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')

                if configuration.config.cudnn_deterministic:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA
                else:
                    algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                        handle, x_desc.value, gy_desc.value,
                        self.conv_desc.value, self.filter_desc.value,
                        _bwd_filter_pref, workspace_size)

                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, self.filter_desc.value, gW.data.ptr)

                if self.requires_x_grad:
                    if configuration.config.cudnn_deterministic:
                        algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA
                    else:
                        algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                            handle, self.filter_desc.value, gy_desc.value,
                            self.conv_desc.value, x_desc.value, _bwd_data_pref,
                            workspace_size)

                    gx = cuda.cupy.empty_like(x)
                    libcudnn.convolutionBackwardData_v3(
                        handle, one.data, self.filter_desc.value, W.data.ptr,
                        gy_desc.value, gy.data.ptr, self.conv_desc.value,
                        algo, workspace.data.ptr, workspace_size,
                        zero.data, x_desc.value, gx.data.ptr)
            else:
                if configuration.config.cudnn_deterministic:
                    raise ValueError(
                        "`cudnn_deterministic` option must be False "
                        "if the backpropagation of "
                        "chainer.functions.Convolution2D "
                        "uses cuDNN and cuDNN versions < v3. "
                        "Turn off cudnn_deterministic option with "
                        "`chainer.using_config('cudnn_deterministic', False)` "
                        "context.")
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, self.filter_desc.value, gW.data.ptr)
                if self.requires_x_grad:
                    gx = cuda.cupy.empty_like(x)
                    libcudnn.convolutionBackwardData_v2(
                        handle, one.data, self.filter_desc.value, W.data.ptr,
                        gy_desc.value, gy.data.ptr, self.conv_desc.value,
                        zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
        else:
            gW = cuda.cupy.tensordot(
                gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                             copy=False)
            if self.requires_x_grad:
                gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                                 copy=False)
                gcol = cuda.cupy.rollaxis(gcol, 3)
                gx = conv.col2im_gpu(
                    gcol, self.sy, self.sx, self.ph, self.pw, h, w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
Example #28
    def backward(self, inputs, grad_outputs):
        x, W = inputs[:2]

        if self.bcoeffs is not None:
            xp = cuda.get_array_module(*x)

            olen, ilen, hlen, wlen = W.shape
            if self.coeffs is None:
                self.coeffs = numpy.ones(ilen)
            coeffs = numpy.copy(self.bcoeffs)
            coeffs = numpy.expand_dims(coeffs, 1)
            coeffs = numpy.expand_dims(coeffs, 1)
            coeffs = numpy.expand_dims(coeffs, 0)
            coeffs = numpy.broadcast_to(coeffs, W.shape)
            self.mW = xp.asarray(coeffs, numpy.float32).reshape(W.shape)

        if self.ocoeffs is not None:
            xp = cuda.get_array_module(*x)
            coeffs = numpy.copy(self.ocoeffs)
            self.mb = xp.asarray(coeffs, numpy.float32)

        W = self.M * W
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        xp = cuda.get_array_module(*x)

        B, C, KY, KX, IY, IX = self.col.shape
        D = W.shape[0]

        # (B, C*D, IY, IX) -> (C, D, B*IY*IX)
        gy_ = gy.reshape((B, C, D, IY * IX)).transpose(1, 2, 0, 3) \
            .reshape((C, D, B * IY * IX))
        c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
            .reshape((C, B * IY * IX, KY * KX))
        # (C, D, B*IY*IX), (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
        gW_ = _matmul(gy_, c_, xp)
        gW = gW_.reshape((C, D, KY, KX)).transpose(1, 0, 2, 3)
        gW = gW.astype(W.dtype, copy=False)

        w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
        # (C, KY*KX, D), (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)
        gcol = _matmul(w_, gy_, xp).reshape((C, KY, KX, B, IY, IX))
        gcol = gcol.astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, 3)

        if xp is numpy:
            gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)
        else:
            gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)

        if hasattr(self, 'mW'):
            gW = self.mW * gW
            if hasattr(self, 'mb'):
                xp = cuda.get_array_module(*x)
                gW = xp.broadcast_to(
                    xp.expand_dims(
                        xp.expand_dims(xp.expand_dims(self.mb, 1), 1), 0),
                    gW.shape) * gW
        if b is None:
            return gx, gW
        else:
            gy = xp.rollaxis(gy, 1, 4)
            gb = gy.sum(axis=(0, 1, 2))
            if hasattr(self, 'mb'):
                gb = self.mb * gb
            return gx, gW, gb
Example #29
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape

        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn
                and _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            gx = cuda.cupy.empty_like(x)

            if _cudnn_version >= 4000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size, ), dtype='b')

                algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                    handle, x_desc.value, gy_desc.value, self.conv_desc.value,
                    self.filter_desc.value, _bwd_filter_pref, workspace_size)
                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr, gy_desc.value,
                    gy.data.ptr, self.conv_desc.value, algo,
                    workspace.data.ptr, workspace_size, zero.data,
                    self.filter_desc.value, gW.data.ptr)

                algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                    handle, self.filter_desc.value, gy_desc.value,
                    self.conv_desc.value, x_desc.value, _bwd_data_pref,
                    workspace_size)
                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value, algo,
                    workspace.data.ptr, workspace_size, zero.data,
                    x_desc.value, gx.data.ptr)
            else:
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr, gy_desc.value,
                    gy.data.ptr, self.conv_desc.value, zero.data,
                    self.filter_desc.value, gW.data.ptr)
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(handle, one.data,
                                                 gy_desc.value, gy.data.ptr,
                                                 zero.data,
                                                 self.bias_desc.value,
                                                 gb.data.ptr)
        else:
            gW_mat = gW.reshape(out_c, c * kh * kw)
            col_mats = self.col.reshape(n, c * kh * kw, out_h * out_w)
            gy_mats = gy.reshape(n, out_c, out_h * out_w)
            # TODO(beam2d): Use streams or batch gemm
            gW_mat[...] = 0
            for i in moves.range(n):
                gW_mat += cuda.cupy.dot(gy_mats[i], col_mats[i].T)

            W_mat = W.reshape(out_c, -1)
            Wb_mat = _kern()(W_mat)

            gcol = cuda.cupy.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * kh * kw, out_h * out_w)

            for i in moves.range(n):
                gcol_mats[i] = cuda.cupy.dot(Wb_mat.T, gy_mats[i])

            gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
Example #30
 def test_col2im_consistency(self):
     col = conv.im2col_cpu(self.x, 3, 3, 2, 2, 1, 1)
     h, w = self.x.shape[2:]
     im_cpu = conv.col2im_cpu(col, 2, 2, 1, 1, h, w)
     im_gpu = conv.col2im_gpu(cuda.to_gpu(col), 2, 2, 1, 1, h, w)
     gradient_check.assert_allclose(im_cpu, im_gpu.get())
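A self-contained variant of the round trip checked above; this is a sketch assuming the chainer.utils.conv helpers, and only the shape is asserted because col2im sums overlapping patches rather than exactly inverting im2col.

import numpy
from chainer.utils import conv

x = numpy.random.rand(2, 3, 8, 8).astype(numpy.float32)
col = conv.im2col_cpu(x, 3, 3, 2, 2, 1, 1)   # kh, kw, sy, sx, ph, pw
h, w = x.shape[2:]
im = conv.col2im_cpu(col, 2, 2, 1, 1, h, w)  # sy, sx, ph, pw, h, w
assert im.shape == x.shape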
Example #31
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]
        dkh, dkw = kh + (kh - 1) * (self.dy - 1), kw + (kw - 1) * (self.dx - 1)

        gW = cuda.cupy.empty_like(W)
        if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):

            pad_x = cuda.cupy.zeros(
                (n, c, h + 2 * self.ph, w + 2 * self.pw), dtype=x.dtype)
            pad_x[:, :, self.ph:self.ph + h, self.pw:self.pw + w] = x

            out_h_s1 = h + 2 * self.ph - dkh + 1
            out_w_s1 = w + 2 * self.pw - dkw + 1

            out_sh = out_h + (out_h - 1) * (self.sy - 1)
            out_sw = out_w + (out_w - 1) * (self.sx - 1)

            gy_ph = (h + dkh - out_sh - 1) // 2
            gy_pw = (w + dkw - out_sw - 1) // 2

            pad_gy = cuda.cupy.zeros(
                (n, out_c, h + dkh - 1, w + dkw - 1), dtype=x.dtype)
            pad_gy[:, :,
                   gy_ph:gy_ph + out_sh:self.sy,
                   gy_pw:gy_pw + out_sw:self.sx] = gy

            for j in moves.range(kh):
                for i in moves.range(kw):
                    xji = cuda.cupy.ascontiguousarray(
                        pad_x[:, :,
                              j * self.dy:j * self.dy + out_h_s1,
                              i * self.dx:i * self.dx + out_w_s1])
                    gyji = cuda.cupy.ascontiguousarray(
                        pad_gy[:, :,
                               j * self.dy:j * self.dy + h,
                               i * self.dx:i * self.dx + w])
                    Wji = cuda.cupy.ascontiguousarray(
                        W[:, :, -1::-1, -1::-1][:, :, j:j + 1, i:i + 1])

                    if i == 0 and j == 0:
                        x = cuda.cupy.ascontiguousarray(x)
                        gy = cuda.cupy.ascontiguousarray(gy)

                        handle = cudnn.get_handle()
                        x_desc = cudnn.create_tensor_descriptor(x)
                        xji_desc = cudnn.create_tensor_descriptor(xji)
                        gy_desc = cudnn.create_tensor_descriptor(gy)
                        gyji_desc = cudnn.create_tensor_descriptor(gyji)
                        conv_desc_data = cudnn.create_convolution_descriptor(
                            (0, 0), (1, 1), xji.dtype)

                        oz_dtype = 'd' if x.dtype == 'd' else 'f'
                        one = numpy.array(1, dtype=oz_dtype).ctypes
                        zero = numpy.array(0, dtype=oz_dtype).ctypes
                        gx = cuda.cupy.zeros_like(x)
                        gWji = cuda.cupy.empty((out_c, c, 1, 1), dtype=W.dtype)

                        if _cudnn_version >= 4000:
                            workspace_size = cuda.get_max_workspace_size()
                            workspace = cuda.cupy.empty(
                                (workspace_size,), dtype='b')

                            algo_filter = (
                                libcudnn.getConvolutionBackwardFilterAlgorithm(
                                    handle, xji_desc.value, gy_desc.value,
                                    self.conv_desc.value,
                                    self.filter_desc.value,
                                    _bwd_filter_pref, workspace_size))
                            algo_data = (
                                libcudnn.getConvolutionBackwardDataAlgorithm(
                                    handle, self.filter_desc.value,
                                    gyji_desc.value, conv_desc_data.value,
                                    x_desc.value, _bwd_data_pref,
                                    workspace_size))

                    if _cudnn_version >= 4000:
                        libcudnn.convolutionBackwardFilter_v3(
                            handle, one.data, xji_desc.value, xji.data.ptr,
                            gy_desc.value, gy.data.ptr, self.conv_desc.value,
                            algo_filter, workspace.data.ptr, workspace_size,
                            zero.data, self.filter_desc.value, gWji.data.ptr)
                        libcudnn.convolutionBackwardData_v3(
                            handle, one.data, self.filter_desc.value,
                            Wji.data.ptr, gyji_desc.value,
                            gyji.data.ptr, conv_desc_data.value,
                            algo_data, workspace.data.ptr, workspace_size,
                            one.data, x_desc.value, gx.data.ptr)
                    else:
                        libcudnn.convolutionBackwardFilter_v2(
                            handle, one.data, xji_desc.value, xji.data.ptr,
                            gy_desc.value, gy.data.ptr, self.conv_desc.value,
                            zero.data, self.filter_desc.value, gWji.data.ptr)
                        libcudnn.convolutionBackwardData_v2(
                            handle, one.data, self.filter_desc.value,
                            Wji.data.ptr, gyji_desc.value,
                            gyji.data.ptr, conv_desc_data.value,
                            one.data, x_desc.value, gx.data.ptr)
                    gW[:, :, j:j + 1, i:i + 1] = gWji

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
        else:
            gW = cuda.cupy.tensordot(
                gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                             copy=False)
            gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                             copy=False)
            gcol = cuda.cupy.rollaxis(gcol, 3)
            gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw,
                                 h, w, dy=self.dy, dx=self.dx)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
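The dkh/dkw arithmetic above gives the extent of the dilated kernel; a quick check of the formula:

# dkh = kh + (kh - 1) * (dy - 1): a 3x3 kernel with dilation 2
# covers a 5x5 footprint.
kh, dy = 3, 2
dkh = kh + (kh - 1) * (dy - 1)
assert dkh == 5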
Example #32
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        if (cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
                                dtype=x.dtype)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx), x.dtype)
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            if _cudnn_version >= 4000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')
                if not self.deterministic:
                    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                        handle, self.filter_desc.value, x_desc.value,
                        self.conv_desc.value, y_desc.value, _bwd_data_pref,
                        workspace_size)
                else:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA

                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    x_desc.value, x.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, y_desc.value, y.data.ptr)
            else:
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    x_desc.value, x.data.ptr, self.conv_desc.value,
                    zero.data, y_desc.value, y.data.ptr)

            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            gcol = cuda.cupy.tensordot(W, x, (0, 1)).astype(x.dtype,
                                                            copy=False)
            # - k, m, n: shape of out_channel
            # - b: number of inputs
            # - h, w: height and width of kernels
            # k, m, n, b, h, w -> b, k, m, n, h, w
            gcol = cuda.cupy.rollaxis(gcol, 3)
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
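conv.get_deconv_outsize used above follows the standard transposed-convolution size formula (with cover_all disabled); a sketch of the arithmetic:

def deconv_outsize(size, k, s, p):
    # out = s * (in - 1) + k - 2 * p
    return s * (size - 1) + k - 2 * p

assert deconv_outsize(4, 3, 2, 1) == 7  # 4x4 input, 3x3 kernel, stride 2, pad 1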
Example #33
File: im2col.py Project: fukatani/chainer
def _col2im(x, *args, **kwargs):
    if isinstance(x, numpy.ndarray):
        return col2im_cpu(x, *args, **kwargs)
    return col2im_gpu(x, *args, **kwargs)
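A hedged sketch of the same dispatch written with cuda.get_array_module, the helper the other examples in this listing use; col2im_cpu and col2im_gpu are assumed to be in scope, as they are in the original im2col.py.

import numpy
from chainer import cuda

def _col2im_alt(x, *args, **kwargs):
    # numpy input goes to the CPU kernel, cupy input to the GPU kernel.
    if cuda.get_array_module(x) is numpy:
        return col2im_cpu(x, *args, **kwargs)
    return col2im_gpu(x, *args, **kwargs)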
Example #34
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn
                and _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            gx = cuda.cupy.empty_like(x)

            if _cudnn_version >= 3000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size, ), dtype='b')

                if not self.deterministic:
                    algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                        handle, x_desc.value, gy_desc.value,
                        self.conv_desc.value, self.filter_desc.value,
                        _bwd_filter_pref, workspace_size)
                else:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA

                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr, gy_desc.value,
                    gy.data.ptr, self.conv_desc.value, algo,
                    workspace.data.ptr, workspace_size, zero.data,
                    self.filter_desc.value, gW.data.ptr)

                if not self.deterministic:
                    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                        handle, self.filter_desc.value, gy_desc.value,
                        self.conv_desc.value, x_desc.value, _bwd_data_pref,
                        workspace_size)
                else:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA

                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value, algo,
                    workspace.data.ptr, workspace_size, zero.data,
                    x_desc.value, gx.data.ptr)
            else:
                if self.deterministic:
                    raise ValueError("'deterministic' option not available "
                                     "for cuDNN versions < v3")
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr, gy_desc.value,
                    gy.data.ptr, self.conv_desc.value, zero.data,
                    self.filter_desc.value, gW.data.ptr)
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(handle, one.data,
                                                 gy_desc.value, gy.data.ptr,
                                                 zero.data,
                                                 self.bias_desc.value,
                                                 gb.data.ptr)
        else:
            gW = cuda.cupy.tensordot(gy, self.col,
                                     ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                                    copy=False)
            gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                             copy=False)
            gcol = cuda.cupy.rollaxis(gcol, 3)

            gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
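The one/zero scalars built above are the alpha/beta blending factors cuDNN takes as host pointers; a minimal sketch of the pattern (NumPy only):

import numpy

oz_dtype = 'f'  # 'd' only when the inputs are double precision
one = numpy.array(1, dtype=oz_dtype).ctypes
zero = numpy.array(0, dtype=oz_dtype).ctypes
# one.data / zero.data are the host addresses handed to libcudnn calls.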
Example #35
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        if (cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
                                dtype=x.dtype)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx), x.dtype)
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            if _cudnn_version >= 4000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')
                if not self.deterministic:
                    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                        handle, self.filter_desc.value, x_desc.value,
                        self.conv_desc.value, y_desc.value, _bwd_data_pref,
                        workspace_size)
                else:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA

                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    x_desc.value, x.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, y_desc.value, y.data.ptr)
            else:
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    x_desc.value, x.data.ptr, self.conv_desc.value,
                    zero.data, y_desc.value, y.data.ptr)

            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            W_mat = W.reshape(in_c, c * kh * kw)
            x_mats = x.reshape(n, in_c, in_h * in_w)
            gcol = cuda.cupy.empty(
                (n, c, kh, kw, in_h, in_w), dtype=x.dtype)
            gcol_mats = gcol.reshape(n, c * kh * kw, in_h * in_w)
            for i in moves.range(n):
                gcol_mats[i] = cuda.cupy.dot(W_mat.T, x_mats[i])
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
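Shape bookkeeping for the per-sample product in the non-cuDNN branch above; a NumPy sketch with illustrative sizes:

import numpy

in_c, c, kh, kw, in_h, in_w = 3, 4, 3, 3, 5, 5
W_mat = numpy.zeros((in_c, c * kh * kw), dtype=numpy.float32)
x_mat = numpy.zeros((in_c, in_h * in_w), dtype=numpy.float32)
# (c*kh*kw, in_c) @ (in_c, in_h*in_w) -> one column block per input pixel
assert numpy.dot(W_mat.T, x_mat).shape == (c * kh * kw, in_h * in_w)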
Example #36
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        
        if self.bcoeffs is not None:
            olen, ilen, hlen, wlen = W.shape
            if self.coeffs is None:
                self.coeffs = numpy.ones(ilen)
            coeffs = numpy.copy(self.bcoeffs)
            coeffs = numpy.expand_dims(coeffs, 1)
            coeffs = numpy.expand_dims(coeffs, 1)
            coeffs = numpy.expand_dims(coeffs, 0)
            coeffs = numpy.broadcast_to(coeffs, W.shape)
            self.mW = cuda.cupy.asarray(coeffs, numpy.float32)

        if self.ocoeffs is not None:
            coeffs = numpy.copy(self.ocoeffs)
            self.mb = cuda.cupy.asarray(coeffs, numpy.float32)

        W = self.M * W
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        gx = None

        if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            if _cudnn_version >= 3000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')

                if configuration.config.cudnn_deterministic:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA
                else:
                    algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                        handle, x_desc.value, gy_desc.value,
                        self.conv_desc.value, self.filter_desc.value,
                        _bwd_filter_pref, workspace_size)

                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, self.filter_desc.value, gW.data.ptr)

                if self.requires_x_grad:
                    if configuration.config.cudnn_deterministic:
                        algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA
                    else:
                        algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                            handle, self.filter_desc.value, gy_desc.value,
                            self.conv_desc.value, x_desc.value, _bwd_data_pref,
                            workspace_size)

                    gx = cuda.cupy.empty_like(x)
                    libcudnn.convolutionBackwardData_v3(
                        handle, one.data, self.filter_desc.value, W.data.ptr,
                        gy_desc.value, gy.data.ptr, self.conv_desc.value,
                        algo, workspace.data.ptr, workspace_size,
                        zero.data, x_desc.value, gx.data.ptr)
            else:
                if configuration.config.cudnn_deterministic:
                    raise ValueError(
                        "`cudnn_deterministic` option must be False "
                        "if the backpropagation of "
                        "chainer.functions.Convolution2D "
                        "uses cuDNN and cuDNN versions < v3. "
                        "Turn off cudnn_deterministic option with "
                        "`chainer.using_config('cudnn_deterministic', False)` "
                        "context.")
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, self.filter_desc.value, gW.data.ptr)
                if self.requires_x_grad:
                    gx = cuda.cupy.empty_like(x)
                    libcudnn.convolutionBackwardData_v2(
                        handle, one.data, self.filter_desc.value, W.data.ptr,
                        gy_desc.value, gy.data.ptr, self.conv_desc.value,
                        zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
        else:
            gW = cuda.cupy.tensordot(
                gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                             copy=False)
            if self.requires_x_grad:
                gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                                 copy=False)
                gcol = cuda.cupy.rollaxis(gcol, 3)
                gx = conv.col2im_gpu(
                    gcol, self.sy, self.sx, self.ph, self.pw, h, w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if hasattr(self, 'mW'):
            gW = self.mW * gW
            if hasattr(self, 'mb'):
                xp = cuda.get_array_module(*x)
                gW = xp.broadcast_to(
                    xp.expand_dims(
                        xp.expand_dims(xp.expand_dims(self.mb, 1), 1), 1),
                    gW.shape) * gW
        if b is None:
            return gx, gW
        else:
            if hasattr(self, 'mb'):
                gb = self.mb * gb
            return gx, gW, gb
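The nested expand_dims above just reshapes the per-output-channel coefficients to (out_c, 1, 1, 1) so they broadcast over gW; an equivalent NumPy sketch:

import numpy

out_c, in_c, kh, kw = 4, 3, 3, 3
mb = numpy.arange(out_c, dtype=numpy.float32)
gW = numpy.ones((out_c, in_c, kh, kw), dtype=numpy.float32)
gW = mb[:, None, None, None] * gW  # same result as the nested expand_dims
assert gW[2, 0, 0, 0] == 2.0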
Example #37
    def forward_gpu(self, inputs):
        self.retain_inputs((0, 1))  # only retain x and W
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not all([isinstance(i, cuda.ndarray) for i in inputs]):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph,
                                                d=self.dy)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw,
                                                d=self.dx)
            assert self.outw > 0, 'Width in the output should be positive.'

        self._set_cover_all(x, W)

        if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
                x.dtype == W.dtype and
                ((self.dy == 1 and self.dx == 1) or _cudnn_version >= 6000)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
                                dtype=x.dtype)
            y_desc = cudnn.create_tensor_descriptor(y)

            filter_desc = cudnn.create_filter_descriptor(W)
            conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx), x.dtype,
                dilation=(self.dy, self.dx),
                use_tensor_core=use_tensor_core)
            if b is not None:
                bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')
            if configuration.config.cudnn_deterministic:
                algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
            else:
                algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                    handle, filter_desc.value, x_desc.value,
                    conv_desc.value, y_desc.value, _bwd_data_pref,
                    workspace_size)

            if use_tensor_core:
                # Only CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 supports
                # Tensor-Core in cuDNN7
                algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1

            libcudnn.convolutionBackwardData_v3(
                handle, one.data, filter_desc.value, W.data.ptr,
                x_desc.value, x.data.ptr, conv_desc.value,
                algo, workspace.data.ptr, workspace_size,
                zero.data, y_desc.value, y.data.ptr)

            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            gcol = cuda.cupy.tensordot(W, x, (0, 1)).astype(x.dtype,
                                                            copy=False)
            # - k, m, n: shape of out_channel
            # - b: number of inputs
            # - h, w: height and width of kernels
            # k, m, n, b, h, w -> b, k, m, n, h, w
            gcol = cuda.cupy.rollaxis(gcol, 3)
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw,
                dy=self.dy, dx=self.dx)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
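The cuDNN branch above is gated by chainer.should_use_cudnn, which reads the thread-local configuration; a hedged usage sketch, assuming a Chainer version with the v2-style config API:

import chainer

# Force the tensordot/col2im fallback path regardless of cuDNN availability.
with chainer.using_config('use_cudnn', 'never'):
    assert not chainer.should_use_cudnn('>=auto')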
Example #39
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        gx = None

        if (not self.cover_all and chainer.should_use_cudnn('>=auto')
                and x.dtype == W.dtype):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size, ), dtype='b')

            if configuration.config.cudnn_deterministic:
                algo = libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
            else:
                algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                    handle, x_desc.value, gy_desc.value, self.conv_desc.value,
                    self.filter_desc.value, _bwd_filter_pref, workspace_size)

            libcudnn.convolutionBackwardFilter_v3(
                handle, one.data, x_desc.value, x.data.ptr, gy_desc.value,
                gy.data.ptr, self.conv_desc.value, algo, workspace.data.ptr,
                workspace_size, zero.data, self.filter_desc.value, gW.data.ptr)

            if self.requires_x_grad:
                if configuration.config.cudnn_deterministic:
                    algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
                else:
                    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                        handle, self.filter_desc.value, gy_desc.value,
                        self.conv_desc.value, x_desc.value, _bwd_data_pref,
                        workspace_size)

                gx = cuda.cupy.empty_like(x)
                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value, algo,
                    workspace.data.ptr, workspace_size, zero.data,
                    x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(handle, one.data,
                                                 gy_desc.value, gy.data.ptr,
                                                 zero.data,
                                                 self.bias_desc.value,
                                                 gb.data.ptr)
        else:
            gW = cuda.cupy.tensordot(gy, self.col,
                                     ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                                    copy=False)
            if self.requires_x_grad:
                gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                                 copy=False)
                gcol = cuda.cupy.rollaxis(gcol, 3)
                gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw,
                                     h, w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
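Axis bookkeeping for the filter-gradient tensordot used above: contracting over the batch and the two spatial output axes leaves exactly the filter shape. A NumPy check:

import numpy

n, out_c, c, kh, kw, oh, ow = 2, 4, 3, 3, 3, 5, 5
gy = numpy.zeros((n, out_c, oh, ow), dtype=numpy.float32)
col = numpy.zeros((n, c, kh, kw, oh, ow), dtype=numpy.float32)
gW = numpy.tensordot(gy, col, ((0, 2, 3), (0, 4, 5)))
assert gW.shape == (out_c, c, kh, kw)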
Example #41
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        if cuda.cudnn_enabled and self.use_cudnn:
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            dtype = x.dtype
            one = numpy.array(1, dtype=dtype).ctypes
            zero = numpy.array(0, dtype=dtype).ctypes

            libcudnn.convolutionBackwardFilter_v2(
                handle, one.data, x_desc.value, x.data.ptr, gy_desc.value,
                gy.data.ptr, self.conv_desc.value, zero.data,
                self.filter_desc.value, gW.data.ptr)

            gx = cuda.cupy.empty_like(x)
            libcudnn.convolutionBackwardData_v2(
                handle, one.data, self.filter_desc.value, W.data.ptr,
                gy_desc.value, gy.data.ptr, self.conv_desc.value, zero.data,
                x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(handle, one.data,
                                                 gy_desc.value, gy.data.ptr,
                                                 zero.data,
                                                 self.bias_desc.value,
                                                 gb.data.ptr)
        else:
            gW_mat = gW.reshape(out_c, c * kh * kw)
            col_mats = self.col.reshape(n, c * kh * kw, out_h * out_w)
            gy_mats = gy.reshape(n, out_c, out_h * out_w)
            # TODO(beam2d): Use streams or batch gemm
            gW_mat[...] = 0
            for i in moves.range(n):
                gW_mat += cuda.cupy.dot(gy_mats[i], col_mats[i].T)

            W_mat = W.reshape(out_c, -1)
            gcol = cuda.cupy.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * kh * kw, out_h * out_w)
            for i in moves.range(n):
                cuda.cupy.dot(W_mat.T, gy_mats[i], gcol_mats[i])

            gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
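The accumulation loop above (flagged by the TODO) can be collapsed into one batched contraction; a NumPy sketch of that equivalence:

import numpy

n, out_c, ckk, ohw = 2, 4, 27, 25
gy_mats = numpy.random.rand(n, out_c, ohw).astype(numpy.float32)
col_mats = numpy.random.rand(n, ckk, ohw).astype(numpy.float32)
loop = sum(numpy.dot(gy_mats[i], col_mats[i].T) for i in range(n))
batched = numpy.einsum('nij,nkj->ik', gy_mats, col_mats)
assert numpy.allclose(loop, batched, atol=1e-4)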
Example #42
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not all([isinstance(i, cuda.ndarray) for i in inputs]):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        if chainer.should_use_cudnn('>=auto') and x.dtype == W.dtype:
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
                                dtype=x.dtype)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx), x.dtype)
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')
            if configuration.config.cudnn_deterministic:
                algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
            else:
                algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                    handle, self.filter_desc.value, x_desc.value,
                    self.conv_desc.value, y_desc.value, _bwd_data_pref,
                    workspace_size)

            libcudnn.convolutionBackwardData_v3(
                handle, one.data, self.filter_desc.value, W.data.ptr,
                x_desc.value, x.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size,
                zero.data, y_desc.value, y.data.ptr)

            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            gcol = cuda.cupy.tensordot(W, x, (0, 1)).astype(x.dtype,
                                                            copy=False)
            # - k, m, n: shape of out_channel
            # - b: number of inputs
            # - h, w: height and width of kernels
            # k, m, n, b, h, w -> b, k, m, n, h, w
            gcol = cuda.cupy.rollaxis(gcol, 3)
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
Example #43
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]
        dkh, dkw = kh + (kh - 1) * (self.dy - 1), kw + (kw - 1) * (self.dx - 1)

        gW = cuda.cupy.empty_like(W)
        if (not self.cover_all and chainer.should_use_cudnn('>=auto')
                and _check_cudnn_acceptable_type(x.dtype, W.dtype)):

            pad_x = cuda.cupy.zeros((n, c, h + 2 * self.ph, w + 2 * self.pw),
                                    dtype=x.dtype)
            pad_x[:, :, self.ph:self.ph + h, self.pw:self.pw + w] = x

            out_h_s1 = h + 2 * self.ph - dkh + 1
            out_w_s1 = w + 2 * self.pw - dkw + 1

            out_sh = out_h + (out_h - 1) * (self.sy - 1)
            out_sw = out_w + (out_w - 1) * (self.sx - 1)

            gy_ph = (h + dkh - out_sh - 1) // 2
            gy_pw = (w + dkw - out_sw - 1) // 2

            pad_gy = cuda.cupy.zeros((n, out_c, h + dkh - 1, w + dkw - 1),
                                     dtype=x.dtype)
            pad_gy[:, :, gy_ph:gy_ph + out_sh:self.sy,
                   gy_pw:gy_pw + out_sw:self.sx] = gy

            gx = None

            for j in moves.range(kh):
                for i in moves.range(kw):
                    xji = cuda.cupy.ascontiguousarray(
                        pad_x[:, :, j * self.dy:j * self.dy + out_h_s1,
                              i * self.dx:i * self.dx + out_w_s1])
                    gyji = cuda.cupy.ascontiguousarray(
                        pad_gy[:, :, j * self.dy:j * self.dy + h,
                               i * self.dx:i * self.dx + w])
                    Wji = cuda.cupy.ascontiguousarray(W[:, :, -1::-1,
                                                        -1::-1][:, :, j:j + 1,
                                                                i:i + 1])

                    if i == 0 and j == 0:
                        x = cuda.cupy.ascontiguousarray(x)
                        gy = cuda.cupy.ascontiguousarray(gy)

                        handle = cudnn.get_handle()
                        x_desc = cudnn.create_tensor_descriptor(x)
                        xji_desc = cudnn.create_tensor_descriptor(xji)
                        gy_desc = cudnn.create_tensor_descriptor(gy)
                        gyji_desc = cudnn.create_tensor_descriptor(gyji)
                        conv_desc_data = cudnn.create_convolution_descriptor(
                            (0, 0), (1, 1), xji.dtype)

                        oz_dtype = 'd' if x.dtype == 'd' else 'f'
                        one = numpy.array(1, dtype=oz_dtype).ctypes
                        zero = numpy.array(0, dtype=oz_dtype).ctypes
                        if self.requires_x_grad:
                            gx = cuda.cupy.zeros_like(x)
                        gWji = cuda.cupy.empty((out_c, c, 1, 1), dtype=W.dtype)

                        if _cudnn_version >= 4000:
                            workspace_size = cuda.get_max_workspace_size()
                            workspace = cuda.cupy.empty((workspace_size, ),
                                                        dtype='b')

                            algo_filter = (
                                libcudnn.getConvolutionBackwardFilterAlgorithm(
                                    handle, xji_desc.value, gy_desc.value,
                                    self.conv_desc.value,
                                    self.filter_desc.value, _bwd_filter_pref,
                                    workspace_size))
                            algo_data = (
                                libcudnn.getConvolutionBackwardDataAlgorithm(
                                    handle, self.filter_desc.value,
                                    gyji_desc.value, conv_desc_data.value,
                                    x_desc.value, _bwd_data_pref,
                                    workspace_size))

                    if _cudnn_version >= 4000:
                        libcudnn.convolutionBackwardFilter_v3(
                            handle, one.data, xji_desc.value, xji.data.ptr,
                            gy_desc.value, gy.data.ptr, self.conv_desc.value,
                            algo_filter, workspace.data.ptr, workspace_size,
                            zero.data, self.filter_desc.value, gWji.data.ptr)
                    else:
                        libcudnn.convolutionBackwardFilter_v2(
                            handle, one.data, xji_desc.value, xji.data.ptr,
                            gy_desc.value, gy.data.ptr, self.conv_desc.value,
                            zero.data, self.filter_desc.value, gWji.data.ptr)

                    if self.requires_x_grad:
                        if _cudnn_version >= 4000:
                            libcudnn.convolutionBackwardData_v3(
                                handle, one.data, self.filter_desc.value,
                                Wji.data.ptr, gyji_desc.value, gyji.data.ptr,
                                conv_desc_data.value, algo_data,
                                workspace.data.ptr, workspace_size, one.data,
                                x_desc.value, gx.data.ptr)
                        else:
                            libcudnn.convolutionBackwardData_v2(
                                handle, one.data, self.filter_desc.value,
                                Wji.data.ptr, gyji_desc.value, gyji.data.ptr,
                                conv_desc_data.value, one.data, x_desc.value,
                                gx.data.ptr)

                    gW[:, :, j:j + 1, i:i + 1] = gWji

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(handle, one.data,
                                                 gy_desc.value, gy.data.ptr,
                                                 zero.data,
                                                 self.bias_desc.value,
                                                 gb.data.ptr)
        else:
            gW = cuda.cupy.tensordot(gy, self.col,
                                     ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                                    copy=False)
            if not self.requires_x_grad:
                gx = None
            else:
                gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                                 copy=False)
                gcol = cuda.cupy.rollaxis(gcol, 3)
                gx = conv.col2im_gpu(gcol,
                                     self.sy,
                                     self.sx,
                                     self.ph,
                                     self.pw,
                                     h,
                                     w,
                                     dy=self.dy,
                                     dx=self.dx)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
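The strided assignment into pad_gy above upsamples gy by inserting zeros, so that a stride-1 correlation reproduces the strided convolution's gradient; a 1-D NumPy sketch of that step:

import numpy

gy = numpy.array([1., 2., 3.], dtype=numpy.float32)
sy = 2
out_s = len(gy) + (len(gy) - 1) * (sy - 1)
canvas = numpy.zeros(out_s, dtype=numpy.float32)
canvas[::sy] = gy  # zeros fill the skipped positions
assert list(canvas) == [1., 0., 2., 0., 3.]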
Example #44
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            gx = cuda.cupy.empty_like(x)

            if _cudnn_version >= 4000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')

                if not self.deterministic:
                    algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                        handle, x_desc.value, gy_desc.value,
                        self.conv_desc.value, self.filter_desc.value,
                        _bwd_filter_pref, workspace_size)
                else:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA

                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, self.filter_desc.value, gW.data.ptr)

                if not self.deterministic:
                    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                        handle, self.filter_desc.value, gy_desc.value,
                        self.conv_desc.value, x_desc.value, _bwd_data_pref,
                        workspace_size)
                else:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA

                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, x_desc.value, gx.data.ptr)
            else:
                if self.deterministic:
                    raise ValueError("'deterministic' option not available "
                                     "for cuDNN versions < v4")
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, self.filter_desc.value, gW.data.ptr)
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
        else:
            gW = cuda.cupy.tensordot(
                gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                             copy=False)
            gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                             copy=False)
            gcol = cuda.cupy.rollaxis(gcol, 3)

            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
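The bias gradient in the fallback branch is just gy summed over the batch and spatial axes; a NumPy check:

import numpy

gy = numpy.ones((2, 4, 5, 5), dtype=numpy.float32)
gb = gy.sum(axis=(0, 2, 3))
assert gb.shape == (4,) and gb[0] == 2 * 5 * 5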