Esempio n. 1
    def _forward_cudnn(self, x, W, b):
        c = W.shape[1]          # W: C_I, C_O, k_1, k_2, ..., k_N
        n, in_c = x.shape[:2]   # x: n, C_I, d_1, d_2, ..., d_N
        ndim = self.ndim
        colon = slice(None)

        # Make empty array for output.
        y_shape = (n, c) + self.outs  # (n, c_O, out_1, out_2, ..., out_N)
        y = cuda.cupy.empty(y_shape, dtype=x.dtype)

        # Convert to C-contiguous arrays.
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        # Get cuDNN handle and descriptors.
        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)
        self.filter_desc = cudnn.create_filter_descriptor(W)
        conv_param = self.pad, self.stride, x.dtype
        self.conv_desc = cudnn.create_convolution_descriptor(*conv_param)
        if b is not None:
            b_index = (None, colon) + (None,) * ndim
            self.bias_desc = cudnn.create_tensor_descriptor(b[b_index])

        # cuDNN forward computation.
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')
        if configuration.config.autotune and _cudnn_version_ >= 5000:
            algo = deconvolution_2d.get_algorithm(W, x, y, conv_param, handle,
                                                  self.filter_desc, x_desc,
                                                  self.conv_desc, y_desc,
                                                  workspace)
        else:
            algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                handle, self.filter_desc.value, x_desc.value,
                self.conv_desc.value, y_desc.value, _bwd_data_pref,
                workspace_size)

        libcudnn.convolutionBackwardData_v3(
            handle, one.data, self.filter_desc.value, W.data.ptr,
            x_desc.value, x.data.ptr, self.conv_desc.value,
            algo, workspace.data.ptr, workspace_size,
            zero.data, y_desc.value, y.data.ptr)

        # Add bias if given.
        # TODO(takagi) Support unshared bias
        if b is not None:
            cudnn.add_tensor(
                handle, one.data, self.bias_desc.value, b.data.ptr,
                one.data, y_desc.value, y.data.ptr)

        return y,
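
The output sizes in self.outs are assumed to be precomputed elsewhere in the class. As a reference point, here is a minimal sketch of the per-dimension output size of a transposed convolution (what chainer.utils.conv.get_deconv_outsize computes for cover_all=False); the function name is illustrative:

def deconv_outsize(size, k, s, p):
    # A stride-s, kernel-k, padding-p deconvolution inverts the
    # convolution output-size formula.
    return s * (size - 1) + k - 2 * p

# e.g. deconv_outsize(3, k=4, s=2, p=1) == 6
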
    def _backward_cudnn(self, x, W, b, gy):
        # Convert to C-contiguous arrays.
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        gy = cuda.cupy.ascontiguousarray(gy)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        # Make empty arrays for results.
        gx = cuda.cupy.empty_like(x)
        gW = cuda.cupy.empty_like(W)

        # Get cuDNN handle and descriptors.
        handle = cudnn.get_handle()
        gy_desc = cudnn.create_tensor_descriptor(gy)
        gx_desc = cudnn.create_tensor_descriptor(gx)

        # Chance to choose implicit-precomp-gemm algorithm.
        workspace_size = cuda.get_max_workspace_size()
        algo = libcudnn.getConvolutionForwardAlgorithm(
            handle, gy_desc.value, self.filter_desc.value,
            self.conv_desc.value, gx_desc.value, _fwd_pref,
            workspace_size)
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')

        # Compute input gradient.
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        libcudnn.convolutionForward(
            handle, one.data, gy_desc.value, gy.data.ptr,
            self.filter_desc.value, W.data.ptr,
            self.conv_desc.value, algo, workspace.data.ptr, workspace_size,
            zero.data, gx_desc.value, gx.data.ptr)

        # Compute bias gradient.
        if b is not None:
            gb = cuda.cupy.empty_like(b)
            libcudnn.convolutionBackwardBias(
                handle, one.data, gy_desc.value, gy.data.ptr,
                zero.data, self.bias_desc.value, gb.data.ptr)

        # Compute filter gradient.
        algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
            handle, gy_desc.value, gx_desc.value,
            self.conv_desc.value, self.filter_desc.value,
            _bwd_filter_pref, workspace_size)

        libcudnn.convolutionBackwardFilter_v3(
            handle, one.data, gy_desc.value, gy.data.ptr,
            gx_desc.value, x.data.ptr, self.conv_desc.value,
            algo, workspace.data.ptr, workspace_size,
            zero.data, self.filter_desc.value, gW.data.ptr)

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
Esempio n. 2
    def _forward_cudnn(self, x, gy):
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape

        iC = c
        iCg = int(iC / self.group)
        gW = cuda.cupy.empty((out_c, iCg, self.kh, self.kw),
                             dtype=self.W_dtype)
        x = cuda.cupy.ascontiguousarray(x)
        gy = cuda.cupy.ascontiguousarray(gy)

        use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        gy_desc = cudnn.create_tensor_descriptor(gy)

        filter_desc = cudnn.create_filter_descriptor(gW)
        conv_param = (self.ph, self.pw), (self.sy, self.sx), x.dtype
        dilation = (self.dy, self.dx)
        conv_desc = cudnn.create_convolution_descriptor(
            *conv_param,
            dilation=dilation,
            use_tensor_core=use_tensor_core,
            group=self.group)

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes

        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size, ), dtype='b')

        if configuration.config.cudnn_deterministic:
            algo = libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
        elif configuration.config.autotune and _cudnn_version >= 5000:
            algo = _get_algorithm_bwd_filter(x, gy, gW,
                                             conv_param + (dilation, ), handle,
                                             x_desc, gy_desc, conv_desc,
                                             filter_desc, workspace)
        else:
            algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                handle, x_desc.value, gy_desc.value, conv_desc.value,
                filter_desc.value, _bwd_filter_pref, workspace_size)

        if use_tensor_core:
            # Only CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 supports
            # Tensor-Core in cuDNN7.
            algo = libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1

        libcudnn.convolutionBackwardFilter_v3(handle, one.data, x_desc.value,
                                              x.data.ptr, gy_desc.value,
                                              gy.data.ptr, conv_desc.value,
                                              algo, workspace.data.ptr,
                                              workspace_size, zero.data,
                                              filter_desc.value, gW.data.ptr)

        return gW,
Esempio n. 3
    def _backward_cudnn(self, x, W, b, gy):
        # Convert to C-contiguous arrays.
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        gy = cuda.cupy.ascontiguousarray(gy)

        # Make empty arrays for result.
        gx = cuda.cupy.empty_like(x)
        gW = cuda.cupy.empty_like(W)

        # Get cuDNN handle and descriptors.
        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        gy_desc = cudnn.create_tensor_descriptor(gy)

        # Compute gradients.
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')

        # Compute filter weight gradient.
        if configuration.config.autotune and _cudnn_version_ >= 5000:
            algo = convolution_2d._get_algorithm_bwd_filter(
                x, gy, gW, self.conv_param, handle, x_desc, gy_desc,
                self.conv_desc, self.filter_desc, workspace)
        else:
            algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                handle, x_desc.value, gy_desc.value, self.conv_desc.value,
                self.filter_desc.value, _bwd_filter_pref, workspace_size)

        libcudnn.convolutionBackwardFilter_v3(
            handle, one.data, x_desc.value, x.data.ptr,
            gy_desc.value, gy.data.ptr, self.conv_desc.value,
            algo, workspace.data.ptr, workspace_size,
            zero.data, self.filter_desc.value, gW.data.ptr)

        # Compute input gradient.
        algo = libcudnn.getConvolutionBackwardDataAlgorithm(
            handle, self.filter_desc.value, gy_desc.value,
            self.conv_desc.value, x_desc.value, _bwd_data_pref,
            workspace_size)
        libcudnn.convolutionBackwardData_v3(
            handle, one.data, self.filter_desc.value, W.data.ptr,
            gy_desc.value, gy.data.ptr, self.conv_desc.value,
            algo, workspace.data.ptr, workspace_size,
            zero.data, x_desc.value, gx.data.ptr)

        # Compute bias gradient if given and return gradients.
        if b is None:
            return gx, gW
        else:
            gb = cuda.cupy.empty_like(b)
            libcudnn.convolutionBackwardBias(
                handle, one.data, gy_desc.value, gy.data.ptr,
                zero.data, self.bias_desc.value, gb.data.ptr)
            return gx, gW, gb
Esempio n. 4
    def _forward_cudnn(self, x, gy):
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape

        iC = c
        iCg = int(iC / self.group)
        gW = cuda.cupy.empty((out_c, iCg, self.kh, self.kw),
                             dtype=self.W_dtype)
        x = cuda.cupy.ascontiguousarray(x)
        gy = cuda.cupy.ascontiguousarray(gy)

        use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

        # cuDNN 7 supports dilation only in *_BWD_FILTER_ALGO_0, but
        # it supports Tensor Cores only in *_BWD_FILTER_ALGO_1.
        if use_tensor_core and (self.dx > 1 or self.dy > 1):
            use_tensor_core = False

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        gy_desc = cudnn.create_tensor_descriptor(gy)

        filter_desc = cudnn.create_filter_descriptor(gW)
        conv_param = (self.ph, self.pw), (self.sy, self.sx), x.dtype
        dilation = (self.dy, self.dx)
        conv_desc = cudnn.create_convolution_descriptor(
            *conv_param, dilation=dilation,
            use_tensor_core=use_tensor_core,
            group=self.group)

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes

        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')

        if configuration.config.cudnn_deterministic:
            algo = libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
        elif configuration.config.autotune and _cudnn_version >= 5000:
            algo = _get_algorithm_bwd_filter(
                x, gy, gW, conv_param + (dilation,), handle, x_desc, gy_desc,
                conv_desc, filter_desc, workspace)
        else:
            algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                handle, x_desc.value, gy_desc.value, conv_desc.value,
                filter_desc.value, _bwd_filter_pref, workspace_size)

        if use_tensor_core:
            algo = self._tensor_core_adjust_algo()

        libcudnn.convolutionBackwardFilter_v3(
            handle, one.data, x_desc.value, x.data.ptr, gy_desc.value,
            gy.data.ptr, conv_desc.value, algo, workspace.data.ptr,
            workspace_size, zero.data, filter_desc.value, gW.data.ptr)

        return gW,
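
self._tensor_core_adjust_algo() is defined elsewhere in this class. Going by the comments in the surrounding examples, it appears to force the one backward-filter algorithm that supports Tensor Cores in cuDNN 7; a sketch under that assumption:

def _tensor_core_adjust_algo(self):
    # Assumption, per the comments in the neighbouring examples: only
    # CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 supports Tensor Cores in cuDNN 7.
    return libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
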
Esempio n. 5
    def _forward_cudnn(self, x, W, b, y):
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

        # cuDNN 7 supports dilation only in *_FWD_ALGO_IMPLICIT_GEMM, but
        # it supports Tensor Cores only in *_FWD_ALGO_IMPLICIT_PRECOMP_GEMM.
        if use_tensor_core and (self.dx > 1 or self.dy > 1):
            use_tensor_core = False

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)

        filter_desc = cudnn.create_filter_descriptor(W)
        conv_param = ((self.ph, self.pw), (self.sy, self.sx), x.dtype)
        dilation = (self.dy, self.dx)
        conv_desc = cudnn.create_convolution_descriptor(
            *conv_param, dilation=dilation,
            use_tensor_core=use_tensor_core,
            group=self.group)
        if b is not None:
            bias_desc = cudnn.create_tensor_descriptor(
                b[None, :, None, None])
        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')
        if configuration.config.autotune and _cudnn_version >= 5000:
            algo = _get_algorithm_fwd(
                x, W, y, conv_param + (dilation,), handle, x_desc,
                filter_desc, conv_desc, y_desc, workspace)
        else:
            algo = libcudnn.getConvolutionForwardAlgorithm(
                handle, x_desc.value, filter_desc.value,
                conv_desc.value, y_desc.value, _fwd_pref, workspace_size)

        if use_tensor_core:
            algo = self._tensor_core_adjust_algo()

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        libcudnn.convolutionForward(
            handle, one.data, x_desc.value, x.data.ptr,
            filter_desc.value, W.data.ptr, conv_desc.value,
            algo, workspace.data.ptr, workspace_size, zero.data,
            y_desc.value, y.data.ptr)

        # TODO(beam2d): Support unshared bias
        if b is not None:
            cudnn.add_tensor(
                handle, one.data, bias_desc.value, b.data.ptr,
                one.data, y_desc.value, y.data.ptr)

        return y,
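
The b[None, :, None, None] indexing reshapes the length-out_c bias to (1, out_c, 1, 1) so that cudnn.add_tensor can broadcast it over the NCHW output. The same addition in plain NumPy, with made-up sizes:

import numpy

n, out_c, out_h, out_w = 2, 3, 4, 4
y = numpy.zeros((n, out_c, out_h, out_w), dtype='f')
b = numpy.arange(out_c, dtype='f')

# Broadcasting (1, out_c, 1, 1) against (n, out_c, out_h, out_w)
# adds one bias value per output channel.
y += b[None, :, None, None]
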
Esempio n. 6
    def _forward_cudnn(self, x, W, b):
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        n = x.shape[0]
        out_c = W.shape[1]

        use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y = cuda.cupy.empty((n, out_c, self.outh, self.outw), dtype=x.dtype)
        y_desc = cudnn.create_tensor_descriptor(y)

        filter_desc = cudnn.create_filter_descriptor(W)
        conv_param = (self.ph, self.pw), (self.sy, self.sx), x.dtype
        dilation = (self.dy, self.dx)
        conv_desc = cudnn.create_convolution_descriptor(
            *conv_param, dilation=dilation, use_tensor_core=use_tensor_core)
        if b is not None:
            bias_desc = cudnn.create_tensor_descriptor(b[None, :, None, None])

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes

        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size, ), dtype='b')

        if configuration.config.cudnn_deterministic:
            algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
        elif configuration.config.autotune and _cudnn_version_ >= 5000:
            algo = get_algorithm(W, x, y, conv_param + (dilation, ), handle,
                                 filter_desc, x_desc, conv_desc, y_desc,
                                 workspace)
        else:
            algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                handle, filter_desc.value, x_desc.value, conv_desc.value,
                y_desc.value, _bwd_data_pref, workspace_size)

        if use_tensor_core:
            # Only CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 supports
            # Tensor-Core in cuDNN7
            algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1

        libcudnn.convolutionBackwardData_v3(
            handle, one.data, filter_desc.value, W.data.ptr, x_desc.value,
            x.data.ptr, conv_desc.value, algo, workspace.data.ptr,
            workspace_size, zero.data, y_desc.value, y.data.ptr)

        if b is not None:
            cudnn.add_tensor(handle, one.data, bias_desc.value, b.data.ptr,
                             one.data, y_desc.value, y.data.ptr)

        return y,
Esempio n. 7
    def _forward_cudnn(self, x, gy):
        # Convert to C-contiguous arrays.
        x = cuda.cupy.ascontiguousarray(x)
        gy = cuda.cupy.ascontiguousarray(gy)

        # Make empty arrays for result.
        out_c = gy.shape[1]
        in_c = x.shape[1]
        gW = cuda.cupy.empty((out_c, in_c) + self.ksize, dtype=self.W_dtype)

        # Get cuDNN handle and descriptors.
        use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        gy_desc = cudnn.create_tensor_descriptor(gy)

        filter_desc = cudnn.create_filter_descriptor(gW)
        conv_param = (self.pad, self.stride, self.W_dtype)
        conv_desc = cudnn.create_convolution_descriptor(
            *conv_param, use_tensor_core=use_tensor_core)

        # Compute gradients.
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes

        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size, ), dtype='b')

        # Compute filter weight gradient.
        if configuration.config.autotune and _cudnn_version_ >= 5000:
            algo = convolution_2d._get_algorithm_bwd_filter(
                x, gy, gW, conv_param, handle, x_desc, gy_desc, conv_desc,
                filter_desc, workspace)
        else:
            algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                handle, x_desc.value, gy_desc.value, conv_desc.value,
                filter_desc.value, _bwd_filter_pref, workspace_size)

        libcudnn.convolutionBackwardFilter_v3(handle, one.data, x_desc.value,
                                              x.data.ptr, gy_desc.value,
                                              gy.data.ptr, conv_desc.value,
                                              algo, workspace.data.ptr,
                                              workspace_size, zero.data,
                                              filter_desc.value, gW.data.ptr)

        return gW,
Esempio n. 8
    def test_size(self):
        size = 1024
        cuda.set_max_workspace_size(size)
        assert size == cuda.get_max_workspace_size()
Esempio n. 9
    def forward_gpu(self, inputs):
        self.retain_inputs((0, 1))
        x, gy = inputs
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape

        if (self.cover_all or not chainer.should_use_cudnn('>=auto')
                or x.dtype != self.W_dtype
                or ((self.dy > 1 or self.dx > 1) and _cudnn_version < 6000)):
            col = conv.im2col_gpu(x,
                                  self.kh,
                                  self.kw,
                                  self.sy,
                                  self.sx,
                                  self.ph,
                                  self.pw,
                                  cover_all=self.cover_all,
                                  dy=self.dy,
                                  dx=self.dx)
            gW = cuda.cupy.tensordot(gy, col, ((0, 2, 3),
                                               (0, 4, 5))).astype(self.W_dtype,
                                                                  copy=False)
            return gW,

        gW = cuda.cupy.empty((out_c, c, self.kh, self.kw), dtype=self.W_dtype)
        x = cuda.cupy.ascontiguousarray(x)
        gy = cuda.cupy.ascontiguousarray(gy)

        use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        gy_desc = cudnn.create_tensor_descriptor(gy)

        filter_desc = cudnn.create_filter_descriptor(gW)
        conv_param = (self.ph, self.pw), (self.sy, self.sx), x.dtype
        dilation = (self.dy, self.dx)
        conv_desc = cudnn.create_convolution_descriptor(
            *conv_param, dilation=dilation, use_tensor_core=use_tensor_core)

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes

        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size, ), dtype='b')

        if configuration.config.cudnn_deterministic:
            algo = libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
        elif configuration.config.autotune and _cudnn_version >= 5000:
            algo = _get_algorithm_bwd_filter(x, gy, gW,
                                             conv_param + (dilation, ), handle,
                                             x_desc, gy_desc, conv_desc,
                                             filter_desc, workspace)
        else:
            algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                handle, x_desc.value, gy_desc.value, conv_desc.value,
                filter_desc.value, _bwd_filter_pref, workspace_size)

        if use_tensor_core:
            # Only CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 supports
            # Tensor-Core in cuDNN7.
            algo = libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1

        libcudnn.convolutionBackwardFilter_v3(handle, one.data, x_desc.value,
                                              x.data.ptr, gy_desc.value,
                                              gy.data.ptr, conv_desc.value,
                                              algo, workspace.data.ptr,
                                              workspace_size, zero.data,
                                              filter_desc.value, gW.data.ptr)

        return gW,
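
In the fallback path above, col has shape (n, c, kh, kw, out_h, out_w), so contracting the batch and spatial axes of gy (0, 2, 3) with the matching axes of col (0, 4, 5) leaves exactly the filter shape. A shape-only NumPy check with illustrative sizes:

import numpy

n, c, kh, kw, out_h, out_w, out_c = 2, 3, 3, 3, 5, 5, 8
gy = numpy.zeros((n, out_c, out_h, out_w), dtype='f')
col = numpy.zeros((n, c, kh, kw, out_h, out_w), dtype='f')

gW = numpy.tensordot(gy, col, ((0, 2, 3), (0, 4, 5)))
assert gW.shape == (out_c, c, kh, kw)
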
Esempio n. 10
    def test_size(self):
        size = 1024
        cuda.set_max_workspace_size(size)
        self.assertEqual(size, cuda.get_max_workspace_size())
Esempio n. 11
    def setUp(self):
        self.space = cuda.get_max_workspace_size()
Esempio n. 12
    def _forward_cudnn(self, x, W, b):
        out_c = W.shape[0]  # (c_O, _, k_1, k_2, ..., k_N)
        ksize = W.shape[2:]
        n, c = x.shape[:2]  # (n, c_I, d_1, d_2, ..., d_N)
        dims = x.shape[2:]
        stride = self.stride
        pad = self.pad
        ndim = self.ndim
        colon = slice(None)

        # Make empty array for result.
        outs = tuple(
            conv.get_conv_outsize(d, k, s, p, cover_all=self.cover_all)
            for (d, k, s, p) in zip(dims, ksize, stride, pad))
        assert all(out > 0 for out in outs), 'Output sizes should be positive.'
        y_shape = (n, out_c) + outs  # (n, c_O, out_1, out_2, ..., out_N)
        y = cuda.cupy.empty(y_shape, dtype=x.dtype)

        # Convert to C-contiguous arrays.
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        # Get cuDNN handle and descriptors.
        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)

        self.filter_desc = cudnn.create_filter_descriptor(W)
        self.conv_param = (pad, stride, x.dtype)
        self.conv_desc = cudnn.create_convolution_descriptor(*self.conv_param)
        if b is not None:
            b_index = (None, colon) + (None, ) * ndim
            self.bias_desc = cudnn.create_tensor_descriptor(b[b_index])

        # Find cuDNN algorithm to be used.
        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size, ), dtype='b')
        if configuration.config.autotune and _cudnn_version_ >= 5000:
            algo = convolution_2d._get_algorithm_fwd(x, W, y, self.conv_param,
                                                     handle, x_desc,
                                                     self.filter_desc,
                                                     self.conv_desc, y_desc,
                                                     workspace)
        else:
            algo = libcudnn.getConvolutionForwardAlgorithm(
                handle, x_desc.value, self.filter_desc.value,
                self.conv_desc.value, y_desc.value, _fwd_pref, workspace_size)

        # cuDNN forward computation.
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        libcudnn.convolutionForward(handle, one.data, x_desc.value, x.data.ptr,
                                    self.filter_desc.value, W.data.ptr,
                                    self.conv_desc.value, algo,
                                    workspace.data.ptr, workspace_size,
                                    zero.data, y_desc.value, y.data.ptr)

        # Add bias if given.
        # TODO(takagi) Support unshared bias
        if b is not None:
            cudnn.add_tensor(handle, one.data, self.bias_desc.value,
                             b.data.ptr, one.data, y_desc.value, y.data.ptr)

        return y,
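
conv.get_conv_outsize used above computes the per-dimension output length of a (possibly dilated) convolution. A minimal sketch of the cover_all=False case; d is the dilation factor, defaulting to 1 as in the call above:

def conv_outsize(size, k, s, p, d=1):
    dk = d * (k - 1) + 1          # effective kernel size under dilation
    return (size + 2 * p - dk) // s + 1

# e.g. conv_outsize(6, k=4, s=2, p=1) == 3
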
Esempio n. 13
    def _forward_cudnn(self, x, W, b):
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        n = x.shape[0]
        # out_c = W.shape[1]
        yCg = W.shape[1]
        yC = yCg * self.group

        use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

        # cuDNN 7 supports dilation only in *_BWD_DATA_ALGO_0, but
        # it supports Tensor Cores only in *_BWD_DATA_ALGO_1.
        if use_tensor_core and (self.dx > 1 or self.dy > 1):
            use_tensor_core = False

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y = cuda.cupy.empty((n, yC, self.outh, self.outw),
                            dtype=x.dtype)
        y_desc = cudnn.create_tensor_descriptor(y)

        filter_desc = cudnn.create_filter_descriptor(W)
        conv_param = (self.ph, self.pw), (self.sy, self.sx), x.dtype
        dilation = (self.dy, self.dx)
        conv_desc = cudnn.create_convolution_descriptor(
            *conv_param, dilation=dilation,
            use_tensor_core=use_tensor_core,
            group=self.group)
        if b is not None:
            bias_desc = cudnn.create_tensor_descriptor(
                b[None, :, None, None])

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes

        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')

        if configuration.config.cudnn_deterministic:
            algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
        elif configuration.config.autotune and _cudnn_version_ >= 5000:
            algo = get_algorithm(
                W, x, y, conv_param + (dilation,), handle, filter_desc,
                x_desc, conv_desc, y_desc, workspace)
        else:
            algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                handle, filter_desc.value, x_desc.value, conv_desc.value,
                y_desc.value, _bwd_data_pref, workspace_size)

        if use_tensor_core:
            algo = self._tensor_core_adjust_algo()

        libcudnn.convolutionBackwardData_v3(
            handle, one.data, filter_desc.value, W.data.ptr,
            x_desc.value, x.data.ptr, conv_desc.value,
            algo, workspace.data.ptr, workspace_size,
            zero.data, y_desc.value, y.data.ptr)

        if b is not None:
            cudnn.add_tensor(
                handle, one.data, bias_desc.value, b.data.ptr,
                one.data, y_desc.value, y.data.ptr)

        return y,
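
Note the filter layout in this example: for deconvolution, W is (in_channels, out_channels // group, kh, kw), so the total output channel count is recovered as W.shape[1] * self.group, whereas the forward convolutions above use (out_channels, in_channels // group, kh, kw). A shape-only illustration with made-up sizes:

# Hypothetical grouped deconvolution: 8 output channels in 2 groups.
in_c, out_c, group, kh, kw = 4, 8, 2, 3, 3
W_shape = (in_c, out_c // group, kh, kw)   # (4, 4, 3, 3)

yCg = W_shape[1]       # output channels per group
yC = yCg * group       # total output channels
assert yC == out_c
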
Esempio n. 14
    def forward_gpu(self, inputs):
        self.retain_inputs((0, 1))  # retain only x and W
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not all([isinstance(i, cuda.ndarray) for i in inputs]):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape

        out_h = conv.get_conv_outsize(h,
                                      kh,
                                      self.sy,
                                      self.ph,
                                      cover_all=self.cover_all,
                                      d=self.dy)
        assert out_h > 0, 'Height in the output should be positive.'
        out_w = conv.get_conv_outsize(w,
                                      kw,
                                      self.sx,
                                      self.pw,
                                      cover_all=self.cover_all,
                                      d=self.dx)
        assert out_w > 0, 'Width in the output should be positive.'

        y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)
        if (not self.cover_all and chainer.should_use_cudnn('>=auto')
                and x.dtype == W.dtype and
            ((self.dy == 1 and self.dx == 1) or _cudnn_version >= 6000)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y_desc = cudnn.create_tensor_descriptor(y)

            filter_desc = cudnn.create_filter_descriptor(W)
            conv_param = ((self.ph, self.pw), (self.sy, self.sx), x.dtype)
            dilation = (self.dy, self.dx)
            conv_desc = cudnn.create_convolution_descriptor(
                *conv_param,
                dilation=dilation,
                use_tensor_core=use_tensor_core)
            if b is not None:
                bias_desc = cudnn.create_tensor_descriptor(b[None, :, None,
                                                             None])
            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size, ), dtype='b')
            if configuration.config.autotune and _cudnn_version >= 5000:
                algo = _get_algorithm_fwd(x, W, y, conv_param + (dilation, ),
                                          handle, x_desc, filter_desc,
                                          conv_desc, y_desc, workspace)
            else:
                algo = libcudnn.getConvolutionForwardAlgorithm(
                    handle, x_desc.value, filter_desc.value, conv_desc.value,
                    y_desc.value, _fwd_pref, workspace_size)

            if use_tensor_core:
                # Only CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
                # supports Tensor-Core in cuDNN7.
                algo = libcudnn.CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM  # NOQA

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            libcudnn.convolutionForward(handle, one.data, x_desc.value,
                                        x.data.ptr, filter_desc.value,
                                        W.data.ptr, conv_desc.value, algo,
                                        workspace.data.ptr, workspace_size,
                                        zero.data, y_desc.value, y.data.ptr)

            # TODO(beam2d): Support unshared bias
            if b is not None:
                cudnn.add_tensor(handle, one.data, bias_desc.value, b.data.ptr,
                                 one.data, y_desc.value, y.data.ptr)
        else:
            # Implementation using im2col
            col = conv.im2col_gpu(x,
                                  kh,
                                  kw,
                                  self.sy,
                                  self.sx,
                                  self.ph,
                                  self.pw,
                                  cover_all=self.cover_all,
                                  dy=self.dy,
                                  dx=self.dx)
            y = cuda.cupy.tensordot(col, W,
                                    ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                                   copy=False)
            # TODO(beam2d): Support unshared bias
            if b is not None:
                y += b
            y = cuda.cupy.rollaxis(y, 3, 1)

        return y,
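
In the im2col fallback just above, col has shape (n, c, kh, kw, out_h, out_w); contracting its (c, kh, kw) axes with the same axes of W leaves (n, out_h, out_w, out_c), and rollaxis(y, 3, 1) restores NCHW. A shape-only NumPy check with illustrative sizes:

import numpy

n, c, kh, kw, out_h, out_w, out_c = 2, 3, 3, 3, 5, 5, 8
col = numpy.zeros((n, c, kh, kw, out_h, out_w), dtype='f')
W = numpy.zeros((out_c, c, kh, kw), dtype='f')

y = numpy.tensordot(col, W, ((1, 2, 3), (1, 2, 3)))
assert y.shape == (n, out_h, out_w, out_c)

y = numpy.rollaxis(y, 3, 1)   # channels back to axis 1 (NCHW)
assert y.shape == (n, out_c, out_h, out_w)
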