コード例 #1
0
ファイル: _interface.py プロジェクト: rgommers/cupy
    def __call__(self,
                 grid,
                 block,
                 args,
                 shared_mem=0,
                 stream=None,
                 enable_cooperative_groups=False):
        """Compile the kernel for the given argument types if needed, then call it.

        Every element of ``args`` is mapped to a C type. The tuple of those
        types is the key into ``self._cache``; on a miss the function is
        transpiled and compiled, and both the resulting kernel function and
        the generated source are stored under that key.

        Args:
            grid: Passed through unchanged to the compiled kernel function.
            block: Passed through unchanged to the compiled kernel function.
            args: Iterable of kernel arguments. Each element must be a
                ``cupy.ndarray`` or a value accepted by ``numpy.isscalar``.
            shared_mem: Passed through unchanged to the compiled kernel
                function.
            stream: Passed through unchanged to the compiled kernel function.
            enable_cooperative_groups: Passed through unchanged to the
                compiled kernel function.

        Raises:
            TypeError: If an element of ``args`` is neither a
                ``cupy.ndarray`` nor a scalar.
        """
        def _ctype_of(x):
            # Arrays are tested first, then scalars; everything else is
            # rejected.
            if isinstance(x, cupy.ndarray):
                return _types.CArray.from_ndarray(x)
            if numpy.isscalar(x):
                return _typerules.get_ctype_from_scalar(self._mode, x)
            raise TypeError(f'{type(x)} is not supported for RawKernel')

        # A tuple is used so the type signature can serve as a cache key.
        signature = tuple([_ctype_of(x) for x in args])

        kernel = self._cache.get(signature)
        if kernel is None:
            # Cache miss: generate the source, compile it and remember both
            # the kernel function and the code it was built from.
            transpiled = _compile.transpile(
                self._func, ['extern "C"', '__global__'], self._mode,
                signature, _types.Void())
            entry_point = transpiled.func_name
            compiled = cupy._core.core.compile_with_cache(
                source=transpiled.code, options=('-D CUPY_JIT_MODE', ))
            kernel = compiled.get_function(entry_point)
            self._cache[signature] = kernel
            self._cached_codes[signature] = transpiled.code

        kernel(grid, block, args, shared_mem, stream,
               enable_cooperative_groups)
コード例 #2
0
ファイル: _interface.py プロジェクト: viantirreau/cupy
    def __call__(self, grid, block, args):
        """Compile the kernel for the given argument types if needed, then call it.

        Every element of ``args`` is mapped to a C type. The tuple of those
        types is the key into ``self._cache``; on a miss the function is
        transpiled and compiled, and the resulting kernel function is stored
        under that key.

        Args:
            grid: Passed through unchanged to the compiled kernel function.
            block: Passed through unchanged to the compiled kernel function.
            args: Iterable of kernel arguments. Each element must be a
                ``cupy.ndarray`` or a value accepted by ``numpy.isscalar``.

        Raises:
            TypeError: If an element of ``args`` is neither a
                ``cupy.ndarray`` nor a scalar.
        """
        def _ctype_of(x):
            # Arrays are tested first, then scalars; everything else is
            # rejected.
            if isinstance(x, cupy.ndarray):
                return _types.Array.from_ndarray(x)
            if numpy.isscalar(x):
                return _typerules.get_ctype_from_scalar(self._mode, x)
            raise TypeError(f'{type(x)} is not supported for RawKernel')

        # A tuple is used so the type signature can serve as a cache key.
        signature = tuple([_ctype_of(x) for x in args])

        kernel = self._cache.get(signature)
        if kernel is None:
            # Cache miss: generate the source, compile it and keep the
            # kernel function for later calls with the same types.
            transpiled = _compile.transpile(
                self._func, ['extern "C"', '__global__'], self._mode,
                signature, _types.Void())
            entry_point = transpiled.func_name
            compiled = cupy.core.core.compile_with_cache(transpiled.code)
            kernel = compiled.get_function(entry_point)
            self._cache[signature] = kernel
        kernel(grid, block, args)
コード例 #3
0
    def __call__(self,
                 grid,
                 block,
                 args,
                 shared_mem=0,
                 stream=None,
                 enable_cooperative_groups=False):
        """Calls the CUDA kernel.

        Compilation is postponed until the kernel is first called. At call
        time the JIT compiler infers the argument types, and the compiled
        kernel is cached per type signature so that later calls with the
        same types skip compilation.

        Args:
            grid (tuple of int): Size of grid in blocks.
            block (tuple of int): Dimensions of each thread block.
            args (tuple):
                Arguments of the kernel. Every element must be a ``bool``,
                ``int``, ``float``, ``complex``, NumPy scalar or
                ``cupy.ndarray``.
            shared_mem (int):
                Dynamic shared-memory size per thread block in bytes.
            stream (cupy.cuda.Stream): CUDA stream.
            enable_cooperative_groups (bool):
                Passed through unchanged to the compiled kernel function.

        Raises:
            TypeError: If an element of ``args`` is neither a
                ``cupy.ndarray`` nor a scalar.

        .. seealso:: :ref:`jit_kernel_definition`
        """
        def _ctype_of(x):
            # Arrays are tested first, then scalars; everything else is
            # rejected.
            if isinstance(x, cupy.ndarray):
                return _cuda_types.CArray.from_ndarray(x)
            if numpy.isscalar(x):
                return _cuda_typerules.get_ctype_from_scalar(self._mode, x)
            raise TypeError(f'{type(x)} is not supported for RawKernel')

        # A tuple is used so the type signature can serve as a cache key.
        signature = tuple([_ctype_of(x) for x in args])

        kernel = self._cache.get(signature)
        if kernel is None:
            transpiled = _compile.transpile(
                self._func, ['extern "C"', '__global__'], self._mode,
                signature, _cuda_types.void)
            entry_point = transpiled.func_name
            # Workaround for hipRTC: as of ROCm 4.1.0 it still does not
            # recognize "-D", so on HIP the code is compiled with hipcc
            # (selected through the 'nvcc' backend) instead.
            if runtime.is_hip:
                backend = 'nvcc'
            else:
                backend = 'nvrtc'
            compiled = core.compile_with_cache(
                source=transpiled.code,
                options=('-DCUPY_JIT_MODE', '--std=c++11'),
                backend=backend)
            kernel = compiled.get_function(entry_point)
            # Remember both the kernel function and the source it came from.
            self._cache[signature] = kernel
            self._cached_codes[signature] = transpiled.code

        kernel(grid, block, args, shared_mem, stream,
               enable_cooperative_groups)
コード例 #4
0
ファイル: _interface.py プロジェクト: rgommers/cupy
 def _emit_code_from_types(self, in_types, ret_type=None):
     """Run ``_compile.transpile`` for the given argument types.

     ``self.func``, ``self.attributes`` and ``self.mode`` are forwarded
     together with ``in_types`` and ``ret_type``.

     Args:
         in_types: Argument types handed to the transpiler.
         ret_type: Return type handed to the transpiler; defaults to
             ``None``.

     Returns:
         Whatever ``_compile.transpile`` returns, unchanged.
     """
     transpiled = _compile.transpile(
         self.func, self.attributes, self.mode, in_types, ret_type)
     return transpiled