def __call__(self, a, axis=None, dtype=None, out=None, keepdims=False):
    """Runs the reduction routine on a single array.

    Args:
        a (cupy.ndarray): Input array. Any other type is rejected.
        axis: Axis specification. It is resolved by ``_get_axis`` against
            ``a.ndim``.
        dtype: Data type specifier. If given, it is normalized with
            ``numpy.dtype(dtype).type`` and passed to the routine lookup.
        out: Optional output array. When omitted, the output is prepared
            by ``_get_out_args``.
        keepdims (bool): Forwarded to ``_get_out_shape`` when the output
            shape is computed.

    Returns:
        The output array, or a tuple of output arrays when there is more
        than one.

    Raises:
        TypeError: If ``a`` is not a ``cupy.ndarray``.
        ValueError: If ``a`` is empty and the reduction has no identity.

    """
    if not isinstance(a, cupy.ndarray):
        raise TypeError('Input type must be cupy.ndarray')
    # Raise explicitly instead of using ``assert`` so that the check is
    # still performed when Python runs with ``-O``.
    if self.identity is None and a.size == 0:
        raise ValueError(
            'zero-size array to reduction operation %s which has no '
            'identity' % self.name)
    if dtype is not None:
        dtype = numpy.dtype(dtype).type

    in_args = [a]
    if out is None:
        _check_args((a,))
        out_args = []
    else:
        _check_args((a, out))
        out_args = [out]

    in_types, out_types, routine = _guess_routine(
        self.name, self._routine_cache, self._ops, in_args, dtype)

    axis, raxis = _get_axis(axis, a.ndim)
    out_shape = _get_out_shape(a.shape, axis, raxis, keepdims)
    out_args = _get_out_args(out_args, out_types, out_shape)
    in_args, in_shape = _get_trans_args(
        in_args, axis + raxis, in_args[0].shape)

    in_indexer = carray.Indexer(in_shape)
    out_indexer = carray.Indexer(out_shape)
    # Output size rounded up to a power of two (for sizes >= 1).
    out_clp2_size = 2 ** int.bit_length(int(out_indexer.size - 1))

    inout_args = _get_inout_args(
        in_args, out_args, in_indexer, out_indexer, out_clp2_size,
        self._params, True)
    args_info = _get_args_info(inout_args)

    block_size = 512
    kern = _get_simple_reduction_function(
        routine, self._params, args_info,
        in_args[0].dtype.type, out_args[0].dtype.type, out_types,
        self.name, block_size, self.identity,
        self._input_expr, self._output_expr, self._preamble, ())
    shared_mem = 32 * block_size
    if out_clp2_size > 256:
        shared_mem = 0  # TODO(okuta) set actual size
    kern.linear_launch(max(out_indexer.size, block_size), inout_args,
                       shared_mem, block_size)

    if len(out_args) == 1:
        return out_args[0]
    return tuple(out_args)
def __call__(self, *args, **kwargs):
    """Compiles and invokes the reduction kernel.

    The compilation runs only if the kernel is not cached. Note that the
    kernels with different argument dtypes, ndims, or axis are not
    compatible. It means that single ReductionKernel object may be
    compiled into multiple kernel binaries.

    Args:
        args: Arguments of the kernel. Either ``nin`` input arguments, or
            ``nargs`` arguments (inputs followed by outputs).
        out: Output array given by keyword. Only accepted when the kernel
            has exactly one output and no output is given positionally.
        axis: Axis specification. It is resolved by ``_get_axis`` against
            the number of dimensions of the broadcast shape.
        keepdims (bool): Forwarded to ``_get_out_shape`` when the output
            shape is computed.

    Returns:
        The first output array (``out_args[0]``).

    Raises:
        TypeError: If an unknown keyword argument is given or the number
            of positional arguments is wrong.
        NotImplementedError: If ``out`` is given for a kernel whose
            number of outputs is not one.
        ValueError: If the output is given both positionally and by
            keyword, or if the broadcast shape contains a zero-length
            dimension while the kernel has no identity.

    """
    out = kwargs.pop('out', None)
    axis = kwargs.pop('axis', None)
    keepdims = kwargs.pop('keepdims', False)
    if kwargs:
        raise TypeError('Wrong arguments %s' % kwargs)

    n_args = len(args)
    if n_args != self.nin and n_args != self.nargs:
        raise TypeError('Wrong number of arguments for %s' % self.name)

    out_args = list(args[self.nin:])
    if out is not None:
        if self.nout != 1:
            raise NotImplementedError(
                "'out' keyword is only supported for kernels with a "
                "single output")
        if out_args:
            raise ValueError("cannot specify 'out' as both "
                             "a positional and keyword argument")
        out_args = [out]

    in_args, broad_shape = _broadcast(args, self.in_params, False)
    _check_args(in_args + out_args)

    # Without an identity there is no value to produce for an empty
    # reduction, so reject zero-size input.  (The previous ``assert`` had
    # the condition inverted and rejected every non-empty input instead.)
    if self.identity is None and 0 in broad_shape:
        raise ValueError(
            'zero-size array to reduction operation %s which has no '
            'identity' % self.name)

    cp_array = cupy.ndarray
    # dtype of each array argument; None marks non-array arguments.
    in_ndarray_types = tuple(
        [a.dtype.type if isinstance(a, cp_array) else None
         for a in in_args])
    out_ndarray_types = tuple(
        [a.dtype.type if isinstance(a, cp_array) else None
         for a in out_args])
    in_types, out_types, types = _decide_params_type(
        self.in_params, self.out_params,
        in_ndarray_types, out_ndarray_types)

    axis, raxis = _get_axis(axis, len(broad_shape))
    out_shape = _get_out_shape(broad_shape, axis, raxis, keepdims)
    # Non-array inputs are converted with the decided parameter type.
    in_args = [x if isinstance(x, cp_array) else t(x)
               for x, t in six_zip(in_args, in_types)]
    in_args, in_shape = _get_trans_args(
        in_args, axis + raxis, broad_shape, self.in_params)
    out_args = _get_out_args_with_params(
        out_args, out_types, out_shape, self.out_params)

    in_indexer = carray.Indexer(in_shape)
    out_indexer = carray.Indexer(out_shape)
    # Output size rounded up to a power of two (for sizes >= 1).
    out_clp2_size = 2 ** int.bit_length(int(out_indexer.size - 1))

    inout_args = _get_inout_args(
        in_args, out_args, in_indexer, out_indexer, out_clp2_size,
        self.params, self.reduce_dims)
    args_info = _get_args_info(inout_args)

    block_size = 512
    kern = _get_reduction_kernel(
        self.params, args_info, types,
        self.name, block_size, self.reduce_type, self.identity,
        self.map_expr, self.reduce_expr, self.post_map_expr,
        self.preamble, self.options)
    shared_mem = 32 * block_size
    if out_clp2_size > 256:
        shared_mem = 0  # TODO(okuta) set actual size
    kern.linear_launch(max(out_indexer.size, block_size), inout_args,
                       shared_mem, block_size)
    return out_args[0]
def __call__(self, *args, **kwargs):
    """Applies the universal function to arguments elementwise.

    Args:
        args: Input arguments. Each of them can be a cupy.ndarray object
            or a scalar. Output arguments may follow the inputs
            positionally, be given through ``out``, or be omitted.
        out (cupy.ndarray): Output array. New arrays are used for the
            output by default.
        dtype: Data type specifier.

    Returns:
        Output array or a tuple of output arrays.

    """
    out = kwargs.pop('out', None)
    dtype = kwargs.pop('dtype', None)
    if dtype is not None:
        dtype = numpy.dtype(dtype).type
    if kwargs:
        raise TypeError('Wrong arguments %s' % kwargs)

    given = len(args)
    if given not in (self.nin, self.nargs):
        raise TypeError('Wrong number of arguments for %s' % self.name)

    if out is not None:
        # Keyword output: only valid for single-output functions called
        # with inputs alone.
        if self.nout != 1:
            raise ValueError("Cannot use 'out' in %s" % self.name)
        if given != self.nin:
            raise ValueError("Cannot specify 'out' as both "
                             "a positional and keyword argument")
        in_args = args
        out_args = (out,)
        args = args + out_args
    else:
        in_args = args[:self.nin]
        out_args = args[self.nin:]

    _check_args(args)
    broad = cupy.broadcast(*args)
    shape = broad.shape

    in_types, out_types, routine = _guess_routine(
        self.name, self._routine_cache, self._ops, in_args, dtype)

    out_args = _get_out_args(out_args, out_types, shape)
    ret = out_args[0] if self.nout == 1 else tuple(out_args)

    # Nothing to compute when any dimension is empty.
    if 0 in shape:
        return ret

    # Non-array inputs are converted with the selected input types.
    kernel_args = []
    for value, in_type in six_zip(broad.values, in_types):
        if not isinstance(value, cupy.ndarray):
            value = in_type(value)
        kernel_args.append(value)
    kernel_args += out_args

    kernel_args, shape = _reduce_dims(kernel_args, self._params, shape)
    indexer = carray.Indexer(shape)
    kernel_args.append(indexer)

    args_info = _get_args_info(kernel_args)
    out_raw_types = tuple(arr.dtype.type for arr in out_args)
    kern = _get_ufunc_kernel(
        in_types, out_types, routine, args_info, out_raw_types,
        self._params, self.name, self._preamble)
    kern.linear_launch(indexer.size, kernel_args)
    return ret
def __call__(self, *args, **kwargs):
    """Compiles and invokes the elementwise kernel.

    The kernel is compiled only when it is not found in the cache.
    Kernels for different argument dtypes or ndims are not compatible
    with each other, so one ElementwiseKernel object may end up compiled
    into several kernel binaries.

    Args:
        args: Arguments of the kernel.
        size (int): Range size of the indices. If specified, the variable
            ``n`` is set to this value. Otherwise, the result of
            broadcasting is used to determine the value of ``n``.

    Returns:
        Arrays are returned according to the ``out_params`` argument of
        the ``__init__`` method.

    """
    size = kwargs.pop('size', None)
    if kwargs:
        raise TypeError('Wrong arguments %s' % kwargs)

    if len(args) not in (self.nin, self.nargs):
        raise TypeError('Wrong number of arguments for %s' % self.name)
    _check_args(args)

    has_size = size is not None
    values, shape = _broadcast(args, self.params, has_size)
    in_args = values[:self.nin]
    out_args = values[self.nin:]

    ndarray = cupy.ndarray

    def scalar_type_of(value):
        # dtype type for arrays; None marks a non-array argument.
        if isinstance(value, ndarray):
            return value.dtype.type
        return None

    in_ndarray_types = tuple(scalar_type_of(v) for v in in_args)
    out_ndarray_types = tuple(scalar_type_of(v) for v in out_args)

    in_types, out_types, types = _decide_params_type(
        self.in_params, self.out_params,
        in_ndarray_types, out_ndarray_types)

    out_args = _get_out_args_with_params(
        out_args, out_types, shape, self.out_params)
    ret = out_args[0] if self.nout == 1 else tuple(out_args)

    # An explicit size overrides the broadcast shape for the launch.
    if has_size:
        shape = (size,)

    # Nothing to compute when any dimension is empty.
    if 0 in shape:
        return ret

    # Non-array inputs are converted with the decided parameter types.
    kernel_args = []
    for value, param_type in six_zip(in_args, in_types):
        if not isinstance(value, ndarray):
            value = param_type(value)
        kernel_args.append(value)
    kernel_args += out_args

    if self.reduce_dims:
        kernel_args, shape = _reduce_dims(
            kernel_args, self.params, shape)
    indexer = carray.Indexer(shape)
    kernel_args.append(indexer)

    args_info = _get_args_info(kernel_args)
    kern = _get_elementwise_kernel(
        args_info, types, self.params, self.operation,
        self.name, self.preamble, self.kwargs)
    kern.linear_launch(indexer.size, kernel_args)
    return ret