Python ElementwiseKernel 예제들, cupy._core.ElementwiseKernel Python 예제들

예제 #1

0

파일 보기

def _kernel_labels():
    """Build the elementwise kernel that collects self-referencing indices.

    The generated CUDA code only acts on positions where ``y[i] == i``.
    For each of them it reserves an output slot by atomically incrementing
    ``count[1]`` and stores ``i`` into ``labels`` at that slot.

    Returns:
        The ``_core.ElementwiseKernel`` object. It declares no input
        parameters and three raw arrays: ``y``, ``count`` and ``labels``.
    """
    in_params = ''
    out_params = 'raw Y y, raw int32 count, raw int32 labels'
    operation = '''
        if (y[i] != i) continue;
        int j = atomicAdd(&count[1], 1);
        labels[j] = i;
        '''
    kernel_name = 'cupyx_scipy_ndimage_label_labels'
    return _core.ElementwiseKernel(
        in_params, out_params, operation, kernel_name)

예제 #2

0

파일 보기

파일: vectorize.py 프로젝트: the-lay/cupy

    def __call__(self, *args):
        """Apply the vectorized function to ``args`` via a cached kernel.

        One kernel is generated per combination of input types (the
        concatenation of ``_get_input_type`` over ``args``) and stored in
        ``self._kernel_cache``; later calls with the same types reuse it.

        Raises:
            NotImplementedError: If a kernel has to be generated while
                ``self.otypes`` is set (not implemented yet).
        """
        itypes = ''.join([_get_input_type(x) for x in args])
        cached = self._kernel_cache.get(itypes, None)
        if cached is not None:
            return cached(*args)

        in_types = [_cuda_types.Scalar(t) for t in itypes]
        if self.otypes is not None:
            # TODO(asi1024): Implement
            raise NotImplementedError
        ret_type = None

        func = _interface._CudaFunction(self.pyfunc, 'numpy', device=True)
        result = func._emit_code_from_types(in_types, ret_type)

        # One kernel parameter ``in<k>`` per positional argument.
        arg_names = [f'in{i}' for i in range(len(in_types))]
        in_params = ', '.join(
            f'{t.dtype} {name}' for t, name in zip(in_types, arg_names))
        in_args = ', '.join(arg_names)
        out_params, out_lval = self._parse_out_param(result.return_type)
        body = f'{out_lval} = {func.name}({in_args})'

        # note: we don't worry about -D not working on ROCm here, because
        # we unroll all headers for HIP and so thrust::tuple et al are all
        # defined regardless if CUPY_JIT_MODE is defined or not
        kern = _core.ElementwiseKernel(
            in_params, out_params, body,
            preamble=result.code,
            options=('-DCUPY_JIT_MODE', ))
        self._kernel_cache[itypes] = kern
        return kern(*args)

예제 #3

0

파일 보기

    def tocsc(self, copy=False):
        """Converts the matrix to Compressed Sparse Column format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in dia to csc conversion.

        Returns:
            cupyx.scipy.sparse.csc_matrix: Converted matrix.

        """
        if self.data.size == 0:
            return csc.csc_matrix(self.shape, dtype=self.dtype)

        num_rows, num_cols = self.shape
        num_offsets, offset_len = self.data.shape

        # For every stored element compute the row it falls on and whether
        # it is an in-bounds, non-zero entry of the matrix.
        row, mask = _core.ElementwiseKernel(
            'int32 offset_len, int32 offsets, int32 num_rows, '
            'int32 num_cols, T data', 'int32 row, bool mask', '''
            int offset_inds = i % offset_len;
            row = offset_inds - offsets;
            mask = (row >= 0 && row < num_rows && offset_inds < num_cols
                    && data != T(0));
            ''', 'dia_tocsc')(offset_len, self.offsets[:, None], num_rows,
                              num_cols, self.data)
        indptr = cupy.zeros(num_cols + 1, dtype='i')
        # ``data`` may store more columns than the matrix has. Those extra
        # columns are always masked out by the kernel, so only the first
        # ``num_cols`` per-column counts are needed; without the truncation
        # the assignment below would not match the length of the slice.
        indptr[1:offset_len + 1] = cupy.cumsum(mask.sum(axis=0)[:num_cols])
        if offset_len < num_cols:
            # Columns past the stored data are empty: repeat the last offset.
            # The guard also keeps ``indptr[offset_len]`` in bounds when
            # ``offset_len`` exceeds ``num_cols``.
            indptr[offset_len + 1:] = indptr[offset_len]
        indices = row.T[mask.T].astype('i', copy=False)
        data = self.data.T[mask.T]
        return csc.csc_matrix((data, indices, indptr),
                              shape=self.shape,
                              dtype=self.dtype)

예제 #4

0

파일 보기

def _kernel_count():
    """Build the elementwise kernel that flattens ``y`` and counts roots.

    For every position ``i`` with ``y[i] >= 0`` the generated CUDA code
    follows the chain ``j = y[j]`` until it reaches a ``j`` with
    ``y[j] == j``. If that ``j`` differs from ``i`` it is written to
    ``y[i]``; otherwise ``count[0]`` is atomically incremented.

    Returns:
        The ``_core.ElementwiseKernel`` object. It declares no input
        parameters and two raw arrays: ``y`` and ``count``.
    """
    in_params = ''
    out_params = 'raw Y y, raw int32 count'
    operation = '''
        if (y[i] < 0) continue;
        int j = i;
        while (j != y[j]) { j = y[j]; }
        if (j != i) y[i] = j;
        else atomicAdd(&count[0], 1);
        '''
    kernel_name = 'cupyx_scipy_ndimage_label_count'
    return _core.ElementwiseKernel(
        in_params, out_params, operation, kernel_name)

예제 #5

0

파일 보기

def _kernel_finalize():
    """Build the elementwise kernel that rewrites ``y`` using ``labels``.

    In the generated CUDA code a negative ``y[i]`` is replaced by ``0``.
    Any other value is looked up in ``labels`` with a bisection over the
    index range ``[0, maxlabel - 1]`` and ``y[i]`` is replaced by the
    resulting index plus one.

    Returns:
        The ``_core.ElementwiseKernel`` object taking ``maxlabel`` as input
        and the raw arrays ``labels`` and ``y``.
    """
    in_params = 'int32 maxlabel'
    out_params = 'raw int32 labels, raw Y y'
    operation = '''
        if (y[i] < 0) {
            y[i] = 0;
            continue;
        }
        int yi = y[i];
        int j_min = 0;
        int j_max = maxlabel - 1;
        int j = (j_min + j_max) / 2;
        while (j_min < j_max) {
            if (yi == labels[j]) break;
            if (yi < labels[j]) j_max = j - 1;
            else j_min = j + 1;
            j = (j_min + j_max) / 2;
        }
        y[i] = j + 1;
        '''
    kernel_name = 'cupyx_scipy_ndimage_label_finalize'
    return _core.ElementwiseKernel(
        in_params, out_params, operation, kernel_name)

예제 #6

0

파일 보기

파일: _measurements.py 프로젝트: takagi/cupy

def _kernel_connect():
    """Build the elementwise kernel that links neighbouring positions.

    For every position ``i`` with ``y[i] >= 0`` and every one of the
    ``ndirs`` direction vectors in ``dirs``, the generated CUDA code
    decomposes ``i`` into per-axis positions using ``shape``, applies the
    direction offset and skips neighbours that leave the array or have a
    negative ``y``. It then follows the ``y`` chains of both positions to
    their ends and, when they differ, points the larger end at the smaller
    one with ``atomicCAS``, retrying from the observed value on failure.

    Returns:
        The ``_core.ElementwiseKernel`` object taking ``shape``, ``dirs``,
        ``ndirs`` and ``ndim`` as inputs and the raw array ``y``.
    """
    in_params = 'raw int32 shape, raw int32 dirs, int32 ndirs, int32 ndim'
    out_params = 'raw Y y'
    operation = '''
        if (y[i] < 0) continue;
        for (int dr = 0; dr < ndirs; dr++) {
            int j = i;
            int rest = j;
            int stride = 1;
            int k = 0;
            for (int dm = ndim-1; dm >= 0; dm--) {
                int pos = rest % shape[dm] + dirs[dm + dr * ndim];
                if (pos < 0 || pos >= shape[dm]) {
                    k = -1;
                    break;
                }
                k += pos * stride;
                rest /= shape[dm];
                stride *= shape[dm];
            }
            if (k < 0) continue;
            if (y[k] < 0) continue;
            while (1) {
                while (j != y[j]) { j = y[j]; }
                while (k != y[k]) { k = y[k]; }
                if (j == k) break;
                if (j < k) {
                    int old = atomicCAS( &y[k], k, j );
                    if (old == k) break;
                    k = old;
                }
                else {
                    int old = atomicCAS( &y[j], j, k );
                    if (old == j) break;
                    j = old;
                }
            }
        }
        '''
    kernel_name = 'cupyx_scipy_ndimage_label_connect'
    return _core.ElementwiseKernel(
        in_params, out_params, operation, kernel_name)

예제 #7

0

파일 보기

    def __call__(self, *args):
        """Apply the vectorized function to ``args`` via a cached kernel.

        The cache key is the concatenation of ``_get_input_type`` over
        ``args``. On a cache miss a new elementwise kernel is generated from
        ``self.pyfunc`` and stored in ``self._kernel_cache``.

        Raises:
            NotImplementedError: If a kernel has to be generated while
                ``self.otypes`` is set (not implemented yet).
        """
        itypes = ''.join([_get_input_type(x) for x in args])
        cached = self._kernel_cache.get(itypes, None)
        if cached is not None:
            return cached(*args)

        in_types = [_types.Scalar(t) for t in itypes]
        if self.otypes is not None:
            # TODO(asi1024): Implement
            raise NotImplementedError
        ret_type = None

        func = _interface._CudaFunction(self.pyfunc, 'numpy', device=True)
        result = func._emit_code_from_types(in_types, ret_type)

        # One kernel parameter ``in<k>`` per positional argument.
        arg_names = [f'in{i}' for i in range(len(in_types))]
        in_params = ', '.join(
            f'{t.dtype} {name}' for t, name in zip(in_types, arg_names))
        in_args = ', '.join(arg_names)
        out_params, out_lval = self._parse_out_param(result.return_type)
        body = f'{out_lval} = {func.name}({in_args})'

        kern = _core.ElementwiseKernel(
            in_params,
            out_params,
            body,
            preamble=result.code,
            options=('-D CUPY_JIT_MODE',))
        self._kernel_cache[itypes] = kern
        return kern(*args)

예제 #8

0

파일 보기

파일: _kernels.py 프로젝트: the-lay/cupy

    do {
        *U = rk_double(state);
    } while (*U <= 0.0 || *U >= 1.0);
}
'''

# CUDA source fragments joined into the preamble of ``beta_kernel``.
definitions = [
    rk_basic_definition, rk_gauss_definition,
    rk_standard_exponential_definition, rk_standard_gamma_definition,
    rk_beta_definition
]
# Elementwise kernel: for element ``i`` it seeds a generator state with
# ``seed + i`` and stores ``rk_beta(state, a, b)`` into ``y``. The state
# variable ``internal_state`` is declared through ``loop_prep``.
beta_kernel = _core.ElementwiseKernel('S a, T b, uint64 seed',
                                      'Y y',
                                      '''
    rk_seed(seed + i, &internal_state);
    y = rk_beta(&internal_state, a, b);
    ''',
                                      'beta_kernel',
                                      preamble=''.join(definitions),
                                      loop_prep='rk_state internal_state;')

# ``definitions`` is rebound here: these are the fragments for the preamble
# of ``binomial_kernel`` only.
definitions = [rk_use_binominal, rk_basic_definition, rk_binomial_definition]
# Elementwise kernel: for element ``i`` it seeds a generator state with
# ``seed + i`` and stores ``rk_binomial(state, n, p)`` into ``y``.
binomial_kernel = _core.ElementwiseKernel('S n, T p, uint64 seed',
                                          'Y y',
                                          '''
    rk_seed(seed + i, &internal_state);
    y = rk_binomial(&internal_state, n, p);
    ''',
                                          'binomial_kernel',
                                          preamble=''.join(definitions),
                                          loop_prep='rk_state internal_state;')

예제 #9

0

파일 보기

import cupy

from cupy import _core

# Elementwise kernel: assigns ``value`` to ``y`` where ``cond`` is true and
# does not assign ``y`` anywhere else.
_piecewise_krnl = _core.ElementwiseKernel('bool cond, T value', 'T y',
                                          'if (cond) y = value',
                                          'piecewise_kernel')


def piecewise(x, condlist, funclist):
    """Evaluate a piecewise-defined function.

        Args:
            x (cupy.ndarray): input domain
            condlist (list of cupy.ndarray):
                Each boolean array/ scalar corresponds to a function
                in funclist. Length of funclist is equal to that of
                condlist. If one extra function is given, it is used
                as the default value when the otherwise condition is met
            funclist (list of scalars): list of scalar functions.

        Returns:
            cupy.ndarray: the scalar values in funclist on portions of x
            defined by condlist.

        .. warning::

            This function currently doesn't support callable functions,
            args and kw parameters.

        .. seealso:: :func:`numpy.piecewise`

예제 #10

0

파일 보기

파일: packing.py 프로젝트: toslunar/cupy

import cupy
from cupy import _core


# Elementwise kernels keyed by bit order. Output element ``i`` combines the
# eight inputs ``a[i * 8] .. a[i * 8 + 7]``: each input contributes a 1 bit
# when it is non-zero and its index is below ``a_size``, so positions past
# the end of ``a`` contribute 0 bits.
#   'big':    input ``j`` of the group goes to bit ``7 - j``.
#   'little': input ``j`` of the group goes to bit ``j``.
# NOTE(review): bits are OR-ed into ``packed``, so the output presumably has
# to be zero-initialized by the caller -- confirm.
_packbits_kernel = {
    'big': _core.ElementwiseKernel(
        'raw T a, raw int32 a_size', 'uint8 packed',
        '''for (int j = 0; j < 8; ++j) {
                    int k = i * 8 + j;
                    int bit = k < a_size && a[k] != 0;
                    packed |= bit << (7 - j);
                }''',
        'cupy_packbits_big'
    ),
    'little': _core.ElementwiseKernel(
        'raw T a, raw int32 a_size', 'uint8 packed',
        '''for (int j = 0; j < 8; ++j) {
                    int k = i * 8 + j;
                    int bit = k < a_size && a[k] != 0;
                    packed |= bit << j;
                }''',
        'cupy_packbits_little'
    )
}


def packbits(a, axis=None, bitorder='big'):
    """Packs the elements of a binary-valued array into bits in a uint8 array.

    This function currently does not support ``axis`` option.

예제 #11

0

파일 보기

from cupy import _core
from cupy.cuda import texture
from cupy.cuda import runtime


# Elementwise kernel: output element ``i`` is turned into the homogeneous
# pixel ``(i / width, i % width, 1)``. The first three entries of ``m`` give
# ``x`` and the next three give ``y`` (each a dot product with the pixel,
# plus 0.5), and the result is the texture fetch ``tex2D<T>(texObj, y, x)``.
# The preamble supplies the ``dot`` helper for ``float3`` operands.
_affine_transform_2d_array_kernel = _core.ElementwiseKernel(
    'U texObj, raw float32 m, uint64 width', 'T transformed_image',
    '''
    float3 pixel = make_float3(
        (float)(i / width),
        (float)(i % width),
        1.0f
    );
    float x = dot(pixel, make_float3(m[0],  m[1],  m[2])) + .5f;
    float y = dot(pixel, make_float3(m[3],  m[4],  m[5])) + .5f;
    transformed_image = tex2D<T>(texObj, y, x);
    ''',
    'cupyx_texture_affine_transformation_2d_array',
    preamble='''
    inline __host__ __device__ float dot(float3 a, float3 b)
    {
        return a.x * b.x + a.y * b.y + a.z * b.z;
    }
    ''')


_affine_transform_3d_array_kernel = _core.ElementwiseKernel(
    'U texObj, raw float32 m, uint64 height, uint64 width',
    'T transformed_volume',
    '''

예제 #12

0

파일 보기

파일: _measurements.py 프로젝트: takagi/cupy

def _kernel_init():
    """Build the elementwise kernel that initializes ``y`` from ``x``.

    The generated CUDA code writes ``-1`` to ``y`` where ``x`` is zero and
    the element index ``i`` everywhere else.

    Returns:
        The ``_core.ElementwiseKernel`` object taking ``x`` as input and
        ``y`` as output.
    """
    operation = 'if (x == 0) { y = -1; } else { y = i; }'
    kernel_name = 'cupyx_scipy_ndimage_label_init'
    return _core.ElementwiseKernel('X x', 'Y y', operation, kernel_name)

예제 #13

0

파일 보기

            If not given the sample assumes a uniform distribution over all
            entries in ``a``.

    Returns:
        cupy.ndarray: An array of ``a`` values distributed according to
        ``p`` or uniformly.

    .. seealso:: :meth:`numpy.random.choice`

    """
    rs = _generator.get_random_state()
    return rs.choice(a, size, replace, p)


# Elementwise kernel: for input element ``i`` holding ``x``, atomically adds
# one to ``ys[i / n * p + x]`` (integer division), i.e. every run of ``n``
# consecutive inputs accumulates into its own group of ``p`` output slots.
_multinominal_kernel = _core.ElementwiseKernel(
    'int64 x, int32 p, int32 n', 'raw U ys',
    'atomicAdd(&ys[i / n * p + x], U(1))', 'cupy_random_multinomial')


def multinomial(n, pvals, size=None):
    """Returns an array from multinomial distribution.

    Args:
        n (int): Number of trials.
        pvals (cupy.ndarray): Probabilities of each of the ``p`` different
            outcomes. The sum of these values must be 1.
        size (int or tuple of ints or None): Shape of a sample in each trial.
            For example when ``size`` is ``(a, b)``, shape of returned value is
            ``(a, b, p)`` where ``p`` is ``len(pvals)``.
            If ``size`` is ``None``, it is treated as ``()``. So, shape of
            returned value is ``(p,)``.

예제 #14

0

파일 보기

class _compressed_sparse_matrix(sparse_data._data_matrix,
                                sparse_data._minmax_mixin, _index.IndexMixin):

    # CUDA source template (``string.Template`` placeholders ``${func}``,
    # ``${cond}`` and ``${op}``) for a per-thread max/min reduction.
    # Thread ``tid`` scans ``data[x[tid]:y[tid]]``. The running value starts
    # from the first entry of that range when ``${cond}`` holds and from 0
    # otherwise, is replaced by any entry that compares ``${op}`` against it,
    # and becomes NaN as soon as a NaN entry is met. It is stored in
    # ``z[tid]``.
    # NOTE(review): the kernel does not bounds-check ``tid``; presumably the
    # launch configuration matches the number of ranges exactly -- confirm.
    _max_min_reduction_code = r'''
        extern "C" __global__
        void ${func}(double* data, int* x, int* y, int length,
                           double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (${cond}){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the block and update
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] ${op} running_value){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }'''

    # Four instantiations of ``_max_min_reduction_code``. ``op`` selects max
    # ('>') or min ('<'). ``cond`` selects when the first stored entry seeds
    # the running value: only for ranges whose length equals ``length``
    # (plain variants) or for every non-empty range (``nonzero`` variants).
    _max_reduction_kern = _core.RawKernel(
        string.Template(_max_min_reduction_code).substitute(
            func='max_reduction', op='>', cond='block_length == length'),
        'max_reduction')

    _max_nonzero_reduction_kern = _core.RawKernel(
        string.Template(_max_min_reduction_code).substitute(
            func='max_nonzero_reduction', op='>', cond='block_length > 0'),
        'max_nonzero_reduction')

    _min_reduction_kern = _core.RawKernel(
        string.Template(_max_min_reduction_code).substitute(
            func='min_reduction', op='<', cond='block_length == length'),
        'min_reduction')

    _min_nonzero_reduction_kern = _core.RawKernel(
        string.Template(_max_min_reduction_code).substitute(
            func='min_nonzero_reduction', op='<', cond='block_length > 0'),
        'min_nonzero_reduction')

    # For _max_arg_reduction_mod and _min_arg_reduction_mod below, we pick
    # the right template specialization according to input dtypes at runtime.
    # The distinction in int types (T2) is important for portability in OS.

    # CUDA source template (placeholders ``${func}`` and ``${op}``) for a
    # per-thread argmax/argmin. Thread ``tid`` scans the range
    # ``x[tid]:y[tid]`` of ``data``/``indices``. A range whose length equals
    # ``length`` starts from its first stored entry. A shorter non-empty
    # range starts from value 0 at the first position whose stored index
    # does not match its ordinal. An empty range yields index 0. A NaN entry
    # forces index 0. The selected index is stored in ``z[tid]``.
    _argmax_argmin_code = r'''
        template<typename T1, typename T2> __global__ void
        ${func}_arg_reduction(T1* data, int* indices, int* x, int* y,
                              int length, T2* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            int data_index = 0;
            double data_value = 0;

            if (block_length == length){
                // Block is dense. Fill the first value
                data_value = data[x[tid]];
                data_index = indices[x[tid]];
            } else if (block_length > 0)  {
                // Block has at least one zero. Assign first occurrence as the
                // starting reference
                data_value = 0;
                for (data_index = 0; data_index < length; data_index++){
                    if (data_index != indices[x[tid] + data_index] ||
                        x[tid] + data_index >= y[tid]){
                        break;
                    }
                }
            } else {
                // Zero valued array
                data_value = 0;
                data_index = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    data_value = nan("");
                    data_index = 0;
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] ${op} data_value){
                        data_index = indices[entry];
                        data_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = data_index;
        }'''

    # Modules compiled from ``_argmax_argmin_code`` for argmax ('>') and
    # argmin ('<'). Each one names four template specializations: data type
    # ``float`` or ``double`` combined with output index type ``int`` or
    # ``long long``.
    _max_arg_reduction_mod = _core.RawModule(
        code=string.Template(_argmax_argmin_code).substitute(func='max',
                                                             op='>'),
        options=('-std=c++11', ),
        name_expressions=[
            'max_arg_reduction<float, int>',
            'max_arg_reduction<float, long long>',
            'max_arg_reduction<double, int>',
            'max_arg_reduction<double, long long>'
        ])

    _min_arg_reduction_mod = _core.RawModule(
        code=string.Template(_argmax_argmin_code).substitute(func='min',
                                                             op='<'),
        options=('-std=c++11', ),
        name_expressions=[
            'min_arg_reduction<float, int>',
            'min_arg_reduction<float, long long>',
            'min_arg_reduction<double, int>',
            'min_arg_reduction<double, long long>'
        ])

    # TODO(leofang): rewrite a more load-balanced approach than this naive one?
    # Elementwise kernel: output ``i`` is false when, within
    # ``indices[indptr[i]:indptr[i+1]]``, some entry is greater than its
    # successor; otherwise it is true. Equal neighbours are accepted.
    _has_sorted_indices_kern = _core.ElementwiseKernel(
        'raw T indptr, raw T indices', 'bool diff', '''
        bool diff_out = true;
        for (T jj = indptr[i]; jj < indptr[i+1] - 1; jj++) {
            if (indices[jj] > indices[jj+1]){
                diff_out = false;
            }
        }
        diff = diff_out;
        ''', 'has_sorted_indices')

    # TODO(leofang): rewrite a more load-balanced approach than this naive one?
    # Elementwise kernel: output ``i`` is false when ``indptr`` decreases at
    # ``i`` or when, within ``indices[indptr[i]:indptr[i+1]]``, some entry is
    # greater than or equal to its successor (unsorted or duplicated);
    # otherwise it is true.
    # The early exit uses ``continue`` rather than ``return``: the operation
    # body runs inside the kernel's per-element loop, so ``return`` would
    # also abandon any further elements assigned to the same thread.
    _has_canonical_format_kern = _core.ElementwiseKernel(
        'raw T indptr, raw T indices', 'bool diff', '''
        bool diff_out = true;
        if (indptr[i] > indptr[i+1]) {
            diff = false;
            continue;
        }
        for (T jj = indptr[i]; jj < indptr[i+1] - 1; jj++) {
            if (indices[jj] >= indices[jj+1]) {
                diff_out = false;
            }
        }
        diff = diff_out;
        ''', 'has_canonical_format')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        """Initializes the matrix from one of several kinds of ``arg1``.

        Args:
            arg1: One of the following, tested in this order: a sparse
                matrix accepted by ``base.issparse``; a shape accepted by
                ``_util.isshape`` (creates an empty matrix); a SciPy sparse
                matrix (only when SciPy is available); a 2-tuple, which is
                forwarded to ``coo.coo_matrix``; a 3-tuple
                ``(data, indices, indptr)`` of 1-D dense arrays; or a dense
                array with at most two dimensions.
            shape (tuple of int or None): Shape of the matrix. When ``None``
                it is taken from ``arg1`` where possible, otherwise inferred
                from ``indptr`` and ``indices``.
            dtype: Data type. When ``None`` the dtype of the resulting
                ``data`` array is used.
            copy (bool): Forwarded to ``astype`` when ``data``, ``indices``
                and ``indptr`` are stored. Several branches reset it to
                ``False``.

        Raises:
            ValueError: If ``shape`` is invalid, ``arg1`` is not a supported
                initializer, the three arrays are not 1-D, ``data`` and
                ``indices`` differ in length, the dtype is not one of bool,
                float32, float64, complex64 or complex128, or the length of
                ``indptr`` does not match the shape.
            TypeError: If a dense ``arg1`` has more than two dimensions.
        """
        if shape is not None:
            if not _util.isshape(shape):
                raise ValueError('invalid shape (must be a 2-tuple of int)')
            shape = int(shape[0]), int(shape[1])

        if base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            indices = x.indices
            indptr = x.indptr

            if arg1.format != self.format:
                # When formats are different, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

        elif _util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            data = basic.zeros(0, dtype if dtype else 'd')
            indices = basic.zeros(0, 'i')
            indptr = basic.zeros(self._swap(m, n)[0] + 1, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False

        elif scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupyx.scipy.sparse
            x = arg1.asformat(self.format)
            data = cupy.array(x.data)
            indices = cupy.array(x.indices, dtype='i')
            indptr = cupy.array(x.indptr, dtype='i')
            copy = False

            if shape is None:
                shape = arg1.shape

        elif isinstance(arg1, tuple) and len(arg1) == 2:
            # Note: This implementation is not efficient, as it first
            # constructs a sparse matrix with coo format, then converts it to
            # compressed format.
            sp_coo = coo.coo_matrix(arg1, shape=shape, dtype=dtype, copy=copy)
            sp_compressed = sp_coo.asformat(self.format)
            data = sp_compressed.data
            indices = sp_compressed.indices
            indptr = sp_compressed.indptr

        elif isinstance(arg1, tuple) and len(arg1) == 3:
            data, indices, indptr = arg1
            if not (base.isdense(data) and data.ndim == 1
                    and base.isdense(indices) and indices.ndim == 1
                    and base.isdense(indptr) and indptr.ndim == 1):
                raise ValueError('data, indices, and indptr should be 1-D')

            if len(data) != len(indices):
                raise ValueError('indices and data should have the same size')

        elif base.isdense(arg1):
            if arg1.ndim > 2:
                raise TypeError('expected dimension <= 2 array or matrix')
            elif arg1.ndim == 1:
                # Treat a 1-D array as a single row.
                arg1 = arg1[None]
            elif arg1.ndim == 0:
                # Treat a 0-D array as a 1x1 matrix.
                arg1 = arg1[None, None]
            data, indices, indptr = self._convert_dense(arg1)
            copy = False
            if shape is None:
                shape = arg1.shape

        else:
            raise ValueError('Unsupported initializer format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype.char not in '?fdFD':
            raise ValueError(
                'Only bool, float32, float64, complex64 and complex128 '
                'are supported')

        data = data.astype(dtype, copy=copy)
        sparse_data._data_matrix.__init__(self, data)

        self.indices = indices.astype('i', copy=copy)
        self.indptr = indptr.astype('i', copy=copy)

        if shape is None:
            # Infer the shape from the index arrays.
            # NOTE(review): ``indices.max()`` presumably fails when
            # ``indices`` is empty -- confirm.
            shape = self._swap(len(indptr) - 1, int(indices.max()) + 1)

        major, minor = self._swap(*shape)
        if len(indptr) != major + 1:
            raise ValueError('index pointer size (%d) should be (%d)' %
                             (len(indptr), major + 1))

        self._descr = cusparse.MatDescriptor.create()
        self._shape = shape

    def _with_data(self, data, copy=True):
        """Return a matrix with this sparsity structure and new ``data``.

        Args:
            data: Values of the new matrix; its dtype becomes the dtype of
                the result.
            copy (bool): If ``True`` the ``indices`` and ``indptr`` arrays
                are copied, otherwise they are passed on as they are.

        Returns:
            A new instance of the same class with the same shape.
        """
        if copy:
            indices, indptr = self.indices.copy(), self.indptr.copy()
        else:
            indices, indptr = self.indices, self.indptr
        return self.__class__(
            (data, indices, indptr), shape=self.shape, dtype=data.dtype)

    def _convert_dense(self, x):
        """Convert the dense array ``x`` to ``(data, indices, indptr)``.

        Not implemented here; ``__init__`` unpacks the result into those
        three arrays.
        """
        raise NotImplementedError

    def _swap(self, x, y):
        """Map a ``(row, column)`` pair to a ``(major, minor)`` pair.

        Not implemented here. Callers in this class also use it in the
        opposite direction (major/minor sizes to a shape).
        """
        raise NotImplementedError

    def _add_sparse(self, other, alpha, beta):
        """Combine this matrix with the sparse matrix ``other``.

        Not implemented here. ``_add`` passes ``alpha`` and ``beta`` as
        ``1`` or ``-1`` depending on whether the left and right operands are
        negated.
        """
        raise NotImplementedError

    def _add(self, other, lhs_negative, rhs_negative):
        """Shared implementation of addition and subtraction.

        Args:
            other: A scalar, a sparse matrix or a dense array.
            lhs_negative (bool): Whether this matrix enters negated.
            rhs_negative (bool): Whether ``other`` enters negated.

        Returns:
            The combined result, or ``NotImplemented`` when ``other`` is of
            an unsupported kind.

        Raises:
            NotImplementedError: If ``other`` is a non-zero scalar.
        """
        if cupy.isscalar(other):
            if other == 0:
                return -self if lhs_negative else self.copy()
            raise NotImplementedError(
                'adding a nonzero scalar to a sparse matrix is not '
                'supported')

        if base.isspmatrix(other):
            alpha = -1 if lhs_negative else 1
            beta = -1 if rhs_negative else 1
            return self._add_sparse(other, alpha, beta)

        if not base.isdense(other):
            return NotImplemented

        dense = self.todense()
        if lhs_negative and rhs_negative:
            return -dense - other
        if lhs_negative:
            return other - dense
        if rhs_negative:
            return dense - other
        return dense + other

    def __add__(self, other):
        """Return ``self + other`` (``_add`` with neither operand negated)."""
        return self._add(other, False, False)

    def __radd__(self, other):
        """Return ``other + self`` (``_add`` with neither operand negated)."""
        return self._add(other, False, False)

    def __sub__(self, other):
        """Return ``self - other`` (``_add`` with ``other`` negated)."""
        return self._add(other, False, True)

    def __rsub__(self, other):
        """Return ``other - self`` (``_add`` with this matrix negated)."""
        return self._add(other, True, False)

    def _get_intXint(self, row, col):
        """Return the single element at ``(row, col)`` as a 0-dim array.

        The major-axis line containing the element is extracted first; a
        lookup kernel then fills the zero-initialized result from that line.
        Complex data uses a separate kernel that receives the real and
        imaginary parts separately.
        """
        major, minor = self._swap(row, col)
        line_data, line_indices, _ = _index._get_csr_submatrix_major_axis(
            self.data, self.indices, self.indptr, major, major + 1)
        value_dtype = line_data.dtype
        out = cupy.zeros((), dtype=value_dtype)
        if value_dtype.kind != 'c':
            _index._compress_getitem_kern(line_data, line_indices, minor, out)
            return out
        _index._compress_getitem_complex_kern(
            line_data.real, line_data.imag, line_indices, minor,
            out.real, out.imag)
        return out

    def _get_sliceXslice(self, row, col):
        """Index with a slice on both axes.

        The major slice is applied first, then the minor slice. The minor
        step is asked to copy only when the major slice has a step of 1 or
        no step at all.
        """
        major, minor = self._swap(row, col)
        unit_major_step = major.step in (1, None)
        major_part = self._major_slice(major)
        return major_part._minor_slice(minor, copy=unit_major_step)

    def _get_arrayXarray(self, row, col, not_found_val=0):
        """Inner (element-wise) indexing with two index arrays.

        Args:
            row, col: Index arrays of the same shape.
            not_found_val: Forwarded to ``_index._csr_sample_values``.

        Returns:
            A dense array with a leading axis of length one when the index
            arrays are 1-D, otherwise an instance of this class built from
            the sampled values reshaped like the index arrays.
        """
        idx_dtype = self.indices.dtype
        n_major, n_minor = self._swap(*self.shape)
        major, minor = self._swap(row, col)
        major = major.astype(idx_dtype, copy=False)
        minor = minor.astype(idx_dtype, copy=False)

        sampled = _index._csr_sample_values(
            n_major, n_minor, self.indptr, self.indices, self.data,
            major.ravel(), minor.ravel(), not_found_val)

        if major.ndim != 1:
            return self.__class__(sampled.reshape(major.shape))
        # Scipy returns `matrix` here
        return cupy.expand_dims(sampled, 0)

    def _get_columnXarray(self, row, col):
        """Outer indexing: fancy-index the major axis, then the minor axis."""
        major, minor = self._swap(row, col)
        major_selected = self._major_index_fancy(major)
        return major_selected._minor_index_fancy(minor)

    def _major_index_fancy(self, idx):
        """Index along the major axis where idx is an array of ints.

        Args:
            idx: Integer array of major-axis positions to select.

        Returns:
            A new instance of this class with ``idx.size`` major lines. When
            nothing is selected or the matrix stores no elements, an empty
            matrix of the same dtype is returned.
        """
        _, N = self._swap(*self.shape)
        M = idx.size
        new_shape = self._swap(M, N)
        if self.nnz == 0 or M == 0:
            # Pass the dtype explicitly: constructing from a bare shape
            # would otherwise fall back to the default dtype and lose the
            # dtype of this matrix (``_major_slice`` does the same).
            return self.__class__(new_shape, dtype=self.dtype)

        return self.__class__(_index._csr_row_index(self.data, self.indices,
                                                    self.indptr, idx),
                              shape=new_shape,
                              copy=False)

    def _minor_index_fancy(self, idx):
        """Index along the minor axis where idx is an array of ints.

        Args:
            idx: Integer array of minor-axis positions to select.

        Returns:
            A new instance of this class with ``idx.size`` minor lines. When
            nothing is selected or the matrix stores no elements, an empty
            matrix of the same dtype is returned.
        """
        M, _ = self._swap(*self.shape)
        N = idx.size
        new_shape = self._swap(M, N)
        if self.nnz == 0 or N == 0:
            # Pass the dtype explicitly: constructing from a bare shape
            # would otherwise fall back to the default dtype and lose the
            # dtype of this matrix (``_major_slice`` does the same).
            return self.__class__(new_shape, dtype=self.dtype)

        # TODO (asi1024): Implement faster algorithm for the case
        # ``idx.size * M < self.nnz``.

        # Convert to the other compressed format so that the minor axis
        # becomes the major one, index it there, and convert back.
        return self._tocsx()._major_index_fancy(idx)._tocsx()

    def _major_slice(self, idx, copy=False):
        """Index along the major axis where idx is a slice object.

        Args:
            idx (slice): Slice over the major axis.
            copy (bool): Whether a full-range slice returns a copy instead
                of ``self``; also forwarded to the constructor for
                unit-step slices.

        Returns:
            ``self`` (or a copy) for a full-range slice, otherwise a new
            instance of this class.
        """
        n_major, n_minor = self._swap(*self.shape)
        start, stop, step = idx.indices(n_major)

        if (start, stop, step) == (0, n_major, 1):
            if copy:
                return self.copy()
            return self

        n_selected = len(range(start, stop, step))
        new_shape = self._swap(n_selected, n_minor)

        if step != 1:
            # Non-unit steps are handled as fancy indexing.
            rows = cupy.arange(start, stop, step, dtype=self.indptr.dtype)
            return self._major_index_fancy(rows)

        if n_selected == 0 or self.nnz == 0:
            return self.__class__(new_shape, dtype=self.dtype)
        submatrix = _index._get_csr_submatrix_major_axis(
            self.data, self.indices, self.indptr, start, stop)
        return self.__class__(submatrix, shape=new_shape, copy=copy)

    def _minor_slice(self, idx, copy=False):
        """Index along the minor axis where idx is a slice object.

        Args:
            idx (slice): Slice over the minor axis.
            copy (bool): Whether a full-range slice returns a copy instead
                of ``self``.

        Returns:
            ``self`` (or a copy) for a full-range slice, otherwise a new
            instance of this class. An empty result keeps the dtype of this
            matrix.
        """
        M, N = self._swap(*self.shape)
        start, stop, step = idx.indices(N)

        if start == 0 and stop == N and step == 1:
            return self.copy() if copy else self

        N = len(range(start, stop, step))
        new_shape = self._swap(M, N)

        if N == 0 or self.nnz == 0:
            # Pass the dtype explicitly: constructing from a bare shape
            # would otherwise fall back to the default dtype and lose the
            # dtype of this matrix (``_major_slice`` does the same).
            return self.__class__(new_shape, dtype=self.dtype)
        if step == 1:
            return self.__class__(_index._get_csr_submatrix_minor_axis(
                self.data, self.indices, self.indptr, start, stop),
                                  shape=new_shape,
                                  copy=False)
        # Non-unit steps are handled as fancy indexing.
        cols = cupy.arange(start, stop, step, dtype=self.indices.dtype)
        return self._minor_index_fancy(cols)

    def _set_intXint(self, row, col, x):
        """Assign ``x`` to the single element at ``(row, col)``."""
        major_idx, minor_idx = self._swap(row, col)
        self._set_many(major_idx, minor_idx, x)

    def _set_arrayXarray(self, row, col, x):
        """Assign ``x`` to the elements addressed by ``row`` and ``col``."""
        major_idx, minor_idx = self._swap(row, col)
        self._set_many(major_idx, minor_idx, x)

    def _set_arrayXarray_sparse(self, row, col, x):
        """Assign the sparse matrix ``x`` to the elements at ``(row, col)``.

        All addressed elements are zeroed first; then only the stored
        entries of ``x`` are written, with ``x`` broadcast along any axis
        where it has length one and the index arrays do not.

        Args:
            row, col: 2-D index arrays of identical shape.
            x: Sparse matrix exposing ``row``, ``col`` and ``data``
                (presumably COO format -- confirm against callers).
        """
        # clear entries that will be overwritten
        self._zero_many(*self._swap(row, col))

        M, N = row.shape  # matches col.shape
        broadcast_row = M != 1 and x.shape[0] == 1
        broadcast_col = N != 1 and x.shape[1] == 1
        r, c = x.row, x.col
        x = cupy.asarray(x.data, dtype=self.dtype)
        if broadcast_row:
            # Replicate every stored entry of the single row of ``x`` onto
            # each of the M target rows.
            r = cupy.repeat(cupy.arange(M), r.size)
            c = cupy.tile(c, M)
            x = cupy.tile(x, M)
        if broadcast_col:
            # Replicate every stored entry of the single column of ``x``
            # onto each of the N target columns.
            r = cupy.repeat(r, N)
            c = cupy.tile(cupy.arange(N), c.size)
            x = cupy.repeat(x, N)
        # only assign entries in the new sparsity structure
        i, j = self._swap(row[r, c], col[r, c])
        self._set_many(i, j, x)

    def _prepare_indices(self, i, j):
        """Flatten and range-check major/minor index arrays.

        Args:
            i: Major-axis indices.
            j: Minor-axis indices.

        Returns:
            tuple: ``(i, j, M, N)`` where ``i`` and ``j`` are fresh 1-D
            arrays (dtypes of ``indptr`` and ``indices`` respectively) and
            ``M``, ``N`` are the major and minor sizes of this matrix.

        Raises:
            IndexError: If an index is ``>=`` the axis size or ``<`` its
                negative. Negative indices within range are accepted
                unchanged.
        """
        major_len, minor_len = self._swap(*self.shape)

        def _assert_in_range(values, bound):
            largest = values.max()
            if largest >= bound:
                raise IndexError('index (%d) out of range (>= %d)' %
                                 (largest, bound))
            smallest = values.min()
            if smallest < -bound:
                raise IndexError('index (%d) out of range (< -%d)' %
                                 (smallest, bound))

        i = cupy.array(i, dtype=self.indptr.dtype, copy=True, ndmin=1).ravel()
        j = cupy.array(j, dtype=self.indices.dtype, copy=True, ndmin=1).ravel()
        _assert_in_range(i, major_len)
        _assert_in_range(j, minor_len)
        return i, j, major_len, minor_len

    def _set_many(self, i, j, x):
        """Sets value at each (i, j) to x
        Here (i,j) index major and minor respectively, and must not contain
        duplicate entries.

        Elements that already exist in the sparsity structure are
        overwritten in place. The remaining ones are handed to
        ``_insert_many`` after a warning, because inserting changes the
        sparsity structure.
        """
        i, j, M, N = self._prepare_indices(i, j)
        x = cupy.array(x, dtype=self.dtype, copy=True, ndmin=1).ravel()

        # Helper matrix with the same sparsity structure whose values are
        # the positions into ``self.data``. The positions are encoded as
        # float64: float32 represents integers exactly only up to 2**24, so
        # it would return wrong offsets for matrices with more stored
        # elements than that.
        new_sp = cupyx.scipy.sparse.csr_matrix((cupy.arange(
            self.nnz, dtype=cupy.float64), self.indices, self.indptr),
                                               shape=(M, N))

        # -1 marks the (i, j) pairs that are not stored yet.
        offsets = new_sp._get_arrayXarray(i, j, not_found_val=-1).astype(
            cupy.int32).ravel()

        if -1 not in offsets:
            # only affects existing non-zero cells
            self.data[offsets] = x
            return

        else:
            warnings.warn('Changing the sparsity structure of a '
                          '{}_matrix is expensive.'
                          ' lil_matrix is more efficient.'.format(self.format))
            # replace where possible
            mask = offsets > -1
            self.data[offsets[mask]] = x[mask]
            # only insertions remain
            mask = ~mask
            i = i[mask]
            i[i < 0] += M
            j = j[mask]
            j[j < 0] += N
            self._insert_many(i, j, x[mask])

    def _zero_many(self, i, j):
        """Sets value at each (i, j) to zero, preserving sparsity structure.

        Here (i,j) index major and minor respectively. Entries that are not
        stored are left untouched.

        Args:
            i: Major-axis indices.
            j: Minor-axis indices.
        """
        i, j, M, N = self._prepare_indices(i, j)

        # Look-up matrix whose stored values are the positions 0..nnz-1 inside
        # ``self.data``. float64 keeps those positions exact; float32 is only
        # exact up to 2**24 and would mis-address larger matrices.
        new_sp = cupyx.scipy.sparse.csr_matrix(
            (cupy.arange(self.nnz, dtype=cupy.float64),
             self.indices, self.indptr),
            shape=(M, N))

        # Position of every requested entry, or -1 when it is not stored.
        offsets = new_sp._get_arrayXarray(i, j, not_found_val=-1).astype(
            cupy.int32).ravel()

        # only assign zeros to the existing sparsity structure
        self.data[offsets[offsets > -1]] = 0

    def _perform_insert(self, indices_inserts, data_inserts, rows, row_counts,
                        idx_dtype):
        """Insert new elements into current sparse matrix in sorted order

        Builds fresh ``indptr``/``indices``/``data`` arrays sized for the
        existing plus inserted entries, lets
        ``_index._insert_many_populate_arrays`` fill them, and then replaces
        the matrix's arrays with the new ones.

        Args:
            indices_inserts: Minor indices of the entries to insert.
            data_inserts: Values of the entries to insert.
            rows: Major indices that receive insertions.
            row_counts: Number of insertions for each entry of ``rows``.
            idx_dtype: Index dtype used for the new index arrays.
        """
        # Entries per major index once the insertions are accounted for.
        row_sizes = cupy.diff(self.indptr)
        row_sizes[rows] += row_counts

        # New indptr: a leading zero followed by the running total of sizes.
        merged_indptr = cupy.empty(self.indptr.shape, dtype=idx_dtype)
        merged_indptr[0] = idx_dtype(0)
        merged_indptr[1:] = row_sizes
        cupy.cumsum(merged_indptr, out=merged_indptr)
        total_nnz = int(merged_indptr[-1])

        merged_indices = cupy.empty(total_nnz, dtype=idx_dtype)
        merged_data = cupy.empty(total_nnz, dtype=self.data.dtype)

        # indptr-style offsets that count only the inserted entries.
        insert_indptr = cupy.zeros(merged_indptr.size, dtype=idx_dtype)
        insert_indptr[1:][rows] = row_counts
        cupy.cumsum(insert_indptr, out=insert_indptr)

        _index._insert_many_populate_arrays(
            indices_inserts, data_inserts, insert_indptr,
            self.indptr, self.indices, self.data,
            merged_indptr, merged_indices, merged_data,
            size=self.indptr.size - 1)

        self.indptr = merged_indptr
        self.indices = merged_indices
        self.data = merged_data

    def _insert_many(self, i, j, x):
        """Inserts new nonzero at each (i, j) with value x

        Here (i,j) index major and minor respectively.
        i, j and x must be non-empty, 1d arrays.
        Inserts each major group (e.g. all entries per row) at a time.
        Maintains has_sorted_indices property.

        The arrays passed in are not modified: sorted copies are made with
        ``take`` before any further processing. ``self.indptr``,
        ``self.indices`` and ``self.data`` are replaced.
        """
        order = cupy.argsort(i)  # stable for duplicates
        i = i.take(order)
        j = j.take(order)
        x = x.take(order)

        # Update index data type so it can address the enlarged matrix.
        idx_dtype = sputils.get_index_dtype((self.indices, self.indptr),
                                            maxval=(self.nnz + x.size))

        self.indptr = self.indptr.astype(idx_dtype)
        self.indices = self.indices.astype(idx_dtype)
        self.data = self.data.astype(self.dtype)

        indptr_inserts, indices_inserts, data_inserts = \
            _index._select_last_indices(i, j, x, idx_dtype)

        # Distinct major indices that receive at least one insertion.
        rows = cupy.unique(indptr_inserts)

        # Compute the counts for each row in the insertion array
        row_counts = cupy.zeros(rows.size, dtype=idx_dtype)
        cupyx.scatter_add(row_counts, cupy.searchsorted(rows, indptr_inserts),
                          1)

        self._perform_insert(indices_inserts, data_inserts, rows, row_counts,
                             idx_dtype)

    def __get_has_canonical_format(self):
        """Determine whether the matrix has sorted indices and no duplicates.

        Returns
            bool: ``True`` if the above applies, otherwise ``False``.

        .. note::
            A matrix whose cached sorted-indices flag is ``False`` is reported
            as not canonical without running the check kernel.

        .. warning::
            Getting this property might synchronize the device.

        """
        # Modified from the SciPy counterpart.
        if self.data.size == 0:
            # No stored entries: reported as canonical.
            verdict = True
        elif not getattr(self, '_has_sorted_indices', True):
            # not sorted => not canonical
            verdict = False
        elif hasattr(self, '_has_canonical_format'):
            # Result was cached by an earlier call or by the setter.
            return self._has_canonical_format
        else:
            flags = self._has_canonical_format_kern(
                self.indptr, self.indices, size=self.indptr.size - 1)
            verdict = bool(flags.all())
        self._has_canonical_format = verdict
        return verdict

    def __set_has_canonical_format(self, val):
        """Cache the canonical-format flag (taken from SciPy as is).

        A truthy ``val`` also marks the indices as sorted, because canonical
        format implies sorted indices.
        """
        flag = bool(val)
        self._has_canonical_format = flag
        if flag:
            self.has_sorted_indices = True

    # Read/write property backed by the cached ``_has_canonical_format`` flag.
    has_canonical_format = property(fget=__get_has_canonical_format,
                                    fset=__set_has_canonical_format)

    def __get_sorted(self):
        """Determine whether the matrix has sorted indices.

        The answer is cached in ``_has_sorted_indices``; the check kernel only
        runs when the matrix is non-empty and no cached value exists.

        Returns
            bool:
                ``True`` if the indices of the matrix are in sorted order,
                otherwise ``False``.

        .. warning::
            Getting this property might synchronize the device.

        """
        # Modified from the SciPy counterpart.
        if self.data.size == 0:
            # No stored entries: reported as sorted.
            result = True
        elif hasattr(self, '_has_sorted_indices'):
            # Result was cached by an earlier call or by the setter.
            return self._has_sorted_indices
        else:
            row_flags = self._has_sorted_indices_kern(
                self.indptr, self.indices, size=self.indptr.size - 1)
            result = bool(row_flags.all())
        self._has_sorted_indices = result
        return result

    def __set_sorted(self, val):
        """Cache ``val``, coerced to ``bool``, as the sorted-indices flag."""
        self._has_sorted_indices = bool(val)

    # Read/write property backed by the cached ``_has_sorted_indices`` flag.
    has_sorted_indices = property(fget=__get_sorted, fset=__set_sorted)

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix, as stored in ``self._shape``.

        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros.

        Args:
            axis: Not supported yet; must be ``None``.

        Returns:
            int: The number of stored values.

        Raises:
            ValueError: If ``axis`` is not ``None``.

        """
        if axis is not None:
            raise ValueError
        return self.data.size

    def sorted_indices(self):
        """Return a copy of this matrix with sorted indices

        The matrix itself is left untouched; sorting is applied to the copy.

        .. warning::
            Calling this function might synchronize the device.
        """
        # Taken from SciPy as is.
        duplicate = self.copy()
        duplicate.sort_indices()
        return duplicate

    def sort_indices(self):
        """Sort the indices of this matrix; must be provided by subclasses.

        Raises:
            NotImplementedError: Always, in this class.
        """
        # Unlike in SciPy, here this is implemented in child classes because
        # each child needs to call its own sort function from cuSPARSE
        raise NotImplementedError

    def sum_duplicates(self):
        """Eliminate duplicate matrix entries by adding them together.

        Does nothing when the matrix is already in canonical format.
        Otherwise the matrix is converted to COO, summed there, converted
        back to ``self.format`` and used to re-initialize this object.

        .. note::
            This is an *in place* operation.

        .. warning::
            Calling this function might synchronize the device.

        .. seealso::
           :meth:`scipy.sparse.csr_matrix.sum_duplicates`,
           :meth:`scipy.sparse.csc_matrix.sum_duplicates`
        """
        if not self.has_canonical_format:
            # TODO(leofang): add a kernel for compressed sparse matrices
            # without converting to coo
            merged = self.tocoo()
            merged.sum_duplicates()
            self.__init__(merged.asformat(self.format))
            self.has_canonical_format = True

    #####################
    # Reduce operations #
    #####################

    def _minor_reduce(self, ufunc, axis, nonzero):
        """Reduce stored values with a max/min kernel over the minor axis.

        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted. Only ``cupy.amax`` and ``cupy.amin`` have kernels
                registered here.
            axis (int): Axis over which the reduction should be
                conducted.
            nonzero (bool): If ``True`` the ``*_nonzero_reduction`` kernels
                are used, otherwise the plain reduction kernels.

        Returns:
            (cupy.ndarray): float64 reduce result, one value per
            major_index.

        Raises:
            KeyError: If ``ufunc`` is neither ``cupy.amax`` nor ``cupy.amin``.

        """
        out_shape = self.shape[1 - axis]
        # Allocate the output directly as float64 instead of allocating and
        # then copying through ``astype``.
        out = cupy.zeros(out_shape, dtype=cupy.float64)
        if nonzero:
            kerns = {
                cupy.amax: self._max_nonzero_reduction_kern,
                cupy.amin: self._min_nonzero_reduction_kern
            }
        else:
            kerns = {
                cupy.amax: self._max_reduction_kern,
                cupy.amin: self._min_reduction_kern
            }

        # One single-thread block per output element; each receives the
        # start/end offsets of its major slice via the two indptr views.
        kerns[ufunc]((out_shape, ), (1, ),
                     (self.data.astype(cupy.float64),
                      self.indptr[:-1], self.indptr[1:],
                      cupy.int64(self.shape[axis]), out))

        return out

    def _arg_minor_reduce(self, ufunc, axis):
        """Find the arg-max/arg-min over the minor axis for each major index.

        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted; must be ``cupy.argmax`` or ``cupy.argmin``.
            axis (int): Axis over which the reduction should be conducted.

        Returns:
            (cupy.ndarray): Reduce result for nonzeros in each
            major_index

        Raises:
            ValueError: If ``ufunc`` is neither ``cupy.argmax`` nor
                ``cupy.argmin``.

        """
        # Pick the kernel module first so an unsupported ufunc fails with a
        # clear error instead of an unbound local further down.
        if ufunc == cupy.argmax:
            prefix, module = 'max', self._max_arg_reduction_mod
        elif ufunc == cupy.argmin:
            prefix, module = 'min', self._min_arg_reduction_mod
        else:
            raise ValueError(
                'ufunc must be cupy.argmax or cupy.argmin')

        # Create the vector to hold output
        # Note: it's important to set "int" here, following what SciPy
        # does, as the outcome dtype is platform dependent
        out_shape = self.shape[1 - axis]
        out = cupy.zeros(out_shape, dtype=int)

        # Kernel name is templated on the data and output element types.
        ker_name = '_arg_reduction<{}, {}>'.format(
            _scalar.get_typename(self.data.dtype),
            _scalar.get_typename(out.dtype))
        ker = module.get_function(prefix + ker_name)

        # One single-thread block per output element.
        ker((out_shape, ), (1, ),
            (self.data, self.indices, self.indptr[:len(self.indptr) - 1],
             self.indptr[1:], cupy.int64(self.shape[axis]), out))

        return out

예제 #15

0

파일 보기

파일: _generator.py 프로젝트: the-lay/cupy

# Element-wise kernel that swaps entries of ``array`` in place.
#
# For each thread ``i`` a partner ``j = (sample[i] & 0x7fffffff) % num`` is
# derived, where ``num`` is the kernel's element count (``_ind.size()``).
# The thread does nothing when ``j == i`` or ``j`` lies outside
# ``[j_start, min(_j_end, num))``. Otherwise it tries to exchange
# ``array[i]`` and ``array[j]`` using ``atomicExch`` with ``-1`` as an
# "in use" marker, retrying with a shifted ``j`` for at most 256 attempts.
# NOTE(review): because -1 is the marker, ``array`` presumably must not
# contain -1 as a real value -- confirm against the caller.
_cupy_permutation = _core.ElementwiseKernel(
    'raw int32 sample, int32 j_start, int32 _j_end',
    'raw int32 array',
    '''
        const int invalid = -1;
        const int num = _ind.size();
        int j = (sample[i] & 0x7fffffff) % num;
        int j_end = _j_end;
        if (j_end > num) j_end = num;
        if (j == i || j < j_start || j >= j_end) continue;

        // If a thread fails to do data swaping once, it changes j
        // value using j_offset below and try data swaping again.
        // This process is repeated until data swapping is succeeded.
        // The j_offset is determined from the initial j
        // (random number assigned to each thread) and the initial
        // offset between j and i (ID of each thread).
        // If a given number sequence in sample is really random,
        // this j-update would not be necessary. This is work-around
        // mainly to avoid potential eternal conflict when sample has
        // rather synthetic number sequence.
        int j_offset = ((2*j - i + num) % (num - 1)) + 1;

        // A thread gives up to do data swapping if loop count exceed
        // a threathod determined below. This is kind of safety
        // mechanism to escape the eternal race condition, though I
        // believe it never happens.
        int loops = 256;

        bool do_next = true;
        while (do_next && loops > 0) {
            // try to swap the contents of array[i] and array[j]
            if (i != j) {
                int val_j = atomicExch(&array[j], invalid);
                if (val_j != invalid) {
                    int val_i = atomicExch(&array[i], invalid);
                    if (val_i != invalid) {
                        array[i] = val_j;
                        array[j] = val_i;
                        do_next = false;
                        // done
                    }
                    else {
                        // restore array[j]
                        array[j] = val_j;
                    }
                }
            }
            j = (j + j_offset) % num;
            loops--;
        }
    ''',
    'cupy_permutation',
)

예제 #16

0

파일 보기

        cupy.ndarray: A 2-D diagonal array with the diagonal copied from ``v``.

    .. seealso:: :func:`numpy.diagflat`

    """
    if numpy.isscalar(v):
        v = numpy.asarray(v)

    return cupy.diag(v.ravel(), k)


# Element-wise kernel that fills ``out`` with a lower-triangular mask.
# The flat index ``i`` is split into ``row = i / m`` and ``col = i % m``
# (so ``m`` acts as the row length); ``out`` is set to the truth value of
# ``col <= row + k``.
_tri_kernel = _core.ElementwiseKernel(
    'int32 m, int32 k',
    'T out',
    '''
    int row = i / m;
    int col = i % m;
    out = (col <= row + k);
    ''',
    'cupy_tri',
)


def tri(N, M=None, k=0, dtype=float):
    """Creates an array with ones at and below the given diagonal.

    Args:
        N (int): Number of rows.
        M (int): Number of columns. ``M == N`` by default.
        k (int): The sub-diagonal at and below which the array is filled. Zero
            is the main diagonal, a positive value is above it, and a negative
            value is below.

예제 #17

0

파일 보기

파일: iterate.py 프로젝트: toslunar/cupy

        """A reference to the array that is iterated over."""
        return self._base

    # TODO(Takagi): Implement coords

    # TODO(Takagi): Implement index

    # TODO(Takagi): Implement __lt__

    # TODO(Takagi): Implement __le__

    # TODO(Takagi): Implement __eq__

    # TODO(Takagi): Implement __ne__

    # TODO(Takagi): Implement __ge__

    # TODO(Takagi): Implement __gt__

    def __len__(self):
        """Return ``self.base.size``, the size of the underlying array."""
        return self.base.size


# Strided scatter: thread ``i`` writes ``val[i % val.size()]`` to
# ``a[start + i * step]``, so ``val`` is cycled when it has fewer elements
# than there are threads.
_flatiter_setitem_slice = _core.ElementwiseKernel(
    'raw T val, int64 start, int64 step', 'raw T a',
    'a[start + i * step] = val[i % val.size()]', 'cupy_flatiter_setitem_slice')

# Strided gather: output element ``i`` receives ``a[start + i * step]``.
_flatiter_getitem_slice = _core.ElementwiseKernel(
    'raw T a, int64 start, int64 step', 'T o', 'o = a[start + i * step]',
    'cupy_flatiter_getitem_slice')

예제 #18

0

파일 보기

# Element-wise binary search of each ``x`` in ``bins`` (``n_bins`` entries).
#
# ``side_is_right`` selects which side of equal elements the insertion point
# falls on. ``assume_increassing`` (spelling is part of the kernel signature)
# skips the direction check; when it is false the direction is derived from
# the first and last bin so decreasing arrays are handled too. NaN values in
# ``x`` and ``bins`` are handled explicitly before the search.
# ``no_thread_divergence``, ``_isnan`` and the HIP-specific ``is_done`` flag
# come from ``_preamble`` / ``_hip_preamble``, defined elsewhere in the file.
_searchsorted_kernel = _core.ElementwiseKernel(
    'S x, raw T bins, int64 n_bins, bool side_is_right, '
    'bool assume_increassing',
    'int64 y',
    '''
    #ifdef __HIP_DEVICE_COMPILE__
    bool is_done = false;
    #endif

    // Array is assumed to be monotonically
    // increasing unless a check is requested with the
    // `assume_increassing = False` parameter.
    // `digitize` allows increasing and decreasing arrays.
    bool inc = true;
    if (!assume_increassing && n_bins >= 2) {
        // In the case all the bins are nan the array is considered
        // to be decreasing in numpy
        inc = (bins[0] <= bins[n_bins-1])
              || (!_isnan<T>(bins[0]) && _isnan<T>(bins[n_bins-1]));
    }

    if (_isnan<S>(x)) {
        long long pos = (inc ? n_bins : 0);
        if (!side_is_right) {
            if (inc) {
                while (pos > 0 && _isnan<T>(bins[pos-1])) {
                    --pos;
                }
            } else {
                while (pos < n_bins && _isnan<T>(bins[pos])) {
                    ++pos;
                }
            }
        }
        no_thread_divergence( y = pos , true )
    }

    bool greater = false;
    if (side_is_right) {
        greater = inc && x >= bins[n_bins-1];
    } else {
        greater = (inc ? x > bins[n_bins-1] : x <= bins[n_bins-1]);
    }
    if (greater) {
        no_thread_divergence( y = n_bins , true )
    }

    long long left = 0;
    // In the case the bins is all NaNs, digitize
    // needs to place all the valid values to the right
    if (!inc) {
        // Bounds check first so bins[n_bins] is never read.
        while (left < n_bins && _isnan<T>(bins[left])) {
            ++left;
        }
        if (left == n_bins) {
            no_thread_divergence( y = n_bins , true )
        }
        if (side_is_right
                && !_isnan<T>(bins[n_bins-1]) && !_isnan<S>(x)
                && bins[n_bins-1] > x) {
            no_thread_divergence( y = n_bins , true )
        }
    }

    long long right = n_bins-1;
    while (left < right) {
        long long m = left + (right - left) / 2;
        bool look_right = true;
        if (side_is_right) {
            look_right = (inc ? bins[m] <= x : bins[m] > x);
        } else {
            look_right = (inc ? bins[m] < x : bins[m] >= x);
        }
        if (look_right) {
            left = m + 1;
        } else {
            right = m;
        }
    }
    no_thread_divergence( y = right , false )
    ''',
    preamble=_preamble + _hip_preamble)

예제 #19

0

파일 보기

파일: packing.py 프로젝트: the-lay/cupy

import cupy
from cupy import _core

# Packs eight consecutive input elements into one output byte.
# Output element ``i`` covers input positions ``8*i .. 8*i+7``; a position
# contributes a set bit when it is inside ``myarray_size`` and its value is
# non-zero, with the first position stored in the most significant bit.
# NOTE(review): bits are OR-ed into ``packed``, so the output buffer is
# presumably zero-initialised by the caller -- confirm.
_packbits_kernel = _core.ElementwiseKernel(
    'raw T myarray, raw int32 myarray_size', 'uint8 packed',
    '''for (int j = 0; j < 8; ++j) {
        int k = i * 8 + j;
        int bit = k < myarray_size && myarray[k] != 0;
        packed |= bit << (7 - j);
    }''', 'packbits_kernel')


def packbits(myarray):
    """Packs the elements of a binary-valued array into bits in a uint8 array.

    This function currently does not support ``axis`` option.

    Args:
        myarray (cupy.ndarray): Input array.

    Returns:
        cupy.ndarray: The packed array.

    .. note::
        When the input array is empty, this function returns a copy of it,
        i.e., the type of the output array is not necessarily always uint8.
        This exactly follows the NumPy's behaviour (as of version 1.11),
        alghough this is inconsistent to the documentation.

    .. seealso:: :func:`numpy.packbits`
    """

예제 #20

0

파일 보기

            If `v` is shorter than `ind` it will be repeated as necessary.
        mode (str): How out-of-bounds indices will behave. Its value must be
            either `'raise'`, `'wrap'` or `'clip'`. Otherwise,
            :class:`TypeError` is raised.

    .. note::
        Default `mode` is set to `'wrap'` to avoid unintended performance drop.
        If you need NumPy's behavior, please pass `mode='raise'` manually.

    .. seealso:: :func:`numpy.put`
    """
    a.put(ind, v, mode=mode)


# Where ``mask`` is true, overwrites ``out`` with ``values[i % len_vals]``
# cast to the output type; ``values`` is therefore cycled when it is shorter
# than the output. Elements with a false mask are left unchanged.
_putmask_kernel = _core.ElementwiseKernel(
    'Q mask, raw S values, uint64 len_vals', 'T out', '''
    if (mask) out = (T) values[i % len_vals];
    ''', 'putmask_kernel')


def putmask(a, mask, values):
    """
    Changes elements of an array inplace, based on a conditional mask and
    input values.

    Sets ``a.flat[n] = values[n]`` for each n where ``mask.flat[n]==True``.
    If `values` is not the same size as `a` and `mask` then it will repeat.

    Args:
        a (cupy.ndarray): Target array.
        mask (cupy.ndarray): Boolean mask array. It has to be
            the same shape as `a`.

예제 #21

0

파일 보기

파일: window.py 프로젝트: toslunar/cupy

import numpy

import cupy
from cupy import _core

# Evaluates ``0.42 - 0.5*cos(i*alpha) + 0.08*cos(2*alpha*i)`` for each
# output index ``i``.
# NOTE(review): ``alpha`` is presumably 2*pi/(M-1) for a window of length M;
# the caller is not visible here -- confirm.
_blackman_kernel = _core.ElementwiseKernel(
    "float32 alpha",
    "float64 out",
    """
    out = 0.42 - 0.5 * cos(i * alpha) + 0.08 * cos(2 * alpha * i);
    """, name="cupy_blackman")


# Triangular ramp: ``i / alpha`` while ``i < alpha``, then
# ``2.0 - i / alpha`` afterwards.
# NOTE(review): ``alpha`` is presumably (M-1)/2 for a window of length M;
# the caller is not visible here -- confirm.
_bartlett_kernel = _core.ElementwiseKernel(
    "float32 alpha",
    "T arr",
    """
    if (i < alpha)
        arr = i / alpha;
    else
        arr = 2.0 - i / alpha;
    """, name="cupy_bartlett")


def bartlett(M):
    """Returns the Bartlett window.

    The Bartlett window is defined as

    .. math::
            w(n) = \\frac{2}{M-1} \\left(

예제 #22

0

파일 보기

파일: _util.py 프로젝트: toslunar/cupy

    config_linalg = cupyx._ufunc_config.get_config_linalg()
    # Only 'ignore' and 'raise' are currently supported.
    if config_linalg == 'ignore':
        return

    assert config_linalg == 'raise'
    if (info_array != 0).any():
        raise linalg.LinAlgError(
            'Error reported by {} in cuBLAS. infoArray/devInfoArray = {}.'
            ' Please refer to the cuBLAS documentation.'.format(
                routine.__name__, info_array))


# Keeps ``x`` where (last index - second-to-last index) <= k and zeroes it
# elsewhere, in place. Indexing from the end of ``_ind`` makes the kernel
# work on a batch of matrices as well as on a single 2-D matrix, for which
# the two indices are simply column and row.
_tril_kernel = _core.ElementwiseKernel(
    'int64 k', 'S x',
    'x = (_ind.get()[_ind.ndim - 1] - _ind.get()[_ind.ndim - 2] <= k) ? x : 0',
    'cupy_tril_kernel',
    reduce_dims=False
)


def _tril(x, k=0):
    """Apply ``_tril_kernel`` to ``x`` in place and return ``x``.

    Args:
        x: Array modified in place by the kernel.
        k (int): Diagonal offset passed to the kernel. Default: 0.

    Returns:
        The same ``x`` object that was passed in.
    """
    _tril_kernel(k, x)
    return x


# support a batch of matrices
_triu_kernel = _core.ElementwiseKernel(
    'int64 k', 'S x',
    'x = (_ind.get()[_ind.ndim - 1] - _ind.get()[_ind.ndim - 2] >= k) ? x : 0',
    'cupy_triu_kernel',
    reduce_dims=False

예제 #23

0

파일 보기

    Returns:
        (cupy.ndarray): The Hadamard matrix.

    .. seealso:: :func:`scipy.linalg.hadamard`
    """
    lg2 = 0 if n < 1 else (int(n).bit_length() - 1)
    if 2**lg2 != n:
        raise ValueError('n must be an positive a power of 2 integer')
    H = cupy.empty((n, n), dtype)
    return _hadamard_kernel(H, H)


# Writes -1 where the bitwise AND of the row and column index has an odd
# number of set bits, and 1 otherwise. The ``in`` argument is not read by
# the kernel body.
_hadamard_kernel = _core.ElementwiseKernel(
    'T in',
    'T out',
    'out = (__popc(_ind.get()[0] & _ind.get()[1]) & 1) ? -1 : 1;',
    'hadamard',
    reduce_dims=False)


def leslie(f, s):
    """Create a Leslie matrix.

    Given the length n array of fecundity coefficients ``f`` and the length n-1
    array of survival coefficients ``s``, return the associated Leslie matrix.

    Args:
        f (cupy.ndarray): The "fecundity" coefficients.
        s (cupy.ndarray): The "survival" coefficients, has to be 1-D.  The
            length of ``s`` must be one less than the length of ``f``, and it
            must be at least 1.

예제 #24

0

파일 보기

파일: histogram.py 프로젝트: the-lay/cupy

from cupy.cuda import common
from cupy.cuda import runtime

# rename builtin range for use in functions that take a range argument
_range = range

# TODO(unno): use searchsorted
# Counts each ``x`` into the bin that contains it.
# Values outside ``[bins[0], bins[n_bins - 1]]`` are skipped. Otherwise a
# binary search finds ``low`` with ``bins[low] <= x`` (a value equal to the
# last edge lands in the last bin) and ``y[low]`` is incremented atomically.
_histogram_kernel = _core.ElementwiseKernel(
    'S x, raw T bins, int32 n_bins', 'raw U y', '''
    if (x < bins[0] or bins[n_bins - 1] < x) {
        return;
    }
    int high = n_bins - 1;
    int low = 0;

    while (high - low > 1) {
        int mid = (high + low) / 2;
        if (bins[mid] <= x) {
            low = mid;
        } else {
            high = mid;
        }
    }
    atomicAdd(&y[low], U(1));
    ''')

_weighted_histogram_kernel = _core.ElementwiseKernel(
    'S x, raw T bins, int32 n_bins, raw W weights', 'raw Y y', '''
    if (x < bins[0] or bins[n_bins - 1] < x) {
        return;
    }
    int high = n_bins - 1;

예제 #25

0

파일 보기

파일: _index.py 프로젝트: carterbox/cupy

from cupy.cuda import device
from cupy.cuda import runtime

import numpy

try:
    import scipy
    scipy_available = True
except ImportError:
    scipy_available = False

# Tuples of Python/NumPy scalar types, suitable for ``isinstance`` checks.
# NOTE(review): presumably used to recognise scalar integer / boolean
# indices; the call sites are not visible here.
_int_scalar_types = (int, numpy.integer, numpy.int_)
_bool_scalar_types = (bool, numpy.bool_)

# Atomically adds ``d`` into ``answer[0]`` for every element whose index
# ``ind`` equals ``minor``.
_compress_getitem_kern = _core.ElementwiseKernel(
    'T d, S ind, int32 minor', 'raw T answer',
    'if (ind == minor) atomicAdd(&answer[0], d);',
    'cupyx_scipy_sparse_compress_getitem')

# Complex counterpart of the kernel above: real and imaginary parts are
# passed separately and accumulated atomically into ``answer_real[0]`` and
# ``answer_imag[0]`` for every element whose ``ind`` equals ``minor``.
_compress_getitem_complex_kern = _core.ElementwiseKernel(
    'T real, T imag, S ind, int32 minor',
    'raw T answer_real, raw T answer_imag', '''
    if (ind == minor) {
    atomicAdd(&answer_real[0], real);
    atomicAdd(&answer_imag[0], imag);
    }
    ''', 'cupyx_scipy_sparse_compress_getitem_complex')


def _get_csr_submatrix_major_axis(Ax, Aj, Ap, start, stop):
    """Return a submatrix of the input sparse matrix by slicing major axis.

예제 #26

0

파일 보기

class coo_matrix(sparse_data._data_matrix):

    """COOrdinate format sparse matrix.

    This can be instantiated in several ways.

    ``coo_matrix(D)``
        ``D`` is a rank-2 :class:`cupy.ndarray`.

    ``coo_matrix(S)``
        ``S`` is another sparse matrix. It is equivalent to ``S.tocoo()``.

    ``coo_matrix((M, N), [dtype])``
        It constructs an empty matrix whose shape is ``(M, N)``. Default dtype
        is float64.

    ``coo_matrix((data, (row, col)))``
        All ``data``, ``row`` and ``col`` are one-dimensional
        :class:`cupy.ndarray`.

    Args:
        arg1: Arguments for the initializer.
        shape (tuple): Shape of a matrix. Its length must be two.
        dtype: Data type. It must be an argument of :class:`numpy.dtype`.
        copy (bool): If ``True``, copies of given data are always used.

    .. seealso::
       :class:`scipy.sparse.coo_matrix`

    """

    format = 'coo'

    # Marks where a new (row, col) coordinate starts in coordinate arrays:
    # ``diff`` is 0 for the first element and for any element whose
    # coordinate equals that of the previous element, and 1 otherwise.
    _sum_duplicates_diff = _core.ElementwiseKernel(
        'raw T row, raw T col',
        'T diff',
        '''
        T diff_out = 1;
        if (i == 0 || row[i - 1] == row[i] && col[i - 1] == col[i]) {
          diff_out = 0;
        }
        diff = diff_out;
        ''', 'sum_duplicates_diff')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        """Build the matrix from one of the supported ``arg1`` forms.

        ``arg1`` is tested in this order: a cupyx sparse matrix, a shape
        tuple (empty matrix), a SciPy sparse matrix, a
        ``(data, (row, col))`` tuple, and finally a dense array.

        Args:
            arg1: Source object; see the class docstring.
            shape (tuple): Optional two-element shape. It is ignored when
                ``arg1`` is itself a shape or a dense array.
            dtype: Optional data type; defaults to the dtype of the data.
            copy (bool): Whether the given arrays are copied. Forced to
                ``False`` in the branches that already produced new arrays.

        Raises:
            ValueError: If ``shape`` is not two-dimensional or invalid, the
                dtype is unsupported, the index arrays are malformed, or
                an index falls outside the shape.
            TypeError: If ``arg1`` has an unsupported form.
        """
        if shape is not None and len(shape) != 2:
            raise ValueError(
                'Only two-dimensional sparse arrays are supported.')

        if base.issparse(arg1):
            # Another cupyx sparse matrix: convert to COO and reuse arrays.
            x = arg1.asformat(self.format)
            data = x.data
            row = x.row
            col = x.col

            if arg1.format != self.format:
                # When formats are different, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

            self.has_canonical_format = x.has_canonical_format

        elif _util.isshape(arg1):
            # A bare shape: build an empty matrix.
            m, n = arg1
            m, n = int(m), int(n)
            data = cupy.zeros(0, dtype if dtype else 'd')
            row = cupy.zeros(0, dtype='i')
            col = cupy.zeros(0, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False

            self.has_canonical_format = True

        elif _scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupyx.scipy.sparse
            x = arg1.tocoo()
            data = cupy.array(x.data)
            row = cupy.array(x.row, dtype='i')
            col = cupy.array(x.col, dtype='i')
            copy = False
            if shape is None:
                shape = arg1.shape

            self.has_canonical_format = x.has_canonical_format

        elif isinstance(arg1, tuple) and len(arg1) == 2:
            # Explicit (data, (row, col)) triplet arrays.
            try:
                data, (row, col) = arg1
            except (TypeError, ValueError):
                raise TypeError('invalid input format')

            if not (base.isdense(data) and data.ndim == 1 and
                    base.isdense(row) and row.ndim == 1 and
                    base.isdense(col) and col.ndim == 1):
                raise ValueError('row, column, and data arrays must be 1-D')
            if not (len(data) == len(row) == len(col)):
                raise ValueError(
                    'row, column, and data array must all be the same length')

            # User-supplied coordinates may be unsorted or duplicated.
            self.has_canonical_format = False

        elif base.isdense(arg1):
            # Dense array: keep only the non-zero entries.
            if arg1.ndim > 2:
                raise TypeError('expected dimension <= 2 array or matrix')
            dense = cupy.atleast_2d(arg1)
            row, col = dense.nonzero()
            data = dense[row, col]
            shape = dense.shape

            self.has_canonical_format = True

        else:
            raise TypeError('invalid input format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype != 'f' and dtype != 'd' and dtype != 'F' and dtype != 'D':
            raise ValueError(
                'Only float32, float64, complex64 and complex128'
                ' are supported')

        data = data.astype(dtype, copy=copy)
        row = row.astype('i', copy=copy)
        col = col.astype('i', copy=copy)

        if shape is None:
            # Infer the shape from the largest indices.
            if len(row) == 0 or len(col) == 0:
                raise ValueError(
                    'cannot infer dimensions from zero sized index arrays')
            shape = (int(row.max()) + 1, int(col.max()) + 1)

        if len(data) > 0:
            # Every coordinate must lie inside the matrix.
            if row.max() >= shape[0]:
                raise ValueError('row index exceeds matrix dimensions')
            if col.max() >= shape[1]:
                raise ValueError('column index exceeds matrix dimensions')
            if row.min() < 0:
                raise ValueError('negative row index found')
            if col.min() < 0:
                raise ValueError('negative column index found')

        sparse_data._data_matrix.__init__(self, data)
        self.row = row
        self.col = col
        if not _util.isshape(shape):
            raise ValueError('invalid shape (must be a 2-tuple of int)')
        self._shape = int(shape[0]), int(shape[1])

    def _with_data(self, data, copy=True):
        """Return a matrix sharing this sparsity structure but holding ``data``.

        Args:
            data: Values for the new matrix.
            copy (bool): If ``True`` (the default) the index arrays
                ``.row`` and ``.col`` are copied; otherwise they are shared
                with this matrix.

        Returns:
            coo_matrix: New matrix with the same shape and coordinates.
        """
        if copy:
            rows, cols = self.row.copy(), self.col.copy()
        else:
            rows, cols = self.row, self.col
        return coo_matrix(
            (data, (rows, cols)), shape=self.shape, dtype=data.dtype)

    def diagonal(self, k=0):
        """Returns the k-th diagonal of the matrix.

        Args:
            k (int, optional): Which diagonal to get, corresponding to elements
            a[i, i+k]. Default: 0 (the main diagonal).

        Returns:
            cupy.ndarray : The k-th diagonal. An empty array is returned
            when ``k`` lies outside the matrix.
        """
        n_rows, n_cols = self.shape
        if k <= -n_rows or k >= n_cols:
            return cupy.empty(0, dtype=self.data.dtype)

        # For a sub-diagonal (k < 0) the first element sits at row -k, so row
        # numbers are shifted by k to obtain positions inside the result.
        lower_shift = min(k, 0)
        length = min(n_rows + lower_shift, n_cols - max(k, 0))
        result = cupy.zeros(length, dtype=self.dtype)
        on_diag = (self.row + k) == self.col

        if self.has_canonical_format:
            diag_rows = self.row[on_diag]
            diag_vals = self.data[on_diag]
        else:
            # Duplicate coordinates must be summed before scattering.
            diag_rows, _, diag_vals = self._sum_duplicates(
                self.row[on_diag], self.col[on_diag], self.data[on_diag])
        result[diag_rows + lower_shift] = diag_vals

        return result

    def setdiag(self, values, k=0):
        """Set diagonal or off-diagonal elements of the array.

        Args:
            values (ndarray): New values of the diagonal elements. Values may
                have any length. If the diagonal is longer than values, then
                the remaining diagonal entries will not be set. If values are
                longer than the diagonal, then the remaining values are
                ignored. If a scalar value is given, all of the diagonal is set
                to it.
            k (int, optional): Which off-diagonal to set, corresponding to
                elements a[i,i+k]. Default: 0 (the main diagonal).

        Raises:
            ValueError: If ``k`` lies outside the matrix.

        """
        M, N = self.shape
        if (k > 0 and k >= N) or (k < 0 and -k >= M):
            raise ValueError("k exceeds matrix dimensions")
        # Accept Python scalars and sequences as documented; an existing
        # cupy.ndarray is passed through unchanged.
        values = cupy.asarray(values)
        if values.ndim and not len(values):
            return
        idx_dtype = self.row.dtype

        # Determine which triples to keep and where to put the new ones.
        full_keep = self.col - self.row != k
        if k < 0:
            max_index = min(M + k, N)
            if values.ndim:
                max_index = min(max_index, len(values))
            keep = cupy.logical_or(full_keep, self.col >= max_index)
            new_row = cupy.arange(-k, -k + max_index, dtype=idx_dtype)
            new_col = cupy.arange(max_index, dtype=idx_dtype)
        else:
            max_index = min(M, N - k)
            if values.ndim:
                max_index = min(max_index, len(values))
            keep = cupy.logical_or(full_keep, self.row >= max_index)
            new_row = cupy.arange(max_index, dtype=idx_dtype)
            new_col = cupy.arange(k, k + max_index, dtype=idx_dtype)

        # Define the array of data consisting of the entries to be added.
        if values.ndim:
            new_data = values[:max_index]
        else:
            new_data = cupy.empty(max_index, dtype=self.dtype)
            new_data[:] = values

        # Update the internal structure.
        self.row = cupy.concatenate((self.row[keep], new_row))
        self.col = cupy.concatenate((self.col[keep], new_col))
        self.data = cupy.concatenate((self.data[keep], new_data))
        # Appended entries are not merged or sorted with the kept ones.
        self.has_canonical_format = False

    def eliminate_zeros(self):
        """Removes explicitly stored zero entries in place."""
        nonzero = self.data != 0
        # Filter all three coordinate arrays with the same mask so that they
        # stay aligned.
        self.data, self.row, self.col = (
            self.data[nonzero], self.row[nonzero], self.col[nonzero])

    def get_shape(self):
        """Returns the dimensions of the matrix.

        Returns:
            tuple: The stored shape of the matrix.
        """
        shape = self._shape
        return shape

    def getnnz(self, axis=None):
        """Counts the stored values, explicit zeros included.

        Args:
            axis: Only ``None`` is accepted.

        Returns:
            int: The number of stored values.

        Raises:
            ValueError: If ``axis`` is not ``None``.
        """
        if axis is not None:
            raise ValueError
        return self.data.size

    def get(self, stream=None):
        """Copies the matrix to host memory.

        Args:
            stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
                copy runs asynchronously. Otherwise, the copy is synchronous.

        Returns:
            scipy.sparse.coo_matrix: Copy of the array on host memory.

        Raises:
            RuntimeError: If SciPy is not available.

        """
        if not _scipy_available:
            raise RuntimeError('scipy is not available')

        # Transfer the values first, then the row and column indices.
        host_data, host_row, host_col = [
            device_array.get(stream)
            for device_array in (self.data, self.row, self.col)]
        coords = (host_row, host_col)
        return scipy.sparse.coo_matrix((host_data, coords), shape=self.shape)

    def reshape(self, *shape, order='C'):
        """Gives a new shape to a sparse matrix without changing its data.

        Args:
            shape (tuple):
                The new shape should be compatible with the original shape.
            order: {'C', 'F'} (optional)
                Read the elements using this index order. 'C' means to read and
                write the elements using C-like index order. 'F' means to read
                and write the elements using Fortran-like index order. Default:
                C.

        Returns:
            cupyx.scipy.sparse.coo_matrix: sparse matrix. The matrix itself is
            returned when the requested shape equals the current one;
            otherwise the new matrix shares the data array with this one.

        Raises:
            ValueError: If ``order`` is neither ``'C'`` nor ``'F'``.

        """

        shape = sputils.check_shape(shape, self.shape)

        if shape == self.shape:
            return self

        nrows, ncols = self.shape

        if order == 'C':  # C to represent matrix in row major format
            # Row-major flat index of (row, col) is ``row * ncols + col``.
            dtype = sputils.get_index_dtype(maxval=(ncols * max(0, nrows - 1) +
                                                    max(0, ncols - 1)))
            flat_indices = cupy.multiply(ncols, self.row,
                                         dtype=dtype) + self.col
            new_row, new_col = divmod(flat_indices, shape[1])
        elif order == 'F':
            # Column-major flat index of (row, col) is ``col * nrows + row``;
            # the column index must take part, otherwise every entry of a row
            # collapses onto the same position.
            dtype = sputils.get_index_dtype(maxval=(nrows * max(0, ncols - 1) +
                                                    max(0, nrows - 1)))
            flat_indices = cupy.multiply(nrows, self.col,
                                         dtype=dtype) + self.row
            new_col, new_row = divmod(flat_indices, shape[0])
        else:
            raise ValueError("'order' must be 'C' or 'F'")

        # The stored values are unchanged; only their coordinates move.
        new_data = self.data

        return coo_matrix((new_data, (new_row, new_col)), shape=shape,
                          copy=False)

    def sum_duplicates(self):
        """Eliminate duplicate matrix entries by adding them together.

        This is an in-place operation. Nothing is done when
        ``has_canonical_format`` is already set; otherwise ``data``, ``row``
        and ``col`` are replaced and ``has_canonical_format`` is set to
        ``True``.

        .. warning::
            When sorting the indices, CuPy follows the convention of cuSPARSE,
            which is different from that of SciPy. Therefore, the order of the
            output indices may differ:

            .. code-block:: python

                >>> #     1 0 0
                >>> # A = 1 1 0
                >>> #     1 1 1
                >>> data = cupy.array([1, 1, 1, 1, 1, 1], 'f')
                >>> row = cupy.array([0, 1, 1, 2, 2, 2], 'i')
                >>> col = cupy.array([0, 0, 1, 0, 1, 2], 'i')
                >>> A = cupyx.scipy.sparse.coo_matrix((data, (row, col)),
                ...                                   shape=(3, 3))
                >>> a = A.get()
                >>> A.sum_duplicates()
                >>> a.sum_duplicates()  # a is scipy.sparse.coo_matrix
                >>> A.row
                array([0, 1, 1, 2, 2, 2], dtype=int32)
                >>> a.row
                array([0, 1, 2, 1, 2, 2], dtype=int32)
                >>> A.col
                array([0, 0, 1, 0, 1, 2], dtype=int32)
                >>> a.col
                array([0, 0, 0, 1, 1, 2], dtype=int32)

        .. warning::
            Calling this function might synchronize the device.

        .. seealso::
           :meth:`scipy.sparse.coo_matrix.sum_duplicates`

        """
        if self.has_canonical_format:
            return
        # Note: The sorting order below follows the cuSPARSE convention (first
        # row then col, so-called row-major) and differs from that of SciPy, as
        # the cuSPARSE functions such as cusparseSpMV() assume this sorting
        # order.
        # See https://docs.nvidia.com/cuda/cusparse/index.html#coo-format
        # The row indices are stacked last so that they act as the primary
        # sort key.
        keys = cupy.stack([self.col, self.row])
        order = cupy.lexsort(keys)
        src_data = self.data[order]
        src_row = self.row[order]
        src_col = self.col[order]
        # NOTE(review): presumably diff[i] is nonzero where sorted entry i has
        # different coordinates from entry i - 1 -- confirm against
        # _sum_duplicates_diff.
        diff = self._sum_duplicates_diff(src_row, src_col, size=self.row.size)

        # Evaluating the truth value of a device array here is a point where
        # the device may be synchronized.
        if diff[1:].all():
            # All elements have different indices.
            data = src_data
            row = src_row
            col = src_col
        else:
            # TODO(leofang): move the kernels outside this method
            # The running sum of ``diff`` gives, for every sorted entry, the
            # output slot it is accumulated into.
            index = cupy.cumsum(diff, dtype='i')
            size = int(index[-1]) + 1
            # ``data`` starts at zero because the kernels accumulate into it;
            # ``row`` and ``col`` are plainly overwritten by the kernels.
            data = cupy.zeros(size, dtype=self.data.dtype)
            row = cupy.empty(size, dtype='i')
            col = cupy.empty(size, dtype='i')
            # NOTE(review): only dtype kinds 'f' and 'c' launch a kernel; for
            # any other kind ``data`` stays zero and ``row``/``col`` stay
            # uninitialized. Presumably other dtypes are rejected before this
            # point -- confirm against the constructor.
            if self.data.dtype.kind == 'f':
                cupy.ElementwiseKernel(
                    'T src_data, int32 src_row, int32 src_col, int32 index',
                    'raw T data, raw int32 row, raw int32 col',
                    '''
                    atomicAdd(&data[index], src_data);
                    row[index] = src_row;
                    col[index] = src_col;
                    ''',
                    'sum_duplicates_assign'
                )(src_data, src_row, src_col, index, data, row, col)
            elif self.data.dtype.kind == 'c':
                # Complex values are accumulated through separate real and
                # imaginary views of the same arrays.
                cupy.ElementwiseKernel(
                    'T src_real, T src_imag, int32 src_row, int32 src_col, '
                    'int32 index',
                    'raw T real, raw T imag, raw int32 row, raw int32 col',
                    '''
                    atomicAdd(&real[index], src_real);
                    atomicAdd(&imag[index], src_imag);
                    row[index] = src_row;
                    col[index] = src_col;
                    ''',
                    'sum_duplicates_assign_complex'
                )(src_data.real, src_data.imag, src_row, src_col, index,
                  data.real, data.imag, row, col)

        self.data = data
        self.row = row
        self.col = col
        self.has_canonical_format = True

    def toarray(self, order=None, out=None):
        """Builds the dense array equivalent of this matrix.

        The conversion goes through the CSR representation of the matrix.

        Args:
            order (str): Forwarded unchanged to the ``toarray`` method of the
                CSR matrix.
            out: Forwarded unchanged to the ``toarray`` method of the CSR
                matrix.

        Returns:
            cupy.ndarray: Dense array representing the same value.

        .. seealso:: :meth:`scipy.sparse.coo_matrix.toarray`

        """
        as_csr = self.tocsr()
        return as_csr.toarray(order=order, out=out)

    def tocoo(self, copy=False):
        """Converts the matrix to COOrdinate format.

        The matrix is already in this format, so no conversion takes place.

        Args:
            copy (bool): If ``True``, a copy of the matrix is returned.
                If ``False``, the matrix itself is returned.

        Returns:
            cupyx.scipy.sparse.coo_matrix: Converted matrix.

        """
        return self.copy() if copy else self

    def tocsc(self, copy=False):
        """Converts the matrix to Compressed Sparse Column format.

        Args:
            copy (bool): Ignored. The conversion always works on a copy,
                because no array can be shared between the COO and the CSC
                representation.

        Returns:
            cupyx.scipy.sparse.csc_matrix: Converted matrix.

        """
        if self.nnz == 0:
            # Nothing is stored: build an empty matrix of the same shape.
            return csc.csc_matrix(self.shape, dtype=self.dtype)
        # copy is silently ignored (in line with SciPy) because both
        # sum_duplicates and coosort change the underlying data
        work = self.copy()
        work.sum_duplicates()
        cusparse.coosort(work, 'c')
        converted = cusparse.coo2csc(work)
        converted.has_canonical_format = True
        return converted

    def tocsr(self, copy=False):
        """Converts the matrix to Compressed Sparse Row format.

        Args:
            copy (bool): Ignored. The conversion always works on a copy,
                because no array can be shared between the COO and the CSR
                representation.

        Returns:
            cupyx.scipy.sparse.csr_matrix: Converted matrix.

        """
        if self.nnz == 0:
            # Nothing is stored: build an empty matrix of the same shape.
            return csr.csr_matrix(self.shape, dtype=self.dtype)
        # copy is silently ignored (in line with SciPy) because both
        # sum_duplicates and coosort change the underlying data
        work = self.copy()
        work.sum_duplicates()
        cusparse.coosort(work, 'r')
        converted = cusparse.coo2csr(work)
        converted.has_canonical_format = True
        return converted

    def transpose(self, axes=None, copy=False):
        """Returns the transpose of the matrix.

        The transpose is built by swapping the row and column index arrays.

        Args:
            axes: This option is not supported; it must be ``None``.
            copy (bool): If ``True``, a returned matrix shares no data.
                Otherwise, it shares data arrays as much as possible.

        Returns:
            cupyx.scipy.sparse.spmatrix: Transpose matrix.

        Raises:
            ValueError: If ``axes`` is not ``None``.

        """
        if axes is not None:
            raise ValueError(
                "Sparse matrices do not support an 'axes' parameter because "
                "swapping dimensions is the only logical permutation.")
        swapped_shape = (self.shape[1], self.shape[0])
        swapped_coords = (self.col, self.row)
        return coo_matrix(
            (self.data, swapped_coords), shape=swapped_shape, copy=copy)

예제 #27

0

파일 보기

파일: _measurements.py 프로젝트: takagi/cupy

            if (yi == labels[j]) break;
            if (yi < labels[j]) j_max = j - 1;
            else j_min = j + 1;
            j = (j_min + j_max) / 2;
        }
        y[i] = j + 1;
        ''',
        'cupyx_scipy_ndimage_label_finalize')


# Elementwise kernel accumulating squared deviations per label.
# For every (input, labels) pair it scans ``index[0:size]`` linearly for the
# first entry equal to the element's label and atomically adds
# ``(input - mean[j]) ** 2`` to ``out[j]``; elements whose label is not found
# in ``index`` contribute nothing.
# NOTE(review): ``out`` is only ever added to, so it presumably has to be
# zero-initialized by the caller -- confirm at the call site.
_ndimage_variance_kernel = _core.ElementwiseKernel(
    'T input, R labels, raw X index, uint64 size, raw float64 mean',
    'raw float64 out',
    """
    for (ptrdiff_t j = 0; j < size; j++) {
      if (labels == index[j]) {
        atomicAdd(&out[j], (input - mean[j]) * (input - mean[j]));
        break;
      }
    }
    """,
    'cupyx_scipy_ndimage_variance')


_ndimage_sum_kernel = _core.ElementwiseKernel(
    'T input, R labels, raw X index, uint64 size',
    'raw float64 out',
    """
    for (ptrdiff_t j = 0; j < size; j++) {
      if (labels == index[j]) {
        atomicAdd(&out[j], input);
        break;

예제 #28

0

파일 보기

파일: add_remove.py 프로젝트: the-lay/cupy

            filled.  If ``axis`` is None, ``out`` is a flattened array.

    .. seealso:: :func:`numpy.append`
    """
    # TODO(asi1024): Implement fast path for scalar inputs.
    arr = cupy.asarray(arr)
    values = cupy.asarray(values)
    if axis is None:
        return _core.concatenate_method((arr.ravel(), values.ravel()),
                                        0).ravel()
    return _core.concatenate_method((arr, values), axis)


# Elementwise kernel that fills the output by cycling through the source:
# output element ``i`` receives ``x[i % size]``.
# NOTE(review): ``size`` is presumably the number of elements of ``x`` --
# confirm at the call site.
_resize_kernel = _core.ElementwiseKernel(
    'raw T x, int64 size',
    'T y',
    'y = x[i % size]',
    'resize',
)


def resize(a, new_shape):
    """Return a new array with the specified shape.

    If the new array is larger than the original array, then the new
    array is filled with repeated copies of ``a``.  Note that this behavior
    is different from a.resize(new_shape) which fills with zeros instead
    of repeated copies of ``a``.

    Args:
        a (array_like): Array to be resized.
        new_shape (int or tuple of int): Shape of resized array.

예제 #29

0

파일 보기

파일: _generator.py 프로젝트: the-lay/cupy

class RandomState(object):

    """Portable container of a pseudo-random number generator.

    An instance of this class holds the state of a random number generator. The
    state is available only on the device which has been current at the
    initialization of the instance.

    Functions of :mod:`cupy.random` use global instances of this class.
    Different instances are used for different devices. The global state for
    the current device can be obtained by the
    :func:`cupy.random.get_random_state` function.

    Args:
        seed (None or int): Seed of the random number generator. See the
            :meth:`~cupy.random.RandomState.seed` method for detail.
        method (int): Method of the random number generator. Following values
            are available::

               cupy.cuda.curand.CURAND_RNG_PSEUDO_DEFAULT
               cupy.cuda.curand.CURAND_RNG_PSEUDO_XORWOW
               cupy.cuda.curand.CURAND_RNG_PSEUDO_MRG32K3A
               cupy.cuda.curand.CURAND_RNG_PSEUDO_MTGP32
               cupy.cuda.curand.CURAND_RNG_PSEUDO_MT19937
               cupy.cuda.curand.CURAND_RNG_PSEUDO_PHILOX4_32_10

    """

    def __init__(self, seed=None, method=curand.CURAND_RNG_PSEUDO_DEFAULT):
        """Creates the cuRAND generator and seeds it.

        Args:
            seed (None or int): Seed passed on to :meth:`seed`.
            method (int): Generator type handed to
                ``curand.createGenerator``.
        """
        generator = curand.createGenerator(method)
        self._generator = generator
        self.method = method
        self.seed(seed)

    def __del__(self, is_shutting_down=_util.is_shutting_down):
        """Destroys the cuRAND generator owned by this instance, if any."""
        # Nothing is released while the interpreter is shutting down.
        # When createGenerator raises an error, _generator is not initialized,
        # so there is nothing to destroy in that case either.
        if is_shutting_down() or not hasattr(self, '_generator'):
            return
        curand.destroyGenerator(self._generator)

    def _update_seed(self, size):
        """Advances the kernel seed by ``size``, modulo ``_UINT64_MAX``."""
        advanced = self._rk_seed + size
        self._rk_seed = advanced % _UINT64_MAX

    def _generate_normal(self, func, size, dtype, *args):
        """Fills a new array of shape ``size`` using a cuRAND normal routine.

        Args:
            func: cuRAND generation function; it is called with the generator,
                the output pointer, the number of elements and ``*args``.
            size: Requested output shape.
            dtype: Data type of the output array.
            *args: Extra arguments forwarded to ``func``.

        Returns:
            cupy.ndarray: Array of shape ``size`` filled by ``func``.
        """
        # curand functions below don't support odd size.
        # * curand.generateNormal
        # * curand.generateNormalDouble
        # * curand.generateLogNormal
        # * curand.generateLogNormalDouble
        size = _core.get_size(size)
        element_size = _core.internal.prod(size)
        needs_padding = element_size % 2 != 0

        # For an odd element count, generate one extra sample into a flat
        # buffer and drop it afterwards.
        alloc_shape = (element_size + 1,) if needs_padding else size
        out = cupy.empty(alloc_shape, dtype=dtype)
        func(self._generator, out.data.ptr, out.size, *args)
        if needs_padding:
            return out[:element_size].reshape(size)
        return out

    # NumPy compatible functions

    def beta(self, a, b, size=None, dtype=float):
        """Draws samples from the beta distribution.

        When ``size`` is ``None`` the output takes the broadcast shape of
        ``a`` and ``b``.

        .. seealso::
            - :func:`cupy.random.beta` for full documentation
            - :meth:`numpy.random.RandomState.beta`
        """
        a = cupy.asarray(a)
        b = cupy.asarray(b)
        out_shape = cupy.broadcast(a, b).shape if size is None else size
        samples = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.beta_kernel(a, b, self._rk_seed, samples)
        # Advance the seed by the number of samples drawn.
        self._update_seed(samples.size)
        return samples

    def binomial(self, n, p, size=None, dtype=int):
        """Draws samples from the binomial distribution.

        When ``size`` is ``None`` the output takes the broadcast shape of
        ``n`` and ``p``.

        .. seealso::
            - :func:`cupy.random.binomial` for full documentation
            - :meth:`numpy.random.RandomState.binomial`
        """
        n = cupy.asarray(n)
        p = cupy.asarray(p)
        out_shape = cupy.broadcast(n, p).shape if size is None else size
        samples = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.binomial_kernel(n, p, self._rk_seed, samples)
        # Advance the seed by the number of samples drawn.
        self._update_seed(samples.size)
        return samples

    def chisquare(self, df, size=None, dtype=float):
        """Draws samples from the chi-square distribution.

        When ``size`` is ``None`` the output takes the shape of ``df``.

        .. seealso::
            - :func:`cupy.random.chisquare` for full documentation
            - :meth:`numpy.random.RandomState.chisquare`
        """
        df = cupy.asarray(df)
        out_shape = df.shape if size is None else size
        samples = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.chisquare_kernel(df, self._rk_seed, samples)
        # Advance the seed by the number of samples drawn.
        self._update_seed(samples.size)
        return samples

    def dirichlet(self, alpha, size=None, dtype=float):
        """Returns an array of samples drawn from the dirichlet distribution.

        Samples are drawn with the standard gamma kernel and then normalized
        along the last axis.

        Args:
            alpha (array_like): Parameter of the distribution.
            size (None, int or sequence of ints): Leading dimensions of the
                output; the shape of ``alpha`` is appended to it. With
                ``None`` the output has the shape of ``alpha``.
            dtype: Data type of the output array.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.dirichlet` for full documentation
            - :meth:`numpy.random.RandomState.dirichlet`
        """
        alpha = cupy.asarray(alpha)
        if size is None:
            size = alpha.shape
        elif isinstance(size, (int, cupy.integer)):
            size = (size,) + alpha.shape
        else:
            # Build a fresh tuple: ``size += alpha.shape`` would extend a
            # caller-supplied list in place and so modify the argument.
            size = tuple(size) + alpha.shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.standard_gamma_kernel(alpha, self._rk_seed, y)
        # Normalize each sample so that it sums to one along the last axis.
        y /= y.sum(axis=-1, keepdims=True)
        self._update_seed(y.size)
        return y

    def exponential(self, scale=1.0, size=None, dtype=float):
        """Draws samples from an exponential distribution.

        Standard exponential samples are drawn and multiplied by ``scale``.
        When ``size`` is ``None`` the output takes the shape of ``scale``.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            - :func:`cupy.random.exponential` for full documentation
            - :meth:`numpy.random.RandomState.exponential`
        """
        scale = cupy.asarray(scale, dtype)
        if (scale < 0).any():  # synchronize!
            raise ValueError('scale < 0')
        out_shape = scale.shape if size is None else size
        samples = self.standard_exponential(out_shape, dtype)
        samples *= scale
        return samples

    def f(self, dfnum, dfden, size=None, dtype=float):
        """Draws samples from the f distribution.

        When ``size`` is ``None`` the output takes the broadcast shape of
        ``dfnum`` and ``dfden``.

        .. seealso::
            - :func:`cupy.random.f` for full documentation
            - :meth:`numpy.random.RandomState.f`
        """
        dfnum = cupy.asarray(dfnum)
        dfden = cupy.asarray(dfden)
        out_shape = cupy.broadcast(dfnum, dfden).shape if size is None else size
        samples = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.f_kernel(dfnum, dfden, self._rk_seed, samples)
        # Advance the seed by the number of samples drawn.
        self._update_seed(samples.size)
        return samples

    def gamma(self, shape, scale=1.0, size=None, dtype=float):
        """Draws samples from a gamma distribution.

        Standard gamma samples are drawn for ``shape`` and multiplied by
        ``scale``. When ``size`` is ``None`` the output takes the broadcast
        shape of ``shape`` and ``scale``.

        .. seealso::
            - :func:`cupy.random.gamma` for full documentation
            - :meth:`numpy.random.RandomState.gamma`
        """
        shape = cupy.asarray(shape)
        scale = cupy.asarray(scale)
        out_shape = cupy.broadcast(shape, scale).shape if size is None else size
        samples = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.standard_gamma_kernel(shape, self._rk_seed, samples)
        samples *= scale
        # Advance the seed by the number of samples drawn.
        self._update_seed(samples.size)
        return samples

    def geometric(self, p, size=None, dtype=int):
        """Draws samples from the geometric distribution.

        When ``size`` is ``None`` the output takes the shape of ``p``.

        .. seealso::
            - :func:`cupy.random.geometric` for full documentation
            - :meth:`numpy.random.RandomState.geometric`
        """
        p = cupy.asarray(p)
        out_shape = p.shape if size is None else size
        samples = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.geometric_kernel(p, self._rk_seed, samples)
        # Advance the seed by the number of samples drawn.
        self._update_seed(samples.size)
        return samples

    def hypergeometric(self, ngood, nbad, nsample, size=None, dtype=int):
        """Draws samples from the hypergeometric distribution.

        When ``size`` is ``None`` the output takes the broadcast shape of
        ``ngood``, ``nbad`` and ``nsample``.

        .. seealso::
            - :func:`cupy.random.hypergeometric` for full documentation
            - :meth:`numpy.random.RandomState.hypergeometric`
        """
        ngood = cupy.asarray(ngood)
        nbad = cupy.asarray(nbad)
        nsample = cupy.asarray(nsample)
        if size is None:
            out_shape = cupy.broadcast(ngood, nbad, nsample).shape
        else:
            out_shape = size
        samples = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.hypergeometric_kernel(
            ngood, nbad, nsample, self._rk_seed, samples)
        # Advance the seed by the number of samples drawn.
        self._update_seed(samples.size)
        return samples

    # Elementwise kernel mapping a sample ``x`` to ``loc + scale * t``, where
    # ``t`` is ``log(2x)`` for ``x <= 0.5`` and ``-log(2x - 1)`` otherwise.
    # NOTE(review): ``x`` is presumably a uniform sample from the unit
    # interval -- confirm against _random_sample_raw.
    _laplace_kernel = _core.ElementwiseKernel(
        'T x, T loc, T scale', 'T y',
        'y = loc + scale * ((x <= 0.5) ? log(x + x): -log(x + x - 1.0))',
        'laplace_kernel')

    def laplace(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Draws samples from the laplace distribution.

        Raw random samples are transformed in place by the laplace kernel.
        When ``size`` is ``None`` the output takes the broadcast shape of
        ``loc`` and ``scale``.

        .. seealso::
            - :func:`cupy.random.laplace` for full documentation
            - :meth:`numpy.random.RandomState.laplace`
        """
        loc = cupy.asarray(loc, dtype)
        scale = cupy.asarray(scale, dtype)
        out_shape = cupy.broadcast(loc, scale).shape if size is None else size
        samples = self._random_sample_raw(out_shape, dtype)
        # The kernel writes its result back into the sample buffer.
        RandomState._laplace_kernel(samples, loc, scale, samples)
        return samples

    def logistic(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Draws samples from the logistic distribution.

        Each sample ``u`` produced by the open uniform kernel is mapped to
        ``loc + scale * log((1 - u) / u)``. When ``size`` is ``None`` the
        output takes the broadcast shape of ``loc`` and ``scale``.

        .. seealso::
            - :func:`cupy.random.logistic` for full documentation
            - :meth:`numpy.random.RandomState.logistic`
        """
        loc = cupy.asarray(loc)
        scale = cupy.asarray(scale)
        out_shape = cupy.broadcast(loc, scale).shape if size is None else size
        uniform = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.open_uniform_kernel(self._rk_seed, uniform)
        self._update_seed(uniform.size)
        samples = (1.0 - uniform) / uniform
        # The remaining steps operate in place on ``samples``.
        cupy.log(samples, out=samples)
        cupy.multiply(samples, scale, out=samples)
        cupy.add(samples, loc, out=samples)
        return samples

    def lognormal(self, mean=0.0, sigma=1.0, size=None, dtype=float):
        """Draws samples from a log normal distribution.

        When ``mean`` or ``sigma`` is a :class:`cupy.ndarray`, normal samples
        are drawn and exponentiated in place; otherwise the matching cuRAND
        log-normal generator is called directly.

        .. seealso::
            - :func:`cupy.random.lognormal` for full documentation
            - :meth:`numpy.random.RandomState.lognormal`

        """
        has_array_param = (isinstance(mean, cupy.ndarray)
                           or isinstance(sigma, cupy.ndarray))
        if has_array_param:
            samples = self.normal(mean, sigma, size, dtype)
            cupy.exp(samples, out=samples)
            return samples

        if size is None:
            size = ()
        dtype = _check_and_get_dtype(dtype)
        # Single precision has its own cuRAND entry point; every other dtype
        # accepted above uses the double precision one.
        func = (curand.generateLogNormal if dtype.char == 'f'
                else curand.generateLogNormalDouble)
        return self._generate_normal(func, size, dtype, mean, sigma)

    def logseries(self, p, size=None, dtype=int):
        """Draws samples from a log series distribution.

        ``p`` must lie strictly between 0 and 1. When ``size`` is ``None``
        the output takes the shape of ``p``.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            - :func:`cupy.random.logseries` for full documentation
            - :meth:`numpy.random.RandomState.logseries`

        """
        p = cupy.asarray(p)
        if cupy.any(p <= 0):  # synchronize!
            raise ValueError('p <= 0.0')
        if cupy.any(p >= 1):  # synchronize!
            raise ValueError('p >= 1.0')
        out_shape = p.shape if size is None else size
        samples = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.logseries_kernel(p, self._rk_seed, samples)
        # Advance the seed by the number of samples drawn.
        self._update_seed(samples.size)
        return samples

    def multivariate_normal(self, mean, cov, size=None, check_valid='ignore',
                            tol=1e-08, method='cholesky', dtype=float):
        """Returns an array of samples drawn from the multivariate normal
        distribution.

        Args:
            mean (array_like): One-dimensional mean of the distribution.
            cov (array_like): Square two-dimensional covariance matrix with
                the same length as ``mean``.
            size (None, int or sequence of ints): Leading dimensions of the
                output; the length of ``mean`` is appended to it.
            check_valid (str): One of ``'warn'``, ``'raise'`` or
                ``'ignore'``. With ``'warn'`` a Cholesky factorization is
                always attempted first and ``method`` is only used when that
                factorization fails.
            tol (float): Tolerance used by the positive-semidefiniteness
                checks of the ``'warn'`` mode.
            method (str): Factorization of ``cov``; one of ``'cholesky'``,
                ``'eigh'`` or ``'svd'``.
            dtype: Data type of the samples.

        Returns:
            cupy.ndarray: Samples of shape ``size + (len(mean),)``.

        Raises:
            ValueError: If the shapes of ``mean`` and ``cov`` are invalid or
                ``method``/``check_valid`` has an unknown value.
            LinAlgError: If the factorization fails while ``check_valid`` is
                not ``'warn'``.

        .. warning::
            This function calls one or more cuSOLVER routine(s) which may yield
            invalid results if input conditions are not met.
            To detect these invalid results, you can set the `linalg`
            configuration to a value that is not `ignore` in
            :func:`cupyx.errstate` or :func:`cupyx.seterr`.

        .. seealso::
            - :func:`cupy.random.multivariate_normal` for full documentation
            - :meth:`numpy.random.RandomState.multivariate_normal`
        """
        _util.experimental('cupy.random.RandomState.multivariate_normal')
        mean = cupy.asarray(mean, dtype=dtype)
        cov = cupy.asarray(cov, dtype=dtype)
        if size is None:
            shape = []
        elif isinstance(size, (int, cupy.integer)):
            shape = [size]
        else:
            shape = size

        if len(mean.shape) != 1:
            raise ValueError('mean must be 1 dimensional')
        if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]):
            raise ValueError('cov must be 2 dimensional and square')
        if mean.shape[0] != cov.shape[0]:
            raise ValueError('mean and cov must have same length')

        # Copy the requested shape so that the caller's sequence is untouched.
        final_shape = list(shape[:])
        final_shape.append(mean.shape[0])

        if method not in {'eigh', 'svd', 'cholesky'}:
            raise ValueError(
                "method must be one of {'eigh', 'svd', 'cholesky'}")

        if check_valid not in ('ignore', 'warn', 'raise'):
            raise ValueError(
                "check_valid must equal 'warn', 'raise', or 'ignore'")

        if check_valid == 'warn':
            with cupyx.errstate(linalg='raise'):
                try:
                    decomp = cupy.linalg.cholesky(cov)
                except LinAlgError:
                    # Cholesky failed: fall back to the requested method with
                    # the linalg checks disabled and warn about the result.
                    with cupyx.errstate(linalg='ignore'):
                        if method != 'cholesky':
                            if method == 'eigh':
                                (s, u) = cupy.linalg.eigh(cov)
                                psd = not cupy.any(s < -tol)
                            elif method == 'svd':
                                (u, s, vh) = cupy.linalg.svd(cov)
                                psd = cupy.allclose(cupy.dot(vh.T * s, vh),
                                                    cov, rtol=tol, atol=tol)
                            decomp = u * cupy.sqrt(cupy.abs(s))
                            if not psd:
                                warnings.warn("covariance is not positive-"
                                              "semidefinite, output may be "
                                              "invalid.", RuntimeWarning)

                        else:
                            warnings.warn("covariance is not positive-"
                                          "semidefinite, output *is* "
                                          "invalid.", RuntimeWarning)
                            decomp = cupy.linalg.cholesky(cov)

        else:
            with cupyx.errstate(linalg=check_valid):
                try:
                    if method == 'cholesky':
                        decomp = cupy.linalg.cholesky(cov)
                    elif method == 'eigh':
                        (s, u) = cupy.linalg.eigh(cov)
                        decomp = u * cupy.sqrt(cupy.abs(s))
                    elif method == 'svd':
                        (u, s, vh) = cupy.linalg.svd(cov)
                        decomp = u * cupy.sqrt(cupy.abs(s))

                except LinAlgError:
                    # The pieces of the message are joined with explicit
                    # spaces so that it reads "set 'check_valid' to 'warn'".
                    raise LinAlgError("Matrix is not positive definite; if "
                                      "matrix is positive-semidefinite, set "
                                      "'check_valid' to 'warn'")

        # Draw standard normal samples, one row per requested sample, and
        # transform them with the factor of the covariance matrix.
        x = self.standard_normal(final_shape,
                                 dtype=dtype).reshape(-1, mean.shape[0])
        x = cupy.dot(decomp, x.T)
        x = x.T
        x += mean
        x.shape = tuple(final_shape)
        return x

    def negative_binomial(self, n, p, size=None, dtype=int):
        """Draws samples from the negative binomial distribution.

        The samples are poisson draws whose rates are gamma distributed with
        shape ``n`` and scale ``(1 - p) / p``.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            - :func:`cupy.random.negative_binomial` for full documentation
            - :meth:`numpy.random.RandomState.negative_binomial`
        """
        n = cupy.asarray(n)
        p = cupy.asarray(p)
        if cupy.any(n <= 0):  # synchronize!
            raise ValueError('n <= 0')
        if cupy.any(p < 0):  # synchronize!
            raise ValueError('p < 0')
        if cupy.any(p > 1):  # synchronize!
            raise ValueError('p > 1')
        gamma_scale = (1-p)/p
        rates = self.gamma(n, gamma_scale, size)
        return self.poisson(rates, dtype=dtype)

    def normal(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Draws normally distributed samples.

        Parameters given as :class:`cupy.ndarray` are applied after the
        samples have been generated; other parameters are passed straight to
        the cuRAND generator.

        .. seealso::
            - :func:`cupy.random.normal` for full documentation
            - :meth:`numpy.random.RandomState.normal`

        """
        dtype = _check_and_get_dtype(dtype)
        if size is None:
            size = cupy.broadcast(loc, scale).shape
        func = (curand.generateNormal if dtype.char == 'f'
                else curand.generateNormalDouble)

        if isinstance(scale, cupy.ndarray):
            # Generate standard normal samples, then scale and shift in place.
            samples = self._generate_normal(func, size, dtype, 0.0, 1.0)
            cupy.multiply(samples, scale, out=samples)
            cupy.add(samples, loc, out=samples)
            return samples

        if isinstance(loc, cupy.ndarray):
            # The generator handles the scale; only the shift is applied here.
            samples = self._generate_normal(func, size, dtype, 0.0, scale)
            cupy.add(samples, loc, out=samples)
            return samples

        return self._generate_normal(func, size, dtype, loc, scale)

    def pareto(self, a, size=None, dtype=float):
        """Draws samples from the pareto II distribution.

        Each raw random sample ``u`` is mapped to ``exp(-log(u) / a) - 1``.
        When ``size`` is ``None`` the output takes the shape of ``a``.

        .. seealso::
            - :func:`cupy.random.pareto` for full documentation
            - :meth:`numpy.random.RandomState.pareto`
        """
        a = cupy.asarray(a)
        out_shape = a.shape if size is None else size
        samples = self._random_sample_raw(out_shape, dtype)
        cupy.log(samples, out=samples)
        cupy.exp(-samples/a, out=samples)
        return samples - 1

    def noncentral_chisquare(self, df, nonc, size=None, dtype=float):
        """Draws samples from the noncentral chi-square distribution.

        ``df`` must be positive and ``nonc`` non-negative. When ``size`` is
        ``None`` the output takes the broadcast shape of ``df`` and ``nonc``.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            - :func:`cupy.random.noncentral_chisquare` for full documentation
            - :meth:`numpy.random.RandomState.noncentral_chisquare`
        """
        df = cupy.asarray(df)
        nonc = cupy.asarray(nonc)
        if cupy.any(df <= 0):  # synchronize!
            raise ValueError('df <= 0')
        if cupy.any(nonc < 0):  # synchronize!
            raise ValueError('nonc < 0')
        out_shape = cupy.broadcast(df, nonc).shape if size is None else size
        samples = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.noncentral_chisquare_kernel(
            df, nonc, self._rk_seed, samples)
        # Advance the seed by the number of samples drawn.
        self._update_seed(samples.size)
        return samples

    def noncentral_f(self, dfnum, dfden, nonc, size=None, dtype=float):
        """Draws samples from the noncentral F distribution.

        ``dfnum`` and ``dfden`` must be positive and ``nonc`` non-negative.
        When ``size`` is ``None`` the output takes the broadcast shape of the
        three parameters.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            - :func:`cupy.random.noncentral_f` for full documentation
            - :meth:`numpy.random.RandomState.noncentral_f`
        """
        dfnum = cupy.asarray(dfnum)
        dfden = cupy.asarray(dfden)
        nonc = cupy.asarray(nonc)
        if cupy.any(dfnum <= 0):  # synchronize!
            raise ValueError('dfnum <= 0')
        if cupy.any(dfden <= 0):  # synchronize!
            raise ValueError('dfden <= 0')
        if cupy.any(nonc < 0):  # synchronize!
            raise ValueError('nonc < 0')
        if size is None:
            out_shape = cupy.broadcast(dfnum, dfden, nonc).shape
        else:
            out_shape = size
        samples = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.noncentral_f_kernel(
            dfnum, dfden, nonc, self._rk_seed, samples)
        # Advance the seed by the number of samples drawn.
        self._update_seed(samples.size)
        return samples

    def poisson(self, lam=1.0, size=None, dtype=int):
        """Draws samples from the poisson distribution.

        Args:
            lam: Rate parameter; converted with :func:`cupy.asarray`.
            size: Output shape. When ``None`` the shape of ``lam`` is used.
            dtype: Data type of the output array.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.poisson` for full documentation
            - :meth:`numpy.random.RandomState.poisson`
        """
        lam = cupy.asarray(lam)
        shape = lam.shape if size is None else size
        out = cupy.empty(shape=shape, dtype=dtype)
        _kernels.poisson_kernel(lam, self._rk_seed, out)
        # Advance the kernel seed by the number of generated elements.
        self._update_seed(out.size)
        return out

    def power(self, a, size=None, dtype=float):
        """Draws samples from the power distribution.

        A standard exponential sample ``e`` is transformed as
        ``(1 - exp(-e)) ** (1 / a)``.

        .. warning::

            This function may synchronize the device.

        Args:
            a: Distribution parameter; no element may be negative.
            size: Output shape. When ``None`` the shape of ``a`` is used.
            dtype: Data type handed to the exponential sampler.

        Returns:
            cupy.ndarray: The drawn samples.

        Raises:
            ValueError: If any element of ``a`` is negative.

        .. seealso::
            - :func:`cupy.random.power` for full documentation
            - :meth:`numpy.random.RandomState.power`
        """
        a = cupy.asarray(a)
        negative = cupy.any(a < 0)
        if negative:  # synchronize!
            raise ValueError('a < 0')
        shape = a.shape if size is None else size
        buf = self.standard_exponential(size=shape, dtype=dtype)
        # Every step writes back into the same buffer.
        cupy.exp(-buf, out=buf)
        cupy.add(1, -buf, out=buf)
        cupy.power(buf, 1. / a, out=buf)
        return buf

    def rand(self, *size, **kwarg):
        """Draws uniform random values over the interval ``[0, 1)``.

        Args:
            size: Dimensions of the output, given as positional integers.
            kwarg: Only ``dtype`` is accepted (defaults to ``float``).

        Returns:
            The result of :meth:`random_sample` for the requested shape.

        Raises:
            TypeError: If any keyword other than ``dtype`` is supplied.

        .. seealso::
            - :func:`cupy.random.rand` for full documentation
            - :meth:`numpy.random.RandomState.rand`

        """
        dtype = kwarg.pop('dtype', float)
        if not kwarg:
            return self.random_sample(size=size, dtype=dtype)
        unexpected = ', '.join(kwarg.keys())
        raise TypeError(
            'rand() got unexpected keyword arguments %s' % unexpected)

    def randn(self, *size, **kwarg):
        """Draws standard normal random values.

        Args:
            size: Dimensions of the output, given as positional integers.
            kwarg: Only ``dtype`` is accepted (defaults to ``float``).

        Returns:
            The result of :meth:`normal` for the requested shape.

        Raises:
            TypeError: If any keyword other than ``dtype`` is supplied.

        .. seealso::
            - :func:`cupy.random.randn` for full documentation
            - :meth:`numpy.random.RandomState.randn`

        """
        dtype = kwarg.pop('dtype', float)
        if not kwarg:
            return self.normal(size=size, dtype=dtype)
        unexpected = ', '.join(kwarg.keys())
        raise TypeError(
            'randn() got unexpected keyword arguments %s' % unexpected)

    # Elementwise kernel with no inputs and a single argument ``x`` that is
    # updated in place: an element equal to 1 becomes 0, any other element is
    # left as it is. ``random_sample`` applies it to the raw uniform samples.
    _mod1_kernel = _core.ElementwiseKernel(
        '', 'T x', 'x = (x == (T)1) ? 0 : x', 'cupy_random_x_mod_1')

    def _random_sample_raw(self, size, dtype):
        """Fills a new array with uniform samples straight from cuRAND.

        No post-processing is applied here; callers that need the value 1
        mapped to 0 use :meth:`random_sample` instead.

        Args:
            size: Shape of the array to allocate.
            dtype: Requested data type, normalised by
                ``_check_and_get_dtype``.

        Returns:
            cupy.ndarray: The freshly generated samples.
        """
        dtype = _check_and_get_dtype(dtype)
        buf = cupy.empty(size, dtype=dtype)
        # Type char 'f' selects the single-precision generator; everything
        # else goes through the double-precision one.
        generate = (curand.generateUniform if dtype.char == 'f'
                    else curand.generateUniformDouble)
        generate(self._generator, buf.data.ptr, buf.size)
        return buf

    def random_sample(self, size=None, dtype=float):
        """Draws random values over the interval ``[0, 1)``.

        Args:
            size: Output shape. ``None`` yields a zero-dimensional array.
            dtype: Data type of the samples.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.random_sample` for full documentation
            - :meth:`numpy.random.RandomState.random_sample`

        """
        shape = () if size is None else size
        samples = self._random_sample_raw(shape, dtype)
        # Replace any sample equal to 1 by 0, in place.
        RandomState._mod1_kernel(samples)
        return samples

    def rayleigh(self, scale=1.0, size=None, dtype=float):
        """Draws samples from a rayleigh distribution.

        A raw uniform sample ``u`` is transformed as
        ``scale * sqrt(-2 * log(u))``.

        .. warning::

            This function may synchronize the device.

        Args:
            scale: Scale parameter; no element may be negative.
            size: Output shape. When ``None`` the shape of ``scale`` is
                used.
            dtype: Data type handed to the uniform sampler.

        Returns:
            cupy.ndarray: The drawn samples.

        Raises:
            ValueError: If any element of ``scale`` is negative.

        .. seealso::
            - :func:`cupy.random.rayleigh` for full documentation
            - :meth:`numpy.random.RandomState.rayleigh`
        """
        scale = cupy.asarray(scale)
        shape = scale.shape if size is None else size
        if cupy.any(scale < 0):  # synchronize!
            raise ValueError('scale < 0')
        buf = self._random_sample_raw(shape, dtype)
        # All four ufuncs write back into the same buffer.
        cupy.log(buf, out=buf)
        cupy.multiply(buf, -2., out=buf)
        cupy.sqrt(buf, out=buf)
        cupy.multiply(buf, scale, out=buf)
        return buf

    def _interval(self, mx, size):
        """Samples integers independently and uniformly from ``[0, mx]``.

        Raw 32-bit or 64-bit words are drawn and then reduced to the
        requested range. When ``mx + 1`` is a power of two a bit mask is
        enough; otherwise words above a cut-off are redrawn so that the
        final modulo does not favour small values.

        Args:
            mx (int): Upper bound of the interval (inclusive).
            size (None or int or tuple): Shape of the returned array.
                ``None`` gives shape ``()``, an ``int`` gives a 1-D array of
                that length, a ``tuple`` gives that shape.

        Returns:
            cupy.ndarray: Array of dtype ``uint32`` (``mx`` fits in 32 bits)
            or ``uint64`` with the requested shape.

        Raises:
            ValueError: If ``mx`` is negative or exceeds the uint64 range.
        """
        # Normalise ``size`` to a tuple.
        if size is None:
            size = ()
        elif isinstance(size, int):
            size = size,

        # Only one possible value: no random numbers are needed.
        if mx == 0:
            return cupy.zeros(size, dtype=numpy.uint32)

        # Pick the word width and the rejection cut-off. The values
        # ``0..upper_limit`` number ``2**bits - 2**bits % (mx + 1)``, which
        # is a multiple of ``mx + 1``.
        if mx < 0:
            raise ValueError(
                'mx must be non-negative (actual: {})'.format(mx))
        elif mx <= _UINT32_MAX:
            dtype = numpy.uint32
            upper_limit = _UINT32_MAX - (1 << 32) % (mx + 1)
        elif mx <= _UINT64_MAX:
            dtype = numpy.uint64
            upper_limit = _UINT64_MAX - (1 << 64) % (mx + 1)
        else:
            raise ValueError(
                'mx must be within uint64 range (actual: {})'.format(mx))

        # Total number of elements; an empty request returns right away.
        n_sample = functools.reduce(operator.mul, size, 1)
        if n_sample == 0:
            return cupy.empty(size, dtype=dtype)
        sample = self._curand_generate(n_sample, dtype)

        mx1 = mx + 1
        # True when ``mx1`` is not a power of two.
        if mx1 != (1 << (mx1.bit_length() - 1)):
            # Get index of samples that exceed the upper limit
            ng_indices = self._get_indices(sample, upper_limit, False)
            n_ng = ng_indices.size

            # Keep drawing replacement batches until every rejected slot
            # has been refilled.
            while n_ng > 0:
                n_supplement = max(n_ng * 2, 1024)
                supplement = self._curand_generate(n_supplement, dtype)

                # Get index of supplements that are within the upper limit
                ok_indices = self._get_indices(supplement, upper_limit, True)
                n_ok = ok_indices.size

                # Replace the values that exceed the upper limit
                if n_ok >= n_ng:
                    sample[ng_indices] = supplement[ok_indices[:n_ng]]
                    n_ng = 0
                else:
                    # Not enough good values: fill what we can and keep
                    # the remaining rejected slots for the next round.
                    sample[ng_indices[:n_ok]] = supplement[ok_indices]
                    ng_indices = ng_indices[n_ok:]
                    n_ng -= n_ok
            sample %= mx1
        else:
            # Power-of-two range: keep only the low ``mx.bit_length()`` bits.
            mask = (1 << mx.bit_length()) - 1
            sample &= mask

        return sample.reshape(size)

    def _curand_generate(self, num, dtype):
        """Returns a 1-D array of ``num`` raw random words of ``dtype``.

        The buffer is filled by the 32-bit generator, so the number of
        32-bit words requested is the buffer size viewed as ``uint32``.
        """
        buf = cupy.empty((num,), dtype=dtype)
        n_words32 = buf.view(dtype=numpy.uint32).size
        curand.generate(self._generator, buf.data.ptr, n_words32)
        return buf

    def _get_indices(self, sample, upper_limit, cond):
        """Returns the positions in ``sample`` selected by a threshold test.

        Args:
            sample: Array of drawn values.
            upper_limit: Threshold to compare against.
            cond: When true select ``sample <= upper_limit``, otherwise
                select ``sample > upper_limit``.

        Returns:
            cupy.ndarray: Indices of the selected elements, of dtype
            ``uint32`` or ``uint64`` depending on ``sample.size``.
        """
        index_dtype = numpy.uint64 if sample.size >= 2**32 else numpy.uint32
        if cond:
            selected = sample <= upper_limit
        else:
            selected = sample > upper_limit
        # The running count of selected elements gives each one its output
        # slot; its last entry is the total number selected.
        running = cupy.cumsum(selected, dtype=index_dtype)
        del selected
        found = cupy.empty((int(running[-1]),), dtype=index_dtype)
        self._kernel_get_indices(running, found, size=running.size)
        return found

    # Scatter kernel used by ``_get_indices``. ``csum`` is an inclusive
    # running count of selected elements; wherever the count increases at
    # position ``i``, ``i`` is written to ``indices`` at the slot given by
    # the previous count.
    # The previous count is held in ``ptrdiff_t`` rather than ``int``: the
    # counts are uint32/uint64 and may exceed INT_MAX, which would otherwise
    # wrap and produce an out-of-range write index.
    _kernel_get_indices = _core.ElementwiseKernel(
        'raw U csum', 'raw U indices',
        '''
        ptrdiff_t j = 0;
        if (i > 0) { j = csum[i-1]; }
        if (csum[i] > j) { indices[j] = i; }
        ''',
        'cupy_get_indices')

    def seed(self, seed=None):
        """Resets the state of the random number generator with a seed.

        Args:
            seed: ``None`` to draw a seed from ``os.urandom`` (falling back
                to the current time), a :class:`numpy.ndarray` which is
                hashed with MD5, or any value safely castable to
                ``uint64``.

        .. seealso::
            - :func:`cupy.random.seed` for full documentation
            - :meth:`numpy.random.RandomState.seed`

        """
        if seed is None:
            try:
                seed_str = binascii.hexlify(os.urandom(8))
                seed = int(seed_str, 16)
            except NotImplementedError:
                # ``time.time()`` is a float; truncate first so the seed
                # handed to cuRAND and stored below is always an integer.
                seed = int(time.time() * 1000000) % _UINT64_MAX
        else:
            if isinstance(seed, numpy.ndarray):
                # First 16 hex digits of the digest, i.e. a 64-bit value.
                seed = int(hashlib.md5(seed).hexdigest()[:16], 16)
            else:
                seed = int(
                    numpy.asarray(seed).astype(numpy.uint64, casting='safe'))

        curand.setPseudoRandomGeneratorSeed(self._generator, seed)
        # The offset is reset for every method except MT19937 and MTGP32.
        if (self.method not in (curand.CURAND_RNG_PSEUDO_MT19937,
                                curand.CURAND_RNG_PSEUDO_MTGP32)):
            curand.setGeneratorOffset(self._generator, 0)

        self._rk_seed = seed

    def standard_cauchy(self, size=None, dtype=float):
        """Draws samples from the standard cauchy distribution.

        A uniform sample ``u`` is transformed as ``tan(pi * (u - 0.5))``.

        Args:
            size: Output shape, forwarded to :meth:`uniform`.
            dtype: Data type, forwarded to :meth:`uniform`.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.standard_cauchy` for full documentation
            - :meth:`numpy.random.RandomState.standard_cauchy`
        """
        u = self.uniform(size=size, dtype=dtype)
        centred = u - 0.5
        return cupy.tan(cupy.pi * centred)

    def standard_exponential(self, size=None, dtype=float):
        """Draws samples from the standard exponential distribution.

        A raw uniform sample ``u`` is transformed as ``-log(u)``.

        Args:
            size: Output shape. ``None`` yields a zero-dimensional array.
            dtype: Data type handed to the uniform sampler.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.standard_exponential` for full documentation
            - :meth:`numpy.random.RandomState.standard_exponential`
        """
        shape = () if size is None else size
        u = self._random_sample_raw(shape, dtype)
        # The logarithm is taken in place; the negation makes a new array.
        cupy.log(u, out=u)
        return -u

    def standard_gamma(self, shape, size=None, dtype=float):
        """Draws samples from a standard gamma distribution.

        Args:
            shape: Shape parameter of the distribution; converted with
                :func:`cupy.asarray`.
            size: Output shape. When ``None`` the array shape of the
                ``shape`` parameter is used.
            dtype: Data type of the output array.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.standard_gamma` for full documentation
            - :meth:`numpy.random.RandomState.standard_gamma`
        """
        shape = cupy.asarray(shape)
        out_shape = shape.shape if size is None else size
        out = cupy.empty(shape=out_shape, dtype=dtype)
        _kernels.standard_gamma_kernel(shape, self._rk_seed, out)
        # Advance the kernel seed by the number of generated elements.
        self._update_seed(out.size)
        return out

    def standard_normal(self, size=None, dtype=float):
        """Draws samples from the standard normal distribution.

        This delegates to :meth:`normal` with its default location and
        scale.

        .. seealso::
            - :func:`cupy.random.standard_normal` for full documentation
            - :meth:`numpy.random.RandomState.standard_normal`

        """
        samples = self.normal(size=size, dtype=dtype)
        return samples

    def standard_t(self, df, size=None, dtype=float):
        """Draws samples from the standard t distribution.

        Args:
            df: Degrees of freedom; converted with :func:`cupy.asarray`.
            size: Output shape. When ``None`` the shape of ``df`` is used.
            dtype: Data type of the output array.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.standard_t` for full documentation
            - :meth:`numpy.random.RandomState.standard_t`
        """
        df = cupy.asarray(df)
        shape = df.shape if size is None else size
        out = cupy.empty(shape=shape, dtype=dtype)
        _kernels.standard_t_kernel(df, self._rk_seed, out)
        # Advance the kernel seed by the number of generated elements.
        self._update_seed(out.size)
        return out

    def tomaxint(self, size=None):
        """Draws integers between 0 and max integer inclusive.

        Return a sample of uniformly distributed random integers in the
        interval [0, ``np.iinfo(np.int_).max``]. The `np.int_` type translates
        to the C long integer type and its precision is platform dependent.

        Args:
            size (int or tuple of ints): Output shape. ``None`` yields a
                zero-dimensional array.

        Returns:
            cupy.ndarray: Drawn samples.

        .. seealso::
            :meth:`numpy.random.RandomState.tomaxint`

        """
        shape = () if size is None else size
        out = cupy.empty(shape, dtype=cupy.int_)
        # The generator emits 32-bit words, so each element needs
        # ``itemsize // 4`` of them.
        words_per_item = out.dtype.itemsize // 4
        n_words = out.size * words_per_item
        curand.generate(self._generator, out.data.ptr, n_words)

        # Clear the sign bit so every value is non-negative.
        out &= cupy.iinfo(cupy.int_).max
        return out

    # Elementwise kernel that turns a sample ``x`` into a triangular variate
    # in place. ``x`` is both read and overwritten; ``triangular`` passes in
    # the output of ``random_sample``.
    # With ``ratio = (mode - left) / (right - left)``:
    #   x <= ratio  ->  left  + sqrt(x * (mode - left) * (right - left))
    #   otherwise   ->  right - sqrt((1 - x) * (right - mode) * (right - left))
    _triangular_kernel = _core.ElementwiseKernel(
        'L left, M mode, R right', 'T x',
        """
        T base, leftbase, ratio, leftprod, rightprod;

        base = right - left;
        leftbase = mode - left;
        ratio = leftbase / base;
        leftprod = leftbase*base;
        rightprod = (right - mode)*base;

        if (x <= ratio)
        {
            x = left + sqrt(x*leftprod);
        } else
        {
            x = right - sqrt((1.0 - x) * rightprod);
        }
        """,
        'triangular_kernel'
    )

    def triangular(self, left, mode, right, size=None, dtype=float):
        """Draws samples from the triangular distribution.

        .. warning::

            This function may synchronize the device.

        Args:
            left: Lower limit.
            mode: Peak position; must satisfy ``left <= mode <= right``.
            right: Upper limit; must differ from ``left``.
            size: Output shape. When ``None`` the broadcast shape of the
                three parameters is used.
            dtype: Data type of the samples.

        Returns:
            cupy.ndarray: The drawn samples.

        Raises:
            ValueError: If any element violates the constraints above.

        .. seealso::
            - :func:`cupy.random.triangular` for full documentation
            - :meth:`numpy.random.RandomState.triangular`
        """
        left = cupy.asarray(left)
        mode = cupy.asarray(mode)
        right = cupy.asarray(right)
        # Each validation below reads a device result back (synchronize!).
        if cupy.any(left > mode):
            raise ValueError('left > mode')
        if cupy.any(mode > right):
            raise ValueError('mode > right')
        if cupy.any(left == right):
            raise ValueError('left == right')
        if size is None:
            shape = cupy.broadcast(left, mode, right).shape
        else:
            shape = size
        u = self.random_sample(size=shape, dtype=dtype)
        return RandomState._triangular_kernel(left, mode, right, u)

    # Elementwise kernel that rescales ``x`` in place to
    # ``low + x * (high - low)``; ``uniform`` passes in samples produced by
    # ``random_sample``.
    _scale_kernel = _core.ElementwiseKernel(
        'T low, T high', 'T x',
        'x = T(low) + x * T(high - low)',
        'cupy_scale')

    def uniform(self, low=0.0, high=1.0, size=None, dtype=float):
        """Draws uniformly-distributed samples over an interval.

        Samples from :meth:`random_sample` are rescaled as
        ``low + u * (high - low)``.

        Args:
            low: Lower bound; a scalar or an array-like.
            high: Upper bound; a scalar or an array-like.
            size: Output shape, forwarded to :meth:`random_sample`.
            dtype: Data type of the samples and of converted bounds.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.uniform` for full documentation
            - :meth:`numpy.random.RandomState.uniform`

        """
        dtype = numpy.dtype(dtype)
        # Draw first so the generator advances before the bounds are
        # converted.
        u = self.random_sample(size=size, dtype=dtype)
        low_is_scalar = numpy.isscalar(low)
        if not low_is_scalar:
            low = cupy.asarray(low, dtype)
        high_is_scalar = numpy.isscalar(high)
        if not high_is_scalar:
            high = cupy.asarray(high, dtype)
        return RandomState._scale_kernel(low, high, u)

    def vonmises(self, mu, kappa, size=None, dtype=float):
        """Draws samples from the von Mises distribution.

        Args:
            mu: First distribution parameter; converted with
                :func:`cupy.asarray`.
            kappa: Second distribution parameter; converted with
                :func:`cupy.asarray`.
            size: Output shape. When ``None`` the broadcast shape of ``mu``
                and ``kappa`` is used.
            dtype: Data type of the output array.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.vonmises` for full documentation
            - :meth:`numpy.random.RandomState.vonmises`
        """
        mu = cupy.asarray(mu)
        kappa = cupy.asarray(kappa)
        shape = cupy.broadcast(mu, kappa).shape if size is None else size
        out = cupy.empty(shape=shape, dtype=dtype)
        _kernels.vonmises_kernel(mu, kappa, self._rk_seed, out)
        # Advance the kernel seed by the number of generated elements.
        self._update_seed(out.size)
        return out

    # Elementwise kernel behind ``wald``. ``X`` is read as an input sample
    # and overwritten with the result; ``wald`` passes a normal sample as
    # ``X`` and a ``random_sample`` draw as ``U``.
    # With Y = mean * X**2 the candidate is
    #   mean + mean / (2 * scale) * (Y - sqrt(4 * scale * Y + Y**2)),
    # and it is replaced by mean**2 / candidate when
    # U > mean / (mean + candidate).
    _wald_kernel = _core.ElementwiseKernel(
        'T mean, T scale, T U', 'T X',
        """
            T mu_2l;
            T Y;
            mu_2l = mean / (2*scale);
            Y = mean*X*X;
            X = mean + mu_2l*(Y - sqrt(4*scale*Y + Y*Y));
            if (U > mean/(mean+X))
            {
                X = mean*mean/X;
            }
        """,
        'wald_scale')

    def wald(self, mean, scale, size=None, dtype=float):
        """Draws samples from the Wald distribution.

        Args:
            mean: Distribution mean; converted to an array of ``dtype``.
            scale: Scale parameter; converted to an array of ``dtype``.
            size: Output shape. When ``None`` the broadcast shape of
                ``mean`` and ``scale`` is used.
            dtype: Data type of the parameters and the samples.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.wald` for full documentation
            - :meth:`numpy.random.RandomState.wald`
        """
        mean = cupy.asarray(mean, dtype=dtype)
        scale = cupy.asarray(scale, dtype=dtype)
        shape = cupy.broadcast(mean, scale).shape if size is None else size
        # The normal draw comes first, then the uniform one.
        normal_draw = self.normal(size=shape, dtype=dtype)
        uniform_draw = self.random_sample(size=shape, dtype=dtype)
        return RandomState._wald_kernel(
            mean, scale, uniform_draw, normal_draw)

    def weibull(self, a, size=None, dtype=float):
        """Returns an array of samples drawn from the weibull distribution.

        A standard exponential sample ``e`` is transformed as
        ``e ** (1 / a)``.

        .. warning::

            This function may synchronize the device.

        Args:
            a: Shape parameter; no element may be negative.
            size: Output shape. When ``None`` the shape of ``a`` is used, so
                an array-valued ``a`` yields one sample per element.
            dtype: Data type handed to the exponential sampler.

        Returns:
            cupy.ndarray: The drawn samples.

        Raises:
            ValueError: If any element of ``a`` is negative.

        .. seealso::
            - :func:`cupy.random.weibull` for full documentation
            - :meth:`numpy.random.RandomState.weibull`
        """
        a = cupy.asarray(a)
        if cupy.any(a < 0):  # synchronize!
            raise ValueError('a < 0')
        if size is None:
            # Without this the sample would be zero-dimensional and could
            # not hold the in-place result when ``a`` is an array.
            size = a.shape
        x = self.standard_exponential(size, dtype)
        cupy.power(x, 1./a, out=x)
        return x

    def zipf(self, a, size=None, dtype=int):
        """Draws samples from the Zipf distribution.

        .. warning::

            This function may synchronize the device.

        Args:
            a: Distribution parameter; every element must be ``> 1.0``.
            size: Output shape. When ``None`` the shape of ``a`` is used.
            dtype: Data type of the output array.

        Returns:
            cupy.ndarray: The drawn samples.

        Raises:
            ValueError: If any element of ``a`` is ``<= 1.0``.

        .. seealso::
            - :func:`cupy.random.zipf` for full documentation
            - :meth:`numpy.random.RandomState.zipf`
        """
        a = cupy.asarray(a)
        too_small = cupy.any(a <= 1.0)
        if too_small:  # synchronize!
            raise ValueError('\'a\' must be a valid float > 1.0')
        shape = a.shape if size is None else size
        out = cupy.empty(shape=shape, dtype=dtype)
        _kernels.zipf_kernel(a, self._rk_seed, out)
        # Advance the kernel seed by the number of generated elements.
        self._update_seed(out.size)
        return out

    def choice(self, a, size=None, replace=True, p=None):
        """Returns an array of random values from a given 1-D array.

        Args:
            a: Either a non-negative ``int`` (sample from ``arange(a)``) or
                a 1-D array-like to sample from.
            size: Output shape. ``None`` is not supported.
            replace: Whether to sample with replacement. Sampling without
                replacement is only supported when ``p`` is ``None``.
            p: Optional 1-D probabilities, one per candidate, that must be
                non-negative and sum to 1.

        Returns:
            cupy.ndarray: Sampled indices when ``a`` is an ``int``,
            otherwise the sampled elements of ``a``.

        Raises:
            ValueError: If ``a`` or ``p`` is malformed, if ``a`` is empty
                while samples are requested, or if more samples than
                candidates are requested with ``replace=False``.
            NotImplementedError: For a zero-dimensional ``cupy.ndarray``
                ``a``, for ``size=None``, and for ``replace=False``
                combined with ``p``.

        .. seealso::
            - :func:`cupy.random.choice` for full documentation
            - :meth:`numpy.random.choice`

        """
        # --- Validate ``a`` and determine the population size.
        if a is None:
            raise ValueError('a must be 1-dimensional or an integer')
        if isinstance(a, cupy.ndarray) and a.ndim == 0:
            raise NotImplementedError
        if isinstance(a, int):
            a_size = a
            if a_size < 0:
                raise ValueError('a must be greater than or equal to 0')
        else:
            a = cupy.array(a, copy=False)
            if a.ndim != 1:
                raise ValueError('a must be 1-dimensional or an integer')
            a_size = len(a)

        # --- Validate the probabilities.
        if p is not None:
            p = cupy.array(p)
            if p.ndim != 1:
                raise ValueError('p must be 1-dimensional')
            if len(p) != a_size:
                raise ValueError('a and p must have same size')
            if not (p >= 0).all():
                raise ValueError('probabilities are not non-negative')
            # ``.get()`` copies the sum to the host for the tolerance check.
            p_sum = cupy.sum(p).get()
            if not numpy.allclose(p_sum, 1):
                raise ValueError('probabilities do not sum to 1')

        if size is None:
            raise NotImplementedError
        # From here on ``shape`` is the requested output shape and ``size``
        # the total number of samples.
        shape = size
        size = numpy.prod(shape)

        if a_size == 0 and size > 0:
            raise ValueError('a cannot be empty unless no samples are taken')

        # --- Without replacement, uniform: shuffle the population and take
        # the first ``size`` entries.
        if not replace and p is None:
            if a_size < size:
                raise ValueError(
                    'Cannot take a larger sample than population when '
                    '\'replace=False\'')
            if isinstance(a, int):
                indices = cupy.arange(a, dtype='l')
            else:
                indices = a.copy()
            self.shuffle(indices)
            return indices[:size].reshape(shape)

        if not replace:
            raise NotImplementedError

        # --- With replacement.
        if p is not None:
            # Weighted draw: for each sample, take the argmax over the
            # candidates of ``log(p)`` plus independent Gumbel noise.
            p = cupy.broadcast_to(p, (size, a_size))
            index = cupy.argmax(cupy.log(p) +
                                self.gumbel(size=(size, a_size)),
                                axis=1)
            if not isinstance(shape, int):
                index = cupy.reshape(index, shape)
        else:
            if a_size == 0:  # TODO: (#4511) Fix `randint` instead
                a_size = 1
            index = self.randint(0, a_size, size=shape)
            # Align the dtype with NumPy
            index = index.astype(cupy.int64, copy=False)

        # An integer ``a`` means the indices themselves are the result.
        if isinstance(a, int):
            return index

        # Wrap a zero-dimensional lookup so the dtype of ``a`` is kept.
        if index.ndim == 0:
            return cupy.array(a[index], dtype=a.dtype)

        return a[index]

    def shuffle(self, a):
        """Shuffles an array in place along its first axis.

        Args:
            a (cupy.ndarray): Array to shuffle; must have at least one
                dimension.

        Raises:
            TypeError: If ``a`` is not a :class:`cupy.ndarray` or is
                zero-dimensional.

        .. seealso::
            - :func:`cupy.random.shuffle` for full documentation
            - :meth:`numpy.random.shuffle`

        """
        if not isinstance(a, cupy.ndarray):
            raise TypeError('The array must be cupy.ndarray')
        if a.ndim == 0:
            raise TypeError('An array whose ndim is 0 is not supported')

        order = self._permutation(len(a))
        # Gather in permuted order, then write back over the original.
        a[:] = a[order]

    def permutation(self, a):
        """Returns a permuted range or a permutation of an array.

        Args:
            a: An ``int`` to obtain a permutation of ``range(a)``, or an
                indexable sequence to obtain a permuted copy of it.
        """
        if not isinstance(a, int):
            order = self._permutation(len(a))
            return a[order]
        return self._permutation(a)

    def _permutation(self, num):
        """Returns a permuted range of length ``num``.

        ``num`` random 32-bit keys are always drawn first. Lengths in
        ``(128, 32M]`` are handled by the ``_cupy_permutation`` kernel in
        blocks; every other length is handled by arg-sorting the keys.
        """
        keys = cupy.empty(num, dtype=numpy.int32)
        curand.generate(self._generator, keys.data.ptr, num)

        if not (128 < num <= 32 * 1024 * 1024):
            # Outside the kernel's range (num <= 128 or num > 32M) argsort
            # is used; for large num it is faster than the custom kernel.
            # See https://github.com/cupy/cupy/pull/603.
            return cupy.argsort(keys)

        result = cupy.arange(num, dtype=numpy.int32)
        # Apply a sort of cache blocking. The initial block size was
        # determined from the L2 cache size of GP100 (L2 cache size / size
        # of int = 4MB / 4B = 1M). It may be better to change the value
        # based on the L2 cache size of the GPU you use.
        chunk = 1 * 1024 * 1024
        # When num > chunk the kernel is launched several times, and
        # performance was observed to degrade when the launch count gets
        # too high. The block size is therefore doubled until num // chunk
        # is at most twelve, a value measured on GP100.
        while num // chunk > 12:
            chunk *= 2
        for start in range(0, num, chunk):
            stop = start + chunk
            _cupy_permutation(keys, start, stop, result, size=num)
        return result

    # Elementwise kernel computing ``y = loc - log(-log(x)) * scale``.
    # ``gumbel`` passes the same array as both ``x`` and ``y``, so the raw
    # uniform samples are transformed in place.
    _gumbel_kernel = _core.ElementwiseKernel(
        'T x, T loc, T scale', 'T y',
        'y = T(loc) - log(-log(x)) * T(scale)',
        'gumbel_kernel')

    def gumbel(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Draws samples from a Gumbel distribution.

        A raw uniform sample ``u`` is transformed as
        ``loc - log(-log(u)) * scale``.

        Args:
            loc: Location; a scalar or an array-like.
            scale: Scale; a scalar or an array-like.
            size: Output shape. When ``None`` the broadcast shape of
                ``loc`` and ``scale`` is used.
            dtype: Data type of the samples and of converted parameters.

        Returns:
            cupy.ndarray: The drawn samples.

        .. seealso::
            - :func:`cupy.random.gumbel` for full documentation
            - :meth:`numpy.random.RandomState.gumbel`
        """
        loc_is_scalar = numpy.isscalar(loc)
        if not loc_is_scalar:
            loc = cupy.asarray(loc, dtype)
        scale_is_scalar = numpy.isscalar(scale)
        if not scale_is_scalar:
            scale = cupy.asarray(scale, dtype)
        shape = cupy.broadcast(loc, scale).shape if size is None else size
        samples = self._random_sample_raw(size=shape, dtype=dtype)
        # The kernel reads and writes the same buffer.
        RandomState._gumbel_kernel(samples, loc, scale, samples)
        return samples

    def randint(self, low, high=None, size=None, dtype=int):
        """Draws integer values over ``[low, high)``.

        Args:
            low: Lower bound (inclusive), or the exclusive upper bound when
                ``high`` is ``None`` (the lower bound is then 0).
            high: Exclusive upper bound, or ``None``.
            size: Output shape, forwarded to :meth:`_interval`.
            dtype: Integer data type of the result.

        Returns:
            cupy.ndarray: The drawn samples.

        Raises:
            ValueError: If the interval is empty or a bound does not fit in
                ``dtype``.

        .. seealso::
            - :func:`cupy.random.randint` for full documentation
            - :meth:`numpy.random.RandomState.randint`
        """
        # ``hi1`` is the inclusive upper bound.
        if high is None:
            lo, hi1 = 0, int(low) - 1
        else:
            lo, hi1 = int(low), int(high) - 1

        if lo > hi1:
            raise ValueError('low >= high')
        limits = cupy.iinfo(dtype)
        if lo < limits.min:
            raise ValueError(
                'low is out of bounds for {}'.format(cupy.dtype(dtype).name))
        if hi1 > limits.max:
            raise ValueError(
                'high is out of bounds for {}'.format(cupy.dtype(dtype).name))

        # Sample offsets in [0, hi1 - lo], then shift them by ``lo``.
        span = hi1 - lo
        out = self._interval(span, size).astype(dtype, copy=False)
        cupy.add(out, lo, out=out)
        return out

예제 #30

0

파일 보기

        } else {
            look_right = (inc ? bins[m] < x : bins[m] >= x);
        }
        if (look_right) {
            left = m + 1;
        } else {
            right = m;
        }
    }
    no_thread_divergence( y = right , false )
'''

# Elementwise kernel built from ``_searchsorted_code``. Per element it takes
# a value ``x``, the raw ``bins`` array with its length ``n_bins`` and two
# boolean flags (``side_is_right``, ``assume_increasing``), and writes an
# ``int64`` result ``y``.
# NOTE(review): ``_preamble`` and ``_hip_preamble`` are evaluated when this
# statement runs, so both must already be defined at that point; in this
# excerpt ``_hip_preamble`` appears further down. Confirm the ordering
# against the original module.
_searchsorted_kernel = _core.ElementwiseKernel(
    'S x, raw T bins, int64 n_bins, bool side_is_right, '
    'bool assume_increasing',
    'int64 y',
    _searchsorted_code,
    name='cupy_searchsorted_kernel',
    preamble=_preamble + _hip_preamble)

_hip_preamble = r'''
#ifdef __HIP_DEVICE_COMPILE__
  #define no_thread_divergence(do_work, to_return) \
    if (!is_done) {                                \
      do_work;                                     \
      is_done = true;                              \
    }
#else
  #define no_thread_divergence(do_work, to_return) \
    do_work;                                       \
    if (to_return) {                               \