Python ElementwiseKernelの例

プログラミング言語: Python

名前空間/パッケージ名: cupy.core

メソッド/関数: ElementwiseKernel

hotexamples.comのコード掲載数: 30

Python ElementwiseKernel - 30件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのcupy.core.ElementwiseKernelの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def _kernel_labels():
    return core.ElementwiseKernel(
        '', 'raw Y y, raw int32 count, raw int32 labels', '''
        if (y[i] != i) continue;
        int j = atomicAdd(&count[1], 1);
        labels[j] = i;
        ''', 'cupyx_nd_label_labels')

コード例 #2

ファイルを表示

ファイル: vectorize.py プロジェクト: viantirreau/cupy

    def __call__(self, *args):
        itypes = ''.join([_get_input_type(x) for x in args])
        kern = self._kernel_cache.get(itypes, None)

        if kern is None:
            in_types = [_types.Scalar(t) for t in itypes]
            ret_type = None
            if self.otypes is not None:
                # TODO(asi1024): Implement
                raise NotImplementedError

            func = _interface._CudaFunction(self.pyfunc, 'numpy', device=True)
            result = func._emit_code_from_types(in_types, ret_type)
            in_params = ', '.join(f'{t.dtype} in{i}'
                                  for i, t in enumerate(in_types))
            out_params = str(result.return_type.dtype) + ' out0'
            body = 'out0 = {}({})'.format(
                func.name, ', '.join([f'in{i}' for i in range(len(in_types))]))
            kern = core.ElementwiseKernel(in_params,
                                          out_params,
                                          body,
                                          preamble=result.code)
            self._kernel_cache[itypes] = kern

        return kern(*args)

コード例 #3

ファイルを表示

def _kernel_init():
    return core.ElementwiseKernel(
        "X x",
        "Y y",
        "if (x == 0) { y = -1; } else { y = i; }",
        "cupyimg_nd_label_init",
    )

コード例 #4

ファイルを表示

ファイル: sumprod.py プロジェクト: tau3000/chainer

def cumsum(a, axis=None, dtype=None, out=None):
    if axis is None:
        a = a.ravel()
    else:
        raise ValueError("'axis' option is not supported")

    if out is None:
        if dtype is None:
            kind = a.dtype.kind
            if kind == 'b':
                dtype = numpy.dtype('l')
            elif kind == 'i' and a.dtype.itemsize < numpy.dtype('l').itemsize:
                dtype = numpy.dtype('l')
            elif kind == 'u' and a.dtype.itemsize < numpy.dtype('L').itemsize:
                dtype = numpy.dtype('L')
            else:
                dtype = a.dtype

        out = a.astype(dtype)
    else:
        out[...] = a

    kern = core.ElementwiseKernel(
        'int32 pos', 'raw T x', '''
        if (i & pos) {
          x[i] += x[i ^ pos | (pos - 1)];
        }
        ''', 'cumsum_kernel')

    pos = 1
    while pos < out.size:
        kern(pos, out, size=out.size)
        pos <<= 1
    return out

コード例 #5

ファイルを表示

ファイル: generator.py プロジェクト: q132546/cupy

def _get_gumbel_kernel():
    global _gumbel_kernel
    if _gumbel_kernel is None:
        _gumbel_kernel = core.ElementwiseKernel(
            'T x, T loc, T scale', 'T y', 'y = loc - log(-log(1 - x)) * scale',
            'gumbel_kernel')
    return _gumbel_kernel

コード例 #6

ファイルを表示

ファイル: dia.py プロジェクト: kmatsuura/cupy

    def tocsc(self, copy=False):
        """Converts the matrix to Compressed Sparse Column format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in dia to csc conversion.

        Returns:
            cupy.sparse.csc_matrix: Converted matrix.

        """
        if self.data.size == 0:
            return csc.csc_matrix(self.shape, dtype=self.dtype)

        num_rows, num_cols = self.shape
        num_offsets, offset_len = self.data.shape

        row, mask = core.ElementwiseKernel(
            'int32 offset_len, int32 offsets, int32 num_rows, '
            'int32 num_cols, T data', 'int32 row, bool mask', '''
            int offset_inds = i % offset_len;
            row = offset_inds - offsets;
            mask = (row >= 0 && row < num_rows && offset_inds < num_cols
                    && data != 0);
            ''', 'dia_tocsc')(offset_len, self.offsets[:, None], num_rows,
                              num_cols, self.data)
        indptr = cupy.zeros(num_cols + 1, dtype='i')
        indptr[1:offset_len + 1] = cupy.cumsum(mask.sum(axis=0))
        indptr[offset_len + 1:] = indptr[offset_len]
        indices = row.T[mask.T].astype('i', copy=False)
        data = self.data.T[mask.T]
        return csc.csc_matrix((data, indices, indptr),
                              shape=self.shape,
                              dtype=self.dtype)

コード例 #7

ファイルを表示

def _kernel_count():
    return core.ElementwiseKernel(
        '', 'raw Y y, raw int32 count', '''
        if (y[i] < 0) continue;
        int j = i;
        while (j != y[j]) { j = y[j]; }
        if (j != i) y[i] = j;
        else atomicAdd(&count[0], 1);
        ''', 'cupyx_nd_label_count')

コード例 #8

ファイルを表示

ファイル: generator.py プロジェクト: gridl/cupy

def _cupy_permutation():
    return core.ElementwiseKernel(
        'raw int32 array, raw int32 sample, int32 j_start, int32 _j_end',
        '',
        '''
            const int invalid = -1;
            const int num = _ind.size();
            int j = (sample[i] & 0x7fffffff) % num;
            int j_end = _j_end;
            if (j_end > num) j_end = num;
            if (j == i || j < j_start || j >= j_end) continue;

            // If a thread fails to do data swaping once, it changes j
            // value using j_offset below and try data swaping again.
            // This process is repeated until data swapping is succeeded.
            // The j_offset is determined from the initial j
            // (random number assigned to each thread) and the initial
            // offset between j and i (ID of each thread).
            // If a given number sequence in sample is really random,
            // this j-update would not be necessary. This is work-around
            // mainly to avoid potential eternal conflict when sample has
            // rather synthetic number sequence.
            int j_offset = ((2*j - i + num) % (num - 1)) + 1;

            // A thread gives up to do data swapping if loop count exceed
            // a threathod determined below. This is kind of safety
            // mechanism to escape the eternal race condition, though I
            // believe it never happens.
            int loops = 256;

            bool do_next = true;
            while (do_next && loops > 0) {
                // try to swap the contents of array[i] and array[j]
                if (i != j) {
                    int val_j = atomicExch(&array[j], invalid);
                    if (val_j != invalid) {
                        int val_i = atomicExch(&array[i], invalid);
                        if (val_i != invalid) {
                            array[i] = val_j;
                            array[j] = val_i;
                            do_next = false;
                            // done
                        }
                        else {
                            // restore array[j]
                            array[j] = val_j;
                        }
                    }
                }
                j = (j + j_offset) % num;
                loops--;
            }
        ''',
        'cupy_permutation',
    )

コード例 #9

ファイルを表示

def _kernel_labels():
    return core.ElementwiseKernel(
        "",
        "raw Y y, raw int32 count, raw int32 labels",
        """
        if (y[i] != i) continue;
        int j = atomicAdd(&count[1], 1);
        labels[j] = i;
        """,
        "cupyimg_nd_label_labels",
    )

コード例 #10

ファイルを表示

def _get_gammaln_kernel():
    global _gammaln_kernel
    if _gammaln_kernel is None:
        _gammaln_kernel = core.ElementwiseKernel(
            'T x', 'T y', """
            if(isinf(x) && x < 0){
                y = - 1.0 / 0.0;
                return;
            }
            y = lgamma(x);
            """, 'gammaln_kernel')
    return _gammaln_kernel

コード例 #11

ファイルを表示

ファイル: digamma.py プロジェクト: wuxun-zhang/cupy

def _get_digamma_kernel():
    global _digamma_kernel
    if _digamma_kernel is None:
        _digamma_kernel = core.ElementwiseKernel(
            'T x', 'T y',
            """
            y = psi(x)
            """,
            'digamma_kernel',
            preamble=polevl_definition+psi_definition
        )
    return _digamma_kernel

コード例 #12

ファイルを表示

def _get_zeta_kernel():
    global _zeta_kernel
    if _zeta_kernel is None:
        _zeta_kernel = core.ElementwiseKernel(
            'T x, T q', 'T y',
            """
            y = zeta(x, q)
            """,
            'zeta_kernel',
            preamble=zeta_definition
        )
    return _zeta_kernel

コード例 #13

ファイルを表示

def _kernel_count():
    return core.ElementwiseKernel(
        "",
        "raw Y y, raw int32 count",
        """
        if (y[i] < 0) continue;
        int j = i;
        while (j != y[j]) { j = y[j]; }
        if (j != i) y[i] = j;
        else atomicAdd(&count[0], 1);
        """,
        "cupyimg_nd_label_count",
    )

コード例 #14

ファイルを表示

ファイル: sumprod.py プロジェクト: nudigamma/cupy

def cumsum(a, axis=None, dtype=None, out=None):
    """Returns the cumlative sum of an array along a given axis.

    Args:
        a (cupy.ndarray): Input array.
        axis (int): Axis along which the cumlative sum is taken. If it is not
        specified, the input is flattened.
        dtype: Data type specifier.
        out (cupy.ndarray): Output array.

    Returns:
        cupy.ndarray: The result array.

    .. seealso:: :func:`numpy.cumsum`

    """
    if out is None:
        if dtype is None:
            kind = a.dtype.kind
            if kind == 'b':
                dtype = numpy.dtype('l')
            elif kind == 'i' and a.dtype.itemsize < numpy.dtype('l').itemsize:
                dtype = numpy.dtype('l')
            elif kind == 'u' and a.dtype.itemsize < numpy.dtype('L').itemsize:
                dtype = numpy.dtype('L')
            else:
                dtype = a.dtype

        out = a.astype(dtype)
    else:
        out[...] = a

    if axis is None:
        out = out.ravel()
    elif not (-a.ndim <= axis < a.ndim):
        raise ValueError('axis(={}) out of bounds'.format(axis))
    else:
        return _proc_as_batch(_cumsum_batch, out, axis=axis)

    kern = core.ElementwiseKernel(
        'int32 pos', 'raw T x', '''
        if (i & pos) {
          x[i] += x[i ^ pos | (pos - 1)];
        }
        ''', 'cumsum_kernel')

    pos = 1
    while pos < out.size:
        kern(pos, out, size=out.size)
        pos <<= 1
    return out

コード例 #15

ファイルを表示

ファイル: sumprod.py プロジェクト: nudigamma/cupy

def _cumsum_batch(out):
    kern = core.ElementwiseKernel(
        'int32 pos, int32 batch', 'raw T x', '''
        int b = i % batch;
        int j = i / batch;
        if (j & pos) {
          const int dst_index[] = {b, j};
          const int src_index[] = {b, j ^ pos | (pos - 1)};
          x[dst_index] += x[src_index];
        }
        ''', 'cumsum_batch_kernel')

    pos = 1
    while pos < out.size:
        kern(pos, out.shape[0], out, size=out.size)
        pos <<= 1
    return out

コード例 #16

ファイルを表示

def multinomial(n, pvals, size=None):
    """Returns an array from multinomial distribution.

    Args:
        n (int): Number of trials.
        pvals (cupy.ndarray): Probabilities of each of the ``p`` different
            outcomes. The sum of these values must be 1.
        size (int or tuple of ints or None): Shape of a sample in each trial.
            For example when ``size`` is ``(a, b)``, shape of returned value is
            ``(a, b, p)`` where ``p`` is ``len(pvals)``.
            If ``size`` is ``None``, it is treated as ``()``. So, shape of
            returned value is ``(p,)``.

    Returns:
        cupy.ndarray: An array drawn from multinomial distribution.

    .. note::
       It does not support ``sum(pvals) < 1`` case.

    .. seealso:: :func:`numpy.random.multinomial`
    """

    if size is None:
        m = 1
        size = ()
    elif isinstance(size, six.integer_types):
        m = size
        size = (size,)
    else:
        size = tuple(size)
        m = 1
        for x in size:
            m *= x

    p = len(pvals)
    shape = size + (p,)
    # atomicAdd only supports int32
    ys = basic.zeros(shape, 'i')
    if ys.size > 0:
        xs = choice(p, p=pvals, size=n * m)
        core.ElementwiseKernel(
            'int64 x, int32 p, int32 n', 'raw int32 ys',
            'atomicAdd(&ys[i / n * p + x], 1)',
            'cupy_random_multinomial')(xs, p, n, ys)
    return ys.astype('l')

コード例 #17

ファイルを表示

def _kernel_finalize():
    return core.ElementwiseKernel(
        'int32 maxlabel', 'raw int32 labels, raw Y y', '''
        if (y[i] < 0) {
            y[i] = 0;
            continue;
        }
        int yi = y[i];
        int j_min = 0;
        int j_max = maxlabel - 1;
        int j = (j_min + j_max) / 2;
        while (j_min < j_max) {
            if (yi == labels[j]) break;
            if (yi < labels[j]) j_max = j - 1;
            else j_min = j + 1;
            j = (j_min + j_max) / 2;
        }
        y[i] = j + 1;
        ''', 'cupyx_nd_label_finalize')

コード例 #18

ファイルを表示

def _kernel_connect():
    return core.ElementwiseKernel(
        'raw int32 shape, raw int32 dirs, int32 ndirs, int32 ndim',
        'raw Y y',
        '''
        if (y[i] < 0) continue;
        for (int dr = 0; dr < ndirs; dr++) {
            int j = i;
            int rest = j;
            int stride = 1;
            int k = 0;
            for (int dm = ndim-1; dm >= 0; dm--) {
                int pos = rest % shape[dm] + dirs[dm + dr * ndim];
                if (pos < 0 || pos >= shape[dm]) {
                    k = -1;
                    break;
                }
                k += pos * stride;
                rest /= shape[dm];
                stride *= shape[dm];
            }
            if (k < 0) continue;
            if (y[k] < 0) continue;
            while (1) {
                while (j != y[j]) { j = y[j]; }
                while (k != y[k]) { k = y[k]; }
                if (j == k) break;
                if (j < k) {
                    int old = atomicCAS( &y[k], k, j );
                    if (old == k) break;
                    k = old;
                }
                else {
                    int old = atomicCAS( &y[j], j, k );
                    if (old == j) break;
                    j = old;
                }
            }
        }
        ''',
        'cupyx_nd_label_connect')

コード例 #19

ファイルを表示

def _kernel_finalize():
    return core.ElementwiseKernel(
        "int32 maxlabel",
        "raw int32 labels, raw Y y",
        """
        if (y[i] < 0) {
            y[i] = 0;
            continue;
        }
        int yi = y[i];
        int j_min = 0;
        int j_max = maxlabel - 1;
        int j = (j_min + j_max) / 2;
        while (j_min < j_max) {
            if (yi == labels[j]) break;
            if (yi < labels[j]) j_max = j - 1;
            else j_min = j + 1;
            j = (j_min + j_max) / 2;
        }
        y[i] = j + 1;
        """,
        "cupyimg_nd_label_finalize",
    )

コード例 #20

ファイルを表示

ファイル: histogram.py プロジェクト: wongalvis14/cupy

__device__ long long atomicAdd(long long *address, long long val) {
    return atomicAdd(reinterpret_cast<unsigned long long*>(address),
                     static_cast<unsigned long long>(val));
}'''

# TODO(unno): use searchsorted
_histogram_kernel = core.ElementwiseKernel('S x, raw T bins, int32 n_bins',
                                           'raw U y',
                                           '''
    if (x < bins[0] or bins[n_bins - 1] < x) {
        return;
    }
    int high = n_bins - 1;
    int low = 0;

    while (high - low > 1) {
        int mid = (high + low) / 2;
        if (bins[mid] <= x) {
            low = mid;
        } else {
            high = mid;
        }
    }
    atomicAdd(&y[low], U(1));
    ''',
                                           preamble=_preamble)


def histogram(x, bins=10):
    """Computes the histogram of a set of data.

    Args:

コード例 #21

ファイルを表示

ファイル: _generator.py プロジェクト: twonp168/cupy

class RandomState(object):
    """Portable container of a pseudo-random number generator.

    An instance of this class holds the state of a random number generator. The
    state is available only on the device which has been current at the
    initialization of the instance.

    Functions of :mod:`cupy.random` use global instances of this class.
    Different instances are used for different devices. The global state for
    the current device can be obtained by the
    :func:`cupy.random.get_random_state` function.

    Args:
        seed (None or int): Seed of the random number generator. See the
            :meth:`~cupy.random.RandomState.seed` method for detail.
        method (int): Method of the random number generator. Following values
            are available::

               cupy.cuda.curand.CURAND_RNG_PSEUDO_DEFAULT
               cupy.cuda.curand.CURAND_RNG_XORWOW
               cupy.cuda.curand.CURAND_RNG_MRG32K3A
               cupy.cuda.curand.CURAND_RNG_MTGP32
               cupy.cuda.curand.CURAND_RNG_MT19937
               cupy.cuda.curand.CURAND_RNG_PHILOX4_32_10

    """
    def __init__(self, seed=None, method=curand.CURAND_RNG_PSEUDO_DEFAULT):
        self._generator = curand.createGenerator(method)
        self.method = method
        self.seed(seed)

    def __del__(self, is_shutting_down=util.is_shutting_down):
        # When createGenerator raises an error, _generator is not initialized
        if is_shutting_down():
            return
        if hasattr(self, '_generator'):
            curand.destroyGenerator(self._generator)

    def _update_seed(self, size):
        self._rk_seed = (self._rk_seed + size) % _UINT64_MAX

    def _generate_normal(self, func, size, dtype, *args):
        # curand functions below don't support odd size.
        # * curand.generateNormal
        # * curand.generateNormalDouble
        # * curand.generateLogNormal
        # * curand.generateLogNormalDouble
        size = core.get_size(size)
        element_size = functools.reduce(operator.mul, size, 1)
        if element_size % 2 == 0:
            out = cupy.empty(size, dtype=dtype)
            func(self._generator, out.data.ptr, out.size, *args)
            return out
        else:
            out = cupy.empty((element_size + 1, ), dtype=dtype)
            func(self._generator, out.data.ptr, out.size, *args)
            return out[:element_size].reshape(size)

    # NumPy compatible functions

    def beta(self, a, b, size=None, dtype=float):
        """Returns an array of samples drawn from the beta distribution.

        .. seealso::
            :func:`cupy.random.beta` for full documentation,
            :meth:`numpy.random.RandomState.beta
            <numpy.random.mtrand.RandomState.beta>`
        """
        a, b = cupy.asarray(a), cupy.asarray(b)
        if size is None:
            size = cupy.broadcast(a, b).shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.beta_kernel(a, b, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def binomial(self, n, p, size=None, dtype=int):
        """Returns an array of samples drawn from the binomial distribution.

        .. seealso::
            :func:`cupy.random.binomial` for full documentation,
            :meth:`numpy.random.RandomState.binomial
            <numpy.random.mtrand.RandomState.binomial>`
        """
        n, p = cupy.asarray(n), cupy.asarray(p)
        if size is None:
            size = cupy.broadcast(n, p).shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.binomial_kernel(n, p, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def chisquare(self, df, size=None, dtype=float):
        """Returns an array of samples drawn from the chi-square distribution.

        .. seealso::
            :func:`cupy.random.chisquare` for full documentation,
            :meth:`numpy.random.RandomState.chisquare
            <numpy.random.mtrand.RandomState.chisquare>`
        """
        df = cupy.asarray(df)
        if size is None:
            size = df.shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.chisquare_kernel(df, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def dirichlet(self, alpha, size=None, dtype=float):
        """Returns an array of samples drawn from the dirichlet distribution.

        .. seealso::
            :func:`cupy.random.dirichlet` for full documentation,
            :meth:`numpy.random.RandomState.dirichlet
            <numpy.random.mtrand.RandomState.dirichlet>`
        """
        alpha = cupy.asarray(alpha)
        if size is None:
            size = alpha.shape
        else:
            size += alpha.shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.standard_gamma_kernel(alpha, self._rk_seed, y)
        y /= y.sum(axis=-1, keepdims=True)
        self._update_seed(y.size)
        return y

    def exponential(self, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a exponential distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.exponential` for full documentation,
            :meth:`numpy.random.RandomState.exponential
            <numpy.random.mtrand.RandomState.exponential>`
        """
        scale = cupy.asarray(scale, dtype)
        if (scale < 0).any():  # synchronize!
            raise ValueError('scale < 0')
        if size is None:
            size = scale.shape
        x = self.standard_exponential(size, dtype)
        x *= scale
        return x

    def f(self, dfnum, dfden, size=None, dtype=float):
        """Returns an array of samples drawn from the f distribution.

        .. seealso::
            :func:`cupy.random.f` for full documentation,
            :meth:`numpy.random.RandomState.f
            <numpy.random.mtrand.RandomState.f>`
        """
        dfnum, dfden = cupy.asarray(dfnum), cupy.asarray(dfden)
        if size is None:
            size = cupy.broadcast(dfnum, dfden).shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.f_kernel(dfnum, dfden, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def gamma(self, shape, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a gamma distribution.

        .. seealso::
            :func:`cupy.random.gamma` for full documentation,
            :meth:`numpy.random.RandomState.gamma
            <numpy.random.mtrand.RandomState.gamma>`
        """
        shape, scale = cupy.asarray(shape), cupy.asarray(scale)
        if size is None:
            size = cupy.broadcast(shape, scale).shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.standard_gamma_kernel(shape, self._rk_seed, y)
        y *= scale
        self._update_seed(y.size)
        return y

    def geometric(self, p, size=None, dtype=int):
        """Returns an array of samples drawn from the geometric distribution.

        .. seealso::
            :func:`cupy.random.geometric` for full documentation,
            :meth:`numpy.random.RandomState.geometric
            <numpy.random.mtrand.RandomState.geometric>`
        """
        p = cupy.asarray(p)
        if size is None:
            size = p.shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.geometric_kernel(p, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def hypergeometric(self, ngood, nbad, nsample, size=None, dtype=int):
        """Returns an array of samples drawn from the hypergeometric distribution.

        .. seealso::
            :func:`cupy.random.hypergeometric` for full documentation,
            :meth:`numpy.random.RandomState.hypergeometric
            <numpy.random.mtrand.RandomState.hypergeometric>`
        """
        ngood, nbad, nsample = \
            cupy.asarray(ngood), cupy.asarray(nbad), cupy.asarray(nsample)
        if size is None:
            size = cupy.broadcast(ngood, nbad, nsample).shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.hypergeometric_kernel(ngood, nbad, nsample, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    _laplace_kernel = core.ElementwiseKernel(
        'T x, T loc, T scale', 'T y',
        'y = loc + scale * ((x <= 0.5) ? log(x + x): -log(x + x - 1.0))',
        'laplace_kernel')

    def laplace(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from the laplace distribution.

        .. seealso::
            :func:`cupy.random.laplace` for full documentation,
            :meth:`numpy.random.RandomState.laplace
            <numpy.random.mtrand.RandomState.laplace>`
        """
        loc = cupy.asarray(loc, dtype)
        scale = cupy.asarray(scale, dtype)
        if size is None:
            size = cupy.broadcast(loc, scale).shape
        x = self._random_sample_raw(size, dtype)
        RandomState._laplace_kernel(x, loc, scale, x)
        return x

    def logistic(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from the logistic distribution.

        .. seealso::
            :func:`cupy.random.logistic` for full documentation,
            :meth:`numpy.random.RandomState.logistic
            <numpy.random.mtrand.RandomState.logistic>`
        """
        loc, scale = cupy.asarray(loc), cupy.asarray(scale)
        if size is None:
            size = cupy.broadcast(loc, scale).shape
        x = cupy.empty(shape=size, dtype=dtype)
        _kernels.open_uniform_kernel(self._rk_seed, x)
        self._update_seed(x.size)
        x = (1.0 - x) / x
        cupy.log(x, out=x)
        cupy.multiply(x, scale, out=x)
        cupy.add(x, loc, out=x)
        return x

    def lognormal(self, mean=0.0, sigma=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a log normal distribution.

        .. seealso::
            :func:`cupy.random.lognormal` for full documentation,
            :meth:`numpy.random.RandomState.lognormal
            <numpy.random.mtrand.RandomState.lognormal>`

        """
        dtype = _check_and_get_dtype(dtype)
        if dtype.char == 'f':
            func = curand.generateLogNormal
        else:
            func = curand.generateLogNormalDouble
        return self._generate_normal(func, size, dtype, mean, sigma)

    def logseries(self, p, size=None, dtype=int):
        """Returns an array of samples drawn from a log series distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.logseries` for full documentation,
            :meth:`numpy.random.RandomState.logseries
            <numpy.random.mtrand.RandomState.logseries>`

        """
        p = cupy.asarray(p)
        if cupy.any(p <= 0):  # synchronize!
            raise ValueError('p <= 0.0')
        if cupy.any(p >= 1):  # synchronize!
            raise ValueError('p >= 1.0')
        if size is None:
            size = p.shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.logseries_kernel(p, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def multivariate_normal(self,
                            mean,
                            cov,
                            size=None,
                            check_valid='ignore',
                            tol=1e-08,
                            method='cholesky',
                            dtype=float):
        """Returns an array of samples drawn from the multivariate normal
        distribution.

        .. warning::
            This function calls one or more cuSOLVER routine(s) which may yield
            invalid results if input conditions are not met.
            To detect these invalid results, you can set the `linalg`
            configuration to a value that is not `ignore` in
            :func:`cupyx.errstate` or :func:`cupyx.seterr`.

        .. seealso::
            :func:`cupy.random.multivariate_normal` for full documentation,
            :meth:`numpy.random.RandomState.multivariate_normal
            <numpy.random.mtrand.RandomState.multivariate_normal>`
        """
        util.experimental('cupy.random.RandomState.multivariate_normal')
        mean = cupy.asarray(mean, dtype=dtype)
        cov = cupy.asarray(cov, dtype=dtype)
        if size is None:
            shape = []
        elif isinstance(size, (int, cupy.integer)):
            shape = [size]
        else:
            shape = size

        if len(mean.shape) != 1:
            raise ValueError('mean must be 1 dimensional')
        if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]):
            raise ValueError('cov must be 2 dimensional and square')
        if mean.shape[0] != cov.shape[0]:
            raise ValueError('mean and cov must have same length')

        final_shape = list(shape[:])
        final_shape.append(mean.shape[0])

        if method not in {'eigh', 'svd', 'cholesky'}:
            raise ValueError(
                "method must be one of {'eigh', 'svd', 'cholesky'}")

        if check_valid != 'ignore':
            if check_valid != 'warn' and check_valid != 'raise':
                raise ValueError(
                    "check_valid must equal 'warn', 'raise', or 'ignore'")

        if check_valid == 'warn':
            with cupyx.errstate(linalg='raise'):
                try:
                    decomp = cupy.linalg.cholesky(cov)
                except LinAlgError:
                    with cupyx.errstate(linalg='ignore'):
                        if method != 'cholesky':
                            if method == 'eigh':
                                (s, u) = cupy.linalg.eigh(cov)
                                psd = not cupy.any(s < -tol)
                            if method == 'svd':
                                (u, s, vh) = cupy.linalg.svd(cov)
                                psd = cupy.allclose(cupy.dot(vh.T * s, vh),
                                                    cov,
                                                    rtol=tol,
                                                    atol=tol)
                            decomp = u * cupy.sqrt(cupy.abs(s))
                            if not psd:
                                warnings.warn(
                                    "covariance is not positive-" +
                                    "semidefinite, output may be " +
                                    "invalid.", RuntimeWarning)

                        else:
                            warnings.warn(
                                "covariance is not positive-" +
                                "semidefinite, output *is* " + "invalid.",
                                RuntimeWarning)
                            decomp = cupy.linalg.cholesky(cov)

        else:
            with cupyx.errstate(linalg=check_valid):
                try:
                    if method == 'cholesky':
                        decomp = cupy.linalg.cholesky(cov)
                    elif method == 'eigh':
                        (s, u) = cupy.linalg.eigh(cov)
                        decomp = u * cupy.sqrt(cupy.abs(s))
                    elif method == 'svd':
                        (u, s, vh) = cupy.linalg.svd(cov)
                        decomp = u * cupy.sqrt(cupy.abs(s))

                except LinAlgError:
                    raise LinAlgError("Matrix is not positive definite; if " +
                                      "matrix is positive-semidefinite, set" +
                                      "'check_valid' to 'warn'")

        x = self.standard_normal(final_shape,
                                 dtype=dtype).reshape(-1, mean.shape[0])
        x = cupy.dot(decomp, x.T)
        x = x.T
        x += mean
        x.shape = tuple(final_shape)
        return x

    def negative_binomial(self, n, p, size=None, dtype=int):
        """Returns an array of samples drawn from the negative binomial distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.negative_binomial` for full documentation,
            :meth:`numpy.random.RandomState.negative_binomial
            <numpy.random.mtrand.RandomState.negative_binomial>`
        """
        n = cupy.asarray(n)
        p = cupy.asarray(p)
        if cupy.any(n <= 0):  # synchronize!
            raise ValueError('n <= 0')
        if cupy.any(p < 0):  # synchronize!
            raise ValueError('p < 0')
        if cupy.any(p > 1):  # synchronize!
            raise ValueError('p > 1')
        y = self.gamma(n, (1 - p) / p, size)
        return self.poisson(y, dtype=dtype)

    def normal(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of normally distributed samples.

        .. seealso::
            :func:`cupy.random.normal` for full documentation,
            :meth:`numpy.random.RandomState.normal
            <numpy.random.mtrand.RandomState.normal>`

        """
        dtype = _check_and_get_dtype(dtype)
        if dtype.char == 'f':
            func = curand.generateNormal
        else:
            func = curand.generateNormalDouble
        return self._generate_normal(func, size, dtype, loc, scale)

    def pareto(self, a, size=None, dtype=float):
        """Returns an array of samples drawn from the pareto II distribution.

        .. seealso::
            :func:`cupy.random.pareto_kernel` for full documentation,
            :meth:`numpy.random.RandomState.pareto
            <numpy.random.mtrand.RandomState.pareto>`
        """
        a = cupy.asarray(a)
        x = self._random_sample_raw(size, dtype)
        cupy.log(x, out=x)
        cupy.exp(-x / a, out=x)
        return x - 1

    def noncentral_chisquare(self, df, nonc, size=None, dtype=float):
        """Returns an array of samples drawn from the noncentral chi-square
        distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.noncentral_chisquare` for full documentation,
            :meth:`numpy.random.RandomState.noncentral_chisquare
            <numpy.random.mtrand.RandomState.noncentral_chisquare>`
        """
        df, nonc = cupy.asarray(df), cupy.asarray(nonc)
        if cupy.any(df <= 0):  # synchronize!
            raise ValueError('df <= 0')
        if cupy.any(nonc < 0):  # synchronize!
            raise ValueError('nonc < 0')
        if size is None:
            size = cupy.broadcast(df, nonc).shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.noncentral_chisquare_kernel(df, nonc, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def noncentral_f(self, dfnum, dfden, nonc, size=None, dtype=float):
        """Returns an array of samples drawn from the noncentral F distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.noncentral_f` for full documentation,
            :meth:`numpy.random.RandomState.noncentral_f
            <numpy.random.mtrand.RandomState.noncentral_f>`
        """
        dfnum, dfden, nonc = \
            cupy.asarray(dfnum), cupy.asarray(dfden), cupy.asarray(nonc)
        if cupy.any(dfnum <= 0):  # synchronize!
            raise ValueError('dfnum <= 0')
        if cupy.any(dfden <= 0):  # synchronize!
            raise ValueError('dfden <= 0')
        if cupy.any(nonc < 0):  # synchronize!
            raise ValueError('nonc < 0')
        if size is None:
            size = cupy.broadcast(dfnum, dfden, nonc).shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.noncentral_f_kernel(dfnum, dfden, nonc, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def poisson(self, lam=1.0, size=None, dtype=int):
        """Returns an array of samples drawn from the poisson distribution.

        .. seealso::
            :func:`cupy.random.poisson` for full documentation,
            :meth:`numpy.random.RandomState.poisson
            <numpy.random.mtrand.RandomState.poisson>`
        """
        lam = cupy.asarray(lam)
        if size is None:
            size = lam.shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.poisson_kernel(lam, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def power(self, a, size=None, dtype=float):
        """Returns an array of samples drawn from the power distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.power` for full documentation,
            :meth:`numpy.random.RandomState.power
            <numpy.random.mtrand.RandomState.power>`
        """
        a = cupy.asarray(a)
        if cupy.any(a < 0):  # synchronize!
            raise ValueError('a < 0')
        if size is None:
            size = a.shape
        x = self.standard_exponential(size=size, dtype=dtype)
        cupy.exp(-x, out=x)
        cupy.add(1, -x, out=x)
        cupy.power(x, 1. / a, out=x)
        return x

    def rand(self, *size, **kwarg):
        """Returns uniform random values over the interval ``[0, 1)``.

        .. seealso::
            :func:`cupy.random.rand` for full documentation,
            :meth:`numpy.random.RandomState.rand
            <numpy.random.mtrand.RandomState.rand>`

        """
        dtype = kwarg.pop('dtype', float)
        if kwarg:
            raise TypeError('rand() got unexpected keyword arguments %s' %
                            ', '.join(kwarg.keys()))
        return self.random_sample(size=size, dtype=dtype)

    def randn(self, *size, **kwarg):
        """Returns an array of standard normal random values.

        .. seealso::
            :func:`cupy.random.randn` for full documentation,
            :meth:`numpy.random.RandomState.randn
            <numpy.random.mtrand.RandomState.randn>`

        """
        dtype = kwarg.pop('dtype', float)
        if kwarg:
            raise TypeError('randn() got unexpected keyword arguments %s' %
                            ', '.join(kwarg.keys()))
        return self.normal(size=size, dtype=dtype)

    _mod1_kernel = core.ElementwiseKernel('', 'T x', 'x = (x == (T)1) ? 0 : x',
                                          'cupy_random_x_mod_1')

    def _random_sample_raw(self, size, dtype):
        dtype = _check_and_get_dtype(dtype)
        out = cupy.empty(size, dtype=dtype)
        if dtype.char == 'f':
            func = curand.generateUniform
        else:
            func = curand.generateUniformDouble
        func(self._generator, out.data.ptr, out.size)
        return out

    def random_sample(self, size=None, dtype=float):
        """Returns an array of random values over the interval ``[0, 1)``.

        .. seealso::
            :func:`cupy.random.random_sample` for full documentation,
            :meth:`numpy.random.RandomState.random_sample
            <numpy.random.mtrand.RandomState.random_sample>`

        """
        out = self._random_sample_raw(size, dtype)
        RandomState._mod1_kernel(out)
        return out

    def rayleigh(self, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a rayleigh distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.rayleigh` for full documentation,
            :meth:`numpy.random.RandomState.rayleigh
            <numpy.random.mtrand.RandomState.rayleigh>`
        """
        scale = cupy.asarray(scale)
        if size is None:
            size = scale.shape
        if cupy.any(scale < 0):  # synchronize!
            raise ValueError('scale < 0')
        x = self._random_sample_raw(size, dtype)
        x = cupy.log(x, out=x)
        x = cupy.multiply(x, -2., out=x)
        x = cupy.sqrt(x, out=x)
        x = cupy.multiply(x, scale, out=x)
        return x

    def _interval(self, mx, size):
        """Generate multiple integers independently sampled uniformly from ``[0, mx]``.

        Args:
            mx (int): Upper bound of the interval
            size (None or int or tuple): Shape of the array or the scalar
                returned.
        Returns:
            int or cupy.ndarray: If ``None``, an :class:`cupy.ndarray` with
            shape ``()`` is returned.
            If ``int``, 1-D array of length size is returned.
            If ``tuple``, multi-dimensional array with shape
            ``size`` is returned.
            Currently, only 32 bit or 64 bit integers can be sampled.
        """  # NOQA
        if size is None:
            size = ()
        elif isinstance(size, int):
            size = size,

        if mx == 0:
            return cupy.zeros(size, dtype=numpy.uint32)

        if mx < 0:
            raise ValueError('mx must be non-negative (actual: {})'.format(mx))
        elif mx <= _UINT32_MAX:
            dtype = numpy.uint32
        elif mx <= _UINT64_MAX:
            dtype = numpy.uint64
        else:
            raise ValueError(
                'mx must be within uint64 range (actual: {})'.format(mx))

        mask = (1 << mx.bit_length()) - 1
        mask = cupy.array(mask, dtype=dtype)

        n = functools.reduce(operator.mul, size, 1)

        if n == 0:
            return cupy.empty(size, dtype=dtype)

        sample = cupy.empty((n, ), dtype=dtype)
        size32 = sample.view(dtype=numpy.uint32).size
        n_rem = n  # The number of remaining elements to sample
        ret = None
        while n_rem > 0:
            # Call 32-bit RNG to fill 32-bit or 64-bit `sample`
            curand.generate(self._generator, sample.data.ptr, size32)
            # Drop the samples that exceed the upper limit
            sample &= mask
            success = sample <= mx

            if ret is None:
                # If the sampling has finished in the first iteration,
                # just return the sample.
                if success.all():
                    n_rem = 0
                    ret = sample
                    break

                # Allocate the return array.
                ret = cupy.empty((n, ), dtype=dtype)

            n_succ = min(n_rem, int(success.sum()))
            ret[n - n_rem:n - n_rem + n_succ] = sample[success][:n_succ]
            n_rem -= n_succ

        assert n_rem == 0

        return ret.reshape(size)

    def seed(self, seed=None):
        """Resets the state of the random number generator with a seed.

        .. seealso::
            :func:`cupy.random.seed` for full documentation,
            :meth:`numpy.random.RandomState.seed
            <numpy.random.mtrand.RandomState.seed>`

        """
        if seed is None:
            try:
                seed_str = binascii.hexlify(os.urandom(8))
                seed = int(seed_str, 16)
            except NotImplementedError:
                seed = (time.time() * 1000000) % _UINT64_MAX
        else:
            if isinstance(seed, numpy.ndarray):
                seed = int(hashlib.md5(seed).hexdigest()[:16], 16)
            else:
                seed = int(
                    numpy.asarray(seed).astype(numpy.uint64, casting='safe'))

        curand.setPseudoRandomGeneratorSeed(self._generator, seed)
        if (self.method not in (curand.CURAND_RNG_PSEUDO_MT19937,
                                curand.CURAND_RNG_PSEUDO_MTGP32)):
            curand.setGeneratorOffset(self._generator, 0)

        self._rk_seed = seed

    def standard_cauchy(self, size=None, dtype=float):
        """Returns an array of samples drawn from the standard cauchy distribution.

        .. seealso::
            :func:`cupy.random.standard_cauchy` for full documentation,
            :meth:`numpy.random.RandomState.standard_cauchy
            <numpy.random.mtrand.RandomState.standard_cauchy>`
        """
        x = self.uniform(size=size, dtype=dtype)
        return cupy.tan(cupy.pi * (x - 0.5))

    def standard_exponential(self, size=None, dtype=float):
        """Returns an array of samples drawn from the standard exp distribution.

         .. seealso::
            :func:`cupy.random.standard_exponential` for full documentation,
            :meth:`numpy.random.RandomState.standard_exponential
            <numpy.random.mtrand.RandomState.standard_exponential>`
        """
        x = self._random_sample_raw(size, dtype)
        return -cupy.log(x, out=x)

    def standard_gamma(self, shape, size=None, dtype=float):
        """Returns an array of samples drawn from a standard gamma distribution.

        .. seealso::
            :func:`cupy.random.standard_gamma` for full documentation,
            :meth:`numpy.random.RandomState.standard_gamma
            <numpy.random.mtrand.RandomState.standard_gamma>`
        """
        shape = cupy.asarray(shape)
        if size is None:
            size = shape.shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.standard_gamma_kernel(shape, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def standard_normal(self, size=None, dtype=float):
        """Returns samples drawn from the standard normal distribution.

        .. seealso::
            :func:`cupy.random.standard_normal` for full documentation,
            :meth:`numpy.random.RandomState.standard_normal
            <numpy.random.mtrand.RandomState.standard_normal>`

        """
        return self.normal(size=size, dtype=dtype)

    def standard_t(self, df, size=None, dtype=float):
        """Returns an array of samples drawn from the standard t distribution.

        .. seealso::
            :func:`cupy.random.standard_t` for full documentation,
            :meth:`numpy.random.RandomState.standard_t
            <numpy.random.mtrand.RandomState.standard_t>`
        """
        df = cupy.asarray(df)
        if size is None:
            size = df.shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.standard_t_kernel(df, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def tomaxint(self, size=None):
        """Draws integers between 0 and max integer inclusive.

        Args:
            size (int or tuple of ints): Output shape.

        Returns:
            cupy.ndarray: Drawn samples.

        .. seealso::
            :meth:`numpy.random.RandomState.tomaxint
            <numpy.random.mtrand.RandomState.tomaxint>`

        """
        if size is None:
            size = ()
        sample = cupy.empty(size, dtype=cupy.int_)
        # cupy.random only uses int32 random generator
        size_in_int = sample.dtype.itemsize // 4
        curand.generate(self._generator, sample.data.ptr,
                        sample.size * size_in_int)

        # Disable sign bit
        sample &= cupy.iinfo(cupy.int_).max
        return sample

    _triangular_kernel = core.ElementwiseKernel(
        'L left, M mode, R right', 'T x', """
        T base, leftbase, ratio, leftprod, rightprod;

        base = right - left;
        leftbase = mode - left;
        ratio = leftbase / base;
        leftprod = leftbase*base;
        rightprod = (right - mode)*base;

        if (x <= ratio)
        {
            x = left + sqrt(x*leftprod);
        } else
        {
            x = right - sqrt((1.0 - x) * rightprod);
        }
        """, 'triangular_kernel')

    def triangular(self, left, mode, right, size=None, dtype=float):
        """Returns an array of samples drawn from the triangular distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.triangular` for full documentation,
            :meth:`numpy.random.RandomState.triangular
            <numpy.random.mtrand.RandomState.triangular>`
        """
        left, mode, right = \
            cupy.asarray(left), cupy.asarray(mode), cupy.asarray(right)
        if cupy.any(left > mode):  # synchronize!
            raise ValueError('left > mode')
        if cupy.any(mode > right):  # synchronize!
            raise ValueError('mode > right')
        if cupy.any(left == right):  # synchronize!
            raise ValueError('left == right')
        if size is None:
            size = cupy.broadcast(left, mode, right).shape
        x = self.random_sample(size=size, dtype=dtype)
        return RandomState._triangular_kernel(left, mode, right, x)

    _scale_kernel = core.ElementwiseKernel('T low, T high', 'T x',
                                           'x = T(low) + x * T(high - low)',
                                           'cupy_scale')

    def uniform(self, low=0.0, high=1.0, size=None, dtype=float):
        """Returns an array of uniformly-distributed samples over an interval.

        .. seealso::
            :func:`cupy.random.uniform` for full documentation,
            :meth:`numpy.random.RandomState.uniform
            <numpy.random.mtrand.RandomState.uniform>`

        """
        dtype = numpy.dtype(dtype)
        rand = self.random_sample(size=size, dtype=dtype)
        if not numpy.isscalar(low):
            low = cupy.asarray(low, dtype)
        if not numpy.isscalar(high):
            high = cupy.asarray(high, dtype)
        return RandomState._scale_kernel(low, high, rand)

    def vonmises(self, mu, kappa, size=None, dtype=float):
        """Returns an array of samples drawn from the von Mises distribution.

        .. seealso::
            :func:`cupy.random.vonmises` for full documentation,
            :meth:`numpy.random.RandomState.vonmises
            <numpy.random.mtrand.RandomState.vonmises>`
        """
        mu, kappa = cupy.asarray(mu), cupy.asarray(kappa)
        if size is None:
            size = cupy.broadcast(mu, kappa).shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.vonmises_kernel(mu, kappa, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    _wald_kernel = core.ElementwiseKernel(
        'T mean, T scale, T U', 'T X', """
            T mu_2l;
            T Y;
            mu_2l = mean / (2*scale);
            Y = mean*X*X;
            X = mean + mu_2l*(Y - sqrt(4*scale*Y + Y*Y));
            if (U > mean/(mean+X))
            {
                X = mean*mean/X;
            }
        """, 'wald_scale')

    def wald(self, mean, scale, size=None, dtype=float):
        """Returns an array of samples drawn from the Wald distribution.

         .. seealso::
            :func:`cupy.random.wald` for full documentation,
            :meth:`numpy.random.RandomState.wald
            <numpy.random.mtrand.RandomState.wald>`
        """
        mean, scale = \
            cupy.asarray(mean, dtype=dtype), cupy.asarray(scale, dtype=dtype)
        if size is None:
            size = cupy.broadcast(mean, scale).shape
        x = self.normal(size=size, dtype=dtype)
        u = self.random_sample(size=size, dtype=dtype)
        return RandomState._wald_kernel(mean, scale, u, x)

    def weibull(self, a, size=None, dtype=float):
        """Returns an array of samples drawn from the weibull distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.weibull` for full documentation,
            :meth:`numpy.random.RandomState.weibull
            <numpy.random.mtrand.RandomState.weibull>`
        """
        a = cupy.asarray(a)
        if cupy.any(a < 0):  # synchronize!
            raise ValueError('a < 0')
        x = self.standard_exponential(size, dtype)
        cupy.power(x, 1. / a, out=x)
        return x

    def zipf(self, a, size=None, dtype=int):
        """Returns an array of samples drawn from the Zipf distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.zipf` for full documentation,
            :meth:`numpy.random.RandomState.zipf
            <numpy.random.mtrand.RandomState.zipf>`
        """
        a = cupy.asarray(a)
        if cupy.any(a <= 1.0):  # synchronize!
            raise ValueError('\'a\' must be a valid float > 1.0')
        if size is None:
            size = a.shape
        y = cupy.empty(shape=size, dtype=dtype)
        _kernels.zipf_kernel(a, self._rk_seed, y)
        self._update_seed(y.size)
        return y

    def choice(self, a, size=None, replace=True, p=None):
        """Returns an array of random values from a given 1-D array.

        .. seealso::
            :func:`cupy.random.choice` for full document,
            :meth:`numpy.random.choice
            <numpy.random.mtrand.RandomState.choice>`

        """
        if a is None:
            raise ValueError('a must be 1-dimensional or an integer')
        if isinstance(a, cupy.ndarray) and a.ndim == 0:
            raise NotImplementedError
        if isinstance(a, int):
            a_size = a
            if a_size <= 0:
                raise ValueError('a must be greater than 0')
        else:
            a = cupy.array(a, copy=False)
            if a.ndim != 1:
                raise ValueError('a must be 1-dimensional or an integer')
            else:
                a_size = len(a)
                if a_size == 0:
                    raise ValueError('a must be non-empty')

        if p is not None:
            p = cupy.array(p)
            if p.ndim != 1:
                raise ValueError('p must be 1-dimensional')
            if len(p) != a_size:
                raise ValueError('a and p must have same size')
            if not (p >= 0).all():
                raise ValueError('probabilities are not non-negative')
            p_sum = cupy.sum(p).get()
            if not numpy.allclose(p_sum, 1):
                raise ValueError('probabilities do not sum to 1')

        if size is None:
            raise NotImplementedError
        shape = size
        size = numpy.prod(shape)

        if not replace and p is None:
            if a_size < size:
                raise ValueError(
                    'Cannot take a larger sample than population when '
                    '\'replace=False\'')
            if isinstance(a, int):
                indices = cupy.arange(a, dtype='l')
            else:
                indices = a.copy()
            self.shuffle(indices)
            return indices[:size].reshape(shape)

        if not replace:
            raise NotImplementedError

        if p is not None:
            p = cupy.broadcast_to(p, (size, a_size))
            index = cupy.argmax(cupy.log(p) + self.gumbel(size=(size, a_size)),
                                axis=1)
            if not isinstance(shape, int):
                index = cupy.reshape(index, shape)
        else:
            index = self.randint(0, a_size, size=shape)
            # Align the dtype with NumPy
            index = index.astype(cupy.int64, copy=False)

        if isinstance(a, int):
            return index

        if index.ndim == 0:
            return cupy.array(a[index], dtype=a.dtype)

        return a[index]

    def shuffle(self, a):
        """Returns a shuffled array.

        .. seealso::
            :func:`cupy.random.shuffle` for full document,
            :meth:`numpy.random.shuffle
            <numpy.random.mtrand.RandomState.shuffle>`

        """
        if not isinstance(a, cupy.ndarray):
            raise TypeError('The array must be cupy.ndarray')

        if a.ndim == 0:
            raise TypeError('An array whose ndim is 0 is not supported')

        a[:] = a[self._permutation(len(a))]

    def permutation(self, a):
        """Returns a permuted range or a permutation of an array."""
        if isinstance(a, int):
            return self._permutation(a)
        else:
            return a[self._permutation(len(a))]

    def _permutation(self, num):
        """Returns a permuted range."""
        sample = cupy.empty((num), dtype=numpy.int32)
        curand.generate(self._generator, sample.data.ptr, num)
        if 128 < num <= 32 * 1024 * 1024:
            array = cupy.arange(num, dtype=numpy.int32)
            # apply sort of cache blocking
            block_size = 1 * 1024 * 1024
            # The block size above is a value determined from the L2 cache size
            # of GP100 (L2 cache size / size of int = 4MB / 4B = 1M). It may be
            # better to change the value base on the L2 cache size of the GPU
            # you use.
            # When num > block_size, cupy kernel: _cupy_permutation is to be
            # launched multiple times. However, it is observed that performance
            # will be degraded if the launch count is too many. Therefore,
            # the block size is adjusted so that launch count will not exceed
            # twelve Note that this twelve is the value determined from
            # measurement on GP100.
            while num // block_size > 12:
                block_size *= 2
            for j_start in range(0, num, block_size):
                j_end = j_start + block_size
                _cupy_permutation(sample, j_start, j_end, array, size=num)
        else:
            # When num > 32M, argsort is used, because it is faster than
            # custom kernel. See https://github.com/cupy/cupy/pull/603.
            array = cupy.argsort(sample)
        return array

    _gumbel_kernel = core.ElementwiseKernel(
        'T x, T loc, T scale', 'T y', 'y = T(loc) - log(-log(x)) * T(scale)',
        'gumbel_kernel')

    def gumbel(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a Gumbel distribution.

        .. seealso::
            :func:`cupy.random.gumbel` for full documentation,
            :meth:`numpy.random.RandomState.gumbel
            <numpy.random.mtrand.RandomState.gumbel>`
        """
        x = self._random_sample_raw(size=size, dtype=dtype)
        if not numpy.isscalar(loc):
            loc = cupy.asarray(loc, dtype)
        if not numpy.isscalar(scale):
            scale = cupy.asarray(scale, dtype)
        RandomState._gumbel_kernel(x, loc, scale, x)
        return x

    def randint(self, low, high=None, size=None, dtype='l'):
        """Returns a scalar or an array of integer values over ``[low, high)``.

        .. seealso::
            :func:`cupy.random.randint` for full documentation,
            :meth:`numpy.random.RandomState.randint
            <numpy.random.mtrand.RandomState.randint>`
        """
        if high is None:
            lo = 0
            hi1 = int(low) - 1
        else:
            lo = int(low)
            hi1 = int(high) - 1

        if lo > hi1:
            raise ValueError('low >= high')
        if lo < cupy.iinfo(dtype).min:
            raise ValueError('low is out of bounds for {}'.format(
                cupy.dtype(dtype).name))
        if hi1 > cupy.iinfo(dtype).max:
            raise ValueError('high is out of bounds for {}'.format(
                cupy.dtype(dtype).name))

        diff = hi1 - lo
        x = self._interval(diff, size).astype(dtype, copy=False)
        cupy.add(x, lo, out=x)
        return x

コード例 #22

ファイルを表示

ファイル: compressed.py プロジェクト: zivzone/cupy

class _compressed_sparse_matrix(sparse_data._data_matrix,
                                sparse_data._minmax_mixin):

    _compress_getitem_kern = core.ElementwiseKernel(
        'T d, S ind, int32 minor', 'raw T answer',
        'if (ind == minor) atomicAdd(&answer[0], d);',
        'compress_getitem')

    _compress_getitem_complex_kern = core.ElementwiseKernel(
        'T real, T imag, S ind, int32 minor',
        'raw T answer_real, raw T answer_imag',
        '''
        if (ind == minor) {
          atomicAdd(&answer_real[0], real);
          atomicAdd(&answer_imag[0], imag);
        }
        ''',
        'compress_getitem_complex')

    _max_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void max_reduction(double* data, int* x, int* y, int length,
                           double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length == length){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the block and update
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] > running_value){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'max_reduction')

    _max_nonzero_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void max_nonzero_reduction(double* data, int* x, int* y, int length,
                                   double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length > 0){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (running_value < data[entry]){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'max_nonzero_reduction')

    _min_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void min_reduction(double* data, int* x, int* y, int length,
                           double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length == length){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the block to update the initial value
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] < running_value){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'min_reduction')

    _min_nonzero_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void min_nonzero_reduction(double* data, int* x, int* y, int length,
                                   double* z) {
            // Get the index of hte block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length > 0){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (running_value > data[entry]){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'min_nonzero_reduction')

    _max_arg_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void max_arg_reduction(double* data, int* indices, int* x, int* y,
                               int length, long long* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            int data_index = 0;
            double data_value = 0;

            if (block_length == length){
                // Block is dense. Fill the first value
                data_value = data[x[tid]];
                data_index = indices[x[tid]];
            } else if (block_length > 0)  {
                // Block has at least one zero. Assign first occurrence as the
                // starting reference
                data_value = 0;
                for (data_index = 0; data_index < length; data_index++){
                    if (data_index != indices[x[tid] + data_index] ||
                        x[tid] + data_index >= y[tid]){
                        break;
                    }
                }
            } else {
                 // Zero valued array
                data_value = 0;
                data_index = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    data_value = nan("");
                    data_index = 0;
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] > data_value){
                        data_index = indices[entry];
                        data_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = data_index;
        }
        ''', 'max_arg_reduction')

    _min_arg_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void min_arg_reduction(double* data, int* indices, int* x, int* y,
                               int length, long long* z) {
            // Get the index of hte block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            int data_index = 0;
            double data_value = 0;

            if (block_length == length){
                // Block is dense. Fill the first value
                data_value = data[x[tid]];
                data_index = indices[x[tid]];
            } else if (block_length > 0)  {
                // Block has at least one zero. Assign first occurrence as the
                // starting reference
                data_value = 0;
                for (data_index = 0; data_index < length; data_index++){
                    if (data_index != indices[x[tid] + data_index] ||
                        x[tid] + data_index >= y[tid]){
                        break;
                    }
                }
            } else {
                // Zero valued array
                data_value = 0;
                data_index = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    data_value = nan("");
                    data_index = 0;
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] < data_value){
                        data_index = indices[entry];
                        data_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = data_index;

        }
        ''', 'min_arg_reduction')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        if shape is not None:
            if not util.isshape(shape):
                raise ValueError('invalid shape (must be a 2-tuple of int)')
            shape = int(shape[0]), int(shape[1])

        if base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            indices = x.indices
            indptr = x.indptr

            if arg1.format != self.format:
                # When formats are differnent, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

            has_canonical_format = x.has_canonical_format
        elif util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            data = basic.zeros(0, dtype if dtype else 'd')
            indices = basic.zeros(0, 'i')
            indptr = basic.zeros(self._swap(m, n)[0] + 1, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False
            has_canonical_format = True

        elif scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupyx.scipy.sparse
            x = arg1.asformat(self.format)
            data = cupy.array(x.data)
            indices = cupy.array(x.indices, dtype='i')
            indptr = cupy.array(x.indptr, dtype='i')
            copy = False

            if shape is None:
                shape = arg1.shape
            has_canonical_format = x.has_canonical_format

        elif isinstance(arg1, tuple) and len(arg1) == 3:
            data, indices, indptr = arg1
            if not (base.isdense(data) and data.ndim == 1 and
                    base.isdense(indices) and indices.ndim == 1 and
                    base.isdense(indptr) and indptr.ndim == 1):
                raise ValueError(
                    'data, indices, and indptr should be 1-D')

            if len(data) != len(indices):
                raise ValueError('indices and data should have the same size')

            has_canonical_format = False

        elif base.isdense(arg1):
            if arg1.ndim > 2:
                raise TypeError('expected dimension <= 2 array or matrix')
            elif arg1.ndim == 1:
                arg1 = arg1[None]
            elif arg1.ndim == 0:
                arg1 = arg1[None, None]
            data, indices, indptr = self._convert_dense(arg1)
            copy = False
            if shape is None:
                shape = arg1.shape

            has_canonical_format = True

        else:
            raise ValueError(
                'Unsupported initializer format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype != 'f' and dtype != 'd' and dtype != 'F' and dtype != 'D':
            raise ValueError(
                'Only float32, float64, complex64 and complex128 '
                'are supported')

        data = data.astype(dtype, copy=copy)
        sparse_data._data_matrix.__init__(self, data)

        self.indices = indices.astype('i', copy=copy)
        self.indptr = indptr.astype('i', copy=copy)

        if shape is None:
            shape = self._swap(len(indptr) - 1, int(indices.max()) + 1)

        major, minor = self._swap(*shape)
        if len(indptr) != major + 1:
            raise ValueError('index pointer size (%d) should be (%d)'
                             % (len(indptr), major + 1))

        self._descr = cusparse.MatDescriptor.create()
        self._shape = shape
        self._has_canonical_format = has_canonical_format

    def _with_data(self, data, copy=True):
        if copy:
            return self.__class__(
                (data, self.indices.copy(), self.indptr.copy()),
                shape=self.shape,
                dtype=data.dtype)
        else:
            return self.__class__(
                (data, self.indices, self.indptr),
                shape=self.shape,
                dtype=data.dtype)

    def _convert_dense(self, x):
        raise NotImplementedError

    def _swap(self, x, y):
        raise NotImplementedError

    def _add_sparse(self, other, alpha, beta):
        raise NotImplementedError

    def _add(self, other, lhs_negative, rhs_negative):
        if cupy.isscalar(other):
            if other == 0:
                if lhs_negative:
                    return -self
                else:
                    return self.copy()
            else:
                raise NotImplementedError(
                    'adding a nonzero scalar to a sparse matrix is not '
                    'supported')
        elif base.isspmatrix(other):
            alpha = -1 if lhs_negative else 1
            beta = -1 if rhs_negative else 1
            return self._add_sparse(other, alpha, beta)
        elif base.isdense(other):
            if lhs_negative:
                if rhs_negative:
                    return -self.todense() - other
                else:
                    return other - self.todense()
            else:
                if rhs_negative:
                    return self.todense() - other
                else:
                    return self.todense() + other
        else:
            return NotImplemented

    def __add__(self, other):
        return self._add(other, False, False)

    def __radd__(self, other):
        return self._add(other, False, False)

    def __sub__(self, other):
        return self._add(other, False, True)

    def __rsub__(self, other):
        return self._add(other, True, False)

    def __getitem__(self, slices):
        if isinstance(slices, tuple):
            slices = list(slices)
        elif isinstance(slices, list):
            slices = list(slices)
            if all([isinstance(s, int) for s in slices]):
                slices = [slices]
        else:
            slices = [slices]

        ellipsis = -1
        n_ellipsis = 0
        for i, s in enumerate(slices):
            if s is None:
                raise IndexError('newaxis is not supported')
            elif s is Ellipsis:
                ellipsis = i
                n_ellipsis += 1
        if n_ellipsis > 0:
            ellipsis_size = self.ndim - (len(slices) - 1)
            slices[ellipsis:ellipsis + 1] = [slice(None)] * ellipsis_size

        if len(slices) == 2:
            row, col = slices
        elif len(slices) == 1:
            row, col = slices[0], slice(None)
        else:
            raise IndexError('invalid number of indices')

        major, minor = self._swap(row, col)
        major_size, minor_size = self._swap(*self._shape)
        if numpy.isscalar(major):
            i = int(major)
            if i < 0:
                i += major_size
            if not (0 <= i < major_size):
                raise IndexError('index out of bounds')
            if numpy.isscalar(minor):
                j = int(minor)
                if j < 0:
                    j += minor_size
                if not (0 <= j < minor_size):
                    raise IndexError('index out of bounds')
                return self._get_single(i, j)
            elif minor == slice(None):
                return self._get_major_slice(slice(i, i + 1))
        elif isinstance(major, slice):
            if minor == slice(None):
                return self._get_major_slice(major)

        raise ValueError('unsupported indexing')

    def _get_single(self, major, minor):
        start = self.indptr[major]
        end = self.indptr[major + 1]
        answer = cupy.zeros((), self.dtype)
        data = self.data[start:end]
        indices = self.indices[start:end]
        if self.dtype.kind == 'c':
            self._compress_getitem_complex_kern(
                data.real, data.imag, indices, minor, answer.real, answer.imag)
        else:
            self._compress_getitem_kern(
                data, indices, minor, answer)
        return answer[()]

    def _get_major_slice(self, major):
        major_size, minor_size = self._swap(*self._shape)
        # major.indices cannot be used because scipy.sparse behaves differently
        major_start = major.start
        major_stop = major.stop
        major_step = major.step
        if major_start is None:
            major_start = 0
        if major_stop is None:
            major_stop = major_size
        if major_step is None:
            major_step = 1
        if major_start < 0:
            major_start += major_size
        if major_stop < 0:
            major_stop += major_size
        major_start = max(min(major_start, major_size), 0)
        major_stop = max(min(major_stop, major_size), 0)

        if major_step != 1:
            raise ValueError('slicing with step != 1 not supported')

        if not (major_start <= major_stop):
            # will give an empty slice, but preserve shape on the other axis
            major_start = major_stop

        start = self.indptr[major_start]
        stop = self.indptr[major_stop]
        data = self.data[start:stop]
        indptr = self.indptr[major_start:major_stop + 1] - start
        indices = self.indices[start:stop]

        shape = self._swap(len(indptr) - 1, minor_size)
        return self.__class__(
            (data, indices, indptr), shape=shape, dtype=self.dtype, copy=False)

    @property
    def has_canonical_format(self):
        return self._has_canonical_format

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.

        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros.

        Args:
            axis: Not supported yet.

        Returns:
            int: The number of stored values.

        """
        if axis is None:
            return self.data.size
        else:
            raise ValueError

    # TODO(unno): Implement sorted_indices

    def sum_duplicates(self):
        if self._has_canonical_format:
            return
        if self.data.size == 0:
            self._has_canonical_format = True
            return
        coo = self.tocoo()
        coo.sum_duplicates()
        self.__init__(coo.asformat(self.format))
        self._has_canonical_format = True

    #####################
    # Reduce operations #
    #####################

    def _minor_reduce(self, ufunc, axis, nonzero):
        """Reduce nonzeros with a ufunc over the minor axis when non-empty

        Can be applied to a function of self.data by supplying data parameter.
        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted.
            axis (int): Matrix over which the reduction should be
                conducted.

        Returns:
            (cupy.ndarray): Reduce result for nonzeros in each
                major_index.

        """

        # Call to the appropriate kernel function
        if axis == 1:
            # Create the vector to hold output
            value = cupy.zeros(self.shape[0]).astype(cupy.float64)

            if nonzero:
                # Perform the calculation
                if ufunc == cupy.amax:
                    self._max_nonzero_reduction_kern(
                        (self.shape[0],), (1,),
                        (self.data.astype(cupy.float64),
                         self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[1]),
                         value))
                if ufunc == cupy.amin:
                    self._min_nonzero_reduction_kern(
                        (self.shape[0],), (1,),
                        (self.data.astype(cupy.float64),
                         self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[1]),
                         value))

            else:
                # Perform the calculation
                if ufunc == cupy.amax:
                    self._max_reduction_kern(
                        (self.shape[0],), (1,),
                        (self.data.astype(cupy.float64),
                         self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[1]),
                         value))
                if ufunc == cupy.amin:
                    self._min_reduction_kern(
                        (self.shape[0],), (1,),
                        (self.data.astype(cupy.float64),
                         self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[1]),
                         value))

        if axis == 0:
            # Create the vector to hold output
            value = cupy.zeros(self.shape[1]).astype(cupy.float64)

            if nonzero:
                # Perform the calculation
                if ufunc == cupy.amax:
                    self._max_nonzero_reduction_kern(
                        (self.shape[1],), (1,),
                        (self.data.astype(cupy.float64),
                         self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[0]),
                         value))
                if ufunc == cupy.amin:
                    self._min_nonzero_reduction_kern(
                        (self.shape[1],), (1,),
                        (self.data.astype(cupy.float64),
                         self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[0]),
                         value))
            else:
                # Perform the calculation
                if ufunc == cupy.amax:
                    self._max_reduction_kern(
                        (self.shape[1],), (1,),
                        (self.data.astype(cupy.float64),
                         self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[0]),
                         value))
                if ufunc == cupy.amin:
                    self._min_reduction_kern(
                        (self.shape[1],), (1,),
                        (self.data.astype(cupy.float64),
                         self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[0]),
                         value))

        return value

    def _arg_minor_reduce(self, ufunc, axis):
        """Reduce nonzeros with a ufunc over the minor axis when non-empty

        Can be applied to a function of self.data by supplying data parameter.
        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted.
            axis (int): Maxtrix over which the reduction should be conducted

        Returns:
            (cupy.ndarray): Reduce result for nonzeros in each
                major_index

        """

        # Call to the appropriate kernel function
        if axis == 1:
            # Create the vector to hold output
            value = cupy.zeros(self.shape[0]).astype(cupy.int64)

            # Perform the calculation
            if ufunc == cupy.argmax:
                self._max_arg_reduction_kern(
                    (self.shape[0],), (1,),
                    (self.data.astype(cupy.float64), self.indices,
                     self.indptr[:len(self.indptr) - 1],
                     self.indptr[1:], cupy.int64(self.shape[1]),
                     value))
            if ufunc == cupy.argmin:
                self._min_arg_reduction_kern(
                    (self.shape[0],), (1,),
                    (self.data.astype(cupy.float64), self.indices,
                     self.indptr[:len(self.indptr) - 1],
                     self.indptr[1:], cupy.int64(self.shape[1]),
                     value))

        if axis == 0:
            # Create the vector to hold output
            value = cupy.zeros(self.shape[1]).astype(cupy.int64)

            # Perform the calculation
            if ufunc == cupy.argmax:
                self._max_arg_reduction_kern(
                    (self.shape[1],), (1,),
                    (self.data.astype(cupy.float64), self.indices,
                     self.indptr[:len(self.indptr) - 1],
                     self.indptr[1:], cupy.int64(self.shape[0]),
                     value))
            if ufunc == cupy.argmin:
                self._min_arg_reduction_kern(
                    (self.shape[1],), (1,),
                    (self.data.astype(cupy.float64), self.indices,
                     self.indptr[:len(self.indptr) - 1],
                     self.indptr[1:],
                     cupy.int64(self.shape[0]), value))

        return value

コード例 #23

ファイルを表示

ファイル: compressed.py プロジェクト: zhaohb/cupy

class _compressed_sparse_matrix(sparse_data._data_matrix,
                                sparse_data._minmax_mixin, _index.IndexMixin):

    _max_reduction_kern = core.RawKernel(
        r'''
        extern "C" __global__
        void max_reduction(double* data, int* x, int* y, int length,
                           double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length == length){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the block and update
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] > running_value){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'max_reduction')

    _max_nonzero_reduction_kern = core.RawKernel(
        r'''
        extern "C" __global__
        void max_nonzero_reduction(double* data, int* x, int* y, int length,
                                   double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length > 0){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (running_value < data[entry]){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'max_nonzero_reduction')

    _min_reduction_kern = core.RawKernel(
        r'''
        extern "C" __global__
        void min_reduction(double* data, int* x, int* y, int length,
                           double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length == length){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the block to update the initial value
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] < running_value){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'min_reduction')

    _min_nonzero_reduction_kern = core.RawKernel(
        r'''
        extern "C" __global__
        void min_nonzero_reduction(double* data, int* x, int* y, int length,
                                   double* z) {
            // Get the index of hte block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length > 0){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (running_value > data[entry]){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'min_nonzero_reduction')

    _max_arg_reduction_kern = core.RawKernel(
        r'''
        extern "C" __global__
        void max_arg_reduction(double* data, int* indices, int* x, int* y,
                               int length, long long* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            int data_index = 0;
            double data_value = 0;

            if (block_length == length){
                // Block is dense. Fill the first value
                data_value = data[x[tid]];
                data_index = indices[x[tid]];
            } else if (block_length > 0)  {
                // Block has at least one zero. Assign first occurrence as the
                // starting reference
                data_value = 0;
                for (data_index = 0; data_index < length; data_index++){
                    if (data_index != indices[x[tid] + data_index] ||
                        x[tid] + data_index >= y[tid]){
                        break;
                    }
                }
            } else {
                 // Zero valued array
                data_value = 0;
                data_index = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    data_value = nan("");
                    data_index = 0;
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] > data_value){
                        data_index = indices[entry];
                        data_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = data_index;
        }
        ''', 'max_arg_reduction')

    _min_arg_reduction_kern = core.RawKernel(
        r'''
        extern "C" __global__
        void min_arg_reduction(double* data, int* indices, int* x, int* y,
                               int length, long long* z) {
            // Get the index of hte block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            int data_index = 0;
            double data_value = 0;

            if (block_length == length){
                // Block is dense. Fill the first value
                data_value = data[x[tid]];
                data_index = indices[x[tid]];
            } else if (block_length > 0)  {
                // Block has at least one zero. Assign first occurrence as the
                // starting reference
                data_value = 0;
                for (data_index = 0; data_index < length; data_index++){
                    if (data_index != indices[x[tid] + data_index] ||
                        x[tid] + data_index >= y[tid]){
                        break;
                    }
                }
            } else {
                // Zero valued array
                data_value = 0;
                data_index = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    data_value = nan("");
                    data_index = 0;
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] < data_value){
                        data_index = indices[entry];
                        data_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = data_index;

        }
        ''', 'min_arg_reduction')

    # TODO(leofang): rewrite a more load-balanced approach than this naive one?
    _has_sorted_indices_kern = core.ElementwiseKernel(
        'raw T indptr, raw T indices', 'bool diff', '''
        bool diff_out = true;
        for (T jj = indptr[i]; jj < indptr[i+1] - 1; jj++) {
            if (indices[jj] > indices[jj+1]){
                diff_out = false;
            }
        }
        diff = diff_out;
        ''', 'has_sorted_indices')

    # TODO(leofang): rewrite a more load-balanced approach than this naive one?
    _has_canonical_format_kern = core.ElementwiseKernel(
        'raw T indptr, raw T indices', 'bool diff', '''
        bool diff_out = true;
        if (indptr[i] > indptr[i+1]) {
            diff = false;
            return;
        }
        for (T jj = indptr[i]; jj < indptr[i+1] - 1; jj++) {
            if (indices[jj] >= indices[jj+1]) {
                diff_out = false;
            }
        }
        diff = diff_out;
        ''', 'has_canonical_format')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        if shape is not None:
            if not _util.isshape(shape):
                raise ValueError('invalid shape (must be a 2-tuple of int)')
            shape = int(shape[0]), int(shape[1])

        if base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            indices = x.indices
            indptr = x.indptr

            if arg1.format != self.format:
                # When formats are differnent, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

        elif _util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            data = basic.zeros(0, dtype if dtype else 'd')
            indices = basic.zeros(0, 'i')
            indptr = basic.zeros(self._swap(m, n)[0] + 1, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False

        elif scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupyx.scipy.sparse
            x = arg1.asformat(self.format)
            data = cupy.array(x.data)
            indices = cupy.array(x.indices, dtype='i')
            indptr = cupy.array(x.indptr, dtype='i')
            copy = False

            if shape is None:
                shape = arg1.shape

        elif isinstance(arg1, tuple) and len(arg1) == 2:
            # Note: This implementation is not efficeint, as it first
            # constructs a sparse matrix with coo format, then converts it to
            # compressed format.
            sp_coo = coo.coo_matrix(arg1, shape=shape, dtype=dtype, copy=copy)
            sp_compressed = sp_coo.asformat(self.format)
            data = sp_compressed.data
            indices = sp_compressed.indices
            indptr = sp_compressed.indptr

        elif isinstance(arg1, tuple) and len(arg1) == 3:
            data, indices, indptr = arg1
            if not (base.isdense(data) and data.ndim == 1
                    and base.isdense(indices) and indices.ndim == 1
                    and base.isdense(indptr) and indptr.ndim == 1):
                raise ValueError('data, indices, and indptr should be 1-D')

            if len(data) != len(indices):
                raise ValueError('indices and data should have the same size')

        elif base.isdense(arg1):
            if arg1.ndim > 2:
                raise TypeError('expected dimension <= 2 array or matrix')
            elif arg1.ndim == 1:
                arg1 = arg1[None]
            elif arg1.ndim == 0:
                arg1 = arg1[None, None]
            data, indices, indptr = self._convert_dense(arg1)
            copy = False
            if shape is None:
                shape = arg1.shape

        else:
            raise ValueError('Unsupported initializer format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype.char not in '?fdFD':
            raise ValueError(
                'Only bool, float32, float64, complex64 and complex128 '
                'are supported')

        data = data.astype(dtype, copy=copy)
        sparse_data._data_matrix.__init__(self, data)

        self.indices = indices.astype('i', copy=copy)
        self.indptr = indptr.astype('i', copy=copy)

        if shape is None:
            shape = self._swap(len(indptr) - 1, int(indices.max()) + 1)

        major, minor = self._swap(*shape)
        if len(indptr) != major + 1:
            raise ValueError('index pointer size (%d) should be (%d)' %
                             (len(indptr), major + 1))

        self._descr = cusparse.MatDescriptor.create()
        self._shape = shape

    def _with_data(self, data, copy=True):
        if copy:
            return self.__class__(
                (data, self.indices.copy(), self.indptr.copy()),
                shape=self.shape,
                dtype=data.dtype)
        else:
            return self.__class__((data, self.indices, self.indptr),
                                  shape=self.shape,
                                  dtype=data.dtype)

    def _convert_dense(self, x):
        raise NotImplementedError

    def _swap(self, x, y):
        raise NotImplementedError

    def _add_sparse(self, other, alpha, beta):
        raise NotImplementedError

    def _add(self, other, lhs_negative, rhs_negative):
        if cupy.isscalar(other):
            if other == 0:
                if lhs_negative:
                    return -self
                else:
                    return self.copy()
            else:
                raise NotImplementedError(
                    'adding a nonzero scalar to a sparse matrix is not '
                    'supported')
        elif base.isspmatrix(other):
            alpha = -1 if lhs_negative else 1
            beta = -1 if rhs_negative else 1
            return self._add_sparse(other, alpha, beta)
        elif base.isdense(other):
            if lhs_negative:
                if rhs_negative:
                    return -self.todense() - other
                else:
                    return other - self.todense()
            else:
                if rhs_negative:
                    return self.todense() - other
                else:
                    return self.todense() + other
        else:
            return NotImplemented

    def __add__(self, other):
        return self._add(other, False, False)

    def __radd__(self, other):
        return self._add(other, False, False)

    def __sub__(self, other):
        return self._add(other, False, True)

    def __rsub__(self, other):
        return self._add(other, True, False)

    def _get_intXint(self, row, col):
        major, minor = self._swap(row, col)
        data, indices, _ = _index._get_csr_submatrix_major_axis(
            self.data, self.indices, self.indptr, major, major + 1)
        dtype = data.dtype
        res = cupy.zeros((), dtype=dtype)
        if dtype.kind == 'c':
            _index._compress_getitem_complex_kern(data.real, data.imag,
                                                  indices, minor, res.real,
                                                  res.imag)
        else:
            _index._compress_getitem_kern(data, indices, minor, res)
        return res

    def _get_sliceXslice(self, row, col):
        major, minor = self._swap(row, col)
        copy = major.step in (1, None)
        return self._major_slice(major)._minor_slice(minor, copy=copy)

    def _get_arrayXarray(self, row, col, not_found_val=0):
        # inner indexing
        idx_dtype = self.indices.dtype
        M, N = self._swap(*self.shape)
        major, minor = self._swap(row, col)
        major = major.astype(idx_dtype, copy=False)
        minor = minor.astype(idx_dtype, copy=False)

        val = _index._csr_sample_values(M, N, self.indptr,
                                        self.indices, self.data, major.ravel(),
                                        minor.ravel(), not_found_val)

        if major.ndim == 1:
            # Scipy returns `matrix` here
            return cupy.expand_dims(val, 0)
        return self.__class__(val.reshape(major.shape))

    def _get_columnXarray(self, row, col):
        # outer indexing
        major, minor = self._swap(row, col)
        return self._major_index_fancy(major)._minor_index_fancy(minor)

    def _major_index_fancy(self, idx):
        """Index along the major axis where idx is an array of ints.
        """
        _, N = self._swap(*self.shape)
        M = idx.size
        new_shape = self._swap(M, N)
        if self.nnz == 0 or M == 0:
            return self.__class__(new_shape)

        return self.__class__(_index._csr_row_index(self.data, self.indices,
                                                    self.indptr, idx),
                              shape=new_shape,
                              copy=False)

    def _minor_index_fancy(self, idx):
        """Index along the minor axis where idx is an array of ints.
        """
        M, _ = self._swap(*self.shape)
        N = idx.size
        new_shape = self._swap(M, N)
        if self.nnz == 0 or N == 0:
            return self.__class__(new_shape)

        if idx.size * M < self.nnz:
            # TODO (asi1024): Implement faster algorithm.
            pass

        return self._tocsx()._major_index_fancy(idx)._tocsx()

    def _major_slice(self, idx, copy=False):
        """Index along the major axis where idx is a slice object.
        """
        M, N = self._swap(*self.shape)
        start, stop, step = idx.indices(M)

        if start == 0 and stop == M and step == 1:
            return self.copy() if copy else self

        M = len(range(start, stop, step))
        new_shape = self._swap(M, N)

        if step == 1:
            if M == 0 or self.nnz == 0:
                return self.__class__(new_shape, dtype=self.dtype)
            return self.__class__(_index._get_csr_submatrix_major_axis(
                self.data, self.indices, self.indptr, start, stop),
                                  shape=new_shape,
                                  copy=copy)
        rows = cupy.arange(start, stop, step, dtype=self.indptr.dtype)
        return self._major_index_fancy(rows)

    def _minor_slice(self, idx, copy=False):
        """Index along the minor axis where idx is a slice object.
        """
        M, N = self._swap(*self.shape)
        start, stop, step = idx.indices(N)

        if start == 0 and stop == N and step == 1:
            return self.copy() if copy else self

        N = len(range(start, stop, step))
        new_shape = self._swap(M, N)

        if N == 0 or self.nnz == 0:
            return self.__class__(new_shape)
        if step == 1:
            return self.__class__(_index._get_csr_submatrix_minor_axis(
                self.data, self.indices, self.indptr, start, stop),
                                  shape=new_shape,
                                  copy=False)
        cols = cupy.arange(start, stop, step, dtype=self.indices.dtype)
        return self._minor_index_fancy(cols)

    def _set_intXint(self, row, col, x):
        i, j = self._swap(row, col)
        self._set_many(i, j, x)

    def _set_arrayXarray(self, row, col, x):
        i, j = self._swap(row, col)
        self._set_many(i, j, x)

    def _set_arrayXarray_sparse(self, row, col, x):
        # clear entries that will be overwritten
        self._zero_many(*self._swap(row, col))

        M, N = row.shape  # matches col.shape
        broadcast_row = M != 1 and x.shape[0] == 1
        broadcast_col = N != 1 and x.shape[1] == 1
        r, c = x.row, x.col
        x = cupy.asarray(x.data, dtype=self.dtype)
        if broadcast_row:
            r = cupy.repeat(cupy.arange(M), r.size)
            c = cupy.tile(c, M)
            x = cupy.tile(x, M)
        if broadcast_col:
            r = cupy.repeat(r, N)
            c = cupy.tile(cupy.arange(N), c.size)
            x = cupy.repeat(x, N)
        # only assign entries in the new sparsity structure
        i, j = self._swap(row[r, c], col[r, c])
        self._set_many(i, j, x)

    def _prepare_indices(self, i, j):
        M, N = self._swap(*self.shape)

        def check_bounds(indices, bound):
            idx = indices.max()
            if idx >= bound:
                raise IndexError('index (%d) out of range (>= %d)' %
                                 (idx, bound))
            idx = indices.min()
            if idx < -bound:
                raise IndexError('index (%d) out of range (< -%d)' %
                                 (idx, bound))

        i = cupy.array(i, dtype=self.indptr.dtype, copy=True, ndmin=1).ravel()
        j = cupy.array(j, dtype=self.indices.dtype, copy=True, ndmin=1).ravel()
        check_bounds(i, M)
        check_bounds(j, N)
        return i, j, M, N

    def _set_many(self, i, j, x):
        """Sets value at each (i, j) to x
        Here (i,j) index major and minor respectively, and must not contain
        duplicate entries.
        """
        i, j, M, N = self._prepare_indices(i, j)
        x = cupy.array(x, dtype=self.dtype, copy=True, ndmin=1).ravel()

        new_sp = cupyx.scipy.sparse.csr_matrix((cupy.arange(
            self.nnz, dtype=cupy.float32), self.indices, self.indptr),
                                               shape=(M, N))

        offsets = new_sp._get_arrayXarray(i, j, not_found_val=-1).astype(
            cupy.int32).ravel()

        if -1 not in offsets:
            # only affects existing non-zero cells
            self.data[offsets] = x
            return

        else:
            warnings.warn('Changing the sparsity structure of a '
                          '{}_matrix is expensive.'
                          ' lil_matrix is more efficient.'.format(self.format))
            # replace where possible
            mask = offsets > -1
            self.data[offsets[mask]] = x[mask]
            # only insertions remain
            mask = ~mask
            i = i[mask]
            i[i < 0] += M
            j = j[mask]
            j[j < 0] += N
            self._insert_many(i, j, x[mask])

    def _zero_many(self, i, j):
        """Sets value at each (i, j) to zero, preserving sparsity structure.
        Here (i,j) index major and minor respectively.
        """
        i, j, M, N = self._prepare_indices(i, j)

        new_sp = cupyx.scipy.sparse.csr_matrix((cupy.arange(
            self.nnz, dtype=cupy.float32), self.indices, self.indptr),
                                               shape=(M, N))

        offsets = new_sp._get_arrayXarray(i, j, not_found_val=-1).astype(
            cupy.int32).ravel()

        # only assign zeros to the existing sparsity structure
        self.data[offsets[offsets > -1]] = 0

    def _perform_insert(self, indices_inserts, data_inserts, rows, row_counts,
                        idx_dtype):
        """Insert new elements into current sparse matrix in sorted order"""
        indptr_diff = cupy.diff(self.indptr)
        indptr_diff[rows] += row_counts

        new_indptr = cupy.empty(self.indptr.shape, dtype=idx_dtype)
        new_indptr[0] = idx_dtype(0)
        new_indptr[1:] = indptr_diff

        # Build output arrays
        cupy.cumsum(new_indptr, out=new_indptr)
        out_nnz = int(new_indptr[-1])

        new_indices = cupy.empty(out_nnz, dtype=idx_dtype)
        new_data = cupy.empty(out_nnz, dtype=self.data.dtype)

        # Build an indexed indptr that contains the offsets for each
        # row but only for in i, j, and x.
        new_indptr_lookup = cupy.zeros(new_indptr.size, dtype=idx_dtype)
        new_indptr_lookup[1:][rows] = row_counts
        cupy.cumsum(new_indptr_lookup, out=new_indptr_lookup)

        _index._insert_many_populate_arrays(indices_inserts,
                                            data_inserts,
                                            new_indptr_lookup,
                                            self.indptr,
                                            self.indices,
                                            self.data,
                                            new_indptr,
                                            new_indices,
                                            new_data,
                                            size=self.indptr.size - 1)

        self.indptr = new_indptr
        self.indices = new_indices
        self.data = new_data

    def _insert_many(self, i, j, x):
        """Inserts new nonzero at each (i, j) with value x
        Here (i,j) index major and minor respectively.
        i, j and x must be non-empty, 1d arrays.
        Inserts each major group (e.g. all entries per row) at a time.
        Maintains has_sorted_indices property.
        Modifies i, j, x in place.
        """

        order = cupy.argsort(i)  # stable for duplicates
        i = i.take(order)
        j = j.take(order)
        x = x.take(order)

        # Update index data type

        idx_dtype = sputils.get_index_dtype((self.indices, self.indptr),
                                            maxval=(self.nnz + x.size))

        self.indptr = self.indptr.astype(idx_dtype)
        self.indices = self.indices.astype(idx_dtype)
        self.data = self.data.astype(self.dtype)

        indptr_inserts, indices_inserts, data_inserts = \
            _index._select_last_indices(i, j, x, idx_dtype)

        rows, ui_indptr = cupy.unique(indptr_inserts, return_index=True)

        to_add = cupy.empty(ui_indptr.size + 1, ui_indptr.dtype)
        to_add[-1] = j.size
        to_add[:-1] = ui_indptr
        ui_indptr = to_add

        # Compute the counts for each row in the insertion array
        row_counts = cupy.zeros(ui_indptr.size - 1, dtype=idx_dtype)
        cupyx.scatter_add(row_counts, cupy.searchsorted(rows, indptr_inserts),
                          1)

        self._perform_insert(indices_inserts, data_inserts, rows, row_counts,
                             idx_dtype)

    def __get_has_canonical_format(self):
        """Determine whether the matrix has sorted indices and no duplicates.

        Returns
            bool: ``True`` if the above applies, otherwise ``False``.

        .. note::
            :attr:`has_canonical_format` implies :attr:`has_sorted_indices`, so
            if the latter flag is ``False``, so will the former be; if the
            former is found ``True``, the latter flag is also set.

        .. warning::
            Getting this property might synchronize the device.

        """
        # Modified from the SciPy counterpart.

        # In CuPy the implemented conversions do not exactly match those of
        # SciPy's, so it's hard to put this exactly as where it is in SciPy,
        # but this should do the job.
        if self.data.size == 0:
            self._has_canonical_format = True
        # check to see if result was cached
        elif not getattr(self, '_has_sorted_indices', True):
            # not sorted => not canonical
            self._has_canonical_format = False
        elif not hasattr(self, '_has_canonical_format'):
            is_canonical = self._has_canonical_format_kern(
                self.indptr, self.indices, size=self.indptr.size - 1)
            self._has_canonical_format = bool(is_canonical.all())
        return self._has_canonical_format

    def __set_has_canonical_format(self, val):
        """Taken from SciPy as is."""
        self._has_canonical_format = bool(val)
        if val:
            self.has_sorted_indices = True

    has_canonical_format = property(fget=__get_has_canonical_format,
                                    fset=__set_has_canonical_format)

    def __get_sorted(self):
        """Determine whether the matrix has sorted indices.

        Returns
            bool:
                ``True`` if the indices of the matrix are in sorted order,
                otherwise ``False``.

        .. warning::
            Getting this property might synchronize the device.

        """
        # Modified from the SciPy counterpart.

        # In CuPy the implemented conversions do not exactly match those of
        # SciPy's, so it's hard to put this exactly as where it is in SciPy,
        # but this should do the job.
        if self.data.size == 0:
            self._has_sorted_indices = True
        # check to see if result was cached
        elif not hasattr(self, '_has_sorted_indices'):
            is_sorted = self._has_sorted_indices_kern(self.indptr,
                                                      self.indices,
                                                      size=self.indptr.size -
                                                      1)
            self._has_sorted_indices = bool(is_sorted.all())
        return self._has_sorted_indices

    def __set_sorted(self, val):
        self._has_sorted_indices = bool(val)

    has_sorted_indices = property(fget=__get_sorted, fset=__set_sorted)

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.

        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros.

        Args:
            axis: Not supported yet.

        Returns:
            int: The number of stored values.

        """
        if axis is None:
            return self.data.size
        else:
            raise ValueError

    def sorted_indices(self):
        """Return a copy of this matrix with sorted indices

        .. warning::
            Calling this function might synchronize the device.
        """
        # Taken from SciPy as is.
        A = self.copy()
        A.sort_indices()
        return A

    def sort_indices(self):
        # Unlike in SciPy, here this is implemented in child classes because
        # each child needs to call its own sort function from cuSPARSE
        raise NotImplementedError

    def sum_duplicates(self):
        """Eliminate duplicate matrix entries by adding them together.

        .. note::
            This is an *in place* operation.

        .. warning::
            Calling this function might synchronize the device.

        .. seealso::
           :meth:`scipy.sparse.csr_matrix.sum_duplicates`,
           :meth:`scipy.sparse.csc_matrix.sum_duplicates`
        """
        if self.has_canonical_format:
            return
        # TODO(leofang): add a kernel for compressed sparse matrices without
        # converting to coo
        coo = self.tocoo()
        coo.sum_duplicates()
        self.__init__(coo.asformat(self.format))
        self.has_canonical_format = True

    #####################
    # Reduce operations #
    #####################

    def _minor_reduce(self, ufunc, axis, nonzero):
        """Reduce nonzeros with a ufunc over the minor axis when non-empty

        Can be applied to a function of self.data by supplying data parameter.
        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted.
            axis (int): Matrix over which the reduction should be
                conducted.

        Returns:
            (cupy.ndarray): Reduce result for nonzeros in each
                major_index.

        """

        # Call to the appropriate kernel function
        if axis == 1:
            # Create the vector to hold output
            value = cupy.zeros(self.shape[0]).astype(cupy.float64)

            if nonzero:
                # Perform the calculation
                if ufunc == cupy.amax:
                    self._max_nonzero_reduction_kern(
                        (self.shape[0], ), (1, ),
                        (self.data.astype(
                            cupy.float64), self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[1]), value))
                if ufunc == cupy.amin:
                    self._min_nonzero_reduction_kern(
                        (self.shape[0], ), (1, ),
                        (self.data.astype(
                            cupy.float64), self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[1]), value))

            else:
                # Perform the calculation
                if ufunc == cupy.amax:
                    self._max_reduction_kern(
                        (self.shape[0], ), (1, ),
                        (self.data.astype(
                            cupy.float64), self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[1]), value))
                if ufunc == cupy.amin:
                    self._min_reduction_kern(
                        (self.shape[0], ), (1, ),
                        (self.data.astype(
                            cupy.float64), self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[1]), value))

        if axis == 0:
            # Create the vector to hold output
            value = cupy.zeros(self.shape[1]).astype(cupy.float64)

            if nonzero:
                # Perform the calculation
                if ufunc == cupy.amax:
                    self._max_nonzero_reduction_kern(
                        (self.shape[1], ), (1, ),
                        (self.data.astype(
                            cupy.float64), self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[0]), value))
                if ufunc == cupy.amin:
                    self._min_nonzero_reduction_kern(
                        (self.shape[1], ), (1, ),
                        (self.data.astype(
                            cupy.float64), self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[0]), value))
            else:
                # Perform the calculation
                if ufunc == cupy.amax:
                    self._max_reduction_kern(
                        (self.shape[1], ), (1, ),
                        (self.data.astype(
                            cupy.float64), self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[0]), value))
                if ufunc == cupy.amin:
                    self._min_reduction_kern(
                        (self.shape[1], ), (1, ),
                        (self.data.astype(
                            cupy.float64), self.indptr[:len(self.indptr) - 1],
                         self.indptr[1:], cupy.int64(self.shape[0]), value))

        return value

    def _arg_minor_reduce(self, ufunc, axis):
        """Reduce nonzeros with a ufunc over the minor axis when non-empty

        Can be applied to a function of self.data by supplying data parameter.
        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted.
            axis (int): Maxtrix over which the reduction should be conducted

        Returns:
            (cupy.ndarray): Reduce result for nonzeros in each
                major_index

        """

        # Call to the appropriate kernel function
        if axis == 1:
            # Create the vector to hold output
            value = cupy.zeros(self.shape[0]).astype(cupy.int64)

            # Perform the calculation
            if ufunc == cupy.argmax:
                self._max_arg_reduction_kern(
                    (self.shape[0], ), (1, ),
                    (self.data.astype(cupy.float64), self.indices,
                     self.indptr[:len(self.indptr) - 1], self.indptr[1:],
                     cupy.int64(self.shape[1]), value))
            if ufunc == cupy.argmin:
                self._min_arg_reduction_kern(
                    (self.shape[0], ), (1, ),
                    (self.data.astype(cupy.float64), self.indices,
                     self.indptr[:len(self.indptr) - 1], self.indptr[1:],
                     cupy.int64(self.shape[1]), value))

        if axis == 0:
            # Create the vector to hold output
            value = cupy.zeros(self.shape[1]).astype(cupy.int64)

            # Perform the calculation
            if ufunc == cupy.argmax:
                self._max_arg_reduction_kern(
                    (self.shape[1], ), (1, ),
                    (self.data.astype(cupy.float64), self.indices,
                     self.indptr[:len(self.indptr) - 1], self.indptr[1:],
                     cupy.int64(self.shape[0]), value))
            if ufunc == cupy.argmin:
                self._min_arg_reduction_kern(
                    (self.shape[1], ), (1, ),
                    (self.data.astype(cupy.float64), self.indices,
                     self.indptr[:len(self.indptr) - 1], self.indptr[1:],
                     cupy.int64(self.shape[0]), value))

        return value

コード例 #24

ファイルを表示

        raise core.core._AxisError('axis(={}) out of bounds'.format(axis))
    else:
        return _proc_as_batch(batch_kern, out, axis=axis)

    pos = 1
    while pos < out.size:
        kern(pos, out, size=out.size)
        pos <<= 1
    return out


_cumsum_batch_kern = core.ElementwiseKernel(
    'int64 pos, int64 batch', 'raw T x', '''
    ptrdiff_t b = i % batch;
    ptrdiff_t j = i / batch;
    if (j & pos) {
      const ptrdiff_t dst_index[] = {j, b};
      const ptrdiff_t src_index[] = {j ^ pos | (pos - 1), b};
      x[dst_index] += x[src_index];
    }
    ''', 'cumsum_batch_kernel')
_cumsum_kern = core.ElementwiseKernel(
    'int64 pos', 'raw T x', '''
    if (i & pos) {
      x[i] += x[i ^ pos | (pos - 1)];
    }
    ''', 'cumsum_kernel')


def cumsum(a, axis=None, dtype=None, out=None):
    """Returns the cumulative sum of an array along a given axis.

コード例 #25

ファイルを表示

ファイル: _kernels.py プロジェクト: yutiansut/cupy

from cupy import core

gumbel_kernel = core.ElementwiseKernel(
    'T x, T loc, T scale', 'T y',
    'y = loc - log(-log(1 - x)) * scale',
    'gumbel_kernel'
)

laplace_kernel = core.ElementwiseKernel(
    'T x, T loc, T scale', 'T y',
    'y = (x < 0.5)? loc + scale * log(x + x):'
    ' loc - scale * log(2.0 - x - x)',
    'laplace_kernel'
)

コード例 #26

ファイルを表示

ファイル: search.py プロジェクト: yuhc/ava-cupy

_searchsorted_kernel = core.ElementwiseKernel(
    'S x, raw T bins, int64 n_bins, bool side_is_right',
    'int64 y',
    '''
    if (_isnan<S>(x)) {
        long long pos = n_bins;
        if (!side_is_right) {
            while (pos > 0 && _isnan<T>(bins[pos-1])) {
                --pos;
            }
        }
        y = pos;
        return;
    }
    bool greater = (side_is_right ? x >= bins[n_bins-1] : x > bins[n_bins-1]);
    if (greater) {
        y = n_bins;
        return;
    }
    long long left = 0;
    long long right = n_bins-1;
    while (left < right) {
        long long m = left + (right - left) / 2;
        if (side_is_right ? bins[m] <= x : bins[m] < x) {
            left = m + 1;
        } else {
            right = m;
        }
    }
    y = right;
    ''',
    preamble=_preamble)

コード例 #27

ファイルを表示

class RandomState(object):
    """Portable container of a pseudo-random number generator.

    An instance of this class holds the state of a random number generator. The
    state is available only on the device which has been current at the
    initialization of the instance.

    Functions of :mod:`cupy.random` use global instances of this class.
    Different instances are used for different devices. The global state for
    the current device can be obtained by the
    :func:`cupy.random.get_random_state` function.

    Args:
        seed (None or int): Seed of the random number generator. See the
            :meth:`~cupy.random.RandomState.seed` method for detail.
        method (int): Method of the random number generator. Following values
            are available::

               cupy.cuda.curand.CURAND_RNG_PSEUDO_DEFAULT
               cupy.cuda.curand.CURAND_RNG_XORWOW
               cupy.cuda.curand.CURAND_RNG_MRG32K3A
               cupy.cuda.curand.CURAND_RNG_MTGP32
               cupy.cuda.curand.CURAND_RNG_MT19937
               cupy.cuda.curand.CURAND_RNG_PHILOX4_32_10

    """
    def __init__(self, seed=None, method=curand.CURAND_RNG_PSEUDO_DEFAULT):
        self._generator = curand.createGenerator(method)
        self.seed(seed)

    def __del__(self):
        # When createGenerator raises an error, _generator is not initialized
        if hasattr(self, '_generator'):
            curand.destroyGenerator(self._generator)

    def set_stream(self, stream=None):
        if stream is None:
            stream = cuda.Stream()
        curand.setStream(self._generator, stream.ptr)

    def _generate_normal(self, func, size, dtype, *args):
        # curand functions below don't support odd size.
        # * curand.generateNormal
        # * curand.generateNormalDouble
        # * curand.generateLogNormal
        # * curand.generateLogNormalDouble
        size = core.get_size(size)
        element_size = six.moves.reduce(operator.mul, size, 1)
        if element_size % 2 == 0:
            out = cupy.empty(size, dtype=dtype)
            func(self._generator, out.data.ptr, out.size, *args)
            return out
        else:
            out = cupy.empty((element_size + 1, ), dtype=dtype)
            func(self._generator, out.data.ptr, out.size, *args)
            return out[:element_size].reshape(size)

    # NumPy compatible functions

    def lognormal(self, mean=0.0, sigma=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a log normal distribution.

        .. seealso::
            :func:`cupy.random.lognormal` for full documentation,
            :meth:`numpy.random.RandomState.lognormal`

        """
        dtype = _check_and_get_dtype(dtype)
        if dtype.char == 'f':
            func = curand.generateLogNormal
        else:
            func = curand.generateLogNormalDouble
        return self._generate_normal(func, size, dtype, mean, sigma)

    def normal(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of normally distributed samples.

        .. seealso::
            :func:`cupy.random.normal` for full documentation,
            :meth:`numpy.random.RandomState.normal`

        """
        dtype = _check_and_get_dtype(dtype)
        if dtype.char == 'f':
            func = curand.generateNormal
        else:
            func = curand.generateNormalDouble
        return self._generate_normal(func, size, dtype, loc, scale)

    def rand(self, *size, **kwarg):
        """Returns uniform random values over the interval ``[0, 1)``.

        .. seealso::
            :func:`cupy.random.rand` for full documentation,
            :meth:`numpy.random.RandomState.rand`

        """
        dtype = kwarg.pop('dtype', float)
        if kwarg:
            raise TypeError('rand() got unexpected keyword arguments %s' %
                            ', '.join(kwarg.keys()))
        return self.random_sample(size=size, dtype=dtype)

    def randn(self, *size, **kwarg):
        """Returns an array of standard normal random values.

        .. seealso::
            :func:`cupy.random.randn` for full documentation,
            :meth:`numpy.random.RandomState.randn`

        """
        dtype = kwarg.pop('dtype', float)
        if kwarg:
            raise TypeError('randn() got unexpected keyword arguments %s' %
                            ', '.join(kwarg.keys()))
        return self.normal(size=size, dtype=dtype)

    _1m_kernel = core.ElementwiseKernel('', 'T x', 'x = 1 - x',
                                        'cupy_random_1_minus_x')

    def random_sample(self, size=None, dtype=float):
        """Returns an array of random values over the interval ``[0, 1)``.

        .. seealso::
            :func:`cupy.random.random_sample` for full documentation,
            :meth:`numpy.random.RandomState.random_sample`

        """
        dtype = _check_and_get_dtype(dtype)
        out = cupy.empty(size, dtype=dtype)
        if dtype.char == 'f':
            func = curand.generateUniform
        else:
            func = curand.generateUniformDouble
        func(self._generator, out.data.ptr, out.size)
        RandomState._1m_kernel(out)
        return out

    def interval(self, mx, size):
        """Generate multiple integers independently sampled uniformly from ``[0, mx]``.

        Args:
            mx (int): Upper bound of the interval
            size (None or int or tuple): Shape of the array or the scalar
                returned.
        Returns:
            int or cupy.ndarray: If ``None``, an :class:`cupy.ndarray` with
            shape ``()`` is returned.
            If ``int``, 1-D array of length size is returned.
            If ``tuple``, multi-dimensional array with shape
            ``size`` is returned.
            Currently, only 32 bit integers can be sampled.
            If 0 :math:`\\leq` ``mx`` :math:`\\leq` 0x7fffffff,
            a ``numpy.int32`` array is returned.
            If 0x80000000 :math:`\\leq` ``mx`` :math:`\\leq` 0xffffffff,
            a ``numpy.uint32`` array is returned.
        """
        if size is None:
            return self.interval(mx, 1).reshape(())
        elif isinstance(size, int):
            size = (size, )

        if mx == 0:
            return cupy.zeros(size, dtype=numpy.int32)

        if mx < 0:
            raise ValueError('mx must be non-negative (actual: {})'.format(mx))
        elif mx <= 0x7fffffff:
            dtype = numpy.int32
        elif mx <= 0xffffffff:
            dtype = numpy.uint32
        else:
            raise ValueError(
                'mx must be within uint32 range (actual: {})'.format(mx))

        mask = (1 << mx.bit_length()) - 1
        mask = cupy.array(mask, dtype=dtype)

        n = functools.reduce(operator.mul, size, 1)

        sample = cupy.empty((n, ), dtype=dtype)
        n_rem = n  # The number of remaining elements to sample
        ret = None
        while n_rem > 0:
            curand.generate(self._generator, sample.data.ptr, sample.size)
            # Drop the samples that exceed the upper limit
            sample &= mask
            success = sample <= mx

            if ret is None:
                # If the sampling has finished in the first iteration,
                # just return the sample.
                if success.all():
                    n_rem = 0
                    ret = sample
                    break

                # Allocate the return array.
                ret = cupy.empty((n, ), dtype=dtype)

            n_succ = min(n_rem, int(success.sum()))
            ret[n - n_rem:n - n_rem + n_succ] = sample[success][:n_succ]
            n_rem -= n_succ

        assert n_rem == 0
        return ret.reshape(size)

    def seed(self, seed=None):
        """Resets the state of the random number generator with a seed.

        .. seealso::
            :func:`cupy.random.seed` for full documentation,
            :meth:`numpy.random.RandomState.seed`

        """
        if seed is None:
            try:
                seed_str = binascii.hexlify(os.urandom(8))
                seed = numpy.uint64(int(seed_str, 16))
            except NotImplementedError:
                seed = numpy.uint64(time.clock() * 1000000)
        else:
            seed = numpy.uint64(seed)

        curand.setPseudoRandomGeneratorSeed(self._generator, seed)
        curand.setGeneratorOffset(self._generator, 0)

    def standard_normal(self, size=None, dtype=float):
        """Returns samples drawn from the standard normal distribution.

        .. seealso::
            :func:`cupy.random.standard_normal` for full documentation,
            :meth:`numpy.random.RandomState.standard_normal`

        """
        return self.normal(size=size, dtype=dtype)

    def uniform(self, low=0.0, high=1.0, size=None, dtype=float):
        """Returns an array of uniformly-distributed samples over an interval.

        .. seealso::
            :func:`cupy.random.uniform` for full documentation,
            :meth:`numpy.random.RandomState.uniform`

        """
        dtype = numpy.dtype(dtype)
        rand = self.random_sample(size=size, dtype=dtype)
        return dtype.type(low) + rand * dtype.type(high - low)

    def choice(self, a, size=None, replace=True, p=None):
        """Returns an array of random values from a given 1-D array.

        .. seealso::
            :func:`cupy.random.choice` for full document,
            :meth:`numpy.random.choice`

        """
        if a is None:
            raise ValueError('a must be 1-dimensional or an integer')
        if isinstance(a, cupy.ndarray) and a.ndim == 0:
            raise NotImplementedError
        if isinstance(a, six.integer_types):
            a_size = a
            if a_size <= 0:
                raise ValueError('a must be greater than 0')
        else:
            a = cupy.array(a, copy=False)
            if a.ndim != 1:
                raise ValueError('a must be 1-dimensional or an integer')
            else:
                a_size = len(a)
                if a_size == 0:
                    raise ValueError('a must be non-empty')

        if p is not None:
            p = cupy.array(p)
            if p.ndim != 1:
                raise ValueError('p must be 1-dimensional')
            if len(p) != a_size:
                raise ValueError('a and p must have same size')
            if not (p >= 0).all():
                raise ValueError('probabilities are not non-negative')
            p_sum = cupy.sum(p).get()
            if not numpy.allclose(p_sum, 1):
                raise ValueError('probabilities do not sum to 1')

        if size is None:
            raise NotImplementedError
        shape = size
        size = numpy.prod(shape)

        if not replace and p is None:
            if a_size < size:
                raise ValueError(
                    'Cannot take a larger sample than population when '
                    '\'replace=False\'')
            if isinstance(a, six.integer_types):
                indices = cupy.arange(a, dtype='l')
            else:
                indices = a.copy()
            self.shuffle(indices)
            return indices[:size].reshape(shape)

        if not replace:
            raise NotImplementedError

        if p is not None:
            p = cupy.broadcast_to(p, (size, a_size))
            index = cupy.argmax(cupy.log(p) +
                                cupy.random.gumbel(size=(size, a_size)),
                                axis=1)
            if not isinstance(shape, six.integer_types):
                index = cupy.reshape(index, shape)
        else:
            index = cupy.random.randint(0, a_size, size=shape)
            # Align the dtype with NumPy
            index = index.astype(cupy.int64, copy=False)

        if isinstance(a, six.integer_types):
            return index

        if index.ndim == 0:
            return cupy.array(a[index], dtype=a.dtype)

        return a[index]

    def shuffle(self, a):
        """Returns a shuffled array.

        .. seealso::
            :func:`cupy.random.shuffle` for full document,
            :meth:`numpy.random.shuffle`

        """
        if not isinstance(a, cupy.ndarray):
            raise TypeError('The array must be cupy.ndarray')

        if a.ndim == 0:
            raise TypeError('An array whose ndim is 0 is not supported')

        sample = cupy.zeros((len(a)), dtype=numpy.int32)
        curand.generate(self._generator, sample.data.ptr, sample.size)
        a[:] = a[cupy.argsort(sample)]

コード例 #28

ファイルを表示

ファイル: generator.py プロジェクト: gridl/cupy

class RandomState(object):
    """Portable container of a pseudo-random number generator.

    An instance of this class holds the state of a random number generator. The
    state is available only on the device which has been current at the
    initialization of the instance.

    Functions of :mod:`cupy.random` use global instances of this class.
    Different instances are used for different devices. The global state for
    the current device can be obtained by the
    :func:`cupy.random.get_random_state` function.

    Args:
        seed (None or int): Seed of the random number generator. See the
            :meth:`~cupy.random.RandomState.seed` method for detail.
        method (int): Method of the random number generator. Following values
            are available::

               cupy.cuda.curand.CURAND_RNG_PSEUDO_DEFAULT
               cupy.cuda.curand.CURAND_RNG_XORWOW
               cupy.cuda.curand.CURAND_RNG_MRG32K3A
               cupy.cuda.curand.CURAND_RNG_MTGP32
               cupy.cuda.curand.CURAND_RNG_MT19937
               cupy.cuda.curand.CURAND_RNG_PHILOX4_32_10

    """
    def __init__(self, seed=None, method=curand.CURAND_RNG_PSEUDO_DEFAULT):
        self._generator = curand.createGenerator(method)
        self.seed(seed)

    def __del__(self):
        # When createGenerator raises an error, _generator is not initialized
        if hasattr(self, '_generator'):
            curand.destroyGenerator(self._generator)

    def _generate_normal(self, func, size, dtype, *args):
        # curand functions below don't support odd size.
        # * curand.generateNormal
        # * curand.generateNormalDouble
        # * curand.generateLogNormal
        # * curand.generateLogNormalDouble
        size = core.get_size(size)
        element_size = six.moves.reduce(operator.mul, size, 1)
        if element_size % 2 == 0:
            out = cupy.empty(size, dtype=dtype)
            func(self._generator, out.data.ptr, out.size, *args)
            return out
        else:
            out = cupy.empty((element_size + 1, ), dtype=dtype)
            func(self._generator, out.data.ptr, out.size, *args)
            return out[:element_size].reshape(size)

    # NumPy compatible functions

    def binomial(self, n, p, size=None, dtype=int):
        """Returns an array of samples drawn from the binomial distribution.

        .. seealso::
            :func:`cupy.random.binomial` for full documentation,
            :meth:`numpy.random.RandomState.binomial`
        """
        n, p = cupy.asarray(n), cupy.asarray(p)
        if size is None:
            size = cupy.broadcast(n, p).shape
        y = cupy.zeros(shape=size, dtype=dtype)
        _kernels.binomial_kernel(n, p, self.rk_seed, y)
        if size is None:
            self.rk_seed += 1
        else:
            self.rk_seed += numpy.prod(size)
        return y

    _laplace_kernel = core.ElementwiseKernel(
        'T x, T loc, T scale', 'T y',
        'y = T(loc) + T(scale) * ((x < 0.5) ? log(x + x): -log(2.0 - x - x))',
        'laplace_kernel')

    def laplace(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from the laplace distribution.

        .. seealso::
            :func:`cupy.random.laplace` for full documentation,
            :meth:`numpy.random.RandomState.laplace`
        """
        x = self.random_sample(size=size, dtype=dtype)
        if not numpy.isscalar(loc):
            loc = cupy.asarray(loc, dtype)
        if not numpy.isscalar(scale):
            scale = cupy.asarray(scale, dtype)
        RandomState._laplace_kernel(x, loc, scale, x)
        return x

    def lognormal(self, mean=0.0, sigma=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a log normal distribution.

        .. seealso::
            :func:`cupy.random.lognormal` for full documentation,
            :meth:`numpy.random.RandomState.lognormal`

        """
        dtype = _check_and_get_dtype(dtype)
        if dtype.char == 'f':
            func = curand.generateLogNormal
        else:
            func = curand.generateLogNormalDouble
        return self._generate_normal(func, size, dtype, mean, sigma)

    def normal(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of normally distributed samples.

        .. seealso::
            :func:`cupy.random.normal` for full documentation,
            :meth:`numpy.random.RandomState.normal`

        """
        dtype = _check_and_get_dtype(dtype)
        if dtype.char == 'f':
            func = curand.generateNormal
        else:
            func = curand.generateNormalDouble
        return self._generate_normal(func, size, dtype, loc, scale)

    def rand(self, *size, **kwarg):
        """Returns uniform random values over the interval ``[0, 1)``.

        .. seealso::
            :func:`cupy.random.rand` for full documentation,
            :meth:`numpy.random.RandomState.rand`

        """
        dtype = kwarg.pop('dtype', float)
        if kwarg:
            raise TypeError('rand() got unexpected keyword arguments %s' %
                            ', '.join(kwarg.keys()))
        return self.random_sample(size=size, dtype=dtype)

    def randn(self, *size, **kwarg):
        """Returns an array of standard normal random values.

        .. seealso::
            :func:`cupy.random.randn` for full documentation,
            :meth:`numpy.random.RandomState.randn`

        """
        dtype = kwarg.pop('dtype', float)
        if kwarg:
            raise TypeError('randn() got unexpected keyword arguments %s' %
                            ', '.join(kwarg.keys()))
        return self.normal(size=size, dtype=dtype)

    _1m_kernel = core.ElementwiseKernel('', 'T x', 'x = 1 - x',
                                        'cupy_random_1_minus_x')

    def _random_sample_raw(self, size, dtype):
        dtype = _check_and_get_dtype(dtype)
        out = cupy.empty(size, dtype=dtype)
        if dtype.char == 'f':
            func = curand.generateUniform
        else:
            func = curand.generateUniformDouble
        func(self._generator, out.data.ptr, out.size)
        return out

    def random_sample(self, size=None, dtype=float):
        """Returns an array of random values over the interval ``[0, 1)``.

        .. seealso::
            :func:`cupy.random.random_sample` for full documentation,
            :meth:`numpy.random.RandomState.random_sample`

        """
        out = self._random_sample_raw(size, dtype)
        RandomState._1m_kernel(out)
        return out

    def _interval(self, mx, size):
        """Generate multiple integers independently sampled uniformly from ``[0, mx]``.

        Args:
            mx (int): Upper bound of the interval
            size (None or int or tuple): Shape of the array or the scalar
                returned.
        Returns:
            int or cupy.ndarray: If ``None``, an :class:`cupy.ndarray` with
            shape ``()`` is returned.
            If ``int``, 1-D array of length size is returned.
            If ``tuple``, multi-dimensional array with shape
            ``size`` is returned.
            Currently, only 32 bit integers can be sampled.
            If 0 :math:`\\leq` ``mx`` :math:`\\leq` 0x7fffffff,
            a ``numpy.int32`` array is returned.
            If 0x80000000 :math:`\\leq` ``mx`` :math:`\\leq` 0xffffffff,
            a ``numpy.uint32`` array is returned.
        """
        if size is None:
            return self._interval(mx, 1).reshape(())
        elif isinstance(size, int):
            size = (size, )

        if mx == 0:
            return cupy.zeros(size, dtype=numpy.int32)

        if mx < 0:
            raise ValueError('mx must be non-negative (actual: {})'.format(mx))
        elif mx <= 0x7fffffff:
            dtype = numpy.int32
        elif mx <= 0xffffffff:
            dtype = numpy.uint32
        else:
            raise ValueError(
                'mx must be within uint32 range (actual: {})'.format(mx))

        mask = (1 << mx.bit_length()) - 1
        mask = cupy.array(mask, dtype=dtype)

        n = functools.reduce(operator.mul, size, 1)

        sample = cupy.empty((n, ), dtype=dtype)
        n_rem = n  # The number of remaining elements to sample
        ret = None
        while n_rem > 0:
            curand.generate(self._generator, sample.data.ptr, sample.size)
            # Drop the samples that exceed the upper limit
            sample &= mask
            success = sample <= mx

            if ret is None:
                # If the sampling has finished in the first iteration,
                # just return the sample.
                if success.all():
                    n_rem = 0
                    ret = sample
                    break

                # Allocate the return array.
                ret = cupy.empty((n, ), dtype=dtype)

            n_succ = min(n_rem, int(success.sum()))
            ret[n - n_rem:n - n_rem + n_succ] = sample[success][:n_succ]
            n_rem -= n_succ

        assert n_rem == 0
        return ret.reshape(size)

    def seed(self, seed=None):
        """Resets the state of the random number generator with a seed.

        .. seealso::
            :func:`cupy.random.seed` for full documentation,
            :meth:`numpy.random.RandomState.seed`

        """
        if seed is None:
            try:
                seed_str = binascii.hexlify(os.urandom(8))
                seed = numpy.uint64(int(seed_str, 16))
            except NotImplementedError:
                seed = numpy.uint64(time.clock() * 1000000)
        else:
            seed = numpy.asarray(seed).astype(numpy.uint64, casting='safe')

        curand.setPseudoRandomGeneratorSeed(self._generator, seed)
        curand.setGeneratorOffset(self._generator, 0)

        self.rk_seed = numpy.uint32(seed)

    def standard_normal(self, size=None, dtype=float):
        """Returns samples drawn from the standard normal distribution.

        .. seealso::
            :func:`cupy.random.standard_normal` for full documentation,
            :meth:`numpy.random.RandomState.standard_normal`

        """
        return self.normal(size=size, dtype=dtype)

    def tomaxint(self, size=None):
        """Draws integers between 0 and max integer inclusive.

        Args:
            size (int or tuple of ints): Output shape.

        Returns:
            cupy.ndarray: Drawn samples.

        .. seealso::
            :meth:`numpy.random.RandomState.tomaxint`

        """
        if size is None:
            size = ()
        sample = cupy.empty(size, dtype=cupy.int_)
        # cupy.random only uses int32 random generator
        size_in_int = sample.dtype.itemsize // 4
        curand.generate(self._generator, sample.data.ptr,
                        sample.size * size_in_int)

        # Disable sign bit
        sample &= cupy.iinfo(cupy.int_).max
        return sample

    _scale_kernel = core.ElementwiseKernel('T low, T high', 'T x',
                                           'x = T(low) + x * T(high - low)',
                                           'cupy_scale')

    def uniform(self, low=0.0, high=1.0, size=None, dtype=float):
        """Returns an array of uniformly-distributed samples over an interval.

        .. seealso::
            :func:`cupy.random.uniform` for full documentation,
            :meth:`numpy.random.RandomState.uniform`

        """
        dtype = numpy.dtype(dtype)
        rand = self.random_sample(size=size, dtype=dtype)
        if not numpy.isscalar(low):
            low = cupy.asarray(low, dtype)
        if not numpy.isscalar(high):
            high = cupy.asarray(high, dtype)
        return RandomState._scale_kernel(low, high, rand)

    def choice(self, a, size=None, replace=True, p=None):
        """Returns an array of random values from a given 1-D array.

        .. seealso::
            :func:`cupy.random.choice` for full document,
            :func:`numpy.random.choice`

        """
        if a is None:
            raise ValueError('a must be 1-dimensional or an integer')
        if isinstance(a, cupy.ndarray) and a.ndim == 0:
            raise NotImplementedError
        if isinstance(a, six.integer_types):
            a_size = a
            if a_size <= 0:
                raise ValueError('a must be greater than 0')
        else:
            a = cupy.array(a, copy=False)
            if a.ndim != 1:
                raise ValueError('a must be 1-dimensional or an integer')
            else:
                a_size = len(a)
                if a_size == 0:
                    raise ValueError('a must be non-empty')

        if p is not None:
            p = cupy.array(p)
            if p.ndim != 1:
                raise ValueError('p must be 1-dimensional')
            if len(p) != a_size:
                raise ValueError('a and p must have same size')
            if not (p >= 0).all():
                raise ValueError('probabilities are not non-negative')
            p_sum = cupy.sum(p).get()
            if not numpy.allclose(p_sum, 1):
                raise ValueError('probabilities do not sum to 1')

        if size is None:
            raise NotImplementedError
        shape = size
        size = numpy.prod(shape)

        if not replace and p is None:
            if a_size < size:
                raise ValueError(
                    'Cannot take a larger sample than population when '
                    '\'replace=False\'')
            if isinstance(a, six.integer_types):
                indices = cupy.arange(a, dtype='l')
            else:
                indices = a.copy()
            self.shuffle(indices)
            return indices[:size].reshape(shape)

        if not replace:
            raise NotImplementedError

        if p is not None:
            p = cupy.broadcast_to(p, (size, a_size))
            index = cupy.argmax(cupy.log(p) + self.gumbel(size=(size, a_size)),
                                axis=1)
            if not isinstance(shape, six.integer_types):
                index = cupy.reshape(index, shape)
        else:
            index = self.randint(0, a_size, size=shape)
            # Align the dtype with NumPy
            index = index.astype(cupy.int64, copy=False)

        if isinstance(a, six.integer_types):
            return index

        if index.ndim == 0:
            return cupy.array(a[index], dtype=a.dtype)

        return a[index]

    def shuffle(self, a):
        """Returns a shuffled array.

        .. seealso::
            :func:`cupy.random.shuffle` for full document,
            :func:`numpy.random.shuffle`

        """
        if not isinstance(a, cupy.ndarray):
            raise TypeError('The array must be cupy.ndarray')

        if a.ndim == 0:
            raise TypeError('An array whose ndim is 0 is not supported')

        a[:] = a[self.permutation(len(a))]

    def permutation(self, num):
        """Returns a permuted range."""
        if not isinstance(num, six.integer_types):
            raise TypeError('The data type of argument "num" must be integer')

        sample = cupy.empty((num), dtype=numpy.int32)
        curand.generate(self._generator, sample.data.ptr, num)
        if 128 < num <= 32 * 1024 * 1024:
            array = cupy.arange(num, dtype=numpy.int32)
            # apply sort of cache blocking
            block_size = 1 * 1024 * 1024
            # The block size above is a value determined from the L2 cache size
            # of GP100 (L2 cache size / size of int = 4MB / 4B = 1M). It may be
            # better to change the value base on the L2 cache size of the GPU
            # you use.
            # When num > block_size, cupy kernel: _cupy_permutation is to be
            # launched multiple times. However, it is observed that performance
            # will be degraded if the launch count is too many. Therefore,
            # the block size is adjusted so that launch count will not exceed
            # twelve Note that this twelve is the value determined from
            # measurement on GP100.
            while num // block_size > 12:
                block_size *= 2
            for j_start in range(0, num, block_size):
                j_end = j_start + block_size
                _cupy_permutation()(array, sample, j_start, j_end, size=num)
        else:
            # When num > 32M, argsort is used, because it is faster than
            # custom kernel. See https://github.com/cupy/cupy/pull/603.
            array = cupy.argsort(sample)
        return array

    _gumbel_kernel = core.ElementwiseKernel(
        'T x, T loc, T scale', 'T y', 'y = T(loc) - log(-log(x)) * T(scale)',
        'gumbel_kernel')

    def gumbel(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a Gumbel distribution.

        .. seealso::
            :func:`cupy.random.gumbel` for full documentation,
            :meth:`numpy.random.RandomState.gumbel`
        """
        x = self._random_sample_raw(size=size, dtype=dtype)
        if not numpy.isscalar(loc):
            loc = cupy.asarray(loc, dtype)
        if not numpy.isscalar(scale):
            scale = cupy.asarray(scale, dtype)
        RandomState._gumbel_kernel(x, loc, scale, x)
        return x

    def randint(self, low, high=None, size=None, dtype='l'):
        """Returns a scalar or an array of integer values over ``[low, high)``.

        .. seealso::
            :func:`cupy.random.randint` for full documentation,
            :meth:`numpy.random.RandomState.randint`
        """
        if high is None:
            lo = 0
            hi = low
        else:
            lo = low
            hi = high

        if lo >= hi:
            raise ValueError('low >= high')
        if lo < cupy.iinfo(dtype).min:
            raise ValueError('low is out of bounds for {}'.format(
                cupy.dtype(dtype).name))
        if hi > cupy.iinfo(dtype).max + 1:
            raise ValueError('high is out of bounds for {}'.format(
                cupy.dtype(dtype).name))

        diff = hi - lo - 1
        if diff > cupy.iinfo(cupy.int32).max - cupy.iinfo(cupy.int32).min + 1:
            raise NotImplementedError(
                'Sampling from a range whose extent is larger than int32 '
                'range is currently not supported')
        x = self._interval(diff, size).astype(dtype, copy=False)
        cupy.add(x, lo, out=x)
        return x

コード例 #29

ファイルを表示

class coo_matrix(sparse_data._data_matrix):
    """COOrdinate format sparse matrix.

    Now it has only one initializer format below:

    ``coo_matrix(S)``
        ``S`` is another sparse matrix. It is equivalent to ``S.tocoo()``.

    ``coo_matrix((M, N), [dtype])``
        It constructs an empty matrix whose shape is ``(M, N)``. Default dtype
        is float64.

    ``coo_matrix((data, (row, col))``
        All ``data``, ``row`` and ``col`` are one-dimenaional
        :class:`cupy.ndarray`.

    Args:
        arg1: Arguments for the initializer.
        shape (tuple): Shape of a matrix. Its length must be two.
        dtype: Data type. It must be an argument of :class:`numpy.dtype`.
        copy (bool): If ``True``, copies of given data are always used.

    .. seealso::
       :class:`scipy.sparse.coo_matrix`

    """

    format = 'coo'

    _sum_duplicates_diff = core.ElementwiseKernel(
        'raw T row, raw T col', 'T diff', '''
        T diff_out = 1;
        if (i == 0 || row[i - 1] == row[i] && col[i - 1] == col[i]) {
          diff_out = 0;
        }
        diff = diff_out;
        ''', 'sum_duplicates_diff')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        if shape is not None and len(shape) != 2:
            raise ValueError(
                'Only two-dimensional sparse arrays are supported.')

        if base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            row = x.row
            col = x.col

            if arg1.format != self.format:
                # When formats are differnent, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

            self.has_canonical_format = x.has_canonical_format

        elif _util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            data = cupy.zeros(0, dtype if dtype else 'd')
            row = cupy.zeros(0, dtype='i')
            col = cupy.zeros(0, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False

            self.has_canonical_format = True

        elif _scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupyx.scipy.sparse
            x = arg1.tocoo()
            data = cupy.array(x.data)
            row = cupy.array(x.row, dtype='i')
            col = cupy.array(x.col, dtype='i')
            copy = False
            if shape is None:
                shape = arg1.shape

            self.has_canonical_format = x.has_canonical_format

        elif isinstance(arg1, tuple) and len(arg1) == 2:
            try:
                data, (row, col) = arg1
            except (TypeError, ValueError):
                raise TypeError('invalid input format')

            if not (base.isdense(data) and data.ndim == 1 and base.isdense(row)
                    and row.ndim == 1 and base.isdense(col) and col.ndim == 1):
                raise ValueError('row, column, and data arrays must be 1-D')
            if not (len(data) == len(row) == len(col)):
                raise ValueError(
                    'row, column, and data array must all be the same length')

            self.has_canonical_format = False

        else:
            # TODO(leofang): support constructing from a dense matrix
            raise TypeError('invalid input format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype != 'f' and dtype != 'd' and dtype != 'F' and dtype != 'D':
            raise ValueError('Only float32, float64, complex64 and complex128'
                             ' are supported')

        data = data.astype(dtype, copy=copy)
        row = row.astype('i', copy=copy)
        col = col.astype('i', copy=copy)

        if shape is None:
            if len(row) == 0 or len(col) == 0:
                raise ValueError(
                    'cannot infer dimensions from zero sized index arrays')
            shape = (int(row.max()) + 1, int(col.max()) + 1)

        if len(data) > 0:
            if row.max() >= shape[0]:
                raise ValueError('row index exceeds matrix dimensions')
            if col.max() >= shape[1]:
                raise ValueError('column index exceeds matrix dimensions')
            if row.min() < 0:
                raise ValueError('negative row index found')
            if col.min() < 0:
                raise ValueError('negative column index found')

        sparse_data._data_matrix.__init__(self, data)
        self.row = row
        self.col = col
        if not _util.isshape(shape):
            raise ValueError('invalid shape (must be a 2-tuple of int)')
        self._shape = int(shape[0]), int(shape[1])

    def _with_data(self, data, copy=True):
        """Returns a matrix with the same sparsity structure as self,
        but with different data.  By default the index arrays
        (i.e. .row and .col) are copied.
        """
        if copy:
            return coo_matrix((data, (self.row.copy(), self.col.copy())),
                              shape=self.shape,
                              dtype=data.dtype)
        else:
            return coo_matrix((data, (self.row, self.col)),
                              shape=self.shape,
                              dtype=data.dtype)

    def eliminate_zeros(self):
        """Removes zero entories in place."""
        ind = self.data != 0
        self.data = self.data[ind]
        self.row = self.row[ind]
        self.col = self.col[ind]

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.
        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros."""
        if axis is None:
            return self.data.size
        else:
            raise ValueError

    def get(self, stream=None):
        """Returns a copy of the array on host memory.

        Args:
            stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
                copy runs asynchronously. Otherwise, the copy is synchronous.

        Returns:
            scipy.sparse.coo_matrix: Copy of the array on host memory.

        """
        if not _scipy_available:
            raise RuntimeError('scipy is not available')

        data = self.data.get(stream)
        row = self.row.get(stream)
        col = self.col.get(stream)
        return scipy.sparse.coo_matrix((data, (row, col)), shape=self.shape)

    def sum_duplicates(self):
        """Eliminate duplicate matrix entries by adding them together.

        .. warning::
            When sorting the indices, CuPy follows the convention of cuSPARSE,
            which is different from that of SciPy. Therefore, the order of the
            output indices may differ:

            .. code-block:: python

                >>> #     1 0 0
                >>> # A = 1 1 0
                >>> #     1 1 1
                >>> data = cupy.array([1, 1, 1, 1, 1, 1], 'f')
                >>> row = cupy.array([0, 1, 1, 2, 2, 2], 'i')
                >>> col = cupy.array([0, 0, 1, 0, 1, 2], 'i')
                >>> A = cupyx.scipy.sparse.coo_matrix((data, (row, col)),
                ...                                   shape=(3, 3))
                >>> a = A.get()
                >>> A.sum_duplicates()
                >>> a.sum_duplicates()  # a is scipy.sparse.coo_matrix
                >>> A.row
                array([0, 1, 1, 2, 2, 2], dtype=int32)
                >>> a.row
                array([0, 1, 2, 1, 2, 2], dtype=int32)
                >>> A.col
                array([0, 0, 1, 0, 1, 2], dtype=int32)
                >>> a.col
                array([0, 0, 0, 1, 1, 2], dtype=int32)

        .. warning::
            Calling this function might synchronize the device.

        .. seealso::
           :meth:`scipy.sparse.coo_matrix.sum_duplicates`

        """
        if self.has_canonical_format:
            return
        # Note: The sorting order below follows the cuSPARSE convention (first
        # row then col, so-called row-major) and differs from that of SciPy, as
        # the cuSPARSE functions such as cusparseSpMV() assume this sorting
        # order.
        # See https://docs.nvidia.com/cuda/cusparse/index.html#coo-format
        keys = cupy.stack([self.col, self.row])
        order = cupy.lexsort(keys)
        src_data = self.data[order]
        src_row = self.row[order]
        src_col = self.col[order]
        diff = self._sum_duplicates_diff(src_row, src_col, size=self.row.size)

        if diff[1:].all():
            # All elements have different indices.
            data = src_data
            row = src_row
            col = src_col
        else:
            # TODO(leofang): move the kernels outside this method
            index = cupy.cumsum(diff, dtype='i')
            size = int(index[-1]) + 1
            data = cupy.zeros(size, dtype=self.data.dtype)
            row = cupy.empty(size, dtype='i')
            col = cupy.empty(size, dtype='i')
            if self.data.dtype.kind == 'f':
                cupy.ElementwiseKernel(
                    'T src_data, int32 src_row, int32 src_col, int32 index',
                    'raw T data, raw int32 row, raw int32 col', '''
                    atomicAdd(&data[index], src_data);
                    row[index] = src_row;
                    col[index] = src_col;
                    ''', 'sum_duplicates_assign')(src_data, src_row, src_col,
                                                  index, data, row, col)
            elif self.data.dtype.kind == 'c':
                cupy.ElementwiseKernel(
                    'T src_real, T src_imag, int32 src_row, int32 src_col, '
                    'int32 index',
                    'raw T real, raw T imag, raw int32 row, raw int32 col', '''
                    atomicAdd(&real[index], src_real);
                    atomicAdd(&imag[index], src_imag);
                    row[index] = src_row;
                    col[index] = src_col;
                    ''',
                    'sum_duplicates_assign_complex')(src_data.real,
                                                     src_data.imag, src_row,
                                                     src_col, index, data.real,
                                                     data.imag, row, col)

        self.data = data
        self.row = row
        self.col = col
        self.has_canonical_format = True

    def toarray(self, order=None, out=None):
        """Returns a dense matrix representing the same value.

        Args:
            order (str): Not supported.
            out: Not supported.

        Returns:
            cupy.ndarray: Dense array representing the same value.

        .. seealso:: :meth:`scipy.sparse.coo_matrix.toarray`

        """
        return self.tocsr().toarray(order=order, out=out)

    def tocoo(self, copy=False):
        """Converts the matrix to COOdinate format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible.

        Returns:
            cupyx.scipy.sparse.coo_matrix: Converted matrix.

        """
        if copy:
            return self.copy()
        else:
            return self

    def tocsc(self, copy=False):
        """Converts the matrix to Compressed Sparse Column format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in coo to csc conversion.

        Returns:
            cupyx.scipy.sparse.csc_matrix: Converted matrix.

        """
        if self.nnz == 0:
            return csc.csc_matrix(self.shape, dtype=self.dtype)
        # copy is silently ignored (in line with SciPy) because both
        # sum_duplicates and coosort change the underlying data
        x = self.copy()
        x.sum_duplicates()
        cusparse.coosort(x, 'c')
        x = cusparse.coo2csc(x)
        x.has_canonical_format = True
        return x

    def tocsr(self, copy=False):
        """Converts the matrix to Compressed Sparse Row format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in coo to csr conversion.

        Returns:
            cupyx.scipy.sparse.csr_matrix: Converted matrix.

        """
        if self.nnz == 0:
            return csr.csr_matrix(self.shape, dtype=self.dtype)
        # copy is silently ignored (in line with SciPy) because both
        # sum_duplicates and coosort change the underlying data
        x = self.copy()
        x.sum_duplicates()
        cusparse.coosort(x, 'r')
        x = cusparse.coo2csr(x)
        x.has_canonical_format = True
        return x

    def transpose(self, axes=None, copy=False):
        """Returns a transpose matrix.

        Args:
            axes: This option is not supported.
            copy (bool): If ``True``, a returned matrix shares no data.
                Otherwise, it shared data arrays as much as possible.

        Returns:
            cupyx.scipy.sparse.spmatrix: Transpose matrix.

        """
        if axes is not None:
            raise ValueError(
                'Sparse matrices do not support an \'axes\' parameter because '
                'swapping dimensions is the only logical permutation.')
        shape = self.shape[1], self.shape[0]
        return coo_matrix((self.data, (self.col, self.row)),
                          shape=shape,
                          copy=copy)

コード例 #30

ファイルを表示

ファイル: compressed.py プロジェクト: yutiansut/cupy

class _compressed_sparse_matrix(sparse_data._data_matrix):

    _compress_getitem_kern = core.ElementwiseKernel(
        'T d, S ind, int32 minor', 'raw T answer',
        'if (ind == minor) atomicAdd(&answer[0], d);',
        'compress_getitem')

    _compress_getitem_complex_kern = core.ElementwiseKernel(
        'T real, T imag, S ind, int32 minor',
        'raw T answer_real, raw T answer_imag',
        '''
        if (ind == minor) {
          atomicAdd(&answer_real[0], real);
          atomicAdd(&answer_imag[0], imag);
        }
        ''',
        'compress_getitem_complex')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        if shape is not None and len(shape) != 2:
            raise ValueError(
                'Only two-dimensional sparse arrays are supported.')

        if base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            indices = x.indices
            indptr = x.indptr

            if arg1.format != self.format:
                # When formats are differnent, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

            has_canonical_format = x.has_canonical_format
        elif util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            data = basic.zeros(0, dtype if dtype else 'd')
            indices = basic.zeros(0, 'i')
            indptr = basic.zeros(self._swap(m, n)[0] + 1, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False
            has_canonical_format = True

        elif scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupy.sparse
            x = arg1.asformat(self.format)
            data = cupy.array(x.data)
            indices = cupy.array(x.indices, dtype='i')
            indptr = cupy.array(x.indptr, dtype='i')
            copy = False

            if shape is None:
                shape = arg1.shape
            has_canonical_format = x.has_canonical_format

        elif isinstance(arg1, tuple) and len(arg1) == 3:
            data, indices, indptr = arg1
            if not (base.isdense(data) and data.ndim == 1 and
                    base.isdense(indices) and indices.ndim == 1 and
                    base.isdense(indptr) and indptr.ndim == 1):
                raise ValueError(
                    'data, indices, and indptr should be 1-D')

            if len(data) != len(indices):
                raise ValueError('indices and data should have the same size')

            has_canonical_format = False

        elif base.isdense(arg1):
            if arg1.ndim > 2:
                raise TypeError('expected dimension <= 2 array or matrix')
            elif arg1.ndim == 1:
                arg1 = arg1[None]
            elif arg1.ndim == 0:
                arg1 = arg1[None, None]
            data, indices, indptr = self._convert_dense(arg1)
            copy = False
            if shape is None:
                shape = arg1.shape

            has_canonical_format = True

        else:
            raise ValueError(
                'Unsupported initializer format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype != 'f' and dtype != 'd' and dtype != 'F' and dtype != 'D':
            raise ValueError(
                'Only float32, float64, complex64 and complex128 '
                'are supported')

        data = data.astype(dtype, copy=copy)
        sparse_data._data_matrix.__init__(self, data)

        self.indices = indices.astype('i', copy=copy)
        self.indptr = indptr.astype('i', copy=copy)

        if shape is None:
            shape = self._swap(len(indptr) - 1, int(indices.max()) + 1)

        major, minor = self._swap(*shape)
        if len(indptr) != major + 1:
            raise ValueError('index pointer size (%d) should be (%d)'
                             % (len(indptr), major + 1))

        self._descr = cusparse.MatDescriptor.create()
        self._shape = shape
        self._has_canonical_format = has_canonical_format

    def _with_data(self, data):
        return self.__class__(
            (data, self.indices.copy(), self.indptr.copy()), shape=self.shape)

    def _convert_dense(self, x):
        raise NotImplementedError

    def _swap(self, x, y):
        raise NotImplementedError

    def _add_sparse(self, other, alpha, beta):
        raise NotImplementedError

    def _add(self, other, lhs_negative, rhs_negative):
        if cupy.isscalar(other):
            if other == 0:
                if lhs_negative:
                    return -self
                else:
                    return self.copy()
            else:
                raise NotImplementedError(
                    'adding a nonzero scalar to a sparse matrix is not '
                    'supported')
        elif base.isspmatrix(other):
            alpha = -1 if lhs_negative else 1
            beta = -1 if rhs_negative else 1
            return self._add_sparse(other, alpha, beta)
        elif base.isdense(other):
            if lhs_negative:
                if rhs_negative:
                    return -self.todense() - other
                else:
                    return other - self.todense()
            else:
                if rhs_negative:
                    return self.todense() - other
                else:
                    return self.todense() + other
        else:
            return NotImplemented

    def __add__(self, other):
        return self._add(other, False, False)

    def __radd__(self, other):
        return self._add(other, False, False)

    def __sub__(self, other):
        return self._add(other, False, True)

    def __rsub__(self, other):
        return self._add(other, True, False)

    def __getitem__(self, slices):
        if isinstance(slices, tuple):
            slices = list(slices)
        elif isinstance(slices, list):
            slices = list(slices)
            if all([isinstance(s, int) for s in slices]):
                slices = [slices]
        else:
            slices = [slices]

        ellipsis = -1
        n_ellipsis = 0
        for i, s in enumerate(slices):
            if s is None:
                raise IndexError('newaxis is not supported')
            elif s is Ellipsis:
                ellipsis = i
                n_ellipsis += 1
        if n_ellipsis > 0:
            ellipsis_size = self.ndim - (len(slices) - 1)
            slices[ellipsis:ellipsis + 1] = [slice(None)] * ellipsis_size

        if len(slices) == 2:
            row, col = slices
        elif len(slices) == 1:
            row, col = slices[0], slice(None)
        else:
            raise IndexError('invalid number of indices')

        major, minor = self._swap(row, col)
        major_size, minor_size = self._swap(*self._shape)
        if numpy.isscalar(major):
            i = int(major)
            if i < 0:
                i += major_size
            if not (0 <= i < major_size):
                raise IndexError('index out of bounds')
            if numpy.isscalar(minor):
                j = int(minor)
                if j < 0:
                    j += minor_size
                if not (0 <= j < minor_size):
                    raise IndexError('index out of bounds')
                return self._get_single(i, j)
            elif minor == slice(None):
                return self._get_major_slice(slice(i, i + 1))
        elif isinstance(major, slice):
            if minor == slice(None):
                return self._get_major_slice(major)

        raise ValueError('unsupported indexing')

    def _get_single(self, major, minor):
        start = self.indptr[major]
        end = self.indptr[major + 1]
        answer = cupy.zeros((), self.dtype)
        data = self.data[start:end]
        indices = self.indices[start:end]
        if self.dtype.kind == 'c':
            self._compress_getitem_complex_kern(
                data.real, data.imag, indices, minor, answer.real, answer.imag)
        else:
            self._compress_getitem_kern(
                data, indices, minor, answer)
        return answer[()]

    def _get_major_slice(self, major):
        major_size, minor_size = self._swap(*self._shape)
        # major.indices cannot be used because scipy.sparse behaves differently
        major_start = major.start
        major_stop = major.stop
        major_step = major.step
        if major_start is None:
            major_start = 0
        if major_stop is None:
            major_stop = major_size
        if major_step is None:
            major_step = 1
        if major_start < 0:
            major_start += major_size
        if major_stop < 0:
            major_stop += major_size

        if major_step != 1:
            raise ValueError('slicing with step != 1 not supported')

        if not (0 <= major_start <= major_size and
                0 <= major_stop <= major_size and
                major_start <= major_stop):
            raise IndexError('index out of bounds')

        start = self.indptr[major_start]
        stop = self.indptr[major_stop]
        data = self.data[start:stop]
        indptr = self.indptr[major_start:major_stop + 1] - start
        indices = self.indices[start:stop]

        shape = self._swap(len(indptr) - 1, minor_size)
        return self.__class__(
            (data, indices, indptr), shape=shape, dtype=self.dtype, copy=False)

    @property
    def has_canonical_format(self):
        return self._has_canonical_format

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.
        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros.

        Args:
            axis: Not supported yet.

        Returns:
            int: The number of stored values.

        """
        if axis is None:
            return self.data.size
        else:
            raise ValueError

    # TODO(unno): Implement sorted_indices

    def sum_duplicates(self):
        if self._has_canonical_format:
            return
        if self.data.size == 0:
            self._has_canonical_format = True
            return
        coo = self.tocoo()
        coo.sum_duplicates()
        self.__init__(coo.asformat(self.format))
        self._has_canonical_format = True