def _kernel_labels():
    """Build the kernel that collects self-referencing indices.

    The kernel has no per-element inputs; ``y``, ``count`` and ``labels``
    are raw (manually indexed) arguments. For every index ``i`` with
    ``y[i] == i`` it reserves a slot ``j`` by atomically incrementing
    ``count[1]`` and stores ``i`` in ``labels[j]``.

    Returns:
        core.ElementwiseKernel: Kernel named ``cupyx_nd_label_labels``.
    """
    in_params = ''
    out_params = 'raw Y y, raw int32 count, raw int32 labels'
    # Adjacent literals concatenate to the exact one-line kernel source.
    operation = (
        ' if (y[i] != i) continue;'
        ' int j = atomicAdd(&count[1], 1);'
        ' labels[j] = i; '
    )
    return core.ElementwiseKernel(
        in_params, out_params, operation, 'cupyx_nd_label_labels')
def __call__(self, *args):
    """Apply the vectorized function to ``args``.

    One elementwise kernel is generated per combination of input types and
    stored in ``self._kernel_cache``; calls with an already-seen type
    signature reuse the stored kernel.

    Args:
        *args: Inputs, forwarded unchanged to the kernel.

    Returns:
        The result of calling the elementwise kernel with ``args``.

    Raises:
        NotImplementedError: If a kernel has to be generated and
            ``self.otypes`` is not ``None``.
    """
    itypes = ''.join([_get_input_type(arg) for arg in args])

    cached = self._kernel_cache.get(itypes, None)
    if cached is not None:
        return cached(*args)

    in_types = [_types.Scalar(t) for t in itypes]
    ret_type = None
    if self.otypes is not None:
        # TODO(asi1024): Implement
        raise NotImplementedError

    func = _interface._CudaFunction(self.pyfunc, 'numpy', device=True)
    result = func._emit_code_from_types(in_types, ret_type)

    # Kernel parameters are named in0, in1, ... and the result is out0.
    in_params = ', '.join(
        f'{t.dtype} in{i}' for i, t in enumerate(in_types))
    out_params = str(result.return_type.dtype) + ' out0'
    call_args = ', '.join([f'in{i}' for i in range(len(in_types))])
    body = 'out0 = {}({})'.format(func.name, call_args)

    kern = core.ElementwiseKernel(
        in_params, out_params, body, preamble=result.code)
    self._kernel_cache[itypes] = kern
    return kern(*args)
def _kernel_init():
    """Build the kernel that initializes the label array.

    For each element, the output ``y`` is set to ``-1`` where the input
    ``x`` equals 0 and to the element's index ``i`` otherwise.

    Returns:
        core.ElementwiseKernel: Kernel named ``cupyimg_nd_label_init``.
    """
    in_params = "X x"
    out_params = "Y y"
    operation = "if (x == 0) { y = -1; } else { y = i; }"
    kernel_name = "cupyimg_nd_label_init"
    return core.ElementwiseKernel(
        in_params, out_params, operation, kernel_name)
def cumsum(a, axis=None, dtype=None, out=None):
    """Return the cumulative sum of the flattened input.

    Args:
        a: Input array; it is flattened with ``ravel`` before summing.
        axis: Must be ``None``; any other value is rejected.
        dtype: Result dtype, used only when ``out`` is not given. When it
            is ``None``, boolean input and signed integers narrower than
            ``numpy.dtype('l')`` are promoted to ``'l'``, unsigned integers
            narrower than ``numpy.dtype('L')`` are promoted to ``'L'``, and
            every other dtype is kept.
        out: Optional array. When given, the flattened input is copied
            into it and the sums are accumulated there in place.

    Returns:
        The array holding the running sums (``out`` when it was given,
        otherwise a newly created array).

    Raises:
        ValueError: If ``axis`` is not ``None``.
    """
    if axis is not None:
        raise ValueError("'axis' option is not supported")
    a = a.ravel()

    if out is not None:
        out[...] = a
    else:
        if dtype is None:
            kind = a.dtype.kind
            width = a.dtype.itemsize
            long_t = numpy.dtype('l')
            ulong_t = numpy.dtype('L')
            if kind == 'b' or (kind == 'i' and width < long_t.itemsize):
                dtype = long_t
            elif kind == 'u' and width < ulong_t.itemsize:
                dtype = ulong_t
            else:
                dtype = a.dtype
        out = a.astype(dtype)

    # Each pass adds, to every element whose index has bit `pos` set, the
    # element at index (i ^ pos) | (pos - 1).
    kern = core.ElementwiseKernel(
        'int32 pos', 'raw T x',
        (
            ' if (i & pos) {'
            ' x[i] += x[i ^ pos | (pos - 1)];'
            ' } '
        ),
        'cumsum_kernel')

    total = out.size
    pos = 1
    while pos < total:
        kern(pos, out, size=total)
        pos <<= 1
    return out
def _get_gumbel_kernel():
    """Return the module-level Gumbel kernel, creating it on first use.

    The kernel computes ``y = loc - log(-log(1 - x)) * scale`` for each
    element and is stored in the global ``_gumbel_kernel`` so that it is
    constructed only once.

    Returns:
        core.ElementwiseKernel: Kernel named ``gumbel_kernel``.
    """
    global _gumbel_kernel
    if _gumbel_kernel is not None:
        return _gumbel_kernel

    in_params = 'T x, T loc, T scale'
    out_params = 'T y'
    operation = 'y = loc - log(-log(1 - x)) * scale'
    _gumbel_kernel = core.ElementwiseKernel(
        in_params, out_params, operation, 'gumbel_kernel')
    return _gumbel_kernel
def tocsc(self, copy=False):
    """Converts the matrix to Compressed Sparse Column format.

    Args:
        copy (bool): If ``False``, it shares data arrays as much as
            possible. Actually this option is ignored because all
            arrays in a matrix cannot be shared in dia to csc conversion.

    Returns:
        cupy.sparse.csc_matrix: Converted matrix.

    """
    if self.data.size == 0:
        return csc.csc_matrix(self.shape, dtype=self.dtype)

    num_rows, num_cols = self.shape
    _, offset_len = self.data.shape

    # For every stored entry compute its row index and whether it is a
    # real, in-bounds, non-zero element of the matrix.
    row, mask = core.ElementwiseKernel(
        'int32 offset_len, int32 offsets, int32 num_rows, '
        'int32 num_cols, T data',
        'int32 row, bool mask',
        '''
        int offset_inds = i % offset_len;
        row = offset_inds - offsets;
        mask = (row >= 0 && row < num_rows && offset_inds < num_cols
                && data != 0);
        ''',
        'dia_tocsc')(offset_len, self.offsets[:, None], num_rows,
                     num_cols, self.data)

    indptr = cupy.zeros(num_cols + 1, dtype='i')
    # Only the first ``num_cols`` per-column counts can be non-zero (the
    # kernel masks out ``offset_inds >= num_cols``). Truncating keeps the
    # assignment shape-compatible when ``data`` is wider than the matrix.
    indptr[1:offset_len + 1] = cupy.cumsum(mask.sum(axis=0)[:num_cols])
    if offset_len < num_cols:
        # Columns beyond the stored width are empty: repeat the last
        # pointer. Guarded so that ``indptr[offset_len]`` is never read
        # out of range when ``offset_len > num_cols``.
        indptr[offset_len + 1:] = indptr[offset_len]
    indices = row.T[mask.T].astype('i', copy=False)
    data = self.data.T[mask.T]
    return csc.csc_matrix(
        (data, indices, indptr), shape=self.shape, dtype=self.dtype)
def _kernel_count():
    """Build the kernel that flattens parent links and counts fixed points.

    The kernel has no per-element inputs; ``y`` and ``count`` are raw
    arguments. Indices with ``y[i] < 0`` are skipped. For the others the
    chain ``j = y[j]`` is followed from ``i`` until ``j == y[j]``; if the
    end point differs from ``i`` it is written to ``y[i]``, otherwise
    ``count[0]`` is atomically incremented.

    Returns:
        core.ElementwiseKernel: Kernel named ``cupyx_nd_label_count``.
    """
    in_params = ''
    out_params = 'raw Y y, raw int32 count'
    # Adjacent literals concatenate to the exact one-line kernel source.
    operation = (
        ' if (y[i] < 0) continue;'
        ' int j = i;'
        ' while (j != y[j]) { j = y[j]; }'
        ' if (j != i) y[i] = j;'
        ' else atomicAdd(&count[0], 1); '
    )
    return core.ElementwiseKernel(
        in_params, out_params, operation, 'cupyx_nd_label_count')
def _cupy_permutation():
    """Build the kernel that performs random pairwise swaps on ``array``.

    Kernel arguments are ``raw int32 array``, ``raw int32 sample``,
    ``int32 j_start`` and ``int32 _j_end``. Work item ``i`` derives a
    partner index ``j = (sample[i] & 0x7fffffff) % num`` (``num`` being the
    kernel's element count) and does nothing unless ``j != i`` and
    ``j_start <= j < min(_j_end, num)``. Otherwise it tries, at most 256
    times, to exchange ``array[i]`` and ``array[j]``, using ``atomicExch``
    with ``-1`` as a temporary "taken" marker; after each attempt ``j`` is
    advanced by a fixed offset modulo ``num``.

    NOTE(review): because ``-1`` is the marker, ``array`` is presumably
    expected not to contain ``-1`` as a real value -- confirm with callers.

    The kernel source is kept on separate lines with ``/* */`` comments so
    that no comment can swallow the statements that follow it.

    Returns:
        core.ElementwiseKernel: Kernel named ``cupy_permutation``.
    """
    return core.ElementwiseKernel(
        'raw int32 array, raw int32 sample, int32 j_start, int32 _j_end',
        '',
        '''
        const int invalid = -1;
        const int num = _ind.size();
        int j = (sample[i] & 0x7fffffff) % num;
        int j_end = _j_end;
        if (j_end > num) j_end = num;
        if (j == i || j < j_start || j >= j_end) continue;

        /* If a thread fails to do data swaping once, it changes j
           value using j_offset below and try data swaping again.
           This process is repeated until data swapping is succeeded.
           The j_offset is determined from the initial j
           (random number assigned to each thread) and the initial
           offset between j and i (ID of each thread).
           If a given number sequence in sample is really random,
           this j-update would not be necessary. This is work-around
           mainly to avoid potential eternal conflict when sample has
           rather synthetic number sequence. */
        int j_offset = ((2*j - i + num) % (num - 1)) + 1;

        /* A thread gives up to do data swapping if loop count exceed
           a threathod determined below. This is kind of safety
           mechanism to escape the eternal race condition, though I
           believe it never happens. */
        int loops = 256;

        bool do_next = true;
        while (do_next && loops > 0) {
            /* try to swap the contents of array[i] and array[j] */
            if (i != j) {
                int val_j = atomicExch(&array[j], invalid);
                if (val_j != invalid) {
                    int val_i = atomicExch(&array[i], invalid);
                    if (val_i != invalid) {
                        array[i] = val_j;
                        array[j] = val_i;
                        do_next = false;
                        /* done */
                    }
                    else {
                        /* restore array[j] */
                        array[j] = val_j;
                    }
                }
            }
            j = (j + j_offset) % num;
            loops--;
        }
        ''',
        'cupy_permutation',
    )
def _kernel_labels():
    """Build the kernel that collects self-referencing indices.

    ``y``, ``count`` and ``labels`` are raw arguments and there are no
    per-element inputs. Every index ``i`` satisfying ``y[i] == i`` obtains
    a slot ``j`` from an atomic increment of ``count[1]`` and is written to
    ``labels[j]``; all other indices are skipped.

    Returns:
        core.ElementwiseKernel: Kernel named ``cupyimg_nd_label_labels``.
    """
    out_params = "raw Y y, raw int32 count, raw int32 labels"
    # Adjacent literals concatenate to the exact one-line kernel source.
    operation = (
        " if (y[i] != i) continue;"
        " int j = atomicAdd(&count[1], 1);"
        " labels[j] = i; "
    )
    kernel = core.ElementwiseKernel(
        "", out_params, operation, "cupyimg_nd_label_labels")
    return kernel
def _get_gammaln_kernel():
    """Return the module-level log-gamma kernel, creating it on first use.

    For each element the kernel stores ``lgamma(x)`` in ``y``, except that
    a negative-infinite ``x`` yields negative infinity. The kernel object
    is kept in the global ``_gammaln_kernel`` so it is built only once.

    Returns:
        core.ElementwiseKernel: Kernel named ``gammaln_kernel``.
    """
    global _gammaln_kernel
    if _gammaln_kernel is None:
        # The special case is an if/else rather than an early `return`.
        # NOTE(review): ElementwiseKernel is understood to place this body
        # inside a per-element loop, where `return` would end the whole
        # thread instead of just the current element -- confirm against
        # the kernel template. The if/else form is correct either way.
        _gammaln_kernel = core.ElementwiseKernel(
            'T x', 'T y',
            """
            if (isinf(x) && x < 0) {
                y = - 1.0 / 0.0;
            } else {
                y = lgamma(x);
            }
            """,
            'gammaln_kernel')
    return _gammaln_kernel
def _get_digamma_kernel():
    """Return the module-level digamma kernel, creating it on first use.

    The kernel evaluates ``y = psi(x)`` per element; the device code it
    relies on is supplied as a preamble made of ``polevl_definition``
    followed by ``psi_definition``. The kernel is stored in the global
    ``_digamma_kernel`` so it is built only once.

    Returns:
        core.ElementwiseKernel: Kernel named ``digamma_kernel``.
    """
    global _digamma_kernel
    if _digamma_kernel is not None:
        return _digamma_kernel

    device_code = polevl_definition+psi_definition
    _digamma_kernel = core.ElementwiseKernel(
        'T x', 'T y',
        " y = psi(x) ",
        'digamma_kernel',
        preamble=device_code,
    )
    return _digamma_kernel
def _get_zeta_kernel():
    """Return the module-level zeta kernel, creating it on first use.

    The kernel evaluates ``y = zeta(x, q)`` per element, with the device
    code taken from ``zeta_definition`` as its preamble. The kernel is
    stored in the global ``_zeta_kernel`` so it is built only once.

    Returns:
        core.ElementwiseKernel: Kernel named ``zeta_kernel``.
    """
    global _zeta_kernel
    if _zeta_kernel is not None:
        return _zeta_kernel

    _zeta_kernel = core.ElementwiseKernel(
        'T x, T q', 'T y',
        " y = zeta(x, q) ",
        'zeta_kernel',
        preamble=zeta_definition,
    )
    return _zeta_kernel
def _kernel_count():
    """Build the kernel that flattens parent links and counts fixed points.

    ``y`` and ``count`` are raw arguments and there are no per-element
    inputs. Negative ``y[i]`` entries are skipped. Otherwise the kernel
    walks ``j = y[j]`` starting at ``i`` until ``j == y[j]``; when that end
    point is not ``i`` itself it is stored in ``y[i]``, and when it is,
    ``count[0]`` is atomically incremented.

    Returns:
        core.ElementwiseKernel: Kernel named ``cupyimg_nd_label_count``.
    """
    out_params = "raw Y y, raw int32 count"
    # Adjacent literals concatenate to the exact one-line kernel source.
    operation = (
        " if (y[i] < 0) continue;"
        " int j = i;"
        " while (j != y[j]) { j = y[j]; }"
        " if (j != i) y[i] = j;"
        " else atomicAdd(&count[0], 1); "
    )
    kernel = core.ElementwiseKernel(
        "", out_params, operation, "cupyimg_nd_label_count")
    return kernel
def cumsum(a, axis=None, dtype=None, out=None):
    """Returns the cumulative sum of an array along a given axis.

    Args:
        a (cupy.ndarray): Input array.
        axis (int): Axis along which the cumulative sum is taken. If it is
            not specified, the input is flattened.
        dtype: Data type specifier. It is consulted only when ``out`` is
            not given; if it is ``None``, boolean and narrow signed integer
            inputs are promoted to ``'l'`` and narrow unsigned integer
            inputs to ``'L'``.
        out (cupy.ndarray): Output array. When given, the input is copied
            into it before the sums are accumulated.

    Returns:
        cupy.ndarray: The result array.

    Raises:
        ValueError: If ``axis`` is outside ``[-a.ndim, a.ndim)``.

    .. seealso:: :func:`numpy.cumsum`

    """
    if out is not None:
        out[...] = a
    else:
        if dtype is None:
            kind = a.dtype.kind
            width = a.dtype.itemsize
            long_t = numpy.dtype('l')
            ulong_t = numpy.dtype('L')
            if kind == 'b' or (kind == 'i' and width < long_t.itemsize):
                dtype = long_t
            elif kind == 'u' and width < ulong_t.itemsize:
                dtype = ulong_t
            else:
                dtype = a.dtype
        out = a.astype(dtype)

    if axis is not None:
        if not (-a.ndim <= axis < a.ndim):
            raise ValueError('axis(={}) out of bounds'.format(axis))
        return _proc_as_batch(_cumsum_batch, out, axis=axis)

    out = out.ravel()

    # Each pass adds, to every element whose index has bit `pos` set, the
    # element at index (i ^ pos) | (pos - 1).
    kern = core.ElementwiseKernel(
        'int32 pos', 'raw T x',
        (
            ' if (i & pos) {'
            ' x[i] += x[i ^ pos | (pos - 1)];'
            ' } '
        ),
        'cumsum_kernel')

    total = out.size
    pos = 1
    while pos < total:
        kern(pos, out, size=total)
        pos <<= 1
    return out
def _cumsum_batch(out):
    """Accumulate running sums in place along the second index of ``out``.

    The kernel splits the linear index ``i`` into ``b = i % batch`` and
    ``j = i / batch`` (with ``batch = out.shape[0]``) and, when ``j`` has
    bit ``pos`` set, adds ``x[b, (j ^ pos) | (pos - 1)]`` to ``x[b, j]``.
    It is launched once per power of two ``pos``.

    Args:
        out (cupy.ndarray): Array updated in place. The kernel addresses
            it with two indices.

    Returns:
        cupy.ndarray: ``out`` itself.
    """
    kern = core.ElementwiseKernel(
        'int32 pos, int32 batch', 'raw T x',
        '''
        int b = i % batch;
        int j = i / batch;
        if (j & pos) {
            const int dst_index[] = {b, j};
            const int src_index[] = {b, j ^ pos | (pos - 1)};
            x[dst_index] += x[src_index];
        }
        ''',
        'cumsum_batch_kernel')

    size = out.size
    if size <= 1:
        # Nothing to accumulate; no kernel launch is needed.
        return out

    batch = out.shape[0]
    # ``j = i / batch`` only takes values in [0, n_steps). Once ``pos``
    # reaches n_steps no ``j`` has that bit set, so further launches would
    # be no-ops; stop there instead of iterating up to ``out.size``.
    n_steps = (size - 1) // batch + 1

    pos = 1
    while pos < n_steps:
        kern(pos, batch, out, size=size)
        pos <<= 1
    return out
def multinomial(n, pvals, size=None):
    """Returns an array from multinomial distribution.

    Args:
        n (int): Number of trials.
        pvals (cupy.ndarray): Probabilities of each of the ``p`` different
            outcomes. The sum of these values must be 1.
        size (int or tuple of ints or None): Shape of a sample in each
            trial. For example when ``size`` is ``(a, b)``, shape of
            returned value is ``(a, b, p)`` where ``p`` is ``len(pvals)``.
            If ``size`` is ``None``, it is treated as ``()``. So, shape of
            returned value is ``(p,)``.

    Returns:
        cupy.ndarray: An array drawn from multinomial distribution.

    .. note::
       It does not support ``sum(pvals) < 1`` case.

    .. seealso:: :func:`numpy.random.multinomial`

    """
    # Normalize ``size`` to a tuple.
    if size is None:
        size = ()
    elif isinstance(size, six.integer_types):
        size = (size,)
    else:
        size = tuple(size)

    # Total number of experiments is the product of the sample shape.
    m = 1
    for extent in size:
        m *= extent

    p = len(pvals)
    # atomicAdd only supports int32
    ys = basic.zeros(size + (p,), 'i')
    if ys.size > 0:
        # Draw every trial outcome at once, then count outcome ``x`` of
        # draw ``i`` into the bin of experiment ``i / n``.
        xs = choice(p, p=pvals, size=n * m)
        count_kernel = core.ElementwiseKernel(
            'int64 x, int32 p, int32 n', 'raw int32 ys',
            'atomicAdd(&ys[i / n * p + x], 1)',
            'cupy_random_multinomial')
        count_kernel(xs, p, n, ys)
    return ys.astype('l')
def _kernel_finalize():
    """Build the kernel that rewrites ``y`` entries as 1-based positions.

    ``labels`` and ``y`` are raw arguments; ``maxlabel`` is a scalar input.
    A negative ``y[i]`` is replaced with ``0``. Any other ``y[i]`` is
    looked up in ``labels[0:maxlabel]`` with a bisection loop and replaced
    with the resulting position plus one.

    NOTE(review): the bisection presumably relies on ``labels`` being
    sorted in ascending order -- confirm with the caller.

    Returns:
        core.ElementwiseKernel: Kernel named ``cupyx_nd_label_finalize``.
    """
    in_params = 'int32 maxlabel'
    out_params = 'raw int32 labels, raw Y y'
    # Adjacent literals concatenate to the exact one-line kernel source;
    # the Python-side indentation only mirrors the nesting.
    operation = (
        ' if (y[i] < 0) { y[i] = 0; continue; }'
        ' int yi = y[i];'
        ' int j_min = 0;'
        ' int j_max = maxlabel - 1;'
        ' int j = (j_min + j_max) / 2;'
        ' while (j_min < j_max) {'
            ' if (yi == labels[j]) break;'
            ' if (yi < labels[j]) j_max = j - 1;'
            ' else j_min = j + 1;'
            ' j = (j_min + j_max) / 2;'
        ' }'
        ' y[i] = j + 1; '
    )
    return core.ElementwiseKernel(
        in_params, out_params, operation, 'cupyx_nd_label_finalize')
def _kernel_connect():
    """Build the kernel that links each element with its neighbours.

    Inputs are ``raw int32 shape``, ``raw int32 dirs``, ``int32 ndirs`` and
    ``int32 ndim``; ``y`` is a raw argument updated in place. Elements with
    ``y[i] < 0`` are skipped. For each of the ``ndirs`` directions the
    kernel decomposes ``i`` into per-axis positions using ``shape``, adds
    the direction's offsets from ``dirs`` and recomposes the neighbour
    index ``k``; neighbours that fall outside ``shape`` or have
    ``y[k] < 0`` are ignored. Otherwise both chains ``j = y[j]`` and
    ``k = y[k]`` are followed to their end points and, unless these are
    equal, the larger end point is redirected to the smaller one with
    ``atomicCAS``, retrying from the value found when the exchange fails.

    Returns:
        core.ElementwiseKernel: Kernel named ``cupyx_nd_label_connect``.
    """
    in_params = 'raw int32 shape, raw int32 dirs, int32 ndirs, int32 ndim'
    out_params = 'raw Y y'
    # Adjacent literals concatenate to the exact one-line kernel source;
    # the Python-side indentation only mirrors the nesting.
    operation = (
        ' if (y[i] < 0) continue;'
        ' for (int dr = 0; dr < ndirs; dr++) {'
            ' int j = i;'
            ' int rest = j;'
            ' int stride = 1;'
            ' int k = 0;'
            ' for (int dm = ndim-1; dm >= 0; dm--) {'
                ' int pos = rest % shape[dm] + dirs[dm + dr * ndim];'
                ' if (pos < 0 || pos >= shape[dm]) {'
                    ' k = -1;'
                    ' break;'
                ' }'
                ' k += pos * stride;'
                ' rest /= shape[dm];'
                ' stride *= shape[dm];'
            ' }'
            ' if (k < 0) continue;'
            ' if (y[k] < 0) continue;'
            ' while (1) {'
                ' while (j != y[j]) { j = y[j]; }'
                ' while (k != y[k]) { k = y[k]; }'
                ' if (j == k) break;'
                ' if (j < k) {'
                    ' int old = atomicCAS( &y[k], k, j );'
                    ' if (old == k) break;'
                    ' k = old;'
                ' }'
                ' else {'
                    ' int old = atomicCAS( &y[j], j, k );'
                    ' if (old == j) break;'
                    ' j = old;'
                ' }'
            ' }'
        ' } '
    )
    return core.ElementwiseKernel(
        in_params, out_params, operation, 'cupyx_nd_label_connect')
def _kernel_finalize():
    """Build the kernel that rewrites ``y`` entries as 1-based positions.

    ``maxlabel`` is a scalar input; ``labels`` and ``y`` are raw arguments.
    Negative ``y[i]`` values become ``0``. Every other ``y[i]`` is searched
    for in ``labels[0:maxlabel]`` by bisection and replaced with the
    position found plus one.

    NOTE(review): the bisection presumably relies on ``labels`` being
    sorted in ascending order -- confirm with the caller.

    Returns:
        core.ElementwiseKernel: Kernel named ``cupyimg_nd_label_finalize``.
    """
    # Adjacent literals concatenate to the exact one-line kernel source;
    # the Python-side indentation only mirrors the nesting.
    operation = (
        " if (y[i] < 0) { y[i] = 0; continue; }"
        " int yi = y[i];"
        " int j_min = 0;"
        " int j_max = maxlabel - 1;"
        " int j = (j_min + j_max) / 2;"
        " while (j_min < j_max) {"
            " if (yi == labels[j]) break;"
            " if (yi < labels[j]) j_max = j - 1;"
            " else j_min = j + 1;"
            " j = (j_min + j_max) / 2;"
        " }"
        " y[i] = j + 1; "
    )
    kernel = core.ElementwiseKernel(
        "int32 maxlabel",
        "raw int32 labels, raw Y y",
        operation,
        "cupyimg_nd_label_finalize",
    )
    return kernel
__device__ long long atomicAdd(long long *address, long long val) { return atomicAdd(reinterpret_cast<unsigned long long*>(address), static_cast<unsigned long long>(val)); }''' # TODO(unno): use searchsorted _histogram_kernel = core.ElementwiseKernel('S x, raw T bins, int32 n_bins', 'raw U y', ''' if (x < bins[0] or bins[n_bins - 1] < x) { return; } int high = n_bins - 1; int low = 0; while (high - low > 1) { int mid = (high + low) / 2; if (bins[mid] <= x) { low = mid; } else { high = mid; } } atomicAdd(&y[low], U(1)); ''', preamble=_preamble) def histogram(x, bins=10): """Computes the histogram of a set of data. Args:
class RandomState(object): """Portable container of a pseudo-random number generator. An instance of this class holds the state of a random number generator. The state is available only on the device which has been current at the initialization of the instance. Functions of :mod:`cupy.random` use global instances of this class. Different instances are used for different devices. The global state for the current device can be obtained by the :func:`cupy.random.get_random_state` function. Args: seed (None or int): Seed of the random number generator. See the :meth:`~cupy.random.RandomState.seed` method for detail. method (int): Method of the random number generator. Following values are available:: cupy.cuda.curand.CURAND_RNG_PSEUDO_DEFAULT cupy.cuda.curand.CURAND_RNG_XORWOW cupy.cuda.curand.CURAND_RNG_MRG32K3A cupy.cuda.curand.CURAND_RNG_MTGP32 cupy.cuda.curand.CURAND_RNG_MT19937 cupy.cuda.curand.CURAND_RNG_PHILOX4_32_10 """ def __init__(self, seed=None, method=curand.CURAND_RNG_PSEUDO_DEFAULT): self._generator = curand.createGenerator(method) self.method = method self.seed(seed) def __del__(self, is_shutting_down=util.is_shutting_down): # When createGenerator raises an error, _generator is not initialized if is_shutting_down(): return if hasattr(self, '_generator'): curand.destroyGenerator(self._generator) def _update_seed(self, size): self._rk_seed = (self._rk_seed + size) % _UINT64_MAX def _generate_normal(self, func, size, dtype, *args): # curand functions below don't support odd size. 
# * curand.generateNormal # * curand.generateNormalDouble # * curand.generateLogNormal # * curand.generateLogNormalDouble size = core.get_size(size) element_size = functools.reduce(operator.mul, size, 1) if element_size % 2 == 0: out = cupy.empty(size, dtype=dtype) func(self._generator, out.data.ptr, out.size, *args) return out else: out = cupy.empty((element_size + 1, ), dtype=dtype) func(self._generator, out.data.ptr, out.size, *args) return out[:element_size].reshape(size) # NumPy compatible functions def beta(self, a, b, size=None, dtype=float): """Returns an array of samples drawn from the beta distribution. .. seealso:: :func:`cupy.random.beta` for full documentation, :meth:`numpy.random.RandomState.beta <numpy.random.mtrand.RandomState.beta>` """ a, b = cupy.asarray(a), cupy.asarray(b) if size is None: size = cupy.broadcast(a, b).shape y = cupy.empty(shape=size, dtype=dtype) _kernels.beta_kernel(a, b, self._rk_seed, y) self._update_seed(y.size) return y def binomial(self, n, p, size=None, dtype=int): """Returns an array of samples drawn from the binomial distribution. .. seealso:: :func:`cupy.random.binomial` for full documentation, :meth:`numpy.random.RandomState.binomial <numpy.random.mtrand.RandomState.binomial>` """ n, p = cupy.asarray(n), cupy.asarray(p) if size is None: size = cupy.broadcast(n, p).shape y = cupy.empty(shape=size, dtype=dtype) _kernels.binomial_kernel(n, p, self._rk_seed, y) self._update_seed(y.size) return y def chisquare(self, df, size=None, dtype=float): """Returns an array of samples drawn from the chi-square distribution. .. 
seealso:: :func:`cupy.random.chisquare` for full documentation, :meth:`numpy.random.RandomState.chisquare <numpy.random.mtrand.RandomState.chisquare>` """ df = cupy.asarray(df) if size is None: size = df.shape y = cupy.empty(shape=size, dtype=dtype) _kernels.chisquare_kernel(df, self._rk_seed, y) self._update_seed(y.size) return y def dirichlet(self, alpha, size=None, dtype=float): """Returns an array of samples drawn from the dirichlet distribution. .. seealso:: :func:`cupy.random.dirichlet` for full documentation, :meth:`numpy.random.RandomState.dirichlet <numpy.random.mtrand.RandomState.dirichlet>` """ alpha = cupy.asarray(alpha) if size is None: size = alpha.shape else: size += alpha.shape y = cupy.empty(shape=size, dtype=dtype) _kernels.standard_gamma_kernel(alpha, self._rk_seed, y) y /= y.sum(axis=-1, keepdims=True) self._update_seed(y.size) return y def exponential(self, scale=1.0, size=None, dtype=float): """Returns an array of samples drawn from a exponential distribution. .. warning:: This function may synchronize the device. .. seealso:: :func:`cupy.random.exponential` for full documentation, :meth:`numpy.random.RandomState.exponential <numpy.random.mtrand.RandomState.exponential>` """ scale = cupy.asarray(scale, dtype) if (scale < 0).any(): # synchronize! raise ValueError('scale < 0') if size is None: size = scale.shape x = self.standard_exponential(size, dtype) x *= scale return x def f(self, dfnum, dfden, size=None, dtype=float): """Returns an array of samples drawn from the f distribution. .. 
seealso:: :func:`cupy.random.f` for full documentation, :meth:`numpy.random.RandomState.f <numpy.random.mtrand.RandomState.f>` """ dfnum, dfden = cupy.asarray(dfnum), cupy.asarray(dfden) if size is None: size = cupy.broadcast(dfnum, dfden).shape y = cupy.empty(shape=size, dtype=dtype) _kernels.f_kernel(dfnum, dfden, self._rk_seed, y) self._update_seed(y.size) return y def gamma(self, shape, scale=1.0, size=None, dtype=float): """Returns an array of samples drawn from a gamma distribution. .. seealso:: :func:`cupy.random.gamma` for full documentation, :meth:`numpy.random.RandomState.gamma <numpy.random.mtrand.RandomState.gamma>` """ shape, scale = cupy.asarray(shape), cupy.asarray(scale) if size is None: size = cupy.broadcast(shape, scale).shape y = cupy.empty(shape=size, dtype=dtype) _kernels.standard_gamma_kernel(shape, self._rk_seed, y) y *= scale self._update_seed(y.size) return y def geometric(self, p, size=None, dtype=int): """Returns an array of samples drawn from the geometric distribution. .. seealso:: :func:`cupy.random.geometric` for full documentation, :meth:`numpy.random.RandomState.geometric <numpy.random.mtrand.RandomState.geometric>` """ p = cupy.asarray(p) if size is None: size = p.shape y = cupy.empty(shape=size, dtype=dtype) _kernels.geometric_kernel(p, self._rk_seed, y) self._update_seed(y.size) return y def hypergeometric(self, ngood, nbad, nsample, size=None, dtype=int): """Returns an array of samples drawn from the hypergeometric distribution. .. 
seealso:: :func:`cupy.random.hypergeometric` for full documentation, :meth:`numpy.random.RandomState.hypergeometric <numpy.random.mtrand.RandomState.hypergeometric>` """ ngood, nbad, nsample = \ cupy.asarray(ngood), cupy.asarray(nbad), cupy.asarray(nsample) if size is None: size = cupy.broadcast(ngood, nbad, nsample).shape y = cupy.empty(shape=size, dtype=dtype) _kernels.hypergeometric_kernel(ngood, nbad, nsample, self._rk_seed, y) self._update_seed(y.size) return y _laplace_kernel = core.ElementwiseKernel( 'T x, T loc, T scale', 'T y', 'y = loc + scale * ((x <= 0.5) ? log(x + x): -log(x + x - 1.0))', 'laplace_kernel') def laplace(self, loc=0.0, scale=1.0, size=None, dtype=float): """Returns an array of samples drawn from the laplace distribution. .. seealso:: :func:`cupy.random.laplace` for full documentation, :meth:`numpy.random.RandomState.laplace <numpy.random.mtrand.RandomState.laplace>` """ loc = cupy.asarray(loc, dtype) scale = cupy.asarray(scale, dtype) if size is None: size = cupy.broadcast(loc, scale).shape x = self._random_sample_raw(size, dtype) RandomState._laplace_kernel(x, loc, scale, x) return x def logistic(self, loc=0.0, scale=1.0, size=None, dtype=float): """Returns an array of samples drawn from the logistic distribution. .. seealso:: :func:`cupy.random.logistic` for full documentation, :meth:`numpy.random.RandomState.logistic <numpy.random.mtrand.RandomState.logistic>` """ loc, scale = cupy.asarray(loc), cupy.asarray(scale) if size is None: size = cupy.broadcast(loc, scale).shape x = cupy.empty(shape=size, dtype=dtype) _kernels.open_uniform_kernel(self._rk_seed, x) self._update_seed(x.size) x = (1.0 - x) / x cupy.log(x, out=x) cupy.multiply(x, scale, out=x) cupy.add(x, loc, out=x) return x def lognormal(self, mean=0.0, sigma=1.0, size=None, dtype=float): """Returns an array of samples drawn from a log normal distribution. .. 
seealso:: :func:`cupy.random.lognormal` for full documentation, :meth:`numpy.random.RandomState.lognormal <numpy.random.mtrand.RandomState.lognormal>` """ dtype = _check_and_get_dtype(dtype) if dtype.char == 'f': func = curand.generateLogNormal else: func = curand.generateLogNormalDouble return self._generate_normal(func, size, dtype, mean, sigma) def logseries(self, p, size=None, dtype=int): """Returns an array of samples drawn from a log series distribution. .. warning:: This function may synchronize the device. .. seealso:: :func:`cupy.random.logseries` for full documentation, :meth:`numpy.random.RandomState.logseries <numpy.random.mtrand.RandomState.logseries>` """ p = cupy.asarray(p) if cupy.any(p <= 0): # synchronize! raise ValueError('p <= 0.0') if cupy.any(p >= 1): # synchronize! raise ValueError('p >= 1.0') if size is None: size = p.shape y = cupy.empty(shape=size, dtype=dtype) _kernels.logseries_kernel(p, self._rk_seed, y) self._update_seed(y.size) return y def multivariate_normal(self, mean, cov, size=None, check_valid='ignore', tol=1e-08, method='cholesky', dtype=float): """Returns an array of samples drawn from the multivariate normal distribution. .. warning:: This function calls one or more cuSOLVER routine(s) which may yield invalid results if input conditions are not met. To detect these invalid results, you can set the `linalg` configuration to a value that is not `ignore` in :func:`cupyx.errstate` or :func:`cupyx.seterr`. .. 
seealso:: :func:`cupy.random.multivariate_normal` for full documentation, :meth:`numpy.random.RandomState.multivariate_normal <numpy.random.mtrand.RandomState.multivariate_normal>` """ util.experimental('cupy.random.RandomState.multivariate_normal') mean = cupy.asarray(mean, dtype=dtype) cov = cupy.asarray(cov, dtype=dtype) if size is None: shape = [] elif isinstance(size, (int, cupy.integer)): shape = [size] else: shape = size if len(mean.shape) != 1: raise ValueError('mean must be 1 dimensional') if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]): raise ValueError('cov must be 2 dimensional and square') if mean.shape[0] != cov.shape[0]: raise ValueError('mean and cov must have same length') final_shape = list(shape[:]) final_shape.append(mean.shape[0]) if method not in {'eigh', 'svd', 'cholesky'}: raise ValueError( "method must be one of {'eigh', 'svd', 'cholesky'}") if check_valid != 'ignore': if check_valid != 'warn' and check_valid != 'raise': raise ValueError( "check_valid must equal 'warn', 'raise', or 'ignore'") if check_valid == 'warn': with cupyx.errstate(linalg='raise'): try: decomp = cupy.linalg.cholesky(cov) except LinAlgError: with cupyx.errstate(linalg='ignore'): if method != 'cholesky': if method == 'eigh': (s, u) = cupy.linalg.eigh(cov) psd = not cupy.any(s < -tol) if method == 'svd': (u, s, vh) = cupy.linalg.svd(cov) psd = cupy.allclose(cupy.dot(vh.T * s, vh), cov, rtol=tol, atol=tol) decomp = u * cupy.sqrt(cupy.abs(s)) if not psd: warnings.warn( "covariance is not positive-" + "semidefinite, output may be " + "invalid.", RuntimeWarning) else: warnings.warn( "covariance is not positive-" + "semidefinite, output *is* " + "invalid.", RuntimeWarning) decomp = cupy.linalg.cholesky(cov) else: with cupyx.errstate(linalg=check_valid): try: if method == 'cholesky': decomp = cupy.linalg.cholesky(cov) elif method == 'eigh': (s, u) = cupy.linalg.eigh(cov) decomp = u * cupy.sqrt(cupy.abs(s)) elif method == 'svd': (u, s, vh) = cupy.linalg.svd(cov) 
decomp = u * cupy.sqrt(cupy.abs(s)) except LinAlgError: raise LinAlgError("Matrix is not positive definite; if " + "matrix is positive-semidefinite, set" + "'check_valid' to 'warn'") x = self.standard_normal(final_shape, dtype=dtype).reshape(-1, mean.shape[0]) x = cupy.dot(decomp, x.T) x = x.T x += mean x.shape = tuple(final_shape) return x def negative_binomial(self, n, p, size=None, dtype=int): """Returns an array of samples drawn from the negative binomial distribution. .. warning:: This function may synchronize the device. .. seealso:: :func:`cupy.random.negative_binomial` for full documentation, :meth:`numpy.random.RandomState.negative_binomial <numpy.random.mtrand.RandomState.negative_binomial>` """ n = cupy.asarray(n) p = cupy.asarray(p) if cupy.any(n <= 0): # synchronize! raise ValueError('n <= 0') if cupy.any(p < 0): # synchronize! raise ValueError('p < 0') if cupy.any(p > 1): # synchronize! raise ValueError('p > 1') y = self.gamma(n, (1 - p) / p, size) return self.poisson(y, dtype=dtype) def normal(self, loc=0.0, scale=1.0, size=None, dtype=float): """Returns an array of normally distributed samples. .. seealso:: :func:`cupy.random.normal` for full documentation, :meth:`numpy.random.RandomState.normal <numpy.random.mtrand.RandomState.normal>` """ dtype = _check_and_get_dtype(dtype) if dtype.char == 'f': func = curand.generateNormal else: func = curand.generateNormalDouble return self._generate_normal(func, size, dtype, loc, scale) def pareto(self, a, size=None, dtype=float): """Returns an array of samples drawn from the pareto II distribution. .. 
seealso:: :func:`cupy.random.pareto_kernel` for full documentation, :meth:`numpy.random.RandomState.pareto <numpy.random.mtrand.RandomState.pareto>` """ a = cupy.asarray(a) x = self._random_sample_raw(size, dtype) cupy.log(x, out=x) cupy.exp(-x / a, out=x) return x - 1 def noncentral_chisquare(self, df, nonc, size=None, dtype=float): """Returns an array of samples drawn from the noncentral chi-square distribution. .. warning:: This function may synchronize the device. .. seealso:: :func:`cupy.random.noncentral_chisquare` for full documentation, :meth:`numpy.random.RandomState.noncentral_chisquare <numpy.random.mtrand.RandomState.noncentral_chisquare>` """ df, nonc = cupy.asarray(df), cupy.asarray(nonc) if cupy.any(df <= 0): # synchronize! raise ValueError('df <= 0') if cupy.any(nonc < 0): # synchronize! raise ValueError('nonc < 0') if size is None: size = cupy.broadcast(df, nonc).shape y = cupy.empty(shape=size, dtype=dtype) _kernels.noncentral_chisquare_kernel(df, nonc, self._rk_seed, y) self._update_seed(y.size) return y def noncentral_f(self, dfnum, dfden, nonc, size=None, dtype=float): """Returns an array of samples drawn from the noncentral F distribution. .. warning:: This function may synchronize the device. .. seealso:: :func:`cupy.random.noncentral_f` for full documentation, :meth:`numpy.random.RandomState.noncentral_f <numpy.random.mtrand.RandomState.noncentral_f>` """ dfnum, dfden, nonc = \ cupy.asarray(dfnum), cupy.asarray(dfden), cupy.asarray(nonc) if cupy.any(dfnum <= 0): # synchronize! raise ValueError('dfnum <= 0') if cupy.any(dfden <= 0): # synchronize! raise ValueError('dfden <= 0') if cupy.any(nonc < 0): # synchronize! 
raise ValueError('nonc < 0') if size is None: size = cupy.broadcast(dfnum, dfden, nonc).shape y = cupy.empty(shape=size, dtype=dtype) _kernels.noncentral_f_kernel(dfnum, dfden, nonc, self._rk_seed, y) self._update_seed(y.size) return y def poisson(self, lam=1.0, size=None, dtype=int): """Returns an array of samples drawn from the poisson distribution. .. seealso:: :func:`cupy.random.poisson` for full documentation, :meth:`numpy.random.RandomState.poisson <numpy.random.mtrand.RandomState.poisson>` """ lam = cupy.asarray(lam) if size is None: size = lam.shape y = cupy.empty(shape=size, dtype=dtype) _kernels.poisson_kernel(lam, self._rk_seed, y) self._update_seed(y.size) return y def power(self, a, size=None, dtype=float): """Returns an array of samples drawn from the power distribution. .. warning:: This function may synchronize the device. .. seealso:: :func:`cupy.random.power` for full documentation, :meth:`numpy.random.RandomState.power <numpy.random.mtrand.RandomState.power>` """ a = cupy.asarray(a) if cupy.any(a < 0): # synchronize! raise ValueError('a < 0') if size is None: size = a.shape x = self.standard_exponential(size=size, dtype=dtype) cupy.exp(-x, out=x) cupy.add(1, -x, out=x) cupy.power(x, 1. / a, out=x) return x def rand(self, *size, **kwarg): """Returns uniform random values over the interval ``[0, 1)``. .. seealso:: :func:`cupy.random.rand` for full documentation, :meth:`numpy.random.RandomState.rand <numpy.random.mtrand.RandomState.rand>` """ dtype = kwarg.pop('dtype', float) if kwarg: raise TypeError('rand() got unexpected keyword arguments %s' % ', '.join(kwarg.keys())) return self.random_sample(size=size, dtype=dtype) def randn(self, *size, **kwarg): """Returns an array of standard normal random values. .. 
seealso:: :func:`cupy.random.randn` for full documentation, :meth:`numpy.random.RandomState.randn <numpy.random.mtrand.RandomState.randn>` """ dtype = kwarg.pop('dtype', float) if kwarg: raise TypeError('randn() got unexpected keyword arguments %s' % ', '.join(kwarg.keys())) return self.normal(size=size, dtype=dtype) _mod1_kernel = core.ElementwiseKernel('', 'T x', 'x = (x == (T)1) ? 0 : x', 'cupy_random_x_mod_1') def _random_sample_raw(self, size, dtype): dtype = _check_and_get_dtype(dtype) out = cupy.empty(size, dtype=dtype) if dtype.char == 'f': func = curand.generateUniform else: func = curand.generateUniformDouble func(self._generator, out.data.ptr, out.size) return out def random_sample(self, size=None, dtype=float): """Returns an array of random values over the interval ``[0, 1)``. .. seealso:: :func:`cupy.random.random_sample` for full documentation, :meth:`numpy.random.RandomState.random_sample <numpy.random.mtrand.RandomState.random_sample>` """ out = self._random_sample_raw(size, dtype) RandomState._mod1_kernel(out) return out def rayleigh(self, scale=1.0, size=None, dtype=float): """Returns an array of samples drawn from a rayleigh distribution. .. warning:: This function may synchronize the device. .. seealso:: :func:`cupy.random.rayleigh` for full documentation, :meth:`numpy.random.RandomState.rayleigh <numpy.random.mtrand.RandomState.rayleigh>` """ scale = cupy.asarray(scale) if size is None: size = scale.shape if cupy.any(scale < 0): # synchronize! raise ValueError('scale < 0') x = self._random_sample_raw(size, dtype) x = cupy.log(x, out=x) x = cupy.multiply(x, -2., out=x) x = cupy.sqrt(x, out=x) x = cupy.multiply(x, scale, out=x) return x def _interval(self, mx, size): """Generate multiple integers independently sampled uniformly from ``[0, mx]``. Args: mx (int): Upper bound of the interval size (None or int or tuple): Shape of the array or the scalar returned. 
Returns: int or cupy.ndarray: If ``None``, an :class:`cupy.ndarray` with shape ``()`` is returned. If ``int``, 1-D array of length size is returned. If ``tuple``, multi-dimensional array with shape ``size`` is returned. Currently, only 32 bit or 64 bit integers can be sampled. """ # NOQA if size is None: size = () elif isinstance(size, int): size = size, if mx == 0: return cupy.zeros(size, dtype=numpy.uint32) if mx < 0: raise ValueError('mx must be non-negative (actual: {})'.format(mx)) elif mx <= _UINT32_MAX: dtype = numpy.uint32 elif mx <= _UINT64_MAX: dtype = numpy.uint64 else: raise ValueError( 'mx must be within uint64 range (actual: {})'.format(mx)) mask = (1 << mx.bit_length()) - 1 mask = cupy.array(mask, dtype=dtype) n = functools.reduce(operator.mul, size, 1) if n == 0: return cupy.empty(size, dtype=dtype) sample = cupy.empty((n, ), dtype=dtype) size32 = sample.view(dtype=numpy.uint32).size n_rem = n # The number of remaining elements to sample ret = None while n_rem > 0: # Call 32-bit RNG to fill 32-bit or 64-bit `sample` curand.generate(self._generator, sample.data.ptr, size32) # Drop the samples that exceed the upper limit sample &= mask success = sample <= mx if ret is None: # If the sampling has finished in the first iteration, # just return the sample. if success.all(): n_rem = 0 ret = sample break # Allocate the return array. ret = cupy.empty((n, ), dtype=dtype) n_succ = min(n_rem, int(success.sum())) ret[n - n_rem:n - n_rem + n_succ] = sample[success][:n_succ] n_rem -= n_succ assert n_rem == 0 return ret.reshape(size) def seed(self, seed=None): """Resets the state of the random number generator with a seed. .. 
seealso:: :func:`cupy.random.seed` for full documentation, :meth:`numpy.random.RandomState.seed <numpy.random.mtrand.RandomState.seed>` """ if seed is None: try: seed_str = binascii.hexlify(os.urandom(8)) seed = int(seed_str, 16) except NotImplementedError: seed = (time.time() * 1000000) % _UINT64_MAX else: if isinstance(seed, numpy.ndarray): seed = int(hashlib.md5(seed).hexdigest()[:16], 16) else: seed = int( numpy.asarray(seed).astype(numpy.uint64, casting='safe')) curand.setPseudoRandomGeneratorSeed(self._generator, seed) if (self.method not in (curand.CURAND_RNG_PSEUDO_MT19937, curand.CURAND_RNG_PSEUDO_MTGP32)): curand.setGeneratorOffset(self._generator, 0) self._rk_seed = seed def standard_cauchy(self, size=None, dtype=float): """Returns an array of samples drawn from the standard cauchy distribution. .. seealso:: :func:`cupy.random.standard_cauchy` for full documentation, :meth:`numpy.random.RandomState.standard_cauchy <numpy.random.mtrand.RandomState.standard_cauchy>` """ x = self.uniform(size=size, dtype=dtype) return cupy.tan(cupy.pi * (x - 0.5)) def standard_exponential(self, size=None, dtype=float): """Returns an array of samples drawn from the standard exp distribution. .. seealso:: :func:`cupy.random.standard_exponential` for full documentation, :meth:`numpy.random.RandomState.standard_exponential <numpy.random.mtrand.RandomState.standard_exponential>` """ x = self._random_sample_raw(size, dtype) return -cupy.log(x, out=x) def standard_gamma(self, shape, size=None, dtype=float): """Returns an array of samples drawn from a standard gamma distribution. .. 
seealso:: :func:`cupy.random.standard_gamma` for full documentation, :meth:`numpy.random.RandomState.standard_gamma <numpy.random.mtrand.RandomState.standard_gamma>` """ shape = cupy.asarray(shape) if size is None: size = shape.shape y = cupy.empty(shape=size, dtype=dtype) _kernels.standard_gamma_kernel(shape, self._rk_seed, y) self._update_seed(y.size) return y def standard_normal(self, size=None, dtype=float): """Returns samples drawn from the standard normal distribution. .. seealso:: :func:`cupy.random.standard_normal` for full documentation, :meth:`numpy.random.RandomState.standard_normal <numpy.random.mtrand.RandomState.standard_normal>` """ return self.normal(size=size, dtype=dtype) def standard_t(self, df, size=None, dtype=float): """Returns an array of samples drawn from the standard t distribution. .. seealso:: :func:`cupy.random.standard_t` for full documentation, :meth:`numpy.random.RandomState.standard_t <numpy.random.mtrand.RandomState.standard_t>` """ df = cupy.asarray(df) if size is None: size = df.shape y = cupy.empty(shape=size, dtype=dtype) _kernels.standard_t_kernel(df, self._rk_seed, y) self._update_seed(y.size) return y def tomaxint(self, size=None): """Draws integers between 0 and max integer inclusive. Args: size (int or tuple of ints): Output shape. Returns: cupy.ndarray: Drawn samples. .. 
seealso:: :meth:`numpy.random.RandomState.tomaxint <numpy.random.mtrand.RandomState.tomaxint>` """ if size is None: size = () sample = cupy.empty(size, dtype=cupy.int_) # cupy.random only uses int32 random generator size_in_int = sample.dtype.itemsize // 4 curand.generate(self._generator, sample.data.ptr, sample.size * size_in_int) # Disable sign bit sample &= cupy.iinfo(cupy.int_).max return sample _triangular_kernel = core.ElementwiseKernel( 'L left, M mode, R right', 'T x', """ T base, leftbase, ratio, leftprod, rightprod; base = right - left; leftbase = mode - left; ratio = leftbase / base; leftprod = leftbase*base; rightprod = (right - mode)*base; if (x <= ratio) { x = left + sqrt(x*leftprod); } else { x = right - sqrt((1.0 - x) * rightprod); } """, 'triangular_kernel') def triangular(self, left, mode, right, size=None, dtype=float): """Returns an array of samples drawn from the triangular distribution. .. warning:: This function may synchronize the device. .. seealso:: :func:`cupy.random.triangular` for full documentation, :meth:`numpy.random.RandomState.triangular <numpy.random.mtrand.RandomState.triangular>` """ left, mode, right = \ cupy.asarray(left), cupy.asarray(mode), cupy.asarray(right) if cupy.any(left > mode): # synchronize! raise ValueError('left > mode') if cupy.any(mode > right): # synchronize! raise ValueError('mode > right') if cupy.any(left == right): # synchronize! raise ValueError('left == right') if size is None: size = cupy.broadcast(left, mode, right).shape x = self.random_sample(size=size, dtype=dtype) return RandomState._triangular_kernel(left, mode, right, x) _scale_kernel = core.ElementwiseKernel('T low, T high', 'T x', 'x = T(low) + x * T(high - low)', 'cupy_scale') def uniform(self, low=0.0, high=1.0, size=None, dtype=float): """Returns an array of uniformly-distributed samples over an interval. .. 
seealso:: :func:`cupy.random.uniform` for full documentation, :meth:`numpy.random.RandomState.uniform <numpy.random.mtrand.RandomState.uniform>` """ dtype = numpy.dtype(dtype) rand = self.random_sample(size=size, dtype=dtype) if not numpy.isscalar(low): low = cupy.asarray(low, dtype) if not numpy.isscalar(high): high = cupy.asarray(high, dtype) return RandomState._scale_kernel(low, high, rand) def vonmises(self, mu, kappa, size=None, dtype=float): """Returns an array of samples drawn from the von Mises distribution. .. seealso:: :func:`cupy.random.vonmises` for full documentation, :meth:`numpy.random.RandomState.vonmises <numpy.random.mtrand.RandomState.vonmises>` """ mu, kappa = cupy.asarray(mu), cupy.asarray(kappa) if size is None: size = cupy.broadcast(mu, kappa).shape y = cupy.empty(shape=size, dtype=dtype) _kernels.vonmises_kernel(mu, kappa, self._rk_seed, y) self._update_seed(y.size) return y _wald_kernel = core.ElementwiseKernel( 'T mean, T scale, T U', 'T X', """ T mu_2l; T Y; mu_2l = mean / (2*scale); Y = mean*X*X; X = mean + mu_2l*(Y - sqrt(4*scale*Y + Y*Y)); if (U > mean/(mean+X)) { X = mean*mean/X; } """, 'wald_scale') def wald(self, mean, scale, size=None, dtype=float): """Returns an array of samples drawn from the Wald distribution. .. seealso:: :func:`cupy.random.wald` for full documentation, :meth:`numpy.random.RandomState.wald <numpy.random.mtrand.RandomState.wald>` """ mean, scale = \ cupy.asarray(mean, dtype=dtype), cupy.asarray(scale, dtype=dtype) if size is None: size = cupy.broadcast(mean, scale).shape x = self.normal(size=size, dtype=dtype) u = self.random_sample(size=size, dtype=dtype) return RandomState._wald_kernel(mean, scale, u, x) def weibull(self, a, size=None, dtype=float): """Returns an array of samples drawn from the weibull distribution. .. warning:: This function may synchronize the device. .. 
seealso:: :func:`cupy.random.weibull` for full documentation, :meth:`numpy.random.RandomState.weibull <numpy.random.mtrand.RandomState.weibull>` """ a = cupy.asarray(a) if cupy.any(a < 0): # synchronize! raise ValueError('a < 0') x = self.standard_exponential(size, dtype) cupy.power(x, 1. / a, out=x) return x def zipf(self, a, size=None, dtype=int): """Returns an array of samples drawn from the Zipf distribution. .. warning:: This function may synchronize the device. .. seealso:: :func:`cupy.random.zipf` for full documentation, :meth:`numpy.random.RandomState.zipf <numpy.random.mtrand.RandomState.zipf>` """ a = cupy.asarray(a) if cupy.any(a <= 1.0): # synchronize! raise ValueError('\'a\' must be a valid float > 1.0') if size is None: size = a.shape y = cupy.empty(shape=size, dtype=dtype) _kernels.zipf_kernel(a, self._rk_seed, y) self._update_seed(y.size) return y def choice(self, a, size=None, replace=True, p=None): """Returns an array of random values from a given 1-D array. .. seealso:: :func:`cupy.random.choice` for full document, :meth:`numpy.random.choice <numpy.random.mtrand.RandomState.choice>` """ if a is None: raise ValueError('a must be 1-dimensional or an integer') if isinstance(a, cupy.ndarray) and a.ndim == 0: raise NotImplementedError if isinstance(a, int): a_size = a if a_size <= 0: raise ValueError('a must be greater than 0') else: a = cupy.array(a, copy=False) if a.ndim != 1: raise ValueError('a must be 1-dimensional or an integer') else: a_size = len(a) if a_size == 0: raise ValueError('a must be non-empty') if p is not None: p = cupy.array(p) if p.ndim != 1: raise ValueError('p must be 1-dimensional') if len(p) != a_size: raise ValueError('a and p must have same size') if not (p >= 0).all(): raise ValueError('probabilities are not non-negative') p_sum = cupy.sum(p).get() if not numpy.allclose(p_sum, 1): raise ValueError('probabilities do not sum to 1') if size is None: raise NotImplementedError shape = size size = numpy.prod(shape) if not replace 
and p is None: if a_size < size: raise ValueError( 'Cannot take a larger sample than population when ' '\'replace=False\'') if isinstance(a, int): indices = cupy.arange(a, dtype='l') else: indices = a.copy() self.shuffle(indices) return indices[:size].reshape(shape) if not replace: raise NotImplementedError if p is not None: p = cupy.broadcast_to(p, (size, a_size)) index = cupy.argmax(cupy.log(p) + self.gumbel(size=(size, a_size)), axis=1) if not isinstance(shape, int): index = cupy.reshape(index, shape) else: index = self.randint(0, a_size, size=shape) # Align the dtype with NumPy index = index.astype(cupy.int64, copy=False) if isinstance(a, int): return index if index.ndim == 0: return cupy.array(a[index], dtype=a.dtype) return a[index] def shuffle(self, a): """Returns a shuffled array. .. seealso:: :func:`cupy.random.shuffle` for full document, :meth:`numpy.random.shuffle <numpy.random.mtrand.RandomState.shuffle>` """ if not isinstance(a, cupy.ndarray): raise TypeError('The array must be cupy.ndarray') if a.ndim == 0: raise TypeError('An array whose ndim is 0 is not supported') a[:] = a[self._permutation(len(a))] def permutation(self, a): """Returns a permuted range or a permutation of an array.""" if isinstance(a, int): return self._permutation(a) else: return a[self._permutation(len(a))] def _permutation(self, num): """Returns a permuted range.""" sample = cupy.empty((num), dtype=numpy.int32) curand.generate(self._generator, sample.data.ptr, num) if 128 < num <= 32 * 1024 * 1024: array = cupy.arange(num, dtype=numpy.int32) # apply sort of cache blocking block_size = 1 * 1024 * 1024 # The block size above is a value determined from the L2 cache size # of GP100 (L2 cache size / size of int = 4MB / 4B = 1M). It may be # better to change the value base on the L2 cache size of the GPU # you use. # When num > block_size, cupy kernel: _cupy_permutation is to be # launched multiple times. 
However, it is observed that performance # will be degraded if the launch count is too many. Therefore, # the block size is adjusted so that launch count will not exceed # twelve Note that this twelve is the value determined from # measurement on GP100. while num // block_size > 12: block_size *= 2 for j_start in range(0, num, block_size): j_end = j_start + block_size _cupy_permutation(sample, j_start, j_end, array, size=num) else: # When num > 32M, argsort is used, because it is faster than # custom kernel. See https://github.com/cupy/cupy/pull/603. array = cupy.argsort(sample) return array _gumbel_kernel = core.ElementwiseKernel( 'T x, T loc, T scale', 'T y', 'y = T(loc) - log(-log(x)) * T(scale)', 'gumbel_kernel') def gumbel(self, loc=0.0, scale=1.0, size=None, dtype=float): """Returns an array of samples drawn from a Gumbel distribution. .. seealso:: :func:`cupy.random.gumbel` for full documentation, :meth:`numpy.random.RandomState.gumbel <numpy.random.mtrand.RandomState.gumbel>` """ x = self._random_sample_raw(size=size, dtype=dtype) if not numpy.isscalar(loc): loc = cupy.asarray(loc, dtype) if not numpy.isscalar(scale): scale = cupy.asarray(scale, dtype) RandomState._gumbel_kernel(x, loc, scale, x) return x def randint(self, low, high=None, size=None, dtype='l'): """Returns a scalar or an array of integer values over ``[low, high)``. .. seealso:: :func:`cupy.random.randint` for full documentation, :meth:`numpy.random.RandomState.randint <numpy.random.mtrand.RandomState.randint>` """ if high is None: lo = 0 hi1 = int(low) - 1 else: lo = int(low) hi1 = int(high) - 1 if lo > hi1: raise ValueError('low >= high') if lo < cupy.iinfo(dtype).min: raise ValueError('low is out of bounds for {}'.format( cupy.dtype(dtype).name)) if hi1 > cupy.iinfo(dtype).max: raise ValueError('high is out of bounds for {}'.format( cupy.dtype(dtype).name)) diff = hi1 - lo x = self._interval(diff, size).astype(dtype, copy=False) cupy.add(x, lo, out=x) return x
class _compressed_sparse_matrix(sparse_data._data_matrix,
                                sparse_data._minmax_mixin):

    """Common implementation of compressed sparse matrices.

    The matrix is stored as three 1-D arrays: ``data``, ``indices`` and
    ``indptr``. Subclasses are expected to provide ``format`` and to
    override :meth:`_swap`, :meth:`_convert_dense` and :meth:`_add_sparse`,
    which raise ``NotImplementedError`` here. ``_swap`` maps a
    ``(row, column)`` pair to ``(major, minor)`` order and back
    (presumably the identity for CSR and a swap for CSC - confirm against
    the subclasses).
    """

    # Accumulates into ``answer[0]`` every stored value whose minor index
    # equals ``minor``.
    _compress_getitem_kern = core.ElementwiseKernel(
        'T d, S ind, int32 minor', 'raw T answer',
        'if (ind == minor) atomicAdd(&answer[0], d);',
        'compress_getitem')

    # Same as above for complex data, with real and imaginary parts
    # accumulated separately.
    _compress_getitem_complex_kern = core.ElementwiseKernel(
        'T real, T imag, S ind, int32 minor',
        'raw T answer_real, raw T answer_imag',
        '''
        if (ind == minor) {
          atomicAdd(&answer_real[0], real);
          atomicAdd(&answer_imag[0], imag);
        }
        ''',
        'compress_getitem_complex')

    # The reduction kernels below handle one major-axis block per thread.
    # ``x[tid]`` and ``y[tid]`` are the start and stop offsets of the block
    # in ``data``; ``length`` is the size of the minor axis. A NaN entry
    # makes the result NaN.
    _max_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void max_reduction(double* data, int* x, int* y, int length,
                           double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length == length){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the block and update
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] > running_value){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'max_reduction')

    _max_nonzero_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void max_nonzero_reduction(double* data, int* x, int* y, int length,
                                   double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length > 0){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (running_value < data[entry]){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'max_nonzero_reduction')

    _min_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void min_reduction(double* data, int* x, int* y, int length,
                           double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length == length){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the block to update the initial value
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] < running_value){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'min_reduction')

    _min_nonzero_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void min_nonzero_reduction(double* data, int* x, int* y, int length,
                                   double* z) {
            // Get the index of hte block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length > 0){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (running_value > data[entry]){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'min_nonzero_reduction')

    # In the two arg-reduction kernels the search for the first implicit
    # zero tests the block bound BEFORE reading ``indices``. The previous
    # operand order read one element past the end of the block (and past
    # the end of ``indices`` for the last block).
    _max_arg_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void max_arg_reduction(double* data, int* indices, int* x, int* y,
                               int length, long long* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            int data_index = 0;
            double data_value = 0;

            if (block_length == length){
                // Block is dense. Fill the first value
                data_value = data[x[tid]];
                data_index = indices[x[tid]];
            } else if (block_length > 0) {
                // Block has at least one zero. Assign first occurrence as the
                // starting reference
                data_value = 0;
                for (data_index = 0; data_index < length; data_index++){
                    if (x[tid] + data_index >= y[tid] ||
                        data_index != indices[x[tid] + data_index]){
                        break;
                    }
                }
            } else {
                // Zero valued array
                data_value = 0;
                data_index = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    data_value = nan("");
                    data_index = 0;
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] > data_value){
                        data_index = indices[entry];
                        data_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = data_index;
        }
        ''', 'max_arg_reduction')

    _min_arg_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void min_arg_reduction(double* data, int* indices, int* x, int* y,
                               int length, long long* z) {
            // Get the index of hte block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            int data_index = 0;
            double data_value = 0;

            if (block_length == length){
                // Block is dense. Fill the first value
                data_value = data[x[tid]];
                data_index = indices[x[tid]];
            } else if (block_length > 0) {
                // Block has at least one zero. Assign first occurrence as the
                // starting reference
                data_value = 0;
                for (data_index = 0; data_index < length; data_index++){
                    if (x[tid] + data_index >= y[tid] ||
                        data_index != indices[x[tid] + data_index]){
                        break;
                    }
                }
            } else {
                // Zero valued array
                data_value = 0;
                data_index = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    data_value = nan("");
                    data_index = 0;
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] < data_value){
                        data_index = indices[entry];
                        data_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = data_index;
        }
        ''', 'min_arg_reduction')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        """Builds the matrix from one of the supported initializers.

        Args:
            arg1: One of: a sparse matrix of this package, a shape tuple
                (creates an empty matrix), a ``scipy.sparse`` matrix (when
                SciPy is available), a ``(data, indices, indptr)`` tuple of
                1-D dense arrays, or a dense array of at most 2 dimensions.
            shape (tuple of int): Shape of the matrix. When omitted it is
                taken from ``arg1``, or inferred from ``indptr`` and the
                largest index for the 3-tuple form.
            dtype: Data type. Only float32, float64, complex64 and
                complex128 are accepted.
            copy (bool): Whether the given arrays are copied. Ignored where
                the conversion already produces fresh arrays.

        Raises:
            ValueError: On an invalid shape, initializer, dtype or index
                pointer size.
            TypeError: If a dense ``arg1`` has more than 2 dimensions.
        """
        if shape is not None:
            if not util.isshape(shape):
                raise ValueError('invalid shape (must be a 2-tuple of int)')
            shape = int(shape[0]), int(shape[1])

        if base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            indices = x.indices
            indptr = x.indptr

            if arg1.format != self.format:
                # When formats are different, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

            has_canonical_format = x.has_canonical_format
        elif util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            data = basic.zeros(0, dtype if dtype else 'd')
            indices = basic.zeros(0, 'i')
            indptr = basic.zeros(self._swap(m, n)[0] + 1, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False
            has_canonical_format = True
        elif scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupyx.scipy.sparse
            x = arg1.asformat(self.format)
            data = cupy.array(x.data)
            indices = cupy.array(x.indices, dtype='i')
            indptr = cupy.array(x.indptr, dtype='i')
            copy = False

            if shape is None:
                shape = arg1.shape
            has_canonical_format = x.has_canonical_format
        elif isinstance(arg1, tuple) and len(arg1) == 3:
            data, indices, indptr = arg1
            if not (base.isdense(data) and data.ndim == 1 and
                    base.isdense(indices) and indices.ndim == 1 and
                    base.isdense(indptr) and indptr.ndim == 1):
                raise ValueError(
                    'data, indices, and indptr should be 1-D')

            if len(data) != len(indices):
                raise ValueError('indices and data should have the same size')

            # Nothing is known about ordering or duplicates of raw arrays.
            has_canonical_format = False
        elif base.isdense(arg1):
            if arg1.ndim > 2:
                raise TypeError('expected dimension <= 2 array or matrix')
            elif arg1.ndim == 1:
                arg1 = arg1[None]
            elif arg1.ndim == 0:
                arg1 = arg1[None, None]
            data, indices, indptr = self._convert_dense(arg1)
            copy = False
            if shape is None:
                shape = arg1.shape
            has_canonical_format = True
        else:
            raise ValueError(
                'Unsupported initializer format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype != 'f' and dtype != 'd' and dtype != 'F' and dtype != 'D':
            raise ValueError(
                'Only float32, float64, complex64 and complex128 '
                'are supported')

        data = data.astype(dtype, copy=copy)
        sparse_data._data_matrix.__init__(self, data)

        self.indices = indices.astype('i', copy=copy)
        self.indptr = indptr.astype('i', copy=copy)

        if shape is None:
            shape = self._swap(len(indptr) - 1, int(indices.max()) + 1)

        major, minor = self._swap(*shape)
        if len(indptr) != major + 1:
            raise ValueError('index pointer size (%d) should be (%d)'
                             % (len(indptr), major + 1))

        self._descr = cusparse.MatDescriptor.create()
        self._shape = shape
        self._has_canonical_format = has_canonical_format

    def _with_data(self, data, copy=True):
        """Returns a matrix with the same structure and the given ``data``.

        Args:
            data: New data array.
            copy (bool): If ``True``, ``indices`` and ``indptr`` are copied;
                otherwise they are shared with ``self``.
        """
        if copy:
            return self.__class__(
                (data, self.indices.copy(), self.indptr.copy()),
                shape=self.shape,
                dtype=data.dtype)
        else:
            return self.__class__(
                (data, self.indices, self.indptr),
                shape=self.shape,
                dtype=data.dtype)

    def _convert_dense(self, x):
        """Converts a dense array to ``(data, indices, indptr)``.

        Must be overridden by subclasses.
        """
        raise NotImplementedError

    def _swap(self, x, y):
        """Maps ``(row, column)`` order to ``(major, minor)`` order.

        Must be overridden by subclasses.
        """
        raise NotImplementedError

    def _add_sparse(self, other, alpha, beta):
        """Computes ``alpha * self + beta * other`` for a sparse ``other``.

        Must be overridden by subclasses.
        """
        raise NotImplementedError

    def _add(self, other, lhs_negative, rhs_negative):
        """Adds ``other`` to ``self`` with optional negation of either side.

        Args:
            other: Scalar zero, sparse matrix or dense array.
            lhs_negative (bool): Negate ``self`` before adding.
            rhs_negative (bool): Negate ``other`` before adding.

        Returns:
            The result, or ``NotImplemented`` for unsupported operands.

        Raises:
            NotImplementedError: If ``other`` is a nonzero scalar.
        """
        if cupy.isscalar(other):
            if other == 0:
                if lhs_negative:
                    return -self
                else:
                    return self.copy()
            else:
                raise NotImplementedError(
                    'adding a nonzero scalar to a sparse matrix is not '
                    'supported')
        elif base.isspmatrix(other):
            alpha = -1 if lhs_negative else 1
            beta = -1 if rhs_negative else 1
            return self._add_sparse(other, alpha, beta)
        elif base.isdense(other):
            if lhs_negative:
                if rhs_negative:
                    return -self.todense() - other
                else:
                    return other - self.todense()
            else:
                if rhs_negative:
                    return self.todense() - other
                else:
                    return self.todense() + other
        else:
            return NotImplemented

    def __add__(self, other):
        return self._add(other, False, False)

    def __radd__(self, other):
        return self._add(other, False, False)

    def __sub__(self, other):
        return self._add(other, False, True)

    def __rsub__(self, other):
        return self._add(other, True, False)

    def __getitem__(self, slices):
        """Returns a single element or a contiguous major-axis slice.

        Supported forms are a pair of scalar indices, a scalar or a slice
        (with step 1) along the major axis combined with a full slice along
        the minor axis, and the same with ``Ellipsis``.

        Raises:
            IndexError: For ``None`` (newaxis), too many indices or an index
                out of bounds.
            ValueError: For any other unsupported indexing.
        """
        if isinstance(slices, tuple):
            slices = list(slices)
        elif isinstance(slices, list):
            slices = list(slices)
            if all([isinstance(s, int) for s in slices]):
                slices = [slices]
        else:
            slices = [slices]

        ellipsis = -1
        n_ellipsis = 0
        for i, s in enumerate(slices):
            if s is None:
                raise IndexError('newaxis is not supported')
            elif s is Ellipsis:
                ellipsis = i
                n_ellipsis += 1
        if n_ellipsis > 0:
            # Expand the (last) Ellipsis into as many full slices as needed.
            ellipsis_size = self.ndim - (len(slices) - 1)
            slices[ellipsis:ellipsis + 1] = [slice(None)] * ellipsis_size

        if len(slices) == 2:
            row, col = slices
        elif len(slices) == 1:
            row, col = slices[0], slice(None)
        else:
            raise IndexError('invalid number of indices')

        major, minor = self._swap(row, col)
        major_size, minor_size = self._swap(*self._shape)
        if numpy.isscalar(major):
            i = int(major)
            if i < 0:
                i += major_size
            if not (0 <= i < major_size):
                raise IndexError('index out of bounds')
            if numpy.isscalar(minor):
                j = int(minor)
                if j < 0:
                    j += minor_size
                if not (0 <= j < minor_size):
                    raise IndexError('index out of bounds')
                return self._get_single(i, j)
            elif minor == slice(None):
                return self._get_major_slice(slice(i, i + 1))
        elif isinstance(major, slice):
            if minor == slice(None):
                return self._get_major_slice(major)

        raise ValueError('unsupported indexing')

    def _get_single(self, major, minor):
        """Returns the element at ``(major, minor)`` as a 0-dim array.

        All stored entries of the major block whose index equals ``minor``
        are summed, so duplicate entries are accumulated.
        """
        start = self.indptr[major]
        end = self.indptr[major + 1]
        answer = cupy.zeros((), self.dtype)
        data = self.data[start:end]
        indices = self.indices[start:end]
        if self.dtype.kind == 'c':
            self._compress_getitem_complex_kern(
                data.real, data.imag, indices, minor,
                answer.real, answer.imag)
        else:
            self._compress_getitem_kern(
                data, indices, minor, answer)
        return answer[()]

    def _get_major_slice(self, major):
        """Returns the sub-matrix selected by a slice along the major axis.

        Args:
            major (slice): Slice along the major axis; only step 1 is
                supported.

        Raises:
            ValueError: If the step of the slice is not 1.
        """
        major_size, minor_size = self._swap(*self._shape)
        # major.indices cannot be used because scipy.sparse behaves differently
        major_start = major.start
        major_stop = major.stop
        major_step = major.step
        if major_start is None:
            major_start = 0
        if major_stop is None:
            major_stop = major_size
        if major_step is None:
            major_step = 1
        if major_start < 0:
            major_start += major_size
        if major_stop < 0:
            major_stop += major_size
        major_start = max(min(major_start, major_size), 0)
        major_stop = max(min(major_stop, major_size), 0)

        if major_step != 1:
            raise ValueError('slicing with step != 1 not supported')

        if not (major_start <= major_stop):
            # will give an empty slice, but preserve shape on the other axis
            major_start = major_stop

        start = self.indptr[major_start]
        stop = self.indptr[major_stop]
        data = self.data[start:stop]
        # Rebase the index pointers so that the new matrix starts at 0.
        indptr = self.indptr[major_start:major_stop + 1] - start
        indices = self.indices[start:stop]

        shape = self._swap(len(indptr) - 1, minor_size)
        return self.__class__(
            (data, indices, indptr), shape=shape, dtype=self.dtype, copy=False)

    @property
    def has_canonical_format(self):
        """Whether the matrix is known to be in canonical format."""
        return self._has_canonical_format

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.
        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros.

        Args:
            axis: Not supported yet.

        Returns:
            int: The number of stored values.

        Raises:
            ValueError: If ``axis`` is not ``None``.

        """
        if axis is None:
            return self.data.size
        else:
            raise ValueError('axis is not supported yet')

    # TODO(unno): Implement sorted_indices

    def sum_duplicates(self):
        """Sums duplicated entries in place.

        Does nothing when the matrix is already flagged as canonical. The
        matrix is otherwise rebuilt through its COO representation.
        """
        if self._has_canonical_format:
            return
        if self.data.size == 0:
            self._has_canonical_format = True
            return
        coo = self.tocoo()
        coo.sum_duplicates()
        self.__init__(coo.asformat(self.format))
        self._has_canonical_format = True

    #####################
    # Reduce operations #
    #####################

    def _minor_reduce(self, ufunc, axis, nonzero):
        """Reduce each major block to its maximum or minimum value.

        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted; ``cupy.amax`` or ``cupy.amin``. For any other
                value no kernel is launched and zeros are returned.
            axis (int): Axis over which the reduction is conducted (0 or 1).
                One thread is launched per entry of the other axis, which is
                expected to be the major axis of the storage.
            nonzero (bool): If ``True``, the running value of a non-empty
                block starts from its first stored entry. Otherwise it
                starts from the first stored entry only when the block is
                completely filled, and from 0 in all other cases.

        Returns:
            (cupy.ndarray): float64 reduce result for each major index.

        Raises:
            ValueError: If ``axis`` is neither 0 nor 1.

        """
        if axis == 1:
            n_blocks, block_size = self.shape[0], self.shape[1]
        elif axis == 0:
            n_blocks, block_size = self.shape[1], self.shape[0]
        else:
            raise ValueError('axis must be 0 or 1 (actual: {})'.format(axis))

        if nonzero:
            max_kern = self._max_nonzero_reduction_kern
            min_kern = self._min_nonzero_reduction_kern
        else:
            max_kern = self._max_reduction_kern
            min_kern = self._min_reduction_kern

        # Create the vector to hold output
        value = cupy.zeros(n_blocks).astype(cupy.float64)

        # Call to the appropriate kernel function
        for func, kern in ((cupy.amax, max_kern), (cupy.amin, min_kern)):
            if ufunc == func:
                # One thread per block; the kernels operate on doubles.
                kern((n_blocks,), (1,),
                     (self.data.astype(cupy.float64),
                      self.indptr[:len(self.indptr) - 1],
                      self.indptr[1:], cupy.int64(block_size),
                      value))

        return value

    def _arg_minor_reduce(self, ufunc, axis):
        """Reduce each major block to the index of its maximum or minimum.

        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted; ``cupy.argmax`` or ``cupy.argmin``. For any other
                value no kernel is launched and zeros are returned.
            axis (int): Axis over which the reduction is conducted (0 or 1).
                One thread is launched per entry of the other axis, which is
                expected to be the major axis of the storage.

        Returns:
            (cupy.ndarray): int64 minor index of the selected entry for each
                major index.

        Raises:
            ValueError: If ``axis`` is neither 0 nor 1.

        """
        if axis == 1:
            n_blocks, block_size = self.shape[0], self.shape[1]
        elif axis == 0:
            n_blocks, block_size = self.shape[1], self.shape[0]
        else:
            raise ValueError('axis must be 0 or 1 (actual: {})'.format(axis))

        # Create the vector to hold output
        value = cupy.zeros(n_blocks).astype(cupy.int64)

        # Call to the appropriate kernel function
        for func, kern in ((cupy.argmax, self._max_arg_reduction_kern),
                           (cupy.argmin, self._min_arg_reduction_kern)):
            if ufunc == func:
                # One thread per block; the kernels operate on doubles.
                kern((n_blocks,), (1,),
                     (self.data.astype(cupy.float64),
                      self.indices,
                      self.indptr[:len(self.indptr) - 1],
                      self.indptr[1:], cupy.int64(block_size),
                      value))

        return value
class _compressed_sparse_matrix(sparse_data._data_matrix,
                                sparse_data._minmax_mixin,
                                _index.IndexMixin):
    """Shared implementation for compressed (major/minor) sparse matrices.

    The matrix is stored as three 1-D arrays: ``data``, ``indices`` and
    ``indptr``.  Subclasses must provide :meth:`_swap`,
    :meth:`_convert_dense`, :meth:`_add_sparse` and :meth:`sort_indices`,
    which raise ``NotImplementedError`` here.
    """

    # ------------------------------------------------------------------
    # Reduction kernels.
    #
    # Every kernel is launched with one thread per major slice (``tid``).
    # ``x`` and ``y`` hold the start and end offsets of each slice in
    # ``data`` and ``length`` is the size of the minor axis, so
    # ``y[tid] - x[tid] == length`` means the slice stores no implicit
    # zeros.
    # ------------------------------------------------------------------

    _max_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void max_reduction(double* data, int* x, int* y, int length,
                           double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length == length){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the block and update
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] > running_value){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'max_reduction')

    _max_nonzero_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void max_nonzero_reduction(double* data, int* x, int* y, int length,
                                   double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length > 0){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (running_value < data[entry]){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'max_nonzero_reduction')

    _min_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void min_reduction(double* data, int* x, int* y, int length,
                           double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length == length){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the block to update the initial value
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] < running_value){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'min_reduction')

    _min_nonzero_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void min_nonzero_reduction(double* data, int* x, int* y, int length,
                                   double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (block_length > 0){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (running_value > data[entry]){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }
        ''', 'min_nonzero_reduction')

    _max_arg_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void max_arg_reduction(double* data, int* indices, int* x, int* y,
                               int length, long long* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            int data_index = 0;
            double data_value = 0;

            if (block_length == length){
                // Block is dense. Fill the first value
                data_value = data[x[tid]];
                data_index = indices[x[tid]];
            } else if (block_length > 0) {
                // Block has at least one zero. Assign first occurrence as
                // the starting reference
                data_value = 0;
                for (data_index = 0; data_index < length; data_index++){
                    // Test the bound first so that indices[] is never read
                    // past the end of this block.
                    if (x[tid] + data_index >= y[tid] ||
                        data_index != indices[x[tid] + data_index]){
                        break;
                    }
                }
            } else {
                // Zero valued array
                data_value = 0;
                data_index = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    data_value = nan("");
                    data_index = 0;
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] > data_value){
                        data_index = indices[entry];
                        data_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = data_index;
        }
        ''', 'max_arg_reduction')

    _min_arg_reduction_kern = core.RawKernel(r'''
        extern "C" __global__
        void min_arg_reduction(double* data, int* indices, int* x, int* y,
                               int length, long long* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            int data_index = 0;
            double data_value = 0;

            if (block_length == length){
                // Block is dense. Fill the first value
                data_value = data[x[tid]];
                data_index = indices[x[tid]];
            } else if (block_length > 0) {
                // Block has at least one zero. Assign first occurrence as
                // the starting reference
                data_value = 0;
                for (data_index = 0; data_index < length; data_index++){
                    // Test the bound first so that indices[] is never read
                    // past the end of this block.
                    if (x[tid] + data_index >= y[tid] ||
                        data_index != indices[x[tid] + data_index]){
                        break;
                    }
                }
            } else {
                // Zero valued array
                data_value = 0;
                data_index = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    data_value = nan("");
                    data_index = 0;
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] < data_value){
                        data_index = indices[entry];
                        data_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = data_index;
        }
        ''', 'min_arg_reduction')

    # One work item per major slice: ``diff`` is true when the minor
    # indices of slice ``i`` are in non-decreasing order.
    # TODO(leofang): rewrite a more load-balanced approach than this naive one?
    _has_sorted_indices_kern = core.ElementwiseKernel(
        'raw T indptr, raw T indices',
        'bool diff',
        '''
        bool diff_out = true;
        for (T jj = indptr[i]; jj < indptr[i+1] - 1; jj++) {
            if (indices[jj] > indices[jj+1]){
                diff_out = false;
            }
        }
        diff = diff_out;
        ''', 'has_sorted_indices')

    # One work item per major slice: ``diff`` is true when ``indptr`` does
    # not decrease across slice ``i`` and its minor indices are strictly
    # increasing (sorted, no duplicates).
    # TODO(leofang): rewrite a more load-balanced approach than this naive one?
    _has_canonical_format_kern = core.ElementwiseKernel(
        'raw T indptr, raw T indices',
        'bool diff',
        '''
        bool diff_out = true;
        if (indptr[i] > indptr[i+1]) {
            diff = false;
            return;
        }
        for (T jj = indptr[i]; jj < indptr[i+1] - 1; jj++) {
            if (indices[jj] >= indices[jj+1]) {
                diff_out = false;
            }
        }
        diff = diff_out;
        ''', 'has_canonical_format')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        """Build the matrix from one of the supported initializers.

        Args:
            arg1: A sparse matrix of this package, a shape tuple, a
                ``scipy.sparse`` matrix (when SciPy is available), a
                2-tuple forwarded to ``coo.coo_matrix``, a
                ``(data, indices, indptr)`` 3-tuple, or a dense array of
                at most two dimensions.
            shape (tuple or None): Shape of the matrix.  Inferred from
                ``arg1`` when omitted.
            dtype: Data type; must be bool, float32, float64, complex64
                or complex128.
            copy (bool): Whether the given arrays are copied.  Ignored by
                the branches that always produce fresh arrays.

        Raises:
            ValueError: For an invalid shape, an unsupported initializer,
                an unsupported dtype or inconsistent array sizes.
            TypeError: If a dense ``arg1`` has more than two dimensions.
        """
        if shape is not None:
            if not _util.isshape(shape):
                raise ValueError('invalid shape (must be a 2-tuple of int)')
            shape = int(shape[0]), int(shape[1])

        if base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            indices = x.indices
            indptr = x.indptr

            if arg1.format != self.format:
                # When formats are different, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

        elif _util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            data = basic.zeros(0, dtype if dtype else 'd')
            indices = basic.zeros(0, 'i')
            indptr = basic.zeros(self._swap(m, n)[0] + 1, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False

        elif scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupyx.scipy.sparse
            x = arg1.asformat(self.format)
            data = cupy.array(x.data)
            indices = cupy.array(x.indices, dtype='i')
            indptr = cupy.array(x.indptr, dtype='i')
            copy = False

            if shape is None:
                shape = arg1.shape

        elif isinstance(arg1, tuple) and len(arg1) == 2:
            # Note: This implementation is not efficient, as it first
            # constructs a sparse matrix with coo format, then converts it to
            # compressed format.
            sp_coo = coo.coo_matrix(arg1, shape=shape, dtype=dtype, copy=copy)
            sp_compressed = sp_coo.asformat(self.format)
            data = sp_compressed.data
            indices = sp_compressed.indices
            indptr = sp_compressed.indptr

        elif isinstance(arg1, tuple) and len(arg1) == 3:
            data, indices, indptr = arg1
            if not (base.isdense(data) and data.ndim == 1 and
                    base.isdense(indices) and indices.ndim == 1 and
                    base.isdense(indptr) and indptr.ndim == 1):
                raise ValueError('data, indices, and indptr should be 1-D')

            if len(data) != len(indices):
                raise ValueError('indices and data should have the same size')

        elif base.isdense(arg1):
            if arg1.ndim > 2:
                raise TypeError('expected dimension <= 2 array or matrix')
            elif arg1.ndim == 1:
                arg1 = arg1[None]
            elif arg1.ndim == 0:
                arg1 = arg1[None, None]
            data, indices, indptr = self._convert_dense(arg1)
            copy = False
            if shape is None:
                shape = arg1.shape

        else:
            raise ValueError('Unsupported initializer format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype.char not in '?fdFD':
            raise ValueError(
                'Only bool, float32, float64, complex64 and complex128 '
                'are supported')

        data = data.astype(dtype, copy=copy)
        sparse_data._data_matrix.__init__(self, data)

        self.indices = indices.astype('i', copy=copy)
        self.indptr = indptr.astype('i', copy=copy)

        if shape is None:
            # Infer the extents from the index arrays themselves.
            shape = self._swap(len(indptr) - 1, int(indices.max()) + 1)

        major, minor = self._swap(*shape)
        if len(indptr) != major + 1:
            raise ValueError('index pointer size (%d) should be (%d)'
                             % (len(indptr), major + 1))

        self._descr = cusparse.MatDescriptor.create()
        self._shape = shape

    def _with_data(self, data, copy=True):
        """Return a matrix with the same structure but different data.

        When ``copy`` is true the index arrays are copied; otherwise they
        are shared with ``self``.
        """
        if copy:
            return self.__class__(
                (data, self.indices.copy(), self.indptr.copy()),
                shape=self.shape,
                dtype=data.dtype)
        else:
            return self.__class__(
                (data, self.indices, self.indptr),
                shape=self.shape,
                dtype=data.dtype)

    def _convert_dense(self, x):
        """Convert a dense array to ``(data, indices, indptr)``.

        Implemented by subclasses.
        """
        raise NotImplementedError

    def _swap(self, x, y):
        """Map a ``(row, col)`` pair to ``(major, minor)`` and back.

        Implemented by subclasses.
        """
        raise NotImplementedError

    def _add_sparse(self, other, alpha, beta):
        """Return ``alpha * self + beta * other`` for a sparse ``other``.

        Implemented by subclasses.
        """
        raise NotImplementedError

    def _add(self, other, lhs_negative, rhs_negative):
        """Compute ``(+/-self) + (+/-other)``.

        Args:
            other: Scalar zero, sparse matrix or dense array.
            lhs_negative (bool): Negate ``self`` before adding.
            rhs_negative (bool): Negate ``other`` before adding.

        Returns:
            The sum, or ``NotImplemented`` for unsupported operands.

        Raises:
            NotImplementedError: If ``other`` is a nonzero scalar.
        """
        if cupy.isscalar(other):
            if other == 0:
                if lhs_negative:
                    return -self
                else:
                    return self.copy()
            else:
                raise NotImplementedError(
                    'adding a nonzero scalar to a sparse matrix is not '
                    'supported')
        elif base.isspmatrix(other):
            alpha = -1 if lhs_negative else 1
            beta = -1 if rhs_negative else 1
            return self._add_sparse(other, alpha, beta)
        elif base.isdense(other):
            if lhs_negative:
                if rhs_negative:
                    return -self.todense() - other
                else:
                    return other - self.todense()
            else:
                if rhs_negative:
                    return self.todense() - other
                else:
                    return self.todense() + other
        else:
            return NotImplemented

    def __add__(self, other):
        return self._add(other, False, False)

    def __radd__(self, other):
        return self._add(other, False, False)

    def __sub__(self, other):
        return self._add(other, False, True)

    def __rsub__(self, other):
        return self._add(other, True, False)

    def _get_intXint(self, row, col):
        """Return the single element at ``(row, col)`` as a 0-dim array."""
        major, minor = self._swap(row, col)
        data, indices, _ = _index._get_csr_submatrix_major_axis(
            self.data, self.indices, self.indptr, major, major + 1)
        dtype = data.dtype
        res = cupy.zeros((), dtype=dtype)
        if dtype.kind == 'c':
            # Complex data is handled as separate real/imaginary parts.
            _index._compress_getitem_complex_kern(
                data.real, data.imag, indices, minor, res.real, res.imag)
        else:
            _index._compress_getitem_kern(data, indices, minor, res)
        return res

    def _get_sliceXslice(self, row, col):
        """Index with a slice on both axes."""
        major, minor = self._swap(row, col)
        copy = major.step in (1, None)
        return self._major_slice(major)._minor_slice(minor, copy=copy)

    def _get_arrayXarray(self, row, col, not_found_val=0):
        """Sample the values at paired ``(row, col)`` index arrays.

        Positions that hold no stored entry yield ``not_found_val``.
        """
        # inner indexing
        idx_dtype = self.indices.dtype
        M, N = self._swap(*self.shape)
        major, minor = self._swap(row, col)
        major = major.astype(idx_dtype, copy=False)
        minor = minor.astype(idx_dtype, copy=False)

        val = _index._csr_sample_values(
            M, N, self.indptr, self.indices, self.data,
            major.ravel(), minor.ravel(),
            not_found_val)

        if major.ndim == 1:
            # Scipy returns `matrix` here
            return cupy.expand_dims(val, 0)
        return self.__class__(val.reshape(major.shape))

    def _get_columnXarray(self, row, col):
        """Index with integer arrays on both axes (outer indexing)."""
        # outer indexing
        major, minor = self._swap(row, col)
        return self._major_index_fancy(major)._minor_index_fancy(minor)

    def _major_index_fancy(self, idx):
        """Index along the major axis where idx is an array of ints.
        """
        _, N = self._swap(*self.shape)
        M = idx.size
        new_shape = self._swap(M, N)
        if self.nnz == 0 or M == 0:
            # Keep the dtype of this matrix for the empty result, as
            # _major_slice does.
            return self.__class__(new_shape, dtype=self.dtype)

        return self.__class__(
            _index._csr_row_index(self.data, self.indices, self.indptr, idx),
            shape=new_shape, copy=False)

    def _minor_index_fancy(self, idx):
        """Index along the minor axis where idx is an array of ints.
        """
        M, _ = self._swap(*self.shape)
        N = idx.size
        new_shape = self._swap(M, N)
        if self.nnz == 0 or N == 0:
            # Keep the dtype of this matrix for the empty result, as
            # _major_slice does.
            return self.__class__(new_shape, dtype=self.dtype)

        if idx.size * M < self.nnz:
            # TODO (asi1024): Implement faster algorithm.
            pass

        return self._tocsx()._major_index_fancy(idx)._tocsx()

    def _major_slice(self, idx, copy=False):
        """Index along the major axis where idx is a slice object.
        """
        M, N = self._swap(*self.shape)
        start, stop, step = idx.indices(M)

        if start == 0 and stop == M and step == 1:
            # The slice selects everything.
            return self.copy() if copy else self

        M = len(range(start, stop, step))
        new_shape = self._swap(M, N)

        if step == 1:
            if M == 0 or self.nnz == 0:
                return self.__class__(new_shape, dtype=self.dtype)
            return self.__class__(
                _index._get_csr_submatrix_major_axis(
                    self.data, self.indices, self.indptr, start, stop),
                shape=new_shape, copy=copy)

        # Non-unit steps fall back to fancy indexing.
        rows = cupy.arange(start, stop, step, dtype=self.indptr.dtype)
        return self._major_index_fancy(rows)

    def _minor_slice(self, idx, copy=False):
        """Index along the minor axis where idx is a slice object.
        """
        M, N = self._swap(*self.shape)
        start, stop, step = idx.indices(N)

        if start == 0 and stop == N and step == 1:
            # The slice selects everything.
            return self.copy() if copy else self

        N = len(range(start, stop, step))
        new_shape = self._swap(M, N)

        if N == 0 or self.nnz == 0:
            # Keep the dtype of this matrix for the empty result, as
            # _major_slice does.
            return self.__class__(new_shape, dtype=self.dtype)
        if step == 1:
            return self.__class__(
                _index._get_csr_submatrix_minor_axis(
                    self.data, self.indices, self.indptr, start, stop),
                shape=new_shape, copy=False)

        # Non-unit steps fall back to fancy indexing.
        cols = cupy.arange(start, stop, step, dtype=self.indices.dtype)
        return self._minor_index_fancy(cols)

    def _set_intXint(self, row, col, x):
        """Assign ``x`` to the single element at ``(row, col)``."""
        i, j = self._swap(row, col)
        self._set_many(i, j, x)

    def _set_arrayXarray(self, row, col, x):
        """Assign ``x`` at paired ``(row, col)`` index arrays."""
        i, j = self._swap(row, col)
        self._set_many(i, j, x)

    def _set_arrayXarray_sparse(self, row, col, x):
        """Assign a sparse ``x`` at paired ``(row, col)`` index arrays.

        ``x`` is read through its ``row``, ``col`` and ``data``
        attributes and is broadcast along an axis where its extent is 1.
        """
        # clear entries that will be overwritten
        self._zero_many(*self._swap(row, col))

        M, N = row.shape  # matches col.shape
        broadcast_row = M != 1 and x.shape[0] == 1
        broadcast_col = N != 1 and x.shape[1] == 1
        r, c = x.row, x.col
        x = cupy.asarray(x.data, dtype=self.dtype)
        if broadcast_row:
            r = cupy.repeat(cupy.arange(M), r.size)
            c = cupy.tile(c, M)
            x = cupy.tile(x, M)
        if broadcast_col:
            r = cupy.repeat(r, N)
            c = cupy.tile(cupy.arange(N), c.size)
            x = cupy.repeat(x, N)
        # only assign entries in the new sparsity structure
        i, j = self._swap(row[r, c], col[r, c])
        self._set_many(i, j, x)

    def _prepare_indices(self, i, j):
        """Flatten and bounds-check major/minor index arrays.

        Returns:
            tuple: ``(i, j, M, N)`` where ``i`` and ``j`` are flattened
            copies and ``(M, N)`` is the ``(major, minor)`` extent.

        Raises:
            IndexError: If an index lies outside ``[-bound, bound)``.
        """
        M, N = self._swap(*self.shape)

        def check_bounds(indices, bound):
            idx = indices.max()
            if idx >= bound:
                raise IndexError('index (%d) out of range (>= %d)' %
                                 (idx, bound))
            idx = indices.min()
            if idx < -bound:
                raise IndexError('index (%d) out of range (< -%d)' %
                                 (idx, bound))

        i = cupy.array(i, dtype=self.indptr.dtype,
                       copy=True, ndmin=1).ravel()
        j = cupy.array(j, dtype=self.indices.dtype,
                       copy=True, ndmin=1).ravel()
        check_bounds(i, M)
        check_bounds(j, N)
        return i, j, M, N

    def _set_many(self, i, j, x):
        """Sets value at each (i, j) to x

        Here (i,j) index major and minor respectively, and must not contain
        duplicate entries.
        """
        i, j, M, N = self._prepare_indices(i, j)
        x = cupy.array(x, dtype=self.dtype, copy=True, ndmin=1).ravel()

        # A helper matrix whose values are the positions of the stored
        # entries, so that sampling it yields offsets into ``self.data``
        # (or -1 where nothing is stored).
        new_sp = cupyx.scipy.sparse.csr_matrix(
            (cupy.arange(self.nnz, dtype=cupy.float32),
             self.indices, self.indptr), shape=(M, N))

        offsets = new_sp._get_arrayXarray(
            i, j, not_found_val=-1).astype(cupy.int32).ravel()

        if -1 not in offsets:
            # only affects existing non-zero cells
            self.data[offsets] = x
            return

        else:
            warnings.warn('Changing the sparsity structure of a '
                          '{}_matrix is expensive.'
                          ' lil_matrix is more efficient.'.format(
                              self.format))
            # replace where possible
            mask = offsets > -1
            self.data[offsets[mask]] = x[mask]
            # only insertions remain
            mask = ~mask
            i = i[mask]
            i[i < 0] += M
            j = j[mask]
            j[j < 0] += N
            self._insert_many(i, j, x[mask])

    def _zero_many(self, i, j):
        """Sets value at each (i, j) to zero, preserving sparsity structure.
        Here (i,j) index major and minor respectively.
        """
        i, j, M, N = self._prepare_indices(i, j)

        # Same offset lookup as in _set_many.
        new_sp = cupyx.scipy.sparse.csr_matrix(
            (cupy.arange(self.nnz, dtype=cupy.float32),
             self.indices, self.indptr), shape=(M, N))

        offsets = new_sp._get_arrayXarray(
            i, j, not_found_val=-1).astype(cupy.int32).ravel()

        # only assign zeros to the existing sparsity structure
        self.data[offsets[offsets > -1]] = 0

    def _perform_insert(self, indices_inserts, data_inserts,
                        rows, row_counts, idx_dtype):
        """Insert new elements into current sparse matrix in sorted order"""
        indptr_diff = cupy.diff(self.indptr)
        indptr_diff[rows] += row_counts

        new_indptr = cupy.empty(self.indptr.shape, dtype=idx_dtype)
        new_indptr[0] = idx_dtype(0)
        new_indptr[1:] = indptr_diff

        # Build output arrays
        cupy.cumsum(new_indptr, out=new_indptr)
        out_nnz = int(new_indptr[-1])

        new_indices = cupy.empty(out_nnz, dtype=idx_dtype)
        new_data = cupy.empty(out_nnz, dtype=self.data.dtype)

        # Build an indexed indptr that contains the offsets for each
        # row but only for in i, j, and x.
        new_indptr_lookup = cupy.zeros(new_indptr.size, dtype=idx_dtype)
        new_indptr_lookup[1:][rows] = row_counts
        cupy.cumsum(new_indptr_lookup, out=new_indptr_lookup)

        _index._insert_many_populate_arrays(
            indices_inserts, data_inserts, new_indptr_lookup,
            self.indptr, self.indices, self.data, new_indptr, new_indices,
            new_data, size=self.indptr.size - 1)

        self.indptr = new_indptr
        self.indices = new_indices
        self.data = new_data

    def _insert_many(self, i, j, x):
        """Inserts new nonzero at each (i, j) with value x

        Here (i,j) index major and minor respectively.
        i, j and x must be non-empty, 1d arrays.
        Inserts each major group (e.g. all entries per row) at a time.
        Maintains has_sorted_indices property.
        """
        order = cupy.argsort(i)  # stable for duplicates
        i = i.take(order)
        j = j.take(order)
        x = x.take(order)

        # Update index data type
        idx_dtype = sputils.get_index_dtype(
            (self.indices, self.indptr), maxval=(self.nnz + x.size))

        self.indptr = self.indptr.astype(idx_dtype)
        self.indices = self.indices.astype(idx_dtype)
        self.data = self.data.astype(self.dtype)

        indptr_inserts, indices_inserts, data_inserts = \
            _index._select_last_indices(i, j, x, idx_dtype)

        rows, ui_indptr = cupy.unique(indptr_inserts, return_index=True)

        # Append the total count as a sentinel after the unique offsets.
        to_add = cupy.empty(ui_indptr.size + 1, ui_indptr.dtype)
        to_add[-1] = j.size
        to_add[:-1] = ui_indptr
        ui_indptr = to_add

        # Compute the counts for each row in the insertion array
        row_counts = cupy.zeros(ui_indptr.size - 1, dtype=idx_dtype)
        cupyx.scatter_add(
            row_counts, cupy.searchsorted(rows, indptr_inserts), 1)

        self._perform_insert(indices_inserts, data_inserts,
                             rows, row_counts, idx_dtype)

    def __get_has_canonical_format(self):
        """Determine whether the matrix has sorted indices and no duplicates.

        Returns:
            bool: ``True`` if the above applies, otherwise ``False``.

        .. note::
            :attr:`has_canonical_format` implies :attr:`has_sorted_indices`,
            so if the latter flag is ``False``, so will the former be; if the
            former is found ``True``, the latter flag is also set.

        .. warning::
            Getting this property might synchronize the device.

        """
        # Modified from the SciPy counterpart.

        # In CuPy the implemented conversions do not exactly match those of
        # SciPy's, so it's hard to put this exactly as where it is in SciPy,
        # but this should do the job.
        if self.data.size == 0:
            self._has_canonical_format = True
        # check to see if result was cached
        elif not getattr(self, '_has_sorted_indices', True):
            # not sorted => not canonical
            self._has_canonical_format = False
        elif not hasattr(self, '_has_canonical_format'):
            is_canonical = self._has_canonical_format_kern(
                self.indptr, self.indices, size=self.indptr.size - 1)
            self._has_canonical_format = bool(is_canonical.all())
        return self._has_canonical_format

    def __set_has_canonical_format(self, val):
        """Taken from SciPy as is."""
        self._has_canonical_format = bool(val)
        if val:
            # Canonical format implies sorted indices.
            self.has_sorted_indices = True

    has_canonical_format = property(fget=__get_has_canonical_format,
                                    fset=__set_has_canonical_format)

    def __get_sorted(self):
        """Determine whether the matrix has sorted indices.

        Returns:
            bool:
                ``True`` if the indices of the matrix are in sorted order,
                otherwise ``False``.

        .. warning::
            Getting this property might synchronize the device.

        """
        # Modified from the SciPy counterpart.

        # In CuPy the implemented conversions do not exactly match those of
        # SciPy's, so it's hard to put this exactly as where it is in SciPy,
        # but this should do the job.
        if self.data.size == 0:
            self._has_sorted_indices = True
        # check to see if result was cached
        elif not hasattr(self, '_has_sorted_indices'):
            is_sorted = self._has_sorted_indices_kern(
                self.indptr, self.indices, size=self.indptr.size - 1)
            self._has_sorted_indices = bool(is_sorted.all())
        return self._has_sorted_indices

    def __set_sorted(self, val):
        self._has_sorted_indices = bool(val)

    has_sorted_indices = property(fget=__get_sorted, fset=__set_sorted)

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.

        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros.

        Args:
            axis: Not supported yet.

        Returns:
            int: The number of stored values.

        Raises:
            ValueError: If ``axis`` is not ``None``.

        """
        if axis is None:
            return self.data.size
        else:
            raise ValueError('getnnz over an axis is not supported')

    def sorted_indices(self):
        """Return a copy of this matrix with sorted indices

        .. warning::
            Calling this function might synchronize the device.
        """
        # Taken from SciPy as is.
        A = self.copy()
        A.sort_indices()
        return A

    def sort_indices(self):
        """Sort the indices of this matrix in place.

        Implemented by subclasses.
        """
        # Unlike in SciPy, here this is implemented in child classes because
        # each child needs to call its own sort function from cuSPARSE
        raise NotImplementedError

    def sum_duplicates(self):
        """Eliminate duplicate matrix entries by adding them together.

        .. note::
            This is an *in place* operation.

        .. warning::
            Calling this function might synchronize the device.

        .. seealso::
           :meth:`scipy.sparse.csr_matrix.sum_duplicates`,
           :meth:`scipy.sparse.csc_matrix.sum_duplicates`
        """
        if self.has_canonical_format:
            return
        # TODO(leofang): add a kernel for compressed sparse matrices without
        # converting to coo
        summed = self.tocoo()
        summed.sum_duplicates()
        self.__init__(summed.asformat(self.format))
        self.has_canonical_format = True

    #####################
    # Reduce operations #
    #####################

    @staticmethod
    def _select_reduction_kern(ufunc, candidates):
        """Return the kernel paired with ``ufunc`` in ``candidates``.

        Raises:
            ValueError: If ``ufunc`` matches none of the candidates.
        """
        for func, kern in candidates:
            if ufunc == func:
                return kern
        raise ValueError(
            'unsupported reduction function: {!r}'.format(ufunc))

    def _minor_reduce_extents(self, axis):
        """Return ``(output size, reduced length)`` for ``axis``.

        Raises:
            ValueError: If ``axis`` is neither 0 nor 1.
        """
        if axis == 1:
            return self.shape[0], self.shape[1]
        if axis == 0:
            return self.shape[1], self.shape[0]
        raise ValueError('axis(={}) out of bounds'.format(axis))

    def _minor_reduce(self, ufunc, axis, nonzero):
        """Reduce each major slice with a min or max kernel.

        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): ``cupy.amax`` or ``cupy.amin``, selecting the
                operation to be conducted.
            axis (int): Axis (0 or 1) over which the reduction is
                conducted.
            nonzero (bool): If true, use the kernels that start from the
                first stored value of a slice instead of treating
                implicit zeros as candidates.

        Returns:
            (cupy.ndarray): float64 reduce result for each major index.

        Raises:
            ValueError: If ``axis`` or ``ufunc`` is not supported.

        """
        if nonzero:
            candidates = (
                (cupy.amax, self._max_nonzero_reduction_kern),
                (cupy.amin, self._min_nonzero_reduction_kern),
            )
        else:
            candidates = (
                (cupy.amax, self._max_reduction_kern),
                (cupy.amin, self._min_reduction_kern),
            )
        kern = self._select_reduction_kern(ufunc, candidates)
        out_size, length = self._minor_reduce_extents(axis)

        # Create the vector to hold output
        value = cupy.zeros(out_size, dtype=cupy.float64)

        # Perform the calculation with one single-thread block per slice.
        # NOTE(review): the kernels declare ``int length`` while an int64
        # scalar is passed here, as before -- confirm this is intended.
        kern((out_size,), (1,),
             (self.data.astype(cupy.float64),
              self.indptr[:-1],
              self.indptr[1:],
              cupy.int64(length),
              value))
        return value

    def _arg_minor_reduce(self, ufunc, axis):
        """Find the minor index of the min or max of each major slice.

        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): ``cupy.argmax`` or ``cupy.argmin``, selecting
                the operation to be conducted.
            axis (int): Axis (0 or 1) over which the reduction is
                conducted.

        Returns:
            (cupy.ndarray): int64 reduce result for each major index.

        Raises:
            ValueError: If ``axis`` or ``ufunc`` is not supported.

        """
        candidates = (
            (cupy.argmax, self._max_arg_reduction_kern),
            (cupy.argmin, self._min_arg_reduction_kern),
        )
        kern = self._select_reduction_kern(ufunc, candidates)
        out_size, length = self._minor_reduce_extents(axis)

        # Create the vector to hold output
        value = cupy.zeros(out_size, dtype=cupy.int64)

        # Perform the calculation with one single-thread block per slice.
        # NOTE(review): the kernels declare ``int length`` while an int64
        # scalar is passed here, as before -- confirm this is intended.
        kern((out_size,), (1,),
             (self.data.astype(cupy.float64),
              self.indices,
              self.indptr[:-1],
              self.indptr[1:],
              cupy.int64(length),
              value))
        return value
raise core.core._AxisError('axis(={}) out of bounds'.format(axis)) else: return _proc_as_batch(batch_kern, out, axis=axis) pos = 1 while pos < out.size: kern(pos, out, size=out.size) pos <<= 1 return out _cumsum_batch_kern = core.ElementwiseKernel( 'int64 pos, int64 batch', 'raw T x', ''' ptrdiff_t b = i % batch; ptrdiff_t j = i / batch; if (j & pos) { const ptrdiff_t dst_index[] = {j, b}; const ptrdiff_t src_index[] = {j ^ pos | (pos - 1), b}; x[dst_index] += x[src_index]; } ''', 'cumsum_batch_kernel') _cumsum_kern = core.ElementwiseKernel( 'int64 pos', 'raw T x', ''' if (i & pos) { x[i] += x[i ^ pos | (pos - 1)]; } ''', 'cumsum_kernel') def cumsum(a, axis=None, dtype=None, out=None): """Returns the cumulative sum of an array along a given axis.
from cupy import core


# Elementwise kernel computing, for each element,
#     y = loc - log(-log(1 - x)) * scale
# NOTE(review): presumably x is a uniform sample in [0, 1) so that y follows
# a Gumbel distribution -- confirm against the caller.
gumbel_kernel = core.ElementwiseKernel(
    'T x, T loc, T scale', 'T y',
    'y = loc - log(-log(1 - x)) * scale',
    'gumbel_kernel'
)

# Elementwise kernel computing a piecewise transform of x:
#     x <  0.5:  y = loc + scale * log(x + x)
#     x >= 0.5:  y = loc - scale * log(2.0 - x - x)
# The operation string is written as two adjacent literals that Python joins
# into a single expression.
# NOTE(review): presumably x is a uniform sample in [0, 1) so that y follows
# a Laplace distribution -- confirm against the caller.
laplace_kernel = core.ElementwiseKernel(
    'T x, T loc, T scale', 'T y',
    'y = (x < 0.5)? loc + scale * log(x + x):'
    ' loc - scale * log(2.0 - x - x)',
    'laplace_kernel'
)
# Elementwise kernel that, for every ``x``, binary-searches the first
# ``n_bins`` entries of ``bins`` and stores the insertion position in ``y``.
#
# * ``side_is_right`` selects between the "right" comparison
#   (``bins[m] <= x`` moves past equal entries) and the "left" one
#   (``bins[m] < x`` stops before them).
# * A NaN ``x`` maps to ``n_bins`` for the right side; for the left side
#   the position is moved back over any trailing NaN entries of ``bins``.
# * With no bins at all the only possible position is 0.  This is handled
#   up front because the code below reads ``bins[n_bins-1]``, which would
#   be out of bounds, and could otherwise store ``-1``.
#
# ``_isnan`` is expected to come from ``_preamble``, which is defined
# elsewhere in this module.
_searchsorted_kernel = core.ElementwiseKernel(
    'S x, raw T bins, int64 n_bins, bool side_is_right',
    'int64 y',
    '''
    if (n_bins <= 0) {
        // Empty bins: nothing to compare against.
        y = 0;
        return;
    }
    if (_isnan<S>(x)) {
        long long pos = n_bins;
        if (!side_is_right) {
            while (pos > 0 && _isnan<T>(bins[pos-1])) {
                --pos;
            }
        }
        y = pos;
        return;
    }
    bool greater = (side_is_right ? x >= bins[n_bins-1]
                                  : x > bins[n_bins-1]);
    if (greater) {
        y = n_bins;
        return;
    }
    long long left = 0;
    long long right = n_bins-1;
    while (left < right) {
        long long m = left + (right - left) / 2;
        if (side_is_right ? bins[m] <= x : bins[m] < x) {
            left = m + 1;
        } else {
            right = m;
        }
    }
    y = right;
    ''', preamble=_preamble)
class RandomState(object):

    """Portable container of a pseudo-random number generator.

    An instance of this class holds the state of a random number generator.
    The state is available only on the device which has been current at the
    initialization of the instance.

    Functions of :mod:`cupy.random` use global instances of this class.
    Different instances are used for different devices. The global state for
    the current device can be obtained by the
    :func:`cupy.random.get_random_state` function.

    Args:
        seed (None or int): Seed of the random number generator. See the
            :meth:`~cupy.random.RandomState.seed` method for detail.
        method (int): Method of the random number generator. Following values
            are available::

               cupy.cuda.curand.CURAND_RNG_PSEUDO_DEFAULT
               cupy.cuda.curand.CURAND_RNG_XORWOW
               cupy.cuda.curand.CURAND_RNG_MRG32K3A
               cupy.cuda.curand.CURAND_RNG_MTGP32
               cupy.cuda.curand.CURAND_RNG_MT19937
               cupy.cuda.curand.CURAND_RNG_PHILOX4_32_10

    """

    def __init__(self, seed=None, method=curand.CURAND_RNG_PSEUDO_DEFAULT):
        self._generator = curand.createGenerator(method)
        self.seed(seed)

    def __del__(self):
        # When createGenerator raises an error, _generator is not initialized
        if hasattr(self, '_generator'):
            curand.destroyGenerator(self._generator)

    def set_stream(self, stream=None):
        """Binds the generator to a CUDA stream.

        Args:
            stream (cupy.cuda.Stream): Stream to use. If ``None``, a new
                ``cuda.Stream()`` is created and used.

        """
        if stream is None:
            stream = cuda.Stream()
        curand.setStream(self._generator, stream.ptr)

    def _generate_normal(self, func, size, dtype, *args):
        """Fills a new array of shape ``size`` using a curand normal routine.

        ``func`` is called as ``func(generator, ptr, count, *args)``.
        """
        # curand functions below don't support odd size.
        # * curand.generateNormal
        # * curand.generateNormalDouble
        # * curand.generateLogNormal
        # * curand.generateLogNormalDouble
        size = core.get_size(size)
        # functools.reduce is used here for consistency with interval().
        element_size = functools.reduce(operator.mul, size, 1)
        if element_size % 2 == 0:
            out = cupy.empty(size, dtype=dtype)
            func(self._generator, out.data.ptr, out.size, *args)
            return out
        else:
            # Generate one extra element so the count is even, then drop it.
            out = cupy.empty((element_size + 1, ), dtype=dtype)
            func(self._generator, out.data.ptr, out.size, *args)
            return out[:element_size].reshape(size)

    # NumPy compatible functions

    def lognormal(self, mean=0.0, sigma=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a log normal distribution.

        .. seealso::
            :func:`cupy.random.lognormal` for full documentation,
            :meth:`numpy.random.RandomState.lognormal`

        """
        dtype = _check_and_get_dtype(dtype)
        if dtype.char == 'f':
            func = curand.generateLogNormal
        else:
            func = curand.generateLogNormalDouble
        return self._generate_normal(func, size, dtype, mean, sigma)

    def normal(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of normally distributed samples.

        .. seealso::
            :func:`cupy.random.normal` for full documentation,
            :meth:`numpy.random.RandomState.normal`

        """
        dtype = _check_and_get_dtype(dtype)
        if dtype.char == 'f':
            func = curand.generateNormal
        else:
            func = curand.generateNormalDouble
        return self._generate_normal(func, size, dtype, loc, scale)

    def rand(self, *size, **kwarg):
        """Returns uniform random values over the interval ``[0, 1)``.

        .. seealso::
            :func:`cupy.random.rand` for full documentation,
            :meth:`numpy.random.RandomState.rand`

        """
        dtype = kwarg.pop('dtype', float)
        if kwarg:
            raise TypeError('rand() got unexpected keyword arguments %s'
                            % ', '.join(kwarg.keys()))
        return self.random_sample(size=size, dtype=dtype)

    def randn(self, *size, **kwarg):
        """Returns an array of standard normal random values.

        .. seealso::
            :func:`cupy.random.randn` for full documentation,
            :meth:`numpy.random.RandomState.randn`

        """
        dtype = kwarg.pop('dtype', float)
        if kwarg:
            raise TypeError('randn() got unexpected keyword arguments %s'
                            % ', '.join(kwarg.keys()))
        return self.normal(size=size, dtype=dtype)

    # In-place ``x = 1 - x``; applied to the curand uniform output in
    # random_sample().
    _1m_kernel = core.ElementwiseKernel('', 'T x', 'x = 1 - x',
                                        'cupy_random_1_minus_x')

    def random_sample(self, size=None, dtype=float):
        """Returns an array of random values over the interval ``[0, 1)``.

        .. seealso::
            :func:`cupy.random.random_sample` for full documentation,
            :meth:`numpy.random.RandomState.random_sample`

        """
        dtype = _check_and_get_dtype(dtype)
        out = cupy.empty(size, dtype=dtype)
        if dtype.char == 'f':
            func = curand.generateUniform
        else:
            func = curand.generateUniformDouble
        func(self._generator, out.data.ptr, out.size)
        RandomState._1m_kernel(out)
        return out

    def interval(self, mx, size):
        """Generate multiple integers independently sampled uniformly from ``[0, mx]``.

        Args:
            mx (int): Upper bound of the interval
            size (None or int or tuple): Shape of the array or the scalar
                returned.
        Returns:
            int or cupy.ndarray: If ``None``, an :class:`cupy.ndarray` with
            shape ``()`` is returned.
            If ``int``, 1-D array of length size is returned.
            If ``tuple``, multi-dimensional array with shape
            ``size`` is returned.
            Currently, only 32 bit integers can be sampled.
            If 0 :math:`\\leq` ``mx`` :math:`\\leq` 0x7fffffff,
            a ``numpy.int32`` array is returned.
            If 0x80000000 :math:`\\leq` ``mx`` :math:`\\leq` 0xffffffff,
            a ``numpy.uint32`` array is returned.

        Raises:
            ValueError: If ``mx`` is negative or exceeds the uint32 range.
        """
        if size is None:
            return self.interval(mx, 1).reshape(())
        elif isinstance(size, int):
            size = (size, )

        if mx == 0:
            return cupy.zeros(size, dtype=numpy.int32)

        if mx < 0:
            raise ValueError(
                'mx must be non-negative (actual: {})'.format(mx))
        elif mx <= 0x7fffffff:
            dtype = numpy.int32
        elif mx <= 0xffffffff:
            dtype = numpy.uint32
        else:
            raise ValueError(
                'mx must be within uint32 range (actual: {})'.format(mx))

        # Smallest all-ones bit mask that covers mx.
        mask = (1 << mx.bit_length()) - 1
        mask = cupy.array(mask, dtype=dtype)

        n = functools.reduce(operator.mul, size, 1)
        if n == 0:
            # Nothing to sample. Without this guard the loop below never
            # runs and ``ret`` stays None, which used to crash on reshape.
            return cupy.empty(size, dtype=dtype)

        sample = cupy.empty((n, ), dtype=dtype)
        n_rem = n  # The number of remaining elements to sample
        ret = None
        while n_rem > 0:
            curand.generate(
                self._generator, sample.data.ptr, sample.size)
            # Drop the samples that exceed the upper limit
            sample &= mask
            success = sample <= mx

            if ret is None:
                # If the sampling has finished in the first iteration,
                # just return the sample.
                if success.all():
                    n_rem = 0
                    ret = sample
                    break

                # Allocate the return array.
                ret = cupy.empty((n, ), dtype=dtype)

            n_succ = min(n_rem, int(success.sum()))
            ret[n - n_rem:n - n_rem + n_succ] = sample[success][:n_succ]
            n_rem -= n_succ

        assert n_rem == 0
        return ret.reshape(size)

    def seed(self, seed=None):
        """Resets the state of the random number generator with a seed.

        .. seealso::
            :func:`cupy.random.seed` for full documentation,
            :meth:`numpy.random.RandomState.seed`

        """
        if seed is None:
            try:
                seed_str = binascii.hexlify(os.urandom(8))
                seed = numpy.uint64(int(seed_str, 16))
            except NotImplementedError:
                # time.clock() was removed in Python 3.8; the wall clock
                # serves equally well as a last-resort seed source.
                seed = numpy.uint64(time.time() * 1000000)
        else:
            seed = numpy.uint64(seed)

        curand.setPseudoRandomGeneratorSeed(self._generator, seed)
        curand.setGeneratorOffset(self._generator, 0)

    def standard_normal(self, size=None, dtype=float):
        """Returns samples drawn from the standard normal distribution.

        .. seealso::
            :func:`cupy.random.standard_normal` for full documentation,
            :meth:`numpy.random.RandomState.standard_normal`

        """
        return self.normal(size=size, dtype=dtype)

    def uniform(self, low=0.0, high=1.0, size=None, dtype=float):
        """Returns an array of uniformly-distributed samples over an interval.

        .. seealso::
            :func:`cupy.random.uniform` for full documentation,
            :meth:`numpy.random.RandomState.uniform`

        """
        dtype = numpy.dtype(dtype)
        rand = self.random_sample(size=size, dtype=dtype)
        return dtype.type(low) + rand * dtype.type(high - low)

    def choice(self, a, size=None, replace=True, p=None):
        """Returns an array of random values from a given 1-D array.

        .. seealso::
            :func:`cupy.random.choice` for full document,
            :meth:`numpy.random.choice`

        """
        if a is None:
            raise ValueError('a must be 1-dimensional or an integer')
        if isinstance(a, cupy.ndarray) and a.ndim == 0:
            raise NotImplementedError
        if isinstance(a, six.integer_types):
            a_size = a
            if a_size <= 0:
                raise ValueError('a must be greater than 0')
        else:
            a = cupy.array(a, copy=False)
            if a.ndim != 1:
                raise ValueError('a must be 1-dimensional or an integer')
            else:
                a_size = len(a)
                if a_size == 0:
                    raise ValueError('a must be non-empty')

        if p is not None:
            p = cupy.array(p)
            if p.ndim != 1:
                raise ValueError('p must be 1-dimensional')
            if len(p) != a_size:
                raise ValueError('a and p must have same size')
            if not (p >= 0).all():
                raise ValueError('probabilities are not non-negative')
            p_sum = cupy.sum(p).get()
            if not numpy.allclose(p_sum, 1):
                raise ValueError('probabilities do not sum to 1')

        if size is None:
            raise NotImplementedError
        shape = size
        size = numpy.prod(shape)

        if not replace and p is None:
            if a_size < size:
                raise ValueError(
                    'Cannot take a larger sample than population when '
                    '\'replace=False\'')
            if isinstance(a, six.integer_types):
                indices = cupy.arange(a, dtype='l')
            else:
                indices = a.copy()
            self.shuffle(indices)
            return indices[:size].reshape(shape)

        if not replace:
            raise NotImplementedError

        # NOTE(review): both branches below call module-level
        # ``cupy.random`` functions rather than this instance's generator.
        if p is not None:
            # Argmax of log-probabilities plus Gumbel samples, per row.
            p = cupy.broadcast_to(p, (size, a_size))
            index = cupy.argmax(cupy.log(p) +
                                cupy.random.gumbel(size=(size, a_size)),
                                axis=1)
            if not isinstance(shape, six.integer_types):
                index = cupy.reshape(index, shape)
        else:
            index = cupy.random.randint(0, a_size, size=shape)
            # Align the dtype with NumPy
            index = index.astype(cupy.int64, copy=False)

        if isinstance(a, six.integer_types):
            return index

        if index.ndim == 0:
            return cupy.array(a[index], dtype=a.dtype)

        return a[index]

    def shuffle(self, a):
        """Returns a shuffled array.

        .. seealso::
            :func:`cupy.random.shuffle` for full document,
            :meth:`numpy.random.shuffle`

        """
        if not isinstance(a, cupy.ndarray):
            raise TypeError('The array must be cupy.ndarray')

        if a.ndim == 0:
            raise TypeError('An array whose ndim is 0 is not supported')

        # Reorder ``a`` in place by the sort order of random 32-bit keys.
        sample = cupy.zeros((len(a)), dtype=numpy.int32)
        curand.generate(self._generator, sample.data.ptr, sample.size)
        a[:] = a[cupy.argsort(sample)]
class RandomState(object):

    """Portable container of a pseudo-random number generator.

    An instance of this class holds the state of a random number generator.
    The state is available only on the device which has been current at the
    initialization of the instance.

    Functions of :mod:`cupy.random` use global instances of this class.
    Different instances are used for different devices. The global state for
    the current device can be obtained by the
    :func:`cupy.random.get_random_state` function.

    Args:
        seed (None or int): Seed of the random number generator. See the
            :meth:`~cupy.random.RandomState.seed` method for detail.
        method (int): Method of the random number generator. Following values
            are available::

               cupy.cuda.curand.CURAND_RNG_PSEUDO_DEFAULT
               cupy.cuda.curand.CURAND_RNG_XORWOW
               cupy.cuda.curand.CURAND_RNG_MRG32K3A
               cupy.cuda.curand.CURAND_RNG_MTGP32
               cupy.cuda.curand.CURAND_RNG_MT19937
               cupy.cuda.curand.CURAND_RNG_PHILOX4_32_10

    """

    def __init__(self, seed=None, method=curand.CURAND_RNG_PSEUDO_DEFAULT):
        self._generator = curand.createGenerator(method)
        self.seed(seed)

    def __del__(self):
        # When createGenerator raises an error, _generator is not initialized
        if hasattr(self, '_generator'):
            curand.destroyGenerator(self._generator)

    def _generate_normal(self, func, size, dtype, *args):
        """Fills a new array of shape ``size`` using a curand normal routine.

        ``func`` is called as ``func(generator, ptr, count, *args)``.
        """
        # curand functions below don't support odd size.
        # * curand.generateNormal
        # * curand.generateNormalDouble
        # * curand.generateLogNormal
        # * curand.generateLogNormalDouble
        size = core.get_size(size)
        # functools.reduce is used here for consistency with _interval().
        element_size = functools.reduce(operator.mul, size, 1)
        if element_size % 2 == 0:
            out = cupy.empty(size, dtype=dtype)
            func(self._generator, out.data.ptr, out.size, *args)
            return out
        else:
            # Generate one extra element so the count is even, then drop it.
            out = cupy.empty((element_size + 1, ), dtype=dtype)
            func(self._generator, out.data.ptr, out.size, *args)
            return out[:element_size].reshape(size)

    # NumPy compatible functions

    def binomial(self, n, p, size=None, dtype=int):
        """Returns an array of samples drawn from the binomial distribution.

        .. seealso::
            :func:`cupy.random.binomial` for full documentation,
            :meth:`numpy.random.RandomState.binomial`
        """
        n, p = cupy.asarray(n), cupy.asarray(p)
        if size is None:
            size = cupy.broadcast(n, p).shape
        y = cupy.zeros(shape=size, dtype=dtype)
        _kernels.binomial_kernel(n, p, self.rk_seed, y)
        # Advance the kernel seed by the number of generated samples.
        # ``size`` can no longer be None here, so the former ``size is None``
        # branch was dead; ``y.size`` also avoids numpy.prod(()) turning the
        # seed into a float for scalar output.
        self.rk_seed += y.size
        return y

    _laplace_kernel = core.ElementwiseKernel(
        'T x, T loc, T scale', 'T y',
        'y = T(loc) + T(scale) * ((x < 0.5) ? log(x + x): -log(2.0 - x - x))',
        'laplace_kernel')

    def laplace(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from the laplace distribution.

        .. seealso::
            :func:`cupy.random.laplace` for full documentation,
            :meth:`numpy.random.RandomState.laplace`
        """
        x = self.random_sample(size=size, dtype=dtype)
        if not numpy.isscalar(loc):
            loc = cupy.asarray(loc, dtype)
        if not numpy.isscalar(scale):
            scale = cupy.asarray(scale, dtype)
        # The kernel transforms the uniform samples in place.
        RandomState._laplace_kernel(x, loc, scale, x)
        return x

    def lognormal(self, mean=0.0, sigma=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a log normal distribution.

        .. seealso::
            :func:`cupy.random.lognormal` for full documentation,
            :meth:`numpy.random.RandomState.lognormal`

        """
        dtype = _check_and_get_dtype(dtype)
        if dtype.char == 'f':
            func = curand.generateLogNormal
        else:
            func = curand.generateLogNormalDouble
        return self._generate_normal(func, size, dtype, mean, sigma)

    def normal(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of normally distributed samples.

        .. seealso::
            :func:`cupy.random.normal` for full documentation,
            :meth:`numpy.random.RandomState.normal`

        """
        dtype = _check_and_get_dtype(dtype)
        if dtype.char == 'f':
            func = curand.generateNormal
        else:
            func = curand.generateNormalDouble
        return self._generate_normal(func, size, dtype, loc, scale)

    def rand(self, *size, **kwarg):
        """Returns uniform random values over the interval ``[0, 1)``.

        .. seealso::
            :func:`cupy.random.rand` for full documentation,
            :meth:`numpy.random.RandomState.rand`

        """
        dtype = kwarg.pop('dtype', float)
        if kwarg:
            raise TypeError('rand() got unexpected keyword arguments %s'
                            % ', '.join(kwarg.keys()))
        return self.random_sample(size=size, dtype=dtype)

    def randn(self, *size, **kwarg):
        """Returns an array of standard normal random values.

        .. seealso::
            :func:`cupy.random.randn` for full documentation,
            :meth:`numpy.random.RandomState.randn`

        """
        dtype = kwarg.pop('dtype', float)
        if kwarg:
            raise TypeError('randn() got unexpected keyword arguments %s'
                            % ', '.join(kwarg.keys()))
        return self.normal(size=size, dtype=dtype)

    # In-place ``x = 1 - x``; applied to the curand uniform output in
    # random_sample().
    _1m_kernel = core.ElementwiseKernel('', 'T x', 'x = 1 - x',
                                        'cupy_random_1_minus_x')

    def _random_sample_raw(self, size, dtype):
        """Returns the curand uniform output without the ``1 - x`` flip."""
        dtype = _check_and_get_dtype(dtype)
        out = cupy.empty(size, dtype=dtype)
        if dtype.char == 'f':
            func = curand.generateUniform
        else:
            func = curand.generateUniformDouble
        func(self._generator, out.data.ptr, out.size)
        return out

    def random_sample(self, size=None, dtype=float):
        """Returns an array of random values over the interval ``[0, 1)``.

        .. seealso::
            :func:`cupy.random.random_sample` for full documentation,
            :meth:`numpy.random.RandomState.random_sample`

        """
        out = self._random_sample_raw(size, dtype)
        RandomState._1m_kernel(out)
        return out

    def _interval(self, mx, size):
        """Generate multiple integers independently sampled uniformly from ``[0, mx]``.

        Args:
            mx (int): Upper bound of the interval
            size (None or int or tuple): Shape of the array or the scalar
                returned.
        Returns:
            int or cupy.ndarray: If ``None``, an :class:`cupy.ndarray` with
            shape ``()`` is returned.
            If ``int``, 1-D array of length size is returned.
            If ``tuple``, multi-dimensional array with shape
            ``size`` is returned.
            Currently, only 32 bit integers can be sampled.
            If 0 :math:`\\leq` ``mx`` :math:`\\leq` 0x7fffffff,
            a ``numpy.int32`` array is returned.
            If 0x80000000 :math:`\\leq` ``mx`` :math:`\\leq` 0xffffffff,
            a ``numpy.uint32`` array is returned.

        Raises:
            ValueError: If ``mx`` is negative or exceeds the uint32 range.
        """
        if size is None:
            return self._interval(mx, 1).reshape(())
        elif isinstance(size, int):
            size = (size, )

        if mx == 0:
            return cupy.zeros(size, dtype=numpy.int32)

        if mx < 0:
            raise ValueError(
                'mx must be non-negative (actual: {})'.format(mx))
        elif mx <= 0x7fffffff:
            dtype = numpy.int32
        elif mx <= 0xffffffff:
            dtype = numpy.uint32
        else:
            raise ValueError(
                'mx must be within uint32 range (actual: {})'.format(mx))

        # Smallest all-ones bit mask that covers mx.
        mask = (1 << mx.bit_length()) - 1
        mask = cupy.array(mask, dtype=dtype)

        n = functools.reduce(operator.mul, size, 1)
        if n == 0:
            # Nothing to sample. Without this guard the loop below never
            # runs and ``ret`` stays None, which used to crash on reshape.
            return cupy.empty(size, dtype=dtype)

        sample = cupy.empty((n, ), dtype=dtype)
        n_rem = n  # The number of remaining elements to sample
        ret = None
        while n_rem > 0:
            curand.generate(
                self._generator, sample.data.ptr, sample.size)
            # Drop the samples that exceed the upper limit
            sample &= mask
            success = sample <= mx

            if ret is None:
                # If the sampling has finished in the first iteration,
                # just return the sample.
                if success.all():
                    n_rem = 0
                    ret = sample
                    break

                # Allocate the return array.
                ret = cupy.empty((n, ), dtype=dtype)

            n_succ = min(n_rem, int(success.sum()))
            ret[n - n_rem:n - n_rem + n_succ] = sample[success][:n_succ]
            n_rem -= n_succ

        assert n_rem == 0
        return ret.reshape(size)

    def seed(self, seed=None):
        """Resets the state of the random number generator with a seed.

        .. seealso::
            :func:`cupy.random.seed` for full documentation,
            :meth:`numpy.random.RandomState.seed`

        """
        if seed is None:
            try:
                seed_str = binascii.hexlify(os.urandom(8))
                seed = numpy.uint64(int(seed_str, 16))
            except NotImplementedError:
                # time.clock() was removed in Python 3.8; the wall clock
                # serves equally well as a last-resort seed source.
                seed = numpy.uint64(time.time() * 1000000)
        else:
            seed = numpy.asarray(seed).astype(numpy.uint64, casting='safe')

        curand.setPseudoRandomGeneratorSeed(self._generator, seed)
        curand.setGeneratorOffset(self._generator, 0)
        # Separate seed handed to the custom kernels (see binomial()).
        self.rk_seed = numpy.uint32(seed)

    def standard_normal(self, size=None, dtype=float):
        """Returns samples drawn from the standard normal distribution.

        .. seealso::
            :func:`cupy.random.standard_normal` for full documentation,
            :meth:`numpy.random.RandomState.standard_normal`

        """
        return self.normal(size=size, dtype=dtype)

    def tomaxint(self, size=None):
        """Draws integers between 0 and max integer inclusive.

        Args:
            size (int or tuple of ints): Output shape.

        Returns:
            cupy.ndarray: Drawn samples.

        .. seealso::
            :meth:`numpy.random.RandomState.tomaxint`

        """
        if size is None:
            size = ()
        sample = cupy.empty(size, dtype=cupy.int_)
        # cupy.random only uses int32 random generator
        size_in_int = sample.dtype.itemsize // 4
        curand.generate(
            self._generator, sample.data.ptr, sample.size * size_in_int)

        # Disable sign bit
        sample &= cupy.iinfo(cupy.int_).max
        return sample

    # In-place ``x = low + x * (high - low)``.
    _scale_kernel = core.ElementwiseKernel('T low, T high',
                                           'T x',
                                           'x = T(low) + x * T(high - low)',
                                           'cupy_scale')

    def uniform(self, low=0.0, high=1.0, size=None, dtype=float):
        """Returns an array of uniformly-distributed samples over an interval.

        .. seealso::
            :func:`cupy.random.uniform` for full documentation,
            :meth:`numpy.random.RandomState.uniform`

        """
        dtype = numpy.dtype(dtype)
        rand = self.random_sample(size=size, dtype=dtype)
        if not numpy.isscalar(low):
            low = cupy.asarray(low, dtype)
        if not numpy.isscalar(high):
            high = cupy.asarray(high, dtype)
        return RandomState._scale_kernel(low, high, rand)

    def choice(self, a, size=None, replace=True, p=None):
        """Returns an array of random values from a given 1-D array.

        .. seealso::
            :func:`cupy.random.choice` for full document,
            :func:`numpy.random.choice`

        """
        if a is None:
            raise ValueError('a must be 1-dimensional or an integer')
        if isinstance(a, cupy.ndarray) and a.ndim == 0:
            raise NotImplementedError
        if isinstance(a, six.integer_types):
            a_size = a
            if a_size <= 0:
                raise ValueError('a must be greater than 0')
        else:
            a = cupy.array(a, copy=False)
            if a.ndim != 1:
                raise ValueError('a must be 1-dimensional or an integer')
            else:
                a_size = len(a)
                if a_size == 0:
                    raise ValueError('a must be non-empty')

        if p is not None:
            p = cupy.array(p)
            if p.ndim != 1:
                raise ValueError('p must be 1-dimensional')
            if len(p) != a_size:
                raise ValueError('a and p must have same size')
            if not (p >= 0).all():
                raise ValueError('probabilities are not non-negative')
            p_sum = cupy.sum(p).get()
            if not numpy.allclose(p_sum, 1):
                raise ValueError('probabilities do not sum to 1')

        if size is None:
            raise NotImplementedError
        shape = size
        size = numpy.prod(shape)

        if not replace and p is None:
            if a_size < size:
                raise ValueError(
                    'Cannot take a larger sample than population when '
                    '\'replace=False\'')
            if isinstance(a, six.integer_types):
                indices = cupy.arange(a, dtype='l')
            else:
                indices = a.copy()
            self.shuffle(indices)
            return indices[:size].reshape(shape)

        if not replace:
            raise NotImplementedError

        if p is not None:
            # Argmax of log-probabilities plus Gumbel samples, per row.
            p = cupy.broadcast_to(p, (size, a_size))
            index = cupy.argmax(cupy.log(p) +
                                self.gumbel(size=(size, a_size)),
                                axis=1)
            if not isinstance(shape, six.integer_types):
                index = cupy.reshape(index, shape)
        else:
            index = self.randint(0, a_size, size=shape)
            # Align the dtype with NumPy
            index = index.astype(cupy.int64, copy=False)

        if isinstance(a, six.integer_types):
            return index

        if index.ndim == 0:
            return cupy.array(a[index], dtype=a.dtype)

        return a[index]

    def shuffle(self, a):
        """Returns a shuffled array.

        .. seealso::
            :func:`cupy.random.shuffle` for full document,
            :func:`numpy.random.shuffle`

        """
        if not isinstance(a, cupy.ndarray):
            raise TypeError('The array must be cupy.ndarray')

        if a.ndim == 0:
            raise TypeError('An array whose ndim is 0 is not supported')

        a[:] = a[self.permutation(len(a))]

    def permutation(self, num):
        """Returns a permuted range."""
        if not isinstance(num, six.integer_types):
            raise TypeError('The data type of argument "num" must be integer')

        sample = cupy.empty((num), dtype=numpy.int32)
        curand.generate(self._generator, sample.data.ptr, num)
        if 128 < num <= 32 * 1024 * 1024:
            array = cupy.arange(num, dtype=numpy.int32)
            # apply sort of cache blocking
            block_size = 1 * 1024 * 1024
            # The block size above is a value determined from the L2 cache
            # size of GP100 (L2 cache size / size of int = 4MB / 4B = 1M). It
            # may be better to change the value base on the L2 cache size of
            # the GPU you use.
            # When num > block_size, cupy kernel: _cupy_permutation is to be
            # launched multiple times. However, it is observed that
            # performance will be degraded if the launch count is too many.
            # Therefore, the block size is adjusted so that launch count will
            # not exceed twelve. Note that this twelve is the value
            # determined from measurement on GP100.
            while num // block_size > 12:
                block_size *= 2
            for j_start in range(0, num, block_size):
                j_end = j_start + block_size
                _cupy_permutation()(array, sample, j_start, j_end, size=num)
        else:
            # When num <= 128 or num > 32M, argsort is used. For the large
            # case it is faster than the custom kernel.
            # See https://github.com/cupy/cupy/pull/603.
            array = cupy.argsort(sample)
        return array

    _gumbel_kernel = core.ElementwiseKernel(
        'T x, T loc, T scale', 'T y',
        'y = T(loc) - log(-log(x)) * T(scale)',
        'gumbel_kernel')

    def gumbel(self, loc=0.0, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a Gumbel distribution.

        .. seealso::
            :func:`cupy.random.gumbel` for full documentation,
            :meth:`numpy.random.RandomState.gumbel`
        """
        x = self._random_sample_raw(size=size, dtype=dtype)
        if not numpy.isscalar(loc):
            loc = cupy.asarray(loc, dtype)
        if not numpy.isscalar(scale):
            scale = cupy.asarray(scale, dtype)
        # The kernel transforms the raw uniform samples in place.
        RandomState._gumbel_kernel(x, loc, scale, x)
        return x

    def randint(self, low, high=None, size=None, dtype='l'):
        """Returns a scalar or an array of integer values over ``[low, high)``.

        .. seealso::
            :func:`cupy.random.randint` for full documentation,
            :meth:`numpy.random.RandomState.randint`

        Raises:
            ValueError: If ``low >= high`` or a bound does not fit ``dtype``.
            NotImplementedError: If ``high - low`` exceeds the number of
                values representable in 32 bits.
        """
        if high is None:
            lo = 0
            hi = low
        else:
            lo = low
            hi = high

        if lo >= hi:
            raise ValueError('low >= high')
        if lo < cupy.iinfo(dtype).min:
            raise ValueError(
                'low is out of bounds for {}'.format(cupy.dtype(dtype).name))
        if hi > cupy.iinfo(dtype).max + 1:
            raise ValueError(
                'high is out of bounds for {}'.format(cupy.dtype(dtype).name))

        diff = hi - lo - 1
        int32_info = cupy.iinfo(cupy.int32)
        # _interval() accepts at most 0xffffffff (== int32 max - int32 min).
        # The previous bound was one larger, so diff == 2**32 slipped through
        # and failed inside _interval() with a misleading ValueError.
        if diff > int32_info.max - int32_info.min:
            raise NotImplementedError(
                'Sampling from a range whose extent is larger than int32 '
                'range is currently not supported')
        x = self._interval(diff, size).astype(dtype, copy=False)
        cupy.add(x, lo, out=x)
        return x
class coo_matrix(sparse_data._data_matrix):

    """COOrdinate format sparse matrix.

    Now it has only one initializer format below:

    ``coo_matrix(S)``
        ``S`` is another sparse matrix. It is equivalent to ``S.tocoo()``.

    ``coo_matrix((M, N), [dtype])``
        It constructs an empty matrix whose shape is ``(M, N)``. Default dtype
        is float64.

    ``coo_matrix((data, (row, col)))``
        All ``data``, ``row`` and ``col`` are one-dimensional
        :class:`cupy.ndarray`.

    Args:
        arg1: Arguments for the initializer.
        shape (tuple): Shape of a matrix. Its length must be two.
        dtype: Data type. It must be an argument of :class:`numpy.dtype`.
        copy (bool): If ``True``, copies of given data are always used.

    .. seealso::
       :class:`scipy.sparse.coo_matrix`

    """

    format = 'coo'

    # diff[i] is 0 for i == 0 or when entry i has the same (row, col) as
    # entry i - 1, and 1 otherwise.
    _sum_duplicates_diff = core.ElementwiseKernel(
        'raw T row, raw T col',
        'T diff',
        ''' T diff_out = 1; if (i == 0 || row[i - 1] == row[i] && col[i - 1] == col[i]) { diff_out = 0; } diff = diff_out; ''',
        'sum_duplicates_diff')

    # Accumulates each source entry into its output slot ``index`` and
    # records the slot's coordinates. Built once at class creation instead of
    # on every sum_duplicates() call.
    _sum_duplicates_assign = core.ElementwiseKernel(
        'T src_data, int32 src_row, int32 src_col, int32 index',
        'raw T data, raw int32 row, raw int32 col',
        ''' atomicAdd(&data[index], src_data); row[index] = src_row; col[index] = src_col; ''',
        'sum_duplicates_assign')

    # Same as above for complex data, with real and imaginary parts
    # accumulated separately.
    _sum_duplicates_assign_complex = core.ElementwiseKernel(
        'T src_real, T src_imag, int32 src_row, int32 src_col, '
        'int32 index',
        'raw T real, raw T imag, raw int32 row, raw int32 col',
        ''' atomicAdd(&real[index], src_real); atomicAdd(&imag[index], src_imag); row[index] = src_row; col[index] = src_col; ''',
        'sum_duplicates_assign_complex')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        if shape is not None and len(shape) != 2:
            raise ValueError(
                'Only two-dimensional sparse arrays are supported.')

        if base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            row = x.row
            col = x.col

            if arg1.format != self.format:
                # When formats are different, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

            self.has_canonical_format = x.has_canonical_format

        elif _util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            data = cupy.zeros(0, dtype if dtype else 'd')
            row = cupy.zeros(0, dtype='i')
            col = cupy.zeros(0, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False

            self.has_canonical_format = True

        elif _scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupyx.scipy.sparse
            x = arg1.tocoo()
            data = cupy.array(x.data)
            row = cupy.array(x.row, dtype='i')
            col = cupy.array(x.col, dtype='i')
            copy = False
            if shape is None:
                shape = arg1.shape

            self.has_canonical_format = x.has_canonical_format

        elif isinstance(arg1, tuple) and len(arg1) == 2:
            try:
                data, (row, col) = arg1
            except (TypeError, ValueError):
                raise TypeError('invalid input format')

            if not (base.isdense(data) and data.ndim == 1 and
                    base.isdense(row) and row.ndim == 1 and
                    base.isdense(col) and col.ndim == 1):
                raise ValueError('row, column, and data arrays must be 1-D')
            if not (len(data) == len(row) == len(col)):
                raise ValueError(
                    'row, column, and data array must all be the same length')

            self.has_canonical_format = False

        else:
            # TODO(leofang): support constructing from a dense matrix
            raise TypeError('invalid input format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype != 'f' and dtype != 'd' and dtype != 'F' and dtype != 'D':
            raise ValueError('Only float32, float64, complex64 and complex128'
                             ' are supported')

        data = data.astype(dtype, copy=copy)
        row = row.astype('i', copy=copy)
        col = col.astype('i', copy=copy)

        if shape is None:
            if len(row) == 0 or len(col) == 0:
                raise ValueError(
                    'cannot infer dimensions from zero sized index arrays')
            shape = (int(row.max()) + 1, int(col.max()) + 1)

        if len(data) > 0:
            if row.max() >= shape[0]:
                raise ValueError('row index exceeds matrix dimensions')
            if col.max() >= shape[1]:
                raise ValueError('column index exceeds matrix dimensions')
            if row.min() < 0:
                raise ValueError('negative row index found')
            if col.min() < 0:
                raise ValueError('negative column index found')

        sparse_data._data_matrix.__init__(self, data)
        self.row = row
        self.col = col
        if not _util.isshape(shape):
            raise ValueError('invalid shape (must be a 2-tuple of int)')
        self._shape = int(shape[0]), int(shape[1])

    def _with_data(self, data, copy=True):
        """Returns a matrix with the same sparsity structure as self,
        but with different data.  By default the index arrays
        (i.e. .row and .col) are copied.
        """
        if copy:
            return coo_matrix(
                (data, (self.row.copy(), self.col.copy())),
                shape=self.shape, dtype=data.dtype)
        else:
            return coo_matrix(
                (data, (self.row, self.col)), shape=self.shape,
                dtype=data.dtype)

    def eliminate_zeros(self):
        """Removes zero entries in place."""
        ind = self.data != 0
        self.data = self.data[ind]
        self.row = self.row[ind]
        self.col = self.col[ind]

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.
        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros.

        Args:
            axis: Only ``None`` is supported.

        Raises:
            ValueError: If ``axis`` is not ``None``.
        """
        if axis is None:
            return self.data.size
        else:
            raise ValueError(
                'getnnz over an axis is not supported (actual axis: {})'
                .format(axis))

    def get(self, stream=None):
        """Returns a copy of the array on host memory.

        Args:
            stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
                copy runs asynchronously. Otherwise, the copy is synchronous.

        Returns:
            scipy.sparse.coo_matrix: Copy of the array on host memory.

        """
        if not _scipy_available:
            raise RuntimeError('scipy is not available')

        data = self.data.get(stream)
        row = self.row.get(stream)
        col = self.col.get(stream)
        return scipy.sparse.coo_matrix(
            (data, (row, col)), shape=self.shape)

    def sum_duplicates(self):
        """Eliminate duplicate matrix entries by adding them together.

        .. warning::
            When sorting the indices, CuPy follows the convention of cuSPARSE,
            which is different from that of SciPy. Therefore, the order of the
            output indices may differ:

            .. code-block:: python

                >>> #     1 0 0
                >>> # A = 1 1 0
                >>> #     1 1 1
                >>> data = cupy.array([1, 1, 1, 1, 1, 1], 'f')
                >>> row = cupy.array([0, 1, 1, 2, 2, 2], 'i')
                >>> col = cupy.array([0, 0, 1, 0, 1, 2], 'i')
                >>> A = cupyx.scipy.sparse.coo_matrix((data, (row, col)),
                ...                                   shape=(3, 3))
                >>> a = A.get()
                >>> A.sum_duplicates()
                >>> a.sum_duplicates()  # a is scipy.sparse.coo_matrix
                >>> A.row
                array([0, 1, 1, 2, 2, 2], dtype=int32)
                >>> a.row
                array([0, 1, 2, 1, 2, 2], dtype=int32)
                >>> A.col
                array([0, 0, 1, 0, 1, 2], dtype=int32)
                >>> a.col
                array([0, 0, 0, 1, 1, 2], dtype=int32)

        .. warning::
            Calling this function might synchronize the device.

        .. seealso::
           :meth:`scipy.sparse.coo_matrix.sum_duplicates`

        """
        if self.has_canonical_format:
            return
        # Note: The sorting order below follows the cuSPARSE convention (first
        # row then col, so-called row-major) and differs from that of SciPy, as
        # the cuSPARSE functions such as cusparseSpMV() assume this sorting
        # order.
        # See https://docs.nvidia.com/cuda/cusparse/index.html#coo-format
        keys = cupy.stack([self.col, self.row])
        order = cupy.lexsort(keys)
        src_data = self.data[order]
        src_row = self.row[order]
        src_col = self.col[order]
        diff = self._sum_duplicates_diff(src_row, src_col, size=self.row.size)

        if diff[1:].all():
            # All elements have different indices.
            data = src_data
            row = src_row
            col = src_col
        else:
            # The running sum of diff gives each sorted entry its output
            # slot; duplicates share a slot.
            index = cupy.cumsum(diff, dtype='i')
            size = int(index[-1]) + 1
            data = cupy.zeros(size, dtype=self.data.dtype)
            row = cupy.empty(size, dtype='i')
            col = cupy.empty(size, dtype='i')
            if self.data.dtype.kind == 'f':
                self._sum_duplicates_assign(
                    src_data, src_row, src_col, index, data, row, col)
            elif self.data.dtype.kind == 'c':
                self._sum_duplicates_assign_complex(
                    src_data.real, src_data.imag, src_row, src_col, index,
                    data.real, data.imag, row, col)

        self.data = data
        self.row = row
        self.col = col
        self.has_canonical_format = True

    def toarray(self, order=None, out=None):
        """Returns a dense matrix representing the same value.

        Args:
            order (str): Not supported.
            out: Not supported.

        Returns:
            cupy.ndarray: Dense array representing the same value.

        .. seealso:: :meth:`scipy.sparse.coo_matrix.toarray`

        """
        return self.tocsr().toarray(order=order, out=out)

    def tocoo(self, copy=False):
        """Converts the matrix to COOrdinate format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible.

        Returns:
            cupyx.scipy.sparse.coo_matrix: Converted matrix.

        """
        if copy:
            return self.copy()
        else:
            return self

    def tocsc(self, copy=False):
        """Converts the matrix to Compressed Sparse Column format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in coo to csc conversion.

        Returns:
            cupyx.scipy.sparse.csc_matrix: Converted matrix.

        """
        if self.nnz == 0:
            return csc.csc_matrix(self.shape, dtype=self.dtype)
        # copy is silently ignored (in line with SciPy) because both
        # sum_duplicates and coosort change the underlying data
        x = self.copy()
        x.sum_duplicates()
        cusparse.coosort(x, 'c')
        x = cusparse.coo2csc(x)
        x.has_canonical_format = True
        return x

    def tocsr(self, copy=False):
        """Converts the matrix to Compressed Sparse Row format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in coo to csr conversion.

        Returns:
            cupyx.scipy.sparse.csr_matrix: Converted matrix.

        """
        if self.nnz == 0:
            return csr.csr_matrix(self.shape, dtype=self.dtype)
        # copy is silently ignored (in line with SciPy) because both
        # sum_duplicates and coosort change the underlying data
        x = self.copy()
        x.sum_duplicates()
        cusparse.coosort(x, 'r')
        x = cusparse.coo2csr(x)
        x.has_canonical_format = True
        return x

    def transpose(self, axes=None, copy=False):
        """Returns a transpose matrix.

        Args:
            axes: This option is not supported.
            copy (bool): If ``True``, a returned matrix shares no data.
                Otherwise, it shares data arrays as much as possible.

        Returns:
            cupyx.scipy.sparse.spmatrix: Transpose matrix.

        Raises:
            ValueError: If ``axes`` is not ``None``.

        """
        if axes is not None:
            raise ValueError(
                'Sparse matrices do not support an \'axes\' parameter because '
                'swapping dimensions is the only logical permutation.')
        shape = self.shape[1], self.shape[0]
        return coo_matrix(
            (self.data, (self.col, self.row)), shape=shape, copy=copy)
class _compressed_sparse_matrix(sparse_data._data_matrix):
    """Common base for sparse matrices stored as (data, indices, indptr).

    The three arrays are kept on the instance as ``data``, ``indices`` and
    ``indptr``.  ``indptr`` has one more entry than the size of the *major*
    axis, and ``indptr[k]:indptr[k + 1]`` delimits the entries of major
    index ``k``; ``indices`` holds the *minor* index of each stored value.

    Subclasses are expected to provide ``format`` and to implement
    :meth:`_convert_dense`, :meth:`_swap` and :meth:`_add_sparse`, which
    raise ``NotImplementedError`` here.
    """

    # Accumulates into ``answer[0]`` every stored value whose minor index
    # equals ``minor``.
    _compress_getitem_kern = core.ElementwiseKernel(
        'T d, S ind, int32 minor', 'raw T answer',
        'if (ind == minor) atomicAdd(&answer[0], d);',
        'compress_getitem')

    # Same as above, with the real and imaginary parts accumulated into
    # separate outputs.
    _compress_getitem_complex_kern = core.ElementwiseKernel(
        'T real, T imag, S ind, int32 minor',
        'raw T answer_real, raw T answer_imag',
        ''' if (ind == minor) { atomicAdd(&answer_real[0], real); atomicAdd(&answer_imag[0], imag); } ''',  # NOQA
        'compress_getitem_complex')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        """Initializes the matrix from one of several kinds of input.

        Args:
            arg1: One of the following (checked in this order):

                * a sparse matrix accepted by ``base.issparse``,
                * a shape accepted by ``util.isshape`` (creates a matrix
                  with no stored values),
                * a ``scipy.sparse`` matrix (only when SciPy is available),
                * a 3-tuple ``(data, indices, indptr)`` of 1-D dense arrays,
                * a dense array with at most two dimensions.
            shape (tuple): Two-element shape.  Ignored when ``arg1`` is
                itself a shape.  If omitted it is taken from ``arg1`` or,
                for the 3-tuple form, inferred from ``indptr`` and the
                largest value in ``indices``.
            dtype: Data type.  Only float32, float64, complex64 and
                complex128 are accepted.
            copy (bool): Passed to ``astype`` when storing the arrays.  It
                is forced to ``False`` in the branches that have already
                produced fresh arrays.

        Raises:
            ValueError: If ``shape`` is not two-dimensional, the initializer
                is not recognised, the arrays of the 3-tuple form are not
                1-D or have mismatching sizes, the dtype is unsupported, or
                ``indptr`` has the wrong length for the shape.
            TypeError: If a dense ``arg1`` has more than two dimensions.
        """
        if shape is not None and len(shape) != 2:
            raise ValueError(
                'Only two-dimensional sparse arrays are supported.')

        if base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            indices = x.indices
            indptr = x.indptr

            if arg1.format != self.format:
                # When formats are different, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

            has_canonical_format = x.has_canonical_format
        elif util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            data = basic.zeros(0, dtype if dtype else 'd')
            indices = basic.zeros(0, 'i')
            indptr = basic.zeros(self._swap(m, n)[0] + 1, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False
            has_canonical_format = True
        elif scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupy.sparse
            x = arg1.asformat(self.format)
            data = cupy.array(x.data)
            indices = cupy.array(x.indices, dtype='i')
            indptr = cupy.array(x.indptr, dtype='i')
            copy = False

            if shape is None:
                shape = arg1.shape
            has_canonical_format = x.has_canonical_format
        elif isinstance(arg1, tuple) and len(arg1) == 3:
            data, indices, indptr = arg1
            if not (base.isdense(data) and data.ndim == 1 and
                    base.isdense(indices) and indices.ndim == 1 and
                    base.isdense(indptr) and indptr.ndim == 1):
                raise ValueError(
                    'data, indices, and indptr should be 1-D')

            if len(data) != len(indices):
                raise ValueError('indices and data should have the same size')

            # Nothing is known about ordering or duplicates of raw arrays.
            has_canonical_format = False
        elif base.isdense(arg1):
            if arg1.ndim > 2:
                raise TypeError('expected dimension <= 2 array or matrix')
            elif arg1.ndim == 1:
                arg1 = arg1[None]
            elif arg1.ndim == 0:
                arg1 = arg1[None, None]
            data, indices, indptr = self._convert_dense(arg1)
            copy = False
            if shape is None:
                shape = arg1.shape
            has_canonical_format = True
        else:
            raise ValueError(
                'Unsupported initializer format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype != 'f' and dtype != 'd' and dtype != 'F' and dtype != 'D':
            raise ValueError(
                'Only float32, float64, complex64 and complex128 '
                'are supported')

        data = data.astype(dtype, copy=copy)
        sparse_data._data_matrix.__init__(self, data)

        self.indices = indices.astype('i', copy=copy)
        self.indptr = indptr.astype('i', copy=copy)

        if shape is None:
            # Only reachable for the (data, indices, indptr) form.
            shape = self._swap(len(indptr) - 1, int(indices.max()) + 1)

        major, minor = self._swap(*shape)
        if len(indptr) != major + 1:
            raise ValueError('index pointer size (%d) should be (%d)'
                             % (len(indptr), major + 1))

        self._descr = cusparse.MatDescriptor.create()
        self._shape = shape
        self._has_canonical_format = has_canonical_format

    def _with_data(self, data):
        """Returns a matrix of the same class holding ``data``.

        ``indices`` and ``indptr`` are copied; the shape is kept.
        """
        return self.__class__(
            (data, self.indices.copy(), self.indptr.copy()),
            shape=self.shape)

    def _convert_dense(self, x):
        """Converts a dense 2-D array to ``(data, indices, indptr)``.

        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def _swap(self, x, y):
        """Maps a (row, column) pair to (major, minor) order.

        It is also applied to (major, minor) pairs to get a shape back, so
        implementations are presumably their own inverse.  Must be
        implemented by subclasses.
        """
        raise NotImplementedError

    def _add_sparse(self, other, alpha, beta):
        """Adds a sparse matrix; ``alpha``/``beta`` are the +1/-1 signs.

        Presumably returns ``alpha * self + beta * other``.  Must be
        implemented by subclasses.
        """
        raise NotImplementedError

    def _add(self, other, lhs_negative, rhs_negative):
        """Computes ``(+/-self) + (+/-other)``.

        Args:
            other: Scalar, sparse matrix or dense array.
            lhs_negative (bool): If ``True``, ``self`` is negated.
            rhs_negative (bool): If ``True``, ``other`` is negated.

        Returns:
            The result, or ``NotImplemented`` for unsupported operand types.

        Raises:
            NotImplementedError: If ``other`` is a nonzero scalar.
        """
        if cupy.isscalar(other):
            if other == 0:
                if lhs_negative:
                    return -self
                else:
                    return self.copy()
            else:
                raise NotImplementedError(
                    'adding a nonzero scalar to a sparse matrix is not '
                    'supported')
        elif base.isspmatrix(other):
            alpha = -1 if lhs_negative else 1
            beta = -1 if rhs_negative else 1
            return self._add_sparse(other, alpha, beta)
        elif base.isdense(other):
            if lhs_negative:
                if rhs_negative:
                    return -self.todense() - other
                else:
                    return other - self.todense()
            else:
                if rhs_negative:
                    return self.todense() - other
                else:
                    return self.todense() + other
        else:
            return NotImplemented

    def __add__(self, other):
        return self._add(other, False, False)

    def __radd__(self, other):
        return self._add(other, False, False)

    def __sub__(self, other):
        return self._add(other, False, True)

    def __rsub__(self, other):
        # other - self: negate the left-hand (self) operand.
        return self._add(other, True, False)

    def __getitem__(self, slices):
        """Returns a single element or a contiguous major-axis slice.

        Supported forms, expressed on the (major, minor) axes obtained via
        :meth:`_swap`: scalar major with scalar minor, scalar major with a
        full minor slice, and a major slice with a full minor slice.

        Raises:
            IndexError: For ``None`` (newaxis), a wrong number of indices or
                an out-of-range scalar index.
            ValueError: For any other indexing form.
        """
        if isinstance(slices, tuple):
            slices = list(slices)
        elif isinstance(slices, list):
            slices = list(slices)
            # A plain list of ints is a single index, not a tuple of them.
            if all(isinstance(s, int) for s in slices):
                slices = [slices]
        else:
            slices = [slices]

        ellipsis = -1
        n_ellipsis = 0
        for i, s in enumerate(slices):
            if s is None:
                raise IndexError('newaxis is not supported')
            elif s is Ellipsis:
                ellipsis = i
                n_ellipsis += 1
        if n_ellipsis > 0:
            # Expand the (last) Ellipsis into as many full slices as needed.
            ellipsis_size = self.ndim - (len(slices) - 1)
            slices[ellipsis:ellipsis + 1] = [slice(None)] * ellipsis_size

        if len(slices) == 2:
            row, col = slices
        elif len(slices) == 1:
            row, col = slices[0], slice(None)
        else:
            raise IndexError('invalid number of indices')

        major, minor = self._swap(row, col)
        major_size, minor_size = self._swap(*self._shape)
        # Check the type before comparing: ``==`` on an array-like index may
        # be elementwise and cannot be relied on as a condition.
        minor_is_full = isinstance(minor, slice) and minor == slice(None)
        if numpy.isscalar(major):
            i = int(major)
            if i < 0:
                i += major_size
            if not (0 <= i < major_size):
                raise IndexError('index out of bounds')
            if numpy.isscalar(minor):
                j = int(minor)
                if j < 0:
                    j += minor_size
                if not (0 <= j < minor_size):
                    raise IndexError('index out of bounds')
                return self._get_single(i, j)
            elif minor_is_full:
                return self._get_major_slice(slice(i, i + 1))
        elif isinstance(major, slice):
            if minor_is_full:
                return self._get_major_slice(major)

        raise ValueError('unsupported indexing')

    def _get_single(self, major, minor):
        """Returns the element at ``(major, minor)`` as a 0-dim array.

        All stored entries of ``major`` whose index equals ``minor`` are
        accumulated, so the result is zero when nothing is stored there.
        """
        start = self.indptr[major]
        end = self.indptr[major + 1]
        answer = cupy.zeros((), self.dtype)
        data = self.data[start:end]
        indices = self.indices[start:end]
        if self.dtype.kind == 'c':
            self._compress_getitem_complex_kern(
                data.real, data.imag, indices, minor,
                answer.real, answer.imag)
        else:
            self._compress_getitem_kern(
                data, indices, minor, answer)
        return answer[()]

    def _get_major_slice(self, major):
        """Returns the sub-matrix selected by a step-1 slice of the major axis.

        Args:
            major (slice): Slice along the major axis.

        Returns:
            A matrix of the same class built with ``copy=False`` from slices
            of this matrix's arrays.

        Raises:
            ValueError: If the step is not 1.
            IndexError: If the normalised bounds fall outside the major
                axis or the start exceeds the stop.
        """
        major_size, minor_size = self._swap(*self._shape)
        # major.indices cannot be used because scipy.sparse behaves differently
        major_start = major.start
        major_stop = major.stop
        major_step = major.step
        if major_start is None:
            major_start = 0
        if major_stop is None:
            major_stop = major_size
        if major_step is None:
            major_step = 1
        if major_start < 0:
            major_start += major_size
        if major_stop < 0:
            major_stop += major_size

        if major_step != 1:
            raise ValueError('slicing with step != 1 not supported')

        if not (0 <= major_start <= major_size and
                0 <= major_stop <= major_size and
                major_start <= major_stop):
            raise IndexError('index out of bounds')

        start = self.indptr[major_start]
        stop = self.indptr[major_stop]
        data = self.data[start:stop]
        # Rebase the pointers so that the first one is zero.
        indptr = self.indptr[major_start:major_stop + 1] - start
        indices = self.indices[start:stop]

        shape = self._swap(len(indptr) - 1, minor_size)
        return self.__class__(
            (data, indices, indptr), shape=shape, dtype=self.dtype, copy=False)

    @property
    def has_canonical_format(self):
        """bool: The canonical-format flag recorded for this matrix."""
        return self._has_canonical_format

    @has_canonical_format.setter
    def has_canonical_format(self, value):
        # Allow the flag to be assigned as a plain attribute; without a
        # setter such an assignment raises AttributeError.
        self._has_canonical_format = bool(value)

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.
        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros.

        Args:
            axis: Not supported yet.

        Returns:
            int: The number of stored values.

        Raises:
            ValueError: If ``axis`` is not ``None``.
        """
        if axis is None:
            return self.data.size
        else:
            raise ValueError(
                'getnnz over an axis is not supported; axis must be None')

    # TODO(unno): Implement sorted_indices

    def sum_duplicates(self):
        """Sums duplicated entries in place and marks the matrix canonical.

        Does nothing if the matrix is already flagged as canonical.  An
        empty matrix is only flagged.  Otherwise the matrix is converted to
        COO, its duplicates are summed there, and this object is
        re-initialized from the result.
        """
        if self._has_canonical_format:
            return
        if self.data.size == 0:
            self._has_canonical_format = True
            return
        coo = self.tocoo()
        coo.sum_duplicates()
        self.__init__(coo.asformat(self.format))
        self._has_canonical_format = True