def _min_or_max(self, axis, out, min_or_max, explicit): if out is not None: raise ValueError(("Sparse matrices do not support " "an 'out' parameter.")) sputils.validateaxis(axis) if axis is None: if 0 in self.shape: raise ValueError("zero-size array to reduction operation") zero = cupy.zeros((), dtype=self.dtype) if self.nnz == 0: return zero self.sum_duplicates() m = min_or_max(self.data) if explicit: return m if self.nnz != internal.prod(self.shape): if min_or_max is cupy.min: m = cupy.minimum(zero, m) elif min_or_max is cupy.max: m = cupy.maximum(zero, m) else: assert False return m if axis < 0: axis += 2 return self._min_or_max_axis(axis, min_or_max, explicit)
def check_mode(self, array, mode, dtype, batched=False):
    """Compare ``cupy.linalg.qr`` with ``numpy.linalg.qr`` on one input.

    Args:
        array: Input data; converted to both a NumPy and a CuPy array of
            ``dtype``.
        mode (str): QR mode forwarded to both implementations.
        dtype: Data type of the test arrays.
        batched (bool): Whether ``array`` is a stack of matrices. For
            batched input on NumPy older than 1.22.0 the reference result
            is computed matrix by matrix.

    Each pair of results is handed to ``self._check_result``.
    """
    hip_build_below_307 = (
        runtime.is_hip and driver.get_build_version() < 307)
    if hip_build_below_307 and dtype in (numpy.complex64, numpy.complex128):
        pytest.skip('ungqr unsupported')

    a_cpu = numpy.asarray(array, dtype=dtype)
    a_gpu = cupy.asarray(array, dtype=dtype)
    result_gpu = cupy.linalg.qr(a_gpu, mode=mode)

    numpy_takes_whole_input = (
        (not batched)
        or (numpy.lib.NumpyVersion(numpy.__version__) >= '1.22.0'))
    if numpy_takes_whole_input:
        result_cpu = numpy.linalg.qr(a_cpu, mode=mode)
        self._check_result(result_cpu, result_gpu)
        return

    # We still want to test it to gain confidence...
    # TODO(leofang): Use @testing.with_requires('numpy>=1.22') once
    # NumPy 1.22 is out, and clean up this helper function
    batch_size = prod(a_cpu.shape[:-2])
    a_cpu = a_cpu.reshape(batch_size, *a_cpu.shape[-2:])
    gpu_is_pair = isinstance(result_gpu, tuple)
    for i, matrix_cpu in enumerate(a_cpu):
        res_cpu = numpy.linalg.qr(matrix_cpu, mode=mode)
        if gpu_is_pair:
            q_gpu, r_gpu = result_gpu
            q_gpu = q_gpu.reshape(batch_size, *q_gpu.shape[-2:])
            # In 'raw' mode the second output has a single trailing axis.
            idx = -1 if mode == 'raw' else -2
            r_gpu = r_gpu.reshape(batch_size, *r_gpu.shape[idx:])
            res_gpu = (q_gpu[i], r_gpu[i])
        else:  # mode == 'r'
            flat_gpu = result_gpu.reshape(
                batch_size, *result_gpu.shape[-2:])
            res_gpu = flat_gpu[i]
        self._check_result(res_cpu, res_gpu)
def _qr_batched(a, mode):
    """QR-decompose a stack of matrices.

    Args:
        a (cupy.ndarray): Array of shape ``(..., M, N)``.
        mode (str): One of ``'reduced'``, ``'complete'``, ``'r'`` or
            ``'raw'``.

    Returns:
        A single array for ``mode == 'r'``, otherwise a pair of arrays.
        The leading batch dimensions of ``a`` are kept on every output.
    """
    batch_shape = a.shape[:-2]
    batch_size = internal.prod(batch_shape)
    m, n = a.shape[-2:]
    k = min(m, n)

    # first handle any 0-size inputs
    if batch_size == 0 or k == 0:
        # support float32, float64, complex64, and complex128
        _, out_dtype = _util.linalg_common_type(a)

        def _empty(*trailing):
            return cupy.empty(batch_shape + trailing, out_dtype)

        if mode == 'reduced':
            return (_empty(m, k), _empty(k, n))
        if mode == 'complete':
            q = _util.stacked_identity(batch_shape, m, out_dtype)
            return (q, _empty(m, n))
        if mode == 'r':
            return _empty(k, n)
        if mode == 'raw':
            return (_empty(n, m), _empty(k))

    # ...then delegate real computation to cuSOLVER/rocSOLVER
    stacked = a.reshape(-1, *(a.shape[-2:]))
    out = _geqrf_orgqr_batched(stacked, mode)
    if mode == 'r':
        return out.reshape(batch_shape + out.shape[-2:])

    q, r = out
    q = q.reshape(batch_shape + q.shape[-2:])
    # In 'raw' mode the second output has a single trailing axis.
    r_tail = r.shape[-1:] if mode == 'raw' else r.shape[-2:]
    r = r.reshape(batch_shape + r_tail)
    return (q, r)
def medfilt(volume, kernel_size=None):
    """Apply a median filter to an N-dimensional array.

    The median is taken over a local window whose extent is given by
    ``kernel_size``. The array is zero-padded automatically.

    Args:
        volume (cupy.ndarray): An N-dimensional input array.
        kernel_size (int or list of ints): Size of the median filter
            window in each dimension. Elements of ``kernel_size`` should
            be odd. A scalar is used as the size in every dimension.
            Default size is 3 for each dimension.

    Returns:
        cupy.ndarray: An array the same size as input containing the
        median filtered result.

    Raises:
        ValueError: If ``volume`` has a complex dtype.

    .. seealso:: :func:`cupyx.scipy.ndimage.median_filter`
    .. seealso:: :func:`scipy.signal.medfilt`
    """
    if volume.dtype.kind == 'c':
        # scipy doesn't support complex
        raise ValueError("complex types not supported")

    # output is forced to float64 to match scipy
    kernel_size = _get_kernel_size(kernel_size, volume.ndim)

    window_too_large = any(
        k > s for k, s in zip(kernel_size, volume.shape))
    if window_too_large:
        warnings.warn('kernel_size exceeds volume extent: '
                      'volume will be zero-padded')

    # The median is the middle rank of the window's sorted elements.
    window_elems = internal.prod(kernel_size)
    median_rank = window_elems // 2
    return filters.rank_filter(volume, median_rank, size=kernel_size,
                               output=float, mode='constant')
def shaped_arange(shape, xp=cupy, dtype=numpy.float32, order='C'):
    """Returns an array of consecutive numbers with the given shape.

    Args:
         shape(tuple of int): Shape of returned ndarray.
         xp(numpy or cupy): Array module to use.
         dtype(dtype): Dtype of returned ndarray.
         order({'C', 'F'}): Order of returned ndarray.

    Returns:
         numpy.ndarray or cupy.ndarray:
         The array filled with :math:`1, \\cdots, N` with specified dtype
         with given shape, array module. Here, :math:`N` is
         the size of the returned array.
         If ``dtype`` is ``numpy.bool_``, evens (resp. odds) are converted to
         ``True`` (resp. ``False``).
         For complex dtypes the imaginary part equals the real part.

    """
    dtype = numpy.dtype(dtype)
    size = internal.prod(shape)
    values = numpy.arange(1, size + 1, 1)
    if dtype == '?':
        values = values % 2 == 0
    elif dtype.kind == 'c':
        values = values + values * 1j
    shaped = values.astype(dtype).reshape(shape)
    return xp.array(shaped, order=order)
def empty_like_pinned(a, dtype=None, order='K', subok=None, shape=None):
    """Returns a new, uninitialized pinned NumPy array shaped like ``a``.

    This is a convenience function which is just :func:`numpy.empty_like`,
    except that the underlying memory is pinned/pagelocked.

    This function currently does not support ``subok`` option.

    Args:
        a (numpy.ndarray or cupy.ndarray): Base array.
        dtype: Data type specifier. The data type of ``a`` is used by default.
        order ({'C', 'F', 'A', or 'K'}): Overrides the memory layout of the
            result. ``'C'`` means C-order, ``'F'`` means F-order, ``'A'`` means
            ``'F'`` if ``a`` is Fortran contiguous, ``'C'`` otherwise.
            ``'K'`` means match the layout of ``a`` as closely as possible.
        subok: Not supported yet, must be None.
        shape (int or tuple of ints): Overrides the shape of the result. If
            ``order='K'`` and the number of dimensions is unchanged, will try
            to keep order, otherwise, ``order='C'`` is implied.

    Returns:
        numpy.ndarray: A new array with same shape and dtype of ``a`` with
        elements not initialized.

    Raises:
        TypeError: If ``subok`` is not ``None``.

    .. seealso:: :func:`numpy.empty_like`

    """
    # We're kinda duplicating the code here because order='K' needs special
    # treatment: strides need to be computed
    if subok is not None:
        raise TypeError('subok is not supported yet')

    dtype = a.dtype if dtype is None else dtype
    shape = _update_shape(a, shape)
    order, strides, _ = _new_like_order_and_strides(
        a, dtype, order, shape, get_memptr=False)

    element_count = internal.prod(shape)
    nbytes = element_count * numpy.dtype(dtype).itemsize
    mem = cuda.alloc_pinned_memory(nbytes)
    return numpy.ndarray(shape, dtype=dtype, buffer=mem,
                         strides=strides, order=order)
def _potrf_batched(a):
    """Batched Cholesky decomposition.

    Decompose a given array of two-dimensional square matrices into
    ``L * L.T``, where ``L`` is a lower-triangular matrix and ``.T``
    is a conjugate transpose operator.

    Args:
        a (cupy.ndarray): The input array of matrices
            with dimension ``(..., N, N)``

    Returns:
        cupy.ndarray: The lower-triangular matrix.

    Raises:
        RuntimeError: If ``check_availability('potrfBatched')`` is false.
    """
    if not check_availability('potrfBatched'):
        raise RuntimeError('potrfBatched is not available')

    dtype, out_dtype = _util.linalg_common_type(a)
    # Zero-size input: nothing to factorize, return an empty result of the
    # same shape in the output dtype.
    if a.size == 0:
        return cupy.empty(a.shape, out_dtype)

    # Pick the cuSOLVER routine matching the computation dtype
    # ('f'/'d'/'F'/'D' -> s/d/c/z variants).
    if dtype == 'f':
        potrfBatched = cusolver.spotrfBatched
    elif dtype == 'd':
        potrfBatched = cusolver.dpotrfBatched
    elif dtype == 'F':
        potrfBatched = cusolver.cpotrfBatched
    else:  # dtype == 'D':
        potrfBatched = cusolver.zpotrfBatched

    # Work on a fresh C-ordered copy: its buffer is handed to the routine
    # and is what the result is built from, so ``a`` itself is not touched.
    x = a.astype(dtype, order='C', copy=True)
    # NOTE(review): presumably an array of per-matrix device pointers into
    # ``x`` -- confirm against cupy._core._mat_ptrs.
    xp = cupy._core._mat_ptrs(x)
    n = x.shape[-1]
    # Leading dimension expressed in elements rather than bytes.
    ldx = x.strides[-2] // x.dtype.itemsize
    handle = device.get_cusolver_handle()
    batch_size = internal.prod(x.shape[:-2])
    # One int32 status slot per matrix in the batch.
    dev_info = cupy.empty(batch_size, dtype=numpy.int32)

    # NOTE(review): UPPER is requested although the lower factor is
    # returned; presumably this accounts for the C-ordered buffer being
    # read as column-major by cuSOLVER -- confirm.
    potrfBatched(
        handle, cublas.CUBLAS_FILL_MODE_UPPER, n, xp.data.ptr, ldx,
        dev_info.data.ptr, batch_size)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        potrfBatched, dev_info)

    # Keep only the lower triangle and convert to the output dtype.
    return cupy.tril(x).astype(out_dtype, copy=False)
def tensorsolve(a, b, axes=None):
    """Solves tensor equations denoted by ``ax = b``.

    Suppose that ``b`` is equivalent to ``cupy.tensordot(a, x)``.
    This function computes tensor ``x`` from ``a`` and ``b``.

    Args:
        a (cupy.ndarray): The tensor with ``len(shape) >= 1``
        b (cupy.ndarray): The tensor with ``len(shape) >= 1``
        axes (tuple of ints): Axes in ``a`` to reorder to the right
            before inversion.

    Returns:
        cupy.ndarray:
            The tensor with shape ``Q`` such that ``b.shape + Q == a.shape``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.tensorsolve`
    """
    if axes is not None:
        # Move every requested axis to the end, preserving the given order.
        allaxes = list(range(a.ndim))
        for k in axes:
            allaxes.remove(k)
            allaxes.append(k)
        a = a.transpose(allaxes)

    # The trailing axes of ``a`` that are not matched by ``b`` give the
    # shape of the unknown tensor.
    extra_ndim = a.ndim - b.ndim
    solution_shape = a.shape[-extra_ndim:]
    unknown_count = internal.prod(solution_shape)

    coefficients = a.reshape(-1, unknown_count)
    rhs = b.ravel()
    return solve(coefficients, rhs).reshape(solution_shape)
def tensorinv(a, ind=2):
    """Computes the inverse of a tensor.

    This function computes tensor ``a_inv`` from tensor ``a`` such that
    ``tensordot(a_inv, a, ind) == I``, where ``I`` denotes the identity tensor.

    Args:
        a (cupy.ndarray):
            The tensor such that
            ``prod(a.shape[:ind]) == prod(a.shape[ind:])``.
        ind (int):
            The positive number used in ``axes`` option of ``tensordot``.

    Returns:
        cupy.ndarray:
            The inverse of a tensor whose shape is equivalent to
            ``a.shape[ind:] + a.shape[:ind]``.

    Raises:
        ValueError: If ``ind`` is not positive.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.tensorinv`
    """
    _util._assert_cupy_array(a)

    if ind <= 0:
        raise ValueError('Invalid ind argument')

    leading, trailing = a.shape[:ind], a.shape[ind:]
    invshape = trailing + leading

    # Flatten to a 2-D matrix, invert it, then restore the swapped
    # tensor shape.
    side = internal.prod(trailing)
    matrix = a.reshape(side, -1)
    return inv(matrix).reshape(*invshape)
def shaped_reverse_arange(shape, xp=cupy, dtype=numpy.float32):
    """Returns an array filled with decreasing numbers.

    Args:
         shape(tuple of int): Shape of returned ndarray.
         xp(numpy or cupy): Array module to use.
         dtype(dtype): Dtype of returned ndarray.

    Returns:
         numpy.ndarray or cupy.ndarray:
         The array filled with :math:`N, \\cdots, 1` with specified dtype
         with given shape, array module.
         Here, :math:`N` is the size of the returned array.
         If ``dtype`` is ``numpy.bool_``, evens (resp. odds) are converted to
         ``True`` (resp. ``False``).
         For complex dtypes the imaginary part equals the real part.
    """
    dtype = numpy.dtype(dtype)
    count = internal.prod(shape)
    values = numpy.arange(count, 0, -1)
    if dtype == '?':
        values = values % 2 == 0
    elif dtype.kind == 'c':
        values = values + values * 1j
    shaped = values.astype(dtype).reshape(shape)
    return xp.array(shaped)
def empty_pinned(shape, dtype=float, order='C'):
    """Returns a new, uninitialized pinned NumPy array.

    This is a convenience function which is just :func:`numpy.empty`,
    except that the underlying memory is pinned/pagelocked.

    Args:
        shape (int or tuple of ints): Dimensionalities of the array.
        dtype: Data type specifier.
        order ({'C', 'F'}): Row-major (C-style) or column-major
            (Fortran-style) order.

    Returns:
        numpy.ndarray: A new array with elements not initialized.

    .. seealso:: :func:`numpy.empty`

    """
    shape = _update_shape(None, shape)

    # Size the pinned allocation to hold every element of the array.
    element_count = internal.prod(shape)
    nbytes = element_count * numpy.dtype(dtype).itemsize
    mem = cuda.alloc_pinned_memory(nbytes)

    return numpy.ndarray(shape, dtype=dtype, buffer=mem, order=order)
def _svd_batched(a, full_matrices, compute_uv):
    """Singular value decomposition of a stack of matrices.

    Args:
        a (cupy.ndarray): Array of shape ``(..., N, M)``.
        full_matrices (bool): Whether the full-size ``u``/``vt`` are
            requested rather than the reduced ones.
        compute_uv (bool): Whether ``u`` and ``vt`` are returned in
            addition to the singular values.

    Returns:
        ``(u, s, vt)`` if ``compute_uv`` is true, otherwise ``s`` alone.
        The leading batch dimensions of ``a`` are kept on every output.
    """
    batch_shape = a.shape[:-2]
    batch_size = internal.prod(batch_shape)
    n, m = a.shape[-2:]

    dtype, uv_dtype = _util.linalg_common_type(a)
    s_dtype = uv_dtype.char.lower()

    # first handle any 0-size inputs
    if batch_size == 0:
        k = min(m, n)
        s = cupy.empty(batch_shape + (k, ), s_dtype)
        if not compute_uv:
            return s
        if full_matrices:
            u_shape, vt_shape = (n, n), (m, m)
        else:
            u_shape, vt_shape = (n, k), (k, m)
        u = cupy.empty(batch_shape + u_shape, dtype=uv_dtype)
        vt = cupy.empty(batch_shape + vt_shape, dtype=uv_dtype)
        return u, s, vt

    if m == 0 or n == 0:
        s = cupy.empty(batch_shape + (0, ), s_dtype)
        if not compute_uv:
            return s
        if full_matrices:
            u = _util.stacked_identity(batch_shape, n, uv_dtype)
            vt = _util.stacked_identity(batch_shape, m, uv_dtype)
        else:
            u = cupy.empty(batch_shape + (n, 0), dtype=uv_dtype)
            vt = cupy.empty(batch_shape + (0, m), dtype=uv_dtype)
        return u, s, vt

    # ...then delegate real computation to cuSOLVER
    stacked = a.reshape(-1, *(a.shape[-2:]))
    if runtime.is_hip or (m <= 32 and n <= 32):
        # copy is done in _gesvdj_batched, so let's try not to do it here
        stacked = stacked.astype(dtype, order='C', copy=False)
        out = _gesvdj_batched(stacked, full_matrices, compute_uv, False)
    else:
        # manually loop over cusolverDn<t>gesvd()
        # copy (via possible type casting) is done in _gesvd_batched
        # note: _gesvd_batched returns V, not V^H
        out = _gesvd_batched(
            stacked, dtype.char, full_matrices, compute_uv, False)

    def _restore(arr, target_dtype, core_ndim):
        # Cast to the result dtype and put the batch dimensions back.
        arr = arr.astype(target_dtype, copy=False)
        return arr.reshape(*batch_shape, *(arr.shape[-core_ndim:]))

    if not compute_uv:
        return _restore(out, s_dtype, 1)

    u, s, v = out
    u = _restore(u, uv_dtype, 2)
    s = _restore(s, s_dtype, 1)
    v = _restore(v, uv_dtype, 2)
    return u, s, v.swapaxes(-2, -1).conj()
def test_two(self):
    """``internal.prod`` of two elements is their product."""
    product = internal.prod([2, 3])
    assert product == 6
def test_one(self):
    """``internal.prod`` of a single element is that element."""
    product = internal.prod([2])
    assert product == 2
def check_usv(self, shape, dtype):
    """Check ``cupy.linalg.svd`` (with ``u``/``vh``) against NumPy.

    Verifies, for a random input of the given ``shape`` and ``dtype``:
    the input is left intact, result shapes and dtypes match NumPy's,
    singular values agree, ``u * s @ vh`` reconstructs the input, and
    ``u`` / ``vh`` are unitary.

    Args:
        shape (tuple of int): Shape of the test input.
        dtype: Data type of the test input.
    """
    array = testing.shaped_random(
        shape, numpy, dtype=dtype, seed=self.seed)
    a_cpu = numpy.asarray(array, dtype=dtype)
    a_gpu = cupy.asarray(array, dtype=dtype)
    result_cpu = numpy.linalg.svd(a_cpu, full_matrices=self.full_matrices)
    result_gpu = cupy.linalg.svd(a_gpu, full_matrices=self.full_matrices)
    # Check if the input matrix is not broken
    cupy.testing.assert_allclose(a_gpu, a_cpu)

    assert len(result_gpu) == 3
    for part_gpu, part_cpu in zip(result_gpu, result_cpu):
        assert part_gpu.shape == part_cpu.shape
        assert part_gpu.dtype == part_cpu.dtype

    _, s_cpu, _ = result_cpu
    u_gpu, s_gpu, vh_gpu = result_gpu
    cupy.testing.assert_allclose(s_gpu, s_cpu, rtol=1e-5, atol=1e-4)

    # reconstruct the matrix
    k = s_cpu.shape[-1]
    single_matrix = len(shape) == 2
    if single_matrix:
        if self.full_matrices:
            scaled_u, vh_used = u_gpu[:, :k] * s_gpu, vh_gpu[:k, :]
        else:
            scaled_u, vh_used = u_gpu * s_gpu, vh_gpu
        a_gpu_usv = cupy.dot(scaled_u, vh_used)
    else:
        if self.full_matrices:
            scaled_u = u_gpu[..., :k] * s_gpu[..., None, :]
            vh_used = vh_gpu[..., :k, :]
        else:
            scaled_u = u_gpu * s_gpu[..., None, :]
            vh_used = vh_gpu
        a_gpu_usv = cupy.matmul(scaled_u, vh_used)
    cupy.testing.assert_allclose(a_gpu, a_gpu_usv, rtol=1e-4, atol=1e-4)

    # assert unitary
    if single_matrix:
        cupy.testing.assert_allclose(
            cupy.matmul(u_gpu.T.conj(), u_gpu),
            numpy.eye(u_gpu.shape[1]),
            atol=1e-4)
        cupy.testing.assert_allclose(
            cupy.matmul(vh_gpu, vh_gpu.T.conj()),
            numpy.eye(vh_gpu.shape[0]),
            atol=1e-4)
        return

    batch = prod(shape[:-2])

    def _expected_identity(size):
        # Reference ``size x size`` identities with the batch shape in
        # front; an empty batch only needs an array of the right shape.
        if batch == 0:
            return numpy.empty(shape[:-2] + (size, size))
        eyes = [numpy.eye(size) for _ in range(batch)]
        return numpy.stack(eyes, axis=0).reshape(
            *(shape[:-2]), size, size)

    id_u_cpu = _expected_identity(u_gpu.shape[-1])
    id_vh_cpu = _expected_identity(vh_gpu.shape[-2])
    cupy.testing.assert_allclose(
        cupy.matmul(u_gpu.swapaxes(-1, -2).conj(), u_gpu),
        id_u_cpu, atol=1e-4)
    cupy.testing.assert_allclose(
        cupy.matmul(vh_gpu, vh_gpu.swapaxes(-1, -2).conj()),
        id_vh_cpu, atol=1e-4)
def tensordot(a, b, axes=2):
    """Returns the tensor dot product of two arrays along specified axes.

    This is equivalent to compute dot product along the specified axes which
    are treated as one axis by reshaping.

    Args:
        a (cupy.ndarray): The first argument.
        b (cupy.ndarray): The second argument.
        axes:
            - If it is an integer, then ``axes`` axes at the last of ``a`` and
              the first of ``b`` are used.
            - If it is a pair of sequences of integers, then these two
              sequences specify the list of axes for ``a`` and ``b``. The
              corresponding axes are paired for sum-product.

    Returns:
        cupy.ndarray: The tensor dot product of ``a`` and ``b`` along the
        axes specified by ``axes``.

    Raises:
        ValueError: If an input is zero-dimensional while ``axes`` is not
            empty, if ``axes`` is a sequence not of length two, or if the
            paired axes differ in number or in length.

    .. seealso:: :func:`numpy.tensordot`

    """
    a_ndim = a.ndim
    b_ndim = b.ndim

    # Zero-dimensional operands reduce to a plain multiplication.
    if a_ndim == 0 or b_ndim == 0:
        if axes != 0 and axes != ((), ()):
            raise ValueError('An input is zero-dim while axes has dimensions')
        return cupy.multiply(a, b)

    if isinstance(axes, collections.abc.Sequence):
        if len(axes) != 2:
            raise ValueError('Axes must consist of two arrays.')
        a_axes, b_axes = axes
        if numpy.isscalar(a_axes):
            a_axes = (a_axes,)
        if numpy.isscalar(b_axes):
            b_axes = (b_axes,)
    else:
        # Integer form: last ``axes`` axes of ``a``, first ``axes`` of ``b``.
        a_axes = tuple(range(a_ndim - axes, a_ndim))
        b_axes = tuple(range(axes))

    sum_ndim = len(a_axes)
    if sum_ndim != len(b_axes):
        raise ValueError('Axes length mismatch')

    lengths_differ = any(
        a.shape[a_axis] != b.shape[b_axis]
        for a_axis, b_axis in zip(a_axes, b_axes))
    if lengths_differ:
        raise ValueError('Axis dimension mismatch')

    # Make the axes non-negative
    a = _move_axes_to_head(a, [axis % a_ndim for axis in a_axes])
    b = _move_axes_to_head(b, [axis % b_ndim for axis in b_axes])

    ret_shape = a.shape[sum_ndim:] + b.shape[sum_ndim:]

    k = internal.prod(a.shape[:sum_ndim])
    # Avoid division by zero: _core.tensordot_core returns zeros without
    # checking n, m consistency, thus allowing 0-length dimensions to work
    if k != 0:
        n = a.size // k
        m = b.size // k
    else:
        n = 0
        m = 0

    return _core.tensordot_core(a, b, None, n, m, k, ret_shape)
def test_empty(self):
    """``internal.prod`` of an empty sequence is the multiplicative identity."""
    product = internal.prod([])
    assert product == 1