def _get_bin_edges(a, bins, range): """ Computes the bins used internally by `histogram`. Args: a (ndarray): Ravelled data array bins (int or ndarray): Forwarded argument from `histogram`. range (None or tuple): Forwarded argument from `histogram`. Returns: bin_edges (ndarray): Array of bin edges """ # parse the overloaded bins argument n_equal_bins = None bin_edges = None if isinstance(bins, str): raise NotImplementedError( 'only integer and array bins are implemented') elif isinstance(bins, cupy.ndarray) or numpy.ndim(bins) == 1: # TODO(okuta): After #3060 is merged, `if cupy.ndim(bins) == 1:`. if isinstance(bins, cupy.ndarray): bin_edges = bins else: bin_edges = numpy.asarray(bins) if (bin_edges[:-1] > bin_edges[1:]).any(): # synchronize! when CuPy raise ValueError( '`bins` must increase monotonically, when an array') if isinstance(bin_edges, numpy.ndarray): bin_edges = cupy.asarray(bin_edges) elif numpy.ndim(bins) == 0: try: n_equal_bins = operator.index(bins) except TypeError: raise TypeError('`bins` must be an integer, a string, or an array') if n_equal_bins < 1: raise ValueError('`bins` must be positive, when an integer') first_edge, last_edge = _get_outer_edges(a, range) else: raise ValueError('`bins` must be 1d, when an array') if n_equal_bins is not None: # numpy's gh-10322 means that type resolution rules are dependent on # array shapes. To avoid this causing problems, we pick a type now and # stick with it throughout. bin_type = cupy.result_type(first_edge, last_edge, a) if cupy.issubdtype(bin_type, cupy.integer): bin_type = cupy.result_type(bin_type, float) # bin edges must be computed bin_edges = cupy.linspace(first_edge, last_edge, n_equal_bins + 1, endpoint=True, dtype=bin_type) return bin_edges
def _maximum_minimum(self, other, cupy_op, op_name, dense_check): if _util.isscalarlike(other): other = cupy.asarray(other, dtype=self.dtype) if dense_check(other): dtype = self.dtype # Note: This is a work-around to make the output dtype the same # as SciPy. It might be SciPy version dependent. if dtype == numpy.float32: dtype = numpy.float64 elif dtype == numpy.complex64: dtype = numpy.complex128 dtype = cupy.result_type(dtype, other) other = other.astype(dtype, copy=False) # Note: The computation steps below are different from SciPy. new_array = cupy_op(self.todense(), other) return csr_matrix(new_array) else: self.sum_duplicates() new_data = cupy_op(self.data, other) return csr_matrix((new_data, self.indices, self.indptr), shape=self.shape, dtype=self.dtype) elif _util.isdense(other): self.sum_duplicates() other = cupy.atleast_2d(other) return cupy_op(self.todense(), other) elif isspmatrix_csr(other): self.sum_duplicates() other.sum_duplicates() return binopt_csr(self, other, op_name) raise NotImplementedError
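# Usage sketch for the helper above via the public cupyx.scipy.sparse API
# (hedged example; assumes a CuPy build where csr_matrix.maximum routes
# through _maximum_minimum, as in the code above).
import cupy
from cupyx.scipy import sparse

_A = sparse.csr_matrix(cupy.array([[0., 2.], [3., 0.]], dtype=cupy.float32))
_B = sparse.csr_matrix(cupy.array([[1., 0.], [0., 4.]], dtype=cupy.float32))
# sparse vs sparse: both sides get sum_duplicates(), then binopt_csr
print(_A.maximum(_B).todense())   # [[1. 2.] [3. 4.]]
# scalar that fails dense_check: only the stored .data values are compared
print(_A.maximum(0.0).todense())  # [[0. 2.] [3. 0.]]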
def leslie(f, s): """Create a Leslie matrix. Given the length n array of fecundity coefficients ``f`` and the length n-1 array of survival coefficients ``s``, return the associated Leslie matrix. Args: f (cupy.ndarray): The "fecundity" coefficients. s (cupy.ndarray): The "survival" coefficients, has to be 1-D. The length of ``s`` must be one less than the length of ``f``, and it must be at least 1. Returns: cupy.ndarray: The array is zero except for the first row, which is ``f``, and the first sub-diagonal, which is ``s``. The data-type of the array will be the data-type of ``f[0]+s[0]``. .. seealso:: :func:`scipy.linalg.leslie` """ if f.ndim != 1: raise ValueError('Incorrect shape for f. f must be 1D') if s.ndim != 1: raise ValueError('Incorrect shape for s. s must be 1D') n = f.size if n != s.size + 1: raise ValueError('Length of s must be one less than length of f') if s.size == 0: raise ValueError('The length of s must be at least 1.') a = cupy.zeros((n, n), dtype=cupy.result_type(f, s)) a[0] = f cupy.fill_diagonal(a[1:], s) return a
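# Example (a sketch, assuming the leslie above is in scope): a 4-stage
# population model with fecundities f and survival rates s.
import cupy

_f = cupy.array([0.1, 2.0, 1.0, 0.1])
_s = cupy.array([0.2, 0.8, 0.7])
print(leslie(_f, _s))
# [[0.1 2.  1.  0.1]
#  [0.2 0.  0.  0. ]
#  [0.  0.8 0.  0. ]
#  [0.  0.  0.7 0. ]]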
def _linspace_scalar(start, stop, num=50, endpoint=True, retstep=False, dtype=None): """Returns an array with evenly-spaced values within a given interval. Instead of specifying the step width like :func:`cupy.arange`, this function requires the total number of elements specified. Args: start: Start of the interval. stop: End of the interval. num: Number of elements. endpoint (bool): If ``True``, the stop value is included as the last element. Otherwise, the stop value is omitted. retstep (bool): If ``True``, this function returns (array, step). Otherwise, it returns only the array. dtype: Data type specifier. It is inferred from the start and stop arguments by default. Returns: cupy.ndarray: The 1-D array of ranged values. """ dt = cupy.result_type(start, stop, float(num)) if dtype is None: # In actual implementation, only float is used dtype = dt ret = cupy.empty((num, ), dtype=dt) div = (num - 1) if endpoint else num if div <= 0: if num > 0: ret.fill(start) step = float('nan') else: step = float(stop - start) / div stop = float(stop) if step == 0.0: # for underflow _linspace_ufunc_underflow(start, stop - start, div, ret) else: _linspace_ufunc(start, step, ret) if endpoint: # Here num == div + 1 > 1 is ensured. ret[-1] = stop if cupy.issubdtype(dtype, cupy.integer): cupy.floor(ret, out=ret) ret = ret.astype(dtype, copy=False) if retstep: return ret, step else: return ret
def _dot_convolve(a1, a2, mode): if a1.size == 0 or a2.size == 0: raise ValueError('Array arguments cannot be empty') is_inverted = False if a1.size < a2.size: a1, a2 = a2, a1 is_inverted = True dtype = cupy.result_type(a1, a2) n1, n2 = a1.size, a2.size a1 = a1.astype(dtype, copy=False) a2 = a2.astype(dtype, copy=False) if mode == 'full': out_size = n1 + n2 - 1 a1 = cupy.pad(a1, n2 - 1) elif mode == 'same': out_size = n1 pad_size = (n2 - 1) // 2 a1 = cupy.pad(a1, (n2 - 1 - pad_size, pad_size)) elif mode == 'valid': out_size = n1 - n2 + 1 stride = a1.strides[0] a1 = stride_tricks.as_strided(a1, (out_size, n2), (stride, stride)) output = _dot_kernel(a1, a2[::-1], axis=1) return is_inverted, output
def _fft_convolve(a1, a2, mode): if a1.size < a2.size: a1, a2 = a2, a1 if a1.dtype.kind == 'c' or a2.dtype.kind == 'c': fft, ifft = cupy.fft.fft, cupy.fft.ifft else: fft, ifft = cupy.fft.rfft, cupy.fft.irfft dtype = cupy.result_type(a1, a2) n1, n2 = a1.size, a2.size out_size = n1 + n2 - 1 fa1 = fft(a1, out_size) fa2 = fft(a2, out_size) out = ifft(fa1 * fa2, out_size) if mode == 'full': start, end = None, None elif mode == 'same': start = (n2 - 1) // 2 end = start + n1 elif mode == 'valid': start, end = n2 - 1, n1 else: raise ValueError( 'acceptable mode flags are `valid`, `same`, or `full`.') out = out[start:end] if dtype.kind in 'iu': out = cupy.around(out) return out.astype(dtype, copy=False)
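# Quick self-check of the mode arithmetic above (hedged sketch; assumes the
# _fft_convolve defined above is in scope). With an odd-length kernel the
# 'same' slice needs no extra offset, so results should match cupy.convolve.
import cupy

_a = cupy.array([1., 2., 3., 4., 5.])
_v = cupy.array([1., 0., -1.])
for _mode in ('full', 'same', 'valid'):
    assert cupy.allclose(_fft_convolve(_a, _v, _mode),
                         cupy.convolve(_a, _v, mode=_mode))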
def _dot_convolve(a1, a2, mode): offset = 0 if a1.size < a2.size: a1, a2 = a2, a1 offset = 1 - a2.size % 2 dtype = cupy.result_type(a1, a2) n1, n2 = a1.size, a2.size a1 = a1.astype(dtype, copy=False) a2 = a2.astype(dtype, copy=False) if mode == 'full': out_size = n1 + n2 - 1 a1 = cupy.pad(a1, n2 - 1) elif mode == 'same': out_size = n1 pad_size = (n2 - 1) // 2 + offset a1 = cupy.pad(a1, (n2 - 1 - pad_size, pad_size)) elif mode == 'valid': out_size = n1 - n2 + 1 stride = a1.strides[0] a1 = stride_tricks.as_strided(a1, (out_size, n2), (stride, stride)) output = _dot_kernel(a1, a2[::-1], axis=1) return output
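# The as_strided call above turns 1-D convolution into a batched dot product:
# it builds an (out_size, n2) sliding-window *view* (no copy), so row i is
# a1[i:i+n2]. A standalone sketch of that trick (assumes CuPy):
import cupy
from cupy.lib import stride_tricks

_a = cupy.arange(6, dtype=cupy.float64)
_n2 = 3
_stride = _a.strides[0]
_win = stride_tricks.as_strided(_a, (_a.size - _n2 + 1, _n2),
                                (_stride, _stride))
print(_win)
# [[0. 1. 2.]
#  [1. 2. 3.]
#  [2. 3. 4.]
#  [3. 4. 5.]]
_kernel = cupy.array([1., 0., -1.])
# row-wise dot with the reversed kernel == convolve(a, kernel, 'valid')
print(_win @ _kernel[::-1])   # [2. 2. 2. 2.]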
def __truediv__(self, other): """Point-wise division by another matrix, vector or scalar""" if _util.isscalarlike(other): dtype = self.dtype if dtype == numpy.float32: # Note: This is a work-around to make the output dtype the same # as SciPy. It might be SciPy version dependent. dtype = numpy.float64 dtype = cupy.result_type(dtype, other) d = cupy.reciprocal(other, dtype=dtype) return multiply_by_scalar(self, d) elif _util.isdense(other): other = cupy.atleast_2d(other) check_shape_for_pointwise_op(self.shape, other.shape) return self.todense() / other elif base.isspmatrix(other): # Note: If broadcasting is needed, an exception is raised here for # compatibility with SciPy, as SciPy does not support broadcasting # in the "sparse / sparse" case. check_shape_for_pointwise_op(self.shape, other.shape, allow_broadcasting=False) dtype = numpy.promote_types(self.dtype, other.dtype) if dtype.char not in 'FD': dtype = numpy.promote_types(numpy.float64, dtype) # Note: The following implementation converts two sparse matrices # into dense matrices and then performs a point-wise division, # which can use lots of memory. self_dense = self.todense().astype(dtype, copy=False) return self_dense / other.todense() raise NotImplementedError
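# Usage sketch for __truediv__ above (assumes cupyx.scipy.sparse): scalar
# division stays sparse but promotes float32 to float64 to match SciPy;
# sparse / sparse densifies.
import cupy
from cupyx.scipy import sparse

_A = sparse.csr_matrix(cupy.array([[1., 0.], [0., 4.]], dtype=cupy.float32))
print((_A / 2).dtype)   # float64
_B = sparse.csr_matrix(cupy.eye(2, dtype=cupy.float32))
print(_A / _B)          # dense ndarray; 0/0 entries become nan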
def _correlate(in1, in2, mode='full', method='auto', convolution=False): quick_out = _st_core._check_conv_inputs(in1, in2, mode, convolution) if quick_out is not None: return quick_out if method not in ('auto', 'direct', 'fft'): raise ValueError('acceptable methods are "auto", "direct", or "fft"') if method == 'auto': method = choose_conv_method(in1, in2, mode=mode) if method == 'direct': return _st_core._direct_correlate(in1, in2, mode, in1.dtype, convolution) # if method == 'fft': inputs_swapped = _st_core._inputs_swap_needed(mode, in1.shape, in2.shape) if inputs_swapped: in1, in2 = in2, in1 if not convolution: in2 = _st_core._reverse_and_conj(in2) out = fftconvolve(in1, in2, mode) result_type = cupy.result_type(in1, in2) if result_type.kind in 'ui': out = out.round() out = out.astype(result_type, copy=False) if not convolution and inputs_swapped: out = cupy.ascontiguousarray(_st_core._reverse_and_conj(out)) return out
def corr_pairwise(x, y, return_pearson=False):
    """Covariance and Pearson product-moment correlation coefficients on the
    GPU for paired data with tolerance of NaNs. Currently only supports rows
    as samples and columns as observations.

    Parameters
    ----------
    x : array_like
        The baseline array of values.
    y : array_like
        The comparison array of values.
    return_pearson : bool, optional
        If True, normalize the covariances by the per-pair standard
        deviations and return Pearson correlation coefficients instead.

    Returns
    -------
    corr : cupy ndarray
        Array of correlation values
    """

    def _cov_pairwise(x1, x2, factor):
        return cupy.nansum(x1 * x2, axis=1, keepdims=True) * cupy.true_divide(
            1, factor)

    # Coerce arrays into 2D format and set dtype
    dtype = cupy.result_type(x, y, cupy.float64)
    x = cupy.asarray(x, dtype=dtype)
    y = cupy.asarray(y, dtype=dtype)
    assert x.shape == y.shape
    if x.ndim < 2:
        x = x[None, :]
        y = y[None, :]
    n_samples, n_obs = x.shape

    # Calculate degrees of freedom for each sample pair
    ddof = 1
    nan_count = (cupy.isnan(x) | cupy.isnan(y)).sum(axis=1, keepdims=True)
    fact = n_obs - nan_count - ddof

    # Mean normalize
    x -= cupy.nanmean(x, axis=1, keepdims=True)
    y -= cupy.nanmean(y, axis=1, keepdims=True)

    # Calculate covariance matrix
    corr = _cov_pairwise(x, y, fact)

    if return_pearson:
        x_corr = _cov_pairwise(x, x, fact)
        y_corr = _cov_pairwise(y, y, fact)
        auto_corr = cupy.sqrt(x_corr) * cupy.sqrt(y_corr)
        corr = corr / auto_corr
        corr = cupy.clip(corr.real, -1, 1, out=corr.real)
        return corr

    return corr.squeeze()
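# Hypothetical call of corr_pairwise as defined above; the NaN in the second
# row simply drops that observation from that pair's statistics.
import cupy

_x = cupy.array([[1., 2., 3., 4.],
                 [1., cupy.nan, 3., 5.]])
_y = cupy.array([[2., 4., 6., 8.],
                 [1., 1., 2., 2.]])
print(corr_pairwise(_x, _y))                       # per-row covariances
print(corr_pairwise(_x, _y, return_pearson=True))  # per-row Pearson r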
def polysub(a1, a2):
    """Computes the difference of two polynomials.

    Args:
        a1 (scalar, cupy.ndarray or cupy.poly1d): first input polynomial.
        a2 (scalar, cupy.ndarray or cupy.poly1d): second input polynomial.

    Returns:
        cupy.ndarray or cupy.poly1d: The difference of the inputs.

    .. seealso:: :func:`numpy.polysub`

    """
    if a1.shape[0] <= a2.shape[0]:
        out = cupy.pad(a1, (a2.shape[0] - a1.shape[0], 0))
        out = out.astype(cupy.result_type(a1, a2), copy=False)
        out -= a2
    else:
        out = cupy.pad(a2, (a1.shape[0] - a2.shape[0], 0))
        out = out.astype(cupy.result_type(a1, a2), copy=False)
        # in-place a1 - out: out - (2 * out - a1) == a1 - out
        out -= 2 * out - a1
    return out
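# Example (assumes the polysub above, or cupy.polysub): coefficients are
# highest degree first, and the shorter input is zero-padded on the left.
import cupy

_a1 = cupy.array([1, 2])        # x + 2
_a2 = cupy.array([1, 0, -1])    # x**2 - 1
print(polysub(_a1, _a2))        # [-1  1  3]   i.e. -x**2 + x + 3
print(polysub(_a2, _a1))        # [ 1 -1 -3]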
def __init__(self, h, x_dtype, up, down): """Helper for resampling""" h = cp.asarray(h) if h.ndim != 1 or h.size == 0: raise ValueError('h must be 1D with non-zero length') self._output_type = cp.result_type(h.dtype, x_dtype, cp.float32) h = cp.asarray(h, self._output_type) self._up = int(up) self._down = int(down) if self._up < 1 or self._down < 1: raise ValueError('Both up and down must be >= 1') # This both transposes, and "flips" each phase for filtering self._h_trans_flip = _pad_h(h, self._up) self._h_trans_flip = cp.ascontiguousarray(self._h_trans_flip)
def get_kth_unique_value(data, k, axis=1): """Find the kth value along an axis of a matrix on the GPU Parameters ---------- data : array_like The array of values to be ranked. k : {int} kth unique value to be found axis : {None, int}, optional Axis along which to perform the ranking. Default is 1 -- samples in rows, observations in columns Returns ------- kth_values : cupy ndarray An array of kth values. """ # Coerce data into array -- make a copy since it needs to be sorted # TODO -- should the sort be done in Numba kernel (and how to do it)? dtype = cupy.result_type(data, cupy.float64) data_id = id(data) data = cupy.ascontiguousarray(data, dtype=dtype) if data_id == id(data): # Ensure sort is being done on a copy data = data.copy() assert data.ndim <= 2 if data.ndim < 2: if axis == 0: data = data[:, None] else: data = data[None, :] if axis == 0: n_obs, n_samples = data.shape else: n_samples, n_obs = data.shape data.sort(axis=axis) kth_values = cupy.zeros(n_samples, dtype=data.dtype) _get_kth_unique_kernel.forall(n_samples, 1)(data, kth_values, k, axis) if axis == 0: kth_values = kth_values[None, :] else: kth_values = kth_values[:, None] return kth_values
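# Hypothetical call of get_kth_unique_value as defined above (it relies on
# the Numba kernel _get_kth_unique_kernel being available in this module);
# with axis=1 each row is processed independently.
import cupy

_data = cupy.array([[0.1, 0.5, 0.5, 0.9],
                    [0.3, 0.2, 0.8, 0.8]])
print(get_kth_unique_value(_data, 2, axis=1))
# -> [[0.5], [0.3]] if k is 1-based over the unique sorted values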
def polyval(p, x): """Evaluates a polynomial at specific values. Args: p (cupy.ndarray or cupy.poly1d): input polynomial. x (scalar, cupy.ndarray): values at which the polynomial is evaluated. Returns: cupy.ndarray or cupy.poly1d: polynomial evaluated at x. .. warning:: This function doesn't currently support poly1d values to evaluate. .. seealso:: :func:`numpy.polyval` """ if isinstance(p, cupy.poly1d): p = p.coeffs if not isinstance(p, cupy.ndarray) or p.ndim == 0: raise TypeError('p can be 1d ndarray or poly1d object only') if p.ndim != 1: # to be consistent with polyarithmetic routines' behavior of # not allowing multidimensional polynomial inputs. raise ValueError('p can be 1d ndarray or poly1d object only') # TODO(Dahlia-Chehata): Support poly1d x if (isinstance(x, cupy.ndarray) and x.ndim <= 1) or numpy.isscalar(x): val = cupy.asarray(x).reshape(-1, 1) else: raise NotImplementedError( 'poly1d or non 1d values are not currently supported') out = p[::-1] * cupy.power(val, cupy.arange(p.size)) out = out.sum(axis=1) dtype = cupy.result_type(p, val) if cupy.isscalar(x) or x.ndim == 0: return out.astype(dtype, copy=False).reshape() if p.dtype == numpy.complex128 and val.dtype in [ numpy.float16, numpy.float32, numpy.complex64 ]: return out.astype(numpy.complex64, copy=False) p_kind_score = numpy.dtype(p.dtype.char.lower()).kind x_kind_score = numpy.dtype(val.dtype.char.lower()).kind if (p.dtype.kind not in 'c' and (p_kind_score == x_kind_score or val.dtype.kind in 'c')) or ( issubclass(p.dtype.type, numpy.integer) and issubclass(val.dtype.type, numpy.floating)): return out.astype(val.dtype, copy=False) return out.astype(dtype, copy=False)
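# Example (assumes the polyval above, or cupy.polyval): evaluate
# 3*x**2 + 1 at a scalar and at a 1-D array of points.
import cupy

_p = cupy.array([3.0, 0.0, 1.0])
print(polyval(_p, 5.0))                         # 76.0 (0-d array)
print(polyval(_p, cupy.array([0., 1., 2.])))    # [ 1.  4. 13.]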
def histogram(x, bins=10):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents the bin edges.

    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    .. warning:: This function may synchronize the device.

    .. seealso:: :func:`numpy.histogram`
    """
    if x.dtype.kind == 'c':
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError('complex number is not supported')

    if isinstance(bins, int):
        if x.size == 0:
            min_value = 0.0
            max_value = 1.0
        else:
            min_value = float(x.min())
            max_value = float(x.max())
        if min_value == max_value:
            min_value -= 0.5
            max_value += 0.5
        bin_type = cupy.result_type(min_value, max_value, x)
        bins = cupy.linspace(min_value, max_value, bins + 1, dtype=bin_type)
    elif isinstance(bins, cupy.ndarray):
        if (bins[:-1] > bins[1:]).any():  # synchronize!
            raise ValueError('bins must increase monotonically.')
    else:
        raise NotImplementedError('Only int or ndarray are supported for bins')
    y = cupy.zeros(bins.size - 1, dtype='l')
    _histogram_kernel(x, bins, bins.size, y)
    return y, bins
def polyadd(a1, a2): """Computes the sum of two polynomials. Args: a1 (scalar, cupy.ndarray or cupy.poly1d): first input polynomial. a2 (scalar, cupy.ndarray or cupy.poly1d): second input polynomial. Returns: cupy.ndarray or cupy.poly1d: The sum of the inputs. .. seealso:: :func:`numpy.polyadd` """ if a1.size < a2.size: a1, a2 = a2, a1 out = cupy.pad(a2, (a1.size - a2.size, 0)) out = out.astype(cupy.result_type(a1, a2), copy=False) out += a1 return out
def _bmat(list_obj): """ Helper function to create a block matrix in cupy from a list of smaller 2D dense arrays """ n_rows = len(list_obj) n_cols = len(list_obj[0]) final_shape = [0, 0] # calculating expected size of output for i in range(n_rows): final_shape[0] += list_obj[i][0].shape[0] for j in range(n_cols): final_shape[1] += list_obj[0][j].shape[1] # obtaining result's datatype dtype = cupy.result_type( *[arr.dtype for list_iter in list_obj for arr in list_iter]) # checking order F_order = all(arr.flags['F_CONTIGUOUS'] for list_iter in list_obj for arr in list_iter) C_order = all(arr.flags['C_CONTIGUOUS'] for list_iter in list_obj for arr in list_iter) order = 'F' if F_order and not C_order else 'C' result = cupy.empty(tuple(final_shape), dtype=dtype, order=order) start_idx_row = 0 start_idx_col = 0 end_idx_row = 0 end_idx_col = 0 for i in range(n_rows): end_idx_row = start_idx_row + list_obj[i][0].shape[0] start_idx_col = 0 for j in range(n_cols): end_idx_col = start_idx_col + list_obj[i][j].shape[1] result[start_idx_row:end_idx_row, start_idx_col:end_idx_col] = list_obj[i][j] start_idx_col = end_idx_col start_idx_row = end_idx_row return result
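# Hypothetical use of the _bmat helper above: assemble a (3, 5) matrix from
# four dense blocks; dtype and memory order are derived from the inputs.
import cupy

_M = _bmat([[cupy.ones((2, 2)), cupy.zeros((2, 3))],
            [cupy.zeros((1, 2)), cupy.full((1, 3), 7.0)]])
print(_M.shape)   # (3, 5)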
def _fft_convolve(a1, a2, mode): offset = 0 if a1.size < a2.size: a1, a2 = a2, a1 offset = 1 - a2.size % 2 # if either of them is complex, the dtype after multiplication will also be if a1.dtype.kind == 'c' or a2.dtype.kind == 'c': fft, ifft = cupy.fft.fft, cupy.fft.ifft else: fft, ifft = cupy.fft.rfft, cupy.fft.irfft dtype = cupy.result_type(a1, a2) n1, n2 = a1.size, a2.size out_size = cupyx.scipy.fft.next_fast_len(n1 + n2 - 1) fa1 = fft(a1, out_size) fa2 = fft(a2, out_size) out = ifft(fa1 * fa2, out_size) if mode == 'full': start, end = 0, n1 + n2 - 1 elif mode == 'same': start = (n2 - 1) // 2 + offset end = start + n1 elif mode == 'valid': start, end = n2 - 1, n1 else: raise ValueError( 'acceptable mode flags are `valid`, `same`, or `full`.') out = out[start:end] if dtype.kind in 'iu': out = cupy.around(out) return out.astype(dtype, copy=False)
def einsum(*operands, **kwargs):
    """einsum(subscripts, *operands, dtype=None, optimize=False)

    Evaluates the Einstein summation convention on the operands.
    Using the Einstein summation convention, many common
    multi-dimensional array operations can be represented in a simple
    fashion. This function provides a way to compute such summations.

    .. note::
       Memory contiguity of the calculation result is not always compatible
       with that of :func:`numpy.einsum`. The ``out``, ``order``, and
       ``casting`` options are not supported.

    Args:
        subscripts (str): Specifies the subscripts for summation.
        operands (sequence of arrays): These are the arrays for the operation.

    Returns:
        cupy.ndarray:
            The calculation based on the Einstein summation convention.

    .. seealso:: :func:`numpy.einsum`

    """
    input_subscripts, output_subscript, operands = \
        _parse_einsum_input(operands)
    assert isinstance(input_subscripts, list)
    assert isinstance(operands, list)

    dtype = kwargs.pop('dtype', None)

    # casting = kwargs.pop('casting', 'safe')
    casting_kwargs = {}  # casting is not supported yet in astype

    optimize = kwargs.pop('optimize', False)
    if optimize is True:
        optimize = 'greedy'
    if kwargs:
        raise TypeError('Did not understand the following kwargs: %s'
                        % list(kwargs.keys()))

    result_dtype = cupy.result_type(*operands) if dtype is None else dtype
    operands = [cupy.asanyarray(arr) for arr in operands]

    input_subscripts = [
        _parse_ellipsis_subscript(sub, idx, ndim=arr.ndim)
        for idx, (sub, arr) in enumerate(zip(input_subscripts, operands))
    ]

    # Get length of each unique dimension and ensure all dimensions are
    # correct
    dimension_dict = {}
    for idx, sub in enumerate(input_subscripts):
        sh = operands[idx].shape
        for axis, label in enumerate(sub):
            dim = sh[axis]
            if label in dimension_dict.keys():
                # For broadcasting cases we always want the largest dim size
                if dimension_dict[label] == 1:
                    dimension_dict[label] = dim
                elif dim not in (1, dimension_dict[label]):
                    dim_old = dimension_dict[label]
                    raise ValueError(
                        'Size of label \'%s\' for operand %d (%d) '
                        'does not match previous terms (%d).'
% (_chr(label), idx, dim, dim_old)) else: dimension_dict[label] = dim if output_subscript is None: # Build output subscripts tmp_subscripts = list(itertools.chain.from_iterable(input_subscripts)) output_subscript = [ label for label in sorted(set(tmp_subscripts)) if label < 0 or tmp_subscripts.count(label) == 1 ] else: if not options['sum_ellipsis']: if '@' not in output_subscript and -1 in dimension_dict: raise ValueError( 'output has more dimensions than subscripts ' 'given in einstein sum, but no \'...\' ellipsis ' 'provided to broadcast the extra dimensions.') output_subscript = _parse_ellipsis_subscript( output_subscript, None, ellipsis_len=sum(label < 0 for label in dimension_dict.keys()) ) # Make sure output subscripts are in the input tmp_subscripts = set(itertools.chain.from_iterable(input_subscripts)) for label in output_subscript: if label not in tmp_subscripts: raise ValueError( 'einstein sum subscripts string included output subscript ' '\'%s\' which never appeared in an input' % _chr(label)) if len(output_subscript) != len(set(output_subscript)): for label in output_subscript: if output_subscript.count(label) >= 2: raise ValueError( 'einstein sum subscripts string includes output ' 'subscript \'%s\' multiple times' % _chr(label)) _einsum_diagonals(input_subscripts, operands) # no more raises if len(operands) >= 2: if any(arr.size == 0 for arr in operands): return cupy.zeros( tuple(dimension_dict[label] for label in output_subscript), dtype=result_dtype ) # Don't squeeze if unary, because this affects later (in trivial sum) # whether the return is a writeable view. for idx in range(len(operands)): arr = operands[idx] if 1 in arr.shape: squeeze_indices = [] sub = [] for axis, label in enumerate(input_subscripts[idx]): if arr.shape[axis] == 1: squeeze_indices.append(axis) else: sub.append(label) input_subscripts[idx] = sub operands[idx] = cupy.squeeze(arr, axis=tuple(squeeze_indices)) assert operands[idx].ndim == len(input_subscripts[idx]) del arr # unary einsum without summation should return a (writeable) view returns_view = len(operands) == 1 # unary sum for idx, sub in enumerate(input_subscripts): other_subscripts = copy.copy(input_subscripts) other_subscripts[idx] = output_subscript other_subscripts = set(itertools.chain.from_iterable(other_subscripts)) sum_axes = tuple( axis for axis, label in enumerate(sub) if label not in other_subscripts ) if sum_axes: returns_view = False input_subscripts[idx] = [ label for axis, label in enumerate(sub) if axis not in sum_axes ] operands[idx] = operands[idx].sum( axis=sum_axes, dtype=result_dtype) if returns_view: operands = [a.view() for a in operands] else: operands = [ a.astype(result_dtype, copy=False, **casting_kwargs) for a in operands ] # no more casts optimize_algorithms = { 'greedy': _greedy_path, 'optimal': _optimal_path, } if optimize is False: path = [tuple(range(len(operands)))] elif len(optimize) and (optimize[0] == 'einsum_path'): path = optimize[1:] else: try: if len(optimize) == 2 and isinstance(optimize[1], (int, float)): algo = optimize_algorithms[optimize[0]] memory_limit = int(optimize[1]) else: algo = optimize_algorithms[optimize] memory_limit = 2 ** 31 # TODO(kataoka): fix? 
except (TypeError, KeyError): # unhashable type or not found raise TypeError('Did not understand the path (optimize): %s' % str(optimize)) input_sets = [set(sub) for sub in input_subscripts] output_set = set(output_subscript) path = algo(input_sets, output_set, dimension_dict, memory_limit) if any(len(indices) > 2 for indices in path): warnings.warn( 'memory efficient einsum is not supported yet', _util.PerformanceWarning) for idx0, idx1 in _iter_path_pairs(path): # "reduced" binary einsum arr0 = operands.pop(idx0) sub0 = input_subscripts.pop(idx0) arr1 = operands.pop(idx1) sub1 = input_subscripts.pop(idx1) sub_others = list(itertools.chain( output_subscript, itertools.chain.from_iterable(input_subscripts))) arr_out, sub_out = reduced_binary_einsum( arr0, sub0, arr1, sub1, sub_others) operands.append(arr_out) input_subscripts.append(sub_out) del arr0, arr1 # unary einsum at last arr0, = operands sub0, = input_subscripts transpose_axes = [] for label in output_subscript: if label in sub0: transpose_axes.append(sub0.index(label)) arr_out = arr0.transpose(transpose_axes).reshape([ dimension_dict[label] for label in output_subscript ]) assert returns_view or arr_out.dtype == result_dtype return arr_out
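# Usage sketch for the einsum above (same call convention as numpy.einsum;
# assumes CuPy). optimize=True maps to the 'greedy' path search shown above.
import cupy

_a = cupy.random.rand(3, 4)
_b = cupy.random.rand(4, 5)
_c = cupy.random.rand(5, 6)
_out = einsum('ij,jk,kl->il', _a, _b, _c, optimize=True)
assert _out.shape == (3, 6)
assert cupy.allclose(_out, _a @ _b @ _c)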
def rankdata(data, method='average', na_option='keep', axis=1, is_symmetric=False): """Rank observations for a series of samples, with tie handling NOTE: due to a bug with cudf ranking, data will be transposed if row-wise ranking is selected Parameters ---------- data : array_like The array of values to be ranked. method : {'average', 'min', 'max', 'dense', 'ordinal'}, optional The method used to assign ranks to tied elements. The following methods are available (default is 'average'): * 'average': The average of the ranks that would have been assigned to all the tied values is assigned to each value. * 'min': The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.) * 'max': The maximum of the ranks that would have been assigned to all the tied values is assigned to each value. * 'dense': Like 'min', but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements. * 'ordinal': All values are given a distinct rank, corresponding to the order that the values occur in `a`. axis : {None, int}, optional Axis along which to perform the ranking. Default is 1 -- samples in rows, observations in columns is_symmetric : {False, bool}, optional Will be used to avoid additional data transpose steps if axis = 1 Returns ------- ranks : cupy ndarray An array of size equal to the size of `a`, containing rank scores. See also scipy.stats.rankdata, for which this function is a replacement """ dtype = cupy.result_type(data.dtype, cupy.float64) data = cupy.asarray(data, dtype=dtype) if is_symmetric: assert data.ndim == 2 assert data.shape[0] == data.shape[1] if data.ndim < 2: data = data[:, None] elif (data.ndim == 2) & (axis == 1) & (not is_symmetric): data = data.T ranks = cudf.DataFrame(data).rank(axis=0, method=method, na_option=na_option) ranks = ranks.values if axis == 1: ranks = ranks.T return ranks
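# Hypothetical call of rankdata as defined above (needs cudf and cupy);
# ties share the average of their ranks under method='average'.
import cupy

_data = cupy.array([[0.5, 0.2, 0.2, 0.9],
                    [3.0, 1.0, 2.0, 2.0]])
print(rankdata(_data, method='average', axis=1))
# [[3.  1.5 1.5 4. ]
#  [4.  1.  2.5 2.5]]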
def histogram(x, bins=10):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents the bin edges.

    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    .. seealso:: :func:`numpy.histogram`
    """
    if x.dtype.kind == 'c':
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError('complex number is not supported')

    if isinstance(bins, int):
        if x.size == 0:
            min_value = 0.0
            max_value = 1.0
        else:
            min_value = float(x.min())
            max_value = float(x.max())
        if min_value == max_value:
            min_value -= 0.5
            max_value += 0.5
        bin_type = cupy.result_type(min_value, max_value, x)
        bins = cupy.linspace(min_value, max_value, bins + 1, dtype=bin_type)
    elif isinstance(bins, cupy.ndarray):
        if cupy.any(bins[:-1] > bins[1:]):
            raise ValueError('bins must increase monotonically.')
    else:
        raise NotImplementedError('Only int or ndarray are supported for bins')

    # atomicAdd only supports int32
    y = cupy.zeros(bins.size - 1, dtype=cupy.int32)

    # TODO(unno): use searchsorted
    cupy.ElementwiseKernel(
        'S x, raw T bins, int32 n_bins',
        'raw int32 y',
        '''
        if (x < bins[0] or bins[n_bins - 1] < x) {
            return;
        }
        int high = n_bins - 1;
        int low = 0;

        while (high - low > 1) {
            int mid = (high + low) / 2;
            if (bins[mid] <= x) {
                low = mid;
            } else {
                high = mid;
            }
        }
        atomicAdd(&y[low], 1);
        ''')(x, bins, bins.size, y)
    return y.astype('l'), bins
def histogram(x, bins=10, range=None, weights=None, density=False):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents the bin edges.
        range (2-tuple of float, optional): The lower and upper range of the
            bins. If not provided, range is simply ``(x.min(), x.max())``.
            Values outside the range are ignored. The first element of the
            range must be less than or equal to the second. `range` affects
            the automatic bin computation as well. While bin width is computed
            to be optimal based on the actual data within `range`, the bin
            count will fill the entire range including portions containing no
            data.
        density (bool, optional): If False, the default, returns the number of
            samples in each bin. If True, returns the probability *density*
            function at the bin, ``bin_count / sample_count / bin_volume``.
        weights (cupy.ndarray, optional): An array of weights, of the same
            shape as `x`. Each value in `x` only contributes its associated
            weight towards the bin count (instead of 1).
    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    .. warning:: This function may synchronize the device.

    .. seealso:: :func:`numpy.histogram`
    """
    # check the input type before touching its attributes
    if not isinstance(x, cupy.ndarray):
        raise ValueError("x must be a cupy.ndarray")

    if x.dtype.kind == 'c':
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError('complex number is not supported')

    x, weights = _ravel_and_check_weights(x, weights)
    bin_edges = _get_bin_edges(x, bins, range)

    if weights is None:
        y = cupy.zeros(bin_edges.size - 1, dtype='l')
        for accelerator in _accelerator.get_routine_accelerators():
            # CUB uses int for bin counts
            # TODO(leofang): support >= 2^31 elements in x?
if (accelerator == _accelerator.ACCELERATOR_CUB and x.size <= 0x7fffffff and bin_edges.size <= 0x7fffffff): # Need to ensure the dtype of bin_edges as it's needed for both # the CUB call and the correction later if isinstance(bins, cupy.ndarray): bin_type = cupy.result_type(bin_edges, x) if cupy.issubdtype(bin_type, cupy.integer): bin_type = cupy.result_type(bin_type, float) bin_edges = bin_edges.astype(bin_type, copy=False) # CUB's upper bin boundary is exclusive for all bins, including # the last bin, so we must shift it to comply with NumPy if x.dtype.kind in 'ui': bin_edges[-1] += 1 elif x.dtype.kind == 'f': old_edge = bin_edges[-1].copy() bin_edges[-1] = cupy.nextafter(bin_edges[-1], bin_edges[-1] + 1) y = cub.device_histogram(x, bin_edges, y) # shift the uppermost edge back if x.dtype.kind in 'ui': bin_edges[-1] -= 1 elif x.dtype.kind == 'f': bin_edges[-1] = old_edge # TODO(asi1024): Refactor temporary fix for dtype compatibility if isinstance(bins, cupy.ndarray): bin_edges = bin_edges.astype(bins.dtype, copy=False) break else: _histogram_kernel(x, bin_edges, bin_edges.size, y) else: simple_weights = ( cupy.can_cast(weights.dtype, cupy.float64) or cupy.can_cast(weights.dtype, cupy.complex128) ) if not simple_weights: # object dtype such as Decimal are supported in NumPy, but not here raise NotImplementedError( "only weights with dtype that can be cast to float or complex " "are supported") if weights.dtype.kind == 'c': y = cupy.zeros(bin_edges.size - 1, dtype=cupy.complex128) _weighted_histogram_kernel( x, bin_edges, bin_edges.size, weights.real, y.real) _weighted_histogram_kernel( x, bin_edges, bin_edges.size, weights.imag, y.imag) else: if weights.dtype.kind in 'bui': y = cupy.zeros(bin_edges.size - 1, dtype=int) else: y = cupy.zeros(bin_edges.size - 1, dtype=cupy.float64) _weighted_histogram_kernel( x, bin_edges, bin_edges.size, weights, y) if density: db = cupy.array(cupy.diff(bin_edges), cupy.float64) return y/db/y.sum(), bin_edges return y, bin_edges
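# Usage sketch for the histogram above (assumes CuPy; may synchronize the
# device): explicit edges plus weights, and a density normalization check.
import cupy

_x = cupy.array([0.1, 0.4, 0.4, 2.5, 3.0])
_w = cupy.array([1.0, 0.5, 0.5, 2.0, 1.0])
_hist, _edges = histogram(_x, bins=cupy.array([0., 1., 2., 4.]), weights=_w)
print(_hist)                                     # [2. 0. 3.]
_dens, _edges = histogram(_x, bins=3, density=True)
print(float((_dens * cupy.diff(_edges)).sum()))  # 1.0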
def firfilter(b, x, axis=-1, zi=None): """ Filter data along one-dimension with an FIR filter. Filter a data sequence, `x`, using a digital filter. This works for many fundamental data types (including Object type). Please note, cuSignal doesn't support IIR filters presently, and this implementation is optimized for large filtering operations (and inherently depends on fftconvolve) Parameters ---------- b : array_like The numerator coefficient vector in a 1-D sequence. x : array_like An N-dimensional input array. axis : int, optional The axis of the input data array along which to apply the linear filter. The filter is applied to each subarray along this axis. Default is -1. zi : array_like, optional Initial conditions for the filter delays. It is a vector (or array of vectors for an N-dimensional input) of length ``max(len(a), len(b)) - 1``. If `zi` is None or is not given then initial rest is assumed. See `lfiltic` for more information. Returns ------- y : array The output of the digital filter. zf : array, optional If `zi` is None, this is not returned, otherwise, `zf` holds the final filter delay values. """ b = cp.asarray(b) if b.ndim != 1: raise ValueError('object of too small depth for desired array') if x.ndim == 0: raise ValueError('x must be at least 1-D') inputs = [b, x] if zi is not None: # _linear_filter does not broadcast zi, but does do expansion of # singleton dims. zi = cp.asarray(zi) if zi.ndim != x.ndim: raise ValueError('object of too small depth for desired array') expected_shape = list(x.shape) expected_shape[axis] = b.shape[0] - 1 expected_shape = tuple(expected_shape) # check the trivial case where zi is the right shape first if zi.shape != expected_shape: strides = zi.ndim * [None] if axis < 0: axis += zi.ndim for k in range(zi.ndim): if k == axis and zi.shape[k] == expected_shape[k]: strides[k] = zi.strides[k] elif k != axis and zi.shape[k] == expected_shape[k]: strides[k] = zi.strides[k] elif k != axis and zi.shape[k] == 1: strides[k] = 0 else: raise ValueError('Unexpected shape for zi: expected ' '%s, found %s.' % (expected_shape, zi.shape)) zi = cp.lib.stride_tricks.as_strided(zi, expected_shape, strides) inputs.append(zi) dtype = cp.result_type(*inputs) if dtype.char not in 'fdgFDGO': raise NotImplementedError("input type '%s' not supported" % dtype) b = cp.array(b, dtype=dtype) x = cp.array(x, dtype=dtype, copy=False) out_full = cp.apply_along_axis(lambda y: cp.convolve(b, y), axis, x) ind = out_full.ndim * [slice(None)] if zi is not None: ind[axis] = slice(zi.shape[axis]) out_full[tuple(ind)] += zi ind[axis] = slice(out_full.shape[axis] - len(b) + 1) out = out_full[tuple(ind)] if zi is None: return out else: ind[axis] = slice(out_full.shape[axis] - len(b) + 1, None) zf = out_full[tuple(ind)] return out, zf
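# Hypothetical call of firfilter as defined above: a 4-tap moving average
# applied along the last axis (initial rest, so the output ramps up).
import cupy as cp

_b = cp.full(4, 0.25)
_x = cp.arange(8, dtype=cp.float64)
print(firfilter(_b, _x))
# [0.   0.25 0.75 1.5  2.5  3.5  4.5  5.5 ]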
def sosfilt(
        sos,
        x,
        axis=-1,
        zi=None,
):
    """
    Filter data along one dimension using cascaded second-order sections.

    Filter a data sequence, `x`, using a digital IIR filter defined by
    `sos`.

    Parameters
    ----------
    sos : array_like
        Array of second-order filter coefficients, must have shape
        ``(n_sections, 6)``. Each row corresponds to a second-order
        section, with the first three columns providing the numerator
        coefficients and the last three providing the denominator
        coefficients.
    x : array_like
        An N-dimensional input array.
    axis : int, optional
        The axis of the input data array along which to apply the
        linear filter. The filter is applied to each subarray along
        this axis. Default is -1.
    zi : array_like, optional
        Initial conditions for the cascaded filter delays. It is a (at
        least 2D) vector of shape ``(n_sections, ..., 2, ...)``, where
        ``..., 2, ...`` denotes the shape of `x`, but with ``x.shape[axis]``
        replaced by 2. If `zi` is None or is not given then initial rest
        (i.e. all zeros) is assumed.
        Note that these initial conditions are *not* the same as the initial
        conditions given by `lfiltic` or `lfilter_zi`.

    Returns
    -------
    y : ndarray
        The output of the digital filter.
    zf : ndarray, optional
        If `zi` is None, this is not returned, otherwise, `zf` holds the
        final filter delay values.

    See Also
    --------
    zpk2sos, sos2zpk, sosfilt_zi, sosfiltfilt, sosfreqz

    Notes
    -----
    WARNING: This is an experimental API and is prone to change in
    future versions of cuSignal.

    The filter function is implemented as a series of second-order filters
    with direct-form II transposed structure. It is designed to minimize
    numerical precision errors for high-order filters.

    Limitations
    -----------
    1. The number of sections must be less than 513.
    2. The number of samples must be greater than the number of sections.

    Examples
    --------
    sosfilt is a stable alternative to `lfilter` as using 2nd order sections
    reduces numerical error. We are working on building out sos filter
    output, so please submit GitHub feature requests as needed. You can
    also generate a filter on CPU with scipy.signal and then move that to
    GPU for actual filtering operations with `cp.asarray`.

    Plot a 13th-order filter's impulse response using `sosfilt`:

    >>> from scipy import signal
    >>> import cusignal
    >>> import cupy as cp
    >>> # Generate filter on CPU with Scipy.Signal
    >>> sos = signal.ellip(13, 0.009, 80, 0.05, output='sos')
    >>> # Move data to GPU
    >>> sos = cp.asarray(sos)
    >>> x = cp.random.randn(100_000_000)
    >>> y = cusignal.sosfilt(sos, x)
    """

    x = cp.asarray(x)
    if x.ndim == 0:
        raise ValueError("x must be at least 1D")

    sos, n_sections = _validate_sos(sos)
    sos = cp.asarray(sos)

    x_zi_shape = list(x.shape)
    x_zi_shape[axis] = 2
    x_zi_shape = tuple([n_sections] + x_zi_shape)

    inputs = [sos, x]
    if zi is not None:
        # use cp.asarray so GPU-resident initial conditions are accepted
        inputs.append(cp.asarray(zi))
    dtype = cp.result_type(*inputs)

    if dtype.char not in "fdgFDGO":
        raise NotImplementedError("input type '%s' not supported" % dtype)

    if zi is not None:
        zi = cp.array(zi, dtype)  # make a copy so we can operate in place
        if zi.shape != x_zi_shape:
            raise ValueError("Invalid zi shape. With axis=%r, an input with "
                             "shape %r, and an sos array with %d sections, zi "
                             "must have shape %r, got %r."
% (axis, x.shape, n_sections, x_zi_shape, zi.shape)) return_zi = True else: zi = cp.zeros(x_zi_shape, dtype=dtype) return_zi = False axis = axis % x.ndim # make positive x = cp.moveaxis(x, axis, -1) zi = cp.moveaxis(zi, [0, axis + 1], [-2, -1]) x_shape, zi_shape = x.shape, zi.shape x = cp.reshape(x, (-1, x.shape[-1])) x = cp.array(x, dtype, order="C") # make a copy, can modify in place zi = cp.ascontiguousarray(cp.reshape(zi, (-1, n_sections, 2))) sos = sos.astype(dtype, copy=False) max_smem = _get_max_smem() max_tpb = _get_max_tpb() # Determine how much shared memory is needed out_size = sos.shape[0] z_size = zi.shape[1] * zi.shape[2] sos_size = sos.shape[0] * sos.shape[1] shared_mem = (out_size + z_size + sos_size) * x.dtype.itemsize if shared_mem > max_smem: max_sections = (max_smem // (1 + zi.shape[2] + sos.shape[1]) // x.dtype.itemsize) raise ValueError("The number of sections ({}), requires too much " "shared memory ({}B) > ({}B). \n" "\n**Max sections possible ({})**".format( sos.shape[0], shared_mem, max_smem, max_sections)) if sos.shape[0] > max_tpb: raise ValueError("The number of sections ({}), must be less " "than max threads per block ({})".format( sos.shape[0], max_tpb)) if sos.shape[0] > x.shape[1]: raise ValueError("The number of samples ({}), must be greater " "than the number of sections ({})".format( x.shape[1], sos.shape[0])) _sosfilt(sos, x, zi) x.shape = x_shape x = cp.moveaxis(x, -1, axis) if return_zi: zi.shape = zi_shape zi = cp.moveaxis(zi, [-2, -1], [0, axis + 1]) out = (x, zi) else: out = x return out
def _try_use_cutensornet(*args, **kwargs):
    if cupy.cuda.runtime.is_hip:
        return None

    if (_accelerator.ACCELERATOR_CUTENSORNET not in
            _accelerator.get_routine_accelerators()):
        return None

    if cutensornet is None:
        warnings.warn(
            'using the cuTensorNet backend was requested but it cannot be '
            'imported -- maybe you forgot to install cuQuantum Python? '
            'Please do "pip install cuquantum-python" or "conda install '
            '-c conda-forge cuquantum-python" and retry',
            stacklevel=2)
        return None

    # cannot pop as we might still need kwargs later
    dtype = kwargs.get('dtype', None)
    path = kwargs.get('optimize', False)
    if path is True:
        path = 'greedy'

    # we do very lightweight pre-processing here just to inspect the
    # operands; the actual input verification is deferred to cuTensorNet
    # which can generate far better diagnostic messages
    args = _get_einsum_operands(args)
    operands = [cupy.asarray(op) for op in args[1]]

    if len(operands) == 1:
        # As of cuTENSOR 1.5.0 it still chokes with some common operations
        # like trace ("ii->") so it's easier to just skip all single-operand
        # cases instead of whitelisting what could be done explicitly
        return None

    if (any(op.size == 0 for op in operands) or
            any(len(op.shape) == 0 for op in operands)):
        # To cuTensorNet the shape is invalid
        return None

    # all input dtypes must be identical (to a numerical dtype)
    result_dtype = cupy.result_type(*operands) if dtype is None else dtype
    if result_dtype not in (
            cupy.float32, cupy.float64, cupy.complex64, cupy.complex128):
        return None
    operands = [op.astype(result_dtype, copy=False) for op in operands]

    # prepare cutn inputs
    device = cupy.cuda.runtime.getDevice()
    handle = cutn_handle_cache.get(device, cutensornet.create())
    cutn_options = {'device_id': device, 'handle': handle,
                    'memory_limit': 2**31}  # TODO(leofang): fix?

    # TODO(leofang): support all valid combinations:
    # - path from user, contract with cutn (done)
    # - path from cupy, contract with cutn (not yet)
    # - path from cutn, contract with cutn (done)
    # - path from cutn, contract with cupy (not yet)
    raise_warning = False
    if path is False:
        # following the same convention (contracting from the right) as would
        # be produced by _iter_path_pairs(), but converting to a list of pairs
        # due to cuTensorNet's requirement
        path = [(i - 1, i - 2) for i in range(len(operands), 1, -1)]
    elif len(path) and path[0] == 'einsum_path':
        # let cuTensorNet check if the format is correct
        path = path[1:]
    elif len(path) == 2:
        if isinstance(path[1], (int, float)):
            raise_warning = True
        if path[0] != 'cutensornet':
            raise_warning = True
        path = None
    else:  # path is a string
        if path != 'cutensornet':
            raise_warning = True
        path = None

    if raise_warning:
        warnings.warn(
            'the cuTensorNet backend ignores the "optimize" option '
            'except when an explicit contraction path is provided '
            'or when optimize=False (disable optimization); also, '
            'the maximum intermediate size, if set, is ignored',
            stacklevel=2)
    cutn_optimizer = {'path': path} if path else None

    if len(args) == 2:
        out = cutensornet.contract(
            args[0], *operands, options=cutn_options,
            optimize=cutn_optimizer)
    elif len(args) == 3:
        inputs = [i for pair in zip(operands, args[0]) for i in pair]
        if args[2] is not None:
            inputs.append(args[2])
        out = cutensornet.contract(
            *inputs, options=cutn_options, optimize=cutn_optimizer)
    else:
        assert False

    return out
def convolve( in1, in2, mode="full", method="auto", ): """ Convolve two N-dimensional arrays. Convolve `in1` and `in2`, with the output size determined by the `mode` argument. Parameters ---------- in1 : array_like First input. in2 : array_like Second input. Should have the same number of dimensions as `in1`. mode : str {'full', 'valid', 'same'}, optional A string indicating the size of the output: ``full`` The output is the full discrete linear convolution of the inputs. (Default) ``valid`` The output consists only of those elements that do not rely on the zero-padding. In 'valid' mode, either `in1` or `in2` must be at least as large as the other in every dimension. ``same`` The output is the same size as `in1`, centered with respect to the 'full' output. method : str {'auto', 'direct', 'fft'}, optional A string indicating which method to use to calculate the convolution. ``direct`` The convolution is determined directly from sums, the definition of convolution. ``fft`` The Fourier Transform is used to perform the convolution by calling `fftconvolve`. ``auto`` Automatically chooses direct or Fourier method based on an estimate of which is faster (default). Returns ------- convolve : array An N-dimensional array containing a subset of the discrete linear convolution of `in1` with `in2`. See Also -------- choose_conv_method : chooses the fastest appropriate convolution method fftconvolve Notes ----- By default, `convolve` and `correlate` use ``method='auto'``, which calls `choose_conv_method` to choose the fastest method using pre-computed values (`choose_conv_method` can also measure real-world timing with a keyword argument). Because `fftconvolve` relies on floating point numbers, there are certain constraints that may force `method=direct` (more detail in `choose_conv_method` docstring). 
Examples -------- Smooth a square pulse using a Hann window: >>> import cusignal >>> import cupy as cp >>> sig = cp.repeat(cp.asarray([0., 1., 0.]), 100) >>> win = cusignal.hann(50) >>> filtered = cusignal.convolve(sig, win, mode='same') / cp.sum(win) >>> import matplotlib.pyplot as plt >>> fig, (ax_orig, ax_win, ax_filt) = plt.subplots(3, 1, sharex=True) >>> ax_orig.plot(cp.asnumpy(sig)) >>> ax_orig.set_title('Original pulse') >>> ax_orig.margins(0, 0.1) >>> ax_win.plot(cp.asnumpy(win)) >>> ax_win.set_title('Filter impulse response') >>> ax_win.margins(0, 0.1) >>> ax_filt.plot(cp.asnumpy(filtered)) >>> ax_filt.set_title('Filtered signal') >>> ax_filt.margins(0, 0.1) >>> fig.tight_layout() >>> fig.show() """ volume = cp.asarray(in1) kernel = cp.asarray(in2) if volume.ndim == kernel.ndim == 0: return volume * kernel elif volume.ndim != kernel.ndim: raise ValueError("in1 and in2 should have the same dimensionality") if _inputs_swap_needed(mode, volume.shape, kernel.shape): # Convolution is commutative # order doesn't have any effect on output volume, kernel = kernel, volume if method == "auto": method = choose_conv_method(volume, kernel, mode=mode) if method == "fft": out = fftconvolve(volume, kernel, mode=mode) result_type = cp.result_type(volume, kernel) if result_type.kind in {"u", "i"}: out = cp.around(out) return out.astype(result_type) elif method == "direct": if volume.ndim > 1: raise ValueError("Direct method is only implemented for 1D") swapped_inputs = (mode != "valid") and (kernel.size > volume.size) if swapped_inputs: volume, kernel = kernel, volume return _convolution_cuda._convolve(volume, kernel, True, swapped_inputs, mode) else: raise ValueError("Acceptable method flags are 'auto'," " 'direct', or 'fft'.")
def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0): """Returns an array with evenly-spaced values within a given interval. Instead of specifying the step width like :func:`cupy.arange`, this function requires the total number of elements specified. Args: start (scalar or array_like): Starting value(s) of the sequence. stop (scalar or array_like): Ending value(s) of the sequence, unless ``endpoint`` is set to ``False``. In that case, the sequence consists of all but the last of ``num + 1`` evenly spaced samples, so that ``stop`` is excluded. Note that the step size changes when ``endpoint`` is ``False``. num: Number of elements. endpoint (bool): If ``True``, the stop value is included as the last element. Otherwise, the stop value is omitted. retstep (bool): If ``True``, this function returns (array, step). Otherwise, it returns only the array. dtype: Data type specifier. It is inferred from the start and stop arguments by default. axis (int): The axis in the result to store the samples. Relevant only if start or stop are array-like. By default ``0``, the samples will be along a new axis inserted at the beginning. Use ``-1`` to get an axis at the end. Returns: cupy.ndarray: The 1-D array of ranged values. .. seealso:: :func:`numpy.linspace` """ if num < 0: raise ValueError('linspace with num<0 is not supported') div = (num - 1) if endpoint else num scalar_start = cupy.isscalar(start) scalar_stop = cupy.isscalar(stop) if scalar_start and scalar_stop: return _linspace_scalar(start, stop, num, endpoint, retstep, dtype) if not scalar_start: if not (isinstance(start, cupy.ndarray) and start.dtype.kind == 'f'): start = cupy.asarray(start) * 1.0 if not scalar_stop: if not (isinstance(stop, cupy.ndarray) and stop.dtype.kind == 'f'): stop = cupy.asarray(stop) * 1.0 dt = cupy.result_type(start, stop, float(num)) if dtype is None: # In actual implementation, only float is used dtype = dt delta = stop - start # ret = cupy.arange(0, num, dtype=dt).reshape((-1,) + (1,) * delta.ndim) ret = cupy.empty((num, ), dtype=dt) _arange_ufunc(0.0, 1.0, ret, dtype=dt) ret = ret.reshape((-1, ) + (1, ) * delta.ndim) # In-place multiplication y *= delta/div is faster, but prevents the # multiplicant from overriding what class is produced, and thus prevents, # e.g. use of Quantities, see numpy#7142. Hence, we multiply in place only # for standard scalar types. if num > 1: step = delta / div if cupy.any(step == 0): # Special handling for denormal numbers, numpy#5437 ret /= div ret = ret * delta else: ret = ret * step else: # 0 and 1 item long sequences have an undefined step step = float('nan') # Multiply with delta to allow possible override of output class. ret = ret * delta ret += start if endpoint and num > 1: ret[-1] = stop if axis != 0: ret = cupy.moveaxis(ret, 0, axis) if cupy.issubdtype(dtype, cupy.integer): cupy.floor(ret, out=ret) ret = ret.astype(dtype, copy=False) if retstep: return ret, step else: return ret
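# Example (assumes the linspace above, or cupy.linspace): array-valued
# endpoints broadcast, and `axis` chooses where the sample axis lands.
import cupy

_y = linspace(cupy.array([0., 10.]), cupy.array([1., 20.]), num=5, axis=-1)
print(_y.shape)   # (2, 5)
print(_y[1])      # [10.  12.5 15.  17.5 20. ]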
def _fft_convolve(a1, a2, mode): offset = 0 if a1.size < a2.size: a1, a2 = a2, a1 offset = 1 - a2.size % 2 # if either of them is complex, the dtype after multiplication will also be if a1.dtype.kind == 'c' or a2.dtype.kind == 'c': fft, ifft = cupy.fft.fft, cupy.fft.ifft is_c2c = True else: fft, ifft = cupy.fft.rfft, cupy.fft.irfft is_c2c = False # hack to work around NumPy/CuPy FFT dtype incompatibility: # CuPy internally converts fp16 to fp32 before doing FFT (whereas Numpy # converts both fp16 and fp32 to fp64), so here we do the cast early and # explicitly, and make sure a correct cuFFT plan can be generated. After # the fft-ifft round trip, we cast the output dtype to the correct one. out_dtype = cupy.result_type(a1, a2) dtype = _output_dtype(out_dtype, 'C2C' if is_c2c else 'R2C') a1 = a1.astype(dtype, copy=False) a2 = a2.astype(dtype, copy=False) n1, n2 = a1.size, a2.size out_size = cupyx.scipy.fft.next_fast_len(n1 + n2 - 1) # skip calling get_fft_plan() as we know the args exactly if is_c2c: fft_t = cufft.CUFFT_C2C if dtype == cupy.complex64 else cufft.CUFFT_Z2Z fft_plan = cufft.Plan1d(out_size, fft_t, 1) ifft_plan = fft_plan else: fft_t = cufft.CUFFT_R2C if dtype == cupy.float32 else cufft.CUFFT_D2Z fft_plan = cufft.Plan1d(out_size, fft_t, 1) # this is a no-op context manager # TODO(leofang): use contextlib.nullcontext() for PY37+? ifft_plan = contextlib.suppress() with fft_plan: fa1 = fft(a1, out_size) fa2 = fft(a2, out_size) with ifft_plan: out = ifft(fa1 * fa2, out_size) if mode == 'full': start, end = 0, n1 + n2 - 1 elif mode == 'same': start = (n2 - 1) // 2 + offset end = start + n1 elif mode == 'valid': start, end = n2 - 1, n1 else: raise ValueError( 'acceptable mode flags are `valid`, `same`, or `full`.') out = out[start:end] if out.dtype.kind in 'iu': out = cupy.around(out) return out.astype(out_dtype, copy=False)
def _get_bin_edges(a, bins, range):
    """
    Computes the bins used internally by `histogram`.

    Args:
        a (ndarray): Ravelled data array
        bins (int or ndarray): Forwarded argument from `histogram`.
        range (None or tuple): Forwarded argument from `histogram`.

    Returns:
        bin_edges (ndarray): Array of bin edges
        uniform_bins (Number, Number, int): The lower bound, upper bound,
            and number of bins, used in the implementation of `histogram`
            that works on uniform bins.
    """
    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None

    # if isinstance(bins, cupy.ndarray) and bins.ndim == 0:
    #     # allow uint8 array, etc
    #     if bins.dtype not in 'bui':
    #         raise TypeError(
    #             "`bins` must be an integer, a string, or an array")
    #     bins = int(bins)  # synchronize

    if isinstance(bins, int):  # will not allow 0-dimensional cupy array
        try:
            n_equal_bins = operator.index(bins)
        except TypeError:
            raise TypeError("`bins` must be an integer, a string, or an array")
        if n_equal_bins < 1:
            raise ValueError("`bins` must be positive, when an integer")

        first_edge, last_edge = _get_outer_edges(a, range)
    elif isinstance(bins, cupy.ndarray):
        if bins.ndim != 1:
            raise ValueError("`bins` must be 1d, when an array")
        bin_edges = cupy.asarray(bins)
        if (bin_edges[:-1] > bin_edges[1:]).any():  # synchronize!
            raise ValueError(
                "`bins` must increase monotonically, when an array")
    elif isinstance(bins, str):
        raise NotImplementedError(
            "only integer and array bins are implemented")
    else:
        raise TypeError("`bins` must be an integer, a string, or an array")

    if n_equal_bins is not None:
        # numpy's gh-10322 means that type resolution rules are dependent on
        # array shapes. To avoid this causing problems, we pick a type now
        # and stick with it throughout.
        bin_type = cupy.result_type(first_edge, last_edge, a)
        if cupy.issubdtype(bin_type, cupy.integer):
            bin_type = cupy.result_type(bin_type, float)

        # bin edges must be computed
        bin_edges = cupy.linspace(
            first_edge, last_edge, n_equal_bins + 1,
            endpoint=True, dtype=bin_type,
        )
        return bin_edges, (first_edge, last_edge, n_equal_bins)
    else:
        return bin_edges, None
def select(condlist, choicelist, default=0): """Return an array drawn from elements in choicelist, depending on conditions. Args: condlist (list of bool arrays): The list of conditions which determine from which array in `choicelist` the output elements are taken. When multiple conditions are satisfied, the first one encountered in `condlist` is used. choicelist (list of cupy.ndarray): The list of arrays from which the output elements are taken. It has to be of the same length as `condlist`. default (scalar) : If provided, will fill element inserted in `output` when all conditions evaluate to False. default value is 0. Returns: cupy.ndarray: The output at position m is the m-th element of the array in `choicelist` where the m-th element of the corresponding array in `condlist` is True. .. seealso:: :func:`numpy.select` """ if len(condlist) != len(choicelist): raise ValueError( 'list of cases must be same length as list of conditions') if len(condlist) == 0: raise ValueError("select with an empty condition list is not possible") if not cupy.isscalar(default): raise TypeError("default only accepts scalar values") for i in range(len(choicelist)): if not isinstance(choicelist[i], cupy.ndarray): raise TypeError("choicelist only accepts lists of cupy ndarrays") cond = condlist[i] if cond.dtype.type is not cupy.bool_: raise ValueError( 'invalid entry {} in condlist: should be boolean ndarray'. format(i)) dtype = cupy.result_type(*choicelist) condlist = cupy.broadcast_arrays(*condlist) choicelist = cupy.broadcast_arrays(*choicelist, default) if choicelist[0].ndim == 0: result_shape = condlist[0].shape else: result_shape = cupy.broadcast_arrays(condlist[0], choicelist[0])[0].shape result = cupy.empty(result_shape, dtype) cupy.copyto(result, default) choicelist = choicelist[-2::-1] condlist = condlist[::-1] for choice, cond in zip(choicelist, condlist): cupy.copyto(result, choice, where=cond) return result
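# Example (assumes the select above, or cupy.select): the first matching
# condition wins; unmatched positions take `default`.
import cupy

_x = cupy.arange(6)
print(select([_x < 2, _x > 3], [_x, _x ** 2], default=-1))
# [ 0  1 -1 -1 16 25]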