def d_TSV(mat):
    dif_c = 2 * cp.diff(mat, axis=1)
    dif_1 = cp.pad(dif_c, [(0, 0), (1, 0)], mode='constant')
    dif_2 = cp.pad(-dif_c, [(0, 0), (0, 1)], mode='constant')
    dif_c = 2 * cp.diff(mat, axis=0)
    dif_3 = cp.pad(dif_c, [(1, 0), (0, 0)], mode='constant')
    dif_4 = cp.pad(-dif_c, [(0, 1), (0, 0)], mode='constant')
    return dif_1 + dif_2 + dif_3 + dif_4

def TSV(mat):
    # TSV terms from left to right
    dif_c = cp.diff(mat, axis=1)
    sum_tsv1 = cp.sum(dif_c * dif_c)
    # TSV terms from bottom to top
    dif_c = cp.diff(mat, axis=0)
    sum_tsv2 = cp.sum(dif_c * dif_c)
    # Return all TSV terms
    return sum_tsv1 + sum_tsv2

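# A minimal usage sketch (not from the original source): d_TSV is the gradient
# of the TSV objective, so a central finite difference on TSV should match one
# entry of d_TSV. Assumes CuPy is imported as `cp`, as the functions above
# imply; `_demo_tsv_gradient` is a hypothetical helper name.
def _demo_tsv_gradient():
    import cupy as cp
    mat = cp.random.rand(5, 5)
    eps = 1e-6
    probe = cp.zeros_like(mat)
    probe[2, 3] = eps
    # central difference of the scalar TSV objective at pixel (2, 3)
    fd = (TSV(mat + probe) - TSV(mat - probe)) / (2 * eps)
    assert abs(float(d_TSV(mat)[2, 3] - fd)) < 1e-4
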
def count_nonzero(X, axis=None, sample_weight=None):
    """A variant of X.getnnz() with extension to weighting on axis 0.

    Useful in efficiently calculating multilabel metrics.

    Parameters
    ----------
    X : CSR sparse matrix of shape (n_samples, n_labels)
        Input data.

    axis : None, 0 or 1
        The axis on which the data is aggregated.

    sample_weight : array-like of shape (n_samples,), default=None
        Weight for each row of X.
    """
    if axis == -1:
        axis = 1
    elif axis == -2:
        axis = 0
    elif X.format != 'csr':
        raise TypeError('Expected CSR sparse format, got {0}'.format(X.format))

    # We rely here on the fact that np.diff(Y.indptr) for a CSR
    # will return the number of nonzero entries in each row.
    # A bincount over Y.indices will return the number of nonzeros
    # in each column. See ``csr_matrix.getnnz`` in scipy >= 0.14.
    if axis is None:
        if sample_weight is None:
            return X.nnz
        else:
            return np.dot(np.diff(X.indptr), sample_weight)
    elif axis == 1:
        out = np.diff(X.indptr)
        if sample_weight is None:
            # astype here is for consistency with axis=0 dtype
            return out.astype('intp')
        return out * sample_weight
    elif axis == 0:
        if sample_weight is None:
            return np.bincount(X.indices, minlength=X.shape[1])
        else:
            weights = np.repeat(sample_weight, np.diff(X.indptr))
            return np.bincount(X.indices, minlength=X.shape[1],
                               weights=weights)
    else:
        raise ValueError('Unsupported axis: {0}'.format(axis))

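# Hypothetical usage sketch for count_nonzero above, assuming numpy is
# available as the module-level `np` the function body references and that
# scipy provides the CSR input; `_demo_count_nonzero` is an illustrative name.
def _demo_count_nonzero():
    import numpy as np
    from scipy import sparse
    X = sparse.csr_matrix(np.array([[1, 0, 2],
                                    [0, 3, 0]]))
    w = np.array([0.5, 2.0])
    print(count_nonzero(X))                           # 3 (total nnz)
    print(count_nonzero(X, axis=1))                   # [2 1] (per row)
    print(count_nonzero(X, axis=0, sample_weight=w))  # [0.5 2.  0.5]
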
def average_precision_score(y_true, y_score):
    """
    Compute average precision score using precision and recall
    computed from cuml.
    """
    precision, recall, _ = precision_recall_curve(y_true, y_score)
    # return step function integral
    return -cp.sum(cp.diff(recall) * cp.array(precision)[:-1])

def _sparse_fit(self, X, strategy, missing_values, fill_value):
    """Fit the transformer on sparse data."""
    mask_data = _get_mask(X.data, missing_values)
    n_implicit_zeros = X.shape[0] - np.diff(X.indptr)

    statistics = np.empty(X.shape[1])

    if strategy == "constant":
        # for constant strategy, self.statistics_ is used to store
        # fill_value in each column
        statistics.fill(fill_value)
    else:
        for i in range(X.shape[1]):
            column = X.data[X.indptr[i]:X.indptr[i + 1]]
            mask_column = mask_data[X.indptr[i]:X.indptr[i + 1]]
            column = column[~mask_column]

            # combine explicit and implicit zeros
            mask_zeros = _get_mask(column, 0)
            column = column[~mask_zeros]
            n_explicit_zeros = mask_zeros.sum()
            n_zeros = n_implicit_zeros[i] + n_explicit_zeros

            if strategy == "mean":
                s = column.size + n_zeros
                statistics[i] = np.nan if s == 0 else column.sum() / s
            elif strategy == "median":
                statistics[i] = _get_median(column, n_zeros)
            elif strategy == "most_frequent":
                statistics[i] = _most_frequent(column, 0, n_zeros)
    return statistics

def transform(self, X) -> SparseCumlArray:
    """Impute all missing values in X.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data to complete.
    """
    check_is_fitted(self)

    X = self._validate_input(X, in_fit=False)
    X_indicator = super()._transform_indicator(X)

    statistics = self.statistics_

    if X.shape[1] != statistics.shape[0]:
        raise ValueError("X has %d features per sample, expected %d"
                         % (X.shape[1], self.statistics_.shape[0]))

    # Delete the invalid columns if strategy is not constant
    if self.strategy == "constant":
        valid_statistics = statistics
    else:
        # same as np.isnan but also works for object dtypes
        invalid_mask = _get_mask(statistics, np.nan)
        valid_mask = np.logical_not(invalid_mask)
        valid_statistics = statistics[valid_mask]
        valid_statistics_indexes = np.flatnonzero(valid_mask)

        if invalid_mask.any():
            missing = np.arange(X.shape[1])[invalid_mask]
            if self.verbose:
                warnings.warn("Deleting features without "
                              "observed values: %s" % missing)
            X = X[:, valid_statistics_indexes]

    # Do actual imputation
    if sparse.issparse(X):
        if self.missing_values == 0:
            raise ValueError("Imputation not possible when missing_values "
                             "== 0 and input is sparse. Provide a dense "
                             "array instead.")
        else:
            mask = _get_mask(X.data, self.missing_values)
            indexes = np.repeat(np.arange(len(X.indptr) - 1, dtype=np.intp),
                                np.diff(X.indptr).tolist())[mask]

            X.data[mask] = valid_statistics[indexes].astype(X.dtype,
                                                            copy=False)
    else:
        mask = _get_mask(X, self.missing_values)

        if self.strategy == "constant":
            X[mask] = valid_statistics[0]
        else:
            for i, vi in enumerate(valid_statistics_indexes):
                feature_idxs = np.flatnonzero(mask[:, vi])
                X[feature_idxs, vi] = valid_statistics[i]

    X = super()._concatenate_indicator(X, X_indicator)

    return X

def _major_slice(self, idx, copy=False):
    """Index along the major axis where idx is a slice object."""
    if idx == slice(None):
        return self.copy() if copy else self

    M, N = self._swap(*self.shape)
    start, stop, step = idx.indices(M)
    M = len(range(start, stop, step))
    new_shape = self._swap(M, N)

    if M == 0:
        return self.__class__(new_shape)

    row_nnz = cupy.diff(self.indptr)
    idx_dtype = self.indices.dtype
    res_indptr = cupy.zeros(M + 1, dtype=idx_dtype)

    cupy.cumsum(row_nnz[idx], out=res_indptr[1:])

    if step == 1:
        idx_start = self.indptr[start]
        idx_stop = self.indptr[stop]
        res_indices = cupy.array(self.indices[idx_start:idx_stop],
                                 copy=copy)
        res_data = cupy.array(self.data[idx_start:idx_stop], copy=copy)
    else:
        res_indices, res_data = _index._csr_row_slice(
            start, step, self.indptr, self.indices, self.data, res_indptr)

    return self.__class__((res_data, res_indices, res_indptr),
                          shape=new_shape, copy=False)

def _csr_row_index(rows, Ap, Aj, Ax):
    """Populate indices and data arrays from the given row index

    Args:
        rows (cupy.ndarray): index array of rows to populate
        Ap (cupy.ndarray): indptr array from input sparse matrix
        Aj (cupy.ndarray): indices array from input sparse matrix
        Ax (cupy.ndarray): data array from input sparse matrix

    Returns:
        Bp (cupy.ndarray): indptr array for output sparse matrix
        Bj (cupy.ndarray): indices array of output sparse matrix
        Bx (cupy.ndarray): data array of output sparse matrix
    """
    row_nnz = cupy.diff(Ap)
    Bp = cupy.empty(rows.size + 1, dtype=Ap.dtype)
    Bp[0] = 0
    cupy.cumsum(row_nnz[rows], out=Bp[1:])
    nnz = int(Bp[-1])

    out_rows = cupy.empty(nnz, dtype=numpy.int32)

    # Build a COO row array from output CSR indptr.
    # Calling backend cusparse API directly to avoid
    # constructing a whole COO object.
    handle = device.get_cusparse_handle()
    cusparse.xcsr2coo(handle, Bp.data.ptr, nnz, Bp.size - 1,
                      out_rows.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)

    Bj, Bx = _csr_row_index_ker(out_rows, rows, Ap, Aj, Ax, Bp)
    return Bp, Bj, Bx

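# Standalone illustration (not part of the original module) of the indptr
# construction shared by _csr_row_index and the fancy-indexing methods in this
# collection: per-row nnz via diff, then a cumsum gathered at the selected
# rows yields the output indptr.
def _demo_row_index_indptr():
    import cupy
    indptr = cupy.array([0, 2, 2, 5, 6])  # 4 rows with nnz [2, 0, 3, 1]
    rows = cupy.array([2, 0, 2])          # selected rows, duplicates allowed
    row_nnz = cupy.diff(indptr)
    Bp = cupy.empty(rows.size + 1, dtype=indptr.dtype)
    Bp[0] = 0
    cupy.cumsum(row_nnz[rows], out=Bp[1:])
    print(Bp)  # [0 3 5 8]
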
def _basic_simps(y, start, stop, x, dx, axis):
    import cupy
    nd = len(y.shape)
    if start is None:
        start = 0
    step = 2
    slice_all = (slice(None), ) * nd
    slice0 = tupleset(slice_all, axis, slice(start, stop, step))
    slice1 = tupleset(slice_all, axis, slice(start + 1, stop + 1, step))
    slice2 = tupleset(slice_all, axis, slice(start + 2, stop + 2, step))

    if x is None:
        # Even spaced Simpson's rule.
        result = cupy.sum(dx / 3.0 * (y[slice0] + 4 * y[slice1] + y[slice2]),
                          axis=axis)
    else:
        # Account for possibly different spacings.
        # Simpson's rule changes a bit.
        h = cupy.diff(x, axis=axis)
        sl0 = tupleset(slice_all, axis, slice(start, stop, step))
        sl1 = tupleset(slice_all, axis, slice(start + 1, stop + 1, step))
        h0 = h[sl0]
        h1 = h[sl1]
        hsum = h0 + h1
        hprod = h0 * h1
        h0divh1 = h0 / h1
        tmp = hsum / 6.0 * (y[slice0] * (2 - 1.0 / h0divh1) +
                            y[slice1] * hsum * hsum / hprod +
                            y[slice2] * (2 - h0divh1))
        result = cupy.sum(tmp, axis=axis)
    return result

def _min_or_max_axis(X, axis, min_or_max):
    N = X.shape[axis]
    if N == 0:
        raise ValueError("zero-size array to reduction operation")
    M = X.shape[1 - axis]
    mat = X.tocsc() if axis == 0 else X.tocsr()
    mat.sum_duplicates()

    major_index, value = _minor_reduce(mat, min_or_max)

    not_full = np.diff(mat.indptr)[major_index] < N
    if min_or_max == 'min':
        min_or_max = np.fmin
    else:
        min_or_max = np.fmax
    value[not_full] = min_or_max(value[not_full], 0)

    mask = value != 0
    major_index = np.compress(mask, major_index)
    value = np.compress(mask, value)

    if axis == 0:
        res = gpu_sp.coo_matrix(
            (value, (np.zeros(len(value)), major_index)),
            dtype=X.dtype, shape=(1, M))
    else:
        res = gpu_sp.coo_matrix(
            (value, (major_index, np.zeros(len(value)))),
            dtype=X.dtype, shape=(M, 1))
    return res.A.ravel()

def wint(n, t):
    N = len(t)
    s = cp.linspace(1e-40, 1, n)
    # Inverse Vandermonde matrix
    tmp1 = cp.arange(n)
    tmp2 = cp.arange(1, n + 2)
    iv = cp.linalg.inv(cp.exp(cp.outer(tmp1, cp.log(s))))
    u = cp.diff(
        cp.exp(cp.outer(tmp2, cp.log(s))) *
        cp.tile(1.0 / tmp2[..., cp.newaxis], [1, n]))
    # integration over short intervals
    W1 = cp.matmul(iv, u[1:n + 1, :])  # x*pn(x) term
    W2 = cp.matmul(iv, u[0:n, :])  # const*pn(x) term
    # Compensate for overlapping short intervals
    tmp1 = cp.arange(1, n)
    tmp2 = (n - 1) * cp.ones((N - 2 * (n - 1) - 1))
    tmp3 = cp.arange(n - 1, 0, -1)
    p = 1 / cp.concatenate((tmp1, tmp2, tmp3))
    w = cp.zeros(N)
    for j in range(N - n + 1):
        # Change coordinates, and constant and linear parts
        W = ((t[j + n - 1] - t[j])**2) * W1 + \
            (t[j + n - 1] - t[j]) * t[j] * W2
        for k in range(n - 1):
            w[j:j + n] = w[j:j + n] + p[j + k] * W[:, k]

    wn = w
    wn[-40:] = (w[-40]) / (N - 40) * cp.arange(N - 40, N)
    return wn

def _perform_insert(self, indices_inserts, data_inserts,
                    rows, row_counts, idx_dtype):
    """Insert new elements into current sparse matrix in sorted order"""
    indptr_diff = cupy.diff(self.indptr)
    indptr_diff[rows] += row_counts

    new_indptr = cupy.empty(self.indptr.shape, dtype=idx_dtype)
    new_indptr[0] = idx_dtype(0)
    new_indptr[1:] = indptr_diff

    # Build output arrays
    cupy.cumsum(new_indptr, out=new_indptr)
    out_nnz = int(new_indptr[-1])

    new_indices = cupy.empty(out_nnz, dtype=idx_dtype)
    new_data = cupy.empty(out_nnz, dtype=self.data.dtype)

    # Build an indexed indptr that contains the offsets for each row,
    # but only for the rows receiving inserts (i, j, and x).
    new_indptr_lookup = cupy.zeros(new_indptr.size, dtype=idx_dtype)
    new_indptr_lookup[1:][rows] = row_counts
    cupy.cumsum(new_indptr_lookup, out=new_indptr_lookup)

    _index._insert_many_populate_arrays(
        indices_inserts, data_inserts, new_indptr_lookup,
        self.indptr, self.indices, self.data, new_indptr,
        new_indices, new_data, size=self.indptr.size - 1)

    self.indptr = new_indptr
    self.indices = new_indices
    self.data = new_data

def run_bootstrap(v, number_samples=2, block_size=60, number_of_threads=256):
    """
    @v, stock price matrix. [time, stocks]
    @number_samples, number of samples
    @block_size, sample block size
    """
    length, assets = v.shape  # get the time length and the number of assets
    init_prices = v[0, :].reshape(1, -1, 1)  # initial prices for all assets
    v = cupy.log(v)
    # compute the price difference, dimension of [length - 1, assets]
    ref = cupy.diff(v, axis=0)
    # output results
    output = cupy.zeros((number_samples, assets, length))
    # sample starting position, exclusive
    sample_range = length - block_size
    # number of positions to sample to cover the whole seq length
    num_positions = (length - 2) // block_size + 1
    # compute random starting positions
    sample_positions = cupy.random.randint(
        0, sample_range, num_positions * number_samples)

    number_of_blocks = len(sample_positions)
    boot_strap[(number_of_blocks, ), (number_of_threads, )](
        output, ref.T, block_size, num_positions, sample_positions)
    # reshape the results [number_samples, number assets, time]
    # output = output.reshape(number_samples, assets, length)
    # convert it into prices
    return cupy.exp(output.cumsum(axis=2)) * init_prices

def cumulative_trapezoid(y, x=None, dx=1.0, axis=-1, initial=None):
    if _GPU_ENABLED:
        y = cp.asarray(y)
        if x is None:
            d = dx
        else:
            x = cp.asarray(x)
            if x.ndim == 1:
                d = cp.diff(x)
                shape = [1] * y.ndim
                shape[axis] = -1
                d = d.reshape(shape)
            elif len(x.shape) != len(y.shape):
                raise ValueError("If given, shape of x must be 1-D or the "
                                 "same as y.")
            else:
                d = cp.diff(x, axis=axis)

            if d.shape[axis] != y.shape[axis] - 1:
                raise ValueError("If given, length of x along axis must be "
                                 "the same as y.")

        def tupleset(t, i, value):
            l = list(t)
            l[i] = value
            return tuple(l)

        nd = len(y.shape)
        slice1 = tupleset((slice(None), ) * nd, axis, slice(1, None))
        slice2 = tupleset((slice(None), ) * nd, axis, slice(None, -1))
        res = cp.cumsum(d * (y[slice1] + y[slice2]) / 2.0, axis=axis)

        if initial is not None:
            if not np.isscalar(initial):
                raise ValueError("`initial` parameter should be a scalar.")

            shape = list(res.shape)
            shape[axis] = 1
            res = cp.concatenate(
                [cp.full(shape, initial, dtype=res.dtype), res], axis=axis)

        return res
    else:
        try:
            from scipy.integrate import cumulative_trapezoid as ctz
        except ImportError:
            from scipy.integrate import cumtrapz as ctz
        return ctz(y=y, x=x, dx=dx, axis=axis, initial=initial)

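# Hedged usage sketch for cumulative_trapezoid above, assuming the
# module-level _GPU_ENABLED flag and the cp/np imports it references are in
# place; `_demo_cumulative_trapezoid` is a hypothetical name.
def _demo_cumulative_trapezoid():
    import numpy as np
    x = np.array([0.0, 1.0, 3.0])
    y = np.array([0.0, 1.0, 3.0])
    # cumulative integral of y = x: exact values are 0.5 and 4.5
    print(cumulative_trapezoid(y, x=x, initial=0.0))  # [0.  0.5 4.5]
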
def _filter_cells(sparse_gpu_array, min_genes, max_genes, barcodes=None):
    degrees = cp.diff(sparse_gpu_array.indptr)
    query = ((min_genes <= degrees) & (degrees <= max_genes))
    query = query.get()
    if barcodes is None:
        return sparse_gpu_array.get()[query]
    else:
        return sparse_gpu_array.get()[query], barcodes[query]

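# Hypothetical usage sketch for _filter_cells above: keep cells (rows) whose
# per-row nonzero count, i.e. number of detected genes, lies in
# [min_genes, max_genes]. Assumes `cp` is the module-level CuPy import the
# function uses and that the input is a cupyx CSR matrix.
def _demo_filter_cells():
    import cupy as cp
    from cupyx.scipy import sparse
    X = sparse.csr_matrix(cp.array([[1., 0., 2.],
                                    [0., 0., 0.],
                                    [3., 4., 5.]]))
    filtered = _filter_cells(X, min_genes=1, max_genes=2)
    print(filtered.shape)  # (1, 3): only the first row has 1-2 nonzeros
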
def euler(func, x0, t, args=None):
    if args is None:
        args = ()  # avoid unpacking None when no extra arguments are given
    solution = cp.empty(shape=(len(t), len(x0), len(x0[0])))
    solution[0] = x0
    x = x0
    for i, dt in enumerate(cp.diff(t)):
        x = cp.add(x, cp.multiply(dt, func(x, t[i], *args)))
        solution[i + 1] = x
    return solution

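# Usage sketch for euler above (a hypothetical example, assuming `cp` is the
# module-level CuPy import): integrate dx/dt = -x for a batch of initial
# conditions; the analytic solution is x0 * exp(-t).
def _demo_euler():
    import cupy as cp
    x0 = cp.array([[1.0], [2.0]])  # shape (n_systems, n_dims)
    t = cp.linspace(0.0, 1.0, 201)
    sol = euler(lambda x, t: -x, x0, t)
    print(float(sol[-1, 0, 0]))  # close to exp(-1) ~ 0.368
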
def csr_polynomial_expansion(X, interaction_only, degree):
    """Apply polynomial expansion on CSR matrix

    Parameters
    ----------
    X : sparse CSR matrix
        Input array

    Returns
    -------
    New expanded matrix
    """
    assert degree in (2, 3)
    interaction_only = 1 if interaction_only else 0

    d = X.shape[1]
    if degree == 2:
        expanded_dimensionality = int((d**2 + d) / 2 - interaction_only * d)
    else:
        expanded_dimensionality = int((d**3 + 3 * d**2 + 2 * d) / 6 -
                                      interaction_only * d**2)
    if expanded_dimensionality == 0:
        return None
    assert expanded_dimensionality > 0

    nnz = cp.diff(X.indptr)
    if degree == 2:
        total_nnz = (nnz**2 + nnz) / 2 - interaction_only * nnz
    else:
        total_nnz = ((nnz**3 + 3 * nnz**2 + 2 * nnz) / 6 -
                     interaction_only * nnz**2)
    del nnz
    nnz_cumsum = total_nnz.cumsum(dtype=cp.int64)
    total_nnz_max = int(total_nnz.max())
    total_nnz = int(total_nnz.sum())

    num_rows = X.indptr.shape[0] - 1

    expanded_data = cp.empty(shape=total_nnz, dtype=X.data.dtype)
    expanded_indices = cp.empty(shape=total_nnz, dtype=X.indices.dtype)
    expanded_indptr = cp.empty(shape=num_rows + 1, dtype=X.indptr.dtype)
    expanded_indptr[0] = X.indptr[0]
    expanded_indptr[1:] = nnz_cumsum

    tpb = (32, 32)
    bpg_x = ceil(X.indptr.shape[0] / tpb[0])
    bpg_y = ceil(total_nnz_max / tpb[1])
    bpg = (bpg_x, bpg_y)
    perform_expansion[bpg, tpb](X.indptr, X.indices, X.data,
                                expanded_data, expanded_indices,
                                d, interaction_only, degree,
                                expanded_indptr)

    return cp.sparse.csr_matrix(
        (expanded_data, expanded_indices, expanded_indptr),
        shape=(num_rows, expanded_dimensionality))

def _minor_reduce(X, min_or_max):
    fminmax = ufunc_dic[min_or_max]

    major_index = np.flatnonzero(np.diff(X.indptr))
    values = cpu_np.zeros(major_index.shape[0], dtype=X.dtype)
    ptrs = X.indptr[major_index]

    start = ptrs[0]
    for i, end in enumerate(ptrs[1:]):
        values[i] = fminmax(X.data[start:end])
        start = end
    # `start` points at the beginning of the last segment even when the
    # loop body never ran (a single nonempty major slice)
    values[-1] = fminmax(X.data[start:])

    return major_index, np.array(values)

def _compute_weights_3d(data, spacing, beta, eps, multichannel):
    # Weight calculation is main difference in multispectral version
    # Original gradient**2 replaced with sum of gradients ** 2
    gradients = cp.concatenate(
        [cp.diff(data[..., 0], axis=ax).ravel() / spacing[ax]
         for ax in [2, 1, 0] if data.shape[ax] > 1], axis=0)
    gradients *= gradients
    for channel in range(1, data.shape[-1]):
        grad = cp.concatenate(
            [cp.diff(data[..., channel], axis=ax).ravel() / spacing[ax]
             for ax in [2, 1, 0] if data.shape[ax] > 1], axis=0)
        grad *= grad
        gradients += grad

    # All channels considered together in this standard deviation
    scale_factor = -beta / (10 * data.std())
    if multichannel:
        # New final term in beta to give == results in trivial case where
        # multiple identical spectra are passed.
        scale_factor /= math.sqrt(data.shape[-1])
    weights = cp.exp(scale_factor * gradients)
    weights += eps
    return -weights

def inplace_csr_row_scale(X, scale):
    """Inplace row scaling of a CSR matrix.

    Scale each sample of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : CSR sparse matrix, shape (n_samples, n_features)
        Matrix to be scaled.

    scale : float array with shape (n_samples,)
        Array of precomputed sample-wise values to use for scaling.
    """
    assert scale.shape[0] == X.shape[0]
    X.data *= np.repeat(scale, np.diff(X.indptr).tolist())

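# Hedged usage sketch, assuming `np` here is numpy and X is a scipy CSR
# matrix (the function only relies on .data, .indptr, and .shape); the repeat
# expands the per-row scale to one factor per stored nonzero.
def _demo_inplace_csr_row_scale():
    import numpy as np
    from scipy import sparse
    X = sparse.csr_matrix(np.array([[1., 2.],
                                    [3., 0.]]))
    inplace_csr_row_scale(X, np.array([10., 0.5]))
    print(X.toarray())  # [[10.  20. ]
                        #  [ 1.5  0. ]]
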
def average_precision_score(y_true, y_score):
    """
    Compute average precision (AP) from prediction scores.

    .. note:: this implementation can only be used with binary
       classification.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True labels. The binary cases expect labels with shape (n_samples,)
    y_score : array-like of shape (n_samples,)
        Target scores. In the binary cases, these can be either probability
        estimates or non-thresholded decision values (as returned by
        `decision_function` on some classifiers). The binary case expects
        a shape (n_samples,), and the scores must be the scores of the
        class with the greater label.

    Returns
    -------
    average_precision : float

    Examples
    --------
    >>> import numpy as np
    >>> from cuml.metrics import average_precision_score
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> print(average_precision_score(y_true, y_scores))
    0.83
    """
    # y_true, n_rows, n_cols, ytype = \
    #     input_to_cupy_array(y_true, check_dtype=[np.int32, np.int64,
    #                         np.float32, np.float64])
    # y_score, _, _, _ = \
    #     input_to_cupy_array(y_score, check_dtype=[np.int32, np.int64,
    #                         np.float32, np.float64],
    #                         check_rows=n_rows, check_cols=n_cols)
    if cp.unique(y_true).shape[0] == 1:
        raise ValueError("average_precision_score cannot be used when "
                         "only one class present in y_true. Average "
                         "precision score is not defined in that case.")

    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    return -cp.sum(cp.diff(recall) * cp.array(precision)[:-1])

def __init__(self, points, values, method="linear", bounds_error=True,
             fill_value=cp.nan):
    if method not in ["linear", "nearest"]:
        raise ValueError("Method '%s' is not defined" % method)
    self.method = method
    self.bounds_error = bounds_error

    # allow reasonable duck-typed values
    values = cp.asarray(values)

    if len(points) > values.ndim:
        raise ValueError("There are %d point arrays, but values has %d "
                         "dimensions" % (len(points), values.ndim))

    if hasattr(values, "dtype") and hasattr(values, "astype"):
        if not cp.issubdtype(values.dtype, cp.inexact):
            values = values.astype(float)

    self.fill_value = fill_value
    if fill_value is not None:
        fill_value_dtype = cp.asarray(fill_value).dtype
        if hasattr(values, "dtype") and not cp.can_cast(
                fill_value_dtype, values.dtype, casting="same_kind"):
            raise ValueError("fill_value must be either 'None' or "
                             "of a type compatible with values")

    for i, p in enumerate(points):
        if not cp.all(cp.diff(p) > 0.0):
            raise ValueError("The points in dimension %d must be strictly "
                             "ascending" % i)
        if not cp.asarray(p).ndim == 1:
            raise ValueError("The points in dimension %d must be "
                             "1-dimensional" % i)
        if not values.shape[i] == len(p):
            raise ValueError("There are %d points and %d values in "
                             "dimension %d" % (len(p), values.shape[i], i))
    self.grid = tuple([cp.asarray(p) for p in points])
    self.values = values

def _major_index_fancy(self, idx):
    """Index along the major axis where idx is an array of ints."""
    _, N = self._swap(*self.shape)
    M = len(idx)
    new_shape = self._swap(M, N)
    if M == 0:
        return self.__class__(new_shape)

    row_nnz = cupy.diff(self.indptr)
    idx_dtype = self.indices.dtype
    res_indptr = cupy.zeros(M + 1, dtype=idx_dtype)
    cupy.cumsum(row_nnz[idx], out=res_indptr[1:])

    res_indices, res_data = _index._csr_row_index(idx, self.indptr,
                                                  self.indices, self.data,
                                                  res_indptr)

    return self.__class__((res_data, res_indices, res_indptr),
                          shape=new_shape, copy=False)

def _csr_row_index(Ax, Aj, Ap, rows):
    """Populate indices and data arrays from the given row index

    Args:
        Ax (cupy.ndarray): data array from input sparse matrix
        Aj (cupy.ndarray): indices array from input sparse matrix
        Ap (cupy.ndarray): indptr array from input sparse matrix
        rows (cupy.ndarray): index array of rows to populate

    Returns:
        Bx (cupy.ndarray): data array of output sparse matrix
        Bj (cupy.ndarray): indices array of output sparse matrix
        Bp (cupy.ndarray): indptr array for output sparse matrix
    """
    row_nnz = cupy.diff(Ap)
    Bp = cupy.empty(rows.size + 1, dtype=Ap.dtype)
    Bp[0] = 0
    cupy.cumsum(row_nnz[rows], out=Bp[1:])
    nnz = int(Bp[-1])

    out_rows = _csr_indptr_to_coo_rows(nnz, Bp)

    Bj, Bx = _csr_row_index_ker(out_rows, rows, Ap, Aj, Ax, Bp)
    return Bx, Bj, Bp

def inplace_csr_row_normalize_l1(X):
    """Normalize CSR matrix inplace with L1 norm

    Parameters
    ----------
    X : sparse CSR matrix
        Input array

    Returns
    -------
    Normalized matrix
    """
    n_rows = X.indptr.shape[0]
    max_nnz = cp.diff(X.indptr).max()
    tpb = (32, 32)
    bpg_x = ceil(n_rows / tpb[0])
    bpg_y = ceil(max_nnz / tpb[1])
    bpg = (bpg_x, bpg_y)

    norm = cp.zeros(n_rows - 1, dtype=X.dtype)
    l1_step1_k[bpg, tpb](X.indptr, X.data, norm)
    norm_step2_k[bpg, tpb](X.indptr, X.data, norm)

def _minor_reduce(X, min_or_max):
    if min_or_max == 'min':
        min_or_max = np.min
    else:
        min_or_max = np.max
    major_index = np.flatnonzero(np.diff(X.indptr))

    # reduceat tries to cast X.indptr to intp, which errors
    # if it is int64 on a 32 bit system.
    # Reinitializing prevents this where possible, see #13737
    X = type(X)((X.data, X.indices, X.indptr), shape=X.shape)
    value = cpu_np.zeros(len(X.indptr) - 1, dtype=X.dtype)
    start = X.indptr[0]
    for i, end in enumerate(X.indptr[1:]):
        value[i] = min_or_max(X.data[start:end])
        start = end
    value = np.array(value)
    return major_index, value

def diag_indices_from(arr):
    """
    Return the indices to access the main diagonal of an n-dimensional array.
    See `diag_indices` for full details.

    Args:
        arr (cupy.ndarray): At least 2-D.

    .. seealso:: :func:`numpy.diag_indices_from`
    """
    if not isinstance(arr, cupy.ndarray):
        raise TypeError("Argument must be cupy.ndarray")

    if not arr.ndim >= 2:
        raise ValueError("input array must be at least 2-d")
    # For more than d=2, the strided formula is only valid for arrays with
    # all dimensions equal, so we check first.
    if not cupy.all(cupy.diff(arr.shape) == 0):
        raise ValueError("All dimensions of input must be of equal length")

    return diag_indices(arr.shape[0], arr.ndim)

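# Usage sketch (hypothetical) for diag_indices_from, assuming the companion
# diag_indices helper it calls is available: zero out the main diagonal of a
# square CuPy array.
def _demo_diag_indices_from():
    import cupy
    a = cupy.arange(9).reshape(3, 3)
    rows, cols = diag_indices_from(a)
    a[rows, cols] = 0
    print(a)  # the diagonal entries 0, 4, 8 are now all 0
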
def cumsum(x, Kahan=0):
    """
    Wrapper for exclusive prefix sum computation with an optional
    refinement step using an approach similar to Kahan summation.
    This function is not exposed to the user.

    Arguments
    -------
    x: cupy.core.core.ndarray
        the input array of length n to be scanned with operation +
    Kahan: int
        non-negative number of Kahan summation adjustment rounds

    Returns
    -------
    cupy.core.core.ndarray
        the computed exclusive prefix scan of length n+1
    """
    assert isinstance(Kahan, int) and Kahan >= 0

    # allocate an empty array with leading 0
    y = cp.empty(len(x) + 1, dtype=x.dtype)
    y[0] = 0

    # compute the inclusive prefix sum starting at entry 1
    cp.cumsum(x, out=y[1:])

    # basically exploit that (d/dt int f(t) dt) - f(t) = r = 0 forall f(t)
    # in case delta is non-vanishing due to numeric inaccuracies, we add
    # the prefix scan of r to the final result (inaccuracies might add up)
    if Kahan:
        r = x - cp.diff(y)
        if cp.max(cp.abs(r)):
            y += cumsum(r, Kahan - 1)

    return y

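# Hedged usage sketch for the exclusive scan above, assuming `cp` is CuPy:
# the result has a leading 0 and length n + 1; with Kahan > 0 a correction
# pass is applied only when a nonzero residual is detected.
def _demo_cumsum():
    import cupy as cp
    x = cp.ones(4, dtype=cp.float64)
    print(cumsum(x))           # [0. 1. 2. 3. 4.]
    print(cumsum(x, Kahan=1))  # identical here: the residual r is exactly 0
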
def trapz(y, x=None, dx=1.0, axis=-1):
    """
    Lifted from `numpy
    <https://github.com/numpy/numpy/blob/v1.15.1/numpy/lib/function_base.py#L3804-L3891>`_.

    Integrate along the given axis using the composite trapezoidal rule.
    Integrate `y` (`x`) along given axis.

    Parameters
    ==========
    y : array_like
        Input array to integrate.
    x : array_like, optional
        The sample points corresponding to the `y` values. If `x` is None,
        the sample points are assumed to be evenly spaced `dx` apart. The
        default is None.
    dx : scalar, optional
        The spacing between sample points when `x` is None. The default is 1.
    axis : int, optional
        The axis along which to integrate.

    Returns
    =======
    trapz : float
        Definite integral as approximated by trapezoidal rule.

    References
    ==========
    .. [1] Wikipedia page: http://en.wikipedia.org/wiki/Trapezoidal_rule

    Examples
    ========
    >>> trapz([1,2,3])
    4.0
    >>> trapz([1,2,3], x=[4,6,8])
    8.0
    >>> trapz([1,2,3], dx=2)
    8.0
    >>> a = xp.arange(6).reshape(2, 3)
    >>> a
    array([[0, 1, 2],
           [3, 4, 5]])
    >>> trapz(a, axis=0)
    array([ 1.5,  2.5,  3.5])
    >>> trapz(a, axis=1)
    array([ 2.,  8.])
    """
    y = xp.asanyarray(y)
    if x is None:
        d = dx
    else:
        x = xp.asanyarray(x)
        if x.ndim == 1:
            d = xp.diff(x)
            # reshape to correct shape
            shape = [1] * y.ndim
            shape[axis] = d.shape[0]
            d = d.reshape(shape)
        else:
            d = xp.diff(x, axis=axis)
    ndim = y.ndim
    slice1 = [slice(None)] * ndim
    slice2 = [slice(None)] * ndim
    slice1[axis] = slice(1, None)
    slice2[axis] = slice(None, -1)
    product = d * (y[tuple(slice1)] + y[tuple(slice2)]) / 2.0
    try:
        ret = product.sum(axis)
    except ValueError:
        ret = xp.add.reduce(product, axis)
    return ret

def _sparse_document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if cupyx.scipy.sparse.isspmatrix_csr(X):
        return cp.bincount(X.indices, minlength=X.shape[1])
    else:
        return cp.diff(X.indptr)

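# Usage sketch (hypothetical): document frequency is the number of documents
# (rows) in which each term (column) occurs, so for CSR input a bincount over
# the column indices suffices. Assumes the module-level `cp` and `cupyx`
# imports referenced above.
def _demo_sparse_document_frequency():
    import cupy as cp
    from cupyx.scipy import sparse
    X = sparse.csr_matrix(cp.array([[1., 0., 3.],
                                    [2., 0., 0.]]))
    print(_sparse_document_frequency(X))  # [2 0 1]
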