import cupy as cp

def inverse_bwt(string, ind):
    # Convert the string to a list of Unicode code points
    str_list = [ord(i) for i in string]
    # Move to arrays on the GPU:
    # the sequence of the input string
    s_arr = cp.array(str_list)
    # the same sequence, sorted
    sorted_s = cp.array(sorted(str_list))
    # Simply merge the two previous arrays
    tab_s = cp.vstack((s_arr, sorted_s))
    for i in range(1, len(s_arr) - 1):
        # Sort, obtaining the indices of tab_s for the new row, which is in
        # fact a column (.T swaps the array's axes); the comprehension
        # variable is named k so it does not shadow the loop variable i
        j = cp.lexsort(
            cp.array([tab_s.T[:, k].tolist() for k in range(i, -1, -1)]))
        # Prepend the input sequence to the rows reordered by j
        tab_s = cp.vstack((s_arr, tab_s.T[j].T))
    # Sort the final table
    j = cp.lexsort(
        cp.array(
            [tab_s.T[:, k].tolist() for k in range(len(s_arr) - 1, -1, -1)]))
    # Convert the code points back into characters
    str_list = [chr(i) for i in tab_s.T[j][ind]]
    return ''.join(str_list)

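# A minimal CPU reference for cross-checking inverse_bwt (added here as an
# assumption, not part of the original snippet): the classic quadratic
# inversion that repeatedly prepends the BWT column and re-sorts the rows,
# then reads off row `ind`.
def inverse_bwt_cpu(string, ind):
    table = [''] * len(string)
    for _ in range(len(string)):
        # Prepend the BWT column to every row, then sort the rows
        table = sorted(string[i] + table[i] for i in range(len(string)))
    return table[ind]

# e.g. inverse_bwt_cpu('nnbaaa', 3) == 'banana'
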
def __sort_pixels(values, shape, mask=None, return_fails=False, stable=False):
    """
    Uses the values (pixels with extra data) to sort all of the pixels. If
    stable is True then a stable sort is performed, defaulting to False.
    However, if values represent 'tuples' of data per pixel then lexsort is
    used, which is always stable. Additionally, 1D data is assumed to be
    already sorted, in which case this simply calculates fails if requested
    (likely to be 0) and applies the mask if necessary.

    Returns the indices of the sorted values and the number of fails (or
    None if not requested).
    """
    from ..util import is_on_gpu

    ##### Check if the values already contain the failures #####
    fails = None
    if return_fails and isinstance(values, tuple):
        values, fails = values

    ##### Assign strict ordering #####
    if values.ndim == 1:
        # Already sorted
        from ..util import prod
        assert values.size == prod(shape)
        if mask is not None:
            values = values[mask.ravel()]
        idx = values
    elif values.shape == shape:
        # Single value per pixel
        values = values.ravel() if mask is None else values[mask]
        # NumPy now uses radix sort for integers, which is much faster than
        # quicksort
        kind = 'stable' if stable or values.dtype.kind not in 'fc' else 'quicksort'
        idx = values.argsort() if is_on_gpu(values) else values.argsort(kind=kind)
    else:
        # Tuple of values per pixel - need lexsort
        assert values.shape[1:] == shape
        values = values.reshape(values.shape[0], -1) if mask is None else values[:, mask]
        if is_on_gpu(values):
            from cupy import lexsort  # pylint: disable=import-error
            idx = lexsort(values)
        else:
            from numpy import lexsort
            idx = lexsort(values, 0)

    # Done if not calculating failures
    if not return_fails or fails is not None:
        return idx, fails

    # Calculate the number of sort failures
    values = values.T  # for lexsorted values
    values_sorted = values[idx]
    not_equals = values_sorted[1:] != values_sorted[:-1]
    del values_sorted
    if not_equals.ndim == 2:
        not_equals = not_equals.any(1)  # for lexsorted values
    return idx, int(not_equals.size - not_equals.sum())

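# A small self-contained sketch (NumPy only, an illustration added here) of
# the lexsort behaviour the function above relies on: the last row of the
# (k, N) key array is the primary sort key, and adjacent equal key tuples
# are what the function counts as "fails".
import numpy as np

primary = np.array([1, 0, 1, 0])
secondary = np.array([0, 1, 1, 1])
values = np.stack([secondary, primary])  # primary key goes last

idx = np.lexsort(values)
print(idx)  # [1 3 0 2]: ordered by (primary, then secondary)

v = values.T[idx]
fails = int((v[1:] == v[:-1]).all(1).sum())
print(fails)  # 1: pixels 1 and 3 share the same key tuple
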
import cupy as cp

def group_max(data, groups):
    # Sort by (group, value): `groups` is the last key row, so it is the
    # primary sort key and each group's maximum ends its run
    order = cp.lexsort(cp.vstack((data, groups)))
    groups = groups[order]  # this is only needed if groups is unsorted
    data = data[order]
    # Mark the last element of each group's run
    index = cp.empty(groups.shape[0], 'bool')
    index[-1] = True
    index[:-1] = groups[1:] != groups[:-1]
    return data[index], index

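# Hypothetical usage (reusing the `cp` alias from above): lexsort makes
# `groups` the primary key and `data` the secondary key, so the last
# element of each group's run is that group's maximum.
data = cp.array([3, 1, 4, 1, 5, 9, 2, 6])
groups = cp.array([0, 1, 0, 1, 2, 2, 0, 1])
maxima, last = group_max(data, groups)
print(maxima)  # [4 6 9]
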
def sum_duplicates(self):
    """Eliminate duplicate matrix entries by adding them together.

    .. seealso:: :func:`scipy.sparse.coo_matrix.sum_duplicates`
    """
    if self._has_canonical_format:
        return
    if self.data.size == 0:
        self._has_canonical_format = True
        return
    keys = cupy.stack([self.row, self.col])
    order = cupy.lexsort(keys)
    src_data = self.data[order]
    src_row = self.row[order]
    src_col = self.col[order]
    diff = cupy.ElementwiseKernel(
        'raw int32 row, raw int32 col',
        'int32 diff',
        '''
        int index;
        if (i == 0 || row[i - 1] == row[i] && col[i - 1] == col[i]) {
          diff = 0;
        } else {
          diff = 1;
        }
        ''',
        'sum_duplicates_diff'
    )(src_row, src_col, size=self.row.size)

    if diff[1:].all():
        # All elements have different indices.
        data = src_data
        row = src_row
        col = src_col
    else:
        index = cupy.cumsum(diff, dtype='i')
        size = int(index[-1]) + 1
        data = cupy.zeros(size, dtype=self.data.dtype)
        row = cupy.empty(size, dtype='i')
        col = cupy.empty(size, dtype='i')
        cupy.ElementwiseKernel(
            'T src_data, int32 src_row, int32 src_col, int32 index',
            'raw T data, raw int32 row, raw int32 col',
            '''
            atomicAdd(&data[index], src_data);
            row[index] = src_row;
            col[index] = src_col;
            ''',
            'sum_duplicates_assign',
            preamble=util._preamble_atomic_add
        )(src_data, src_row, src_col, index, data, row, col)
    self.data = data
    self.row = row
    self.col = col
    self._has_canonical_format = True

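# Hedged usage sketch via the public API (cupyx.scipy.sparse), which calls
# into a sum_duplicates method like the one above; the duplicate (0, 0)
# entries are summed.
import cupy
from cupyx.scipy import sparse

data = cupy.array([1., 2., 3.], dtype='f')
row = cupy.array([0, 0, 1], dtype='i')
col = cupy.array([0, 0, 1], dtype='i')
A = sparse.coo_matrix((data, (row, col)), shape=(2, 2))
A.sum_duplicates()
print(A.data)  # [3. 3.]
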
import cupy as cp

def cupy_unique_axis0(array):
    # axis is still not supported for cupy.unique;
    # this is a workaround
    if len(array.shape) != 2:
        raise ValueError("Input array must be 2D.")
    # Sort rows lexicographically (reversing the transposed keys makes the
    # first column the primary key), then drop adjacent duplicates
    sortarr = array[cp.lexsort(array.T[::-1])]
    mask = cp.empty(array.shape[0], dtype=cp.bool_)
    mask[0] = True
    mask[1:] = cp.any(sortarr[1:] != sortarr[:-1], axis=1)
    return sortarr[mask]

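# Hypothetical usage: duplicate rows collapse to one, and the rows come
# back lexicographically sorted.
rows = cp.array([[1, 2], [0, 3], [1, 2], [4, 5]])
print(cupy_unique_axis0(rows))
# [[0 3]
#  [1 2]
#  [4 5]]
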
def _select_last_indices(i, j, x, idx_dtype):
    """Find the unique indices for each row and keep only the last"""
    i = cupy.asarray(i, dtype=idx_dtype)
    j = cupy.asarray(j, dtype=idx_dtype)

    stacked = cupy.stack([j, i])
    order = cupy.lexsort(stacked).astype(idx_dtype)

    indptr_inserts = i[order]
    indices_inserts = j[order]
    data_inserts = x[order]

    mask = cupy.ones(indptr_inserts.size, dtype='bool')
    _unique_mask_kern(indptr_inserts, indices_inserts, order,
                      mask, size=indptr_inserts.size - 1)

    return indptr_inserts[mask], indices_inserts[mask], data_inserts[mask]

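# A pure-CuPy sketch of the "keep only the last duplicate" idea above (the
# real code does this with a custom kernel, _unique_mask_kern). lexsort is
# stable, so among duplicate (row, col) pairs the later insert sorts last.
import cupy

i = cupy.array([0, 0, 1], dtype='i')      # rows; (0, 2) appears twice
j = cupy.array([2, 2, 1], dtype='i')      # cols
x = cupy.array([10., 20., 30.])

order = cupy.lexsort(cupy.stack([j, i]))  # sort by row, then col
i_s, j_s, x_s = i[order], j[order], x[order]

# Keep an entry only if the next entry has a different (row, col) pair
keep = cupy.ones(i_s.size, dtype=bool)
keep[:-1] = (i_s[:-1] != i_s[1:]) | (j_s[:-1] != j_s[1:])
print(i_s[keep], j_s[keep], x_s[keep])    # 20. wins for (0, 2)
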
def __sort_pixels_gpu(values, shape, mask=None, return_fails=False,
                      stable=False):
    ##### Check if the values already contain the failures #####
    fails = None
    if return_fails and isinstance(values, tuple):
        values, fails = values

    ##### Assign strict ordering #####
    if values.ndim == 1:
        # Already sorted
        from hist.util import prod
        assert values.size == prod(shape)
        idx = values
    elif values.shape == shape:
        # Single value per pixel
        values = values.ravel()
        idx = values.argsort()
    else:
        # Tuple of values per pixel - need lexsort
        from cupy import lexsort
        assert values.shape[1:] == shape
        values = values.reshape(values.shape[0], -1)
        idx = lexsort(values)

    # Done if not calculating failures
    if not return_fails or fails is not None:
        return idx, fails

    # Calculate the number of sort failures
    values = values.T  # for lexsorted values
    values_sorted = values[idx]
    not_equals = values_sorted[1:] != values_sorted[:-1]
    del values_sorted
    if not_equals.ndim == 2:
        not_equals = not_equals.any(1)  # for lexsorted values
    return idx, int(not_equals.size - not_equals.sum())

# NB: `np` in this snippet is CuPy (e.g. ``import cupy as np``), as the
# docstring and the synchronization note below imply.
def cupy_unique_axis0(array, return_counts=False):
    """
    Support method, as cupy currently doesn't support .unique + axis

    :param array: 2D array to reduce to its unique rows
    :param return_counts: if True, also return the count of each unique row
    :return: the unique rows, and optionally their counts
    """
    if len(array.shape) != 2:
        raise ValueError("Input array must be 2D.")
    sortarr = array[np.lexsort(array.T[::-1])]
    mask = np.empty(array.shape[0], dtype=np.bool_)
    mask[0] = True
    mask[1:] = np.any(sortarr[1:] != sortarr[:-1], axis=1)
    if return_counts:
        nonzero = np.nonzero(mask)[0]  # may synchronize
        idx = np.empty((nonzero.size + 1,), nonzero.dtype)
        idx[:-1] = nonzero
        idx[-1] = mask.size
        return sortarr[mask], idx[1:] - idx[:-1]
    else:
        return sortarr[mask]

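# Hypothetical usage with counts (assuming the CuPy-as-np alias noted
# above):
arr = np.array([[1, 2], [1, 2], [3, 4]])
uniq, counts = cupy_unique_axis0(arr, return_counts=True)
print(uniq)    # [[1 2] [3 4]]
print(counts)  # [2 1]
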
def sort_states(states, state_count):
    """Sort the states to place identical states next to each other

    This function sorts the states stored in a 2d numpy.ndarray so that
    identical states are placed next to each other. To increase speed, the
    states are not actually sorted, since moving data around in memory can
    be time consuming and is usually not useful. What is returned is a
    sorted index and the locations of unique states in the sorted index.

    Args:
        states ([numpy.ndarray]): A 2d array of compressed states. See the
            ``compress_states`` function.
        state_count ([int]): The number of states (or number of rows to
            sort).

    Returns:
        edges ([np.ndarray]): Bin edges, or locations of unique states
        index ([np.ndarray]): Sorted index. This output can be used to
            actually sort the input states by doing ``states[index]``
    """
    logger.debug('sort_states')
    if has_cupy:
        logger.debug('sort_states: cupy.lexsort')
        states = cupy.asarray(states[:state_count]).T
        index = cupy.lexsort(states)
        states = states[:, index]
        uniques = cupy.argwhere(
            cupy.any(states[:, :-1] != states[:, 1:], axis=0)) + 1
        bin_edges = cupy.zeros((uniques.size + 2,), dtype=np.int64)
        bin_edges[1:-1] = uniques.squeeze()
        bin_edges[-1] = states.shape[1]
        bin_edges = cupy.asnumpy(bin_edges)
        index = cupy.asnumpy(index)
    else:
        logger.debug('sort_states: tensorstate._lex_sort')
        bin_edges, index = ts._lex_sort(states, state_count)
    return bin_edges, index

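# Hypothetical usage of sort_states (assuming the module-level names used
# above -- cupy, has_cupy, logger, ts -- are available): rows 0 and 2 are
# identical, so they land in the same bin.
import numpy as np

states = np.array([[0, 1],
                   [2, 3],
                   [0, 1],
                   [4, 5]], dtype=np.int64)
edges, index = sort_states(states, state_count=4)
sorted_states = states[index]  # identical states are now adjacent
# edges[k]:edges[k + 1] delimits the k-th group of identical states
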
import cupy as cp

def direct_bwt(string):
    # Convert the string to a list of Unicode code points
    str_list = [ord(i) for i in string]
    # Move to an array on the GPU
    s_arr = cp.array(str_list)
    # Build the matrix of cyclic shifts.
    # Note: each shift is computed on the device, converted to a list on
    # the host, and then back into a device array (this works around the
    # lack of support for lists with non-numeric elements)
    s_mat = cp.array(
        [cp.roll(s_arr, i).tolist() for i in range(0, len(s_arr))])
    # Sort by column indices
    # Note: the same host round-trip workaround as above; the keys run from
    # the last column to the first, so column 0 becomes the primary key
    i = cp.lexsort(
        cp.array([s_mat[:, k].tolist()
                  for k in range(len(s_arr) - 1, -1, -1)]))
    s_mat = s_mat[i]
    # Convert the code points back into characters
    str_list = [chr(i) for i in s_mat[:, -1].tolist()]
    return (''.join(str_list),
            cp.where(cp.all(s_arr == s_mat, axis=1))[0].item())

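# Round-trip sanity check (a sketch, assuming CuPy is available and the
# inverse_bwt / inverse_bwt_cpu snippets shown earlier are in scope):
text = 'banana'
bwt, ind = direct_bwt(text)
print(bwt, ind)  # nnbaaa 3
assert inverse_bwt_cpu(bwt, ind) == text
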
def sum_duplicates(self):
    """Eliminate duplicate matrix entries by adding them together.

    .. warning::
        When sorting the indices, CuPy follows the convention of cuSPARSE,
        which is different from that of SciPy. Therefore, the order of the
        output indices may differ:

        .. code-block:: python

            >>> #     1 0 0
            >>> # A = 1 1 0
            >>> #     1 1 1
            >>> data = cupy.array([1, 1, 1, 1, 1, 1], 'f')
            >>> row = cupy.array([0, 1, 1, 2, 2, 2], 'i')
            >>> col = cupy.array([0, 0, 1, 0, 1, 2], 'i')
            >>> A = cupyx.scipy.sparse.coo_matrix((data, (row, col)),
            ...                                   shape=(3, 3))
            >>> a = A.get()
            >>> A.sum_duplicates()
            >>> a.sum_duplicates()  # a is scipy.sparse.coo_matrix
            >>> A.row
            array([0, 1, 1, 2, 2, 2], dtype=int32)
            >>> a.row
            array([0, 1, 2, 1, 2, 2], dtype=int32)
            >>> A.col
            array([0, 0, 1, 0, 1, 2], dtype=int32)
            >>> a.col
            array([0, 0, 0, 1, 1, 2], dtype=int32)

    .. warning::
        Calling this function might synchronize the device.

    .. seealso::
       :meth:`scipy.sparse.coo_matrix.sum_duplicates`
    """
    if self.has_canonical_format:
        return
    # Note: The sorting order below follows the cuSPARSE convention (first
    # row then col, so-called row-major) and differs from that of SciPy, as
    # the cuSPARSE functions such as cusparseSpMV() assume this sorting
    # order.
    # See https://docs.nvidia.com/cuda/cusparse/index.html#coo-format
    keys = cupy.stack([self.col, self.row])
    order = cupy.lexsort(keys)
    src_data = self.data[order]
    src_row = self.row[order]
    src_col = self.col[order]
    diff = self._sum_duplicates_diff(src_row, src_col, size=self.row.size)

    if diff[1:].all():
        # All elements have different indices.
        data = src_data
        row = src_row
        col = src_col
    else:
        # TODO(leofang): move the kernels outside this method
        index = cupy.cumsum(diff, dtype='i')
        size = int(index[-1]) + 1
        data = cupy.zeros(size, dtype=self.data.dtype)
        row = cupy.empty(size, dtype='i')
        col = cupy.empty(size, dtype='i')
        if self.data.dtype.kind == 'f':
            cupy.ElementwiseKernel(
                'T src_data, int32 src_row, int32 src_col, int32 index',
                'raw T data, raw int32 row, raw int32 col',
                '''
                atomicAdd(&data[index], src_data);
                row[index] = src_row;
                col[index] = src_col;
                ''',
                'sum_duplicates_assign')(src_data, src_row, src_col, index,
                                         data, row, col)
        elif self.data.dtype.kind == 'c':
            cupy.ElementwiseKernel(
                'T src_real, T src_imag, int32 src_row, int32 src_col, '
                'int32 index',
                'raw T real, raw T imag, raw int32 row, raw int32 col',
                '''
                atomicAdd(&real[index], src_real);
                atomicAdd(&imag[index], src_imag);
                row[index] = src_row;
                col[index] = src_col;
                ''',
                'sum_duplicates_assign_complex')(src_data.real,
                                                 src_data.imag, src_row,
                                                 src_col, index, data.real,
                                                 data.imag, row, col)
    self.data = data
    self.row = row
    self.col = col
    self.has_canonical_format = True

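# A quick illustration (an added sketch, not from the original) of why the
# keys above are stacked as [col, row]: lexsort treats the last row of the
# key array as the primary key, so this yields the cuSPARSE row-major
# (first row, then col) ordering.
import cupy

row = cupy.array([0, 1, 1, 2, 2, 2], 'i')
col = cupy.array([0, 0, 1, 0, 1, 2], 'i')
order = cupy.lexsort(cupy.stack([col, row]))  # sort by row, then col
print(row[order], col[order])
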
def _select(input, labels=None, index=None, find_min=False, find_max=False,
            find_min_positions=False, find_max_positions=False,
            find_median=False):
    """Return one or more of: min, max, min position, max position, median.

    If neither `labels` or `index` is provided, these are the global values
    in `input`. If `index` is None, but `labels` is provided, a global value
    across all non-zero labels is given. When both `labels` and `index` are
    provided, lists of values are provided for each labeled region specified
    in `index`. See further details in :func:`cupyx.scipy.ndimage.minimum`,
    etc.

    Used by minimum, maximum, minimum_position, maximum_position, extrema.
    """
    find_positions = find_min_positions or find_max_positions
    positions = None
    if find_positions:
        positions = cupy.arange(input.size).reshape(input.shape)

    def single_group(vals, positions):
        result = []
        if find_min:
            result += [vals.min()]
        if find_min_positions:
            result += [positions[vals == vals.min()][0]]
        if find_max:
            result += [vals.max()]
        if find_max_positions:
            result += [positions[vals == vals.max()][0]]
        if find_median:
            result += [cupy.median(vals)]
        return result

    if labels is None:
        return single_group(input, positions)

    # ensure input and labels match sizes
    input, labels = cupy.broadcast_arrays(input, labels)

    if index is None:
        mask = labels > 0
        masked_positions = None
        if find_positions:
            masked_positions = positions[mask]
        return single_group(input[mask], masked_positions)

    if cupy.isscalar(index):
        mask = labels == index
        masked_positions = None
        if find_positions:
            masked_positions = positions[mask]
        return single_group(input[mask], masked_positions)

    index = cupy.asarray(index)

    safe_int = _safely_castable_to_int(labels.dtype)
    min_label = labels.min()
    max_label = labels.max()

    # Remap labels to unique integers if necessary, or if the largest label
    # is larger than the number of values.
    if (not safe_int or min_label < 0 or max_label > labels.size):
        # Remap labels, and indexes
        unique_labels, labels = cupy.unique(labels, return_inverse=True)
        idxs = cupy.searchsorted(unique_labels, index)

        # Make all of idxs valid
        idxs[idxs >= unique_labels.size] = 0
        found = unique_labels[idxs] == index
    else:
        # Labels are an integer type, and there aren't too many
        idxs = cupy.asanyarray(index, int).copy()
        found = (idxs >= 0) & (idxs <= max_label)

    idxs[~found] = max_label + 1

    input = input.ravel()
    labels = labels.ravel()
    if find_positions:
        positions = positions.ravel()

    using_cub = _core._accelerator.ACCELERATOR_CUB in \
        cupy._core.get_routine_accelerators()

    if using_cub:
        # Cutoff values below were determined empirically for relatively
        # large input arrays.
        if find_positions or find_median:
            n_label_cutoff = 15
        else:
            n_label_cutoff = 30
    else:
        n_label_cutoff = 0

    if n_label_cutoff and len(idxs) <= n_label_cutoff:
        return _select_via_looping(
            input, labels, idxs, positions,
            find_min, find_min_positions,
            find_max, find_max_positions, find_median)

    order = cupy.lexsort(cupy.stack((input.ravel(), labels.ravel())))
    input = input[order]
    labels = labels[order]
    if find_positions:
        positions = positions[order]

    # Determine indices corresponding to the min or max value for each label
    label_change_index = cupy.searchsorted(labels,
                                           cupy.arange(1, max_label + 2))
    if find_min or find_min_positions or find_median:
        # index corresponding to the minimum value at each label
        min_index = label_change_index[:-1]
    if find_max or find_max_positions or find_median:
        # index corresponding to the maximum value at each label
        max_index = label_change_index[1:] - 1

    result = []
    # the order below matches the order expected by cupy.ndimage.extrema
    if find_min:
        mins = cupy.zeros(int(labels.max()) + 2, input.dtype)
        mins[labels[min_index]] = input[min_index]
        result += [mins[idxs]]
    if find_min_positions:
        minpos = cupy.zeros(labels.max().item() + 2, int)
        minpos[labels[min_index]] = positions[min_index]
        result += [minpos[idxs]]
    if find_max:
        maxs = cupy.zeros(int(labels.max()) + 2, input.dtype)
        maxs[labels[max_index]] = input[max_index]
        result += [maxs[idxs]]
    if find_max_positions:
        maxpos = cupy.zeros(labels.max().item() + 2, int)
        maxpos[labels[max_index]] = positions[max_index]
        result += [maxpos[idxs]]
    if find_median:
        locs = cupy.arange(len(labels))
        lo = cupy.zeros(int(labels.max()) + 2, int)
        lo[labels[min_index]] = locs[min_index]
        hi = cupy.zeros(int(labels.max()) + 2, int)
        hi[labels[max_index]] = locs[max_index]
        lo = lo[idxs]
        hi = hi[idxs]
        # lo is an index to the lowest value in input for each label,
        # hi is an index to the largest value.
        # move them to be either the same ((hi - lo) % 2 == 0) or next
        # to each other ((hi - lo) % 2 == 1), then average.
        step = (hi - lo) // 2
        lo += step
        hi -= step
        if input.dtype.kind in 'iub':
            # fix for https://github.com/scipy/scipy/issues/12836
            result += [(input[lo].astype(float) +
                        input[hi].astype(float)) / 2.0]
        else:
            result += [(input[lo] + input[hi]) / 2.0]
    return result

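# A compact sketch of the lexsort trick used above (added here as an
# illustration): sorting by (label, value) puts each label's minimum at the
# start of its run and its maximum at the end.
import cupy

vals = cupy.array([5., 1., 3., 2., 4.])
labels = cupy.array([1, 1, 2, 2, 2])
order = cupy.lexsort(cupy.stack((vals, labels)))  # labels are primary key
svals, slabels = vals[order], labels[order]

bounds = cupy.searchsorted(slabels, cupy.arange(1, int(labels.max()) + 2))
mins = svals[bounds[:-1]]      # [1. 2.]  first of each label run
maxs = svals[bounds[1:] - 1]   # [5. 4.]  last of each label run
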
def test_lexsort_unsupported_dtype(self, dtype):
    a = testing.shaped_random((2, 10), cupy, dtype)
    with self.assertRaises(TypeError):
        return cupy.lexsort(a)

def test_lexsort_three_or_more_dim(self):
    a = testing.shaped_random((2, 10, 10), cupy)
    with self.assertRaises(NotImplementedError):
        return cupy.lexsort(a)

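# For contrast with the failing cases above, a supported call (an added
# sketch): a 2-D array of numeric keys sorts fine, with the last row acting
# as the primary key.
import cupy

keys = cupy.array([[0., 1., 0.],
                   [2., 2., 1.]])
print(cupy.lexsort(keys))  # [2 0 1]
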
def sum_duplicates(self):
    """Eliminate duplicate matrix entries by adding them together.

    .. warning::
        Calling this function might synchronize the device.

    .. seealso::
       :meth:`scipy.sparse.coo_matrix.sum_duplicates`
    """
    if self.has_canonical_format:
        return
    # Note: it is unclear how the sorting order would matter. However,
    # this is what SciPy performs in sum_duplicates(). Although this order
    # is different from the cuSPARSE convention (first row then col), we
    # are not calling coosort here so it should be alright.
    keys = cupy.stack([self.row, self.col])
    order = cupy.lexsort(keys)
    src_data = self.data[order]
    src_row = self.row[order]
    src_col = self.col[order]
    diff = self._sum_duplicates_diff(src_row, src_col, size=self.row.size)

    if diff[1:].all():
        # All elements have different indices.
        data = src_data
        row = src_row
        col = src_col
    else:
        # TODO(leofang): move the kernels outside this method
        index = cupy.cumsum(diff, dtype='i')
        size = int(index[-1]) + 1
        data = cupy.zeros(size, dtype=self.data.dtype)
        row = cupy.empty(size, dtype='i')
        col = cupy.empty(size, dtype='i')
        if self.data.dtype.kind == 'f':
            cupy.ElementwiseKernel(
                'T src_data, int32 src_row, int32 src_col, int32 index',
                'raw T data, raw int32 row, raw int32 col',
                '''
                atomicAdd(&data[index], src_data);
                row[index] = src_row;
                col[index] = src_col;
                ''',
                'sum_duplicates_assign')(src_data, src_row, src_col, index,
                                         data, row, col)
        elif self.data.dtype.kind == 'c':
            cupy.ElementwiseKernel(
                'T src_real, T src_imag, int32 src_row, int32 src_col, '
                'int32 index',
                'raw T real, raw T imag, raw int32 row, raw int32 col',
                '''
                atomicAdd(&real[index], src_real);
                atomicAdd(&imag[index], src_imag);
                row[index] = src_row;
                col[index] = src_col;
                ''',
                'sum_duplicates_assign_complex')(src_data.real,
                                                 src_data.imag, src_row,
                                                 src_col, index, data.real,
                                                 data.imag, row, col)
    self.data = data
    self.row = row
    self.col = col
    self.has_canonical_format = True