def count_nonzero(X, axis=None, sample_weight=None):
    """A variant of X.getnnz() with extension to weighting on axis 0.

    Useful in efficiently calculating multilabel metrics.

    Parameters
    ----------
    X : CSR sparse matrix of shape (n_samples, n_labels)
        Input data.
    axis : None, 0 or 1
        The axis on which the data is aggregated.
    sample_weight : array-like of shape (n_samples,), default=None
        Weight for each row of X.
    """
    if axis == -1:
        axis = 1
    elif axis == -2:
        axis = 0
    elif X.format != 'csr':
        raise TypeError('Expected CSR sparse format, got {0}'.format(X.format))

    # We rely here on the fact that np.diff(Y.indptr) for a CSR
    # will return the number of nonzero entries in each row.
    # A bincount over Y.indices will return the number of nonzeros
    # in each column. See ``csr_matrix.getnnz`` in scipy >= 0.14.
    if axis is None:
        if sample_weight is None:
            return X.nnz
        else:
            return np.dot(np.diff(X.indptr), sample_weight)
    elif axis == 1:
        out = np.diff(X.indptr)
        if sample_weight is None:
            # astype here is for consistency with axis=0 dtype
            return out.astype('intp')
        return out * sample_weight
    elif axis == 0:
        if sample_weight is None:
            return np.bincount(X.indices, minlength=X.shape[1])
        else:
            weights = np.repeat(sample_weight, np.diff(X.indptr))
            return np.bincount(X.indices, minlength=X.shape[1],
                               weights=weights)
    else:
        raise ValueError('Unsupported axis: {0}'.format(axis))

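# Illustrative usage of `count_nonzero` above (not part of the original
# source); assumes numpy is imported as `np` and SciPy is installed.
from scipy.sparse import csr_matrix

X_demo = csr_matrix([[1, 0, 2],
                     [0, 0, 3]])
count_nonzero(X_demo)          # 3 stored nonzeros in total
count_nonzero(X_demo, axis=0)  # array([1, 0, 2]): nonzeros per column
count_nonzero(X_demo, axis=1)  # array([2, 1]): nonzeros per row
# With sample_weight, each row's nonzero count is scaled by its weight:
count_nonzero(X_demo, axis=1, sample_weight=np.array([0.5, 2.0]))  # array([1., 2.])
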
def counts(y):
    _, y_indices = cp.unique(y, return_inverse=True)
    class_counts = cp.bincount(y_indices)
    total = cp.sum(class_counts)
    percent_counts = []
    for count in class_counts:
        percent_counts.append(
            cp.around(float(count) / total.item(), decimals=2).item())
    return percent_counts

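# Illustrative call of `counts` above (not part of the original source);
# assumes CuPy is imported as `cp`.
y_demo = cp.array([0, 0, 1, 1, 1, 2])
counts(y_demo)  # [0.33, 0.5, 0.17]: per-class share of the 6 samples
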
def remove_small_objects_gpu(mask: cupy.ndarray, min_size: int) -> None:
    """
    See scikit-image remove_small_objects()

    N.B. Input array must be a labeled mask. This is an in-place operation.
    """
    component_sizes = cupy.bincount(mask.ravel())
    too_small = component_sizes < min_size
    too_small_mask = too_small[mask]
    mask[too_small_mask] = 0

def remove_small_objects_gpu(mask: cupy.ndarray, min_size: int) -> None:
    """
    See scikit-image remove_small_objects()

    N.B. Input array can be a binary mask (bool type) or labeled mask
    (int type). This is an in-place operation.
    """
    _check_dtype_supported(mask)
    # Label only if the input is binary; an int-typed input is assumed to
    # already be a labeled mask.
    if mask.dtype == bool:
        ccs, _ = label(mask)
    else:
        ccs = mask
    component_sizes = cupy.bincount(ccs.ravel())
    too_small = component_sizes < min_size
    too_small_mask = too_small[ccs]
    mask[too_small_mask] = 0

def keep_largest_connected_component_gpu(mask: cupy.ndarray) -> None:
    """
    Keep the largest connected component.

    Remove small connected components, only keep the largest connected
    component (excluding background).

    N.B. Input array can be a binary mask (bool type) or labeled mask
    (int type). This is an in-place operation.
    """
    _check_dtype_supported(mask)
    # Label only if the input is binary; an int-typed input is assumed to
    # already be a labeled mask.
    if mask.dtype == bool:
        ccs, _ = label(mask)
    else:
        ccs = mask
    component_sizes = cupy.bincount(ccs.ravel())
    if len(component_sizes) == 1:
        # just background
        return
    largest_cc_index = cupy.argmax(component_sizes[1:]) + 1
    mask[ccs != largest_cc_index] = 0

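# Illustrative in-place use of the helpers above (not part of the original
# source); assumes CuPy is imported as `cupy` and that the module-level
# `label` and `_check_dtype_supported` helpers are available.
labeled_demo = cupy.array([[1, 1, 0],
                           [0, 0, 2],
                           [0, 0, 0]])
keep_largest_connected_component_gpu(labeled_demo)
# labeled_demo is now [[1, 1, 0], [0, 0, 0], [0, 0, 0]]: the one-pixel
# component labeled 2 was removed, the two-pixel component labeled 1 kept.
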
def _bincount_histogram(image, source_range):
    """
    Efficient histogram calculation for an image of integers.

    This function is significantly more efficient than cupy.histogram but
    works only on images of integers. It is based on cupy.bincount.

    Parameters
    ----------
    image : array
        Input image.
    source_range : string
        'image' determines the range from the input image.
        'dtype' determines the range from the expected range of the images
        of that data type.

    Returns
    -------
    hist : array
        The values of the histogram.
    bin_centers : array
        The values at the center of the bins.
    """
    if source_range not in ["image", "dtype"]:
        raise ValueError(
            "Incorrect value for `source_range` argument: {}".format(
                source_range
            )
        )
    if source_range == "image":
        image_min = int(image.min().astype(np.int64))
        image_max = int(image.max().astype(np.int64))
    elif source_range == "dtype":
        image_min, image_max = dtype_limits(image, clip_negative=False)
    image, offset = _offset_array(image, image_min, image_max)
    hist = cp.bincount(image.ravel(), minlength=image_max - image_min + 1)
    bin_centers = cp.arange(image_min, image_max + 1)
    if source_range == "image":
        idx = max(image_min, 0)
        hist = hist[idx:]
    return hist, bin_centers

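# Illustrative call of `_bincount_histogram` above (not part of the original
# source); assumes `cp` (CuPy) plus the module helpers `_offset_array` and
# `dtype_limits` are available.
img_demo = cp.array([[1, 1, 2],
                     [2, 2, 3]])
hist_demo, centers_demo = _bincount_histogram(img_demo, "image")
# hist_demo    -> array([2, 3, 1]): counts of the values 1, 2 and 3
# centers_demo -> array([1, 2, 3])
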
def perimeter(image, neighbourhood=4):
    """Calculate total perimeter of all objects in binary image.

    Parameters
    ----------
    image : (N, M) ndarray
        2D binary image.
    neighbourhood : 4 or 8, optional
        Neighborhood connectivity for border pixel determination. It is used
        to compute the contour. A higher neighbourhood widens the border on
        which the perimeter is computed.

    Returns
    -------
    perimeter : float
        Total perimeter of all objects in binary image.

    References
    ----------
    .. [1] K. Benkrid, D. Crookes. Design and FPGA Implementation of a
           Perimeter Estimator. The Queen's University of Belfast.
           http://www.cs.qub.ac.uk/~d.crookes/webpubs/papers/perimeter.doc

    Examples
    --------
    >>> from skimage import data, util
    >>> from skimage.measure import label
    >>> # coins image (binary)
    >>> img_coins = data.coins() > 110
    >>> # total perimeter of all objects in the image
    >>> perimeter(img_coins, neighbourhood=4)  # doctest: +ELLIPSIS
    7796.867...
    >>> perimeter(img_coins, neighbourhood=8)  # doctest: +ELLIPSIS
    8806.268...
    """
    if image.ndim != 2:
        raise NotImplementedError("`perimeter` supports 2D images only")

    if neighbourhood == 4:
        strel = STREL_4
    else:
        strel = STREL_8
    strel = cp.asarray(strel)
    image = image.astype(cp.uint8)
    eroded_image = ndi.binary_erosion(image, strel, border_value=0)
    border_image = image - eroded_image

    perimeter_weights = cp.zeros(50, dtype=cp.double)
    perimeter_weights[[5, 7, 15, 17, 25, 27]] = 1
    perimeter_weights[[21, 33]] = sqrt(2)
    perimeter_weights[[13, 23]] = (1 + sqrt(2)) / 2

    perimeter_image = ndi.convolve(
        border_image,
        cp.asarray([[10, 2, 10], [2, 1, 2], [10, 2, 10]]),
        mode="constant",
        cval=0,
    )

    # You can also write
    # return perimeter_weights[perimeter_image].sum()
    # but that was measured as taking much longer than bincount + cp.dot (5x
    # as much time)
    perimeter_histogram = cp.bincount(perimeter_image.ravel(), minlength=50)
    total_perimeter = perimeter_histogram @ perimeter_weights
    return total_perimeter

def _stratify_split(X, stratify, labels, n_train, n_test, x_numba, y_numba,
                    random_state):
    """
    Function to perform a stratified split based on stratify column.
    Based on scikit-learn stratified split implementation.

    Parameters
    ----------
    X, labels: Shuffled input data and labels
    stratify: column to be stratified on.
    n_train: Number of samples in train set
    n_test: number of samples in test set
    x_numba: Determines whether the data should be converted to numba
    y_numba: Determines whether the labels should be converted to numba

    Returns
    -------
    X_train, X_test: Data X divided into train and test sets
    y_train, y_test: Labels divided into train and test sets
    """
    x_cudf = False
    labels_cudf = False

    if isinstance(X, cudf.DataFrame):
        x_cudf = True
    elif hasattr(X, "__cuda_array_interface__"):
        X = cp.asarray(X)
        x_order = _strides_to_order(X.__cuda_array_interface__['strides'],
                                    cp.dtype(X.dtype))

    # labels and stratify will be only cp arrays
    if isinstance(labels, cudf.Series):
        labels_cudf = True
        labels = labels.values
    elif hasattr(labels, "__cuda_array_interface__"):
        labels = cp.asarray(labels)
    elif isinstance(labels, cudf.DataFrame):
        # ensuring it has just one column
        if labels.shape[1] != 1:
            raise ValueError('Expected one column for labels, but found df '
                             'with shape = %d' % (labels.shape))
        labels_cudf = True
        labels = labels[0].values

    labels_order = _strides_to_order(
        labels.__cuda_array_interface__['strides'], cp.dtype(labels.dtype))

    # Converting to cupy array removes the need to add an if-else block
    # for the stratify column
    if isinstance(stratify, cudf.Series):
        stratify = stratify.values
    elif hasattr(stratify, "__cuda_array_interface__"):
        stratify = cp.asarray(stratify)
    elif isinstance(stratify, cudf.DataFrame):
        # ensuring it has just one column
        if stratify.shape[1] != 1:
            raise ValueError('Expected one column, but found column '
                             'with shape = %d' % (stratify.shape))
        stratify = stratify[0].values

    classes, stratify_indices = cp.unique(stratify, return_inverse=True)

    n_classes = classes.shape[0]
    class_counts = cp.bincount(stratify_indices)
    if cp.min(class_counts) < 2:
        raise ValueError("The least populated class in y has only 1"
                         " member, which is too few. The minimum"
                         " number of groups for any class cannot"
                         " be less than 2.")

    if n_train < n_classes:
        raise ValueError('The train_size = %d should be greater or '
                         'equal to the number of classes = %d' %
                         (n_train, n_classes))

    class_indices = cp.split(cp.argsort(stratify_indices),
                             cp.cumsum(class_counts)[:-1].tolist())

    X_train = None

    # random_state won't be None or int, that's handled earlier
    if isinstance(random_state, np.random.RandomState):
        random_state = cp.random.RandomState(seed=random_state.get_state()[1])

    # Break ties
    n_i = _approximate_mode(class_counts, n_train, random_state)
    class_counts_remaining = class_counts - n_i
    t_i = _approximate_mode(class_counts_remaining, n_test, random_state)

    for i in range(n_classes):
        permutation = random_state.permutation(class_counts[i].item())
        perm_indices_class_i = class_indices[i].take(permutation)

        y_train_i = cp.array(labels[perm_indices_class_i[:n_i[i]]],
                             order=labels_order)
        y_test_i = cp.array(labels[perm_indices_class_i[n_i[i]:n_i[i] +
                                                        t_i[i]]],
                            order=labels_order)

        if hasattr(X, "__cuda_array_interface__") or \
                isinstance(X, cupyx.scipy.sparse.csr_matrix):
            X_train_i = cp.array(X[perm_indices_class_i[:n_i[i]]],
                                 order=x_order)
            X_test_i = cp.array(X[perm_indices_class_i[n_i[i]:n_i[i] +
                                                       t_i[i]]],
                                order=x_order)

            if X_train is None:
                X_train = cp.array(X_train_i, order=x_order)
                y_train = cp.array(y_train_i, order=labels_order)
                X_test = cp.array(X_test_i, order=x_order)
                y_test = cp.array(y_test_i, order=labels_order)
            else:
                X_train = cp.concatenate([X_train, X_train_i], axis=0)
                X_test = cp.concatenate([X_test, X_test_i], axis=0)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

        elif x_cudf:
            X_train_i = X.iloc[perm_indices_class_i[:n_i[i]]]
            X_test_i = X.iloc[perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]]

            if X_train is None:
                X_train = X_train_i
                y_train = y_train_i
                X_test = X_test_i
                y_test = y_test_i
            else:
                X_train = cudf.concat([X_train, X_train_i],
                                      ignore_index=False)
                X_test = cudf.concat([X_test, X_test_i], ignore_index=False)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

    if x_numba:
        X_train = cuda.as_cuda_array(X_train)
        X_test = cuda.as_cuda_array(X_test)
    elif x_cudf:
        X_train = cudf.DataFrame(X_train)
        X_test = cudf.DataFrame(X_test)

    if y_numba:
        y_train = cuda.as_cuda_array(y_train)
        y_test = cuda.as_cuda_array(y_test)
    elif labels_cudf:
        y_train = cudf.Series(y_train)
        y_test = cudf.Series(y_test)

    return X_train, X_test, y_train, y_test

def _stratify_split(X, y, n_train, n_test, x_numba, y_numba, random_state):
    """
    Function to perform a stratified split based on y labels.
    Based on scikit-learn stratified split implementation.

    Parameters
    ----------
    X, y: Shuffled input data and labels
    n_train: Number of samples in train set
    n_test: number of samples in test set
    x_numba: Determines whether the data should be converted to numba
    y_numba: Determines whether the labels should be converted to numba

    Returns
    -------
    X_train, X_test: Data X divided into train and test sets
    y_train, y_test: Labels divided into train and test sets
    """
    x_cudf = False
    y_cudf = False

    if isinstance(X, cudf.DataFrame):
        x_cudf = True
    elif hasattr(X, "__cuda_array_interface__"):
        X = cp.asarray(X)
        x_order = _strides_to_order(X.__cuda_array_interface__['strides'],
                                    cp.dtype(X.dtype))

    if isinstance(y, cudf.Series):
        y_cudf = True
    elif hasattr(y, "__cuda_array_interface__"):
        y = cp.asarray(y)
        y_order = _strides_to_order(y.__cuda_array_interface__['strides'],
                                    cp.dtype(y.dtype))
    elif isinstance(y, cudf.DataFrame):
        y_cudf = True
        # ensuring it has just one column
        if y.shape[1] != 1:
            raise ValueError('Expected one label, but found y '
                             'with shape = %d' % (y.shape))

    classes, y_indices = cp.unique(y.values if y_cudf else y,
                                   return_inverse=True)

    n_classes = classes.shape[0]
    class_counts = cp.bincount(y_indices)
    if n_train < n_classes:
        raise ValueError('The train_size = %d should be greater or '
                         'equal to the number of classes = %d' %
                         (n_train, n_classes))
    if n_test < n_classes:
        raise ValueError('The test_size = %d should be greater or '
                         'equal to the number of classes = %d' %
                         (n_test, n_classes))

    class_indices = cp.array_split(cp.argsort(y_indices), n_classes)

    X_train = None

    # random_state won't be None or int, that's handled earlier
    if isinstance(random_state, np.random.RandomState):
        random_state = cp.random.RandomState(seed=random_state.get_state()[1])

    # Break ties
    n_i = _approximate_mode(class_counts, n_train, random_state)
    class_counts_remaining = class_counts - n_i
    t_i = _approximate_mode(class_counts_remaining, n_test, random_state)

    for i in range(n_classes):
        permutation = random_state.permutation(class_counts[i].item())
        perm_indices_class_i = class_indices[i].take(permutation)

        if hasattr(X, "__cuda_array_interface__") or \
                isinstance(X, cupyx.scipy.sparse.csr_matrix):
            X_train_i = cp.array(X[perm_indices_class_i[:n_i[i]]],
                                 order=x_order)
            X_test_i = cp.array(X[perm_indices_class_i[n_i[i]:n_i[i] +
                                                       t_i[i]]],
                                order=x_order)

            y_train_i = cp.array(y[perm_indices_class_i[:n_i[i]]],
                                 order=y_order)
            y_test_i = cp.array(y[perm_indices_class_i[n_i[i]:n_i[i] +
                                                       t_i[i]]],
                                order=y_order)

            if X_train is None:
                X_train = cp.array(X_train_i, order=x_order)
                y_train = cp.array(y_train_i, order=y_order)
                X_test = cp.array(X_test_i, order=x_order)
                y_test = cp.array(y_test_i, order=y_order)
            else:
                X_train = cp.concatenate([X_train, X_train_i], axis=0)
                X_test = cp.concatenate([X_test, X_test_i], axis=0)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

        elif x_cudf:
            X_train_i = X.iloc[perm_indices_class_i[:n_i[i]]]
            X_test_i = X.iloc[perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]]

            y_train_i = y.iloc[perm_indices_class_i[:n_i[i]]]
            y_test_i = y.iloc[perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]]

            if X_train is None:
                X_train = X_train_i
                y_train = y_train_i
                X_test = X_test_i
                y_test = y_test_i
            else:
                X_train = cudf.concat([X_train, X_train_i],
                                      ignore_index=False)
                X_test = cudf.concat([X_test, X_test_i], ignore_index=False)
                y_train = cudf.concat([y_train, y_train_i],
                                      ignore_index=False)
                y_test = cudf.concat([y_test, y_test_i], ignore_index=False)

    if x_numba:
        X_train = cuda.as_cuda_array(X_train)
        X_test = cuda.as_cuda_array(X_test)
    elif x_cudf:
        X_train = cudf.DataFrame(X_train)
        X_test = cudf.DataFrame(X_test)

    if y_numba:
        y_train = cuda.as_cuda_array(y_train)
        y_test = cuda.as_cuda_array(y_test)
    elif y_cudf:
        y_train = cudf.DataFrame(y_train)
        y_test = cudf.DataFrame(y_test)

    return X_train, X_test, y_train, y_test

def histogramdd(sample, bins=10, range=None, weights=None, density=False):
    """Compute the multidimensional histogram of some data.

    Parameters
    ----------
    sample : (N, D) array, or (D, N) array_like
        The data to be histogrammed.

        Note the unusual interpretation of sample when an array_like:

        * When an array, each row is a coordinate in a D-dimensional space -
          such as ``histogramdd(cupy.array([p1, p2, p3]))``.
        * When an array_like, each element is the list of values for single
          coordinate - such as ``histogramdd((X, Y, Z))``.

        The first form should be preferred.
    bins : sequence or int, optional
        The bin specification:

        * A sequence of arrays describing the monotonically increasing bin
          edges along each dimension.
        * The number of bins for each dimension (nx, ny, ... =bins)
        * The number of bins for all dimensions (nx=ny=...=bins).
    range : sequence, optional
        A sequence of length D, each an optional (lower, upper) tuple giving
        the outer bin edges to be used if the edges are not given explicitly
        in `bins`.
        An entry of None in the sequence results in the minimum and maximum
        values being used for the corresponding dimension.
        The default, None, is equivalent to passing a tuple of D None values.
    density : bool, optional
        If False, the default, returns the number of samples in each bin.
        If True, returns the probability *density* function at the bin,
        ``bin_count / sample_count / bin_volume``.
    weights : (N,) array_like, optional
        An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
        The values of the returned histogram are equal to the sum of the
        weights belonging to the samples falling into each bin.

    Returns
    -------
    H : ndarray
        The multidimensional histogram of sample x. See density and weights
        for the different possible semantics.
    edges : list
        A list of D arrays describing the bin edges for each dimension.

    See Also
    --------
    histogram: 1-D histogram
    histogram2d: 2-D histogram

    Examples
    --------
    >>> r = cupy.random.randn(100, 3)
    >>> H, edges = cupy.histogramdd(r, bins=(5, 8, 4))
    >>> H.shape, edges[0].size, edges[1].size, edges[2].size
    ((5, 8, 4), 6, 9, 5)
    """
    if isinstance(sample, cupy.ndarray):
        # Sample is an ND-array.
        if sample.ndim == 1:
            sample = sample[:, cupy.newaxis]
        nsamples, ndim = sample.shape
    else:
        sample = cupy.stack(sample, axis=-1)
        nsamples, ndim = sample.shape

    nbin = numpy.empty(ndim, int)
    edges = ndim * [None]
    dedges = ndim * [None]
    if weights is not None:
        weights = cupy.asarray(weights)

    try:
        nbins = len(bins)
        if nbins != ndim:
            raise ValueError(
                "The dimension of bins must be equal to the dimension of the "
                " sample x.")
    except TypeError:
        # bins is an integer
        bins = ndim * [bins]

    # normalize the range argument
    if range is None:
        range = (None,) * ndim
    elif len(range) != ndim:
        raise ValueError("range argument must have one entry per dimension")

    # Create edge arrays
    for i in _range(ndim):
        if cnp.ndim(bins[i]) == 0:
            if bins[i] < 1:
                raise ValueError(
                    "`bins[{}]` must be positive, when an integer".format(i))
            smin, smax = _get_outer_edges(sample[:, i], range[i])
            num = int(bins[i] + 1)  # synchronize!
            edges[i] = cupy.linspace(smin, smax, num)
        elif cnp.ndim(bins[i]) == 1:
            edges[i] = cupy.asarray(bins[i])
            if (edges[i][:-1] > edges[i][1:]).any():
                raise ValueError(
                    "`bins[{}]` must be monotonically increasing, when an "
                    "array".format(i))
        else:
            raise ValueError(
                "`bins[{}]` must be a scalar or 1d array".format(i))

        nbin[i] = len(edges[i]) + 1  # includes an outlier on each end
        dedges[i] = cupy.diff(edges[i])

    # Compute the bin number each sample falls into.
    ncount = tuple(
        # avoid cupy.digitize to work around gh-11022
        cupy.searchsorted(edges[i], sample[:, i], side="right")
        for i in _range(ndim))

    # Using digitize, values that fall on an edge are put in the right bin.
    # For the rightmost bin, we want values equal to the right edge to be
    # counted in the last bin, and not as an outlier.
    for i in _range(ndim):
        # Find which points are on the rightmost edge.
        on_edge = sample[:, i] == edges[i][-1]
        # Shift these points one bin to the left.
        ncount[i][on_edge] -= 1

    # Compute the sample indices in the flattened histogram matrix.
    # This raises an error if the array is too large.
    xy = cnp.ravel_multi_index(ncount, nbin)

    # Compute the number of repetitions in xy and assign it to the
    # flattened histmat.
    hist = cupy.bincount(xy, weights, minlength=numpy.prod(nbin))

    # Shape into a proper matrix
    hist = hist.reshape(nbin)

    # This preserves the (bad) behavior observed in gh-7845, for now.
    hist = hist.astype(float)  # Note: NumPy uses casting='safe' here too

    # Remove outliers (indices 0 and -1 for each dimension).
    core = ndim * (slice(1, -1),)
    hist = hist[core]

    if density:
        # calculate the probability density function
        s = hist.sum()
        for i in _range(ndim):
            shape = [1] * ndim
            shape[i] = nbin[i] - 2
            hist = hist / dedges[i].reshape(shape)
        hist /= s

    if any(hist.shape != numpy.asarray(nbin) - 2):
        raise RuntimeError("Internal Shape Error")
    return hist, edges

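# The core counting trick in `histogramdd` above, shown standalone
# (illustrative only, not part of the original source; plain NumPy is used
# here for brevity, the CuPy calls mirror it): per-dimension bin ids are
# fused into one flat id with ravel_multi_index, a single bincount counts
# every D-dimensional bin at once, and the result is reshaped back.
import numpy as np

bin_ids = (np.array([0, 1, 1, 2]),   # bin of each sample along axis 0
           np.array([2, 0, 0, 1]))   # bin of each sample along axis 1
nbin_demo = (3, 3)
flat = np.ravel_multi_index(bin_ids, nbin_demo)   # array([2, 3, 3, 7])
hist_demo = np.bincount(flat, minlength=np.prod(nbin_demo)).reshape(nbin_demo)
# hist_demo[1, 0] == 2 : two samples fell into bin (1, 0)
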
def test_qn(loss, dtype, penalty, l1_strength, l2_strength, fit_intercept):

    if penalty == "none" and (l1_strength > 0 or l2_strength > 0):
        pytest.skip("`none` penalty does not take l1/l2_strength")

    tol = 1e-6

    qn = cuQN(loss=loss, fit_intercept=fit_intercept,
              l1_strength=l1_strength, l2_strength=l2_strength,
              tol=1e-8, output_type="cupy")

    if loss == 'softmax':
        X, y = make_classification(n_samples=5000,
                                   n_informative=10,
                                   n_features=20,
                                   n_classes=4,
                                   dtype=dtype)

        stratify = y.astype(dtype)
        X_train, X_test, y_train, y_test = train_test_split(X.astype(dtype),
                                                            y.astype(dtype),
                                                            stratify=stratify)
        most_class = cp.unique(y)[cp.argmax(cp.bincount(y))]

        baseline_preds = cp.array([most_class] * y_test.shape[0], dtype=dtype)
        baseline_score = accuracy_score(y_test, baseline_preds)

        y_pred = qn.fit(X_train, y_train).predict(X_test)
        cuml_score = accuracy_score(y_test, y_pred)

        assert cuml_score > baseline_score
        assert cuml_score >= 0.50

    elif loss == 'sigmoid':
        X = np.array(precomputed_X, dtype=dtype)
        y = np.array(precomputed_y_log, dtype=dtype)

        qn.fit(X, y)
        print(qn.objective)
        print(qn.coef_)

        if penalty == 'none' and l1_strength == 0.0 and l2_strength == 0.0:
            if fit_intercept:
                assert (qn.objective - 0.40263831615448) < tol
                cp.testing.assert_array_almost_equal(
                    qn.coef_, np.array([[-2.1088872], [2.4812558]]),
                    decimal=3)
            else:
                assert (qn.objective - 0.4317452311515808) < tol
                cp.testing.assert_array_almost_equal(
                    qn.coef_, np.array([[-2.120777], [3.056865]]),
                    decimal=3)
        elif penalty == 'l1' and l2_strength == 0.0:
            if fit_intercept:
                if l1_strength == 0.0:
                    assert (qn.objective - 0.40263831615448) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-2.1088872], [2.4812558]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.44295936822891235) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-1.6899368], [1.9021575]]),
                        decimal=3)
            else:
                if l1_strength == 0.0:
                    assert (qn.objective - 0.4317452311515808) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-2.120777], [3.056865]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.4769895672798157) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-1.6214856], [2.3650239]]),
                        decimal=3)
                # assert False
        elif penalty == 'l2' and l1_strength == 0.0:
            if fit_intercept:
                if l2_strength == 0.0:
                    assert (qn.objective - 0.40263831615448) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-2.1088872], [2.4812558]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.43780848383903503) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-1.5337948], [1.678699]]),
                        decimal=3)
            else:
                if l2_strength == 0.0:
                    assert (qn.objective - 0.4317452311515808) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-2.120777], [3.056865]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.4750209450721741) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-1.3931049], [2.0140104]]),
                        decimal=3)

        if penalty == 'elasticnet':
            if fit_intercept:
                if l1_strength == 0.0 and l2_strength == 0.0:
                    assert (qn.objective - 0.40263831615448) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-2.1088872], [2.4812558]]),
                        decimal=3)
                elif l1_strength == 0.0:
                    assert (qn.objective - 0.43780848383903503) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-1.5337948], [1.678699]]),
                        decimal=3)
                elif l2_strength == 0.0:
                    assert (qn.objective - 0.44295936822891235) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-1.6899368], [1.9021575]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.467987984418869) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-1.3727235], [1.4639963]]),
                        decimal=3)
            else:
                if l1_strength == 0.0 and l2_strength == 0.0:
                    assert (qn.objective - 0.4317452311515808) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-2.120777], [3.056865]]),
                        decimal=3)
                elif l1_strength == 0.0:
                    assert (qn.objective - 0.4750209450721741) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-1.3931049], [2.0140104]]),
                        decimal=3)
                elif l2_strength == 0.0:
                    assert (qn.objective - 0.4769895672798157) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-1.6214856], [2.3650239]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.5067970156669617) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_, np.array([[-1.2102532], [1.752459]]),
                        decimal=3)

        print()

def time_weights(self):
    np.bincount(self.d, weights=self.e)

def perimeter_crofton(image, directions=4):
    """Calculate total Crofton perimeter of all objects in binary image.

    Parameters
    ----------
    image : (N, M) ndarray
        2D image. If image is not binary, all values strictly greater than
        zero are considered as the object.
    directions : 2 or 4, optional
        Number of directions used to approximate the Crofton perimeter. By
        default, 4 is used: it should be more accurate than 2. Computation
        time is the same in both cases.

    Returns
    -------
    perimeter : float
        Total perimeter of all objects in binary image.

    Notes
    -----
    This measure is based on Crofton formula [1], which is a measure from
    integral geometry. It is defined for general curve length evaluation via
    a double integral along all directions. In a discrete space, 2 or 4
    directions give a quite good approximation, 4 being more accurate than
    2 for more complex shapes.

    Similar to :func:`~.measure.perimeter`, this function returns an
    approximation of the perimeter in continuous space.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Crofton_formula
    .. [2] S. Rivollier. Analyse d'image geometrique et morphometrique par
           diagrammes de forme et voisinages adaptatifs generaux. PhD thesis,
           2010. Ecole Nationale Superieure des Mines de Saint-Etienne.
           https://tel.archives-ouvertes.fr/tel-00560838
    """
    if image.ndim != 2:
        raise NotImplementedError(
            "`perimeter_crofton` supports 2D images only")

    # as image could be a label image, transform it to binary image
    image = (image > 0).astype(cp.uint8)
    image = cp.pad(image, pad_width=1, mode="constant")
    XF = ndi.convolve(
        image,
        cp.array([[0, 0, 0], [0, 1, 4], [0, 2, 8]]),
        mode="constant",
        cval=0,
    )

    h = cp.bincount(XF.ravel(), minlength=16)

    # definition of the LUT
    # fmt: off
    if directions == 2:
        coefs = [
            0, np.pi / 2, 0, 0, 0, np.pi / 2, 0, 0,
            np.pi / 2, np.pi, 0, 0, np.pi / 2, np.pi, 0, 0
        ]
    else:
        sq2 = math.sqrt(2)
        coefs = [
            0, np.pi / 4 * (1 + 1 / sq2), np.pi / (4 * sq2), np.pi / (2 * sq2),
            0, np.pi / 4 * (1 + 1 / sq2), 0, np.pi / (4 * sq2),
            np.pi / 4, np.pi / 2, np.pi / (4 * sq2), np.pi / (4 * sq2),
            np.pi / 4, np.pi / 2, 0, 0
        ]
    # fmt: on

    total_perimeter = cp.asarray(coefs) @ h
    return total_perimeter

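# Illustrative call of `perimeter_crofton` above (not part of the original
# source); assumes `cp` (CuPy) is imported and the module-level `ndi`
# (cupyx.scipy.ndimage) is available.
square_demo = cp.zeros((10, 10), dtype=bool)
square_demo[2:8, 2:8] = True                     # a 6x6 solid square
perimeter_crofton(square_demo, directions=4)     # 4-direction Crofton estimate
perimeter_crofton(square_demo, directions=2)     # coarser 2-direction estimate
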
def euler_number(image, connectivity=None):
    """Calculate the Euler characteristic in binary image.

    For 2D objects, the Euler number is the number of objects minus the
    number of holes. For 3D objects, the Euler number is obtained as the
    number of objects plus the number of holes, minus the number of tunnels,
    or loops.

    Parameters
    ----------
    image : (N, M) ndarray or (N, M, D) ndarray
        2D or 3D images. If image is not binary, all values strictly greater
        than zero are considered as the object.
    connectivity : int, optional
        Maximum number of orthogonal hops to consider a pixel/voxel as a
        neighbor. Accepted values are ranging from 1 to input.ndim.
        If ``None``, a full connectivity of ``input.ndim`` is used.
        4 or 8 neighborhoods are defined for 2D images (connectivity 1 and 2,
        respectively). 6 or 26 neighborhoods are defined for 3D images,
        (connectivity 1 and 3, respectively). Connectivity 2 is not defined.

    Returns
    -------
    euler_number : int
        Euler characteristic of the set of all objects in the image.

    Notes
    -----
    The Euler characteristic is an integer number that describes the
    topology of the set of all objects in the input image. If object is
    4-connected, then background is 8-connected, and conversely.

    The computation of the Euler characteristic is based on an integral
    geometry formula in discretized space. In practice, a neighbourhood
    configuration is constructed, and a LUT is applied for each
    configuration. The coefficients used are the ones of Ohser et al.

    It can be useful to compute the Euler characteristic for several
    connectivities. A large relative difference between results for
    different connectivities suggests that the image resolution (with
    respect to the size of objects and holes) is too low.

    References
    ----------
    .. [1] S. Rivollier. Analyse d'image geometrique et morphometrique par
           diagrammes de forme et voisinages adaptatifs generaux. PhD thesis,
           2010. Ecole Nationale Superieure des Mines de Saint-Etienne.
           https://tel.archives-ouvertes.fr/tel-00560838
    .. [2] Ohser J., Nagel W., Schladitz K. (2002) The Euler Number of
           Discretized Sets - On the Choice of Adjacency in Homogeneous
           Lattices. In: Mecke K., Stoyan D. (eds) Morphology of Condensed
           Matter. Lecture Notes in Physics, vol 600. Springer, Berlin,
           Heidelberg.

    Examples
    --------
    >>> import numpy as np
    >>> SAMPLE = np.zeros((100, 100, 100))
    >>> SAMPLE[40:60, 40:60, 40:60] = 1
    >>> euler_number(SAMPLE)  # doctest: +ELLIPSIS
    1...
    >>> SAMPLE[45:55, 45:55, 45:55] = 0
    >>> euler_number(SAMPLE)  # doctest: +ELLIPSIS
    2...
    >>> SAMPLE = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0],
    ...                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
    ...                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ...                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ...                    [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
    ...                    [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
    ...                    [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
    ...                    [1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0],
    ...                    [0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...                    [0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]])
    >>> euler_number(SAMPLE)  # doctest:
    0
    >>> euler_number(SAMPLE, connectivity=1)  # doctest:
    2
    """  # noqa
    # as image can be a label image, transform it to binary
    image = (image > 0).astype(int)
    image = cp.pad(image, pad_width=1, mode="constant")

    # check connectivity
    if connectivity is None:
        connectivity = image.ndim

    # config variable is an adjacency configuration. A coefficient given by
    # variable coefs is attributed to each configuration in order to get
    # the Euler characteristic.
    if image.ndim == 2:
        config = cp.array([[0, 0, 0], [0, 1, 4], [0, 2, 8]])
        if connectivity == 1:
            coefs = EULER_COEFS2D_4
        else:
            coefs = EULER_COEFS2D_8
        bins = 16
    else:  # 3D images
        if connectivity == 2:
            raise NotImplementedError(
                "For 3D images, Euler number is implemented "
                "for connectivities 1 and 3 only")

        # fmt: off
        config = cp.array([[[0, 0, 0], [0, 0, 0], [0, 0, 0]],
                           [[0, 0, 0], [0, 1, 4], [0, 2, 8]],
                           [[0, 0, 0], [0, 16, 64], [0, 32, 128]]])
        # fmt: on
        if connectivity == 1:
            coefs = EULER_COEFS3D_26[::-1]
        else:
            coefs = EULER_COEFS3D_26
        bins = 256

    # XF has values in the 0-255 range in 3D, and in the 0-15 range in 2D,
    # with one unique value for each binary configuration of the
    # 27-voxel cube in 3D / 8-pixel square in 2D, up to symmetries
    XF = ndi.convolve(image, config, mode="constant", cval=0)
    h = cp.bincount(XF.ravel(), minlength=bins)

    coefs = cp.asarray(coefs)
    if image.ndim == 2:
        return coefs @ h
    else:
        return int(0.125 * coefs @ h)

def remove_small_objects(ar, min_size=64, connectivity=1, in_place=False):
    """Remove objects smaller than the specified size.

    Expects ar to be an array with labeled objects, and removes objects
    smaller than min_size. If `ar` is bool, the image is first labeled.
    This leads to potentially different behavior for bool and 0-and-1
    arrays.

    Parameters
    ----------
    ar : ndarray (arbitrary shape, int or bool type)
        The array containing the objects of interest. If the array type is
        int, the ints must be non-negative.
    min_size : int, optional (default: 64)
        The smallest allowable object size.
    connectivity : int, {1, 2, ..., ar.ndim}, optional (default: 1)
        The connectivity defining the neighborhood of a pixel. Used during
        labelling if `ar` is bool.
    in_place : bool, optional (default: False)
        If ``True``, remove the objects in the input array itself.
        Otherwise, make a copy.

    Raises
    ------
    TypeError
        If the input array is of an invalid type, such as float or string.
    ValueError
        If the input array contains negative values.

    Returns
    -------
    out : ndarray, same shape and type as input `ar`
        The input array with small connected components removed.

    Examples
    --------
    >>> import cupy as cp
    >>> from cupyimg.skimage import morphology
    >>> a = cp.array([[0, 0, 0, 1, 0],
    ...               [1, 1, 1, 0, 0],
    ...               [1, 1, 1, 0, 1]], bool)
    >>> b = morphology.remove_small_objects(a, 6)
    >>> b
    array([[False, False, False, False, False],
           [ True,  True,  True, False, False],
           [ True,  True,  True, False, False]])
    >>> c = morphology.remove_small_objects(a, 7, connectivity=2)
    >>> c
    array([[False, False, False,  True, False],
           [ True,  True,  True, False, False],
           [ True,  True,  True, False, False]])
    >>> d = morphology.remove_small_objects(a, 6, in_place=True)
    >>> d is a
    True
    """
    # Raising type error if not int or bool
    _check_dtype_supported(ar)

    if in_place:
        out = ar
    else:
        out = ar.copy()

    if min_size == 0:  # shortcut for efficiency
        return out

    if out.dtype == bool:
        selem = ndi.generate_binary_structure(ar.ndim, connectivity)
        ccs = cp.zeros_like(ar, dtype=cp.int32)
        ndi.label(ar, selem, output=ccs)
    else:
        ccs = out

    try:
        component_sizes = cp.bincount(ccs.ravel())
    except ValueError:
        raise ValueError("Negative value labels are not supported. Try "
                         "relabeling the input with `scipy.ndimage.label` or "
                         "`skimage.morphology.label`.")

    if len(component_sizes) == 2 and out.dtype != bool:
        warn("Only one label was provided to `remove_small_objects`. "
             "Did you mean to use a boolean array?")

    too_small = component_sizes < min_size
    too_small_mask = too_small[ccs]
    out[too_small_mask] = 0

    return out

def mode(a):
    counts = xp.bincount(a.astype(xp.int64))
    return xp.argmax(counts)

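# Illustrative call of `mode` above (not part of the original source);
# assumes `xp` is a NumPy-compatible module alias (e.g. numpy or cupy).
# Note this returns the most frequent *value* only for non-negative ints,
# since bincount indexes the counts by value.
a_demo = xp.array([3, 1, 3, 2, 3, 1])
mode(a_demo)  # 3 -- bincount([0, 2, 1, 3]) peaks at index 3
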
def _sparse_document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if cupyx.scipy.sparse.isspmatrix_csr(X):
        return cp.bincount(X.indices, minlength=X.shape[1])
    else:
        return cp.diff(X.indptr)

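# Illustrative call of `_sparse_document_frequency` above (not part of the
# original source); assumes `cp` (CuPy) and `cupyx` are imported. Rows are
# documents, columns are terms, so the result is the document frequency.
from cupyx.scipy.sparse import csr_matrix

counts_demo = csr_matrix(cp.array([[1., 0., 2.],
                                   [0., 0., 1.],
                                   [3., 0., 0.]]))
_sparse_document_frequency(counts_demo)  # array([2, 0, 2]): docs per term
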
def time_bincount(self):
    np.bincount(self.d)

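# Possible surrounding benchmark class for the two timing methods above
# (a sketch only -- the real setup defining `self.d` and `self.e` is not
# shown in this excerpt); assumes an asv-style benchmark with numpy as np.
class Bincount:
    def setup(self):
        self.d = np.arange(80000, dtype=np.intp)
        self.e = self.d.astype(np.float64)

    def time_bincount(self):
        np.bincount(self.d)

    def time_weights(self):
        np.bincount(self.d, weights=self.e)
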