def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, 'i')
    info = cupy.empty((), 'i')

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    elif dtype == 'd':
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf
    elif dtype == 'F':
        getrf_bufferSize = cusolver.cgetrf_bufferSize
        getrf = cusolver.cgetrf
    else:
        getrf_bufferSize = cusolver.zgetrf_bufferSize
        getrf = cusolver.zgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, info.data.ptr)

    if info[()] == 0:
        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == -1 ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    else:
        sign = cupy.array(0.0, dtype=dtype)
        # The log-determinant is always real, so complex dtypes map to
        # their real counterparts for the -inf fill value.
        if dtype in ['f', 'd']:
            logdet = cupy.array(float('-inf'), dtype)
        elif dtype == 'F':
            logdet = cupy.array(float('-inf'), cupy.float32)
        else:
            logdet = cupy.array(float('-inf'), cupy.float64)
    return sign, logdet
def mean_peak_distance(peak_image, centroids, return_numpy=True):
    """
    Calculate the mean peak distance in degrees between two corresponding
    peaks for each line profile in an SLI image series.

    Args:
        peak_image: Boolean NumPy array specifying the peak positions
                    in the full SLI stack

        centroids: Use centroid calculation to better determine the peak
                   position regardless of the number of measurements /
                   illumination angles used.

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy
                      or NumPy array will be returned.

    Returns:
        NumPy array of floating point values containing the mean peak
        distance of the line profiles in degrees.
    """
    peak_distance_gpu = peak_distance(peak_image, centroids,
                                      return_numpy=False)
    peak_distance_gpu[peak_distance_gpu > 180] = 0
    peak_distance_gpu = cupy.sum(peak_distance_gpu, axis=-1) / \
        cupy.maximum(1, cupy.count_nonzero(peak_distance_gpu, axis=-1))

    if return_numpy:
        peak_distance_cpu = cupy.asnumpy(peak_distance_gpu)
        del peak_distance_gpu
        return peak_distance_cpu
    else:
        return peak_distance_gpu
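# The sum / maximum(1, count_nonzero) pattern above computes a mean over
# only the nonzero entries of each line profile while avoiding division by
# zero. A minimal NumPy sketch of the same trick (NumPy stands in for CuPy
# here; the array contents are made up for illustration):
import numpy as np

distances = np.array([[120., 0., 60.],   # two valid distances -> mean 90
                      [0., 0., 0.]])     # no valid distances  -> mean 0
mean = distances.sum(axis=-1) / np.maximum(
    1, np.count_nonzero(distances, axis=-1))
print(mean)  # [90.  0.]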
def batch_all_triplet_loss(embeddings, labels, margin=0.2, dist_type='l2'):
    """Build the triplet loss over a batch of embeddings.

    We generate all the valid triplets and average the loss over the
    positive ones.

    Args:
        embeddings: Variable of shape=(batch_size, embed_dim)
        labels: labels of the batch, of size=(batch_size,)
        margin: margin for triplet loss
        dist_type: definition of distance, 'l2' or 'cos'

    Returns:
        triplet_loss: scalar Variable containing the triplet loss
    """
    # distance(f(xa), f(xp)) - distance(f(xa), f(xn)) + alpha
    pairwise_dist = _pairwise_distances(embeddings, dist_type)
    anchor_positive_dist = F.expand_dims(pairwise_dist, axis=2)
    anchor_negative_dist = F.expand_dims(pairwise_dist, axis=1)
    triplet_loss = anchor_positive_dist - anchor_negative_dist + margin

    # Set invalid triplet [i, j, k] to 0.
    mask = _get_triplet_mask(labels)
    triplet_loss = mask * triplet_loss

    # Ignore the loss of sufficiently separated example pairs.
    triplet_loss = F.relu(triplet_loss)

    # Calculate mean of loss over the non-zero (active) triplets.
    total = F.sum(triplet_loss)
    count = xp.count_nonzero(triplet_loss.data)
    return total / count if (count > 0.0) else chainer.Variable(
        xp.array(0.0, dtype=xp.float32))
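# The two expand_dims calls above broadcast an (n, n) pairwise-distance
# matrix into an (n, n, n) tensor whose [i, j, k] entry is
# d(i, j) - d(i, k) + margin, i.e. the loss of the triplet (anchor i,
# positive j, negative k). A self-contained NumPy sketch of that
# broadcasting (toy distances, not the real _pairwise_distances output):
import numpy as np

pairwise = np.array([[0., 1., 4.],
                     [1., 0., 2.],
                     [4., 2., 0.]])
margin = 0.2
loss = pairwise[:, :, None] - pairwise[:, None, :] + margin
print(loss.shape)     # (3, 3, 3)
print(loss[0, 1, 2])  # d(0,1) - d(0,2) + margin = 1 - 4 + 0.2 = -2.8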
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, dtype=numpy.int32)
    dev_info = cupy.empty((), dtype=numpy.int32)

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    else:
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.)
    # that should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diag(a_copy)
    # ipiv is 1-origin
    non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                cupy.count_nonzero(diag < 0))
    # Note: sign == -1 ** (non_zero % 2)
    sign = (non_zero % 2) * -2 + 1
    logdet = cupy.log(abs(diag)).sum()

    singular = dev_info > 0
    return (
        cupy.where(singular, dtype.type(0), sign),
        cupy.where(singular, dtype.type('-inf'), logdet),
    )
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, dtype=numpy.int32)
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    else:
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    try:
        cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
            getrf, dev_info)

        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == -1 ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    except linalg.LinAlgError:
        sign = cupy.array(0.0, dtype=dtype)
        logdet = cupy.array(float('-inf'), dtype)

    return sign, logdet
def count_nonzero(self):
    """Returns number of non-zero entries.

    .. note::
        This method counts the actual number of non-zero entries, which
        does not include explicit zero entries. In contrast, ``nnz``
        returns the number of stored entries including explicit zeros.

    Returns:
        Number of non-zero entries.
    """
    return cupy.count_nonzero(self.data)
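# The nnz / count_nonzero distinction only matters once a sparse matrix
# stores explicit zeros. A quick demonstration with SciPy's CSR format,
# whose API cupyx.scipy.sparse mirrors; SciPy is used here so the example
# runs without a GPU:
import numpy as np
from scipy import sparse

m = sparse.csr_matrix((np.array([1.0, 0.0, 3.0]),   # one explicit zero
                       np.array([0, 1, 2]),
                       np.array([0, 2, 3, 3])), shape=(3, 3))
print(m.nnz)              # 3: stored entries, explicit zero included
print(m.count_nonzero())  # 2: entries that are actually non-zero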
def inclination_sign(peak_image, centroids, correction_angle=0,
                     return_numpy=True):
    """
    Calculate the inclination sign from the peak positions.
    The inclination sign is based on the peak distance between two peaks.

    Explanation of the results:
    -1: The minimal peak distance is behind the first peak (wrapping around)
    0: This pixel / line profile has more than two peaks
    1: The minimal peak distance is in front of the first peak.

    Args:
        peak_image: Boolean NumPy array specifying the peak positions
                    in the full SLI stack

        centroids: Centroids resulting from `centroid_correction` for
                   more accurate results

        correction_angle: Correct the resulting angle by this value.
                          Useful when the stack or camera was rotated.

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy
                      or NumPy array will be returned.

    Returns:
        inclination_sign: 3D NumPy array
            inclination sign
    """
    gpu_peak_image = cupy.array(peak_image).astype('int8')
    gpu_centroids = cupy.array(centroids).astype('float32')

    result_img_gpu = cupy.empty(
        (gpu_peak_image.shape[0], gpu_peak_image.shape[1]), dtype='float32')
    number_of_peaks = cupy.count_nonzero(gpu_peak_image, axis=-1).astype(
        'int8')

    threads_per_block = (1, 1)
    blocks_per_grid = gpu_peak_image.shape[:-1]
    _inclination_sign[blocks_per_grid, threads_per_block](gpu_peak_image,
                                                          gpu_centroids,
                                                          number_of_peaks,
                                                          result_img_gpu,
                                                          correction_angle)
    cuda.synchronize()
    del number_of_peaks
    if peak_image is None:
        del gpu_peak_image

    if return_numpy:
        result_img_cpu = cupy.asnumpy(result_img_gpu)
        del result_img_gpu
        return result_img_cpu
    else:
        return result_img_gpu
def mean_peak_prominence(image, peak_image=None, kind_of_normalization=0,
                         return_numpy=True):
    """
    Calculate the mean peak prominence of all given peak positions within a
    line profile. The line profile will be normalized by dividing it
    through its mean value. Therefore, values above 1 are possible.

    Args:
        image: Original line profile used to detect all peaks. This array
               will be further analyzed to better determine the peak
               positions.

        peak_image: Boolean NumPy array specifying the peak positions
                    in the full SLI stack

        kind_of_normalization: Normalize the given line profile by using a
                               normalization technique based on this
                               parameter.
                               0 : Scale line profile to be between 0 and 1
                               1 : Divide line profile through its mean value

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy
                      or NumPy array will be returned.

    Returns:
        NumPy array of floating point values containing the mean peak
        prominence of each line profile.
    """
    if peak_image is not None:
        gpu_peak_image = cupy.array(peak_image).astype('uint8')
    else:
        gpu_peak_image = peaks(image, return_numpy=False).astype('uint8')

    peak_prominence_gpu = peak_prominence(image, peak_image,
                                          kind_of_normalization,
                                          return_numpy=False)
    peak_prominence_gpu = cupy.sum(peak_prominence_gpu, axis=-1) / \
        cupy.maximum(1, cupy.count_nonzero(gpu_peak_image, axis=-1))
    peak_prominence_gpu = peak_prominence_gpu.astype('float32')
    del gpu_peak_image

    if return_numpy:
        peak_prominence_cpu = cupy.asnumpy(peak_prominence_gpu)
        del peak_prominence_gpu
        return peak_prominence_cpu
    else:
        return peak_prominence_gpu
def _get_median(data, n_zeros):
    """Compute the median of data with n_zeros additional zeros.

    This function is used to support sparse matrices; it modifies data
    in-place.
    """
    n_elems = len(data) + n_zeros
    if not n_elems:
        return np.nan
    n_negative = np.count_nonzero(data < 0)
    middle, is_odd = divmod(n_elems, 2)
    data.sort()

    if is_odd:
        return _get_elem_at_rank(middle, data, n_negative, n_zeros)

    return (_get_elem_at_rank(middle - 1, data, n_negative, n_zeros) +
            _get_elem_at_rank(middle, data, n_negative, n_zeros)) / 2.
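# _get_median finds the median of the stored values plus the implicit
# zeros of a sparse column without materializing those zeros. A naive but
# self-contained NumPy check of the semantics it implements (materializing
# the zeros, which the real helper deliberately avoids):
import numpy as np

data = np.array([-3.0, 5.0, 1.0])   # stored (non-zero) values
n_zeros = 2                         # implicit zeros in the sparse column
dense = np.concatenate([data, np.zeros(n_zeros)])
print(np.median(dense))  # 0.0 -> median of [-3, 0, 0, 1, 5]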
def mean_peak_width(image, peak_image=None, target_height=0.5,
                    return_numpy=True):
    """
    Calculate the mean peak width of all given peak positions within a
    line profile.

    Args:
        image: Original line profile used to detect all peaks. This array
               will be further analyzed to better determine the peak
               positions.

        peak_image: Boolean NumPy array specifying the peak positions
                    in the full SLI stack

        target_height: Relative peak height in relation to the prominence
                       of the given peak.

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy
                      or NumPy array will be returned.

    Returns:
        NumPy array where each entry corresponds to the mean peak width of
        the line profile. The values are in degrees.
    """
    if peak_image is not None:
        gpu_peak_image = cupy.array(peak_image).astype('uint8')
    else:
        gpu_peak_image = peaks(image, return_numpy=False).astype('uint8')

    peak_width_gpu = peak_width(image, gpu_peak_image, target_height,
                                return_numpy=False)
    peak_width_gpu = cupy.sum(peak_width_gpu, axis=-1) / \
        cupy.maximum(1, cupy.count_nonzero(gpu_peak_image, axis=-1))
    del gpu_peak_image

    if return_numpy:
        peak_width_cpu = cupy.asnumpy(peak_width_gpu)
        del peak_width_gpu
        return peak_width_cpu
    else:
        return peak_width_gpu
def deskew(image, angle, dz, pixel_size):
    deskewed = deskewGPU(image, angle, dz, pixel_size)

    image_cp = cp.array(image)
    deskewed_cp = cp.array(deskewed)

    pages, col, row = image_cp.shape
    # Slice bounds must be Python ints, so pull the patch size to the host.
    noise_size = int(cp.ceil(cp.max(cp.array([row, col])) * 0.1))
    image_noise_patch = image_cp[0:noise_size,
                                 col - (noise_size + 1):col - 1, :]
    image_noise_patch = image_noise_patch.flatten()

    fill_length = deskewed_cp.size - cp.count_nonzero(deskewed_cp)
    repeat_frequency = cp.ceil(fill_length / image_noise_patch.size)
    repeat_frequency = cp.asnumpy(repeat_frequency).flatten().astype(
        dtype=np.uint16)[0]

    noise = cp.tile(image_noise_patch, repeat_frequency + 1)
    noise = noise[0:fill_length]

    deskewed_cp[deskewed_cp == 0] = noise
    return cp.asnumpy(deskewed_cp)
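# The function above fills the empty (zero) wedges that deskewing leaves
# behind with noise tiled from a corner patch of the raw image, so the
# background statistics stay realistic. A minimal NumPy sketch of that
# fill pattern (the zero layout and noise values are made up):
import numpy as np

deskewed = np.array([3., 0., 5., 0., 0., 7.])  # zeros mark empty space
noise_patch = np.array([0.9, 1.1])             # sampled background noise
fill_length = deskewed.size - np.count_nonzero(deskewed)
repeats = int(np.ceil(fill_length / noise_patch.size))
noise = np.tile(noise_patch, repeats + 1)[:fill_length]
deskewed[deskewed == 0] = noise
print(deskewed)  # [3.  0.9 5.  1.1 0.9 7. ]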
def num_peaks(image=None, peak_image=None, return_numpy=True):
    """
    Calculate the number of peaks from each line profile in an SLI image
    series by detecting all peaks and applying thresholds to remove
    unwanted peaks.

    Args:
        image: Full SLI measurement (series of images) which is prepared
               for the pipeline using the SLIX toolbox methods.

        peak_image: Boolean NumPy array specifying the peak positions
                    in the full SLI stack

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy
                      or NumPy array will be returned.

    Returns:
        Array where each entry corresponds to the number of detected
        peaks within the first dimension of the SLI image series.
    """
    if peak_image is None and image is not None:
        peak_image = peaks(image, return_numpy=False)
    elif peak_image is not None:
        peak_image = cupy.array(peak_image)
    else:
        raise ValueError('Either image or peak_image has to be defined.')

    resulting_image = cupy.count_nonzero(peak_image, axis=-1) \
        .astype(cupy.uint16)

    if return_numpy:
        resulting_image_cpu = cupy.asnumpy(resulting_image)
        del resulting_image
        return resulting_image_cpu
    else:
        return resulting_image
def compose_vector_fields(
    d1,
    d2,
    premult_index,
    premult_disp,
    time_scaling,
    comp=None,
    order=1,
    *,
    coord_axis=-1,
    omit_stats=False,
    xcoords=None,
    Y=None,
    Z=None,
):
    if comp is None:
        comp = cupy.empty_like(d1, order="C")

    # need vector elements on first axis, not last
    if coord_axis != 0:
        d1 = cupy.ascontiguousarray(cupy.moveaxis(d1, -1, 0))
        d2 = cupy.ascontiguousarray(cupy.moveaxis(d2, -1, 0))
    else:
        if not d1.flags.c_contiguous:
            d1 = cupy.ascontiguousarray(d1)
        if not d2.flags.c_contiguous:
            d2 = cupy.ascontiguousarray(d2)
    ndim = d1.shape[0]
    B = premult_disp
    A = premult_index
    t = time_scaling

    if xcoords is None:
        xcoords = cupy.meshgrid(
            *[cupy.arange(s, dtype=d1.real.dtype) for s in d1.shape[1:]],
            indexing="ij",
            sparse=True,
        )

    # TODO: reduce number of temporary arrays
    if ndim in [2, 3]:
        if Y is None:
            Y = cupy.empty_like(d1)
        if A is None:
            if B is None:
                if ndim == 3:
                    composeNone_3d(
                        d1[0], d1[1], d1[2],
                        xcoords[0], xcoords[1], xcoords[2],
                        Y[0], Y[1], Y[2],
                    )
                else:
                    composeNone_2d(d1[0], d1[1], xcoords[0], xcoords[1],
                                   Y[0], Y[1])
            else:
                B = cupy.asarray(B[:ndim, :ndim], dtype=d1.dtype, order="C")
                if ndim == 3:
                    composeB_3d(
                        d1[0], d1[1], d1[2],
                        xcoords[0], xcoords[1], xcoords[2],
                        B,
                        Y[0], Y[1], Y[2],
                    )
                else:
                    composeB_2d(d1[0], d1[1], xcoords[0], xcoords[1], B,
                                Y[0], Y[1])
        elif B is None:
            A = cupy.asarray(A[:ndim, :], dtype=d1.dtype, order="C")
            if ndim == 3:
                composeA_3d(xcoords[0], xcoords[1], xcoords[2], A,
                            Y[0], Y[1], Y[2])
            else:
                composeA_2d(xcoords[0], xcoords[1], A, Y[0], Y[1])
        else:
            A = cupy.asarray(A[:ndim, :], dtype=d1.dtype, order="C")
            B = cupy.asarray(B[:ndim, :ndim], dtype=d1.dtype, order="C")
            if ndim == 3:
                composeAB_3d(
                    d1[0], d1[1], d1[2],
                    xcoords[0], xcoords[1], xcoords[2],
                    B, A,
                    Y[0], Y[1], Y[2],
                )
            else:
                composeAB_2d(d1[0], d1[1], xcoords[0], xcoords[1], B, A,
                             Y[0], Y[1])
    else:
        if B is None:
            d1tmp = d1.copy()  # have to copy to avoid modification of d1
        else:
            d1tmp = _apply_affine_to_field(d1, B[:ndim, :ndim],
                                           include_translations=False,
                                           coord_axis=0)

        if A is None:
            Y = d1tmp
            for n in range(ndim):
                Y[n] += xcoords[n]
        else:
            # Y = mul0(A, xcoords, sh, cupy, lastcol=1)
            Y = _apply_affine_to_field(xcoords, A[:ndim, :],
                                       include_translations=True,
                                       coord_axis=0)
            Y += d1tmp

    if Z is None:
        Z = cupy.empty_like(Y)
    for n in range(ndim):
        Z[n, ...] = ndi.map_coordinates(d2[n], Y, order=1, mode="constant")

    if coord_axis == 0:
        res = comp
    else:
        res = cupy.empty_like(Z)

    if omit_stats and ndim in [2, 3]:
        _shape = cupy.asarray(
            [d1.shape[1 + n] - 1 for n in range(ndim)], dtype=cupy.int32)
        if ndim == 3:
            _comp_apply_masked_time_scaling_3d(
                d1[0], d1[1], d1[2],
                Y[0], Y[1], Y[2],
                Z[0], Z[1], Z[2],
                t, _shape,
                res[0], res[1], res[2],
            )
        else:
            _comp_apply_masked_time_scaling_2d(d1[0], d1[1], Y[0], Y[1],
                                               Z[0], Z[1], t, _shape,
                                               res[0], res[1])
    else:
        # TODO: declare count as boolean?
        count = cupy.zeros(Z.shape[1:], dtype=np.int32)

        # We now compute:
        #     res = d1 + t * Z
        # except that res = 0 where either coordinate in the interpolating
        # Y was outside the displacement extent.
        for n in range(ndim):
            _comp_apply_masked_time_scaling_nd(d1[n], Y[n], Z[n], t,
                                               d1.shape[1 + n] - 1,
                                               res[n], count)

        # nnz corresponds to the number of points in comp inside the domain
        count = count > 0  # remove after init count as boolean
        if not omit_stats:
            nnz = res.size // ndim - cupy.count_nonzero(count)
        res *= ~count[np.newaxis, ...]

    if omit_stats:
        stats = None
    else:
        # compute the stats
        stats = cupy.empty((3, ), dtype=float)
        nn = res[0] * res[0]
        for n in range(1, ndim):
            nn += res[n] * res[n]
        # TODO: do we want stats to be a GPU array or CPU array?
        stats[0] = cupy.sqrt(nn.max())
        mean_norm = nn.sum() / nnz
        stats[1] = cupy.sqrt(mean_norm)
        nn *= nn
        stats[2] = cupy.sqrt(nn.sum() / nnz - mean_norm * mean_norm)

    if coord_axis != 0:
        res = cupy.moveaxis(res, 0, -1)
        comp[...] = res
    return comp, stats
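# The masked time scaling above computes res = d1 + t * Z but zeroes out
# any point whose warped coordinate fell outside the displacement field's
# extent, then counts the surviving (in-domain) points. A self-contained
# NumPy sketch of that masking for one vector component (toy 1D data):
import numpy as np

d1 = np.array([0.5, -0.2, 0.1, 0.3])
Y = np.array([0.8, 3.4, -0.5, 2.0])  # warped coordinates
Z = np.array([0.1, 0.2, 0.3, 0.4])   # interpolated second field
t, upper = 0.5, 3.0                  # time scaling, domain extent

inside = (Y >= 0) & (Y <= upper)
res = np.where(inside, d1 + t * Z, 0.0)
print(res)                       # [0.55 0.   0.   0.5 ]
print(np.count_nonzero(inside))  # 2 points remain inside the domain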
    # dimensional arrays.
    if x.ndim < 2:
        raise np.linalg.LinAlgError(
            "1-dimensional array given. Array must be at least "
            "two-dimensional")
    S = np.linalg.svd(x._array, compute_uv=False)
    if rtol is None:
        tol = S.max(axis=-1, keepdims=True) * max(x.shape[-2:]) * np.finfo(
            S.dtype).eps
    else:
        if isinstance(rtol, Array):
            rtol = rtol._array
        # Note: this is different from np.linalg.matrix_rank, which does
        # not multiply the tolerance by the largest singular value.
        tol = S.max(axis=-1, keepdims=True) * np.asarray(rtol)[...,
                                                               np.newaxis]
    return Array._new(np.count_nonzero(S > tol, axis=-1))


# Note: this function is new in the array API spec. Unlike transpose, it
# only transposes the last two axes.
def matrix_transpose(x: Array, /) -> Array:
    if x.ndim < 2:
        raise ValueError(
            "x must be at least 2-dimensional for matrix_transpose")
    return Array._new(np.swapaxes(x._array, -1, -2))


# Note: outer is in the numpy top-level namespace, not np.linalg
def outer(x1: Array, x2: Array, /) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.outer <numpy.outer>`.
def slogdet(a):
    """Returns sign and logarithm of the determinant of an array.

    It calculates the natural logarithm of the determinant of a given
    value.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., N, N)``.

    Returns:
        tuple of :class:`~cupy.ndarray`:
            It returns a tuple ``(sign, logdet)``. ``sign`` represents each
            sign of the determinant as a real number ``0``, ``1`` or
            ``-1``. ``logdet`` represents the natural logarithm of the
            absolute of the determinant.
            If the determinant is zero, ``sign`` will be ``0`` and
            ``logdet`` will be ``-inf``.
            The shapes of both ``sign`` and ``logdet`` are equal to
            ``a.shape[:-2]``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. warning::
        To produce the same results as :func:`numpy.linalg.slogdet` for
        singular inputs, set the `linalg` configuration to `raise`.

    .. seealso:: :func:`numpy.linalg.slogdet`
    """
    if a.ndim < 2:
        msg = ('%d-dimensional array given. '
               'Array must be at least two-dimensional' % a.ndim)
        raise linalg.LinAlgError(msg)
    _util._assert_nd_squareness(a)

    dtype = numpy.promote_types(a.dtype.char, 'f')
    real_dtype = numpy.dtype(dtype.char.lower())

    if dtype not in (numpy.float32, numpy.float64,
                     numpy.complex64, numpy.complex128):
        msg = ('dtype must be float32, float64, complex64, or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    a_shape = a.shape
    shape = a_shape[:-2]
    n = a_shape[-2]

    if a.size == 0:
        # empty batch (result is empty, too) or empty matrices
        # det([[]]) == 1
        sign = cupy.ones(shape, dtype)
        logdet = cupy.zeros(shape, real_dtype)
        return sign, logdet

    lu, ipiv, dev_info = _decomposition._lu_factor(a, dtype)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.)
    # that should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diagonal(lu, axis1=-2, axis2=-1)
    logdet = cupy.log(cupy.abs(diag)).sum(axis=-1)

    # ipiv is 1-origin
    non_zero = cupy.count_nonzero(ipiv != cupy.arange(1, n + 1), axis=-1)
    if dtype.kind == "f":
        non_zero += cupy.count_nonzero(diag < 0, axis=-1)

    # Note: sign == -1 ** (non_zero % 2)
    sign = (non_zero % 2) * -2 + 1
    if dtype.kind == "c":
        sign = sign * cupy.prod(diag / cupy.abs(diag), axis=-1)

    singular = dev_info > 0
    return (
        cupy.where(singular, dtype.type(0), sign.astype(dtype)).reshape(
            shape),
        cupy.where(singular, real_dtype.type('-inf'), logdet).reshape(
            shape),
    )
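# The sign/logdet decomposition exists because the determinant itself
# under- or overflows for large matrices while log|det| stays
# representable. A quick check of the relationship
# sign * exp(logdet) == det, using NumPy's slogdet, which this CuPy
# function mirrors:
import numpy as np

a = np.array([[2.0, 1.0],
              [1.0, 3.0]])
sign, logdet = np.linalg.slogdet(a)
print(sign, logdet)           # 1.0, log(5)
print(sign * np.exp(logdet))  # ~5.0
print(np.linalg.det(a))       # 5.0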
def _count_accurate_predictions(y_hat, y):
    y_hat = rmm_cupy_ary(cp.asarray, y_hat, dtype=y_hat.dtype)
    y = rmm_cupy_ary(cp.asarray, y, dtype=y.dtype)
    return y.shape[0] - cp.count_nonzero(y - y_hat)
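# Counting matches as n - count_nonzero(y - y_hat) exploits that a
# prediction is correct exactly when the difference is zero. A NumPy
# sketch of the identity (the labels are made up):
import numpy as np

y = np.array([0, 1, 2, 1])
y_hat = np.array([0, 2, 2, 1])
matches = y.shape[0] - np.count_nonzero(y - y_hat)
print(matches)                       # 3
print(np.count_nonzero(y == y_hat))  # 3, the equivalent mask-based form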
def _quantile_is_valid(q):
    if cupy.count_nonzero(q < 0.0) or cupy.count_nonzero(q > 1.0):
        return False
    return True
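# count_nonzero on a boolean mask doubles as a cheap "any" here: the
# quantile vector is valid only if no entry falls outside [0, 1]. A NumPy
# demonstration of the same check:
import numpy as np

q_ok = np.array([0.0, 0.5, 1.0])
q_bad = np.array([0.25, 1.5])
for q in (q_ok, q_bad):
    valid = not (np.count_nonzero(q < 0.0) or np.count_nonzero(q > 1.0))
    print(valid)  # True, then False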
def time_count_nonzero_multi_axis(self, numaxes, size, dtype):
    if self.x.ndim >= 2:
        np.count_nonzero(self.x, axis=(self.x.ndim - 1, self.x.ndim - 2))
def time_count_nonzero(self, numaxes, size, dtype):
    np.count_nonzero(self.x)
def direction(peak_image, centroids, correction_angle=0,
              number_of_directions=3, strategy='strict', return_numpy=True):
    """
    Calculate up to `number_of_directions` direction angles based on the
    given peak positions. If more than `number_of_directions*2` peaks are
    present, no direction angle will be calculated to avoid errors. This
    will result in a direction angle of BACKGROUND_COLOR. The direction
    angles are determined by the position of the corresponding peak pairs
    (i.e. 6 peaks: 1+4, 2+5, 3+6). If two peaks are too far away or too
    near (outside of 180°±35°), the direction angle will be considered as
    invalid, resulting in a direction angle of BACKGROUND_COLOR.

    Args:
        correction_angle: Correct the resulting direction angle by this
                          value. This is useful when the stack or camera
                          was rotated.

        peak_image: Boolean NumPy array specifying the peak positions
                    in the full SLI stack

        centroids: Centroids resulting from `centroid_correction` for
                   more accurate results

        number_of_directions: Number of directions which shall be
                              generated.

        strategy: Strategy to determine the direction angle. Possible
                  values are 'strict', 'safe' and 'unsafe'. 'strict' will
                  only calculate a direction angle if all peak pairs are
                  within 180°±35°. 'safe' will calculate a direction angle
                  if the peak pair is within 180°±35°. 'unsafe' will
                  calculate a direction angle independent of the peak pair
                  distance.

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy
                      or NumPy array will be returned.

    Returns:
        NumPy array with the shape (x, y, `number_of_directions`)
        containing up to `number_of_directions` direction angles. x equals
        the number of pixels of the SLI image series. If a direction angle
        is invalid or missing, the array entry will be BACKGROUND_COLOR
        instead.
    """
    strategy_dict = {'strict': 0, 'safe': 1, 'unsafe': 2}

    gpu_peak_image = cupy.array(peak_image).astype('int8')
    gpu_centroids = cupy.array(centroids).astype('float32')

    result_img_gpu = cupy.empty(
        (gpu_peak_image.shape[0], gpu_peak_image.shape[1],
         number_of_directions), dtype='float32')
    number_of_peaks = cupy.count_nonzero(gpu_peak_image, axis=-1).astype(
        'int8')

    blocks_per_grid, threads_per_block = prepare_kernel_execution(
        gpu_peak_image)
    _direction[blocks_per_grid, threads_per_block](gpu_peak_image,
                                                   gpu_centroids,
                                                   number_of_peaks,
                                                   result_img_gpu,
                                                   correction_angle,
                                                   strategy_dict[strategy])
    cuda.synchronize()
    del number_of_peaks
    if peak_image is None:
        del gpu_peak_image

    if return_numpy:
        result_img_cpu = cupy.asnumpy(result_img_gpu)
        del result_img_gpu
        return result_img_cpu
    else:
        return result_img_gpu
def update(self):
    """
    Where the magic happens. Finds a threshold that will limit the number
    of params in the network to the tracked_size, and resets those params
    to their initial values to emulate how DropBack would work in real
    hardware.

    Chainer will calculate all grads, and this updater inserts itself
    before the next forward pass can occur to set the parameters back to
    what they should be. Only the params with the largest current-initial
    value will not be reset to initial. This emulates the accumulated
    gradient updates of the actual algorithm.

    :return:
    """
    if self.first_iter:
        self.first_iter = False
        self.params = [i for i in self.opt.target.params()]
        for i, p in enumerate(self.params):
            self.init_params.append(xp.copy(p.data))
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        xp.savez(
            os.path.join(self.output_dir,
                         'init_params_{0}'.format(self.time_stamp)),
            self.init_params)
        if self.tracked_size:
            self.frozen_masks = [None] * len(self.params)

    super(DropBack, self).update()

    if self.decay_init and not self.first_iter:
        for i, _ in enumerate(self.init_params):
            self.init_params[i] = self.init_params[i] * .90

    if self.tracked_size:
        if not self.freeze:
            abs_values = []
            for i, param in enumerate(self.params):
                if param.name == 'b':
                    values = (xp.abs(param.data).flatten()).copy()
                else:
                    values = (xp.abs(param.data -
                                     self.init_params[i]).flatten()).copy()
                abs_values.append(values)
            abs_vals = xp.concatenate(abs_values)
            thresh = xp.partition(abs_vals,
                                  self.tracked_size)[-self.tracked_size]

        for i, param in enumerate(self.params):
            if param.name == 'b':
                if self.freeze:
                    mask = self.frozen_masks[i]
                else:
                    mask = xp.abs(param.data) > thresh
                param.data = mask * param.data
            else:
                if self.freeze:
                    mask = self.frozen_masks[i]
                else:
                    mask = xp.abs(param.data - self.init_params[i]) > thresh
                param.data = mask * param.data + \
                    self.init_params[i] * ~mask
            self.frozen_masks[i] = mask

        if self.iteration == 3465:
            print("Checking inv...")
            total_sum = sum([
                xp.count_nonzero(p.data != self.init_params[i])
                for i, p in enumerate(self.params)
            ])
            print("********\n\n Total non zero is: {}\n\n1*********".format(
                total_sum))
            assert total_sum <= self.tracked_size * 1.1

    if self.track:
        if (self.iteration - 1) % 100 == 0:
            flat_now = xp.concatenate(
                [i.array.ravel() for i in self.params])
            flat_0 = xp.concatenate([i.ravel() for i in self.init_params])
            xp.savez(
                os.path.join(self.output_dir, f'l2_{self.iteration-1}'),
                xp.linalg.norm(flat_now - flat_0))
            xp.savez(
                os.path.join(self.output_dir,
                             f'param_hist_{self.iteration-1}'),
                xp.concatenate([
                    i.array.ravel() for i in self.params
                    if i.name == 'b' or i.name == 'W'
                ]))
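# The xp.partition call above is the core of the DropBack emulation: it
# finds the magnitude threshold below which parameter deltas get reset to
# their initial values. A NumPy sketch of that top-k selection (the delta
# values are made up):
import numpy as np

deltas = np.array([0.05, 0.9, 0.3, 0.02, 0.7, 0.1])
tracked_size = 3
# partition places the tracked_size largest values at the end; the
# threshold is the smallest value that is still among them
thresh = np.partition(deltas, tracked_size)[-tracked_size]
keep = deltas > thresh
print(thresh)  # 0.3
print(keep)    # only 0.9 and 0.7 exceed the strict threshold; 0.3 is reset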
def fit_genetic_algorithm(M_PROBABILITY, X_PROBABILITY, P_SIZE, N_GEN,
                          SEL_M, CROSS_M, MUT_METHOD, K, items, encoding,
                          BEST_CHR_CONV_LIM, PERCENT_CHR_CONV_LIM):
    for user in tqdm.tqdm_notebook(range(items.shape[0])):
        # define evaluations list to plot afterwards
        U_EVALS_LIST = list()
        # fetch user's rating top-k neighbors (k=10)
        neighbors_data, neighbors_indeces = fetch_neighborhood(user, items,
                                                               K)
        # fetch optimal solution vector
        optim_values, optim_indeces = fetch_optim(user, items,
                                                  neighbors_indeces)

        # reformat optimal solution vector:
        # trim non-neighbor users
        optim_values = optim_values[neighbors_indeces]
        # count non-zero ratings per item (used as the divisor below)
        optim_mean = cupy.count_nonzero([optim_values], axis=1)
        # find mean of all k vectors and flatten the optimal solution to a
        # single vector, dropping zero values
        optim_values = optim_values.sum(axis=0)
        optim_indeces = optim_indeces[optim_values != 0]
        optim_values = optim_values[optim_values != 0]
        optim_mean = optim_mean[optim_mean != 0]
        optim_values = optim_values / optim_mean
        # round up mean
        optim_values = cupy.ceil(optim_values).astype(cupy.int64)

        # randomly initialize population (chromosomes)
        chromosomes = cupy.random.choice(cupy.unique(optim_values),
                                         (P_SIZE, optim_values.shape[0]))
        # initialize error (best_chr) array
        evaluation_best_chr = cupy.zeros(N_GEN)
        # initialize error (gen) array
        evaluation_overall_gen = cupy.zeros(N_GEN)
        # initialize best_chr counter
        BEST_CHR_CONV_CTR = 0
        # initialize early stopping conditional
        stop = False

        # fit GA
        for gen in tqdm.tqdm_notebook(range(N_GEN)):
            # select and crossover population
            chromosomes = crossover_chromosomes(X_PROBABILITY, CROSS_M,
                                                SEL_M, chromosomes,
                                                optim_values, encoding)
            # mutate chromosomes
            chromosomes = mutate_chromosomes(M_PROBABILITY, MUT_METHOD,
                                             chromosomes, optim_values)
            # check GA convergence
            stop, evaluation_best_chr, evaluation_overall_gen, \
                BEST_CHR_CONV_CTR, cause = genetic_algorithm_convergence(
                    evaluation_best_chr, evaluation_overall_gen, gen,
                    chromosomes, optim_values, BEST_CHR_CONV_LIM,
                    BEST_CHR_CONV_CTR, PERCENT_CHR_CONV_LIM)

            # if GA converges
            if stop is True:
                # print the met GA convergence conditional
                print("User [", user, "]\tGen [", gen,
                      "]\t: GA converged (", cause, ")")
                # save population
                save_population(user, chromosomes, optim_values)
                # save different fitness metrics
                save_evaluations(user, chromosomes, optim_values)
                # stop fitting
                break

            # custom convergence conditional: evaluate by chromosome
            # accuracy [90% accuracy convergence]
            if cupy.mean(cupy.fromiter(
                    (evaluate_chromosome(chromosomes[i], optim_values)
                     for i in range(chromosomes.shape[0])),
                    cupy.int64)) / chromosomes[0].shape[0] > 0.9:
                # print population accuracy
                print("Mean accuracy: ",
                      cupy.mean(cupy.fromiter(
                          (evaluate_chromosome(chromosomes[i],
                                               optim_values)
                           for i in range(chromosomes.shape[0])),
                          cupy.int64)),
                      "\tTotal chromosomes: ", chromosomes[0].shape[0])
                # save population
                save_population(user, chromosomes, optim_values)
                # save different fitness metrics
                save_evaluations(user, chromosomes, optim_values)
                # stop fitting
                break

            # append mean population fitness
            U_EVALS_LIST.append(cupy.mean(cupy.fromiter(
                (evaluate_chromosome(chromosomes[i], optim_values)
                 for i in range(chromosomes.shape[0])), cupy.int64)))

        # save population
        save_population(user, chromosomes, optim_values)
        # save different fitness metrics
        save_evaluations(user, chromosomes, optim_values)

        # convert mean population fitness list to cupy array
        U_EVALS_ARRAY = cupy.asarray(U_EVALS_LIST)
        # define x axis of plot
        x_axis = cupy.arange(U_EVALS_ARRAY.shape[0])
        # plot mean population fitness array
        matplotlib.pyplot.clf()
        matplotlib.pyplot.plot(x_axis, U_EVALS_ARRAY,
                               label="User " + str(user) + " - Evaluation")
        matplotlib.pyplot.legend()
        # matplotlib.pyplot.show()
        # save figure
        matplotlib.pyplot.savefig("user_" + str(user) + "-evaluation.png",
                                  bbox_inches='tight')
def _count_accurate_predictions(y_hat_y):
    y_hat, y = y_hat_y
    y_hat = cp.asarray(y_hat, dtype=y_hat.dtype)
    y = cp.asarray(y, dtype=y.dtype)
    return y.shape[0] - cp.count_nonzero(y - y_hat)
def count_nan_inf(x):
    bool_arr = cp.isinf(x) | cp.isnan(x)
    return cp.count_nonzero(bool_arr)
def time_count_nonzero_axis(self, numaxes, size, dtype):
    np.count_nonzero(self.x, axis=self.x.ndim - 1)
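# The benchmark methods above exercise count_nonzero without an axis, with
# a tuple of axes, and with a single axis. A small NumPy example of the
# three result shapes:
import numpy as np

x = np.arange(24).reshape(2, 3, 4) % 3  # mix of zeros and non-zeros
print(np.count_nonzero(x))                     # scalar: whole array
print(np.count_nonzero(x, axis=x.ndim - 1))    # shape (2, 3)
print(np.count_nonzero(x, axis=(x.ndim - 1, x.ndim - 2)))  # shape (2,)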
k = 0
times = 0
idx = cp.arange(train_datas)
x = list()
y = list()
print("Starting training...")
while cp.less(k, train_datas):
    times = cp.add(times, 1)
    output = train.TTTG_Network(
        input_data, weight, baise, weight2, baise2, weight3, baise3,
        weight4, baise4, L, expect_data, True, True)
    if times % 10000 == 0:
        error = cp.sum(output[0])
        result = expect_data[idx, output[1]]
        k = cp.count_nonzero(result)
        # result = cp.equal(expect_data, cp.asarray(output[1]))  # AND gate
        expect_idx = list()
        for i in map(cp.nonzero, expect_data[result == 0]):
            expect_idx.append(i[0].tolist())
        print("Input board, output position, expected output, actual "
              "output:",
              *list(zip(input_data[result == 0].tolist(),
                        output[1][result == 0].tolist(),
                        expect_idx,
                        output[2][result == 0].tolist())), sep='\n')
        print("Training run %d (learning rate: %f):" % (times, L))
        print("Total error:", error)
        print("Correct / total samples: %d/%d" % (k, train_datas))
        # x.append(L)
        # y.append(output[0])
        # L = L * 10
        # if L >= 1:
        #     plt.plot([5, 4, 3, 2, 1, 0], y)
        #     plt.show()
def _binary_erosion(input, structure, iterations, mask, output,
                    border_value, origin, invert, brute_force=True):
    try:
        iterations = operator.index(iterations)
    except TypeError:
        raise TypeError('iterations parameter should be an integer')

    if input.dtype.kind == 'c':
        raise TypeError('Complex type not supported')
    if structure is None:
        structure = generate_binary_structure(input.ndim, 1)
        all_weights_nonzero = input.ndim == 1
        center_is_true = True
        default_structure = True
    else:
        structure = structure.astype(dtype=bool, copy=False)
        # transfer to CPU for use in determining if it is fully dense
        # structure_cpu = cupy.asnumpy(structure)
        default_structure = False
    if structure.ndim != input.ndim:
        raise RuntimeError('structure and input must have same '
                           'dimensionality')
    if not structure.flags.c_contiguous:
        structure = cupy.ascontiguousarray(structure)
    if structure.size < 1:
        raise RuntimeError('structure must not be empty')

    if mask is not None:
        if mask.shape != input.shape:
            raise RuntimeError('mask and input must have equal sizes')
        if not mask.flags.c_contiguous:
            mask = cupy.ascontiguousarray(mask)
        masked = True
    else:
        masked = False
    origin = _util._fix_sequence_arg(origin, input.ndim, 'origin', int)

    if isinstance(output, cupy.ndarray):
        if output.dtype.kind == 'c':
            raise TypeError('Complex output type not supported')
    else:
        output = bool
    output = _util._get_output(output, input)
    temp_needed = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS')
    if temp_needed:
        # input and output arrays cannot share memory
        temp = output
        output = _util._get_output(output.dtype, input)
    if structure.ndim == 0:
        # kernel doesn't handle ndim=0, so special case it here
        if float(structure):
            output[...] = cupy.asarray(input, dtype=bool)
        else:
            output[...] = ~cupy.asarray(input, dtype=bool)
        return output
    origin = tuple(origin)
    int_type = _util._get_inttype(input)
    offsets = _filters_core._origins_to_offsets(origin, structure.shape)
    if not default_structure:
        # synchronize required to determine if all weights are non-zero
        nnz = int(cupy.count_nonzero(structure))
        all_weights_nonzero = nnz == structure.size
        if all_weights_nonzero:
            center_is_true = True
        else:
            center_is_true = _center_is_true(structure, origin)

    erode_kernel = _get_binary_erosion_kernel(
        structure.shape, int_type, offsets, center_is_true, border_value,
        invert, masked, all_weights_nonzero,
    )

    if iterations == 1:
        if masked:
            output = erode_kernel(input, structure, mask, output)
        else:
            output = erode_kernel(input, structure, output)
    elif center_is_true and not brute_force:
        raise NotImplementedError(
            'only brute_force iteration has been implemented')
    else:
        if cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS'):
            raise ValueError('output and input may not overlap in memory')
        tmp_in = cupy.empty_like(input, dtype=output.dtype)
        tmp_out = output
        if iterations >= 1 and not iterations & 1:
            tmp_in, tmp_out = tmp_out, tmp_in
        if masked:
            tmp_out = erode_kernel(input, structure, mask, tmp_out)
        else:
            tmp_out = erode_kernel(input, structure, tmp_out)
        # TODO: kernel doesn't return the changed status, so determine it
        # here
        changed = not (input == tmp_out).all()  # synchronize!
        ii = 1
        while ii < iterations or ((iterations < 1) and changed):
            tmp_in, tmp_out = tmp_out, tmp_in
            if masked:
                tmp_out = erode_kernel(tmp_in, structure, mask, tmp_out)
            else:
                tmp_out = erode_kernel(tmp_in, structure, tmp_out)
            changed = not (tmp_in == tmp_out).all()  # synchronize!
            ii += 1
            if not changed and (not ii & 1):
                # can exit early if nothing changed
                # (only do this after even number of tmp_in/out swaps)
                break
        output = tmp_out

    if temp_needed:
        temp[...] = output
        output = temp
    return output
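# The count_nonzero check above lets the kernel skip per-weight tests when
# the structuring element is fully dense. For context, this function
# mirrors scipy.ndimage's binary erosion; a runnable CPU example of the
# operation itself:
import numpy as np
from scipy import ndimage

img = np.zeros((5, 5), dtype=bool)
img[1:4, 1:4] = True                  # 3x3 solid square
eroded = ndimage.binary_erosion(img)  # default 4-connected structure
print(np.count_nonzero(img), np.count_nonzero(eroded))  # 9 1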
            m = m + 1
            x = []
            continue
        elif len(line) == 1 and m == 3:
            # print(x)
            nsw = n - nbus - nbrch - ngen
            ipt_switch = x
            continue
        x.append(line)
        n = n + 1

    ipt_bus = cp.array(ipt_bus, dtype=cp.float64)
    ipt_gen = cp.array(ipt_gen, dtype=cp.float64)
    ipt_brch = cp.array(ipt_brch, dtype=cp.float64)
    ipt_switch = cp.array(ipt_switch, dtype=cp.float64)
    nPV = cp.count_nonzero(ipt_bus[:, 9] == 2)
    # print("Data read in successfully!")
    # print("nbus:", nbus, " nbrch:", nbrch, " ngen:", ngen)

    # assign bus data
    bus_int = ipt_bus[:, 0]
    V = ipt_bus[:, 1]
    b_ang = ipt_bus[:, 2]
    b_pg = ipt_bus[:, 3]
    b_qg = ipt_bus[:, 4]
    Pl = ipt_bus[:, 5]
    Ql = ipt_bus[:, 6]
    Gb = ipt_bus[:, 7]
    Bb = ipt_bus[:, 8]
    b_type = ipt_bus[:, 9]

    # assign branch data
    # (cp.int is a removed alias; use an explicit integer dtype instead)
    from_bus = ipt_brch[:, 0].astype(cp.int64)
def accuracy(a_hat, a_true):
    result = cp.equal(a_hat, a_true)
    right = cp.count_nonzero(result)
    return right / a_hat.shape[1]
def threshold_multiotsu(image, classes=3, nbins=256):
    r"""Generate `classes`-1 threshold values to divide gray levels in
    `image`.

    The threshold values are chosen to maximize the total sum of pairwise
    variances between the thresholded graylevel classes. See Notes and
    [1]_ for more details.

    Parameters
    ----------
    image : (N, M) ndarray
        Grayscale input image.
    classes : int, optional
        Number of classes to be thresholded, i.e. the number of resulting
        regions.
    nbins : int, optional
        Number of bins used to calculate the histogram. This value is
        ignored for integer arrays.

    Returns
    -------
    thresh : array
        Array containing the threshold values for the desired classes.

    Raises
    ------
    ValueError
        If ``image`` contains fewer grayscale values than the desired
        number of classes.

    Notes
    -----
    This implementation relies on a Cython function whose complexity is
    :math:`O\left(\frac{Ch^{C-1}}{(C-1)!}\right)`, where :math:`h` is the
    number of histogram bins and :math:`C` is the number of classes
    desired. The input image must be grayscale.

    References
    ----------
    .. [1] Liao, P-S., Chen, T-S. and Chung, P-C., "A fast algorithm for
           multilevel thresholding", Journal of Information Science and
           Engineering 17 (5): 713-727, 2001. Available at:
           <https://ftp.iis.sinica.edu.tw/JISE/2001/200109_01.pdf>
           :DOI:`10.6688/JISE.2001.17.5.1`
    .. [2] Tosa, Y., "Multi-Otsu Threshold", a java plugin for ImageJ.
           Available at:
           <http://imagej.net/plugins/download/Multi_OtsuThreshold.java>

    Examples
    --------
    >>> import cupy as cp
    >>> from cucim.skimage.color import label2rgb
    >>> from skimage import data
    >>> image = cp.asarray(data.camera())
    >>> thresholds = threshold_multiotsu(image)
    >>> regions = cp.digitize(image, bins=thresholds)
    >>> regions_colorized = label2rgb(regions)
    """
    try:
        from skimage.filters._multiotsu import (
            _get_multiotsu_thresh_indices, _get_multiotsu_thresh_indices_lut)
    except ImportError:
        raise ImportError(
            "could not import the required (private) multi-otsu helper "
            "functions from scikit-image")

    if len(image.shape) > 2 and image.shape[-1] in (3, 4):
        msg = ("threshold_multiotsu is expected to work correctly only for "
               "grayscale images; image shape {0} looks like an RGB image")
        warn(msg.format(image.shape))

    # calculating the histogram and the probability of each gray level.
    prob, bin_centers = histogram(image.ravel(),
                                  nbins=nbins,
                                  source_range='image',
                                  normalize=True)

    nvalues = cp.count_nonzero(prob)
    if nvalues < classes:
        msg = ("The input image has only {} different values. "
               "It cannot be thresholded into {} classes.")
        raise ValueError(msg.format(nvalues, classes))
    elif nvalues == classes:
        thresh_idx = cp.where(prob > 0)[0][:-1]
    else:
        # Need probabilities on the CPU to use the Cython code
        # CuPy Backend: prob is small, so CPU computations should be faster
        prob = cp.asnumpy(prob)  # synchronization!
        prob = prob.astype("float32")

        # Get threshold indices
        try:
            thresh_idx = _get_multiotsu_thresh_indices_lut(prob,
                                                           classes - 1)
        except MemoryError:
            # Don't use LUT if the number of bins is too large (if the
            # image is uint16 for example): in this case, the allocated
            # memory is too large.
            thresh_idx = _get_multiotsu_thresh_indices(prob, classes - 1)
        # transfer indices back to the GPU
        thresh_idx = cp.asarray(thresh_idx)  # synchronization!

    thresh = bin_centers[thresh_idx]

    return thresh