Code example #1
File: norms.py Project: puat133/MCMC-MultiSPDE
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, 'i')
    info = cupy.empty((), 'i')

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    #<-- MODIFIED
    elif dtype == 'd':
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf
    elif dtype == 'F':
        getrf_bufferSize = cusolver.cgetrf_bufferSize
        getrf = cusolver.cgetrf
    else:
        getrf_bufferSize = cusolver.zgetrf_bufferSize
        getrf = cusolver.zgetrf
    #<-- MODIFIED

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr, ipiv.data.ptr,
          info.data.ptr)

    if info[()] == 0:
        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == (-1) ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    else:
        sign = cupy.array(0.0, dtype=dtype)
        #ORIGINAL
        # logdet = cupy.array(float('-inf'), dtype)

        #<-- MODIFIED
        if dtype in ['f', 'd']:
            logdet = cupy.array(float('-inf'), dtype)
        elif dtype == 'F':
            logdet = cupy.array(float('-inf'), cupy.float32)
        else:
            logdet = cupy.array(float('-inf'), cupy.float64)
        #<-- MODIFIED

    return sign, logdet
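The pivot bookkeeping above can be sanity-checked on the CPU. A minimal
sketch, assuming SciPy's lu_factor is available (its piv is 0-indexed,
unlike cuSOLVER's 1-origin ipiv):

import numpy as np
from scipy.linalg import lu_factor

def slogdet_via_lu(a):
    lu, piv = lu_factor(a)  # LU factorization with partial pivoting
    diag = np.diag(lu)
    # Each pivot row swap and each negative diagonal entry flips the sign.
    n_flips = (np.count_nonzero(piv != np.arange(len(a))) +
               np.count_nonzero(diag < 0))
    sign = (n_flips % 2) * -2 + 1
    return sign, np.log(np.abs(diag)).sum()

a = np.array([[4.0, 2.0], [7.0, 6.0]])  # det = 10
print(slogdet_via_lu(a))                # (1, 2.302...)
print(np.linalg.slogdet(a))             # reference: (1.0, 2.302...)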
Code example #2
File: toolbox.py Project: 3d-pli/SLIX
def mean_peak_distance(peak_image, centroids, return_numpy=True):
    """
    Calculate the mean peak distance in degrees between two corresponding peaks
    for each line profile in an SLI image series.

    Args:

        peak_image: Boolean NumPy array specifying the peak positions in the
        full SLI stack

        centroids: Use centroid calculation to better determine the peak
        position regardless of the number of measurements / illumination
        angles used.

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy or
        NumPy array will be returned.

    Returns:

        NumPy array of floating point values containing the mean peak distance of
        the line profiles in degrees.
    """
    peak_distance_gpu = peak_distance(peak_image, centroids,
                                      return_numpy=False)
    peak_distance_gpu[peak_distance_gpu > 180] = 0
    peak_distance_gpu = cupy.sum(peak_distance_gpu, axis=-1) / \
                        cupy.maximum(1, cupy.count_nonzero(peak_distance_gpu,
                                                           axis=-1))
    if return_numpy:
        peak_distance_cpu = cupy.asnumpy(peak_distance_gpu)
        del peak_distance_gpu
        return peak_distance_cpu
    else:
        return peak_distance_gpu
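The sum / maximum(1, count_nonzero) idiom above is a general recipe for a
mean over non-zero entries that cannot divide by zero; a small NumPy sketch
(the same calls work with CuPy):

import numpy as np

dist = np.array([[30., 0., 150.],
                 [0., 0., 0.]])
# maximum(1, ...) guards rows with no non-zero entries against 0/0
mean = dist.sum(axis=-1) / np.maximum(1, np.count_nonzero(dist, axis=-1))
print(mean)  # [90.  0.]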
Code example #3
def batch_all_triplet_loss(embeddings, labels, margin=0.2, dist_type='l2'):
    """Build the triplet loss over a batch of embeddings.

    We generate all the valid triplets and average the loss over the positive ones.

    Args:
        embeddings: Variable of shape=(batch_size, embed_dim)
        labels: labels of the batch, of size=(batch_size,)
        margin: margin for triplet loss
        dist_type: definition of distance, 'l2' or 'cos'

    Returns:
        triplet_loss: scalar Variable containing the triplet loss
    """
    # distance(f(xa), f(xp)) - distance(f(xa), f(xn)) + alpha
    pairwise_dist = _pairwise_distances(embeddings, dist_type)
    anchor_positive_dist = F.expand_dims(pairwise_dist, axis=2)
    anchor_negative_dist = F.expand_dims(pairwise_dist, axis=1)
    triplet_loss = anchor_positive_dist - anchor_negative_dist + margin

    # Set invalid triplet [i, j, k] to 0.
    mask = _get_triplet_mask(labels)
    triplet_loss = mask * triplet_loss

    # Zero out triplets that are already sufficiently separated (negative loss).
    triplet_loss = F.relu(triplet_loss)

    # Calculate mean of loss.
    total = F.sum(triplet_loss)
    count = xp.count_nonzero(triplet_loss.data)
    return total / count if (count > 0.0) else chainer.Variable(
        xp.array(0.0, dtype=xp.float32))
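The broadcast in batch_all_triplet_loss (the snippet assumes Chainer's F and
an xp array module are in scope) expands a (B, B) pairwise distance matrix
into all (anchor, positive, negative) combinations. A NumPy illustration of
just that step, with a made-up distance matrix:

import numpy as np

pairwise = np.array([[0., 1., 4.],
                     [1., 0., 2.],
                     [4., 2., 0.]])
margin = 0.2
# (B, B, 1) - (B, 1, B) broadcasts to (B, B, B):
# triplet[a, p, n] = d(a, p) - d(a, n) + margin
triplet = pairwise[:, :, None] - pairwise[:, None, :] + margin
print(triplet.shape)     # (3, 3, 3)
print(triplet[0, 1, 2])  # 1 - 4 + 0.2 = -2.8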
Code example #4
File: norms.py Project: take-cheeze/cupy
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, dtype=numpy.int32)
    dev_info = cupy.empty((), dtype=numpy.int32)

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    else:
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr, ipiv.data.ptr,
          dev_info.data.ptr)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that
    # should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diag(a_copy)
    # ipiv is 1-origin
    non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                cupy.count_nonzero(diag < 0))

    # Note: sign == (-1) ** (non_zero % 2)
    sign = (non_zero % 2) * -2 + 1
    logdet = cupy.log(abs(diag)).sum()

    singular = dev_info > 0
    return (
        cupy.where(singular, dtype.type(0), sign),
        cupy.where(singular, dtype.type('-inf'), logdet),
    )
Code example #5
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, dtype=numpy.int32)
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    else:
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    try:
        cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
            getrf, dev_info)

        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == (-1) ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    except linalg.LinAlgError:
        sign = cupy.array(0.0, dtype=dtype)
        logdet = cupy.array(float('-inf'), dtype)

    return sign, logdet
Code example #6
    def count_nonzero(self):
        """Returns number of non-zero entries.

        .. note::
           This method counts the actual number of non-zero entries, which
           does not include explicit zero entries.
           Instead ``nnz`` returns the number of entries including explicit
           zeros.

        Returns:
            Number of non-zero entries.

        """
        return cupy.count_nonzero(self.data)
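The nnz / count_nonzero distinction matters once explicit zeros are stored.
A sketch using scipy.sparse, whose CSR interface cupyx.scipy.sparse mirrors:

import numpy as np
from scipy.sparse import csr_matrix

data = np.array([1.0, 0.0, 3.0])  # one explicitly stored zero
indices = np.array([0, 1, 2])
indptr = np.array([0, 2, 3])
m = csr_matrix((data, indices, indptr), shape=(2, 3))
print(m.nnz)              # 3 -- stored entries, explicit zero included
print(m.count_nonzero())  # 2 -- actual non-zero values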
Code example #7
File: toolbox.py Project: 3d-pli/SLIX
def inclination_sign(peak_image, centroids, correction_angle=0, return_numpy=True):
    """
    Calculate the inclination sign from the peak positions.
    The inclination sign is based on the peak distance between two peaks.

    Explanation of the results:
    -1: The minimal peak distance is behind the first peak (wrapping around)
    0: This pixel / line profile has more than two peaks
    1: The minimal peak distance is in front of the first peak.

    Args:

        peak_image: Boolean NumPy array specifying the peak positions in the
        full SLI stack

        centroids: Centroids resulting from `centroid_correction` for more
        accurate results

        correction_angle: Correct the resulting direction angle by this
        value. Useful when the stack or camera was rotated.

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy or
        NumPy array will be returned.

    Returns:

        inclination_sign: 3D NumPy array
            inclination sign
    """
    gpu_peak_image = cupy.array(peak_image).astype('int8')
    gpu_centroids = cupy.array(centroids).astype('float32')

    result_img_gpu = cupy.empty(
        (gpu_peak_image.shape[0], gpu_peak_image.shape[1]), dtype='float32')
    number_of_peaks = cupy.count_nonzero(gpu_peak_image, axis=-1).astype(
        'int8')

    threads_per_block = (1, 1)
    blocks_per_grid = gpu_peak_image.shape[:-1]
    _inclination_sign[blocks_per_grid, threads_per_block](gpu_peak_image,
                                                          gpu_centroids,
                                                          number_of_peaks,
                                                          result_img_gpu,
                                                          correction_angle)
    cuda.synchronize()
    del number_of_peaks

    if peak_image is None:
        del gpu_peak_image

    if return_numpy:
        result_img_cpu = cupy.asnumpy(result_img_gpu)
        del result_img_gpu
        return result_img_cpu
    else:
        return result_img_gpu
Code example #8
File: toolbox.py Project: 3d-pli/SLIX
def mean_peak_prominence(image, peak_image=None, kind_of_normalization=0,
                         return_numpy=True):
    """
    Calculate the mean peak prominence of all given peak positions within a
    line profile. The line profile is normalized by dividing it by its mean
    value, so values above 1 are possible.

    Args:

        image: Original line profile used to detect all peaks. This array will be
            further analyzed to better determine the peak positions.

        peak_image: Boolean NumPy array specifying the peak positions in the full
        SLI stack

        kind_of_normalization: Normalize given line profile by using a
        normalization technique based on the kind_of_normalization parameter.
           0 : Scale line profile to be between 0 and 1
           1 : Divide line profile through its mean value

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy or Numpy
        array will be returned.

    Returns:

        Floating point value containing the mean peak prominence of the line
        profile.
    """
    if peak_image is not None:
        gpu_peak_image = cupy.array(peak_image).astype('uint8')
    else:
        gpu_peak_image = peaks(image, return_numpy=False).astype('uint8')
    peak_prominence_gpu = peak_prominence(image, peak_image,
                                          kind_of_normalization,
                                          return_numpy=False)
    peak_prominence_gpu = cupy.sum(peak_prominence_gpu, axis=-1) / \
                          cupy.maximum(1, cupy.count_nonzero(gpu_peak_image,
                                                             axis=-1))
    peak_prominence_gpu = peak_prominence_gpu.astype('float32')

    del gpu_peak_image
    if return_numpy:
        peak_prominence_cpu = cupy.asnumpy(peak_prominence_gpu)
        del peak_prominence_gpu
        return peak_prominence_cpu
    else:
        return peak_prominence_gpu
Code example #9
def _get_median(data, n_zeros):
    """Compute the median of data with n_zeros additional zeros.

    This function is used to support sparse matrices; it modifies data in-place
    """
    n_elems = len(data) + n_zeros
    if not n_elems:
        return np.nan
    n_negative = np.count_nonzero(data < 0)
    middle, is_odd = divmod(n_elems, 2)
    data.sort()

    if is_odd:
        return _get_elem_at_rank(middle, data, n_negative, n_zeros)

    return (_get_elem_at_rank(middle - 1, data, n_negative, n_zeros) +
            _get_elem_at_rank(middle, data, n_negative, n_zeros)) / 2.
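The companion helper _get_elem_at_rank is not shown here; a plausible sketch
of it, with a worked call (the virtual sorted sequence [-3, 0, 0, 0, 5] has
median 0):

import numpy as np

def _get_elem_at_rank(rank, data, n_negative, n_zeros):
    # Value at `rank` in sorted(data) with n_zeros zeros spliced in
    # between the negative and the positive entries.
    if rank < n_negative:
        return data[rank]
    if rank - n_negative < n_zeros:
        return 0
    return data[rank - n_zeros]

data = np.array([5.0, -3.0])
data.sort()
n_negative = np.count_nonzero(data < 0)
print(_get_elem_at_rank(2, data, n_negative, n_zeros=3))  # 0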
Code example #10
File: toolbox.py Project: 3d-pli/SLIX
def mean_peak_width(image, peak_image=None, target_height=0.5,
                    return_numpy=True):
    """
    Calculate the mean peak width of all given peak positions within a line
    profile.

    Args:

        image: Original line profile used to detect all peaks. This array will be
        further analyzed to better determine the peak positions.

        peak_image: Boolean NumPy array specifying the peak positions in the full
        SLI stack

        target_height: Relative peak height in relation to the prominence of the
        given peak.

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy or Numpy
        array will be returned.

    Returns:

        NumPy array where each entry corresponds to the mean peak width of the
        line profile. The values are in degrees.
    """
    if peak_image is not None:
        gpu_peak_image = cupy.array(peak_image).astype('uint8')
    else:
        gpu_peak_image = peaks(image, return_numpy=False).astype('uint8')
    peak_width_gpu = peak_width(image, gpu_peak_image, target_height,
                                return_numpy=False)
    peak_width_gpu = cupy.sum(peak_width_gpu, axis=-1) / \
                     cupy.maximum(1, cupy.count_nonzero(gpu_peak_image,
                                                        axis=-1))

    del gpu_peak_image
    if return_numpy:
        peak_width_cpu = cupy.asnumpy(peak_width_gpu)
        del peak_width_gpu
        return peak_width_cpu
    else:
        return peak_width_gpu
Code example #11
def deskew(image, angle, dz, pixel_size):
    deskewed = deskewGPU(image, angle, dz, pixel_size)

    image_cp = cp.array(image)
    deskewed_cp = cp.array(deskewed)

    pages, col, row = image_cp.shape
    # int() is needed so noise_size can be used as a slice bound below
    noise_size = int(cp.ceil(cp.max(cp.array([row, col])) * 0.1))
    image_noise_patch = image_cp[0:noise_size,
                                 col - (noise_size + 1):col - 1, :]
    image_noise_patch = image_noise_patch.flatten()

    fill_length = deskewed_cp.size - cp.count_nonzero(deskewed_cp)
    repeat_frequency = cp.ceil(fill_length / image_noise_patch.size)
    repeat_frequency = cp.asnumpy(repeat_frequency).flatten().astype(
        dtype=np.uint16)[0]
    noise = cp.tile(image_noise_patch, repeat_frequency + 1)
    noise = noise[0:fill_length]
    deskewed_cp[deskewed_cp == 0] = noise

    return cp.asnumpy(deskewed_cp)
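The fill pattern at the end of deskew -- count the zero elements, then tile a
noise patch to cover exactly that many -- in a compact NumPy form (the CuPy
calls behave the same):

import numpy as np

arr = np.array([3., 0., 5., 0., 0.])
noise = np.array([0.1, 0.2])
fill_length = arr.size - np.count_nonzero(arr)
reps = int(np.ceil(fill_length / noise.size))
arr[arr == 0] = np.tile(noise, reps)[:fill_length]
print(arr)  # [3.  0.1 5.  0.2 0.1]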
Code example #12
File: toolbox.py Project: 3d-pli/SLIX
def num_peaks(image=None, peak_image=None, return_numpy=True):
    """
    Calculate the number of peaks from each line profile in an SLI image series
    by detecting all peaks and applying thresholds to remove unwanted peaks.

    Args:

        image: Full SLI measurement (series of images) which is prepared for the
               pipeline using the SLIX toolbox methods.

        peak_image: Boolean NumPy array specifying the peak positions in the
                    full SLI stack

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy or Numpy
                      array will be returned.

    Returns:

        Array where each entry corresponds to the number of detected peaks within
        the first dimension of the SLI image series.
    """

    if peak_image is None and image is not None:
        peak_image = peaks(image, return_numpy=False)
    elif peak_image is not None:
        peak_image = cupy.array(peak_image)
    else:
        raise ValueError('Either image or peak_image has to be defined.')

    resulting_image = cupy.count_nonzero(peak_image, axis=-1) \
        .astype(cupy.uint16)
    if return_numpy:
        resulting_image_cpu = cupy.asnumpy(resulting_image)
        del resulting_image
        return resulting_image_cpu
    else:
        return resulting_image
Code example #13
File: vector_fields.py Project: dipy/cudipy
def compose_vector_fields(
    d1,
    d2,
    premult_index,
    premult_disp,
    time_scaling,
    comp=None,
    order=1,
    *,
    coord_axis=-1,
    omit_stats=False,
    xcoords=None,
    Y=None,
    Z=None,
):
    if comp is None:
        comp = cupy.empty_like(d1, order="C")

    # need vector elements on first axis, not last
    if coord_axis != 0:
        d1 = cupy.ascontiguousarray(cupy.moveaxis(d1, -1, 0))
        d2 = cupy.ascontiguousarray(cupy.moveaxis(d2, -1, 0))
    else:
        if not d1.flags.c_contiguous:
            d1 = cupy.ascontiguousarray(d1)
        if not d2.flags.c_contiguous:
            d2 = cupy.ascontiguousarray(d2)
    ndim = d1.shape[0]
    B = premult_disp
    A = premult_index
    t = time_scaling

    if xcoords is None:
        xcoords = cupy.meshgrid(
            *[cupy.arange(s, dtype=d1.real.dtype) for s in d1.shape[1:]],
            indexing="ij",
            sparse=True,
        )

    # TODO: reduce number of temporary arrays
    if ndim in [2, 3]:
        if Y is None:
            Y = cupy.empty_like(d1)
        if A is None:
            if B is None:
                if ndim == 3:
                    composeNone_3d(
                        d1[0],
                        d1[1],
                        d1[2],
                        xcoords[0],
                        xcoords[1],
                        xcoords[2],
                        Y[0],
                        Y[1],
                        Y[2],
                    )
                else:
                    composeNone_2d(d1[0], d1[1], xcoords[0], xcoords[1], Y[0],
                                   Y[1])
            else:
                B = cupy.asarray(B[:ndim, :ndim], dtype=d1.dtype, order="C")
                if ndim == 3:
                    composeB_3d(
                        d1[0],
                        d1[1],
                        d1[2],
                        xcoords[0],
                        xcoords[1],
                        xcoords[2],
                        B,
                        Y[0],
                        Y[1],
                        Y[2],
                    )
                else:
                    composeB_2d(d1[0], d1[1], xcoords[0], xcoords[1], B, Y[0],
                                Y[1])
        elif B is None:
            A = cupy.asarray(A[:ndim, :], dtype=d1.dtype, order="C")
            if ndim == 3:
                composeA_3d(xcoords[0], xcoords[1], xcoords[2], A, Y[0], Y[1],
                            Y[2])
            else:
                composeA_2d(xcoords[0], xcoords[1], A, Y[0], Y[1])
        else:
            A = cupy.asarray(A[:ndim, :], dtype=d1.dtype, order="C")
            B = cupy.asarray(B[:ndim, :ndim], dtype=d1.dtype, order="C")
            if ndim == 3:
                composeAB_3d(
                    d1[0],
                    d1[1],
                    d1[2],
                    xcoords[0],
                    xcoords[1],
                    xcoords[2],
                    B,
                    A,
                    Y[0],
                    Y[1],
                    Y[2],
                )
            else:
                composeAB_2d(d1[0], d1[1], xcoords[0], xcoords[1], B, A, Y[0],
                             Y[1])
    else:
        if B is None:
            d1tmp = d1.copy()  # have to copy to avoid modification of d1
        else:
            d1tmp = _apply_affine_to_field(d1,
                                           B[:ndim, :ndim],
                                           include_translations=False,
                                           coord_axis=0)

        if A is None:
            Y = d1tmp
            for n in range(ndim):
                Y[n] += xcoords[n]
        else:
            # Y = mul0(A, xcoords, sh, cupy, lastcol=1)
            Y = _apply_affine_to_field(xcoords,
                                       A[:ndim, :],
                                       include_translations=True,
                                       coord_axis=0)
            Y += d1tmp

    if Z is None:
        Z = cupy.empty_like(Y)
    for n in range(ndim):
        Z[n, ...] = ndi.map_coordinates(d2[n], Y, order=1, mode="constant")

    if coord_axis == 0:
        res = comp
    else:
        res = cupy.empty_like(Z)

    if omit_stats and ndim in [2, 3]:
        _shape = cupy.asarray([d1.shape[1 + n] - 1 for n in range(ndim)],
                              dtype=cupy.int32)
        if ndim == 3:
            _comp_apply_masked_time_scaling_3d(
                d1[0],
                d1[1],
                d1[2],
                Y[0],
                Y[1],
                Y[2],
                Z[0],
                Z[1],
                Z[2],
                t,
                _shape,
                res[0],
                res[1],
                res[2],
            )
        else:
            _comp_apply_masked_time_scaling_2d(d1[0], d1[1], Y[0], Y[1], Z[0],
                                               Z[1], t, _shape, res[0], res[1])
    else:
        # TODO: declare count as boolean?
        count = cupy.zeros(Z.shape[1:], dtype=np.int32)

        # We now compute:
        #    res = d1 + t * Z
        #    except that res = 0 where either coordinate in
        #    interpolating Y was outside the displacement extent
        for n in range(ndim):
            _comp_apply_masked_time_scaling_nd(d1[n], Y[n], Z[n], t,
                                               d1.shape[1 + n] - 1, res[n],
                                               count)

        # nnz corresponds to the number of points in comp inside the domain
        count = count > 0  # TODO: remove once count is initialized as boolean
        if not omit_stats:
            nnz = res.size // ndim - cupy.count_nonzero(count)
        res *= ~count[np.newaxis, ...]

    if omit_stats:
        stats = None
    else:
        # compute the stats
        stats = cupy.empty((3, ), dtype=float)
        nn = res[0] * res[0]
        for n in range(1, ndim):
            nn += res[n] * res[n]
        # TODO: do we want stats to be a GPU array or CPU array?
        stats[0] = cupy.sqrt(nn.max())
        mean_norm = nn.sum() / nnz
        stats[1] = cupy.sqrt(mean_norm)
        nn *= nn
        stats[2] = cupy.sqrt(nn.sum() / nnz - mean_norm * mean_norm)

    if coord_axis != 0:
        res = cupy.moveaxis(res, 0, -1)
        comp[...] = res

    return comp, stats
Code example #14
def matrix_rank(x: Array, /, *, rtol=None) -> Array:
    # Note: unlike np.linalg.matrix_rank, this does not accept 0- or 1-
    # dimensional arrays.
    if x.ndim < 2:
        raise np.linalg.LinAlgError(
            "1-dimensional array given. Array must be at least two-dimensional"
        )
    S = np.linalg.svd(x._array, compute_uv=False)
    if rtol is None:
        tol = S.max(axis=-1, keepdims=True) * max(x.shape[-2:]) * np.finfo(
            S.dtype).eps
    else:
        if isinstance(rtol, Array):
            rtol = rtol._array
        # Note: this is different from np.linalg.matrix_rank, which does not multiply
        # the tolerance by the largest singular value.
        tol = S.max(axis=-1, keepdims=True) * np.asarray(rtol)[..., np.newaxis]
    return Array._new(np.count_nonzero(S > tol, axis=-1))


# Note: this function is new in the array API spec. Unlike transpose, it only
# transposes the last two axes.
def matrix_transpose(x: Array, /) -> Array:
    if x.ndim < 2:
        raise ValueError(
            "x must be at least 2-dimensional for matrix_transpose")
    return Array._new(np.swapaxes(x._array, -1, -2))


# Note: outer is the numpy top-level namespace, not np.linalg
def outer(x1: Array, x2: Array, /) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.outer <numpy.outer>`.
Code example #15
def slogdet(a):
    """Returns sign and logarithm of the determinant of an array.

    It calculates the natural logarithm of the determinant of a given value.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., N, N)``.

    Returns:
        tuple of :class:`~cupy.ndarray`:
            It returns a tuple ``(sign, logdet)``. ``sign`` represents each
            sign of the determinant as a real number ``0``, ``1`` or ``-1``.
            ``logdet`` represents the natural logarithm of the absolute value
            of the determinant.
            If the determinant is zero, ``sign`` will be ``0`` and ``logdet``
            will be ``-inf``.
            The shapes of both ``sign`` and ``logdet`` are equal to
            ``a.shape[:-2]``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. warning::
        To produce the same results as :func:`numpy.linalg.slogdet` for
        singular inputs, set the `linalg` configuration to `raise`.

    .. seealso:: :func:`numpy.linalg.slogdet`
    """
    if a.ndim < 2:
        msg = ('%d-dimensional array given. '
               'Array must be at least two-dimensional' % a.ndim)
        raise linalg.LinAlgError(msg)
    _util._assert_nd_squareness(a)

    dtype = numpy.promote_types(a.dtype.char, 'f')
    real_dtype = numpy.dtype(dtype.char.lower())

    if dtype not in (numpy.float32, numpy.float64,
                     numpy.complex64, numpy.complex128):
        msg = ('dtype must be float32, float64, complex64, or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    a_shape = a.shape
    shape = a_shape[:-2]
    n = a_shape[-2]

    if a.size == 0:
        # empty batch (result is empty, too) or empty matrices det([[]]) == 1
        sign = cupy.ones(shape, dtype)
        logdet = cupy.zeros(shape, real_dtype)
        return sign, logdet

    lu, ipiv, dev_info = _decomposition._lu_factor(a, dtype)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that
    # should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diagonal(lu, axis1=-2, axis2=-1)

    logdet = cupy.log(cupy.abs(diag)).sum(axis=-1)

    # ipiv is 1-origin
    non_zero = cupy.count_nonzero(ipiv != cupy.arange(1, n + 1), axis=-1)
    if dtype.kind == "f":
        non_zero += cupy.count_nonzero(diag < 0, axis=-1)

    # Note: sign == (-1) ** (non_zero % 2)
    sign = (non_zero % 2) * -2 + 1
    if dtype.kind == "c":
        sign = sign * cupy.prod(diag / cupy.abs(diag), axis=-1)

    singular = dev_info > 0
    return (
        cupy.where(singular, dtype.type(0), sign.astype(dtype)).reshape(shape),
        cupy.where(singular, real_dtype.type('-inf'), logdet).reshape(shape),
    )
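cupy.linalg.slogdet is designed to match numpy.linalg.slogdet, including the
(0, -inf) convention for singular inputs; a quick NumPy check of the batched
behaviour it mirrors:

import numpy as np

a = np.array([[[2.0, 0.0], [0.0, 3.0]],   # det = 6
              [[1.0, 2.0], [2.0, 4.0]]])  # singular, det = 0
sign, logdet = np.linalg.slogdet(a)
print(sign)    # [1. 0.]
print(logdet)  # [1.791...  -inf]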
Code example #16
 def _count_accurate_predictions(y_hat, y):
     y_hat = rmm_cupy_ary(cp.asarray, y_hat, dtype=y_hat.dtype)
     y = rmm_cupy_ary(cp.asarray, y, dtype=y.dtype)
     return y.shape[0] - cp.count_nonzero(y - y_hat)
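count_nonzero(y - y_hat) counts mismatching labels, so subtracting it from
the batch size gives the number of correct predictions. The same idea in
plain NumPy:

import numpy as np

y_hat = np.array([1, 0, 2, 2])
y = np.array([1, 0, 1, 2])
correct = y.shape[0] - np.count_nonzero(y - y_hat)  # differences mark errors
print(correct)  # 3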
Code example #17
def _quantile_is_valid(q):
    if cupy.count_nonzero(q < 0.0) or cupy.count_nonzero(q > 1.0):
        return False
    return True
Code example #18
File: bench_core.py Project: okuta/cupy-benchmark
 def time_count_nonzero_multi_axis(self, numaxes, size, dtype):
     if self.x.ndim >= 2:
         np.count_nonzero(self.x, axis=(self.x.ndim - 1, self.x.ndim - 2))
Code example #19
File: bench_core.py Project: okuta/cupy-benchmark
 def time_count_nonzero(self, numaxes, size, dtype):
     np.count_nonzero(self.x)
Code example #20
File: toolbox.py Project: 3d-pli/SLIX
def direction(peak_image, centroids, correction_angle=0,
              number_of_directions=3, strategy='strict', return_numpy=True):
    """
    Calculate up to `number_of_directions` direction angles based on the given
    peak positions. If more than `number_of_directions*2` peaks are present, no
    direction angle will be calculated to avoid errors. This will result in a
    direction angle of BACKGROUND_COLOR. The peak positions are determined by
    the position of the corresponding peak pairs (i.e. 6 peaks: 1+4, 2+5, 3+6).
    If two peaks are too far away or too near (outside of 180°±35°), the
    direction angle will be considered as invalid, resulting in a direction
    angle of BACKGROUND_COLOR.

    Args:

        correction_angle: Correct the resulting direction angle by this value.
        This is useful when the stack or camera was rotated.

        peak_image: Boolean NumPy array specifying the peak positions in the full
        SLI stack

        centroids: Centroids resulting from `centroid_correction` for more accurate
                   results

        number_of_directions: Number of directions which shall be generated.

        strategy: Strategy to determine the direction angle. Possible values are
                  'strict', 'safe' and 'unsafe'. 'strict' will only calculate a direction
                  angle if all peak pairs are within 180°±35°. 'safe' will calculate a
                  direction angle if the peak pair is within 180°±35°. 'unsafe' will
                  calculate a direction angle independent of the peak pair distance.

        return_numpy: Necessary if using `use_gpu`. Specifies if a CuPy or Numpy
        array will be returned.

    Returns:

        NumPy array with the shape (x, y, `number_of_directions`) containing up to
        `number_of_directions` direction angles. x equals the number of pixels of
        the SLI image series. If a direction angle is invalid or missing, the
        array entry will be BACKGROUND_COLOR instead.
    """
    strategy_dict = {'strict': 0, 'safe': 1, 'unsafe': 2}

    gpu_peak_image = cupy.array(peak_image).astype('int8')
    gpu_centroids = cupy.array(centroids).astype('float32')

    result_img_gpu = cupy.empty(
        (gpu_peak_image.shape[0], gpu_peak_image.shape[1],
         number_of_directions), dtype='float32')
    number_of_peaks = cupy.count_nonzero(gpu_peak_image, axis=-1).astype(
        'int8')

    blocks_per_grid, threads_per_block = prepare_kernel_execution(gpu_peak_image)
    _direction[blocks_per_grid, threads_per_block](gpu_peak_image,
                                                   gpu_centroids,
                                                   number_of_peaks,
                                                   result_img_gpu,
                                                   correction_angle,
                                                   strategy_dict[strategy])
    cuda.synchronize()
    del number_of_peaks

    if peak_image is None:
        del gpu_peak_image

    if return_numpy:
        result_img_cpu = cupy.asnumpy(result_img_gpu)
        del result_img_gpu
        return result_img_cpu
    else:
        return result_img_gpu
Code example #21
File: DropBack.py Project: compstruct/DropBack
    def update(self):
        """
        Where the magic happens. Finds a threshold that will limit the number of params in the network
        to the tracked_size, and resets those params to the initial value to emulate how DropBack would
        work in real hardware.

        Chainer will calculate all grads, and this updater inserts itself
        before the next forward pass can occur to set the parameters back to
        what they should be. Only the params with the largest
        |current - initial| values are not reset to their initial values.
        This emulates the accumulated gradient updates of the actual
        algorithm.
        :return:
        """
        if self.first_iter:
            self.first_iter = False
            self.params = [i for i in self.opt.target.params()]
            for i, p in enumerate(self.params):
                self.init_params.append(xp.copy(p.data))
            if not os.path.exists(self.output_dir):
                os.makedirs(self.output_dir)
            xp.savez(
                os.path.join(self.output_dir,
                             'init_params_{0}'.format(self.time_stamp)),
                self.init_params)
            if self.tracked_size:
                self.frozen_masks = [None] * len(self.params)
        super(DropBack, self).update()
        if self.decay_init and not self.first_iter:
            for i, _ in enumerate(self.init_params):
                self.init_params[i] = self.init_params[i] * .90
        if self.tracked_size:
            if not self.freeze:
                abs_values = []
                for i, param in enumerate(self.params):
                    if param.name == 'b':
                        values = (xp.abs(param.data).flatten()).copy()
                    else:
                        values = (
                            xp.abs(param.data -
                                   self.init_params[i]).flatten()).copy()
                    abs_values.append(values)
                abs_vals = xp.concatenate(abs_values)
                thresh = xp.partition(abs_vals,
                                      self.tracked_size)[-self.tracked_size]
            for i, param in enumerate(self.params):
                if param.name == 'b':
                    if self.freeze:
                        mask = self.frozen_masks[i]
                    else:
                        mask = xp.abs(param.data) > thresh
                    param.data = mask * param.data
                else:
                    if self.freeze:
                        mask = self.frozen_masks[i]
                    else:
                        mask = xp.abs(param.data -
                                      self.init_params[i]) > thresh
                    param.data = mask * param.data + self.init_params[i] * ~mask
                self.frozen_masks[i] = mask
            if self.iteration == 3465:
                print("Checking inv...")
                total_sum = sum([
                    xp.count_nonzero(p.data != self.init_params[i])
                    for i, p in enumerate(self.params)
                ])
                print(
                    "********\n\n Total non zero is: {}\n\n*********".format(
                        total_sum))
                assert total_sum <= self.tracked_size * 1.1
        if self.track:
            if (self.iteration - 1) % 100 == 0:
                flat_now = xp.concatenate(
                    [i.array.ravel() for i in self.params])
                flat_0 = xp.concatenate([i.ravel() for i in self.init_params])
                xp.savez(
                    os.path.join(self.output_dir, f'l2_{self.iteration-1}'),
                    xp.linalg.norm(flat_now - flat_0))
                xp.savez(
                    os.path.join(self.output_dir,
                                 f'param_hist_{self.iteration-1}'),
                    xp.concatenate([
                        i.array.ravel() for i in self.params
                        if i.name == 'b' or i.name == 'W'
                    ]))
Code example #22
def fit_genetic_algorithm(M_PROBABILITY, X_PROBABILITY, P_SIZE, N_GEN, SEL_M,
                          CROSS_M, MUT_METHOD, K, items, encoding,
                          BEST_CHR_CONV_LIM, PERCENT_CHR_CONV_LIM):
    for user in tqdm.tqdm_notebook(range(items.shape[0])):
        # define evaluations list to plot afterwards
        U_EVALS_LIST = list()
        # fetch user's rating top - k neighbors (k=10)
        neighbors_data, neighbors_indeces = fetch_neighborhood(user, items, K)
        # fetch optimal solution vector
        optim_values, optim_indeces = fetch_optim(user, items,
                                                  neighbors_indeces)
        # reformat optimal solution vector
        # trim non neighbor users
        optim_values = optim_values[neighbors_indeces]
        # count non-zero ratings per item (denominator for the mean below)
        optim_mean = cupy.count_nonzero([optim_values], axis=1)
        # find mean of all k vectors and flatten optimal solution to a single vector
        optim_values = optim_values.sum(axis=0)
        optim_indeces = optim_indeces[optim_values != 0]
        optim_values = optim_values[optim_values != 0]
        optim_mean = optim_mean[optim_mean != 0]
        optim_values = optim_values / optim_mean
        # round up mean
        optim_values = cupy.ceil(optim_values).astype(cupy.int64)
        # random initialize population (chromosomes)
        chromosomes = cupy.random.choice(cupy.unique(optim_values),
                                         (P_SIZE, optim_values.shape[0]))
        # initialize error (best_chr) array
        evaluation_best_chr = cupy.zeros(N_GEN)
        # initialize error (gen) array
        evaluation_overall_gen = cupy.zeros(N_GEN)
        # initialize best_chr counter
        BEST_CHR_CONV_CTR = 0
        # initialize early stopping conditional
        stop = False
        # fit GA
        for gen in tqdm.tqdm_notebook(range(N_GEN)):
            # select and crossover population
            chromosomes = crossover_chromosomes(X_PROBABILITY, CROSS_M, SEL_M,
                                                chromosomes, optim_values,
                                                encoding)
            # mutate chromosomes
            chromosomes = mutate_chromosomes(M_PROBABILITY, MUT_METHOD,
                                             chromosomes, optim_values)
            # check GA convergence
            stop, evaluation_best_chr, evaluation_overall_gen, BEST_CHR_CONV_CTR, cause = \
                genetic_algorithm_convergence(evaluation_best_chr, evaluation_overall_gen, gen,
                                              chromosomes, optim_values, BEST_CHR_CONV_LIM, BEST_CHR_CONV_CTR,
                                              PERCENT_CHR_CONV_LIM)
            # if GA converges
            if stop is True:
                # print met GA convergence conditional
                print("User [", user, "]\tGen [", gen, "]\t: GA converged (",
                      cause, ")")
                # save population
                save_population(user, chromosomes, optim_values)
                # save different fitness metrics
                save_evaluations(user, chromosomes, optim_values)
                # stop fitting
                break
            # custom convergence conditional: evaluate by chromosome accuracy [90% accuracy convergence]
            if cupy.mean(cupy.fromiter((evaluate_chromosome(chromosomes[i], optim_values)
                                          for i in range(chromosomes.shape[0])), cupy.int64)) / \
                    chromosomes[0].shape[0] > 0.9:
                # print population accuracy
                print(
                    "Mean accuracy: ",
                    cupy.mean(
                        cupy.fromiter(
                            (evaluate_chromosome(chromosomes[i], optim_values)
                             for i in range(chromosomes.shape[0])),
                            cupy.int64)), "\tTotal chromosomes: ",
                    chromosomes[0].shape[0])
                # save population
                save_population(user, chromosomes, optim_values)
                # save different fitness metrics
                save_evaluations(user, chromosomes, optim_values)
                # stop fitting
                break
            # append mean population fitness
            U_EVALS_LIST.append(
                cupy.mean(
                    cupy.fromiter(
                        (evaluate_chromosome(chromosomes[i], optim_values)
                         for i in range(chromosomes.shape[0])), cupy.int64)))
        # save population
        save_population(user, chromosomes, optim_values)
        # save different fitness metrics
        save_evaluations(user, chromosomes, optim_values)
        # convert mean population fitness list to cupy array
        U_EVALS_ARRAY = cupy.asarray(U_EVALS_LIST)
        # define x axis of plot
        x_axis = cupy.arange(U_EVALS_ARRAY.shape[0])
        # plot mean population fitness array
        matplotlib.pyplot.clf()
        matplotlib.pyplot.plot(x_axis,
                               U_EVALS_ARRAY,
                               label="User " + str(user) + " - Evaluation")
        matplotlib.pyplot.legend()
        # matplotlib.pyplot.show()
        # save figure
        matplotlib.pyplot.savefig("user_" + str(user) + "-evaluation.png",
                                  bbox_inches='tight')
Code example #23
 def _count_accurate_predictions(y_hat_y):
     y_hat, y = y_hat_y
     y_hat = cp.asarray(y_hat, dtype=y_hat.dtype)
     y = cp.asarray(y, dtype=y.dtype)
     return y.shape[0] - cp.count_nonzero(y - y_hat)
Code example #24
File: cp_base.py Project: wfh1300/Numpy_Base
def count_nan_inf(x):
    bool_arr = cp.isinf(x) | cp.isnan(x)
    return cp.count_nonzero(bool_arr)
Code example #25
File: bench_core.py Project: okuta/cupy-benchmark
 def time_count_nonzero_axis(self, numaxes, size, dtype):
     np.count_nonzero(self.x, axis=self.x.ndim - 1)
Code example #26
File: TTTG_GPU.py Project: necro-wbj/TTTG

k = 0
times = 0
idx = cp.arange(train_datas)
x = list()
y = list()
print("開始訓練...")
while cp.less(k, train_datas):
    times = cp.add(times, 1)
    output = train.TTTG_Network(
        input_data, weight, baise, weight2, baise2, weight3, baise3, weight4, baise4, L, expect_data, True, True)
    if times % 10000 == 0:
        error = cp.sum(output[0])
        result = expect_data[idx, output[1]]
        k = cp.count_nonzero(result)
        # result = cp.equal(expect_data, cp.asarray(output[1]))  # AND gate
        expect_idx = list()
        for i in map(cp.nonzero, expect_data[result == 0]):
            expect_idx.append(i[0].tolist())
        print("輸入棋盤,輸出位置,預計輸出,實際輸出:", *list(zip(input_data[result == 0].tolist(
        ), output[1][result == 0].tolist(), expect_idx, output[2][result == 0].tolist())), sep='\n')
        print("第%d次訓練(學習率:%f):" % (times, L))
        print("總誤差:", error)
        print("答對筆數/總筆數: %d/%d" % (k, train_datas))
        # x.append(L)
        # y.append(output[0])
        # L = L * 10
        # if L >= 1:
        #     plt.plot([5, 4, 3, 2, 1, 0], y)
        #     plt.show()
Code example #27
File: morphology.py Project: venkywonka/cupy
def _binary_erosion(input,
                    structure,
                    iterations,
                    mask,
                    output,
                    border_value,
                    origin,
                    invert,
                    brute_force=True):
    try:
        iterations = operator.index(iterations)
    except TypeError:
        raise TypeError('iterations parameter should be an integer')

    if input.dtype.kind == 'c':
        raise TypeError('Complex type not supported')
    if structure is None:
        structure = generate_binary_structure(input.ndim, 1)
        all_weights_nonzero = input.ndim == 1
        center_is_true = True
        default_structure = True
    else:
        structure = structure.astype(dtype=bool, copy=False)
        # transfer to CPU for use in determining if it is fully dense
        # structure_cpu = cupy.asnumpy(structure)
        default_structure = False
    if structure.ndim != input.ndim:
        raise RuntimeError('structure and input must have same dimensionality')
    if not structure.flags.c_contiguous:
        structure = cupy.ascontiguousarray(structure)
    if structure.size < 1:
        raise RuntimeError('structure must not be empty')

    if mask is not None:
        if mask.shape != input.shape:
            raise RuntimeError('mask and input must have equal sizes')
        if not mask.flags.c_contiguous:
            mask = cupy.ascontiguousarray(mask)
        masked = True
    else:
        masked = False
    origin = _util._fix_sequence_arg(origin, input.ndim, 'origin', int)

    if isinstance(output, cupy.ndarray):
        if output.dtype.kind == 'c':
            raise TypeError('Complex output type not supported')
    else:
        output = bool
    output = _util._get_output(output, input)
    temp_needed = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS')
    if temp_needed:
        # input and output arrays cannot share memory
        temp = output
        output = _util._get_output(output.dtype, input)
    if structure.ndim == 0:
        # kernel doesn't handle ndim=0, so special case it here
        if float(structure):
            output[...] = cupy.asarray(input, dtype=bool)
        else:
            output[...] = ~cupy.asarray(input, dtype=bool)
        return output
    origin = tuple(origin)
    int_type = _util._get_inttype(input)
    offsets = _filters_core._origins_to_offsets(origin, structure.shape)
    if not default_structure:
        # synchronize required to determine if all weights are non-zero
        nnz = int(cupy.count_nonzero(structure))
        all_weights_nonzero = nnz == structure.size
        if all_weights_nonzero:
            center_is_true = True
        else:
            center_is_true = _center_is_true(structure, origin)

    erode_kernel = _get_binary_erosion_kernel(
        structure.shape,
        int_type,
        offsets,
        center_is_true,
        border_value,
        invert,
        masked,
        all_weights_nonzero,
    )

    if iterations == 1:
        if masked:
            output = erode_kernel(input, structure, mask, output)
        else:
            output = erode_kernel(input, structure, output)
    elif center_is_true and not brute_force:
        raise NotImplementedError(
            'only brute_force iteration has been implemented')
    else:
        if cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS'):
            raise ValueError('output and input may not overlap in memory')
        tmp_in = cupy.empty_like(input, dtype=output.dtype)
        tmp_out = output
        if iterations >= 1 and not iterations & 1:
            tmp_in, tmp_out = tmp_out, tmp_in
        if masked:
            tmp_out = erode_kernel(input, structure, mask, tmp_out)
        else:
            tmp_out = erode_kernel(input, structure, tmp_out)
        # TODO: kernel doesn't return the changed status, so determine it here
        changed = not (input == tmp_out).all()  # synchronize!
        ii = 1
        while ii < iterations or ((iterations < 1) and changed):
            tmp_in, tmp_out = tmp_out, tmp_in
            if masked:
                tmp_out = erode_kernel(tmp_in, structure, mask, tmp_out)
            else:
                tmp_out = erode_kernel(tmp_in, structure, tmp_out)
            changed = not (tmp_in == tmp_out).all()
            ii += 1
            if not changed and (not ii & 1):  # synchronize!
                # can exit early if nothing changed
                # (only do this after even number of tmp_in/out swaps)
                break
        output = tmp_out
    if temp_needed:
        temp[...] = output
        output = temp
    return output
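For a sense of what _binary_erosion computes, here is the public operation it
backs, exercised on the CPU via scipy.ndimage (cupyx.scipy.ndimage mirrors
this interface):

import numpy as np
from scipy.ndimage import binary_erosion

square = np.zeros((5, 5), dtype=bool)
square[1:4, 1:4] = True
# With the default connectivity-1 structure, one erosion pass keeps only
# pixels whose 4-neighbourhood is entirely True -- here, the single center.
print(binary_erosion(square).astype(int))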
Code example #28
File: run.py Project: dacongi/HPCeSE
            m=m+1
            x=[]
            continue
        elif len(line)==1 and m==3:
            #print(x)
            nsw=n-nbus-nbrch-ngen
            ipt_switch=x
            continue
        x.append(line)
        n=n+1
        
ipt_bus=cp.array(ipt_bus,dtype=cp.float64)
ipt_gen=cp.array(ipt_gen,dtype=cp.float64)
ipt_brch= cp.array(ipt_brch,dtype=cp.float64)
ipt_switch=cp.array(ipt_switch,dtype=cp.float64)
nPV=cp.count_nonzero(ipt_bus[:,9]==2)
#print("Data read in successfully!")
#print("nbus:", nbus, " nbrch:", nbrch, " ngen:", ngen)
# assign bus data
bus_int=ipt_bus[:,0]
V=ipt_bus[:,1]
b_ang=ipt_bus[:,2]
b_pg=ipt_bus[:,3]
b_qg=ipt_bus[:,4]
Pl=ipt_bus[:,5]
Ql=ipt_bus[:,6]
Gb=ipt_bus[:,7]
Bb=ipt_bus[:,8]
b_type=ipt_bus[:,9]
# assign branch data
from_bus=ipt_brch[:,0].astype(cp.int64)  # cp.int alias was removed; use an explicit dtype
Code example #29
def accuracy(a_hat, a_true):
    result = cp.equal(a_hat, a_true)
    right = cp.count_nonzero(result)
    return right / a_hat.shape[1]
Code example #30
def threshold_multiotsu(image, classes=3, nbins=256):
    r"""Generate `classes`-1 threshold values to divide gray levels in `image`.

    The threshold values are chosen to maximize the total sum of pairwise
    variances between the thresholded graylevel classes. See Notes and [1]_
    for more details.

    Parameters
    ----------
    image : (N, M) ndarray
        Grayscale input image.
    classes : int, optional
        Number of classes to be thresholded, i.e. the number of resulting
        regions.
    nbins : int, optional
        Number of bins used to calculate the histogram. This value is ignored
        for integer arrays.

    Returns
    -------
    thresh : array
        Array containing the threshold values for the desired classes.

    Raises
    ------
    ValueError
         If ``image`` contains fewer grayscale values than the desired
         number of classes.

    Notes
    -----
    This implementation relies on a Cython function whose complexity
    is :math:`O\left(\frac{Ch^{C-1}}{(C-1)!}\right)`, where :math:`h`
    is the number of histogram bins and :math:`C` is the number of
    classes desired.

    The input image must be grayscale.

    References
    ----------
    .. [1] Liao, P-S., Chen, T-S. and Chung, P-C., "A fast algorithm for
           multilevel thresholding", Journal of Information Science and
           Engineering 17 (5): 713-727, 2001. Available at:
           <https://ftp.iis.sinica.edu.tw/JISE/2001/200109_01.pdf>
           :DOI:`10.6688/JISE.2001.17.5.1`
    .. [2] Tosa, Y., "Multi-Otsu Threshold", a java plugin for ImageJ.
           Available at:
           <http://imagej.net/plugins/download/Multi_OtsuThreshold.java>

    Examples
    --------
    >>> import cupy as cp
    >>> from cucim.skimage.color import label2rgb
    >>> from skimage import data
    >>> image = cp.asarray(data.camera())
    >>> thresholds = threshold_multiotsu(image)
    >>> regions = cp.digitize(image, bins=thresholds)
    >>> regions_colorized = label2rgb(regions)

    """
    try:
        from skimage.filters._multiotsu import (
            _get_multiotsu_thresh_indices, _get_multiotsu_thresh_indices_lut)
    except ImportError:
        raise ImportError(
            "could not the required (private) multi-otsu helper functions "
            "from scikit-image")

    if len(image.shape) > 2 and image.shape[-1] in (3, 4):
        msg = ("threshold_multiotsu is expected to work correctly only for "
               "grayscale images; image shape {0} looks like an RGB image")
        warn(msg.format(image.shape))

    # calculating the histogram and the probability of each gray level.
    prob, bin_centers = histogram(image.ravel(),
                                  nbins=nbins,
                                  source_range='image',
                                  normalize=True)

    nvalues = cp.count_nonzero(prob)
    if nvalues < classes:
        msg = ("The input image has only {} different values. "
               "It can not be thresholded in {} classes")
        raise ValueError(msg.format(nvalues, classes))
    elif nvalues == classes:
        thresh_idx = cp.where(prob > 0)[0][:-1]
    else:
        # Need probabilities on the CPU to use the Cython code
        # CuPy Backend: prob is small, so CPU computations should be faster
        prob = cp.asnumpy(prob)  # synchronization!
        prob = prob.astype("float32")

        # Get threshold indices
        try:
            thresh_idx = _get_multiotsu_thresh_indices_lut(prob, classes - 1)
        except MemoryError:
            # Don't use LUT if the number of bins is too large (if the
            # image is uint16 for example): in this case, the
            # allocated memory is too large.
            thresh_idx = _get_multiotsu_thresh_indices(prob, classes - 1)
        # transfer indices back to the GPU
        thresh_idx = cp.asarray(thresh_idx)  # synchronization!

    thresh = bin_centers[thresh_idx]

    return thresh