Beispiel #1
0
def upcast(*args):
    """Return the first supported sparse dtype able to hold all inputs.

    The common type of ``args`` is computed with
    :func:`cupy.find_common_type`; ``supported_dtypes`` is then scanned in
    order and the first entry the common type can be cast to is returned.
    Results are cached in ``_upcast_memo``, keyed by the argument tuple.

    upcast(t0, t1, ..., tn) -> T  where T is a supported dtype

    Args:
        *args: One or more type specifiers (for example ``'int32'`` or
            ``complex``). They must be hashable because they are used as
            the cache key.

    Returns:
        The selected entry of ``supported_dtypes``.

    Raises:
        TypeError: If no entry of ``supported_dtypes`` can represent the
            common type.

    Examples (illustrative; the exact result depends on the contents and
    ordering of ``supported_dtypes``):
        ``upcast('int32')`` selects the int32 entry,
        ``upcast('int32', 'float32')`` selects the float64 entry and
        ``upcast('bool', complex)`` selects the complex128 entry.
    """
    cached = _upcast_memo.get(args)
    if cached is not None:
        return cached

    common = cupy.find_common_type(args, [])

    for candidate in supported_dtypes:
        if not cupy.can_cast(common, candidate):
            continue
        _upcast_memo[args] = candidate
        return candidate

    raise TypeError('no supported conversion for types: %r' % (args,))
Beispiel #2
0
def unravel_index(indices, dims, order='C'):
    """Turn flat indices into one coordinate array per dimension.

    Args:
        indices (cupy.ndarray): Integer array of positions in the flattened
            version of an array whose shape is :obj:`dims`.
        dims (tuple of ints): Shape of the array the flat indices refer to.
        order ('C' or 'F'): Whether the flat indices count in row-major
            (C-style) or column-major (Fortran-style) order. ``None`` is
            treated as ``'C'`` and the value is matched case-insensitively.

    Returns:
        tuple of ndarrays:
        One array per entry of ``dims``, each shaped like ``indices``.

    Raises:
        ValueError: If ``order`` is not understood, or if any index is
            negative or too large for ``dims``.
        TypeError: If ``indices`` cannot be cast to int64 under the
            ``same_kind`` rule.

    Examples
    --------
    >>> cupy.unravel_index(cupy.array([22, 41, 37]), (7, 6))
    (array([3, 6, 6]), array([4, 5, 1]))
    >>> cupy.unravel_index(cupy.array([31, 41, 13]), (7, 6), order='F')
    (array([3, 6, 6]), array([4, 5, 1]))

    .. warning::

        This function may synchronize the device.

    .. seealso:: :func:`numpy.unravel_index`, :func:`ravel_multi_index`

    """
    layout = 'C' if order is None else order.upper()
    if layout not in ('C', 'F'):
        raise ValueError('order not understood')
    row_major = layout == 'C'

    # Coordinates are peeled off starting with the fastest-varying axis,
    # which is the last one for C order and the first one for F order.
    extents = reversed(dims) if row_major else dims

    if not cupy.can_cast(indices, cupy.int64, 'same_kind'):
        raise TypeError('Iterator operand 0 dtype could not be cast '
                        'from dtype(\'{}\') to dtype(\'{}\') '
                        'according to the rule \'same_kind\''.format(
                            indices.dtype,
                            cupy.int64().dtype))

    if (indices < 0).any():  # synchronize!
        raise ValueError('invalid entry in index array')

    remaining = indices
    coords = []
    for extent in extents:
        coords.append(remaining % extent)
        remaining = remaining // extent

    # Anything left over means the flat index exceeded the array size.
    if (remaining > 0).any():  # synchronize!
        raise ValueError('invalid entry in index array')

    if row_major:
        coords.reverse()
    return tuple(coords)
Beispiel #3
0
def can_cast(from_: Union[Dtype, Array], to: Dtype, /) -> bool:
    """
    Array API compatible wrapper for :py:func:`np.can_cast <numpy.can_cast>`.

    When ``from_`` is an ``Array`` its wrapped ``_array`` attribute is what
    gets handed to NumPy; any other value is forwarded unchanged.

    See the NumPy docstring for more information.
    """
    # Imported here rather than at module level, as in the original code.
    from ._array_object import Array

    source = from_._array if isinstance(from_, Array) else from_
    return np.can_cast(source, to)
Beispiel #4
0
def get_index_dtype(arrays=(), maxval=None, check_contents=False):
    """Pick int32 or int64 as the index dtype for the given arrays.

    int32 is returned unless ``maxval`` exceeds the int32 range or one of the
    arrays requires a wider type.

    Args:
        arrays (tuple of array_like):
            Input arrays whose types/contents to check. A single
            ``cupy.ndarray`` is accepted and treated as a one-element tuple.
        maxval (float, optional):
            Maximum value needed
        check_contents (bool, optional):
            Whether to check the values in the arrays and not just their types.
            Default: False (check only the types)

    Returns:
        dtype: Suitable index data type (int32 or int64)
    """
    int32_info = cupy.iinfo(cupy.int32)
    int32min, int32max = int32_info.min, int32_info.max

    needs_wide = maxval is not None and maxval > int32max
    dtype = cupy.int64 if needs_wide else cupy.int32

    if isinstance(arrays, cupy.ndarray):
        arrays = (arrays,)

    for candidate in arrays:
        candidate = cupy.asarray(candidate)
        if cupy.can_cast(candidate.dtype, cupy.int32):
            # The dtype alone already fits into int32.
            continue

        if check_contents:
            if candidate.size == 0:
                # Nothing stored, so a bigger type is not needed.
                continue
            if cupy.issubdtype(candidate.dtype, cupy.integer):
                largest = candidate.max()
                smallest = candidate.min()
                if smallest >= int32min and largest <= int32max:
                    # All stored values fit, so a bigger type is not needed.
                    continue

        # Either only types were checked, or the contents do not fit.
        dtype = cupy.int64
        break

    return dtype
Beispiel #5
0
    def __init__(
        self,
        points,
        values,
        method="linear",
        bounds_error=True,
        fill_value=cp.nan,
    ):
        """Validate and store the grid, the values and the options.

        Args:
            points: Sequence of 1-D, strictly ascending coordinate arrays
                (array-likes are converted with ``cp.asarray``), one per
                gridded dimension.
            values: Data on the grid. Converted with ``cp.asarray``; inputs
                that are not of an inexact (floating/complex) dtype are cast
                to ``float``.
            method: Either ``"linear"`` or ``"nearest"``.
            bounds_error: Stored as ``self.bounds_error``; not otherwise used
                in this method.
            fill_value: Stored as ``self.fill_value``. Unless ``None``, its
                dtype must be castable to the dtype of ``values`` under the
                ``same_kind`` rule.

        Raises:
            ValueError: If ``method`` is unknown, there are more point arrays
                than dimensions in ``values``, ``fill_value`` has an
                incompatible dtype, or a point array is not 1-D, is not
                strictly ascending, or does not match the corresponding
                extent of ``values``.
        """
        if method not in ["linear", "nearest"]:
            raise ValueError("Method '%s' is not defined" % method)
        self.method = method
        self.bounds_error = bounds_error

        # allow reasonable duck-typed values
        values = cp.asarray(values)

        if len(points) > values.ndim:
            raise ValueError("There are %d point arrays, but values has %d "
                             "dimensions" % (len(points), values.ndim))

        # ``values`` is an array at this point, so no attribute probing is
        # needed before inspecting its dtype.
        if not cp.issubdtype(values.dtype, cp.inexact):
            values = values.astype(float)

        self.fill_value = fill_value
        if fill_value is not None:
            fill_value_dtype = cp.asarray(fill_value).dtype
            if not cp.can_cast(
                    fill_value_dtype, values.dtype, casting="same_kind"):
                raise ValueError("fill_value must be either 'None' or "
                                 "of a type compatible with values")

        # Convert every axis exactly once and validate the converted arrays.
        grid = tuple(cp.asarray(p) for p in points)
        for i, p in enumerate(grid):
            # Check dimensionality first so that scalar or multi-dimensional
            # input is reported as such rather than as an ordering problem.
            if p.ndim != 1:
                raise ValueError("The points in dimension %d must be "
                                 "1-dimensional" % i)
            if not cp.all(cp.diff(p) > 0.0):
                raise ValueError("The points in dimension %d must be strictly "
                                 "ascending" % i)
            if values.shape[i] != len(p):
                raise ValueError("There are %d points and %d values in "
                                 "dimension %d" % (len(p), values.shape[i], i))
        self.grid = grid
        self.values = values
Beispiel #6
0
    def fit(self, X: CSeries, y: CSeries):
        """Compute the mean of ``y`` per distinct value of ``X``.

        If the dtype of ``X`` cannot be safely cast to an integer, ``X`` is
        first label-encoded with ``pandas.Series.factorize`` and the original
        labels are kept in ``self._label_encoding_uniques``.

        After fitting, ``self.classes_`` holds the distinct values plus one
        extra entry (``max + 1``), ``self.class_means_`` holds the matching
        means plus ``self.default_unseen_`` for that extra entry, and
        ``self.lut_`` stacks both as two columns.

        Args:
            X: Input feature values.
            y: Target values; must be a ``cudf.Series``.
        """
        # Label encoding if necessary.
        # The builtin ``int`` is used because the ``cupy.int`` alias (which
        # was just the builtin ``int``) no longer exists in current releases.
        if not cupy.can_cast(X.dtype, int):
            if cudf_is_available() and isinstance(X, cudf.Series):
                X = X.to_array()
            X, uniques = pd.Series(cupy.asnumpy(X)).factorize()
            X = cudf.Series(X)
            self._label_encoding_uniques = uniques

        self.classes_, counts = cupy.unique(X, return_counts=True)
        self.class_means_ = cupy.zeros_like(self.classes_, dtype="float64")

        assert isinstance(y, cudf.Series)
        df = cudf.DataFrame()
        df.insert(0, "X", X)
        df.insert(0, "y", y.values)
        agg = df.groupby("X").agg("mean").to_pandas()

        # Copy the per-class mean from the host-side aggregate, one class at
        # a time.
        for idx, uniq_value in enumerate(self.classes_):
            uniq_value = cupy.asnumpy(uniq_value).item()
            mean_value = agg.loc[uniq_value]["y"]
            self.class_means_[idx] = mean_value

        # Append one extra class (max + 1) whose mean is the default used
        # for unseen values.
        self.classes_ = cupy.array(
            np.append(cupy.asnumpy(self.classes_),
                      [cupy.asnumpy(cupy.max(self.classes_)) + 1]))
        self.class_means_ = cupy.array(
            np.append(cupy.asnumpy(self.class_means_),
                      [cupy.asnumpy(self.default_unseen_)]))

        self.lut_ = cupy.hstack(
            [self.classes_.reshape(-1, 1),
             self.class_means_.reshape(-1, 1)])
Beispiel #7
0
def log_softmax(x, axis=None):
    """Compute the logarithm of the softmax function.

    Parameters
    ----------
    x : array-like
        Input array
    axis : int or tuple of ints, optional
        Axis to compute values along. Default is None and softmax
        will be computed over the entire array `x`

    Returns
    -------
    s : cupy.ndarray
        An array with the same shape as `x`. Exponential of the
        result will sum to 1 along the specified axis. If `x` is a
        scalar, a scalar is returned

    """
    peak = cp.amax(x, axis=axis, keepdims=True)

    # Non-finite maxima are replaced by 0 before shifting the input.
    if peak.ndim > 0:
        peak[~cp.isfinite(peak)] = 0
    elif not cp.isfinite(peak):
        peak = 0

    shifted = x - peak

    # Integer input is converted to the first floating dtype it can be
    # safely cast to.
    if shifted.dtype.kind in 'iu':
        float_candidates = (cp.float16, cp.float32, cp.float64)
        target = next(
            (dt for dt in float_candidates
             if cp.can_cast(shifted.dtype, dt)),
            None)
        if target is not None:
            shifted = shifted.astype(target)

    reduced = _log_softmax_kernel(shifted, axis=axis, keepdims=True)
    return shifted - reduced
Beispiel #8
0
def interp(x, xp, fp, left=None, right=None, period=None):
    """ One-dimensional linear interpolation.

    Args:
        x (cupy.ndarray): array of points at which to evaluate the
            interpolant. It must be C-contiguous.
        xp (cupy.ndarray): 1D array with the x-coordinates of the known
            samples.
        fp (cupy.ndarray): 1D array with the function values at ``xp``;
            it must have the same length as ``xp``.
        left (float or complex): value to return if ``x < xp[0]``. Default is
            ``fp[0]``.
        right (float or complex): value to return if ``x > xp[-1]``. Default is
            ``fp[-1]``.
        period (None or float): a period for the x-coordinates. Parameters
            ``left`` and ``right`` are ignored if ``period`` is specified.
            Default is ``None``.

    Returns:
        cupy.ndarray: The interpolated values, same shape as ``x``. The dtype
        is complex128 when ``fp`` is complex and float64 otherwise.

    Raises:
        ValueError: If ``xp``/``fp`` are not 1D, differ in length or are
            empty, or if ``period`` is zero.
        NotImplementedError: If ``x`` is not C-contiguous.
        TypeError: If the common type of ``x`` and ``xp`` cannot be cast to
            float64.

    .. note::
        This function may synchronize if ``left`` or ``right`` is not already
        on the device.

    .. seealso:: :func:`numpy.interp`

    """
    if not (xp.ndim == 1 and fp.ndim == 1):
        raise ValueError('xp and fp must be 1D arrays')
    if xp.size != fp.size:
        raise ValueError('fp and xp are not of the same length')
    if xp.size == 0:
        raise ValueError('array of sample points is empty')
    if not x.flags.c_contiguous:
        raise NotImplementedError('Non-C-contiguous x is currently not '
                                  'supported')
    x_dtype = cupy.common_type(x, xp)
    if not cupy.can_cast(x_dtype, cupy.float64):
        raise TypeError('Cannot cast array data from'
                        ' {} to {} according to the rule \'safe\''.format(
                            x_dtype, cupy.float64))

    if period is not None:
        # The handling of "period" below is modified from NumPy's

        if period == 0:
            raise ValueError("period must be a non-zero value")
        period = abs(period)
        # Explicit boundary values are ignored for periodic data.
        left = right = None

        # astype() yields fresh arrays, so the in-place modulo below does
        # not touch the caller's data.
        x = x.astype(cupy.float64)
        xp = xp.astype(cupy.float64)

        # normalizing periodic boundaries
        x %= period
        xp %= period
        sort_order = cupy.argsort(xp)
        xp = xp[sort_order]
        fp = fp[sort_order]
        # Pad both ends with the sample from the neighbouring period.
        xp = cupy.concatenate((xp[-1:] - period, xp, xp[0:1] + period))
        fp = cupy.concatenate((fp[-1:], fp, fp[0:1]))
        assert xp.flags.c_contiguous
        assert fp.flags.c_contiguous

    # NumPy always returns float64 or complex128, so we upcast all values
    # on the fly in the kernel
    is_complex = fp.dtype.kind == 'c'
    out_dtype = 'D' if is_complex else 'd'
    output = cupy.empty(x.shape, dtype=out_dtype)
    idx = cupy.searchsorted(xp, x, side='right')

    if left is None:
        left = fp[0]
    else:
        left = cupy.array(left, fp.dtype)
    if right is None:
        right = fp[-1]
    else:
        right = cupy.array(right, fp.dtype)

    kern = _get_interp_kernel(is_complex)
    kern(x, idx, xp, fp, xp.size, left, right, output)
    return output
Beispiel #9
0
def histogram(x, bins=10, range=None, weights=None, density=False):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents a bin edges.
        range (2-tuple of float, optional): The lower and upper range of the
            bins.  If not provided, range is simply ``(x.min(), x.max())``.
            Values outside the range are ignored. The first element of the
            range must be less than or equal to the second. `range` affects the
            automatic bin computation as well. While bin width is computed to
            be optimal based on the actual data within `range`, the bin count
            will fill the entire range including portions containing no data.
        density (bool, optional): If False, the default, returns the number of
            samples in each bin. If True, returns the probability *density*
            function at the bin, ``bin_count / sample_count / bin_volume``.
        weights (cupy.ndarray, optional): An array of weights, of the same
            shape as `x`.  Each value in `x` only contributes its associated
            weight towards the bin count (instead of 1).
    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    Raises:
        ValueError: If ``x`` is not a :class:`cupy.ndarray`.
        NotImplementedError: If ``x`` is complex, or if the dtype of
            ``weights`` can be cast to neither float nor complex.

    .. warning::

        This function may synchronize the device.

    .. seealso:: :func:`numpy.histogram`
    """

    # Validate the type before touching array attributes, so that a
    # non-array input gets the intended ValueError instead of an
    # AttributeError from ``x.dtype``.
    if not isinstance(x, cupy.ndarray):
        raise ValueError("x must be a cupy.ndarray")

    if x.dtype.kind == "c":
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError("complex number is not supported")

    x, weights = _ravel_and_check_weights(x, weights)
    # The second return value is not needed here.
    bin_edges, _ = _get_bin_edges(x, bins, range)

    if weights is None:
        y = cupy.zeros(bin_edges.size - 1, dtype="l")
        _histogram_kernel(x, bin_edges, bin_edges.size, y)
    else:
        simple_weights = cupy.can_cast(weights.dtype,
                                       cupy.double) or cupy.can_cast(
                                           weights.dtype, complex)
        if not simple_weights:
            # object dtype such as Decimal are supported in NumPy, but not here
            raise NotImplementedError(
                "only weights with dtype that can be cast to float or complex "
                "are supported")
        if weights.dtype.kind == "c":
            # Real and imaginary parts are accumulated separately.
            y = cupy.zeros(bin_edges.size - 1, dtype=complex)
            _weighted_histogram_kernel(x, bin_edges, bin_edges.size,
                                       weights.real, y.real)
            _weighted_histogram_kernel(x, bin_edges, bin_edges.size,
                                       weights.imag, y.imag)
        else:
            if weights.dtype.kind in "bui":
                y = cupy.zeros(bin_edges.size - 1, dtype=int)
            else:
                y = cupy.zeros(bin_edges.size - 1, dtype=float)
            _weighted_histogram_kernel(x, bin_edges, bin_edges.size, weights,
                                       y)

    if density:
        db = cupy.array(cupy.diff(bin_edges), float)
        return y / db / y.sum(), bin_edges
    return y, bin_edges
Beispiel #10
0
def ravel_multi_index(multi_index, dims, mode='wrap', order='C'):
    """
    Converts a tuple of index arrays into an array of flat indices, applying
    boundary modes to the multi-index.

    Args:
        multi_index (tuple of cupy.ndarray) : A tuple of integer arrays, one
            array for each dimension.
        dims (tuple of ints): The shape of array into which the indices from
            ``multi_index`` apply.
        mode ('raise', 'wrap' or 'clip'), optional: Specifies how out-of-bounds
            indices are handled.  Can specify either one mode or a tuple of
            modes, one mode per index:

            - *'raise'* -- raise an error
            - *'wrap'* -- wrap around (default)
            - *'clip'* -- clip to the range

            In 'clip' mode, a negative index which would normally wrap will
            clip to 0 instead.
        order ('C' or 'F'), optional: Determines whether the multi-index should
            be viewed as indexing in row-major (C-style) or column-major
            (Fortran-style) order. Matched case-insensitively; ``None`` is
            treated as ``'C'``.

    Returns:
        raveled_indices (cupy.ndarray): An array of indices into the flattened
            version of an array of dimensions ``dims``.

    Raises:
        ValueError: If ``multi_index`` or a sequence given as ``mode`` does
            not have one entry per dimension, if ``dims`` describes an array
            that is too large, or (``mode == 'raise'``) if an index is out of
            bounds.
        TypeError: If an entry of ``dims`` is not an integer, an entry of
            ``multi_index`` is not an integer-compatible cupy array, or
            ``order`` / ``mode`` is not recognized.

    .. warning::

        This function may synchronize the device when ``mode == 'raise'``.

    Notes
    -----
    Note that the default `mode` (``'wrap'``) is different than in NumPy. This
    is done to avoid potential device synchronization.

    Examples
    --------
    >>> cupy.ravel_multi_index(cupy.asarray([[3,6,6],[4,5,1]]), (7,6))
    array([22, 41, 37])
    >>> cupy.ravel_multi_index(cupy.asarray([[3,6,6],[4,5,1]]), (7,6),
    ...                        order='F')
    array([31, 41, 13])
    >>> cupy.ravel_multi_index(cupy.asarray([[3,6,6],[4,5,1]]), (4,6),
    ...                        mode='clip')
    array([22, 23, 19])
    >>> cupy.ravel_multi_index(cupy.asarray([[3,6,6],[4,5,1]]), (4,4),
    ...                        mode=('clip', 'wrap'))
    array([12, 13, 13])
    >>> cupy.ravel_multi_index(cupy.asarray((3,1,4,1)), (6,7,8,9))
    array(1621)

    .. seealso:: :func:`numpy.ravel_multi_index`, :func:`unravel_index`
    """

    ndim = len(dims)
    if len(multi_index) != ndim:
        raise ValueError(
            "parameter multi_index must be a sequence of "
            "length {}".format(ndim))

    for d in dims:
        if not isinstance(d, numbers.Integral):
            raise TypeError(
                "{} object cannot be interpreted as an integer".format(
                    type(d)))

    if isinstance(mode, str):
        mode = (mode, ) * ndim
    else:
        # A per-dimension sequence must cover every dimension; otherwise the
        # zip() below would silently skip the trailing dimensions and return
        # wrong flat indices.
        mode = tuple(mode)
        if len(mode) != ndim:
            raise ValueError(
                "sequence of modes has wrong length ({} instead of "
                "{})".format(len(mode), ndim))

    if functools.reduce(operator.mul, dims) > cupy.iinfo(cupy.int64).max:
        raise ValueError("invalid dims: array size defined by dims is larger "
                         "than the maximum possible size")

    s = 1
    ravel_strides = [1] * ndim
    if order is None:
        order = "C"
    elif isinstance(order, str):
        # Accept 'c' / 'f' as well, like unravel_index does.
        order = order.upper()
    if order == "C":
        # Row-major: the last axis has stride 1.
        for i in range(ndim - 2, -1, -1):
            s = s * dims[i + 1]
            ravel_strides[i] = s
    elif order == "F":
        # Column-major: the first axis has stride 1.
        for i in range(1, ndim):
            s = s * dims[i - 1]
            ravel_strides[i] = s
    else:
        raise TypeError("order not understood")

    multi_index = cupy.broadcast_arrays(*multi_index)
    raveled_indices = cupy.zeros(multi_index[0].shape, dtype=cupy.int64)
    for d, stride, idx, _mode in zip(dims, ravel_strides, multi_index, mode):

        if not isinstance(idx, cupy.ndarray):
            raise TypeError("elements of multi_index must be cupy arrays")
        if not cupy.can_cast(idx, cupy.int64, 'same_kind'):
            raise TypeError(
                'multi_index entries could not be cast from dtype(\'{}\') to '
                'dtype(\'{}\') according to the rule \'same_kind\''.format(
                    idx.dtype, cupy.int64().dtype))
        idx = idx.astype(cupy.int64, copy=False)

        if _mode == "raise":
            if cupy.any(cupy.logical_or(idx >= d, idx < 0)):
                raise ValueError("invalid entry in coordinates array")
        elif _mode == "clip":
            idx = cupy.clip(idx, 0, d - 1)
        elif _mode == 'wrap':
            idx = idx % d
        else:
            raise TypeError("Unrecognized mode: {}".format(_mode))
        raveled_indices += stride * idx
    return raveled_indices
Beispiel #11
0
def ediff1d(arr, to_end=None, to_begin=None):
    """
    Calculates the difference between consecutive elements of an array.

    The input is flattened first, so the differences run over all elements
    in flattened order.

    Args:
        arr (cupy.ndarray): Input array.
        to_end (cupy.ndarray, optional): Numbers to append at the end
            of the returned differences.
        to_begin (cupy.ndarray, optional): Numbers to prepend at the
            beginning of the returned differences.

    Returns:
        cupy.ndarray: New array consisting differences among succeeding
        elements.

    Raises:
        TypeError: If ``arr``, ``to_begin`` or ``to_end`` is not a
            :class:`cupy.ndarray`, or if ``to_begin`` / ``to_end`` cannot be
            cast to the dtype of ``arr`` under the ``same_kind`` rule.

    .. seealso:: :func:`numpy.ediff1d`
    """
    if not isinstance(arr, cupy.ndarray):
        raise TypeError('`arr` should be of type cupy.ndarray')

    flat = arr.ravel()

    # The output keeps the dtype of the input.
    dtype_req = flat.dtype

    # Fast path: nothing to prepend or append.
    if to_begin is None and to_end is None:
        return flat[1:] - flat[:-1]

    n_begin = 0
    n_end = 0

    if to_begin is not None:
        if not isinstance(to_begin, cupy.ndarray):
            raise TypeError('`to_begin` should be of type cupy.ndarray')
        if not cupy.can_cast(to_begin, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_begin` must be compatible "
                            "with input `arr` under the `same_kind` rule.")
        to_begin = to_begin.ravel()
        n_begin = len(to_begin)

    if to_end is not None:
        if not isinstance(to_end, cupy.ndarray):
            raise TypeError('`to_end` should be of type cupy.ndarray')
        if not cupy.can_cast(to_end, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_end` must be compatible "
                            "with input `arr` under the `same_kind` rule.")
        to_end = to_end.ravel()
        n_end = len(to_end)

    # Allocate the full result once and fill its three sections in place.
    n_diff = max(len(flat) - 1, 0)
    out = cupy.empty(n_diff + n_begin + n_end, dtype=flat.dtype)
    # Cupy does not support subclassing a ndarray
    # result = arr.__array_wrap__(result)
    diff_stop = n_begin + n_diff
    if n_begin > 0:
        out[:n_begin] = to_begin
    if n_end > 0:
        out[diff_stop:] = to_end
    cupy.subtract(flat[1:], flat[:-1], out[n_begin:diff_stop])
    return out
Beispiel #12
0
def histogram(x, bins=10, range=None, weights=None, density=False):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents a bin edges.
        range (2-tuple of float, optional): The lower and upper range of the
            bins.  If not provided, range is simply ``(x.min(), x.max())``.
            Values outside the range are ignored. The first element of the
            range must be less than or equal to the second. `range` affects the
            automatic bin computation as well. While bin width is computed to
            be optimal based on the actual data within `range`, the bin count
            will fill the entire range including portions containing no data.
        density (bool, optional): If False, the default, returns the number of
            samples in each bin. If True, returns the probability *density*
            function at the bin, ``bin_count / sample_count / bin_volume``.
        weights (cupy.ndarray, optional): An array of weights, of the same
            shape as `x`.  Each value in `x` only contributes its associated
            weight towards the bin count (instead of 1).
    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    Raises:
        ValueError: If ``x`` is not a :class:`cupy.ndarray`.
        NotImplementedError: If ``x`` is complex, or if the dtype of
            ``weights`` can be cast to neither float nor complex.

    .. warning::

        This function may synchronize the device.

    .. seealso:: :func:`numpy.histogram`
    """

    # Validate the type before touching array attributes, so that a
    # non-array input gets the intended ValueError instead of an
    # AttributeError from ``x.dtype``.
    if not isinstance(x, cupy.ndarray):
        raise ValueError('x must be a cupy.ndarray')

    if x.dtype.kind == 'c':
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError('complex number is not supported')

    x, weights = _ravel_and_check_weights(x, weights)
    bin_edges = _get_bin_edges(x, bins, range)

    if weights is None:
        y = cupy.zeros(bin_edges.size - 1, dtype='l')
        for accelerator in _accelerator.get_routine_accelerators():
            # CUB uses int for bin counts
            # TODO(leofang): support >= 2^31 elements in x?
            if (accelerator == _accelerator.ACCELERATOR_CUB
                    and x.size <= 0x7fffffff and bin_edges.size <= 0x7fffffff):
                # Need to ensure the dtype of bin_edges as it's needed for both
                # the CUB call and the correction later
                assert isinstance(bin_edges, cupy.ndarray)
                if numpy.issubdtype(x.dtype, numpy.integer):
                    # ``numpy.float`` was only an alias of the builtin and no
                    # longer exists in current NumPy releases.
                    bin_type = float
                else:
                    bin_type = numpy.result_type(bin_edges.dtype, x.dtype)
                    if (bin_type == numpy.float16
                            and not common._is_fp16_supported()):
                        bin_type = numpy.float32
                    x = x.astype(bin_type, copy=False)
                acc_bin_edge = bin_edges.astype(bin_type, copy=True)
                # CUB's upper bin boundary is exclusive for all bins, including
                # the last bin, so we must shift it to comply with NumPy
                if x.dtype.kind in 'ui':
                    acc_bin_edge[-1] += 1
                elif x.dtype.kind == 'f':
                    last = acc_bin_edge[-1]
                    acc_bin_edge[-1] = cupy.nextafter(last, last + 1)
                if runtime.is_hip:
                    y = y.astype(cupy.uint64, copy=False)
                y = cub.device_histogram(x, acc_bin_edge, y)
                if runtime.is_hip:
                    y = y.astype(cupy.int64, copy=False)
                break
        else:
            # No accelerator handled the call: use the generic kernel.
            _histogram_kernel(x, bin_edges, bin_edges.size, y)
    else:
        simple_weights = (cupy.can_cast(weights.dtype, cupy.float64)
                          or cupy.can_cast(weights.dtype, cupy.complex128))
        if not simple_weights:
            # object dtype such as Decimal are supported in NumPy, but not here
            raise NotImplementedError(
                'only weights with dtype that can be cast to float or complex '
                'are supported')
        if weights.dtype.kind == 'c':
            # Real and imaginary parts are accumulated separately.
            y = cupy.zeros(bin_edges.size - 1, dtype=cupy.complex128)
            _weighted_histogram_kernel(x, bin_edges, bin_edges.size,
                                       weights.real, y.real)
            _weighted_histogram_kernel(x, bin_edges, bin_edges.size,
                                       weights.imag, y.imag)
        else:
            if weights.dtype.kind in 'bui':
                y = cupy.zeros(bin_edges.size - 1, dtype=int)
            else:
                y = cupy.zeros(bin_edges.size - 1, dtype=cupy.float64)
            _weighted_histogram_kernel(x, bin_edges, bin_edges.size, weights,
                                       y)

    if density:
        db = cupy.array(cupy.diff(bin_edges), cupy.float64)
        return y / db / y.sum(), bin_edges
    return y, bin_edges
def _direct_correlate(in1, in2, mode='full', output=float, convolution=False,
                      boundary='constant', fillvalue=0.0, shift=False):
    """Correlate (or convolve) ``in1`` with ``in2`` using a correlate kernel.

    The kernel is obtained from ``filters._get_correlate_kernel`` and is run
    on the (possibly swapped) inputs.

    Args:
        in1: First input array (presumably a cupy.ndarray -- its ``ndim``,
            ``dtype``, ``shape`` and ``size`` are used).
        in2: Second input array, same expectations as ``in1``.
        mode: ``'full'`` or ``'valid'``; any other value is handled as
            ``'same'``.
        output: Either a ``cupy.ndarray`` to write into, whose shape and
            dtype must match the computed output, or a type that is checked
            with ``cupy.can_cast`` against the promoted input dtype before a
            new array is allocated.
        convolution: If true, ``in2`` is reversed (convolution); otherwise
            its complex conjugate is used (correlation).
        boundary: Passed to ``filters._get_correlate_kernel``; also, together
            with ``fillvalue``, decides whether the inputs may be swapped.
        fillvalue: Passed to ``filters._get_correlate_kernel``.
        shift: Enters the offset computation in ``'same'`` mode and the final
            conjugation condition for swapped complex inputs.

    Returns:
        The output array.

    Raises:
        ValueError: For the unsupported input dtypes checked below, when
            ``output`` cannot be cast to the promoted dtype, or when a
            supplied output array has the wrong shape or dtype.
        RuntimeError: If the second array (after swapping) occupies 2 GiB or
            more.
    """
    if in1.ndim != 1 and (in1.dtype.kind == 'b' or
                          (in1.dtype.kind == 'f' and in1.dtype.itemsize < 4)):
        raise ValueError('unsupported type in SciPy')

    # Swaps inputs so smaller one is in2:
    # NOTE: when mode != 'valid' we can only swap with a constant-0 boundary
    swapped_inputs = False
    orig_in1_shape = in1.shape
    if _inputs_swap_needed(mode, in1.shape, in2.shape) or (
            in2.size > in1.size and boundary == 'constant' and fillvalue == 0):
        in1, in2 = in2, in1
        swapped_inputs = True

    # Due to several optimizations, the second array can only be 2 GiB
    if in2.nbytes >= (1 << 31):
        raise RuntimeError('smaller array must be 2 GiB or less, '
                           'use method="fft" instead')

    # At this point, in1.size > in2.size
    # (except some cases when boundary != 'constant' or fillvalue != 0)
    # Figure out the output shape and the origin of the kernel
    if mode == 'full':
        out_shape = tuple(x1+x2-1 for x1, x2 in zip(in1.shape, in2.shape))
        offsets = tuple(x-1 for x in in2.shape)
    elif mode == 'valid':
        out_shape = tuple(x1-x2+1 for x1, x2 in zip(in1.shape, in2.shape))
        offsets = (0,) * in1.ndim
    else:  # mode == 'same':
        # In correlate2d: When using "same" mode with even-length inputs, the
        # outputs of correlate and correlate2d differ: There is a 1-index
        # offset between them.
        # This is dealt with by using "shift" parameter.
        out_shape = orig_in1_shape
        # The shapes are equal unless the inputs were swapped to arrays of
        # different shape; the two cases use different offset formulas.
        if orig_in1_shape == in1.shape:
            offsets = tuple((x-shift)//2 for x in in2.shape)
        else:
            offsets = tuple((2*x2-x1-(not convolution)+shift)//2
                            for x1, x2 in zip(in1.shape, in2.shape))

    # Check the output
    # In SciPy, the output dtype is determined by inputs' dtypes
    out_dtype = cupy.promote_types(in1, in2)
    if not isinstance(output, cupy.ndarray):
        if not cupy.can_cast(output, out_dtype):
            raise ValueError('not available for this type')
        output = cupy.empty(out_shape, out_dtype)
    elif output.shape != out_shape:
        raise ValueError('out has wrong shape')
    elif output.dtype != out_dtype:
        raise ValueError('out has wrong dtype')

    # Check input dtypes
    # Internally, the kernel accumulates in in2's type, so if in2 has lower
    # precision (can_cast = True!) we hit overflow easier
    # TODO(leofang): this is a band-aid fix for cupy/cupy#6047
    if cupy.can_cast(in2, in1):
        in2 = in2.astype(out_dtype)  # make a copy while upcasting

    # Get and run the CuPy kernel
    int_type = _util._get_inttype(in1)
    kernel = filters._get_correlate_kernel(
        boundary, in2.shape, int_type, offsets, fillvalue)
    # Convolution uses the reversed second input, correlation its conjugate.
    in2 = _reverse(in2) if convolution else in2.conj()
    if not swapped_inputs or convolution:
        kernel(in1, in2, output)
    elif output.dtype.kind != 'c':
        # Avoids one array copy
        kernel(in1, in2, _reverse(output))
    else:
        # Swapped complex correlation: compute, then reverse the result and
        # conjugate it when the condition below holds.
        kernel(in1, in2, output)
        output = cupy.ascontiguousarray(_reverse(output))
        if swapped_inputs and (mode != 'valid' or not shift):
            cupy.conjugate(output, out=output)
    return output