Example #1
def _get_bin_edges(a, bins, range):
    """
    Computes the bins used internally by `histogram`.

    Args:
        a (ndarray): Ravelled data array
        bins (int or ndarray): Forwarded argument from `histogram`.
        range (None or tuple): Forwarded argument from `histogram`.

    Returns:
        bin_edges (ndarray): Array of bin edges
    """
    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None

    if isinstance(bins, str):
        raise NotImplementedError(
            'only integer and array bins are implemented')
    elif isinstance(bins, cupy.ndarray) or numpy.ndim(bins) == 1:
        # TODO(okuta): After #3060 is merged, `if cupy.ndim(bins) == 1:`.
        if isinstance(bins, cupy.ndarray):
            bin_edges = bins
        else:
            bin_edges = numpy.asarray(bins)

        if (bin_edges[:-1] > bin_edges[1:]).any():  # synchronizes when bin_edges is a CuPy array
            raise ValueError(
                '`bins` must increase monotonically, when an array')
        if isinstance(bin_edges, numpy.ndarray):
            bin_edges = cupy.asarray(bin_edges)
    elif numpy.ndim(bins) == 0:
        try:
            n_equal_bins = operator.index(bins)
        except TypeError:
            raise TypeError('`bins` must be an integer, a string, or an array')
        if n_equal_bins < 1:
            raise ValueError('`bins` must be positive, when an integer')

        first_edge, last_edge = _get_outer_edges(a, range)
    else:
        raise ValueError('`bins` must be 1d, when an array')

    if n_equal_bins is not None:
        # numpy's gh-10322 means that type resolution rules are dependent on
        # array shapes. To avoid this causing problems, we pick a type now and
        # stick with it throughout.
        bin_type = cupy.result_type(first_edge, last_edge, a)
        if cupy.issubdtype(bin_type, cupy.integer):
            bin_type = cupy.result_type(bin_type, float)

        # bin edges must be computed
        bin_edges = cupy.linspace(first_edge,
                                  last_edge,
                                  n_equal_bins + 1,
                                  endpoint=True,
                                  dtype=bin_type)
    return bin_edges
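A quick usage sketch of the two supported `bins` forms, going through the public `cupy.histogram` entry point since `_get_bin_edges` is a private helper (assumes a CuPy-capable GPU):

import cupy

data = cupy.array([0.0, 1.0, 1.5, 2.0, 4.0])

# int bins: edges are generated with linspace over the data range
_, edges = cupy.histogram(data, bins=4)
print(edges)  # [0. 1. 2. 3. 4.]

# array bins: the monotonically increasing edges are used as given
_, edges = cupy.histogram(data, bins=cupy.array([0.0, 1.0, 3.0, 4.0]))
print(edges)  # [0. 1. 3. 4.]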
Example #2
File: csr.py Project: toslunar/cupy
def _maximum_minimum(self, other, cupy_op, op_name, dense_check):
    if _util.isscalarlike(other):
        other = cupy.asarray(other, dtype=self.dtype)
        if dense_check(other):
            dtype = self.dtype
            # Note: This is a work-around to make the output dtype the same
            # as SciPy. It might be SciPy version dependent.
            if dtype == numpy.float32:
                dtype = numpy.float64
            elif dtype == numpy.complex64:
                dtype = numpy.complex128
            dtype = cupy.result_type(dtype, other)
            other = other.astype(dtype, copy=False)
            # Note: The computation steps below are different from SciPy.
            new_array = cupy_op(self.todense(), other)
            return csr_matrix(new_array)
        else:
            self.sum_duplicates()
            new_data = cupy_op(self.data, other)
            return csr_matrix((new_data, self.indices, self.indptr),
                              shape=self.shape,
                              dtype=self.dtype)
    elif _util.isdense(other):
        self.sum_duplicates()
        other = cupy.atleast_2d(other)
        return cupy_op(self.todense(), other)
    elif isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        return binopt_csr(self, other, op_name)
    raise NotImplementedError
Example #3
def leslie(f, s):
    """Create a Leslie matrix.

    Given the length n array of fecundity coefficients ``f`` and the length n-1
    array of survival coefficients ``s``, return the associated Leslie matrix.

    Args:
        f (cupy.ndarray): The "fecundity" coefficients.
        s (cupy.ndarray): The "survival" coefficients, has to be 1-D.  The
            length of ``s`` must be one less than the length of ``f``, and it
            must be at least 1.

    Returns:
        cupy.ndarray: The array is zero except for the first row, which is
        ``f``, and the first sub-diagonal, which is ``s``. The data-type of
        the array will be the data-type of ``f[0]+s[0]``.

    .. seealso:: :func:`scipy.linalg.leslie`
    """
    if f.ndim != 1:
        raise ValueError('Incorrect shape for f. f must be 1D')
    if s.ndim != 1:
        raise ValueError('Incorrect shape for s. s must be 1D')
    n = f.size
    if n != s.size + 1:
        raise ValueError('Length of s must be one less than length of f')
    if s.size == 0:
        raise ValueError('The length of s must be at least 1.')
    a = cupy.zeros((n, n), dtype=cupy.result_type(f, s))
    a[0] = f
    cupy.fill_diagonal(a[1:], s)
    return a
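A short usage sketch mirroring the `scipy.linalg.leslie` docstring example, assuming the function above is in scope:

import cupy

f = cupy.array([0.1, 2.0, 1.0, 0.1])
s = cupy.array([0.2, 0.8, 0.7])
print(leslie(f, s))
# [[0.1 2.  1.  0.1]
#  [0.2 0.  0.  0. ]
#  [0.  0.8 0.  0. ]
#  [0.  0.  0.7 0. ]]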
Example #4
def _linspace_scalar(start,
                     stop,
                     num=50,
                     endpoint=True,
                     retstep=False,
                     dtype=None):
    """Returns an array with evenly-spaced values within a given interval.

    Instead of specifying the step width like :func:`cupy.arange`, this
    function requires the total number of elements specified.

    Args:
        start: Start of the interval.
        stop: End of the interval.
        num: Number of elements.
        endpoint (bool): If ``True``, the stop value is included as the last
            element. Otherwise, the stop value is omitted.
        retstep (bool): If ``True``, this function returns (array, step).
            Otherwise, it returns only the array.
        dtype: Data type specifier. It is inferred from the start and stop
            arguments by default.

    Returns:
        cupy.ndarray: The 1-D array of ranged values.

    """
    dt = cupy.result_type(start, stop, float(num))
    if dtype is None:
        # In actual implementation, only float is used
        dtype = dt

    ret = cupy.empty((num, ), dtype=dt)
    div = (num - 1) if endpoint else num
    if div <= 0:
        if num > 0:
            ret.fill(start)
        step = float('nan')
    else:
        step = float(stop - start) / div
        stop = float(stop)

        if step == 0.0:
            # for underflow
            _linspace_ufunc_underflow(start, stop - start, div, ret)
        else:
            _linspace_ufunc(start, step, ret)

        if endpoint:
            # Here num == div + 1 > 1 is ensured.
            ret[-1] = stop

    if cupy.issubdtype(dtype, cupy.integer):
        cupy.floor(ret, out=ret)

    ret = ret.astype(dtype, copy=False)

    if retstep:
        return ret, step
    else:
        return ret
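This scalar path is what the public `cupy.linspace` exercises for plain numeric endpoints; a minimal check of the `retstep` behavior:

import cupy

arr, step = cupy.linspace(0.0, 1.0, num=5, retstep=True)
print(arr)   # [0.   0.25 0.5  0.75 1.  ]
print(step)  # 0.25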
Example #5
def _dot_convolve(a1, a2, mode):
    if a1.size == 0 or a2.size == 0:
        raise ValueError('Array arguments cannot be empty')

    is_inverted = False
    if a1.size < a2.size:
        a1, a2 = a2, a1
        is_inverted = True

    dtype = cupy.result_type(a1, a2)
    n1, n2 = a1.size, a2.size
    a1 = a1.astype(dtype, copy=False)
    a2 = a2.astype(dtype, copy=False)

    if mode == 'full':
        out_size = n1 + n2 - 1
        a1 = cupy.pad(a1, n2 - 1)
    elif mode == 'same':
        out_size = n1
        pad_size = (n2 - 1) // 2
        a1 = cupy.pad(a1, (n2 - 1 - pad_size, pad_size))
    elif mode == 'valid':
        out_size = n1 - n2 + 1
    else:
        raise ValueError(
            'acceptable mode flags are `valid`, `same`, or `full`.')

    stride = a1.strides[0]
    a1 = stride_tricks.as_strided(a1, (out_size, n2), (stride, stride))
    output = _dot_kernel(a1, a2[::-1], axis=1)
    return is_inverted, output
Example #6
def _fft_convolve(a1, a2, mode):

    if a1.size < a2.size:
        a1, a2 = a2, a1

    if a1.dtype.kind == 'c' or a2.dtype.kind == 'c':
        fft, ifft = cupy.fft.fft, cupy.fft.ifft
    else:
        fft, ifft = cupy.fft.rfft, cupy.fft.irfft

    dtype = cupy.result_type(a1, a2)
    n1, n2 = a1.size, a2.size
    out_size = n1 + n2 - 1
    fa1 = fft(a1, out_size)
    fa2 = fft(a2, out_size)
    out = ifft(fa1 * fa2, out_size)

    if mode == 'full':
        start, end = None, None
    elif mode == 'same':
        start = (n2 - 1) // 2
        end = start + n1
    elif mode == 'valid':
        start, end = n2 - 1, n1
    else:
        raise ValueError(
            'acceptable mode flags are `valid`, `same`, or `full`.')

    out = out[start:end]

    if dtype.kind in 'iu':
        out = cupy.around(out)

    return out.astype(dtype, copy=False)
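`_fft_convolve` is a private helper; the same mode semantics are reachable through the public `cupy.convolve`. A minimal sketch:

import cupy

a1 = cupy.array([1.0, 2.0, 3.0])
a2 = cupy.array([0.0, 1.0, 0.5])
print(cupy.convolve(a1, a2, mode='full'))  # [0.  1.  2.5 4.  1.5]
print(cupy.convolve(a1, a2, mode='same'))  # [1.  2.5 4. ]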
Example #7
def _dot_convolve(a1, a2, mode):

    offset = 0
    if a1.size < a2.size:
        a1, a2 = a2, a1
        offset = 1 - a2.size % 2

    dtype = cupy.result_type(a1, a2)
    n1, n2 = a1.size, a2.size
    a1 = a1.astype(dtype, copy=False)
    a2 = a2.astype(dtype, copy=False)

    if mode == 'full':
        out_size = n1 + n2 - 1
        a1 = cupy.pad(a1, n2 - 1)
    elif mode == 'same':
        out_size = n1
        pad_size = (n2 - 1) // 2 + offset
        a1 = cupy.pad(a1, (n2 - 1 - pad_size, pad_size))
    elif mode == 'valid':
        out_size = n1 - n2 + 1
    else:
        raise ValueError(
            'acceptable mode flags are `valid`, `same`, or `full`.')

    stride = a1.strides[0]
    a1 = stride_tricks.as_strided(a1, (out_size, n2), (stride, stride))
    output = _dot_kernel(a1, a2[::-1], axis=1)
    return output
Example #8
File: csr.py Project: toslunar/cupy
def __truediv__(self, other):
    """Point-wise division by another matrix, vector or scalar"""
    if _util.isscalarlike(other):
        dtype = self.dtype
        if dtype == numpy.float32:
            # Note: This is a work-around to make the output dtype the same
            # as SciPy. It might be SciPy version dependent.
            dtype = numpy.float64
        dtype = cupy.result_type(dtype, other)
        d = cupy.reciprocal(other, dtype=dtype)
        return multiply_by_scalar(self, d)
    elif _util.isdense(other):
        other = cupy.atleast_2d(other)
        check_shape_for_pointwise_op(self.shape, other.shape)
        return self.todense() / other
    elif base.isspmatrix(other):
        # Note: If broadcasting is needed, an exception is raised here for
        # compatibility with SciPy, as SciPy does not support broadcasting
        # in the "sparse / sparse" case.
        check_shape_for_pointwise_op(self.shape,
                                     other.shape,
                                     allow_broadcasting=False)
        dtype = numpy.promote_types(self.dtype, other.dtype)
        if dtype.char not in 'FD':
            dtype = numpy.promote_types(numpy.float64, dtype)
        # Note: The following implementation converts two sparse matrices
        # into dense matrices and then performs a point-wise division,
        # which can use lots of memory.
        self_dense = self.todense().astype(dtype, copy=False)
        return self_dense / other.todense()
    raise NotImplementedError
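A small sketch of the scalar path above, assuming `cupyx.scipy.sparse` is available; division by a scalar goes through `cupy.reciprocal` plus a scalar multiply:

import cupy
from cupyx.scipy.sparse import csr_matrix

m = csr_matrix(cupy.array([[1.0, 0.0], [0.0, 2.0]]))
print((m / 2.0).todense())
# [[0.5 0. ]
#  [0.  1. ]]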
Example #9
def _correlate(in1, in2, mode='full', method='auto', convolution=False):
    quick_out = _st_core._check_conv_inputs(in1, in2, mode, convolution)
    if quick_out is not None:
        return quick_out
    if method not in ('auto', 'direct', 'fft'):
        raise ValueError('acceptable methods are "auto", "direct", or "fft"')

    if method == 'auto':
        method = choose_conv_method(in1, in2, mode=mode)

    if method == 'direct':
        return _st_core._direct_correlate(in1, in2, mode, in1.dtype,
                                          convolution)

    # if method == 'fft':
    inputs_swapped = _st_core._inputs_swap_needed(mode, in1.shape, in2.shape)
    if inputs_swapped:
        in1, in2 = in2, in1
    if not convolution:
        in2 = _st_core._reverse_and_conj(in2)
    out = fftconvolve(in1, in2, mode)
    result_type = cupy.result_type(in1, in2)
    if result_type.kind in 'ui':
        out = out.round()
    out = out.astype(result_type, copy=False)
    if not convolution and inputs_swapped:
        out = cupy.ascontiguousarray(_st_core._reverse_and_conj(out))
    return out
Example #10
def corr_pairwise(x, y, return_pearson=False):
    """Covariance and Pearson product-moment correlation coefficients on the GPU for paired data with tolerance of NaNs.
       Curently only supports rows as samples and columns as observations.

    Parameters
    ----------
    x : array_like
        The baseline array of values.
    y : array_like
        The comparison array of values.

    Returns
    -------
    corr : cupy ndarray
         Array of correlation values
    """
    def _cov_pairwise(x1, x2, factor):
        return cupy.nansum(x1 * x2, axis=1, keepdims=True) * cupy.true_divide(
            1, factor)

    # Coerce arrays into 2D format and set dtype
    dtype = cupy.result_type(x, y, cupy.float64)
    x = cupy.asarray(x, dtype=dtype)
    y = cupy.asarray(y, dtype=dtype)

    assert x.shape == y.shape
    if x.ndim < 2:
        x = x[None, :]
        y = y[None, :]
    n_samples, n_obs = x.shape

    # Calculate degrees of freedom for each sample pair
    ddof = 1
    nan_count = (cupy.isnan(x) | cupy.isnan(y)).sum(axis=1, keepdims=True)
    fact = n_obs - nan_count - ddof

    # Mean normalize
    x -= cupy.nanmean(x, axis=1, keepdims=True)
    y -= cupy.nanmean(y, axis=1, keepdims=True)

    # Calculate covariance matrix
    corr = _cov_pairwise(x, y, fact)

    if return_pearson:
        x_corr = _cov_pairwise(x, x, fact)
        y_corr = _cov_pairwise(y, y, fact)
        auto_corr = cupy.sqrt(x_corr) * cupy.sqrt(y_corr)
        corr = corr / auto_corr
        corr = cupy.clip(corr.real, -1, 1, out=corr.real)
        return corr

    return corr.squeeze()
Example #11
def polysub(a1, a2):
    """Computes the difference of two polynomials.

    Args:
        a1 (scalar, cupy.ndarray or cupy.poly1d): first input polynomial.
        a2 (scalar, cupy.ndarray or cupy.poly1d): second input polynomial.

    Returns:
        cupy.ndarray or cupy.poly1d: The difference of the inputs.

    .. seealso:: :func:`numpy.polysub`

    """
    if a1.shape[0] <= a2.shape[0]:
        out = cupy.pad(a1, (a2.shape[0] - a1.shape[0], 0))
        out = out.astype(cupy.result_type(a1, a2), copy=False)
        out -= a2
    else:
        out = cupy.pad(a2, (a1.shape[0] - a2.shape[0], 0))
        out = out.astype(cupy.result_type(a1, a2), copy=False)
        # in-place equivalent of out = a1 - out
        out -= 2 * out - a1
    return out
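A short check of both branches, assuming `polysub` above is in scope; coefficients are ordered highest power first, as in `numpy.polysub`:

import cupy

a1 = cupy.array([1, 2])         # x + 2
a2 = cupy.array([2, 0, -1])     # 2*x**2 - 1
print(polysub(a1, a2))          # [-2  1  3]
print(polysub(a2, a1))          # [ 2 -1 -3]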
Example #12
def __init__(self, h, x_dtype, up, down):
    """Helper for resampling"""
    h = cp.asarray(h)
    if h.ndim != 1 or h.size == 0:
        raise ValueError('h must be 1D with non-zero length')
    self._output_type = cp.result_type(h.dtype, x_dtype, cp.float32)
    h = cp.asarray(h, self._output_type)
    self._up = int(up)
    self._down = int(down)
    if self._up < 1 or self._down < 1:
        raise ValueError('Both up and down must be >= 1')
    # This both transposes, and "flips" each phase for filtering
    self._h_trans_flip = _pad_h(h, self._up)
    self._h_trans_flip = cp.ascontiguousarray(self._h_trans_flip)
Example #13
def get_kth_unique_value(data, k, axis=1):
    """Find the kth value along an axis of a matrix on the GPU

    Parameters
    ----------
    data : array_like
        The array of values to be ranked.
    k : int
        The kth unique value to be found.
    axis : {None, int}, optional
        Axis along which to perform the ranking. Default is 1 -- samples in
        rows, observations in columns.

    Returns
    -------
    kth_values : cupy ndarray
         An array of kth values.
    """

    # Coerce data into array -- make a copy since it needs to be sorted
    # TODO -- should the sort be done in Numba kernel (and how to do it)?
    dtype = cupy.result_type(data, cupy.float64)
    data_id = id(data)
    data = cupy.ascontiguousarray(data, dtype=dtype)

    if data_id == id(data):  # Ensure sort is being done on a copy
        data = data.copy()

    assert data.ndim <= 2

    if data.ndim < 2:
        if axis == 0:
            data = data[:, None]
        else:
            data = data[None, :]

    if axis == 0:
        n_obs, n_samples = data.shape
    else:
        n_samples, n_obs = data.shape

    data.sort(axis=axis)
    kth_values = cupy.zeros(n_samples, dtype=data.dtype)
    _get_kth_unique_kernel.forall(n_samples, 1)(data, kth_values, k, axis)

    if axis == 0:
        kth_values = kth_values[None, :]
    else:
        kth_values = kth_values[:, None]

    return kth_values
Example #14
def polyval(p, x):
    """Evaluates a polynomial at specific values.

    Args:
        p (cupy.ndarray or cupy.poly1d): input polynomial.
        x (scalar, cupy.ndarray): values at which the polynomial
            is evaluated.

    Returns:
        cupy.ndarray or cupy.poly1d: polynomial evaluated at x.

    .. warning::

        This function doesn't currently support poly1d values to evaluate.

    .. seealso:: :func:`numpy.polyval`

    """
    if isinstance(p, cupy.poly1d):
        p = p.coeffs
    if not isinstance(p, cupy.ndarray) or p.ndim == 0:
        raise TypeError('p can be 1d ndarray or poly1d object only')
    if p.ndim != 1:
        # to be consistent with polyarithmetic routines' behavior of
        # not allowing multidimensional polynomial inputs.
        raise ValueError('p can be 1d ndarray or poly1d object only')
        # TODO(Dahlia-Chehata): Support poly1d x
    if (isinstance(x, cupy.ndarray) and x.ndim <= 1) or numpy.isscalar(x):
        val = cupy.asarray(x).reshape(-1, 1)
    else:
        raise NotImplementedError(
            'poly1d or non 1d values are not currently supported')
    out = p[::-1] * cupy.power(val, cupy.arange(p.size))
    out = out.sum(axis=1)
    dtype = cupy.result_type(p, val)
    if cupy.isscalar(x) or x.ndim == 0:
        return out.astype(dtype, copy=False).reshape()
    if p.dtype == numpy.complex128 and val.dtype in [
            numpy.float16, numpy.float32, numpy.complex64
    ]:
        return out.astype(numpy.complex64, copy=False)
    p_kind_score = numpy.dtype(p.dtype.char.lower()).kind
    x_kind_score = numpy.dtype(val.dtype.char.lower()).kind
    if (p.dtype.kind not in 'c' and
        (p_kind_score == x_kind_score or val.dtype.kind in 'c')) or (
            issubclass(p.dtype.type, numpy.integer)
            and issubclass(val.dtype.type, numpy.floating)):
        return out.astype(val.dtype, copy=False)
    return out.astype(dtype, copy=False)
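A usage sketch, assuming `polyval` above is in scope:

import cupy

p = cupy.array([3, 0, 1])                  # 3*x**2 + 1
print(polyval(p, 5))                       # 76
print(polyval(p, cupy.array([0, 1, 2])))   # [ 1  4 13]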
Example #15
def histogram(x, bins=10):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents the bin edges.

    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    .. warning::

        This function may synchronize the device.

    .. seealso:: :func:`numpy.histogram`
    """

    if x.dtype.kind == 'c':
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError('complex number is not supported')

    if isinstance(bins, int):
        if x.size == 0:
            min_value = 0.0
            max_value = 1.0
        else:
            min_value = float(x.min())
            max_value = float(x.max())
        if min_value == max_value:
            min_value -= 0.5
            max_value += 0.5
        bin_type = cupy.result_type(min_value, max_value, x)
        bins = cupy.linspace(min_value, max_value, bins + 1, dtype=bin_type)
    elif isinstance(bins, cupy.ndarray):
        if (bins[:-1] > bins[1:]).any():  # synchronize!
            raise ValueError('bins must increase monotonically.')
    else:
        raise NotImplementedError('Only int or ndarray are supported for bins')

    y = cupy.zeros(bins.size - 1, dtype='l')
    _histogram_kernel(x, bins, bins.size, y)
    return y, bins
Example #16
def polyadd(a1, a2):
    """Computes the sum of two polynomials.

    Args:
        a1 (scalar, cupy.ndarray or cupy.poly1d): first input polynomial.
        a2 (scalar, cupy.ndarray or cupy.poly1d): second input polynomial.

    Returns:
        cupy.ndarray or cupy.poly1d: The sum of the inputs.

    .. seealso:: :func:`numpy.polyadd`

    """
    if a1.size < a2.size:
        a1, a2 = a2, a1
    out = cupy.pad(a2, (a1.size - a2.size, 0))
    out = out.astype(cupy.result_type(a1, a2), copy=False)
    out += a1
    return out
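And the corresponding check for `polyadd`, assuming the function above is in scope:

import cupy

a1 = cupy.array([1, 2, 3])   # x**2 + 2*x + 3
a2 = cupy.array([4, 5])      # 4*x + 5
print(polyadd(a1, a2))       # [1 6 8]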
Example #17
def _bmat(list_obj):
    """
    Helper function to create a block matrix in cupy from a list
    of smaller 2D dense arrays
    """
    n_rows = len(list_obj)
    n_cols = len(list_obj[0])
    final_shape = [0, 0]
    # calculating expected size of output
    for i in range(n_rows):
        final_shape[0] += list_obj[i][0].shape[0]
    for j in range(n_cols):
        final_shape[1] += list_obj[0][j].shape[1]
    # obtaining result's datatype
    dtype = cupy.result_type(
        *[arr.dtype for list_iter in list_obj for arr in list_iter])
    # checking order
    F_order = all(arr.flags['F_CONTIGUOUS'] for list_iter in list_obj
                  for arr in list_iter)
    C_order = all(arr.flags['C_CONTIGUOUS'] for list_iter in list_obj
                  for arr in list_iter)
    order = 'F' if F_order and not C_order else 'C'
    result = cupy.empty(tuple(final_shape), dtype=dtype, order=order)

    start_idx_row = 0
    start_idx_col = 0
    end_idx_row = 0
    end_idx_col = 0
    for i in range(n_rows):
        end_idx_row = start_idx_row + list_obj[i][0].shape[0]
        start_idx_col = 0
        for j in range(n_cols):
            end_idx_col = start_idx_col + list_obj[i][j].shape[1]
            result[start_idx_row:end_idx_row,
                   start_idx_col:end_idx_col] = list_obj[i][j]
            start_idx_col = end_idx_col
        start_idx_row = end_idx_row
    return result
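A smoke test for the helper, assuming `_bmat` above is in scope; a 2x2 grid of blocks is assembled into one dense array:

import cupy

a = cupy.ones((2, 2))
b = cupy.zeros((2, 3))
blocks = [[a, b],
          [b.T, cupy.eye(3)]]
print(_bmat(blocks).shape)  # (5, 5)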
Example #18
def _fft_convolve(a1, a2, mode):

    offset = 0
    if a1.size < a2.size:
        a1, a2 = a2, a1
        offset = 1 - a2.size % 2

    # if either of them is complex, the dtype after multiplication will also be
    if a1.dtype.kind == 'c' or a2.dtype.kind == 'c':
        fft, ifft = cupy.fft.fft, cupy.fft.ifft
    else:
        fft, ifft = cupy.fft.rfft, cupy.fft.irfft

    dtype = cupy.result_type(a1, a2)
    n1, n2 = a1.size, a2.size
    out_size = cupyx.scipy.fft.next_fast_len(n1 + n2 - 1)
    fa1 = fft(a1, out_size)
    fa2 = fft(a2, out_size)
    out = ifft(fa1 * fa2, out_size)

    if mode == 'full':
        start, end = 0, n1 + n2 - 1
    elif mode == 'same':
        start = (n2 - 1) // 2 + offset
        end = start + n1
    elif mode == 'valid':
        start, end = n2 - 1, n1
    else:
        raise ValueError(
            'acceptable mode flags are `valid`, `same`, or `full`.')

    out = out[start:end]

    if dtype.kind in 'iu':
        out = cupy.around(out)

    return out.astype(dtype, copy=False)
Example #19
File: einsum.py Project: zelo2/cupy
def einsum(*operands, **kwargs):
    """einsum(subscripts, *operands, dtype=False)

    Evaluates the Einstein summation convention on the operands.
    Using the Einstein summation convention, many common multi-dimensional
    array operations can be represented in a simple fashion. This function
    provides a way to compute such summations.

    .. note::
       Memory contiguity of calculation result is not always compatible with
       `numpy.einsum`.
       ``out``, ``order``, and ``casting`` options are not supported.

    Args:
        subscripts (str): Specifies the subscripts for summation.
        operands (sequence of arrays): These are the arrays for the operation.

    Returns:
        cupy.ndarray:
            The calculation based on the Einstein summation convention.

    .. seealso:: :func:`numpy.einsum`

    """

    input_subscripts, output_subscript, operands = \
        _parse_einsum_input(operands)
    assert isinstance(input_subscripts, list)
    assert isinstance(operands, list)

    dtype = kwargs.pop('dtype', None)

    # casting = kwargs.pop('casting', 'safe')
    casting_kwargs = {}  # casting is not supported yet in astype

    optimize = kwargs.pop('optimize', False)
    if optimize is True:
        optimize = 'greedy'
    if kwargs:
        raise TypeError('Did not understand the following kwargs: %s'
                        % list(kwargs.keys()))

    result_dtype = cupy.result_type(*operands) if dtype is None else dtype
    operands = [
        cupy.asanyarray(arr)
        for arr in operands
    ]

    input_subscripts = [
        _parse_ellipsis_subscript(sub, idx, ndim=arr.ndim)
        for idx, (sub, arr) in enumerate(zip(input_subscripts, operands))
    ]

    # Get length of each unique dimension and ensure all dimensions are correct
    dimension_dict = {}
    for idx, sub in enumerate(input_subscripts):
        sh = operands[idx].shape
        for axis, label in enumerate(sub):
            dim = sh[axis]
            if label in dimension_dict.keys():
                # For broadcasting cases we always want the largest dim size
                if dimension_dict[label] == 1:
                    dimension_dict[label] = dim
                elif dim not in (1, dimension_dict[label]):
                    dim_old = dimension_dict[label]
                    raise ValueError(
                        'Size of label \'%s\' for operand %d (%d) '
                        'does not match previous terms (%d).'
                        % (_chr(label), idx, dim, dim_old))
            else:
                dimension_dict[label] = dim

    if output_subscript is None:
        # Build output subscripts
        tmp_subscripts = list(itertools.chain.from_iterable(input_subscripts))
        output_subscript = [
            label
            for label in sorted(set(tmp_subscripts))
            if label < 0 or tmp_subscripts.count(label) == 1
        ]
    else:
        if not options['sum_ellipsis']:
            if '@' not in output_subscript and -1 in dimension_dict:
                raise ValueError(
                    'output has more dimensions than subscripts '
                    'given in einstein sum, but no \'...\' ellipsis '
                    'provided to broadcast the extra dimensions.')
        output_subscript = _parse_ellipsis_subscript(
            output_subscript, None,
            ellipsis_len=sum(label < 0 for label in dimension_dict.keys())
        )

        # Make sure output subscripts are in the input
        tmp_subscripts = set(itertools.chain.from_iterable(input_subscripts))
        for label in output_subscript:
            if label not in tmp_subscripts:
                raise ValueError(
                    'einstein sum subscripts string included output subscript '
                    '\'%s\' which never appeared in an input' % _chr(label))
        if len(output_subscript) != len(set(output_subscript)):
            for label in output_subscript:
                if output_subscript.count(label) >= 2:
                    raise ValueError(
                        'einstein sum subscripts string includes output '
                        'subscript \'%s\' multiple times' % _chr(label))

    _einsum_diagonals(input_subscripts, operands)

    # no more raises

    if len(operands) >= 2:
        if any(arr.size == 0 for arr in operands):
            return cupy.zeros(
                tuple(dimension_dict[label] for label in output_subscript),
                dtype=result_dtype
            )

        # Don't squeeze if unary, because this affects later (in trivial sum)
        # whether the return is a writeable view.
        for idx in range(len(operands)):
            arr = operands[idx]
            if 1 in arr.shape:
                squeeze_indices = []
                sub = []
                for axis, label in enumerate(input_subscripts[idx]):
                    if arr.shape[axis] == 1:
                        squeeze_indices.append(axis)
                    else:
                        sub.append(label)
                input_subscripts[idx] = sub
                operands[idx] = cupy.squeeze(arr, axis=tuple(squeeze_indices))
                assert operands[idx].ndim == len(input_subscripts[idx])
            del arr

    # unary einsum without summation should return a (writeable) view
    returns_view = len(operands) == 1

    # unary sum
    for idx, sub in enumerate(input_subscripts):
        other_subscripts = copy.copy(input_subscripts)
        other_subscripts[idx] = output_subscript
        other_subscripts = set(itertools.chain.from_iterable(other_subscripts))
        sum_axes = tuple(
            axis
            for axis, label in enumerate(sub)
            if label not in other_subscripts
        )
        if sum_axes:
            returns_view = False
            input_subscripts[idx] = [
                label
                for axis, label in enumerate(sub)
                if axis not in sum_axes
            ]

            operands[idx] = operands[idx].sum(
                axis=sum_axes, dtype=result_dtype)

    if returns_view:
        operands = [a.view() for a in operands]
    else:
        operands = [
            a.astype(result_dtype, copy=False, **casting_kwargs)
            for a in operands
        ]

    # no more casts

    optimize_algorithms = {
        'greedy': _greedy_path,
        'optimal': _optimal_path,
    }
    if optimize is False:
        path = [tuple(range(len(operands)))]
    elif len(optimize) and (optimize[0] == 'einsum_path'):
        path = optimize[1:]
    else:
        try:
            if len(optimize) == 2 and isinstance(optimize[1], (int, float)):
                algo = optimize_algorithms[optimize[0]]
                memory_limit = int(optimize[1])
            else:
                algo = optimize_algorithms[optimize]
                memory_limit = 2 ** 31  # TODO(kataoka): fix?
        except (TypeError, KeyError):  # unhashable type or not found
            raise TypeError('Did not understand the path (optimize): %s'
                            % str(optimize))
        input_sets = [set(sub) for sub in input_subscripts]
        output_set = set(output_subscript)
        path = algo(input_sets, output_set, dimension_dict, memory_limit)
        if any(len(indices) > 2 for indices in path):
            warnings.warn(
                'memory efficient einsum is not supported yet',
                _util.PerformanceWarning)

    for idx0, idx1 in _iter_path_pairs(path):
        # "reduced" binary einsum
        arr0 = operands.pop(idx0)
        sub0 = input_subscripts.pop(idx0)
        arr1 = operands.pop(idx1)
        sub1 = input_subscripts.pop(idx1)
        sub_others = list(itertools.chain(
            output_subscript,
            itertools.chain.from_iterable(input_subscripts)))
        arr_out, sub_out = reduced_binary_einsum(
            arr0, sub0, arr1, sub1, sub_others)
        operands.append(arr_out)
        input_subscripts.append(sub_out)
        del arr0, arr1

    # unary einsum at last
    arr0, = operands
    sub0, = input_subscripts

    transpose_axes = []
    for label in output_subscript:
        if label in sub0:
            transpose_axes.append(sub0.index(label))

    arr_out = arr0.transpose(transpose_axes).reshape([
        dimension_dict[label]
        for label in output_subscript
    ])
    assert returns_view or arr_out.dtype == result_dtype
    return arr_out
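A couple of minimal calls through the public API:

import cupy

a = cupy.arange(6).reshape(2, 3)
b = cupy.arange(3)
print(cupy.einsum('ij,j->i', a, b))    # [ 5 14]  (matrix-vector product)
print(cupy.einsum('ii', cupy.eye(3)))  # 3.0      (trace)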
Example #20
def rankdata(data,
             method='average',
             na_option='keep',
             axis=1,
             is_symmetric=False):
    """Rank observations for a series of samples, with tie handling
    NOTE: due to a bug with cudf ranking, data will be transposed if row-wise ranking is
    selected

    Parameters
    ----------
    data : array_like
        The array of values to be ranked.
    method : {'average', 'min', 'max', 'dense', 'ordinal'}, optional
        The method used to assign ranks to tied elements.
        The following methods are available (default is 'average'):
          * 'average': The average of the ranks that would have been assigned to
            all the tied values is assigned to each value.
          * 'min': The minimum of the ranks that would have been assigned to all
            the tied values is assigned to each value.  (This is also
            referred to as "competition" ranking.)
          * 'max': The maximum of the ranks that would have been assigned to all
            the tied values is assigned to each value.
          * 'dense': Like 'min', but the rank of the next highest element is
            assigned the rank immediately after those assigned to the tied
            elements.
          * 'ordinal': All values are given a distinct rank, corresponding to
            the order that the values occur in `a`.
    na_option : {'keep', 'top', 'bottom'}, optional
        How NaN values are ranked (forwarded to cudf). Default is 'keep'.
    axis : {None, int}, optional
        Axis along which to perform the ranking. Default is 1 -- samples in
        rows, observations in columns.
    is_symmetric : {False, bool}, optional
        Will be used to avoid additional data transpose steps if axis = 1

    Returns
    -------
    ranks : cupy ndarray
         An array of size equal to the size of `a`, containing rank
         scores.

    See also scipy.stats.rankdata, for which this function is a replacement
    """

    dtype = cupy.result_type(data.dtype, cupy.float64)
    data = cupy.asarray(data, dtype=dtype)

    if is_symmetric:
        assert data.ndim == 2
        assert data.shape[0] == data.shape[1]

    if data.ndim < 2:
        data = data[:, None]
    elif data.ndim == 2 and axis == 1 and not is_symmetric:
        data = data.T

    ranks = cudf.DataFrame(data).rank(axis=0,
                                      method=method,
                                      na_option=na_option)
    ranks = ranks.values

    if axis == 1:
        ranks = ranks.T
    return ranks
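A hedged usage sketch; this depends on cudf being installed alongside CuPy:

import cupy

data = cupy.array([[0.5, 0.1, 0.9],
                   [3.0, 3.0, 1.0]])
print(rankdata(data, axis=1))
# [[2.  1.  3. ]
#  [2.5 2.5 1. ]]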
Example #21
def histogram(x, bins=10):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents the bin edges.

    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    .. seealso:: :func:`numpy.histogram`
    """

    if x.dtype.kind == 'c':
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError('complex number is not supported')

    if isinstance(bins, int):
        if x.size == 0:
            min_value = 0.0
            max_value = 1.0
        else:
            min_value = float(x.min())
            max_value = float(x.max())
        if min_value == max_value:
            min_value -= 0.5
            max_value += 0.5
        bin_type = cupy.result_type(min_value, max_value, x)
        bins = cupy.linspace(min_value, max_value, bins + 1, dtype=bin_type)
    elif isinstance(bins, cupy.ndarray):
        if cupy.any(bins[:-1] > bins[1:]):
            raise ValueError('bins must increase monotonically.')
    else:
        raise NotImplementedError('Only int or ndarray are supported for bins')

    # atomicAdd only supports int32
    y = cupy.zeros(bins.size - 1, dtype=cupy.int32)

    # TODO(unno): use searchsorted
    cupy.ElementwiseKernel(
        'S x, raw T bins, int32 n_bins', 'raw int32 y', '''
        if (x < bins[0] or bins[n_bins - 1] < x) {
            return;
        }
        int high = n_bins - 1;
        int low = 0;

        while (high - low > 1) {
            int mid = (high + low) / 2;
            if (bins[mid] <= x) {
                low = mid;
            } else {
                high = mid;
            }
        }
        atomicAdd(&y[low], 1);
        ''')(x, bins, bins.size, y)
    return y.astype('l'), bins
Example #22
def histogram(x, bins=10, range=None, weights=None, density=False):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents the bin edges.
        range (2-tuple of float, optional): The lower and upper range of the
            bins.  If not provided, range is simply ``(x.min(), x.max())``.
            Values outside the range are ignored. The first element of the
            range must be less than or equal to the second. `range` affects the
            automatic bin computation as well. While bin width is computed to
            be optimal based on the actual data within `range`, the bin count
            will fill the entire range including portions containing no data.
        density (bool, optional): If False, the default, returns the number of
            samples in each bin. If True, returns the probability *density*
            function at the bin, ``bin_count / sample_count / bin_volume``.
        weights (cupy.ndarray, optional): An array of weights, of the same
            shape as `x`.  Each value in `x` only contributes its associated
            weight towards the bin count (instead of 1).
    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    .. warning::

        This function may synchronize the device.

    .. seealso:: :func:`numpy.histogram`
    """

    if x.dtype.kind == 'c':
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError('complex number is not supported')

    if not isinstance(x, cupy.ndarray):
        raise ValueError("x must be a cupy.ndarray")

    x, weights = _ravel_and_check_weights(x, weights)
    bin_edges = _get_bin_edges(x, bins, range)

    if weights is None:
        y = cupy.zeros(bin_edges.size - 1, dtype='l')
        for accelerator in _accelerator.get_routine_accelerators():
            # CUB uses int for bin counts
            # TODO(leofang): support >= 2^31 elements in x?
            if (accelerator == _accelerator.ACCELERATOR_CUB
                    and x.size <= 0x7fffffff and bin_edges.size <= 0x7fffffff):
                # Need to ensure the dtype of bin_edges as it's needed for both
                # the CUB call and the correction later
                if isinstance(bins, cupy.ndarray):
                    bin_type = cupy.result_type(bin_edges, x)
                    if cupy.issubdtype(bin_type, cupy.integer):
                        bin_type = cupy.result_type(bin_type, float)
                    bin_edges = bin_edges.astype(bin_type, copy=False)
                # CUB's upper bin boundary is exclusive for all bins, including
                # the last bin, so we must shift it to comply with NumPy
                if x.dtype.kind in 'ui':
                    bin_edges[-1] += 1
                elif x.dtype.kind == 'f':
                    old_edge = bin_edges[-1].copy()
                    bin_edges[-1] = cupy.nextafter(bin_edges[-1],
                                                   bin_edges[-1] + 1)
                y = cub.device_histogram(x, bin_edges, y)
                # shift the uppermost edge back
                if x.dtype.kind in 'ui':
                    bin_edges[-1] -= 1
                elif x.dtype.kind == 'f':
                    bin_edges[-1] = old_edge

                # TODO(asi1024): Refactor temporary fix for dtype compatibility
                if isinstance(bins, cupy.ndarray):
                    bin_edges = bin_edges.astype(bins.dtype, copy=False)
                break
        else:
            _histogram_kernel(x, bin_edges, bin_edges.size, y)
    else:
        simple_weights = (
            cupy.can_cast(weights.dtype, cupy.float64) or
            cupy.can_cast(weights.dtype, cupy.complex128)
        )
        if not simple_weights:
            # object dtype such as Decimal are supported in NumPy, but not here
            raise NotImplementedError(
                "only weights with dtype that can be cast to float or complex "
                "are supported")
        if weights.dtype.kind == 'c':
            y = cupy.zeros(bin_edges.size - 1, dtype=cupy.complex128)
            _weighted_histogram_kernel(
                x, bin_edges, bin_edges.size, weights.real, y.real)
            _weighted_histogram_kernel(
                x, bin_edges, bin_edges.size, weights.imag, y.imag)
        else:
            if weights.dtype.kind in 'bui':
                y = cupy.zeros(bin_edges.size - 1, dtype=int)
            else:
                y = cupy.zeros(bin_edges.size - 1, dtype=cupy.float64)
            _weighted_histogram_kernel(
                x, bin_edges, bin_edges.size, weights, y)

    if density:
        db = cupy.array(cupy.diff(bin_edges), cupy.float64)
        return y/db/y.sum(), bin_edges
    return y, bin_edges
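A short check of the `density` path through the public `cupy.histogram`; the bin widths times the densities sum to 1:

import cupy

x = cupy.array([1.0, 1.0, 1.0, 2.0, 2.0, 3.0])
hist, edges = cupy.histogram(x, bins=4, range=(0.0, 4.0), density=True)
print(edges)  # [0. 1. 2. 3. 4.]
print(hist)   # [0.         0.5        0.33333333 0.16666667]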
Example #23
def firfilter(b, x, axis=-1, zi=None):
    """
    Filter data along one-dimension with an FIR filter.

    Filter a data sequence, `x`, using a digital FIR filter. This works for
    many fundamental data types (including Object type). Please note that
    cuSignal doesn't presently support IIR filters, and this implementation
    is optimized for large filtering operations (and inherently depends on
    fftconvolve).

    Parameters
    ----------
    b : array_like
        The numerator coefficient vector in a 1-D sequence.
    x : array_like
        An N-dimensional input array.
    axis : int, optional
        The axis of the input data array along which to apply the
        linear filter. The filter is applied to each subarray along
        this axis.  Default is -1.
    zi : array_like, optional
        Initial conditions for the filter delays.  It is a vector
        (or array of vectors for an N-dimensional input) of length
        ``max(len(a), len(b)) - 1``.  If `zi` is None or is not given then
        initial rest is assumed.  See `lfiltic` for more information.

    Returns
    -------
    y : array
        The output of the digital filter.
    zf : array, optional
        If `zi` is None, this is not returned, otherwise, `zf` holds the
        final filter delay values.
    """
    b = cp.asarray(b)
    if b.ndim != 1:
        raise ValueError('object of too small depth for desired array')

    if x.ndim == 0:
        raise ValueError('x must be at least 1-D')

    inputs = [b, x]
    if zi is not None:
        # _linear_filter does not broadcast zi, but does do expansion of
        # singleton dims.
        zi = cp.asarray(zi)
        if zi.ndim != x.ndim:
            raise ValueError('object of too small depth for desired array')
        expected_shape = list(x.shape)
        expected_shape[axis] = b.shape[0] - 1
        expected_shape = tuple(expected_shape)
        # check the trivial case where zi is the right shape first
        if zi.shape != expected_shape:
            strides = zi.ndim * [None]
            if axis < 0:
                axis += zi.ndim
            for k in range(zi.ndim):
                if k == axis and zi.shape[k] == expected_shape[k]:
                    strides[k] = zi.strides[k]
                elif k != axis and zi.shape[k] == expected_shape[k]:
                    strides[k] = zi.strides[k]
                elif k != axis and zi.shape[k] == 1:
                    strides[k] = 0
                else:
                    raise ValueError('Unexpected shape for zi: expected '
                                     '%s, found %s.' %
                                     (expected_shape, zi.shape))
            zi = cp.lib.stride_tricks.as_strided(zi, expected_shape, strides)
        inputs.append(zi)
    dtype = cp.result_type(*inputs)

    if dtype.char not in 'fdgFDGO':
        raise NotImplementedError("input type '%s' not supported" % dtype)

    b = cp.array(b, dtype=dtype)
    x = cp.array(x, dtype=dtype, copy=False)

    out_full = cp.apply_along_axis(lambda y: cp.convolve(b, y), axis, x)

    ind = out_full.ndim * [slice(None)]
    if zi is not None:
        ind[axis] = slice(zi.shape[axis])
        out_full[tuple(ind)] += zi

    ind[axis] = slice(out_full.shape[axis] - len(b) + 1)
    out = out_full[tuple(ind)]

    if zi is None:
        return out
    else:
        ind[axis] = slice(out_full.shape[axis] - len(b) + 1, None)
        zf = out_full[tuple(ind)]
        return out, zf
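A minimal smoke test, assuming the function above is in scope with `cp` bound to CuPy:

import cupy as cp

b = cp.ones(5) / 5.0                 # 5-tap moving average
x = cp.arange(20, dtype=cp.float64)
y = firfilter(b, x)
print(y[:6])  # [0.  0.2 0.6 1.2 2.  3. ]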
Example #24
def sosfilt(
    sos,
    x,
    axis=-1,
    zi=None,
):
    """
    Filter data along one dimension using cascaded second-order sections.
    Filter a data sequence, `x`, using a digital IIR filter defined by
    `sos`.

    Parameters
    ----------
    sos : array_like
        Array of second-order filter coefficients, must have shape
        ``(n_sections, 6)``. Each row corresponds to a second-order
        section, with the first three columns providing the numerator
        coefficients and the last three providing the denominator
        coefficients.
    x : array_like
        An N-dimensional input array.
    axis : int, optional
        The axis of the input data array along which to apply the
        linear filter. The filter is applied to each subarray along
        this axis.  Default is -1.
    zi : array_like, optional
        Initial conditions for the cascaded filter delays.  It is a (at
        least 2D) vector of shape ``(n_sections, ..., 2, ...)``, where
        ``..., 2, ...`` denotes the shape of `x`, but with ``x.shape[axis]``
        replaced by 2.  If `zi` is None or is not given then initial rest
        (i.e. all zeros) is assumed.
        Note that these initial conditions are *not* the same as the initial
        conditions given by `lfiltic` or `lfilter_zi`.

    Returns
    -------
    y : ndarray
        The output of the digital filter.
    zf : ndarray, optional
        If `zi` is None, this is not returned, otherwise, `zf` holds the
        final filter delay values.

    See Also
    --------
    zpk2sos, sos2zpk, sosfilt_zi, sosfiltfilt, sosfreqz

    Notes
    -----
    WARNING: This is an experimental API and is prone to change in future
    versions of cuSignal.

    The filter function is implemented as a series of second-order filters
    with direct-form II transposed structure. It is designed to minimize
    numerical precision errors for high-order filters.

    Limitations
    -----------
    1. The number of sections must be less than 513.
    2. The number of samples must be greater than the number of sections.

    Examples
    --------
    sosfilt is a stable alternative to `lfilter` as using 2nd order sections
    reduces numerical error. We are working on building out sos filter output,
    so please submit GitHub feature requests as needed. You can also generate
    a filter on CPU with scipy.signal and then move that to GPU for actual
    filtering operations with `cp.asarray`.

    Plot a 13th-order filter's impulse response using `sosfilt`:

    >>> from scipy import signal
    >>> import cusignal
    >>> import cupy as cp
    >>> # Generate filter on CPU with Scipy.Signal
    >>> sos = signal.ellip(13, 0.009, 80, 0.05, output='sos')
    >>> # Move data to GPU
    >>> sos = cp.asarray(sos)
    >>> x = cp.random.randn(100_000_000)
    >>> y = cusignal.sosfilt(sos, x)
    """

    x = cp.asarray(x)
    if x.ndim == 0:
        raise ValueError("x must be at least 1D")

    sos, n_sections = _validate_sos(sos)
    sos = cp.asarray(sos)

    x_zi_shape = list(x.shape)
    x_zi_shape[axis] = 2
    x_zi_shape = tuple([n_sections] + x_zi_shape)
    inputs = [sos, x]

    if zi is not None:
        inputs.append(np.asarray(zi))

    dtype = cp.result_type(*inputs)

    if dtype.char not in "fdgFDGO":
        raise NotImplementedError("input type '%s' not supported" % dtype)
    if zi is not None:
        zi = cp.array(zi, dtype)  # make a copy so that we can operate in place
        if zi.shape != x_zi_shape:
            raise ValueError("Invalid zi shape. With axis=%r, an input with "
                             "shape %r, and an sos array with %d sections, zi "
                             "must have shape %r, got %r." %
                             (axis, x.shape, n_sections, x_zi_shape, zi.shape))
        return_zi = True
    else:
        zi = cp.zeros(x_zi_shape, dtype=dtype)
        return_zi = False

    axis = axis % x.ndim  # make positive
    x = cp.moveaxis(x, axis, -1)
    zi = cp.moveaxis(zi, [0, axis + 1], [-2, -1])
    x_shape, zi_shape = x.shape, zi.shape
    x = cp.reshape(x, (-1, x.shape[-1]))
    x = cp.array(x, dtype, order="C")  # make a copy, can modify in place
    zi = cp.ascontiguousarray(cp.reshape(zi, (-1, n_sections, 2)))
    sos = sos.astype(dtype, copy=False)

    max_smem = _get_max_smem()
    max_tpb = _get_max_tpb()

    # Determine how much shared memory is needed
    out_size = sos.shape[0]
    z_size = zi.shape[1] * zi.shape[2]
    sos_size = sos.shape[0] * sos.shape[1]
    shared_mem = (out_size + z_size + sos_size) * x.dtype.itemsize

    if shared_mem > max_smem:
        max_sections = (max_smem // (1 + zi.shape[2] + sos.shape[1]) //
                        x.dtype.itemsize)
        raise ValueError("The number of sections ({}), requires too much "
                         "shared memory ({}B) > ({}B). \n"
                         "\n**Max sections possible ({})**".format(
                             sos.shape[0], shared_mem, max_smem, max_sections))

    if sos.shape[0] > max_tpb:
        raise ValueError("The number of sections ({}), must be less "
                         "than max threads per block ({})".format(
                             sos.shape[0], max_tpb))

    if sos.shape[0] > x.shape[1]:
        raise ValueError("The number of samples ({}), must be greater "
                         "than the number of sections ({})".format(
                             x.shape[1], sos.shape[0]))

    _sosfilt(sos, x, zi)

    x.shape = x_shape
    x = cp.moveaxis(x, -1, axis)
    if return_zi:
        zi.shape = zi_shape
        zi = cp.moveaxis(zi, [-2, -1], [0, axis + 1])
        out = (x, zi)
    else:
        out = x

    return out
Example #25
def _try_use_cutensornet(*args, **kwargs):
    if cupy.cuda.runtime.is_hip:
        return None

    if (_accelerator.ACCELERATOR_CUTENSORNET
            not in _accelerator.get_routine_accelerators()):
        return None

    if cutensornet is None:
        warnings.warn(
            'using the cuTensorNet backend was requested but it cannot be '
            'imported -- maybe you forgot to install cuQuantum Python? '
            'Please do "pip install cuquantum-python" or "conda install '
            '-c conda-forge cuquantum-python" and retry',
            stacklevel=2)
        return None

    # cannot pop as we might still need kwargs later
    dtype = kwargs.get('dtype', None)
    path = kwargs.get('optimize', False)
    if path is True:
        path = 'greedy'

    # we do very lightweight pre-processing here just to inspect the
    # operands; the actual input verification is deferred to cuTensorNet
    # which can generate far better diagnostic messages
    args = _get_einsum_operands(args)
    operands = [cupy.asarray(op) for op in args[1]]

    if len(operands) == 1:
        # As of cuTENSOR 1.5.0 it still chokes with some common operations
        # like trace ("ii->") so it's easier to just skip all single-operand
        # cases instead of whitelisting what could be done explicitly
        return None

    if (any(op.size == 0 for op in operands)
            or any(len(op.shape) == 0 for op in operands)):
        # To cuTensorNet the shape is invalid
        return None

    # all input dtypes must be identical (to a numerical dtype)
    result_dtype = cupy.result_type(*operands) if dtype is None else dtype
    if result_dtype not in (cupy.float32, cupy.float64, cupy.complex64,
                            cupy.complex128):
        return None
    operands = [op.astype(result_dtype, copy=False) for op in operands]

    # prepare cutn inputs
    device = cupy.cuda.runtime.getDevice()
    handle = cutn_handle_cache.get(device, cutensornet.create())
    cutn_options = {
        'device_id': device,
        'handle': handle,
        'memory_limit': 4**31
    }  # TODO(leofang): fix?

    # TODO(leofang): support all valid combinations:
    # - path from user, contract with cutn (done)
    # - path from cupy, contract with cutn (not yet)
    # - path from cutn, contract with cutn (done)
    # - path from cutn, contract with cupy (not yet)
    raise_warning = False
    if path is False:
        # following the same convention (contracting from the right) as would
        # be produced by _iter_path_pairs(), but converting to a list of pairs
        # due to cuTensorNet's requirement
        path = [(i - 1, i - 2) for i in range(len(operands), 1, -1)]
    elif len(path) and path[0] == 'einsum_path':
        # let cuTensorNet check if the format is correct
        path = path[1:]
    elif len(path) == 2:
        if isinstance(path[1], (int, float)):
            raise_warning = True
        if path[0] != 'cutensornet':
            raise_warning = True
        path = None
    else:  # path is a string
        if path != 'cutensornet':
            raise_warning = True
        path = None
    if raise_warning:
        warnings.warn(
            'the cuTensorNet backend ignores the "optimize" option '
            'except when an explicit contraction path is provided '
            'or when optimize=False (disable optimization); also, '
            'the maximum intermediate size, if set, is ignored',
            stacklevel=2)
    cutn_optimizer = {'path': path} if path else None

    if len(args) == 2:
        out = cutensornet.contract(args[0],
                                   *operands,
                                   options=cutn_options,
                                   optimize=cutn_optimizer)
    elif len(args) == 3:
        inputs = [i for pair in zip(operands, args[0]) for i in pair]
        if args[2] is not None:
            inputs.append(args[2])
        out = cutensornet.contract(*inputs,
                                   options=cutn_options,
                                   optimize=cutn_optimizer)
    else:
        assert False

    return out
Example #26
def convolve(
    in1,
    in2,
    mode="full",
    method="auto",
):
    """
    Convolve two N-dimensional arrays.

    Convolve `in1` and `in2`, with the output size determined by the
    `mode` argument.

    Parameters
    ----------
    in1 : array_like
        First input.
    in2 : array_like
        Second input. Should have the same number of dimensions as `in1`.
    mode : str {'full', 'valid', 'same'}, optional
        A string indicating the size of the output:

        ``full``
           The output is the full discrete linear convolution
           of the inputs. (Default)
        ``valid``
           The output consists only of those elements that do not
           rely on the zero-padding. In 'valid' mode, either `in1` or `in2`
           must be at least as large as the other in every dimension.
        ``same``
           The output is the same size as `in1`, centered
           with respect to the 'full' output.
    method : str {'auto', 'direct', 'fft'}, optional
        A string indicating which method to use to calculate the convolution.

        ``direct``
           The convolution is determined directly from sums, the definition of
           convolution.
        ``fft``
           The Fourier Transform is used to perform the convolution by calling
           `fftconvolve`.
        ``auto``
           Automatically chooses direct or Fourier method based on an estimate
           of which is faster (default).

    Returns
    -------
    convolve : array
        An N-dimensional array containing a subset of the discrete linear
        convolution of `in1` with `in2`.

    See Also
    --------
    choose_conv_method : chooses the fastest appropriate convolution method
    fftconvolve

    Notes
    -----
    By default, `convolve` and `correlate` use ``method='auto'``, which calls
    `choose_conv_method` to choose the fastest method using pre-computed
    values (`choose_conv_method` can also measure real-world timing with a
    keyword argument). Because `fftconvolve` relies on floating point numbers,
    there are certain constraints that may force `method=direct` (more detail
    in `choose_conv_method` docstring).

    Examples
    --------
    Smooth a square pulse using a Hann window:

    >>> import cusignal
    >>> import cupy as cp
    >>> sig = cp.repeat(cp.asarray([0., 1., 0.]), 100)
    >>> win = cusignal.hann(50)
    >>> filtered = cusignal.convolve(sig, win, mode='same') / cp.sum(win)

    >>> import matplotlib.pyplot as plt
    >>> fig, (ax_orig, ax_win, ax_filt) = plt.subplots(3, 1, sharex=True)
    >>> ax_orig.plot(cp.asnumpy(sig))
    >>> ax_orig.set_title('Original pulse')
    >>> ax_orig.margins(0, 0.1)
    >>> ax_win.plot(cp.asnumpy(win))
    >>> ax_win.set_title('Filter impulse response')
    >>> ax_win.margins(0, 0.1)
    >>> ax_filt.plot(cp.asnumpy(filtered))
    >>> ax_filt.set_title('Filtered signal')
    >>> ax_filt.margins(0, 0.1)
    >>> fig.tight_layout()
    >>> fig.show()

    """

    volume = cp.asarray(in1)
    kernel = cp.asarray(in2)

    if volume.ndim == kernel.ndim == 0:
        return volume * kernel
    elif volume.ndim != kernel.ndim:
        raise ValueError("in1 and in2 should have the same dimensionality")

    if _inputs_swap_needed(mode, volume.shape, kernel.shape):
        # Convolution is commutative
        # order doesn't have any effect on output
        volume, kernel = kernel, volume

    if method == "auto":
        method = choose_conv_method(volume, kernel, mode=mode)

    if method == "fft":
        out = fftconvolve(volume, kernel, mode=mode)
        result_type = cp.result_type(volume, kernel)
        if result_type.kind in {"u", "i"}:
            out = cp.around(out)
        return out.astype(result_type)
    elif method == "direct":
        if volume.ndim > 1:
            raise ValueError("Direct method is only implemented for 1D")

        swapped_inputs = (mode != "valid") and (kernel.size > volume.size)

        if swapped_inputs:
            volume, kernel = kernel, volume

        return _convolution_cuda._convolve(volume, kernel, True,
                                           swapped_inputs, mode)

    else:
        raise ValueError("Acceptable method flags are 'auto',"
                         " 'direct', or 'fft'.")
Example #27
0
def linspace(start,
             stop,
             num=50,
             endpoint=True,
             retstep=False,
             dtype=None,
             axis=0):
    """Returns an array with evenly-spaced values within a given interval.

    Instead of specifying the step width like :func:`cupy.arange`, this
    function requires the total number of elements specified.

    Args:
        start (scalar or array_like): Starting value(s) of the sequence.
        stop (scalar or array_like): Ending value(s) of the sequence, unless
            ``endpoint`` is set to ``False``. In that case, the sequence
            consists of all but the last of ``num + 1`` evenly spaced samples,
            so that ``stop`` is excluded.  Note that the step size changes when
            ``endpoint`` is ``False``.
        num: Number of elements.
        endpoint (bool): If ``True``, the stop value is included as the last
            element. Otherwise, the stop value is omitted.
        retstep (bool): If ``True``, this function returns (array, step).
            Otherwise, it returns only the array.
        dtype: Data type specifier. It is inferred from the start and stop
            arguments by default.
        axis (int):  The axis in the result to store the samples.  Relevant
            only if start or stop are array-like.  By default ``0``, the
            samples will be along a new axis inserted at the beginning.
            Use ``-1`` to get an axis at the end.

    Returns:
        cupy.ndarray: The 1-D array of ranged values.

    .. seealso:: :func:`numpy.linspace`

    """
    if num < 0:
        raise ValueError('linspace with num<0 is not supported')
    div = (num - 1) if endpoint else num

    scalar_start = cupy.isscalar(start)
    scalar_stop = cupy.isscalar(stop)
    if scalar_start and scalar_stop:
        return _linspace_scalar(start, stop, num, endpoint, retstep, dtype)

    if not scalar_start:
        if not (isinstance(start, cupy.ndarray) and start.dtype.kind == 'f'):
            start = cupy.asarray(start) * 1.0

    if not scalar_stop:
        if not (isinstance(stop, cupy.ndarray) and stop.dtype.kind == 'f'):
            stop = cupy.asarray(stop) * 1.0

    dt = cupy.result_type(start, stop, float(num))
    if dtype is None:
        # In the actual implementation, only float is used
        dtype = dt

    delta = stop - start

    # ret = cupy.arange(0, num, dtype=dt).reshape((-1,) + (1,) * delta.ndim)
    ret = cupy.empty((num, ), dtype=dt)
    _arange_ufunc(0.0, 1.0, ret, dtype=dt)
    ret = ret.reshape((-1, ) + (1, ) * delta.ndim)

    # In-place multiplication y *= delta/div is faster, but prevents the
    # multiplicand from overriding what class is produced, and thus prevents,
    # e.g., use of Quantities; see numpy#7142. Hence, we multiply in place
    # only for standard scalar types.
    if num > 1:
        step = delta / div
        if cupy.any(step == 0):
            # Special handling for denormal numbers, numpy#5437
            ret /= div
            ret = ret * delta
        else:
            ret = ret * step
    else:
        # 0 and 1 item long sequences have an undefined step
        step = float('nan')
        # Multiply with delta to allow possible override of output class.
        ret = ret * delta

    ret += start
    if endpoint and num > 1:
        ret[-1] = stop

    if axis != 0:
        ret = cupy.moveaxis(ret, 0, axis)

    if cupy.issubdtype(dtype, cupy.integer):
        cupy.floor(ret, out=ret)

    ret = ret.astype(dtype, copy=False)

    if retstep:
        return ret, step
    else:
        return ret
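A short sketch of the non-scalar branch above: with array-valued endpoints the samples are laid out along a new axis, which the axis argument can relocate. The endpoint values are arbitrary.

import cupy

start = cupy.asarray([0.0, 10.0])
stop = cupy.asarray([1.0, 20.0])
# Two sequences of 5 samples each; axis=-1 puts the samples last.
out = cupy.linspace(start, stop, num=5, axis=-1)
assert out.shape == (2, 5)
assert float(out[1, -1]) == 20.0  # endpoint included by default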
Example #28
0
def _fft_convolve(a1, a2, mode):

    offset = 0
    if a1.size < a2.size:
        a1, a2 = a2, a1
        offset = 1 - a2.size % 2

    # if either input is complex, the dtype after multiplication will
    # also be complex; use the complex-to-complex transforms in that case
    if a1.dtype.kind == 'c' or a2.dtype.kind == 'c':
        fft, ifft = cupy.fft.fft, cupy.fft.ifft
        is_c2c = True
    else:
        fft, ifft = cupy.fft.rfft, cupy.fft.irfft
        is_c2c = False

    # hack to work around NumPy/CuPy FFT dtype incompatibility:
    # CuPy internally converts fp16 to fp32 before doing FFT (whereas NumPy
    # converts both fp16 and fp32 to fp64), so here we do the cast early and
    # explicitly, and make sure a correct cuFFT plan can be generated. After
    # the fft-ifft round trip, we cast the output dtype to the correct one.
    out_dtype = cupy.result_type(a1, a2)
    dtype = _output_dtype(out_dtype, 'C2C' if is_c2c else 'R2C')
    a1 = a1.astype(dtype, copy=False)
    a2 = a2.astype(dtype, copy=False)

    n1, n2 = a1.size, a2.size
    out_size = cupyx.scipy.fft.next_fast_len(n1 + n2 - 1)
    # skip calling get_fft_plan() as we know the args exactly
    if is_c2c:
        fft_t = cufft.CUFFT_C2C if dtype == cupy.complex64 else cufft.CUFFT_Z2Z
        fft_plan = cufft.Plan1d(out_size, fft_t, 1)
        ifft_plan = fft_plan
    else:
        fft_t = cufft.CUFFT_R2C if dtype == cupy.float32 else cufft.CUFFT_D2Z
        fft_plan = cufft.Plan1d(out_size, fft_t, 1)
        # this is a no-op context manager
        # TODO(leofang): use contextlib.nullcontext() for PY37+?
        ifft_plan = contextlib.suppress()
    with fft_plan:
        fa1 = fft(a1, out_size)
        fa2 = fft(a2, out_size)
    with ifft_plan:
        out = ifft(fa1 * fa2, out_size)

    if mode == 'full':
        start, end = 0, n1 + n2 - 1
    elif mode == 'same':
        start = (n2 - 1) // 2 + offset
        end = start + n1
    elif mode == 'valid':
        start, end = n2 - 1, n1
    else:
        raise ValueError(
            'acceptable mode flags are `valid`, `same`, or `full`.')

    out = out[start:end]

    if out.dtype.kind in 'iu':
        out = cupy.around(out)

    return out.astype(out_dtype, copy=False)
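The mode-dependent slicing above, spelled out for concrete (hypothetical) sizes. Here n1 is the larger input's length after the initial swap, and offset is nonzero only when a swapped kernel has even length in 'same' mode.

n1, n2, offset = 10, 4, 0
slices = {
    'full': (0, n1 + n2 - 1),                       # 13 samples
    'same': ((n2 - 1) // 2 + offset,
             (n2 - 1) // 2 + offset + n1),          # 10 samples
    'valid': (n2 - 1, n1),                          # 7 samples
}
assert [e - s for s, e in slices.values()] == [13, 10, 7]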
Example #29
0
def _get_bin_edges(a, bins, range):
    """
    Computes the bins used internally by `histogram`.

    Args:
        a (ndarray): Ravelled data array
        bins (int or ndarray): Forwarded argument from `histogram`.
        range (None or tuple): Forwarded argument from `histogram`.

    Returns:
        bin_edges (ndarray): Array of bin edges
        uniform_bins (Number, Number, int): The lower bound, the upper
            bound, and the number of bins, used by the uniform-bin
            implementation of `histogram`. ``None`` when `bins` is an array.
    """
    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None

    # if isinstance(bins, cupy.ndarray) and bins.ndim == 0:
    #     # allow uint8 array, etc
    #     if bins.dtype not in 'bui':
    #         raise TypeError(
    #             "`bins` must be an integer, a string, or an array")
    #     bins = int(bins)  # synchronize

    if isinstance(bins, int):  # will not allow 0-dimensional cupy array
        # if cupy.ndim(bins) == 0:
        try:
            n_equal_bins = operator.index(bins)
        except TypeError:
            raise TypeError("`bins` must be an integer, a string, or an array")
        if n_equal_bins < 1:
            raise ValueError("`bins` must be positive, when an integer")

        first_edge, last_edge = _get_outer_edges(a, range)

    elif isinstance(bins, cupy.ndarray):
        if bins.ndim != 1:
            raise ValueError("`bins` must be 1d, when an array")
        bin_edges = cupy.asarray(bins)
        if (bin_edges[:-1] > bin_edges[1:]).any():  # synchronize!
            raise ValueError(
                "`bins` must increase monotonically, when an array")

    elif isinstance(bins, str):
        raise NotImplementedError(
            "only integer and array bins are implemented")

    else:
        raise TypeError("`bins` must be an integer, a string, or an array")

    if n_equal_bins is not None:
        # numpy's gh-10322 means that type resolution rules are dependent on
        # array shapes. To avoid this causing problems, we pick a type now and
        # stick with it throughout.
        bin_type = cupy.result_type(first_edge, last_edge, a)
        if cupy.issubdtype(bin_type, cupy.integer):
            bin_type = cupy.result_type(bin_type, float)

        # bin edges must be computed
        bin_edges = cupy.linspace(
            first_edge,
            last_edge,
            n_equal_bins + 1,
            endpoint=True,
            dtype=bin_type,
        )
        return bin_edges, (first_edge, last_edge, n_equal_bins)
    else:
        return bin_edges, None
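A sanity check of the uniform-bin path: numpy.histogram_bin_edges applies the same rules, producing n_equal_bins + 1 evenly spaced edges spanning the data range. The data values are arbitrary.

import numpy as np

a = np.asarray([0.0, 1.0, 2.0, 10.0])
edges = np.histogram_bin_edges(a, bins=5)
assert edges.size == 5 + 1                      # n_equal_bins + 1 edges
assert edges[0] == a.min() and edges[-1] == a.max()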
Example #30
0
def select(condlist, choicelist, default=0):
    """Return an array drawn from elements in choicelist, depending on conditions.

    Args:
        condlist (list of bool arrays): The list of conditions which determine
            from which array in `choicelist` the output elements are taken.
            When multiple conditions are satisfied, the first one encountered
            in `condlist` is used.
        choicelist (list of cupy.ndarray): The list of arrays from which the
            output elements are taken. It has to be of the same length
            as `condlist`.
        default (scalar): The value used to fill the output at positions
            where all conditions evaluate to ``False``. Defaults to ``0``.

    Returns:
        cupy.ndarray: The output at position m is the m-th element of the
        array in `choicelist` where the m-th element of the corresponding
        array in `condlist` is True.

    .. seealso:: :func:`numpy.select`
    """

    if len(condlist) != len(choicelist):
        raise ValueError(
            'list of cases must be same length as list of conditions')

    if len(condlist) == 0:
        raise ValueError("select with an empty condition list is not possible")

    if not cupy.isscalar(default):
        raise TypeError("default only accepts scalar values")

    for i in range(len(choicelist)):
        if not isinstance(choicelist[i], cupy.ndarray):
            raise TypeError("choicelist only accepts lists of cupy ndarrays")
        cond = condlist[i]
        if cond.dtype.type is not cupy.bool_:
            raise ValueError(
                'invalid entry {} in condlist: should be boolean ndarray'.
                format(i))

    dtype = cupy.result_type(*choicelist)

    condlist = cupy.broadcast_arrays(*condlist)
    choicelist = cupy.broadcast_arrays(*choicelist, default)

    if choicelist[0].ndim == 0:
        result_shape = condlist[0].shape
    else:
        result_shape = cupy.broadcast_arrays(condlist[0],
                                             choicelist[0])[0].shape

    result = cupy.empty(result_shape, dtype)
    cupy.copyto(result, default)

    choicelist = choicelist[-2::-1]
    condlist = condlist[::-1]
    for choice, cond in zip(choicelist, condlist):
        cupy.copyto(result, choice, where=cond)

    return result
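A usage sketch for select(): conditions are evaluated in order, the first match wins, and default fills the remaining positions. The input values are arbitrary.

import cupy

x = cupy.arange(6)
# 0 and 1 satisfy x < 2; 4 and 5 satisfy x > 3; 2 and 3 fall through
# to the default.
out = cupy.select([x < 2, x > 3], [x, x ** 2], default=-1)
assert out.tolist() == [0, 1, -1, -1, 16, 25]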