Esempio n. 1
0
def _check_nd_args(input, weights, mode, origin, wghts_name='filter weights'):
    """Validate the arguments shared by the n-dimensional filter routines.

    Args:
        input: Array to be filtered. Only ``ndim`` is read here; the array
            is also handed to ``_util._get_inttype``.
        weights: Filter weights. ``nbytes`` and ``shape`` are inspected.
        mode: Boundary mode, validated by ``_util._check_mode``.
        origin: Origin specification, normalized by
            ``_util._fix_sequence_arg`` (called with the number of
            non-zero-length weight dimensions and ``int``).
        wghts_name (str): Name of the weights argument, used in the error
            message raised on a dimensionality mismatch.

    Returns:
        tuple: ``(origins, int_type)`` where ``origins`` is a tuple of the
        normalized origins and ``int_type`` is the value returned by
        ``_util._get_inttype(input)``.

    Raises:
        RuntimeError: If ``weights`` occupies 2**31 bytes or more, or if the
            number of non-zero-length weight dimensions differs from
            ``input.ndim``.
    """
    _util._check_mode(mode)

    # The weights array must stay below the 2**31 byte limit.
    if weights.nbytes >= (1 << 31):
        raise RuntimeError('weights must be 2 GiB or less, use FFTs instead')

    # Zero-length dimensions are not counted when matching against the input.
    nonzero_dims = [extent for extent in weights.shape if extent != 0]
    n_dims = len(nonzero_dims)
    if n_dims != input.ndim:
        raise RuntimeError('{} array has incorrect shape'.format(wghts_name))

    origins = _util._fix_sequence_arg(origin, n_dims, 'origin', int)
    for axis_origin, axis_width in zip(origins, nonzero_dims):
        _util._check_origin(axis_origin, axis_width)

    return tuple(origins), _util._get_inttype(input)
Esempio n. 2
0
def _binary_erosion(input,
                    structure,
                    iterations,
                    mask,
                    output,
                    border_value,
                    origin,
                    invert,
                    brute_force=True):
    """Run binary erosion on ``input`` with the given structuring element.

    Args:
        input (cupy.ndarray): Array to erode. Complex dtypes are rejected.
        structure (cupy.ndarray or None): Structuring element. If ``None``,
            ``generate_binary_structure(input.ndim, 1)`` is used. Otherwise
            it is viewed/cast as ``bool`` and must have the same number of
            dimensions as ``input``.
        iterations (int): Number of times the erosion kernel is applied.
            Values below 1 repeat the erosion until the result stops
            changing.
        mask (cupy.ndarray or None): Optional mask with the same shape as
            ``input``; when given it is passed to the kernel.
        output (cupy.ndarray or other): If a ``cupy.ndarray``, it receives
            the result (complex dtypes are rejected). Any other value is
            replaced by ``bool`` and a new output is obtained through
            ``_util._get_output``.
        border_value: Forwarded to ``_get_binary_erosion_kernel``.
        origin: Origin specification, normalized with
            ``_util._fix_sequence_arg`` and converted to kernel offsets.
        invert: Forwarded to ``_get_binary_erosion_kernel``.
        brute_force (bool): Must be truthy when more than one pass is
            needed and the structure's center element is set; the
            non-brute-force iteration is not implemented.

    Returns:
        cupy.ndarray: The eroded array. If the requested output may share
        memory with ``input``, the result is computed in a scratch array and
        copied into the requested output, which is then returned.

    Raises:
        TypeError: If ``iterations`` is not an integer, or if ``input`` or a
            provided ``output`` array is complex.
        RuntimeError: If ``structure`` has the wrong dimensionality or is
            empty, or if ``mask`` does not match the shape of ``input``.
        NotImplementedError: If a non-brute-force iteration is requested.
        ValueError: If ``output`` and ``input`` may overlap in the iterated
            path.
    """
    try:
        iterations = operator.index(iterations)
    except TypeError as err:
        # keep the original failure attached as the explicit cause
        raise TypeError('iterations parameter should be an integer') from err

    if input.dtype.kind == 'c':
        raise TypeError('Complex type not supported')
    if structure is None:
        structure = generate_binary_structure(input.ndim, 1)
        # for the default structure these properties are set without
        # inspecting the array contents
        all_weights_nonzero = input.ndim == 1
        center_is_true = True
        default_structure = True
    else:
        structure = structure.astype(dtype=bool, copy=False)
        # density and center of a user-provided structure are determined
        # further below, once the origin has been validated
        default_structure = False
    if structure.ndim != input.ndim:
        raise RuntimeError('structure and input must have same dimensionality')
    if not structure.flags.c_contiguous:
        structure = cupy.ascontiguousarray(structure)
    if structure.size < 1:
        raise RuntimeError('structure must not be empty')

    if mask is not None:
        if mask.shape != input.shape:
            raise RuntimeError('mask and input must have equal sizes')
        if not mask.flags.c_contiguous:
            mask = cupy.ascontiguousarray(mask)
        masked = True
    else:
        masked = False
    origin = _util._fix_sequence_arg(origin, input.ndim, 'origin', int)

    if isinstance(output, cupy.ndarray):
        if output.dtype.kind == 'c':
            raise TypeError('Complex output type not supported')
    else:
        output = bool
    output = _util._get_output(output, input)
    temp_needed = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS')
    if temp_needed:
        # input and output arrays cannot share memory
        temp = output
        output = _util._get_output(output.dtype, input)
    if structure.ndim == 0:
        # kernel doesn't handle ndim=0, so special case it here
        if float(structure):
            output[...] = cupy.asarray(input, dtype=bool)
        else:
            output[...] = ~cupy.asarray(input, dtype=bool)
        if temp_needed:
            # the result was computed in a scratch array; copy it into the
            # requested output so that array is the one written and returned
            temp[...] = output
            output = temp
        return output
    origin = tuple(origin)
    int_type = _util._get_inttype(input)
    offsets = _filters_core._origins_to_offsets(origin, structure.shape)
    if not default_structure:
        # synchronize required to determine if all weights are non-zero
        nnz = int(cupy.count_nonzero(structure))
        all_weights_nonzero = nnz == structure.size
        if all_weights_nonzero:
            center_is_true = True
        else:
            center_is_true = _center_is_true(structure, origin)

    erode_kernel = _get_binary_erosion_kernel(
        structure.shape,
        int_type,
        offsets,
        center_is_true,
        border_value,
        invert,
        masked,
        all_weights_nonzero,
    )

    if iterations == 1:
        if masked:
            output = erode_kernel(input, structure, mask, output)
        else:
            output = erode_kernel(input, structure, output)
    elif center_is_true and not brute_force:
        raise NotImplementedError(
            'only brute_force iteration has been implemented')
    else:
        if cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS'):
            raise ValueError('output and input may not overlap in memory')
        # two buffers are alternated between passes
        tmp_in = cupy.empty_like(input, dtype=output.dtype)
        tmp_out = output
        if iterations >= 1 and not iterations & 1:
            # for an even iteration count, start with the buffers swapped
            tmp_in, tmp_out = tmp_out, tmp_in
        if masked:
            tmp_out = erode_kernel(input, structure, mask, tmp_out)
        else:
            tmp_out = erode_kernel(input, structure, tmp_out)
        # TODO: kernel doesn't return the changed status, so determine it here
        changed = not (input == tmp_out).all()  # synchronize!
        ii = 1
        while ii < iterations or ((iterations < 1) and changed):
            tmp_in, tmp_out = tmp_out, tmp_in
            if masked:
                tmp_out = erode_kernel(tmp_in, structure, mask, tmp_out)
            else:
                tmp_out = erode_kernel(tmp_in, structure, tmp_out)
            changed = not (tmp_in == tmp_out).all()
            ii += 1
            if not changed and (not ii & 1):  # synchronize!
                # can exit early if nothing changed
                # (only do this after even number of tmp_in/out swaps)
                break
        # tmp_out holds the result of the most recent pass
        output = tmp_out
    if temp_needed:
        temp[...] = output
        output = temp
    return output
Esempio n. 3
0
def _direct_correlate(in1,
                      in2,
                      mode='full',
                      output=float,
                      convolution=False,
                      boundary='constant',
                      fillvalue=0.0,
                      shift=False):
    """Correlate or convolve ``in1`` with ``in2`` using the filter kernel.

    Args:
        in1 (cupy.ndarray): First input array.
        in2 (cupy.ndarray): Second input array.
        mode (str): ``'full'`` or ``'valid'``; any other value is handled as
            ``'same'``.
        output (cupy.ndarray or dtype): Array receiving the result, or the
            dtype used to allocate one.
        convolution (bool): If true, ``in2`` is passed through
            ``_reverse_and_conj`` before the kernel is run.
        boundary: Boundary handling, forwarded to
            ``filters._get_correlate_kernel``.
        fillvalue: Fill value, forwarded to
            ``filters._get_correlate_kernel``.
        shift: Adjusts the kernel offsets in ``'same'`` mode.

    Returns:
        cupy.ndarray: The output array.

    Raises:
        ValueError: For non-1-D boolean or sub-4-byte float ``in1``, or if a
            provided ``output`` array has the wrong shape.
        RuntimeError: If the array used as kernel occupies 2**31 bytes or
            more.
    """
    if in1.ndim != 1:
        kind = in1.dtype.kind
        if kind == 'b' or (kind == 'f' and in1.dtype.itemsize < 4):
            raise ValueError('unsupported type in SciPy')

    # Make in2 the smaller array.
    # NOTE: when mode != 'valid' we can only swap with a constant-0 boundary
    shape_before_swap = in1.shape
    do_swap = _inputs_swap_needed(mode, in1.shape, in2.shape) or (
        in2.size > in1.size and boundary == 'constant' and fillvalue == 0)
    flip_result = False
    if do_swap:
        in1, in2 = in2, in1
        flip_result = not convolution

    # Due to several optimizations, the second array can only be 2 GiB
    if in2.nbytes >= (1 << 31):
        raise RuntimeError('smaller array must be 2 GiB or less, '
                           'use method="fft" instead')

    # From here on in1.size > in2.size
    # (except some cases when boundary != 'constant' or fillvalue != 0).
    # Work out the output shape and the origin of the kernel.
    if mode == 'full':
        out_shape = tuple(a + b - 1 for a, b in zip(in1.shape, in2.shape))
        offsets = tuple(b - 1 for b in in2.shape)
    elif mode == 'valid':
        out_shape = tuple(a - b + 1 for a, b in zip(in1.shape, in2.shape))
        offsets = (0, ) * in1.ndim
    else:  # mode == 'same'
        # In correlate2d: When using "same" mode with even-length inputs, the
        # outputs of correlate and correlate2d differ: There is a 1-index
        # offset between them. The "shift" parameter accounts for this.
        out_shape = shape_before_swap
        if shape_before_swap == in1.shape:
            offsets = tuple((b - shift) // 2 for b in in2.shape)
        else:
            offsets = tuple((2 * b - a - (not convolution) + shift) // 2
                            for a, b in zip(in1.shape, in2.shape))

    # Validate a provided output array, or allocate one
    if isinstance(output, cupy.ndarray):
        if output.shape != out_shape:
            raise ValueError("out has wrong shape")
    else:
        output = cupy.empty(out_shape, output)

    # Build and launch the CuPy kernel
    int_type = _util._get_inttype(in1)
    kernel = filters._get_correlate_kernel(boundary, in2.shape, int_type,
                                           offsets, fillvalue)
    if convolution:
        in2 = _reverse_and_conj(in2)

    if not flip_result:
        kernel(in1, in2, output)
        return output
    if output.dtype.kind != 'c':
        # Avoids one array copy
        kernel(in1, in2, _reverse_and_conj(output))
        return output
    kernel(in1, in2, output)
    return cupy.ascontiguousarray(_reverse_and_conj(output))
Esempio n. 4
0
def spline_filter1d(input,
                    order=3,
                    axis=-1,
                    output=cupy.float64,
                    mode='mirror'):
    """
    Calculate a 1-D spline filter along the given axis.

    The lines of the array along the given axis are filtered by a
    spline filter. Orders 0 and 1 need no prefiltering, so the input is
    copied to the output unchanged; the same applies to 0-dimensional
    input, empty input, and input of length 1 along ``axis``.

    Args:
        input (cupy.ndarray): The input array.
        order (int): The order of the spline interpolation, default is 3. Must
            be in the range 0-5.
        axis (int): The axis along which the spline filter is applied. Default
            is the last axis.
        output (cupy.ndarray or dtype, optional): The array in which to place
            the output, or the dtype of the returned array. Default is
            ``cupy.float64``.
        mode (str): Points outside the boundaries of the input are filled
            according to the given mode (``'constant'``, ``'nearest'``,
            ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``,
            ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``).

    Returns:
        cupy.ndarray: The result of prefiltering the input.

    Raises:
        RuntimeError: If ``order`` is outside the range 0-5.

    .. seealso:: :func:`scipy.ndimage.spline_filter1d`
    """
    if order < 0 or order > 5:
        raise RuntimeError('spline order not supported')
    x = input
    ndim = x.ndim
    axis = internal._normalize_axis_index(axis, ndim)

    # order 0, 1 don't require reshaping as no CUDA kernel will be called
    # scalar or size 1 arrays also don't need to be filtered
    # empty arrays have nothing to filter either; letting them through would
    # divide by a zero n_samples or compute a zero-sized launch grid below
    run_kernel = not (order < 2 or x.ndim == 0 or x.size == 0
                      or x.shape[axis] == 1)
    if not run_kernel:
        output = _util._get_output(output, input)
        output[...] = x[...]
        return output

    temp, data_dtype, output_dtype = _get_spline_output(x, output)
    data_type = cupy._core._scalar.get_typename(temp.dtype)
    pole_type = cupy._core._scalar.get_typename(temp.real.dtype)

    index_type = _util._get_inttype(input)
    index_dtype = cupy.int32 if index_type == 'int' else cupy.int64

    n_samples = x.shape[axis]
    n_signals = x.size // n_samples
    # kernel metadata: number of lines, line length, then the array shape
    info = cupy.array((n_signals, n_samples) + x.shape, dtype=index_dtype)

    # empirical choice of block size that seemed to work well
    block_size = max(2**math.ceil(numpy.log2(n_samples / 32)), 8)
    kern = _spline_prefilter_core.get_raw_spline1d_kernel(
        axis,
        ndim,
        mode,
        order=order,
        index_type=index_type,
        data_type=data_type,
        pole_type=pole_type,
        block_size=block_size,
    )

    # Due to recursive nature, a given line of data must be processed by a
    # single thread. n_signals lines will be processed in total.
    block = (block_size, )
    grid = ((n_signals + block[0] - 1) // block[0], )

    # apply prefilter gain
    poles = _spline_prefilter_core.get_poles(order=order)
    temp *= _spline_prefilter_core.get_gain(poles)

    # apply causal + anti-causal IIR spline filters
    kern(grid, block, (temp, info))

    if isinstance(output, cupy.ndarray) and temp is not output:
        # copy kernel output into the user-provided output array
        output[...] = temp[...]
        return output
    return temp.astype(output_dtype, copy=False)
def _direct_correlate(in1, in2, mode='full', output=float, convolution=False,
                      boundary='constant', fillvalue=0.0, shift=False):
    """Correlate or convolve ``in1`` with ``in2`` using the filter kernel.

    Args:
        in1 (cupy.ndarray): First input array.
        in2 (cupy.ndarray): Second input array.
        mode (str): ``'full'`` or ``'valid'``; any other value is handled as
            ``'same'``.
        output (cupy.ndarray or dtype): Either an array to receive the
            result, which must have the computed output shape and the
            promoted dtype of the two inputs, or a dtype that must be
            castable to that promoted dtype. In the latter case the result
            is allocated with the promoted dtype.
        convolution (bool): If true, ``in2`` is passed through ``_reverse``
            before the kernel is run; otherwise ``in2.conj()`` is used.
        boundary: Boundary handling, forwarded to
            ``filters._get_correlate_kernel``.
        fillvalue: Fill value, forwarded to
            ``filters._get_correlate_kernel``.
        shift: Adjusts the kernel offsets in ``'same'`` mode.

    Returns:
        cupy.ndarray: The output array.

    Raises:
        ValueError: For non-1-D boolean or sub-4-byte float ``in1``, if the
            requested output dtype cannot be cast to the promoted dtype, or
            if a provided ``output`` array has the wrong shape or dtype.
        RuntimeError: If the array used as kernel occupies 2**31 bytes or
            more.
    """
    if in1.ndim != 1 and (in1.dtype.kind == 'b' or
                          (in1.dtype.kind == 'f' and in1.dtype.itemsize < 4)):
        raise ValueError('unsupported type in SciPy')

    # Swaps inputs so smaller one is in2:
    # NOTE: when mode != 'valid' we can only swap with a constant-0 boundary
    swapped_inputs = False
    orig_in1_shape = in1.shape
    if _inputs_swap_needed(mode, in1.shape, in2.shape) or (
            in2.size > in1.size and boundary == 'constant' and fillvalue == 0):
        in1, in2 = in2, in1
        swapped_inputs = True

    # Due to several optimizations, the second array can only be 2 GiB
    if in2.nbytes >= (1 << 31):
        raise RuntimeError('smaller array must be 2 GiB or less, '
                           'use method="fft" instead')

    # At this point, in1.size > in2.size
    # (except some cases when boundary != 'constant' or fillvalue != 0)
    # Figure out the output shape and the origin of the kernel
    if mode == 'full':
        out_shape = tuple(x1+x2-1 for x1, x2 in zip(in1.shape, in2.shape))
        offsets = tuple(x-1 for x in in2.shape)
    elif mode == 'valid':
        out_shape = tuple(x1-x2+1 for x1, x2 in zip(in1.shape, in2.shape))
        offsets = (0,) * in1.ndim
    else:  # mode == 'same':
        # In correlate2d: When using "same" mode with even-length inputs, the
        # outputs of correlate and correlate2d differ: There is a 1-index
        # offset between them.
        # This is dealt with by using "shift" parameter.
        out_shape = orig_in1_shape
        if orig_in1_shape == in1.shape:
            offsets = tuple((x-shift)//2 for x in in2.shape)
        else:
            offsets = tuple((2*x2-x1-(not convolution)+shift)//2
                            for x1, x2 in zip(in1.shape, in2.shape))

    # Check the output
    # In SciPy, the output dtype is determined by inputs' dtypes.
    # Pass the dtypes explicitly rather than relying on the arrays being
    # implicitly interpreted as dtypes.
    out_dtype = cupy.promote_types(in1.dtype, in2.dtype)
    if not isinstance(output, cupy.ndarray):
        if not cupy.can_cast(output, out_dtype):
            raise ValueError('not available for this type')
        output = cupy.empty(out_shape, out_dtype)
    elif output.shape != out_shape:
        raise ValueError('out has wrong shape')
    elif output.dtype != out_dtype:
        raise ValueError('out has wrong dtype')

    # Check input dtypes
    # Internally, the kernel accumulates in in2's type, so if in2 has lower
    # precision (can_cast = True!) we hit overflow easier
    # TODO(leofang): this is a band-aid fix for cupy/cupy#6047
    if cupy.can_cast(in2.dtype, in1.dtype):
        in2 = in2.astype(out_dtype)  # make a copy while upcasting

    # Get and run the CuPy kernel
    int_type = _util._get_inttype(in1)
    kernel = filters._get_correlate_kernel(
        boundary, in2.shape, int_type, offsets, fillvalue)
    in2 = _reverse(in2) if convolution else in2.conj()
    if not swapped_inputs or convolution:
        kernel(in1, in2, output)
    elif output.dtype.kind != 'c':
        # Avoids one array copy
        kernel(in1, in2, _reverse(output))
    else:
        # only reached for swapped inputs in correlation mode
        kernel(in1, in2, output)
        output = cupy.ascontiguousarray(_reverse(output))
        if mode != 'valid' or not shift:
            cupy.conjugate(output, out=output)
    return output