def _check_nd_args(input, weights, mode, origin, wghts_name='filter weights'):
    """Validate the arguments of an n-dimensional filter call.

    Args:
        input: Array being filtered. Only ``input.ndim`` is read here; the
            array itself is handed to ``_util._get_inttype``.
        weights: Filter weights. ``nbytes`` and ``shape`` are inspected.
        mode: Boundary mode, validated by ``_util._check_mode``.
        origin: Origin specification, expanded by
            ``_util._fix_sequence_arg`` to one ``int`` per non-empty weight
            axis.
        wghts_name (str): Name used for the weights in the shape error
            message.

    Returns:
        tuple: ``(origins, int_type)`` where ``origins`` is a tuple with one
        entry per non-empty weight axis and ``int_type`` is whatever
        ``_util._get_inttype(input)`` returns.

    Raises:
        RuntimeError: If ``weights`` occupies 2 GiB or more, or if its number
            of non-empty axes differs from ``input.ndim``.
    """
    _util._check_mode(mode)

    # The comparison is ">=", so a weights array of exactly 2 GiB is rejected
    # as well.
    weights_too_large = weights.nbytes >= (1 << 31)
    if weights_too_large:
        raise RuntimeError('weights must be 2 GiB or less, use FFTs instead')

    # Zero-length axes are dropped before comparing against input.ndim.
    axis_widths = [extent for extent in weights.shape if extent != 0]
    n_axes = len(axis_widths)
    if n_axes != input.ndim:
        raise RuntimeError('{} array has incorrect shape'.format(wghts_name))

    origins = _util._fix_sequence_arg(origin, n_axes, 'origin', int)
    for axis_origin, axis_width in zip(origins, axis_widths):
        _util._check_origin(axis_origin, axis_width)

    return tuple(origins), _util._get_inttype(input)
def _binary_erosion(input, structure, iterations, mask, output, border_value,
                    origin, invert, brute_force=True):
    """Run the binary erosion kernel on ``input``, optionally repeatedly.

    Args:
        input (cupy.ndarray): Array to process. Complex dtypes are rejected.
        structure (cupy.ndarray or None): Structuring element. ``None``
            selects ``generate_binary_structure(input.ndim, 1)``; any other
            value is cast to ``bool`` and must have ``input.ndim``
            dimensions and at least one element.
        iterations (int): Number of kernel applications. Must support
            ``operator.index``. A value below 1 repeats the kernel until an
            application no longer changes the result.
        mask (cupy.ndarray or None): Optional array of the same shape as
            ``input``; when given it is forwarded to the kernel.
        output (cupy.ndarray or other): If a ``cupy.ndarray``, the result is
            written into it (complex dtypes are rejected). Any other value
            is ignored and a ``bool`` output is allocated through
            ``_util._get_output``.
        border_value: Forwarded to ``_get_binary_erosion_kernel``.
        origin: Expanded by ``_util._fix_sequence_arg`` to one ``int`` per
            axis of ``input``.
        invert: Forwarded to ``_get_binary_erosion_kernel``.
        brute_force (bool): Only the brute-force iteration scheme exists;
            see ``Raises``.

    Returns:
        cupy.ndarray: The output array. When ``output`` was an array that may
        share memory with ``input``, the result is computed in a scratch
        array and then copied into the caller's array, which is returned.

    Raises:
        TypeError: If ``iterations`` is not an integer, or ``input`` /
            ``output`` has a complex dtype.
        RuntimeError: If ``structure`` has the wrong dimensionality or is
            empty, or ``mask`` does not match the shape of ``input``.
        NotImplementedError: If ``iterations != 1``, the structure's center
            is true and ``brute_force`` is false.
        ValueError: If the working output overlaps ``input`` in the
            iterative path.
    """
    try:
        iterations = operator.index(iterations)
    except TypeError:
        raise TypeError('iterations parameter should be an integer')

    if input.dtype.kind == 'c':
        raise TypeError('Complex type not supported')
    if structure is None:
        structure = generate_binary_structure(input.ndim, 1)
        all_weights_nonzero = input.ndim == 1
        center_is_true = True
        default_structure = True
    else:
        structure = structure.astype(dtype=bool, copy=False)
        # transfer to CPU for use in determining if it is fully dense
        # structure_cpu = cupy.asnumpy(structure)
        default_structure = False
    if structure.ndim != input.ndim:
        raise RuntimeError('structure and input must have same dimensionality')
    if not structure.flags.c_contiguous:
        structure = cupy.ascontiguousarray(structure)
    if structure.size < 1:
        raise RuntimeError('structure must not be empty')

    if mask is not None:
        if mask.shape != input.shape:
            raise RuntimeError('mask and input must have equal sizes')
        if not mask.flags.c_contiguous:
            mask = cupy.ascontiguousarray(mask)
        masked = True
    else:
        masked = False
    origin = _util._fix_sequence_arg(origin, input.ndim, 'origin', int)

    if isinstance(output, cupy.ndarray):
        if output.dtype.kind == 'c':
            raise TypeError('Complex output type not supported')
    else:
        output = bool
    output = _util._get_output(output, input)
    temp_needed = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS')
    if temp_needed:
        # input and output arrays cannot share memory: compute into a scratch
        # array and copy back into the caller's array (``temp``) at the end
        temp = output
        output = _util._get_output(output.dtype, input)

    if structure.ndim == 0:
        # kernel doesn't handle ndim=0, so special case it here
        if float(structure):
            output[...] = cupy.asarray(input, dtype=bool)
        else:
            output[...] = ~cupy.asarray(input, dtype=bool)
        # This early exit must honour the scratch-array protocol too,
        # otherwise the caller-provided output array is never written.
        if temp_needed:
            temp[...] = output
            output = temp
        return output

    origin = tuple(origin)
    int_type = _util._get_inttype(input)
    offsets = _filters_core._origins_to_offsets(origin, structure.shape)
    if not default_structure:
        # synchronize required to determine if all weights are non-zero
        nnz = int(cupy.count_nonzero(structure))
        all_weights_nonzero = nnz == structure.size
        if all_weights_nonzero:
            center_is_true = True
        else:
            center_is_true = _center_is_true(structure, origin)

    erode_kernel = _get_binary_erosion_kernel(
        structure.shape, int_type, offsets, center_is_true, border_value,
        invert, masked, all_weights_nonzero,
    )

    if iterations == 1:
        if masked:
            output = erode_kernel(input, structure, mask, output)
        else:
            output = erode_kernel(input, structure, output)
    elif center_is_true and not brute_force:
        raise NotImplementedError(
            'only brute_force iteration has been implemented')
    else:
        if cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS'):
            raise ValueError('output and input may not overlap in memory')
        # Two buffers are alternated between kernel applications; for an even
        # iteration count they start swapped.
        tmp_in = cupy.empty_like(input, dtype=output.dtype)
        tmp_out = output
        if iterations >= 1 and not iterations & 1:
            tmp_in, tmp_out = tmp_out, tmp_in
        if masked:
            tmp_out = erode_kernel(input, structure, mask, tmp_out)
        else:
            tmp_out = erode_kernel(input, structure, tmp_out)
        # TODO: kernel doesn't return the changed status, so determine it here
        changed = not (input == tmp_out).all()  # synchronize!
        ii = 1
        while ii < iterations or ((iterations < 1) and changed):
            tmp_in, tmp_out = tmp_out, tmp_in
            if masked:
                tmp_out = erode_kernel(tmp_in, structure, mask, tmp_out)
            else:
                tmp_out = erode_kernel(tmp_in, structure, tmp_out)
            changed = not (tmp_in == tmp_out).all()
            ii += 1
            if not changed and (not ii & 1):  # synchronize!
                # can exit early if nothing changed
                # (only do this after even number of tmp_in/out swaps)
                break
        output = tmp_out

    if temp_needed:
        temp[...] = output
        output = temp
    return output
def _direct_correlate(in1, in2, mode='full', output=float, convolution=False,
                      boundary='constant', fillvalue=0.0, shift=False):
    """Correlate (or convolve) two arrays directly with a CuPy kernel.

    Args:
        in1 (cupy.ndarray): First input.
        in2 (cupy.ndarray): Second input. The inputs may be exchanged
            internally so that the smaller one is used as the kernel.
        mode (str): ``'full'`` or ``'valid'``; any other value is handled as
            ``'same'``.
        output (cupy.ndarray or dtype): Array to write into, or the dtype of
            a newly allocated result.
        convolution (bool): If true, ``in2`` is passed through
            ``_reverse_and_conj`` before the kernel runs.
        boundary: Forwarded to ``filters._get_correlate_kernel``.
        fillvalue: Forwarded to ``filters._get_correlate_kernel``.
        shift: Offset adjustment used in ``'same'`` mode (see the note in
            the body).

    Returns:
        cupy.ndarray: The result array.

    Raises:
        ValueError: For a non-1-D ``in1`` of boolean dtype or of a float
            dtype narrower than 4 bytes, or if a provided ``output`` has the
            wrong shape.
        RuntimeError: If the array used as kernel occupies 2 GiB or more.
    """
    if in1.ndim != 1:
        in1_dtype = in1.dtype
        if in1_dtype.kind == 'b' or (
                in1_dtype.kind == 'f' and in1_dtype.itemsize < 4):
            raise ValueError('unsupported type in SciPy')

    # Exchange the inputs so that the smaller one ends up as in2.
    # NOTE: when mode != 'valid' we can only swap with a constant-0 boundary
    first_shape = in1.shape
    must_swap = _inputs_swap_needed(mode, in1.shape, in2.shape) or (
        in2.size > in1.size and boundary == 'constant' and fillvalue == 0)
    undo_swap = False
    if must_swap:
        in1, in2 = in2, in1
        undo_swap = not convolution

    # Due to several optimizations, the second array can only be 2 GiB
    if in2.nbytes >= (1 << 31):
        raise RuntimeError('smaller array must be 2 GiB or less, '
                           'use method="fft" instead')

    # At this point, in1.size > in2.size
    # (except some cases when boundary != 'constant' or fillvalue != 0)
    # Work out the result shape and the kernel origin for the chosen mode.
    if mode == 'full':
        result_shape = tuple(
            n1 + n2 - 1 for n1, n2 in zip(in1.shape, in2.shape))
        origin = tuple(n2 - 1 for n2 in in2.shape)
    elif mode == 'valid':
        result_shape = tuple(
            n1 - n2 + 1 for n1, n2 in zip(in1.shape, in2.shape))
        origin = (0,) * in1.ndim
    else:
        # mode == 'same':
        # In correlate2d: When using "same" mode with even-length inputs, the
        # outputs of correlate and correlate2d differ: There is a 1-index
        # offset between them.
        # This is dealt with by using "shift" parameter.
        result_shape = first_shape
        if first_shape != in1.shape:
            # the inputs were exchanged above
            origin = tuple(
                (2 * n2 - n1 - (not convolution) + shift) // 2
                for n1, n2 in zip(in1.shape, in2.shape))
        else:
            origin = tuple((n2 - shift) // 2 for n2 in in2.shape)

    # Allocate the result, or validate the one supplied by the caller.
    if isinstance(output, cupy.ndarray):
        if output.shape != result_shape:
            raise ValueError("out has wrong shape")
    else:
        output = cupy.empty(result_shape, output)

    # Get and run the CuPy kernel
    int_type = _util._get_inttype(in1)
    kernel = filters._get_correlate_kernel(boundary, in2.shape, int_type,
                                           origin, fillvalue)
    if convolution:
        in2 = _reverse_and_conj(in2)

    if not undo_swap:
        kernel(in1, in2, output)
        return output

    if output.dtype.kind != 'c':
        # Avoids one array copy: the kernel writes through
        # _reverse_and_conj(output) (presumably a view of output -- confirm).
        kernel(in1, in2, _reverse_and_conj(output))
        return output

    kernel(in1, in2, output)
    return cupy.ascontiguousarray(_reverse_and_conj(output))
def spline_filter1d(input, order=3, axis=-1, output=cupy.float64,
                    mode='mirror'):
    """
    Calculate a 1-D spline filter along the given axis.

    The lines of the array along the given axis are filtered by a
    spline filter. Filtering is only performed for orders 2 to 5; for orders
    0 and 1, for 0-d inputs, for inputs without any elements and for inputs
    of length 1 along ``axis`` the input is copied to the output unchanged.

    Args:
        input (cupy.ndarray): The input array.
        order (int): The order of the spline interpolation, default is 3. Must
            be in the range 0-5.
        axis (int): The axis along which the spline filter is applied. Default
            is the last axis.
        output (cupy.ndarray or dtype, optional): The array in which to place
            the output, or the dtype of the returned array. Default is
            ``numpy.float64``.
        mode (str): Points outside the boundaries of the input are filled
            according to the given mode (``'constant'``, ``'nearest'``,
            ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``,
            ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``).

    Returns:
        cupy.ndarray: The result of prefiltering the input.

    Raises:
        RuntimeError: If ``order`` is outside the range 0-5.

    .. seealso:: :func:`scipy.ndimage.spline_filter1d`
    """
    if order < 0 or order > 5:
        raise RuntimeError('spline order not supported')
    x = input
    ndim = x.ndim
    axis = internal._normalize_axis_index(axis, ndim)

    # order 0, 1 don't require reshaping as no CUDA kernel will be called
    # scalar or size 1 arrays also don't need to be filtered
    # Inputs without elements are skipped as well: with a zero-length axis
    # the ``x.size // n_samples`` division below would raise
    # ZeroDivisionError, and an empty other axis gives a zero-sized grid.
    run_kernel = not (order < 2 or x.ndim == 0 or x.size == 0
                      or x.shape[axis] == 1)
    if not run_kernel:
        output = _util._get_output(output, input)
        output[...] = x[...]
        return output

    temp, data_dtype, output_dtype = _get_spline_output(x, output)
    data_type = cupy._core._scalar.get_typename(temp.dtype)
    pole_type = cupy._core._scalar.get_typename(temp.real.dtype)

    index_type = _util._get_inttype(input)
    index_dtype = cupy.int32 if index_type == 'int' else cupy.int64

    n_samples = x.shape[axis]
    n_signals = x.size // n_samples
    info = cupy.array((n_signals, n_samples) + x.shape, dtype=index_dtype)

    # empirical choice of block size that seemed to work well
    block_size = max(2**math.ceil(numpy.log2(n_samples / 32)), 8)
    kern = _spline_prefilter_core.get_raw_spline1d_kernel(
        axis,
        ndim,
        mode,
        order=order,
        index_type=index_type,
        data_type=data_type,
        pole_type=pole_type,
        block_size=block_size,
    )

    # Due to recursive nature, a given line of data must be processed by a
    # single thread. n_signals lines will be processed in total.
    block = (block_size,)
    grid = ((n_signals + block[0] - 1) // block[0],)

    # apply prefilter gain
    poles = _spline_prefilter_core.get_poles(order=order)
    temp *= _spline_prefilter_core.get_gain(poles)

    # apply causal + anti-causal IIR spline filters
    kern(grid, block, (temp, info))

    if isinstance(output, cupy.ndarray) and temp is not output:
        # copy kernel output into the user-provided output array
        output[...] = temp[...]
        return output
    return temp.astype(output_dtype, copy=False)
def _direct_correlate(in1, in2, mode='full', output=float, convolution=False,
                      boundary='constant', fillvalue=0.0, shift=False):
    """Correlate (or convolve) two arrays directly with a CuPy kernel.

    Args:
        in1 (cupy.ndarray): First input.
        in2 (cupy.ndarray): Second input. The inputs may be exchanged
            internally so that the smaller one is used as the kernel.
        mode (str): ``'full'`` or ``'valid'``; any other value is handled as
            ``'same'``.
        output (cupy.ndarray or dtype): Array to write into, or a requested
            dtype. A requested dtype is only checked for castability; the
            allocated result always uses the promoted input dtype.
        convolution (bool): If true, ``in2`` is passed through ``_reverse``
            before the kernel runs; otherwise ``in2.conj()`` is used.
        boundary: Forwarded to ``filters._get_correlate_kernel``.
        fillvalue: Forwarded to ``filters._get_correlate_kernel``.
        shift: Offset adjustment used in ``'same'`` mode (see the note in
            the body); also consulted when deciding on the final conjugate.

    Returns:
        cupy.ndarray: The result array.

    Raises:
        ValueError: For a non-1-D ``in1`` of boolean dtype or of a float
            dtype narrower than 4 bytes; if the requested output dtype
            cannot be cast to the promoted dtype; or if a provided
            ``output`` array has the wrong shape or dtype.
        RuntimeError: If the array used as kernel occupies 2 GiB or more.
    """
    if in1.ndim != 1:
        in1_dtype = in1.dtype
        if in1_dtype.kind == 'b' or (
                in1_dtype.kind == 'f' and in1_dtype.itemsize < 4):
            raise ValueError('unsupported type in SciPy')

    # Exchange the inputs so that the smaller one ends up as in2.
    # NOTE: when mode != 'valid' we can only swap with a constant-0 boundary
    first_shape = in1.shape
    swapped = False
    if _inputs_swap_needed(mode, in1.shape, in2.shape) or (
            in2.size > in1.size and boundary == 'constant'
            and fillvalue == 0):
        in1, in2 = in2, in1
        swapped = True

    # Due to several optimizations, the second array can only be 2 GiB
    if in2.nbytes >= (1 << 31):
        raise RuntimeError('smaller array must be 2 GiB or less, '
                           'use method="fft" instead')

    # At this point, in1.size > in2.size
    # (except some cases when boundary != 'constant' or fillvalue != 0)
    # Work out the result shape and the kernel origin for the chosen mode.
    if mode == 'full':
        result_shape = tuple(
            n1 + n2 - 1 for n1, n2 in zip(in1.shape, in2.shape))
        origin = tuple(n2 - 1 for n2 in in2.shape)
    elif mode == 'valid':
        result_shape = tuple(
            n1 - n2 + 1 for n1, n2 in zip(in1.shape, in2.shape))
        origin = (0,) * in1.ndim
    else:
        # mode == 'same':
        # In correlate2d: When using "same" mode with even-length inputs, the
        # outputs of correlate and correlate2d differ: There is a 1-index
        # offset between them.
        # This is dealt with by using "shift" parameter.
        result_shape = first_shape
        if first_shape != in1.shape:
            # the inputs were exchanged above
            origin = tuple(
                (2 * n2 - n1 - (not convolution) + shift) // 2
                for n1, n2 in zip(in1.shape, in2.shape))
        else:
            origin = tuple((n2 - shift) // 2 for n2 in in2.shape)

    # Check the output
    # In SciPy, the output dtype is determined by inputs' dtypes
    result_dtype = cupy.promote_types(in1, in2)
    if isinstance(output, cupy.ndarray):
        if output.shape != result_shape:
            raise ValueError('out has wrong shape')
        if output.dtype != result_dtype:
            raise ValueError('out has wrong dtype')
    else:
        if not cupy.can_cast(output, result_dtype):
            raise ValueError('not available for this type')
        output = cupy.empty(result_shape, result_dtype)

    # Check input dtypes
    # Internally, the kernel accumulates in in2's type, so if in2 has lower
    # precision (can_cast = True!) we hit overflow easier
    # TODO(leofang): this is a band-aid fix for cupy/cupy#6047
    if cupy.can_cast(in2, in1):
        in2 = in2.astype(result_dtype)  # make a copy while upcasting

    # Get and run the CuPy kernel
    int_type = _util._get_inttype(in1)
    kernel = filters._get_correlate_kernel(
        boundary, in2.shape, int_type, origin, fillvalue)
    if convolution:
        in2 = _reverse(in2)
    else:
        in2 = in2.conj()

    if convolution or not swapped:
        kernel(in1, in2, output)
    elif output.dtype.kind != 'c':
        # Avoids one array copy: the kernel writes through _reverse(output)
        # (presumably a view of output -- confirm).
        kernel(in1, in2, _reverse(output))
    else:
        kernel(in1, in2, output)
        output = cupy.ascontiguousarray(_reverse(output))

    # After an exchange of inputs the result is conjugated in place, except
    # for 'valid' mode with a truthy ``shift``.
    if swapped and not (mode == 'valid' and shift):
        cupy.conjugate(output, out=output)
    return output