Example #1
0
def _call_kernel(kernel, input, weights, output, weight_dtype=cupy.float64):
    """
    Calls a constructed ElementwiseKernel. The kernel must take an input image,
    an array of weights, and an output array.

    The weights are the only optional part and can be passed as None and then
    one less argument is passed to the kernel. If the output is not already a
    cupy.ndarray (e.g. it is None) then it is allocated in this function
    through ``_util._get_output``.

    This function deals with making sure that the weights are contiguous and
    of ``weight_dtype`` (float64 by default)*, and that the output is
    allocated. This also deals with the situation that the input and output
    arrays overlap in memory: the kernel then writes into a scratch array and
    the result is copied into the requested output afterwards.

    * weights is always cast to ``weight_dtype`` (float64 or bool in practice)
    in order to get an output compatible with SciPy, though float32 might be
    sufficient when input dtype is low precision.

    Returns the output array. When an output array was supplied it is that
    same array, also in the overlapping case.
    """
    if weights is not None:
        weights = cupy.ascontiguousarray(weights, weight_dtype)

    if not isinstance(output, cupy.ndarray):
        # The docstring has always promised allocation for output=None, but
        # the overlap check below needs a real array to work with.
        # NOTE(review): the dtype of a freshly allocated output is decided by
        # _util._get_output - confirm it matches what callers expect.
        output = _util._get_output(
            output, input, None, cupy.dtype(weight_dtype)
        )

    needs_temp = cupy.shares_memory(output, input, "MAY_SHARE_BOUNDS")
    if needs_temp:
        # Run the kernel on a scratch array so it never reads elements of
        # input that it has already overwritten; keep the requested output
        # in `temp` and fill it once the kernel is done.
        output, temp = (
            _util._get_output(output.dtype, input, None, weight_dtype),
            output,
        )
    if weights is None:
        kernel(input, output)
    else:
        kernel(input, weights, output)
    if needs_temp:
        temp[...] = output[...]
        output = temp
    return output
Example #2
0
def _call_kernel(
    kernel,
    input,
    weights,
    output,
    structure=None,
    weights_dtype=numpy.float64,
    structure_dtype=numpy.float64,
):
    """
    Invokes a constructed ElementwiseKernel on an input image.

    The kernel is called with, in order: the input, the weights (if not None),
    the structure (if not None) and the output array. weights and structure
    are simply left out of the call when they are None.

    Before the call, weights and structure are made contiguous and converted
    to weights_dtype / structure_dtype respectively*. The output is obtained
    from ``_util._get_output`` so it may be given as None, in which case it is
    allocated here. If the output may overlap the input in memory, the kernel
    writes to a scratch array first and the result is copied into the output
    afterwards.

    * weights is cast to float64 by default (or bool for footprints) in order
    to get an output compatible with SciPy, though float32 might be sufficient
    when the input dtype is low precision. Passing weights.dtype as
    weights_dtype avoids any dtype conversion. The input and output are never
    converted.

    Returns the output array.
    """
    kernel_args = [input]
    if weights is not None:
        kernel_args.append(cupy.ascontiguousarray(weights, weights_dtype))
    if structure is not None:
        kernel_args.append(cupy.ascontiguousarray(structure, structure_dtype))

    result = _util._get_output(output, input, None, cupy.dtype(weights_dtype))

    if not cupy.shares_memory(result, input, "MAY_SHARE_BOUNDS"):
        # No overlap: the kernel can write straight into the result.
        kernel(*kernel_args, result)
        return result

    # Possible overlap: compute into a scratch array, then copy across.
    scratch = _util._get_output(result.dtype, input)
    kernel(*kernel_args, scratch)
    result[...] = scratch[...]
    return result
Example #3
0
def _run_1d_filters(filters,
                    input,
                    args,
                    output,
                    mode,
                    cval,
                    origin=0,
                    dtype_mode="ndimage"):
    """
    Applies a sequence of 1D filters, one per axis, to build an nd filter.

    filters is a list of callables invoked as
    ``fltr(input, arg, axis, output, mode, cval, origin)``; args supplies the
    ``arg`` value for each of them. An entry of None in filters skips that
    axis. mode and origin are expanded to one value per axis. dtype_mode is
    accepted but not used by this function.

    Returns the array holding the final result. If output was given as an
    array, the result is always stored in (and returned as) that array.
    """
    caller_output = output
    output = _util._get_output(output, input)
    modes = _util._fix_sequence_arg(mode, input.ndim, "mode",
                                    _util._check_mode)
    origins = _util._fix_sequence_arg(origin, input.ndim, "origin", int)

    active_count = sum(1 for candidate in filters if candidate is not None)
    if not active_count:
        # Nothing to apply: the result is just a copy of the input.
        output[...] = input
        return output

    # Filtering in-place is not efficient, so alternate between two buffers:
    # passes 1, 3, 5, ... write to `output` and passes 2, 4, ... to `scratch`.
    scratch = None
    if active_count > 1:
        scratch = _util._get_output(output.dtype, input)
    buffers = (output, scratch)

    current = input
    applied = 0
    per_axis = zip(filters, args, modes, origins)
    for axis, (fltr, arg, axis_mode, axis_origin) in enumerate(per_axis):
        if fltr is not None:
            target = buffers[applied % 2]
            fltr(current, arg, axis, target, axis_mode, cval, axis_origin)
            current = target
            applied += 1

    wants_copy = (isinstance(caller_output, cupy.ndarray)
                  and current is not caller_output)
    if wants_copy:
        # The last pass landed in the other buffer; move it to the array the
        # caller asked for.
        caller_output[...] = current
        return caller_output
    return current
Example #4
0
def _correlate_or_convolve(
    input,
    weights,
    output,
    mode,
    cval,
    origin,
    convolution,
    dtype_mode,
    use_weights_mask=False,
):
    """
    Shared implementation of nd correlation and convolution.

    input, weights: arrays to correlate/convolve. Empty weights short-circuit
        to ``cupy.zeros_like(input)``.
    output: output array / dtype / None, handed to ``_util._get_output``. Must
        be None when ``dtype_mode == "numpy"``.
    mode, cval, origin: boundary mode, fill value and origin; validated by
        ``_check_nd_args``.
    convolution: if true, the weights are reversed along every axis and the
        origins are negated (with an extra -1 for even-sized axes) so that
        the correlation kernel computes a convolution. Otherwise complex
        weights are conjugated.
    dtype_mode: selects the dtype the weights are cast to.
        "ndimage": float64 (complex128 if input or weights are complex).
        "float": input's real dtype promoted with float32 (complex64 if
            input or weights are complex).
        "numpy": promotion of the input and weights dtypes; float16 is
            computed internally as float32 while the output keeps the
            promoted dtype.
    use_weights_mask: if true, use the masked kernel, which receives only the
        non-zero weights and their coordinates.

    Returns the filtered array.

    Raises ValueError if dtype_mode is not one of the values above, or if an
    output is supplied together with ``dtype_mode == "numpy"``.
    """
    origins, int_type = _check_nd_args(input, weights, mode, origin)
    if weights.size == 0:
        return cupy.zeros_like(input)
    if convolution:
        weights = weights[tuple([slice(None, None, -1)] * weights.ndim)]
        origins = list(origins)
        for i, wsize in enumerate(weights.shape):
            origins[i] = -origins[i]
            if wsize % 2 == 0:
                origins[i] -= 1
        origins = tuple(origins)
    elif weights.dtype.kind == "c":
        # numpy.correlate conjugates weights rather than input.
        weights = weights.conj()

    if dtype_mode == "numpy":
        # This "numpy" mode is used by cupyimg.scipy.signal.signaltools
        # numpy.convolve and correlate do not always cast to floats
        dtype = cupy.promote_types(input.dtype, weights.dtype)
        output_dtype = dtype
        if dtype.char == "e":
            # promote internal float type to float32 for accuracy
            dtype = "f"
        if output is not None:
            raise ValueError(
                "dtype_mode == 'numpy' does not support the output "
                "argument")
        if weights.dtype != dtype:
            weights = weights.astype(dtype)
        if input.dtype != dtype:
            input = input.astype(dtype)
        output = cupy.zeros(input.shape, output_dtype)
        weight_dtype = dtype
    elif dtype_mode in ("ndimage", "float"):
        is_complex = weights.dtype.kind == "c" or input.dtype.kind == "c"
        if dtype_mode == "ndimage":
            weight_dtype = cupy.complex128 if is_complex else cupy.float64
        else:
            weight_dtype = cupy.promote_types(
                input.real.dtype,
                cupy.complex64 if is_complex else cupy.float32,
            )
        weight_dtype = cupy.dtype(weight_dtype)
        output = _util._get_output(output, input, None, weight_dtype)
    else:
        # Previously an unknown mode left weight_dtype unbound and failed
        # with an UnboundLocalError; report the actual problem instead.
        raise ValueError(
            "dtype_mode must be 'numpy', 'ndimage' or 'float', "
            "got {!r}".format(dtype_mode))
    unsigned_output = output.dtype.kind in ["u", "b"]

    if use_weights_mask:
        input = cupy.ascontiguousarray(input)

        # The kernel needs only the non-zero kernel values and their
        # coordinates. This allows us to use a single for loop to compute the
        # ndim convolution. The loop will be over only the non-zero entries
        # of the filter.
        weights = cupy.ascontiguousarray(weights, weight_dtype)
        wlocs = cupy.nonzero(weights)
        wvals = weights[wlocs]  # (nnz,) array of non-zero values
        wlocs = cupy.stack(
            wlocs)  # (ndim, nnz) array of indices for these values

        return _get_correlate_kernel_masked(
            mode,
            cval,
            input.shape,
            weights.shape,
            wvals.size,
            tuple(origins),
            unsigned_output,
        )(input, wlocs, wvals, output)
    else:
        if mode == "constant":
            # TODO: negative strides gives incorrect result for constant mode
            #       so make sure input is C contiguous.
            input = cupy.ascontiguousarray(input)
        kernel = _get_correlate_kernel(mode, weights.shape, int_type, origins,
                                       cval, unsigned_output)
        return _call_kernel(kernel, input, weights, output, weight_dtype)
Example #5
0
def _binary_erosion(
    input,
    structure,
    iterations,
    mask,
    output,
    border_value,
    origin,
    invert,
    brute_force=True,
):
    """
    Core implementation of binary erosion, run through a generated kernel.

    input: array to erode; complex dtypes are rejected. Inputs with negative
        strides are copied to a contiguous array first.
    structure: structuring element, or None to use
        ``generate_binary_structure(input.ndim, 1)``. It is cast to bool and
        must have the same number of dimensions as input and be non-empty.
    iterations: integer number of erosions to apply. A value below 1 repeats
        the erosion until the result no longer changes.
    mask: optional array with the same shape as input, passed to the kernel.
    output: output array, or anything else to request a newly allocated bool
        array. Complex output arrays are rejected.
    border_value, invert: forwarded to ``_get_binary_erosion_kernel``.
    origin: origin of the structure; expanded to one int per axis.
    brute_force: only the brute-force iteration scheme exists. Requesting
        several iterations with brute_force=False raises NotImplementedError
        when the structure's center is true.

    Returns the array holding the result. If output may share memory with
    input, the work is done in a scratch array and copied into output at the
    end.

    Raises TypeError for a non-integer iterations or complex input/output,
    RuntimeError for an invalid structure or mask, NotImplementedError for
    the unsupported non-brute-force iteration, and ValueError if the working
    output overlaps the input when iterating.
    """
    try:
        iterations = operator.index(iterations)
    except TypeError as err:
        raise TypeError("iterations parameter should be an integer") from err

    if input.dtype.kind == "c":
        raise TypeError("Complex type not supported")
    if any(s < 0 for s in input.strides):
        input = cupy.ascontiguousarray(input)
    if structure is None:
        structure = generate_binary_structure(input.ndim, 1)
        all_weights_nonzero = input.ndim == 1
        center_is_true = True
        default_structure = True
    else:
        structure = structure.astype(dtype=bool, copy=False)
        # density / center of a user structure are determined further below
        default_structure = False
    if structure.ndim != input.ndim:
        raise RuntimeError("structure and input must have same dimensionality")
    if not structure.flags.c_contiguous:
        structure = cupy.ascontiguousarray(structure)
    if structure.size < 1:
        raise RuntimeError("structure must not be empty")

    if mask is not None:
        if mask.shape != input.shape:
            raise RuntimeError("mask and input must have equal sizes")
        if not mask.flags.c_contiguous:
            mask = cupy.ascontiguousarray(mask)
        masked = True
    else:
        masked = False
    origin = _util._fix_sequence_arg(origin, input.ndim, "origin", int)

    if isinstance(output, cupy.ndarray):
        if output.dtype.kind == "c":
            raise TypeError("Complex output type not supported")
    else:
        output = bool
    output = _util._get_output(output, input)
    temp_needed = cupy.shares_memory(output, input, "MAY_SHARE_BOUNDS")
    if temp_needed:
        # input and output arrays cannot share memory
        temp = output
        output = _util._get_output(output.dtype, input)
    if structure.ndim == 0:
        # kernel doesn't handle ndim=0, so special case it here
        if float(structure):
            output[...] = cupy.asarray(input, dtype=bool)
        else:
            output[...] = ~cupy.asarray(input, dtype=bool)
        if temp_needed:
            # Copy back into the array the caller asked for, exactly as the
            # regular path does; otherwise an in-place request would be
            # silently ignored for 0-d inputs.
            temp[...] = output
            output = temp
        return output
    origin = tuple(origin)
    int_type = _util._get_inttype(input)
    offsets = _filters_core._origins_to_offsets(origin, structure.shape)
    if not default_structure:
        # synchronize required to determine if all weights are non-zero
        nnz = int(cupy.count_nonzero(structure))
        all_weights_nonzero = nnz == structure.size
        if all_weights_nonzero:
            center_is_true = True
        else:
            center_is_true = _center_is_true(structure, origin)

    erode_kernel = _get_binary_erosion_kernel(
        structure.shape,
        int_type,
        offsets,
        center_is_true,
        border_value,
        invert,
        masked,
        all_weights_nonzero,
    )

    if iterations == 1:
        if masked:
            output = erode_kernel(input, structure, mask, output)
        else:
            output = erode_kernel(input, structure, output)
    elif center_is_true and not brute_force:
        raise NotImplementedError(
            "only brute_force iteration has been implemented")
    else:
        # Defensive check: the temp handling above should already guarantee
        # that output and input do not overlap.
        if cupy.shares_memory(output, input, "MAY_SHARE_BOUNDS"):
            raise ValueError("output and input may not overlap in memory")
        # Alternate between two buffers. For an even iteration count, start
        # with the buffers swapped so that the last pass writes to `output`.
        tmp_in = cupy.empty_like(input, dtype=output.dtype)
        tmp_out = output
        if iterations >= 1 and not iterations & 1:
            tmp_in, tmp_out = tmp_out, tmp_in
        if masked:
            tmp_out = erode_kernel(input, structure, mask, tmp_out)
        else:
            tmp_out = erode_kernel(input, structure, tmp_out)
        # TODO: kernel doesn't return the changed status, so determine it here
        changed = not (input == tmp_out).all()  # synchronize!
        ii = 1
        while ii < iterations or ((iterations < 1) and changed):
            tmp_in, tmp_out = tmp_out, tmp_in
            if masked:
                tmp_out = erode_kernel(tmp_in, structure, mask, tmp_out)
            else:
                tmp_out = erode_kernel(tmp_in, structure, tmp_out)
            changed = not (tmp_in == tmp_out).all()
            ii += 1
            if not changed and (not ii & 1):  # synchronize!
                # can exit early if nothing changed
                # (only do this after even number of tmp_in/out swaps)
                break
        output = tmp_out
    if temp_needed:
        temp[...] = output
        output = temp
    return output