def _generate_interp_custom(coord_func, ndim, large_int, yshape, mode, cval,
                            order, name='', integer_output=False, nprepad=0):
    """Build the code body and the name of an interpolation kernel.

    The returned code is assembled from string fragments: index/stride setup,
    the coordinate transform produced by ``coord_func``, boundary handling
    for ``mode`` and finally the sampling code selected by ``order``.

    Args:
        coord_func (function): generates code to do the coordinate
            transformation. It is called as ``coord_func(ndim, nprepad)`` and
            must return a list of code strings. See for example,
            `_get_coord_shift`.
        ndim (int): The number of dimensions.
        large_int (bool): If true use ``size_t``/``ptrdiff_t`` instead of
            ``unsigned int``/``int`` for indexing.
        yshape (tuple): Shape of the output array.
        mode (str): Signal extension mode to use at the array boundaries
        cval (float): constant value used when ``mode`` is ``'constant'`` or
            ``'grid-constant'``. NaN and +/-infinity are translated to the
            ``CUDART_NAN`` / ``CUDART_INF`` constants.
        order (int): interpolation order. ``0`` emits a nearest-neighbor
            lookup, ``1`` emits linear interpolation and values above ``1``
            use the weights from ``spline_weights_inline[order]``.
        name (str): base name for the interpolation kernel
        integer_output (bool): boolean indicating whether the output has an
            integer type. If true the result is rounded with ``rint`` before
            being cast to the output type.
        nprepad (int): integer indicating the amount of prepadding at the
            boundaries.

    Returns:
        operation (str): code body for the ElementwiseKernel
        name (str): name for the ElementwiseKernel
    """
    ops = ['double out = 0.0;']

    if large_int:
        uint_t = 'size_t'
        int_t = 'ptrdiff_t'
    else:
        uint_t = 'unsigned int'
        int_t = 'int'

    # determine strides for x along each axis
    for j in range(ndim):
        ops.append(f'const {int_t} xsize_{j} = x.shape()[{j}];')
    ops.append(f'const {uint_t} sx_{ndim - 1} = 1;')
    for j in range(ndim - 1, 0, -1):
        ops.append(f'const {uint_t} sx_{j - 1} = sx_{j} * xsize_{j};')

    # create in_coords array to store the unraveled indices
    ops.append(_unravel_loop_index(yshape, uint_t))

    # compute the transformed (target) coordinates, c_j
    ops = ops + coord_func(ndim, nprepad)

    # Translate special floating point values into their device-side
    # constants. NaN is detected by self-inequality rather than by identity
    # with ``numpy.nan`` so that any NaN object (``float('nan')``, NumPy
    # scalars, ...) is recognized; otherwise ``(double)nan`` would be emitted.
    if cval != cval:
        cval = 'CUDART_NAN'
    elif cval == numpy.inf:
        cval = 'CUDART_INF'
    elif cval == -numpy.inf:
        cval = '-CUDART_INF'
    else:
        cval = f'(double){cval}'

    if mode == 'constant':
        # use cval if coordinate is outside the bounds of x
        _cond = ' || '.join(
            [f'(c_{j} < 0) || (c_{j} > xsize_{j} - 1)' for j in range(ndim)])
        # the ``else {`` opened here is closed at the very end of the body
        ops.append(f'''
        if ({_cond})
        {{
            out = {cval};
        }}
        else
        {{''')

    if order == 0:
        ops.append('double dcoord;')  # mode 'wrap' requires this to work
        for j in range(ndim):
            # determine nearest neighbor
            if mode == 'wrap':
                ops.append(f'''
                dcoord = c_{j};''')
            else:
                ops.append(f'''
                {int_t} cf_{j} = ({int_t})lrint((double)c_{j});''')

            # handle boundary
            if mode != 'constant':
                if mode == 'wrap':
                    # 'wrap' applies the boundary to the floating point
                    # coordinate and rounds afterwards
                    ixvar = 'dcoord'
                    float_ix = True
                else:
                    ixvar = f'cf_{j}'
                    float_ix = False
                ops.append(
                    _util._generate_boundary_condition_ops(
                        mode, ixvar, f'xsize_{j}', int_t, float_ix))
                if mode == 'wrap':
                    ops.append(f'''
                    {int_t} cf_{j} = ({int_t})floor(dcoord + 0.5);''')

            # sum over ic_j will give the raveled coordinate in the input
            ops.append(f'''
            {int_t} ic_{j} = cf_{j} * sx_{j};''')

        _coord_idx = ' + '.join([f'ic_{j}' for j in range(ndim)])
        if mode == 'grid-constant':
            # a negative raveled offset marks an out-of-bounds coordinate
            _cond = ' || '.join([f'(ic_{j} < 0)' for j in range(ndim)])
            ops.append(f'''
            if ({_cond}) {{
                out = (double){cval};
            }} else {{
                out = x[{_coord_idx}];
            }}''')
        else:
            ops.append(f'''
            out = x[{_coord_idx}];''')

    elif order == 1:
        for j in range(ndim):
            # get coordinates for linear interpolation along axis j
            ops.append(f'''
            {int_t} cf_{j} = ({int_t})floor((double)c_{j});
            {int_t} cc_{j} = cf_{j} + 1;
            {int_t} n_{j} = (c_{j} == cf_{j}) ? 1 : 2;  // points needed
            ''')

            if mode == 'wrap':
                ops.append(f'''
                double dcoordf = c_{j};
                double dcoordc = c_{j} + 1;''')
            else:
                # handle boundaries for extension modes.
                ops.append(f'''
                {int_t} cf_bounded_{j} = cf_{j};
                {int_t} cc_bounded_{j} = cc_{j};''')

            if mode != 'constant':
                if mode == 'wrap':
                    ixvar = 'dcoordf'
                    float_ix = True
                else:
                    ixvar = f'cf_bounded_{j}'
                    float_ix = False
                ops.append(
                    _util._generate_boundary_condition_ops(
                        mode, ixvar, f'xsize_{j}', int_t, float_ix))

                ixvar = 'dcoordc' if mode == 'wrap' else f'cc_bounded_{j}'
                ops.append(
                    _util._generate_boundary_condition_ops(
                        mode, ixvar, f'xsize_{j}', int_t, float_ix))
                if mode == 'wrap':
                    ops.append(
                        f'''
                    {int_t} cf_bounded_{j} = ({int_t})floor(dcoordf);;
                    {int_t} cc_bounded_{j} = ({int_t})floor(dcoordf + 1);;
                    '''
                    )

            # one loop per axis is opened here; all of them are closed
            # together after the accumulation code below
            ops.append(f'''
            for (int s_{j} = 0; s_{j} < n_{j}; s_{j}++)
                {{
                    W w_{j};
                    {int_t} ic_{j};
                    if (s_{j} == 0)
                    {{
                        w_{j} = (W)cc_{j} - c_{j};
                        ic_{j} = cf_bounded_{j} * sx_{j};
                    }} else
                    {{
                        w_{j} = c_{j} - (W)cf_{j};
                        ic_{j} = cc_bounded_{j} * sx_{j};
                    }}''')

    elif order > 1:
        if mode == 'grid-constant':
            spline_mode = 'constant'
        elif mode == 'nearest':
            spline_mode = 'nearest'
        else:
            spline_mode = _spline_prefilter_core._get_spline_mode(mode)

        # wx, wy are temporary variables used during spline weight computation
        ops.append(f'''
            W wx, wy;
            {int_t} start;''')
        for j in range(ndim):
            # determine weights along the current axis
            ops.append(f'''
            W weights_{j}[{order + 1}];''')
            ops.append(spline_weights_inline[order].format(j=j, order=order))

            # get starting coordinate for spline interpolation along axis j
            if mode == 'wrap':
                ops.append(f'double dcoord = c_{j};')
                coord_var = 'dcoord'
                ops.append(
                    _util._generate_boundary_condition_ops(
                        mode, coord_var, f'xsize_{j}', int_t, True))
            else:
                coord_var = f'(double)c_{j}'

            if order & 1:
                op_str = '''
                start = ({int_t})floor({coord_var}) - {order_2};'''
            else:
                op_str = '''
                start = ({int_t})floor({coord_var} + 0.5) - {order_2};'''
            ops.append(
                op_str.format(
                    int_t=int_t, coord_var=coord_var, order_2=order // 2
                ))

            # set of coordinate values within spline footprint along axis j
            ops.append(f'''{int_t} ci_{j}[{order + 1}];''')
            for k in range(order + 1):
                ixvar = f'ci_{j}[{k}]'
                ops.append(f'''
                {ixvar} = start + {k};''')
                ops.append(
                    _util._generate_boundary_condition_ops(
                        spline_mode, ixvar, f'xsize_{j}', int_t))

            # loop over the order + 1 values in the spline filter
            ops.append(f'''
            W w_{j};
            {int_t} ic_{j};
            for (int k_{j} = 0; k_{j} <= {order}; k_{j}++)
                {{
                    w_{j} = weights_{j}[k_{j}];
                    ic_{j} = ci_{j}[k_{j}] * sx_{j};
            ''')

    if order > 0:
        # accumulate the weighted sample inside the innermost loop
        _weight = ' * '.join([f'w_{j}' for j in range(ndim)])
        _coord_idx = ' + '.join([f'ic_{j}' for j in range(ndim)])
        if mode == 'grid-constant' or (order > 1 and mode == 'constant'):
            _cond = ' || '.join([f'(ic_{j} < 0)' for j in range(ndim)])
            ops.append(f'''
            if ({_cond}) {{
                out += (X){cval} * ({_weight});
            }} else {{
                X val = x[{_coord_idx}];
                out += val * ({_weight});
            }}''')
        else:
            ops.append(f'''
            X val = x[{_coord_idx}];
            out += val * ({_weight});''')

        # close the per-axis loops opened above (one brace per axis)
        ops.append('}' * ndim)

    if mode == 'constant':
        # close the ``else {`` of the out-of-bounds test
        ops.append('}')

    if integer_output:
        ops.append('y = (Y)rint((double)out);')
    else:
        ops.append('y = (Y)out;')
    operation = '\n'.join(ops)

    mode_str = mode.replace('-', '_')  # avoid hyphen in kernel name
    name = 'interpolate_{}_order{}_{}_{}d_y{}'.format(
        name, order, mode_str, ndim, '_'.join([f'{j}' for j in yshape]),
    )
    if uint_t == 'size_t':
        name += '_i64'
    return operation, name
def _generate_nd_kernel(name, pre, found, post, mode, w_shape, int_type,
                        offsets, cval, ctype='X', preamble='', options=(),
                        has_weights=True, has_structure=False, has_mask=False,
                        binary_morphology=False):
    """Generate an ElementwiseKernel that loops over an n-d neighborhood.

    Args:
        name (str): base name used to build the kernel name.
        pre (str): code inserted before the neighborhood loops.
        found (str): code template executed in the innermost loop. It is
            formatted with ``value`` (the expression reading the current
            input element) and, when ``binary_morphology`` is true, also with
            ``cond`` (the out-of-bounds test).
        post (str): code inserted after the neighborhood loops.
        mode (str): boundary mode forwarded to
            ``_util._generate_boundary_condition_ops``. With ``'constant'``
            out-of-bounds reads are replaced by ``cval``.
        w_shape (tuple of int): shape of the neighborhood; its length gives
            the number of dimensions. Axes of size 1 do not get a loop.
        int_type (str): C integer type used for the index variables.
            ``'ptrdiff_t'`` adds an ``_i64`` suffix to the kernel name.
        offsets: forwarded to ``_util._generate_indices_ops``.
        cval: value substituted (through ``cast<ctype>``) for out-of-bounds
            reads in ``'constant'`` mode.
        ctype (str): type that ``cval`` is cast to.
        preamble (str): extra preamble appended after ``_CAST_FUNCTION``.
        options (tuple of str): extra compiler options appended after
            ``'--std=c++11'``.
        has_weights (bool): add a ``raw W w`` input and skip elements for
            which ``nonzero(wval)`` is false.
        has_structure (bool): add a ``raw S s`` input and expose ``sval``.
        has_mask (bool): add a ``raw M mask`` input.
        binary_morphology (bool): if true ``found`` receives ``cond`` and the
            unmodified ``value`` instead of a ``value`` with ``cval`` folded
            in.

    Returns:
        cupy.ElementwiseKernel: the generated kernel.
    """
    # Currently this code uses CArray for weights but avoids using CArray for
    # the input data and instead does the indexing itself since it is faster.
    # If CArray becomes faster than follow the comments that start with
    # CArray: to switch over to using CArray for the input data as well.

    ndim = len(w_shape)
    in_params = 'raw X x'
    if has_weights:
        in_params += ', raw W w'
    if has_structure:
        in_params += ', raw S s'
    if has_mask:
        in_params += ', raw M mask'
    out_params = 'Y y'

    # CArray: remove xstride_{j}=... from string
    size = ('%s xsize_{j}=x.shape()[{j}], ysize_{j} = _raw_y.shape()[{j}]'
            ', xstride_{j}=x.strides()[{j}];' % int_type)
    sizes = [size.format(j=j) for j in range(ndim)]
    inds = _util._generate_indices_ops(ndim, int_type, offsets)
    # CArray: remove expr entirely
    expr = ' + '.join(['ix_{}'.format(j) for j in range(ndim)])

    # iws is the running index into the flattened weights/structure arrays
    ws_init = ws_pre = ws_post = ''
    if has_weights or has_structure:
        ws_init = 'int iws = 0;'
        if has_structure:
            ws_pre = 'S sval = s[iws];\n'
        if has_weights:
            ws_pre += 'W wval = w[iws];\nif (nonzero(wval))'
        ws_post = 'iws++;'

    loops = []
    for j in range(ndim):
        if w_shape[j] == 1:
            # CArray: string becomes 'inds[{j}] = ind_{j};', remove (int_)type
            loops.append('{{ {type} ix_{j} = ind_{j} * xstride_{j};'.format(
                j=j, type=int_type))
        else:
            boundary = _util._generate_boundary_condition_ops(
                mode, 'ix_{}'.format(j), 'xsize_{}'.format(j))
            # CArray: last line of string becomes inds[{j}] = ix_{j};
            loops.append('''
    for (int iw_{j} = 0; iw_{j} < {wsize}; iw_{j}++)
    {{
        {type} ix_{j} = ind_{j} + iw_{j};
        {boundary}
        ix_{j} *= xstride_{j};
        '''.format(j=j, wsize=w_shape[j], boundary=boundary, type=int_type))

    # CArray: string becomes 'x[inds]', no format call needed
    value = '(*(X*)&data[{expr}])'.format(expr=expr)
    # The out-of-bounds test is always built: the binary morphology template
    # consumes it regardless of ``mode``, and building it only for
    # 'constant' raised NameError for every other mode.
    cond = ' || '.join(['(ix_{} < 0)'.format(j) for j in range(ndim)])

    if binary_morphology:
        found = found.format(cond=cond, value=value)
    else:
        if mode == 'constant':
            value = '(({cond}) ? cast<{ctype}>({cval}) : {value})'.format(
                cond=cond, ctype=ctype, cval=cval, value=value)
        found = found.format(value=value)

    # CArray: replace comment and next line in string with
    #   {type} inds[{ndim}] = {{0}};
    # and add ndim=ndim, type=int_type to format call
    operation = '''
    {sizes}
    {inds}
    // don't use a CArray for indexing (faster to deal with indexing ourselves)
    const unsigned char* data = (const unsigned char*)&x[0];
    {ws_init}
    {pre}
    {loops}
        // inner-most loop
        {ws_pre} {{
            {found}
        }}
        {ws_post}
    {end_loops}
    {post}
    '''.format(sizes='\n'.join(sizes), inds=inds, pre=pre, post=post,
               ws_init=ws_init, ws_pre=ws_pre, ws_post=ws_post,
               loops='\n'.join(loops), found=found, end_loops='}' * ndim)

    name = 'cupy_ndimage_{}_{}d_{}_w{}'.format(
        name, ndim, mode, '_'.join(['{}'.format(x) for x in w_shape]))
    if int_type == 'ptrdiff_t':
        name += '_i64'
    if has_structure:
        name += '_with_structure'
    if has_mask:
        name += '_with_mask'
    preamble = _CAST_FUNCTION + preamble
    return cupy.ElementwiseKernel(in_params, out_params, operation, name,
                                  reduce_dims=False, preamble=preamble,
                                  options=('--std=c++11', ) + options)
def _generate_interp_custom(coord_func, ndim, large_int, yshape, mode, cval,
                            order, name='', integer_output=False):
    """Build the code body and the name of an interpolation kernel.

    Args:
        coord_func (function): generates code to do the coordinate
            transformation. It is called as ``coord_func(ndim)`` and must
            return a list of code strings. See for example,
            `_get_coord_shift`.
        ndim (int): The number of dimensions.
        large_int (bool): If true use ``size_t``/``ptrdiff_t`` instead of
            ``unsigned int``/``int`` for indexing.
        yshape (tuple): Shape of the output array.
        mode (str): Signal extension mode to use at the array boundaries
        cval (float): constant value used when `mode == 'constant'`. NaN and
            +/-infinity are translated to the ``CUDART_NAN`` /
            ``CUDART_INF`` constants.
        order (int): interpolation order. ``0`` emits a nearest-neighbor
            lookup and ``1`` emits linear interpolation; no sampling code is
            generated for other values.
        name (str): base name for the interpolation kernel
        integer_output (bool): boolean indicating whether the output has an
            integer type. If true the result is rounded with ``rint`` before
            being cast to the output type.

    Returns:
        operation (str): code body for the ElementwiseKernel
        name (str): name for the ElementwiseKernel
    """
    # NOTE(review): another definition of `_generate_interp_custom` (taking an
    # extra `nprepad` argument) appears earlier in this file; this later one
    # is the one bound at import time -- confirm which is intended.

    ops = ['double out = 0.0;']

    if large_int:
        uint_t = 'size_t'
        int_t = 'ptrdiff_t'
    else:
        uint_t = 'unsigned int'
        int_t = 'int'

    # determine strides for x along each axis
    for j in range(ndim):
        ops.append(f'const {int_t} xsize_{j} = x.shape()[{j}];')
    ops.append(f'const {uint_t} sx_{ndim - 1} = 1;')
    for j in range(ndim - 1, 0, -1):
        ops.append(f'const {uint_t} sx_{j - 1} = sx_{j} * xsize_{j};')

    # create in_coords array to store the unraveled indices
    ops.append(_unravel_loop_index(yshape, uint_t))

    # compute the transformed (target) coordinates, c_j
    ops = ops + coord_func(ndim)

    # Translate special floating point values into their device-side
    # constants. NaN is detected by self-inequality rather than by identity
    # with ``numpy.nan`` so that any NaN object (``float('nan')``, NumPy
    # scalars, ...) is recognized; otherwise ``(double)nan`` would be emitted.
    if cval != cval:
        cval = 'CUDART_NAN'
    elif cval == numpy.inf:
        cval = 'CUDART_INF'
    elif cval == -numpy.inf:
        cval = '-CUDART_INF'
    else:
        cval = f'(double){cval}'

    if mode == 'constant':
        # use cval if coordinate is outside the bounds of x
        _cond = ' || '.join(
            [f'(c_{j} < 0) || (c_{j} > xsize_{j} - 1)' for j in range(ndim)])
        # the ``else {`` opened here is closed at the very end of the body
        ops.append(f'''
        if ({_cond})
        {{
            out = {cval};
        }}
        else
        {{''')

    if order == 0:
        for j in range(ndim):
            # determine nearest neighbor
            ops.append(f'''
            {int_t} cf_{j} = ({int_t})lrint((double)c_{j});
            ''')

            # handle boundary
            if mode != 'constant':
                ops.append(
                    _util._generate_boundary_condition_ops(
                        mode, f'cf_{j}', f'xsize_{j}'))

            # sum over ic_j will give the raveled coordinate in the input
            ops.append(f'''
            {int_t} ic_{j} = cf_{j} * sx_{j};
            ''')
        _coord_idx = ' + '.join([f'ic_{j}' for j in range(ndim)])
        ops.append(f'''
        out = x[{_coord_idx}];''')

    elif order == 1:
        for j in range(ndim):
            # get coordinates for linear interpolation along axis j
            ops.append(f'''
            {int_t} cf_{j} = ({int_t})floor((double)c_{j});
            {int_t} cc_{j} = cf_{j} + 1;
            {int_t} n_{j} = (c_{j} == cf_{j}) ? 1 : 2;  // points needed
            ''')

            # handle boundaries for extension modes.
            ops.append(f'''
            {int_t} cf_bounded_{j} = cf_{j};
            {int_t} cc_bounded_{j} = cc_{j};
            ''')
            if mode != 'constant':
                ops.append(
                    _util._generate_boundary_condition_ops(
                        mode, f'cf_bounded_{j}', f'xsize_{j}'))
                ops.append(
                    _util._generate_boundary_condition_ops(
                        mode, f'cc_bounded_{j}', f'xsize_{j}'))

            # one loop per axis is opened here; all of them are closed
            # together after the accumulation code below
            ops.append(f'''
            for (int s_{j} = 0; s_{j} < n_{j}; s_{j}++)
                {{
                    W w_{j};
                    {int_t} ic_{j};
                    if (s_{j} == 0)
                    {{
                        w_{j} = (W)cc_{j} - c_{j};
                        ic_{j} = cf_bounded_{j} * sx_{j};
                    }} else
                    {{
                        w_{j} = c_{j} - (W)cf_{j};
                        ic_{j} = cc_bounded_{j} * sx_{j};
                    }}''')

        # accumulate the weighted sample inside the innermost loop
        _weight = ' * '.join([f'w_{j}' for j in range(ndim)])
        _coord_idx = ' + '.join([f'ic_{j}' for j in range(ndim)])
        ops.append(f'''
        X val = x[{_coord_idx}];
        out += val * ({_weight});''')
        # close the per-axis loops opened above (one brace per axis)
        ops.append('}' * ndim)

    if mode == 'constant':
        # close the ``else {`` of the out-of-bounds test
        ops.append('}')

    if integer_output:
        ops.append('y = (Y)rint((double)out);')
    else:
        ops.append('y = (Y)out;')
    operation = '\n'.join(ops)

    # avoid hyphen in kernel name (no effect on hyphen-free mode names)
    mode_str = mode.replace('-', '_')
    name = 'interpolate_{}_order{}_{}_{}d_y{}'.format(
        name, order, mode_str, ndim, '_'.join([f'{j}' for j in yshape]),
    )
    if uint_t == 'size_t':
        name += '_i64'
    return operation, name