def test_rfft_plan_manager(self, xp, scp, dtype):
    """An R2C plan installed via the context manager is picked up by rfft."""
    data = testing.shaped_random(self.shape, xp, dtype)
    snapshot = data.copy()
    if scp is not cupyx.scipy:  # scipy
        result = scp.fftpack.rfft(data, n=self.n, axis=self.axis)
    else:
        from cupy.cuda.cufft import get_current_plan
        fft_plan = scp.fftpack.get_fft_plan(
            data, shape=self.n, axes=self.axis, value_type='R2C')
        with fft_plan:
            # inside the context, the managed plan must be the current one
            assert fft_plan is get_current_plan()
            result = scp.fftpack.rfft(data, n=self.n, axis=self.axis)
        # leaving the context must clear the current plan
        assert get_current_plan() is None
    # the input array must not be overwritten
    testing.assert_array_equal(data, snapshot)
    return result
def test_ifftn_plan_manager(self, xp, scp, dtype):
    """An N-D plan installed via the context manager is picked up by ifftn."""
    data = testing.shaped_random(self.shape, xp, dtype)
    # hack: avoid testing the cases when getting a cuFFT plan is impossible
    if _default_fft_func(data, s=self.s, axes=self.axes) is not _fftn:
        return data
    if scp is not cupyx.scipy:  # scipy
        return scp.fftpack.ifftn(data, shape=self.s, axes=self.axes)
    from cupy.cuda.cufft import get_current_plan
    nd_plan = scp.fftpack.get_fft_plan(data, shape=self.s, axes=self.axes)
    with nd_plan:
        # inside the context, the managed plan must be the current one
        assert nd_plan is get_current_plan()
        result = scp.fftpack.ifftn(data, shape=self.s, axes=self.axes)
    # leaving the context must clear the current plan
    assert get_current_plan() is None
    return result
def _default_fft_func(a, s=None, axes=None, plan=None, value_type='C2C'):
    """Pick the FFT executor (``_fft`` or ``_fftn``) for the given input.

    An explicit ``plan`` argument or a plan set via the context manager
    forces the matching code path (``Plan1d`` -> ``_fft``, ``PlanNd`` ->
    ``_fftn``); supplying both is an error. Otherwise the choice is made
    from the array layout, the axes to transform, and backend (CUDA vs.
    HIP) limitations.

    Args:
        a: input array whose ``ndim``/``flags`` drive the decision.
        s: transform lengths, forwarded to ``_prep_fftn_axes``.
        axes: axes to transform, forwarded to ``_prep_fftn_axes``.
        plan: optional pre-built cuFFT ``Plan1d``/``PlanNd``.
        value_type: transform kind ('C2C', 'R2C', or 'C2R').

    Returns:
        The function ``_fft`` or ``_fftn``.
    """
    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            # a plan may come from either channel, never both at once
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')
    if isinstance(plan, cufft.PlanNd):  # a shortcut for using _fftn
        return _fftn
    elif (isinstance(plan, cufft.Plan1d) or a.ndim == 1 or
            not config.enable_nd_planning):
        return _fft

    # cuFFT's N-D C2R/R2C transforms may not agree with NumPy's outcomes
    if a.flags.f_contiguous and value_type != 'C2C':
        return _fft

    _, axes_sorted = _prep_fftn_axes(a.ndim, s, axes, value_type)
    if len(axes_sorted) > 1 and _nd_plan_is_possible(axes_sorted, a.ndim):
        # circumvent two potential hipFFT/rocFFT bugs as of ROCm 3.5.0
        # TODO(leofang): understand hipFFT better and test newer ROCm versions
        if cupy.cuda.runtime.is_hip:
            if (0 == axes_sorted[0] and len(axes_sorted) != a.ndim and
                    a.flags.c_contiguous):
                return _fft
            # For C2R, we don't use PlanNd; see the workaround in _exec_fft()
            if value_type == 'C2R':
                return _fft
        return _fftn
    # prefer Plan1D in the 1D case
    return _fft
def _exec_fft(a, direction, value_type, norm, axis, overwrite_x,
              out_size=None, out=None, plan=None):
    """Run a batched 1-D cuFFT transform along ``axis`` of ``a``.

    The target axis is moved to the last position and the array is made
    C-contiguous; a ``Plan1d`` is either created or validated against the
    transform parameters; after execution the result is normalized
    (``norm is None``: inverse transforms are divided by the size;
    otherwise divided by sqrt(size) — presumably 'ortho', confirm with
    callers) and the axis order restored.

    Args:
        a: input array.
        direction: cuFFT direction constant (compared against
            ``cufft.CUFFT_INVERSE`` for normalization).
        value_type: transform kind; 'C2C' enables in-place overwriting.
        norm: normalization mode (``None`` or another value — see above).
        axis: axis along which to transform.
        overwrite_x: allow reuse of ``a`` as the output (C2C only).
        out_size: length of the transformed output axis (defaults to the
            input's last-axis length).
        out: optional pre-allocated output array (shape/dtype-checked).
        plan: optional pre-built ``cufft.Plan1d``.

    Returns:
        The transformed (and normalized) array.
    """
    fft_type = _convert_fft_type(a, value_type)

    # bring the transform axis to the innermost position
    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    # cuFFT needs a contiguous, exclusively-owned buffer
    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()

    if out_size is None:
        out_size = a.shape[-1]

    batch = a.size // a.shape[-1]

    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            # a plan may come from either channel, never both at once
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')

    if plan is None:
        devices = None if not config.use_multi_gpus else config._devices
        plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)
    else:
        # check plan validity
        if not isinstance(plan, cufft.Plan1d):
            raise ValueError('expected plan to have type cufft.Plan1d')
        if fft_type != plan.fft_type:
            raise ValueError('CUFFT plan dtype mismatch.')
        if out_size != plan.nx:
            raise ValueError('Target array size does not match the plan.')
        if batch != plan.batch:
            raise ValueError('Batch size does not match the plan.')
        if config.use_multi_gpus != plan._use_multi_gpus:
            raise ValueError('Unclear if multiple GPUs are to be used or not.')

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)

    plan.fft(a, out, direction)

    # for R2C the logical transform length is the *input* length, not the
    # (roughly halved) complex output length
    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = a.shape[-1]
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    # restore the original axis order
    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
def _exec_fftn(a, direction, value_type, norm, axes, overwrite_x,
               plan=None, out=None):
    """Run an N-D cuFFT transform over ``axes`` of ``a`` (C2C/Z2Z only).

    A ``PlanNd`` is either taken from the context manager, created on the
    fly, or validated against the input's shape/order/dtype. The result
    is normalized by the product of the transformed-axes lengths
    (``norm is None``: inverse transforms divided by the product;
    otherwise divided by its square root — presumably 'ortho', confirm
    with callers).

    Args:
        a: input array; must be C- or F-contiguous.
        direction: cuFFT direction constant.
        value_type: transform kind; only 'C2C' is supported here.
        norm: normalization mode (``None`` or another value — see above).
        axes: axes to transform.
        overwrite_x: allow reuse of ``a`` as the output (C2C only).
        plan: optional pre-built ``cufft.PlanNd``.
        out: optional pre-allocated output array (shape/dtype-checked).

    Returns:
        The transformed (and normalized) array.

    Raises:
        NotImplementedError: for non-C2C/Z2Z transform types.
        ValueError: for non-contiguous input or plan mismatches.
    """
    fft_type = _convert_fft_type(a, value_type)
    if fft_type not in [cufft.CUFFT_C2C, cufft.CUFFT_Z2Z]:
        raise NotImplementedError('Only C2C and Z2Z are supported.')

    # make sure we own the buffer exclusively
    if a.base is not None:
        a = a.copy()

    if a.flags.c_contiguous:
        order = 'C'
    elif a.flags.f_contiguous:
        order = 'F'
    else:
        raise ValueError('a must be contiguous')

    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        plan = curr_plan
        # don't check repeated usage; it's done in _default_fft_func()

    if plan is None:
        # generate a plan
        plan = _get_cufft_plan_nd(a.shape, fft_type, axes=axes, order=order)
    else:
        if not isinstance(plan, cufft.PlanNd):
            raise ValueError('expected plan to have type cufft.PlanNd')
        if a.flags.c_contiguous:
            expected_shape = tuple(a.shape[ax] for ax in axes)
        else:
            # plan.shape will be reversed for Fortran-ordered inputs
            expected_shape = tuple(a.shape[ax] for ax in axes[::-1])
        if expected_shape != plan.shape:
            raise ValueError(
                'The CUFFT plan and a.shape do not match: '
                'plan.shape = {}, expected_shape={}, a.shape = {}'.format(
                    plan.shape, expected_shape, a.shape))
        if fft_type != plan.fft_type:
            raise ValueError('CUFFT plan dtype mismatch.')
        # TODO: also check the strides and axes of the plan?

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is None:
        out = plan.get_output_array(a, order=order)
    else:
        plan.check_output_array(a, out)

    plan.fft(a, out, direction)

    # normalize by the product of the shape along the transformed axes
    sz = _prod([out.shape[ax] for ax in axes])
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    return out
def test_fft_plan_manager(self, xp, scp, dtype):
    """A Plan1d installed via the context manager is picked up by fft."""
    data = testing.shaped_random(self.shape, xp, dtype)
    # hack: avoid testing the cases when the output array is of size 0
    # because cuFFT and numpy raise different kinds of exceptions
    if self.n == 0:
        return data
    snapshot = data.copy()
    if scp is not cupyx.scipy:  # scipy
        result = scp.fftpack.fft(data, n=self.n, axis=self.axis)
    else:
        from cupy.cuda.cufft import get_current_plan
        fft_plan = scp.fftpack.get_fft_plan(data, shape=self.n,
                                            axes=self.axis)
        with fft_plan:
            # inside the context, the managed plan must be the current one
            assert fft_plan is get_current_plan()
            result = scp.fftpack.fft(data, n=self.n, axis=self.axis)
        # leaving the context must clear the current plan
        assert get_current_plan() is None
    # the input array must not be overwritten
    testing.assert_array_equal(data, snapshot)
    return result
def test_ifftn_plan_manager(self, xp, dtype):
    """A PlanNd installed via the context manager is picked up by ifftn."""
    data = testing.shaped_random(self.shape, xp, dtype)
    # hack: avoid testing the cases when getting a cuFFT plan is impossible
    if _default_fft_func(data, s=self.s, axes=self.axes) is not _fftn:
        return data
    snapshot = data.copy()
    if xp is not cp:
        result = _fft_module(xp).ifftn(data, s=self.s, axes=self.axes)
    else:
        from cupy.cuda.cufft import get_current_plan
        nd_plan = _fft_module(xp).get_fft_plan(data, shape=self.s,
                                               axes=self.axes)
        with nd_plan:
            # inside the context, the managed plan must be the current one
            assert nd_plan is get_current_plan()
            result = _fft_module(xp).ifftn(data, s=self.s, axes=self.axes)
        # leaving the context must clear the current plan
        assert get_current_plan() is None
    # the input array must not be overwritten
    testing.assert_array_equal(data, snapshot)
    return _correct_np_dtype(xp, dtype, result)
def _default_fft_func(a, s=None, axes=None, plan=None):
    """Pick the FFT executor for the input.

    An explicit or contextual cuFFT plan forces the matching code path
    (``Plan1d`` -> ``_fft``, ``PlanNd`` -> ``_fftn``); supplying both is
    an error. Otherwise the decision is delegated to
    ``_default_plan_type``.
    """
    ctx_plan = cufft.get_current_plan()
    if ctx_plan is not None:
        if plan is not None:
            # a plan may come from either channel, never both at once
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')
        plan = ctx_plan

    if isinstance(plan, cufft.PlanNd):
        # an explicit N-D plan forces the N-D code path
        return _fftn
    if isinstance(plan, cufft.Plan1d):
        # an explicit 1-D plan forces the batched 1-D code path
        return _fft

    return _fftn if _default_plan_type(a, s, axes) == 'nd' else _fft
def _default_fft_func(a, s=None, axes=None, plan=None):
    """Pick the FFT executor (``_fft`` or ``_fftn``) for the input.

    An explicit or contextual cuFFT plan forces the matching code path;
    supplying both is an error. Without a plan, the N-D path is used only
    when N-D planning is enabled and more than one plannable axis is
    transformed.
    """
    ctx_plan = cufft.get_current_plan()
    if ctx_plan is not None:
        if plan is not None:
            # a plan may come from either channel, never both at once
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')
        plan = ctx_plan

    if isinstance(plan, cufft.PlanNd):
        # an explicit N-D plan forces the N-D code path
        return _fftn
    if (isinstance(plan, cufft.Plan1d) or a.ndim == 1
            or not config.enable_nd_planning):
        return _fft

    _, sorted_axes = _prep_fftn_axes(a.ndim, s, axes)
    if len(sorted_axes) > 1 and _nd_plan_is_possible(sorted_axes, a.ndim):
        return _fftn
    # prefer Plan1D in the 1D case
    return _fft
def _default_fft_func(a, s=None, axes=None, plan=None, value_type='C2C'):
    """Pick the FFT executor (``_fft`` or ``_fftn``) for the input.

    An explicit or contextual cuFFT plan forces the matching code path;
    supplying both is an error. Without a plan, the N-D path is used only
    when N-D planning is enabled, more than one plannable axis is
    transformed, and the layout/value-type combination is safe.
    """
    ctx_plan = cufft.get_current_plan()
    if ctx_plan is not None:
        if plan is not None:
            # a plan may come from either channel, never both at once
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')
        plan = ctx_plan

    if isinstance(plan, cufft.PlanNd):
        # an explicit N-D plan forces the N-D code path
        return _fftn
    if (isinstance(plan, cufft.Plan1d) or a.ndim == 1
            or not config.enable_nd_planning):
        return _fft

    # cuFFT's N-D C2R/R2C transforms may not agree with NumPy's outcomes
    if value_type != 'C2C' and a.flags.f_contiguous:
        return _fft

    _, sorted_axes = _prep_fftn_axes(a.ndim, s, axes, value_type)
    if len(sorted_axes) > 1 and _nd_plan_is_possible(sorted_axes, a.ndim):
        return _fftn
    # prefer Plan1D in the 1D case
    return _fft
def _exec_fft(a, direction, value_type, norm, axis, overwrite_x,
              out_size=None, out=None, plan=None):
    """Run a batched 1-D cuFFT transform along ``axis`` of ``a``.

    The target axis is moved to the last position and the array is made
    C-contiguous (additionally copied for C2R on CUDA >= 10.1, which may
    modify the input); a ``Plan1d`` is either created or validated
    against the transform parameters; after execution the result is
    normalized (``norm is None``: inverse transforms are divided by the
    size; otherwise divided by sqrt(size) — presumably 'ortho', confirm
    with callers) and the axis order restored.

    Args:
        a: input array.
        direction: cuFFT direction constant.
        value_type: transform kind ('C2C', 'R2C', or 'C2R').
        norm: normalization mode (``None`` or another value — see above).
        axis: axis along which to transform.
        overwrite_x: allow reuse of ``a`` as the output (C2C only).
        out_size: length of the transformed output axis (defaults to the
            input's last-axis length).
        out: optional pre-allocated output array (shape/dtype-checked).
        plan: optional pre-built ``cufft.Plan1d``.

    Returns:
        The transformed (and normalized) array.
    """
    fft_type = _convert_fft_type(a.dtype, value_type)

    # bring the transform axis to the innermost position
    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()
    elif (value_type == 'C2R' and not overwrite_x and
            10010 <= cupy.cuda.runtime.runtimeGetVersion()):
        # The input array may be modified in CUDA 10.1 and above.
        # See #3763 for the discussion.
        a = a.copy()

    n = a.shape[-1]
    if n < 1:
        raise ValueError(
            'Invalid number of FFT data points (%d) specified.' % n)

    if out_size is None:
        out_size = n

    batch = a.size // n

    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            # a plan may come from either channel, never both at once
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')

    if plan is None:
        devices = None if not config.use_multi_gpus else config._devices
        plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)
    else:
        # check plan validity
        if not isinstance(plan, cufft.Plan1d):
            raise ValueError('expected plan to have type cufft.Plan1d')
        if fft_type != plan.fft_type:
            raise ValueError('cuFFT plan dtype mismatch.')
        if out_size != plan.nx:
            raise ValueError('Target array size does not match the plan.',
                             out_size, plan.nx)
        if batch != plan.batch:
            raise ValueError('Batch size does not match the plan.')
        if config.use_multi_gpus != plan._use_multi_gpus:
            raise ValueError('Unclear if multiple GPUs are to be used or not.')

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)

    # skip the transform entirely for empty batches
    if batch != 0:
        plan.fft(a, out, direction)

    # for R2C the logical transform length is the *input* length, not the
    # (roughly halved) complex output length
    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = n
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    # restore the original axis order
    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
def _exec_fftn(a, direction, value_type, norm, axes, overwrite_x,
               plan=None, out=None, out_size=None):
    """Run an N-D cuFFT transform over ``axes`` of ``a``.

    A ``PlanNd`` is either taken from the context manager, created on the
    fly, or validated against the input's shape/order/dtype (including
    the R2C/C2R last axis and size). The result is normalized by the
    product of the transformed-axes lengths of the logical input
    (``norm is None``: inverse transforms divided by the product;
    otherwise divided by its square root — presumably 'ortho', confirm
    with callers).

    Args:
        a: input array; must be C- or F-contiguous.
        direction: cuFFT direction constant.
        value_type: transform kind ('C2C', 'R2C', or 'C2R').
        norm: normalization mode (``None`` or another value — see above).
        axes: axes to transform.
        overwrite_x: allow reuse of ``a`` as the output (C2C only).
        plan: optional pre-built ``cufft.PlanNd``.
        out: optional pre-allocated output array (shape/dtype-checked).
        out_size: last-axis output size for R2C/C2R transforms.

    Returns:
        The transformed (and normalized) array.
    """
    fft_type = _convert_fft_type(a.dtype, value_type)

    if a.flags.c_contiguous:
        order = 'C'
    elif a.flags.f_contiguous:
        order = 'F'
    else:
        raise ValueError('a must be contiguous')

    if (value_type == 'C2R' and not overwrite_x and
            10010 <= cupy.cuda.runtime.runtimeGetVersion()):
        # The input array may be modified in CUDA 10.1 and above.
        # See #3763 for the discussion.
        a = a.copy()

    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        plan = curr_plan
        # don't check repeated usage; it's done in _default_fft_func()

    if plan is None:
        # generate a plan
        plan = _get_cufft_plan_nd(a.shape, fft_type, axes=axes, order=order,
                                  out_size=out_size)
    else:
        if not isinstance(plan, cufft.PlanNd):
            raise ValueError('expected plan to have type cufft.PlanNd')
        if order != plan.order:
            raise ValueError('array orders mismatch (plan: {}, input: {})'
                             .format(plan.order, order))
        if a.flags.c_contiguous:
            expected_shape = [a.shape[ax] for ax in axes]
            if value_type == 'C2R':
                expected_shape[-1] = out_size
        else:
            # plan.shape will be reversed for Fortran-ordered inputs
            expected_shape = [a.shape[ax] for ax in axes[::-1]]
            # TODO(leofang): modify the shape for C2R
        expected_shape = tuple(expected_shape)
        if expected_shape != plan.shape:
            raise ValueError(
                'The cuFFT plan and a.shape do not match: '
                'plan.shape = {}, expected_shape={}, a.shape = {}'.format(
                    plan.shape, expected_shape, a.shape))
        if fft_type != plan.fft_type:
            raise ValueError('cuFFT plan dtype mismatch.')
        if value_type != 'C2C':
            if axes[-1] != plan.last_axis:
                raise ValueError('The last axis for R2C/C2R mismatch')
            if out_size != plan.last_size:
                raise ValueError('The size along the last R2C/C2R axis '
                                 'mismatch')

    # TODO(leofang): support in-place transform for R2C/C2R
    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is None:
        out = plan.get_output_array(a, order=order)
    else:
        plan.check_output_array(a, out)

    # skip the transform entirely for empty outputs
    if out.size != 0:
        plan.fft(a, out, direction)

    # normalize by the product of the shape along the transformed axes;
    # for R2C the logical lengths come from the input, not the halved output
    arr = a if fft_type in (cufft.CUFFT_R2C, cufft.CUFFT_D2Z) else out
    sz = _prod([arr.shape[ax] for ax in axes])
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    return out
def thread_get_curr_plan():
    """Fetch the cuFFT plan currently installed for this thread, if any."""
    current = get_current_plan()
    return current
def _exec_fft(a, direction, value_type, norm, axis, overwrite_x,
              out_size=None, out=None, plan=None):
    """Run a batched 1-D cuFFT transform along ``axis`` of ``a``.

    This variant adds plan caching, cuFFT-callback support, and
    hipFFT/rocFFT workarounds on top of the basic flow: move the target
    axis to the last position, make the array C-contiguous (with extra
    copies where the backend may clobber the input), obtain a ``Plan1d``
    (argument > context manager > cache > freshly created), execute, and
    normalize (``norm is None``: inverse transforms divided by the size;
    otherwise divided by sqrt(size) — presumably 'ortho', confirm with
    callers) before restoring the axis order.

    Args:
        a: input array.
        direction: cuFFT direction constant.
        value_type: transform kind ('C2C', 'R2C', or 'C2R').
        norm: normalization mode (``None`` or another value — see above).
        axis: axis along which to transform.
        overwrite_x: allow reuse of ``a`` as the output (C2C only).
        out_size: length of the transformed output axis (defaults to the
            input's last-axis length).
        out: optional pre-allocated output array (shape/dtype-checked).
        plan: optional pre-built ``cufft.Plan1d``.

    Returns:
        The transformed (and normalized) array.
    """
    fft_type = _convert_fft_type(a.dtype, value_type)

    # bring the transform axis to the innermost position
    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()
    elif (value_type == 'C2R' and not overwrite_x and
            10010 <= cupy.cuda.runtime.runtimeGetVersion()):
        # The input array may be modified in CUDA 10.1 and above.
        # See #3763 for the discussion.
        a = a.copy()
    elif cupy.cuda.runtime.is_hip and value_type != 'C2C':
        # hipFFT's R2C would overwrite input
        # hipFFT's C2R needs a workaround (see below)
        a = a.copy()

    n = a.shape[-1]
    if n < 1:
        raise ValueError('Invalid number of FFT data points (%d) specified.'
                         % n)

    # Workaround for hipFFT/rocFFT:
    # Both cuFFT and hipFFT/rocFFT have this requirement that 0-th and
    # N/2-th element must be real, but cuFFT internally simply ignores it
    # while hipFFT handles it badly in both Plan1d and PlanNd, so we must
    # do the correction ourselves to ensure the condition is met.
    if cupy.cuda.runtime.is_hip and value_type == 'C2R':
        a[..., 0] = a[..., 0].real + 0j
        if out_size is None:
            a[..., -1] = a[..., -1].real + 0j
        elif out_size % 2 == 0:
            a[..., out_size // 2] = a[..., out_size // 2].real + 0j

    if out_size is None:
        out_size = n

    batch = a.size // n

    # plan search precedence:
    # 1. plan passed in as an argument
    # 2. plan as context manager
    # 3. cached plan
    # 4. create a new one
    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')

    if plan is None:
        devices = None if not config.use_multi_gpus else config._devices
        # TODO(leofang): do we need to add the current stream to keys?
        keys = (out_size, fft_type, batch, devices)
        mgr = config.get_current_callback_manager()
        if mgr is not None:
            # to avoid a weird segfault, we generate and cache distinct plans
            # for every possible (load_aux, store_aux) pairs; the plans are
            # still generated from the same external Python module
            load_aux = mgr.cb_load_aux_arr
            store_aux = mgr.cb_store_aux_arr
            keys += (mgr.cb_load, mgr.cb_store,
                     0 if load_aux is None else load_aux.data.ptr,
                     0 if store_aux is None else store_aux.data.ptr)
        cache = get_plan_cache()
        cached_plan = cache.get(keys)
        if cached_plan is not None:
            plan = cached_plan
        elif mgr is None:
            plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)
            cache[keys] = plan
        else:  # has callback
            # TODO(leofang): support multi-GPU callback (devices is ignored)
            if devices:
                raise NotImplementedError('multi-GPU cuFFT callbacks are not '
                                          'yet supported')
            plan = mgr.create_plan(('Plan1d', keys[:-5]))
            mgr.set_callbacks(plan)
            cache[keys] = plan
    else:
        # check plan validity
        if not isinstance(plan, cufft.Plan1d):
            raise ValueError('expected plan to have type cufft.Plan1d')
        if fft_type != plan.fft_type:
            raise ValueError('cuFFT plan dtype mismatch.')
        if out_size != plan.nx:
            raise ValueError('Target array size does not match the plan.',
                             out_size, plan.nx)
        if batch != plan.batch:
            raise ValueError('Batch size does not match the plan.')
        if config.use_multi_gpus != (plan.gpus is not None):
            raise ValueError('Unclear if multiple GPUs are to be used or not.')

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)

    # skip the transform entirely for empty batches
    if batch != 0:
        plan.fft(a, out, direction)

    # for R2C the logical transform length is the *input* length, not the
    # (roughly halved) complex output length
    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = n
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    # restore the original axis order
    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out