Example #1
 def test_rfft_plan_manager(self, xp, scp, dtype):
     x = testing.shaped_random(self.shape, xp, dtype)
     x_orig = x.copy()
     if scp is cupyx.scipy:
         from cupy.cuda.cufft import get_current_plan
         plan = scp.fftpack.get_fft_plan(x, shape=self.n, axes=self.axis,
                                         value_type='R2C')
         with plan:
             assert id(plan) == id(get_current_plan())
             out = scp.fftpack.rfft(x, n=self.n, axis=self.axis)
         assert get_current_plan() is None
     else:  # scipy
         out = scp.fftpack.rfft(x, n=self.n, axis=self.axis)
     testing.assert_array_equal(x, x_orig)
     return out
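The pattern above comes from CuPy's test suite and relies on its harness (the xp/scp fixtures, testing.shaped_random). A minimal standalone sketch of the same context-manager API, assuming CuPy with a CUDA-capable GPU and arbitrary shapes:

import cupy
import cupyx.scipy.fftpack
from cupy.cuda.cufft import get_current_plan

x = cupy.random.random((4, 64)).astype(cupy.float32)  # real input for R2C
plan = cupyx.scipy.fftpack.get_fft_plan(x, axes=-1, value_type='R2C')
with plan:
    # while the context is active, rfft picks up the plan automatically
    assert get_current_plan() is plan
    out = cupyx.scipy.fftpack.rfft(x, axis=-1)
assert get_current_plan() is None  # exiting the context clears the plan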
Example #2
 def test_ifftn_plan_manager(self, xp, scp, dtype):
     x = testing.shaped_random(self.shape, xp, dtype)
     # hack: avoid testing the cases when getting a cuFFT plan is impossible
     if _default_fft_func(x, s=self.s, axes=self.axes) is not _fftn:
         return x
     if scp is cupyx.scipy:
         from cupy.cuda.cufft import get_current_plan
         plan = scp.fftpack.get_fft_plan(x, shape=self.s, axes=self.axes)
         with plan:
             assert id(plan) == id(get_current_plan())
             out = scp.fftpack.ifftn(x, shape=self.s, axes=self.axes)
         assert get_current_plan() is None
     else:  # scipy
         out = scp.fftpack.ifftn(x, shape=self.s, axes=self.axes)
     return out
Example #3
def _default_fft_func(a, s=None, axes=None, plan=None, value_type='C2C'):
    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')

    if isinstance(plan, cufft.PlanNd):  # a shortcut for using _fftn
        return _fftn
    elif (isinstance(plan, cufft.Plan1d) or a.ndim == 1
          or not config.enable_nd_planning):
        return _fft

    # cuFFT's N-D C2R/R2C transforms may not agree with NumPy's outcomes
    if a.flags.f_contiguous and value_type != 'C2C':
        return _fft

    _, axes_sorted = _prep_fftn_axes(a.ndim, s, axes, value_type)
    if len(axes_sorted) > 1 and _nd_plan_is_possible(axes_sorted, a.ndim):
        # circumvent two potential hipFFT/rocFFT bugs as of ROCm 3.5.0
        # TODO(leofang): understand hipFFT better and test newer ROCm versions
        if cupy.cuda.runtime.is_hip:
            if (0 == axes_sorted[0] and len(axes_sorted) != a.ndim
                    and a.flags.c_contiguous):
                return _fft

            # For C2R, we don't use PlanNd; see the workaround in _exec_fft()
            if value_type == 'C2R':
                return _fft

        # prefer Plan1D in the 1D case
        return _fftn
    return _fft
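_default_fft_func dispatches between the 1-D path (_fft, backed by cufft.Plan1d) and the n-D path (_fftn, backed by cufft.PlanNd). One public knob in that decision is cupy.fft.config.enable_nd_planning; a small sketch of its effect, assuming CuPy with a GPU:

import cupy
from cupy.fft import config

a = cupy.random.random((4, 4)).astype(cupy.complex64)
config.enable_nd_planning = False  # forces the Plan1d-based path above
y1 = cupy.fft.fftn(a)
config.enable_nd_planning = True   # default: PlanNd is used when possible
y2 = cupy.fft.fftn(a)
assert cupy.allclose(y1, y2)       # both paths compute the same transform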
Example #4
def _exec_fft(a, direction, value_type, norm, axis, overwrite_x,
              out_size=None, out=None, plan=None):
    fft_type = _convert_fft_type(a, value_type)

    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()

    if out_size is None:
        out_size = a.shape[-1]

    batch = a.size // a.shape[-1]
    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')
    if plan is None:
        devices = None if not config.use_multi_gpus else config._devices
        plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)
    else:
        # check plan validity
        if not isinstance(plan, cufft.Plan1d):
            raise ValueError('expected plan to have type cufft.Plan1d')
        if fft_type != plan.fft_type:
            raise ValueError('CUFFT plan dtype mismatch.')
        if out_size != plan.nx:
            raise ValueError('Target array size does not match the plan.')
        if batch != plan.batch:
            raise ValueError('Batch size does not match the plan.')
        if config.use_multi_gpus != plan._use_multi_gpus:
            raise ValueError('Unclear if multiple GPUs are to be used or not.')

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)

    plan.fft(a, out, direction)

    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = a.shape[-1]
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
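When a plan is passed as an argument, the validation branch above rejects mismatched geometry before any transform runs. A sketch of how that surfaces through the public API, assuming CuPy with a GPU:

import cupy
import cupyx.scipy.fftpack

x = cupy.random.random(64).astype(cupy.complex64)
plan = cupyx.scipy.fftpack.get_fft_plan(x)   # Plan1d for a length-64 C2C
y = cupyx.scipy.fftpack.fft(x, plan=plan)    # OK: geometry matches

bad = cupy.random.random(32).astype(cupy.complex64)
try:
    cupyx.scipy.fftpack.fft(bad, plan=plan)  # 32 != plan.nx == 64
except ValueError as exc:
    print(exc)  # 'Target array size does not match the plan.'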
Example #5
def _exec_fftn(a, direction, value_type, norm, axes, overwrite_x,
               plan=None, out=None):

    fft_type = _convert_fft_type(a, value_type)
    if fft_type not in [cufft.CUFFT_C2C, cufft.CUFFT_Z2Z]:
        raise NotImplementedError('Only C2C and Z2Z are supported.')

    if a.base is not None:
        a = a.copy()

    if a.flags.c_contiguous:
        order = 'C'
    elif a.flags.f_contiguous:
        order = 'F'
    else:
        raise ValueError('a must be contiguous')

    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        plan = curr_plan
        # don't check repeated usage; it's done in _default_fft_func()
    if plan is None:
        # generate a plan
        plan = _get_cufft_plan_nd(a.shape, fft_type, axes=axes, order=order)
    else:
        if not isinstance(plan, cufft.PlanNd):
            raise ValueError('expected plan to have type cufft.PlanNd')
        if a.flags.c_contiguous:
            expected_shape = tuple(a.shape[ax] for ax in axes)
        else:
            # plan.shape will be reversed for Fortran-ordered inputs
            expected_shape = tuple(a.shape[ax] for ax in axes[::-1])
        if expected_shape != plan.shape:
            raise ValueError(
                'The CUFFT plan and a.shape do not match: '
                'plan.shape = {}, expected_shape={}, a.shape = {}'.format(
                    plan.shape, expected_shape, a.shape))
        if fft_type != plan.fft_type:
            raise ValueError('CUFFT plan dtype mismatch.')
        # TODO: also check the strides and axes of the plan?

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is None:
        out = plan.get_output_array(a, order=order)
    else:
        plan.check_output_array(a, out)
    plan.fft(a, out, direction)

    # normalize by the product of the shape along the transformed axes
    sz = _prod([out.shape[ax] for ax in axes])
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    return out
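The normalization at the end follows the NumPy convention: with norm=None only the inverse transform is scaled, by the product N of the lengths along the transformed axes, while norm='ortho' scales both directions by sqrt(N). Illustrated here with NumPy for clarity:

import numpy as np

x = np.random.random((4, 8)) + 1j * np.random.random((4, 8))
y = np.fft.fftn(x)                      # unscaled forward transform
np.testing.assert_allclose(np.fft.ifftn(y), x)  # inverse divides by N = 32

y_ortho = np.fft.fftn(x, norm='ortho')  # forward divides by sqrt(N)
np.testing.assert_allclose(np.fft.ifftn(y_ortho, norm='ortho'), x)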
Example #6
 def test_fft_plan_manager(self, xp, scp, dtype):
     x = testing.shaped_random(self.shape, xp, dtype)
     # hack: avoid testing the cases when the output array is of size 0
     # because cuFFT and numpy raise different kinds of exceptions
     if self.n == 0:
         return x
     x_orig = x.copy()
     if scp is cupyx.scipy:
         from cupy.cuda.cufft import get_current_plan
         plan = scp.fftpack.get_fft_plan(x, shape=self.n, axes=self.axis)
         with plan:
             assert id(plan) == id(get_current_plan())
             out = scp.fftpack.fft(x, n=self.n, axis=self.axis)
         assert get_current_plan() is None
     else:  # scipy
         out = scp.fftpack.fft(x, n=self.n, axis=self.axis)
     testing.assert_array_equal(x, x_orig)
     return out
Example #7
 def test_ifftn_plan_manager(self, xp, dtype):
     x = testing.shaped_random(self.shape, xp, dtype)
     # hack: avoid testing the cases when getting a cuFFT plan is impossible
     if _default_fft_func(x, s=self.s, axes=self.axes) is not _fftn:
         return x
     x_orig = x.copy()
     if xp is cp:
         from cupy.cuda.cufft import get_current_plan
         plan = _fft_module(xp).get_fft_plan(x,
                                             shape=self.s,
                                             axes=self.axes)
         with plan:
             assert id(plan) == id(get_current_plan())
             out = _fft_module(xp).ifftn(x, s=self.s, axes=self.axes)
         assert get_current_plan() is None
     else:
         out = _fft_module(xp).ifftn(x, s=self.s, axes=self.axes)
     testing.assert_array_equal(x, x_orig)
     return _correct_np_dtype(xp, dtype, out)
Example #8
def _default_fft_func(a, s=None, axes=None, plan=None):
    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')

    if isinstance(plan, cufft.PlanNd):  # a shortcut for using _fftn
        return _fftn
    elif isinstance(plan, cufft.Plan1d):  # a shortcut for using _fft
        return _fft

    plan_type = _default_plan_type(a, s, axes)
    if plan_type == 'nd':
        return _fftn
    else:
        return _fft
Example #9
def _default_fft_func(a, s=None, axes=None, plan=None):
    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')

    if isinstance(plan, cufft.PlanNd):  # a shortcut for using _fftn
        return _fftn
    elif (isinstance(plan, cufft.Plan1d) or
          a.ndim == 1 or not config.enable_nd_planning):
        return _fft

    _, axes_sorted = _prep_fftn_axes(a.ndim, s, axes)
    if len(axes_sorted) > 1 and _nd_plan_is_possible(axes_sorted, a.ndim):
        # prefer Plan1D in the 1D case
        return _fftn
    return _fft
Example #10
def _default_fft_func(a, s=None, axes=None, plan=None, value_type='C2C'):
    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')

    if isinstance(plan, cufft.PlanNd):  # a shortcut for using _fftn
        return _fftn
    elif (isinstance(plan, cufft.Plan1d) or
          a.ndim == 1 or not config.enable_nd_planning):
        return _fft

    # cuFFT's N-D C2R/R2C transforms may not agree with NumPy's outcomes
    if a.flags.f_contiguous and value_type != 'C2C':
        return _fft

    _, axes_sorted = _prep_fftn_axes(a.ndim, s, axes, value_type)
    if len(axes_sorted) > 1 and _nd_plan_is_possible(axes_sorted, a.ndim):
        # prefer Plan1D in the 1D case
        return _fftn
    return _fft
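A sketch of the f_contiguous special case, assuming CuPy with a GPU: for Fortran-ordered input with a non-C2C transform, the dispatcher above avoids cuFFT's n-D R2C/C2R path and falls back to repeated 1-D transforms, so the result still matches NumPy:

import cupy
import numpy as np

x_np = np.random.random((8, 8)).astype(np.float32)
x = cupy.asfortranarray(cupy.asarray(x_np))  # F-contiguous real input
y = cupy.fft.rfftn(x)                        # handled by the 1-D path
np.testing.assert_allclose(cupy.asnumpy(y), np.fft.rfftn(x_np),
                           rtol=1e-4, atol=1e-4)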
Example #11
def _exec_fft(a, direction, value_type, norm, axis, overwrite_x,
              out_size=None, out=None, plan=None):
    fft_type = _convert_fft_type(a.dtype, value_type)

    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()
    elif (value_type == 'C2R' and not overwrite_x and
            10010 <= cupy.cuda.runtime.runtimeGetVersion()):
        # The input array may be modified in CUDA 10.1 and above.
        # See #3763 for the discussion.
        a = a.copy()

    n = a.shape[-1]
    if n < 1:
        raise ValueError(
            'Invalid number of FFT data points (%d) specified.' % n)

    if out_size is None:
        out_size = n

    batch = a.size // n

    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')
    if plan is None:
        devices = None if not config.use_multi_gpus else config._devices
        plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)
    else:
        # check plan validity
        if not isinstance(plan, cufft.Plan1d):
            raise ValueError('expected plan to have type cufft.Plan1d')
        if fft_type != plan.fft_type:
            raise ValueError('cuFFT plan dtype mismatch.')
        if out_size != plan.nx:
            raise ValueError('Target array size does not match the plan.',
                             out_size, plan.nx)
        if batch != plan.batch:
            raise ValueError('Batch size does not match the plan.')
        if config.use_multi_gpus != plan._use_multi_gpus:
            raise ValueError('Unclear if multiple GPUs are to be used or not.')

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)

    if batch != 0:
        plan.fft(a, out, direction)

    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = n
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
Example #12
def _exec_fftn(a, direction, value_type, norm, axes, overwrite_x,
               plan=None, out=None, out_size=None):

    fft_type = _convert_fft_type(a.dtype, value_type)

    if a.flags.c_contiguous:
        order = 'C'
    elif a.flags.f_contiguous:
        order = 'F'
    else:
        raise ValueError('a must be contiguous')

    if (value_type == 'C2R' and not overwrite_x and
            10010 <= cupy.cuda.runtime.runtimeGetVersion()):
        # The input array may be modified in CUDA 10.1 and above.
        # See #3763 for the discussion.
        a = a.copy()

    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        plan = curr_plan
        # don't check repeated usage; it's done in _default_fft_func()
    if plan is None:
        # generate a plan
        plan = _get_cufft_plan_nd(a.shape, fft_type, axes=axes, order=order,
                                  out_size=out_size)
    else:
        if not isinstance(plan, cufft.PlanNd):
            raise ValueError('expected plan to have type cufft.PlanNd')
        if order != plan.order:
            raise ValueError('array orders mismatch (plan: {}, input: {})'
                             .format(plan.order, order))
        if a.flags.c_contiguous:
            expected_shape = [a.shape[ax] for ax in axes]
            if value_type == 'C2R':
                expected_shape[-1] = out_size
        else:
            # plan.shape will be reversed for Fortran-ordered inputs
            expected_shape = [a.shape[ax] for ax in axes[::-1]]
            # TODO(leofang): modify the shape for C2R
        expected_shape = tuple(expected_shape)
        if expected_shape != plan.shape:
            raise ValueError(
                'The cuFFT plan and a.shape do not match: '
                'plan.shape = {}, expected_shape={}, a.shape = {}'.format(
                    plan.shape, expected_shape, a.shape))
        if fft_type != plan.fft_type:
            raise ValueError('cuFFT plan dtype mismatch.')
        if value_type != 'C2C':
            if axes[-1] != plan.last_axis:
                raise ValueError('The last axis for R2C/C2R mismatch')
            if out_size != plan.last_size:
                raise ValueError('The size along the last R2C/C2R axis '
                                 'mismatch')

    # TODO(leofang): support in-place transform for R2C/C2R
    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is None:
        out = plan.get_output_array(a, order=order)
    else:
        plan.check_output_array(a, out)

    if out.size != 0:
        plan.fft(a, out, direction)

    # normalize by the product of the shape along the transformed axes
    arr = a if fft_type in (cufft.CUFFT_R2C, cufft.CUFFT_D2Z) else out
    sz = _prod([arr.shape[ax] for ax in axes])
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    return out
Example #13
 def thread_get_curr_plan():
     return get_current_plan()
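This one-line helper appears in CuPy's threading tests: the plan activated by the context manager is thread-local state, so a plan entered in one thread is invisible in another. A sketch of the property being checked, assuming CuPy is installed:

import threading
from cupy.cuda.cufft import get_current_plan

seen = []

def worker():
    seen.append(get_current_plan())  # a fresh thread has no active plan

t = threading.Thread(target=worker)
t.start()
t.join()
assert seen == [None]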
Example #14
def _exec_fft(a,
              direction,
              value_type,
              norm,
              axis,
              overwrite_x,
              out_size=None,
              out=None,
              plan=None):
    fft_type = _convert_fft_type(a.dtype, value_type)

    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()
    elif (value_type == 'C2R' and not overwrite_x
          and 10010 <= cupy.cuda.runtime.runtimeGetVersion()):
        # The input array may be modified in CUDA 10.1 and above.
        # See #3763 for the discussion.
        a = a.copy()
    elif cupy.cuda.runtime.is_hip and value_type != 'C2C':
        # hipFFT's R2C would overwrite input
        # hipFFT's C2R needs a workaround (see below)
        a = a.copy()

    n = a.shape[-1]
    if n < 1:
        raise ValueError('Invalid number of FFT data points (%d) specified.' %
                         n)

    # Workaround for hipFFT/rocFFT:
    # Both cuFFT and hipFFT/rocFFT have this requirement that 0-th and
    # N/2-th element must be real, but cuFFT internally simply ignores it
    # while hipFFT handles it badly in both Plan1d and PlanNd, so we must
    # do the correction ourselves to ensure the condition is met.
    if cupy.cuda.runtime.is_hip and value_type == 'C2R':
        a[..., 0] = a[..., 0].real + 0j
        if out_size is None:
            a[..., -1] = a[..., -1].real + 0j
        elif out_size % 2 == 0:
            a[..., out_size // 2] = a[..., out_size // 2].real + 0j

    if out_size is None:
        out_size = n

    batch = a.size // n

    # plan search precedence:
    # 1. plan passed in as an argument
    # 2. plan as context manager
    # 3. cached plan
    # 4. create a new one
    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')

    if plan is None:
        devices = None if not config.use_multi_gpus else config._devices
        # TODO(leofang): do we need to add the current stream to keys?
        keys = (out_size, fft_type, batch, devices)
        mgr = config.get_current_callback_manager()
        if mgr is not None:
            # to avoid a weird segfault, we generate and cache distinct plans
            # for every possible (load_aux, store_aux) pairs; the plans are
            # still generated from the same external Python module
            load_aux = mgr.cb_load_aux_arr
            store_aux = mgr.cb_store_aux_arr
            keys += (mgr.cb_load, mgr.cb_store,
                     0 if load_aux is None else load_aux.data.ptr,
                     0 if store_aux is None else store_aux.data.ptr)
        cache = get_plan_cache()
        cached_plan = cache.get(keys)
        if cached_plan is not None:
            plan = cached_plan
        elif mgr is None:
            plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)
            cache[keys] = plan
        else:  # has callback
            # TODO(leofang): support multi-GPU callback (devices is ignored)
            if devices:
                raise NotImplementedError('multi-GPU cuFFT callbacks are not '
                                          'yet supported')
            plan = mgr.create_plan(('Plan1d', keys[:-5]))
            mgr.set_callbacks(plan)
            cache[keys] = plan
    else:
        # check plan validity
        if not isinstance(plan, cufft.Plan1d):
            raise ValueError('expected plan to have type cufft.Plan1d')
        if fft_type != plan.fft_type:
            raise ValueError('cuFFT plan dtype mismatch.')
        if out_size != plan.nx:
            raise ValueError('Target array size does not match the plan.',
                             out_size, plan.nx)
        if batch != plan.batch:
            raise ValueError('Batch size does not match the plan.')
        if config.use_multi_gpus != (plan.gpus is not None):
            raise ValueError('Unclear if multiple GPUs are to be used or not.')

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)

    if batch != 0:
        plan.fft(a, out, direction)

    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = n
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
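The cache consulted through get_plan_cache() here is the per-device, per-thread plan cache that CuPy also exposes publicly. A sketch of inspecting and resizing it, assuming CuPy 8 or later:

import cupy
from cupy.fft.config import get_plan_cache

cache = get_plan_cache()
cupy.fft.fft(cupy.random.random(128).astype(cupy.complex64))  # caches a Plan1d
cache.show_info()   # prints cache size and hit/miss counters
cache.set_size(0)   # disable caching on the current device
cache.set_size(16)  # re-enable, keeping at most 16 plans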