Example #1
def _exec_fft(a, direction, value_type, norm, axis, overwrite_x,
              out_size=None, out=None, plan=None):
    fft_type = _convert_fft_type(a, value_type)

    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()

    if out_size is None:
        out_size = a.shape[-1]

    batch = a.size // a.shape[-1]
    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')
    if plan is None:
        devices = None if not config.use_multi_gpus else config._devices
        plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)
    else:
        # check plan validity
        if not isinstance(plan, cufft.Plan1d):
            raise ValueError('expected plan to have type cufft.Plan1d')
        if fft_type != plan.fft_type:
            raise ValueError('CUFFT plan dtype mismatch.')
        if out_size != plan.nx:
            raise ValueError('Target array size does not match the plan.')
        if batch != plan.batch:
            raise ValueError('Batch size does not match the plan.')
        if config.use_multi_gpus != plan._use_multi_gpus:
            raise ValueError('Unclear if multiple GPUs are to be used or not.')

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)

    plan.fft(a, out, direction)

    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = a.shape[-1]
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
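
A minimal usage sketch (not part of the snippet above): the `curr_plan`/`plan` branch means a precomputed `cufft.Plan1d` can reach `_exec_fft` either through the `plan` argument or through the plan's context manager, but not both at once. The sketch below goes through the public `cupyx.scipy.fftpack` wrappers documented in the `get_fft_plan` examples further down; the shape and dtype are illustrative.

import cupy
import cupyx.scipy.fftpack as fftpack

x = cupy.random.random((8, 128)).astype(cupy.complex64)
plan = fftpack.get_fft_plan(x, axes=-1)      # a cufft.Plan1d for this shape

# 1) pass the plan explicitly; _exec_fft receives it via the `plan` argument
y1 = fftpack.fft(x, axis=-1, plan=plan)

# 2) install the plan as a context manager; _exec_fft picks it up via
#    cufft.get_current_plan()
with plan:
    y2 = fftpack.fft(x, axis=-1)

# doing both at once hits the RuntimeError raised above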
Example #2
    def test_ifft(self, dtype):
        _skip_multi_gpu_bug(self.shape, self.gpus)

        a = testing.shaped_random(self.shape, numpy, dtype)

        if len(self.shape) == 1:
            batch = 1
            nx = self.shape[0]
        elif len(self.shape) == 2:
            batch = self.shape[0]
            nx = self.shape[1]

        # compute via cuFFT
        cufft_type = _convert_fft_type(a.dtype, 'C2C')
        plan = cufft.Plan1d(nx, cufft_type, batch, devices=config._devices)
        out_cp = numpy.empty_like(a)
        plan.fft(a, out_cp, cufft.CUFFT_INVERSE)
        # normalization
        out_cp /= nx

        out_np = numpy.fft.ifft(a)
        # np.fft.ifft always returns np.complex128
        if dtype is numpy.complex64:
            out_np = out_np.astype(dtype)

        assert numpy.allclose(out_cp, out_np, rtol=1e-4, atol=1e-7)

        # compute it again to ensure Plan1d's internal state is reset
        plan.fft(a, out_cp, cufft.CUFFT_INVERSE)
        # normalization
        out_cp /= nx

        assert numpy.allclose(out_cp, out_np, rtol=1e-4, atol=1e-7)
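
The same `cufft.Plan1d` interface also drives forward transforms. A sketch of the forward counterpart of this test, reduced to a single GPU with device arrays; shape and tolerances mirror the test and are otherwise illustrative:

import numpy
import cupy
from cupy.cuda import cufft

a = cupy.random.random((4, 64)).astype(cupy.complex64)

plan = cufft.Plan1d(64, cufft.CUFFT_C2C, 4)      # nx=64, batch=4
out = cupy.empty_like(a)
plan.fft(a, out, cufft.CUFFT_FORWARD)            # no normalization for the forward direction

ref = numpy.fft.fft(cupy.asnumpy(a)).astype(numpy.complex64)
assert numpy.allclose(cupy.asnumpy(out), ref, rtol=1e-4, atol=1e-7)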
Example #3
def _exec_fft(a, direction, value_type, norm, axis, overwrite_x,
              out_size=None):
    fft_type = _convert_fft_type(a, value_type)

    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None:
        a = a.copy()

    plan = cufft.Plan1d(a.shape[-1] if out_size is None else out_size,
                        fft_type, a.size // a.shape[-1])
    if overwrite_x and value_type == 'C2C':
        plan.fft(a, a, direction)
        out = a
    else:
        out = plan.get_output_array(a)
        plan.fft(a, out, direction)

    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = a.shape[-1]
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
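
The normalization branch at the end encodes the usual FFT conventions: with `norm` unset only the inverse transform is divided by the transform length, while any other accepted value (in practice 'ortho') divides both directions by its square root. A small sketch checking those semantics through the public `cupy.fft` API; the size is illustrative:

import math
import cupy

x = cupy.random.random(128).astype(cupy.complex128)
n = x.size

# default ("backward") normalization: only the inverse is scaled by 1/n
assert cupy.allclose(cupy.fft.ifft(cupy.fft.fft(x)), x)

# 'ortho': both directions are scaled by 1/sqrt(n)
assert cupy.allclose(cupy.fft.fft(x, norm='ortho'),
                     cupy.fft.fft(x) / math.sqrt(n))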
Example #4
def _exec_fft(a,
              direction,
              value_type,
              norm,
              axis,
              overwrite_x,
              out_size=None,
              out=None,
              plan=None):
    fft_type = _convert_fft_type(a, value_type)

    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()

    if out_size is None:
        out_size = a.shape[-1]

    batch = a.size // a.shape[-1]
    if plan is None:
        plan = cufft.Plan1d(out_size, fft_type, batch)
    else:
        # check plan validity
        if not isinstance(plan, cufft.Plan1d):
            raise ValueError('expected plan to have type cufft.Plan1d')
        if fft_type != plan.fft_type:
            raise ValueError('CUFFT plan dtype mismatch.')
        if out_size != plan.nx:
            raise ValueError('Target array size does not match the plan.')
        if batch != plan.batch:
            raise ValueError('Batch size does not match the plan.')

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)

    plan.fft(a, out, direction)

    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = a.shape[-1]
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= sqrt(sz)

    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
Example #5
def _exec_fft(a,
              direction,
              value_type,
              norm,
              axis,
              overwrite_x,
              out_size=None,
              out=None):
    fft_type = _convert_fft_type(a, value_type)

    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()

    if out_size is None:
        out_size = a.shape[-1]

    batch = a.size // a.shape[-1]
    plan = cufft.Plan1d(out_size, fft_type, batch)
    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)
    plan.fft(a, out, direction)

    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = a.shape[-1]
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= sqrt(sz)

    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
Example #6
def _exec_fft(a, direction, value_type, norm, axis, overwrite_x,
              out_size=None, out=None, plan=None):
    fft_type = _convert_fft_type(a.dtype, value_type)

    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()
    elif (value_type == 'C2R' and not overwrite_x and
            10010 <= cupy.cuda.runtime.runtimeGetVersion()):
        # The input array may be modified in CUDA 10.1 and above.
        # See #3763 for the discussion.
        a = a.copy()

    n = a.shape[-1]
    if n < 1:
        raise ValueError(
            'Invalid number of FFT data points (%d) specified.' % n)

    if out_size is None:
        out_size = n

    batch = a.size // n

    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')
    if plan is None:
        devices = None if not config.use_multi_gpus else config._devices
        plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)
    else:
        # check plan validity
        if not isinstance(plan, cufft.Plan1d):
            raise ValueError('expected plan to have type cufft.Plan1d')
        if fft_type != plan.fft_type:
            raise ValueError('cuFFT plan dtype mismatch.')
        if out_size != plan.nx:
            raise ValueError('Target array size does not match the plan.',
                             out_size, plan.nx)
        if batch != plan.batch:
            raise ValueError('Batch size does not match the plan.')
        if config.use_multi_gpus != plan._use_multi_gpus:
            raise ValueError('Unclear if multiple GPUs are to be used or not.')

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)

    if batch != 0:
        plan.fft(a, out, direction)

    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = n
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
Example #7
def get_fft_plan(a, shape=None, axes=None, value_type='C2C'):
    """ Generate a CUDA FFT plan for transforming up to three axes.

    Args:
        a (cupy.ndarray): Array to be transformed, assumed to be either C- or
            F-contiguous.
        shape (None or tuple of ints): Shape of the transformed axes of the
            output. If ``shape`` is not given, the lengths of the input along
            the axes specified by ``axes`` are used.
        axes (None or int or tuple of int):  The axes of the array to
            transform. If `None`, it is assumed that all axes are transformed.

            Currently, for performing N-D transform these must be a set of up
            to three adjacent axes, and must include either the first or the
            last axis of the array.
        value_type (str): The FFT type to perform. Acceptable values are:

            * 'C2C': complex-to-complex transform (default)
            * 'R2C': real-to-complex transform
            * 'C2R': complex-to-real transform

    Returns:
        a cuFFT plan for either 1D transform (``cupy.cuda.cufft.Plan1d``) or
        N-D transform (``cupy.cuda.cufft.PlanNd``).

    .. note::
        The returned plan can not only be passed as one of the arguments of
        the functions in ``cupyx.scipy.fftpack``, but also be used as a
        context manager for both ``cupy.fft`` and ``cupyx.scipy.fftpack``
        functions:

        .. code-block:: python

            x = cupy.random.random(16).reshape(4, 4).astype(cupy.complex128)
            plan = cupyx.scipy.fftpack.get_fft_plan(x)
            with plan:
                y = cupy.fft.fftn(x)
                # alternatively:
                y = cupyx.scipy.fftpack.fftn(x)  # no explicit plan is given!
            # alternatively:
            y = cupyx.scipy.fftpack.fftn(x, plan=plan)  # pass plan explicitly

        In the first case, no cuFFT plan will be generated automatically,
        even if ``cupy.fft.config.enable_nd_planning = True`` is set.

    .. note::
        If this function is called under the context of
        :func:`~cupy.fft.config.set_cufft_callbacks`, the generated plan will
        have callbacks enabled.

    .. warning::
        This API is a deviation from SciPy's, is currently experimental, and
        may be changed in a future version.
    """
    # check input array
    if a.flags.c_contiguous:
        order = 'C'
    elif a.flags.f_contiguous:
        order = 'F'
    else:
        raise ValueError('Input array a must be contiguous')

    if isinstance(shape, int):
        shape = (shape, )
    if isinstance(axes, int):
        axes = (axes, )
    if (shape is not None) and (axes is not None) and len(shape) != len(axes):
        raise ValueError('Shape and axes have different lengths.')

    # check axes
    # n=1: 1d (need axis1D); n>1: Nd
    if axes is None:
        n = a.ndim if shape is None else len(shape)
        axes = tuple(i for i in range(-n, 0))
        if n == 1:
            axis1D = 0
    else:  # axes is a tuple
        n = len(axes)
        if n == 1:
            axis1D = axes[0]
            if axis1D >= a.ndim or axis1D < -a.ndim:
                err = 'The chosen axis ({0}) exceeds the number of '\
                      'dimensions of a ({1})'.format(axis1D, a.ndim)
                raise ValueError(err)
        elif n > 3:
            raise ValueError('Only up to three axes is supported')

    # Note that "shape" here refers to the shape along trasformed axes, not
    # the shape of the output array, and we need to convert it to the latter.
    # The result is as if "a=_cook_shape(a); return a.shape" is called.
    # Because of this, we need to use (possibly unsorted) axes.
    transformed_shape = shape
    shape = list(a.shape)
    if transformed_shape is not None:
        for s, axis in zip(transformed_shape, axes):
            if s is not None:
                if axis == axes[-1] and value_type == 'C2R':
                    s = s // 2 + 1
                shape[axis] = s
    shape = tuple(shape)

    # check value_type
    out_dtype = _output_dtype(a.dtype, value_type)
    fft_type = _convert_fft_type(out_dtype, value_type)
    # TODO(leofang): figure out if we really have to skip F-order?
    if n > 1 and value_type != 'C2C' and a.flags.f_contiguous:
        raise ValueError('C2R/R2C PlanNd for F-order arrays is not supported')

    # generate plan
    # (load from cache if it exists, otherwise create one but don't cache it)
    if n > 1:  # ND transform
        if cupy.cuda.runtime.is_hip and value_type == 'C2R':
            raise RuntimeError("hipFFT's C2R PlanNd is buggy and unsupported")
        out_size = _get_fftn_out_size(shape, transformed_shape, axes[-1],
                                      value_type)
        # _get_cufft_plan_nd interacts with plan cache and callback
        plan = _get_cufft_plan_nd(shape,
                                  fft_type,
                                  axes=axes,
                                  order=order,
                                  out_size=out_size,
                                  to_cache=False)
    else:  # 1D transform
        # prepare plan arguments
        if value_type != 'C2R':
            out_size = shape[axis1D]
        else:
            out_size = _get_fftn_out_size(shape, transformed_shape, axis1D,
                                          value_type)
        batch = prod(shape) // shape[axis1D]
        devices = None if not config.use_multi_gpus else config._devices

        keys = (out_size, fft_type, batch, devices)
        mgr = config.get_current_callback_manager()
        if mgr is not None:
            # to avoid a weird segfault, we generate and cache distinct plans
            # for every possible (load_aux, store_aux) pair; the plans are
            # still generated from the same external Python module
            load_aux = mgr.cb_load_aux_arr
            store_aux = mgr.cb_store_aux_arr
            keys += (mgr.cb_load, mgr.cb_store,
                     0 if load_aux is None else load_aux.data.ptr,
                     0 if store_aux is None else store_aux.data.ptr)
        cache = get_plan_cache()
        cached_plan = cache.get(keys)
        if cached_plan is not None:
            plan = cached_plan
        elif mgr is None:
            plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)
        else:  # has callback
            # TODO(leofang): support multi-GPU callback (devices is ignored)
            if devices:
                raise NotImplementedError('multi-GPU cuFFT callbacks are not '
                                          'yet supported')
            plan = mgr.create_plan(('Plan1d', keys[:-3]))
            mgr.set_callbacks(plan)

    return plan
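
Besides the all-axes C2C example in the docstring, `value_type='R2C'` takes the 1D `axis1D` branch above and yields a `cufft.Plan1d` sized by the real input length. A sketch, under the assumption that such a plan is used through the context-manager route together with `cupy.fft.rfft`; the shape is illustrative:

import cupy
import cupyx.scipy.fftpack as fftpack

x = cupy.random.random((32, 256)).astype(cupy.float32)

# 1D R2C plan along the last axis: out_size=256, batch=32
plan = fftpack.get_fft_plan(x, axes=-1, value_type='R2C')
with plan:
    y = cupy.fft.rfft(x, axis=-1)   # picked up via cufft.get_current_plan()

assert y.shape == (32, 129) and y.dtype == cupy.complex64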
Example #8
def _exec_fft(a,
              direction,
              value_type,
              norm,
              axis,
              overwrite_x,
              out_size=None,
              out=None,
              plan=None):
    fft_type = _convert_fft_type(a.dtype, value_type)

    if axis % a.ndim != a.ndim - 1:
        a = a.swapaxes(axis, -1)

    if a.base is not None or not a.flags.c_contiguous:
        a = a.copy()
    elif (value_type == 'C2R' and not overwrite_x
          and 10010 <= cupy.cuda.runtime.runtimeGetVersion()):
        # The input array may be modified in CUDA 10.1 and above.
        # See #3763 for the discussion.
        a = a.copy()
    elif cupy.cuda.runtime.is_hip and value_type != 'C2C':
        # hipFFT's R2C would overwrite input
        # hipFFT's C2R needs a workaround (see below)
        a = a.copy()

    n = a.shape[-1]
    if n < 1:
        raise ValueError('Invalid number of FFT data points (%d) specified.' %
                         n)

    # Workaround for hipFFT/rocFFT:
    # Both cuFFT and hipFFT/rocFFT have this requirement that 0-th and
    # N/2-th element must be real, but cuFFT internally simply ignores it
    # while hipFFT handles it badly in both Plan1d and PlanNd, so we must
    # do the correction ourselves to ensure the condition is met.
    if cupy.cuda.runtime.is_hip and value_type == 'C2R':
        a[..., 0] = a[..., 0].real + 0j
        if out_size is None:
            a[..., -1] = a[..., -1].real + 0j
        elif out_size % 2 == 0:
            a[..., out_size // 2] = a[..., out_size // 2].real + 0j

    if out_size is None:
        out_size = n

    batch = a.size // n

    # plan search precedence:
    # 1. plan passed in as an argument
    # 2. plan as context manager
    # 3. cached plan
    # 4. create a new one
    curr_plan = cufft.get_current_plan()
    if curr_plan is not None:
        if plan is None:
            plan = curr_plan
        else:
            raise RuntimeError('Use the cuFFT plan either as a context manager'
                               ' or as an argument.')

    if plan is None:
        devices = None if not config.use_multi_gpus else config._devices
        # TODO(leofang): do we need to add the current stream to keys?
        keys = (out_size, fft_type, batch, devices)
        mgr = config.get_current_callback_manager()
        if mgr is not None:
            # to avoid a weird segfault, we generate and cache distinct plans
            # for every possible (load_aux, store_aux) pair; the plans are
            # still generated from the same external Python module
            load_aux = mgr.cb_load_aux_arr
            store_aux = mgr.cb_store_aux_arr
            keys += (mgr.cb_load, mgr.cb_store,
                     0 if load_aux is None else load_aux.data.ptr,
                     0 if store_aux is None else store_aux.data.ptr)
        cache = get_plan_cache()
        cached_plan = cache.get(keys)
        if cached_plan is not None:
            plan = cached_plan
        elif mgr is None:
            plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)
            cache[keys] = plan
        else:  # has callback
            # TODO(leofang): support multi-GPU callback (devices is ignored)
            if devices:
                raise NotImplementedError('multi-GPU cuFFT callbacks are not '
                                          'yet supported')
            plan = mgr.create_plan(('Plan1d', keys[:-5]))
            mgr.set_callbacks(plan)
            cache[keys] = plan
    else:
        # check plan validity
        if not isinstance(plan, cufft.Plan1d):
            raise ValueError('expected plan to have type cufft.Plan1d')
        if fft_type != plan.fft_type:
            raise ValueError('cuFFT plan dtype mismatch.')
        if out_size != plan.nx:
            raise ValueError('Target array size does not match the plan.',
                             out_size, plan.nx)
        if batch != plan.batch:
            raise ValueError('Batch size does not match the plan.')
        if config.use_multi_gpus != (plan.gpus is not None):
            raise ValueError('Unclear if multiple GPUs are to be used or not.')

    if overwrite_x and value_type == 'C2C':
        out = a
    elif out is not None:
        # verify that out has the expected shape and dtype
        plan.check_output_array(a, out)
    else:
        out = plan.get_output_array(a)

    if batch != 0:
        plan.fft(a, out, direction)

    sz = out.shape[-1]
    if fft_type == cufft.CUFFT_R2C or fft_type == cufft.CUFFT_D2Z:
        sz = n
    if norm is None:
        if direction == cufft.CUFFT_INVERSE:
            out /= sz
    else:
        out /= math.sqrt(sz)

    if axis % a.ndim != a.ndim - 1:
        out = out.swapaxes(axis, -1)

    return out
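
This variant adds the plan cache as the third level of the search order spelled out in the comment (explicit argument, context manager, cache, new plan). A sketch of observing that cache, assuming `cupy.fft.config.get_plan_cache()` is the public accessor behind the `get_plan_cache()` call in the snippet and that the returned cache exposes `clear()` and `show_info()`; the shape is illustrative:

import cupy

cache = cupy.fft.config.get_plan_cache()
cache.clear()

x = cupy.random.random((16, 256)).astype(cupy.complex64)
y1 = cupy.fft.fft(x, axis=-1)   # builds a Plan1d and stores it under its key
y2 = cupy.fft.fft(x, axis=-1)   # same (out_size, fft_type, batch, ...) key: cache hit

cache.show_info()               # prints cache size and hit/miss statistics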
Example #9
def get_fft_plan(a, shape=None, axes=None, value_type='C2C'):
    """ Generate a CUDA FFT plan for transforming up to three axes.

    Args:
        a (cupy.ndarray): Array to be transformed, assumed to be either C- or
            F-contiguous.
        shape (None or tuple of ints): Shape of the transformed axes of the
            output. If ``shape`` is not given, the lengths of the input along
            the axes specified by ``axes`` are used.
        axes (None or int or tuple of int):  The axes of the array to
            transform. If `None`, it is assumed that all axes are transformed.

            Currently, for performing N-D transform these must be a set of up
            to three adjacent axes, and must include either the first or the
            last axis of the array.
        value_type (str): The FFT type to perform. Acceptable values are:

            * 'C2C': complex-to-complex transform (default)
            * 'R2C': real-to-complex transform
            * 'C2R': complex-to-real transform

    Returns:
        a cuFFT plan for either 1D transform (``cupy.cuda.cufft.Plan1d``) or
        N-D transform (``cupy.cuda.cufft.PlanNd``).

    .. note::
        The returned plan can not only be passed as one of the arguments of
        the functions in ``cupyx.scipy.fftpack``, but also be used as a
        context manager for both ``cupy.fft`` and ``cupyx.scipy.fftpack``
        functions:

        .. code-block:: python

            x = cupy.random.random(16).reshape(4, 4).astype(cupy.complex128)
            plan = cupyx.scipy.fftpack.get_fft_plan(x)
            with plan:
                y = cupy.fft.fftn(x)
                # alternatively:
                y = cupyx.scipy.fftpack.fftn(x)  # no explicit plan is given!
            # alternatively:
            y = cupyx.scipy.fftpack.fftn(x, plan=plan)  # pass plan explicitly

        In the first case, no cuFFT plan will be generated automatically,
        even if ``cupy.fft.config.enable_nd_planning = True`` is set.

    .. warning::
        This API is a deviation from SciPy's, is currently experimental, and
        may be changed in a future version.
    """
    # check input array
    if a.flags.c_contiguous:
        order = 'C'
    elif a.flags.f_contiguous:
        order = 'F'
    else:
        raise ValueError('Input array a must be contiguous')

    if isinstance(shape, int):
        shape = (shape, )
    if isinstance(axes, int):
        axes = (axes, )
    if (shape is not None) and (axes is not None) and len(shape) != len(axes):
        raise ValueError('Shape and axes have different lengths.')

    # check axes
    # n=1: 1d (need axis1D); n>1: Nd
    if axes is None:
        n = a.ndim if shape is None else len(shape)
        axes = tuple(i for i in range(-n, 0))
        if n == 1:
            axis1D = 0
    else:  # axes is a tuple
        n = len(axes)
        if n == 1:
            axis1D = axes[0]
            if axis1D >= a.ndim or axis1D < -a.ndim:
                err = 'The chosen axis ({0}) exceeds the number of '\
                      'dimensions of a ({1})'.format(axis1D, a.ndim)
                raise ValueError(err)
        elif n > 3:
            raise ValueError('Only up to three axes is supported')

    # Note that "shape" here refers to the shape along trasformed axes, not
    # the shape of the output array, and we need to convert it to the latter.
    # The result is as if "a=_cook_shape(a); return a.shape" is called.
    # Because of this, we need to use (possibly unsorted) axes.
    transformed_shape = shape
    shape = list(a.shape)
    if transformed_shape is not None:
        for s, axis in zip(transformed_shape, axes):
            if s is not None:
                if axis == axes[-1] and value_type == 'C2R':
                    s = s // 2 + 1
                shape[axis] = s
    shape = tuple(shape)

    # check value_type
    out_dtype = _output_dtype(a.dtype, value_type)
    fft_type = _convert_fft_type(out_dtype, value_type)
    # TODO(leofang): figure out if we really have to skip F-order?
    if n > 1 and value_type != 'C2C' and a.flags.f_contiguous:
        raise ValueError('C2R/R2C PlanNd for F-order arrays is not supported')

    # generate plan
    if n > 1:  # ND transform
        out_size = _get_fftn_out_size(shape, transformed_shape, axes[-1],
                                      value_type)
        plan = _get_cufft_plan_nd(shape,
                                  fft_type,
                                  axes=axes,
                                  order=order,
                                  out_size=out_size)
    else:  # 1D transform
        if value_type != 'C2R':
            out_size = shape[axis1D]
        else:
            out_size = _get_fftn_out_size(shape, transformed_shape, axis1D,
                                          value_type)
        batch = prod(shape) // shape[axis1D]
        devices = None if not config.use_multi_gpus else config._devices
        plan = cufft.Plan1d(out_size, fft_type, batch, devices=devices)

    return plan
Example #10
def _fft_convolve(a1, a2, mode):

    offset = 0
    if a1.size < a2.size:
        a1, a2 = a2, a1
        offset = 1 - a2.size % 2

    # if either input is complex, the dtype after multiplication will also be complex
    if a1.dtype.kind == 'c' or a2.dtype.kind == 'c':
        fft, ifft = cupy.fft.fft, cupy.fft.ifft
        is_c2c = True
    else:
        fft, ifft = cupy.fft.rfft, cupy.fft.irfft
        is_c2c = False

    # hack to work around NumPy/CuPy FFT dtype incompatibility:
    # CuPy internally converts fp16 to fp32 before doing FFT (whereas Numpy
    # converts both fp16 and fp32 to fp64), so here we do the cast early and
    # explicitly, and make sure a correct cuFFT plan can be generated. After
    # the fft-ifft round trip, we cast the output dtype to the correct one.
    out_dtype = cupy.result_type(a1, a2)
    dtype = _output_dtype(out_dtype, 'C2C' if is_c2c else 'R2C')
    a1 = a1.astype(dtype, copy=False)
    a2 = a2.astype(dtype, copy=False)

    n1, n2 = a1.size, a2.size
    out_size = cupyx.scipy.fft.next_fast_len(n1 + n2 - 1)
    # skip calling get_fft_plan() as we know the args exactly
    if is_c2c:
        fft_t = cufft.CUFFT_C2C if dtype == cupy.complex64 else cufft.CUFFT_Z2Z
        fft_plan = cufft.Plan1d(out_size, fft_t, 1)
        ifft_plan = fft_plan
    else:
        fft_t = cufft.CUFFT_R2C if dtype == cupy.float32 else cufft.CUFFT_D2Z
        fft_plan = cufft.Plan1d(out_size, fft_t, 1)
        # this is a no-op context manager
        # TODO(leofang): use contextlib.nullcontext() for PY37+?
        ifft_plan = contextlib.suppress()
    with fft_plan:
        fa1 = fft(a1, out_size)
        fa2 = fft(a2, out_size)
    with ifft_plan:
        out = ifft(fa1 * fa2, out_size)

    if mode == 'full':
        start, end = 0, n1 + n2 - 1
    elif mode == 'same':
        start = (n2 - 1) // 2 + offset
        end = start + n1
    elif mode == 'valid':
        start, end = n2 - 1, n1
    else:
        raise ValueError(
            'acceptable mode flags are `valid`, `same`, or `full`.')

    out = out[start:end]

    if out.dtype.kind in 'iu':
        out = cupy.around(out)

    return out.astype(out_dtype, copy=False)
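
The helper is the standard FFT-based linear convolution: zero-pad both inputs to a fast length of at least n1 + n2 - 1, multiply the spectra, invert, and slice according to `mode`. A minimal sketch of the same idea through the public API (no explicit Plan1d), checked against a direct convolution on the host; sizes and tolerances are illustrative:

import numpy
import cupy
import cupyx.scipy.fft

a1 = cupy.random.random(1000).astype(cupy.float32)
a2 = cupy.random.random(100).astype(cupy.float32)

n1, n2 = a1.size, a2.size
out_size = cupyx.scipy.fft.next_fast_len(n1 + n2 - 1)

# real inputs -> rfft/irfft round trip, as in the is_c2c=False branch above
fa1 = cupy.fft.rfft(a1, out_size)
fa2 = cupy.fft.rfft(a2, out_size)
full = cupy.fft.irfft(fa1 * fa2, out_size)[:n1 + n2 - 1]   # mode='full' slice

ref = numpy.convolve(cupy.asnumpy(a1), cupy.asnumpy(a2), mode='full')
assert numpy.allclose(cupy.asnumpy(full), ref, rtol=1e-4, atol=1e-3)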
Example #11
    def __init__(self, Nr, Nz, use_cuda=False, nthreads=None):
        """
        Initialize an FFT object

        Parameters
        ----------
        Nr: int
           Number of grid points along the r axis (axis -1)

        Nz: int
           Number of grid points along the z axis (axis 0)

        use_cuda: bool, optional
           Whether to perform the Fourier transform (along the z axis) on the GPU

        nthreads : int, optional
            Number of threads for the FFTW transform.
            If None, the default number of threads of numba is used
            (environment variable NUMBA_NUM_THREADS)
        """
        # Check whether to use cuda
        self.use_cuda = use_cuda
        if (self.use_cuda is True) and (cuda_installed is False):
            self.use_cuda = False
            print('** Cuda not available for Fourier transform.')
            print('** Performing the Fourier transform on the CPU.')

        # Check whether to use MKL
        self.use_mkl = mkl_installed

        # Initialize the object for calculation on the GPU
        if self.use_cuda:
            # Set optimal number of CUDA threads per block
            # for copy 1d/2d kernels (determined empirically)
            copy_tpb = (8, 32) if cuda_gpu_model == "V100" else (2, 16)
            # Initialize the dimension of the grid and blocks
            self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr, *copy_tpb)
            # Initialize 1d buffer for cufft
            self.buffer1d_in = cupy.empty((Nz * Nr, ), dtype=np.complex128)
            self.buffer1d_out = cupy.empty((Nz * Nr, ), dtype=np.complex128)
            # Initialize the CUDA FFT plan object
            self.fft = cufft.Plan1d(Nz, cufft.CUFFT_Z2Z, Nr)
            self.inv_Nz = 1. / Nz  # For normalization of the iFFT

        # Initialize the object for calculation on the CPU
        else:
            # For MKL FFT
            if self.use_mkl:
                # Initialize the MKL plan with dummy array
                spect_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
                self.mklfft = MKLFFT(spect_buffer)

            # For FFTW
            else:
                # Determine number of threads
                if nthreads is None:
                    # Get the default number of threads for numba
                    nthreads = numba.config.NUMBA_NUM_THREADS
                # Initialize the FFT plan with dummy arrays
                interp_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
                spect_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
                self.fft = pyfftw.FFTW(interp_buffer,
                                       spect_buffer,
                                       axes=(0, ),
                                       direction='FFTW_FORWARD',
                                       threads=nthreads)
                self.ifft = pyfftw.FFTW(spect_buffer,
                                        interp_buffer,
                                        axes=(0, ),
                                        direction='FFTW_BACKWARD',
                                        threads=nthreads)
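
In the CUDA branch, `cufft.Plan1d(Nz, cufft.CUFFT_Z2Z, Nr)` transforms Nr independent signals of length Nz, so the flat (Nz*Nr,) buffers must be laid out with each signal's z samples contiguous, which is what the dedicated copy kernels arrange. A small sketch of that layout convention with a plain cupy array; Nz and Nr are illustrative:

import cupy
from cupy.cuda import cufft

Nz, Nr = 64, 8

# batch of Nr signals, each of length Nz and contiguous along z
buf_in = cupy.random.random((Nr, Nz)).astype(cupy.complex128)
buf_out = cupy.empty_like(buf_in)

plan = cufft.Plan1d(Nz, cufft.CUFFT_Z2Z, Nr)
plan.fft(buf_in, buf_out, cufft.CUFFT_FORWARD)

# matches an FFT along the last (contiguous) axis
assert cupy.allclose(buf_out, cupy.fft.fft(buf_in, axis=-1))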
Example #12
def get_fft_plan(a, shape=None, axes=None, value_type='C2C'):
    """ Generate a CUDA FFT plan for transforming up to three axes.

    Args:
        a (cupy.ndarray): Array to be transformed, assumed to be either C- or
            F-contiguous.
        shape (None or tuple of ints): Shape of the transformed axes of the
            output. If ``shape`` is not given, the lengths of the input along
            the axes specified by ``axes`` are used.
        axes (None or int or tuple of int):  The axes of the array to
            transform. If `None`, it is assumed that all axes are transformed.

            Currently, for performing N-D transform these must be a set of up
            to three adjacent axes, and must include either the first or the
            last axis of the array.
        value_type ('C2C'): The FFT type to perform.
            Currently only complex-to-complex transforms are supported.

    Returns:
        a cuFFT plan for either 1D transform (``cupy.cuda.cufft.Plan1d``) or
        N-D transform (``cupy.cuda.cufft.PlanNd``).

    .. note::
        The returned plan can not only be passed as one of the arguments of
        the functions in ``cupyx.scipy.fftpack``, but also be used as a
        context manager for both ``cupy.fft`` and ``cupyx.scipy.fftpack``
        functions:

        .. code-block:: python

            x = cupy.random.random(16).reshape(4, 4).astype(cupy.complex128)
            plan = cupyx.scipy.fftpack.get_fft_plan(x)
            with plan:
                y = cupy.fft.fftn(x)
                # alternatively:
                y = cupyx.scipy.fftpack.fftn(x)  # no explicit plan is given!
            # alternatively:
            y = cupyx.scipy.fftpack.fftn(x, plan=plan)  # pass plan explicitly

        In the first case, no cuFFT plan will be generated automatically,
        even if ``cupy.fft.config.enable_nd_planning = True`` is set.

    .. warning::
        This API is a deviation from SciPy's, is currently experimental, and
        may be changed in a future version.
    """
    # check input array
    if a.flags.c_contiguous:
        order = 'C'
    elif a.flags.f_contiguous:
        order = 'F'
    else:
        raise ValueError('Input array a must be contiguous')

    # check axes
    # n=1: 1d (need axis1D); n>1: Nd
    if axes is None:
        n = a.ndim
        axes = tuple(i for i in range(n))
        if n == 1:
            axis1D = 0
    elif isinstance(axes, int):
        n = 1
        axis1D = axes
        axes = (axes, )
        if axis1D >= a.ndim or axis1D < -a.ndim:
            raise ValueError('The chosen axis ({0}) exceeds the number of '
                             'dimensions of a ({1})'.format(axis1D, a.ndim))
    else:  # axes is a tuple
        n = len(axes)
        if n == 1:
            axis1D = axes[0]
        elif n > 3:
            raise ValueError('Only up to three axes is supported')

    # check shape
    if isinstance(shape, int):
        shape = (shape, )
    if (shape is not None) and len(shape) != n:
        raise ValueError('Shape and axes have different lengths.')
    # Note that "shape" here refers to the shape along trasformed axes, not
    # the shape of the output array, and we need to convert it to the latter.
    # The result is as if "a=_cook_shape(a); return a.shape" is called.
    transformed_shape = shape
    shape = list(a.shape)
    if transformed_shape is not None:
        for s, axis in zip(transformed_shape, axes):
            shape[axis] = s
    shape = tuple(shape)

    # check value_type
    fft_type = _convert_fft_type(a, value_type)
    if n > 1 and fft_type not in [cufft.CUFFT_C2C, cufft.CUFFT_Z2Z]:
        raise NotImplementedError('Only C2C and Z2Z are supported for N-dim'
                                  ' transform.')

    # generate plan
    if n > 1:  # ND transform
        plan = _get_cufft_plan_nd(shape, fft_type, axes=axes, order=order)
    else:  # 1D transform
        out_size = shape[axis1D]
        batch = prod(shape) // out_size
        plan = cufft.Plan1d(out_size, fft_type, batch)

    return plan
Example #13
def get_fft_plan(a, shape=None, axes=None, value_type='C2C'):
    """ Generate a CUDA FFT plan for transforming up to three axes.

    Args:
        a (cupy.ndarray): Array to be transformed, assumed to be either C- or
            F-contiguous.
        shape (None or tuple of ints): Shape of the transformed axes of the
            output. If ``shape`` is not given, the lengths of the input along
            the axes specified by ``axes`` are used.
        axes (None or int or tuple of int):  The axes of the array to
            transform. If `None`, it is assumed that all axes are transformed.

            Currently, for performing N-D transform these must be a set of up
            to three adjacent axes, and must include either the first or the
            last axis of the array.
        value_type ('C2C'): The FFT type to perform.
            Currently only complex-to-complex transforms are supported.

    Returns:
        plan: a cuFFT plan for either 1D transform (cupy.cuda.cufft.Plan1d)
            or N-D transform (cupy.cuda.cufft.PlanNd).
    """
    # check input array
    if a.flags.c_contiguous:
        order = 'C'
    elif a.flags.f_contiguous:
        order = 'F'
    else:
        raise ValueError('Input array a must be contiguous')

    # check axes
    # n=1: 1d (need axis1D); n>1: Nd
    if axes is None:
        n = a.ndim
        axes = tuple(i for i in range(n))
        if n == 1:
            axis1D = 0
    elif isinstance(axes, int):
        n = 1
        axis1D = axes
        axes = (axes, )
        if axis1D >= a.ndim or axis1D < -a.ndim:
            raise ValueError('The chosen axis ({0}) exceeds the number of '
                             'dimensions of a ({1})'.format(axis1D, a.ndim))
    else:  # axes is a tuple
        n = len(axes)
        if n == 1:
            axis1D = axes[0]
        elif n > 3:
            raise ValueError('Only up to three axes is supported')

    # check shape
    if isinstance(shape, int):
        shape = (shape, )
    if (shape is not None) and len(shape) != n:
        raise ValueError('Shape and axes have different lengths.')
    # Note that "shape" here refers to the shape along trasformed axes, not
    # the shape of the output array, and we need to convert it to the latter.
    # The result is as if "a=_cook_shape(a); return a.shape" is called.
    transformed_shape = shape
    shape = list(a.shape)
    if transformed_shape is not None:
        for s, axis in zip(transformed_shape, axes):
            shape[axis] = s
    shape = tuple(shape)

    # check value_type
    fft_type = _convert_fft_type(a, value_type)
    if n > 1 and fft_type not in [cufft.CUFFT_C2C, cufft.CUFFT_Z2Z]:
        raise NotImplementedError('Only C2C and Z2Z are supported for N-dim'
                                  ' transform.')

    # generate plan
    if n > 1:  # ND transform
        plan = _get_cufft_plan_nd(shape, fft_type, axes=axes, order=order)
    else:  # 1D transform
        out_size = shape[axis1D]
        batch = prod(shape) // out_size
        plan = cufft.Plan1d(out_size, fft_type, batch)

    return plan