def test_dtypes(thr_and_double, arg_dtypes):
    """Run ``check_errors`` for one real/complex combination of argument dtypes.

    ``thr_and_double`` unpacks into a thread and a flag selecting double
    precision; ``arg_dtypes`` unpacks into two flags telling whether the
    first and the second argument should be complex.
    """
    thr, double = thr_and_double
    first_is_complex, second_is_complex = arg_dtypes

    real_dtype = numpy.float64 if double else numpy.float32

    def pick(is_complex):
        # Complex counterpart of the base dtype when requested, else the base dtype.
        if is_complex:
            return dtypes.complex_for(real_dtype)
        return real_dtype

    first_dtype = pick(first_is_complex)
    second_dtype = pick(second_is_complex)

    check_errors(thr, (30, 40, 50), first_dtype, (30, 50, 60), second_dtype)
def __init__(self, arr_t):
    """Declare the ``output`` and ``input`` parameters of the computation.

    The output uses the complex counterpart of ``arr_t.dtype`` and keeps the
    shape of ``arr_t`` except for the last axis, whose length is halved
    (integer division).
    """
    complex_dtype = dtypes.complex_for(arr_t.dtype)
    halved_shape = arr_t.shape[:-1] + (arr_t.shape[-1] // 2,)
    out_arr = Type(complex_dtype, halved_shape)

    params = [
        Parameter('output', Annotation(out_arr, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    Computation.__init__(self, params)
def __init__(self, arr_t):
    """Set up a computation with an ``output`` and an ``input`` parameter.

    ``input`` is annotated with ``arr_t``.  ``output`` has the complex dtype
    matching ``arr_t.dtype`` and the same leading axes as ``arr_t``; its last
    axis has length ``arr_t.shape[-1] // 2``.
    """
    out_dtype = dtypes.complex_for(arr_t.dtype)
    leading_axes = arr_t.shape[:-1]
    last_axis = arr_t.shape[-1] // 2
    out_arr = Type(out_dtype, leading_axes + (last_axis,))

    output_param = Parameter('output', Annotation(out_arr, 'o'))
    input_param = Parameter('input', Annotation(arr_t, 'i'))
    Computation.__init__(self, [output_param, input_param])
def __init__(self, x, NFFT=256, noverlap=128, pad_to=None,
             window=hanning_window):
    """Build an FFT over rolling frames of a real 1D input and transpose the result.

    Input side (innermost to outermost): ``flat_input`` is framed by
    ``rolling_frame``, multiplied by ``window``, and combined with a constant
    zero imaginary part into the complex FFT input.
    Output side: the FFT output goes through ``norm_const(..., 1)`` and
    ``crop_frequencies``; the cropped amplitude is what ``Transpose`` works on.

    ``x`` must have a real dtype and exactly one dimension (checked with
    ``assert``).
    """
    assert dtypes.is_real(x.dtype)
    assert x.ndim == 1

    framing = rolling_frame(x, NFFT, noverlap, pad_to)
    frames_shape = framing.output.shape

    # Complex and real array types sharing the shape of the framed data.
    complex_frames = Type(dtypes.complex_for(x.dtype), frames_shape)
    real_frames = Type(x.dtype, frames_shape)

    windowing = window(real_frames, NFFT)
    zero_imag = transformations.broadcast_const(real_frames, 0)
    make_complex = transformations.combine_complex(complex_frames)
    to_amplitude = transformations.norm_const(complex_frames, 1)
    cropping = crop_frequencies(to_amplitude.output)

    fft = FFT(complex_frames, axes=(1, ))

    # NOTE: ``fft.parameter`` is looked up again for every connection because
    # each ``connect`` introduces the parameter names used by the next one.
    fft.parameter.input.connect(
        make_complex, make_complex.output,
        input_real=make_complex.real, input_imag=make_complex.imag)
    fft.parameter.input_imag.connect(zero_imag, zero_imag.output)
    fft.parameter.input_real.connect(
        windowing, windowing.output, unwindowed_input=windowing.input)
    fft.parameter.unwindowed_input.connect(
        framing, framing.output, flat_input=framing.input)
    fft.parameter.output.connect(
        to_amplitude, to_amplitude.input, amplitude=to_amplitude.output)
    fft.parameter.amplitude.connect(
        cropping, cropping.input, cropped_amplitude=cropping.output)

    self._fft = fft
    self._transpose = Transpose(fft.parameter.cropped_amplitude)

    output_param = Parameter(
        'output', Annotation(self._transpose.parameter.output, 'o'))
    input_param = Parameter(
        'input', Annotation(fft.parameter.flat_input, 'i'))
    Computation.__init__(self, [output_param, input_param])
def get_complex_trf(arr):
    """Return a transformation that stores ``input`` as a complex value.

    The ``output`` parameter has the shape of ``arr`` and the complex dtype
    matching ``arr.dtype``; the template writes the loaded input as the real
    part with ``0`` as the imaginary part.
    """
    out_type = Type(dtypes.complex_for(arr.dtype), arr.shape)
    params = [
        Parameter('output', Annotation(out_type, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]
    # Template text is kept exactly as in the original definition.
    code = """ ${output.store_same}( COMPLEX_CTR(${output.ctype})( ${input.load_same}, 0)); """
    return Transformation(params, code)
def __init__(self, arr_t, dont_store_last=False):
    """Declare the ``output`` and ``input`` parameters of the computation.

    The output has the complex dtype matching ``arr_t.dtype``.  Its last axis
    has length ``arr_t.shape[-1] // 2``, plus one extra element unless
    ``dont_store_last`` is true; the other axes are those of ``arr_t``.
    """
    self._dont_store_last = dont_store_last

    output_size = arr_t.shape[-1] // 2
    if not dont_store_last:
        output_size += 1

    complex_dtype = dtypes.complex_for(arr_t.dtype)
    out_arr = Type(complex_dtype, arr_t.shape[:-1] + (output_size,))

    params = [
        Parameter('output', Annotation(out_arr, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    Computation.__init__(self, params)
def __init__(self, arr_t, dont_store_last=False):
    """Set up a computation with an ``output`` and an ``input`` parameter.

    ``input`` is annotated with ``arr_t``.  ``output`` is complex (dtype
    derived from ``arr_t.dtype``) and shares the leading axes of ``arr_t``;
    its last axis holds ``arr_t.shape[-1] // 2`` elements when
    ``dont_store_last`` is true and one more otherwise.
    """
    self._dont_store_last = dont_store_last

    extra = 0 if dont_store_last else 1
    last_axis = arr_t.shape[-1] // 2 + extra

    out_dtype = dtypes.complex_for(arr_t.dtype)
    out_shape = arr_t.shape[:-1] + (last_axis,)
    out_arr = Type(out_dtype, out_shape)

    output_param = Parameter('output', Annotation(out_arr, 'o'))
    input_param = Parameter('input', Annotation(arr_t, 'i'))
    Computation.__init__(self, [output_param, input_param])
def prepare_rfft_input(arr):
    """Return a transformation packing pairs of input elements into complex values.

    For the output index ``k`` along the last axis, the template loads the
    input elements ``2 * k`` and ``2 * k + 1`` and stores them as the real and
    imaginary part respectively.  The output therefore has half the last-axis
    length of ``arr`` (integer division) and the complex dtype matching
    ``arr.dtype``.  The transformation is connected through ``output``.
    """
    packed_shape = arr.shape[:-1] + (arr.shape[-1] // 2,)
    res = Type(dtypes.complex_for(arr.dtype), packed_shape)

    params = [
        Parameter('output', Annotation(res, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]
    # Template text is kept exactly as in the original definition.
    code = """ <% batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1]) %> ${input.ctype} re = ${input.load_idx}(${batch_idxs} ${idxs[-1]} * 2); ${input.ctype} im = ${input.load_idx}(${batch_idxs} ${idxs[-1]} * 2 + 1); ${output.store_same}(COMPLEX_CTR(${output.ctype})(re, im)); """
    return Transformation(params, code, connectors=['output'])
def prepare_rfft_input(arr):
    """Build the transformation that reads a real array as interleaved complex data.

    ``input`` is annotated with ``arr``; ``output`` is complex (dtype derived
    from ``arr.dtype``) with the last axis of length ``arr.shape[-1] // 2``.
    Each output element takes its real part from the even input position
    ``2 * k`` and its imaginary part from the odd position ``2 * k + 1`` of
    the last axis.  ``output`` is the only connector.
    """
    complex_dtype = dtypes.complex_for(arr.dtype)
    leading_axes = arr.shape[:-1]
    half_length = arr.shape[-1] // 2
    res = Type(complex_dtype, leading_axes + (half_length,))

    output_param = Parameter('output', Annotation(res, 'o'))
    input_param = Parameter('input', Annotation(arr, 'i'))

    # Template text is kept exactly as in the original definition.
    template = """ <% batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1]) %> ${input.ctype} re = ${input.load_idx}(${batch_idxs} ${idxs[-1]} * 2); ${input.ctype} im = ${input.load_idx}(${batch_idxs} ${idxs[-1]} * 2 + 1); ${output.store_same}(COMPLEX_CTR(${output.ctype})(re, im)); """

    return Transformation(
        [output_param, input_param], template, connectors=['output'])
def __init__(self, arr_t, padding=False, axes=None, **kwargs):
    '''
    Wrapper around `reikna.fft.FFT` with automatic real-to-complex
    casting and optional padding for higher performance.

    Input
    -----
    arr_t:
        Array type (anything with `dtype` and `shape`) of the data to be
        transformed.
    padding: bool, default=False
        If True, the input array is padded to the next power of two on
        the transformed axes.
    axes: tuple
        Axes over which to perform the transform. Defaults to all axes.
        Negative values are counted from the last axis.
    **kwargs:
        Passed through to the parent constructor.

    Raises
    ------
    IndexError
        If an entry of `axes` is out of range for `arr_t`.

    Note
    ----
    Because reikna does not allow nodes of the transformation tree with
    the identical names, the input array is called `input_`.
    '''
    ndim = len(arr_t.shape)

    # Always hand a tuple of non-negative axes to the parent class.
    if axes is None:
        axes = tuple(range(ndim))
    else:
        axes = tuple(v + ndim if v < 0 else v for v in axes)
    for v in axes:
        if v not in range(0, ndim):
            raise IndexError('axis is out of range')

    # The transform itself always works on complex data.
    dtype = (arr_t.dtype if dtypes.is_complex(arr_t.dtype)
             else dtypes.complex_for(arr_t.dtype))

    if padding:
        # Round the length of every transformed axis up to a power of two.
        shape = tuple(1 << int(np.ceil(np.log2(v))) if ax in axes else v
                      for ax, v in enumerate(arr_t.shape))
    else:
        shape = arr_t.shape

    super(FFT, self).__init__(Type(dtype, shape), axes=axes, **kwargs)

    # `node` tracks the current outermost input parameter; every connection
    # below exposes a new one under a different name.
    node = self.parameter.input

    if dtype != arr_t.dtype:
        # Real input: cast to complex before the transform.
        complex_tr = Complex(Type(arr_t.dtype, node.shape))
        node.connect(complex_tr, complex_tr.output,
                     in_real=complex_tr.input)
        node = self.parameter.in_real

    if shape != arr_t.shape:
        # Padded transform: fill the extra elements with zeros.
        pad_tr = Padded(node, arr_t, default='0.')
        node.connect(pad_tr, pad_tr.output, in_padded=pad_tr.input)
        node = self.parameter.in_padded

    # Final identity node so the public input is always named `input_`.
    copy_tr = copy(node)
    node.connect(copy_tr, copy_tr.output, input_=copy_tr.input)
def normal_bm(bijection, dtype, mean=0, std=1):
    """
    Creates a sampler of normally distributed random numbers (Box-Muller transform)
    with the mean ``mean`` and the standard deviation ``std``.
    Supported dtypes: ``float(32/64)``, ``complex(64/128)``.
    A call yields two random numbers for real types and one number for complex types.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.

    .. note::

        For a complex ``dtype``, ``std`` is the standard deviation of the complex numbers
        (what ``numpy.std()`` returns), not that of the real and imaginary components
        (those are normally distributed with the standard deviation ``std / sqrt(2)``).
        Consequently, while ``mean`` is of type ``dtype``, ``std`` must be real.
    """
    complex_res = dtypes.is_complex(dtype)

    # Both the real and the complex flavour of ``dtype`` are needed by the template.
    if complex_res:
        r_dtype, c_dtype = dtypes.real_for(dtype), dtype
    else:
        r_dtype, c_dtype = dtype, dtypes.complex_for(dtype)

    uf = uniform_float(bijection, r_dtype, low=0, high=1)

    template_def = TEMPLATE.get_def("normal_bm")
    render_kwds = dict(
        complex_res=complex_res,
        r_dtype=r_dtype,
        r_ctype=dtypes.ctype(r_dtype),
        c_dtype=c_dtype,
        c_ctype=dtypes.ctype(c_dtype),
        polar_unit=functions.polar_unit(r_dtype),
        bijection=bijection,
        mean=mean,
        std=std,
        uf=uf)
    module = Module(template_def, render_kwds=render_kwds)

    randoms_per_call = 1 if complex_res else 2
    return Sampler(
        bijection, module, dtype,
        deterministic=uf.deterministic,
        randoms_per_call=randoms_per_call)
def rfft(self, a, nthreads=ncpu):
    """Forward FFT of a real array using the first available backend.

    Backends are tried in this order: ``SCIK`` and ``REIK`` (both only when
    ``self.is_gpu_memory_enough(a)``), then ``FFTW`` with ``nthreads``
    threads, and finally ``N.fft.rfftn``.  ``a`` is first passed through
    ``self.check_array``.
    """
    a = self.check_array(a, RTYPES, RTYPE)

    if SCIK and self.is_gpu_memory_enough(a):
        # Output keeps only n // 2 + 1 elements along the last axis.
        shape = list(a.shape)
        shape[-1] = shape[-1] // 2 + 1
        dtype = G_RTYPES[a.dtype.type]
        func = fft.fft
        return self._fft_scik(a, func, shape, dtype)

    if REIK and self.is_gpu_memory_enough(a):
        # NOTE(review): unlike the other branches this one runs a full complex
        # FFT of shape ``a.shape`` and applies ``fftshift`` to the result --
        # confirm that the differing output layout is intended.
        thr = self.api.Thread(self.dev)
        plan = FFT(Type(complex_for(a.dtype), a.shape))

        # Feed the complex input from two real-valued inputs of the same shape ...
        cc = combine_complex(plan.parameter.input)
        # ... the imaginary one being supplied as the constant 0.
        bc = broadcast_const(cc.imag, 0)
        plan.parameter.input.connect(
            cc, cc.output, real_input=cc.real, imag_input=cc.imag)
        plan.parameter.imag_input.connect(bc, bc.output)

        fftc = plan.compile(thr, fast_math=True)

        a_dev = thr.to_device(a)
        a_out_dev = thr.empty_like(plan.parameter.output)
        fftc(a_out_dev, a_dev)

        return N.fft.fftshift(a_out_dev.get())

    if FFTW:
        func = pyfftw.builders.rfftn
        return self._fftw(a, func, nthreads)

    return N.fft.rfftn(a)
def normal_bm(bijection, dtype, mean=0, std=1):
    """
    Builds a Box-Muller sampler producing normally distributed random numbers
    with the mean ``mean`` and the standard deviation ``std``.
    Supported dtypes: ``float(32/64)``, ``complex(64/128)``.
    Two random numbers are produced per call for real types, one for complex types.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.

    .. note::

        With a complex ``dtype``, ``std`` describes the complex numbers themselves
        (same as ``numpy.std()`` returns) rather than their real and imaginary components
        (which will be normally distributed with the standard deviation ``std / sqrt(2)``).
        Consequently, while ``mean`` is of type ``dtype``, ``std`` must be real.
    """
    is_complex_result = dtypes.is_complex(dtype)

    if is_complex_result:
        c_dtype = dtype
        r_dtype = dtypes.real_for(c_dtype)
    else:
        r_dtype = dtype
        c_dtype = dtypes.complex_for(r_dtype)

    # Uniform sampler on [low, high] = [0, 1] feeding the transform.
    uf = uniform_float(bijection, r_dtype, low=0, high=1)

    module = Module(
        TEMPLATE.get_def("normal_bm"),
        render_kwds={
            'complex_res': is_complex_result,
            'r_dtype': r_dtype,
            'r_ctype': dtypes.ctype(r_dtype),
            'c_dtype': c_dtype,
            'c_ctype': dtypes.ctype(c_dtype),
            'polar_unit': functions.polar_unit(r_dtype),
            'bijection': bijection,
            'mean': mean,
            'std': std,
            'uf': uf,
        })

    if is_complex_result:
        per_call = 1
    else:
        per_call = 2

    return Sampler(
        bijection, module, dtype,
        deterministic=uf.deterministic, randoms_per_call=per_call)
def __init__(self, x, NFFT=256, noverlap=128, pad_to=None,
             window=hanning_window):
    """Assemble the FFT-with-transformations pipeline for a real 1D input.

    Transformations attached to the FFT input, from the FFT outwards:
    ``combine_complex`` (real part + constant-zero imaginary part), ``window``
    on the real part, and ``rolling_frame`` exposing ``flat_input``.
    Transformations attached to the FFT output: ``norm_const(..., 1)``
    followed by ``crop_frequencies``, exposing ``cropped_amplitude``, which is
    then handed to ``Transpose``.

    ``x`` must have a real dtype and be one-dimensional (checked with
    ``assert``).
    """
    assert dtypes.is_real(x.dtype)
    assert x.ndim == 1

    frame_trf = rolling_frame(x, NFFT, noverlap, pad_to)
    framed_shape = frame_trf.output.shape

    complex_arr = Type(dtypes.complex_for(x.dtype), framed_shape)
    real_arr = Type(x.dtype, framed_shape)

    win_trf = window(real_arr, NFFT)
    zero_trf = transformations.broadcast_const(real_arr, 0)
    combine_trf = transformations.combine_complex(complex_arr)
    norm_trf = transformations.norm_const(complex_arr, 1)
    crop_trf = crop_frequencies(norm_trf.output)

    fft = FFT(complex_arr, axes=(1,))

    # Input side.  The order matters: each connection creates the parameter
    # that the following one attaches to.
    fft.parameter.input.connect(
        combine_trf, combine_trf.output,
        input_real=combine_trf.real,
        input_imag=combine_trf.imag)
    fft.parameter.input_imag.connect(zero_trf, zero_trf.output)
    fft.parameter.input_real.connect(
        win_trf, win_trf.output,
        unwindowed_input=win_trf.input)
    fft.parameter.unwindowed_input.connect(
        frame_trf, frame_trf.output,
        flat_input=frame_trf.input)

    # Output side.
    fft.parameter.output.connect(
        norm_trf, norm_trf.input,
        amplitude=norm_trf.output)
    fft.parameter.amplitude.connect(
        crop_trf, crop_trf.input,
        cropped_amplitude=crop_trf.output)

    self._fft = fft
    self._transpose = Transpose(fft.parameter.cropped_amplitude)

    params = [
        Parameter('output',
                  Annotation(self._transpose.parameter.output, 'o')),
        Parameter('input', Annotation(fft.parameter.flat_input, 'i')),
    ]
    Computation.__init__(self, params)
import numpy as np

from reikna import cluda
from reikna.cluda import dtypes
from reikna.cluda.tempalloc import TrivialManager
# `Transformation` is used at module level below and was not imported.
from reikna.core import Transformation
from reikna.fft import FFT

# Transformation that turns a real input into a complex output whose
# imaginary part is 0.
tr = Transformation(
    inputs=['in_re'], outputs=['out_c'],
    derive_o_from_is=lambda in_re: dtypes.complex_for(in_re),
    snippet="${out_c.store}(COMPLEX_CTR(${out_c.ctype})(${in_re.load}, 0));")


def main():
    """Prepare an FFT whose input is fed from real-valued data through ``tr``."""
    api = cluda.ocl_api()
    thr = api.Thread.create(temp_alloc=dict(cls=TrivialManager))

    N = 256
    # NOTE(review): M is not used in the visible part of this function -- confirm
    # whether later code relies on it before removing it.
    M = 10000

    # Random single-precision input and a complex output buffer of the same shape.
    data_in = np.random.rand(N).astype(np.float32)
    cl_data_in = thr.to_device(data_in)
    cl_data_out = thr.array(data_in.shape, np.complex64)

    fft = FFT(thr)
    # Attach `tr` to the FFT's 'input'; the new real-valued input is 'input_re'.
    fft.connect(tr, 'input', ['input_re'])
    fft.prepare_for(cl_data_out, cl_data_in, -1, axes=(0, ))
def __init__(self, in1_type, in2_type, axis=-1):
    '''
    Fast convolution with FFT

    Uses transforms of length N1+N2 padded to a power of two, because
    overlap-add is not significantly faster for the intended shape ranges.

    Input
    -----
    in1_type, in2_type: `reikna.core.Type`
        Shape and dtype of the arrays to be convolved.
    axis: `int`
        Array axis over which the convolution is evaluated.
        Negative values are counted from the last axis.

    Raises
    ------
    ValueError
        If `axis` is out of range, or if the shapes of `in1_type` and
        `in2_type` cannot be broadcast against each other on any axis
        other than `axis`.

    Notes
    -----
    * The output is always an array of complex numbers.
    * The arrays are matched using numpy's broadcasting rules.
    '''
    self._thread = None

    # normalize axis
    ndim = max(len(in1_type.shape), len(in2_type.shape))
    if axis < 0:
        axis += ndim
    if axis not in range(ndim):
        raise ValueError('axis is out of range.')

    # left-pad both shapes with ones so that they have ndim entries
    in1_shape = (1, ) * (ndim - len(in1_type.shape)) + in1_type.shape
    in2_shape = (1, ) * (ndim - len(in2_type.shape)) + in2_type.shape

    # check if in1 and in2 are broadcastable on every axis except the
    # convolution axis (the previous check never looked at axis 0)
    for ax, (s1, s2) in enumerate(zip(in1_shape, in2_shape)):
        if (ax != axis) and (s1 != s2) and (s1 != 1) and (s2 != 1):
            raise ValueError('in1 and in2 have incompatible shapes')

    # calculate shapes
    # NOTE(review): the transform length used here is N1 + N2 - 1 and no
    # `padding` argument is passed to FFT -- confirm against the docstring.
    conv_len = in1_shape[axis] + in2_shape[axis] - 1
    in1_padded = in1_shape[:axis] + (conv_len, ) + in1_shape[axis + 1:]
    in2_padded = in2_shape[:axis] + (conv_len, ) + in2_shape[axis + 1:]
    out_shape = tuple(
        max(s1, s2) for s1, s2 in zip(in1_padded, in2_padded))

    # NOTE(review): the output dtype is derived from in1 only -- confirm
    # that mixed-precision inputs are not expected.
    out_dtype = (in1_type.dtype if dtypes.is_complex(in1_type.dtype)
                 else dtypes.complex_for(in1_type.dtype))

    # forward transforms of both zero-padded inputs
    fft1 = FFT(Type(in1_type.dtype, in1_padded), axes=(axis, ))
    pad_in1 = Padded(fft1.parameter.input_,
                     Type(in1_type.dtype, in1_shape), default='0.')
    fft1.parameter.input_.connect(pad_in1, pad_in1.output,
                                  input_p=pad_in1.input)

    fft2 = FFT(Type(in2_type.dtype, in2_padded), axes=(axis, ))
    pad_in2 = Padded(fft2.parameter.input_,
                     Type(in2_type.dtype, in2_shape), default='0.')
    fft2.parameter.input_.connect(pad_in2, pad_in2.output,
                                  input_p=pad_in2.input)

    # product of the two spectra, each broadcast to the output shape
    mul = Multiply(Type(out_dtype, out_shape))
    bcast_in1 = Broadcast(out_shape, fft1.parameter.output)
    mul.parameter.in1.connect(bcast_in1, bcast_in1.output,
                              in1_p=bcast_in1.input)
    bcast_in2 = Broadcast(out_shape, fft2.parameter.output)
    mul.parameter.in2.connect(bcast_in2, bcast_in2.output,
                              in2_p=bcast_in2.input)

    ifft = FFT(Type(out_dtype, out_shape), axes=(axis, ))

    self._comps = [fft1, fft2, mul, ifft]

    # emulate reikna parameter attribute
    parameters = namedtuple('DummyParameters', ['output', 'in1', 'in2'])
    self.parameter = parameters(ifft.parameter.output, in1_type, in2_type)