def __init__(self, iring, axes, inverse=False, real_output=False,
             axis_labels=None, apply_fftshift=False,
             *args, **kwargs):
    """Store the FFT configuration and create the FFT object.

    Args:
        iring: Input ring/block; forwarded to the parent constructor.
        axes: A single axis or a list/tuple of axes to transform.
        inverse: Stored as ``self.inverse``.
        real_output: Stored as ``self.real_output``.
        axis_labels: A single label or a list/tuple of labels for the
            transformed axes. ``None`` (the default) is stored as
            ``None`` rather than being wrapped in a list.
        apply_fftshift: Stored as ``self.apply_fftshift``.
        *args, **kwargs: Forwarded to the parent constructor.
    """
    super(FftBlock, self).__init__(iring, *args, **kwargs)
    # Accept scalars, lists and tuples alike. The previous test
    # (`not isinstance(x, list) or isinstance(x, tuple)`) wrapped a tuple
    # into a one-element list because `not` binds tighter than `or`.
    if isinstance(axes, (list, tuple)):
        axes = list(axes)
    else:
        axes = [axes]
    # Keep None as None so "no labels requested" stays distinguishable
    # from "one label whose value is None".
    if axis_labels is not None:
        if isinstance(axis_labels, (list, tuple)):
            axis_labels = list(axis_labels)
        else:
            axis_labels = [axis_labels]
    self.specified_axes = axes
    self.real_output = real_output
    self.inverse = inverse
    self.axis_labels = axis_labels
    self.apply_fftshift = apply_fftshift
    self.space = self.irings[0].space
    self.fft = Fft()
    # Geometry of the most recently built plan; None until one exists.
    self.plan_ishape = None
    self.plan_oshape = None
    self.plan_istrides = None
    self.plan_ostrides = None
def run_test_r2c_dtype(self, shape, axes, dtype=np.float32, scale=1.,
                       misalign=0):
    """Check a real-to-complex FFT of ``dtype`` input against the reference.

    Args:
        shape: Tuple giving the shape of the real input.
        axes: Axes to transform; the last one is halved in the output.
        dtype: Element type the scaled random input is cast to.
        scale: Factor applied before the cast and divided out of the
            reference input afterwards.
        misalign: Number of leading elements of the last dimension that
            are padded in and then sliced off again.
    """
    # Uniform samples rescaled from [0, 1) to [-1, 1), then scaled and cast.
    samples = np.random.uniform(size=shape).astype(np.float32) * 2 - 1
    samples = (samples * scale).astype(dtype)

    # Force misaligned data: widen the last dimension, upload the padded
    # array, then drop the first `misalign` columns on host and device.
    widened = shape[:-1] + (shape[-1] + misalign, )
    padded = np.resize(samples, widened)
    dev_in = bf.ndarray(padded, space='cuda')[..., misalign:]
    host_in = padded[..., misalign:]

    # A real-to-complex transform keeps n // 2 + 1 bins on the last axis.
    last_axis = axes[-1]
    out_shape = list(shape)
    out_shape[last_axis] = shape[last_axis] // 2 + 1
    dev_out = bf.ndarray(shape=out_shape, dtype='cf32', space='cuda')

    plan = Fft()
    plan.init(dev_in, dev_out, axes=axes)
    plan.execute(dev_in, dev_out)

    expected = gold_rfftn(host_in.astype(np.float32) / scale, axes=axes)
    np.testing.assert_allclose(dev_out.copy('system'), expected, RTOL, ATOL)
def run_test_c2c_impl(self, shape, axes, inverse=False, fftshift=False):
    """Check a complex-to-complex FFT against the reference implementation.

    Args:
        shape: Shape of the complex input array.
        axes: Axes to transform.
        inverse: If True, run and verify the inverse transform.
        fftshift: If True, build the plan with ``apply_fftshift`` and apply
            the matching shift to the reference (on the input for the
            inverse case, on the output otherwise).
    """
    shape = list(shape)
    shape[-1] *= 2  # For complex
    known_data = np.random.uniform(size=shape).astype(np.float32).view(
        np.complex64)
    idata = bf.ndarray(known_data, space='cuda')
    odata = bf.empty_like(idata)
    fft = Fft()
    fft.init(idata, odata, axes=axes, apply_fftshift=fftshift)
    fft.execute(idata, odata, inverse)
    if inverse:
        if fftshift:
            known_data = np.fft.ifftshift(known_data, axes=axes)
        # Note: Numpy applies normalization while CUFFT does not
        norm = reduce(lambda a, b: a * b,
                      [known_data.shape[d] for d in axes])
        known_result = gold_ifftn(known_data, axes=axes) * norm
    else:
        known_result = gold_fftn(known_data, axes=axes)
        if fftshift:
            known_result = np.fft.fftshift(known_result, axes=axes)
    # The unused debug temporaries that used to sit here were removed: they
    # copied the output to the host two extra times and divided by
    # `known_result` without ever being read.
    np.testing.assert_allclose(odata.copy('system'), known_result, RTOL, ATOL)
def run_test_r2c_dtype(self, shape, axes, dtype=np.float32, scale=1.,
                       misalign=0):
    """Check a real-to-complex FFT of ``dtype`` input held in managed memory.

    Args:
        shape: Tuple giving the shape of the real input.
        axes: Axes to transform; the last one is halved in the output.
        dtype: Element type the scaled random input is cast to.
        scale: Factor applied before the cast and divided out of the
            reference input afterwards.
        misalign: Number of leading elements of the last dimension that
            are padded in and then sliced off again.
    """
    # Normally distributed samples, scaled and cast to the requested dtype.
    samples = np.random.normal(size=shape).astype(np.float32)
    samples = (samples * scale).astype(dtype)

    # Force misaligned data: widen the last dimension, upload the padded
    # array, then drop the first `misalign` columns on both copies.
    widened = shape[:-1] + (shape[-1] + misalign, )
    padded = np.resize(samples, widened)
    dev_in = bf.ndarray(padded, space='cuda_managed')[..., misalign:]
    host_in = padded[..., misalign:]

    # A real-to-complex transform keeps n // 2 + 1 bins on the last axis.
    last_axis = axes[-1]
    out_shape = list(shape)
    out_shape[last_axis] = shape[last_axis] // 2 + 1
    dev_out = bf.ndarray(shape=out_shape, dtype='cf32', space='cuda_managed')

    plan = Fft()
    plan.init(dev_in, dev_out, axes=axes)
    plan.execute(dev_in, dev_out)
    # NOTE(review): presumably blocks until the transform has finished so
    # the output buffer can be read directly below -- confirm.
    stream_synchronize()

    expected = gold_rfftn(host_in.astype(np.float32) / scale, axes=axes)
    compare(dev_out, expected)
def run_test_r2c(self, shape, axes):
    """Check a float32 real-to-complex FFT against the reference.

    Args:
        shape: Shape of the real input array.
        axes: Axes to transform; the last one is halved in the output.
    """
    host_in = np.random.uniform(size=shape).astype(np.float32)
    dev_in = bf.ndarray(host_in, space='cuda')

    # A real-to-complex transform keeps n // 2 + 1 bins on the last axis.
    last_axis = axes[-1]
    out_shape = list(shape)
    out_shape[last_axis] = shape[last_axis] // 2 + 1
    dev_out = bf.ndarray(shape=out_shape, dtype='cf32', space='cuda')

    plan = Fft()
    plan.init(dev_in, dev_out, axes=axes)
    plan.execute(dev_in, dev_out)

    expected = gold_rfftn(host_in, axes=axes)
    np.testing.assert_allclose(dev_out.copy('system'), expected, RTOL, ATOL)
def run_test_c2r(self, shape, axes):
    """Check a complex-to-real FFT against the reference.

    Args:
        shape: Shape of the real output array.
        axes: Axes to transform; the last one is halved in the input.
    """
    # The complex input holds n // 2 + 1 bins along the last transformed axis.
    last_axis = axes[-1]
    in_shape = list(shape)
    in_shape[last_axis] = shape[last_axis] // 2 + 1
    in_shape[-1] *= 2  # For complex: two float32 values per complex64
    raw = np.random.uniform(size=in_shape).astype(np.float32)
    spectrum = raw.view(np.complex64)

    dev_in = bf.ndarray(spectrum, space='cuda')
    dev_out = bf.ndarray(shape=shape, dtype='f32', space='cuda')

    plan = Fft()
    plan.init(dev_in, dev_out, axes=axes)
    plan.execute(dev_in, dev_out)

    # Note: Numpy applies normalization while CUFFT does not
    norm = reduce(lambda acc, n: acc * n, [shape[d] for d in axes])
    expected = gold_irfftn(spectrum, axes=axes) * norm
    np.testing.assert_allclose(dev_out.copy('system'), expected, RTOL, ATOL)
def run_test_c2r_impl(self, shape, axes, fftshift=False):
    """Check a complex-to-real FFT (optionally shifted) against the reference.

    Args:
        shape: Nominal shape of the real output. Along the last transformed
            axis the actual output length is ``(n // 2 + 1 - 1) * 2``, i.e.
            ``n`` rounded down to an even number.
        axes: Axes to transform; the last one is halved in the input.
        fftshift: If True, build the plan with ``apply_fftshift`` and apply
            ``np.fft.ifftshift`` to the reference input.
    """
    ishape = list(shape)
    oshape = list(shape)
    ishape[axes[-1]] = shape[axes[-1]] // 2 + 1
    oshape[axes[-1]] = (ishape[axes[-1]] - 1) * 2
    ishape[-1] *= 2  # For complex
    known_data = np.random.normal(size=ishape).astype(
        np.float32).view(np.complex64)
    idata = bf.ndarray(known_data, space='cuda')
    odata = bf.ndarray(shape=oshape, dtype='f32', space='cuda')
    fft = Fft()
    fft.init(idata, odata, axes=axes, apply_fftshift=fftshift)
    fft.execute(idata, odata)
    # Note: Numpy applies normalization while CUFFT does not.
    # The factor must come from the lengths actually transformed (`oshape`),
    # not the requested `shape`: the two differ when the last transformed
    # axis has odd length.
    # NOTE(review): assumes gold_irfftn scales like numpy.fft.irfftn -- confirm.
    norm = reduce(lambda a, b: a * b, [oshape[d] for d in axes])
    if fftshift:
        known_data = np.fft.ifftshift(known_data, axes=axes)
    known_result = gold_irfftn(known_data, axes=axes) * norm
    compare(odata.copy('system'), known_result)
class FftBlock(TransformBlock):
    """Transform block that runs an FFT over selected axes of its input.

    The transform mode (``'r2c'``, ``'c2r'`` or ``'c2c'``) is chosen per
    sequence from the input dtype and the ``real_output`` flag. The FFT plan
    is rebuilt whenever the shape or strides of the input/output spans
    change.
    """
    # TODO: Add support for sizes (aka 's') parameter that defines transform
    #         length in each dimension (i.e., cropped/padded transforms).
    #         Should be able to do this using an input callback and padded
    #         output dims.

    def __init__(self, iring, axes, inverse=False, real_output=False,
                 axis_labels=None, apply_fftshift=False,
                 *args, **kwargs):
        """Store the FFT configuration and create the FFT object.

        Args:
            iring: Input ring/block; forwarded to the parent constructor.
            axes: A single axis (index or label) or a list/tuple of axes
                to transform.
            inverse: Passed as ``inverse`` when the FFT is executed.
            real_output: If True the output dtype is real, otherwise
                complex.
            axis_labels: A single label or a list/tuple of labels written
                to the transformed axes of the output header. ``None``
                (the default) leaves the existing labels untouched.
            apply_fftshift: Passed to the FFT plan as ``apply_fftshift``.
            *args, **kwargs: Forwarded to the parent constructor.
        """
        super(FftBlock, self).__init__(iring, *args, **kwargs)
        # Accept scalars, lists and tuples alike. The previous test
        # (`not isinstance(x, list) or isinstance(x, tuple)`) wrapped a
        # tuple into a one-element list because `not` binds tighter than
        # `or`.
        if isinstance(axes, (list, tuple)):
            axes = list(axes)
        else:
            axes = [axes]
        # Keep None as None so the `is not None` guard in on_sequence can
        # skip relabelling; wrapping it as [None] overwrote labels with
        # None and raised IndexError for multi-axis transforms.
        if axis_labels is not None:
            if isinstance(axis_labels, (list, tuple)):
                axis_labels = list(axis_labels)
            else:
                axis_labels = [axis_labels]
        self.specified_axes = axes
        self.real_output = real_output
        self.inverse = inverse
        self.axis_labels = axis_labels
        self.apply_fftshift = apply_fftshift
        self.space = self.irings[0].space
        self.fft = Fft()
        # Geometry of the most recently built plan; None until one exists.
        self.plan_ishape = None
        self.plan_oshape = None
        self.plan_istrides = None
        self.plan_ostrides = None

    def define_valid_input_spaces(self):
        """Return the tuple of accepted input memory spaces (only 'cuda')."""
        return ('cuda', )

    def on_sequence(self, iseq):
        """Build the output header for a new input sequence.

        Resolves the requested axes (labels are looked up in the input
        tensor's ``labels``), selects the transform mode, and adjusts the
        output dtype, shape, units, scales and labels accordingly.

        Args:
            iseq: Input sequence; only its ``header`` is read.

        Returns:
            A deep copy of the input header with the ``_tensor`` entry
            updated for the transformed data.

        Raises:
            KeyError: If the frame axis (the one whose shape entry is -1)
                is among the axes to transform.
        """
        ihdr = iseq.header
        itensor = ihdr['_tensor']
        # TODO: DataType cast should be done inside ring2
        #         **This tensor stuff generally needs to be cleaned up
        itype = DataType(itensor['dtype'])
        # TODO: This is slightly hacky; it needs to emulate the type casting
        #         that Bifrost does internally for the FFT.
        itype = itype.as_floating_point()
        # Get axis indices, allowing for lookup-by-label
        self.axes = [
            itensor['labels'].index(axis)
            if isinstance(axis, basestring) else axis
            for axis in self.specified_axes
        ]
        axes = self.axes
        # Transform length along each axis, in the same order as `axes`.
        shape = [itensor['shape'][ax] for ax in axes]
        otype = itype.as_real() if self.real_output else itype.as_complex()
        ohdr = deepcopy(ihdr)
        otensor = ohdr['_tensor']
        otensor['dtype'] = str(otype)
        if itype.is_real and otype.is_complex:
            self.mode = 'r2c'
        elif itype.is_complex and otype.is_real:
            self.mode = 'c2r'
        else:
            self.mode = 'c2c'
        frame_axis = itensor['shape'].index(-1)
        if frame_axis in axes:
            raise KeyError(
                "Cannot transform frame axis; reshape the data stream first")
        # Adjust output shape for real transforms
        if self.mode == 'r2c':
            # n real samples -> n // 2 + 1 complex bins on the last axis.
            otensor['shape'][axes[-1]] //= 2
            otensor['shape'][axes[-1]] += 1
        elif self.mode == 'c2r':
            # m complex bins -> (m - 1) * 2 real samples on the last axis;
            # the transform length used for the scales changes the same way.
            otensor['shape'][axes[-1]] -= 1
            otensor['shape'][axes[-1]] *= 2
            shape[-1] -= 1
            shape[-1] *= 2
        for i, (ax, length) in enumerate(zip(axes, shape)):
            if 'units' in otensor:
                units = otensor['units'][ax]
                otensor['units'][ax] = transform_units(units, -1)
            if 'scales' in otensor:
                otensor['scales'][ax][0] = 0  # TODO: Is this OK?
                scale = otensor['scales'][ax][1]
                otensor['scales'][ax][1] = 1. / (scale * length)
            if 'labels' in otensor and self.axis_labels is not None:
                otensor['labels'][ax] = self.axis_labels[i]
        return ohdr

    def on_data(self, ispan, ospan):
        """Run the FFT from ``ispan.data`` into ``ospan.data``.

        The plan is (re)initialised first if the shape or strides of either
        array differ from those the current plan was built for.

        Args:
            ispan: Input span; its ``data`` array is the FFT input.
            ospan: Output span; its ``data`` array receives the result.
        """
        idata = ispan.data
        odata = ospan.data
        # Check if shapes or strides have changed
        plan_is_stale = (idata.shape != self.plan_ishape or
                         odata.shape != self.plan_oshape or
                         idata.strides != self.plan_istrides or
                         odata.strides != self.plan_ostrides)
        if plan_is_stale:
            # (Re-)generate the FFT plan
            self.fft.init(idata, odata, axes=self.axes,
                          apply_fftshift=self.apply_fftshift)
            self.plan_ishape = idata.shape
            self.plan_oshape = odata.shape
            self.plan_istrides = idata.strides
            self.plan_ostrides = odata.strides
        size = self.fft.workspace_size
        # Temporary workspace is allocated for the duration of this call only.
        with self.get_temp_storage(self.space).allocate(size) as workspace:
            self.fft.execute_workspace(idata, odata,
                                       workspace.ptr, workspace.size,
                                       inverse=self.inverse)