def run_test_r2c_dtype(self, shape, axes, dtype=np.float32, scale=1., misalign=0):
    """Run an r2c FFT on input of type ``dtype`` and check it against gold_rfftn.

    Args:
        shape: Tuple giving the shape of the input array.
        axes: Axes handed to ``Fft.init`` and to the reference transform.
        dtype: NumPy dtype the scaled input is converted to.
        scale: Factor applied before the dtype conversion; the reference
            input is divided by it again.
        misalign: Number of elements added to the last axis and then
            sliced off the front of it.
    """
    # Uniform values in [-1, 1), scaled and stored as the requested dtype.
    samples = np.random.uniform(size=shape).astype(np.float32) * 2 - 1
    samples = (samples * scale).astype(dtype)

    # Force misaligned data
    widened = shape[:-1] + (shape[-1] + misalign, )
    # np.resize fills the larger array with repeated copies of the data.
    samples = np.resize(samples, widened)
    dev_in = bf.ndarray(samples, space='cuda')
    # Drop the leading `misalign` elements from both host and device arrays.
    samples = samples[..., misalign:]
    dev_in = dev_in[..., misalign:]

    # The last transformed axis has n // 2 + 1 entries in the output.
    out_shape = list(shape)
    out_shape[axes[-1]] = shape[axes[-1]] // 2 + 1
    dev_out = bf.ndarray(shape=out_shape, dtype='cf32', space='cuda')

    plan = Fft()
    plan.init(dev_in, dev_out, axes=axes)
    plan.execute(dev_in, dev_out)

    expected = gold_rfftn(samples.astype(np.float32) / scale, axes=axes)
    np.testing.assert_allclose(dev_out.copy('system'), expected, RTOL, ATOL)
def run_test_c2c_impl(self, shape, axes, inverse=False, fftshift=False):
    """Run a complex-to-complex FFT and check it against the reference.

    Args:
        shape: Shape of the complex64 input array.
        axes: Axes handed to ``Fft.init`` and to the reference transform.
        inverse: Forwarded to ``Fft.execute``; when true the result is
            compared with ``gold_ifftn`` instead of ``gold_fftn``.
        fftshift: Forwarded to ``Fft.init`` as ``apply_fftshift``; the
            reference is shifted with ``np.fft`` accordingly.

    Raises:
        AssertionError: If the output differs from the reference by more
            than RTOL / ATOL.
    """
    # Draw twice as many float32 values along the last axis so that the
    # complex64 view ends up with the requested shape.
    float_shape = list(shape)
    float_shape[-1] *= 2  # For complex
    known_data = np.random.uniform(size=float_shape).astype(np.float32).view(
        np.complex64)
    idata = bf.ndarray(known_data, space='cuda')
    odata = bf.empty_like(idata)
    fft = Fft()
    fft.init(idata, odata, axes=axes, apply_fftshift=fftshift)
    fft.execute(idata, odata, inverse)
    if inverse:
        if fftshift:
            # For the inverse direction the shift is undone on the input.
            known_data = np.fft.ifftshift(known_data, axes=axes)
        # Note: Numpy applies normalization while CUFFT does not
        norm = 1
        for d in axes:
            norm *= known_data.shape[d]
        known_result = gold_ifftn(known_data, axes=axes) * norm
    else:
        known_result = gold_fftn(known_data, axes=axes)
        if fftshift:
            known_result = np.fft.fftshift(known_result, axes=axes)
    # The original also built three unused debug values here (two of them
    # extra copies of odata); they had no effect on the check and are gone.
    np.testing.assert_allclose(odata.copy('system'), known_result, RTOL, ATOL)
def run_test_r2c_dtype(self, shape, axes, dtype=np.float32, scale=1., misalign=0):
    """Run an r2c FFT on ``cuda_managed`` input of type ``dtype`` and compare.

    Args:
        shape: Tuple giving the shape of the input array.
        axes: Axes handed to ``Fft.init`` and to the reference transform.
        dtype: NumPy dtype the scaled input is converted to.
        scale: Factor applied before the dtype conversion; the reference
            input is divided by it again.
        misalign: Number of elements added to the last axis and then
            sliced off the front of it.
    """
    # Normally distributed values, scaled and stored as the requested dtype.
    samples = np.random.normal(size=shape).astype(np.float32)
    samples = (samples * scale).astype(dtype)

    # Force misaligned data
    widened = shape[:-1] + (shape[-1] + misalign, )
    # np.resize fills the larger array with repeated copies of the data.
    samples = np.resize(samples, widened)
    dev_in = bf.ndarray(samples, space='cuda_managed')
    # Drop the leading `misalign` elements from both arrays.
    samples = samples[..., misalign:]
    dev_in = dev_in[..., misalign:]

    # The last transformed axis has n // 2 + 1 entries in the output.
    out_shape = list(shape)
    out_shape[axes[-1]] = shape[axes[-1]] // 2 + 1
    dev_out = bf.ndarray(shape=out_shape, dtype='cf32', space='cuda_managed')

    plan = Fft()
    plan.init(dev_in, dev_out, axes=axes)
    plan.execute(dev_in, dev_out)
    # NOTE(review): presumably needed because the managed output is handed
    # to compare() without an explicit copy to 'system' -- confirm.
    stream_synchronize()

    expected = gold_rfftn(samples.astype(np.float32) / scale, axes=axes)
    compare(dev_out, expected)
def run_test_r2c(self, shape, axes):
    """Run an r2c FFT on random float32 input and check it against gold_rfftn.

    Args:
        shape: Shape of the real input array.
        axes: Axes handed to ``Fft.init`` and to the reference transform.
    """
    samples = np.random.uniform(size=shape).astype(np.float32)
    dev_in = bf.ndarray(samples, space='cuda')

    # The last transformed axis has n // 2 + 1 entries in the output.
    out_shape = list(shape)
    final_axis = axes[-1]
    out_shape[final_axis] = shape[final_axis] // 2 + 1
    dev_out = bf.ndarray(shape=out_shape, dtype='cf32', space='cuda')

    plan = Fft()
    plan.init(dev_in, dev_out, axes=axes)
    plan.execute(dev_in, dev_out)

    expected = gold_rfftn(samples, axes=axes)
    host_out = dev_out.copy('system')
    np.testing.assert_allclose(host_out, expected, RTOL, ATOL)
def run_test_c2r(self, shape, axes):
    """Run a complex-to-real FFT and check it against gold_irfftn.

    Args:
        shape: Nominal shape of the real output. Along ``axes[-1]`` the
            output actually allocated has length ``2 * (n // 2)``, i.e. an
            odd ``n`` is rounded down to the next even length.
        axes: Axes handed to ``Fft.init`` and to the reference transform.

    Raises:
        AssertionError: If the output differs from the reference by more
            than RTOL / ATOL.
    """
    ishape = list(shape)
    oshape = list(shape)
    # n // 2 + 1 complex bins go in; a default-sized inverse real transform
    # of m bins has length 2 * (m - 1). Allocating the output with the raw
    # `shape` (as before) could never match the reference for odd n.
    ishape[axes[-1]] = shape[axes[-1]] // 2 + 1
    oshape[axes[-1]] = (ishape[axes[-1]] - 1) * 2
    ishape[-1] *= 2  # For complex
    known_data = np.random.uniform(size=ishape).astype(np.float32).view(
        np.complex64)
    idata = bf.ndarray(known_data, space='cuda')
    odata = bf.ndarray(shape=oshape, dtype='f32', space='cuda')
    fft = Fft()
    fft.init(idata, odata, axes=axes)
    fft.execute(idata, odata)
    # Note: Numpy applies normalization while CUFFT does not
    # The factor is the number of points actually transformed (oshape).
    norm = 1
    for d in axes:
        norm *= oshape[d]
    known_result = gold_irfftn(known_data, axes=axes) * norm
    np.testing.assert_allclose(odata.copy('system'), known_result, RTOL, ATOL)
def run_test_c2r_impl(self, shape, axes, fftshift=False):
    """Run a complex-to-real FFT (optionally shifted) and compare to reference.

    Args:
        shape: Nominal shape of the real output. Along ``axes[-1]`` the
            output actually allocated has length ``2 * (n // 2)``.
        axes: Axes handed to ``Fft.init`` and to the reference transform.
        fftshift: Forwarded to ``Fft.init`` as ``apply_fftshift``; when true
            the reference input is passed through ``np.fft.ifftshift``.
    """
    ishape = list(shape)
    oshape = list(shape)
    # n // 2 + 1 complex bins in, 2 * (bins - 1) real samples out.
    ishape[axes[-1]] = shape[axes[-1]] // 2 + 1
    oshape[axes[-1]] = (ishape[axes[-1]] - 1) * 2
    ishape[-1] *= 2  # For complex
    known_data = np.random.normal(size=ishape).astype(np.float32).view(np.complex64)
    idata = bf.ndarray(known_data, space='cuda')
    odata = bf.ndarray(shape=oshape, dtype='f32', space='cuda')
    fft = Fft()
    fft.init(idata, odata, axes=axes, apply_fftshift=fftshift)
    fft.execute(idata, odata)
    # Note: Numpy applies normalization while CUFFT does not
    # Use the size actually transformed (oshape); it differs from `shape`
    # when the length along axes[-1] is odd, which previously gave a
    # wrongly scaled reference.
    norm = 1
    for d in axes:
        norm *= oshape[d]
    if fftshift:
        known_data = np.fft.ifftshift(known_data, axes=axes)
    known_result = gold_irfftn(known_data, axes=axes) * norm
    compare(odata.copy('system'), known_result)