def run_simple_test(self, axes, dtype):
    """Transpose a fixed 23x37x51 array in the 'cuda' space and check it.

    Args:
        axes: Axis permutation given to both ``bf.transpose.transpose``
            and ``numpy.ndarray.transpose``.
        dtype: Data type handed to ``bf.ndarray`` for the device input.
    """
    dims = (23, 37, 51)
    # 23 * 37 * 51 == 43401 elements; the modulo keeps every value below 251.
    host_input = np.arange(23 * 37 * 51).reshape(dims) % 251
    expected = host_input.transpose(axes)

    device_input = bf.ndarray(host_input, dtype=dtype, space='cuda')
    device_output = bf.empty_like(device_input.transpose(axes))
    bf.transpose.transpose(device_output, device_input, axes)

    np.testing.assert_equal(device_output.copy('system'), expected)
def test_3d_initial(self):
    """Run ``Fir`` once on random 3-D complex data and check every series.

    Each ``[:, i, j]`` slice of the output is compared (via ``compare``)
    with ``lfilter`` applied to the same slice of the input, starting from
    the initial state returned by ``lfiltic``.
    """
    shape = self.shape3D
    # Viewing float32 as complex64 pairs up values along the last axis.
    known_data = np.random.normal(size=shape).astype(np.float32).view(
        np.complex64)
    idata = bf.ndarray(known_data, space='cuda')
    odata = bf.empty_like(idata)

    # Give every (i, j) series its own copy of the taps: append a length-1
    # axis, repeat along it, then fold the repeats into the two trailing
    # axes.  NOTE(review): assumes self.coeffs is 1-D -- confirm.
    coeffs = self.coeffs * 1.0
    coeffs.shape += (1, )
    coeffs = np.repeat(coeffs, idata.shape[1] * idata.shape[2], axis=1)
    coeffs.shape = (coeffs.shape[0], idata.shape[1], idata.shape[2])
    coeffs = bf.ndarray(coeffs, space='cuda')

    fir = Fir()
    fir.init(coeffs, 1)
    fir.execute(idata, odata)
    odata = odata.copy('system')

    # ``range`` (not the Python-2-only ``xrange``) keeps this runnable on
    # Python 3 and matches the other tests in this file.
    for i in range(known_data.shape[1]):
        for j in range(known_data.shape[2]):
            zf = lfiltic(self.coeffs, 1.0, 0.0)
            known_result, zf = lfilter(self.coeffs, 1.0,
                                       known_data[:, i, j], zi=zf)
            compare(odata[:, i, j], known_result)
def test_polarisation_products(self):
    """Check a ``bf.map`` kernel that forms products of two complex columns.

    For each row the kernel writes ``(|x|^2, |y|^2)`` as the real and
    imaginary parts of output column 0 and ``x * conj(y)`` into output
    column 1; the same quantities are rebuilt with NumPy and compared
    exactly.
    """
    def squared_magnitude(z):
        return z.real * z.real + z.imag * z.imag

    n = 89
    real = np.random.randint(-127, 128, size=(n, 2)).astype(np.float32)
    imag = np.random.randint(-127, 128, size=(n, 2)).astype(np.float32)
    host_input = real + 1j * imag

    dev_input = bf.asarray(host_input, space='cuda')
    dev_output = bf.empty_like(dev_input)
    kernel = ''' auto x = a(_,0); auto y = a(_,1); b(_,0).assign(x.mag2(), y.mag2()); b(_,1) = x*y.conj(); '''
    # The same kernel is issued three times in a row.
    for _ in range(3):
        bf.map(kernel,
               shape=dev_output.shape[:-1],
               data={'a': dev_input, 'b': dev_output})
    result = dev_output.copy('system')

    first = host_input[..., 0]
    second = host_input[..., 1]
    gold = np.empty_like(host_input)
    gold[..., 0] = squared_magnitude(first) + 1j * squared_magnitude(second)
    gold[..., 1] = first * second.conj()
    np.testing.assert_equal(result, gold)
def run_simple_test(self, axes, dtype, shape):
    """Transpose an array of the given shape in the 'cuda' space.

    Args:
        axes: Axis permutation given to both ``bf.transpose.transpose``
            and ``numpy.ndarray.transpose``.
        dtype: NumPy dtype the host input is cast to before upload.
        shape: Shape of the input array.
    """
    # Total element count is the product of all dimensions.
    count = reduce(lambda acc, dim: acc * dim, shape)
    ramp = np.arange(count).reshape(shape) % 251
    host_input = ramp.astype(dtype)
    expected = host_input.transpose(axes)

    device_input = bf.ndarray(host_input, space='cuda')
    device_output = bf.empty_like(device_input.transpose(axes))
    bf.transpose.transpose(device_output, device_input, axes)

    np.testing.assert_array_equal(device_output.copy('system'), expected)
def test_shift(self):
    """Check that indexing by ``_ - shape/2`` in ``bf.map`` matches fftshift.

    A random int32 array of shape (55, 66, 77) is shifted by the kernel
    and the result is compared with ``np.fft.fftshift`` of the input.
    """
    dims = (55, 66, 77)
    host_values = np.random.randint(65536, size=dims).astype(np.int32)

    dev_in = bf.asarray(host_values, space='cuda')
    dev_out = bf.empty_like(dev_in)
    bf.map("b = a(_-a.shape()/2)", a=dev_in, b=dev_out)

    # Both arrays are read back so the comparison happens on the host.
    original = dev_in.copy('system')
    shifted = dev_out.copy('system')
    np.testing.assert_equal(shifted, np.fft.fftshift(original))
def test_manydim(self):
    """Apply ``b = a+1`` through ``bf.map`` to a sliced 8-dimensional array.

    The input starts as a 3x3x3x3x3x3x3x3 float32 ramp; axis 4 is then
    sliced down to its first two entries before the kernel is run.
    """
    ramp = np.arange(3**8).reshape([3] * 8).astype(np.float32)
    dev_full = bf.asarray(ramp, space='cuda')
    # Keep only the first two entries along axis 4.
    dev_in = dev_full[:, :, :, :, :2, :, :, :]
    dev_out = bf.empty_like(dev_in)

    # The same kernel is issued three times in a row.
    for _ in range(3):
        bf.map("b = a+1", data={'a': dev_in, 'b': dev_out})

    host_in = dev_in.copy('system')
    host_out = dev_out.copy('system')
    np.testing.assert_equal(host_out, host_in + 1)
def test_scalar(self):
    """Check ``bf.map`` with scalar arguments passed as keywords.

    The kernel computes ``(x-m)/s`` with ``m=1`` and ``s=3`` and is
    compared with NumPy floor division of the same values.
    """
    n = 7919
    offset = 1
    divisor = 3
    # Note: Python integer division rounds to -inf, while C rounds toward 0
    #         We avoid the problem here by using only positive values
    host_values = np.random.randint(1, 256, size=n)

    dev_in = bf.asarray(host_values, space='cuda')
    dev_out = bf.empty_like(dev_in)
    bf.map("y = (x-m)/s", x=dev_in, y=dev_out, m=offset, s=divisor)

    read_back = dev_in.copy('system')
    result = dev_out.copy('system')
    np.testing.assert_equal(result, (read_back - offset) // divisor)
def test_scalar(self):
    """Check ``bf.map`` with scalar arguments passed through ``data``.

    The kernel computes ``(x-m)/s`` with ``m=1`` and ``s=3``, is issued
    three times, and the result is compared with NumPy floor division of
    the same values.
    """
    n = 7919
    # Note: Python integer division rounds to -inf, while C rounds toward 0
    #         We avoid the problem here by using only positive values
    x = np.random.randint(1, 256, size=n)
    x = bf.asarray(x, space='cuda')
    y = bf.empty_like(x)
    # ``range`` (not the Python-2-only ``xrange``) keeps this runnable on
    # Python 3 and matches the other tests in this file.
    for _ in range(3):
        bf.map("y = (x-m)/s", data={'x': x, 'y': y, 'm': 1, 's': 3})
    x = x.copy('system')
    y = y.copy('system')
    np.testing.assert_equal(y, (x - 1) // 3)
def run_reduce_test(self, shape, axis, n, op='sum', dtype=np.float32):
    """Compare ``bf.reduce`` on 'cuda_managed' arrays with a host reference.

    Args:
        shape: Shape of the random input array.
        axis: Axis argument forwarded to the reference helper.
        n: Factor argument forwarded to the reference helper.
        op: Reduction name given to ``bf.reduce``.  Names starting with
            ``'pwr'`` use ``pwrscrunch`` with the remainder of the name
            as the ``NP_OPS`` key; all others use ``scrunch``.
        dtype: dtype of the array uploaded for reduction.
    """
    # Random values in (-127, 127), truncated to int8 before the final cast.
    scaled = (np.random.random(size=shape) * 2 - 1) * 127
    host_input = scaled.astype(np.int8).astype(dtype)

    is_power_op = op[:3] == 'pwr'
    reference = pwrscrunch if is_power_op else scrunch
    np_op = NP_OPS[op[3:]] if is_power_op else NP_OPS[op]
    expected = reference(host_input.astype(np.float32), n, axis, np_op)

    dev_input = bf.asarray(host_input, space='cuda_managed')
    dev_output = bf.empty_like(expected, space='cuda_managed')
    bf.reduce(dev_input, dev_output, op)
    # Synchronise before the output array is read directly by the assert.
    stream_synchronize()
    np.testing.assert_allclose(dev_output, expected)
def run_simple_test(self, x, funcstr, func):
    """Run ``funcstr`` through ``bf.map`` and compare with ``func``.

    Args:
        x: Input array; may be a NumPy array or a ``bf.ndarray``.
        funcstr: Kernel source reading ``x`` and writing ``y``.
        func: Host-side reference callable applied to the input.
    """
    dev_in = bf.asarray(x, 'cuda')
    dev_out = bf.empty_like(dev_in)
    # The input is marked read-only before the kernel runs.
    dev_in.flags['WRITEABLE'] = False
    dev_in.bf.immutable = True  # TODO: Is this actually doing anything? (flags is, just not sure about bf.immutable)
    bf.map(funcstr, x=dev_in, y=dev_out)

    host_in = dev_in.copy('system')
    host_out = dev_out.copy('system')
    # A bf.ndarray input is swapped for its host copy before calling func.
    reference_input = host_in if isinstance(x, bf.ndarray) else x
    # Note: Using func(x) is dangerous because bf.ndarray does things like
    #         lazy .conj(), which break when used as if it were np.ndarray.
    np.testing.assert_equal(host_out, func(reference_input))
def run_simple_test(self, x, funcstr, func):
    """Run ``funcstr`` through ``bf.map`` on 'cuda_managed' arrays.

    Args:
        x: Input array; may be a NumPy array or a ``bf.ndarray``.
        funcstr: Kernel source reading ``x`` and writing ``y``.
        func: Host-side reference callable applied to the input.
    """
    managed_in = bf.asarray(x, 'cuda_managed')
    managed_out = bf.empty_like(managed_in)
    # The input is marked read-only before the kernel runs.
    managed_in.flags['WRITEABLE'] = False
    managed_in.bf.immutable = True  # TODO: Is this actually doing anything? (flags is, just not sure about bf.immutable)

    # The same kernel is issued three times in a row.
    for _ in range(3):
        bf.map(funcstr, {'x': managed_in, 'y': managed_out})
    # Synchronise before the arrays are read directly, with no host copy.
    stream_synchronize()

    # A bf.ndarray input is swapped for the managed array made above.
    reference_input = managed_in if isinstance(x, bf.ndarray) else x
    # Note: Using func(x) is dangerous because bf.ndarray does things like
    #         lazy .conj(), which break when used as if it were np.ndarray.
    np.testing.assert_equal(managed_out, func(reference_input))
def run_reduce_test(self, shape, axis, n, op='sum', dtype=np.float32):
    """Compare ``bf.reduce`` on 'cuda' arrays with the ``scrunch`` reference.

    Args:
        shape: Shape of the random input array.
        axis: Axis argument forwarded to ``scrunch``.
        n: Factor argument forwarded to ``scrunch``.
        op: Reduction name given to ``bf.reduce`` and used as the
            ``NP_OPS`` key for the reference.
        dtype: dtype of the array uploaded for reduction.
    """
    # Random values in (-127, 127), truncated to int8 before the final cast.
    scaled = (np.random.random(size=shape) * 2 - 1) * 127
    host_input = scaled.astype(np.int8).astype(dtype)
    expected = scrunch(host_input.astype(np.float32), n, axis, NP_OPS[op])

    dev_input = bf.asarray(host_input, space='cuda')
    dev_output = bf.empty_like(expected, space='cuda')
    bf.reduce(dev_input, dev_output, op)
    # NOTE: a disabled throughput benchmark (repeated bf.reduce calls timed
    # between stream synchronisations, reported in GB/s) used to sit here
    # as commented-out code.

    result = dev_output.copy('system')
    np.testing.assert_allclose(result, expected)
def run_test_c2c_impl(self, shape, axes, inverse=False):
    """Run a complex-to-complex ``Fft`` and compare with the gold transform.

    Args:
        shape: Shape of the complex input array.
        axes: Axes to transform over.
        inverse: When true, run the inverse transform and compare with
            ``gold_ifftn`` rescaled by the transform size.
    """
    real_shape = list(shape)
    real_shape[-1] *= 2  # For complex
    host_input = np.random.uniform(size=real_shape).astype(np.float32).view(
        np.complex64)

    dev_input = bf.ndarray(host_input, space='cuda')
    dev_output = bf.empty_like(dev_input)
    plan = Fft()
    plan.init(dev_input, dev_output, axes=axes)
    plan.execute(dev_input, dev_output, inverse)

    if not inverse:
        expected = gold_fftn(host_input, axes=axes)
    else:
        # Note: Numpy applies normalization while CUFFT does not
        transform_size = reduce(lambda acc, dim: acc * dim,
                                [host_input.shape[d] for d in axes])
        expected = gold_ifftn(host_input, axes=axes) * transform_size

    np.testing.assert_allclose(dev_output.copy('system'), expected,
                               RTOL, ATOL)
def run_test_c2c_impl(self, shape, axes, inverse=False, fftshift=False):
    """Run a complex-to-complex ``Fft`` and compare with the gold transform.

    Args:
        shape: Shape of the complex input array.
        axes: Axes to transform over.
        inverse: When true, run the inverse transform and compare with
            ``gold_ifftn`` rescaled by the transform size.
        fftshift: Forwarded to ``Fft.init`` as ``apply_fftshift``.  The
            reference applies ``np.fft.ifftshift`` to the input for an
            inverse transform and ``np.fft.fftshift`` to the output for
            a forward one.
    """
    shape = list(shape)
    shape[-1] *= 2  # For complex
    known_data = np.random.normal(size=shape).astype(np.float32).view(
        np.complex64)
    idata = bf.ndarray(known_data, space='cuda')
    odata = bf.empty_like(idata)
    fft = Fft()
    fft.init(idata, odata, axes=axes, apply_fftshift=fftshift)
    fft.execute(idata, odata, inverse)

    if inverse:
        if fftshift:
            known_data = np.fft.ifftshift(known_data, axes=axes)
        # Note: Numpy applies normalization while CUFFT does not
        norm = reduce(lambda a, b: a * b,
                      [known_data.shape[d] for d in axes])
        known_result = gold_ifftn(known_data, axes=axes) * norm
    else:
        known_result = gold_fftn(known_data, axes=axes)
        if fftshift:
            known_result = np.fft.fftshift(known_result, axes=axes)

    # The unused debugging locals that used to precede this call were
    # dropped: they added two extra device-to-host copies and a throwaway
    # element-wise division by ``known_result``.
    compare(odata.copy('system'), known_result)
def test_2d_initial(self):
    """Run ``Fir`` once on random 2-D complex data and check every column.

    Each ``[:, i]`` column of the output is compared (via ``compare``)
    with ``lfilter`` applied to the same column of the input, starting
    from the initial state returned by ``lfiltic``.
    """
    shape = self.shape2D
    # Viewing float32 as complex64 pairs up values along the last axis.
    host_input = np.random.normal(size=shape).astype(np.float32).view(
        np.complex64)
    dev_input = bf.ndarray(host_input, space='cuda_managed')
    dev_output = bf.empty_like(dev_input)

    # Give every column its own copy of the taps: append a length-1 axis
    # and repeat along it.  NOTE(review): assumes self.coeffs is 1-D.
    column_count = dev_input.shape[1]
    taps = self.coeffs * 1.0
    taps = np.repeat(taps.reshape(taps.shape + (1, )), column_count, axis=1)
    taps = taps.reshape((taps.shape[0], column_count))
    dev_taps = bf.ndarray(taps, space='cuda_managed')

    fir = Fir()
    fir.init(dev_taps, 1)
    fir.execute(dev_input, dev_output)
    # Synchronise before the output is read directly, with no host copy.
    stream_synchronize()

    for col in range(host_input.shape[1]):
        initial_state = lfiltic(self.coeffs, 1.0, 0.0)
        expected, _ = lfilter(self.coeffs, 1.0, host_input[:, col],
                              zi=initial_state)
        compare(dev_output[:, col], expected)