def run_complex_reduce_test(self, shape, axis, n, op='sum', dtype=np.complex64):
    """Compare ``bf.reduce`` on complex input against a NumPy reference.

    Random complex data of the given ``shape`` is built from integer-valued
    real and imaginary parts, reduced on the host with ``scrunch`` (or
    ``pwrscrunch`` when ``op`` begins with ``'pwr'``), and then reduced with
    ``bf.reduce`` in the ``'cuda'`` space. The device result is copied to the
    ``'system'`` space and checked with ``np.testing.assert_allclose``.

    Args:
        shape: Shape of the random input array.
        axis: Axis argument forwarded to the reference reduction.
        n: Reduction factor forwarded to the reference reduction.
        op: Reduction name. A ``'pwr'`` prefix selects ``pwrscrunch`` and the
            remainder of the name is the ``NP_OPS`` key; otherwise the whole
            name is the ``NP_OPS`` key.
        dtype: Element type the random samples are cast to.

    Raises:
        AssertionError: If the device result differs from the reference by
            more than the relative tolerance (1e-3 for ``'pwr'`` ops,
            1e-7 otherwise).
    """
    def draw_component():
        # Scale uniform samples to (-127, 127), pass them through int8 so the
        # values are whole numbers, then cast to the requested dtype.
        scaled = (np.random.random(size=shape) * 2 - 1) * 127
        return scaled.astype(np.int8).astype(dtype)

    # Draw order matters for reproducibility under a fixed seed: real first.
    real_part = draw_component()
    imag_part = draw_component()
    a = real_part + 1j * imag_part

    is_power_op = op[:3] == 'pwr'
    reference_input = a.astype(np.complex64)
    if is_power_op:
        power = pwrscrunch(reference_input, n, axis, NP_OPS[op[3:]])
        b_gold = power.astype(np.float32)
    else:
        b_gold = scrunch(reference_input, n, axis, NP_OPS[op])

    a = bf.asarray(a, space='cuda')
    b = bf.empty_like(b_gold, space='cuda')
    bf.reduce(a, b, op)

    # Bring the device result back to host memory before comparing.
    b = b.copy('system')
    tolerance = 1e-3 if is_power_op else 1e-7
    np.testing.assert_allclose(b, b_gold, rtol=tolerance)
def run_reduce_test(self, shape, axis, n, op='sum', dtype=np.float32):
    """Compare ``bf.reduce`` on real input against a NumPy reference.

    Random integer-valued data of the given ``shape`` is reduced on the host
    with ``scrunch`` (or ``pwrscrunch`` when ``op`` begins with ``'pwr'``) and
    then reduced with ``bf.reduce`` using arrays in the ``'cuda_managed'``
    space. After ``stream_synchronize()`` the output array is compared
    directly against the reference with ``np.testing.assert_allclose``.

    Args:
        shape: Shape of the random input array.
        axis: Axis argument forwarded to the reference reduction.
        n: Reduction factor forwarded to the reference reduction.
        op: Reduction name. A ``'pwr'`` prefix selects ``pwrscrunch`` and the
            remainder of the name is the ``NP_OPS`` key; otherwise the whole
            name is the ``NP_OPS`` key.
        dtype: Element type the random samples are cast to.

    Raises:
        AssertionError: If the result differs from the reference beyond the
            default tolerance of ``assert_allclose``.
    """
    # Scale uniform samples to (-127, 127), pass them through int8 so the
    # values are whole numbers, then cast to the requested dtype.
    uniform = np.random.random(size=shape) * 2 - 1
    a = (uniform * 127).astype(np.int8).astype(dtype)

    # Pick the host-side reference routine and the NP_OPS key for this op.
    reference, np_key = (pwrscrunch, op[3:]) if op[:3] == 'pwr' else (scrunch, op)
    b_gold = reference(a.astype(np.float32), n, axis, NP_OPS[np_key])

    a = bf.asarray(a, space='cuda_managed')
    b = bf.empty_like(b_gold, space='cuda_managed')
    bf.reduce(a, b, op)

    # Wait for the reduction to finish before reading the output array.
    stream_synchronize()
    np.testing.assert_allclose(b, b_gold)
def on_data(self, ispan, ospan):
    """Reduce the input span's data into the output span's data.

    Args:
        ispan: Input span; its ``data`` attribute is the reduction source.
        ospan: Output span; its ``data`` attribute is the reduction target.

    The reduction operation is taken from ``self.op`` and the work is
    delegated to ``bf.reduce``.
    """
    source = ispan.data
    target = ospan.data
    bf.reduce(source, target, self.op)