def test_atomic_add3(self):
    """Run ``atomic_add3`` on a 4x8 uint32 array and expect each element +1.

    The input is 32 random integers drawn from [0, 32), cast to uint32 and
    reshaped to (4, 8).  The kernel is compiled with the signature
    ``void(uint32[:,:])`` and launched with the configuration
    ``[1, (4, 8)]``; afterwards every element must equal its original
    value plus one.
    """
    values = np.random.randint(0, 32, size=32)
    data = values.astype(np.uint32).reshape(4, 8)
    # Keep an untouched copy: the kernel is expected to modify ``data``
    # in place.
    snapshot = data.copy()

    kernel = hsa.jit('void(uint32[:,:])')(atomic_add3)
    kernel[1, (4, 8)](data)

    expected = snapshot + 1
    self.assertTrue(np.all(data == expected))
def test_atomic_add(self):
    """Run ``atomic_add`` on 32 uint32 values and expect a histogram back.

    The input holds 32 random integers drawn from [0, 32).  After the
    kernel (signature ``void(uint32[:])``, launch configuration
    ``[1, 32]``) has run, slot ``v`` of the array must contain the number
    of times ``v`` occurred in the original input.
    """
    data = np.random.randint(0, 32, size=32).astype(np.uint32)
    # Keep an untouched copy: the reference result is derived from the
    # original values, while ``data`` is handed to the kernel.
    snapshot = data.copy()

    kernel = hsa.jit('void(uint32[:])')(atomic_add)
    kernel[1, 32](data)

    # Reference result computed on the host: count occurrences of each
    # original value.
    expected = np.zeros(32, dtype=np.uint32)
    for value in snapshot:
        expected[value] += 1

    self.assertTrue(np.all(data == expected))
def test_autojit_kernel(self):
    """Compile ``copy_kernel`` without a signature and check its output.

    ``hsa.jit`` is applied directly to the function (no explicit
    signature).  The kernel is launched through ``forall`` sized by the
    output array, and the output must then equal the input
    ``arange(10)``.
    """
    kernel = hsa.jit(copy_kernel)

    src = np.arange(10)
    dst = np.zeros_like(src)

    launch = kernel.forall(dst.size)
    launch(dst, src)

    np.testing.assert_equal(src, dst)
def _compile_kernel(self, fnobj, sig):
    """Apply ``hsa.jit`` with signature ``sig`` to ``fnobj``.

    Parameters
    ----------
    fnobj
        The function object to compile.
    sig
        The signature passed to ``hsa.jit``.

    Returns
    -------
    Whatever ``hsa.jit(sig)`` returns when applied to ``fnobj``.
    """
    decorate = hsa.jit(sig)
    return decorate(fnobj)
def _compile_core(self, sig):
    """Compile ``self.pyfunc`` as a device function for ``sig``.

    Parameters
    ----------
    sig
        The signature passed to ``hsa.jit``.

    Returns
    -------
    tuple
        ``(device_function, return_type)`` where ``return_type`` is read
        from ``device_function.cres.signature.return_type``.
    """
    compile_device = hsa.jit(sig, device=True)
    device_fn = compile_device(self.pyfunc)
    return_type = device_fn.cres.signature.return_type
    return device_fn, return_type
def _get_globals(self, sig):
    """Build a globals dict exposing ``hsa`` and the compiled core function.

    ``self.pyfunc`` is compiled as a device function for ``sig``.  The
    result is a copy of the Python function's ``__globals__`` extended
    with two extra entries: ``'__hsa__'`` (the ``hsa`` module object) and
    ``'__core__'`` (the compiled device function).  The original
    ``__globals__`` mapping is not modified.

    Parameters
    ----------
    sig
        The signature passed to ``hsa.jit``.

    Returns
    -------
    dict
        The extended copy of the function's globals.
    """
    # NOTE(review): this method reads both ``self.pyfunc`` and
    # ``self.py_func``; presumably they refer to the same function --
    # confirm against the enclosing class.
    core_fn = hsa.jit(sig, device=True)(self.pyfunc)

    namespace = self.py_func.__globals__.copy()
    namespace['__hsa__'] = hsa
    namespace['__core__'] = core_fn
    return namespace