def grid_expand(ndim):
    """grid(ndim)

    Return the absolute position of the current thread in the entire grid
    of blocks.

    *ndim* should match the number of dimensions declared when the kernel
    was instantiated.  When *ndim* is 1 the result is a single integer;
    when it is 2 or 3 the result is a tuple holding that many integers.

    The first integer is computed as::

        cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x

    and the other two indices are computed the same way from the ``y`` and
    ``z`` attributes.

    Raises ``ValueError`` if *ndim* is not 1, 2 or 3.
    """
    # Pick the intrinsic name together with the type it yields.
    if ndim == 1:
        intrinsic_name, result_type = "ptx.grid.1d", types.int32
    elif ndim == 2:
        intrinsic_name = "ptx.grid.2d"
        result_type = types.UniTuple(types.int32, 2)
    elif ndim == 3:
        intrinsic_name = "ptx.grid.3d"
        result_type = types.UniTuple(types.int32, 3)
    else:
        raise ValueError('argument can only be 1, 2, 3')

    # The intrinsic takes the dimension count as its single intp argument.
    sig = typing.signature(result_type, types.intp)
    return ir.Intrinsic(intrinsic_name, sig, args=[ndim])
def local_array(shape, dtype):
    """Build the ``ptx.lmem.alloc`` intrinsic for an array of *dtype*.

    *shape* is first passed through ``_legalize_shape``; the length of the
    result is used as the number of dimensions.  The intrinsic's signature
    returns a ``types.Array`` of *dtype* with that many dimensions and
    layout ``'C'``, and accepts a uniform tuple of ``intp`` (one entry per
    dimension) followed by an argument typed ``types.Any``.
    """
    legal_shape = _legalize_shape(shape)
    rank = len(legal_shape)

    array_type = types.Array(dtype, rank, 'C')
    shape_type = types.UniTuple(types.intp, rank)
    signature = typing.signature(array_type, shape_type, types.Any)

    return ir.Intrinsic("ptx.lmem.alloc", signature, args=(legal_shape, dtype))
def gridsize_expand(ndim):
    """
    Return the absolute size (or shape) in threads of the entire grid of
    blocks.

    *ndim* should match the number of dimensions declared when the kernel
    was instantiated.  The intrinsic is typed as a single integer when
    *ndim* is 1 and as a tuple of *ndim* integers when it is 2 or 3.

    The first integer is computed as::

        cuda.blockDim.x * cuda.gridDim.x

    and the other two indices are computed the same way from the ``y`` and
    ``z`` attributes.

    Raises ``ValueError`` if *ndim* is not 1, 2 or 3.
    """
    # Pick the intrinsic name together with the type it yields.
    if ndim == 1:
        intrinsic_name, result_type = "ptx.gridsize.1d", types.int32
    elif ndim == 2:
        intrinsic_name = "ptx.gridsize.2d"
        result_type = types.UniTuple(types.int32, 2)
    elif ndim == 3:
        intrinsic_name = "ptx.gridsize.3d"
        result_type = types.UniTuple(types.int32, 3)
    else:
        raise ValueError('argument can only be 1, 2 or 3')

    # The intrinsic takes the dimension count as its single intp argument.
    sig = typing.signature(result_type, types.intp)
    return ir.Intrinsic(intrinsic_name, sig, args=[ndim])
def _expand_non_callable_macro(self, macro, loc):
    """
    Return the IR expression obtained by expanding a non-callable macro.

    An ``ir.Intrinsic`` is built from ``macro.name`` and ``macro.func``
    with no arguments, and wrapped in a zero-argument ``ir.Expr.call``
    located at *loc*.
    """
    no_arg_intrinsic = ir.Intrinsic(macro.name, macro.func, args=())
    return ir.Expr.call(
        func=no_arg_intrinsic,
        args=(),
        kws=(),
        loc=loc,
    )
def shared_array(shape, dtype):
    """Build the ``hsail.smem.alloc`` intrinsic for an array of *dtype*.

    *shape* is normalised by ``_legalize_shape`` and its length is taken as
    the dimension count.  The resulting signature returns a ``types.Array``
    of *dtype* with that dimension count and layout ``"C"``; its arguments
    are a uniform ``intp`` tuple (one entry per dimension) and a value
    typed ``types.Any``.
    """
    dims = _legalize_shape(shape)
    n_dims = len(dims)

    return_type = types.Array(dtype, n_dims, "C")
    dims_type = types.UniTuple(types.intp, n_dims)
    alloc_sig = typing.signature(return_type, dims_type, types.Any)

    return ir.Intrinsic("hsail.smem.alloc", alloc_sig, args=(dims, dtype))
def const_array_like(ndarray):
    """Build the ``ptx.cmem.arylike`` intrinsic for *ndarray*.

    The type of *ndarray* is resolved through
    ``CUDATargetDesc.typingctx.resolve_argument_type`` and used as both the
    return type and the single argument type of the intrinsic's signature.
    """
    # Imported at call time, as the original does.
    # NOTE(review): presumably this avoids an import cycle -- confirm.
    from .descriptor import CUDATargetDesc

    array_type = CUDATargetDesc.typingctx.resolve_argument_type(ndarray)
    return ir.Intrinsic(
        "ptx.cmem.arylike",
        typing.signature(array_type, array_type),
        args=[ndarray],
    )
def test_intrinsic(self):
    """Exercise ``self.check`` on ``ir.Intrinsic`` instances.

    The reference intrinsic is handed to ``self.check`` with two lists:
    ``same`` holds an identically-built copy and one that differs only in
    its last argument (``self.loc2`` instead of ``self.loc1``);
    ``different`` holds variants whose first, second or third argument
    differs from the reference.
    """
    reference = ir.Intrinsic('foo', 'bar', (0,), self.loc1)

    expected_same = [
        ir.Intrinsic('foo', 'bar', (0,), self.loc1),
        ir.Intrinsic('foo', 'bar', (0,), self.loc2),
    ]
    expected_different = [
        ir.Intrinsic('baz', 'bar', (0,), self.loc1),
        ir.Intrinsic('foo', 'baz', (0,), self.loc1),
        ir.Intrinsic('foo', 'bar', (1,), self.loc1),
    ]

    self.check(reference, same=expected_same, different=expected_different)
def test_intrinsic(self):
    """Compare ``ir.Intrinsic`` objects through ``self.check``.

    A baseline intrinsic is checked against an identical twin and a twin
    built with ``self.loc2`` (both passed as ``same``), and against three
    variants that each change exactly one of the first three constructor
    arguments (passed as ``different``).
    """
    make = ir.Intrinsic

    baseline = make("foo", "bar", (0,), self.loc1)
    twin = make("foo", "bar", (0,), self.loc1)
    other_loc = make("foo", "bar", (0,), self.loc2)
    other_first = make("baz", "bar", (0,), self.loc1)
    other_second = make("foo", "baz", (0,), self.loc1)
    other_third = make("foo", "bar", (1,), self.loc1)

    self.check(
        baseline,
        same=[twin, other_loc],
        different=[other_first, other_second, other_third],
    )