def const_array_like(ndarray): fname = "ptx.cmem.arylike" from .descriptor import CUDATargetDesc aryty = CUDATargetDesc.typingctx.resolve_argument_type(ndarray) sig = typing.signature(aryty, aryty) return ir.Intrinsic(fname, sig, args=[ndarray]) raise NotImplementedError [aryarg] = args ary = aryarg.value count = reduce(operator.mul, ary.shape) dtype = types.from_dtype(numpy.dtype(ary.dtype)) def impl(context, args, argtys, retty): builder = context.builder lmod = builder.basic_block.function.module addrspace = nvvm.ADDRSPACE_CONSTANT data_t = dtype.llvm_as_value() flat = ary.flatten(order='A') # preserve order constvals = [dtype.llvm_const(flat[i]) for i in range(flat.size)] constary = lc.Constant.array(data_t, constvals) gv = lmod.add_global_variable(constary.type, "cmem", addrspace) gv.linkage = lc.LINKAGE_INTERNAL gv.global_constant = True gv.initializer = constary byte = lc.Type.int(8) byte_ptr_as = lc.Type.pointer(byte, addrspace) to_generic = nvvmutils.insert_addrspace_conv(lmod, byte, addrspace) rawdata = builder.call(to_generic, [builder.bitcast(gv, byte_ptr_as)]) data = builder.bitcast(rawdata, lc.Type.pointer(data_t)) llintp = types.intp.llvm_as_value() cshape = lc.Constant.array(llintp, map(types.const_intp, ary.shape)) cstrides = lc.Constant.array(llintp, map(types.const_intp, ary.strides)) res = lc.Constant.struct([lc.Constant.null(data.type), cshape, cstrides]) res = builder.insert_value(res, data, 0) return res if ary.flags['C_CONTIGUOUS']: contig = 'C' elif ary.flags['F_CONTIGUOUS']: contig = 'F' else: raise TypeError("array must be either C/F contiguous to be used as a " "constant") impl.codegen = True impl.return_type = types.arraytype(dtype, ary.ndim, 'A') return impl
def grid_expand(ndim): """grid(ndim) ndim: [int] 1, 2 or 3 if ndim == 1: return cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x elif ndim == 2: x = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x y = cuda.threadIdx.y + cuda.blockIdx.y * cuda.blockDim.y return x, y elif ndim == 3: x = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x y = cuda.threadIdx.y + cuda.blockIdx.y * cuda.blockDim.y z = cuda.threadIdx.z + cuda.blockIdx.z * cuda.blockDim.z return x, y, z """ if ndim == 1: fname = "ptx.grid.1d" restype = types.int32 elif ndim == 2: fname = "ptx.grid.2d" restype = types.UniTuple(types.int32, 2) elif ndim == 3: fname = "ptx.grid.3d" restype = types.UniTuple(types.int32, 3) else: raise ValueError('argument can only be 1, 2, 3') return ir.Intrinsic(fname, typing.signature(restype, types.intp), args=[ndim])
def local_array(shape, dtype): shape = _legalize_shape(shape) ndim = len(shape) fname = "ptx.lmem.alloc" restype = types.Array(dtype, ndim, 'C') sig = typing.signature(restype, types.UniTuple(types.intp, ndim), types.Any) return ir.Intrinsic(fname, sig, args=(shape, dtype))
def grid_expand(ndim): """grid(ndim) Return the absolute position of the current thread in the entire grid of blocks. *ndim* should correspond to the number of dimensions declared when instantiating the kernel. If *ndim* is 1, a single integer is returned. If *ndim* is 2 or 3, a tuple of the given number of integers is returned. Computation of the first integer is as follows:: cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x and is similar for the other two indices, but using the ``y`` and ``z`` attributes. """ if ndim == 1: fname = "ptx.grid.1d" restype = types.int32 elif ndim == 2: fname = "ptx.grid.2d" restype = types.UniTuple(types.int32, 2) elif ndim == 3: fname = "ptx.grid.3d" restype = types.UniTuple(types.int32, 3) else: raise ValueError('argument can only be 1, 2, 3') return ir.Intrinsic(fname, typing.signature(restype, types.intp), args=[ndim])
def gridsize_expand(ndim): """ Return the absolute size (or shape) in threads of the entire grid of blocks. *ndim* should correspond to the number of dimensions declared when instantiating the kernel. Computation of the first integer is as follows:: cuda.blockDim.x * cuda.gridDim.x and is similar for the other two indices, but using the ``y`` and ``z`` attributes. """ if ndim == 1: fname = "ptx.gridsize.1d" restype = types.int32 elif ndim == 2: fname = "ptx.gridsize.2d" restype = types.UniTuple(types.int32, 2) elif ndim == 3: fname = "ptx.gridsize.3d" restype = types.UniTuple(types.int32, 3) else: raise ValueError('argument can only be 1, 2 or 3') return ir.Intrinsic(fname, typing.signature(restype, types.intp), args=[ndim])
def _expand_non_callable_macro(self, macro, loc): """ Return the IR expression of expanding the non-callable macro. """ intr = ir.Intrinsic(macro.name, macro.func, args=()) new_expr = ir.Expr.call(func=intr, args=(), kws=(), loc=loc) return new_expr
def expand_macros_in_block(constants, block): calls = [] for inst in block.body: if isinstance(inst, ir.Assign): rhs = inst.value if isinstance(rhs, ir.Expr) and rhs.op == 'call': callee = rhs.func macro = constants.get(callee.name) if isinstance(macro, Macro): # Rewrite calling macro assert macro.callable calls.append((inst, macro)) args = [constants[arg.name] for arg in rhs.args] kws = dict((k, constants[v.name]) for k, v in rhs.kws) result = macro.func(*args, **kws) if result: # Insert a new function result.loc = rhs.loc inst.value = ir.Expr.call(func=result, args=rhs.args, kws=rhs.kws, loc=rhs.loc) elif isinstance(rhs, ir.Expr) and rhs.op == 'getattr': # Rewrite get attribute to macro call # Non-calling macro must be triggered by get attribute base = constants.get(rhs.value.name) if base: value = getattr(base, rhs.attr) if isinstance(value, Macro): macro = value if not macro.callable: intr = ir.Intrinsic(macro.name, macro.func, args=()) inst.value = ir.Expr.call(func=intr, args=(), kws=(), loc=rhs.loc)
def const_array_like(ndarray): fname = "ptx.cmem.arylike" from .descriptor import CUDATargetDesc aryty = CUDATargetDesc.typingctx.resolve_argument_type(ndarray) sig = typing.signature(aryty, aryty) return ir.Intrinsic(fname, sig, args=[ndarray])
def test_intrinsic(self): a = ir.Intrinsic('foo', 'bar', (0, ), self.loc1) b = ir.Intrinsic('foo', 'bar', (0, ), self.loc1) c = ir.Intrinsic('foo', 'bar', (0, ), self.loc2) d = ir.Intrinsic('baz', 'bar', (0, ), self.loc1) e = ir.Intrinsic('foo', 'baz', (0, ), self.loc1) f = ir.Intrinsic('foo', 'bar', (1, ), self.loc1) self.check(a, same=[b, c], different=[d, e, f])
def local_array(shape, dtype): ndim = 1 if isinstance(shape, tuple): ndim = len(shape) fname = "ptx.lmem.alloc" restype = types.Array(dtype, ndim, 'C') if ndim == 1: sig = typing.signature(restype, types.intp, types.Any) else: sig = typing.signature(restype, types.UniTuple(types.intp, ndim), types.Any) return ir.Intrinsic(fname, sig, args=(shape, dtype))
def gridsize_expand(ndim): """gridsize(ndim) ndim: [int] 1 or 2 if ndim == 1: return cuda.blockDim.x * cuda.gridDim.x elif ndim == 2: x = cuda.blockDim.x * cuda.gridDim.x y = cuda.blockDim.y * cuda.gridDim.y return x, y """ if ndim == 1: fname = "ptx.gridsize.1d" restype = types.int32 elif ndim == 2: fname = "ptx.gridsize.2d" restype = types.UniTuple(types.int32, 2) else: raise ValueError('argument can only be 1 or 2') return ir.Intrinsic(fname, typing.signature(restype, types.intp), args=[ndim])
def expand_macros_in_block(constants, block): ''' Performs macro expansion on a block. Args ---- constants: dict The pool of constants which contains the values which contains mappings from variable names to callee names block: ir.Block The block to perform macro expansion on return: bool True if any macros were expanded ''' expanded = False for inst in block.body: if isinstance(inst, ir.Assign): rhs = inst.value if isinstance(rhs, ir.Expr) and rhs.op == 'call': callee = rhs.func macro = constants.get(callee.name) if isinstance(macro, Macro): # Rewrite calling macro assert macro.callable args = [constants[arg.name] for arg in rhs.args] kws = {} for k, v in rhs.kws: if v.name in constants: kws[k] = constants[v.name] else: msg = "Argument {name!r} must be a " \ "constant at {loc}".format(name=k, loc=inst.loc) raise ValueError(msg) try: result = macro.func(*args, **kws) except BaseException as e: msg = str(e) headfmt = "Macro expansion failed at {line}" head = headfmt.format(line=inst.loc) newmsg = "{0}:\n{1}".format(head, msg) raise MacroError(newmsg) if result: # Insert a new function result.loc = rhs.loc inst.value = ir.Expr.call(func=result, args=rhs.args, kws=rhs.kws, loc=rhs.loc) expanded = True elif isinstance(rhs, ir.Expr) and rhs.op == 'getattr': # Rewrite get attribute to macro call # Non-calling macro must be triggered by get attribute base = constants.get(rhs.value.name) if base is not None: value = getattr(base, rhs.attr) if isinstance(value, Macro): macro = value if not macro.callable: intr = ir.Intrinsic(macro.name, macro.func, args=()) inst.value = ir.Expr.call(func=intr, args=(), kws=(), loc=rhs.loc) expanded = True return expanded