def test_components():
    """Check the ``components`` list produced for several declarations."""
    expectations = (
        ("__global float4* a", ["__global", "float4*", "a"]),
        ("const int b", ["const", "int", "b"]),
        ("const __restrict int c", ["const", "int", "c"]),
    )
    for declaration, parts in expectations:
        assert clutil.KernelArg(declaration).components == parts
def __deepcopy__(self, memo: dict = None):
    """
    Make a deep copy of a payload.

    This means duplicating all host data, and constructing new OpenCL
    mem objects with pointers to this host data. Note that this DOES
    NOT copy the OpenCL context associated with the payload.

    Arguments:

        memo (dict, optional): Memo dictionary forwarded to deepcopy()
            when duplicating host data. If omitted, a fresh dictionary
            is created for this call.

    Returns:

        KernelPayload: A new kernel payload instance containing
            copies of all data.
    """
    # A ``{}`` default would be created once and shared by every call that
    # omits ``memo``; deepcopy() would then hand back host arrays cached by
    # an earlier call, so separate copies would alias the same data.
    if memo is None:
        memo = {}

    args = [clutil.KernelArg(a.string) for a in self.args]

    for src, dst in zip(self.args, args):
        if src.hostdata is None and src.is_local:
            # Copy a local memory buffer.
            dst.hostdata = None
            dst.bufsize = src.bufsize
            dst.devdata = cl.LocalMemory(src.bufsize)
        elif src.hostdata is None:
            # Copy a scalar value.
            dst.hostdata = None
            dst.devdata = deepcopy(src.devdata)
        else:
            # Copy a global memory buffer: duplicate the host array, then
            # build a new device buffer on the same context from the copy.
            dst.hostdata = deepcopy(src.hostdata, memo=memo)
            dst.flags = src.flags
            dst.devdata = cl.Buffer(self.context, src.flags,
                                    hostbuf=dst.hostdata)

    return KernelPayload(self.context, args, self.ndrange,
                         self.transfersize)
def test_arg4():
    """Check the properties parsed from a const ``uchar16`` argument."""
    arg = clutil.KernelArg("const uchar16 z")

    # Type information.
    assert arg.type == "uchar16"
    assert arg.bare_type == "uchar"

    # Pointer / address space / constness flags.
    assert not arg.is_pointer
    assert not arg.is_global
    assert not arg.is_local
    assert arg.is_const

    # Host-side representation.
    assert arg.numpy_type == np.uint8
    assert arg.vector_width == 16
def test_arg3():
    """Check the properties parsed from a const ``unsigned int`` argument."""
    arg = clutil.KernelArg("const unsigned int z")

    # Type information: a scalar's bare type equals its full type.
    assert arg.type == "unsigned int"
    assert arg.bare_type == "unsigned int"

    # Pointer / address space / constness flags.
    assert not arg.is_pointer
    assert not arg.is_global
    assert not arg.is_local
    assert arg.is_const

    # Host-side representation.
    assert arg.numpy_type == np.uint32
    assert arg.vector_width == 1
def test_arg2():
    """Check the properties parsed from a ``__global float4*`` argument."""
    arg = clutil.KernelArg("__global float4* a")

    # Type information.
    assert arg.type == "float4*"
    assert arg.bare_type == "float"

    # Pointer / address space / constness flags.
    assert arg.is_pointer
    assert arg.is_global
    assert not arg.is_local
    assert not arg.is_const

    # Host-side representation.
    assert arg.numpy_type == np.float32
    assert arg.vector_width == 4
def test_numpy_type():
    """Check the numpy type reported for a float vector and an int scalar."""
    expectations = (
        ("__local float4* a", np.float32),
        ("const int b", np.int32),
    )
    for declaration, expected_type in expectations:
        assert clutil.KernelArg(declaration).numpy_type == expected_type
def test_is_local():
    """Check that ``is_local`` is set only for the ``__local`` argument."""
    local_pointer = clutil.KernelArg("__local float4* a")
    assert local_pointer.is_local

    const_scalar = clutil.KernelArg("const int b")
    assert not const_scalar.is_local
def test_is_const():
    """Check that ``is_const`` is set only for the ``const`` argument."""
    global_pointer = clutil.KernelArg("__global float4* a")
    assert not global_pointer.is_const

    const_scalar = clutil.KernelArg("const int b")
    assert const_scalar.is_const
def test_bare_type():
    """Check the ``bare_type`` reported for a vector pointer and a scalar."""
    expectations = {
        "__global float4* a": "float",
        "const int b": "int",
    }
    for declaration, expected in expectations.items():
        assert clutil.KernelArg(declaration).bare_type == expected
def test_vector_width():
    """Check ``vector_width`` for vector types and for a plain scalar."""
    expectations = (
        ("__global float4* a", 4),
        ("const int32 b", 32),
        ("const int c", 1),
    )
    for declaration, width in expectations:
        assert clutil.KernelArg(declaration).vector_width == width
def test_qualifiers():
    """Check the ``qualifiers`` list, including the no-qualifier case."""
    expectations = (
        ("__global float4* a", ["__global"]),
        ("const int b", ["const"]),
        ("int c", []),
    )
    for declaration, qualifiers in expectations:
        assert clutil.KernelArg(declaration).qualifiers == qualifiers
def test_kernelarg_name():
    """Check that ``name`` is the identifier at the end of the declaration."""
    expectations = (
        ("__global float4* a", "a"),
        ("const int b", "b"),
    )
    for declaration, identifier in expectations:
        assert clutil.KernelArg(declaration).name == identifier
def test_string():
    """Check that ``string`` returns the declaration it was built from."""
    for declaration in ("global float4* a", "const int b"):
        assert clutil.KernelArg(declaration).string == declaration
def _create_payload(nparray, driver, size):
    """
    Create a payload.

    A fresh KernelArg is built for each argument in the driver's
    prototype, then populated according to its kind: local pointers
    get a LocalMemory object, other pointers get a host array plus a
    device Buffer, and non-pointers get a scalar value.

    Arguments:

        nparray (function): Numpy array generator. Called with the
            number of elements to generate.
        driver (KernelDriver): Driver.
        size (int): Payload size parameter.

    Returns:

        KernelPayload: Generated payload.

    Raises:

        E_BAD_ARGS: If payload can't be synthesized for kernel
            argument(s). The original exception is attached as the
            cause.
    """
    assert (callable(nparray))
    assert (isinstance(driver, KernelDriver))
    assert (isinstance(size, Number))

    args = [clutil.KernelArg(arg.string) for arg in driver.prototype.args]
    transfer = 0

    try:
        for arg in args:
            arg.hostdata = None

            dtype = arg.numpy_type
            veclength = size * arg.vector_width

            if arg.is_pointer and arg.is_local:
                # If arg is a pointer to local memory, then we
                # create a read/write buffer:
                # NOTE(review): the byte size comes from the generator's
                # own output, without the astype(dtype) conversion used
                # for global buffers - confirm this is intended.
                nonbuf = nparray(veclength)
                arg.bufsize = nonbuf.nbytes
                arg.devdata = cl.LocalMemory(arg.bufsize)
            elif arg.is_pointer:
                # If arg is a pointer to global memory, then we
                # allocate host memory and populate with values:
                arg.hostdata = nparray(veclength).astype(dtype)

                # Determine flags to pass to OpenCL buffer creation:
                arg.flags = cl.mem_flags.COPY_HOST_PTR
                if arg.is_const:
                    arg.flags |= cl.mem_flags.READ_ONLY
                else:
                    arg.flags |= cl.mem_flags.READ_WRITE

                # Allocate device memory:
                arg.devdata = cl.Buffer(driver.context, arg.flags,
                                        hostbuf=arg.hostdata)

                # Record transfer overhead. If it's a const buffer,
                # we're not reading back to host.
                if arg.is_const:
                    transfer += arg.hostdata.nbytes
                else:
                    transfer += 2 * arg.hostdata.nbytes
            else:
                # If arg is not a pointer, then it's a scalar value:
                arg.devdata = dtype(size)
    except Exception as e:
        # Chain explicitly so the traceback reports the underlying failure
        # as the direct cause of the E_BAD_ARGS error.
        raise E_BAD_ARGS(e) from e

    return KernelPayload(driver.context, args, (size, ), transfer)