def function_prepared_async_call(func, grid, block, stream, *args, **kwargs):
    """Asynchronously launch the prepared kernel *func* (pre-CUDA-4 API).

    :param grid: ``(grid_x, grid_y)`` launch grid.
    :param block: block shape tuple, or (deprecated legacy convention) the
        stream when no block size is passed.
    :param stream: Stream to launch on, or None for a synchronous launch.
    :param args: kernel arguments, packed per ``func.arg_format``.
    :param kwargs: only ``shared_size`` is recognized.

    Fix: keyword arguments were previously accepted and silently dropped;
    ``shared_size`` is now honored and any other keyword raises TypeError,
    matching the sibling prepared-call helpers.
    """
    if isinstance(block, tuple):
        func._set_block_shape(*block)
    else:
        from warnings import warn
        warn(
            "Not passing the block size to prepared_async_call is deprecated as of "
            "version 2011.1.",
            DeprecationWarning, stacklevel=2)
        # Legacy convention: shift arguments one slot to the left.
        args = (stream, ) + args
        stream = block

    shared_size = kwargs.pop("shared_size", None)
    if shared_size is not None:
        func._set_shared_size(shared_size)
    if kwargs:
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    func._param_setv(0, pack(func.arg_format, *args))

    for texref in func.texrefs:
        func.param_set_texref(texref)

    if stream is None:
        func._launch_grid(*grid)
    else:
        grid_x, grid_y = grid
        func._launch_grid_async(grid_x, grid_y, stream)
def function_prepared_call_pre_v4(func, grid, block, *args, **kwargs):
    """Synchronously launch the prepared kernel *func* (pre-CUDA-4 API).

    Only the ``shared_size`` keyword is recognized; any other keyword
    argument raises TypeError.
    """
    if not isinstance(block, tuple):
        from warnings import warn
        warn(
            "Not passing the block size to prepared_call is deprecated as of "
            "version 2011.1.",
            DeprecationWarning, stacklevel=2,
        )
        # Deprecated convention: 'block' is really the first kernel argument.
        args = (block, ) + args
    else:
        func._set_block_shape(*block)

    shared_size = kwargs.pop("shared_size", None)
    if shared_size is not None:
        func._set_shared_size(shared_size)
    if kwargs:
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    func._param_setv(0, pack(func.arg_format, *args))

    for tr in func.texrefs:
        func.param_set_texref(tr)

    func._launch_grid(*grid)
def function_prepared_async_call(func, grid, block, stream, *args, **kwargs):
    """Asynchronously launch the prepared kernel *func* (pre-CUDA-4 API).

    Launches synchronously when *stream* is None.

    Fix: keyword arguments were previously accepted and silently dropped;
    ``shared_size`` is now honored and any other keyword raises TypeError,
    matching the sibling prepared-call helpers.
    """
    if isinstance(block, tuple):
        func._set_block_shape(*block)
    else:
        from warnings import warn
        warn(
            "Not passing the block size to prepared_async_call is deprecated as of "
            "version 2011.1.",
            DeprecationWarning, stacklevel=2,
        )
        # Legacy convention: shift arguments one slot to the left.
        args = (stream,) + args
        stream = block

    shared_size = kwargs.pop("shared_size", None)
    if shared_size is not None:
        func._set_shared_size(shared_size)
    if kwargs:
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    func._param_setv(0, pack(func.arg_format, *args))

    for texref in func.texrefs:
        func.param_set_texref(texref)

    if stream is None:
        func._launch_grid(*grid)
    else:
        grid_x, grid_y = grid
        func._launch_grid_async(grid_x, grid_y, stream)
def function_prepared_timed_call(func, grid, block, *args, **kwargs):
    """Launch *func* (CUDA-4 kernel-launch API) and time it.

    Returns a zero-argument callable that synchronizes on the end event
    and yields the kernel run time in seconds.
    """
    shared_size = kwargs.pop("shared_size", 0)
    if kwargs:
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    packed_args = pack(func.arg_format, *args)

    for tr in func.texrefs:
        func.param_set_texref(tr)

    evt_start, evt_end = Event(), Event()
    evt_start.record()
    func._launch_kernel(grid, block, packed_args, shared_size, None)
    evt_end.record()

    def get_call_time():
        # Block until the kernel has finished, then report elapsed seconds.
        evt_end.synchronize()
        return evt_end.time_since(evt_start) * 1e-3

    return get_call_time
def function_prepared_timed_call_pre_v4(func, grid, block, *args, **kwargs):
    """Launch *func* (pre-CUDA-4 API) and return a timing callable.

    The returned zero-argument callable synchronizes on the end event and
    returns the kernel run time in seconds.
    """
    if isinstance(block, tuple):
        func._set_block_shape(*block)
    else:
        from warnings import warn
        warn("Not passing the block size to prepared_timed_call is deprecated as of "
                "version 2011.1.", DeprecationWarning, stacklevel=2)
        args = (block,) + args

    shared_size = kwargs.pop("shared_size", None)
    if shared_size is not None:
        func._set_shared_size(shared_size)
    if kwargs:
        # Fix: kwargs.iterkeys() is Python 2 only and raises AttributeError
        # on Python 3; dict.keys() works on both.
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    func._param_setv(0, pack(func.arg_format, *args))

    for texref in func.texrefs:
        func.param_set_texref(texref)

    start = Event()
    end = Event()
    start.record()
    func._launch_grid(*grid)
    end.record()

    def get_call_time():
        end.synchronize()
        return end.time_since(start)*1e-3

    return get_call_time
def function_prepared_async_call(func, grid, block, stream, *args, **kwargs):
    """Launch the prepared kernel *func* on *stream* (CUDA-4 launch API)."""
    if not isinstance(block, tuple):
        from warnings import warn
        warn(
            "Not passing the block size to prepared_async_call is "
            "deprecated as of version 2011.1.",
            DeprecationWarning, stacklevel=2,
        )
        # Deprecated convention: 'block' actually carries the stream and
        # 'stream' carries the first kernel argument.
        args = (stream, ) + args
        stream = block
    else:
        func._set_block_shape(*block)

    shared_size = kwargs.pop("shared_size", 0)
    if kwargs:
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    packed_args = pack(func.arg_format, *args)

    for tr in func.texrefs:
        func.param_set_texref(tr)

    func._launch_kernel(grid, block, packed_args, shared_size, stream)
def function_prepared_timed_call(func, grid, block, *args, **kwargs):
    """Launch *func* and return a callable yielding the run time in seconds.

    Fix: ``**kwargs`` was previously accepted and silently ignored; now
    ``shared_size`` is honored and any other keyword raises TypeError,
    consistent with the other prepared-call helpers.
    """
    if isinstance(block, tuple):
        func._set_block_shape(*block)
    else:
        from warnings import warn
        warn("Not passing the block size to prepared_timed_call is deprecated as of "
                "version 2011.1.", DeprecationWarning, stacklevel=2)
        args = (block,) + args

    shared_size = kwargs.pop("shared_size", None)
    if shared_size is not None:
        func._set_shared_size(shared_size)
    if kwargs:
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    func._param_setv(0, pack(func.arg_format, *args))

    for texref in func.texrefs:
        func.param_set_texref(texref)

    start = Event()
    end = Event()
    start.record()
    func._launch_grid(*grid)
    end.record()

    def get_call_time():
        end.synchronize()
        return end.time_since(start)*1e-3

    return get_call_time
def _build_arg_buf(args):
    """Pack *args* into ``(handlers, packed_buffer)`` for a kernel launch.

    Returns the ArgumentHandler instances found among *args* (so the caller
    can invoke their pre/post-call hooks) plus the struct-packed parameter
    buffer.
    """
    handlers = []
    arg_data = []
    fmt_chunks = []

    for i, arg in enumerate(args):
        if isinstance(arg, np.number):
            # numpy scalar: packed with its own dtype character.
            arg_data.append(arg)
            fmt_chunks.append(arg.dtype.char)
        elif isinstance(arg, (DeviceAllocation, PooledDeviceAllocation)):
            # raw device pointer
            arg_data.append(int(arg))
            fmt_chunks.append("P")
        elif isinstance(arg, ArgumentHandler):
            handlers.append(arg)
            arg_data.append(int(arg.get_device_alloc()))
            fmt_chunks.append("P")
        elif isinstance(arg, np.ndarray):
            # passed by value: the array's raw bytes go into the buffer
            arg_data.append(arg)
            fmt_chunks.append("%ds" % arg.nbytes)
        else:
            try:
                gpudata = np.intp(arg.gpudata)
            except AttributeError:
                raise TypeError("invalid type on parameter #%d (0-based)" % i)
            else:
                # for gpuarrays
                arg_data.append(int(gpudata))
                fmt_chunks.append("P")

    from pycuda._pvt_struct import pack
    return handlers, pack("".join(fmt_chunks), *arg_data)
def function_prepared_async_call_pre_v4(func, grid, block, stream, *args, **kwargs):
    """Asynchronously launch *func* (pre-CUDA-4 API).

    Falls back to a synchronous grid launch when *stream* is None. Only the
    ``shared_size`` keyword argument is recognized.
    """
    if isinstance(block, tuple):
        func._set_block_shape(*block)
    else:
        from warnings import warn
        warn("Not passing the block size to prepared_async_call is deprecated as of "
                "version 2011.1.", DeprecationWarning, stacklevel=2)
        # Legacy convention: shift arguments one slot to the left.
        args = (stream,) + args
        stream = block

    shared_size = kwargs.pop("shared_size", None)
    if shared_size is not None:
        func._set_shared_size(shared_size)
    if kwargs:
        # Fix: kwargs.iterkeys() is Python 2 only and raises AttributeError
        # on Python 3; dict.keys() works on both.
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    func._param_setv(0, pack(func.arg_format, *args))

    for texref in func.texrefs:
        func.param_set_texref(texref)

    if stream is None:
        func._launch_grid(*grid)
    else:
        grid_x, grid_y = grid
        func._launch_grid_async(grid_x, grid_y, stream)
def function_prepared_timed_call(func, grid, block, *args, **kwargs):
    """Launch *func* and return a callable yielding the run time in seconds.

    Fix: ``**kwargs`` was previously accepted and silently ignored; now
    ``shared_size`` is honored and any other keyword raises TypeError,
    consistent with the other prepared-call helpers.
    """
    if isinstance(block, tuple):
        func._set_block_shape(*block)
    else:
        from warnings import warn
        warn(
            "Not passing the block size to prepared_timed_call is deprecated as of "
            "version 2011.1.",
            DeprecationWarning, stacklevel=2)
        args = (block, ) + args

    shared_size = kwargs.pop("shared_size", None)
    if shared_size is not None:
        func._set_shared_size(shared_size)
    if kwargs:
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    func._param_setv(0, pack(func.arg_format, *args))

    for texref in func.texrefs:
        func.param_set_texref(texref)

    start = Event()
    end = Event()
    start.record()
    func._launch_grid(*grid)
    end.record()

    def get_call_time():
        end.synchronize()
        return end.time_since(start) * 1e-3

    return get_call_time
def function_prepared_async_call(func, grid, block, stream, *args, **kwargs):
    """Asynchronously launch the prepared kernel *func* (CUDA-4 launch API).

    Only the ``shared_size`` keyword argument is recognized.
    """
    if isinstance(block, tuple):
        func._set_block_shape(*block)
    else:
        from warnings import warn
        warn(
            "Not passing the block size to prepared_async_call is deprecated as of "
            "version 2011.1.",
            DeprecationWarning, stacklevel=2,
        )
        # Legacy convention: 'block' actually carries the stream.
        args = (stream,) + args
        stream = block

    shared_size = kwargs.pop("shared_size", 0)
    if kwargs:
        # Fix: kwargs.iterkeys() is Python 2 only and raises AttributeError
        # on Python 3; dict.keys() works on both.
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    arg_buf = pack(func.arg_format, *args)

    for texref in func.texrefs:
        func.param_set_texref(texref)

    func._launch_kernel(grid, block, arg_buf, shared_size, stream)
def function_prepared_call(func, grid, *args):
    """Synchronously launch the prepared kernel *func* (legacy launch API)."""
    from pycuda._pvt_struct import pack
    packed = pack(func.arg_format, *args)
    func.param_setv(0, packed)

    for tr in func.texrefs:
        func.param_set_texref(tr)

    func.launch_grid(*grid)
def function_prepared_async_call(func, grid, stream, *args):
    """Launch *func* (legacy API), asynchronously when *stream* is given."""
    from pycuda._pvt_struct import pack
    packed = pack(func.arg_format, *args)
    func.param_setv(0, packed)

    for tr in func.texrefs:
        func.param_set_texref(tr)

    if stream is not None:
        grid_x, grid_y = grid
        func.launch_grid_async(grid_x, grid_y, stream)
    else:
        func.launch_grid(*grid)
def _build_arg_buf(args):
    """Pack *args* into ``(handlers, packed_buffer)`` for a kernel launch.

    Returns the ArgumentHandler instances found among *args* (so the caller
    can invoke their pre/post-call hooks) plus the struct-packed parameter
    buffer.
    """
    handlers = []
    arg_data = []
    fmt_chunks = []

    for i, arg in enumerate(args):
        if isinstance(arg, np.number):
            # numpy scalar: packed with its own dtype character.
            arg_data.append(arg)
            fmt_chunks.append(arg.dtype.char)
        elif isinstance(arg, (DeviceAllocation, PooledDeviceAllocation)):
            # raw device pointer
            arg_data.append(int(arg))
            fmt_chunks.append("P")
        elif isinstance(arg, ArgumentHandler):
            handlers.append(arg)
            arg_data.append(int(arg.get_device_alloc()))
            fmt_chunks.append("P")
        elif isinstance(arg, np.ndarray):
            if isinstance(arg.base, ManagedAllocationOrStub):
                # managed memory: pass the base pointer instead of the bytes
                arg_data.append(int(arg.base))
                fmt_chunks.append("P")
            else:
                arg_data.append(arg)
                fmt_chunks.append("%ds" % arg.nbytes)
        elif isinstance(arg, np.void):
            arg_data.append(_my_bytes(_memoryview(arg)))
            fmt_chunks.append("%ds" % arg.itemsize)
        else:
            cai = getattr(arg, "__cuda_array_interface__", None)
            if cai:
                # any object exporting __cuda_array_interface__: use its
                # device pointer directly
                arg_data.append(cai["data"][0])
                fmt_chunks.append("P")
            else:
                try:
                    gpudata = np.uintp(arg.gpudata)
                except AttributeError:
                    raise TypeError(
                            "invalid type on parameter #%d (0-based)" % i)
                else:
                    # for gpuarrays
                    arg_data.append(int(gpudata))
                    fmt_chunks.append("P")

    from pycuda._pvt_struct import pack
    return handlers, pack("".join(fmt_chunks), *arg_data)
def function_prepared_timed_call(func, grid, *args):
    """Launch *func* (legacy API); return a callable giving the run time in seconds."""
    from pycuda._pvt_struct import pack
    packed = pack(func.arg_format, *args)
    func.param_setv(0, packed)

    for tr in func.texrefs:
        func.param_set_texref(tr)

    evt_start, evt_end = Event(), Event()
    evt_start.record()
    func.launch_grid(*grid)
    evt_end.record()

    def get_call_time():
        # Block until the kernel has finished, then report elapsed seconds.
        evt_end.synchronize()
        return evt_end.time_since(evt_start) * 1e-3

    return get_call_time
def function_prepared_timed_call(func, grid, *args):
    """Launch *func* (legacy API) bracketed by events; return a timing callable."""
    from pycuda._pvt_struct import pack
    func.param_setv(0, pack(func.arg_format, *args))

    for texture_ref in func.texrefs:
        func.param_set_texref(texture_ref)

    t0 = Event()
    t1 = Event()
    t0.record()
    func.launch_grid(*grid)
    t1.record()

    def get_call_time():
        # Wait for completion before reading the event delta (seconds).
        t1.synchronize()
        return t1.time_since(t0) * 1e-3

    return get_call_time
def function_param_set(func, *args):
    """Pack *args* into the parameter space of *func* (legacy launch API).

    Returns the list of ArgumentHandler instances found among *args* so the
    caller can run their pre/post-call hooks.
    """
    try:
        import numpy
    except ImportError:
        # numpy is optional on this path; numpy scalar arguments simply
        # won't be recognized without it.
        numpy = None

    handlers = []
    arg_data = []
    format = ""
    for i, arg in enumerate(args):
        if numpy is not None and isinstance(arg, numpy.number):
            # numpy scalar: packed with its own dtype character
            arg_data.append(arg)
            format += arg.dtype.char
        elif isinstance(arg, (DeviceAllocation, PooledDeviceAllocation)):
            # raw device pointer
            arg_data.append(int(arg))
            format += "P"
        elif isinstance(arg, ArgumentHandler):
            handlers.append(arg)
            arg_data.append(int(arg.get_device_alloc()))
            format += "P"
        elif isinstance(arg, buffer):
            # NOTE(review): 'buffer' is the Python 2 builtin; this branch
            # raises NameError on Python 3 -- presumably a Python-2-era
            # code path. Verify before reusing on Python 3.
            arg_data.append(arg)
            format += "s"
        else:
            try:
                gpudata = arg.gpudata
            except AttributeError:
                raise TypeError("invalid type on parameter #%d (0-based)" % i)
            else:
                # for gpuarrays
                arg_data.append(int(gpudata))
                format += "P"

    from pycuda._pvt_struct import pack
    buf = pack(format, *arg_data)
    func.param_setv(0, buf)
    func.param_set_size(len(buf))

    return handlers
def function_prepared_timed_call_pre_v4(func, grid, block, *args, **kwargs):
    """Launch *func* (pre-CUDA-4 API) and return a timing callable.

    The returned zero-argument callable synchronizes on the end event and
    returns the kernel run time in seconds.
    """
    if isinstance(block, tuple):
        func._set_block_shape(*block)
    else:
        from warnings import warn
        warn(
            "Not passing the block size to prepared_timed_call is "
            "deprecated as of version 2011.1.",
            DeprecationWarning, stacklevel=2)
        args = (block, ) + args

    shared_size = kwargs.pop("shared_size", None)
    if shared_size is not None:
        func._set_shared_size(shared_size)
    if kwargs:
        # Fix: drop the third-party 'six' shim -- plain dict.keys() works on
        # all supported Pythons and removes the dependency from this path.
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    func._param_setv(0, pack(func.arg_format, *args))

    for texref in func.texrefs:
        func.param_set_texref(texref)

    start = Event()
    end = Event()
    start.record()
    func._launch_grid(*grid)
    end.record()

    def get_call_time():
        end.synchronize()
        return end.time_since(start) * 1e-3

    return get_call_time
def function_prepared_timed_call(func, grid, block, *args, **kwargs):
    """Launch *func* (CUDA-4 launch API) and return a timing callable.

    The returned zero-argument callable synchronizes on the end event and
    returns the kernel run time in seconds.
    """
    shared_size = kwargs.pop("shared_size", 0)
    if kwargs:
        # Fix: kwargs.iterkeys() is Python 2 only and raises AttributeError
        # on Python 3; dict.keys() works on both.
        raise TypeError("unknown keyword arguments: " + ", ".join(kwargs.keys()))

    from pycuda._pvt_struct import pack
    arg_buf = pack(func.arg_format, *args)

    for texref in func.texrefs:
        func.param_set_texref(texref)

    start = Event()
    end = Event()
    start.record()
    func._launch_kernel(grid, block, arg_buf, shared_size, None)
    end.record()

    def get_call_time():
        end.synchronize()
        return end.time_since(start)*1e-3

    return get_call_time