def load_symbol(name):
    """Read one integer global of the kernel back to the host.

    The symbol looked up is ``"<cufunc.name>__<name>__"`` in
    ``cufunc.module``; its contents are copied into a ``ctypes.c_int``
    and the resulting Python integer is returned.

    ``cufunc`` and ``driver`` are not defined in this function and must
    be supplied by the surrounding scope.
    """
    device_mem, nbytes = cufunc.module.get_global_symbol(
        "%s__%s__" % (cufunc.name, name)
    )
    host_int = ctypes.c_int()
    # The copy length is whatever size the symbol lookup reported.
    driver.device_to_host(ctypes.addressof(host_int), device_mem, nbytes)
    return host_int.value
def _kernel_call(self, args, griddim, blockdim, stream=0, sharedmem=0):
    """Prepare the arguments, launch the kernel, then run the write-backs.

    Each value in ``args`` is paired with the matching entry of
    ``self.argument_types`` and passed to ``self._prepare_args``, which
    fills the list of kernel arguments and the list of write-back
    callables.  The function returned by ``self._func.get()`` is configured
    with ``griddim``, ``blockdim``, ``stream`` and ``sharedmem`` and called
    with the prepared arguments.

    When ``self.debug`` is true, the ``<kernel name>__errcode__`` global is
    zeroed before the launch and copied back afterwards.  A non-zero code
    is translated by ``self.call_helper.get_exception`` into an exception
    class and arguments.  That exception is raised with a
    ``tid=[z, y, x] ctaid=[z, y, x]`` prefix on its first argument (or as
    its only argument when there are none), and the write-backs are not
    run.
    """
    kernel = self._func.get()

    if self.debug:
        errcode_symbol = kernel.name + "__errcode__"
        errcode_mem, errcode_size = kernel.module.get_global_symbol(
            errcode_symbol
        )
        assert errcode_size == ctypes.sizeof(ctypes.c_int)
        errcode = ctypes.c_int()
        # Zero the error code on the same stream the kernel is launched on.
        errcode_mem.memset(0, stream=stream)

    writebacks = []  # callables to run once the kernel has been invoked
    launch_args = []
    for arg_type, arg_value in zip(self.argument_types, args):
        self._prepare_args(
            arg_type, arg_value, stream, writebacks, launch_args
        )

    configured = kernel.configure(
        griddim, blockdim, stream=stream, sharedmem=sharedmem
    )
    configured(*launch_args)

    if self.debug:
        driver.device_to_host(
            ctypes.addressof(errcode), errcode_mem, errcode_size
        )
        if errcode.value != 0:
            # The kernel reported an error: fetch the recorded position.
            def read_int_symbol(suffix):
                mem, size = kernel.module.get_global_symbol(
                    "%s__%s__" % (kernel.name, suffix)
                )
                host_int = ctypes.c_int()
                driver.device_to_host(ctypes.addressof(host_int), mem, size)
                return host_int.value

            tid = [read_int_symbol("tid" + axis) for axis in 'zyx']
            ctaid = [read_int_symbol("ctaid" + axis) for axis in 'zyx']
            exc_type, exc_args = self.call_helper.get_exception(errcode.value)

            # Put the thread position in front of the exception message.
            position = "tid=%s ctaid=%s" % (tid, ctaid)
            exc_args = (
                ("%s: %s" % (position, exc_args[0]),) + exc_args[1:]
                if exc_args
                else (position,)
            )
            raise exc_type(*exc_args)

    # Hand auto-converted arrays back to the caller.
    for write_back in writebacks:
        write_back()
def _kernel_call(self, args, griddim, blockdim, stream=0, sharedmem=0):
    """Prepare the arguments, launch the kernel, then run the write-backs.

    Each value in ``args`` is paired with the matching entry of
    ``self.argument_types`` and passed to ``self._prepare_args``, which
    fills the list of kernel arguments and the list of write-back
    callables.  The function returned by ``self._func.get()`` is configured
    with ``griddim``, ``blockdim``, ``stream`` and ``sharedmem`` and called
    with the prepared arguments.

    When ``self.debug`` is true, the ``<kernel name>__errcode__`` global is
    zeroed before the launch and copied back afterwards.  A non-zero code
    is translated by ``self.call_helper.get_exception`` into an exception
    class and arguments.  That exception is raised with a
    ``tid=[z, y, x] ctaid=[z, y, x]`` prefix on its first argument (or as
    its only argument when there are none), and the write-backs are not
    run.
    """
    kernel = self._func.get()

    if self.debug:
        errcode_symbol = kernel.name + "__errcode__"
        errcode_mem, errcode_size = kernel.module.get_global_symbol(
            errcode_symbol
        )
        assert errcode_size == ctypes.sizeof(ctypes.c_int)
        errcode = ctypes.c_int()
        # Zero the error code on the same stream the kernel is launched on.
        errcode_mem.memset(0, stream=stream)

    writebacks = []  # callables to run once the kernel has been invoked
    launch_args = []
    for arg_type, arg_value in zip(self.argument_types, args):
        self._prepare_args(
            arg_type, arg_value, stream, writebacks, launch_args
        )

    configured = kernel.configure(
        griddim, blockdim, stream=stream, sharedmem=sharedmem
    )
    configured(*launch_args)

    if self.debug:
        driver.device_to_host(
            ctypes.addressof(errcode), errcode_mem, errcode_size
        )
        if errcode.value != 0:
            # The kernel reported an error: fetch the recorded position.
            def read_int_symbol(suffix):
                mem, size = kernel.module.get_global_symbol(
                    "%s__%s__" % (kernel.name, suffix)
                )
                host_int = ctypes.c_int()
                driver.device_to_host(ctypes.addressof(host_int), mem, size)
                return host_int.value

            tid = [read_int_symbol("tid" + axis) for axis in 'zyx']
            ctaid = [read_int_symbol("ctaid" + axis) for axis in 'zyx']
            exc_type, exc_args = self.call_helper.get_exception(errcode.value)

            # Put the thread position in front of the exception message.
            position = "tid=%s ctaid=%s" % (tid, ctaid)
            exc_args = (
                ("%s: %s" % (position, exc_args[0]),) + exc_args[1:]
                if exc_args
                else (position,)
            )
            raise exc_type(*exc_args)

    # Hand auto-converted arrays back to the caller.
    for write_back in writebacks:
        write_back()