def __call__(self, *args): ctx, symbol, kernargs, kernarg_region = self.bind() # Unpack pyobject values into ctypes scalar values expanded_values = [] for ty, val in zip(self.argument_types, args): _unpack_argument(ty, val, expanded_values) # Insert kernel arguments base = 0 for av in expanded_values: # Adjust for alignemnt align = ctypes.sizeof(av) pad = _calc_padding_for_alignment(align, base) base += pad # Move to offset offseted = ctypes.addressof(kernargs) + base asptr = ctypes.cast(offseted, ctypes.POINTER(type(av))) # Assign value asptr[0] = av # Increment offset base += align assert base <= ctypes.sizeof( kernargs), "Kernel argument size is invalid" # Actual Kernel launch qq = ctx.default_queue # Dispatch qq.dispatch(symbol, kernargs, workgroup_size=self.local_size, grid_size=self.global_size) # Free kernel region kernarg_region.free(kernargs)
def load_symbol(name): mem, sz = cufunc.module.get_global_symbol("%s__%s__" % (cufunc.name, name)) val = ctypes.c_int() driver.device_to_host(ctypes.addressof(val), mem, sz) return val.value
def fix_python_api(): """ Execute once to install special symbols into the LLVM symbol table """ c_helpers = _helperlib.c_helpers le.dylib_add_symbol("Py_None", ctypes.addressof(_PyNone)) le.dylib_add_symbol("NumbaArrayAdaptor", _numpyadapt.get_ndarray_adaptor()) le.dylib_add_symbol("NumbaNDArrayNew", _numpyadapt.get_ndarray_new()) le.dylib_add_symbol("NumbaComplexAdaptor", c_helpers["complex_adaptor"]) le.dylib_add_symbol("NumbaNativeError", id(NativeError)) le.dylib_add_symbol("NumbaExtractRecordData", c_helpers["extract_record_data"]) le.dylib_add_symbol("NumbaReleaseRecordBuffer", c_helpers["release_record_buffer"]) le.dylib_add_symbol("NumbaRecreateRecord", c_helpers["recreate_record"]) le.dylib_add_symbol("NumbaGILEnsure", c_helpers["gil_ensure"]) le.dylib_add_symbol("NumbaGILRelease", c_helpers["gil_release"]) # Add all built-in exception classes for obj in utils.builtins.__dict__.values(): if isinstance(obj, type) and issubclass(obj, BaseException): le.dylib_add_symbol("PyExc_%s" % (obj.__name__), id(obj))
def fix_python_api(): """ Execute once to install special symbols into the LLVM symbol table """ le.dylib_add_symbol("Py_None", ctypes.addressof(_PyNone)) le.dylib_add_symbol("NumbaArrayAdaptor", _numpyadapt.get_ndarray_adaptor()) le.dylib_add_symbol("NumbaComplexAdaptor", _helperlib.get_complex_adaptor()) le.dylib_add_symbol("NumbaNativeError", id(NativeError)) le.dylib_add_symbol("PyExc_NameError", id(NameError))
def _kernel_call(self, args, griddim, blockdim, stream=0, sharedmem=0): # Prepare kernel cufunc = self._func.get() if self.debug: excname = cufunc.name + "__errcode__" excmem, excsz = cufunc.module.get_global_symbol(excname) assert excsz == ctypes.sizeof(ctypes.c_int) excval = ctypes.c_int() excmem.memset(0, stream=stream) # Prepare arguments retr = [] # hold functors for writeback kernelargs = [] for t, v in zip(self.argument_types, args): self._prepare_args(t, v, stream, retr, kernelargs) # Configure kernel cu_func = cufunc.configure(griddim, blockdim, stream=stream, sharedmem=sharedmem) # Invoke kernel cu_func(*kernelargs) if self.debug: driver.device_to_host(ctypes.addressof(excval), excmem, excsz) if excval.value != 0: # An error occurred def load_symbol(name): mem, sz = cufunc.module.get_global_symbol( "%s__%s__" % (cufunc.name, name)) val = ctypes.c_int() driver.device_to_host(ctypes.addressof(val), mem, sz) return val.value tid = [load_symbol("tid" + i) for i in 'zyx'] ctaid = [load_symbol("ctaid" + i) for i in 'zyx'] code = excval.value exccls, exc_args = self.call_helper.get_exception(code) # Prefix the exception message with the thread position prefix = "tid=%s ctaid=%s" % (tid, ctaid) if exc_args: exc_args = ("%s: %s" % (prefix, exc_args[0]), ) + exc_args[1:] else: exc_args = prefix, raise exccls(*exc_args) # retrieve auto converted arrays for wb in retr: wb()
def _kernel_call(self, args, griddim, blockdim, stream=0, sharedmem=0): # Prepare kernel cufunc = self._func.get() if self.debug: excname = cufunc.name + "__errcode__" excmem, excsz = cufunc.module.get_global_symbol(excname) assert excsz == ctypes.sizeof(ctypes.c_int) excval = ctypes.c_int() excmem.memset(0, stream=stream) # Prepare arguments retr = [] # hold functors for writeback kernelargs = [] for t, v in zip(self.argument_types, args): self._prepare_args(t, v, stream, retr, kernelargs) # Configure kernel cu_func = cufunc.configure(griddim, blockdim, stream=stream, sharedmem=sharedmem) # Invoke kernel cu_func(*kernelargs) if self.debug: driver.device_to_host(ctypes.addressof(excval), excmem, excsz) if excval.value != 0: # An error occurred def load_symbol(name): mem, sz = cufunc.module.get_global_symbol("%s__%s__" % (cufunc.name, name)) val = ctypes.c_int() driver.device_to_host(ctypes.addressof(val), mem, sz) return val.value tid = [load_symbol("tid" + i) for i in 'zyx'] ctaid = [load_symbol("ctaid" + i) for i in 'zyx'] code = excval.value exccls, exc_args = self.call_helper.get_exception(code) # Prefix the exception message with the thread position prefix = "tid=%s ctaid=%s" % (tid, ctaid) if exc_args: exc_args = ("%s: %s" % (prefix, exc_args[0]),) + exc_args[1:] else: exc_args = prefix, raise exccls(*exc_args) # retrieve auto converted arrays for wb in retr: wb()
def fix_python_api(): """ Execute once to install special symbols into the LLVM symbol table """ le.dylib_add_symbol("Py_None", ctypes.addressof(_PyNone)) le.dylib_add_symbol("NumbaArrayAdaptor", _numpyadapt.get_ndarray_adaptor()) le.dylib_add_symbol("NumbaComplexAdaptor", _helperlib.get_complex_adaptor()) le.dylib_add_symbol("NumbaNativeError", id(NativeError)) le.dylib_add_symbol("NumbaExtractRecordData", _helperlib.get_extract_record_data()) le.dylib_add_symbol("NumbaReleaseRecordBuffer", _helperlib.get_release_record_buffer()) le.dylib_add_symbol("NumbaRecreateRecord", _helperlib.get_recreate_record()) le.dylib_add_symbol("PyExc_NameError", id(NameError))
def fix_python_api(): """ Execute once to install special symbols into the LLVM symbol table """ ll.add_symbol("Py_None", ctypes.addressof(_PyNone)) ll.add_symbol("numba_native_error", id(NativeError)) # Add C helper functions c_helpers = _helperlib.c_helpers for py_name in c_helpers: c_name = "numba_" + py_name c_address = c_helpers[py_name] ll.add_symbol(c_name, c_address) # Add all built-in exception classes for obj in utils.builtins.__dict__.values(): if isinstance(obj, type) and issubclass(obj, BaseException): ll.add_symbol("PyExc_%s" % (obj.__name__), id(obj))
def get(self): ctx = devices.get_context() result = self._cache.get(ctx) # The program does not exist as GCN yet. if result is None: # generate GCN symbol = '{0}'.format(self._entry_name) agent = ctx.agent ba = bytearray(self._binary) bblob = ctypes.c_byte * len(self._binary) bas = bblob.from_buffer(ba) code_ptr = drvapi.hsa_code_object_t() driver.hsa.hsa_code_object_deserialize( ctypes.addressof(bas), len(self._binary), None, ctypes.byref(code_ptr) ) code = driver.CodeObject(code_ptr) ex = driver.Executable() ex.load(agent, code) ex.freeze() symobj = ex.get_symbol(agent, symbol) regions = agent.regions.globals for reg in regions: if reg.host_accessible: if reg.supports(enums.HSA_REGION_GLOBAL_FLAG_KERNARG): kernarg_region = reg break assert kernarg_region is not None # Cache the GCN program result = _CacheEntry(symbol=symobj, executable=ex, kernarg_region=kernarg_region) self._cache[ctx] = result return ctx, result