Beispiel #1
0
    def __call__(self, *args):
        ctx, symbol, kernargs, kernarg_region = self.bind()

        # Unpack pyobject values into ctypes scalar values
        expanded_values = []
        for ty, val in zip(self.argument_types, args):
            _unpack_argument(ty, val, expanded_values)

        # Insert kernel arguments
        base = 0
        for av in expanded_values:
            # Adjust for alignemnt
            align = ctypes.sizeof(av)
            pad = _calc_padding_for_alignment(align, base)
            base += pad
            # Move to offset
            offseted = ctypes.addressof(kernargs) + base
            asptr = ctypes.cast(offseted, ctypes.POINTER(type(av)))
            # Assign value
            asptr[0] = av
            # Increment offset
            base += align

        assert base <= ctypes.sizeof(
            kernargs), "Kernel argument size is invalid"

        # Actual Kernel launch
        qq = ctx.default_queue

        # Dispatch
        qq.dispatch(symbol, kernargs, workgroup_size=self.local_size,
                    grid_size=self.global_size)

        # Free kernel region
        kernarg_region.free(kernargs)
Beispiel #2
0
 def load_symbol(name):
     mem, sz = cufunc.module.get_global_symbol("%s__%s__" %
                                               (cufunc.name,
                                                name))
     val = ctypes.c_int()
     driver.device_to_host(ctypes.addressof(val), mem, sz)
     return val.value
Beispiel #3
0
def fix_python_api():
    """
    Execute once to install special symbols into the LLVM symbol table
    """
    c_helpers = _helperlib.c_helpers
    le.dylib_add_symbol("Py_None", ctypes.addressof(_PyNone))
    le.dylib_add_symbol("NumbaArrayAdaptor", _numpyadapt.get_ndarray_adaptor())
    le.dylib_add_symbol("NumbaNDArrayNew", _numpyadapt.get_ndarray_new())

    le.dylib_add_symbol("NumbaComplexAdaptor",
                        c_helpers["complex_adaptor"])
    le.dylib_add_symbol("NumbaNativeError", id(NativeError))
    le.dylib_add_symbol("NumbaExtractRecordData",
                        c_helpers["extract_record_data"])
    le.dylib_add_symbol("NumbaReleaseRecordBuffer",
                        c_helpers["release_record_buffer"])
    le.dylib_add_symbol("NumbaRecreateRecord",
                        c_helpers["recreate_record"])

    le.dylib_add_symbol("NumbaGILEnsure",
                        c_helpers["gil_ensure"])
    le.dylib_add_symbol("NumbaGILRelease",
                        c_helpers["gil_release"])
    # Add all built-in exception classes
    for obj in utils.builtins.__dict__.values():
        if isinstance(obj, type) and issubclass(obj, BaseException):
            le.dylib_add_symbol("PyExc_%s" % (obj.__name__), id(obj))
Beispiel #4
0
 def load_symbol(name):
     mem, sz = cufunc.module.get_global_symbol("%s__%s__" %
                                               (cufunc.name,
                                                name))
     val = ctypes.c_int()
     driver.device_to_host(ctypes.addressof(val), mem, sz)
     return val.value
Beispiel #5
0
def fix_python_api():
    """
    Execute once to install special symbols into the LLVM symbol table
    """
    le.dylib_add_symbol("Py_None", ctypes.addressof(_PyNone))
    le.dylib_add_symbol("NumbaArrayAdaptor", _numpyadapt.get_ndarray_adaptor())
    le.dylib_add_symbol("NumbaComplexAdaptor",
                        _helperlib.get_complex_adaptor())
    le.dylib_add_symbol("NumbaNativeError", id(NativeError))
    le.dylib_add_symbol("PyExc_NameError", id(NameError))
Beispiel #6
0
def fix_python_api():
    """
    Execute once to install special symbols into the LLVM symbol table
    """
    le.dylib_add_symbol("Py_None", ctypes.addressof(_PyNone))
    le.dylib_add_symbol("NumbaArrayAdaptor", _numpyadapt.get_ndarray_adaptor())
    le.dylib_add_symbol("NumbaComplexAdaptor",
                        _helperlib.get_complex_adaptor())
    le.dylib_add_symbol("NumbaNativeError", id(NativeError))
    le.dylib_add_symbol("PyExc_NameError", id(NameError))
Beispiel #7
0
    def _kernel_call(self, args, griddim, blockdim, stream=0, sharedmem=0):
        # Prepare kernel
        cufunc = self._func.get()

        if self.debug:
            excname = cufunc.name + "__errcode__"
            excmem, excsz = cufunc.module.get_global_symbol(excname)
            assert excsz == ctypes.sizeof(ctypes.c_int)
            excval = ctypes.c_int()
            excmem.memset(0, stream=stream)

        # Prepare arguments
        retr = []  # hold functors for writeback

        kernelargs = []
        for t, v in zip(self.argument_types, args):
            self._prepare_args(t, v, stream, retr, kernelargs)

        # Configure kernel
        cu_func = cufunc.configure(griddim,
                                   blockdim,
                                   stream=stream,
                                   sharedmem=sharedmem)
        # Invoke kernel
        cu_func(*kernelargs)

        if self.debug:
            driver.device_to_host(ctypes.addressof(excval), excmem, excsz)
            if excval.value != 0:
                # An error occurred
                def load_symbol(name):
                    mem, sz = cufunc.module.get_global_symbol(
                        "%s__%s__" % (cufunc.name, name))
                    val = ctypes.c_int()
                    driver.device_to_host(ctypes.addressof(val), mem, sz)
                    return val.value

                tid = [load_symbol("tid" + i) for i in 'zyx']
                ctaid = [load_symbol("ctaid" + i) for i in 'zyx']
                code = excval.value
                exccls, exc_args = self.call_helper.get_exception(code)
                # Prefix the exception message with the thread position
                prefix = "tid=%s ctaid=%s" % (tid, ctaid)
                if exc_args:
                    exc_args = ("%s: %s" %
                                (prefix, exc_args[0]), ) + exc_args[1:]
                else:
                    exc_args = prefix,
                raise exccls(*exc_args)

        # retrieve auto converted arrays
        for wb in retr:
            wb()
Beispiel #8
0
    def _kernel_call(self, args, griddim, blockdim, stream=0, sharedmem=0):
        # Prepare kernel
        cufunc = self._func.get()

        if self.debug:
            excname = cufunc.name + "__errcode__"
            excmem, excsz = cufunc.module.get_global_symbol(excname)
            assert excsz == ctypes.sizeof(ctypes.c_int)
            excval = ctypes.c_int()
            excmem.memset(0, stream=stream)

        # Prepare arguments
        retr = []                       # hold functors for writeback

        kernelargs = []
        for t, v in zip(self.argument_types, args):
            self._prepare_args(t, v, stream, retr, kernelargs)

        # Configure kernel
        cu_func = cufunc.configure(griddim, blockdim,
                                   stream=stream,
                                   sharedmem=sharedmem)
        # Invoke kernel
        cu_func(*kernelargs)

        if self.debug:
            driver.device_to_host(ctypes.addressof(excval), excmem, excsz)
            if excval.value != 0:
                # An error occurred
                def load_symbol(name):
                    mem, sz = cufunc.module.get_global_symbol("%s__%s__" %
                                                              (cufunc.name,
                                                               name))
                    val = ctypes.c_int()
                    driver.device_to_host(ctypes.addressof(val), mem, sz)
                    return val.value

                tid = [load_symbol("tid" + i) for i in 'zyx']
                ctaid = [load_symbol("ctaid" + i) for i in 'zyx']
                code = excval.value
                exccls, exc_args = self.call_helper.get_exception(code)
                # Prefix the exception message with the thread position
                prefix = "tid=%s ctaid=%s" % (tid, ctaid)
                if exc_args:
                    exc_args = ("%s: %s" % (prefix, exc_args[0]),) + exc_args[1:]
                else:
                    exc_args = prefix,
                raise exccls(*exc_args)

        # retrieve auto converted arrays
        for wb in retr:
            wb()
Beispiel #9
0
def fix_python_api():
    """
    Execute once to install special symbols into the LLVM symbol table
    """
    le.dylib_add_symbol("Py_None", ctypes.addressof(_PyNone))
    le.dylib_add_symbol("NumbaArrayAdaptor", _numpyadapt.get_ndarray_adaptor())
    le.dylib_add_symbol("NumbaComplexAdaptor",
                        _helperlib.get_complex_adaptor())
    le.dylib_add_symbol("NumbaNativeError", id(NativeError))
    le.dylib_add_symbol("NumbaExtractRecordData",
                        _helperlib.get_extract_record_data())
    le.dylib_add_symbol("NumbaReleaseRecordBuffer",
                        _helperlib.get_release_record_buffer())
    le.dylib_add_symbol("NumbaRecreateRecord",
                        _helperlib.get_recreate_record())
    le.dylib_add_symbol("PyExc_NameError", id(NameError))
Beispiel #10
0
def fix_python_api():
    """
    Execute once to install special symbols into the LLVM symbol table
    """
    le.dylib_add_symbol("Py_None", ctypes.addressof(_PyNone))
    le.dylib_add_symbol("NumbaArrayAdaptor", _numpyadapt.get_ndarray_adaptor())
    le.dylib_add_symbol("NumbaComplexAdaptor",
                        _helperlib.get_complex_adaptor())
    le.dylib_add_symbol("NumbaNativeError", id(NativeError))
    le.dylib_add_symbol("NumbaExtractRecordData",
                        _helperlib.get_extract_record_data())
    le.dylib_add_symbol("NumbaReleaseRecordBuffer",
                        _helperlib.get_release_record_buffer())
    le.dylib_add_symbol("NumbaRecreateRecord",
                        _helperlib.get_recreate_record())
    le.dylib_add_symbol("PyExc_NameError", id(NameError))
Beispiel #11
0
def fix_python_api():
    """
    Execute once to install special symbols into the LLVM symbol table
    """

    ll.add_symbol("Py_None", ctypes.addressof(_PyNone))
    ll.add_symbol("numba_native_error", id(NativeError))

    # Add C helper functions
    c_helpers = _helperlib.c_helpers
    for py_name in c_helpers:
        c_name = "numba_" + py_name
        c_address = c_helpers[py_name]
        ll.add_symbol(c_name, c_address)

    # Add all built-in exception classes
    for obj in utils.builtins.__dict__.values():
        if isinstance(obj, type) and issubclass(obj, BaseException):
            ll.add_symbol("PyExc_%s" % (obj.__name__), id(obj))
Beispiel #12
0
def fix_python_api():
    """
    Execute once to install special symbols into the LLVM symbol table
    """

    ll.add_symbol("Py_None", ctypes.addressof(_PyNone))
    ll.add_symbol("numba_native_error", id(NativeError))

    # Add C helper functions
    c_helpers = _helperlib.c_helpers
    for py_name in c_helpers:
        c_name = "numba_" + py_name
        c_address = c_helpers[py_name]
        ll.add_symbol(c_name, c_address)

    # Add all built-in exception classes
    for obj in utils.builtins.__dict__.values():
        if isinstance(obj, type) and issubclass(obj, BaseException):
            ll.add_symbol("PyExc_%s" % (obj.__name__), id(obj))
Beispiel #13
0
    def get(self):
        ctx = devices.get_context()
        result = self._cache.get(ctx)
        # The program does not exist as GCN yet.
        if result is None:

            # generate GCN
            symbol = '{0}'.format(self._entry_name)
            agent = ctx.agent

            ba = bytearray(self._binary)
            bblob = ctypes.c_byte * len(self._binary)
            bas = bblob.from_buffer(ba)

            code_ptr = drvapi.hsa_code_object_t()
            driver.hsa.hsa_code_object_deserialize(
                    ctypes.addressof(bas),
                    len(self._binary),
                    None,
                    ctypes.byref(code_ptr)
                    )

            code = driver.CodeObject(code_ptr)

            ex = driver.Executable()
            ex.load(agent, code)
            ex.freeze()
            symobj = ex.get_symbol(agent, symbol)
            regions = agent.regions.globals
            for reg in regions:
                if reg.host_accessible:
                    if reg.supports(enums.HSA_REGION_GLOBAL_FLAG_KERNARG):
                        kernarg_region = reg
                        break
            assert kernarg_region is not None

            # Cache the GCN program
            result = _CacheEntry(symbol=symobj, executable=ex,
                                 kernarg_region=kernarg_region)
            self._cache[ctx] = result

        return ctx, result
Beispiel #14
0
    def get(self):
        ctx = devices.get_context()
        result = self._cache.get(ctx)
        # The program does not exist as GCN yet.
        if result is None:

            # generate GCN
            symbol = '{0}'.format(self._entry_name)
            agent = ctx.agent

            ba = bytearray(self._binary)
            bblob = ctypes.c_byte * len(self._binary)
            bas = bblob.from_buffer(ba)

            code_ptr = drvapi.hsa_code_object_t()
            driver.hsa.hsa_code_object_deserialize(
                    ctypes.addressof(bas),
                    len(self._binary),
                    None,
                    ctypes.byref(code_ptr)
                    )

            code = driver.CodeObject(code_ptr)

            ex = driver.Executable()
            ex.load(agent, code)
            ex.freeze()
            symobj = ex.get_symbol(agent, symbol)
            regions = agent.regions.globals
            for reg in regions:
                if reg.host_accessible:
                    if reg.supports(enums.HSA_REGION_GLOBAL_FLAG_KERNARG):
                        kernarg_region = reg
                        break
            assert kernarg_region is not None

            # Cache the GCN program
            result = _CacheEntry(symbol=symobj, executable=ex,
                                 kernarg_region=kernarg_region)
            self._cache[ctx] = result

        return ctx, result