Exemple #1
0
def _get_equal(context, module, datamodel, container_element_type):
    """Build the element-equality helper for a container and return it.

    Two functions are inserted into ``module``:

    * a ``..._equal.wrap`` function using the context's calling convention
      that evaluates ``operator.eq`` on two ``datamodel.fe_type`` values and
      returns the result cast to ``int32``;
    * a ``..._equal`` function taking two data pointers, which loads both
      operands, calls the wrapper and returns an ``int32``: the comparison
      result, ``0`` when the call status is "none", or ``-1`` when the call
      status is not OK.

    The second function is the return value.
    """
    assert datamodel.contains_nrt_meminfo()

    elem_type = datamodel.fe_type
    elem_ptr_type = datamodel.get_data_type().as_pointer()

    wrapper_fnty = context.call_conv.get_function_type(
        types.int32, [elem_type, elem_type])
    argtypes = [elem_type, elem_type]

    # --- Inner wrapper: numba calling convention around operator.eq ---
    wrapper = cgutils.get_or_insert_function(
        module,
        wrapper_fnty,
        name='.numba_{}.{}_equal.wrap'.format(context.fndesc.mangled_name,
                                              container_element_type))
    wrap_builder = Builder(wrapper.append_basic_block())
    wrap_args = context.call_conv.decode_arguments(wrap_builder, argtypes,
                                                   wrapper)
    eq_sig = typing.signature(types.boolean, elem_type, elem_type)
    eq_type = context.typing_context.resolve_value_type(operator.eq)
    # The returned call type is not used; the call is kept for its effect
    # on the typing of ``eq_type`` before the implementation is requested.
    eq_type.get_call_type(context.typing_context, eq_sig.args, {})
    eq_impl = context.get_function(eq_type, eq_sig)
    is_equal = eq_impl(wrap_builder, wrap_args)
    context.call_conv.return_value(
        wrap_builder,
        context.cast(wrap_builder, is_equal, types.boolean, types.int32))

    # --- Outer function: plain (data*, data*) -> i32 entry point ---
    outer_fnty = ir.FunctionType(ir.IntType(32),
                                 [elem_ptr_type, elem_ptr_type])
    outer = cgutils.get_or_insert_function(
        module,
        outer_fnty,
        name='.numba_{}.{}_equal'.format(context.fndesc.mangled_name,
                                         container_element_type),
    )
    builder = Builder(outer.append_basic_block())
    left = datamodel.load_from_data_pointer(builder, outer.args[0])
    right = datamodel.load_from_data_pointer(builder, outer.args[1])

    status, result = context.call_conv.call_function(
        builder, wrapper, types.boolean, argtypes, [left, right])
    with builder.if_then(status.is_ok, likely=True):
        with builder.if_then(status.is_none):
            builder.ret(context.get_constant(types.int32, 0))
        builder.ret(context.cast(builder, result, types.boolean, types.int32))
    # Reached only when the wrapper call did not report success.
    builder.ret(context.get_constant(types.int32, -1))

    return outer
Exemple #2
0
    def lower_finalize_func(self, lower):
        """
        Emit the LLVM function that finalizes this generator.

        The function is fetched from (or added to) ``lower.module`` under
        ``self.gendesc.llvm_finalizer_name``. It returns void and takes a
        single argument of the generator's value type; its body is produced
        by ``self.lower_finalize_func_body``.
        """
        finalizer_ty = Type.function(
            Type.void(), [self.context.get_value_type(self.gentype)])
        finalizer = lower.module.get_or_insert_function(
            finalizer_ty, name=self.gendesc.llvm_finalizer_name)
        builder = Builder(finalizer.append_basic_block('entry'))

        gen_ty = self.context.get_value_type(self.gentype)
        gen_arg = builder.bitcast(finalizer.args[0], gen_ty)
        self.lower_finalize_func_body(builder, gen_arg)
Exemple #3
0
    def lower_finalize_func(self, lower):
        """
        Emit the LLVM function that finalizes this generator.

        The function is fetched from (or added to) ``lower.module`` under
        ``self.gendesc.llvm_finalizer_name``. It returns void and takes a
        single argument of the generator's value type; its body is produced
        by ``self.lower_finalize_func_body``.
        """
        finalizer_ty = Type.function(
            Type.void(), [self.context.get_value_type(self.gentype)])
        finalizer = lower.module.get_or_insert_function(
            finalizer_ty, name=self.gendesc.llvm_finalizer_name)
        builder = Builder(finalizer.append_basic_block('entry'))

        gen_ty = self.context.get_value_type(self.gentype)
        gen_arg = builder.bitcast(finalizer.args[0], gen_ty)
        self.lower_finalize_func_body(builder, gen_arg)
Exemple #4
0
    def test_nvvm_from_llvm(self):
        """Compile a trivial void kernel to PTX and check its contents."""
        module = Module("test_nvvm_from_llvm")
        kernel_ty = Type.function(Type.void(), [Type.int()])
        kernel = module.add_function(kernel_ty, name='mycudakernel')
        Builder(kernel.append_basic_block('entry')).ret_void()
        set_cuda_kernel(kernel)

        fix_data_layout(module)
        ptx = llvm_to_ptx(str(module)).decode('utf8')
        self.assertTrue('mycudakernel' in ptx)
        # The expected address-size directive is selected by ``is64bit``.
        address_size = '.address_size 64' if is64bit else '.address_size 32'
        self.assertTrue(address_size in ptx)
Exemple #5
0
    def test_nvvm_from_llvm(self):
        """Compile a trivial void kernel to PTX and check its contents."""
        module = Module("test_nvvm_from_llvm")
        kernel_ty = Type.function(Type.void(), [Type.int()])
        kernel = module.add_function(kernel_ty, name='mycudakernel')
        Builder(kernel.append_basic_block('entry')).ret_void()
        set_cuda_kernel(kernel)

        fix_data_layout(module)
        ptx = llvm_to_ptx(str(module)).decode('utf8')
        self.assertTrue('mycudakernel' in ptx)
        # The expected address-size directive is selected by ``is64bit``.
        address_size = '.address_size 64' if is64bit else '.address_size 32'
        self.assertTrue(address_size in ptx)
Exemple #6
0
    def __init__(self, context, library, fndesc, interp):
        """
        Prepare the state needed to lower one function.

        Stores ``context``, ``library`` and ``fndesc``, copies
        ``interp.blocks`` into a ``SortedMap``, creates an IR module named
        after ``fndesc.unique_name``, declares the function in it and
        positions a builder at a fresh ``entry`` block. ``self.init()`` is
        called last so subclasses can extend the setup.
        """
        self.context = context
        self.library = library
        self.fndesc = fndesc
        self.blocks = utils.SortedMap(utils.iteritems(interp.blocks))

        # Initialize LLVM
        self.module = self.library.create_ir_module(self.fndesc.unique_name)

        # Python execution environment (will be available to the compiled
        # function).
        self.env = _dynfunc.Environment(
            globals=self.fndesc.lookup_module().__dict__)

        # Mapping of error codes to exception classes or instances
        self.exceptions = {}

        # Setup function
        self.function = context.declare_function(self.module, fndesc)
        self.entry_block = self.function.append_basic_block('entry')
        self.builder = Builder.new(self.entry_block)

        # Internal states
        self.blkmap = {}
        self.varmap = {}
        # Smallest key of the block map; presumably the entry block's
        # offset -- confirm against the users of ``firstblk``.
        self.firstblk = min(self.blocks.keys())
        self.loc = -1

        # Subclass initialization
        self.init()
Exemple #7
0
    def __init__(self, context, library, fndesc, interp):
        """
        Prepare the state needed to lower one function.

        Stores ``context``, ``library``, ``fndesc`` and ``interp``, copies
        ``interp.blocks`` into a ``SortedMap``, creates an IR module named
        after ``fndesc.unique_name``, declares the function in it, positions
        a builder at a fresh ``entry`` block and asks the calling convention
        for a call helper bound to that builder. ``self.init()`` is called
        last so subclasses can extend the setup.
        """
        self.context = context
        self.library = library
        self.fndesc = fndesc
        self.blocks = utils.SortedMap(utils.iteritems(interp.blocks))
        self.interp = interp
        # Shortcut to the target context's calling convention.
        self.call_conv = context.call_conv

        # Initialize LLVM
        self.module = self.library.create_ir_module(self.fndesc.unique_name)

        # Python execution environment (will be available to the compiled
        # function).
        self.env = _dynfunc.Environment(
            globals=self.fndesc.lookup_module().__dict__)

        # Setup function
        self.function = context.declare_function(self.module, fndesc)
        self.entry_block = self.function.append_basic_block('entry')
        self.builder = Builder.new(self.entry_block)
        self.call_helper = self.call_conv.init_call_helper(self.builder)

        # Internal states
        self.blkmap = {}
        self.varmap = {}
        # Smallest key of the block map; presumably the entry block's
        # offset -- confirm against the users of ``firstblk``.
        self.firstblk = min(self.blocks.keys())
        self.loc = -1

        # Subclass initialization
        self.init()
Exemple #8
0
    def build_wrapper(fn):
        """Fill ``fn`` with a body returning ``operator.eq`` of its two
        decoded arguments, cast from boolean to int32."""
        fn_builder = Builder(fn.append_basic_block())
        decoded = context.call_conv.decode_arguments(fn_builder, argtypes, fn)

        eq_sig = typing.signature(types.boolean, fe_type, fe_type)
        eq_type = context.typing_context.resolve_value_type(operator.eq)
        # The returned call type is not used; the call precedes the lookup
        # of the implementation below.
        eq_type.get_call_type(context.typing_context, eq_sig.args, {})
        compare = context.get_function(eq_type, eq_sig)
        is_equal = compare(fn_builder, decoded)
        as_int32 = context.cast(fn_builder, is_equal, types.boolean,
                                types.int32)
        context.call_conv.return_value(fn_builder, as_int32)
Exemple #9
0
    def code(self, codegen):
        """
        Emit this method's body into its already-declared LLVM function.

        The function named ``<class>::<method>`` is looked up in
        ``codegen.module``, made the current function with fresh ``entry``
        and ``exit`` blocks, and the method body (if any) is visited. The
        code generator's previous function and builder are restored before
        the function is returned.
        """
        klass = codegen.current_class
        qualified_name = f'{klass.name}::{self.name}'

        # Raises IndexError when no function carries the qualified name.
        func = [
            candidate for candidate in codegen.module.functions
            if candidate.name == qualified_name
        ][0]

        codegen.function_stack.append(func)

        saved_function = codegen.current_function
        saved_builder = codegen.builder
        codegen.current_function = func
        entry_block = codegen.add_block('entry')
        exit_block = codegen.add_block('exit')
        codegen.exit_blocks.append(exit_block)
        codegen.builder = Builder(entry_block)

        if self.is_constructor:
            # Store the class vtable global through the pointer obtained by
            # indexing the first argument with INDICES.
            this = codegen.gep(func.args[0], INDICES)
            vtable = codegen.module.get_global(f'{klass.name}_vtable')
            codegen.builder.store(vtable, this)

        body = self.body
        ret = codegen.visit(body) if body else None

        codegen.branch(exit_block)

        if not ret:
            # Nothing was produced by the body: terminate the exit block
            # with a plain ``ret void``.
            codegen.position_at_end(exit_block)
            codegen.builder.ret_void()

        codegen.current_function = saved_function
        codegen.builder = saved_builder
        codegen.exit_blocks.pop()
        codegen.function_stack.pop()

        return func
    def test_inline_rsqrt(self):
        """The inline ``rsqrt.approx.f32`` asm must show up in the PTX."""
        module = Module.new(__name__)
        kernel_ty = Type.function(Type.void(), [Type.pointer(Type.float())])
        kernel = module.add_function(kernel_ty, "cu_rsqrt")
        builder = Builder.new(kernel.append_basic_block("entry"))

        asm_ty = Type.function(Type.float(), [Type.float()])
        rsqrt_asm = InlineAsm.get(
            asm_ty,
            "rsqrt.approx.f32 $0, $1;",
            "=f,f",
            side_effect=True,
        )
        # Load the float, run it through the asm and store it back in place.
        loaded = builder.load(kernel.args[0])
        approx = builder.call(rsqrt_asm, [loaded])

        builder.store(approx, kernel.args[0])
        builder.ret_void()

        # generate ptx
        nvvm.fix_data_layout(module)
        nvvm.set_cuda_kernel(kernel)
        ptx = nvvm.llvm_to_ptx(str(module))
        self.assertTrue("rsqrt.approx.f32" in str(ptx))
Exemple #11
0
    def test_inline_rsqrt(self):
        """The inline ``rsqrt.approx.f32`` asm must show up in the PTX."""
        module = Module(__name__)
        kernel_ty = Type.function(Type.void(), [Type.pointer(Type.float())])
        kernel = module.add_function(kernel_ty, 'cu_rsqrt')
        builder = Builder(kernel.append_basic_block('entry'))

        asm_ty = Type.function(Type.float(), [Type.float()])
        rsqrt_asm = InlineAsm.get(
            asm_ty, 'rsqrt.approx.f32 $0, $1;', '=f,f', side_effect=True)
        # Load the float, run it through the asm and store it back in place.
        loaded = builder.load(kernel.args[0])
        approx = builder.call(rsqrt_asm, [loaded])

        builder.store(approx, kernel.args[0])
        builder.ret_void()

        # generate ptx
        nvvm.fix_data_layout(module)
        nvvm.set_cuda_kernel(kernel)
        ptx = nvvm.llvm_to_ptx(str(module))
        self.assertTrue('rsqrt.approx.f32' in str(ptx))
Exemple #12
0
    def build(self):
        """
        Create the wrapper function for ``self.func`` in ``self.module``.

        The wrapper is named ``wrapper.<func name>`` and takes and returns
        values of the context's argument type for ``types.pyobject``; its
        body is generated by ``self.build_wrapper``.

        Returns a ``(wrapper, api)`` pair where ``api`` is the Python API
        helper bound to the wrapper's builder.
        """
        wrapper_name = "wrapper.%s" % self.func.name

        # This is the signature of PyCFunctionWithKeywords
        # (see CPython's methodobject.h)
        pyobj = self.context.get_argument_type(types.pyobject)
        wrapper = self.module.add_function(
            Type.function(pyobj, [pyobj, pyobj, pyobj]), name=wrapper_name)

        builder = Builder.new(wrapper.append_basic_block('entry'))

        # - `closure` will receive the `self` pointer stored in the
        #   PyCFunction object (see _dynfunc.c)
        # - `args` and `kws` will receive the tuple and dict objects
        #   of positional and keyword arguments, respectively.
        closure, args, kws = wrapper.args
        for llvm_arg, label in ((closure, 'py_closure'),
                                (args, 'py_args'),
                                (kws, 'py_kws')):
            llvm_arg.name = label

        api = self.context.get_python_api(builder)
        self.build_wrapper(api, builder, closure, args, kws)

        return wrapper, api
Exemple #13
0
def build_ufunc_wrapper(library, context, fname, signature, objmode, envptr,
                        env):
    """
    Generate a looping wrapper around the scalar function named ``fname``.

    A new ``ufunc_wrapper`` library is created; inside it the scalar
    function is declared (marked ``alwaysinline``) and a
    ``__ufunc__.<fname>`` function taking ``(args, dims, steps, data)`` is
    built that calls it once per element. In object mode the loop runs
    between ``gil_ensure`` / ``gil_release``; otherwise a fast loop body is
    used when every input reports unit stride and a general loop body
    otherwise.

    Returns the pointer to the compiled wrapper function.
    """
    assert isinstance(fname, str)
    i8 = Type.int(8)
    i8_ptr = Type.pointer(i8)
    i8_ptr_ptr = Type.pointer(i8_ptr)
    intp_t = context.get_value_type(types.intp)
    intp_ptr = Type.pointer(intp_t)

    wrapper_fnty = Type.function(
        Type.void(), [i8_ptr_ptr, intp_ptr, intp_ptr, i8_ptr])

    wrapperlib = context.codegen().create_library('ufunc_wrapper')
    wrapper_module = wrapperlib.create_ir_module('')

    # Object mode passes and returns every value as a Python object.
    if objmode:
        scalar_ret = types.pyobject
        scalar_args = [types.pyobject] * len(signature.args)
    else:
        scalar_ret = signature.return_type
        scalar_args = signature.args
    func_type = context.call_conv.get_function_type(scalar_ret, scalar_args)

    func = wrapper_module.add_function(func_type, name=fname)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(wrapper_fnty,
                                          "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    for llvm_arg, label in ((arg_args, "args"), (arg_dims, "dims"),
                            (arg_steps, "steps"), (arg_data, "data")):
        llvm_arg.name = label

    builder = Builder(wrapper.append_basic_block("entry"))

    loopcount = builder.load(arg_dims, name="loopcount")

    # Prepare inputs
    arrays = [
        UArrayArg(context, builder, arg_args, arg_steps, position, typ)
        for position, typ in enumerate(signature.args)
    ]

    # Prepare output (it follows the inputs in the ``args`` array)
    out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
                    signature.return_type)

    # Setup indices: one zero-initialised offset slot per input
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        slot = cgutils.alloca_once(builder, intp_t)
        offsets.append(slot)
        builder.store(zero, slot)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # AND together the unit-stride flags of all inputs
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    pyapi = context.get_python_api(builder)
    if objmode:
        # General loop
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            build_obj_loop_body(context, func, builder, arrays, out, offsets,
                                store_offset, signature, pyapi, envptr, env)
        pyapi.gil_release(gil)
    else:
        with builder.if_else(unit_strided) as (is_unit_strided, is_strided):
            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as loop:
                    build_fast_loop_body(context, func, builder, arrays, out,
                                         offsets, store_offset, signature,
                                         loop.index, pyapi)

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    build_slow_loop_body(context, func, builder, arrays, out,
                                         offsets, store_offset, signature,
                                         pyapi)

    builder.ret_void()
    del builder

    # Link and finalize
    wrapperlib.add_ir_module(wrapper_module)
    wrapperlib.add_linking_library(library)
    return wrapperlib.get_pointer_to_function(wrapper.name)
Exemple #14
0
class CodeGenerator(Printable):
    """
    LLVM IR generator driving the AST nodes of the ``opal-lang`` module.

    The instance owns the module under construction together with the state
    the AST nodes rely on while emitting code: the symbol and type tables,
    the current function and builder, and the stacks of exit/loop blocks.
    """

    def __init__(self):
        """Create the module, declare the builtins and open ``main``."""
        # TODO: come up with a less naive way of handling the symtab and types
        self.classes = None
        self.symtab = {}
        self.typetab = {}
        self.is_break = False
        self.current_class = None

        self.loop_end_blocks = []
        self.loop_cond_blocks = []
        context = ir.Context()
        self.module = Module(name='opal-lang', context=context)
        self.blocks = []
        self.scope = {}

        self._add_builtins()

        func_ty = ir.FunctionType(ir.VoidType(), [])
        func = Function(self.module, func_ty, 'main')

        self.current_function = func
        entry_block = self.add_block('entry')
        exit_block = self.add_block('exit')

        self.function_stack = [func]
        self.builder = Builder(entry_block)
        self.exit_blocks = [exit_block]
        self.block_stack = [entry_block]

    def __str__(self):
        """Return the textual form of the module."""
        return str(self.module)

    def _add_builtins(self):
        """Declare the external runtime functions the generated code calls."""
        malloc_ty = ir.FunctionType(Int8.as_llvm().as_pointer(),
                                    [Integer.as_llvm()])
        ir.Function(self.module, malloc_ty, 'malloc')

        free_ty = ir.FunctionType(Any.as_llvm(), [Int8.as_llvm().as_pointer()])
        ir.Function(self.module, free_ty, 'free')

        puts_ty = ir.FunctionType(Integer.as_llvm(),
                                  [Int8.as_llvm().as_pointer()])
        ir.Function(self.module, puts_ty, 'puts')

        int_to_string_ty = ir.FunctionType(Int8.as_llvm().as_pointer(), [
            Integer.as_llvm(),
            Int8.as_llvm().as_pointer(),
            Integer.as_llvm()
        ])
        ir.Function(self.module, int_to_string_ty, 'int_to_string')

        printf_ty = ir.FunctionType(Integer.as_llvm(),
                                    [Int8.as_llvm().as_pointer()],
                                    var_arg=True)
        ir.Function(self.module, printf_ty, 'printf')

        vector_init_ty = ir.FunctionType(Any.as_llvm(),
                                         [List.as_llvm().as_pointer()])
        ir.Function(self.module, vector_init_ty, 'vector_init')

        vector_append_ty = ir.FunctionType(
            Any.as_llvm(),
            [List.as_llvm().as_pointer(),
             Int8.as_llvm().as_pointer()])
        ir.Function(self.module, vector_append_ty, 'vector_append')

        vector_get_ty = ir.FunctionType(
            Int8.as_llvm().as_pointer(),
            [List.as_llvm().as_pointer(),
             Integer.as_llvm()])
        ir.Function(self.module, vector_get_ty, 'vector_get')

        vector_size_ty = ir.FunctionType(Integer.as_llvm(),
                                         [List.as_llvm().as_pointer()])
        ir.Function(self.module, vector_size_ty, 'vector_size')

    def alloc(self, typ, name=''):
        """Emit an ``alloca`` of ``typ`` and return the resulting pointer."""
        return self.builder.alloca(typ, name=name)

    def alloc_and_store(self, val, typ, name=''):
        """Allocate a slot of ``typ``, store ``val`` in it, return the slot."""
        var_addr = self.alloc(typ, name)
        self.builder.store(val, var_addr)
        return var_addr

    def add_block(self, name):
        """Append a basic block called ``name`` to the current function."""
        return self.current_function.append_basic_block(name)

    def assign(self, name, value, typ, is_class=False):
        """
        Bind ``name`` to ``value`` in the symbol table.

        With ``is_class`` the value is recorded as-is. Otherwise an existing
        binding is overwritten with a store into its slot (the store
        instruction is returned), and a new name gets a freshly allocated
        slot (the slot address is returned).
        """
        if is_class:
            self.symtab[name] = value
            self.typetab[name] = typ
            return value

        old_val = self.symtab.get(name)
        if old_val:
            new_val = self.builder.store(value, old_val)
            # operands[1] of the store is the destination pointer.
            self.symtab[name] = new_val.operands[1]
            return new_val

        var_address = self.alloc_and_store(value, typ, name=name)

        self.symtab[name] = var_address
        self.typetab[name] = typ
        return var_address

    def get_var(self, name):
        """Return the symbol-table entry for ``name`` (KeyError if unknown)."""
        return self.symtab[name]

    def get_var_type(self, name):
        """Return the recorded type for ``name`` (KeyError if unknown)."""
        return self.typetab[name]

    # noinspection SpellCheckingInspection
    def bitcast(self, value, type_):
        """Forward to the current builder's ``bitcast``."""
        return self.builder.bitcast(value, type_)

    def branch(self, block):
        """Forward to the current builder's ``branch``."""
        return self.builder.branch(block)

    # noinspection SpellCheckingInspection
    def cbranch(self, cond, true_block, false_block):
        """Forward to the current builder's ``cbranch``."""
        return self.builder.cbranch(cond, true_block, false_block)

    def gep(self, ptr, indices, inbounds=False, name=''):
        """Forward to the current builder's ``gep``."""
        return self.builder.gep(ptr, indices, inbounds, name)

    def generate_code(self, code):
        """
        Parse ``code`` and generate IR for the resulting program.

        A trailing newline is appended before parsing. Class metadata is
        generated for every class the AST visitor collected, then the
        program node is asked to accept this generator.
        """
        visitor = ASTVisitor()
        ast = visitor.transform(parser.parse(f"{code}\n"))
        self.classes = visitor.classes

        for klass in self.classes:
            self.generate_classes_metadata(klass)

        assert isinstance(ast, Program)
        return ast.accept(self)

    def load(self, ptr, name=''):
        """Forward to the current builder's ``load``."""
        return self.builder.load(ptr, name)

    def position_at_end(self, block):
        """Forward to the current builder's ``position_at_end``."""
        return self.builder.position_at_end(block)

    def select(self, val, true, false):
        """Forward to the current builder's ``select``."""
        return self.builder.select(val, true, false)

    @staticmethod
    def insert_const_string(module, string):
        """
        Return the global holding ``Constant.stringz(string)`` in ``module``.

        The global is named by ``get_string_name`` and is created (private
        linkage, constant, ``unnamed_addr``) only when ``module.globals``
        has no entry under that name yet.
        """
        text = Constant.stringz(string)
        name = CodeGenerator.get_string_name(string)
        gv = module.globals.get(name)
        if gv is None:
            gv = module.add_global_variable(text.type, name=name)
            gv.linkage = PRIVATE_LINKAGE
            gv.unnamed_addr = True
            gv.global_constant = True
            gv.initializer = text

        return gv

    @staticmethod
    def get_string_name(string):
        """Return ``str_<sha3-256 hex digest of the UTF-8 encoded string>``."""
        m = sha3_256()
        m.update(string.encode('utf-8'))

        return '_'.join(['str', str(m.hexdigest())])

    def call(self, name, args):
        """Emit a call to the module-level function ``name`` with ``args``."""
        func = self.module.get_global(name)

        return self.builder.call(func, args)

    def const(self, val):
        """
        Build an IR constant from a Python ``bool``, ``int`` or ``float``.

        Raises ``NotImplementedError`` for any other Python type.
        """
        # has to come first because freaking `isinstance(True, int) == True`
        if isinstance(val, bool):
            return ir.Constant(Bool.as_llvm(), 1 if val else 0)
        if isinstance(val, int):
            return ir.Constant(Integer.as_llvm(), val)
        if isinstance(val, float):
            return ir.Constant(Float.as_llvm(), val)

        raise NotImplementedError

    @staticmethod
    def generic_codegen(node):
        """Fallback visitor: always raises ``NotImplementedError``."""
        raise NotImplementedError('No visit_{} method'.format(
            type(node).__name__.lower()))

    def visit(self, node: ASTNode):
        """
        Dynamically invoke the code generator for each specific node.

        Nothing is generated (``None`` is returned) while ``is_break`` is
        set. Nodes exposing a ``code`` attribute generate themselves;
        otherwise ``visit_<lowercased class name>`` is looked up on this
        object, falling back to ``generic_codegen``.

        :param node: ASTNode
        """

        if self.is_break:
            return None

        can_code_gen = hasattr(node, 'code')
        if can_code_gen:
            # noinspection PyUnresolvedReferences
            return node.code(codegen=self)

        method = 'visit_' + type(node).__name__.lower()  # pragma: no cover

        return getattr(self, method,
                       self.generic_codegen)(node)  # pragma: no cover

    # TODO: refactor to create smaller, specific functions
    def generate_classes_metadata(self, klass: Klass):
        """
        Declare the LLVM types, functions and vtable global for ``klass``.

        For a class ``C`` this creates the identified types ``C`` and
        ``C_vtable_type``, declares one ``C::<method>`` function per method
        (taking a ``C*`` first argument), and defines the private constant
        global ``C_vtable`` holding: a pointer to the parent vtable (null
        without a parent), a pointer to the class-name string, and the
        method pointers. The body of ``C`` is a single vtable pointer.

        Raises ``CodegenError`` when the class is not ``Object`` and its
        parent is not among ``self.classes``.
        """
        name = klass.name
        parent = klass.parent

        undefined_parent_class = name != 'Object' and parent not in [
            c.name for c in self.classes
        ]

        if undefined_parent_class:
            raise CodegenError(f'Parent class {parent} not defined')

        ctx = self.module.context
        vtable_typ = ctx.get_identified_type(f"{name}_vtable_type")
        type_ = ctx.get_identified_type(name)
        object_type = ctx.get_identified_type('Object')

        # Declare every method, keyed by its qualified name, in source order.
        funktions = OrderedDict()
        for func in klass.functions:
            funk_name = f'{name}::{func.name}'

            signature = [
                get_param_type(param.type, object_type)
                for param in func.params
            ]
            if func.ret_type:
                ret = get_param_type(func.ret_type, object_type)
            else:
                ret = ir.VoidType()

            func_ty = ir.FunctionType(ret, [type_.as_pointer()] + signature)
            funktions[funk_name] = Function(self.module, func_ty, funk_name)

        # A class without a parent uses its own vtable type for the first
        # slot. A conditional is used instead of ``parent and X or Y`` so
        # the choice depends on ``parent`` alone and never on the
        # truthiness of the looked-up type object.
        if parent:
            parent_vtable_typ = ctx.get_identified_type(
                f"{parent}_vtable_type")
        else:
            parent_vtable_typ = vtable_typ

        # Layout: parent vtable pointer, class-name pointer, then methods.
        vtable_elements = [
            parent_vtable_typ.as_pointer(),
            ir.IntType(8).as_pointer(),
        ]
        vtable_elements += [el.type for el in funktions.values()]
        vtable_typ.set_body(*vtable_elements)

        # --
        class_string = CodeGenerator.insert_const_string(self.module, name)
        if parent:
            vtable_constant = ir.Constant(
                parent_vtable_typ.as_pointer(),
                self.module.get_global(f'{parent}_vtable').get_reference())
        else:
            vtable_constant = ir.Constant(vtable_typ.as_pointer(), None)

        fields = [vtable_constant, class_string.gep(INDICES)]

        fields += [
            ir.Constant(item.type, item.get_reference())
            for item in funktions.values()
        ]

        vtable = self.module.add_global_variable(vtable_typ,
                                                 name=f"{name}_vtable")
        vtable.linkage = PRIVATE_LINKAGE
        vtable.unnamed_addr = False
        vtable.global_constant = True
        vtable.initializer = vtable_typ(fields)

        # An instance currently consists of its vtable pointer only.
        type_.set_body(vtable_typ.as_pointer())

    def vector_get(self, vector, index):
        """Call ``vector_get`` and convert the returned pointer to Integer."""
        val = self.call('vector_get', [vector, index])
        val = self.builder.ptrtoint(val, Integer.as_llvm())
        return val

    def cast(self, from_, to):
        """
        Convert an Integer or Float value to ``Bool`` (value ``!= 0``).

        Raises ``NotImplementedError`` for every other combination.
        """
        if from_.type == Integer.as_llvm() and to is Bool:
            result = self.alloc_and_store(from_, Integer.as_llvm())
            result = self.load(result)
            return self.builder.icmp_signed('!=', result, self.const(0))
        if from_.type == Float.as_llvm() and to is Bool:
            result = self.alloc_and_store(from_, Float.as_llvm())
            result = self.load(result)
            return self.builder.fcmp_ordered('!=', result, self.const(0.0))

        raise NotImplementedError('Unsupported cast')

    def get_klass_by_name(self, name):
        """Return the class called ``name``, or ``None`` when none matches."""
        for klass in self.classes:
            if klass.name == name:
                return klass
        return None
Exemple #15
0
    def generate_kernel_wrapper(self, func, argtypes):
        """
        Build the ``cudaPy_<name>`` kernel entry point that wraps ``func``.

        A separate ``cuda.kernel.wrapper`` module is created in which
        ``func`` is re-declared by name and a void wrapper taking the
        argument types of ``argtypes`` is defined. The wrapper calls
        ``func``; when the call status is not OK it records the status code
        and the thread/block indices in global variables named after the
        wrapper. The wrapper module is then linked into ``func``'s module,
        which is verified.

        Returns the wrapper function as looked up in ``func``'s module.
        """
        module = func.module
        argtys = [self.get_argument_type(ty) for ty in argtypes]
        wrapfnty = Type.function(Type.void(), argtys)
        wrapper_module = self.create_module("cuda.kernel.wrapper")
        # Declaration of the wrapped function: int return, with the
        # pyobject return type prepended to the argument types.
        fnty = Type.function(Type.int(),
                             [self.get_return_type(types.pyobject)] + argtys)
        func = wrapper_module.add_function(fnty, name=func.name)
        wrapfn = wrapper_module.add_function(wrapfnty, name="cudaPy_" + func.name)
        builder = Builder.new(wrapfn.append_basic_block(''))

        # Define error handling variables
        def define_error_gv(postfix):
            # Null-initialised int global named "<wrapper name><postfix>".
            gv = wrapper_module.add_global_variable(Type.int(),
                                                    name=wrapfn.name + postfix)
            gv.initializer = Constant.null(gv.type.pointee)
            return gv

        gv_exc = define_error_gv("__errcode__")
        gv_tid = []
        gv_ctaid = []
        for i in 'xyz':
            gv_tid.append(define_error_gv("__tid%s__" % i))
            gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

        callargs = []
        for at, av in zip(argtypes, wrapfn.args):
            av = self.get_argument_value(builder, at, av)
            callargs.append(av)

        status, _ = self.call_function(builder, func, types.void, argtypes,
                                       callargs)

        # Check error status
        with cgutils.if_likely(builder, status.ok):
            builder.ret_void()

        # NOTE(review): the condition is the negation of ``status.exc`` while
        # the comment below says an exception was raised -- confirm which
        # meaning ``status.exc`` carries in this calling convention.
        with cgutils.ifthen(builder, builder.not_(status.exc)):
            # User exception raised
            old = Constant.null(gv_exc.type.pointee)

            # Use atomic cmpxchg to prevent rewriting the error status
            # Only the first error is recorded

            casfnty = lc.Type.function(old.type, [gv_exc.type, old.type,
                                                  old.type])

            # Declared here only; the definition of ``___numba_cas_hack`` is
            # not part of this function.
            casfn = wrapper_module.add_function(casfnty,
                                                name="___numba_cas_hack")
            xchg = builder.call(casfn, [gv_exc, old, status.code])
            changed = builder.icmp(ICMP_EQ, xchg, old)

            # If the xchange is successful, save the thread ID.
            sreg = nvvmutils.SRegBuilder(builder)
            with cgutils.ifthen(builder, changed):
                for dim, ptr, in zip("xyz", gv_tid):
                    val = sreg.tid(dim)
                    builder.store(val, ptr)

                for dim, ptr, in zip("xyz", gv_ctaid):
                    val = sreg.ctaid(dim)
                    builder.store(val, ptr)

        builder.ret_void()
        # force inline
        # inline_function(status.code)
        nvvm.set_cuda_kernel(wrapfn)
        # The wrapper module is round-tripped through its textual form
        # before being linked into the original module.
        module.link_in(ll.parse_assembly(str(wrapper_module)))
        module.verify()

        # Return the linked copy from the original module, looked up by name.
        wrapfn = module.get_function(wrapfn.name)
        return wrapfn
Exemple #16
0
 def setup_function(self, fndesc):
     """Declare the function for ``fndesc`` in ``self.module`` and attach a
     builder (plus the calling convention's call helper) to its new
     ``entry`` block."""
     declared = self.context.declare_function(self.module, fndesc)
     self.function = declared
     entry = declared.append_basic_block('entry')
     self.entry_block = entry
     self.builder = Builder(entry)
     self.call_helper = self.call_conv.init_call_helper(self.builder)
Exemple #17
0
def build_ufunc_wrapper(library, context, func, signature, objmode, env):
    """
    Wrap the scalar function with a loop that iterates over the arguments.

    A fresh IR module is created from *library*.  It re-declares *func*
    under the same name (marked ``alwaysinline``) and defines a wrapper
    called ``"__ufunc__." + func.name`` whose LLVM type is
    ``void(i8** args, intp* dims, intp* steps, i8* data)``.

    The *signature* arg supplies the argument and return types of the
    scalar function.  When *objmode* is true the scalar function is
    declared as taking and returning ``pyobject`` values, the loop is
    emitted between ``gil_ensure`` / ``gil_release`` calls and *env* is
    forwarded to ``build_obj_loop_body``; otherwise *env* is not used.

    After the module has been added to *library*, the linkage of the
    original *func* is set to ``LINKAGE_INTERNAL``.  The wrapper function
    looked up from *library* is returned.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(),
                         [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('')
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)
    # Keep a handle on the original function: ``func`` is rebound to the
    # declaration living in the wrapper module.
    oldfunc = func
    func = wrapper_module.add_function(func_type, name=func.name)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    # The first element of ``dims`` is used as the loop trip count.
    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = context.call_conv.get_arguments(func)

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(
            UArrayArg(context, builder, arg_args, arg_steps, i,
                      context.get_argument_type(typ)))

    # Prepare output
    # NOTE(review): the output slot index is len(actual_args), not
    # len(arrays) -- presumably these agree; confirm against the calling
    # convention.
    valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps, len(actual_args),
                    valty)

    # Setup indices: one zero-initialised offset slot per input
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # AND together the per-input ``is_unit_strided`` flags
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    if objmode:
        # General loop
        pyapi = context.get_python_api(builder)
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            build_obj_loop_body(context, func, builder, arrays, out,
                                offsets, store_offset, signature,
                                pyapi, env)
        pyapi.gil_release(gil)
        builder.ret_void()

    else:

        with cgutils.ifelse(builder,
                            unit_strided) as (is_unit_strided, is_strided):

            with is_unit_strided:
                with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
                    build_fast_loop_body(context, func, builder,
                                         arrays, out, offsets,
                                         store_offset, signature,
                                         ind)
                builder.ret_void()

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    build_slow_loop_body(context, func, builder,
                                         arrays, out, offsets,
                                         store_offset, signature)

                builder.ret_void()

        # Terminate the block the builder is left on after the if/else.
        builder.ret_void()
    del builder

    # Run optimizer
    library.add_ir_module(wrapper_module)
    wrapper = library.get_function(wrapper.name)
    oldfunc.linkage = LINKAGE_INTERNAL

    return wrapper
Exemple #18
0
    def generate_kernel_wrapper(self, func, argtypes):
        """
        Generate a kernel wrapper around *func* and link it into the module
        that owns *func*.

        The wrapper is built in a separate module named
        "cuda.kernel.wrapper" under the name ``"cudaPy_" + func.name``.  It
        unpacks its arguments with the arg packer for *argtypes*, calls the
        wrapped function through ``self.call_conv`` and, when the call does
        not report success, records the status code and the thread / block
        indices in global variables.

        The wrapper function, looked up by name from the linked module, is
        returned.
        """
        # The module that the finished wrapper is linked into.
        module = func.module

        arginfo = self.get_arg_packer(argtypes)
        argtys = list(arginfo.argument_types)
        wrapfnty = Type.function(Type.void(), argtys)
        wrapper_module = self.create_module("cuda.kernel.wrapper")
        # Type of the wrapped function: an int status, with the type from
        # ``call_conv.get_return_type(types.pyobject)`` as the leading
        # parameter, followed by the packed argument types.
        fnty = Type.function(Type.int(),
                             [self.call_conv.get_return_type(types.pyobject)] +
                             argtys)
        # ``func`` is rebound to a same-named declaration in the wrapper
        # module.
        func = wrapper_module.add_function(fnty, name=func.name)
        wrapfn = wrapper_module.add_function(wrapfnty,
                                             name="cudaPy_" + func.name)
        builder = Builder.new(wrapfn.append_basic_block(''))

        # Define error handling variables
        def define_error_gv(postfix):
            # Add a null-initialised int global named after the wrapper
            # plus *postfix*.
            gv = wrapper_module.add_global_variable(Type.int(),
                                                    name=wrapfn.name + postfix)
            gv.initializer = Constant.null(gv.type.pointee)
            return gv

        gv_exc = define_error_gv("__errcode__")
        gv_tid = []
        gv_ctaid = []
        for i in 'xyz':
            gv_tid.append(define_error_gv("__tid%s__" % i))
            gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

        callargs = arginfo.from_arguments(builder, wrapfn.args)
        status, _ = self.call_conv.call_function(builder, func, types.void,
                                                 argtypes, callargs)

        # Check error status
        with cgutils.if_likely(builder, status.is_ok):
            builder.ret_void()

        with builder.if_then(builder.not_(status.is_python_exc)):
            # User exception raised
            old = Constant.null(gv_exc.type.pointee)

            # Use atomic cmpxchg to prevent rewriting the error status
            # Only the first error is recorded

            # The compare-and-swap helper is only declared here; its
            # definition is not part of this block.
            casfnty = lc.Type.function(old.type,
                                       [gv_exc.type, old.type, old.type])

            casfn = wrapper_module.add_function(casfnty,
                                                name="___numba_cas_hack")
            xchg = builder.call(casfn, [gv_exc, old, status.code])
            # The swap took place if the value returned is the null value
            # that was expected.
            changed = builder.icmp(ICMP_EQ, xchg, old)

            # If the xchange is successful, save the thread ID.
            sreg = nvvmutils.SRegBuilder(builder)
            with builder.if_then(changed):
                for dim, ptr, in zip("xyz", gv_tid):
                    val = sreg.tid(dim)
                    builder.store(val, ptr)

                for dim, ptr, in zip("xyz", gv_ctaid):
                    val = sreg.ctaid(dim)
                    builder.store(val, ptr)

        builder.ret_void()
        # force inline
        # inline_function(status.code)
        nvvm.set_cuda_kernel(wrapfn)
        # Link by rendering the wrapper module to text and re-parsing it.
        module.link_in(ll.parse_assembly(str(wrapper_module)))
        module.verify()

        # Return the function object owned by the linked module.
        wrapfn = module.get_function(wrapfn.name)
        return wrapfn
Exemple #19
0
    def generate_kernel_wrapper(self, library, fname, argtypes, debug):
        """
        Generate the kernel wrapper in the given ``library``.
        The function being wrapped have the name ``fname`` and argument types
        ``argtypes``.  The wrapper function is returned.

        The code that inspects the call status and records the error code
        and thread / block indices is only emitted when ``debug`` is true;
        the error global variables themselves are always defined.
        """
        arginfo = self.get_arg_packer(argtypes)
        argtys = list(arginfo.argument_types)
        wrapfnty = Type.function(Type.void(), argtys)
        wrapper_module = self.create_module("cuda.kernel.wrapper")
        # Type of the wrapped function: an int status, with the type from
        # ``call_conv.get_return_type(types.pyobject)`` as the leading
        # parameter, followed by the packed argument types.
        fnty = Type.function(Type.int(),
                             [self.call_conv.get_return_type(types.pyobject)] +
                             argtys)
        func = wrapper_module.add_function(fnty, name=fname)

        # The wrapper's name is the wrapped name with the 'cudapy'
        # namespace prepended by the mangler.
        prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
        wrapfn = wrapper_module.add_function(wrapfnty, name=prefixed)
        builder = Builder(wrapfn.append_basic_block(''))

        # Define error handling variables
        def define_error_gv(postfix):
            # Add a null-initialised int global named after the wrapper
            # plus *postfix*.
            gv = wrapper_module.add_global_variable(Type.int(),
                                                    name=wrapfn.name + postfix)
            gv.initializer = Constant.null(gv.type.pointee)
            return gv

        gv_exc = define_error_gv("__errcode__")
        gv_tid = []
        gv_ctaid = []
        for i in 'xyz':
            gv_tid.append(define_error_gv("__tid%s__" % i))
            gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

        callargs = arginfo.from_arguments(builder, wrapfn.args)
        status, _ = self.call_conv.call_function(builder, func, types.void,
                                                 argtypes, callargs)

        if debug:
            # Check error status
            with cgutils.if_likely(builder, status.is_ok):
                builder.ret_void()

            with builder.if_then(builder.not_(status.is_python_exc)):
                # User exception raised
                old = Constant.null(gv_exc.type.pointee)

                # Use atomic cmpxchg to prevent rewriting the error status
                # Only the first error is recorded

                # The compare-and-swap helper is only declared here; its
                # definition is not part of this block.
                casfnty = lc.Type.function(old.type,
                                           [gv_exc.type, old.type, old.type])

                casfn = wrapper_module.add_function(casfnty,
                                                    name="___numba_cas_hack")
                xchg = builder.call(casfn, [gv_exc, old, status.code])
                # The swap took place if the value returned is the null
                # value that was expected.
                changed = builder.icmp(ICMP_EQ, xchg, old)

                # If the xchange is successful, save the thread ID.
                sreg = nvvmutils.SRegBuilder(builder)
                with builder.if_then(changed):
                    for dim, ptr, in zip("xyz", gv_tid):
                        val = sreg.tid(dim)
                        builder.store(val, ptr)

                    for dim, ptr, in zip("xyz", gv_ctaid):
                        val = sreg.ctaid(dim)
                        builder.store(val, ptr)

        # Emitted unconditionally: terminates the block the builder is on,
        # whether or not the debug checks above were generated.
        builder.ret_void()

        nvvm.set_cuda_kernel(wrapfn)
        library.add_ir_module(wrapper_module)
        library.finalize()
        # Return the function object owned by the library.
        wrapfn = library.get_function(wrapfn.name)
        return wrapfn
Exemple #20
0
 def setup_function(self, fndesc):
     """
     Declare the function described by *fndesc* in ``self.module``, append
     an 'entry' basic block to it and create the builder and call helper
     that operate on that block.
     """
     self.function = declared = self.context.declare_function(self.module,
                                                              fndesc)
     self.entry_block = entry = declared.append_basic_block('entry')
     self.builder = irbuilder = Builder.new(entry)
     self.call_helper = self.call_conv.init_call_helper(irbuilder)
Exemple #21
0
    def generate_kernel_wrapper(self, library, fname, argtypes):
        """
        Build a kernel entry point around the function called ``fname``
        (taking ``argtypes``) and add it to ``library``.

        The entry point is named ``"cudaPy_"`` plus the wrapped name.  When
        the wrapped call does not report success, the status code and the
        thread / block indices are written to global variables.  The
        wrapper function looked up from ``library`` is returned.
        """
        packer = self.get_arg_packer(argtypes)
        packed_tys = list(packer.argument_types)
        kernel_fnty = Type.function(Type.void(), packed_tys)
        wrapper_module = self.create_module("cuda.kernel.wrapper")
        inner_fnty = Type.function(
            Type.int(),
            [self.call_conv.get_return_type(types.pyobject)] + packed_tys)
        func = wrapper_module.add_function(inner_fnty, name=fname)

        wrapfn = wrapper_module.add_function(kernel_fnty,
                                             name="cudaPy_" + func.name)
        builder = Builder(wrapfn.append_basic_block(''))

        def new_error_global(suffix):
            # Null-initialised int global named after the wrapper + suffix
            slot = wrapper_module.add_global_variable(
                Type.int(), name=wrapfn.name + suffix)
            slot.initializer = Constant.null(slot.type.pointee)
            return slot

        # Globals are created in the order: errcode, then tid/ctaid pairs
        # for x, y and z.
        gv_exc = new_error_global("__errcode__")
        gv_tid, gv_ctaid = [], []
        for axis in 'xyz':
            gv_tid.append(new_error_global("__tid%s__" % axis))
            gv_ctaid.append(new_error_global("__ctaid%s__" % axis))

        unpacked = packer.from_arguments(builder, wrapfn.args)
        status, _ = self.call_conv.call_function(
            builder, func, types.void, argtypes, unpacked)

        # Successful call: return straight away
        with cgutils.if_likely(builder, status.is_ok):
            builder.ret_void()

        not_python_exc = builder.not_(status.is_python_exc)
        with builder.if_then(not_python_exc):
            # User exception raised
            expected = Constant.null(gv_exc.type.pointee)
            code_ty = expected.type

            # An atomic compare-and-swap keeps the error status from being
            # rewritten, so only the first error is recorded.
            cas_fnty = lc.Type.function(code_ty,
                                        [gv_exc.type, code_ty, code_ty])
            cas_fn = wrapper_module.add_function(cas_fnty,
                                                 name="___numba_cas_hack")
            previous = builder.call(cas_fn, [gv_exc, expected, status.code])
            swapped = builder.icmp(ICMP_EQ, previous, expected)

            # Save the thread and block indices when the swap succeeded
            sreg = nvvmutils.SRegBuilder(builder)
            with builder.if_then(swapped):
                for axis, slot in zip("xyz", gv_tid):
                    builder.store(sreg.tid(axis), slot)

                for axis, slot in zip("xyz", gv_ctaid):
                    builder.store(sreg.ctaid(axis), slot)

        builder.ret_void()

        nvvm.set_cuda_kernel(wrapfn)
        library.add_ir_module(wrapper_module)
        library.finalize()
        return library.get_function(wrapfn.name)
Exemple #22
0
    def build(self):
        """
        Build the gufunc wrapper for ``self.func`` and add it to
        ``self.library``.

        The wrapper is named ``"__gufunc__." + self.func.name`` and has the
        LLVM type ``void(i8** args, intp* dims, intp* steps, i8* data)``.
        It runs the prologue, calls the loop body once per iteration
        (leaving the loop as soon as the body reports an error) and then
        runs the epilogue.

        The linkage of ``self.func`` is set to ``LINKAGE_INTERNAL`` and the
        tuple ``(wrapper, self.env)`` is returned.
        """
        byte_t = Type.int(8)
        byte_ptr_t = Type.pointer(byte_t)
        byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
        intp_t = self.context.get_value_type(types.intp)
        intp_ptr_t = Type.pointer(intp_t)

        fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                           intp_ptr_t, byte_ptr_t])

        wrapper_module = self.library.create_ir_module('')
        func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                     self.fndesc.argtypes)
        # Declaration of the core function inside the wrapper module
        func = wrapper_module.add_function(func_type, name=self.func.name)
        func.attributes.add("alwaysinline")
        wrapper = wrapper_module.add_function(fnty,
                                              "__gufunc__." + self.func.name)
        arg_args, arg_dims, arg_steps, arg_data = wrapper.args
        arg_args.name = "args"
        arg_dims.name = "dims"
        arg_steps.name = "steps"
        arg_data.name = "data"

        builder = Builder.new(wrapper.append_basic_block("entry"))
        # The first element of ``dims`` is used as the loop trip count.
        loopcount = builder.load(arg_dims, name="loopcount")
        pyapi = self.context.get_python_api(builder)

        # Unpack shapes: number each input symbol in order of first
        # appearance ...
        sym_map = {}
        for syms in self.sin:
            for s in syms:
                if s not in sym_map:
                    sym_map[s] = len(sym_map)

        # ... and load the size of symbol number i from dims[i + 1].
        sym_dim = {}
        for s, i in sym_map.items():
            sym_dim[s] = builder.load(builder.gep(arg_dims,
                                                  [self.context.get_constant(
                                                      types.intp,
                                                      i + 1)]))

        # Prepare inputs
        arrays = []
        # Step index handed to the first array; advanced by the number of
        # symbols of each array in turn.
        step_offset = len(self.sin) + len(self.sout)
        for i, (typ, sym) in enumerate(zip(self.signature.args,
                                           self.sin + self.sout)):
            ary = GUArrayArg(self.context, builder, arg_args,
                             arg_steps, i, step_offset, typ, sym, sym_dim)
            step_offset += len(sym)
            arrays.append(ary)

        bbreturn = builder.append_basic_block('.return')

        # Prologue
        self.gen_prologue(builder, pyapi)

        # Loop
        with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
            args = [a.get_array_at_offset(loop.index) for a in arrays]
            _, error = self.gen_loop_body(builder, pyapi, func, args)
            # If error, escape
            cgutils.cbranch_or_continue(builder, error, bbreturn)

        builder.branch(bbreturn)
        builder.position_at_end(bbreturn)

        # Epilogue
        self.gen_epilogue(builder, pyapi)

        builder.ret_void()

        self.library.add_ir_module(wrapper_module)
        wrapper = self.library.get_function(wrapper.name)

        # Set core function to internal so that it is not generated
        self.func.linkage = LINKAGE_INTERNAL

        return wrapper, self.env
Exemple #23
0
def build_ufunc_wrapper(library, context, func, signature, objmode, envptr, env):
    """
    Wrap the scalar function with a loop that iterates over the arguments.

    A fresh IR module is created from *library*.  It re-declares *func*
    under the same name (marked ``alwaysinline``) and defines a wrapper
    called ``"__ufunc__." + func.name`` whose LLVM type is
    ``void(i8** args, intp* dims, intp* steps, i8* data)``.

    The *signature* arg supplies the argument and return types of the
    scalar function.  When *objmode* is true the scalar function is
    declared as taking and returning ``pyobject`` values, the loop is
    emitted between ``gil_ensure`` / ``gil_release`` calls, and *envptr*
    and *env* are forwarded to ``build_obj_loop_body``; otherwise they are
    not used.

    The wrapper function looked up from *library* is returned.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('')
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)
    # NOTE(review): ``oldfunc`` is never read in this version; the binding
    # is kept as-is.
    oldfunc = func
    # ``func`` is rebound to the declaration living in the wrapper module.
    func = wrapper_module.add_function(func_type,
                                       name=func.name)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    # The first element of ``dims`` is used as the loop trip count.
    loopcount = builder.load(arg_dims, name="loopcount")

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i, typ))

    # Prepare output: it takes the slot right after the inputs
    out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
                    signature.return_type)

    # Setup indices: one zero-initialised offset slot per input
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # AND together the per-input ``is_unit_strided`` flags
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    pyapi = context.get_python_api(builder)
    if objmode:
        # General loop
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            build_obj_loop_body(context, func, builder,
                                arrays, out, offsets,
                                store_offset, signature,
                                pyapi, envptr, env)
        pyapi.gil_release(gil)
        builder.ret_void()

    else:
        with builder.if_else(unit_strided) as (is_unit_strided, is_strided):
            with is_unit_strided:
                with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
                    build_fast_loop_body(context, func, builder,
                                         arrays, out, offsets,
                                         store_offset, signature,
                                         loop.index, pyapi)

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    build_slow_loop_body(context, func, builder,
                                         arrays, out, offsets,
                                         store_offset, signature,
                                         pyapi)

        # Single return emitted after the if/else
        builder.ret_void()
    del builder

    # Run optimizer
    library.add_ir_module(wrapper_module)
    wrapper = library.get_function(wrapper.name)

    return wrapper
Exemple #24
0
    def _build_wrapper(self, library, name):
        """
        The LLVM IRBuilder code to create the gufunc wrapper.
        The *library* arg is the CodeLibrary for which the wrapper should
        be added to.  The *name* arg is the name of the wrapper function being
        created.

        The wrapper has the LLVM type
        ``void(i8** args, intp* dims, intp* steps, i8* data)``.  It runs
        the prologue, calls the loop body once per iteration (leaving the
        loop as soon as the body reports an error) and then runs the
        epilogue.  Nothing is returned; the wrapper module is added to
        *library* and ``self.library`` is registered as a linking library.
        """
        byte_t = Type.int(8)
        byte_ptr_t = Type.pointer(byte_t)
        byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
        intp_t = self.context.get_value_type(types.intp)
        intp_ptr_t = Type.pointer(intp_t)

        fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                           intp_ptr_t, byte_ptr_t])

        wrapper_module = library.create_ir_module('')
        func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                     self.fndesc.argtypes)
        # Declaration of the core function inside the wrapper module
        fname = self.fndesc.llvm_func_name
        func = wrapper_module.add_function(func_type, name=fname)

        func.attributes.add("alwaysinline")
        wrapper = wrapper_module.add_function(fnty, name)
        arg_args, arg_dims, arg_steps, arg_data = wrapper.args
        arg_args.name = "args"
        arg_dims.name = "dims"
        arg_steps.name = "steps"
        arg_data.name = "data"

        builder = Builder(wrapper.append_basic_block("entry"))
        # The first element of ``dims`` is used as the loop trip count.
        loopcount = builder.load(arg_dims, name="loopcount")
        pyapi = self.context.get_python_api(builder)

        # Unpack shapes: number each input symbol in order of first
        # appearance ...
        sym_map = {}
        for syms in self.sin:
            for s in syms:
                if s not in sym_map:
                    sym_map[s] = len(sym_map)

        # ... and load the size of symbol number i from dims[i + 1].
        sym_dim = {}
        for s, i in sym_map.items():
            sym_dim[s] = builder.load(builder.gep(arg_dims,
                                                  [self.context.get_constant(
                                                      types.intp,
                                                      i + 1)]))

        # Prepare inputs
        arrays = []
        # Step index handed to the first array; advanced by the number of
        # symbols of each array in turn.
        step_offset = len(self.sin) + len(self.sout)
        for i, (typ, sym) in enumerate(zip(self.signature.args,
                                           self.sin + self.sout)):
            ary = GUArrayArg(self.context, builder, arg_args,
                             arg_steps, i, step_offset, typ, sym, sym_dim)
            step_offset += len(sym)
            arrays.append(ary)

        bbreturn = builder.append_basic_block('.return')

        # Prologue
        self.gen_prologue(builder, pyapi)

        # Loop
        with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
            args = [a.get_array_at_offset(loop.index) for a in arrays]
            _, error = self.gen_loop_body(builder, pyapi, func, args)
            # If error, escape
            cgutils.cbranch_or_continue(builder, error, bbreturn)

        builder.branch(bbreturn)
        builder.position_at_end(bbreturn)

        # Epilogue
        self.gen_epilogue(builder, pyapi)

        builder.ret_void()

        # Link
        library.add_ir_module(wrapper_module)
        library.add_linking_library(self.library)
Exemple #25
-1
    def _build_wrapper(self, library, name):
        """
        The LLVM IRBuilder code to create the gufunc wrapper.
        The *library* arg is the CodeLibrary for which the wrapper should
        be added to.  The *name* arg is the name of the wrapper function being
        created.

        The wrapper has the LLVM type
        ``void(i8** args, intp* dims, intp* steps, i8* data)``.  It runs
        the prologue, calls the loop body once per iteration (leaving the
        loop as soon as the body reports an error) and then runs the
        epilogue.  Nothing is returned; the wrapper module is added to
        *library* and ``self.library`` is registered as a linking library.
        """
        byte_t = Type.int(8)
        byte_ptr_t = Type.pointer(byte_t)
        byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
        intp_t = self.context.get_value_type(types.intp)
        intp_ptr_t = Type.pointer(intp_t)

        fnty = Type.function(
            Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

        wrapper_module = library.create_ir_module('')
        func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                     self.fndesc.argtypes)
        # Declaration of the core function inside the wrapper module
        fname = self.fndesc.llvm_func_name
        func = wrapper_module.add_function(func_type, name=fname)

        func.attributes.add("alwaysinline")
        wrapper = wrapper_module.add_function(fnty, name)
        arg_args, arg_dims, arg_steps, arg_data = wrapper.args
        arg_args.name = "args"
        arg_dims.name = "dims"
        arg_steps.name = "steps"
        arg_data.name = "data"

        builder = Builder(wrapper.append_basic_block("entry"))
        # The first element of ``dims`` is used as the loop trip count.
        loopcount = builder.load(arg_dims, name="loopcount")
        pyapi = self.context.get_python_api(builder)

        # Unpack shapes: number each input symbol in order of first
        # appearance ...
        sym_map = {}
        for syms in self.sin:
            for s in syms:
                if s not in sym_map:
                    sym_map[s] = len(sym_map)

        # ... and load the size of symbol number i from dims[i + 1].
        sym_dim = {}
        for s, i in sym_map.items():
            sym_dim[s] = builder.load(
                builder.gep(arg_dims,
                            [self.context.get_constant(types.intp, i + 1)]))

        # Prepare inputs
        arrays = []
        # Step index handed to the first array; advanced by the number of
        # symbols of each array in turn.
        step_offset = len(self.sin) + len(self.sout)
        for i, (typ, sym) in enumerate(
                zip(self.signature.args, self.sin + self.sout)):
            ary = GUArrayArg(self.context, builder, arg_args, arg_steps, i,
                             step_offset, typ, sym, sym_dim)
            step_offset += len(sym)
            arrays.append(ary)

        bbreturn = builder.append_basic_block('.return')

        # Prologue
        self.gen_prologue(builder, pyapi)

        # Loop
        with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
            args = [a.get_array_at_offset(loop.index) for a in arrays]
            _, error = self.gen_loop_body(builder, pyapi, func, args)
            # If error, escape
            cgutils.cbranch_or_continue(builder, error, bbreturn)

        builder.branch(bbreturn)
        builder.position_at_end(bbreturn)

        # Epilogue
        self.gen_epilogue(builder, pyapi)

        builder.ret_void()

        # Link
        library.add_ir_module(wrapper_module)
        library.add_linking_library(self.library)
Exemple #26
-1
    def build(self):
        """
        Build the gufunc wrapper for ``self.func`` and add it to
        ``self.library``.

        The wrapper is named ``"__gufunc__." + self.func.name`` and has the
        LLVM type ``void(i8** args, intp* dims, intp* steps, i8* data)``.
        It runs the prologue, calls the loop body once per iteration
        (leaving the loop as soon as the body reports an error), advances
        every array argument after each iteration and then runs the
        epilogue.

        The linkage of ``self.func`` is set to ``LINKAGE_INTERNAL`` and the
        tuple ``(wrapper, self.env)`` is returned.
        """
        byte_t = Type.int(8)
        byte_ptr_t = Type.pointer(byte_t)
        byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
        intp_t = self.context.get_value_type(types.intp)
        intp_ptr_t = Type.pointer(intp_t)

        fnty = Type.function(
            Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

        wrapper_module = self.library.create_ir_module('')
        func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                     self.fndesc.argtypes)
        # Declaration of the core function inside the wrapper module
        func = wrapper_module.add_function(func_type, name=self.func.name)
        func.attributes.add("alwaysinline")
        wrapper = wrapper_module.add_function(fnty,
                                              "__gufunc__." + self.func.name)
        arg_args, arg_dims, arg_steps, arg_data = wrapper.args
        arg_args.name = "args"
        arg_dims.name = "dims"
        arg_steps.name = "steps"
        arg_data.name = "data"

        builder = Builder.new(wrapper.append_basic_block("entry"))
        # The first element of ``dims`` is used as the loop trip count.
        loopcount = builder.load(arg_dims, name="loopcount")

        # Unpack shapes: number each input symbol in order of first
        # appearance ...
        sym_map = {}
        for syms in self.sin:
            for s in syms:
                if s not in sym_map:
                    sym_map[s] = len(sym_map)

        # ... and load the size of symbol number i from dims[i + 1].
        sym_dim = {}
        for s, i in sym_map.items():
            sym_dim[s] = builder.load(
                builder.gep(arg_dims,
                            [self.context.get_constant(types.intp, i + 1)]))

        # Prepare inputs
        arrays = []
        # Step index handed to the first array; advanced by ``ndim`` of
        # every argument that is not treated as a scalar.
        step_offset = len(self.sin) + len(self.sout)
        for i, (typ, sym) in enumerate(
                zip(self.signature.args, self.sin + self.sout)):
            ary = GUArrayArg(self.context, builder, arg_args, arg_dims,
                             arg_steps, i, step_offset, typ, sym, sym_dim)
            if not ary.as_scalar:
                step_offset += ary.ndim
            arrays.append(ary)

        bbreturn = cgutils.get_function(builder).append_basic_block('.return')

        # Prologue
        self.gen_prologue(builder)

        # Loop
        with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
            args = [a.array_value for a in arrays]
            _, error = self.gen_loop_body(builder, func, args)
            # If error, escape
            cgutils.cbranch_or_continue(builder, error, bbreturn)

            # Advance every argument for the next iteration
            for a in arrays:
                a.next(ind)

        builder.branch(bbreturn)
        builder.position_at_end(bbreturn)

        # Epilogue
        self.gen_epilogue(builder)

        builder.ret_void()

        self.library.add_ir_module(wrapper_module)
        wrapper = self.library.get_function(wrapper.name)

        # Set core function to internal so that it is not generated
        self.func.linkage = LINKAGE_INTERNAL

        return wrapper, self.env