Beispiel #1
0
    def close(self):
        '''end code generation
        '''
        # Close declaration block
        with _change_block_temporarily(self.builder, self.declare_block):
            self.builder.branch(self.first_body_block)

        # Do the auto inlining
        for callinst in self._auto_inline_list:
            lc.inline_function(callinst)
Beispiel #2
0
    def close(self):
        '''end code generation
        '''
        # Close declaration block
        with _change_block_temporarily(self.builder, self.declare_block):
            self.builder.branch(self.first_body_block)

        # Do the auto inlining
        for callinst in self._auto_inline_list:
            lc.inline_function(callinst)
Beispiel #3
0
    def generate_kernel_wrapper(self, func, argtypes):
        module = func.module
        argtys = self.get_arguments(func.type.pointee)
        fnty = Type.function(Type.void(), argtys)
        wrapfn = module.add_function(fnty, name="cudaPy_" + func.name)
        builder = Builder.new(wrapfn.append_basic_block(''))

        callargs = []
        for at, av in zip(argtypes, wrapfn.args):
            av = self.get_argument_value(builder, at, av)
            callargs.append(av)

        status, _ = self.call_function(builder, func, types.void, argtypes,
                                       callargs)
        # TODO handle status

        builder.ret_void()
        del builder
        # force inline
        inline_function(status.code)

        module.verify()
        return wrapfn
Beispiel #4
0
    def generate_kernel_wrapper(self, func, argtypes):
        module = func.module
        argtys = self.get_arguments(func.type.pointee)
        fnty = Type.function(Type.void(), argtys)
        wrapfn = module.add_function(fnty, name="cudaPy_" + func.name)
        builder = Builder.new(wrapfn.append_basic_block(''))

        callargs = []
        for at, av in zip(argtypes, wrapfn.args):
            av = self.get_argument_value(builder, at, av)
            callargs.append(av)

        status, _ = self.call_function(builder, func, types.void, argtypes,
                                       callargs)
        # TODO handle status

        builder.ret_void()
        del builder
        # force inline
        inline_function(status.code)

        module.verify()
        return wrapfn
Beispiel #5
0
    def test_inline_call(self):
        mod = Module.new(__name__)
        callee = mod.add_function(Type.function(Type.int(), [Type.int()]), name="bar")

        builder = Builder.new(callee.append_basic_block("entry"))
        builder.ret(builder.add(callee.args[0], callee.args[0]))

        caller = mod.add_function(Type.function(Type.int(), []), name="foo")

        builder = Builder.new(caller.append_basic_block("entry"))
        callinst = builder.call(callee, [Constant.int(Type.int(), 1234)])
        builder.ret(callinst)

        pre_inlining = str(caller)
        self.assertIn("call", pre_inlining)

        self.assertTrue(inline_function(callinst))

        post_inlining = str(caller)
        self.assertNotIn("call", post_inlining)
        self.assertIn("2468", post_inlining)
Beispiel #6
0
    def test_inline_call(self):
        mod = Module.new(__name__)
        callee = mod.add_function(Type.function(Type.int(), [Type.int()]),
                                  name='bar')

        builder = Builder.new(callee.append_basic_block('entry'))
        builder.ret(builder.add(callee.args[0], callee.args[0]))

        caller = mod.add_function(Type.function(Type.int(), []), name='foo')

        builder = Builder.new(caller.append_basic_block('entry'))
        callinst = builder.call(callee, [Constant.int(Type.int(), 1234)])
        builder.ret(callinst)

        pre_inlining = str(caller)
        self.assertIn('call', pre_inlining)

        self.assertTrue(inline_function(callinst))

        post_inlining = str(caller)
        self.assertNotIn('call', post_inlining)
        self.assertIn('2468', post_inlining)
Beispiel #7
0
    def generate_kernel_wrapper(self, func, argtypes):
        module = func.module
        argtys = self.get_arguments(func.type.pointee)
        fnty = Type.function(Type.void(), argtys)
        wrapfn = module.add_function(fnty, name="cudaPy_" + func.name)
        builder = Builder.new(wrapfn.append_basic_block(''))

        # Define error handling variables
        def define_error_gv(postfix):
            gv = module.add_global_variable(Type.int(),
                                            name=wrapfn.name + postfix)
            gv.initializer = Constant.null(gv.type.pointee)
            return gv

        gv_exc = define_error_gv("__errcode__")
        gv_tid = []
        gv_ctaid = []
        for i in 'xyz':
            gv_tid.append(define_error_gv("__tid%s__" % i))
            gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

        callargs = []
        for at, av in zip(argtypes, wrapfn.args):
            av = self.get_argument_value(builder, at, av)
            callargs.append(av)

        status, _ = self.call_function(builder, func, types.void, argtypes,
                                       callargs)

        # Check error status
        with cgutils.if_likely(builder, status.ok):
            builder.ret_void()

        with cgutils.ifthen(builder, builder.not_(status.exc)):
            # User exception raised
            old = Constant.null(gv_exc.type.pointee)

            # Use atomic cmpxchg to prevent rewriting the error status
            # Only the first error is recorded
            xchg = builder.atomic_cmpxchg(gv_exc, old, status.code,
                                          "monotonic")
            changed = builder.icmp(ICMP_EQ, xchg, old)

            # If the xchange is successful, save the thread ID.
            sreg = nvvmutils.SRegBuilder(builder)
            with cgutils.ifthen(builder, changed):
                for dim, ptr, in zip("xyz", gv_tid):
                    val = sreg.tid(dim)
                    builder.store(val, ptr)

                for dim, ptr, in zip("xyz", gv_ctaid):
                    val = sreg.ctaid(dim)
                    builder.store(val, ptr)

        builder.ret_void()

        # force inline
        inline_function(status.code)

        module.verify()
        return wrapfn
Beispiel #8
0
    def generate_kernel_wrapper(self, func, argtypes):
        module = func.module
        argtys = self.get_arguments(func.type.pointee)
        fnty = Type.function(Type.void(), argtys)
        wrapfn = module.add_function(fnty, name="cudaPy_" + func.name)
        builder = Builder.new(wrapfn.append_basic_block(''))

        # Define error handling variables
        def define_error_gv(postfix):
            gv = module.add_global_variable(Type.int(),
                                            name=wrapfn.name + postfix)
            gv.initializer = Constant.null(gv.type.pointee)
            return gv

        gv_exc = define_error_gv("__errcode__")
        gv_tid = []
        gv_ctaid = []
        for i in 'xyz':
            gv_tid.append(define_error_gv("__tid%s__" % i))
            gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

        callargs = []
        for at, av in zip(argtypes, wrapfn.args):
            av = self.get_argument_value(builder, at, av)
            callargs.append(av)

        status, _ = self.call_function(builder, func, types.void, argtypes,
                                       callargs)

        # Check error status
        with cgutils.if_likely(builder, status.ok):
            builder.ret_void()

        with cgutils.ifthen(builder, builder.not_(status.exc)):
            # User exception raised
            old = Constant.null(gv_exc.type.pointee)

            # Use atomic cmpxchg to prevent rewriting the error status
            # Only the first error is recorded
            xchg = builder.atomic_cmpxchg(gv_exc, old, status.code,
                                          "monotonic")
            changed = builder.icmp(ICMP_EQ, xchg, old)

            # If the xchange is successful, save the thread ID.
            sreg = nvvmutils.SRegBuilder(builder)
            with cgutils.ifthen(builder, changed):
                for dim, ptr, in zip("xyz", gv_tid):
                    val = sreg.tid(dim)
                    builder.store(val, ptr)

                for dim, ptr, in zip("xyz", gv_ctaid):
                    val = sreg.ctaid(dim)
                    builder.store(val, ptr)

        builder.ret_void()

        # force inline
        inline_function(status.code)

        module.verify()
        return wrapfn
Beispiel #9
0
def build_ufunc_wrapper(context, func, signature):
    """
    Wrap the scalar function with a loop that iterates over the arguments
    """
    module = func.module

    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper = module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = context.get_arguments(func)

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i,
                                context.get_argument_type(typ)))

    # Prepare output
    valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps, len(actual_args),
                    valty)

    # Setup indices
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    with cgutils.ifelse(builder, unit_strided) as (is_unit_strided,
                                                   is_strided):

        with is_unit_strided:
            with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
                fastloop = build_fast_loop_body(context, func, builder,
                                                arrays, out, offsets,
                                                store_offset, signature, ind)
            builder.ret_void()

        with is_strided:
            # General loop
            with cgutils.for_range(builder, loopcount, intp=intp_t):
                slowloop = build_slow_loop_body(context, func, builder,
                                                arrays, out, offsets,
                                                store_offset, signature)

            builder.ret_void()

    builder.ret_void()
    del builder

    # Set core function to internal so that it is not generated
    func.linkage = LINKAGE_INTERNAL
    # Force inline of code function
    inline_function(slowloop)
    inline_function(fastloop)
    # Run optimizer
    context.optimize(module)

    if config.DUMP_OPTIMIZED:
        print(module)

    return wrapper
Beispiel #10
0
    def build(self):
        module = self.func.module

        byte_t = Type.int(8)
        byte_ptr_t = Type.pointer(byte_t)
        byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
        intp_t = self.context.get_value_type(types.intp)
        intp_ptr_t = Type.pointer(intp_t)

        fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                           intp_ptr_t, byte_ptr_t])

        wrapper = module.add_function(fnty, "__gufunc__." + self.func.name)
        arg_args, arg_dims, arg_steps, arg_data = wrapper.args
        arg_args.name = "args"
        arg_dims.name = "dims"
        arg_steps.name = "steps"
        arg_data.name = "data"

        builder = Builder.new(wrapper.append_basic_block("entry"))
        loopcount = builder.load(arg_dims, name="loopcount")

        # Unpack shapes
        unique_syms = set()
        for grp in (self.sin, self.sout):
            for syms in grp:
                unique_syms |= set(syms)

        sym_map = {}
        for syms in self.sin:
            for s in syms:
                if s not in sym_map:
                    sym_map[s] = len(sym_map)

        sym_dim = {}
        for s, i in sym_map.items():
            sym_dim[s] = builder.load(builder.gep(arg_dims,
                                                  [self.context.get_constant(
                                                      types.intp,
                                                      i + 1)]))

        # Prepare inputs
        arrays = []
        step_offset = len(self.sin) + len(self.sout)
        for i, (typ, sym) in enumerate(zip(self.signature.args,
                                           self.sin + self.sout)):
            ary = GUArrayArg(self.context, builder, arg_args, arg_dims,
                             arg_steps, i, step_offset, typ, sym, sym_dim)
            if not ary.as_scalar:
                step_offset += ary.ndim
            arrays.append(ary)

        bbreturn = cgutils.get_function(builder).append_basic_block('.return')

        # Prologue
        self.gen_prologue(builder)

        # Loop
        with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
            args = [a.array_value for a in arrays]
            innercall, error = self.gen_loop_body(builder, args)
            # If error, escape
            cgutils.cbranch_or_continue(builder, error, bbreturn)

            for a in arrays:
                a.next(ind)

        builder.branch(bbreturn)
        builder.position_at_end(bbreturn)

        # Epilogue
        self.gen_epilogue(builder)

        builder.ret_void()

        module.verify()
        # Set core function to internal so that it is not generated
        self.func.linkage = LINKAGE_INTERNAL
        # Force inline of code function
        inline_function(innercall)
        # Run optimizer
        self.context.optimize(module)

        if config.DUMP_OPTIMIZED:
            print(module)

        wrapper.verify()
        return wrapper, self.env
Beispiel #11
0
def build_gufunc_wrapper(context, func, signature, sin, sout):
    module = func.module

    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper = module.add_function(fnty, "__gufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")

    # Unpack shapes
    unique_syms = set()
    for grp in (sin, sout):
        for syms in grp:
            unique_syms |= set(syms)

    sym_map = {}
    for grp in (sin, sout):
        for syms in sin:
            for s in syms:
                if s not in sym_map:
                    sym_map[s] = len(sym_map)

    sym_dim = {}
    for s, i in sym_map.items():
        sym_dim[s] = builder.load(builder.gep(arg_dims,
                                              [context.get_constant(types.intp,
                                                                    i + 1)]))

    # Prepare inputs
    arrays = []
    step_offset = len(sin) + len(sout)
    for i, (typ, sym) in enumerate(zip(signature.args, sin + sout)):
        ary = GUArrayArg(context, builder, arg_args, arg_dims, arg_steps, i,
                         step_offset, typ, sym, sym_dim)
        if not ary.as_scalar:
            step_offset += ary.ndim
        arrays.append(ary)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
        args = [a.array_value for a in arrays]

        status, retval = context.call_function(builder, func,
                                               signature.return_type,
                                               signature.args, args)
        # ignore status
        # ignore retval

        for a in arrays:
            a.next(ind)

    builder.ret_void()

    # Set core function to internal so that it is not generated
    func.linkage = LINKAGE_INTERNAL
    # Force inline of code function
    inline_function(status.code)
    # Run optimizer
    context.optimize(module)

    if config.DUMP_OPTIMIZED:
        print(module)

    wrapper.verify()
    return wrapper
Beispiel #12
0
def build_ufunc_wrapper(context, func, signature):
    """
    Wrap the scalar function with a loop that iterates over the arguments
    """
    module = func.module

    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(),
                         [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

    wrapper = module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = context.get_arguments(func)

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(
            UArrayArg(context, builder, arg_args, arg_steps, i,
                      context.get_argument_type(typ)))

    # Prepare output
    valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps, len(actual_args),
                    valty)

    # Setup indices
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    with cgutils.ifelse(builder,
                        unit_strided) as (is_unit_strided, is_strided):

        with is_unit_strided:
            with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
                fastloop = build_fast_loop_body(context, func, builder, arrays,
                                                out, offsets, store_offset,
                                                signature, ind)
            builder.ret_void()

        with is_strided:
            # General loop
            with cgutils.for_range(builder, loopcount, intp=intp_t):
                slowloop = build_slow_loop_body(context, func, builder, arrays,
                                                out, offsets, store_offset,
                                                signature)

            builder.ret_void()

    builder.ret_void()
    del builder

    # Set core function to internal so that it is not generated
    func.linkage = LINKAGE_INTERNAL
    # Force inline of code function
    inline_function(slowloop)
    inline_function(fastloop)
    # Run optimizer
    context.optimize(module)

    if config.DUMP_OPTIMIZED:
        print(module)

    return wrapper
Beispiel #13
0
    def build(self):
        module = self.func.module

        byte_t = Type.int(8)
        byte_ptr_t = Type.pointer(byte_t)
        byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
        intp_t = self.context.get_value_type(types.intp)
        intp_ptr_t = Type.pointer(intp_t)

        fnty = Type.function(
            Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

        wrapper = module.add_function(fnty, "__gufunc__." + self.func.name)
        arg_args, arg_dims, arg_steps, arg_data = wrapper.args
        arg_args.name = "args"
        arg_dims.name = "dims"
        arg_steps.name = "steps"
        arg_data.name = "data"

        builder = Builder.new(wrapper.append_basic_block("entry"))
        loopcount = builder.load(arg_dims, name="loopcount")

        # Unpack shapes
        unique_syms = set()
        for grp in (self.sin, self.sout):
            for syms in grp:
                unique_syms |= set(syms)

        sym_map = {}
        for syms in self.sin:
            for s in syms:
                if s not in sym_map:
                    sym_map[s] = len(sym_map)

        sym_dim = {}
        for s, i in sym_map.items():
            sym_dim[s] = builder.load(
                builder.gep(arg_dims,
                            [self.context.get_constant(types.intp, i + 1)]))

        # Prepare inputs
        arrays = []
        step_offset = len(self.sin) + len(self.sout)
        for i, (typ, sym) in enumerate(
                zip(self.signature.args, self.sin + self.sout)):
            ary = GUArrayArg(self.context, builder, arg_args, arg_dims,
                             arg_steps, i, step_offset, typ, sym, sym_dim)
            if not ary.as_scalar:
                step_offset += ary.ndim
            arrays.append(ary)

        bbreturn = cgutils.get_function(builder).append_basic_block('.return')

        # Prologue
        self.gen_prologue(builder)

        # Loop
        with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
            args = [a.array_value for a in arrays]
            innercall, error = self.gen_loop_body(builder, args)
            # If error, escape
            cgutils.cbranch_or_continue(builder, error, bbreturn)

            for a in arrays:
                a.next(ind)

        builder.branch(bbreturn)
        builder.position_at_end(bbreturn)

        # Epilogue
        self.gen_epilogue(builder)

        builder.ret_void()

        module.verify()
        # Set core function to internal so that it is not generated
        self.func.linkage = LINKAGE_INTERNAL
        # Force inline of code function
        inline_function(innercall)
        # Run optimizer
        self.context.optimize(module)

        if config.DUMP_OPTIMIZED:
            print(module)

        wrapper.verify()
        return wrapper, self.env