예제 #1
0
    def __init__(self, module, opt_level=3, loop_vectorize=True):
        """Create a target machine and run the module pass pipeline on ``module``.

        ``opt_level`` is used both as the code-generation level of the
        ``TargetMachine`` and as the optimization level of the pass
        manager.  ``loop_vectorize`` is forwarded to the pass-manager
        builder.  AVX is disabled in the feature string whenever
        ``detect_avx_support()`` returns a falsy value.
        """
        # The two target configurations differ only in the feature string.
        avx_features = '' if detect_avx_support() else '-avx'
        machine = le.TargetMachine.new(
            opt=opt_level,
            cm=le.CM_JITDEFAULT,
            features=avx_features,
        )

        managers = lp.build_pass_managers(
            tm=machine,
            fpm=False,
            mod=module,
            opt=opt_level,
            vectorize=False,
            loop_vectorize=loop_vectorize,
            inline_threshold=self.inline_threshold,
        )
        managers.pm.run(module)
예제 #2
0
 def __init__(self, module_name,
              optimize=llvm_config.llvm_optimize,
              verify=llvm_config.llvm_verify):
   """Create the module, its JIT execution engine and a pass manager.

   When ``optimize`` is truthy everything is configured at level 3,
   otherwise at level 0.  The verify passes are always registered; the
   optimization passes followed by the verify passes again are added
   only when ``optimize`` is truthy.
   """
   # NOTE(review): `verify` is accepted but never read in this method.
   level = 3 if optimize else 0

   self.module = core.Module.new(module_name)
   self.engine_builder = ee.EngineBuilder.new(self.module)
   self.engine_builder.force_jit()
   # The same level is requested whether or not we optimize.
   self.engine_builder.opt(level)
   self.exec_engine = self.engine_builder.create()

   machine = ee.TargetMachine.new(opt=level, cm=ee.CM_JITDEFAULT)
   self.tm = machine
   _, function_passes = passes.build_pass_managers(
       machine,
       opt=level,
       loop_vectorize=(level > 0),
       mod=self.module,
       vectorize=(level > 0))
   self.pass_manager = function_passes

   for verify_pass in self._verify_passes:
     self.pass_manager.add(verify_pass)
   if optimize:
     for extra_pass in (self._opt_passes + self._verify_passes):
       self.pass_manager.add(extra_pass)
예제 #3
0
    def __initialize(self, opt, cg, inline):
        """Set up the shared module, target machine, engine and pass manager.

        ``cg`` is the code-generation level given to the target machine;
        ``opt`` and ``inline`` are forwarded to the pass-manager builder
        as the optimization level and the inline threshold.
        """
        assert self.__singleton is None

        module = lc.Module.new("numba_executable_module")
        self.__module = module

        # AVX detection is switched off in this variant: AVX is always
        # disabled on the target machine.
        machine = le.TargetMachine.new(opt=cg,
                                       cm=le.CM_JITDEFAULT,
                                       features='-avx')
        self.__machine = machine

        # Execution engine bound to the module and the target machine.
        self.__engine = le.EngineBuilder.new(module).create(machine)

        # One module-level pass manager, reused for every module.
        loop_vectorizer_available = llvm.version >= (3, 2)
        managers = lp.build_pass_managers(
            machine,
            opt=opt,
            inline_threshold=inline,
            loop_vectorize=loop_vectorizer_available,
            fpm=False)
        self.__pm = managers.pm

        self.__string_constants = {}
예제 #4
0
 def __init__(self, module, opt_level=3):
     """Build pass managers for ``module`` with both vectorizers disabled.

     The result of ``lp.build_pass_managers`` is unpacked into
     ``self.pm`` and ``self.fpm``.
     """
     # NOTE(review): `opt_level` is accepted but never used here.
     machine = le.TargetMachine.new(features='', cm=le.CM_JITDEFAULT)
     managers = lp.build_pass_managers(
         machine,
         loop_vectorize=False,
         vectorize=False,
         fpm=False,
         mod=module)
     self.pm, self.fpm = managers
예제 #5
0
def example(title, module_builder, opt):
    """Build a module, optionally optimize it, then JIT and run it.

    Parameters
    ----------
    title : str
        Text printed as a centered banner before anything else.
    module_builder : callable
        Called with no arguments; must return a ``(module, function)``
        pair.
    opt : bool
        When truthy, the module is run through the level-3 pass pipeline
        with loop vectorization before it is compiled.

    The compiled function is called through ctypes with two float input
    arrays, one float output array and the element count; the inputs
    and the output are printed.
    """
    print(title.center(80, '='))
    mod, fn = module_builder()

    eb = le.EngineBuilder.new(mod).opt(3)
    if opt:
        print('opt')
        tm = eb.select_target()
        pms = lp.build_pass_managers(mod=mod,
                                     tm=tm,
                                     opt=3,
                                     loop_vectorize=True,
                                     fpm=False)
        pms.pm.run(mod)

    print(mod)
    print(mod.to_native_assembly())

    engine = eb.create()
    ptr = engine.get_pointer_to_function(fn)

    # Named `kernel` rather than `callable` so the builtin is not shadowed.
    float_ptr = POINTER(c_float)
    kernel = CFUNCTYPE(None, float_ptr, float_ptr, float_ptr, c_int)(ptr)

    N = 20
    in1 = (c_float * N)(*range(N))
    in2 = (c_float * N)(*range(N))
    out = (c_float * N)()

    print('in1: ', list(in1))
    # The second input used to be printed under the label 'in1: '.
    print('in2: ', list(in2))

    kernel(in1, in2, out, N)

    print('out', list(out))
예제 #6
0
def example(title, module_builder, opt):
    """Build a module, optionally optimize it, then JIT and run it.

    Parameters
    ----------
    title : str
        Text printed as a centered banner before anything else.
    module_builder : callable
        Called with no arguments; must return a ``(module, function)``
        pair.
    opt : bool
        When truthy, the module is run through the level-3 pass pipeline
        with loop vectorization before it is compiled.

    The compiled function is called through ctypes with two float input
    arrays, one float output array and the element count; the inputs
    and the output are printed.
    """
    print(title.center(80, '='))
    mod, fn = module_builder()

    eb = le.EngineBuilder.new(mod).opt(3)
    if opt:
        print('opt')
        tm = eb.select_target()
        pms = lp.build_pass_managers(mod=mod, tm=tm, opt=3, loop_vectorize=True,
                                     fpm=False)
        pms.pm.run(mod)

    print(mod)
    print(mod.to_native_assembly())

    engine = eb.create()
    ptr = engine.get_pointer_to_function(fn)

    # Named `kernel` rather than `callable` so the builtin is not shadowed.
    float_ptr = POINTER(c_float)
    kernel = CFUNCTYPE(None, float_ptr, float_ptr, float_ptr, c_int)(ptr)

    N = 20
    in1 = (c_float * N)(*range(N))
    in2 = (c_float * N)(*range(N))
    out = (c_float * N)()

    print('in1: ', list(in1))
    # The second input used to be printed under the label 'in1: '.
    print('in2: ', list(in2))

    kernel(in1, in2, out, N)

    print('out', list(out))
예제 #7
0
파일: llvmcontext.py 프로젝트: dwf/numba
    def __initialize(self, opt, cg, inline):
        """Set up the shared module, target machine, engine and pass manager.

        ``cg`` is the code-generation level given to the target machine;
        ``opt`` and ``inline`` are forwarded to the pass-manager builder
        as the optimization level and the inline threshold.
        """
        assert self.__singleton is None

        module = lc.Module.new("numba_executable_module")
        self.__module = module

        # Disable AVX when the host lacks it, or when the detection
        # helper cannot be imported (old llvm).
        try:
            from llvm.workaround.avx_support import detect_avx_support
            features = '' if detect_avx_support() else '-avx'
        except ImportError:
            features = '-avx'

        machine = le.TargetMachine.new(opt=cg, cm=le.CM_JITDEFAULT,
                                       features=features)
        self.__machine = machine

        # Execution engine bound to the module and the target machine.
        self.__engine = le.EngineBuilder.new(module).create(machine)

        # One module-level pass manager, reused for every module.
        loop_vectorizer_available = llvm.version >= (3, 2)
        managers = lp.build_pass_managers(
            machine,
            opt=opt,
            inline_threshold=inline,
            loop_vectorize=loop_vectorizer_available,
            fpm=False)
        self.__pm = managers.pm

        self.__string_constants = {}
예제 #8
0
 def build_pass_manager(self):
     """Return a module pass manager built at optimization level 0."""
     # Level 0 leaves the optimization to Impala; switch to 3 to
     # optimize here instead.
     level = 0
     managers = lp.build_pass_managers(
         tm=self.tm, opt=level, loop_vectorize=True, fpm=False)
     return managers.pm
예제 #9
0
    def _cull_exports(self):
        """Compile all exported signatures and merge them into one LLVM module.

        Every entry of the export registry is compiled and renamed to its
        exported symbol, the resulting modules are linked into a fresh
        module, and that module is optimized at level 3.  The registry is
        emptied afterwards; the merged module is printed and returned.
        """
        self.exported_signatures = export_registry

        # Destination module that receives everything.
        combined = lc.Module.new(self.module_name)

        typing_ctx = CPUTarget.typing_context
        # TODO Use non JIT-ing target
        target_ctx = CPUTarget.target_context

        compiled_modules = []
        compile_flags = Flags()
        if not self.export_python_wrap:
            compile_flags.set("no_compile")

        for entry in self.exported_signatures:
            result = compile_extra(typing_ctx, target_ctx, entry.function,
                                   entry.signature.args,
                                   entry.signature.return_type,
                                   compile_flags, locals={})

            if not self.export_python_wrap:
                result.llvm_func.name = entry.symbol
            else:
                # Hide the raw function and export its wrapper under the
                # public symbol instead.
                owner = result.llvm_func.module
                result.llvm_func.linkage = lc.LINKAGE_INTERNAL
                wrapper_name = "wrapper." + result.llvm_func.name
                owner.get_function_named(wrapper_name).name = entry.symbol

            compiled_modules.append(result.llvm_module)

        # Link every compiled module into the destination module.
        for compiled in compiled_modules:
            combined.link_in(compiled, preserve=self.export_python_wrap)

        # Optimize the merged module.
        machine = le.TargetMachine.new(opt=3)
        managers = lp.build_pass_managers(
            tm=machine, opt=3, loop_vectorize=True, fpm=False)
        managers.pm.run(combined)

        if self.export_python_wrap:
            self._emit_python_wrapper(combined)

        del self.exported_signatures[:]
        print(combined)
        return combined
예제 #10
0
파일: cpu.py 프로젝트: ASPP/numba
 def optimize_function(self, func):
     """Run the O1 function-level passes over ``func``."""
     function_passes = lp.build_pass_managers(
         tm=self.tm, opt=1, pm=False, mod=func.module).fpm
     function_passes.initialize()
     function_passes.run(func)
     function_passes.finalize()
예제 #11
0
파일: support.py 프로젝트: B-Rich/llvmmath
def make_llvm_context(name="mymodule"):
    "Return an LLVM context (engine, module, passmanager)"
    # NOTE(review): `name` is not used; the module name is fixed below.
    llvm_module = lc.Module.new("executable_module")
    # AVX is disabled on the target machine.
    machine = le.TargetMachine.new(opt=3, cm=le.CM_JITDEFAULT,
                                   features='-avx')
    engine = le.EngineBuilder.new(llvm_module).create(machine)
    module_passes = lp.build_pass_managers(
        machine, opt=3, inline_threshold=1000, fpm=False).pm
    return LLVMContext(engine, llvm_module, module_passes)
예제 #12
0
 def optimize_function(self, func):
     """Run the O1 function-level passes over ``func``."""
     function_passes = lp.build_pass_managers(
         tm=self.tm, opt=1, pm=False, mod=func.module).fpm
     function_passes.initialize()
     function_passes.run(func)
     function_passes.finalize()
예제 #13
0
    def optimize_pythonapi(self, func):
        """Simplify ``func`` with the O1 function passes, then drop
        extra refcount API calls."""
        function_passes = lp.build_pass_managers(
            tm=self.tm, opt=1, mod=func.module).fpm

        function_passes.initialize()
        function_passes.run(func)
        function_passes.finalize()

        # Remove extra refct api calls.
        remove_refct_calls(func)
예제 #14
0
파일: support.py 프로젝트: toobaz/llvmmath
def make_llvm_context(name="mymodule"):
    "Return an LLVM context (engine, module, passmanager)"
    # NOTE(review): `name` is not used; the module name is fixed below.
    llvm_module = lc.Module.new("executable_module")
    # AVX is disabled on the target machine.
    machine = le.TargetMachine.new(opt=3, cm=le.CM_JITDEFAULT,
                                   features='-avx')
    engine = le.EngineBuilder.new(llvm_module).create(machine)
    module_passes = lp.build_pass_managers(
        machine, opt=3, inline_threshold=1000, fpm=False).pm
    return LLVMContext(engine, llvm_module, module_passes)
예제 #15
0
파일: cpu.py 프로젝트: neeck/numba
    def optimize_pythonapi(self, func):
        """Simplify ``func`` with the O1 function passes, then drop
        extra refcount API calls."""
        function_passes = lp.build_pass_managers(
            tm=self.tm, opt=1, mod=func.module).fpm

        function_passes.initialize()
        function_passes.run(func)
        function_passes.finalize()

        # Remove extra refct api calls.
        remove_refct_calls(func)
예제 #16
0
파일: compiler.py 프로젝트: ewiger/numba
    def _cull_exports(self):
        """Compile all exported signatures and merge them into one LLVM module.

        Every entry of the export registry is compiled and renamed to its
        exported symbol, the resulting modules are linked into a fresh
        module, and that module is optimized at level 3.  The merged
        module is printed and returned.  The registry itself is left
        untouched: the reset at the end is currently disabled.
        """
        self.exported_signatures = export_registry

        # Destination module that receives everything.
        combined = lc.Module.new(self.module_name)

        typing_ctx = CPUTarget.typing_context
        # TODO Use non JIT-ing target
        target_ctx = CPUTarget.target_context

        compiled_modules = []
        compile_flags = Flags()
        if not self.export_python_wrap:
            compile_flags.set("no_compile")

        for entry in self.exported_signatures:
            result = compile_extra(typing_ctx,
                                   target_ctx,
                                   entry.function,
                                   entry.signature.args,
                                   entry.signature.return_type,
                                   compile_flags,
                                   locals={})

            if not self.export_python_wrap:
                result.llvm_func.name = entry.symbol
            else:
                # Hide the raw function and export its wrapper under the
                # public symbol instead.
                owner = result.llvm_func.module
                result.llvm_func.linkage = lc.LINKAGE_INTERNAL
                wrapper_name = "wrapper." + result.llvm_func.name
                owner.get_function_named(wrapper_name).name = entry.symbol

            compiled_modules.append(result.llvm_module)

        # Link every compiled module into the destination module.
        for compiled in compiled_modules:
            combined.link_in(compiled, preserve=self.export_python_wrap)

        # Optimize the merged module.
        machine = le.TargetMachine.new(opt=3)
        managers = lp.build_pass_managers(
            tm=machine, opt=3, loop_vectorize=True, fpm=False)
        managers.pm.run(combined)

        if self.export_python_wrap:
            self._emit_python_wrapper(combined)

        # Registry reset intentionally left disabled:
        #del self.exported_signatures[:]
        print(combined)
        return combined
예제 #17
0
파일: llrt.py 프로젝트: tpn/llvmpy
def load(arch):
    '''Load and optimize the LLRT module for the given architecture.

    Any architecture other than 'x86_64' is treated as 'x86'.  The
    matching ``.ll`` file in the ``llrt`` directory next to this file is
    parsed into a new module, which is then run through the level-3
    module passes built for a default target machine.
    '''
    arch_tag = arch if arch == 'x86_64' else 'x86'
    ll_path = os.path.join(os.path.dirname(__file__), 'llrt',
                           'llrt_%s.ll' % arch_tag)
    with open(ll_path) as source:
        lib = lc.Module.from_assembly(source)

    # Optimize the freshly parsed module.
    machine = le.TargetMachine.new()
    module_passes = lp.build_pass_managers(machine, opt=3, fpm=False).pm
    module_passes.run(lib)
    return lib
예제 #18
0
파일: numpile.py 프로젝트: cpcloud/numpile
def codegen(ast, specializer, retty, argtys):
    """Lower ``ast`` to LLVM, optimize the module and return the function.

    ``specializer``, ``retty`` and ``argtys`` are handed to
    ``LLVMEmitter``.  The emitter then visits ``ast``, the emitted
    function is verified, and the level-3 module passes (with loop
    vectorization) are run over ``module``.  The function and the native
    assembly are passed to ``debug`` before the function is returned.
    """
    cgen = LLVMEmitter(specializer, retty, argtys)
    # The AST has to be visited before `cgen.function` is used; this
    # call was missing, which left the `ast` argument unused.
    cgen.visit(ast)
    cgen.function.verify()

    tm = le.TargetMachine.new(opt=3, cm=le.CM_JITDEFAULT, features='')
    # NOTE(review): `module` is not defined in this function; presumably
    # a module-level global -- confirm.
    pms = lp.build_pass_managers(tm=tm,
                                 fpm=False,
                                 mod=module,
                                 opt=3,
                                 vectorize=False,
                                 loop_vectorize=True)
    pms.pm.run(module)

    debug(cgen.function)
    debug(module.to_native_assembly())
    return cgen.function
예제 #19
0
def codegen(ast, specializer, retty, argtys):
    """Lower ``ast`` to LLVM, optimize the module and return the function.

    The emitter visits ``ast``, the emitted function is verified, and
    the level-3 module passes (with loop vectorization) are run over
    ``module``.  The function and the native assembly are passed to
    ``debug`` before the function is returned.
    """
    emitter = LLVMEmitter(specializer, retty, argtys)
    # The value returned by visit() is not needed afterwards.
    emitter.visit(ast)
    emitter.function.verify()

    machine = le.TargetMachine.new(opt=3, cm=le.CM_JITDEFAULT, features='')
    # NOTE(review): `module` is not defined in this function; presumably
    # a module-level global -- confirm.
    managers = lp.build_pass_managers(
        tm=machine,
        fpm=False,
        mod=module,
        opt=3,
        vectorize=False,
        loop_vectorize=True)
    managers.pm.run(module)

    debug(emitter.function)
    debug(module.to_native_assembly())
    return emitter.function
예제 #20
0
    def build_pass_manager(self):
        """Return the module pass manager selected by ``config.OPT``.

        At level 3 the standard pipeline from ``lp.build_pass_managers``
        is used (same passes as clang -O3).  For any other value a
        small, hand-picked pass list is assembled for fast compilation.
        """
        if config.OPT == 3:
            managers = lp.build_pass_managers(
                tm=self.tm, opt=3, loop_vectorize=True, fpm=False)
            return managers.pm

        # Minimal pipeline.
        # TODO: make it generate vector code
        machine = self.tm
        manager = lp.PassManager.new()
        manager.add(machine.target_data.clone())
        manager.add(lp.TargetLibraryInfo.new(machine.triple))
        # Re-enable for target information for vectorization
        # machine.add_analysis_passes(manager)
        pass_names = (
            'basicaa',
            'scev-aa',
            'mem2reg',
            'sroa',
            'adce',
            'dse',
            'sccp',
            'instcombine',
            'simplifycfg',
            'loops',
            'indvars',
            'loop-simplify',
            'licm',
            'simplifycfg',
            'instcombine',
            'loop-vectorize',
            'instcombine',
            'simplifycfg',
            'globalopt',
            'globaldce',
        )
        for pass_name in pass_names:
            manager.add(lp.Pass.new(pass_name))
        return manager
예제 #21
0
파일: cpu.py 프로젝트: ASPP/numba
 def build_pass_manager(self):
     """Return the module pass manager selected by ``config.OPT``.

     For levels 1 to 3 the standard pipeline from
     ``lp.build_pass_managers`` is used, with loop vectorization taken
     from ``config.LOOP_VECTORIZE``.  For any other value a small,
     hand-picked pass list is assembled for fast compilation.
     """
     if 0 < config.OPT <= 3:
         managers = lp.build_pass_managers(
             tm=self.tm,
             opt=config.OPT,
             loop_vectorize=config.LOOP_VECTORIZE,
             fpm=False)
         return managers.pm

     # Minimal pipeline.
     # TODO: make it generate vector code
     machine = self.tm
     manager = lp.PassManager.new()
     manager.add(machine.target_data.clone())
     manager.add(lp.TargetLibraryInfo.new(machine.triple))
     # Re-enable for target information for vectorization
     # machine.add_analysis_passes(manager)
     pass_names = (
         'basicaa',
         'scev-aa',
         'mem2reg',
         'sroa',
         'adce',
         'dse',
         'sccp',
         'instcombine',
         'simplifycfg',
         'loops',
         'indvars',
         'loop-simplify',
         'licm',
         'simplifycfg',
         'instcombine',
         'loop-vectorize',
         'instcombine',
         'simplifycfg',
         'globalopt',
         'globaldce',
     )
     for pass_name in pass_names:
         manager.add(lp.Pass.new(pass_name))
     return manager
예제 #22
0
 def __initialize(self, opt, cg, inline):
     """Set up the shared module, target machine, engine and pass manager.

     ``cg`` is the code-generation level given to the target machine;
     ``opt`` and ``inline`` are forwarded to the pass-manager builder as
     the optimization level and the inline threshold.
     """
     assert self.__singleton is None

     module = lc.Module.new("numba_executable_module")
     self.__module = module

     # FIXME: workaround for missing AVX support in old linux kernels.
     from llvm.ee import FORCE_DISABLE_AVX
     features = '-avx' if FORCE_DISABLE_AVX else ''

     machine = le.TargetMachine.new(opt=cg, cm=le.CM_JITDEFAULT,
                                    features=features)
     self.__machine = machine

     # Execution engine bound to the module and the target machine.
     self.__engine = le.EngineBuilder.new(module).create(machine)

     # One module-level pass manager, reused for every module.
     loop_vectorizer_available = llvm.version >= (3, 2)
     managers = lp.build_pass_managers(
         machine,
         opt=opt,
         inline_threshold=inline,
         loop_vectorize=loop_vectorizer_available,
         fpm=False)
     self.__pm = managers.pm
예제 #23
0
 def func_ptr(self):
     """Return the native pointer of the compiled function, building it
     on first use.

     The pointer is cached in ``self._func_ptr``.  On the first call a
     clone of the module is taken; if no execution engine exists yet the
     clone is optimized at level 3 and a new engine is created for it.
     """
     if self._func_ptr is not None:
         return self._func_ptr

     module = self.module.clone()
     if self._ee is None:
         from llvm.passes import build_pass_managers
         import llvm.ee as le
         machine = le.TargetMachine.new(opt=3, cm=le.CM_JITDEFAULT,
                                        features='')
         managers = build_pass_managers(machine, opt=3, fpm=False,
                                        vectorize=True, loop_vectorize=True)
         managers.pm.run(module)
         if sys.version_info >= (3,):
             import builtins
         else:
             import __builtin__ as builtins
         # A copy of the optimized module and the function name are
         # stashed on the builtins module.
         builtins._temp = module.clone()
         builtins._tempname = self.func.name
         #self._ee = le.ExecutionEngine.new(module)
         # FIXME: Temporarily disabling AVX, because of misdetection
         #        in linux VMs. Some code is in llvmpy's workarounds
         #        submodule related to this.
         self._ee = le.EngineBuilder.new(module).mattrs("-avx").create()

     compiled = module.get_function_named(self.func.name)
     self._func_ptr = self._ee.get_pointer_to_function(compiled)
     return self._func_ptr
예제 #24
0
    def unbound_single_ckernel(self):
        """Creates an UnboundCKernelFunction with the ExprSingleOperation prototype.

        The result is built once and cached in
        ``self._unbound_single_ckernel``.  A wrapper function named
        ``<func name>_single_ckernel`` taking ``(dst_ptr, src_ptrs,
        extra_ptr)`` is generated in a clone of the module; it unpacks
        the source pointers according to ``self.kinds``, calls the
        wrapped function and stores the result through ``dst_ptr``.

        Raises
        ------
        ValueError
            If an array argument is not C contiguous.
        TypeError
            If a parameter kind or a shape dimension type is unsupported.
        """
        import ctypes
        if self._unbound_single_ckernel is None:
            i8_p_type = Type.pointer(Type.int(8))
            func_type = Type.function(void_type,
                            [i8_p_type, Type.pointer(i8_p_type), i8_p_type])
            module = self.module.clone()
            single_ck_func_name = self.func.name +"_single_ckernel"
            single_ck_func = Function.new(module, func_type,
                                              name=single_ck_func_name)
            block = single_ck_func.append_basic_block('entry')
            builder = lc.Builder.new(block)
            dst_ptr_arg, src_ptr_arr_arg, extra_ptr_arg = single_ck_func.args
            dst_ptr_arg.name = 'dst_ptr'
            src_ptr_arr_arg.name = 'src_ptrs'
            extra_ptr_arg.name = 'extra_ptr'
            # Build up the kernel data structure. Currently, this means
            # adding a shape field for each array argument. First comes
            # the kernel data prefix with a spot for the 'owner' reference added.
            input_field_indices = []
            kernel_data_fields = [Type.struct([i8_p_type]*3)]
            kernel_data_ctypes_fields = [('base', JITKernelData)]
            for i, (kind, a) in enumerate(izip(self.kinds, self.argtypes)):
                if isinstance(kind, tuple):
                    if kind[0] != lla.C_CONTIGUOUS:
                        raise ValueError('only support C contiguous array presently')
                    input_field_indices.append(len(kernel_data_fields))
                    kernel_data_fields.append(Type.array(
                                    intp_type, len(self.dshapes[i])-1))
                    kernel_data_ctypes_fields.append(('operand_%d' % i,
                                    c_ssize_t * (len(self.dshapes[i])-1)))
                elif kind in [SCALAR, POINTER]:
                    # Scalars and pointers carry no shape field.
                    input_field_indices.append(None)
                else:
                    # The message used to format the undefined name `k`,
                    # which raised NameError instead of this TypeError.
                    raise TypeError(("unbound_single_ckernel codegen doesn't " +
                                    "support the parameter kind %r yet") % (kind,))
            # Make an LLVM and ctypes type for the extra data pointer.
            kernel_data_llvmtype = Type.struct(kernel_data_fields)
            class kernel_data_ctypestype(ctypes.Structure):
                _fields_ = kernel_data_ctypes_fields
            # Cast the extra pointer to the right llvm type
            extra_struct = builder.bitcast(extra_ptr_arg,
                            Type.pointer(kernel_data_llvmtype))
            # Convert the src pointer args to the
            # appropriate kinds for the llvm call
            args = []
            for i, (kind, atype) in enumerate(izip(self.kinds[:-1], self.argtypes)):
                if kind == SCALAR:
                    # Load the i-th source pointer, then the value behind it.
                    src_ptr = builder.bitcast(builder.load(
                                    builder.gep(src_ptr_arr_arg,
                                            (lc.Constant.int(intp_type, i),))),
                                        Type.pointer(atype))
                    src_val = builder.load(src_ptr)
                    args.append(src_val)
                elif kind == POINTER:
                    # Pass the i-th source pointer through, retyped.
                    src_ptr = builder.bitcast(builder.load(
                                    builder.gep(src_ptr_arr_arg,
                                            (lc.Constant.int(intp_type, i),))),
                                        Type.pointer(atype))
                    args.append(src_ptr)
                elif isinstance(kind, tuple):
                    src_ptr = builder.bitcast(builder.load(
                                    builder.gep(src_ptr_arr_arg,
                                            (lc.Constant.int(intp_type, i),))),
                                        Type.pointer(kind[2]))
                    # First get the shape of this parameter. This will
                    # be a combination of Fixed and TypeVar (Var unsupported
                    # here for now)
                    shape = self.dshapes[i][:-1]
                    # Get the llvm array
                    arr_var = builder.alloca(atype.pointee)
                    builder.store(src_ptr,
                                    builder.gep(arr_var,
                                    (lc.Constant.int(int32_type, 0),
                                     lc.Constant.int(int32_type, 0))))
                    for j, sz in enumerate(shape):
                        if isinstance(sz, Fixed):
                            # If the shape is already known at JIT compile time,
                            # insert the constant
                            shape_el_ptr = builder.gep(arr_var,
                                            (lc.Constant.int(int32_type, 0),
                                             lc.Constant.int(int32_type, 1),
                                             lc.Constant.int(intp_type, j)))
                            builder.store(lc.Constant.int(intp_type,
                                                    operator.index(sz)),
                                            shape_el_ptr)
                        elif isinstance(sz, TypeVar):
                            # TypeVar types are only known when the kernel is bound,
                            # so copy it from the extra data pointer
                            sz_from_extra_ptr = builder.gep(extra_struct,
                                            (lc.Constant.int(int32_type, 0),
                                             lc.Constant.int(int32_type,
                                                    input_field_indices[i]),
                                             lc.Constant.int(intp_type, j)))
                            sz_from_extra = builder.load(sz_from_extra_ptr)
                            shape_el_ptr = builder.gep(arr_var,
                                            (lc.Constant.int(int32_type, 0),
                                             lc.Constant.int(int32_type, 1),
                                             lc.Constant.int(intp_type, j)))
                            builder.store(sz_from_extra, shape_el_ptr)
                        else:
                            raise TypeError(("unbound_single_ckernel codegen doesn't " +
                                            "support dimension type %r") % type(sz))
                    args.append(arr_var)
            # Call the function and store in the dst
            kind = self.kinds[-1]
            func = module.get_function_named(self.func.name)
            if kind == SCALAR:
                # The wrapped function returns the value; store it in dst.
                dst_ptr = builder.bitcast(dst_ptr_arg,
                                Type.pointer(self.return_type))
                dst_val = builder.call(func, args)
                builder.store(dst_val, dst_ptr)
            elif kind == POINTER:
                # The wrapped function writes through a trailing dst pointer.
                dst_ptr = builder.bitcast(dst_ptr_arg,
                                Type.pointer(self.return_type))
                builder.call(func, args + [dst_ptr])
            elif isinstance(kind, tuple):
                dst_ptr = builder.bitcast(dst_ptr_arg,
                                Type.pointer(kind[2]))
                # First get the shape of the output. This will
                # be a combination of Fixed and TypeVar (Var unsupported
                # here for now)
                shape = self.dshapes[-1][:-1]
                # Get the llvm array
                arr_var = builder.alloca(self.argtypes[-1].pointee)
                builder.store(dst_ptr,
                                builder.gep(arr_var,
                                    (lc.Constant.int(int32_type, 0),
                                    lc.Constant.int(int32_type, 0))))
                for j, sz in enumerate(shape):
                    if isinstance(sz, Fixed):
                        # If the shape is already known at JIT compile time,
                        # insert the constant
                        shape_el_ptr = builder.gep(arr_var,
                                        (lc.Constant.int(int32_type, 0),
                                         lc.Constant.int(int32_type, 1),
                                         lc.Constant.int(intp_type, j)))
                        builder.store(lc.Constant.int(intp_type,
                                                operator.index(sz)),
                                        shape_el_ptr)
                    elif isinstance(sz, TypeVar):
                        # TypeVar types are only known when the kernel is bound,
                        # so copy it from the extra data pointer
                        sz_from_extra_ptr = builder.gep(extra_struct,
                                        (lc.Constant.int(int32_type, 0),
                                         lc.Constant.int(int32_type,
                                                input_field_indices[-1]),
                                         lc.Constant.int(intp_type, j)))
                        sz_from_extra = builder.load(sz_from_extra_ptr)
                        shape_el_ptr = builder.gep(arr_var,
                                        (lc.Constant.int(int32_type, 0),
                                         lc.Constant.int(int32_type, 1),
                                         lc.Constant.int(intp_type, j)))
                        builder.store(sz_from_extra, shape_el_ptr)
                    else:
                        raise TypeError(("unbound_single_ckernel codegen doesn't " +
                                        "support dimension type %r") % type(sz))
                builder.call(func, args + [arr_var])
            else:
                raise TypeError(("single_ckernel codegen doesn't " +
                                "support kind %r") % kind)
            builder.ret_void()

            #print("Function before optimization passes:")
            #print(single_ck_func)
            #module.verify()

            # Optimize the cloned module at level 3 with both vectorizers.
            import llvm.ee as le
            from llvm.passes import build_pass_managers
            tm = le.TargetMachine.new(opt=3, cm=le.CM_JITDEFAULT, features='')
            pms = build_pass_managers(tm, opt=3, fpm=False,
                            vectorize=True, loop_vectorize=True)
            pms.pm.run(module)

            #print("Function after optimization passes:")
            #print(single_ck_func)

            # DEBUGGING: Verify the module.
            #module.verify()
            # TODO: Cache the EE - the interplay with the func_ptr
            #       was broken, so just avoiding caching for now
            # FIXME: Temporarily disabling AVX, because of misdetection
            #        in linux VMs. Some code is in llvmpy's workarounds
            #        submodule related to this.
            ee = le.EngineBuilder.new(module).mattrs("-avx").create()
            func_ptr = ee.get_pointer_to_function(single_ck_func)
            # Create a function which copies the shape from data
            # descriptors to the extra data struct.
            if len(kernel_data_ctypes_fields) == 1:
                # Only the 'base' field exists, so there is no shape to copy.
                def bind_func(estruct, dst_dd, src_dd_list):
                    pass
            else:
                def bind_func(estruct, dst_dd, src_dd_list):
                    for i, (ds, dd) in enumerate(
                                    izip(self.dshapes, src_dd_list + [dst_dd])):
                        shape = [operator.index(dim)
                                        for dim in dd.dshape[-len(ds):-1]]
                        cshape = getattr(estruct, 'operand_%d' % i)
                        for j, dim_size in enumerate(shape):
                            cshape[j] = dim_size

            # The engine is passed along with the pointer in the last
            # argument.
            self._unbound_single_ckernel = UnboundCKernelFunction(
                            ExprSingleOperation(func_ptr),
                            kernel_data_ctypestype,
                            bind_func,
                            (ee, func_ptr))

        return self._unbound_single_ckernel
예제 #25
0
def optimize(module, lfunc):
    """Run llvmpy's level-3 pass pipeline over `module`.

    A target machine is created with the CM_JITDEFAULT code model and an
    empty feature string, pass managers are built for it with both
    `vectorize` and `loop_vectorize` switched on, and the resulting `pm`
    pass manager is run on `module`.  Nothing is returned.

    `lfunc` is accepted but never read by this function.
    """
    level = 3
    machine = le.TargetMachine.new(opt=level, cm=le.CM_JITDEFAULT,
                                   features='')
    managers = build_pass_managers(
        machine,
        opt=level,
        fpm=False,
        vectorize=True,
        loop_vectorize=True,
    )
    managers.pm.run(module)
예제 #26
0
 def build_pass_manager(self):
     """Return the `pm` member of the pass managers built for `self.tm`.

     The managers are requested at opt level 3 with loop vectorization
     enabled and with `fpm=False`.
     """
     make_managers = lp.build_pass_managers
     settings = dict(tm=self.tm, opt=3, loop_vectorize=True, fpm=False)
     return make_managers(**settings).pm
예제 #27
0
 def __init__(self, module, opt_level=3):
     """Build the pass managers for `module` and store them on `self`.

     Parameters
     ----------
     module
         Passed to `lp.build_pass_managers` as its `mod` argument.
     opt_level : int, optional
         Optimization level handed to both the target machine and the
         pass-manager builder.  Defaults to 3.

     Sets `self.pm` and `self.fpm` from the pair the builder returns.
     NOTE(review): `fpm=False` is requested, so `self.fpm` is presumably
     None -- confirm against llvmpy.
     """
     # Forward `opt_level` to both calls so the level requested by the
     # caller is actually honoured rather than falling back to the
     # library defaults.
     tc = le.TargetMachine.new(opt=opt_level, features='',
                               cm=le.CM_JITDEFAULT)
     self.pm, self.fpm = lp.build_pass_managers(tc, opt=opt_level,
             loop_vectorize=False, vectorize=True, fpm=False, mod=module)
예제 #28
0
파일: util.py 프로젝트: meteogrid/ufuncexpr
def optimize_llvm_function(func, opt_level=3, inline_threshold=15000):
    """Run llvmpy optimization passes over the module that owns `func`.

    Parameters
    ----------
    func
        Object whose `module` attribute is handed to the pass manager.
    opt_level : int, optional
        Level given to both `TargetMachine.new` and
        `lp.build_pass_managers`.
    inline_threshold : int, optional
        Forwarded unchanged to `lp.build_pass_managers`.
    """
    machine = TargetMachine.new(opt=opt_level)
    managers = lp.build_pass_managers(
        machine,
        opt=opt_level,
        loop_vectorize=True,
        fpm=False,
        inline_threshold=inline_threshold,
    )
    module_passes = managers.pm
    module_passes.run(func.module)
예제 #29
0
파일: impala.py 프로젝트: B-Rich/numba
 def build_pass_manager(self):
     """Build pass managers for `self.tm` and return their `pm` member.

     Uses opt level 3, loop vectorization on, and `fpm=False`.
     """
     managers = lp.build_pass_managers(
         tm=self.tm,
         opt=3,
         loop_vectorize=True,
         fpm=False,
     )
     module_passes = managers.pm
     return module_passes
예제 #30
0
파일: target.py 프로젝트: fkaufer/impyla
 def build_pass_manager(self):
     """Build pass managers for `self.tm` and return their `pm` member.

     The managers are requested at opt level 0 with loop vectorization
     enabled and with `fpm=False`.
     """
     # Level 0 leaves the optimizing to Impala; switch to 3 to optimize
     # here instead.
     level = 0
     make_managers = lp.build_pass_managers
     managers = make_managers(tm=self.tm, opt=level, loop_vectorize=True,
                              fpm=False)
     return managers.pm
예제 #31
0
def jit_compile_unbound_single_ckernel(bek, strided):
    """Creates an UnboundCKernelFunction with either the
    ExprSingleOperation prototype or the ExprStridedOperation
    prototype depending on the `strided` parameter.

    A wrapper function is generated in a clone of the kernel's LLVM
    module, the module is run through the optimization passes, and the
    wrapper is JIT-compiled to obtain a raw function pointer.

    Parameters
    ----------
    bek : BlazeElementKernel
        The blaze kernel to compile into an unbound single ckernel.
    strided : bool
        If true, returns an ExprStridedOperation, otherwise an
        ExprSingleOperation.

    Returns
    -------
    UnboundCKernelFunction
        Constructed from the JIT-compiled function pointer wrapped in
        the selected operation type, the ctypes kernel data struct
        type, a `bind_func(estruct, dst_dd, src_dd_list)` callable that
        copies shape information into the struct, and the tuple
        `(ee, func_ptr)`.
    """
    # bek.kinds holds one entry per source operand followed by a final
    # entry for the destination, so the input count is one less.
    inarg_count = len(bek.kinds)-1
    # The wrapper function is added to a clone of the kernel's module.
    module = bek.module.clone()
    if strided:
        ck_func_name = bek.func.name +"_strided_ckernel"
        ck_func = Function.new(module, strided_ckernel_func_type,
                                          name=ck_func_name)
    else:
        ck_func_name = bek.func.name +"_single_ckernel"
        ck_func = Function.new(module, single_ckernel_func_type,
                                          name=ck_func_name)
    entry_block = ck_func.append_basic_block('entry')
    builder = lc.Builder.new(entry_block)
    # Unpack and name the wrapper's arguments; the strided prototype
    # carries three extra arguments (dst stride, src strides, count).
    if strided:
        dst_ptr_arg, dst_stride_arg, \
            src_ptr_arr_arg, src_stride_arr_arg, \
            count_arg, extra_ptr_arg = ck_func.args
        dst_stride_arg.name = 'dst_stride'
        src_stride_arr_arg.name = 'src_strides'
        count_arg.name = 'count'
    else:
        dst_ptr_arg, src_ptr_arr_arg, extra_ptr_arg = ck_func.args
    dst_ptr_arg.name = 'dst_ptr'
    src_ptr_arr_arg.name = 'src_ptrs'
    extra_ptr_arg.name = 'extra_ptr'

    # Build llvm and ctypes structures for the kernel data, using
    # the argument types.
    kd_llvmtype, kd_ctypestype = args_to_kernel_data_struct(bek.kinds, bek.argtypes)
    # Cast the extra pointer to the right llvm type
    # NOTE(review): extra_struct is not referenced again in this
    # function -- confirm whether the cast is still needed.
    extra_struct = builder.bitcast(extra_ptr_arg,
                    Type.pointer(kd_llvmtype))

    if strided:
        # Allocate an array of pointer counters for the
        # strided loop
        src_ptr_arr_tmp = builder.alloca_array(int8_p_type,
                        lc.Constant.int(int32_type, inarg_count), 'src_ptr_arr')
        # Copy the pointers
        for i in range(inarg_count):
            builder.store(builder.load(builder.gep(src_ptr_arr_arg,
                            (lc.Constant.int(int32_type, i),))),
                          builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
        # Get all the src strides
        src_stride_vals = [builder.load(builder.gep(src_stride_arr_arg,
                                        (lc.Constant.int(int32_type, i),)))
                            for i in range(inarg_count)]
        # Replace src_ptr_arr_arg with this local variable
        src_ptr_arr_arg = src_ptr_arr_tmp

        # Initialize some more basic blocks for the strided loop
        looptest_block = ck_func.append_basic_block('looptest')
        loopbody_block = ck_func.append_basic_block('loopbody')
        end_block = ck_func.append_basic_block('finish')

        # Finish the entry block by branching
        # to the looptest block
        builder.branch(looptest_block)

        # The looptest block continues the loop while counter != 0
        builder.position_at_end(looptest_block)
        # Both phi nodes get their initial value from the entry block
        # here; the incoming values from the loop body are added later,
        # once those values have been built.
        counter_phi = builder.phi(count_arg.type)
        counter_phi.add_incoming(count_arg, entry_block)
        dst_ptr_phi = builder.phi(dst_ptr_arg.type)
        dst_ptr_phi.add_incoming(dst_ptr_arg, entry_block)
        # From here on dst_ptr_arg refers to the loop-carried pointer.
        dst_ptr_arg = dst_ptr_phi
        kzero = lc.Constant.int(count_arg.type, 0)
        pred = builder.icmp(lc.ICMP_NE, counter_phi, kzero)
        builder.cbranch(pred, loopbody_block, end_block)

        # The loopbody block decrements the counter, and executes
        # one kernel iteration
        builder.position_at_end(loopbody_block)
        kone = lc.Constant.int(counter_phi.type, 1)
        counter_dec = builder.sub(counter_phi, kone)
        counter_phi.add_incoming(counter_dec, loopbody_block)

    # The kernel call below is emitted at the builder's current
    # position: the loop body when strided, the entry block otherwise.

    # Convert the src pointer args to the
    # appropriate kinds for the llvm call
    args = build_llvm_src_ptrs(builder, src_ptr_arr_arg,
                    bek.dshapes, bek.kinds[:-1], bek.argtypes)
    # Call the function and store in the dst
    kind = bek.kinds[-1]
    # Look the kernel function up by name in the cloned module.
    func = module.get_function_named(bek.func.name)
    if kind == lla.SCALAR:
        # Scalar destination: the call's return value is stored
        # through the destination pointer.
        dst_ptr = builder.bitcast(dst_ptr_arg,
                        Type.pointer(bek.return_type))
        dst_val = builder.call(func, args)
        builder.store(dst_val, dst_ptr)
    else:
        # Otherwise the destination pointer is passed to the kernel
        # as an extra trailing argument.
        dst_ptr = build_llvm_arg_ptr(builder, dst_ptr_arg,
                        bek.dshapes[-1], kind, bek.argtypes[-1])
        builder.call(func, args + [dst_ptr])

    if strided:
        # Finish the loopbody block by incrementing all the pointers
        # and branching to the looptest block
        dst_ptr_inc = builder.gep(dst_ptr_arg, (dst_stride_arg,))
        dst_ptr_phi.add_incoming(dst_ptr_inc, loopbody_block)
        # Increment the src pointers
        for i in range(inarg_count):
            src_ptr_val = builder.load(builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
            src_ptr_inc = builder.gep(src_ptr_val, (src_stride_vals[i],))
            builder.store(src_ptr_inc,
                          builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
        builder.branch(looptest_block)

        # The end block just returns
        builder.position_at_end(end_block)

    # Emitted in the 'finish' block when strided, otherwise directly
    # after the kernel call in the entry block.
    builder.ret_void()

    #print("Function before optimization passes:")
    #print(ck_func)
    #module.verify()

    # Run the level-3 optimization passes over the whole cloned module.
    import llvm.ee as le
    from llvm.passes import build_pass_managers
    tm = le.TargetMachine.new(opt=3, cm=le.CM_JITDEFAULT, features='')
    pms = build_pass_managers(tm, opt=3, fpm=False,
                    vectorize=True, loop_vectorize=True)
    pms.pm.run(module)

    #print("Function after optimization passes:")
    #print(ck_func)

    # DEBUGGING: Verify the module.
    #module.verify()
    # TODO: Cache the EE - the interplay with the func_ptr
    #       was broken, so just avoiding caching for now
    # FIXME: Temporarily disabling AVX, because of misdetection
    #        in linux VMs. Some code is in llvmpy's workarounds
    #        submodule related to this.
    ee = le.EngineBuilder.new(module).mattrs("-avx").create()
    func_ptr = ee.get_pointer_to_function(ck_func)
    # Create a function which copies the shape from data
    # descriptors to the extra data struct.
    if len(kd_ctypestype._fields_) == 1:
        # If there were no extra data fields, it's a no-op function
        def bind_func(estruct, dst_dd, src_dd_list):
            pass
    else:
        def bind_func(estruct, dst_dd, src_dd_list):
            # Operands are visited sources first, destination last,
            # paired with the kernel's dshapes in the same order.
            for i, (ds, dd) in enumerate(
                            izip(bek.dshapes, src_dd_list + [dst_dd])):
                # Take the slice [-len(ds):-1] of the descriptor's
                # dshape and convert each entry to an integer index.
                shape = [operator.index(dim)
                                for dim in dd.dshape[-len(ds):-1]]
                # Write the sizes into the struct field for operand i.
                cshape = getattr(estruct, 'operand_%d' % i)
                for j, dim_size in enumerate(shape):
                    cshape[j] = dim_size

    if strided:
        optype = ExprStridedOperation
    else:
        optype = ExprSingleOperation

    # NOTE(review): (ee, func_ptr) is presumably passed along so the
    # execution engine stays alive as long as the function pointer is
    # in use -- confirm against UnboundCKernelFunction.
    return UnboundCKernelFunction(
                    optype(func_ptr),
                    kd_ctypestype,
                    bind_func,
                    (ee, func_ptr))
예제 #32
0
def optimize(module, lfunc):
    """Run llvmpy's level-3 pass pipeline over `module`.

    Creates a target machine (CM_JITDEFAULT code model, empty feature
    string), builds pass managers for it with `vectorize` and
    `loop_vectorize` both enabled, and runs the `pm` pass manager on
    `module`.  Nothing is returned.

    `lfunc` is accepted but never read by this function.
    """
    opt_level = 3
    target_machine = le.TargetMachine.new(
        opt=opt_level,
        cm=le.CM_JITDEFAULT,
        features='',
    )
    pass_managers = build_pass_managers(target_machine, opt=opt_level,
                                        fpm=False, vectorize=True,
                                        loop_vectorize=True)
    module_passes = pass_managers.pm
    module_passes.run(module)