def _ptx_jit_constructor():
    """Create the module pass manager and target machine used for PTX output.

    The target machine is built for the ``nvptx64-nvidia-cuda`` triple and
    the ``sm_XY`` processor matching the compute capability reported by the
    default PyCUDA device.

    Returns:
        tuple: ``(pass_manager, target_machine)``.
    """
    _binding_initialize()

    # A PassManagerBuilder can be shared between pass managers.
    builder = binding.PassManagerBuilder()
    builder.inlining_threshold = 99999  # inline all function calls
    builder.loop_vectorize = True
    builder.slp_vectorize = True
    builder.opt_level = 3  # most aggressive optimizations

    # Only the default device is queried.
    # TODO: Add support for multiple devices
    capability = pycuda_default.device.compute_capability()
    major, minor = capability[0], capability[1]
    sm_name = "sm_{}{}".format(major, minor)

    # Create compilation target, use 64bit triple
    target = binding.Target.from_triple("nvptx64-nvidia-cuda")
    machine = target.create_target_machine(
        cpu=sm_name, opt=3, codemodel='small')

    manager = binding.ModulePassManager()
    machine.add_analysis_passes(manager)
    # NOTE: builder.populate(manager) is disabled here, so the manager only
    # carries the target analysis passes.

    return manager, machine
def optimize_module(self, level=2):
    """Optimize ``self.module`` in place.

    Level 0 is a no-op. Otherwise a pass manager builder configured with
    ``level`` is used to run the function pipeline over every function of
    the module, followed by the module pipeline. The builder and both pass
    managers are kept on the instance as ``pmb``, ``fpm`` and ``mpm``.

    Args:
        level: optimization level; 0 returns immediately, values below 0
            or greater than or equal to 3 are rejected.

    Raises:
        PCLCodegenError: if ``level`` is outside the accepted range.
    """
    if level == 0:
        return
    if level < 0 or level >= 3:
        raise PCLCodegenError(
            'Undefined optimization level: {}'.format(level))

    # Builder shared by the function and module pipelines.
    builder = self.pmb = binding.PassManagerBuilder()
    builder.opt_level = level

    # LOCAL optimizations: one function at a time.
    function_passes = self.fpm = binding.FunctionPassManager(self.module)
    builder.populate(function_passes)
    function_passes.initialize()
    for function in self.module.functions:
        function_passes.run(function)
    function_passes.finalize()

    # GLOBAL optimizations: the module as a whole.
    module_passes = self.mpm = binding.ModulePassManager()
    builder.populate(module_passes)
    module_passes.run(self.module)
def optimize_module(module_assembly, options):
    """Parse, verify and optimise a textual LLVM module.

    Args:
        module_assembly (str): LLVM module assembly
        options (CompilerOptions): compiler options; ``options.machine``
            supplies the target data and ``options.opt_level`` the
            optimisation level

    Returns:
        A llvmlite.binding.ModuleRef for the verified and optimised module.
    """
    _ensure_llvm()

    # Turn the assembly text into a verified module.
    parsed = llvm.parse_assembly(module_assembly)
    parsed.verify()

    # Target passes go in first, the optimisation pipeline after them.
    manager = llvm.ModulePassManager()
    options.machine.target_data.add_pass(manager)

    builder = llvm.PassManagerBuilder()
    builder.opt_level = options.opt_level
    builder.populate(manager)

    manager.run(parsed)
    return parsed
def _cpu_jit_constructor():
    """Create the host JIT engine together with its pass manager and target.

    The target machine is configured for the host CPU name and feature set.
    The execution engine is backed by the module produced by
    ``_generate_cpu_builtins_module``.

    Returns:
        tuple: ``(jit_engine, pass_manager, target_machine)``.
    """
    _binding_initialize()

    # A PassManagerBuilder can be shared between pass managers.
    builder = binding.PassManagerBuilder()
    builder.loop_vectorize = True
    builder.slp_vectorize = True
    builder.opt_level = 2

    host_features = binding.get_host_cpu_features().flatten()
    host_name = binding.get_host_cpu_name()

    # Create compilation target, use default triple
    target = binding.Target.from_default_triple()
    # FIXME: reloc='static' is needed to avoid crashes on win64
    # see: https://github.com/numba/llvmlite/issues/457
    machine = target.create_target_machine(
        cpu=host_name, features=host_features, opt=2, reloc='static')

    manager = binding.ModulePassManager()
    machine.add_analysis_passes(manager)
    builder.populate(manager)

    # The execution engine gets a builtins module as its backing module.
    builtins_module = _generate_cpu_builtins_module(
        LLVMBuilderContext.float_ty)
    if "llvm" in debug_env:
        # Dump the generated builtins IR next to the working directory.
        dump_path = builtins_module.name + '.parse.ll'
        with open(dump_path, 'w') as dump_file:
            dump_file.write(str(builtins_module))

    backing_module = binding.parse_assembly(str(builtins_module))
    jit_engine = binding.create_mcjit_compiler(backing_module, machine)
    return jit_engine, manager, machine
def _cpu_jit_constructor():
    """Create the host JIT engine together with its pass manager and target.

    Uses the most aggressive builder settings (level 3, vectorizers on,
    very high inlining threshold) and a target machine configured for the
    host CPU name and feature set.

    Returns:
        tuple: ``(jit_engine, pass_manager, target_machine)``.
    """
    _binding_initialize()

    # A PassManagerBuilder can be shared between pass managers.
    builder = binding.PassManagerBuilder()
    builder.inlining_threshold = 99999  # inline all function calls
    builder.loop_vectorize = True
    builder.slp_vectorize = True
    builder.opt_level = 3  # most aggressive optimizations

    host_features = binding.get_host_cpu_features().flatten()
    host_name = binding.get_host_cpu_name()

    # Create compilation target, use default triple
    target = binding.Target.from_default_triple()
    machine = target.create_target_machine(
        cpu=host_name, features=host_features, opt=3)

    manager = binding.ModulePassManager()
    machine.add_analysis_passes(manager)
    builder.populate(manager)

    # The execution engine gets a builtins module as its backing module.
    builtins_module = _generate_cpu_builtins_module(_float_ty)
    if "llvm" in debug_env:
        # Dump the generated builtins IR for inspection.
        dump_path = builtins_module.name + '.parse.ll'
        with open(dump_path, 'w') as dump_file:
            dump_file.write(str(builtins_module))

    backing_module = binding.parse_assembly(str(builtins_module))
    jit_engine = binding.create_mcjit_compiler(backing_module, machine)
    return jit_engine, manager, machine
def assemble(module):
    """Parse ``module`` into an llvmlite module, verify it and optimize it.

    Args:
        module: object whose ``str()`` is LLVM assembly.

    Returns:
        The parsed module after the level-3 pipeline has been run on it.
    """
    # Level-3 module pipeline.
    pipeline = binding.ModulePassManager()
    settings = binding.PassManagerBuilder()
    settings.opt_level = 3
    settings.populate(pipeline)

    parsed = binding.parse_assembly(str(module))
    parsed.verify()
    pipeline.run(parsed)
    return parsed
def bind(module, *args, optimize=False):
    """JIT-compile ``module`` and call its ``main`` entry point.

    The built-ins are injected into ``module``, the result is parsed into an
    llvmlite module, optionally optimized with a hand-picked list of passes,
    verified, compiled with MCJIT and executed.

    Args:
        module: IR module; ``inject_built_in`` is applied to it and its
            ``str()`` is parsed as LLVM assembly.
        *args: accepted for compatibility; not used.
        optimize: when true, run the explicit pass list below before
            verification.

    Returns:
        list: ``[parsed_module, result]`` where ``result`` is the value
        returned by calling ``main`` with no arguments as a ``c_int64``
        function.
    """
    module = inject_built_in(module)
    llvm_ir_parsed = llvm.parse_assembly(str(module))

    if optimize:
        # Explicit pass list; the order is the order of execution.
        mod_manager = llvm.ModulePassManager()
        mod_manager.add_constant_merge_pass()
        mod_manager.add_dead_arg_elimination_pass()
        mod_manager.add_function_inlining_pass(225)
        mod_manager.add_global_dce_pass()
        mod_manager.add_global_optimizer_pass()
        mod_manager.add_ipsccp_pass()
        mod_manager.add_dead_code_elimination_pass()
        mod_manager.add_cfg_simplification_pass()
        mod_manager.add_gvn_pass()
        mod_manager.add_instruction_combining_pass()
        mod_manager.add_licm_pass()
        mod_manager.add_sccp_pass()
        mod_manager.add_type_based_alias_analysis_pass()
        mod_manager.add_basic_alias_analysis_pass()
        mod_manager.run(llvm_ir_parsed)

    llvm_ir_parsed.verify()

    # JIT
    target_machine = llvm.Target.from_default_triple().create_target_machine()
    engine = llvm.create_mcjit_compiler(llvm_ir_parsed, target_machine)
    engine.finalize_object()

    entry = engine.get_function_address("main")
    cfunc = CFUNCTYPE(c_int64)(entry)
    result = cfunc()
    return [llvm_ir_parsed, result]
def _optimize_final_module(self):
    """Run a lightweight level-1 module pipeline over ``self._final_module``.

    Loop unrolling and both vectorizers are switched off. Simplifying the
    module this way seems to work around a libnvvm compilation bug
    (see #1341).
    """
    settings = ll.PassManagerBuilder()
    settings.opt_level = 1
    settings.disable_unit_at_a_time = False
    settings.disable_unroll_loops = True
    settings.loop_vectorize = False
    settings.slp_vectorize = False

    pipeline = ll.ModulePassManager()
    settings.populate(pipeline)
    pipeline.run(self._final_module)
def execute(module, optimization):
    """JIT-compile ``module``, call its ``run`` entry point and print the result.

    Args:
        module: IR module whose ``str()`` is LLVM assembly.
        optimization: when truthy, run the level-3 pipeline followed by an
            explicit list of extra passes before compiling.

    Returns:
        The parsed (and possibly optimized) llvmlite module.
    """
    parsed_module = llvm.parse_assembly(str(module))

    if optimization:
        # Level-3 pipeline from the builder. The function pass manager that
        # used to be populated here was never run, so it has been dropped.
        pmb = llvm.PassManagerBuilder()
        pmb.opt_level = 3
        pm = llvm.ModulePassManager()
        pmb.populate(pm)

        # Extra passes appended after the builder's pipeline.
        pm.add_constant_merge_pass()
        pm.add_dead_arg_elimination_pass()
        pm.add_function_attrs_pass()
        pm.add_function_inlining_pass(200)  # threshold = 200
        pm.add_global_dce_pass()
        pm.add_global_optimizer_pass()
        pm.add_ipsccp_pass()
        pm.add_dead_code_elimination_pass()
        pm.add_cfg_simplification_pass()
        pm.add_gvn_pass()
        pm.add_instruction_combining_pass()
        pm.add_licm_pass()
        pm.add_sccp_pass()
        pm.add_sroa_pass()
        pm.add_type_based_alias_analysis_pass()
        pm.add_basic_alias_analysis_pass()

        # run() reports whether any pass changed the module.
        is_modified = pm.run(parsed_module)
        print("Optimizations made modification to the module: ", is_modified)

    parsed_module.verify()

    target_machine = llvm.Target.from_default_triple().create_target_machine()
    engine = llvm.create_mcjit_compiler(parsed_module, target_machine)
    engine.finalize_object()

    # "run" is called with no arguments and its result read as a C int.
    entry = engine.get_function_address("run")
    cfunc = CFUNCTYPE(c_int)(entry)
    result = cfunc()
    print("\nexit: {}".format(result))
    return parsed_module
def _compile_ir_module(self, ir_module):
    """Parse ``ir_module``, run the level-0 pipeline and add it to the engine.

    The passes are run before the module is handed to ``self.engine``:
    once ``finalize_object()`` has generated code, later changes to the IR
    would no longer be reflected in what the engine executes.

    Args:
        ir_module: IR module; its ``str()`` is parsed as LLVM assembly and
            its ``name`` is copied onto the parsed module.

    Returns:
        The parsed llvmlite module that was added to the engine.
    """
    module = llvm.parse_assembly(str(ir_module))
    module.name = ir_module.name
    module.verify()

    # Level-0 pipeline.
    pmb = llvm.PassManagerBuilder()
    pmb.opt_level = 0
    pm = llvm.ModulePassManager()
    pmb.populate(pm)
    pm.run(module)

    self.engine.add_module(module)
    self.engine.finalize_object()
    return module
def _optimize_final_module(self):
    """Run a module pipeline over ``self._final_module`` to simplify it.

    The optimization level is taken from ``config.OPT``. Loop unrolling and
    both vectorizers are switched off.
    """
    settings = ll.PassManagerBuilder()
    settings.opt_level = config.OPT  # level follows the config variable
    settings.disable_unit_at_a_time = False
    settings.disable_unroll_loops = True
    settings.loop_vectorize = False
    settings.slp_vectorize = False

    pipeline = ll.ModulePassManager()
    settings.populate(pipeline)
    pipeline.run(self._final_module)
def compile_ir(self, llvm_ir, name, verbose, optimize=True):
    """
    Compile the LLVM IR string with this object's engine.

    Args:
        llvm_ir: LLVM assembly text.
        name: not used by this method.
        verbose: when true, print the module after parsing and, if
            optimization is enabled, again after optimization.
        optimize: when true, run the level-2 pipeline with loop
            vectorization before handing the module to the engine.

    Returns:
        The compiled module object.
    """
    jit = self.engine

    # Build and check the module from its textual form.
    module = binding.parse_assembly(llvm_ir)
    module.verify()

    # Assign triple, so the IR can be saved and compiled with llc
    module.triple = self.triple
    if verbose:
        print('====== IR (parsed) ======')
        print(module)

    if optimize:
        settings = binding.PassManagerBuilder()
        settings.opt_level = 2  # 0-3 (default=2)
        settings.loop_vectorize = True
        pipeline = binding.ModulePassManager()

        # Target analysis passes are needed for automatic vectorization.
        process_triple = binding.get_process_triple()
        machine = binding.Target.from_triple(
            process_triple).create_target_machine()
        machine.add_analysis_passes(pipeline)

        settings.populate(pipeline)
        pipeline.run(module)
        if verbose:
            print('====== IR (optimized) ======')
            print(module)

    # Hand the module to the engine and make it ready for execution.
    jit.add_module(module)
    jit.finalize_object()
    jit.run_static_constructors()
    return module
def codegen(ast, specializer, retty, argtys):
    """Emit LLVM code for ``ast`` and register the optimized module.

    An ``LLVMEmitter`` visits ``ast``; the enclosing scope's ``module`` is
    then parsed, verified, optimized at level 3 with loop vectorization and
    added to the enclosing scope's ``engine``.

    Returns:
        The emitter's ``function`` attribute.
    """
    emitter = LLVMEmitter(specializer, retty, argtys)
    emitter.visit(ast)

    parsed = llvm.parse_assembly(str(module))
    parsed.verify()

    settings = llvm.PassManagerBuilder()
    settings.opt_level = 3
    settings.loop_vectorize = True
    pipeline = llvm.ModulePassManager()
    settings.populate(pipeline)
    pipeline.run(parsed)

    engine.add_module(parsed)

    # Debug output: the emitted function and the generated assembly.
    debug(emitter.function)
    debug(target_machine.emit_assembly(parsed))
    return emitter.function
def _ptx_jit_constructor():
    """Create the module pass manager and target machine used for PTX output.

    The pass manager is populated from a builder set to optimization level 1
    and size level 1. The target machine is built for the
    ``nvptx64-nvidia-cuda`` triple and the ``sm_XY`` processor matching the
    compute capability reported by the default PyCUDA device.

    Returns:
        tuple: ``(pass_manager, target_machine)``.
    """
    _binding_initialize()

    # A PassManagerBuilder can be shared between pass managers.
    builder = binding.PassManagerBuilder()
    builder.opt_level = 1  # basic optimizations
    builder.size_level = 1  # basic size optimizations

    # Only the default device is queried.
    # TODO: Add support for multiple devices
    capability = pycuda_default.device.compute_capability()
    major, minor = capability[0], capability[1]
    sm_name = "sm_{}{}".format(major, minor)

    # Create compilation target, use 64bit triple
    target = binding.Target.from_triple("nvptx64-nvidia-cuda")
    machine = target.create_target_machine(cpu=sm_name)

    manager = binding.ModulePassManager()
    machine.add_analysis_passes(manager)
    builder.populate(manager)

    return manager, machine
# Code generation requires all three of these initializations.
llvm.initialize()
llvm.initialize_native_target()
llvm.initialize_native_asmprinter()  # yes, even this one

# To compile for other targets, this could be useful:
# llvmlite.binding.initialize_all_targets()

# Ensure JIT execution is allowed
llvm.check_jit_execution()

# Target machine (and its data layout) for the triple of this process.
target = llvm.Target.from_triple(llvm.get_process_triple())
target_machine = target.create_target_machine(codemodel="default")
target_data = target_machine.target_data

# Configure optimization pass manager builder
# https://llvmlite.readthedocs.io/en/latest/user-guide/binding/optimization-passes.html#llvmlite.binding.PassManagerBuilder
pm_builder = llvm.PassManagerBuilder()
pm_builder.opt_level = 3
pm_builder.size_level = 0
pm_builder.inlining_threshold = 100
pm_builder.disable_unroll_loops = False
pm_builder.loop_vectorize = True
pm_builder.slp_vectorize = True

pass_manager = llvm.ModulePassManager()
pm_builder.populate(pass_manager)

# Target specific optimizations
# NOTE(review): these analyses are registered after populate(); confirm
# that this ordering is intended.
target_machine.add_analysis_passes(pass_manager)
# Read the input named on the command line as raw bytes.
with open(sys.argv[1], 'rb') as f:
    code = f.read()

# Disassemble the bytes and translate the result into an IR module.
disasm = disassemble(code)
module = translate(disasm)

llvm.initialize()
llvm.initialize_native_target()
llvm.initialize_native_asmprinter()

llvm_module = llvm.parse_assembly(str(module))
tm = llvm.Target.from_default_triple().create_target_machine()

# Builder pipeline with a very high inlining threshold, plus two extra
# clean-up passes and the target's analysis passes.
pmb = llvm.PassManagerBuilder()
pmb.inlining_threshold = 10000
mpm = llvm.ModulePassManager()
pmb.populate(mpm)
mpm.add_dead_arg_elimination_pass()
mpm.add_cfg_simplification_pass()
tm.add_analysis_passes(mpm)
mpm.run(llvm_module)

print(llvm_module)

# Compile with MCJIT, then emit the object code for the same module.
with llvm.create_mcjit_compiler(llvm_module, tm) as ee:
    ee.finalize_object()
    obj = tm.emit_object(llvm_module)
def main():
    """
    Main entry point.

    Parses the command line, builds the module IR for the given source file
    and then, depending on the flags:

    - ``--execute``: JIT-compiles the module and calls its ``main``;
    - ``--emit-ir``: writes the LLVM IR to a ``.ll`` file and stops;
    - ``--emit-object``: writes the object file and stops;
    - otherwise: writes the object file and links it with ``gcc``.

    Compiler errors are printed and the process exits with status 1, unless
    ``--verbose`` is given, in which case the error is re-raised.
    """
    better_exchook.install()
    parser = argparse.ArgumentParser(description='Compile a Sleepy program to object code.')
    parser.add_argument('program', help='Path to source code')
    parser.add_argument(
        '--execute', dest='execute', default=False, action='store_true',
        help='Run program after compilation using JIT.')
    parser.add_argument(
        '--emit-ir', '-ir', dest='emit_ir', action='store_true',
        help='Emit LLVM intermediate representation.')
    parser.add_argument(
        '--emit-object', '-c', dest='emit_object', action='store_true',
        help='Emit object code, but do not link.')
    parser.add_argument(
        '--compile-libs', '-libs', nargs='*',
        help='External libraries to link with', default=['m'])
    parser.add_argument(
        '--verbose', dest='verbose', action='store_true',
        help='Print full stacktrace for all compiler errors.')
    parser.add_argument(
        '--Optimization', '-O', dest='opt', action='store', type=int, default=0,
        help='Optimize code.')
    parser.add_argument(
        '--no-preamble', default=False, action='store_true',
        help='Do not add preamble to source code.')
    parser.add_argument('--debug', default=False, action='store_true', help='Add debug symbols.')
    parser.add_argument('--output', default=None, action='store', help='output file path')
    args = parser.parse_args()

    main_func_identifier = 'main'
    source_file_path: Path = Path(args.program)
    try:
        ast = make_translation_unit_ast(source_file_path, add_preamble=not args.no_preamble)
        module_ir, symbol_table, exported_functions = ast.make_module_ir_and_symbol_table(
            module_name='default_module', emit_debug=args.debug,
            main_file_path=source_file_path,
            implicitly_exported_functions={main_func_identifier})
    except CompilerError as ce:
        if args.verbose:
            # Bare raise keeps the original traceback intact.
            raise
        print(str(ce))
        exit(1)
        return

    if args.execute:
        # Execute directly using JIT compilation.
        concrete_main_func = next(
            (func for func in exported_functions if func.identifier == main_func_identifier),
            None)
        assert concrete_main_func is not None, 'main function not exported'
        with make_execution_engine() as engine:
            compile_ir(engine, module_ir)
            py_func = concrete_main_func.make_py_func(engine)
            return_val = py_func()
        print('\nExited with return value %r of type %r' % (return_val, concrete_main_func.return_type))
        return

    object_file_name = _make_file_name(source_file_path, '.o', allow_exist=True)
    module_ref = llvm.parse_assembly(str(module_ir))
    print(f'Opt: {args.opt}')
    if args.opt != 0:
        # run optimizations on module, optimizations during emit_object are
        # different and less powerful
        module_passes = llvm.ModulePassManager()
        builder = llvm.PassManagerBuilder()
        builder.opt_level = args.opt
        builder.inlining_threshold = 250
        builder.populate(module_passes)
        module_passes.run(module_ref)

    if args.emit_ir:
        ir_file_name = _make_file_name(source_file_path, '.ll', allow_exist=True)
        # When passes were run, emit the optimized module. Previously the
        # unoptimized module_ir was always written, which silently discarded
        # the result of -O. Without -O the output is unchanged.
        ir_text = str(module_ref) if args.opt != 0 else str(module_ir)
        with open(ir_file_name, 'w') as file:
            file.write(ir_text)
        return

    target = llvm.Target.from_default_triple()
    machine = target.create_target_machine(opt=args.opt)
    with open(object_file_name, 'wb') as file:
        file.write(machine.emit_object(module_ref))
    if args.emit_object:
        return

    if args.output is not None:
        exec_file_name = args.output
    else:
        exec_file_name = _make_file_name(source_file_path, '', allow_exist=True)

    import subprocess
    link_command = (
        ['gcc']
        + (['-g'] if args.debug else [])
        + ['-o', exec_file_name, object_file_name, PREAMBLE_BINARIES_PATH + '_static.a']
        + ['-l%s' % lib_name for lib_name in args.compile_libs])
    # check=False is spelled out: a failing link is reported by gcc itself
    # and is not turned into an exception, as before.
    subprocess.run(link_command, check=False)
def pmb(self):
    """Return a newly constructed ``llvm.PassManagerBuilder``."""
    builder = llvm.PassManagerBuilder()
    return builder
def bind(module, *args, optimize=False):
    """JIT-compile ``module``, call its ``run`` entry point and print the result.

    The built-ins are injected into ``module``, the result is parsed into an
    llvmlite module, optionally optimized with a hand-picked list of passes,
    verified, compiled with MCJIT and executed.

    Args:
        module: IR module; ``inject_built_in`` is applied to it and its
            ``str()`` is parsed as LLVM assembly.
        *args: accepted for compatibility; not used. ``run`` is always
            called without arguments.
        optimize: when true, run the explicit pass list below before
            verification.

    Returns:
        The parsed (and possibly optimized) llvmlite module.
    """
    module = inject_built_in(module)
    llvm_ir_parsed = llvm.parse_assembly(str(module))

    if optimize:
        # Explicit pass list; the order is the order of execution.
        mod_manager = llvm.ModulePassManager()
        mod_manager.add_constant_merge_pass()
        mod_manager.add_dead_arg_elimination_pass()
        mod_manager.add_function_inlining_pass(225)
        mod_manager.add_global_dce_pass()
        mod_manager.add_global_optimizer_pass()
        mod_manager.add_ipsccp_pass()
        mod_manager.add_dead_code_elimination_pass()
        mod_manager.add_cfg_simplification_pass()
        mod_manager.add_gvn_pass()
        mod_manager.add_instruction_combining_pass()
        mod_manager.add_licm_pass()
        mod_manager.add_sccp_pass()
        mod_manager.add_type_based_alias_analysis_pass()
        mod_manager.add_basic_alias_analysis_pass()
        mod_manager.run(llvm_ir_parsed)

    llvm_ir_parsed.verify()

    # JIT
    target_machine = llvm.Target.from_default_triple().create_target_machine()
    engine = llvm.create_mcjit_compiler(llvm_ir_parsed, target_machine)
    engine.finalize_object()

    entry = engine.get_function_address("run")
    # The callable must be bound before it is invoked; without this line the
    # call below fails with NameError.
    cfunc = CFUNCTYPE(c_int)(entry)
    result = cfunc()

    print()
    print("program returns: {}".format(result))
    return llvm_ir_parsed