def test_mysin(self):
    """JIT ``mysin(x) = sqrt(1.0 - pow(cos(x), 2))`` and compare with ``math.sin``."""
    if sys.platform == 'win32' and BITS == 32:
        # float32 support is known to fail on 32-bit Windows
        return

    module = Module.new('test')
    f32 = Type.float()
    mysin = module.add_function(Type.function(f32, [f32]), "mysin")
    builder = Builder.new(mysin.append_basic_block("entry"))

    # Declare the intrinsics the body is composed of.
    sqrt_fn = Function.intrinsic(module, lc.INTR_SQRT, [f32])
    powi_fn = Function.intrinsic(module, lc.INTR_POWI, [f32])
    cos_fn = Function.intrinsic(module, lc.INTR_COS, [f32])

    mysin.args[0].name = "x"
    x = mysin.args[0]
    one = Constant.real(f32, "1")
    cos_x = builder.call(cos_fn, [x], "cosx")
    cos_squared = builder.call(
        powi_fn, [cos_x, Constant.int(Type.int(), 2)], "cos2")
    # Floating-point operands, hence fsub.
    one_minus_cos_squared = builder.fsub(one, cos_squared, "onemc2")
    builder.ret(builder.call(sqrt_fn, [one_minus_cos_squared], "sin"))

    # IR for the function built above, as recorded in an older dump (the
    # subtraction is printed as ``sub`` there):
    #
    #   define float @mysin(float %x) {
    #   entry:
    #     %cosx = call float @llvm.cos.f32( float %x )
    #     %cos2 = call float @llvm.powi.f32( float %cosx, i32 2 )
    #     %onemc2 = sub float 1.000000e+00, %cos2
    #     %sin = call float @llvm.sqrt.f32( float %onemc2 )
    #     ret float %sin
    #   }

    # Run the function and compare against the reference with a relative
    # tolerance of 1e-5.
    engine = le.ExecutionEngine.new(module)
    argument = le.GenericValue.real(Type.float(), 1.234)
    returned = engine.run_function(mysin, [argument])
    expected = math.sin(1.234)
    actual = returned.as_real(Type.float())
    self.assertTrue(abs(actual - expected) / expected < 1e-5)
def test_mysin(self):
    """Build ``mysin(x) = sqrt(1.0 - pow(cos(x), 2))``, run it and check it against ``math.sin``."""
    test_module = Module.new('test')
    single = Type.float()
    signature = Type.function(single, [single])
    mysin = test_module.add_function(signature, "mysin")
    entry = mysin.append_basic_block("entry")
    ir = Builder.new(entry)

    # Intrinsics used by the function body.
    sqrt_intr = Function.intrinsic(test_module, lc.INTR_SQRT, [single])
    powi_intr = Function.intrinsic(test_module, lc.INTR_POWI, [single])
    cos_intr = Function.intrinsic(test_module, lc.INTR_COS, [single])

    mysin.args[0].name = "x"
    x = mysin.args[0]
    one = Constant.real(single, "1")
    two = Constant.int(Type.int(), 2)

    cosx = ir.call(cos_intr, [x], "cosx")
    cos2 = ir.call(powi_intr, [cosx, two], "cos2")
    # The operands are floats, so the floating-point subtraction is used.
    onemc2 = ir.fsub(one, cos2, "onemc2")
    ir.ret(ir.call(sqrt_intr, [onemc2], "sin"))

    # An older dump of the resulting function (subtraction shown as ``sub``):
    #
    #   define float @mysin(float %x) {
    #   entry:
    #     %cosx = call float @llvm.cos.f32( float %x )
    #     %cos2 = call float @llvm.powi.f32( float %cosx, i32 2 )
    #     %onemc2 = sub float 1.000000e+00, %cos2
    #     %sin = call float @llvm.sqrt.f32( float %onemc2 )
    #     ret float %sin
    #   }

    # Execute the function and require a relative error below 1e-5.
    ee = le.ExecutionEngine.new(test_module)
    arg = le.GenericValue.real(Type.float(), 1.234)
    retval = ee.run_function(mysin, [arg])
    reference = math.sin(1.234)
    computed = retval.as_real(Type.float())
    self.assertTrue(abs(computed - reference) / reference < 1e-5)
def declare_runtime_library(self):
    """Declare the runtime print helpers in ``self.module``.

    ``self.runtime`` is reset to a dict mapping each helper's symbol name to
    its declared function. Every helper returns void and takes a single
    argument of the listed type.
    """
    self.runtime = {}
    for symbol, arg_type in (
        ('_print_int', int_type),
        ('_print_float', float_type),
        ('_print_bool', bool_type),
    ):
        signature = Type.function(Type.void(), [arg_type], False)
        self.runtime[symbol] = Function.new(self.module, signature, symbol)
def test_bswap(self):
    """Byte-swap the integer constant 0x42 with ``llvm.bswap`` and check the executed result."""
    # "showme" takes no arguments and returns an int.
    module = Module.new('test')
    showme = module.add_function(Type.function(Type.int(), []), "showme")
    builder = Builder.new(showme.append_basic_block("entry"))

    # Call the bswap intrinsic on the constant and return whatever it yields.
    operand = Constant.int(Type.int(), 0x42)
    bswap_fn = Function.intrinsic(module, lc.INTR_BSWAP, [Type.int()])
    builder.ret(builder.call(bswap_fn, [operand]))

    # The function body amounts to:
    #
    #   entry:
    #     %0 = call i32 @llvm.bswap.i32(...)
    #     ret i32 %0

    # Run it: the low byte 0x42 must end up in the high byte.
    engine = le.ExecutionEngine.new(module)
    outcome = engine.run_function(showme, [])
    self.assertEqual(outcome.as_int(), 0x42000000)
def CodeGen(self):
    """Declare this prototype in ``g_llvm_module`` and return the LLVM function.

    The function takes ``len(self.args)`` doubles and returns a double. If a
    function of the same name already exists it is reused, provided it is
    still a bare declaration with the same number of arguments.

    Raises:
        RuntimeError: the existing function already has a body, or it was
            declared with a different number of arguments.
    """
    arity = len(self.args)
    # e.g. double(double, double)
    signature = Type.function(Type.double(), [Type.double()] * arity, False)
    function = Function.new(g_llvm_module, signature, self.name)

    # A name other than the requested one means the name conflicted with an
    # existing symbol: discard the new function and inspect the old one.
    name_conflicted = function.name != self.name
    if name_conflicted:
        function.delete()
        function = g_llvm_module.get_function_named(self.name)

        if not function.is_declaration:
            raise RuntimeError('Redefinition of function.')

        if len(function.args) != arity:
            raise RuntimeError('Redeclaration of a function with different number of args.')

    # Give the LLVM arguments the names used in the prototype.
    for llvm_arg, label in zip(function.args, self.args):
        llvm_arg.name = label

    return function
def codegen_proto(proto):
    """Declare (or look up) the function described by ``proto``.

    ``proto[1]`` is the function name and ``proto[2]`` the sequence of
    argument names; every argument and the return value are doubles. The LLVM
    arguments are named and recorded in ``named_values``.

    Raises:
        CodegenError: the function already has basic blocks, or it exists
            with a different number of arguments.
    """
    name = proto[1]
    args = proto[2]
    signature = Type.function(double_type, [double_type] * len(args))

    try:
        func = Function.get(the_module, name)
        if func.basic_block_count:
            raise CodegenError("redefinition of function")
        if len(func.args) != len(args):
            raise CodegenError("redefinition of function with different # args")
    except LLVMException:
        # No usable existing function: create the declaration.
        func = Function.new(the_module, signature, name)

    for llvm_arg, arg_name in zip(func.args, args):
        llvm_arg.name = arg_name
        named_values[arg_name] = llvm_arg

    return func
def CodeGen(self):
    """Declare this prototype in ``g_llvm_module`` and return the LLVM function.

    The function takes ``len(self.args)`` doubles and returns a double. When
    the name is already taken, the existing function is reused as long as it
    is still a declaration with the same number of arguments. Each argument
    is named and stored in ``g_named_values``.

    Raises:
        RuntimeError: the existing function already has a body, or it was
            declared with a different number of arguments.
    """
    # Make the function type, ex: double(double, double).
    function_type = Type.function(
        Type.double(), [Type.double()] * len(self.args), False)
    function = Function.new(g_llvm_module, function_type, self.name)

    # If the name conflicts, there is already something with the same name.
    # If it has a body, don't allow redefinition or re-extern.
    if function.name != self.name:
        function.delete()
        function = g_llvm_module.get_function_named(self.name)

        # If the function already has a body, reject it.
        if not function.is_declaration:
            raise RuntimeError('Redefinition of function.')

        # THIS IS ESSENTIALLY FUNCTION OVERLOADING, MAYBE CHANGE IN FUTURE
        # If the function took a different number of args, reject it.
        # (This must inspect ``function``; no ``callee`` exists in this scope.)
        if len(function.args) != len(self.args):
            raise RuntimeError('Redeclaration of function with different' +
                               ' number of args')

    # Set names for all args and add them to the variable symbol table.
    for arg, arg_name in zip(function.args, self.args):
        arg.name = arg_name
        g_named_values[arg_name] = arg

    return function
def test_bswap(self):
    """Run ``llvm.bswap`` on the integer constant 0x42 and verify the swapped value."""
    # Set up a module with a zero-argument function returning an int.
    test_module = Module.new("test")
    signature = Type.function(Type.int(), [])
    showme = test_module.add_function(signature, "showme")
    ir = Builder.new(showme.append_basic_block("entry"))

    # Emit the intrinsic call and return its result directly.
    int_type = Type.int()
    constant = Constant.int(int_type, 0x42)
    intrinsic = Function.intrinsic(test_module, lc.INTR_BSWAP, [Type.int()])
    swapped = ir.call(intrinsic, [constant])
    ir.ret(swapped)

    # The body consists of one call to @llvm.bswap.i32 followed by a ``ret``
    # of its result.

    # Execute: the byte 0x42 moves from the lowest to the highest position.
    ee = le.ExecutionEngine.new(test_module)
    result = ee.run_function(showme, [])
    self.assertEqual(result.as_int(), 0x42000000)
def add_prelude(self):
    """Declare every entry of ``prelude`` in ``self.module``.

    Each ``prelude`` value is passed to ``Function.new`` as the function type
    and the resulting function is stored in ``self.intrinsics`` under the
    entry's name.
    """
    # ``items()`` instead of the Python-2-only ``iteritems()`` so the method
    # runs on both Python 2 and Python 3.
    for name, function in prelude.items():
        self.intrinsics[name] = Function.new(self.module, function, name)
def __init__(self, env, node, restype, argtypes):
    """Declare the LLVM function named ``node.value``.

    ``restype`` and every entry of ``argtypes`` are keys into the ``ctypes``
    mapping. The function is created in ``env.options['module']``.
    """
    self.env, self.node = env, node

    # Translate the type keys through the ``ctypes`` mapping.
    self.restype = ctypes[restype]
    self.argtypes = [ctypes[type_key] for type_key in argtypes]

    signature = Type.function(self.restype, self.argtypes)
    self.fntype = signature
    target_module = env.options['module']
    self.fn = Function.new(target_module, signature, node.value)
def code_gen(self, from_definition=False):
    """Declare this function, or validate a re-declaration of it.

    The function type is recorded in the parent context's type table under
    ``"<name>()"``. On first sight the function is created; a later call is
    only accepted when the type is equal and ``from_definition`` is true. In
    both accepted cases the argument types are registered in
    ``self.context.type_table``.

    Returns:
        ``[function, self.context]``.

    Raises:
        cmexception.RedefineException: the name is already declared with a
            different type, or with the same type and ``from_definition``
            false.
    """
    top_context = self.context.parent_context
    func_name = self.func_name_token.word
    table_key = func_name + "()"
    return_type = Helper.get_type(self.ret_type.word)
    arg_types = [Helper.get_type(arg[1]) for arg in self.args]
    func_type = Type.function(return_type, arg_types, False)

    def register_arguments():
        # arg[0] is the argument name, arg[1] its type name.
        for arg in self.args:
            self.context.type_table[arg[0]] = Helper.get_type(arg[1])

    if table_key not in top_context.type_table:
        function = Function.new(g_llvm_module, func_type, func_name)
        top_context.type_table[table_key] = func_type
        register_arguments()
        return [function, self.context]

    previous_type = top_context.type_table[table_key]
    if not (previous_type == func_type and from_definition):
        raise cmexception.RedefineException(self.func_name_token, 'function')

    register_arguments()
    return [g_llvm_module.get_function_named(func_name), self.context]
def codegen_proto(proto):
    """Return the LLVM function for the prototype ``proto``.

    The name is read from ``proto[1]`` and the argument names from
    ``proto[2]``. The function takes and returns doubles. Its arguments are
    named after the prototype and stored in ``named_values``.

    Raises:
        CodegenError: the function already has basic blocks, or its existing
            argument count differs.
    """
    name, args = proto[1], proto[2]
    arg_count = len(args)
    func_type = Type.function(double_type, [double_type] * arg_count)

    try:
        func = Function.get(the_module, name)
        if func.basic_block_count:
            raise CodegenError("redefinition of function")
        if len(func.args) != len(args):
            raise CodegenError(
                "redefinition of function with different # args")
    except LLVMException:
        # Fall back to creating the declaration.
        func = Function.new(the_module, func_type, name)

    for value, label in zip(func.args, args):
        value.name = label
        named_values[label] = value

    return func
def emit_extern_func(self, extnode):
    """Declare the external function described by ``extnode``.

    The declaration is created in ``self.module``, registered in
    ``self.functions`` under ``extnode.name`` and returned.

    The node must not have been declared before (enforced with ``assert``).
    """
    assert extnode.name not in self.functions

    # NOTE(review): ``extnode.args`` is assumed to be a sequence of
    # (name, type) pairs, as unpacked here; confirm against the node class.
    # The original iterated an undefined ``funcnode``.
    arg_types = [self.type_map[t] for _, t in extnode.args]
    fntype = Type.function(self.type_map[extnode.rettype],
                           arg_types,
                           extnode.varargs)

    fn = Function.new(self.module, fntype, extnode.name)
    self.functions[extnode.name] = fn
    # Return the function just registered (the original read a nonexistent
    # ``self.funcnode`` mapping).
    return fn
def test_sin_f32(self):
    """Check the ``llvm.sin`` intrinsic on the float type against ``math.sin``."""
    on_win32_32bit = sys.platform == 'win32' and BITS == 32
    if on_win32_32bit:
        # float32 support is known to fail on 32-bit Windows
        return

    f32 = Type.float()
    mod, func, b = self._build_module(f32)

    # The function body just forwards its arguments to the intrinsic.
    sin_intrinsic = Function.intrinsic(mod, lc.INTR_SIN, [f32])
    result = b.call(sin_intrinsic, func.args)
    b.ret(result)

    self._template(mod, func, math.sin)
def _build_test_module(self):
    """Build a module containing ``mysin(x) = sqrt(1 - cos(x)**2)`` on doubles.

    Returns:
        ``(module, mysin_function)``.
    """
    module = Module.new("test")
    f64 = Type.double()
    mysin = module.add_function(Type.function(f64, [f64]), "mysin")
    builder = Builder.new(mysin.append_basic_block("entry"))

    # Intrinsics the body is composed of.
    sqrt_fn = Function.intrinsic(module, lc.INTR_SQRT, [f64])
    powi_fn = Function.intrinsic(module, lc.INTR_POWI, [f64])
    cos_fn = Function.intrinsic(module, lc.INTR_COS, [f64])

    mysin.args[0].name = "x"
    x = mysin.args[0]
    one = Constant.real(f64, "1")
    cos_x = builder.call(cos_fn, [x], "cosx")
    cos_squared = builder.call(
        powi_fn, [cos_x, Constant.int(Type.int(), 2)], "cos2")
    # Floating-point operands, hence fsub.
    difference = builder.fsub(one, cos_squared, "onemc2")
    builder.ret(builder.call(sqrt_fn, [difference], "sin"))
    return module, mysin
def init_llvm(self):
    """Create the "exprllvm" module, its execution engine and a stub ``main``.

    ``main`` (void, no arguments) gets an entry block whose builder is kept
    in ``self.builder``. A ``print_int(int)`` declaration is added, called
    once with the constant 3, and ``main`` is terminated with ``ret void``.
    Both functions are recorded in ``self.llvm_functions``.
    """
    module = Module.new("exprllvm")
    self.engine = ExecutionEngine.new(module)

    # functions
    self.llvm_functions = {}
    main_type = Type.function(Type.void(), [], False)
    main_fn = Function.new(module, main_type, "main")
    self.llvm_functions['main'] = main_fn

    # builder positioned in main's entry block
    entry = main_fn.append_basic_block("entry")
    self.builder = Builder.new(entry)

    # pre-defined functions
    print_int_type = Type.function(Type.void(), [Type.int()], False)
    print_int = Function.new(module, print_int_type, "print_int")
    self.llvm_functions['print_int'] = print_int

    three = Constant.int(Type.int(), 3)
    self.builder.call(print_int, [three])
    self.builder.ret_void()
def _build_test_module(self):
    """Return ``(module, function)`` for ``mysin(x) = sqrt(1 - cos(x)**2)`` built on doubles."""
    test_module = Module.new('test')
    double = Type.double()
    signature = Type.function(double, [double])
    mysin = test_module.add_function(signature, "mysin")
    entry = mysin.append_basic_block("entry")
    ir = Builder.new(entry)

    # Declare the intrinsics used below.
    sqrt_intr = Function.intrinsic(test_module, lc.INTR_SQRT, [double])
    powi_intr = Function.intrinsic(test_module, lc.INTR_POWI, [double])
    cos_intr = Function.intrinsic(test_module, lc.INTR_COS, [double])

    mysin.args[0].name = "x"
    x = mysin.args[0]
    one = Constant.real(double, "1")

    cosx = ir.call(cos_intr, [x], "cosx")
    exponent = Constant.int(Type.int(), 2)
    cos2 = ir.call(powi_intr, [cosx, exponent], "cos2")
    # The operands are floating point, so fsub is the right subtraction.
    onemc2 = ir.fsub(one, cos2, "onemc2")
    root = ir.call(sqrt_intr, [onemc2], "sin")
    ir.ret(root)

    return test_module, mysin
def build_intrinsics(mod):
    """Declare the LLVM intrinsics listed in ``intrinsics.llvm_intrinsics`` in ``mod``.

    For every entry, the attribute of the same name on ``intrinsics`` holds a
    ``(name, return type, argument types)`` signature. The argument types are
    mapped through ``arg_typemap`` before declaring the intrinsic.

    Returns:
        ``(mod, declared)`` where ``declared`` maps the signature's name to
        the declared function.
    """
    # XXX define in separate module and then link in
    declared = {}
    for attr_name, intrinsic_id in intrinsics.llvm_intrinsics.items():
        # get the function signature
        public_name, retty, argtys = getattr(intrinsics, attr_name)
        llvm_arg_types = [arg_typemap(argty) for argty in argtys]
        # The mapped return type is not used for the declaration. The call is
        # kept so a return type that cannot be mapped still fails here.
        arg_typemap(retty)
        declared[public_name] = Function.intrinsic(
            mod, intrinsic_id, llvm_arg_types)
    return mod, declared
def declare_function(self, func_block):
    """Declare a Function and add it to self.vars

    The first instruction of ``func_block`` is unpacked as
    ``(_, name, ret_type, *params)``. ``params`` alternates argument names
    (even positions) and type keys (odd positions). Type keys are looked up
    in ``typemap``.
    """
    header = func_block.instructions[0]
    _, name, ret_type, *params = header
    if params == [[]]:
        # A single empty list stands for "no parameters".
        params = []

    if ret_type:
        arg_types = [typemap[type_key] for type_key in params[1::2]]
        ret_type = typemap[ret_type]
    else:
        # NOTE(review): with a falsy return type the first remaining item is
        # used as the return type as-is (not mapped through ``typemap``) and
        # the function is declared without arguments. Confirm this is the
        # intended encoding.
        ret_type = params[0]
        arg_types = []

    signature = Type.function(ret_type, arg_types, False)
    function = Function.new(self.module, signature, name)

    for position, arg_name in enumerate(params[0::2]):
        function.args[position].name = arg_name

    self.vars[name] = function
def buildLoad(module, memory):
    '''Build the function `load`, which returns the value in memory at an address.

    `load` takes one int (the address) and returns an int. The address is
    sign-extended to 64 bits and used as the second GEP index into `memory`.
    The function is verified after it is built.
    '''
    # Declare int load(int)
    signature = Type.function(Type.int(), [Type.int()], False)
    load = Function.new(module, signature, 'load')

    # Single-block body
    builder = Builder.new(load.append_basic_block('body'))

    # Pointer to memory[address]
    address = builder.sext(load.args[0], Type.int(bits=64))
    slot = builder.gep(memory, [num(0), address])

    # Return the loaded value
    builder.ret(builder.load(slot))

    load.verify()
def start_function(self, name, retty, argtys):
    """Begin code generation for a new function.

    Creates the function in ``self.module`` with "entry" and "exit" blocks,
    positions ``self.builder`` in the entry block, and resets ``self.locals``
    and ``self.stack``. Unless the return type is ``void_type``, a "retval"
    stack slot is allocated and stored in ``self.locals``. The function is
    registered in ``self.globals`` under ``name``.
    """
    return_type = arg_typemap(retty)
    param_types = [arg_typemap(argty) for argty in argtys]
    signature = Type.function(return_type, param_types, False)

    function = Function.new(self.module, signature, name)
    self.function = function

    entry = function.append_basic_block("entry")
    self.block = entry
    self.builder = Builder.new(entry)
    self.exit_block = function.append_basic_block("exit")

    self.locals = {}
    self.stack = {}

    returns_value = return_type is not void_type
    if returns_value:
        self.locals['retval'] = self.builder.alloca(return_type, "retval")

    self.globals[name] = function
def build_intrinsics(mod):
    """Declare the LLVM intrinsics listed in ``intrinsics.llvm_intrinsics`` in ``mod``.

    For every entry, the attribute of the same name on ``intrinsics`` holds a
    ``(name, return type, argument types)`` signature. The argument types are
    mapped through ``arg_typemap`` before declaring the intrinsic.

    Returns:
        ``(mod, ins)`` where ``ins`` maps the signature's name to the
        declared function.
    """
    # XXX define in separate module and then link in
    import intrinsics

    ins = {}
    # ``items()`` works on Python 2 and 3, unlike ``iteritems()``.
    for name, intr in intrinsics.llvm_intrinsics.items():
        # get the function signature
        name, retty, argtys = getattr(intrinsics, name)
        # Materialise the list: on Python 3 ``map`` returns a lazy iterator.
        largtys = list(map(arg_typemap, argtys))
        lfunc = Function.intrinsic(mod, intr, largtys)
        ins[name] = lfunc

    return mod, ins
def buildSave(module, memory):
    '''Build the function `save`, which stores a value to memory at an address.

    `save` takes two ints, the value and the address, and returns void. The
    address is sign-extended to 64 bits and used as the second GEP index
    into `memory`. The function is verified after it is built.
    '''
    # Declare void save(int, int)
    signature = Type.function(Type.void(), [Type.int(), Type.int()], False)
    save = Function.new(module, signature, 'save')

    # Single-block body
    builder = Builder.new(save.append_basic_block('body'))

    # Pointer to memory[address], then store the value through it
    value, address = save.args
    wide_address = builder.sext(address, Type.int(bits=64))
    slot = builder.gep(memory, [num(0), wide_address])
    builder.store(value, slot)

    # Exit function
    builder.ret_void()

    save.verify()
def emit_func(self, funcnode):
    """Emit the LLVM function for ``funcnode`` and return it.

    ``funcnode.args`` is iterated as (name, type) pairs. Each argument is
    named, given a stack slot in a new scope and stored into it. The function
    is registered in ``self.functions`` and becomes ``self.current_func``
    before its body is emitted, then it is verified.

    The node must not have been emitted before (enforced with ``assert``).
    """
    assert funcnode.name not in self.functions

    fntype = Type.function(self.type_map[funcnode.rettype],
                           [self.type_map[t] for _, t in funcnode.args],
                           False)
    fn = Function.new(self.module, fntype, funcnode.name)

    s = Scope("", fn)
    for arg, (name, ty) in zip(fn.args, funcnode.args):
        arg.name = name
        # Copy each incoming argument into its own stack slot.
        var = s.locals[name] = s.builder.alloca(self.type_map[ty], name)
        s.builder.store(arg, var)

    self.scopes.append(s)
    # Register before emitting the body. The original assigned and returned
    # an undefined name ``f`` here.
    self.functions[funcnode.name] = self.current_func = fn
    self.emit_block(funcnode.body)
    self.scopes.pop()

    fn.verify()
    return fn
def gen_code(self, module, builder, variables):
    """Generate the complete LLVM function for this definition.

    The function takes ``len(self.args)`` doubles and returns a double. The
    incoming ``builder`` and ``variables`` are not used: the body is
    generated with a fresh builder on the new entry block and a symbol table
    containing only this function's arguments.

    Returns:
        The verified function.
    """
    signature = Type.function(
        Type.double(), [Type.double()] * len(self.args), False)
    function = Function.new(module, signature, self.name)

    # The function's own symbol table: argument name -> LLVM argument.
    scope = {}
    for llvm_arg, arg_name in zip(function.args, self.args):
        llvm_arg.name = arg_name
        scope[arg_name] = llvm_arg

    entry = function.append_basic_block('entry')
    body_builder = Builder.new(entry)
    result = self.body.gen_code(module, body_builder, scope)
    body_builder.ret(result)

    function.verify()
    return function
def code_gen(self):
    """Declare this prototype in ``g_llvm_module`` and return the LLVM function.

    The function takes ``len(self.args)`` doubles and returns a double. When
    the name is already taken, the existing function is reused as long as it
    is still a declaration with the same number of arguments. Each argument
    is named and stored in ``g_named_values``.

    Raises:
        RuntimeError: the existing function already has a body, or it was
            declared with a different number of arguments.
    """
    funct_type = Type.function(
        Type.double(), [Type.double()] * len(self.args), False)
    function = Function.new(g_llvm_module, funct_type, self.name)

    # A different name on the new function means the requested one was
    # already in use: drop the new function and validate the existing one.
    if function.name != self.name:
        function.delete()
        function = g_llvm_module.get_function_named(self.name)

        if not function.is_declaration:
            raise RuntimeError('Redefinition of a function.')

        # Compare argument counts of the existing function and this
        # prototype. The original line was syntactically invalid and compared
        # a length with a list.
        if len(function.args) != len(self.args):
            raise RuntimeError('Redeclaration of a function with different number of args.')

    for arg, arg_name in zip(function.args, self.args):
        arg.name = arg_name
        g_named_values[arg_name] = arg

    return function
def codegen_expr(builder, expr):
    """Emit LLVM IR for the expression tuple ``expr`` and return its value.

    ``expr[0]`` selects the form:

    * ``('number', value)``: a double constant.
    * ``('variable', name)``: looked up in ``named_values``.
    * ``('binary', op, lhs, rhs)``: ``op`` is one of ``+ - * <``.
    * ``('call', name, args)``: call of a function in ``the_module``.

    Raises:
        CodegenError: unknown variable, operator, function or expression
            kind, or a call with the wrong number of arguments.
    """
    kind = expr[0]
    if kind == 'number':
        number = expr[1]
        return Constant.real(double_type, number)
    elif kind == 'variable':
        name = expr[1]
        try:
            return named_values[name]
        except KeyError:
            raise CodegenError("unknown variable name")
    elif kind == 'binary':
        op, lhs, rhs = expr[1], expr[2], expr[3]
        lhs_val = codegen_expr(builder, lhs)
        rhs_val = codegen_expr(builder, rhs)
        # All values are doubles, so the floating-point instructions are
        # required; add/sub/mul are the integer forms.
        if op == '+':
            return builder.fadd(lhs_val, rhs_val, 'addtmp')
        elif op == '-':
            return builder.fsub(lhs_val, rhs_val, 'subtmp')
        elif op == '*':
            return builder.fmul(lhs_val, rhs_val, 'multmp')
        elif op == '<':
            i = builder.fcmp(FCMP_ULT, lhs_val, rhs_val, 'cmptmp')
            # Convert the comparison result to a double. uitofp takes the
            # destination type before the name.
            return builder.uitofp(i, double_type, 'booltmp')
        else:
            raise CodegenError("invalid binary operator")
    elif kind == 'call':
        name, args = expr[1], expr[2]
        try:
            callee = Function.get(the_module, name)
        except LLVMException:
            raise CodegenError("unknown function referenced")
        if len(callee.args) != len(args):
            raise CodegenError("incorrect # arguments passed")
        arg_vals = [codegen_expr(builder, arg) for arg in args]
        return builder.call(callee, arg_vals, 'calltmp')
    else:
        # Fail loudly instead of silently returning None.
        raise CodegenError("unknown expression kind")
def codegen_expr(builder, expr):
    """Emit LLVM IR for the expression tuple ``expr`` and return its value.

    ``expr[0]`` selects the form:

    * ``("number", value)``: a double constant.
    * ``("variable", name)``: looked up in ``named_values``.
    * ``("binary", op, lhs, rhs)``: ``op`` is one of ``+ - * <``.
    * ``("call", name, args)``: call of a function in ``the_module``.

    Raises:
        CodegenError: unknown variable, operator, function or expression
            kind, or a call with the wrong number of arguments.
    """
    kind = expr[0]
    if kind == "number":
        number = expr[1]
        return Constant.real(double_type, number)
    elif kind == "variable":
        name = expr[1]
        try:
            return named_values[name]
        except KeyError:
            raise CodegenError("unknown variable name")
    elif kind == "binary":
        op, lhs, rhs = expr[1], expr[2], expr[3]
        lhs_val = codegen_expr(builder, lhs)
        rhs_val = codegen_expr(builder, rhs)
        # All values are doubles, so the floating-point instructions are
        # required; add/sub/mul are the integer forms.
        if op == "+":
            return builder.fadd(lhs_val, rhs_val, "addtmp")
        elif op == "-":
            return builder.fsub(lhs_val, rhs_val, "subtmp")
        elif op == "*":
            return builder.fmul(lhs_val, rhs_val, "multmp")
        elif op == "<":
            i = builder.fcmp(FCMP_ULT, lhs_val, rhs_val, "cmptmp")
            # Convert the comparison result to a double. uitofp takes the
            # destination type before the name.
            return builder.uitofp(i, double_type, "booltmp")
        else:
            raise CodegenError("invalid binary operator")
    elif kind == "call":
        name, args = expr[1], expr[2]
        try:
            callee = Function.get(the_module, name)
        except LLVMException:
            raise CodegenError("unknown function referenced")
        if len(callee.args) != len(args):
            raise CodegenError("incorrect # arguments passed")
        arg_vals = [codegen_expr(builder, arg) for arg in args]
        return builder.call(callee, arg_vals, "calltmp")
    else:
        # Fail loudly instead of silently returning None.
        raise CodegenError("unknown expression kind")
def buildMain(module, source):
    '''Build the `main` function as a switch-driven dispatch loop.

    `source` maps labels to sequences of instructions. `main` keeps the
    label to run next in a stack slot (initially 0). The `loop` block
    switches on it, falling through to `exit` (which returns 0) when no case
    matches. Each case block generates its instructions, stores the value
    left on top of the stack as the next label and jumps back to `loop`.

    Returns the `main` function.
    '''
    # Declare int main()
    main = Function.new(module, Type.function(Type.int(), [], False), 'main')

    # Entry block: allocate and initialise the "next label" slot
    entry = main.append_basic_block('entry')
    entry_builder = Builder.new(entry)
    next_slot = entry_builder.alloca(Type.int(), 'next')
    entry_builder.store(num(0), next_slot)

    # Exit block: return 0
    exit_block = main.append_basic_block('exit')
    Builder.new(exit_block).ret(num(0))

    # Loop block: switch on the stored label, defaulting to the exit block
    loop = main.append_basic_block('loop')
    loop_builder = Builder.new(loop)
    jump = loop_builder.load(next_slot, 'jump')
    switch = loop_builder.switch(jump, exit_block)

    # Finish the entry block by jumping into the loop
    Builder.new(entry).branch(loop)

    # One case block per label, in sorted label order
    for label, expression in sorted(source.items()):
        stack = []
        case = main.append_basic_block('case-{}'.format(label))
        case_builder = Builder.new(case)

        for instruction in expression:
            instruction.gen(module, case_builder, stack)

        case_builder.store(stack.pop(), next_slot)
        case_builder.branch(loop)
        switch.add_case(num(label), case)

    return main
def code_gen(self, from_definition=False):
    """Declare this function, or check that a repeated declaration is legal.

    The parent context's type table stores the function type under
    ``"<name>()"``. An unseen name creates the function. A seen name is
    accepted only if the stored type equals the new one and
    ``from_definition`` is true. Accepted calls register each argument's
    type in ``self.context.type_table``.

    Returns:
        ``[function, self.context]``.

    Raises:
        cmexception.RedefineException: in every other case.
    """
    top_context = self.context.parent_context
    plain_name = self.func_name_token.word
    tagged_name = plain_name + "()"
    return_type = Helper.get_type(self.ret_type.word)
    arg_types = [Helper.get_type(arg[1]) for arg in self.args]
    func_type = Type.function(return_type, arg_types, False)

    def bind_argument_types():
        # Each arg is indexed as (name, type name).
        for arg in self.args:
            self.context.type_table[arg[0]] = Helper.get_type(arg[1])

    already_declared = tagged_name in top_context.type_table
    if not already_declared:
        function = Function.new(g_llvm_module, func_type, plain_name)
        top_context.type_table[tagged_name] = func_type
        bind_argument_types()
        return [function, self.context]

    old_func_type = top_context.type_table[tagged_name]
    if old_func_type == func_type and from_definition:
        bind_argument_types()
        existing = g_llvm_module.get_function_named(plain_name)
        return [existing, self.context]

    raise cmexception.RedefineException(self.func_name_token, 'function')
def CodeGen(self):
    """Declare this prototype in ``G_LLVM_MODULE`` and return the LLVM function.

    Arguments and return value are all pointers to 8-bit integers, and the
    function gets ``self.calling_convention``. When the name is already
    taken, the existing function is reused as long as it is still a
    declaration with the same number of arguments.

    Raises:
        RuntimeError: the existing function already has a body, or it was
            declared with a different number of arguments.
    """
    # ``stderr.write`` instead of the Python-2-only ``print >> stderr``
    # statement, which Python 3 evaluates as an expression and rejects with a
    # TypeError. The trailing newline matches what ``print`` emitted.
    stderr.write("codegening prototype node\n")

    i8_pointer = Type.pointer(Type.int(8))
    funct_type = Type.function(
        i8_pointer, [Type.pointer(Type.int(8))] * len(self.args), False)

    function = Function.new(G_LLVM_MODULE, funct_type, self.name)
    function.calling_convention = self.calling_convention

    # A different name means the requested one was already in use: drop the
    # new function and validate the existing one.
    if function.name != self.name:
        function.delete()
        function = G_LLVM_MODULE.get_function_named(self.name)
        function.calling_convention = self.calling_convention

        if not function.is_declaration:
            raise RuntimeError('Redefinition of function.')

        if len(function.args) != len(self.args):
            raise RuntimeError('Redeclaration of a function with different number of args.')

    for arg, arg_name in zip(function.args, self.args):
        arg.name = arg_name

    return function
def code(self, context):
    """Declare this prototype in ``context.module`` and return the function.

    The function takes ``len(self.args)`` doubles and returns a double. An
    existing function of the same name is reused if it is a declaration with
    the same number of arguments. The arguments are named and added to
    ``context.scope``.

    Raises:
        RuntimeError: the existing function already has a body, or it was
            declared with a different number of arguments.
    """
    # Make the function type, eg. double(double, double).
    func_args = (Type.double(),) * len(self.args)
    func_type = Type.function(Type.double(), func_args, False)

    # First function in the module carrying our name, if any.
    func = next(
        (candidate for candidate in context.module.functions
         if candidate.name == self.name),
        None)

    if func is None:
        func = Func.new(context.module, func_type, self.name)
    else:
        if not func.is_declaration:
            raise RuntimeError('Redefinition of function.')
        if len(func.args) != len(self.args):
            raise RuntimeError(
                'Redeclaration of a function with a different number of args.')

    for llvm_arg, arg_name in zip(func.args, self.args):
        llvm_arg.name = arg_name
        # Add arguments to symbol table.
        context.scope[arg_name] = llvm_arg

    return func
def fuse_kerneltree(tree, module_or_name):
    """Fuse the kernel tree into a single kernel object with the common names

    Examples:

      add(multiply(b,c),subtract(d,f))

        var tmp0 = multiply(b,c)
        var tmp1 = subtract(d,f)

        return add(tmp0, tmp1)

        var tmp0;
        var tmp1;

        multiply(b,c,&tmp0)
        subtract(d,f,&tmp1)

        add(tmp0, tmp1, &res)

    ``module_or_name`` is either a module to build into or a string, in which
    case a new module of that name is created.

    Returns a ``(BlazeElementKernel, args)`` pair.
    """
    module = (Module.new(module_or_name)
              if isinstance(module_or_name, _strtypes)
              else module_or_name)

    args, func_type = get_fused_type(tree)
    outdshape = tree.kernel.dshapes[-1]
    fused_name = tree.name + "_fused"

    try:
        func = module.get_function_named(fused_name)
    except LLVMException:
        func = Function.new(module, func_type, fused_name)
    # NOTE(review): an 'entry' block is appended even when the function
    # already existed in the module; confirm that is intended.
    builder = lc.Builder.new(func.append_basic_block('entry'))

    # TODO: Create wrapped function for functions
    #   that need to loop over their inputs

    # Attach the llvm_object to the Argument objects
    for position, arg in enumerate(args):
        arg.llvm_obj = func.args[position]

    # Topologically sort the kernel-tree nodes, then issue instructions to
    # compute the value at each node site.
    nodelist = tree.sorted_nodes()

    # Objects holding temporary heap memory; each must have a _dealloc method.
    temporaries = []

    def release_temporaries():
        for temp in temporaries:
            if temp is not None:
                temp._dealloc()

    for node in nodelist[:-1]:
        node.kernel.attach(module)
        temporaries.append(insert_instructions(node, builder))

    final_node = nodelist[-1]
    final_node.kernel.attach(module)
    returns_scalar = tree.kernel.kinds[-1] == SCALAR

    if returns_scalar:
        temporaries.append(insert_instructions(final_node, builder))
    else:
        # Non-scalar results are written through the last function argument.
        temporaries.append(
            insert_instructions(final_node, builder, func.args[-1]))

    release_temporaries()

    if returns_scalar:
        builder.ret(final_node.llvm_obj)
    else:
        builder.ret_void()

    dshapes = [get_kernel_dshape(arg) for arg in args] + [outdshape]
    newkernel = BlazeElementKernel(func, dshapes)

    return newkernel, args
def lift(self, outrank, outkind):
    """Take the current kernel and "lift" it so that the output has
    rank given by outrank and kind given by outkind.

    All arguments will have the same kind as outkind in the
    signature of the lifted kernel and all ranks will be
    adjusted the same amount as the output rank.

    This creates a new BlazeElementKernel whose function
    calls the underlying kernel's function multiple times.

    Example: (let rn == rank-n)
      We need an r2, r2 -> r2 kernel and we have an r1, r1 -> r1
      kernel.

      We create a kernel with rank r2, r2 -> r2 that does the equivalent of

      for i in range(n0):
          out[i] = inner_kernel(in0[i], in1[i])

    Lifted kernels are cached in ``self._lifted_cache`` by generated name,
    so repeated requests return the same object.

    Raises:
        ValueError: ``outkind`` is not one of the first three array kinds,
            ``outrank`` is smaller than the current output rank, or an
            argument is neither scalar/pointer nor an array of ``outkind``.
    """
    # Accept the one-letter spelling of the kind and translate it.
    if outkind in 'CFS':
        from .llvm_array import kindfromchar
        outkind = kindfromchar[outkind]

    # The generated name doubles as the cache key.
    name = self.func.name + "_lifted_%d_%s" % (outrank, orderchar[outkind])
    try_bk = self._lifted_cache.get(name, None)
    if try_bk is not None:
        return try_bk

    if outkind not in array_kinds[:3]:
        raise ValueError("Invalid kind specified for output: %s" % outkind)

    cur_rank = self.ranks[-1]
    if outrank == cur_rank:
        # Same rank: nothing to do, except for a rank-0 kernel whose
        # arguments are all scalars/pointers, which is still lifted.
        if not (outrank == 0 and all(x in [SCALAR, POINTER] for x in self.kinds)):
            return self  # no-op

    dr = outrank - cur_rank
    if dr < 0:
        raise ValueError("Output rank (%d) must be greater than current "
                         "rank (%d)" % (outrank, cur_rank))

    if not all((x in [SCALAR, POINTER] or x[0]==outkind) for x in self.kinds):
        raise ValueError("Incompatible kernel arguments for "
                         "lifting: %s" % self.kinds)
    # Every operand's rank grows by the same difference in ranks.
    outranks = [ri + dr for ri in self.ranks]

    func_type = self._lifted_func_type(outranks, outkind)
    func = Function.new(self.module, func_type, name=name)
    block = func.append_basic_block('entry')
    builder = lc.Builder.new(block)

    def ensure_llvm(arg, kind):
        # Unwrap LLArray objects to their array pointer; pass anything else
        # through unchanged. ``kind`` is currently unused.
        if isinstance(arg, LLArray):
            return arg.array_ptr
        else:
            return arg

    arg_arrays = [LLArray(arg, builder) for arg in func.args]
    begins = [const_intp(0)]*dr
    # This is the shape of the output array
    ends = arg_arrays[-1].shape
    loop_nest_ctx = loop_nest(builder, begins, ends)

    with loop_nest_ctx as loop:
        # A scalar-returning kernel does not take the output as an argument;
        # its result is stored into the output array below.
        if self.kinds[-1] == SCALAR:
            inargs = arg_arrays[:-1]
            inkinds = self.kinds[:-1]
        else:
            inargs = arg_arrays
            inkinds = self.kinds
        callargs = [ensure_llvm(arg[loop.indices], kind)
                    for arg, kind in zip(inargs, inkinds)]
        res = builder.call(self.func, callargs)
        if self.kinds[-1] == SCALAR:
            arg_arrays[-1][loop.indices] = res
        builder.branch(loop.incr)

    # Jump from the entry code into the loop nest, then emit the return
    # after the loop's end block.
    builder.branch(loop.entry)
    builder.position_at_end(loop.end)
    builder.ret_void()

    def add_rank(dshape, dr):
        # Prefix the datashape with ``dr`` new leading dimensions L0, L1, ...
        new = ["L%d, " % i for i in range(dr)]
        new.append(str(dshape))
        return make_dshape("".join(new))

    dshapes = [add_rank(dshape, dr) for dshape in self.dshapes]
    try_bk = BlazeElementKernel(func, dshapes)
    self._lifted_cache[name] = try_bk
    return try_bk
def unbound_single_ckernel(self):
    """Creates an UnboundCKernelFunction with the ExprSingleOperation prototype.

    The wrapper function ``<kernel name>_single_ckernel`` is generated in a
    clone of ``self.module``. It takes ``(dst_ptr, src_ptrs, extra_ptr)``,
    converts the raw pointers into the argument kinds the kernel expects,
    calls the kernel and writes the result to the destination.

    The result is built on first use and cached in
    ``self._unbound_single_ckernel``.

    Raises:
        ValueError: an array argument is not C contiguous.
        TypeError: an argument kind or a dimension type is not supported.
    """
    import ctypes
    if self._unbound_single_ckernel is None:
        i8_p_type = Type.pointer(Type.int(8))
        func_type = Type.function(void_type,
                        [i8_p_type, Type.pointer(i8_p_type), i8_p_type])
        module = self.module.clone()
        single_ck_func_name = self.func.name +"_single_ckernel"
        single_ck_func = Function.new(module, func_type,
                                      name=single_ck_func_name)
        block = single_ck_func.append_basic_block('entry')
        builder = lc.Builder.new(block)
        dst_ptr_arg, src_ptr_arr_arg, extra_ptr_arg = single_ck_func.args
        dst_ptr_arg.name = 'dst_ptr'
        src_ptr_arr_arg.name = 'src_ptrs'
        extra_ptr_arg.name = 'extra_ptr'
        # Build up the kernel data structure. Currently, this means
        # adding a shape field for each array argument. First comes
        # the kernel data prefix with a spot for the 'owner' reference added.
        input_field_indices = []
        kernel_data_fields = [Type.struct([i8_p_type]*3)]
        kernel_data_ctypes_fields = [('base', JITKernelData)]
        for i, (kind, a) in enumerate(izip(self.kinds, self.argtypes)):
            if isinstance(kind, tuple):
                if kind[0] != lla.C_CONTIGUOUS:
                    raise ValueError('only support C contiguous array presently')
                input_field_indices.append(len(kernel_data_fields))
                kernel_data_fields.append(Type.array(
                                intp_type, len(self.dshapes[i])-1))
                kernel_data_ctypes_fields.append(('operand_%d' % i,
                                c_ssize_t * (len(self.dshapes[i])-1)))
            elif kind in [SCALAR, POINTER]:
                input_field_indices.append(None)
            else:
                # Report the offending kind. The original formatted an
                # undefined name ``k``, raising NameError instead.
                raise TypeError(("unbound_single_ckernel codegen doesn't " +
                                "support the parameter kind %r yet") % (kind,))
        # Make an LLVM and ctypes type for the extra data pointer.
        kernel_data_llvmtype = Type.struct(kernel_data_fields)
        class kernel_data_ctypestype(ctypes.Structure):
            _fields_ = kernel_data_ctypes_fields
        # Cast the extra pointer to the right llvm type
        extra_struct = builder.bitcast(extra_ptr_arg,
                        Type.pointer(kernel_data_llvmtype))
        # Convert the src pointer args to the
        # appropriate kinds for the llvm call
        args = []
        for i, (kind, atype) in enumerate(izip(self.kinds[:-1], self.argtypes)):
            if kind == SCALAR:
                # Scalars are loaded and passed by value.
                src_ptr = builder.bitcast(builder.load(
                                builder.gep(src_ptr_arr_arg,
                                        (lc.Constant.int(intp_type, i),))),
                                    Type.pointer(atype))
                src_val = builder.load(src_ptr)
                args.append(src_val)
            elif kind == POINTER:
                # Pointers are passed through after a cast.
                src_ptr = builder.bitcast(builder.load(
                                builder.gep(src_ptr_arr_arg,
                                        (lc.Constant.int(intp_type, i),))),
                                    Type.pointer(atype))
                args.append(src_ptr)
            elif isinstance(kind, tuple):
                src_ptr = builder.bitcast(builder.load(
                                builder.gep(src_ptr_arr_arg,
                                        (lc.Constant.int(intp_type, i),))),
                                    Type.pointer(kind[2]))
                # First get the shape of this parameter. This will
                # be a combination of Fixed and TypeVar (Var unsupported
                # here for now)
                shape = self.dshapes[i][:-1]
                # Get the llvm array
                arr_var = builder.alloca(atype.pointee)
                builder.store(src_ptr,
                                builder.gep(arr_var,
                                (lc.Constant.int(int32_type, 0),
                                 lc.Constant.int(int32_type, 0))))
                for j, sz in enumerate(shape):
                    if isinstance(sz, Fixed):
                        # If the shape is already known at JIT compile time,
                        # insert the constant
                        shape_el_ptr = builder.gep(arr_var,
                                        (lc.Constant.int(int32_type, 0),
                                         lc.Constant.int(int32_type, 1),
                                         lc.Constant.int(intp_type, j)))
                        builder.store(lc.Constant.int(intp_type,
                                                operator.index(sz)),
                                        shape_el_ptr)
                    elif isinstance(sz, TypeVar):
                        # TypeVar types are only known when the kernel is bound,
                        # so copy it from the extra data pointer
                        sz_from_extra_ptr = builder.gep(extra_struct,
                                        (lc.Constant.int(int32_type, 0),
                                         lc.Constant.int(int32_type,
                                                input_field_indices[i]),
                                         lc.Constant.int(intp_type, j)))
                        sz_from_extra = builder.load(sz_from_extra_ptr)
                        shape_el_ptr = builder.gep(arr_var,
                                        (lc.Constant.int(int32_type, 0),
                                         lc.Constant.int(int32_type, 1),
                                         lc.Constant.int(intp_type, j)))
                        builder.store(sz_from_extra, shape_el_ptr)
                    else:
                        raise TypeError(("unbound_single_ckernel codegen doesn't " +
                                        "support dimension type %r") % type(sz))
                args.append(arr_var)
        # Call the function and store in the dst
        kind = self.kinds[-1]
        func = module.get_function_named(self.func.name)
        if kind == SCALAR:
            dst_ptr = builder.bitcast(dst_ptr_arg,
                            Type.pointer(self.return_type))
            dst_val = builder.call(func, args)
            builder.store(dst_val, dst_ptr)
        elif kind == POINTER:
            dst_ptr = builder.bitcast(dst_ptr_arg,
                            Type.pointer(self.return_type))
            builder.call(func, args + [dst_ptr])
        elif isinstance(kind, tuple):
            dst_ptr = builder.bitcast(dst_ptr_arg,
                            Type.pointer(kind[2]))
            # First get the shape of the output. This will
            # be a combination of Fixed and TypeVar (Var unsupported
            # here for now)
            shape = self.dshapes[-1][:-1]
            # Get the llvm array
            arr_var = builder.alloca(self.argtypes[-1].pointee)
            builder.store(dst_ptr,
                            builder.gep(arr_var,
                                (lc.Constant.int(int32_type, 0),
                                lc.Constant.int(int32_type, 0))))
            for j, sz in enumerate(shape):
                if isinstance(sz, Fixed):
                    # If the shape is already known at JIT compile time,
                    # insert the constant
                    shape_el_ptr = builder.gep(arr_var,
                                    (lc.Constant.int(int32_type, 0),
                                     lc.Constant.int(int32_type, 1),
                                     lc.Constant.int(intp_type, j)))
                    builder.store(lc.Constant.int(intp_type,
                                            operator.index(sz)),
                                    shape_el_ptr)
                elif isinstance(sz, TypeVar):
                    # TypeVar types are only known when the kernel is bound,
                    # so copy it from the extra data pointer
                    sz_from_extra_ptr = builder.gep(extra_struct,
                                    (lc.Constant.int(int32_type, 0),
                                     lc.Constant.int(int32_type,
                                            input_field_indices[-1]),
                                     lc.Constant.int(intp_type, j)))
                    sz_from_extra = builder.load(sz_from_extra_ptr)
                    shape_el_ptr = builder.gep(arr_var,
                                    (lc.Constant.int(int32_type, 0),
                                     lc.Constant.int(int32_type, 1),
                                     lc.Constant.int(intp_type, j)))
                    builder.store(sz_from_extra, shape_el_ptr)
                else:
                    raise TypeError(("unbound_single_ckernel codegen doesn't " +
                                    "support dimension type %r") % type(sz))
            builder.call(func, args + [arr_var])
        else:
            raise TypeError(("single_ckernel codegen doesn't " +
                            "support kind %r") % kind)
        builder.ret_void()

        #print("Function before optimization passes:")
        #print(single_ck_func)
        #module.verify()

        import llvm.ee as le
        from llvm.passes import build_pass_managers
        tm = le.TargetMachine.new(opt=3, cm=le.CM_JITDEFAULT, features='')
        pms = build_pass_managers(tm, opt=3, fpm=False,
                        vectorize=True, loop_vectorize=True)
        pms.pm.run(module)

        #print("Function after optimization passes:")
        #print(single_ck_func)

        # DEBUGGING: Verify the module.
        #module.verify()

        # TODO: Cache the EE - the interplay with the func_ptr
        #       was broken, so just avoiding caching for now
        # FIXME: Temporarily disabling AVX, because of misdetection
        #        in linux VMs. Some code is in llvmpy's workarounds
        #        submodule related to this.
        ee = le.EngineBuilder.new(module).mattrs("-avx").create()
        func_ptr = ee.get_pointer_to_function(single_ck_func)

        # Create a function which copies the shape from data
        # descriptors to the extra data struct.
        if len(kernel_data_ctypes_fields) == 1:
            # Only the 'base' prefix exists, so there are no shapes to copy.
            def bind_func(estruct, dst_dd, src_dd_list):
                pass
        else:
            def bind_func(estruct, dst_dd, src_dd_list):
                for i, (ds, dd) in enumerate(
                                izip(self.dshapes, src_dd_list + [dst_dd])):
                    shape = [operator.index(dim)
                                    for dim in dd.dshape[-len(ds):-1]]
                    cshape = getattr(estruct, 'operand_%d' % i)
                    for j, dim_size in enumerate(shape):
                        cshape[j] = dim_size

        # The engine is passed along together with the function pointer.
        self._unbound_single_ckernel = UnboundCKernelFunction(
                        ExprSingleOperation(func_ptr),
                        kernel_data_ctypestype,
                        bind_func,
                        (ee, func_ptr))

    return self._unbound_single_ckernel
def create_ckernel_interface(bek, strided):
    """Create a function wrapper with a CKernel interface according to
    `strided`.

    The kernel's module is cloned, and a new function is added to the
    clone whose name is the kernel function's name plus either
    ``"_strided_ckernel"`` or ``"_single_ckernel"``. The new function
    converts the raw ckernel pointer arguments into the arguments the
    kernel function expects, calls it, and returns void. In the strided
    case the call is wrapped in a loop that runs while the ``count``
    argument is non-zero, advancing the destination and source pointers
    by their strides after every iteration.

    Parameters
    ----------
    bek : BlazeElementKernel
        The blaze kernel to wrap. This function reads its ``module``,
        ``func``, ``kinds``, ``dshapes``, ``argtypes`` and
        ``return_type`` attributes. The last entry of ``kinds`` (and of
        ``dshapes``/``argtypes``) is used for the destination; the
        preceding entries are used for the sources.
    strided : bool
        If true, the wrapper is created with
        ``strided_ckernel_func_type`` and contains the strided loop;
        otherwise it is created with ``single_ckernel_func_type`` and
        performs a single kernel call.

    Returns
    -------
    module, ck_func
        The cloned module and the wrapper function that was added to it.
        No optimization passes or verification are run here.
    """
    # TODO: Decouple this from BlazeElementKernel
    # Every kind except the last one describes a source argument
    inarg_count = len(bek.kinds)-1
    # Work on a clone so the wrapper is not added to the kernel's own module
    module = bek.module.clone()
    if strided:
        ck_func_name = bek.func.name +"_strided_ckernel"
        ck_func = Function.new(module, strided_ckernel_func_type,
                               name=ck_func_name)
    else:
        ck_func_name = bek.func.name +"_single_ckernel"
        ck_func = Function.new(module, single_ckernel_func_type,
                               name=ck_func_name)
    entry_block = ck_func.append_basic_block('entry')
    builder = lc.Builder.new(entry_block)
    # Unpack and name the wrapper's arguments; the strided signature
    # carries the extra stride and count arguments
    if strided:
        dst_ptr_arg, dst_stride_arg, \
            src_ptr_arr_arg, src_stride_arr_arg, \
            count_arg, extra_ptr_arg = ck_func.args
        dst_stride_arg.name = 'dst_stride'
        src_stride_arr_arg.name = 'src_strides'
        count_arg.name = 'count'
    else:
        dst_ptr_arg, src_ptr_arr_arg, extra_ptr_arg = ck_func.args
    dst_ptr_arg.name = 'dst_ptr'
    src_ptr_arr_arg.name = 'src_ptrs'
    extra_ptr_arg.name = 'extra_ptr'
    if strided:
        # Allocate an array of pointer counters for the
        # strided loop
        src_ptr_arr_tmp = builder.alloca_array(int8_p_type,
                        lc.Constant.int(int32_type, inarg_count),
                        'src_ptr_arr')
        # Copy the pointers, so the local copies can be advanced in
        # place at the end of every loop iteration
        for i in range(inarg_count):
            builder.store(builder.load(builder.gep(src_ptr_arr_arg,
                            (lc.Constant.int(int32_type, i),))),
                          builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
        # Get all the src strides (loaded once, in the entry block)
        src_stride_vals = [builder.load(builder.gep(src_stride_arr_arg,
                                        (lc.Constant.int(int32_type, i),)))
                           for i in range(inarg_count)]
        # Replace src_ptr_arr_arg with this local variable
        src_ptr_arr_arg = src_ptr_arr_tmp

        # Initialize some more basic blocks for the strided loop
        looptest_block = ck_func.append_basic_block('looptest')
        loopbody_block = ck_func.append_basic_block('loopbody')
        end_block = ck_func.append_basic_block('finish')

        # Finish the entry block by branching
        # to the looptest block
        builder.branch(looptest_block)

        # The looptest block continues the loop while counter != 0
        builder.position_at_end(looptest_block)
        counter_phi = builder.phi(count_arg.type)
        counter_phi.add_incoming(count_arg, entry_block)
        dst_ptr_phi = builder.phi(dst_ptr_arg.type)
        dst_ptr_phi.add_incoming(dst_ptr_arg, entry_block)
        # From here on the kernel call below uses the loop-carried
        # destination pointer instead of the raw function argument
        dst_ptr_arg = dst_ptr_phi
        kzero = lc.Constant.int(count_arg.type, 0)
        pred = builder.icmp(lc.ICMP_NE, counter_phi, kzero)
        builder.cbranch(pred, loopbody_block, end_block)

        # The loopbody block decrements the counter, and executes
        # one kernel iteration
        builder.position_at_end(loopbody_block)
        kone = lc.Constant.int(counter_phi.type, 1)
        counter_dec = builder.sub(counter_phi, kone)
        counter_phi.add_incoming(counter_dec, loopbody_block)

    # The code below is emitted at the builder's current position: the
    # entry block in the single case, the loopbody block in the strided
    # case.

    # Convert the src pointer args to the
    # appropriate kinds for the llvm call
    args = build_llvm_src_ptrs(builder, src_ptr_arr_arg,
                    bek.dshapes, bek.kinds[:-1], bek.argtypes)
    # Call the function and store in the dst
    kind = bek.kinds[-1]
    # Look the kernel function up by name in the cloned module
    func = module.get_function_named(bek.func.name)
    if kind == lla.SCALAR:
        # Scalar kernels return their result by value; store it
        # through the destination pointer
        dst_ptr = builder.bitcast(dst_ptr_arg,
                        Type.pointer(bek.return_type))
        dst_val = builder.call(func, args)
        builder.store(dst_val, dst_ptr)
    else:
        # Otherwise the destination is passed to the kernel as an
        # extra trailing argument
        dst_ptr = build_llvm_arg_ptr(builder, dst_ptr_arg,
                        bek.dshapes[-1], kind, bek.argtypes[-1])
        builder.call(func, args + [dst_ptr])

    if strided:
        # Finish the loopbody block by incrementing all the pointers
        # and branching to the looptest block
        # NOTE(review): the phi incoming edges assume the builder is
        # still positioned in loopbody_block here, i.e. that the helper
        # calls above did not move it to another block -- confirm.
        dst_ptr_inc = builder.gep(dst_ptr_arg, (dst_stride_arg,))
        dst_ptr_phi.add_incoming(dst_ptr_inc, loopbody_block)
        # Increment the src pointers
        for i in range(inarg_count):
            src_ptr_val = builder.load(builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
            src_ptr_inc = builder.gep(src_ptr_val, (src_stride_vals[i],))
            builder.store(src_ptr_inc,
                          builder.gep(src_ptr_arr_tmp,
                            (lc.Constant.int(int32_type, i),)))
        builder.branch(looptest_block)

        # The end block just returns
        builder.position_at_end(end_block)

    # Terminates the entry block (single) or the finish block (strided)
    builder.ret_void()

    #print("Function before optimization passes:")
    #print(ck_func)
    #module.verify()

    return module, ck_func
def test_powi_f64(self):
    """Build a double function returning llvm.powi(arg, 2) and hand it,
    together with a Python squaring reference, to ``self._template``.
    """
    double_ty = Type.double()
    mod, func, b = self._build_module(double_ty)
    powi = Function.intrinsic(mod, lc.INTR_POWI, [double_ty])

    # powi takes the floating point base and an integer exponent
    base = func.args[0]
    exponent = lc.Constant.int(Type.int(), 2)
    squared = b.call(powi, [base, exponent])
    b.ret(squared)

    def square(x):
        return x**2

    self._template(mod, func, square)
def test_sin_f64(self):
    """Build a double function returning llvm.sin of its arguments and
    hand it, with ``math.sin`` as the reference, to ``self._template``.
    """
    double_ty = Type.double()
    mod, func, b = self._build_module(double_ty)
    sin_intr = Function.intrinsic(mod, lc.INTR_SIN, [double_ty])

    # Forward the function's own arguments straight to the intrinsic
    result = b.call(sin_intr, func.args)
    b.ret(result)

    self._template(mod, func, math.sin)
def impl(module, builder, args):
    """Emit a call to the intrinsic identified by ``intrcode``/``types``.

    The intrinsic is obtained from `module` via ``Function.intrinsic``
    and called with `args` through `builder`; the value returned by
    ``builder.call`` is passed back to the caller.
    """
    intrinsic_fn = Function.intrinsic(module, intrcode, types)
    return builder.call(intrinsic_fn, args)
def add_prelude(self):
    """Add a function to ``self.module`` for every entry of ``prelude``.

    Each ``(name, function)`` pair of ``prelude`` is passed to
    ``Function.new`` and the resulting function object is recorded in
    ``self.intrinsics`` under the same name.
    """
    # items() instead of the Python 2-only iteritems(), so this also
    # runs on Python 3 (assumes prelude is a plain dict -- confirm)
    for name, function in prelude.items():
        self.intrinsics[name] = Function.new(self.module, function, name)
def op_DEF_FOREIGN(self, name, retty, argtys):
    """Add a foreign function named `name` to the module.

    The argument types `argtys` and the return type `retty` are each
    translated with ``arg_typemap``, combined into a non-variadic
    function type, and the new function is stored in ``self.globals``
    under `name`.
    """
    # Build a real list: on Python 3 map() would return a one-shot
    # lazy iterator instead of the list produced on Python 2
    largtys = [arg_typemap(argty) for argty in argtys]
    lretty = arg_typemap(retty)
    # The third argument (var_arg) is False: fixed argument list
    func_type = Type.function(lretty, largtys, False)
    self.globals[name] = Function.new(self.module, func_type, name)
def test_objcache(self):
    """Check that the wrapper objects handed out for one underlying
    module, global variable, function, argument, basic block,
    instruction or PHI operand are the very same Python object
    (identity, not just equality), whichever accessor produced them.

    Uses ``assertTrue`` rather than the deprecated ``assert_`` alias,
    which no longer exists on recent Python versions.
    """
    # Testing module aliasing
    m1 = Module.new('a')
    t = Type.int()
    ft = Type.function(t, [t])
    f1 = m1.add_function(ft, "func")
    m2 = f1.module
    self.assertTrue(m1 is m2)

    # Testing global variable aliasing 1
    gv1 = GlobalVariable.new(m1, t, "gv")
    gv2 = GlobalVariable.get(m1, "gv")
    self.assertTrue(gv1 is gv2)

    # Testing global variable aliasing 2
    gv3 = m1.global_variables[0]
    self.assertTrue(gv1 is gv3)

    # Testing global variable aliasing 3
    # Drop the extra references, delete the variable, then create a
    # new one with the same name: it must be a different object
    gv2 = None
    gv3 = None
    gv1.delete()
    gv4 = GlobalVariable.new(m1, t, "gv")
    self.assertTrue(gv1 is not gv4)

    # Testing function aliasing 1
    b1 = f1.append_basic_block('entry')
    f2 = b1.function
    self.assertTrue(f1 is f2)

    # Testing function aliasing 2
    f3 = m1.get_function_named("func")
    self.assertTrue(f1 is f3)

    # Testing function aliasing 3
    f4 = Function.get_or_insert(m1, ft, "func")
    self.assertTrue(f1 is f4)

    # Testing function aliasing 4
    f5 = Function.get(m1, "func")
    self.assertTrue(f1 is f5)

    # Testing function aliasing 5
    f6 = m1.get_or_insert_function(ft, "func")
    self.assertTrue(f1 is f6)

    # Testing function aliasing 6
    f7 = m1.functions[0]
    self.assertTrue(f1 is f7)

    # Testing argument aliasing
    a1 = f1.args[0]
    a2 = f1.args[0]
    self.assertTrue(a1 is a2)

    # Testing basic block aliasing 1
    b2 = f1.basic_blocks[0]
    self.assertTrue(b1 is b2)

    # Testing basic block aliasing 2
    b3 = f1.get_entry_basic_block()
    self.assertTrue(b1 is b3)

    # Testing basic block aliasing 3
    b31 = f1.entry_basic_block
    self.assertTrue(b1 is b31)

    # Testing basic block aliasing 4
    bldr = Builder.new(b1)
    b4 = bldr.basic_block
    self.assertTrue(b1 is b4)

    # Testing basic block aliasing 5
    i1 = bldr.ret_void()
    b5 = i1.basic_block
    self.assertTrue(b1 is b5)

    # Testing instruction aliasing 1
    i2 = b5.instructions[0]
    self.assertTrue(i1 is i2)

    # phi node
    phi = bldr.phi(t)
    phi.add_incoming(f1.args[0], b1)
    v2 = phi.get_incoming_value(0)
    b6 = phi.get_incoming_block(0)

    # Testing PHI / basic block aliasing 5
    self.assertTrue(b1 is b6)

    # Testing PHI / value aliasing
    self.assertTrue(f1.args[0] is v2)
def test_sqrt_f32(self):
    """Build a float function returning llvm.sqrt of its arguments and
    hand it, with ``math.sqrt`` as the reference, to ``self._template``.
    """
    float_ty = Type.float()
    mod, func, b = self._build_module(float_ty)
    sqrt_intr = Function.intrinsic(mod, lc.INTR_SQRT, [float_ty])

    # Forward the function's own arguments straight to the intrinsic
    result = b.call(sqrt_intr, func.args)
    b.ret(result)

    self._template(mod, func, math.sqrt)