def gera_codigo(arvore, tabela_simbolos, sema_success):
    """Generate LLVM IR for the program and write it to ``modulo.ll``.

    Every global variable and every function found in ``tabela_simbolos`` is
    declared in the module-level ``modulo``. The syntax tree is then handed to
    ``passar_por_arvore`` together with the module-level ``funcoes`` list, and
    the resulting module is written to ``modulo.ll`` and printed.

    Args:
        arvore: Syntax tree, forwarded unchanged to ``passar_por_arvore``.
        tabela_simbolos: Iterable of symbol dicts. The keys read here are
            ``simbolo_tipo``, ``escopo``, ``tipo_valor``, ``dimensoes``,
            ``nome``, ``parametros`` and ``return``. A function symbol named
            ``principal`` is renamed to ``main`` in place.
        sema_success: Not used by this function.
    """
    global modulo
    global info

    tipos_llvm = {"inteiro": ir.IntType(32), "flutuante": ir.FloatType()}

    # Declare global variables and functions.
    for simbolo in tabela_simbolos:
        if (simbolo["simbolo_tipo"] == "variable"
                and simbolo["escopo"] == "global"):
            var_type = simbolo["tipo_valor"]
            if var_type not in tipos_llvm:
                continue

            # Scalars have no dimensions; each dimension wraps the type in
            # one more array level (the innermost dimension comes last).
            # Previously only 0 or 1 dimension produced a variable.
            g_type = tipos_llvm[var_type]
            for dimensao in reversed(simbolo["dimensoes"]):
                g_type = ir.ArrayType(g_type, int(dimensao))
            g = ir.GlobalVariable(modulo, g_type, simbolo["nome"])

            # NOTE(review): only "flutuante" globals receive linkage and
            # alignment, as in the original code; confirm whether "inteiro"
            # globals should get the same treatment.
            if var_type == "flutuante":
                g.linkage = "common"
                g.align = 4
            info["variaveis_globais"].append(g)

        elif simbolo["simbolo_tipo"] == "function":
            if simbolo["nome"] == "principal":
                simbolo["nome"] = "main"

            # Any parameter that is not "inteiro" is treated as float.
            arguments_list = [
                ir.IntType(32) if a["par_type"] == "inteiro"
                else ir.FloatType()
                for a in simbolo["parametros"]
            ]

            # No return entry means a void function.
            if len(simbolo["return"]) > 0:
                if simbolo["return"][0]["ret_type"] == "inteiro":
                    f_ret = ir.IntType(32)
                else:
                    f_ret = ir.FloatType()
            else:
                f_ret = ir.VoidType()

            f_func = ir.FunctionType(f_ret, arguments_list)
            f = ir.Function(modulo, f_func, name=simbolo["nome"])
            entryBlock = f.append_basic_block('entry')
            builder = ir.IRBuilder(entryBlock)

            for argumento, parametro in zip(f.args, simbolo["parametros"]):
                argumento.name = parametro["par_name"]

            funcoes.append({
                "function": f,
                "builder": builder,
                "arguments": f.args
            })

    # Walk the tree recursively to fill in the function bodies.
    passar_por_arvore(arvore, funcoes)

    # The context manager closes the file even if the write fails.
    with open('modulo.ll', 'w') as arquivo:
        arquivo.write(str(modulo))
    print(modulo)
def compile_bf(prog):
    """
    Compiles the given BF program text into a llvm.ModuleRef object.

    The module will contain 3 globals:
        - the main function, void bfmain()
        - the memory, int8_t memory[]
        - the memory index, int32_t index

    Args:
        prog: BF program text. Characters that are not BF commands are
            ignored.

    Returns:
        The result of ``llvm.parse_assembly`` on the generated module text.

    Raises:
        ValueError: if the program contains an unmatched ``[`` or ``]``.
            The message starts with ``line:col``, both 1-based. For ``]`` it
            is the position of the offending character; for ``[`` it is the
            position just past the end of the input.
    """
    module = ll.Module()
    func = ll.Function(module, ll.FunctionType(ll.VoidType(), []), 'bfmain')
    builder = ll.IRBuilder()

    g_memory = ll.GlobalVariable(module, MEMORY_TYPE, 'memory')
    g_index = ll.GlobalVariable(module, i32, 'index')

    # Initialize the memory and index pointer to 0.
    g_memory.initializer = ll.Constant(MEMORY_TYPE, None)
    g_index.initializer = ll.Constant(i32, None)

    # Create the "putc" and "getc" functions.
    putc_type = ll.FunctionType(ll.VoidType(), [i8])
    getc_type = ll.FunctionType(i8, [])
    f_putc = create_thunk(module, putc_type, PUTC_WRAPPER, 'putc')
    f_getc = create_thunk(module, getc_type, GETC_WRAPPER, 'getc')

    # The block_stack tracks the current block and remaining blocks. The top
    # block is what we currently compile into, the one below it is the block
    # that follows the next ] (if we're in a loop).
    block_stack = [func.append_basic_block('entry')]
    loop_stack = []
    builder.position_at_end(block_stack[-1])

    def current_index_ptr():
        index = builder.load(g_index)
        # The extra dereference here with ZERO is required because g_memory is
        # itself a pointer, so this becomes &g_memory[0][index].
        # Ref: https://llvm.org/docs/GetElementPtr.html
        return builder.gep(g_memory, [ZERO_i32, index])

    line = 1
    col = 1
    for ch in prog:
        # Remember where `ch` itself sits, then advance the cursor. The old
        # code incremented `col` before use, so every reported column was
        # one too high.
        ch_line, ch_col = line, col
        if ch == '\n':
            line += 1
            col = 1
            continue
        col += 1

        if ch == '.':
            ptr = current_index_ptr()
            value = builder.load(ptr)
            builder.call(f_putc, [value])
        elif ch == ',':
            res = builder.call(f_getc, [])
            ptr = current_index_ptr()
            builder.store(res, ptr)
        elif ch == '>':
            index = builder.load(g_index)
            index = builder.add(index, ONE_i32)
            # Masking wraps the index around instead of leaving the memory.
            index = builder.and_(index, MEMORY_MASK_i32)
            builder.store(index, g_index)
        elif ch == '<':
            index = builder.load(g_index)
            index = builder.sub(index, ONE_i32)
            index = builder.and_(index, MEMORY_MASK_i32)
            builder.store(index, g_index)
        elif ch == '+':
            ptr = current_index_ptr()
            value = builder.load(ptr)
            value = builder.add(value, ONE_i8)
            builder.store(value, ptr)
        elif ch == '-':
            ptr = current_index_ptr()
            value = builder.load(ptr)
            value = builder.sub(value, ONE_i8)
            builder.store(value, ptr)
        elif ch == '[':
            # Start a loop.
            loop_block = func.append_basic_block()
            tail_block = func.append_basic_block()

            # If memory[index] != 0, enter loop, otherwise skip.
            ptr = current_index_ptr()
            value = builder.load(ptr)
            nonzero = builder.icmp_unsigned('!=', value, ZERO_i8)
            builder.cbranch(nonzero, loop_block, tail_block)

            # Update our block stack. The current block is finished.
            block_stack.pop()
            block_stack.append(tail_block)
            block_stack.append(loop_block)
            loop_stack.append(loop_block)
            builder.position_at_end(block_stack[-1])
        elif ch == ']':
            # End a loop.
            if len(block_stack) <= 1:
                raise ValueError(
                    '{}:{}: unmatched ]'.format(ch_line, ch_col))

            # If memory[index] != 0, repeat current loop, otherwise break.
            ptr = current_index_ptr()
            value = builder.load(ptr)
            nonzero = builder.icmp_unsigned('!=', value, ZERO_i8)
            builder.cbranch(nonzero, loop_stack[-1], block_stack[-2])

            # Update our block stack. The current block is finished.
            block_stack.pop()
            loop_stack.pop()
            builder.position_at_end(block_stack[-1])

    if len(block_stack) != 1:
        raise ValueError('{}:{}: unmatched ['.format(line, col))

    # Finish the function.
    builder.ret_void()

    assembly = str(module)
    return llvm.parse_assembly(assembly)
# memory size NUM_CELLS = 30000 # Types cell_t = ir.IntType(8) pcell_t = cell_t.as_pointer() memory_t = ir.ArrayType(cell_t, NUM_CELLS) int32_t = ir.IntType(32) # Constants zero = cell_t(0) one = cell_t(1) minus_one = cell_t(-1) # Globals memory = ir.GlobalVariable(module, memory_t, "memory") memory.initializer = memory_t([0] * NUM_CELLS) ptr = irbuilder.gep(memory, [zero, zero], "ptr") # Function declarations putchar_t = ir.FunctionType(int32_t, [int32_t]) putchar = ir.Function(module, putchar_t, 'putchar') getchar_t = ir.FunctionType(int32_t, []) getchar = ir.Function(module, getchar_t, 'getchar') # for loops stack = []
def lift(filename):
    """Build an LLVM module named ``lifted`` from the XML file ``filename``.

    Each ``<register>`` under ``<globals>`` and each ``<memory>`` under
    ``<memory>`` becomes an internal, zero-initialised global, recorded in the
    module-level ``registers`` / ``memory`` mappings. Three void helper
    functions are declared and stored in ``internal_functions``. Every
    ``<function>`` element is then created with ``build_function`` and filled
    in with ``populate_func``.

    Args:
        filename: Path of the XML document, passed to ``et.parse``.

    Returns:
        The populated ``ir.Module``.
    """
    root = et.parse(filename).getroot()
    module = ir.Module(name="lifted")

    # Registers: flags are i1, pointers are i8*, anything else is an integer
    # of `size` bytes.
    for register in root.find('globals').findall('register'):
        reg_name = register.get('name')
        if reg_name in flags:
            reg_type = ir.IntType(1)
        elif reg_name in pointers:
            reg_type = ir.PointerType(ir.IntType(8))
        else:
            reg_type = ir.IntType(8 * int(register.get('size')))
        reg_var = ir.GlobalVariable(module, reg_type, reg_name)
        reg_var.initializer = ir.Constant(reg_type, None)
        reg_var.linkage = 'internal'
        registers[reg_name] = reg_var

    # Memory locations: one integer of `size` bytes each.
    for memory_location in root.find('memory').findall('memory'):
        mem_name = memory_location.get('name')
        mem_type = ir.IntType(8 * int(memory_location.get('size')))
        mem_var = ir.GlobalVariable(module, mem_type, mem_name)
        mem_var.initializer = ir.Constant(mem_type, None)
        mem_var.linkage = 'internal'
        memory[mem_name] = mem_var

    # Helper functions, all declared as `void f()`.
    for helper_name in ("intra_function_branch", "call_indirect",
                        "bit_extraction"):
        helper_type = ir.FunctionType(ir.VoidType(), [])
        internal_functions[helper_name] = ir.Function(
            module, helper_type, helper_name)

    # Create every function first so that all of them exist before any body
    # is populated.
    for function in root.findall('function'):
        name = function.get('name')
        # Make the name unique by appending "_<counter>"; the suffix is added
        # to the already suffixed name on each retry.
        suffix = 1
        while name in function_names:
            name = name + "_" + str(suffix)
            suffix += 1
        function_names.append(name)
        functions[function.get('address')] = [
            build_function(name, module), function]

    for ir_func, function in functions.values():
        populate_func(ir_func, function)

    return module
D[0][1] = D[1][1] + 10; return 0; } */ '''
# Create the module.
module = ir.Module('meu_modulo.bc')

# Global array of 2048 x 2048 i32 elements, zero-initialised.
typeB_0 = ir.ArrayType(ir.IntType(32), 2048)
typeB = ir.ArrayType(typeB_0, 2048)
arrayB = ir.GlobalVariable(module, typeB, "B")
arrayB.linkage = "common"
arrayB.initializer = ir.Constant(typeB, None)
arrayB.align = 4

# Zero constant to use as the return value.
# NOTE(review): despite the name `Zero64`, this constant is 32 bits wide.
Zero64 = ir.Constant(ir.IntType(32), 0)

# Return type of the main function.
mainFnReturnType = ir.IntType(32)
# Type of the main function: no parameters.
t_func_main = ir.FunctionType(mainFnReturnType, ())

# Declare the main function.
main = ir.Function(module, t_func_main, name='main')
A[50] = A[49] + 5; B[0] = B[1] + 10; return 0; } ''' # Cria o módulo. module = ir.Module('meu_modulo.bc') # Array global de 1024 elementos. typeA = ir.ArrayType(ir.IntType(64), 1024) arrayA = ir.GlobalVariable(module, typeA, "A") arrayA.initializer = ir.Constant.array(ir.IntType(64), 0) # arrayA.initializer = ir.IntType(64) arrayA.linkage = "common" # arrayA.initializer = ir.Constant(ir.IntType(64), 0) arrayA.align = 16 # Cria um valor zero para colocar no retorno. Zero64 = ir.Constant(ir.IntType(64), 0) # Declara o tipo do retorno da função main. mainFnReturnType = ir.IntType(64) # Cria a função main. t_func_main = ir.FunctionType(mainFnReturnType, ())
def proc_stmt(self, node):
    """Generate IR for a procedure statement.

    Handles the built-in ``READ`` (via C ``scanf``), ``write`` and
    ``writeln`` (via C ``printf``, output prefixed with ``"SPL >> "``), and
    calls to user procedures looked up in ``self.symbol_table``.

    Args:
        node: Statement node. ``node.children[0]`` identifies the procedure
            and ``node.children[2]`` holds the argument list.

    Returns:
        The call instruction for a user procedure; ``None`` for the
        built-ins.
    """
    voidptr_ty = ir.IntType(8).as_pointer()

    def c_varargs_function(name):
        # Reuse an existing declaration of `name`, or declare it as
        # `i32 name(i8*, ...)`.
        func = self.module.globals.get(name, None)
        if not func:
            func_ty = ir.FunctionType(ir.IntType(32), [voidptr_ty],
                                      var_arg=True)
            func = ir.Function(self.module, func_ty, name=name)
        return func

    def format_string_ptr(text, prefix):
        # Store `text` in an internal constant global and return it as i8*.
        # The module hands out a collision-free name; random suffixes could
        # repeat and make llvmlite reject the duplicate.
        data = bytearray(text.encode("utf8"))
        const = ir.Constant(ir.ArrayType(ir.IntType(8), len(data)), data)
        glob = ir.GlobalVariable(self.module, const.type,
                                 name=self.module.get_unique_name(prefix))
        glob.linkage = 'internal'
        glob.global_constant = True
        glob.initializer = const
        return self.builder.bitcast(glob, voidptr_ty)

    callee = node.children[0]

    if callee.type == 'READ':
        addr = self.symbol_table.find(
            node.children[2].children[0].name)['entry']
        scanf = c_varargs_function('scanf')
        # An f64 target is a C double, which scanf reads with "%lf";
        # "%f" would store only a 4-byte float into it.
        scanf_formats = {'i32': '%d', 'f64': '%lf', 'i8': '%c'}
        conversion = scanf_formats.get(
            addr.type.pointee.intrinsic_name, '%s')
        sca_arg = format_string_ptr(conversion + '\0', 'sca')
        self.builder.call(scanf, [sca_arg, addr])
        return

    # Arguments are generated exactly once, so an argument expression with
    # side effects is not emitted twice for user procedures.
    args = self.args_list(node.children[2])

    if callee.name in ('write', 'writeln'):
        printf = c_varargs_function('printf')
        printf_formats = {'i32': '%d ', 'f64': '%f '}
        pieces = ["SPL >> "]
        pieces.extend(printf_formats.get(arg.type.intrinsic_name, '%s ')
                      for arg in args)
        # writeln differs from write only by the trailing newline.
        pieces.append("\n\0" if callee.name == 'writeln' else "\0")
        fmt_arg = format_string_ptr("".join(pieces), 'fmt')
        self.builder.call(printf, [fmt_arg] + args)
        return None

    func = self.symbol_table.find(callee.name)["entry"]
    for i, formal in enumerate(func.args):
        # Pointer parameters receive the address of the actual argument
        # instead of its value.
        if formal.type.is_pointer:
            args[i] = self.find_addr(node.children[2], len(args) - i - 1)
    return self.builder.call(func, args)
def add_global_variable(module, ty, name, addrspace=0):
    """Create a global variable of type ``ty`` in ``module``.

    The variable's name is obtained from ``module.get_unique_name(name)``.

    Args:
        module: Module that receives the variable.
        ty: Type of the variable.
        name: Requested name, passed to ``module.get_unique_name``.
        addrspace: Address space forwarded to ``ir.GlobalVariable``.

    Returns:
        The new ``ir.GlobalVariable``.
    """
    return ir.GlobalVariable(
        module, ty, module.get_unique_name(name), addrspace)
float b = 1.0; g = 10; h = 10.0; a = a + 10; b = b + h; return 0; } '''
# Create the module.
module = ir.Module('meu_modulo.bc')

# Global integer variable g
g = ir.GlobalVariable(module, ir.IntType(32), "g")
# Initialize g to 0
g.initializer = ir.Constant(ir.IntType(32), 0)
# Linkage = common
g.linkage = "common"
# Set the alignment to 4
g.align = 4

# Global float variable h
h = ir.GlobalVariable(module, ir.FloatType(), "h")
# Initialize h to 0.0
h.initializer = ir.Constant(ir.FloatType(), 0.0)
# Linkage = common
h.linkage = "common"
# Set the alignment to 4
h.align = 4
# Código de Inicialização. llvm.initialize() llvm.initialize_all_targets() llvm.initialize_native_target() llvm.initialize_native_asmprinter() # Cria o módulo. module = ir.Module('meu_modulo.bc') module.triple = llvm.get_process_triple() target = llvm.Target.from_triple(module.triple) target_machine = target.create_target_machine() module.data_layout = target_machine.target_data # Variável inteira global a a = ir.GlobalVariable(module, ir.IntType(32), "a") # Inicializa a variavel a a.initializer = ir.Constant(ir.IntType(32), 0) # Linkage = common a.linkage = "common" # Define o alinhamento em 4 a.align = 4 # Variável float global b b = ir.GlobalVariable(module, ir.FloatType(), "b") # Inicializa a variavel h b.initializer = ir.Constant(ir.FloatType(), 0.0) # Linkage = common b.linkage = "common" # Define o alinhamento em 4 b.align = 4
# Código de Inicialização. llvm.initialize() llvm.initialize_all_targets() llvm.initialize_native_target() llvm.initialize_native_asmprinter() # Cria o módulo. module = ir.Module('meu_modulo.bc') module.triple = llvm.get_process_triple() target = llvm.Target.from_triple(module.triple) target_machine = target.create_target_machine() module.data_layout = target_machine.target_data # Variável inteira global a a = ir.GlobalVariable(module, ir.IntType(32), "a") # Inicializa a variavel a a.initializer = ir.Constant(ir.IntType(32), 0) # Linkage = common a.linkage = "common" # Define o alinhamento em 4 a.align = 4 # Variável inteira global b b = ir.GlobalVariable(module, ir.IntType(32), "b") # Inicializa a variavel b b.initializer = ir.Constant(ir.IntType(32), 0) # Linkage = common b.linkage = "common" # Define o alinhamento em 4 b.align = 4
def astToLLVM(jast):
    """
    Convert the input json encoded AST to LLVM code properly, using llvm-lite

    JSON jast: the AST in JSON form produced by the main Haskell routine

    Returns the new function name, and an ir module containing the LLVM code
    matching the input json encoded AST
    """
    # create a module for the output
    l_module = ir.Module(name=__file__)

    curBlock = jast
    parents = []
    knownFuncs = ["print", "seq"]
    funcs = []
    annotations = []

    # Traverse the AST matching functions to their corresponding body
    # contents.
    #
    # expression: will contain a function. If it is built in, it has the tag
    #   "BuiltIn" and its "contents". Otherwise it has 3 fields:
    #   "function", "tag", "body".
    # function / body: each has 2 fields, "annotation" and "expression".
    # Tags: "Arrow" defines an input and output, "Constructor" a type,
    #   "Application" a function, "BuiltIn" a function literal.
    try:
        if (curBlock.get("Right")):
            curBlock = curBlock["Right"]["expression"]
            while (True):
                if (curBlock.get("function")):
                    parents.append(curBlock)
                    annot = curBlock["function"]["annotation"]
                    curBlock = curBlock["function"]["expression"]
                    if (annot["tag"] == "Arrow"):
                        # grab arrow types
                        arrow_in = "NONE"
                        arrow_out = "NONE"
                        if (annot["input"]["tag"] == "Constructor"):
                            arrow_in = annot["input"]["contents"]
                        else:
                            pass  # TODO figure out what goes here
                        if (annot["output"]["tag"] == "Constructor"):
                            arrow_out = annot["output"]["contents"]
                        else:
                            pass  # TODO figure out what goes here
                        if (arrow_in != "NONE" and arrow_out != "NONE"):
                            annotations.append((arrow_in, arrow_out))
                    if (curBlock["tag"] == "BuiltIn"):
                        if (curBlock["contents"] in knownFuncs):
                            funcs.append([curBlock["contents"]])
                else:
                    curBlock = parents.pop()
                    curBlock = curBlock["body"]["expression"]
                    if (curBlock["tag"] == "BuiltIn"):
                        funcs[-1].append(curBlock["contents"])
        else:
            # error occurred
            pass
    except Exception:
        # The walk has no explicit stop condition: it ends when a lookup
        # fails (for example popping from the empty `parents` list).
        # Catching Exception rather than everything lets KeyboardInterrupt
        # and SystemExit propagate.
        print("finished parsing AST. discovered code:", funcs)

    # now each function matched with its input/output type. Use for more
    # complex compilation
    func_and_types = list(zip(funcs, annotations))

    # define llvm types
    # TODO: replace hard-coded int with a type extracted from the AST, once
    # type info is merged in
    l_int = ir.IntType(32)
    l_funcType = ir.FunctionType(l_int, [])

    # declare our new function
    funcName = "main"
    l_func = ir.Function(l_module, l_funcType, name=funcName)

    # function entry point
    block = l_func.append_basic_block(name="entry")

    # create a builder for constructing the function code
    builder = ir.IRBuilder(block)

    # Add printing support only if the code uses it anywhere. The previous
    # test was a bare generator expression, which is always truthy.
    if any(f[0] == "print" for f in funcs):
        # Source: https://blog.usejournal.com/writing-your-own-programming-language-and-compiler-with-python-a468970ae6df
        voidptr_ty = ir.IntType(8).as_pointer()
        fmt = "%i \n\0"
        c_fmt = ir.Constant(ir.ArrayType(ir.IntType(8), len(fmt)),
                            bytearray(fmt.encode("utf8")))
        global_fmt = ir.GlobalVariable(l_module, c_fmt.type, name="fstr")
        global_fmt.linkage = 'internal'
        global_fmt.global_constant = True
        global_fmt.initializer = c_fmt
        fmt_arg = builder.bitcast(global_fmt, voidptr_ty)

        printf_ty = ir.FunctionType(ir.IntType(32), [voidptr_ty],
                                    var_arg=True)
        printf = ir.Function(l_module, printf_ty, name="printf")

    # now add the code from our ast
    for f in funcs:
        if (f[0] == "print"):
            if (getTypeFromStr(f[1]) == "int"):
                builder.call(
                    printf,
                    [fmt_arg, ir.Constant(ir.IntType(32), int(f[1]))])
            else:
                # TODO: printing non-int primitives
                pass

    # return 0
    builder.ret(l_int(0))

    return funcName, l_module
def generate_declaration(self, n, status=0, motifiers=None):
    """Generate the LLVM type or storage for declaration node ``n``.

    Wrapper nodes (``ArrayDecl``, ``FuncDecl``, ``PtrDecl``, ``Decl``) are
    unwound recursively and collected in ``motifiers`` until an
    ``IdentifierType`` or ``Struct`` is reached, where the base type is
    resolved and the collected wrappers are applied through
    ``self.get_element``.

    Args:
        n: AST node to process.
        status: What to do with the resolved type.
            status == 0: allocate local
            status == 1: allocate global
            status == 2: return element type
        motifiers: Wrapper nodes collected so far, outermost first. The
            list is consumed in place (its first entry is popped). Defaults
            to a fresh empty list.

    Returns:
        Depending on the branch taken: the resolved type, the created
        ``ir.Function``, or ``None`` when only storage was allocated.
    """
    # A shared `[]` default would be a mutable default argument on a list
    # this method mutates; create a new list per call instead.
    if motifiers is None:
        motifiers = []

    typ = type(n)

    if typ == ast.IdentifierType:
        current = self.get_element(n.spec[0], None)
        decl = motifiers.pop(0)
        name = decl.name
        for m in motifiers:
            current = self.get_element(m, current)

        if status == 0:
            self.g_named_memory[n.name] = \
                self.g_llvm_builder.alloca(current, name=name)
        elif status == 1:
            self.g_global_variable[name] = \
                ir.GlobalVariable(self.g_llvm_module, current, name=name)
            self.g_global_variable[name].initializer = \
                ir.Constant(current, None)
        elif status == 2:
            if len(motifiers) > 0 and type(motifiers[0]) == ast.FuncDecl:
                function = ir.Function(self.g_llvm_module, current,
                                       name=name)
                if motifiers[0].args:
                    paranames = [param.name
                                 for param in motifiers[0].args.params]
                    for arg, arg_name in zip(function.args, paranames):
                        arg.name = arg_name
                        # Add arguments to variable symbol table.
                        self.g_named_argument[arg_name] = arg
                        # Set signed extension for char
                        if (isinstance(arg.type, ir.IntType)
                                and arg.type.width == 8):
                            arg.add_attribute('signext')
                self.g_named_function[name] = function
                return function
            else:
                return current

    elif typ == ast.Struct:
        context = self.g_llvm_module.context
        current = context.get_identified_type(n.name)

        # Define the struct body the first time its name is seen.
        if n.name not in self.g_type_define:
            self.g_type_define[n.name] = [ele.name for ele in n.decls]
            types = [self.generate_declaration(ele, status=2)
                     for ele in n.decls]
            current.set_body(*types)

        decl = motifiers.pop(0)
        name = decl.name
        for m in motifiers:
            current = self.get_element(m, current)

        if status == 0:
            return current
        elif status == 1:
            self.g_global_variable[name] = \
                ir.GlobalVariable(self.g_llvm_module, current, name=name)
            self.g_global_variable[name].initializer = \
                ir.Constant(current, None)
        elif len(motifiers) > 0 and type(motifiers[0]) == ast.FuncDecl:
            function = ir.Function(self.g_llvm_module, current, name=name)
            # NOTE(review): this branch iterates `args` directly, while the
            # IdentifierType branch uses `args.params` and guards against
            # missing arguments; confirm which form is intended.
            paranames = [param.name for param in motifiers[0].args]
            for arg, arg_name in zip(function.args, paranames):
                arg.name = arg_name
                # Add arguments to variable symbol table.
                self.g_named_argument[arg_name] = arg
            self.g_named_function[name] = function
            return function
        else:
            return current

    elif typ in {ast.ArrayDecl, ast.FuncDecl, ast.PtrDecl, ast.Decl}:
        # Record this wrapper and descend into the wrapped type.
        return self.generate_declaration(n.type, status, motifiers + [n])
def codegen(self, node, builder):
    """Emit LLVM IR for ``node`` using ``builder``.

    Dispatches on the AST node class. Expression nodes (``Constant``,
    ``Variable``, ``Array_Element``, ``BinaryOp``, ``UnaryOp``,
    ``Function_Call``) return an IR value. Statement nodes return the builder
    to continue emitting with, which for ``While`` is a new builder
    positioned in the loop's exit block.

    Args:
        node: AST node from ``pc_ast``.
        builder: ``ir.IRBuilder`` positioned where the code should go.
    """
    # ---- Literals ---------------------------------------------------------
    if isinstance(node, pc_ast.Constant):
        if node.dType == float:
            dType = ir.DoubleType()
            return dType(node.value)
        elif node.dType == int:
            dType = ir.IntType(32)
            return dType(node.value)
        elif node.dType == str:
            # Each distinct string literal gets one constant global, cached
            # in self.constants by its text.
            if node.value not in self.constants:
                dType = ir.ArrayType(ir.IntType(8), node.length)
                str_val = ir.Constant(
                    dType, bytearray((node.value + "\0").encode("utf8")))
                str_global = ir.GlobalVariable(self.module, str_val.type,
                                               "_str." + node.value)
                str_global.global_constant = True
                str_global.initializer = str_val
                self.constants[node.value] = str_global
            # Pointer to the first character of the global.
            fmt_ptr = builder.gep(
                self.constants[node.value],
                [ir.IntType(32)(0), ir.IntType(32)(0)],
                inbounds=False,
                name=node.value + "_ptr")
            return fmt_ptr

    # ---- Variable reference -------------------------------------------------
    elif isinstance(node, pc_ast.Variable):
        if node.dType == float:
            ptr_type = ir.PointerType(ir.DoubleType(), addrspace=0)
        elif node.dType == int:
            ptr_type = ir.PointerType(ir.IntType(32), addrspace=0)
        elif node.dType == str:
            ptr_type = ir.PointerType(ir.IntType(8), addrspace=0)
        # Builds a pointer-typed constant whose payload is the quoted name
        # `%"<name>"`.
        # NOTE(review): presumably this refers to the alloca / bitcast
        # emitted elsewhere under the same name - confirm.
        return ptr_type('%"' + node.name + '"')

    # ---- Array declaration --------------------------------------------------
    elif isinstance(node, pc_ast.Array_Declaration):
        self.variables[(node.name, self.scope)] = node.dType
        r = node.elements
        # Variables and array elements evaluate to pointers; load the value
        # for numeric types.
        if isinstance(r, pc_ast.Variable) or isinstance(
                r, pc_ast.Array_Element):
            rvalue = self.codegen(r, builder)
            if r.dType == float or r.dType == int:
                rvalue = builder.load(rvalue, name=r.name + "_val",
                                      align=None)
        else:
            rvalue = self.codegen(r, builder)
        # malloc is called with `rvalue` and the result is cast to a pointer
        # named after the array.
        # NOTE(review): `rvalue` is passed to malloc unscaled; confirm
        # whether it is meant to be an element count or a byte size.
        if node.dType == int:
            raw = builder.call(self.malloc, [rvalue],
                               name=node.name + "_raw")
            builder.bitcast(raw,
                            ir.PointerType(ir.IntType(32), addrspace=0),
                            name=node.name)
        elif node.dType == float:
            raw = builder.call(self.malloc, [rvalue],
                               name=node.name + "_raw")
            builder.bitcast(raw,
                            ir.PointerType(ir.DoubleType(), addrspace=0),
                            name=node.name)
        return builder

    # ---- Array element (yields a pointer to the element) --------------------
    elif isinstance(node, pc_ast.Array_Element):
        if node.dType == int:
            ptr_type = ir.PointerType(ir.IntType(32), addrspace=0)
        elif node.dType == float:
            ptr_type = ir.PointerType(ir.DoubleType(), addrspace=0)
        index = self.codegen(node.index, builder)
        if isinstance(node.index, pc_ast.Variable) or isinstance(
                node.index, pc_ast.Array_Element):
            index = builder.load(index, name="_val", align=None)
        arr = ptr_type('%"' + node.name + '"')
        index_ptr = builder.gep(arr, [index], name="element")
        return index_ptr

    # ---- Assignment ---------------------------------------------------------
    elif isinstance(node, pc_ast.Assignment):
        l, r = node.children()
        lvalue = self.codegen(l, builder)
        if isinstance(r, pc_ast.Variable) or isinstance(
                r, pc_ast.Array_Element):
            rvalue = self.codegen(r, builder)
            if r.dType == float or r.dType == int:
                rvalue = builder.load(rvalue, name=r.name + "_val",
                                      align=None)
        else:
            rvalue = self.codegen(r, builder)
        if node.dType == float:
            # First assignment in this scope: allocate storage under the
            # variable's name.
            if (l.name, self.scope) not in self.variables:
                self.variables[(l.name, self.scope)] = 0
                builder.alloca(ir.DoubleType(), size=None, name=l.name)
            builder.store(rvalue, lvalue, align=None)
        elif node.dType == int:
            if (l.name, self.scope) not in self.variables:
                self.variables[(l.name, self.scope)] = 0
                builder.alloca(ir.IntType(32), size=None, name=l.name)
            builder.store(rvalue, lvalue, align=None)
        elif node.dType == str:
            # For strings self.variables stores the current length; the
            # buffer is reallocated when the length changes and malloc'ed
            # on first use.
            if (l.name, self.scope) in self.variables:
                if self.variables[(l.name, self.scope)] != l.length:
                    builder.call(self.realloc,
                                 [lvalue, ir.IntType(32)(l.length)])
                    self.variables[(l.name, self.scope)] = l.length
            else:
                self.variables[(l.name, self.scope)] = l.length
                builder.call(self.malloc, [ir.IntType(32)(l.length)],
                             name=l.name)
            if isinstance(r, pc_ast.Constant):
                temp = builder.bitcast(rvalue,
                                       ir.PointerType(ir.IntType(8),
                                                      addrspace=0),
                                       name="temp")
            else:
                temp = rvalue
            builder.call(
                self.memcpy,
                [lvalue, temp, ir.IntType(32)(l.length)])
        return builder

    # ---- Binary operators ---------------------------------------------------
    elif isinstance(node, pc_ast.BinaryOp):
        cmp_op = {">", "<", "!=", ">=", '<=', "=="}
        l, r = node.children()
        if isinstance(l, pc_ast.Variable) or isinstance(
                l, pc_ast.Array_Element):
            lvalue = self.codegen(l, builder)
            if l.dType == float or l.dType == int:
                lvalue = builder.load(lvalue, name=l.name + "_val",
                                      align=None)
        else:
            lvalue = self.codegen(l, builder)
        if isinstance(r, pc_ast.Variable) or isinstance(
                r, pc_ast.Array_Element):
            rvalue = self.codegen(r, builder)
            if r.dType == float or r.dType == int:
                rvalue = builder.load(rvalue, name=r.name + "_val",
                                      align=None)
        else:
            rvalue = self.codegen(r, builder)
        # Mixed int/float operands: convert the int side to double.
        if l.dType == float and r.dType == int:
            rvalue = builder.sitofp(rvalue, ir.DoubleType(), name="_casted")
        elif l.dType == int and r.dType == float:
            lvalue = builder.sitofp(lvalue, ir.DoubleType(), name="_casted")
        if node.op == '+':
            if node.dType == str:
                # String concatenation: allocate the result, copy the left
                # operand without its last byte, then copy the right
                # operand after it.
                res = builder.call(self.malloc,
                                   [ir.IntType(32)(node.length)],
                                   name="pt1")
                builder.call(self.memcpy,
                             [res, lvalue, ir.IntType(32)(l.length - 1)])
                pt2 = builder.gep(res, [ir.IntType(32)(l.length - 1)],
                                  "pt2")
                builder.call(
                    self.memcpy,
                    [pt2, rvalue, ir.IntType(32)(r.length)])
            elif node.dType == float:
                res = builder.fadd(lvalue, rvalue, name="t")
            elif node.dType == int:
                res = builder.add(lvalue, rvalue, name="t")
        elif node.op == '-':
            if node.dType == float:
                res = builder.fsub(lvalue, rvalue, name="t")
            elif node.dType == int:
                res = builder.sub(lvalue, rvalue, name="t")
        elif node.op == '*':
            if node.dType == float:
                res = builder.fmul(lvalue, rvalue, name="t")
            elif node.dType == int:
                res = builder.mul(lvalue, rvalue, name="t")
        elif node.op == '/':
            if node.dType == float:
                res = builder.fdiv(lvalue, rvalue, name="t")
            elif node.dType == int:
                res = builder.sdiv(lvalue, rvalue, name="t")
        elif node.op == '%':
            if node.dType == float:
                res = builder.frem(lvalue, rvalue, name="t")
            elif node.dType == int:
                res = builder.srem(lvalue, rvalue, name="t")
        elif node.op in cmp_op:
            if node.dType == float:
                res = builder.fcmp_unordered(node.op, lvalue, rvalue,
                                             name="t")
            elif node.dType == int:
                res = builder.icmp_signed(node.op, lvalue, rvalue, name="t")
        # NOTE(review): `res` is unbound if no branch above matched.
        return res

    # ---- Unary operators (only '-' is handled) ------------------------------
    elif isinstance(node, pc_ast.UnaryOp):
        r = node.right
        if isinstance(r, pc_ast.Variable) or isinstance(
                r, pc_ast.Array_Element):
            rvalue = self.codegen(r, builder)
            if r.dType == float or r.dType == int:
                rvalue = builder.load(rvalue, name=r.name + "_val",
                                      align=None)
        else:
            rvalue = self.codegen(r, builder)
        if node.op == '-':
            if r.dType == int:
                res = builder.neg(rvalue)
            elif r.dType == float:
                # Float negation is emitted as 0 - x.
                res = builder.fsub(ir.DoubleType()(0), rvalue)
        return res

    # ---- Output: printf of the value, then printf of a newline --------------
    elif isinstance(node, pc_ast.Output):
        raw_data = node.children()
        data = self.codegen(raw_data, builder)
        if isinstance(raw_data, pc_ast.Array_Element):
            data = builder.load(data, name=raw_data.name + "_val",
                                align=None)
        if raw_data.dType == float:
            fmt_ptr = builder.gep(
                self.double_fmt,
                [ir.IntType(32)(0), ir.IntType(32)(0)],
                inbounds=False,
                name="fmt_ptr")
            if isinstance(raw_data, pc_ast.Variable):
                data = builder.load(data, name=raw_data.name + "_val",
                                    align=None)
        elif raw_data.dType == str:
            fmt_ptr = builder.gep(
                self.string_fmt,
                [ir.IntType(32)(0), ir.IntType(32)(0)],
                inbounds=False,
                name="fmt_ptr")
        elif raw_data.dType == int:
            fmt_ptr = builder.gep(
                self.int_fmt,
                [ir.IntType(32)(0), ir.IntType(32)(0)],
                inbounds=False,
                name="fmt_ptr")
            if isinstance(raw_data, pc_ast.Variable):
                data = builder.load(data, name=raw_data.name + "_val",
                                    align=None)
        builder.call(self.printf, [fmt_ptr, data], name="print")
        fmt_ptr = builder.gep(
            self.newline_fmt,
            [ir.IntType(32)(0), ir.IntType(32)(0)],
            inbounds=False,
            name="fmt_ptr")
        builder.call(self.printf, [fmt_ptr], name="print")
        return builder

    # ---- If / if-else -------------------------------------------------------
    elif isinstance(node, pc_ast.If):
        condition, if_true, if_false = node.children()
        condition = self.codegen(condition, builder)
        if if_false == None:
            with builder.if_then(condition) as then:
                for statement in if_true:
                    builder = self.codegen(statement, builder)
        else:
            with builder.if_else(condition) as (then, otherwise):
                with then:
                    for statement in if_true:
                        builder = self.codegen(statement, builder)
                with otherwise:
                    for statement in if_false:
                        builder = self.codegen(statement, builder)
        return builder

    # ---- While --------------------------------------------------------------
    elif isinstance(node, pc_ast.While):
        condition, body = node.children()
        loop_body = self.scope.append_basic_block(name="while.body")
        # NOTE(review): the branch into the body is unconditional and the
        # condition is evaluated after the body, so the body runs at least
        # once - confirm this is intended.
        builder.branch(loop_body)
        loop_body_builder = ir.IRBuilder(loop_body)
        for statement in body:
            loop_body_builder = self.codegen(statement, loop_body_builder)
        condition = self.codegen(condition, loop_body_builder)
        loop_exit = self.scope.append_basic_block(name="while.exit")
        loop_exit_builder = ir.IRBuilder(loop_exit)
        loop_body_builder.cbranch(condition, loop_body, loop_exit)
        # Code after the loop is emitted into the exit block.
        return loop_exit_builder

    # ---- Input (scanf into the variable) ------------------------------------
    elif isinstance(node, pc_ast.Input):
        variable = self.codegen(node.variable, builder)
        # Disabled: realloc of the target buffer for strings.
        # builder.call(self.realloc, [variable, ir.IntType(32)(5)])
        self.variables[(node.variable.name, self.scope)] = 0
        if node.dType == int:
            fmt_ptr = builder.gep(
                self.int_fmt,
                [ir.IntType(32)(0), ir.IntType(32)(0)],
                inbounds=False,
                name="fmt_ptr")
        elif node.dType == float:
            fmt_ptr = builder.gep(
                self.double_fmt,
                [ir.IntType(32)(0), ir.IntType(32)(0)],
                inbounds=False,
                name="fmt_ptr")
        elif node.dType == str:
            fmt_ptr = builder.gep(
                self.string_fmt,
                [ir.IntType(32)(0), ir.IntType(32)(0)],
                inbounds=False,
                name="fmt_ptr")
        builder.call(self.scanf, [fmt_ptr, variable], name="scan")
        return builder

    # ---- Function declaration -----------------------------------------------
    elif isinstance(node, pc_ast.Function_Decl):
        # Each entry of node.args is indexed as (name, type).
        args = []
        for arg in node.args:
            if arg[1] == int:
                args.append(ir.IntType(32))
            elif arg[1] == float:
                args.append(ir.DoubleType())
        if node.dType == int:
            dType = ir.IntType(32)
        elif node.dType == float:
            dType = ir.DoubleType()
        fnty = ir.FunctionType(dType, args)
        func = ir.Function(self.module, fnty, name=node.name)
        self.variables[(node.name, self.scope)] = dType
        self.functions[node.name] = func
        for i in range(0, len(func.args)):
            func.args[i].name = node.args[i][0] + "_arg"
        block = func.append_basic_block(name="entry")
        func_builder = ir.IRBuilder(block)
        # The function becomes the current scope while its body is emitted.
        self.scope = func
        # Copy each incoming argument into a stack slot carrying the
        # parameter's name.
        for i in range(0, len(node.args)):
            arg = node.args[i]
            if arg[1] == int:
                dType = ir.IntType(32)
            elif arg[1] == float:
                dType = ir.DoubleType()
            var = func_builder.alloca(dType, size=None, name=arg[0])
            func_builder.store(func.args[i], var, align=None)
        for statement in node.body:
            func_builder = self.codegen(statement, func_builder)
        # Add a default `ret 0` / `ret 0.0` when the body did not end with a
        # terminator.
        if not func_builder.block.is_terminated:
            if node.dType == int:
                dType = ir.IntType(32)
                func_builder.ret(dType(0))
            elif node.dType == float:
                dType = ir.DoubleType()
                func_builder.ret(dType(0.0))
        self.scope = self.main
        # The caller keeps emitting with its own builder.
        return builder

    # ---- Function call ------------------------------------------------------
    elif isinstance(node, pc_ast.Function_Call):
        func = self.functions[node.name]
        args = []
        for arg in node.args:
            built_arg = self.codegen(arg, builder)
            # Arguments are passed by value: load variables and elements.
            if isinstance(arg, pc_ast.Array_Element) or isinstance(
                    arg, pc_ast.Variable):
                built_arg = builder.load(built_arg, name=arg.name + "_val",
                                         align=None)
            args.append(built_arg)
        res = builder.call(func, args, name=node.name + '_call')
        return res

    # ---- Return -------------------------------------------------------------
    elif isinstance(node, pc_ast.Return):
        res = self.codegen(node.data, builder)
        if isinstance(node.data, pc_ast.Array_Element) or isinstance(
                node.data, pc_ast.Variable):
            res = builder.load(res, name="res", align=None)
        builder.ret(res)
        return builder
def gen_code(tree, symbol_table, sema_success):
    """Declare globals and functions in the LLVM module, then emit the bodies.

    Walks ``symbol_table`` once. Every global variable symbol becomes an
    ``ir.GlobalVariable`` recorded in ``info["global_variables"]``; every
    function symbol becomes an ``ir.Function`` with an ``entry`` block and is
    recorded in the module-level ``functions`` list. ``go_through_tree`` is
    then called with ``tree`` and that list, and the finished module is
    written to ``module.ll`` and printed.

    Each symbol is a dict. The keys read here are ``symbol_type``, ``name``,
    ``value_type``, ``scope``, ``parameters`` (items with ``par_type`` and
    ``par_name``), ``dimensions`` and ``return`` (items with ``ret_type``).
    The original notes also list ``declared``, ``inicialized`` and ``used``,
    which this function does not read.

    Args:
        tree: syntax tree, passed through to ``go_through_tree``.
        symbol_table: iterable of symbol dicts as described above.
        sema_success: not used by this function.

    Side effects:
        Renames the ``principal`` symbol to ``main`` in ``symbol_table``,
        appends to ``info["global_variables"]`` and ``functions``, writes
        ``module.ll`` and prints the module.
    """
    global module
    global info

    for symbol in symbol_table:
        if symbol["symbol_type"] == "variable" and symbol["scope"] == "global":
            var_type = symbol["value_type"]
            if var_type == "inteiro":
                element_type = ir.IntType(32)
            elif var_type == "flutuante":
                element_type = ir.FloatType()
            else:
                # Other value types were never declared as globals.
                continue

            dimensions = symbol["dimensions"]
            if len(dimensions) == 0:
                g_type = element_type
            elif len(dimensions) == 1:
                g_type = ir.ArrayType(element_type, int(dimensions[0]))
            else:
                # Only scalars and one-dimensional arrays are supported.
                # Previously this case re-registered the global left over
                # from the preceding symbol (or raised NameError).
                continue

            g = ir.GlobalVariable(module, g_type, symbol["name"])
            if var_type == "flutuante":
                # NOTE(review): only float globals get linkage/alignment;
                # integer globals keep llvmlite's defaults - confirm that
                # this asymmetry is intended.
                g.linkage = "common"
                g.align = 4
            info["global_variables"].append(g)

        elif symbol["symbol_type"] == "function":
            # The source language's entry point is called "principal".
            if symbol["name"] == "principal":
                symbol["name"] = "main"

            arguments_list = [
                ir.IntType(32) if a["par_type"] == "inteiro" else ir.FloatType()
                for a in symbol["parameters"]
            ]

            # No declared return -> void function.
            if len(symbol["return"]) > 0:
                if symbol["return"][0]["ret_type"] == "inteiro":
                    f_ret = ir.IntType(32)
                else:
                    f_ret = ir.FloatType()
            else:
                f_ret = ir.VoidType()

            f_func = ir.FunctionType(f_ret, arguments_list)
            f = ir.Function(module, f_func, name=symbol["name"])
            entryBlock = f.append_basic_block('entry')
            builder = ir.IRBuilder(entryBlock)

            for arg, param in zip(f.args, symbol["parameters"]):
                arg.name = param["par_name"]

            functions.append({
                "function": f,
                "builder": builder,
                "arguments": f.args
            })

    go_through_tree(tree, functions)

    # Context manager so the handle is closed even if the write fails.
    with open('module.ll', 'w') as file:
        file.write(str(module))
    print(module)
from __future__ import print_function

import llvmlite.ir as ll

# Build a module whose main() calls puts() on a global string and returns 0,
# then print the textual IR.

i32 = ll.IntType(32)
i8 = ll.IntType(8)

builder = ll.IRBuilder()
module = ll.Module()
module.triple = ''

hellostr = 'hello, world!'
# puts() reads until a NUL byte, so the stored array needs an explicit
# terminator. Encoding first also keeps bytearray() working on Python 3,
# where bytearray(str) without an encoding raises TypeError.
hellobytes = bytearray(hellostr.encode('ascii') + b'\0')
stringtype = ll.ArrayType(i8, len(hellobytes))
hello = ll.GlobalVariable(module, stringtype, '.str4')
hello.initializer = ll.Constant(stringtype, hellobytes)

# Declaration of the C library function: i32 puts(i8*)
fntype = ll.FunctionType(i32, [i8.as_pointer()])
puts = ll.Function(module, fntype, 'puts')

# Definition of: i32 main()
fntype = ll.FunctionType(i32, [])
func = ll.Function(module, fntype, name='main')
bb_entry = func.append_basic_block()
builder.position_at_end(bb_entry)

zero = ll.Constant(i32, 0)
# gep (0, 0) turns the pointer-to-array into a pointer to its first byte.
builder.call(puts, [hello.gep((zero, zero))])
builder.ret(zero)

print(module)
def add_global_variable(self, ty, name, addrspace=0):
    """Create and return a global variable of type ``ty`` attached to ``self``.

    The requested ``name`` is passed through ``self.get_unique_name`` first,
    and the result is used as the variable's name. ``addrspace`` is forwarded
    unchanged to ``ir.GlobalVariable``.
    """
    unique_name = self.get_unique_name(name)
    global_var = ir.GlobalVariable(self, ty, unique_name, addrspace)
    return global_var
def declara_string_global(self, module, string_parametro):
    """Declare a global i8 array in ``module`` sized to ``string_parametro``.

    The array has ``len(string_parametro)`` elements and the global is always
    named ``"scanf"``. Nothing is returned and no initializer is set.
    """
    # NOTE(review): the string's contents are never stored and the name is
    # fixed, so this looks like an unfinished helper - confirm the intent.
    char_array_type = ir.ArrayType(ir.IntType(8), len(string_parametro))
    ir.GlobalVariable(module, char_array_type, name="scanf")
def addIR(self, pattern):
    """Generate LLVM IR for one reduced grammar pattern.

    Dispatches on ``pattern.tokType``. Expression-like patterns store their
    value in ``pattern.irHandle`` (a list of values for whole-array
    arithmetic), list-like patterns accumulate pointers/handles in
    ``pattern.irHandleList``, and control-flow, declaration and statement
    patterns emit blocks and instructions through ``self.builder``.

    Args:
        pattern: the pattern just reduced by the parser; its children are
            expected to carry the IR handles produced by earlier calls.

    Raises:
        TypeCheckError: a string literal is longer than 256 characters, or
            arrays of different sizes are combined or assigned.
    """
    # Author's working notes (kept for context).
    # Done so far:
    #   - in/out/inout checking for assignments (in's are handled in
    #     typecheck, out and inout are declared beforehand?)
    #   - strings, chars, arrays, type checking/conversions
    #   - if stmt with return inside; putInteger etc.
    #   - initial and advanced array functionality (adding 2 arrays, adding
    #     a value to all elements of an array)
    #   - for loop performs its assignment at the end of each iteration
    #   - "error on line" according to the last reduce
    #   - an if stmt without statements is a parsing error, which is fine
    #   - getBool etc. need pointers, so everything uses pointers
    # Open points:
    #   - type conversions in assignments (not sure this is desired)
    #   - getChar not working; characters are effectively strings
    #   - error handling
    #   - clean up the array handling: a global "is this array" helper, or
    #     a separate token for array access instead of "name"
    #   - arithOp and assignment are huge if statements

    tokType = pattern.tokType
    numChildren = len(pattern.children)

    if tokType in ["term", "relation", "arithOp", "expression"] and numChildren == 1:
        # Single-child pass-through rules simply forward the child's value.
        pattern.irHandle = pattern.children[0].irHandle

    elif tokType == "factor":
        # ("lparen", "expression", "rparen"): "factor",
        # ("minus", "name"): "factor",
        # ("name",): "factor",
        # ("minus", "number"): "factor",
        # ("number",): "factor",
        # ("string",): "factor",
        # ("char",): "factor",
        # ("true",): "factor",
        # ("false",): "factor",
        if numChildren == 1:
            child = pattern.children[0]
            if child.irHandle:
                # pretty much for just name
                pattern.irHandle = child.irHandle
            elif child.tokType in ["true", "false"]:
                # child.name does not work here, tokType does
                typ = self.getType(child.resultType)  # constant
                pattern.irHandle = typ
            elif child.tokType == "string_val":
                # tokType is not sanitized, resultType is
                typ = self.getType("string")
                const = pattern.grabLeafValue(0)
                const = const[1:-1]  # chop off quotes
                # all strings are padded to a fixed length
                # NOTE(review): a 256-character literal becomes 257 bytes
                # once the terminator is added - confirm against the size
                # of the type returned by getType("string").
                if len(const) > 256:
                    raise TypeCheckError("Strings may only be 256 characters long.")
                null = "\0" + "0" * (255-len(const))
                const = const + null
                const = bytearray(const.encode())  # convert to bytearray
                pattern.irHandle = ir.Constant(typ, const)
            elif child.tokType == "char_val":
                typ = self.getType("char")
                const = pattern.grabLeafValue(0)
                const = const[1:-1]  # chop off quotes
                # ord() gives the numeric code of the character
                const = ord(const)
                pattern.irHandle = ir.Constant(typ, const)
        elif numChildren == 2:
            # minus something; the right-hand child carries the IR handle
            child = pattern.children[1]
            pattern.irHandle = self.builder.neg(child.irHandle)
        else:
            # parenthesised expression: already handled in expression
            pattern.irHandle = pattern.children[1].irHandle

    elif tokType == "number":
        const = pattern.grabLeafValue(0)
        typ = self.getType(pattern.resultType)
        # The float type does not accept float strings, so convert first.
        if pattern.resultType == "float":
            const = float(const)
        pattern.irHandle = ir.Constant(typ, const)

    elif tokType == "name":
        name = pattern.grabLeafValue(0)
        if name in self.symTable:
            symItem = self.symTable[name]
            if pattern.arrayExprIRHandle:
                # Indexed access: shift the index by the array's lower bound
                # and load the addressed element.
                loc = pattern.arrayExprIRHandle
                loc = self.builder.sub(loc, ir.Constant(ir.IntType(32), str(symItem.arrayStart)))
                ptr = symItem.irPtr
                zero = ir.Constant(ir.IntType(32), 0)
                ptrInArray = self.builder.gep(ptr, [zero, loc])
                val = self.builder.load(ptrInArray)
            else:
                val = self.builder.load(symItem.irPtr)
            pattern.irHandle = val
        else:
            # Unknown names are expected only while something is being
            # declared, so nothing is emitted here.
            pass

    elif tokType == "term":
        # ("term", "multiply", "factor"): "term",
        # ("term", "divide", "factor"): "term",
        # ("factor",): "term",
        op = pattern.grabLeafValue(1)
        lhs = pattern.children[0].irHandle
        rhs = pattern.children[2].irHandle
        if op == "/":
            pattern.irHandle = self.builder.sdiv(lhs, rhs)
        elif op == "*":
            pattern.irHandle = self.builder.mul(lhs, rhs)

    elif tokType == "relation":
        # ("relation", "less", "term"): "relation",
        # ("relation", "lessequal", "term"): "relation",
        # ("relation", "greater", "term"): "relation",
        # ("relation", "greaterequal", "term"): "relation",
        # ("relation", "equalequal", "term"): "relation",
        # ("relation", "notequal", "term"): "relation",
        # ("term",): "relation",
        # The operator string can be one of <, <=, ==, !=, >= or >.
        op = pattern.grabLeafValue(1)
        lhs = pattern.children[0].irHandle
        rhs = pattern.children[2].irHandle
        # Booleans are widened so both sides of the compare are 32-bit ints.
        if pattern.children[0].resultType == "bool":
            lhs = self.builder.zext(lhs, ir.IntType(32))
        if pattern.children[2].resultType == "bool":
            rhs = self.builder.zext(rhs, ir.IntType(32))
        pattern.irHandle = self.builder.icmp_signed(op, lhs, rhs)

    elif tokType == "arithOp":
        # ("arithOp", "plus", "relation"): "arithOp",
        # ("arithOp", "minus", "relation"): "arithOp",
        # ("arithOp", "minus", "number"): "arithOp",
        # ("arithOp", "minus", "name"): "arithOp",
        # ("relation",): "arithOp",
        op = pattern.grabLeafValue(1)
        opFunc = None
        if op == "+":
            opFunc = self.builder.add
        elif op == "-":
            opFunc = self.builder.sub

        lhsPattern = pattern.children[0]
        rhsPattern = pattern.children[2]
        lhs = lhsPattern.irHandle
        rhs = rhsPattern.irHandle
        # "one is float and one is int, convert both to float"
        mixedTypes = lhsPattern.resultType != rhsPattern.resultType

        # An operand is a whole array when it is a plain variable whose
        # symbol has a positive array size. Each name is taken from its own
        # side so operand order is preserved for subtraction.
        lhsName = lhsPattern.grabLeafValue(0)
        lhsArray = bool(lhsName in self.symTable
                        and self.symTable[lhsName].arraySize > 0
                        and lhsPattern.isVariable())
        rhsName = rhsPattern.grabLeafValue(0)
        rhsArray = bool(rhsName in self.symTable
                        and self.symTable[rhsName].arraySize > 0
                        and rhsPattern.isVariable())

        # NOTE(review): promoted operands are still combined with the
        # integer add/sub builders, and uitofp treats the source as
        # unsigned - float operands presumably need fadd/fsub and sitofp;
        # confirm against getType("float") before relying on mixed types.
        if lhsArray or rhsArray:
            irHandleList = []
            zero = ir.Constant(ir.IntType(32), 0)
            # uitofp needs a type instance, not the FloatType class.
            floatType = ir.FloatType()
            if lhsArray and rhsArray:
                # element-wise operation on two arrays
                lhsItem = self.symTable[lhsName]
                rhsItem = self.symTable[rhsName]
                if lhsItem.arraySize != rhsItem.arraySize:
                    raise TypeCheckError("Tried to assign array to array of different size")
                lhsPtr = lhsItem.irPtr
                rhsPtr = rhsItem.irPtr
                for x in range(0, lhsItem.arraySize):
                    loc = ir.Constant(ir.IntType(32), str(x))
                    lhsPtrInArray = self.builder.gep(lhsPtr, [zero, loc])
                    rhsPtrInArray = self.builder.gep(rhsPtr, [zero, loc])
                    lhsVal = self.builder.load(lhsPtrInArray)
                    rhsVal = self.builder.load(rhsPtrInArray)
                    if mixedTypes:
                        lhsVal = self.builder.uitofp(lhsVal, floatType)
                        rhsVal = self.builder.uitofp(rhsVal, floatType)
                    irHandleList.append(opFunc(lhsVal, rhsVal))
            else:
                # exactly one side is an array, e.g. c := c + 15
                if lhsArray:
                    arrPattern, otherPattern = lhsPattern, rhsPattern
                else:
                    arrPattern, otherPattern = rhsPattern, lhsPattern
                symItem = self.symTable[arrPattern.grabLeafValue(0)]
                ptr = symItem.irPtr
                # The scalar is the same for every element, so read its
                # handle (and promote it) once, outside the loop.
                otherVal = otherPattern.irHandle
                if mixedTypes:
                    otherVal = self.builder.uitofp(otherVal, floatType)
                for x in range(0, symItem.arraySize):
                    loc = ir.Constant(ir.IntType(32), str(x))
                    ptrInArray = self.builder.gep(ptr, [zero, loc])
                    val = self.builder.load(ptrInArray)
                    if mixedTypes:
                        val = self.builder.uitofp(val, floatType)
                    # Keep source operand order: "15 - c" is not "c - 15".
                    if lhsArray:
                        result = opFunc(val, otherVal)
                    else:
                        result = opFunc(otherVal, val)
                    irHandleList.append(result)
            pattern.irHandle = irHandleList
        else:
            # regular scalar addition/subtraction
            if mixedTypes:
                floatType = ir.FloatType()
                lhs = self.builder.uitofp(lhs, floatType)
                rhs = self.builder.uitofp(rhs, floatType)
            pattern.irHandle = opFunc(lhs, rhs)

    elif tokType == "expression":
        # ("expression", "and", "arithOp"): "expression",
        # ("expression", "or", "arithOp"): "expression",
        # ("not", "arithOp"): "expression",
        # ("arithOp",): "expression",
        if numChildren == 2:
            pattern.irHandle = self.builder.not_(pattern.children[1].irHandle)
        else:
            op = pattern.grabLeafValue(1)
            lhs = pattern.children[0].irHandle
            rhs = pattern.children[2].irHandle
            if op == "and":
                pattern.irHandle = self.builder.and_(lhs, rhs)
            elif op == "or":
                pattern.irHandle = self.builder.or_(lhs, rhs)

    elif tokType == "if_start":
        if numChildren == 5:
            # Opening of an if: create the if/else/end blocks, branch on
            # the condition and start emitting into the "if" block.
            cond = pattern.children[2].irHandle
            bb = self.builder.basic_block
            bbif = self.builder.append_basic_block(name=bb.name + '.if')
            bbelse = self.builder.append_basic_block(name=bb.name + '.ifelse')
            bbend = self.builder.append_basic_block(name=bb.name + '.ifend')
            self.builder.cbranch(cond, bbif, bbelse)
            self.condStack.extend([bbend, bbelse, bbif])
            self.enterCond()
        else:
            # Not a new if: the statement itself was emitted by the builder.
            # Keep the handle around in case a phi node is ever needed.
            pattern.irHandle = pattern.children[1].irHandle

    elif tokType == "else_start":
        if numChildren == 2:
            # leave the "if" block and enter the "else" block
            self.exitCond()
        else:
            pattern.irHandle = pattern.children[1].irHandle  # for the phi node

    elif tokType == "if_stmt":
        if self.condStack[-1].is_terminated:
            # A return was emitted inside the current block: drop it, then
            # go into else and then end.
            del self.condStack[-1]
            self.enterCond()
            self.exitCond()
        else:
            if pattern.children[0].tokType == "if_start":
                # no else branch in the source: step through the else block
                self.exitCond()
            # exit out of else, reattach to end
            self.exitCond()
        # now delete end just for good measure
        # NOTE(review): the original indentation was lost; this statement is
        # placed after the if/else so that every if_stmt removes all three
        # blocks pushed by if_start - confirm against the original file.
        del self.condStack[-1]

    elif tokType == "loop_open":
        # ("for", "lparen","name", "assignment", "expression", "semic"): "loop_open",
        # ("loop_open", "expression", "rparen"): "loop_start",
        # ("loop_start", "statement", "semic",): "loop_start",
        # ("loop_start", "end", "for",): "loop_stmt",
        bb = self.builder.basic_block
        bbbranch = self.builder.append_basic_block(name=bb.name + '.loopstart')
        self.builder.branch(bbbranch)
        # small block just for deciding whether to keep looping
        self.builder.position_at_end(bbbranch)
        pattern.irHandle = bbbranch

    elif tokType == "loop_start":
        firstChild = pattern.children[0]
        if firstChild.tokType == "loop_open":
            cond = pattern.children[1].irHandle
            bb = self.builder.basic_block
            bbloop = self.builder.append_basic_block(name=bb.name + '.loopblock')
            bbend = self.builder.append_basic_block(name=bb.name + '.loopend')
            self.builder.cbranch(cond, bbloop, bbend)
            self.loopStack.extend([bbend, bbloop])
            # loop_open's block (assignment, conditional and cbranch) is the
            # handle used to jump back to the start of the loop.
            pattern.irHandle = firstChild.irHandle
            self.enterLoop()
        else:
            pattern.irHandle = firstChild.irHandle

    elif tokType == "loop_stmt":
        # ("name", "assignment", "expression"): "assignment_stmt",
        #
        # The loop's assignment has to run at the END of each iteration, so
        # its expression is re-parsed here to emit the IR again in the new
        # location:
        #   1) descend to loop_open to get the assignment pattern
        #   2) clear all IR handles for the expression
        #   3) re-parse the expression in the new location
        #   4) create a custom assignment pattern and parse it
        # Clearing works because expr, factor, etc. fall back to their 0th
        # child's handle when they have none of their own.
        # Note: this probably does not work for every expression.
        tmpPattern = pattern
        while tmpPattern.tokType != "loop_open":
            tmpPattern = tmpPattern.children[0]
        # ("for", "lparen","name", "assignment", "expression", "semic"): "loop_open",
        namePattern = tmpPattern.children[2]
        exprPattern = tmpPattern.children[4]

        toReParse = []
        tmpPattern = exprPattern
        while tmpPattern.irHandle:
            toReParse.append(tmpPattern)
            tmpPattern.irHandle = None
            tmpPattern = tmpPattern.children[0]
        for tmpPattern in reversed(toReParse):
            self.addIR(tmpPattern)

        assignPattern = Pattern("assignment_stmt", [namePattern, "assignment", exprPattern])
        self.addIR(assignPattern)

        # still in the loop body: jump back to the deciding block
        loopHandle = pattern.children[0].irHandle
        self.builder.branch(loopHandle)
        self.exitLoop()  # position the builder at the end of the loop

    elif tokType == "argument_list":
        argsToAdd = []
        if pattern.children[0].tokType == "expression":
            argsToAdd.append(pattern.children[0])
        else:
            pattern.irHandleList = pattern.children[0].irHandleList
        argsToAdd.append(pattern.children[2])

        # all arguments to a function come in as ptrs to maintain in/out/inout
        for argPattern in argsToAdd:
            name = argPattern.grabLeafValue(0)
            if name in self.symTable and argPattern.isVariable():
                pattern.irHandleList.append(self.symTable[name].irPtr)
            else:
                # turn a constant into a ptr to the constant
                handle = argPattern.irHandle
                ptr = self.builder.alloca(handle.type)
                self.builder.store(handle, ptr)
                pattern.irHandleList.append(ptr)
        pattern.children[0].irHandleList = []  # just save space

    elif tokType == "parameter_list":
        if numChildren == 1:
            pattern.irHandleList.append(pattern.children[0].irHandle)
        else:
            pattern.irHandleList = pattern.children[0].irHandleList
            pattern.irHandleList.append(pattern.children[2].irHandle)
        pattern.children[0].irHandleList = []  # just save space

    elif tokType == "procedure_header":
        # ("procedure", "identifier", "lparen", "rparen",): "procedure_header",
        # ("procedure", "identifier", "lparen", "parameter_list","rparen"): "procedure_header",
        # ("procedure_header", "procedure_body",): "procedure_declaration",
        # ("procedure_header_w_vars", "procedure_body",): "procedure_declaration",
        # ("procedure_header", "declaration", "semic",): "procedure_header_w_vars",
        # ("procedure_header_w_vars", "declaration", "semic",): "procedure_header_w_vars",
        # ("begin",): "procedure_body_start",
        # ("procedure_body_start", "statement", "semic",): "procedure_body_start",
        # ("procedure_body_start", "end", "procedure",): "procedure_body",
        func = None
        void = self.getType("void")
        procName = pattern.grabLeafValue(1)
        if numChildren == 4:
            fnty = ir.FunctionType(void, tuple())
            func = ir.Function(self.module, fnty, name=procName)
        else:
            # Every parameter is passed as a pointer to its type.
            irHandleList = []
            for paramPattern in pattern.myList:
                symItem = self.symTable[paramPattern.name]
                if symItem.arrayType:
                    typ = self.getType(symItem.valType, arr=True, arrSize=symItem.arraySize)
                else:
                    typ = self.getType(paramPattern.resultType)
                irHandleList.append(ir.PointerType(typ))
            fnty = ir.FunctionType(void, irHandleList)
            func = ir.Function(self.module, fnty, name=procName)
            # Inside the body each parameter symbol resolves to its argument.
            for i, paramPattern in enumerate(pattern.myList):
                self.symTable[paramPattern.name].irPtr = func.args[i]
        self.symTable[procName].irPtr = func
        self.enterProc(func)

    elif tokType == "procedure_call":
        argList = []
        procName = pattern.grabLeafValue(0)
        argPattern = pattern.children[2]
        if argPattern.tokType == "expression":
            name = pattern.grabLeafValue(2)
            if name in self.symTable and argPattern.isVariable():
                # this is a variable, not a constant: pass its pointer
                argList.append(self.symTable[name].irPtr)
            else:
                # Also covers array-index arguments, which already have
                # their irHandle set: store the value and pass a pointer.
                handle = argPattern.irHandle
                ptr = self.builder.alloca(handle.type)
                self.builder.store(handle, ptr)
                argList.append(ptr)
        elif argPattern.tokType == "argument_list":
            argList = argPattern.irHandleList
        pattern.irHandle = self.builder.call(self.symTable[procName].irPtr, argList)

    elif tokType == "declaration":
        # ("type_mark", "identifier"): "variable_declaration",
        # ("type_mark", "identifier","lbracket", "number", "colon", "number", "rbracket"): "variable_declaration",
        # ("type_mark", "identifier","lbracket", "expression", "rbracket"): "variable_declaration",
        # ("global", "procedure_declaration",): "declaration",
        # ("global", "variable_declaration",): "declaration",
        # ("procedure_declaration",): "declaration",
        # ("variable_declaration",): "declaration",
        loc = 0
        if numChildren == 2:
            # leading "global" keyword: the declaration is the second child
            loc = 1
        child = pattern.children[loc]
        symItem = self.symTable[child.name]
        if child.tokType == "procedure_declaration":
            self.builder.ret_void()
            self.exitBuilder()
        else:
            # variable declaration
            if symItem.arrayType:
                typ = self.getType(symItem.valType, arr=True, arrSize=symItem.arraySize)
            else:
                typ = self.getType(child.resultType)
            if numChildren == 2:
                pattern.irHandle = ir.GlobalVariable(self.module, typ, child.name)
                pattern.irHandle.linkage = "internal"
            else:
                pattern.irHandle = self.builder.alloca(typ, name=child.name)
            symItem.irPtr = pattern.irHandle

    elif tokType == "assignment_stmt":
        # ("name", "assignment", "expression"): "assignment_stmt",
        result = pattern.children[2].irHandle
        name = pattern.grabLeafValue(0)
        symItem = self.symTable[name]
        ptr = symItem.irPtr
        if symItem.arraySize > 0:
            # Find the "name" pattern on the left side (this may be
            # dangerous if the shape of the tree changes).
            tmpPattern = pattern
            while tmpPattern.tokType != "name":
                tmpPattern = tmpPattern.children[0]
            if len(tmpPattern.children) == 1:
                # left side is only the name of an array: whole-array store
                if isinstance(result, list):
                    # right side added 2 arrays or 1 value to a whole array
                    if symItem.arraySize != len(result):
                        raise TypeCheckError("Tried to assign array to array of different size")
                    zero = ir.Constant(ir.IntType(32), 0)
                    for x in range(0, symItem.arraySize):
                        loc = ir.Constant(ir.IntType(32), str(x))
                        ptrInArray = self.builder.gep(ptr, [zero, loc])
                        self.builder.store(result[x], ptrInArray)
                else:
                    # right side needs to be the name of an equal-size
                    # array; nothing is emitted for that case yet
                    pass
            else:
                # single element: shift the index by the array's lower bound
                loc = tmpPattern.arrayExprIRHandle
                loc = self.builder.sub(loc, ir.Constant(ir.IntType(32), str(symItem.arrayStart)))
                zero = ir.Constant(ir.IntType(32), 0)
                ptrInArray = self.builder.gep(ptr, [zero, loc])
                self.builder.store(result, ptrInArray)
        else:
            self.builder.store(result, ptr)
        if symItem.valType != pattern.children[2].resultType:
            # TODO: incorporate type conversions; the result is not used yet
            self.getTypeConversion(symItem.valType, pattern.children[2].resultType)

    elif tokType == "return_stmt":
        self.builder.ret_void()
def codegen(self, builder, symbolTable, module=None):
    """Emit IR for this binary expression and return the resulting value.

    Both operands are generated first. The operator is then lowered
    according to the operands' ``data_type``: integer, mixed numeric
    (promoted to float), string (equality only) or bool. ``self.data_type``
    is set to the type of the result.

    Args:
        builder: IR builder used to emit instructions.
        symbolTable: provides ``get_ir_type``, ``get_unique_name`` and
            ``get`` for type and symbol lookups.
        module: module that receives temporary globals created for string
            literals.

    Returns:
        The IR value of the expression, or ``None`` on failure; in that
        case the reason is available in ``self.errList``.
    """
    # codegen each operand and check for errors
    LHS_val = self.LHS.codegen(builder, symbolTable, module)
    if not LHS_val:
        self.errList += self.LHS.errList
        return None
    RHS_val = self.RHS.codegen(builder, symbolTable, module)
    if not RHS_val:
        # Report the errors of the operand that actually failed.
        self.errList += self.RHS.errList
        return None

    # handle array element extraction
    if isinstance(self.LHS, VariableExpr):
        if self.LHS.array_index is not None:
            index_val = self.LHS.array_index.codegen(
                builder, symbolTable, module)
            LHS_ptr = builder.gep(LHS_val, [index_val], name="indexPtr")
            LHS_val = builder.load(LHS_ptr, name="arrayElement")
    if isinstance(self.RHS, VariableExpr):
        if self.RHS.array_index is not None:
            index_val = self.RHS.array_index.codegen(
                builder, symbolTable, module)
            RHS_ptr = builder.gep(RHS_val, [index_val], name="indexPtr")
            RHS_val = builder.load(RHS_ptr, name="arrayElement")

    # check operands for type compatibility, then
    # call llvm codegen for given operator and operand types
    numeric_types = [tkn.INT_TYPE, tkn.FLOAT_TYPE]
    relation_ops = {
        tkn.OP_LT: "<",
        tkn.OP_GE: ">=",
        tkn.OP_LE: "<=",
        tkn.OP_GT: ">",
        tkn.OP_EQ: "==",
        tkn.OP_NE: "!="
    }

    if self.LHS.data_type == tkn.INT_TYPE and \
            self.RHS.data_type == tkn.INT_TYPE:
        # INTEGER operations
        self.data_type = tkn.INT_TYPE
        if self.Op == tkn.OP_ADD:
            return builder.add(LHS_val, RHS_val, name="addTmp")
        elif self.Op == tkn.OP_SUB:
            return builder.sub(LHS_val, RHS_val, name="subTmp")
        elif self.Op == tkn.OP_MUL:
            return builder.mul(LHS_val, RHS_val, name="mulTmp")
        elif self.Op == tkn.OP_DIV:
            return builder.sdiv(LHS_val, RHS_val, name="divTmp")
        elif self.Op == tkn.BOOL_AND:
            return builder.and_(LHS_val, RHS_val, name="andTmp")
        elif self.Op == tkn.BOOL_OR:
            return builder.or_(LHS_val, RHS_val, name="orTmp")
        elif self.Op in relation_ops:
            # compare and set data_type to bool
            self.data_type = tkn.BOOL_TYPE
            return builder.icmp_signed(relation_ops[self.Op], LHS_val,
                                       RHS_val, name="boolTmp")

    elif self.LHS.data_type in numeric_types and \
            self.RHS.data_type in numeric_types:
        # promote whichever operand is an integer to float
        if self.LHS.data_type == tkn.INT_TYPE:
            ir_type = symbolTable.get_ir_type(tkn.FLOAT_TYPE)
            LHS_val = builder.sitofp(LHS_val, ir_type, name="floatTmp")
        if self.RHS.data_type == tkn.INT_TYPE:
            ir_type = symbolTable.get_ir_type(tkn.FLOAT_TYPE)
            # Convert the right operand itself, not the left one.
            RHS_val = builder.sitofp(RHS_val, ir_type, name="floatTmp")

        # FLOAT operations
        self.data_type = tkn.FLOAT_TYPE
        if self.Op == tkn.OP_ADD:
            return builder.fadd(LHS_val, RHS_val, name="floatTmp")
        elif self.Op == tkn.OP_SUB:
            return builder.fsub(LHS_val, RHS_val, name="floatTmp")
        elif self.Op == tkn.OP_MUL:
            # the builder method is spelled fmul (all lower case)
            return builder.fmul(LHS_val, RHS_val, name="floatTmp")
        elif self.Op == tkn.OP_DIV:
            return builder.fdiv(LHS_val, RHS_val, name="floatTmp")
        elif self.Op in relation_ops:
            # compare and set data_type to bool
            res = builder.fcmp_unordered(relation_ops[self.Op], LHS_val,
                                         RHS_val, name="boolTmp")
            self.data_type = tkn.BOOL_TYPE
            return res

    elif self.LHS.data_type == tkn.STRING_TYPE and \
            self.RHS.data_type == tkn.STRING_TYPE:
        # STRING operations
        # strings have no operations defined by the language
        # spec other than assignment and equality checking;
        # strings are basically character arrays.
        if self.Op == tkn.OP_EQ:
            self.data_type = tkn.BOOL_TYPE
            args = []
            # parse params for string comparison function
            for param, p_val in [(self.LHS, LHS_val), (self.RHS, RHS_val)]:
                if isinstance(param, LiteralExpr) and \
                        param.data_type == tkn.STRING_TYPE:
                    # create a tmp variable to pass the string literal
                    varName = symbolTable.get_unique_name()
                    valuePtr = ir.GlobalVariable(module, ir.IntType(8),
                                                 varName)
                    valuePtr.initializer = p_val
                    p_val = valuePtr
                args.append(p_val)
            func = symbolTable.get("main.StringEquals").value
            return builder.call(func, args, name="boolTmp")

    elif self.LHS.data_type == tkn.BOOL_TYPE and \
            self.RHS.data_type == tkn.BOOL_TYPE:
        # BOOL operations
        self.data_type = tkn.BOOL_TYPE
        if self.Op == tkn.BOOL_AND:
            return builder.and_(LHS_val, RHS_val, name="boolTmp")
        elif self.Op == tkn.BOOL_OR:
            return builder.or_(LHS_val, RHS_val, name="boolTmp")

    # handle any undefined operations
    err = "operation {} is not defined for operands of type {} and {}".format(
        self.Op, self.LHS.data_type, self.RHS.data_type)
    self.errList.append((self.line, err))
    return None