def new_ir(opcode, rs, rt, rd):
    '''
    build the instruction for `opcode`, simplifying it on the way:

    * a `binops` opcode whose two source operands are both non-registers
      is folded into a single `li`
    * `beq` on two equal operands becomes an unconditional jump
    * `bne` on two constants becomes either a jump or a NOP
    * `neg` of a non-register is folded into a single `li`
    * anything else becomes a plain `IR`; a constant `rs` paired with a
      register `rt` is swapped so that the register comes first

    `rs`/`rt` are the source operands and `rd` the destination (for
    branches `rd` is the jump target).  Returns the instruction object.
    '''
    if opcode in binops and type(rs) != Register and type(rt) != Register:
        # NOTE(review): `eval` runs on text rendered from the operands of
        # the program being compiled; presumably these are always numeric
        # literals -- confirm before feeding untrusted sources through here
        folded = eval('%s %s %s' % (rs, binops[opcode], rt))
        return IR('li', rd=rd, rs=grammar.Int(folded), rt=None)

    if opcode == 'beq' and rs == rt:
        # comparing an operand with itself: the branch is always taken
        return jump(rd)

    if (opcode == 'bne' and type(rs) == grammar.Value and
            type(rt) == grammar.Value):
        if rs.val != rt.val:
            return jump(rd)
        # the constants are equal, the branch is never taken: fallthrough
        return NOP()

    if opcode == 'neg' and type(rs) != Register:
        # same `eval` caveat as above
        val = eval('-%s' % rs)
        return IR('li', rd=rd, rs=grammar.Int(val), rt=None)

    # generic case.  `neg` of a register ends up here as well; it used to
    # leave the result unassigned and crash with UnboundLocalError.
    if type(rs) == grammar.Value and type(rt) == Register:
        # assuming opcode is commutative
        rt, rs = rs, rt
    return IR(opcode, rs, rt, rd)
def memcpy(self, src, dest, size):
    '''
    emit instructions copying `size` bytes from `src` to `dest`: a loop
    moving one word at a time over the part of `size` that is a multiple
    of 4, followed by unrolled single-byte copies for the remainder

    NOTE: both `src` and `dest` are assumed to be aligned
    '''
    # temporaries and labels are requested in a fixed order
    read_ptr = new_reg()
    write_ptr = new_reg()
    limit = new_reg()
    scratch = new_reg()
    loop_top = new_branch()
    loop_exit = new_branch()
    step = grammar.Int(4)

    # number of bytes that can be moved as whole words
    tail_len = size % 4
    word_bytes = size - tail_len

    word_copy = [
        assign(dest=read_ptr, src=src),
        assign(dest=write_ptr, src=dest),
        new_ir('add', rd=limit, rs=read_ptr, rt=grammar.Int(word_bytes)),
        loop_top,
        new_ir('bge', rs=read_ptr, rt=limit, rd=loop_exit),
        new_ir('lw', rt=scratch, rs=read_ptr, rd=grammar.Int(0)),
        new_ir('sw', rt=scratch, rs=write_ptr, rd=grammar.Int(0)),
        new_ir('add', rd=read_ptr, rs=read_ptr, rt=step),
        new_ir('add', rd=write_ptr, rs=write_ptr, rt=step),
        jump(loop_top),
        loop_exit,
    ]
    self.emmit_many(*word_copy)

    # leftover bytes (at most three) are copied one by one, addressed
    # relative to the original `src`/`dest`
    for byte_off in range(word_bytes, size):
        self.emmit_many(
            new_ir('lb', rt=scratch, rs=src, rd=byte_off),
            new_ir('sb', rt=scratch, rs=dest, rd=byte_off))
def fold_const(self):
    '''
    constant folding pass over `self.insts`

    mostly here to keep the generated assembly syntactically valid:
    something like `mul $t0, 1, 4` is replaced by a single `li`
    '''

    def simplify(inst):
        # only IR instructions are candidates; everything else is kept
        if type(inst) is not IR:
            return inst
        if (inst.opcode in binops and type(inst.rs) is not Register
                and type(inst.rt) is not Register):
            # both source operands are non-registers: evaluate right now
            text = '%s %s %s' % (inst.rs, binops[inst.opcode], inst.rt)
            return new_ir('li', rd=inst.rd, rs=grammar.Int(eval(text)),
                          rt=None)
        if inst.opcode == 'neg' and type(inst.rs) is not Register:
            negated = eval('-%s' % inst.rs)
            return new_ir('li', rd=inst.rd, rs=grammar.Int(negated),
                          rt=None)
        return inst

    self.insts = [simplify(inst) for inst in self.insts]
def declare(self, stmt):
    '''
    handle a declaration, with or without an initializer

    A fresh register is bound to the declared name in the current scope.
    For arrays and structs, storage is obtained from `self.alloc` and the
    register is set to `REG_SP` plus the returned offset.  When `stmt` is
    a `DeclrAssign` the initializer is then compiled as an assignment.
    '''
    reg = new_reg()
    has_init = type(stmt) is ast.DeclrAssign
    declr = stmt.declr if has_init else stmt
    var_type = declr.typ
    val = Value(val=reg, typ=var_type, in_mem=False)

    kind = type(var_type)
    if kind is ast.Array or kind is ast.Struct:
        if kind is ast.Array:
            # the value keeps the array type and stays `in_mem=False`
            offset = self.alloc(var_type)
        else:
            val = val._replace(in_mem=True)
            struct = var_type
            if struct.fields is None:
                # no field list given here: use the recorded definition
                struct = self.scope.lookup(struct.name.val)
            else:
                # a full definition: record it for later lookups
                self.scope.add(struct.name.val, struct)
            offset = self.alloc(struct)
        # make the register hold the address of the allocated storage
        self.emmit_one(
            new_ir('add', rd=reg, rs=REG_SP, rt=grammar.Int(offset)))

    self.scope.add(declr.name.val, val)

    if has_init:
        self.exp(ast.Assignment(stmt.declr.name, stmt.val))
def store(self, src, dest, typ, offset=grammar.Int(0)):
    '''
    emit a store of `src` to the address `dest` + `offset`

    `dest` may be an `Offset` pair, in which case it supplies both the
    base and the offset.  `sb` is used when `typ` is 'char', `sw` for
    everything else.
    '''
    if type(dest) is Offset:
        dest, offset = dest
    opcode = 'sb' if typ == 'char' else 'sw'
    self.emmit_one(new_ir(opcode, rt=src, rs=dest, rd=offset))
def emmit_builtin_func(compiler, exp):
    '''
    emit code for a call to a builtin function

    Only `sbrk` is handled: its first argument is moved into register
    `a0`, the `SBRK` code into `REG_RET`, and a syscall is issued; the
    result is a `void` pointer living in `REG_RET`.  Any other function
    name emits nothing and yields None.
    '''
    if exp.func.val != 'sbrk':
        return None

    size = compiler.exp_val(exp.args[0]).val
    setup = [
        assign(dest=Register('a', 0), src=size),
        assign(dest=REG_RET, src=grammar.Int(SBRK)),
        Syscall(),
    ]
    compiler.emmit_many(*setup)
    return Value(typ=ast.Pointer('void'), in_mem=False, val=REG_RET)
def emmit_prefix_exp(compiler, exp):
    '''
    emit code for a prefix expression; the operator is one of
    '*', '&', '-' or '!' ('++' and '--' have already been desugared
    into '+=' and '-=')
    '''
    op = exp.op
    result = new_reg()
    # '&' needs the operand itself (where it lives), not its loaded value
    if op == '&':
        operand = compiler.exp(exp.expr)
    else:
        operand = compiler.exp_val(exp.expr)

    if op == '!':
        compiler.emmit_one(
            new_ir(opcode='seq', rd=result, rs=operand.val, rt=REG_ZERO))
        return Value(val=result, typ=operand.typ, in_mem=False)

    if op == '-':
        compiler.emmit_one(
            new_ir(opcode='sub', rd=result, rs=REG_ZERO, rt=operand.val))
        return Value(val=result, typ=operand.typ, in_mem=False)

    if op == '*':
        # dereference: same location, now typed as the pointee and
        # flagged as living in memory
        return operand._replace(in_mem=True, typ=operand.typ.typ)

    if op != '&':
        return None

    is_array = type(operand.typ) is ast.Array
    if operand.in_mem or is_array:
        pointee = operand.typ.typ if is_array else operand.typ
        val_type = ast.Pointer(pointee)
        if type(operand.val) in (Register, Offset):
            # the register already represents a memory address
            return operand._replace(typ=val_type, in_mem=False)
        # global variable: load its address
        compiler.emmit_one(
            new_ir('la', rt=result, rs=operand.val, rd=grammar.Int(0)))
        return Value(val=result, typ=val_type, in_mem=False)

    # the value is not in memory yet: spill it, then take its address
    slot = grammar.Int(compiler.alloc(operand.typ))
    compiler.store(src=operand.val, dest=REG_SP, offset=slot,
                   typ=operand.typ)
    compiler.emmit_one(new_ir('add', rd=result, rs=REG_SP, rt=slot))
    # rebind the variable so that later uses know it now lives in memory
    compiler.scope.add(exp.expr.val,
                       Value(val=result, in_mem=True, typ=operand.typ))
    return Value(in_mem=False, val=result, typ=ast.Pointer(operand.typ))
def load(self, addr, typ, offset=grammar.Int(0)):
    '''
    emit a load from the address `addr` + `offset` into a fresh register
    and return that register

    `addr` may be an `Offset` pair, in which case it supplies both the
    base and the offset.  `lb` is used when `typ` is 'char', `lw` for
    everything else.
    '''
    if type(addr) is Offset:
        addr, offset = addr
    opcode = 'lb' if typ == 'char' else 'lw'
    loaded = new_reg()
    self.emmit_one(new_ir(opcode, rt=loaded, rs=addr, rd=offset))
    return loaded
def desugar(node):
    '''
    rewrite the AST rooted at `node` into a smaller core language and
    return the new tree; nodes of any other kind are returned unchanged

    * transform while loops into for loops
    * reduce augmented_assignment
      e.g. turn `a += 1` into `a = a + 1`
    * turn a `BinExpr` whose operator is '=' into an `Assignment`
    * turn prefix `++a` / `--a` into `a = a + 1` / `a = a - 1`
    * transform index into dereferencing
      e.g turn `a[offset]` into `*(a+offset)`

    Child sequences (block statements, call arguments, chained
    expressions) are rebuilt as real lists.  They used to be built with
    `map`, which on Python 3 is a one-shot iterator that can be neither
    indexed nor walked twice.
    '''
    typ = type(node)

    if typ is ast.Declaration:
        return ast.Declaration(node.name, desugar(node.typ))
    if typ is ast.DeclrAssign:
        return ast.DeclrAssign(node.declr, desugar(node.val))
    if typ is ast.Function:
        return ast.Function(args=node.args,
                            ret=node.ret,
                            body=desugar(node.body))
    if typ is ast.Block:
        return ast.Block([desugar(stmt) for stmt in node.stmts])
    if typ is ast.For:
        return ast.For(desugar(node.init),
                       desugar(node.cond),
                       desugar(node.cont),
                       desugar(node.body))
    if typ is ast.If:
        return ast.If(desugar(node.cond),
                      desugar(node.conseq),
                      desugar(node.alt))
    if typ is ast.While:
        # a while loop is a for loop with empty init and continuation
        return ast.For(ast.EmptyStmt(),
                       desugar(node.cond),
                       ast.EmptyStmt(),
                       desugar(node.body))
    if typ is ast.BinExpr:
        left = desugar(node.left)
        right = desugar(node.right)
        if node.op in augmented_assignment:
            # drop the trailing '=' to get the underlying operator
            op = node.op[:-1]
            return ast.Assignment(left, ast.BinExpr(op, left, right))
        if node.op == '=':
            return ast.Assignment(left, right)
        return ast.BinExpr(node.op, left, right)
    if typ is ast.PrefixExpr:
        if node.op != '++' and node.op != '--':
            return ast.PrefixExpr(node.op, desugar(node.expr))
        # go through the augmented assignment rule above
        op = '+=' if node.op == '++' else '-='
        return desugar(ast.BinExpr(op, node.expr, grammar.Int(1)))
    if typ is ast.CallExpr:
        return ast.CallExpr(node.func, [desugar(arg) for arg in node.args])
    if typ is ast.ChainExpr:
        return ast.ChainExpr([desugar(expr) for expr in node.exprs])
    if typ is ast.Index:
        addr = ast.BinExpr('+', desugar(node.array), desugar(node.index))
        return ast.PrefixExpr('*', addr)
    if typ is ast.PostfixExpr:
        return ast.PostfixExpr(node.op, desugar(node.expr))
    return node
def emmit_postfix_exp(compiler, exp):
    '''
    emmit instructions for a++ and a--

    The current value of the variable is copied into a fresh register,
    the variable is then updated by one step, and the copy is returned
    as the value of the expression.  The step is the size of the pointee
    for pointers and 1 otherwise; it is negated when `exp.op` is '--'
    (it used to be added unconditionally, so `a--` incremented `a`).
    '''
    name = exp.expr  # MUST be an `IDENT`
    prev_val = new_reg()  # value of the whole expression
    compiler.emmit_one(assign(dest=prev_val, src=compiler.exp_val(name).val))

    var = compiler.scope.lookup(name.val)  # the variable itself
    diff = compiler.sizeof(var.typ.typ) if is_pointer(var) else 1
    if exp.op == '--':
        diff = -diff

    post_val = new_reg()
    compiler.emmit_one(
        new_ir('add', rd=post_val, rs=prev_val, rt=grammar.Int(diff)))

    # write the updated value back to wherever the variable lives
    if var.in_mem:
        compiler.store(src=post_val, dest=var.val, typ=var.typ)
    else:
        compiler.emmit_one(assign(dest=var.val, src=post_val))
    return Value(val=prev_val, in_mem=False, typ=var.typ)
def exp(self, exp):
    '''
    emit the instructions for an expression and return the `Value`
    describing its result (None when `exp` is None)
    '''
    if exp is None:
        return None

    node_type = type(exp)
    if node_type in exp_emmitters:
        # compound expressions have a dedicated emitter
        return exp_emmitters[node_type](self, exp)

    if exp.is_('IDENT'):
        return self.scope.lookup(exp.val)

    if exp.typ != 'STRING':
        # plain value: it stands for itself
        return Value(val=exp, in_mem=False, typ=exp.typ)

    # string literal: remember it under a generated label and load the
    # address of that label
    label = gensym('___str')
    addr_reg = new_reg()
    self.declared_strs.append((label, exp.val))
    self.emmit_one(new_ir('la', rt=addr_reg, rs=label, rd=grammar.Int(0)))
    return Value(val=addr_reg, in_mem=False, typ=ast.Pointer('char'))
def remove_placeholders(self):
    '''
    expand the Call/Prolog/Epilog placeholders in `self.insts` into real
    instructions, now that the registers in use are known
    '''
    cfg = flow.make_cfg(self.insts)
    live_out = flow.get_lives(cfg)

    # for every call (in sorted node order), the live 't' registers that
    # have to survive it
    per_call = [
        sorted({reg for reg in live_out[call] if reg.typ == 't'})
        for call in sorted(cfg.get_calls())
    ]
    widest = max(len(group) for group in per_call) if per_call else 0
    t_offset = self.alloc(size=widest * 4)
    # one slot per s register plus one more (REG_RA is saved with them)
    s_offset = self.alloc(size=len(self.sregs) * 4 + 4)

    rewritten = []
    # `regs` is the register set belonging to the next Call encountered
    cursor = 0
    if per_call:
        regs = per_call[0]
        cursor = 1

    for inst in self.insts:
        kind = type(inst)

        if kind is Call:
            # save, move the stack pointer, call, move it back, restore
            store_regs(regs, t_offset, rewritten)
            rewritten.append(
                new_ir('add', rd=REG_SP, rs=REG_SP,
                       rt=grammar.Int(-mulof4(inst.extra))))
            rewritten.append(
                new_ir('jal', rs=funcname_to_branch(inst.name),
                       rt=None, rd=None))
            rewritten.append(
                new_ir('add', rd=REG_SP, rs=REG_SP,
                       rt=grammar.Int(mulof4(inst.extra))))
            load_regs(regs, t_offset, rewritten)
            if cursor < len(per_call):
                regs = per_call[cursor]
                cursor += 1
            continue

        if kind is Prolog:
            # grow stack and store needed s registers
            grow = new_ir('add', rd=REG_SP, rs=REG_SP,
                          rt=grammar.Int(-self.stack_size()))
            rewritten.append(new_ir('move', rd=REG_FP, rs=REG_SP, rt=None))
            rewritten.append(grow)
            store_regs(self.sregs + [REG_RA], s_offset, rewritten)
            continue

        if kind is Epilog:
            # restore used registers, shrink the stack and return
            load_regs(self.sregs + [REG_RA], s_offset, rewritten)
            shrink = new_ir('add', rd=REG_SP, rs=REG_SP,
                            rt=grammar.Int(self.stack_size()))
            rewritten.append(shrink)
            rewritten.append(
                new_ir('jr', rs=Register('ra', None), rt=None, rd=None))
            continue

        rewritten.append(inst)

    self.insts = rewritten
def emmit_sizeof(compiler, exp):
    '''
    evaluate `sizeof`: the operand is compiled with `compiler.exp` to
    learn its type, and the size of that type is returned as an 'int'
    constant
    '''
    operand = compiler.exp(exp.operand)
    byte_count = compiler.sizeof(operand.typ)
    return Value(val=grammar.Int(byte_count), typ='int', in_mem=False)
def _emmit_logical_and(compiler, exp):
    ''' emmit short-circuiting code for `left && right` '''
    left = compiler.exp_val(exp.left)
    right_branch = new_branch()
    result = new_reg()
    # skip the right operand entirely when the left one is zero
    compiler.emmit_one(
        new_ir('beq', rs=left.val, rt=grammar.Int(0), rd=right_branch))
    right = compiler.exp_val(exp.right)
    right_bool = new_reg()
    compiler.emmit_one(
        new_ir('sne', rd=right_bool, rs=right.val, rt=grammar.Int(0)))
    compiler.emmit_one(right_branch)
    # result = (left != 0) & (right != 0); when the right operand was
    # skipped the first term is 0, so the `and` gives 0 whatever
    # `right_bool` holds
    compiler.emmit_one(
        new_ir('sne', rd=result, rs=left.val, rt=grammar.Int(0)))
    compiler.emmit_one(new_ir('and', rd=result, rs=result, rt=right_bool))
    return Value(val=result,
                 in_mem=False,
                 typ=compiler.binexp_type(left, right))


def _emmit_logical_or(compiler, exp):
    ''' emmit short-circuiting code for `left || right` '''
    left = compiler.exp_val(exp.left)
    right_branch = new_branch()
    result = new_reg()
    # skip the right operand entirely when the left one is non-zero
    compiler.emmit_one(
        new_ir('bne', rs=left.val, rt=grammar.Int(0), rd=right_branch))
    right = compiler.exp_val(exp.right)
    compiler.emmit_one(right_branch)
    compiler.emmit_one(new_ir('or', rs=left.val, rt=right.val, rd=result))
    return Value(val=result,
                 in_mem=False,
                 typ=compiler.binexp_type(left, right))


def _emmit_arith_exp(compiler, exp):
    ''' emmit code for an operator listed in `bin_opcodes` '''
    op = exp.op
    left = compiler.exp_val(exp.left)
    right = compiler.exp_val(exp.right)

    # make sure it's syntactically correct MIPS: a constant must not be
    # the first source operand when the second one is a register
    if type(left.val) == grammar.Value and type(right.val) == Register:
        if op in commutative_ops:
            left, right = right, left
        elif op == '>':
            left, right = right, left
            op = '<'
        elif op == '<':
            left, right = right, left
            op = '>'
        else:
            # we have to load `left` into a register first
            reg = new_reg()
            compiler.emmit_one(assign(dest=reg, src=left.val))
            left = left._replace(val=reg)

    opcode = bin_opcodes[op]
    if op in ('+', '-') and (is_pointer(left) or is_pointer(right)):
        if op == '+' and not is_pointer(left):
            # `int + ptr`: put the pointer on the left so that its type
            # drives the scaling below
            left, right = right, left
        exp_type = ast.Pointer(left.typ.typ)
        # scale the index by the size of the pointee
        offset = new_reg()
        compiler.emmit_one(
            new_ir(opcode='mul',
                   rs=right.val,
                   rt=compiler.sizeof(exp_type.typ),
                   rd=offset))
        # the pointer has to be the first source operand: `ptr - n` is
        # ptr - n * size, not n * size - ptr
        rs_val = left.val
        rt_val = offset
        # NOTE(review): `ptr - ptr` is still treated as pointer minus
        # index; confirm whether that case needs dedicated handling
    else:
        exp_type = left.typ
        rs_val = left.val
        rt_val = right.val

    result = new_reg()
    compiler.emmit_one(
        new_ir(opcode=opcode, rs=rs_val, rt=rt_val, rd=result))
    return Value(val=result, in_mem=False, typ=exp_type)


def _emmit_field_access(compiler, exp):
    '''
    emmit code for `struct.field` and `ptr->field`; the result is an
    `Offset` (base register, field offset) addressing the field
    '''
    # value of the struct will be a register
    # storing addr. of the struct in the memory
    left = compiler.exp(exp.left)
    if type(left.typ) == ast.Pointer and left.in_mem:
        # the pointer itself lives in memory: fetch it first
        struct_addr = compiler.load(left.val, typ=left.typ)
    else:
        struct_addr = left.val

    if exp.op == '.':
        struct = left.typ
    else:  # pointer
        struct = left.typ.typ
    if struct.fields is None:
        # we have to lookup the struct since no struct layout is given
        struct = compiler.scope.lookup(struct.name.val)

    field = exp.right
    assert field.is_('IDENT')
    offset = compiler.offsetof(struct, field.val)
    base = new_reg()
    compiler.emmit_one(assign(dest=base, src=struct_addr))
    field_addr = Offset(base=base, offset=offset)
    typ = field_type(struct, field.val)
    # an array field is represented by its address rather than by a
    # value that has to be loaded
    in_mem = type(typ) != ast.Array
    return Value(val=field_addr, in_mem=in_mem, typ=typ)


def emmit_bin_exp(compiler, exp):
    '''
    emmit instructions for a binary expression and return the `Value`
    describing its result

    `&&` and `||` short-circuit, operators listed in `bin_opcodes` map to
    a single instruction (with index scaling for pointer `+`/`-`), and
    anything else is taken to be a field access ('.' or '->')
    '''
    if exp.op == '&&':
        return _emmit_logical_and(compiler, exp)
    if exp.op == '||':
        return _emmit_logical_or(compiler, exp)
    if exp.op in bin_opcodes:
        return _emmit_arith_exp(compiler, exp)
    # the operator is either '.' or '->'
    return _emmit_field_access(compiler, exp)