def _InsEliminateRem(ins: ir.Ins, fun: ir.Fun) -> Optional[List[ir.Ins]]:
    """Rewrites modulo instructions like so:

    z = a % b
    becomes
    z = a // b
    z = z * b
    z = a - z

    TODO: double check that this works out for corner-cases
    """
    if ins.opcode is not o.REM:
        return None
    ops = ins.operands
    out = []
    tmp_reg1 = fun.GetScratchReg(ops[0].kind, "elim_rem1", True)
    out.append(ir.Ins(o.DIV, [tmp_reg1, ops[1], ops[2]]))
    # NOTE: this implementation for floating mod may have precision issues.
    if ops[0].kind.flavor() is o.DK_FLAVOR_F:
        tmp_reg3 = fun.GetScratchReg(ops[0].kind, "elim_rem3", True)
        out.append(ir.Ins(o.TRUNC, [tmp_reg3, tmp_reg1]))
        tmp_reg1 = tmp_reg3
    tmp_reg2 = fun.GetScratchReg(ops[0].kind, "elim_rem2", True)
    out.append(ir.Ins(o.MUL, [tmp_reg2, tmp_reg1, ops[2]]))
    out.append(ir.Ins(o.SUB, [ops[0], ops[1], tmp_reg2]))
    return out
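# A minimal pure-Python sketch (not Cwerg code) of the identity the rewrite
# above relies on: rem(a, b) == a - trunc(a / b) * b. This assumes the IR's
# integer DIV truncates toward zero, as in C; math.trunc stands in for it.
import math


def _rem_via_div(a: int, b: int) -> int:
    # Mirrors the rewrite: q = div(a, b); r = a - q * b.
    q = math.trunc(a / b)
    return a - q * b


assert _rem_via_div(7, 3) == 1
assert _rem_via_div(-7, 3) == -1  # truncating semantics: sign follows the dividend
assert _rem_via_div(7, -3) == 1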
def InsEliminateCmp(ins: ir.Ins, bbl: ir.Bbl, fun: ir.Fun):
    """Rewrites cmpXX a, b, c, x, y instructions (a := (x XX y) ? b : c) like so:

    canonicalization ensures that a != c

        mov z b
        bXX skip, x, y
        mov z c
    .bbl skip
        mov a z

    TODO: This is very coarse
    """
    assert ins.opcode.kind is o.OPC_KIND.CMP
    bbl_skip = cfg.BblSplit(ins, bbl, fun, bbl.name + "_split")
    bbl_prev = cfg.BblSplit(ins, bbl_skip, fun, bbl.name + "_split")
    assert not bbl_skip.inss
    assert bbl_prev.inss[-1] is ins
    assert bbl_prev.edge_out == [bbl_skip]
    assert bbl_skip.edge_in == [bbl_prev]
    assert bbl_skip.edge_out == [bbl]
    assert bbl.edge_in == [bbl_skip]
    reg = fun.GetScratchReg(ins.operands[0].kind, "cmp", False)
    del bbl_prev.inss[-1]
    ops = ins.operands
    bbl_prev.inss.append(ir.Ins(o.MOV, [reg, ops[1]]))
    bbl_prev.inss.append(
        ir.Ins(o.BEQ if ins.opcode == o.CMPEQ else o.BLT,
               [ops[3], ops[4], bbl]))
    bbl_skip.inss.append(ir.Ins(o.MOV, [reg, ops[2]]))
    bbl.inss.insert(0, ir.Ins(o.MOV, [ops[0], reg]))
    bbl_prev.edge_out.append(bbl)
    bbl.edge_in.append(bbl_prev)
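# For reference, the select semantics being lowered, as a plain-Python sketch
# (hypothetical helper, not Cwerg API; `xx` stands for the comparison XX):
import operator


def _cmp_select(xx, b, c, x, y):
    # Semantics of `cmpXX a, b, c, x, y`: a gets b when the comparison holds,
    # c otherwise, which is exactly what the branchy rewrite preserves.
    return b if xx(x, y) else c


assert _cmp_select(operator.eq, 10, 20, 3, 3) == 10  # cmpeq, condition true
assert _cmp_select(operator.lt, 10, 20, 5, 3) == 20  # cmplt, condition false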
def maybe_add_spill(pos, reg: ir.Reg):
    # Nested helper: relies on the enclosing scope for regs_to_be_spilled,
    # def_count, spill_slots, out_st, out_ld, st_spill, ld_spill, and zero.
    # Defs (operand index < def_count) are stored to their spill slot after
    # the instruction; uses are reloaded before it.
    if reg.name not in regs_to_be_spilled:
        return
    if pos < def_count:
        out_st.append(ir.Ins(st_spill, [spill_slots[reg.name], zero, reg]))
    else:
        out_ld.append(ir.Ins(ld_spill, [reg, spill_slots[reg.name], zero]))
def EmitCall(fun: ir.Fun, bbl: ir.Bbl, call_ins: ir.Ins, op_stack, mem_base,
             callee: ir.Fun):
    """If the wasm function has the signature [a b] -> [c d],
    the top of op_stack must be [a b] before the call and will be
    [c d] after the call.

    The called Cwerg function expects the right-most input to be pushed on
    the stack first, so we get

        pusharg b
        pusharg a

    We always pass mem_base as the first argument, so there is also

        pusharg mem_base

    The called Cwerg function pushes the results, also from right to left:

        [callee] pusharg d
        [callee] pusharg c
    """
    # print(f"########## calling {callee.name} in:{callee.input_types} out:{callee.output_types}")
    # print("# STACK")
    # print(op_stack)
    for dk in reversed(callee.input_types[1:]):
        arg = op_stack.pop(-1)
        assert arg.kind == dk, f"expected type {dk} [{arg}] got {arg.kind}"
        bbl.AddIns(ir.Ins(o.PUSHARG, [arg]))
    bbl.AddIns(ir.Ins(o.PUSHARG, [mem_base]))
    bbl.AddIns(call_ins)
    for dk in callee.output_types:
        dst = GetOpReg(fun, dk, len(op_stack))
        op_stack.append(dst)
        bbl.AddIns(ir.Ins(o.POPARG, [dst]))
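# A tiny pure-Python sketch of the operand-stack discipline described above
# (hypothetical values, just to make the push/pop order concrete):
_op_stack = ["x", "a", "b"]  # signature [a b] -> [c d]
_pushargs = []
for _ in range(2):  # matches `for dk in reversed(callee.input_types[1:])`
    _pushargs.append(_op_stack.pop())
assert _pushargs == ["b", "a"]  # pusharg b, then pusharg a
_op_stack.extend(["c", "d"])  # poparg c, then poparg d
assert _op_stack == ["x", "c", "d"]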
def _InsEliminateMemLoadStore(ins: ir.Ins, fun: ir.Fun, base_kind: o.DK,
                              offset_kind: o.DK) -> Optional[List[ir.Ins]]:
    """This rewrite is usually applied as a prep step by some backends
    to get rid of Mem operands.
    It allows the register allocator to see the scratch register, but it
    obscures the fact that a ld/st is from a static location.

    Note: this function may add local registers which do not affect
    liveness or use-def chains.
    """
    opc = ins.opcode
    ops = ins.operands
    if opc is o.ST_MEM:
        st_offset = ops[1]
        lea_offset = ir.Const(offset_kind, 0)
        if isinstance(st_offset, ir.Const):
            st_offset, lea_offset = lea_offset, st_offset
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        lea = ir.Ins(o.LEA_MEM, [scratch_reg, ops[0], lea_offset])
        ins.Init(o.ST, [scratch_reg, st_offset, ops[2]])
        return [lea, ins]
    elif opc is o.LD_MEM:
        ld_offset = ops[2]
        lea_offset = ir.Const(offset_kind, 0)
        if isinstance(ld_offset, ir.Const):
            ld_offset, lea_offset = lea_offset, ld_offset
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        # TODO: should the zero offset stay with the ld op?
        lea = ir.Ins(o.LEA_MEM, [scratch_reg, ops[1], lea_offset])
        ins.Init(o.LD, [ops[0], scratch_reg, ld_offset])
        return [lea, ins]
    else:
        return None
def GenerateInitGlobalVarsFun(mod: wasm.Module, unit: ir.Unit,
                              addr_type: o.DK) -> ir.Fun:
    fun = unit.AddFun(ir.Fun("init_global_vars_fun", o.FUN_KIND.NORMAL, [], []))
    bbl = fun.AddBbl(ir.Bbl("start"))
    epilog = fun.AddBbl(ir.Bbl("end"))
    epilog.AddIns(ir.Ins(o.RET, []))
    section = mod.sections.get(wasm.SECTION_ID.GLOBAL)
    if not section:
        return fun
    val32 = fun.AddReg(ir.Reg("val32", o.DK.U32))
    val64 = fun.AddReg(ir.Reg("val64", o.DK.U64))
    for n, data in enumerate(section.items):
        kind = o.MEM_KIND.RO if data.global_type.mut is wasm.MUT.CONST else o.MEM_KIND.RW
        mem = unit.AddMem(ir.Mem(f"global_vars_{n}", 16, kind))
        ins = GetInsFromInitializerExpression(data.expr)
        var_type = data.global_type.value_type
        if ins.opcode is wasm_opc.GLOBAL_GET:
            mem.AddData(
                ir.DataBytes(1, b"\0" * (4 if var_type.is_32bit() else 8)))
            src_mem = unit.GetMem(f"global_vars_{int(ins.args[0])}")
            reg = val32 if var_type.is_32bit() else val64
            bbl.AddIns(ir.Ins(o.LD_MEM, [reg, src_mem, ZERO]))
            bbl.AddIns(ir.Ins(o.ST_MEM, [mem, ZERO, reg]))
        elif ins.opcode.kind is wasm_opc.OPC_KIND.CONST:
            mem.AddData(
                ir.DataBytes(1, ExtractBytesFromConstIns(ins, var_type)))
        else:
            assert False, f"unsupported init instruction {ins}"
    return fun
def HandleRotl(dst: ir.Reg, op1: ir.Reg, op2: ir.Reg, bbl: ir.Bbl):
    """Expands rotate-left into shifts: dst = (op1 << op2) | (op1 >> (w - op2)).

    Note: op1 and op2 are clobbered in the process.
    """
    assert dst != op1
    assert dst.kind is o.DK.U32 or dst.kind is o.DK.U64, f"{dst}"
    bitwidth = ir.Const(dst.kind, dst.kind.bitwidth())
    bbl.AddIns(ir.Ins(o.SHL, [dst, op1, op2]))
    bbl.AddIns(ir.Ins(o.SUB, [op2, bitwidth, op2]))
    bbl.AddIns(ir.Ins(o.SHR, [op1, op1, op2]))  # here the unsigned requirement kicks in
    bbl.AddIns(ir.Ins(o.OR, [dst, dst, op1]))
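# A minimal pure-Python sketch of the identity being emitted, assuming the
# shift amount is already in [0, w). (n == 0 is a corner case: the emitted
# code then shifts right by w, which the backend must tolerate.)
def _rotl(x: int, n: int, w: int = 32) -> int:
    # Same shape as the emitted IR: (x << n) | (x >> (w - n)), with an
    # explicit mask because Python ints are unbounded.
    mask = (1 << w) - 1
    return ((x << n) | ((x & mask) >> (w - n))) & mask


assert _rotl(0x80000001, 1) == 0x00000003
assert _rotl(0x12345678, 8) == 0x34567812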
def GenerateMemcpyFun(unit: ir.Unit, addr_type: o.DK) -> ir.Fun:
    """Creates a byte-wise memcpy function that copies backwards (cnt-1 .. 0)."""
    fun = unit.AddFun(
        ir.Fun("$memcpy", o.FUN_KIND.NORMAL, [],
               [addr_type, addr_type, o.DK.U32]))
    dst = fun.AddReg(ir.Reg("dst", addr_type))
    src = fun.AddReg(ir.Reg("src", addr_type))
    cnt = fun.AddReg(ir.Reg("cnt", o.DK.U32))
    data = fun.AddReg(ir.Reg("data", o.DK.U8))

    prolog = fun.AddBbl(ir.Bbl("prolog"))
    loop = fun.AddBbl(ir.Bbl("loop"))
    epilog = fun.AddBbl(ir.Bbl("epilog"))

    prolog.AddIns(ir.Ins(o.POPARG, [dst]))
    prolog.AddIns(ir.Ins(o.POPARG, [src]))
    prolog.AddIns(ir.Ins(o.POPARG, [cnt]))
    prolog.AddIns(ir.Ins(o.BRA, [epilog]))  # jump straight to the loop test
    loop.AddIns(ir.Ins(o.SUB, [cnt, cnt, ONE]))
    loop.AddIns(ir.Ins(o.LD, [data, src, cnt]))
    loop.AddIns(ir.Ins(o.ST, [dst, cnt, data]))
    epilog.AddIns(ir.Ins(o.BLT, [ZERO, cnt, loop]))  # loop while 0 < cnt
    epilog.AddIns(ir.Ins(o.RET, []))
    return fun
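# The control flow above is a bottom-tested loop; in plain Python the
# generated code behaves like this sketch (hypothetical helper, not part
# of the code base):
def _memcpy_backwards(dst: bytearray, src: bytes, cnt: int) -> None:
    # Same shape as the emitted IR: test at the bottom, copy byte cnt-1
    # down to byte 0.
    while 0 < cnt:
        cnt -= 1
        dst[cnt] = src[cnt]


_buf = bytearray(4)
_memcpy_backwards(_buf, b"abcd", 4)
assert bytes(_buf) == b"abcd"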
def testMov(self):
    mov = o.Opcode.Lookup("mov")
    ins = ir.Ins(mov, [reg_u32, reg_u32])
    sanity.InsCheckConstraints(ins)
    with self.assertRaises(sanity.ParseError):
        ins = ir.Ins(mov, [reg_s32, reg_u32])
        sanity.InsCheckConstraints(ins)
    with self.assertRaises(sanity.ParseError):
        ins = ir.Ins(mov, [reg_s32, reg_s8])
        sanity.InsCheckConstraints(ins)
def ProcessLine(token: List, unit: ir.Unit, fun: Optional[ir.Fun],
                cpu_regs: Dict[str, ir.Reg]):
    opc = o.Opcode.Table.get(token[0])
    if not opc:
        raise ir.ParseError(f"unknown opcode/directive: {token}")
    if opc == o.LEA:
        if token[2] in unit.fun_syms:
            opc = o.LEA_FUN
        elif token[2] in unit.mem_syms:
            opc = o.LEA_MEM
        elif token[2] in fun.stk_syms:
            opc = o.LEA_STK
        if opc != o.LEA_FUN and len(token) < 4:
            token.append("0")
    if len(token) - 1 != len(opc.operand_kinds):
        raise ir.ParseError("operand number %d mismatch: %s" %
                            (len(opc.operand_kinds), token))
    if token[0].startswith("."):
        operands = RetrieveActualOperands(unit, fun, opc, token, {})
        directive = DIR_DISPATCHER[token[0]]
        directive(unit, operands)
    else:
        assert fun is not None
        operands = RetrieveActualOperands(unit, fun, opc, token, cpu_regs)
        assert fun.bbls, "no bbl specified to contain instruction"
        bbl = fun.bbls[-1]
        ins = ir.Ins(opc, operands)
        bbl.AddIns(ins)
        sanity.InsCheckConstraints(ins)
def ProcessLine(token: List, unit: ir.Unit, fun: Optional[ir.Fun],
                cpu_regs: Dict[str, ir.Reg]):
    opc = o.Opcode.Table.get(token[0])
    if not opc:
        raise ir.ParseError(f"unknown opcode/directive: {token}")
    # TODO: get rid of this hack which simplifies FrontEndC/translate.py a bit
    if opc == o.LEA:
        if token[2] in fun.reg_syms:
            pass  # in case the register name shadows a global
        elif token[2] in unit.fun_syms:
            opc = o.LEA_FUN
        elif token[2] in unit.mem_syms:
            opc = o.LEA_MEM
        elif token[2] in fun.stk_syms:
            opc = o.LEA_STK
        if opc != o.LEA_FUN and len(token) < 4:
            token.append("0")
    if len(token) - 1 != len(opc.operand_kinds):
        raise ir.ParseError(
            f"operand number {len(opc.operand_kinds)} mismatch: {token}")
    if token[0].startswith("."):
        operands = RetrieveActualOperands(unit, fun, opc, token, {})
        directive = DIR_DISPATCHER[token[0]]
        directive(unit, operands)
    else:
        assert fun is not None
        operands = RetrieveActualOperands(unit, fun, opc, token, cpu_regs)
        assert fun.bbls, "no bbl specified to contain instruction"
        bbl = fun.bbls[-1]
        ins = ir.Ins(opc, operands)
        bbl.AddIns(ins)
        sanity.InsCheckConstraints(ins)
def _InsLimitShiftAmounts(ins: ir.Ins, fun: ir.Fun,
                          width: int) -> Optional[List[ir.Ins]]:
    """Ensures the shift amount of a SHL/SHR of the given width lies in [0, width).

    Constant amounts are reduced mod width; register amounts are masked with
    width - 1, which is equivalent for the power-of-two widths used here.
    """
    opc = ins.opcode
    ops = ins.operands
    if (opc is not o.SHL and opc is not o.SHR) or ops[0].kind.bitwidth() != width:
        return None
    amount = ops[2]
    if isinstance(amount, ir.Const):
        if 0 <= amount.value < width:
            return None
        else:
            ops[2] = ir.Const(amount.kind, amount.value % width)
            return [ins]
    else:
        tmp = fun.GetScratchReg(amount.kind, "shift", False)
        mask = ir.Ins(o.AND, [tmp, amount, ir.Const(amount.kind, width - 1)])
        ins.Init(opc, [ops[0], ops[1], tmp])
        return [mask, ins]
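# The mask trick relies on width being a power of two; a quick pure-Python
# check of the equivalence the rewrite assumes:
for _width in (32, 64):
    for _amount in (0, 1, 31, 32, 33, 63, 64, 100):
        # masking with width-1 equals reducing mod width
        assert _amount & (_width - 1) == _amount % _width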
def testNoChange(self):
    x = ir.Reg("x", o.DK.S32)
    target = ir.Bbl("target")
    bbl = ir.Bbl("bbl")
    bbl.live_out.add(x)
    bbl.AddIns(ir.Ins(O("poparg"), [x]))
    bbl.AddIns(ir.Ins(O("blt"), [target, ir.OffsetConst(1), x]))
    DumpBbl(bbl)
    live_ranges = liveness.BblGetLiveRanges(bbl, None, bbl.live_out, False)
    live_ranges.sort()
    lr_cross_bbl = [lr for lr in live_ranges if lr.is_cross_bbl()]
    lr_lac = [lr for lr in live_ranges
              if liveness.LiveRangeFlag.LAC in lr.flags]
    assert len(live_ranges) == 1
    assert len(lr_cross_bbl) == 1
    assert len(lr_lac) == 0, f"{lr_lac}"
def testAdd2(self):
    ld = o.Opcode.Lookup("ld")
    ins = ir.Ins(ld, [reg_u32, reg_a32, reg_u32])
    sanity.InsCheckConstraints(ins)
    ins = ir.Ins(ld, [reg_u32, reg_a32, reg_u16])
    sanity.InsCheckConstraints(ins)
    ins = ir.Ins(ld, [reg_u32, reg_a32, reg_s32])
    sanity.InsCheckConstraints(ins)
    with self.assertRaises(sanity.ParseError):
        ins = ir.Ins(ld, [reg_u32, reg_u32, reg_u32])
        sanity.InsCheckConstraints(ins)
    with self.assertRaises(sanity.ParseError):
        ins = ir.Ins(ld, [reg_u32, reg_a32, reg_a32])
        sanity.InsCheckConstraints(ins)
def _InsAddNop1ForCodeSel(ins: ir.Ins, fun: ir.Fun) -> Optional[List[ir.Ins]]:
    opc = ins.opcode
    if opc is o.SWITCH:
        # needs scratch to compute the jmp address into
        scratch = fun.GetScratchReg(o.DK.C32, "switch", False)
        return [ir.Ins(o.NOP1, [scratch]), ins]
    elif (opc is o.CONV and o.RegIsInt(ins.operands[0].kind) and
          ins.operands[1].kind.flavor() == o.DK_FLAVOR_F):
        # need scratch for intermediate flt result
        # we know the result cannot be wider than 32bit for this CPU
        scratch = fun.GetScratchReg(o.DK.F32, "ftoi", False)
        return [ir.Ins(o.NOP1, [scratch]), ins]
    elif (opc is o.CONV and o.RegIsInt(ins.operands[1].kind) and
          ins.operands[0].kind is o.DK.F64):
        # need scratch for intermediate flt result
        # we know the result cannot be wider than 32bit for this CPU
        scratch = fun.GetScratchReg(o.DK.F32, "itof", False)
        return [ir.Ins(o.NOP1, [scratch]), ins]
    return [ins]
def _InsRewriteIntoAABForm(ins: ir.Ins, fun: ir.Fun) -> Optional[List[ir.Ins]]:
    """Forces the dst of a two-address instruction to also be its first src."""
    ops = ins.operands
    if not NeedsAABFromRewrite(ins):
        return None
    if ops[0] == ops[1]:
        ops[0].flags |= ir.REG_FLAG.TWO_ADDRESS
        return None
    if ops[0] == ops[2] and o.OA.COMMUTATIVE in ins.opcode.attributes:
        ir.InsSwapOps(ins, 1, 2)
        ops[0].flags |= ir.REG_FLAG.TWO_ADDRESS
        return [ins]
    else:
        reg = fun.GetScratchReg(ins.operands[0].kind, "aab", False)
        reg.flags |= ir.REG_FLAG.TWO_ADDRESS
        return [
            ir.Ins(o.MOV, [reg, ops[1]]),
            ir.Ins(ins.opcode, [reg, reg, ops[2]]),
            ir.Ins(o.MOV, [ops[0], reg]),
        ]
def FunAddUnconditionalBranches(fun: ir.Fun):
    """Re-inserts necessary unconditional branches.

    This is roughly the inverse of FunRemoveUnconditionalBranches.
    """
    bbls = []
    for n, bbl in enumerate(fun.bbls):
        bbls.append(bbl)
        if bbl.inss and not bbl.inss[-1].opcode.has_fallthrough():
            continue
        if len(bbl.edge_out) == 1:
            assert len(fun.bbls) > n
            succ = bbl.edge_out[0]
            if n + 1 == len(fun.bbls) or fun.bbls[n + 1] != succ:
                bbl.inss.append(ir.Ins(o.BRA, [succ]))
            continue
        assert len(bbl.edge_out) == 2
        cond_bra = bbl.inss[-1]
        assert cond_bra.opcode.kind is o.OPC_KIND.COND_BRA, (
            f"not a cond bra: {cond_bra} bbl: {bbl}")
        target = cond_bra.operands[2]
        other = bbl.edge_out[0] if target == bbl.edge_out[1] else bbl.edge_out[1]
        succ = fun.bbls[n + 1]
        if succ in bbl.edge_out:
            # target == other can happen if the cond_bra is pointless
            if target == succ and target != other:
                InsFlipCondBra(cond_bra, target, other)
            continue
        else:
            bbl_bra = ir.Bbl(NewDerivedBblName(bbl.name, "bra", fun))
            bbl_bra.inss.append(ir.Ins(o.BRA, [other]))
            fun.bbl_syms[bbl_bra.name] = bbl_bra
            # forward fallthrough to new bbl
            if bbl.inss:
                InsMaybePatchNewSuccessor(bbl.inss[-1], other, bbl_bra)
            bbl.ReplaceEdgeOut(other, bbl_bra)
            bbl_bra.AddEdgeOut(other)
            bbls.append(bbl_bra)
    fun.bbls = bbls
    fun.flags &= ~ir.FUN_FLAG.CFG_NOT_LINEAR
def FinalizeResultsCopy(self, op_stack, bbl: ir.Bbl, fun: ir.Fun):
    # print(f"@@ FinalizeCopy {fun.name}: {self.num_results}")
    dst_pos = self.stack_start + len(self.result_types)
    src_pos = len(op_stack)
    for _ in range(len(self.result_types)):
        dst_pos -= 1
        src_pos -= 1
        op = op_stack[src_pos]
        dst_reg = GetOpReg(fun, op.kind, dst_pos)
        if dst_reg != op:
            bbl.AddIns(ir.Ins(o.MOV, [dst_reg, op]))
def _InsRewriteFltImmediates(ins: ir.Ins, fun: ir.Fun,
                             unit: ir.Unit) -> Optional[List[ir.Ins]]:
    """Replaces floating point constants with loads from read-only memory."""
    inss = []
    for n, op in enumerate(ins.operands):
        if isinstance(op, ir.Const) and op.kind.flavor() is o.DK_FLAVOR_F:
            mem = unit.FindOrAddConstMem(op)
            tmp = fun.GetScratchReg(op.kind, "flt_const", True)
            inss.append(ir.Ins(o.LD_MEM, [tmp, mem, _ZERO_OFFSET]))
            ins.operands[n] = tmp
    if inss:
        return inss + [ins]
    return None
def InsEliminateImmediateViaMem(ins: ir.Ins, pos: int, fun: ir.Fun,
                                unit: ir.Unit, addr_kind: o.DK,
                                offset_kind: o.DK) -> List[ir.Ins]:
    """Rewrites an instruction with an immediate as a load of the immediate.

    This is useful if the target architecture does not support an immediate
    for that instruction, or the immediate is too large.

    This optimization is run rather late and may already see machine registers.
    """
    # support of PUSHARG would require additional work because they need to stay consecutive
    assert ins.opcode is not o.PUSHARG
    const = ins.operands[pos]
    mem = unit.FindOrAddConstMem(const)
    tmp_addr = fun.GetScratchReg(addr_kind, "mem_const_addr", True)
    lea_ins = ir.Ins(o.LEA_MEM, [tmp_addr, mem, ir.Const(offset_kind, 0)])
    tmp = fun.GetScratchReg(const.kind, "mem_const", True)
    ld_ins = ir.Ins(o.LD, [tmp, tmp_addr, ir.Const(offset_kind, 0)])
    ins.operands[pos] = tmp
    return [lea_ins, ld_ins]
def _InsEliminateCopySign(ins: ir.Ins, fun: ir.Fun) -> Optional[List[ir.Ins]]:
    """Rewrites copysign instructions like so:

    z = copysign a b
    becomes
    aa = int(a) & 0x7f...f
    bb = int(b) & 0x80...0
    z = flt(aa | bb)
    """
    if ins.opcode is not o.COPYSIGN:
        return None
    ops = ins.operands
    out = []
    if ops[0].kind == o.DK.F32:
        kind = o.DK.U32
        sign = 1 << 31
    else:
        kind = o.DK.U64
        sign = 1 << 63
    mask = sign - 1
    tmp_src1 = fun.GetScratchReg(kind, "elim_copysign1", False)
    out.append(ir.Ins(o.BITCAST, [tmp_src1, ops[1]]))
    out.append(ir.Ins(o.AND, [tmp_src1, tmp_src1, ir.Const(kind, mask)]))
    #
    tmp_src2 = fun.GetScratchReg(kind, "elim_copysign2", False)
    out.append(ir.Ins(o.BITCAST, [tmp_src2, ops[2]]))
    out.append(ir.Ins(o.AND, [tmp_src2, tmp_src2, ir.Const(kind, sign)]))
    #
    out.append(ir.Ins(o.OR, [tmp_src1, tmp_src1, tmp_src2]))
    out.append(ir.Ins(o.BITCAST, [ops[0], tmp_src1]))
    return out
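# A self-contained Python illustration of the same bit manipulation for F32
# (struct stands in for BITCAST; hypothetical helper, not Cwerg code):
import struct


def _copysign_f32(a: float, b: float) -> float:
    # Bitcast to u32, combine the magnitude bits of a with the sign bit
    # of b, then bitcast back, mirroring the emitted AND/AND/OR sequence.
    ai = struct.unpack("<I", struct.pack("<f", a))[0]
    bi = struct.unpack("<I", struct.pack("<f", b))[0]
    z = (ai & 0x7fffffff) | (bi & 0x80000000)
    return struct.unpack("<f", struct.pack("<I", z))[0]


assert _copysign_f32(1.5, -0.0) == -1.5
assert _copysign_f32(-2.0, 3.0) == 2.0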
def _InsRewriteDivRemShifts(ins: ir.Ins, fun: ir.Fun) -> Optional[List[ir.Ins]]:
    opc = ins.opcode
    ops = ins.operands
    if opc is o.DIV and ops[0].kind.flavor() != o.DK_FLAVOR_F:
        # note: we could leave it to the register allocator to pick a CpuReg for ops[2]
        # but then we would somehow have to ensure that the reg is NOT rdx.
        # By forcing rcx for ops[2] we sidestep the issue
        rax = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rax"], ops[0].kind)
        rcx = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rcx"], ops[0].kind)
        rdx = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rdx"], ops[0].kind)
        return [
            ir.Ins(o.MOV, [rax, ops[1]]),
            ir.Ins(o.MOV, [rcx, ops[2]]),
            # note: the notion of src/dst regs is murky here
            ir.Ins(o.DIV, [rdx, rax, rcx]),
            ir.Ins(o.MOV, [ops[0], rax]),  # the quotient ends up in rax
        ]
    elif opc is o.REM and ops[0].kind.flavor() != o.DK_FLAVOR_F:
        rax = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rax"], ops[0].kind)
        rcx = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rcx"], ops[0].kind)
        rdx = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rdx"], ops[0].kind)
        return [
            ir.Ins(o.MOV, [rax, ops[1]]),
            ir.Ins(o.MOV, [rcx, ops[2]]),
            # note: the notion of src/dst regs is murky here
            ir.Ins(o.DIV, [rdx, rax, rcx]),
            ir.Ins(o.MOV, [ops[0], rdx]),  # the remainder ends up in rdx
        ]
    elif opc in {o.SHR, o.SHL} and isinstance(ops[2], ir.Reg):
        rcx = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rcx"], ops[0].kind)
        mov = ir.Ins(o.MOV, [rcx, ops[2]])
        ops[2] = rcx
        mask = _SHIFT_MASK.get(ops[0].kind)
        if mask:
            return [mov, ir.Ins(o.AND, [rcx, rcx, mask]), ins]
        else:
            return [mov, ins]
    else:
        return None
def InsSpillRegs(ins: ir.Ins, fun: ir.Fun, zero_const,
                 reg_to_stk) -> Optional[List[ir.Ins]]:
    before: List[ir.Ins] = []
    after: List[ir.Ins] = []
    num_defs = ins.opcode.def_ops_count()
    for n, reg in reversed(list(enumerate(ins.operands))):
        if not isinstance(reg, ir.Reg):
            continue
        stk = reg_to_stk.get(reg)
        if stk is None:
            continue
        if n < num_defs:
            # spilled def: write the scratch reg back to the stack slot
            scratch = fun.GetScratchReg(reg.kind, "stspill", False)
            ins.operands[n] = scratch
            after.append(ir.Ins(o.ST_STK, [stk, zero_const, scratch]))
        else:
            # spilled use: reload from the stack slot into a scratch reg
            scratch = fun.GetScratchReg(reg.kind, "ldspill", False)
            ins.operands[n] = scratch
            before.append(ir.Ins(o.LD_STK, [scratch, stk, zero_const]))
    if before or after:
        return before + [ins] + after
    else:
        return None
def _InsMoveImmediatesToMemory(ins: ir.Ins, fun: ir.Fun, unit: ir.Unit,
                               kind: o.DK) -> Optional[List[ir.Ins]]:
    inss = []
    for n, op in enumerate(ins.operands):
        if isinstance(op, ir.Const) and op.kind is kind:
            mem = unit.FindOrAddConstMem(op)
            tmp = fun.GetScratchReg(kind, "mem_const", True)
            # TODO: pass the offset kind as a parameter
            inss.append(ir.Ins(o.LD_MEM, [tmp, mem, ir.Const(o.DK.U32, 0)]))
            ins.operands[n] = tmp
    if inss:
        return inss + [ins]
    return None
def GenerateInitDataFun(mod: wasm.Module, unit: ir.Unit, memcpy: ir.Fun,
                        addr_type: o.DK) -> typing.Optional[ir.Fun]:
    fun = unit.AddFun(
        ir.Fun("init_data_fun", o.FUN_KIND.NORMAL, [], [addr_type]))
    bbl = fun.AddBbl(ir.Bbl("start"))
    epilog = fun.AddBbl(ir.Bbl("end"))
    epilog.AddIns(ir.Ins(o.RET, []))
    section = mod.sections.get(wasm.SECTION_ID.DATA)
    mem_base = fun.AddReg(ir.Reg("mem_base", addr_type))
    bbl.AddIns(ir.Ins(o.POPARG, [mem_base]))
    if not section:
        return None
    offset = fun.AddReg(ir.Reg("offset", o.DK.S32))
    src = fun.AddReg(ir.Reg("src", addr_type))
    dst = fun.AddReg(ir.Reg("dst", addr_type))
    for n, data in enumerate(section.items):
        assert data.memory_index == 0
        assert isinstance(data.offset, wasm.Expression)
        ins = GetInsFromInitializerExpression(data.offset)
        init = unit.AddMem(ir.Mem(f"global_init_mem_{n}", 16, o.MEM_KIND.RO))
        init.AddData(ir.DataBytes(1, data.init))
        if ins.opcode is wasm_opc.GLOBAL_GET:
            src_mem = unit.GetMem(f"global_vars_{int(ins.args[0])}")
            bbl.AddIns(ir.Ins(o.LD_MEM, [offset, src_mem, ZERO]))
        elif ins.opcode is wasm_opc.I32_CONST:
            bbl.AddIns(ir.Ins(o.MOV, [offset, ir.Const(o.DK.S32, ins.args[0])]))
        else:
            assert False, f"unsupported init instruction {ins}"
        bbl.AddIns(ir.Ins(o.LEA, [dst, mem_base, offset]))
        bbl.AddIns(ir.Ins(o.LEA_MEM, [src, init, ZERO]))
        bbl.AddIns(ir.Ins(o.PUSHARG, [ir.Const(o.DK.U32, len(data.init))]))
        bbl.AddIns(ir.Ins(o.PUSHARG, [src]))
        bbl.AddIns(ir.Ins(o.PUSHARG, [dst]))
        bbl.AddIns(ir.Ins(o.BSR, [memcpy]))
    return fun
def _InsRewriteOutOfBoundsOffsetsStk(ins: ir.Ins,
                                     fun: ir.Fun) -> Optional[List[ir.Ins]]:
    # Note, we can handle any LEA_STK as long as it is adding a constant
    if ins.opcode not in {o.LD_STK, o.ST_STK}:
        return None
    mismatches = isel_tab.FindtImmediateMismatchesInBestMatchPattern(ins)
    assert mismatches != isel_tab.MATCH_IMPOSSIBLE, \
        f"could not match opcode {ins} {ins.operands}"
    if mismatches == 0:
        return None
    inss = []
    tmp = fun.GetScratchReg(o.DK.A32, "imm_stk", False)
    if ins.opcode is o.ST_STK:
        # note we do not have to worry about ins.operands[2] being Const
        # because those were dealt with by FunEliminateImmediateStores
        assert mismatches == (1 << 1)
        if isinstance(ins.operands[1], ir.Const):
            inss.append(
                ir.Ins(o.LEA_STK, [tmp, ins.operands[0], ins.operands[1]]))
            ins.Init(o.ST, [tmp, _ZERO_OFFSET, ins.operands[2]])
        else:
            inss.append(ir.Ins(o.LEA_STK, [tmp, ins.operands[0], _ZERO_OFFSET]))
            ins.Init(o.ST, [tmp, ins.operands[1], ins.operands[2]])
    else:
        assert ins.opcode is o.LD_STK
        assert mismatches & (1 << 2)
        if isinstance(ins.operands[2], ir.Const):
            inss.append(
                ir.Ins(o.LEA_STK, [tmp, ins.operands[1], ins.operands[2]]))
            ins.Init(o.LD, [ins.operands[0], tmp, _ZERO_OFFSET])
        else:
            inss.append(ir.Ins(o.LEA_STK, [tmp, ins.operands[1], _ZERO_OFFSET]))
            ins.Init(o.LD, [ins.operands[0], tmp, ins.operands[2]])
    inss.append(ins)
    return inss
def _InsEliminateStkLoadStoreWithRegOffset(
        ins: ir.Ins, fun: ir.Fun, base_kind: o.DK,
        offset_kind: o.DK) -> Optional[List[ir.Ins]]:
    """This rewrite is usually applied as a prep step by some backends
    to get rid of Stk operands.
    It allows the register allocator to see the scratch register, but it
    obscures the fact that a memory access is a stack access.

    Note: a stack address already implies an `sp+offset` addressing mode,
    and RISC ISAs do not usually support an `sp+offset+reg` addressing mode.
    """
    opc = ins.opcode
    ops = ins.operands
    if opc is o.ST_STK and isinstance(ops[1], ir.Reg):
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        lea = ir.Ins(o.LEA_STK, [scratch_reg, ops[0], ir.Const(offset_kind, 0)])
        ins.Init(o.ST, [scratch_reg, ops[1], ops[2]])
        return [lea, ins]
    elif opc is o.LD_STK and isinstance(ops[2], ir.Reg):
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        lea = ir.Ins(o.LEA_STK, [scratch_reg, ops[1], ir.Const(offset_kind, 0)])
        ins.Init(o.LD, [ops[0], scratch_reg, ops[2]])
        return [lea, ins]
    elif opc is o.LEA_STK and isinstance(ops[2], ir.Reg):
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        # TODO: maybe reverse the order so that we can tell that ops[0]
        # holds a stack location
        lea = ir.Ins(o.LEA_STK, [scratch_reg, ops[1], ir.Const(offset_kind, 0)])
        ins.Init(o.LEA, [ops[0], scratch_reg, ops[2]])
        return [lea, ins]
    else:
        return None
def _InsEliminateImmediateStores(ins: ir.Ins,
                                 fun: ir.Fun) -> Optional[List[ir.Ins]]:
    """RISC architectures typically do not allow immediates to be stored directly.

    TODO: maybe allow zero immediates
    """
    opc = ins.opcode
    ops = ins.operands
    if opc in {o.ST_MEM, o.ST, o.ST_STK} and isinstance(ops[2], ir.Const):
        scratch_reg = fun.GetScratchReg(ops[2].kind, "st_imm", False)
        mov = ir.Ins(o.MOV, [scratch_reg, ops[2]])
        ops[2] = scratch_reg
        return [mov, ins]
    else:
        return None
def InsEliminateImmediate(ins: ir.Ins, pos: int, fun: ir.Fun) -> ir.Ins:
    """Rewrites an instruction with an immediate as a load of the immediate
    followed by a pure register version of that instruction, e.g.

        mul z = a 666
    becomes
        mov scratch = 666
        mul z = a scratch

    This is useful if the target architecture does not support an immediate
    for that instruction, or the immediate is too large.

    This optimization is run rather late and may already see machine
    registers like the sp. Hence we are careful to use and update
    ins.orig_operand.
    """
    const = ins.operands[pos]
    assert isinstance(const, ir.Const)
    reg = fun.GetScratchReg(const.kind, "imm", True)
    ins.operands[pos] = reg
    return ir.Ins(o.MOV, [reg, const])
def InsEliminateImmediateViaMov(ins: ir.Ins, pos: int, fun: ir.Fun) -> ir.Ins:
    """Rewrites an instruction with an immediate as a mov of the immediate:

        mul z = a 666
    becomes
        mov scratch = 666
        mul z = a scratch

    This is useful if the target architecture does not support an immediate
    for that instruction, or the immediate is too large.

    This optimization is run rather late and may already see machine registers.

    Ideally, the generated mov instruction should be iselectable by the
    target architecture or else another pass may be necessary.
    """
    # support of PUSHARG would require additional work because they need to stay consecutive
    assert ins.opcode is not o.PUSHARG
    const = ins.operands[pos]
    assert isinstance(const, ir.Const)
    reg = fun.GetScratchReg(const.kind, "imm", True)
    ins.operands[pos] = reg
    return ir.Ins(o.MOV, [reg, const])
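# A hedged sketch of how a backend pass might drive the helper above over a
# Bbl. The driver and the _needs_immediate_fix predicate are hypothetical;
# the real backends use their own isel-driven checks.
def _needs_immediate_fix(ins: ir.Ins, pos: int) -> bool:
    # Stand-in for a target-specific test, e.g. "the immediate does not fit
    # the instruction's encoding". PUSHARGs are skipped because
    # InsEliminateImmediateViaMov does not support them.
    return ins.opcode is not o.PUSHARG


def _BblRewriteImmediates(bbl: ir.Bbl, fun: ir.Fun) -> None:
    # Rebuild the instruction list, prepending the mov produced by
    # InsEliminateImmediateViaMov for each offending operand.
    inss = []
    for ins in bbl.inss:
        for pos, op in enumerate(ins.operands):
            if isinstance(op, ir.Const) and _needs_immediate_fix(ins, pos):
                inss.append(InsEliminateImmediateViaMov(ins, pos, fun))
        inss.append(ins)
    bbl.inss = inss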