def _InsLimitShiftAmounts(ins: ir.Ins, fun: ir.Fun, width: int) -> Optional[List[ir.Ins]]:
    """Ensure the shift amount operand of a SHL/SHR is in [0, width).

    Constant amounts are reduced modulo `width`. Register amounts are
    masked with `width - 1` via an AND into a scratch register, which
    the register allocator will see.
    """
    opc = ins.opcode
    ops = ins.operands
    if (opc is not o.SHL and opc is not o.SHR) or ops[0].kind.bitwidth() != width:
        return None
    amount = ops[2]
    if isinstance(amount, ir.Const):
        if 0 <= amount.value < width:
            return None
        else:
            ops[2] = ir.Const(amount.kind, amount.value % width)
            return [ins]
    else:
        tmp = fun.GetScratchReg(amount.kind, "shift", False)
        mask = ir.Ins(o.AND, [tmp, amount, ir.Const(amount.kind, width - 1)])
        ins.Init(opc, [ops[0], ops[1], tmp])
        return [mask, ins]

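# Example of the rewrite above (illustrative only; register names are made up,
# notation loosely follows the docstrings in this file):
#
#   shl x:U32 = a 35        becomes   shl x:U32 = a 3          # 35 % 32
#   shl x:U32 = a b         becomes   and shift = b 31
#                                     shl x:U32 = a shift
#
# Masking with `width - 1` is equivalent to reducing modulo `width` because
# `width` is a power of two:
#
#   for width in (8, 16, 32, 64):
#       for amount in range(4 * width):
#           assert amount & (width - 1) == amount % width
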
def _InsPropagateConsts(ins: ir.Ins, _fun: ir.Fun):
    changes = 0
    for n, d in enumerate(ins.operand_defs):
        if d is ir.INS_INVALID or not isinstance(d, ir.Ins) or d.opcode != o.MOV:
            continue
        value = d.operands[1]
        if not isinstance(value, ir.Const):
            continue
        ins.operands[n] = value
        ins.operand_defs[n] = ir.INS_INVALID
        changes += 1
    if changes == 0:
        return None
    else:
        return [ins]

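# Example of the constant propagation above (illustrative only; names made up):
#
#   mov x = 5
#   add y = x z             becomes   add y = 5 z
#
# The mov itself is not removed here; it merely becomes dead. Cleaning it up
# is assumed to be the job of a later dead-code elimination pass, not of this
# function.
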
def _InsEliminateMemLoadStore(ins: ir.Ins, fun: ir.Fun, base_kind: o.DK,
                              offset_kind: o.DK) -> Optional[List[ir.Ins]]:
    """This rewrite is usually applied as a prep step by some backends
    to get rid of Mem operands.
    It allows the register allocator to see the scratch register but
    it will obscure the fact that a ld/st is from a static location.

    Note: this function may add local registers; this does not affect
    liveness or use-def chains.

    st.mem -> lea.mem + st   # st offset will be zero or a register,
                             # which should be iselectable
    ld.mem -> lea.mem + ld   # ld offset will be zero or a register,
                             # which should be iselectable
    lea.mem (with reg offset) -> lea.mem (zero offset) + lea
    """
    opc = ins.opcode
    ops = ins.operands
    if opc is o.ST_MEM:
        st_offset = ops[1]
        lea_offset = ir.Const(offset_kind, 0)
        if isinstance(st_offset, ir.Const):
            st_offset, lea_offset = lea_offset, st_offset
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        lea = ir.Ins(o.LEA_MEM, [scratch_reg, ops[0], lea_offset])
        ins.Init(o.ST, [scratch_reg, st_offset, ops[2]])
        return [lea, ins]
    elif opc is o.LD_MEM:
        ld_offset = ops[2]
        lea_offset = ir.Const(offset_kind, 0)
        if isinstance(ld_offset, ir.Const):
            ld_offset, lea_offset = lea_offset, ld_offset
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        # TODO: should the zero offset stay with the ld op?
        lea = ir.Ins(o.LEA_MEM, [scratch_reg, ops[1], lea_offset])
        ins.Init(o.LD, [ops[0], scratch_reg, ld_offset])
        return [lea, ins]
    elif opc is o.LEA_MEM and isinstance(ops[2], ir.Reg):
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        # TODO: maybe reverse the order so that we can tell that ops[0]
        # holds a mem location
        lea = ir.Ins(o.LEA_MEM, [scratch_reg, ops[1], ir.Const(offset_kind, 0)])
        ins.Init(o.LEA, [ops[0], scratch_reg, ops[2]])
        return [lea, ins]
    else:
        return None

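# Example of the ld.mem case above (illustrative only; names are made up):
#
#   ld.mem x = counter 8    becomes   lea.mem base = counter 8
#                                     ld x = base 0
#
# A constant offset is folded into the lea so the remaining ld only needs a
# plain `reg + 0` address, which RISC-style load instructions can encode.
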
def InsMaybeReplaceDefReg(ins: ir.Ins, reg_old: ir.Reg, reg_new: ir.Reg) -> int:
    """If the ins writes reg_old, replace it with reg_new."""
    if ins.opcode.def_ops_count() == 0:
        return 0
    assert ins.opcode.def_ops_count() == 1
    op = ins.operands[0]
    if op == reg_old:
        ins.operands[0] = reg_new
        return 1
    return 0

def _InsTryLoadStoreSimplify(ins: ir.Ins, defs: ir.REG_DEF_MAP) -> int:
    if ins.opcode not in {o.ST, o.LD, o.LEA}:
        return 0
    # do we have a suitable ins defining the base of the ld/st?
    base_pos = 0 if ins.opcode is o.ST else 1
    ins_base = ins.operand_defs[base_pos]
    if ins_base is ir.INS_INVALID or not isinstance(ins_base, ir.Ins):
        return 0
    new_opc = _LOAD_STORE_BASE_REWRITE.get((ins_base.opcode, ins.opcode))
    if new_opc is None:
        return 0
    # print("")
    # print("#", serialize.InsRenderToAsm(ins))
    # print("#", serialize.InsRenderToAsm(ins_base))
    # is the original base still available at the ld/st?
    base = ins_base.operands[1]
    base_def = ins_base.operand_defs[1]
    if not _DefAvailable(base, base_def, defs):
        # print("# base not avail ", base, base_def)
        return 0
    # can the new offset be determined and is it available?
    offset, offset_def = _CombinedOffset(ins, ins_base)
    if offset is None or not _DefAvailable(offset, offset_def, defs):
        return 0
    if base_pos == 0:  # store
        defs = [base_def, offset_def, ins.operand_defs[2]]
        ins.Init(new_opc, [base, offset, ins.operands[2]])
        ins.operand_defs = defs
    else:
        defs = [ins.operand_defs[0], base_def, offset_def]
        assert base_pos == 1
        ins.Init(new_opc, [ins.operands[0], base, offset])
        ins.operand_defs = defs
    # print("#>>>> ", serialize.InsRenderToAsm(ins))
    return 1

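# Example of the simplification above (illustrative only; names are made up,
# and it is an assumption that _LOAD_STORE_BASE_REWRITE contains an entry
# (LEA_MEM, LD) -> LD_MEM and that _CombinedOffset folds two constants):
#
#   lea.mem base = counter 8
#   ld x = base 4           becomes   ld.mem x = counter 12
#
# This is only legal when both the original base and the combined offset are
# still available (not redefined) at the ld/st, which is exactly what the
# _DefAvailable checks guard against.
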
def _InsRewriteFltImmediates(ins: ir.Ins, fun: ir.Fun,
                             unit: ir.Unit) -> Optional[List[ir.Ins]]:
    inss = []
    for n, op in enumerate(ins.operands):
        if isinstance(op, ir.Const) and op.kind.flavor() is o.DK_FLAVOR_F:
            mem = unit.FindOrAddConstMem(op)
            tmp = fun.GetScratchReg(op.kind, "flt_const", True)
            inss.append(ir.Ins(o.LD_MEM, [tmp, mem, _ZERO_OFFSET]))
            ins.operands[n] = tmp
    if inss:
        return inss + [ins]
    return None

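# Example of the float immediate rewrite above (illustrative only; names are
# made up, and $const_3_5 stands for the Mem object FindOrAddConstMem returns):
#
#   add x:F64 = y 3.5       becomes   ld.mem flt_const:F64 = $const_3_5 0
#                                     add x:F64 = y flt_const
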
def InsSpillRegs(ins: ir.Ins, fun: ir.Fun, zero_const,
                 reg_to_stk) -> Optional[List[ir.Ins]]:
    before: List[ir.Ins] = []
    after: List[ir.Ins] = []
    num_defs = ins.opcode.def_ops_count()
    for n, reg in reversed(list(enumerate(ins.operands))):
        if not isinstance(reg, ir.Reg):
            continue
        stk = reg_to_stk.get(reg)
        if stk is None:
            continue
        if n < num_defs:
            # the reg is written: have the ins write a scratch reg instead
            # and store it back to the stack slot afterwards
            scratch = fun.GetScratchReg(reg.kind, "stspill", False)
            ins.operands[n] = scratch
            after.append(ir.Ins(o.ST_STK, [stk, zero_const, scratch]))
        else:
            # the reg is read: load the stack slot into a scratch reg first
            scratch = fun.GetScratchReg(reg.kind, "ldspill", False)
            ins.operands[n] = scratch
            before.append(ir.Ins(o.LD_STK, [scratch, stk, zero_const]))
    if before or after:
        return before + [ins] + after
    else:
        return None

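# Example of spilling a used reg (illustrative only; names are made up,
# assuming reg b was assigned the stack slot $b_slot in reg_to_stk):
#
#   add a = b c             becomes   ld.stk ldspill = $b_slot 0
#                                     add a = ldspill c
#
# A spilled def goes the other way: the ins writes a scratch reg and a
# st.stk placed after it copies the scratch reg into the stack slot.
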
def _InsMoveImmediatesToMemory(ins: ir.Ins, fun: ir.Fun, unit: ir.Unit,
                               kind: o.DK) -> Optional[List[ir.Ins]]:
    inss = []
    for n, op in enumerate(ins.operands):
        if isinstance(op, ir.Const) and op.kind is kind:
            mem = unit.FindOrAddConstMem(op)
            tmp = fun.GetScratchReg(kind, "mem_const", True)
            # TODO: pass the offset kind as a parameter
            inss.append(ir.Ins(o.LD_MEM, [tmp, mem, ir.Const(o.DK.U32, 0)]))
            ins.operands[n] = tmp
    if inss:
        return inss + [ins]
    return None

def InsMaybeReplaceUseReg(ins: ir.Ins, reg_old: ir.Reg, reg_new: ir.Reg) -> int:
    """If the ins reads reg_old, replace it with reg_new."""
    it = enumerate(ins.operands)
    # skip the register-writing operands
    for _ in range(ins.opcode.def_ops_count()):
        next(it)
    count = 0
    for n, op in it:
        if op == reg_old:
            ins.operands[n] = reg_new
            count += 1
    return count

def _InsEliminateStkLoadStoreWithRegOffset(
        ins: ir.Ins, fun: ir.Fun, base_kind: o.DK,
        offset_kind: o.DK) -> Optional[List[ir.Ins]]:
    """This rewrite is usually applied as a prep step by some backends
    to get rid of Stk operands.
    It allows the register allocator to see the scratch register but
    it will obscure the fact that a memory access is a stack access.

    Note: a stack address already implies an `sp+offset` addressing mode,
    and RISC ISAs do not usually support an `sp+offset+reg` addressing mode.
    """
    opc = ins.opcode
    ops = ins.operands
    if opc is o.ST_STK and isinstance(ops[1], ir.Reg):
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        lea = ir.Ins(o.LEA_STK, [scratch_reg, ops[0], ir.Const(offset_kind, 0)])
        ins.Init(o.ST, [scratch_reg, ops[1], ops[2]])
        return [lea, ins]
    elif opc is o.LD_STK and isinstance(ops[2], ir.Reg):
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        lea = ir.Ins(o.LEA_STK, [scratch_reg, ops[1], ir.Const(offset_kind, 0)])
        ins.Init(o.LD, [ops[0], scratch_reg, ops[2]])
        return [lea, ins]
    elif opc is o.LEA_STK and isinstance(ops[2], ir.Reg):
        scratch_reg = fun.GetScratchReg(base_kind, "base", False)
        # TODO: maybe reverse the order so that we can tell that ops[0]
        # holds a stack location
        lea = ir.Ins(o.LEA_STK, [scratch_reg, ops[1], ir.Const(offset_kind, 0)])
        ins.Init(o.LEA, [ops[0], scratch_reg, ops[2]])
        return [lea, ins]
    else:
        return None

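# Example of the st.stk case above (illustrative only; names are made up).
# A stack store with a register offset cannot use the implied sp+offset
# addressing mode, so the stack address is materialized first:
#
#   st.stk $slot idx = v    becomes   lea.stk base = $slot 0
#                                     st base idx = v
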
def _InsRewriteOutOfBoundsOffsetsStk(ins: ir.Ins,
                                     fun: ir.Fun) -> Optional[List[ir.Ins]]:
    # Note, we can handle any LEA_STK as long as it is adding a constant
    if ins.opcode not in {o.LD_STK, o.ST_STK}:
        return None
    mismatches = isel_tab.FindtImmediateMismatchesInBestMatchPattern(ins)
    assert mismatches != isel_tab.MATCH_IMPOSSIBLE, f"could not match opcode {ins} {ins.operands}"
    if mismatches == 0:
        return None
    inss = []
    tmp = fun.GetScratchReg(o.DK.A32, "imm_stk", False)
    if ins.opcode is o.ST_STK:
        # note we do not have to worry about ins.operands[2] being Const
        # because those were dealt with by FunEliminateImmediateStores
        assert mismatches == (1 << 1)
        if isinstance(ins.operands[1], ir.Const):
            inss.append(ir.Ins(o.LEA_STK, [tmp, ins.operands[0], ins.operands[1]]))
            ins.Init(o.ST, [tmp, _ZERO_OFFSET, ins.operands[2]])
        else:
            inss.append(ir.Ins(o.LEA_STK, [tmp, ins.operands[0], _ZERO_OFFSET]))
            ins.Init(o.ST, [tmp, ins.operands[1], ins.operands[2]])
    else:
        assert ins.opcode is o.LD_STK
        assert mismatches & (1 << 2)
        if isinstance(ins.operands[2], ir.Const):
            inss.append(ir.Ins(o.LEA_STK, [tmp, ins.operands[1], ins.operands[2]]))
            ins.Init(o.LD, [ins.operands[0], tmp, _ZERO_OFFSET])
        else:
            inss.append(ir.Ins(o.LEA_STK, [tmp, ins.operands[1], _ZERO_OFFSET]))
            ins.Init(o.LD, [ins.operands[0], tmp, ins.operands[2]])
    inss.append(ins)
    return inss

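# Example of the ld.stk case above (illustrative only; names are made up, and
# 4096 is just an assumed out-of-range immediate for the target's ld pattern).
# The oversized offset is folded into a lea.stk so the ld gets a zero offset:
#
#   ld.stk x = $slot 4096   becomes   lea.stk imm_stk = $slot 4096
#                                     ld x = imm_stk 0
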
def InsEliminateImmediate(ins: ir.Ins, pos: int, fun: ir.Fun) -> ir.Ins:
    """Rewrite an instruction with an immediate as a load of the immediate
    followed by a pure register version of that instruction, e.g.

    mul z = a 666
    becomes
    mov scratch = 666
    mul z = a scratch

    This is useful if the target architecture does not support an immediate
    for that instruction, or the immediate is too large.

    This optimization is run rather late and may already see machine
    registers like the sp. Hence we are careful to use and update
    ins.orig_operand.
    """
    const = ins.operands[pos]
    assert isinstance(const, ir.Const)
    reg = fun.GetScratchReg(const.kind, "imm", True)
    ins.operands[pos] = reg
    return ir.Ins(o.MOV, [reg, const])

def InsEliminateImmediateViaMem(ins: ir.Ins, pos: int, fun: ir.Fun,
                                unit: ir.Unit, addr_kind: o.DK,
                                offset_kind: o.DK) -> List[ir.Ins]:
    """Rewrite an instruction with an immediate as a load of the immediate
    from a constant Mem.

    This is useful if the target architecture does not support an immediate
    for that instruction, or the immediate is too large.

    This optimization is run rather late and may already see machine registers.
    """
    # supporting PUSHARG would require additional work because PUSHARGs
    # need to stay consecutive
    assert ins.opcode is not o.PUSHARG
    const = ins.operands[pos]
    mem = unit.FindOrAddConstMem(const)
    tmp_addr = fun.GetScratchReg(addr_kind, "mem_const_addr", True)
    lea_ins = ir.Ins(o.LEA_MEM, [tmp_addr, mem, ir.Const(offset_kind, 0)])
    tmp = fun.GetScratchReg(const.kind, "mem_const", True)
    ld_ins = ir.Ins(o.LD, [tmp, tmp_addr, ir.Const(offset_kind, 0)])
    ins.operands[pos] = tmp
    return [lea_ins, ld_ins]

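# Example of the rewrite above (illustrative only; names are made up, and
# $const_666 stands for the Mem object returned by FindOrAddConstMem):
#
#   mul z = a 666           becomes   lea.mem mem_const_addr = $const_666 0
#                                     ld mem_const = mem_const_addr 0
#                                     mul z = a mem_const
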
def InsEliminateImmediateViaMov(ins: ir.Ins, pos: int, fun: ir.Fun) -> ir.Ins:
    """Rewrite an instruction with an immediate as a mov of the immediate:

    mul z = a 666
    becomes
    mov scratch = 666
    mul z = a scratch

    This is useful if the target architecture does not support an immediate
    for that instruction, or the immediate is too large.

    This optimization is run rather late and may already see machine registers.
    Ideally, the generated mov instruction should be iselectable by the target
    architecture or else another pass may be necessary.
    """
    # supporting PUSHARG would require additional work because PUSHARGs
    # need to stay consecutive
    assert ins.opcode is not o.PUSHARG
    const = ins.operands[pos]
    assert isinstance(const, ir.Const)
    reg = fun.GetScratchReg(const.kind, "imm", True)
    ins.operands[pos] = reg
    return ir.Ins(o.MOV, [reg, const])

def _InsConstantFold(ins: ir.Ins, bbl: ir.Bbl, _fun: ir.Fun,
                     allow_conv_conversion: bool) -> Optional[List[ir.Ins]]:
    """Try evaluating instructions whose inputs are all constants.

    Returns the replacement instructions if a change was made, None otherwise.

    Note: none of the transformations must change the def register -
    otherwise the reaching_defs would be stale.
    """
    ops = ins.operands
    kind = ins.opcode.kind
    if kind is o.OPC_KIND.COND_BRA:
        if not isinstance(ops[0], ir.Const) or not isinstance(ops[1], ir.Const):
            return None
        branch_taken = eval.EvaluatateCondBra(ins.opcode, ops[0], ops[1])
        target = ops[2]
        assert len(bbl.edge_out) == 2
        if branch_taken:
            succ_to_drop = bbl.edge_out[1] if bbl.edge_out[0] == target else \
                bbl.edge_out[0]
        else:
            succ_to_drop = target
        bbl.DelEdgeOut(succ_to_drop)
        return []
    elif kind is o.OPC_KIND.CMP:
        if not isinstance(ops[3], ir.Const) or not isinstance(ops[4], ir.Const):
            return None
        cmp_true = eval.EvaluatateCondBra(
            o.BEQ if ins.opcode is o.CMPEQ else o.BLT, ops[3], ops[4])
        if cmp_true:
            ins.Init(o.MOV, [ops[0], ops[1]])
        else:
            ins.Init(o.MOV, [ops[0], ops[2]])
        return [ins]
    elif kind is o.OPC_KIND.ALU1:
        if not isinstance(ops[1], ir.Const):
            return None
        new_op = eval.EvaluatateALU1(ins.opcode, ops[1])
        ins.Init(o.MOV, [ops[0], new_op])
        return [ins]
    elif kind is o.OPC_KIND.ALU:
        if not isinstance(ops[1], ir.Const) or not isinstance(ops[2], ir.Const):
            return None
        new_op = eval.EvaluatateALU(ins.opcode, ops[1], ops[2])
        ins.Init(o.MOV, [ops[0], new_op])
        return [ins]
    elif ins.opcode is o.CONV:
        # TODO: this needs some more thought generally, but in particular
        # when we apply register widening transformations, conv instructions
        # end up being the only ones with narrow width regs which simplifies
        # code generation. By allowing this to be converted into a mov
        # instruction we may leak the narrow register.
        if not allow_conv_conversion or not isinstance(ops[1], ir.Const):
            return None
        dst: ir.Reg = ops[0]
        src = ops[1]
        if not o.RegIsAddrInt(src.kind) or not o.RegIsAddrInt(dst.kind):
            return None
        new_val = eval.ConvertIntValue(dst.kind, src)
        ins.Init(o.MOV, [dst, new_val])
        return [ins]
    else:
        return None

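# Examples of the folds above (illustrative only; names are made up):
#
#   add x = 2 3             becomes   mov x = 5
#   cmpeq x = a b 7 7       becomes   mov x = a        # since 7 == 7
#   beq 7 8 target          drops the CFG edge to target; the branch itself
#                           is removed by the caller since [] is returned
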
def InsFlipCondBra(ins: ir.Ins, old_target: ir.Bbl, new_target: ir.Bbl):
    assert ins.operands[2] == old_target
    ins.operands[2] = new_target
    ins.opcode, must_flip = _OPCODE_BRANCH_INVERSION[ins.opcode]
    if must_flip:
        ir.InsSwapOps(ins, 0, 1)

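# Example (illustrative only). Inverting `blt a b L` must yield a branch that
# is taken exactly when `a < b` is false, i.e. when `b <= a`, so the table is
# assumed to map BLT to BLE with must_flip set:
#
#   blt a b old_target      becomes   ble b a new_target
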
def _InsConstantFold(ins: ir.Ins, bbl: ir.Bbl, _fun: ir.Fun,
                     allow_conv_conversion: bool) -> Optional[List[ir.Ins]]:
    """Try evaluating instructions whose inputs are all constants.

    Returns the replacement instructions if a change was made, None otherwise.

    Note: none of the transformations must change the def register -
    otherwise the reaching_defs would be stale.
    """
    ops = ins.operands
    kind = ins.opcode.kind
    if kind is o.OPC_KIND.COND_BRA:
        if not isinstance(ops[0], ir.Const) or not isinstance(ops[1], ir.Const):
            return None
        evaluator = _EVALUATORS_COND_BRA.get(ins.opcode)
        assert evaluator, f"Evaluator NYI for: {ins} {ins.operands}"
        branch_taken = evaluator(ops[0].value, ops[1].value)
        target = ops[2]
        assert len(bbl.edge_out) == 2
        if branch_taken:
            succ_to_drop = bbl.edge_out[1] if bbl.edge_out[0] == target else \
                bbl.edge_out[0]
        else:
            succ_to_drop = target
        bbl.DelEdgeOut(succ_to_drop)
        return []
    elif kind is o.OPC_KIND.ALU1:
        if not isinstance(ops[1], ir.Const):
            return None
        assert False, f"Evaluator NYI for ALU1: {ins} {ins.operands}"
    elif kind is o.OPC_KIND.ALU:
        if not isinstance(ops[1], ir.Const) or not isinstance(ops[2], ir.Const):
            return None
        evaluator = _EVALUATORS_ALU.get(ins.opcode)
        assert evaluator, f"Evaluator NYI for: {ins} {ins.operands}"
        val = ir.Const(ops[1].kind, evaluator(ops[1].value, ops[2].value))
        # rewrite in place into `mov ops[0] = val`, dropping the last operand
        ins.opcode = o.MOV
        ins.operands.pop(-1)
        ins.operands[1] = val
        ins.operand_defs.pop(-1)
        ins.operand_defs[1] = ir.INS_INVALID
        return [ins]
    elif ins.opcode is o.CONV:
        # TODO: this needs some more thought generally, but in particular
        # when we apply register widening transformations, conv instructions
        # end up being the only ones with narrow width regs which simplifies
        # code generation. By allowing this to be converted into a mov
        # instruction we may leak the narrow register.
        if not allow_conv_conversion or not isinstance(ops[1], ir.Const):
            return None
        dst: ir.Reg = ops[0]
        src = ops[1]
        if not o.RegIsAddrInt(src.kind) or not o.RegIsAddrInt(dst.kind):
            return None
        new_val = ConvertIntValue(dst.kind, src)
        ins.Init(o.MOV, [dst, new_val])
        return [ins]
    else:
        return None