Beispiel #1
0
def _InsEliminateCopySign(ins: ir.Ins, fun: ir.Fun) -> Optional[List[ir.Ins]]:
    """Expand a copysign into integer bit manipulation:
    z = copysign a  b
    aa = int(a) & 0x7f...f
    bb = int(b) & 0x80...0
    z = flt(aa | bb)
    """
    if ins.opcode is not o.COPYSIGN:
        return None
    ops = ins.operands
    # Pick the unsigned integer kind matching the float width.
    if ops[0].kind == o.DK.F32:
        int_kind, sign_bit = o.DK.U32, 1 << 31
    else:
        int_kind, sign_bit = o.DK.U64, 1 << 63
    magnitude_mask = sign_bit - 1

    mag = fun.GetScratchReg(int_kind, "elim_copysign1", False)
    sgn = fun.GetScratchReg(int_kind, "elim_copysign2", False)
    return [
        # magnitude bits of the first source
        ir.Ins(o.BITCAST, [mag, ops[1]]),
        ir.Ins(o.AND, [mag, mag, ir.Const(int_kind, magnitude_mask)]),
        # sign bit of the second source
        ir.Ins(o.BITCAST, [sgn, ops[2]]),
        ir.Ins(o.AND, [sgn, sgn, ir.Const(int_kind, sign_bit)]),
        # recombine and move back into the float destination
        ir.Ins(o.OR, [mag, mag, sgn]),
        ir.Ins(o.BITCAST, [ops[0], mag]),
    ]
Beispiel #2
0
def _InsEliminateRem(ins: ir.Ins, fun: ir.Fun) -> Optional[List[ir.Ins]]:
    """Expand a modulo instruction:
    z = a % b
    becomes
    z = a // b
    z = z * b
    z = a - z
    TODO: double check that this works out for corner-cases
    """
    if ins.opcode is not o.REM:
        return None
    ops = ins.operands
    quotient = fun.GetScratchReg(ops[0].kind, "elim_rem1", True)
    out = [ir.Ins(o.DIV, [quotient, ops[1], ops[2]])]
    # NOTE: this implementation for floating mod may have precision issues.
    if ops[0].kind.flavor() is o.DK_FLAVOR_F:
        # float div is exact division, so chop the quotient to an integer first
        truncated = fun.GetScratchReg(ops[0].kind, "elim_rem3", True)
        out.append(ir.Ins(o.TRUNC, [truncated, quotient]))
        quotient = truncated
    product = fun.GetScratchReg(ops[0].kind, "elim_rem2", True)
    out.append(ir.Ins(o.MUL, [product, quotient, ops[2]]))
    out.append(ir.Ins(o.SUB, [ops[0], ops[1], product]))
    return out
Beispiel #3
0
def _InsEliminateMemLoadStore(ins: ir.Ins, fun: ir.Fun, base_kind: o.DK,
                              offset_kind: o.DK) -> Optional[List[ir.Ins]]:
    """Rewrite ld_mem/st_mem into lea_mem followed by a plain ld/st.

    Usually applied as a prep step by backends that cannot address Mem
    operands directly.  The scratch base register becomes visible to the
    register allocator, but the fact that the access targets a static
    location is obscured.

    Note: this function may add local registers which does not affect
    liveness or use-def chains.
    """
    opc = ins.opcode
    ops = ins.operands
    if opc is o.ST_MEM:
        reg_offset, const_offset = ops[1], ir.Const(offset_kind, 0)
        # a constant offset migrates into the lea; a register offset stays on the st
        if isinstance(reg_offset, ir.Const):
            reg_offset, const_offset = const_offset, reg_offset
        base = fun.GetScratchReg(base_kind, "base", False)
        lea = ir.Ins(o.LEA_MEM, [base, ops[0], const_offset])
        ins.Init(o.ST, [base, reg_offset, ops[2]])
        return [lea, ins]
    if opc is o.LD_MEM:
        reg_offset, const_offset = ops[2], ir.Const(offset_kind, 0)
        if isinstance(reg_offset, ir.Const):
            reg_offset, const_offset = const_offset, reg_offset
        base = fun.GetScratchReg(base_kind, "base", False)
        # TODO: should the Zero Offset stay with the ld op?
        lea = ir.Ins(o.LEA_MEM, [base, ops[1], const_offset])
        ins.Init(o.LD, [ops[0], base, reg_offset])
        return [lea, ins]
    return None
Beispiel #4
0
def PhaseLegalization(fun: ir.Fun, unit: ir.Unit, _opt_stats: Dict[str, int],
                      fout):
    """
    Legalize `fun` for the backend (heavy lifting so that the instruction
    selector can remain simple and table driven).

    * lift almost all regs to 32bit width
    * rewrite Ins that cannot be expanded
    * rewrite immediates that cannot be expanded except stack offsets which are dealt with in
      another pass

    TODO: missing is a function to change calling signature so that
    """

    # narrow int regs are widened so the isel only ever sees 32 bit ops
    lowering.FunRegWidthWidening(fun, o.DK.U8, o.DK.U32)
    lowering.FunRegWidthWidening(fun, o.DK.S8, o.DK.S32)
    lowering.FunRegWidthWidening(fun, o.DK.S16, o.DK.S32)
    lowering.FunRegWidthWidening(fun, o.DK.U16, o.DK.U32)

    fun.cpu_live_in = regs.GetCpuRegsForSignature(fun.input_types)
    fun.cpu_live_out = regs.GetCpuRegsForSignature(fun.output_types)
    # only NORMAL funs have bodies that need rewriting
    if fun.kind is not o.FUN_KIND.NORMAL:
        return

    # ARM has no mod instruction
    lowering.FunEliminateRem(fun)

    # ARM has no support for these addressing modes
    lowering.FunEliminateStkLoadStoreWithRegOffset(fun,
                                                   base_kind=o.DK.A32,
                                                   offset_kind=o.DK.S32)
    # No floating point immediates
    lowering.FunMoveImmediatesToMemory(fun, unit, o.DK.F32)
    lowering.FunMoveImmediatesToMemory(fun, unit, o.DK.F64)
    # also handles ld_mem from two transformations above
    lowering.FunEliminateMemLoadStore(fun,
                                      base_kind=o.DK.A32,
                                      offset_kind=o.DK.S32)

    canonicalize.FunCanonicalize(fun)
    # TODO: add a cfg linearization pass to improve control flow
    optimize.FunCfgExit(
        fun, unit)  # note: this may affect immediates as it flips branches

    # Handle most overflowing immediates.
    # This excludes immediates related to stack offsets which have not been determined yet
    lowering.FunEliminateImmediateStores(fun)  # handles st_stk immediates
    _FunRewriteOutOfBoundsImmediates(fun)
    # hack: some of the code expansion templates need a scratch reg
    # we do not want to reserve registers for this globally, so instead
    # we inject some nop instructions that reserve a register that we
    # use as a scratch for the instruction immediately following the nop
    isel_tab.FunAddNop1ForCodeSel(fun)
    sanity.FunCheck(fun, None)
Beispiel #5
0
def _GetRegOrConstOperand(fun: ir.Fun, last_kind: o.DK, ok: o.OP_KIND,
                          tc: o.TC, token: str, regs_cpu: Dict[str,
                                                               ir.Reg]) -> Any:
    """Parse `token` into a register or constant operand.

    A REG_OR_CONST operand is disambiguated by the token's syntax.
    Register tokens may carry an "@cpu_reg" suffix (pre-assigned cpu reg, or
    "STK" for a stack slot) and/or a ":kind" suffix declaring the reg kind.
    Constant tokens carry a ":kind" suffix or have their kind deduced from
    the type constraint `tc` and the preceding operand kind `last_kind`.
    """
    if ok == o.OP_KIND.REG_OR_CONST:
        ok = o.OP_KIND.CONST if parse.IsLikelyConst(token) else o.OP_KIND.REG

    if ok is o.OP_KIND.REG:
        cpu_reg = None
        pos = token.find("@")
        if pos > 0:
            cpu_reg_name = token[pos + 1:]
            token = token[:pos]
            if cpu_reg_name == "STK":
                cpu_reg = ir.StackSlot(0)
            else:
                cpu_reg = regs_cpu.get(cpu_reg_name)
                # bug fix: the old message indexed the already truncated token
                # with the stale `pos`, producing an empty/garbled reg name
                assert cpu_reg is not None, f"unknown cpu_reg {cpu_reg_name} known regs {regs_cpu.keys()}"
        pos = token.find(":")
        if pos < 0:
            reg = fun.GetReg(token)
        else:
            kind_name = token[pos + 1:]
            reg_name = token[:pos]
            # robustness: fail with a clear message on an unknown kind instead
            # of constructing a reg with kind None
            rk = o.SHORT_STR_TO_RK.get(kind_name)
            assert rk is not None, f"bad kind name [{kind_name}]"
            reg = ir.Reg(reg_name, rk)
            fun.AddReg(reg)
            assert o.CheckTypeConstraint(last_kind, tc, reg.kind)
        if cpu_reg:
            if reg.cpu_reg:
                assert reg.cpu_reg == cpu_reg
            else:
                reg.cpu_reg = cpu_reg
        return reg

    else:
        pos = token.find(":")
        if pos >= 0:
            # explicitly typed constant: "<value>:<kind>"
            kind_name = token[pos + 1:]
            value_str = token[:pos]
            rk = o.SHORT_STR_TO_RK.get(kind_name)
            assert rk is not None, f"bad kind name [{kind_name}]"
            return ir.ParseConst(value_str, rk)
        elif tc == o.TC.SAME_AS_PREV:
            return ir.ParseConst(token, last_kind)
        elif tc == o.TC.OFFSET:
            return ir.ParseOffsetConst(token)
        elif tc == o.TC.UINT:
            assert token[0] != "-"
            return ir.ParseOffsetConst(token)
        else:
            assert False, f"cannot deduce type for const {token} [{tc}]"
Beispiel #6
0
def PhaseLegalization(fun: ir.Fun, unit: ir.Unit, _opt_stats: Dict[str, int], fout):
    """
    Legalize `fun` for the backend (heavy lifting so that the instruction
    selector can remain simple and table driven).

    * lift almost all regs to 32bit width
    * rewrite Ins that cannot be expanded
    * rewrite immediates that cannot be expanded except stack offsets which are dealt with in
      another pass

    TODO: missing is a function to change calling signature so that
    """

    # narrow int regs are widened so the isel only ever sees 32 bit ops
    lowering.FunRegWidthWidening(fun, o.DK.U8, o.DK.U32)
    lowering.FunRegWidthWidening(fun, o.DK.S8, o.DK.S32)
    lowering.FunRegWidthWidening(fun, o.DK.S16, o.DK.S32)
    lowering.FunRegWidthWidening(fun, o.DK.U16, o.DK.U32)

    fun.cpu_live_in = regs.PushPopInterface.GetCpuRegsForInSignature(fun.input_types)
    fun.cpu_live_out = regs.PushPopInterface.GetCpuRegsForOutSignature(fun.output_types)
    # only NORMAL funs have bodies that need rewriting
    if fun.kind is not o.FUN_KIND.NORMAL:
        return

    # Getting rid of the pusharg/poparg now relieves us from having to pay attention to the
    # invariant that pushargs/popargs must be adjacent.
    lowering.FunPushargConversion(fun, regs.PushPopInterface)
    lowering.FunPopargConversion(fun, regs.PushPopInterface)

    # ARM has no mod instruction
    lowering.FunEliminateRem(fun)

    # A64 has no support for these addressing modes
    lowering.FunEliminateStkLoadStoreWithRegOffset(fun, base_kind=o.DK.A64,
                                                   offset_kind=o.DK.S32)

    # we cannot load/store directly from mem so expand the instruction to simpler
    # sequences
    lowering.FunEliminateMemLoadStore(fun, base_kind=o.DK.A64,
                                      offset_kind=o.DK.S32)

    canonicalize.FunCanonicalize(fun)
    # TODO: add a cfg linearization pass to improve control flow
    optimize.FunCfgExit(fun, unit)  # note: this may affect immediates as it flips branches

    # Handle most overflowing immediates.
    # This excludes immediates related to stack offsets which have not been determined yet
    _FunRewriteOutOfBoundsImmediates(fun, unit)

    sanity.FunCheck(fun, None)
Beispiel #7
0
def _InsLimitShiftAmounts(ins: ir.Ins, fun: ir.Fun,
                          width: int) -> Optional[List[ir.Ins]]:
    """Force the amount of a shl/shr of bitwidth `width` into [0, width).

    Constant amounts are reduced modulo `width` directly; register amounts
    are masked with an explicit `and` (valid since `width` is a power of
    two).  (The previous docstring was copy-pasted from the stk load/store
    rewrite and described an unrelated transformation.)
    """
    opc = ins.opcode
    ops = ins.operands
    if (opc is not o.SHL
            and opc is not o.SHR) or ops[0].kind.bitwidth() != width:
        return None
    amount = ops[2]
    if isinstance(amount, ir.Const):
        if 0 <= amount.value < width:
            # already in range - nothing to do
            return None
        else:
            ops[2] = ir.Const(amount.kind, amount.value % width)
            # bug fix: the rewrite framework expects a list of instructions
            # (or None), not a bare Ins - see the sibling _Ins* rewriters
            return [ins]
    else:
        tmp = fun.GetScratchReg(amount.kind, "shift", False)
        mask = ir.Ins(o.AND, [tmp, amount, ir.Const(amount.kind, width - 1)])
        ins.Init(opc, [ops[0], ops[1], tmp])
        return [mask, ins]
Beispiel #8
0
def FunRemoveEmptyBbls(fun: ir.Fun) -> int:
    """Delete instruction-less bbls, rerouting their predecessors to the successor.

    Empty bbls that branch to themselves are kept (observable infinite loop).
    Returns the number of bbls discarded.
    """
    keep = []
    for bbl in fun.bbls:
        if bbl.inss:
            keep.append(bbl)
            continue
        succ = bbl.edge_out[0]
        if succ == bbl:
            # we have to keep infinite loop
            keep.append(bbl)
            continue
        # print ("BBL -DELETE", bbl.name)
        # print("IN",  bbl.edge_in)
        # print ("OUT", bbl.edge_out)
        del fun.bbl_syms[bbl.name]
        # assert bbl != fun.bbls[0], f"attempt to delete first bbl in fun {fun.name}"
        # an empty bbl can only fall through, so exactly one successor
        assert len(bbl.edge_out) == 1, bbl
        succ = bbl.edge_out[0]
        bbl.DelEdgeOut(succ)
        # We need to clone the edge list since we have destructive updates
        # but while we are at it let's also process every predecessor only once
        unique_preds: Set[str] = set(pred.name for pred in bbl.edge_in)
        for pred_name in unique_preds:
            pred = fun.bbl_syms[pred_name]
            if pred.inss:
                InsMaybePatchNewSuccessor(pred.inss[-1], bbl,
                                          succ)  # patch ins/jtb
            pred.ReplaceEdgeOut(bbl, succ)  # patch edg

    discarded = len(fun.bbls) - len(keep)
    fun.bbls = keep
    return discarded
Beispiel #9
0
def InsEliminateCmp(ins: ir.Ins, bbl: ir.Bbl, fun: ir.Fun):
    """Rewrites cmpXX a, b, c, x, y instructions like so:
    canonicalization ensures that a != c
    mov z b
    bXX skip, x, y
      mov z c
    .bbl skip
      mov a z

    TODO: This is very coarse

    NOTE(review): only CMPEQ maps to BEQ; every other cmp opcode falls back
    to BLT - presumably canonicalization guarantees only those two variants
    reach this point (confirm against the caller).
    """
    assert ins.opcode.kind is o.OPC_KIND.CMP
    # split twice: bbl_prev ends with the cmp, bbl_skip is empty, bbl holds the rest
    bbl_skip = cfg.BblSplit(ins, bbl, fun, bbl.name + "_spilt")
    bbl_prev = cfg.BblSplit(ins, bbl_skip, fun, bbl.name + "_spilt")
    assert not bbl_skip.inss
    assert bbl_prev.inss[-1] is ins
    assert bbl_prev.edge_out == [bbl_skip]
    assert bbl_skip.edge_in == [bbl_prev]
    assert bbl_skip.edge_out == [bbl]
    assert bbl.edge_in == [bbl_skip]

    reg = fun.GetScratchReg(ins.operands[0].kind, "cmp", False)

    # drop the cmp itself and materialize the select via a conditional branch
    del bbl_prev.inss[-1]
    ops = ins.operands
    bbl_prev.inss.append(ir.Ins(o.MOV, [reg, ops[1]]))
    bbl_prev.inss.append(
        ir.Ins(o.BEQ if ins.opcode == o.CMPEQ else o.BLT,
               [ops[3], ops[4], bbl]))
    bbl_skip.inss.append(ir.Ins(o.MOV, [reg, ops[2]]))
    bbl.inss.insert(0, ir.Ins(o.MOV, [ops[0], reg]))
    # the conditional branch introduces a new edge bbl_prev -> bbl
    bbl_prev.edge_out.append(bbl)
    bbl.edge_in.append(bbl_prev)
Beispiel #10
0
def FunSeparateLocalRegUsage(fun: ir.Fun) -> int:
    """Split live ranges of (bbl-)local regs by renaming re-definitions.

    Works in coordination with the liverange computation AND the local
    register allocator which assigns one cpu register to each liverange.
    Returns the number of definitions that were renamed.
    """
    count = 0
    for bbl in fun.bbls:
        for ins_pos, ins in enumerate(bbl.inss):
            num_defs = ins.opcode.def_ops_count()
            for op_no in range(num_defs):
                old = ins.operands[op_no]
                assert isinstance(old, ir.Reg)
                is_first_def = old.def_ins is ins
                is_global = ir.REG_FLAG.GLOBAL in old.flags
                # two address "situation" (for x64)
                is_two_address = (ir.REG_FLAG.TWO_ADDRESS in old.flags
                                  and len(ins.operands) >= 2
                                  and ins.operands[0] == ins.operands[1])
                # keep first defs, globals, two-address defs and cpu-pinned regs
                if (is_first_def or is_global or is_two_address
                        or old.cpu_reg is not None):
                    continue
                purpose = old.name
                if purpose.startswith("$"):
                    # strip the "$<num>_" scratch prefix
                    purpose = purpose[purpose.find("_") + 1:]
                fresh = fun.GetScratchReg(old.kind, purpose, False)
                if ir.REG_FLAG.TWO_ADDRESS in old.flags:
                    fresh.flags |= ir.REG_FLAG.TWO_ADDRESS
                ins.operands[op_no] = fresh
                _BblRenameReg(bbl, ins_pos + 1, old, fresh)
                count += 1
    return count
Beispiel #11
0
def PhaseFinalizeStackAndLocalRegAlloc(fun: ir.Fun, _opt_stats: Dict[str, int],
                                       fout):
    """Finalizing the stack implies performing all transformations that
    could increase register usage.

    """
    # print("@@@@@@\n", "\n".join(serialize.FunRenderToAsm(fun)), file=fout)

    # hack: some of the code expansion templates need a scratch reg
    # we do not want to reserve registers for this globally, so instead
    # we inject some nop instructions that reserve a register that we
    # use as a scratch for the instruction immediately following the nop
    #
    # This still has a potential bug: if the next instruction has one of its
    # inputs spilled, it will likely use the scratch reg provided by the nop1
    # which will cause incorrect code.
    # TODO: add a checker so we at least detect this
    # Alternatives: reserve reg (maybe only for functions that need it)
    # TODO: make sure that nop1 regs never get spilled
    isel_tab.FunAddNop1ForCodeSel(fun)
    regs.FunLocalRegAlloc(fun)
    fun.FinalizeStackSlots()
    # if fun.name == "fibonacci": DumpFun("after local alloc", fun)
    # DumpFun("after local alloc", fun)
    # cleanup
    _FunMoveEliminationCpu(fun)
Beispiel #12
0
def _GetOperand(unit: ir.Unit, fun: ir.Fun, ok: o.OP_KIND, v: Any) -> Any:
    """Convert the raw parsed value `v` into an operand of kind `ok`."""
    # list-valued operand kinds accept either a real list or a quoted string
    if ok in o.OKS_LIST:
        assert isinstance(v,
                          list) or v[0] == v[-1] == '"', f"operand {ok}: [{v}]"
    else:
        assert isinstance(v, str), f"bad operand {v} of type [{ok}]"

    if ok is o.OP_KIND.TYPE_LIST:
        kinds = []
        for kind_name in v:
            rk = o.SHORT_STR_TO_RK.get(kind_name)
            assert rk is not None, f"bad kind name [{kind_name}]"
            kinds.append(rk)
        return kinds
    if ok is o.OP_KIND.FUN:
        return unit.GetFunOrAddForwardDeclaration(v)
    if ok is o.OP_KIND.BBL:
        return fun.GetBblOrAddForwardDeclaration(v)
    if ok is o.OP_KIND.BBL_TAB:
        return ExtractBblTable(fun, v)
    if ok is o.OP_KIND.MEM:
        return unit.GetMem(v)
    if ok is o.OP_KIND.STK:
        return fun.GetStk(v)
    if ok is o.OP_KIND.FUN_KIND:
        return o.SHORT_STR_TO_FK[v]
    if ok is o.OP_KIND.DATA_KIND:
        rk = o.SHORT_STR_TO_RK.get(v)
        assert rk is not None, f"bad kind name [{v}]"
        return rk
    if ok is o.OP_KIND.NAME:
        assert parse.RE_IDENTIFIER.match(v), f"bad identifier [{v}]"
        return v
    if ok is o.OP_KIND.NAME_LIST:
        for x in v:
            assert parse.RE_IDENTIFIER.match(x), f"bad identifier [{x}]"
        return v
    if ok is o.OP_KIND.MEM_KIND:
        return o.SHORT_STR_TO_MK[v]
    if ok is o.OP_KIND.VALUE:
        return v
    if ok is o.OP_KIND.BYTES:
        return ExtractBytes(v)
    if ok is o.OP_KIND.JTB:
        return fun.GetJbl(v)
    raise ir.ParseError(f"cannot read op type: {ok}")
Beispiel #13
0
def ExtractBblTable(fun: ir.Fun, lst: List) -> Dict[int, ir.Bbl]:
    """Build a jump-table mapping from a flat [num, bbl_name, num, bbl_name, ...] list."""
    assert len(lst) % 2 == 0
    # zipping an iterator with itself pairs up consecutive elements
    pairs = iter(lst)
    return {
        int(num_str): fun.GetBblOrAddForwardDeclaration(bbl_name)
        for num_str, bbl_name in zip(pairs, pairs)
    }
Beispiel #14
0
def FunSpillRegs(fun: ir.Fun, offset_kind: o.DK, regs: List[ir.Reg]) -> int:
    """Spill `regs` to freshly created stack slots.

    Returns the number of rewritten instructions (as reported by the
    generic rewrite driver).
    """
    reg_to_stk: Dict[ir.Reg, ir.Stk] = {}
    for reg in regs:
        byte_size = ir.OffsetConst(reg.kind.bitwidth() // 8)
        slot = ir.Stk(f"$spill_{reg.name}", byte_size, byte_size)
        fun.AddStk(slot)
        reg_to_stk[reg] = slot
    return ir.FunGenericRewrite(fun, InsSpillRegs,
                                zero_const=ir.Const(offset_kind, 0),
                                reg_to_stk=reg_to_stk)
Beispiel #15
0
def PhaseFinalizeStackAndLocalRegAlloc(fun: ir.Fun, _opt_stats: Dict[str, int],
                                       fout):
    """Finalizing the stack implies performing all transformations that
    could increase register usage.

    """
    # assign cpu regs to the remaining (bbl-)local virtual regs
    regs.FunLocalRegAlloc(fun)
    fun.FinalizeStackSlots()
    # cleanup
    FunMoveEliminationCpu(fun)
Beispiel #16
0
def _InsAddNop1ForCodeSel(ins: ir.Ins, fun: ir.Fun) -> Optional[List[ir.Ins]]:
    """Prefix instructions that need a scratch reg during code selection
    with a nop1 that reserves that register."""
    opc = ins.opcode
    if opc is o.SWITCH:
        # needs scratch to compute the jmp address into
        scratch = fun.GetScratchReg(o.DK.C32, "switch", False)
        return [ir.Ins(o.NOP1, [scratch]), ins]
    if opc is o.CONV:
        dst_kind = ins.operands[0].kind
        src_kind = ins.operands[1].kind
        if o.RegIsInt(dst_kind) and src_kind.flavor() == o.DK_FLAVOR_F:
            # need scratch for intermediate flt result
            # we know the result cannot be wider than 32bit for this CPU
            scratch = fun.GetScratchReg(o.DK.F32, "ftoi", False)
            return [ir.Ins(o.NOP1, [scratch]), ins]
        if o.RegIsInt(src_kind) and dst_kind is o.DK.F64:
            # need scratch for intermediate flt result
            # we know the result cannot be wider than 32bit for this CPU
            scratch = fun.GetScratchReg(o.DK.F32, "itof", False)
            return [ir.Ins(o.NOP1, [scratch]), ins]
    return [ins]
Beispiel #17
0
def FunRemoveUnreachableBbls(fun: ir.Fun) -> int:
    """Drop bbls not reachable from the entry bbl.

    Returns the number of bbls discarded.
    """
    # depth first search from the entry bbl
    reachable: Set[str] = set()
    worklist: List[ir.Bbl] = [fun.bbls[0]]
    while worklist:
        curr = worklist.pop()
        if curr.name not in reachable:
            reachable.add(curr.name)
            worklist.extend(curr.edge_out)

    discarded = len(fun.bbls) - len(reachable)
    # detach dead bbls from the in-edge lists of their successors
    for bbl in fun.bbls:
        if bbl.name not in reachable:
            for succ in bbl.edge_out:
                succ.edge_in.remove(bbl)
    fun.bbls = [b for b in fun.bbls if b.name in reachable]
    fun.bbl_syms = {b.name: b for b in fun.bbls}
    return discarded
Beispiel #18
0
def _InsRewriteFltImmediates(ins: ir.Ins, fun: ir.Fun,
                             unit: ir.Unit) -> Optional[List[ir.Ins]]:
    """Replace floating point constant operands by loads from constant memory."""
    loads = []
    for pos, op in enumerate(ins.operands):
        if not isinstance(op, ir.Const) or op.kind.flavor() is not o.DK_FLAVOR_F:
            continue
        mem = unit.FindOrAddConstMem(op)
        scratch = fun.GetScratchReg(op.kind, "flt_const", True)
        loads.append(ir.Ins(o.LD_MEM, [scratch, mem, _ZERO_OFFSET]))
        ins.operands[pos] = scratch
    return loads + [ins] if loads else None
Beispiel #19
0
def InsEliminateImmediateViaMem(ins: ir.Ins, pos: int, fun: ir.Fun,
                                unit: ir.Unit, addr_kind: o.DK,
                                offset_kind: o.DK) -> List[ir.Ins]:
    """Replace the immediate operand `ins.operands[pos]` by a load from memory.

    Useful if the target architecture has no immediate form of the
    instruction or the immediate is too large.  This optimization runs
    rather late and may already see machine registers.
    """
    # support of PUSHARG would require additional work because they need to stay consecutive
    assert ins.opcode is not o.PUSHARG
    const = ins.operands[pos]
    mem = unit.FindOrAddConstMem(const)
    addr = fun.GetScratchReg(addr_kind, "mem_const_addr", True)
    lea_ins = ir.Ins(o.LEA_MEM, [addr, mem, ir.Const(offset_kind, 0)])
    value = fun.GetScratchReg(const.kind, "mem_const", True)
    ld_ins = ir.Ins(o.LD, [value, addr, ir.Const(offset_kind, 0)])
    ins.operands[pos] = value
    return [lea_ins, ld_ins]
Beispiel #20
0
def _InsRewriteDivRemShifts(ins: ir.Ins,
                            fun: ir.Fun) -> Optional[List[ir.Ins]]:
    """Rewrite integer div/rem and variable-amount shifts into x64 friendly form."""
    opc = ins.opcode
    ops = ins.operands
    if opc in (o.DIV, o.REM) and ops[0].kind.flavor() != o.DK_FLAVOR_F:
        # note: we could leave it to the register allocator to pick a CpuReg
        # for ops[2] but then we would somehow have to ensure that the reg is
        # NOT rdx. By forcing rcx for ops[2] we sidestep the issue.
        rax = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rax"], ops[0].kind)
        rcx = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rcx"], ops[0].kind)
        rdx = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rdx"], ops[0].kind)
        # the quotient ends up in rax, the remainder in rdx
        result = rax if opc is o.DIV else rdx
        return [
            ir.Ins(o.MOV, [rax, ops[1]]),
            ir.Ins(o.MOV, [rcx, ops[2]]),
            # note the notion of src/dst regs is murky here
            ir.Ins(o.DIV, [rdx, rax, rcx]),
            ir.Ins(o.MOV, [ops[0], result]),
        ]
    if opc in {o.SHR, o.SHL} and isinstance(ops[2], ir.Reg):
        # x64 takes variable shift amounts in rcx
        rcx = fun.FindOrAddCpuReg(regs.CPU_REGS_MAP["rcx"], ops[0].kind)
        mov = ir.Ins(o.MOV, [rcx, ops[2]])
        ops[2] = rcx
        mask = _SHIFT_MASK.get(ops[0].kind)
        if mask:
            return [mov, ir.Ins(o.AND, [rcx, rcx, mask]), ins]
        return [mov, ins]
    return None
Beispiel #21
0
def BblSpillRegs(bbl: ir.Bbl, fun: ir.Fun, regs: List[ir.Reg],
                 offset_kind: o.DK, prefix) -> int:
    """Spill the given regs (within `bbl` only) to freshly created stack slots.

    Returns the rewrite count as reported by the generic driver (presumably
    the number of rewritten instructions - mirrors FunSpillRegs).
    """
    reg_to_stk: Dict[ir.Reg, ir.Stk] = {}
    for reg in regs:
        size = reg.kind.bitwidth() // 8
        stk = ir.Stk(f"{prefix}_{reg.name}", size, size)
        reg_to_stk[reg] = stk
        fun.AddStk(stk)
    # bug fix: the rewrite result was dropped although the signature promises
    # an int (compare the sibling FunSpillRegs which returns it)
    return ir.BblGenericRewrite(bbl,
                                fun,
                                InsSpillRegs,
                                zero_const=ir.Const(offset_kind, 0),
                                reg_to_stk=reg_to_stk)
Beispiel #22
0
def InsSpillRegs(ins: ir.Ins, fun: ir.Fun, zero_const,
                 reg_to_stk) -> Optional[List[ir.Ins]]:
    """Replace spilled reg operands by scratch regs plus ld_stk/st_stk.

    Defined operands are written back to their stack slot after the ins;
    used operands are reloaded before it.
    """
    reloads: List[ir.Ins] = []  # executed before ins
    stores: List[ir.Ins] = []  # executed after ins
    num_defs = ins.opcode.def_ops_count()
    for pos, op in reversed(list(enumerate(ins.operands))):
        if not isinstance(op, ir.Reg):
            continue
        slot = reg_to_stk.get(op)
        if slot is None:
            continue
        if pos < num_defs:
            scratch = fun.GetScratchReg(op.kind, "stspill", False)
            ins.operands[pos] = scratch
            stores.append(ir.Ins(o.ST_STK, [slot, zero_const, scratch]))
        else:
            scratch = fun.GetScratchReg(op.kind, "ldspill", False)
            ins.operands[pos] = scratch
            reloads.append(ir.Ins(o.LD_STK, [scratch, slot, zero_const]))
    if not reloads and not stores:
        return None
    return reloads + [ins] + stores
Beispiel #23
0
def _InsMoveImmediatesToMemory(ins: ir.Ins, fun: ir.Fun, unit: ir.Unit,
                               kind: o.DK) -> Optional[List[ir.Ins]]:
    """Replace constant operands of the given kind by loads from constant memory."""
    loads = []
    for pos, op in enumerate(ins.operands):
        if not (isinstance(op, ir.Const) and op.kind is kind):
            continue
        mem = unit.FindOrAddConstMem(op)
        scratch = fun.GetScratchReg(kind, "mem_const", True)
        # TODO: pass the offset kind as a parameter
        loads.append(ir.Ins(o.LD_MEM, [scratch, mem, ir.Const(o.DK.U32, 0)]))
        ins.operands[pos] = scratch
    return loads + [ins] if loads else None
Beispiel #24
0
def _InsEliminateStkLoadStoreWithRegOffset(
        ins: ir.Ins, fun: ir.Fun, base_kind: o.DK,
        offset_kind: o.DK) -> Optional[List[ir.Ins]]:
    """Rewrite stk accesses with a register offset into lea_stk + plain op.

    Usually applied as a prep step by backends to get rid of Stk operands:
    a stack address already implies a `sp+offset` addressing mode and RISC
    ISAs do not usually support a `sp+offset+reg` mode.  The scratch base
    register becomes visible to the register allocator but the fact that
    the access is a stack access is obscured.
    """
    opc = ins.opcode
    ops = ins.operands

    def make_base(stk_op):
        # materialize the stack address in a scratch base register
        base = fun.GetScratchReg(base_kind, "base", False)
        return base, ir.Ins(o.LEA_STK,
                            [base, stk_op, ir.Const(offset_kind, 0)])

    if opc is o.ST_STK and isinstance(ops[1], ir.Reg):
        base, lea = make_base(ops[0])
        ins.Init(o.ST, [base, ops[1], ops[2]])
        return [lea, ins]
    if opc is o.LD_STK and isinstance(ops[2], ir.Reg):
        base, lea = make_base(ops[1])
        ins.Init(o.LD, [ops[0], base, ops[2]])
        return [lea, ins]
    if opc is o.LEA_STK and isinstance(ops[2], ir.Reg):
        # TODO: maybe reverse the order so that we can tell that ops[0] holds a stack
        # location
        base, lea = make_base(ops[1])
        ins.Init(o.LEA, [ops[0], base, ops[2]])
        return [lea, ins]
    return None
Beispiel #25
0
def PhaseFinalizeStackAndLocalRegAlloc(fun: ir.Fun, _opt_stats: Dict[str, int],
                                       fout):
    """Finalizing the stack implies performing all transformations that
    could increase register usage.

    """
    # NOTE(review): deliberately disabled spill-everything fallback,
    # presumably kept for debugging the register allocator - confirm before
    # deleting
    if False:
        to_be_spillled = [reg for reg in fun.regs if not reg.HasCpuReg()]
        to_be_spillled.sort()
        reg_alloc.FunSpillRegs(fun, o.DK.U32, to_be_spillled)

    fun.FinalizeStackSlots()
    # DumpFun("@@@ aaa", fun)
    # Special flavor out-of-bound immediate rewriter that is stack aware
    # In rare cases this could introduce the need for another gpr reg
    _FunRewriteOutOfBoundsOffsetsStk(fun)
    # DumpFun("@@@@ before reg-alloc", fun)
    # Assign regs to local var

    regs.FunLocalRegAlloc(fun)
    # the rewrites above may have changed stack usage, so redo the layout
    fun.flags &= ~ir.FUN_FLAG.STACK_FINALIZED
    fun.FinalizeStackSlots()
    # cleanup
    FunMoveEliminationCpu(fun)
Beispiel #26
0
def _InsEliminateImmediateStores(ins: ir.Ins,
                                 fun: ir.Fun) -> Optional[List[ir.Ins]]:
    """Materialize a stored immediate into a register first.

    RISC architectures typically do not allow immediates to be stored directly.
    TODO: maybe allow zero immediates
    """
    ops = ins.operands
    if ins.opcode not in {o.ST_MEM, o.ST, o.ST_STK}:
        return None
    if not isinstance(ops[2], ir.Const):
        return None
    value_reg = fun.GetScratchReg(ops[2].kind, "st_imm", False)
    materialize = ir.Ins(o.MOV, [value_reg, ops[2]])
    ops[2] = value_reg
    return [materialize, ins]
Beispiel #27
0
def FunSeparateLocalRegUsage(fun: ir.Fun) -> int:
    """Split live ranges of local regs by renaming re-definitions.

    Returns the number of definitions that were renamed.
    """
    count = 0
    for bbl in fun.bbls:
        for ins_pos, ins in enumerate(bbl.inss):
            for op_no in range(ins.opcode.def_ops_count()):
                old = ins.operands[op_no]
                assert isinstance(old, ir.Reg)
                # keep first definitions, globals and cpu-pinned regs as-is
                if (old.def_ins is ins or ir.REG_FLAG.GLOBAL in old.flags
                        or old.cpu_reg is not None):
                    continue
                purpose = old.name
                if purpose.startswith("$"):
                    # strip the "$<num>_" scratch prefix
                    purpose = purpose[purpose.find("_") + 1:]
                fresh = fun.GetScratchReg(old.kind, purpose, False)
                ins.operands[op_no] = fresh
                _BblRenameReg(bbl, ins_pos + 1, old, fresh)
                count += 1
    return count
Beispiel #28
0
def _InsRewriteIntoAABForm(ins: ir.Ins, fun: ir.Fun) -> Optional[List[ir.Ins]]:
    """Force `dst = dst op src` (two address) form where required."""
    ops = ins.operands
    if not NeedsAABFromRewrite(ins):
        return None
    if ops[0] == ops[1]:
        # already dst = dst op src
        ops[0].flags |= ir.REG_FLAG.TWO_ADDRESS
        return None
    if ops[0] == ops[2] and o.OA.COMMUTATIVE in ins.opcode.attributes:
        # dst = src op dst: swapping the sources yields the desired shape
        ir.InsSwapOps(ins, 1, 2)
        ops[0].flags |= ir.REG_FLAG.TWO_ADDRESS
        return [ins]
    # general case: compute into a fresh two-address scratch reg
    scratch = fun.GetScratchReg(ins.operands[0].kind, "aab", False)
    scratch.flags |= ir.REG_FLAG.TWO_ADDRESS
    return [
        ir.Ins(o.MOV, [scratch, ops[1]]),
        ir.Ins(ins.opcode, [scratch, scratch, ops[2]]),
        ir.Ins(o.MOV, [ops[0], scratch]),
    ]
Beispiel #29
0
def FunAddUnconditionalBranches(fun: ir.Fun):
    """Re-insert necessary unconditional branches

    sort of inverse of FunRemoveUnconditionalBranches
    """
    bbls = []
    for n, bbl in enumerate(fun.bbls):
        bbls.append(bbl)
        # last ins cannot fall through => nothing to fix up
        if bbl.inss and not bbl.inss[-1].opcode.has_fallthrough():
            continue
        if len(bbl.edge_out) == 1:
            # NOTE(review): this assert is vacuous (n is always < len(fun.bbls));
            # possibly intended to guard the n + 1 access below - confirm
            assert len(fun.bbls) > n
            succ = bbl.edge_out[0]
            # insert a bra unless the successor is the direct fallthrough neighbor
            if n + 1 == len(fun.bbls) or fun.bbls[n + 1] != succ:
                bbl.inss.append(ir.Ins(o.BRA, [succ]))
            continue

        assert len(bbl.edge_out) == 2
        cond_bra = bbl.inss[-1]
        assert cond_bra.opcode.kind is o.OPC_KIND.COND_BRA, (
            f"not a cond bra: {cond_bra}  bbl: {bbl}")
        target = cond_bra.operands[2]
        other = bbl.edge_out[0] if target == bbl.edge_out[1] else bbl.edge_out[
            1]
        succ = fun.bbls[n + 1]
        if succ in bbl.edge_out:
            # target == other can happen if the cond_bra is pointless
            if target == succ and target != other:
                InsFlipCondBra(cond_bra, target, other)
            continue
        else:
            # neither branch target is the fallthrough neighbor: add a
            # trampoline bbl holding the unconditional bra
            bbl_bra = ir.Bbl(NewDerivedBblName(bbl.name, "bra", fun))
            bbl_bra.inss.append(ir.Ins(o.BRA, [other]))
            fun.bbl_syms[bbl_bra.name] = bbl_bra
            # forward fallthrough to new bbl
            if bbl.inss:
                InsMaybePatchNewSuccessor(bbl.inss[-1], other, bbl_bra)
            bbl.ReplaceEdgeOut(other, bbl_bra)
            bbl_bra.AddEdgeOut(other)
            bbls.append(bbl_bra)
    fun.bbls = bbls
    fun.flags &= ~ir.FUN_FLAG.CFG_NOT_LINEAR
Beispiel #30
0
def FunSplitBblsAtTerminators(fun: ir.Fun):
    """split bbls after terminator instructions and remove dead code after 'ret'"""
    for bbl in fun.bbl_syms.values():
        assert not bbl.forward_declared, f"bbl referenced but not defined {bbl}"

    bbls = []
    for bbl in fun.bbls:
        _BblRemoveUnreachableIns(bbl)
        # sub-ranges of bbl.inss, each ending at/after a terminator
        ranges = _BblFindSubRanges(bbl)
        # print ("@@@@ ranges", ranges)
        inss = bbl.inss
        for start, end in ranges:
            new_bbl = bbl
            if start != 0:
                # ranges after the first get a fresh bbl with a derived name
                new_bbl = ir.Bbl(NewDerivedBblName(bbl.name, "_", fun))
                # NOTE(review): re-registering the *old* bbl here looks
                # redundant (new_bbl gets registered below) - confirm intent
                fun.bbl_syms[bbl.name] = bbl
            new_bbl.inss = inss[start:end]
            bbls.append(new_bbl)
            fun.bbl_syms[new_bbl.name] = new_bbl
    fun.bbls = bbls