コード例 #1
0
ファイル: optimize.py プロジェクト: mbrukman/Cwerg
def FunOptBasic(fun: ir.Fun, opt_stats: Dict[str, int],
                allow_conv_conversion: bool):
    """Run the basic optimization pipeline on `fun` and tally the results.

    The passes run in a fixed order: canonicalization and strength reduction,
    reaching-defs based register/constant propagation and constant folding,
    a second round of canonicalization and strength reduction, load/store
    simplification, move elimination, liveness based removal of useless
    instructions and finally register bookkeeping (dropping unreferenced
    regs and separating local reg usage).

    Args:
        fun: the function the passes operate on.
        opt_stats: per-pass counters. The value returned by each counting
            pass is added to the entry of that pass, so one mapping can be
            shared across many functions to obtain totals.
        allow_conv_conversion: forwarded to reaching_defs.FunConstantFold().
    """
    opt_stats["canonicalized"] += canonicalize.FunCanonicalize(fun)
    opt_stats["strength_red"] += lowering.FunStrengthReduction(fun)

    # The propagation passes below rely on up-to-date reaching-defs info.
    reaching_defs.FunComputeReachingDefs(fun)
    reaching_defs.FunCheckReachingDefs(fun)
    # Accumulate instead of overwriting: callers share `opt_stats` between
    # functions and a plain assignment would only keep the last function's
    # count. `.get()` keeps mappings working that lack this key.
    reg_prop = reaching_defs.FunPropagateRegs(fun)
    opt_stats["reg_prop"] = opt_stats.get("reg_prop", 0) + reg_prop
    opt_stats["const_prop"] += reaching_defs.FunPropagateConsts(fun)

    opt_stats["const_fold"] += reaching_defs.FunConstantFold(
        fun, allow_conv_conversion)

    # Folding may expose new opportunities, so canonicalize/reduce again.
    opt_stats["canonicalized"] += canonicalize.FunCanonicalize(fun)
    opt_stats["strength_red"] += lowering.FunStrengthReduction(fun)

    opt_stats["ls_st_simplify"] += reaching_defs.FunLoadStoreSimplify(fun)

    opt_stats["move_elim"] += lowering.FunMoveElimination(fun)

    liveness.FunComputeLivenessInfo(fun)

    # Same accumulation fix as for "reg_prop" above.
    useless = liveness.FunRemoveUselessInstructions(fun)
    opt_stats["useless"] = opt_stats.get("useless", 0) + useless
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunComputeRegStatsLAC(fun)

    opt_stats["dropped_regs"] += reg_stats.FunDropUnreferencedRegs(fun)
    opt_stats["separated_regs"] += reg_stats.FunSeparateLocalRegUsage(fun)
コード例 #2
0
ファイル: optimize.py プロジェクト: mbrukman/Cwerg
def UnitOptBasic(unit: ir.Unit, dump_reg_stats) -> Dict[str, int]:
    """Apply FunOptBasic to every normal function of `unit`.

    Functions whose kind is not o.FUN_KIND.NORMAL are skipped. When
    `dump_reg_stats` is truthy, the register statistics of each optimized
    function are recomputed and printed to stdout.

    Returns:
        The counters filled in by FunOptBasic over all processed functions.
    """
    stats: Dict[str, int] = collections.defaultdict(int)
    normal_funs = (f for f in unit.funs if f.kind is o.FUN_KIND.NORMAL)
    for fun in normal_funs:
        FunOptBasic(fun, stats, allow_conv_conversion=True)
        if not dump_reg_stats:
            continue
        # Refresh the analyses before summarizing the register usage.
        reg_stats.FunComputeRegStatsExceptLAC(fun)
        liveness.FunComputeLivenessInfo(fun)
        reg_stats.FunComputeRegStatsLAC(fun)
        rs = reg_stats.FunCalculateRegStats(fun)
        print(f"# {fun.name:30} RegStats: {rs}")
    return stats
コード例 #3
0
ファイル: optimize.py プロジェクト: mbrukman/Cwerg
def UnitOpt(unit: ir.Unit, dump_reg_stats) -> Dict[str, int]:
    """Apply FunOpt to every normal function of `unit`.

    Functions whose kind is not o.FUN_KIND.NORMAL are skipped. When
    `dump_reg_stats` is truthy, a one-line register usage summary is printed
    to stdout for each optimized function: the overall reg stats followed by
    the per-bbl usage totals split by the `lac` flag.

    Returns:
        The counters filled in by FunOpt over all processed functions.
    """
    stats: Dict[str, int] = collections.defaultdict(int)
    normal_funs = (f for f in unit.funs if f.kind is o.FUN_KIND.NORMAL)
    for fun in normal_funs:
        FunOpt(fun, stats)
        if not dump_reg_stats:
            continue

        # Keys of the usage stats are (kind, lac) pairs; total them per flag.
        usage = reg_stats.FunComputeBblRegUsageStats(
            fun, REG_KIND_MAP_TYPICAL)
        loc_lac = 0
        loc_not_lac = 0
        for (_kind, lac), count in usage.items():
            if lac:
                loc_lac += count
            else:
                loc_not_lac += count

        reg_stats.FunComputeRegStatsExceptLAC(fun)
        reg_stats.FunComputeRegStatsLAC(fun)
        rs = reg_stats.FunCalculateRegStats(fun)
        print(
            f"# {fun.name:30} RegStats: {rs}  {loc_lac:2}/{loc_not_lac:2}")
    return stats
コード例 #4
0
ファイル: legalize.py プロジェクト: robertmuth/Cwerg
def PhaseGlobalRegAlloc(fun: ir.Fun, _opt_stats: Dict[str, int], fout):
    """Assign CpuRegs to the global regs of `fun` and spill the overflow.

    This phase introduces CpuRegs for globals and for situations where there
    is no choice which register to use, e.g. function parameters and results
    ("pre-allocated" regs).

    After this function has run, all globals have a valid cpu_reg, so care
    must be taken not to introduce new globals subsequently. If not enough
    cpu regs are available for all globals, some of them are spilled. We err
    on the side of spilling more: the biggest danger is to over-allocate and
    then lack registers for the intra-bbl register allocation.

    The global allocator as a whole, including the decision which globals to
    spill, is extremely simplistic at this time.

    Global and local register allocation are separated so that a
    straightforward linear scan allocator can be used for the locals. That
    allocator assumes each register is defined exactly once and hence does
    not work for globals.

    Args:
        fun: the function to process.
        _opt_stats: unused.
        fout: if truthy, a stream receiving a banner and the reg stats dump.
    """
    debug = None  # bind to a writable stream to trace the pool decisions

    def preallocated_mask(kind) -> int:
        # Bit-mask, indexed by register number, of the CpuRegs of `kind`
        # which are currently bound to a reg of `fun`.
        mask = 0
        for reg in fun.regs:
            if reg.HasCpuReg() and reg.cpu_reg.kind == kind:
                mask |= 1 << reg.cpu_reg.no
        return mask

    if fout:
        banner = "#" * 60
        print(banner, file=fout)
        print(f"# GlobalRegAlloc {fun.name}", file=fout)
        print(banner, file=fout)

    # print ("@@@@@@\n", "\n".join(serialize.FunRenderToAsm(fun)))

    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)

    # Both stats are keyed by (cpu reg kind, lac) pairs.
    kind_map = regs.REG_KIND_TO_CPU_REG_FAMILY
    local_reg_stats = reg_stats.FunComputeBblRegUsageStats(fun, kind_map)
    # Previous phases have introduced some cpu regs already - those are not
    # treated as globals.
    global_reg_stats = _FunGlobalRegStats(fun, kind_map)
    DumpRegStats(fun, local_reg_stats, fout)

    # ---- GPR regs ----
    pre_allocated_mask_gpr = preallocated_mask(regs.CpuRegKind.GPR)

    gpr_lac_key = (regs.CpuRegKind.GPR, True)
    gpr_not_lac_key = (regs.CpuRegKind.GPR, False)
    needed_gpr = RegsNeeded(len(global_reg_stats[gpr_lac_key]),
                            len(global_reg_stats[gpr_not_lac_key]),
                            local_reg_stats.get(gpr_lac_key, 0),
                            local_reg_stats.get(gpr_not_lac_key, 0))
    if debug:
        print(f"@@ GPR NEEDED {needed_gpr.global_lac} {needed_gpr.global_not_lac} "
              f"{needed_gpr.local_lac} {needed_gpr.local_not_lac}", file=debug)

    gpr_lac_candidates = regs.GPR_REGS_MASK & regs.GPR_LAC_REGS_MASK
    gpr_not_lac_candidates = regs.GPR_REGS_MASK & ~regs.GPR_LAC_REGS_MASK
    gpr_global_lac, gpr_global_not_lac = _GetRegPoolsForGlobals(
        needed_gpr, gpr_lac_candidates, gpr_not_lac_candidates,
        pre_allocated_mask_gpr)
    if debug:
        print(f"@@ GPR POOL {gpr_global_lac:x} {gpr_global_not_lac:x}", file=debug)

    to_be_spilled: List[ir.Reg] = []
    to_be_spilled.extend(regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[gpr_lac_key], gpr_global_lac, 0))

    # For the not-lac globals the pool is split into a first choice (the
    # part outside the lac mask) and a second choice (the part inside it).
    to_be_spilled.extend(regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[gpr_not_lac_key],
        gpr_global_not_lac & ~regs.GPR_LAC_REGS_MASK,
        gpr_global_not_lac & regs.GPR_LAC_REGS_MASK))

    # ---- Float regs (same procedure as for the GPR regs) ----
    pre_allocated_mask_flt = preallocated_mask(regs.CpuRegKind.FLT)

    flt_lac_key = (regs.CpuRegKind.FLT, True)
    flt_not_lac_key = (regs.CpuRegKind.FLT, False)
    needed_flt = RegsNeeded(len(global_reg_stats[flt_lac_key]),
                            len(global_reg_stats[flt_not_lac_key]),
                            local_reg_stats.get(flt_lac_key, 0),
                            local_reg_stats.get(flt_not_lac_key, 0))
    if debug:
        print(f"@@ FLT NEEDED {needed_flt.global_lac} {needed_flt.global_not_lac} "
              f"{needed_flt.local_lac} {needed_flt.local_not_lac}", file=debug)

    flt_lac_candidates = regs.FLT_REGS_MASK & regs.FLT_LAC_REGS_MASK
    flt_not_lac_candidates = regs.FLT_REGS_MASK & ~regs.FLT_LAC_REGS_MASK
    flt_global_lac, flt_global_not_lac = _GetRegPoolsForGlobals(
        needed_flt, flt_lac_candidates, flt_not_lac_candidates,
        pre_allocated_mask_flt)
    if debug:
        print(f"@@ FLT POOL {flt_global_lac:x} {flt_global_not_lac:x}", file=debug)

    to_be_spilled.extend(regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[flt_lac_key], flt_global_lac, 0))
    to_be_spilled.extend(regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[flt_not_lac_key],
        flt_global_not_lac & ~regs.FLT_LAC_REGS_MASK,
        flt_global_not_lac & regs.FLT_LAC_REGS_MASK))

    reg_alloc.FunSpillRegs(fun, o.DK.U32, to_be_spilled, prefix="$gspill")

    # Recompute everything (TODO: make this more selective to reduce work)
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)
    reg_stats.FunSeparateLocalRegUsage(fun)
コード例 #5
0
ファイル: legalize.py プロジェクト: mbrukman/Cwerg
def PhaseGlobalRegAlloc(fun: ir.Fun, _opt_stats: Dict[str, int], fout):
    """Assign CpuRegs to the global regs of `fun` and spill the overflow.

    This phase introduces CpuRegs for globals and for situations where there
    is no choice which register to use, e.g. function parameters and results
    ("pre-allocated" regs).

    After this function has run, all globals have a valid cpu_reg, so care
    must be taken not to introduce new globals subsequently. If not enough
    cpu regs are available for all globals, some of them are spilled.

    The global allocator as a whole, including the decision which globals to
    spill, is extremely simplistic at this time.

    Global and local register allocation are separated so that a
    straightforward linear scan allocator can be used for the locals. That
    allocator assumes each register is defined exactly once and hence does
    not work for globals.

    Args:
        fun: the function to process.
        _opt_stats: unused.
        fout: if truthy, a stream receiving a banner and the reg stats dump.
    """

    if fout:
        print("#" * 60, file=fout)
        print(f"# GlobalRegAlloc {fun.name}", file=fout)
        print("#" * 60, file=fout)

    regs.FunPushargConversion(fun)
    regs.FunPopargConversion(fun)

    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)

    # Note: REG_KIND_MAP_ARM maps all non-float registers to S32, so the
    # stats below are keyed by (S32 | F32 | F64, lac) pairs.
    local_reg_stats = reg_stats.FunComputeBblRegUsageStats(
        fun, REG_KIND_MAP_ARM)
    # Previous phases have introduced some cpu regs already - those are not
    # treated as globals.
    global_reg_stats = _FunGlobalRegStats(fun, REG_KIND_MAP_ARM)
    DumpRegStats(fun, local_reg_stats, fout)

    pre_allocated: Set[ir.CpuReg] = {
        reg.cpu_reg
        for reg in fun.regs if reg.HasCpuReg()
    }

    # Handle GPR regs
    needed_gpr = RegsNeeded(
        len(global_reg_stats[(o.DK.S32, True)]),
        len(global_reg_stats[(o.DK.S32, False)]),
        local_reg_stats.get((o.DK.S32, True), 0),
        # TODO: avoid fudge factor
        1 + local_reg_stats.get((o.DK.S32, False), 0))
    gpr_global_lac, gpr_global_not_lac = _GetRegPoolsForGlobals(
        needed_gpr, regs.GPR_CALLEE_SAVE_REGS.copy(),
        regs.GPR_NOT_LAC_REGS.copy(), pre_allocated)

    to_be_spilled: List[ir.Reg] = []
    to_be_spilled += _AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(o.DK.S32, True)], gpr_global_lac)
    to_be_spilled += _AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(o.DK.S32, False)], gpr_global_not_lac)

    # Handle Float regs. An F64 is counted as two F32 regs.
    needed_flt = RegsNeeded(
        len(global_reg_stats[(o.DK.F32, True)]) +
        2 * len(global_reg_stats[(o.DK.F64, True)]),
        # The not-lac demand must use the not-lac F64 globals. This used to
        # read the lac list a second time (copy-paste slip).
        len(global_reg_stats[(o.DK.F32, False)]) +
        2 * len(global_reg_stats[(o.DK.F64, False)]),
        local_reg_stats.get((o.DK.F32, True), 0) + 2 * local_reg_stats.get(
            (o.DK.F64, True), 0),
        # TODO: avoid fudge factor
        2 + local_reg_stats.get(
            (o.DK.F32, False), 0) + 2 * local_reg_stats.get(
                (o.DK.F64, False), 0))

    flt_global_lac, flt_global_not_lac = _GetRegPoolsForGlobals(
        needed_flt, regs.FLT_CALLEE_SAVE_REGS.copy(),
        regs.FLT_PARAMETER_REGS.copy(), pre_allocated)

    # F64 globals are handed to the assigner ahead of the F32 ones.
    to_be_spilled += _AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(o.DK.F64, True)] +
        global_reg_stats[(o.DK.F32, True)], flt_global_lac)
    to_be_spilled += _AssignCpuRegOrMarkForSpilling(
        global_reg_stats[(o.DK.F64, False)] +
        global_reg_stats[(o.DK.F32, False)], flt_global_not_lac)

    reg_alloc.FunSpillRegs(fun, o.DK.U32, to_be_spilled)

    # Recompute everything (TODO: make this more selective)
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)
    reg_stats.FunSeparateLocalRegUsage(fun)
コード例 #6
0
ファイル: legalize.py プロジェクト: robertmuth/Cwerg
def PhaseLegalization(fun: ir.Fun, unit: ir.Unit, _opt_stats: Dict[str, int],
                      fout):
    """Rewrite `fun` into a form the instruction selector can handle.

    Does a lot of the heavy lifting so that the instruction selector can
    remain simple and table driven:
    * lift almost all regs to 32bit width
      (NOTE(review): no widening pass is invoked in this body - confirm
      whether this bullet is still accurate)
    * rewrite Ins that cannot be expanded
    * rewrite immediates that cannot be expanded, except stack offsets which
      are dealt with in another pass

    The cpu_live_in/cpu_live_out sets are derived from the signature for
    every function; all further work is skipped for functions whose kind is
    not o.FUN_KIND.NORMAL.

    TODO: a function to change the calling signature is still missing
    (the original note ends unfinished here).

    Args:
        fun: the function to legalize.
        unit: the unit containing `fun`; forwarded to the passes that need it.
        _opt_stats: unused.
        fout: not used by this function.
    """
    fun.cpu_live_in = regs.PushPopInterface.GetCpuRegsForInSignature(
        fun.input_types)
    fun.cpu_live_out = regs.PushPopInterface.GetCpuRegsForOutSignature(
        fun.output_types)
    if fun.kind is not o.FUN_KIND.NORMAL:
        return

    # Getting rid of the pusharg/poparg now relieves us from having to pay attention to the
    # invariant that pushargs/popargs must be adjacent.
    lowering.FunPushargConversion(fun, regs.PushPopInterface)
    lowering.FunPopargConversion(fun, regs.PushPopInterface)

    # We did not bother with this addressing mode
    # TODO: we can likely avoid this by adding more cases to isel_tab.py
    lowering.FunEliminateStkLoadStoreWithRegOffset(fun,
                                                   base_kind=o.DK.A64,
                                                   offset_kind=o.DK.S32)

    # TODO: switch this to a WithRegOffset flavor
    lowering.FunEliminateMemLoadStore(fun,
                                      base_kind=o.DK.A64,
                                      offset_kind=o.DK.S32)

    lowering.FunEliminateCopySign(fun)
    # TODO: support a few special cases in the isel, e.g. cmpXX a 0, 1, x, y
    lowering.FunEliminateCmp(fun)

    canonicalize.FunCanonicalize(fun)
    # TODO: add a cfg linearization pass to improve control flow
    optimize.FunCfgExit(
        fun, unit)  # note: this may affect immediates as it flips branches

    # Handle most overflowing immediates.
    # This excludes immediates related to stack offsets which have not been determined yet
    _FunRewriteOutOfBoundsImmediates(fun, unit)

    # mul/div/rem need special treatment
    _FunRewriteDivRem(fun)

    _FunRewriteIntoAABForm(fun, unit)

    # Recompute everything (TODO: make this more selective to reduce work)
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)
    reg_stats.FunSeparateLocalRegUsage(
        fun
    )  # this has special hacks to avoid undoing _FunRewriteIntoAABForm()
    # DumpRegStats(fun, local_reg_stats)

    # Debugging aids, disabled:
    # if fun.name == "fibonacci": DumpFun("end of legal", fun)
    # if fun.name == "write_s": exit(1)
    sanity.FunCheck(fun, None)
コード例 #7
0
ファイル: legalize.py プロジェクト: robertmuth/Cwerg
def PhaseGlobalRegAlloc(fun: ir.Fun, _opt_stats: Dict[str, int], fout):
    """Assign CpuRegs to the global regs of `fun` and spill the overflow.

    This phase introduces CpuRegs for globals and for situations where there
    is no choice which register to use, e.g. function parameters and results
    ("pre-allocated" regs).

    After this function has run, all globals have a valid cpu_reg, so care
    must be taken not to introduce new globals subsequently. If not enough
    cpu regs are available for all globals, some of them are spilled. We err
    on the side of spilling more: the biggest danger is to over-allocate and
    then lack registers for the intra-bbl register allocation.

    The global allocator as a whole, including the decision which globals to
    spill, is extremely simplistic at this time.

    Global and local register allocation are separated so that a
    straightforward linear scan allocator can be used for the locals. That
    allocator assumes each register is defined exactly once and hence does
    not work for globals.

    Args:
        fun: the function to process.
        _opt_stats: unused.
        fout: if truthy, a stream receiving a banner and the reg stats dump.
    """

    def alloc_mask_where(kind_matches) -> int:
        # OR together the allocation masks of all CpuRegs currently bound to
        # a reg of `fun` whose kind satisfies `kind_matches`.
        mask = 0
        for reg in fun.regs:
            if reg.HasCpuReg() and kind_matches(reg.cpu_reg.kind):
                mask |= regs.A32RegToAllocMask(reg.cpu_reg)
        return mask

    if fout:
        banner = "#" * 60
        print(banner, file=fout)
        print(f"# GlobalRegAlloc {fun.name}", file=fout)
        print(banner, file=fout)

    # The pusharg/poparg instructions are not converted into moves using the
    # pre-allocated argument/result regs here; the calls are disabled:
    # regs.FunPushargConversion(fun)
    # regs.FunPopargConversion(fun)

    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)

    # Both stats are keyed by (cpu reg kind, lac) pairs.
    local_reg_stats = reg_stats.FunComputeBblRegUsageStats(
        fun, REG_KIND_TO_CPU_KIND)
    # Previous phases have introduced some cpu regs already - those are not
    # treated as globals.
    global_reg_stats = _FunGlobalRegStats(fun, REG_KIND_TO_CPU_KIND)
    DumpRegStats(fun, local_reg_stats, fout)

    # ---- GPR regs ----
    pre_allocated_mask_gpr = alloc_mask_where(
        lambda kind: kind == regs.CpuRegKind.GPR)

    gpr_lac = (regs.CpuRegKind.GPR, True)
    gpr_not_lac = (regs.CpuRegKind.GPR, False)
    # number of regs needed if we had indeed unlimited regs
    needed_gpr = RegsNeeded(
        len(global_reg_stats[gpr_lac]),
        len(global_reg_stats[gpr_not_lac]),
        local_reg_stats.get(gpr_lac, 0),
        local_reg_stats.get(gpr_not_lac, 0))
    # earmark some regs for globals
    gpr_lac_candidates = regs.GPR_REGS_MASK & regs.GPR_LAC_REGS_MASK
    gpr_not_lac_candidates = regs.GPR_REGS_MASK & ~regs.GPR_LAC_REGS_MASK
    gpr_pool_lac, gpr_pool_not_lac = _GetRegPoolsForGlobals(
        needed_gpr, gpr_lac_candidates, gpr_not_lac_candidates,
        pre_allocated_mask_gpr)

    # assign the earmarked regs to some globals and spill the rest
    to_be_spilled: List[ir.Reg] = []
    to_be_spilled.extend(regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[gpr_lac], gpr_pool_lac, 0))

    # For the not-lac globals the pool is split into a first choice (the
    # part outside the lac mask) and a second choice (the part inside it).
    to_be_spilled.extend(regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[gpr_not_lac],
        gpr_pool_not_lac & ~regs.GPR_LAC_REGS_MASK,
        gpr_pool_not_lac & regs.GPR_LAC_REGS_MASK))

    # ---- Float regs (same procedure as for the GPR regs) ----
    # Every non-GPR cpu reg counts here, i.e. not only the FLT kind.
    pre_allocated_mask_flt = alloc_mask_where(
        lambda kind: kind != regs.CpuRegKind.GPR)

    flt_lac = (regs.CpuRegKind.FLT, True)
    flt_not_lac = (regs.CpuRegKind.FLT, False)
    dbl_lac = (regs.CpuRegKind.DBL, True)
    dbl_not_lac = (regs.CpuRegKind.DBL, False)
    # a DBL reg is counted as two FLT regs
    needed_flt = RegsNeeded(
        len(global_reg_stats[flt_lac]) + 2 * len(global_reg_stats[dbl_lac]),
        len(global_reg_stats[flt_not_lac]) +
        2 * len(global_reg_stats[dbl_not_lac]),
        local_reg_stats.get(flt_lac, 0) +
        2 * local_reg_stats.get(dbl_lac, 0),
        local_reg_stats.get(flt_not_lac, 0) +
        2 * local_reg_stats.get(dbl_not_lac, 0))
    flt_lac_candidates = regs.FLT_REGS_MASK & regs.FLT_LAC_REGS_MASK
    flt_not_lac_candidates = regs.FLT_REGS_MASK & ~regs.FLT_LAC_REGS_MASK
    flt_pool_lac, flt_pool_not_lac = _GetRegPoolsForGlobals(
        needed_flt, flt_lac_candidates, flt_not_lac_candidates,
        pre_allocated_mask_flt)

    # DBL globals are handed to the assigner ahead of the FLT ones.
    to_be_spilled.extend(regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[dbl_lac] + global_reg_stats[flt_lac],
        flt_pool_lac, 0))

    to_be_spilled.extend(regs.AssignCpuRegOrMarkForSpilling(
        global_reg_stats[dbl_not_lac] + global_reg_stats[flt_not_lac],
        flt_pool_not_lac & ~regs.FLT_LAC_REGS_MASK,
        flt_pool_not_lac & regs.FLT_LAC_REGS_MASK))

    reg_alloc.FunSpillRegs(fun, o.DK.U32, to_be_spilled, prefix="$gspill")

    # Recompute everything (TODO: make this more selective)
    reg_stats.FunComputeRegStatsExceptLAC(fun)
    reg_stats.FunDropUnreferencedRegs(fun)
    liveness.FunComputeLivenessInfo(fun)
    reg_stats.FunComputeRegStatsLAC(fun)
    # establish per bbl SSA form by splitting liveranges
    reg_stats.FunSeparateLocalRegUsage(fun)