Example #1
0
def NonFPModule(function):
    """Model the effect of removing all non-floating-point instructions.

    The non-FP share of the instruction count is applied to the non-idle
    elapsed time and to the cpiBase, cpiMem and cpiBranchPredictor
    components; those amounts are subtracted on a deep copy of ``function``.

    Args:
        function: Mapping holding "instruction_count",
            "nonidle_elapsed_time", "cpiBase", "cpiMem" and
            "cpiBranchPredictor", plus whatever ``asohelper.get_fp_addsub``
            and ``asohelper.get_fp_muldiv`` read from it. "time_won_back"
            and an "optimizations" list are also required when a gain is
            recorded.

    Returns:
        A deep copy of ``function``. When the estimated non-FP time is
        positive, the copy has the reductions applied, a "NonFP" entry
        appended to "optimizations", and "originalfunction" set to the
        input object. Otherwise the copy is returned unchanged.
    """
    instruction_count = function["instruction_count"]
    nonidle_elapsed_time = function["nonidle_elapsed_time"]
    fp_addsub = asohelper.get_fp_addsub(function)
    fp_muldiv = asohelper.get_fp_muldiv(function)
    cpiBase = function["cpiBase"]
    cpiMem = function["cpiMem"]
    cpiBranchPredictor = function["cpiBranchPredictor"]
    # Whatever part of the non-idle time is not attributed to the three
    # tracked components.
    cpiOther = nonidle_elapsed_time - cpiBase - cpiMem - cpiBranchPredictor
    fp_instructions = fp_addsub + fp_muldiv
    non_fp_instructions = instruction_count - fp_instructions

    if instruction_count > 0:
        # The fraction must be computed inside this guard: dividing before
        # the check raised ZeroDivisionError for a function with no
        # instructions.
        # NOTE(review): relies on true division; with integer counters under
        # Python 2 this needs `from __future__ import division` -- confirm.
        non_fp_fraction = non_fp_instructions / instruction_count
        non_fp_time = non_fp_fraction * (
            cpiBase + cpiMem + cpiBranchPredictor + cpiOther)
    else:
        non_fp_fraction = 0
        non_fp_time = 0

    optimizedfunction = copy.deepcopy(function)
    if non_fp_time > 0:
        # Apply the reduction to the copy only; the input stays reachable
        # through "originalfunction".
        optimizedfunction["instruction_count"] -= non_fp_instructions
        optimizedfunction["time_won_back"] += non_fp_time
        optimizedfunction["nonidle_elapsed_time"] -= non_fp_time
        optimizedfunction["cpiBase"] -= non_fp_fraction * cpiBase
        optimizedfunction["cpiMem"] -= non_fp_fraction * cpiMem
        optimizedfunction["cpiBranchPredictor"] -= (
            non_fp_fraction * cpiBranchPredictor)
        optimizedfunction["optimizations"].append(
            dict(optimization="NonFP", timegain=non_fp_time))
        optimizedfunction["originalfunction"] = function

    return optimizedfunction
Example #2
0
def writerooflinestats(outputfile):
    """Write roofline data and per-function statistics to a JSON file.

    Reads the module-level ``functiondata`` and ``functionpercentages`` and
    writes one JSON object with the keys "rooflinedata", "functioninfo",
    "functionpercentages", "peakfpperformance" and "peakmembandwidth".

    Args:
        outputfile: Path of the file to create or overwrite.
    """
    rooflinedata = []
    for data in functiondata:
        fpinstr = (asohelper.get_fp_addsub(data)
                   + asohelper.get_fp_muldiv(data))
        # Either coordinate stays 0 when its denominator is not positive.
        x = 0
        y = 0
        if data["l3miss"] > 0:
            # per byte, so division by 64
            x = float((fpinstr / data["l3miss"]) / 64)
        if data["nonidle_elapsed_time"] > 0:
            # GFLOPS (per the original author's note)
            y = float(fpinstr / data["nonidle_elapsed_time"] * 1e6)
        rooflinedata.append([x, y])

    # Work on a copy so the rescaling below does not touch functiondata.
    functioninfo = copy.deepcopy(functiondata)
    for function in functioninfo:
        function["nonidle_elapsed_time"] /= 1e6
        function["cpi"] = getCPI(function)

    output = {
        "rooflinedata": rooflinedata,
        "functioninfo": functioninfo,
        "functionpercentages": functionpercentages,
        "peakfpperformance": getPeakFPPerformance(),
        "peakmembandwidth": getPeakMemBandwidth(),
    }

    # Serialise before opening the file so a serialisation error does not
    # leave a truncated file behind; `with` closes the handle even if the
    # write itself fails.
    payload = json.dumps(output, indent=4)
    with open(outputfile, "w") as f:
        f.write(payload)
def VectorizationModule(function):
    """Estimate the time gained if all floating-point work were vectorized.

    cpiBase is split by instruction share into packed-double, scalar-double,
    packed-single, scalar-single and non-FP parts. Each FP part is divided
    by the number of usable issue ports; the scalar-double part is divided
    by a further 2 and the scalar-single part by a further 4. The non-FP
    part and the cpiMem, cpiBranchPredictor and remaining components are
    left as they are.

    Args:
        function: Mapping holding "instruction_count",
            "nonidle_elapsed_time", "cpiBase", "cpiMem",
            "cpiBranchPredictor", the sixteen add/sub/mul/div counters with
            suffixes pd/sd/ss/ps, plus whatever the ``asohelper`` getters
            read. "time_won_back" and an "optimizations" list are also
            required when a gain is recorded.

    Returns:
        A deep copy of ``function``. When the time gain is positive, the
        copy has "time_won_back", "nonidle_elapsed_time" and "cpiBase"
        updated, a "Vectorization" entry appended to "optimizations", and
        "originalfunction" set to the input object.

    Raises:
        AssertionError: If the counters do not add up to the instruction
            count, the cpiBase parts do not (after rounding) add up to
            cpiBase, the gain is negative, or a gain is non-zero although
            there are no FP instructions.
    """
    optimizedfunction = copy.deepcopy(function)
    instruction_count = function["instruction_count"]
    nonidle_elapsed_time = function["nonidle_elapsed_time"]
    fp_addsub = asohelper.get_fp_addsub(function)
    fp_muldiv = asohelper.get_fp_muldiv(function)
    cpiBase = function["cpiBase"]
    cpiMem = function["cpiMem"]
    cpiBranchPredictor = function["cpiBranchPredictor"]
    cpiOther = nonidle_elapsed_time - cpiBase - cpiMem - cpiBranchPredictor
    fp_instructions = fp_addsub + fp_muldiv
    non_fp_instructions = instruction_count - fp_instructions

    def flavour_total(suffix):
        # Sum of the add/sub/mul/div counters for one precision/packing.
        return sum(function[op + suffix] for op in ("add", "sub", "mul", "div"))

    packed_double = flavour_total("pd")
    scalar_double = flavour_total("sd")
    packed_single = flavour_total("ps")
    scalar_single = flavour_total("ss")

    # Check that the counters account for every instruction. The original
    # sum left out addps even though it is part of the packed-single share
    # below; all sixteen counters are included here.
    # NOTE(review): assumes the asohelper FP totals include addps -- confirm.
    assert instruction_count == (
        non_fp_instructions + packed_double + scalar_double
        + packed_single + scalar_single)

    if not instruction_count > 0:
        return optimizedfunction

    # With only add/sub or only mul/div present a single issue port is
    # usable per FP instruction; otherwise both ports are used together.
    n_ports = 1 if (fp_addsub == 0 or fp_muldiv == 0) else 2

    # Share of cpiBase attributed to each instruction class.
    cpiBasePD = (packed_double / instruction_count) * cpiBase
    cpiBaseSD = (scalar_double / instruction_count) * cpiBase
    cpiBasePS = (packed_single / instruction_count) * cpiBase
    cpiBaseSS = (scalar_single / instruction_count) * cpiBase
    cpiBaseNonFP = (non_fp_instructions / instruction_count) * cpiBase

    # The sum of the cpiBase components should be the total cpiBase.
    total = cpiBasePD + cpiBaseSD + cpiBasePS + cpiBaseSS + cpiBaseNonFP
    assert round(cpiBase) == round(total), "%s != %s" % (
        round(cpiBase),
        round(total),
    )

    new_cpiBase = (
        cpiBasePD / n_ports              # already packed double
        + cpiBaseSD / (2 * n_ports)      # vectorization: SD -> PD
        + cpiBasePS / n_ports            # already packed single
        + cpiBaseSS / (4 * n_ports)      # vectorization: SS -> PS
        + cpiBaseNonFP                   # non-FP cannot be vectorized
    )
    # Add the branch, mem and other components back unchanged.
    newtime = new_cpiBase + (cpiMem + cpiBranchPredictor + cpiOther)

    time_gain = nonidle_elapsed_time - newtime
    assert time_gain >= 0, "we should not gain negative time"
    if fp_instructions == 0:
        # Without any floating-point instruction the gain should be zero.
        assert time_gain == 0, "%s != %s" % (
            time_gain,
            0,
        )

    if time_gain > 0:
        optimizedfunction["time_won_back"] += time_gain
        optimizedfunction["nonidle_elapsed_time"] -= time_gain
        optimizedfunction["cpiBase"] = new_cpiBase
        optimizedfunction["optimizations"].append(
            dict(optimization="Vectorization", timegain=time_gain))
        optimizedfunction["originalfunction"] = function

    return optimizedfunction
Example #4
0
def VectorizationModule(function):
    """Estimate the gain from vectorizing scalar floating-point instructions.

    The cpiBase component is apportioned by instruction count over packed
    double, scalar double, packed single, scalar single and non-FP
    instructions. The scalar-double share is divided by 2 and the
    scalar-single share by 4, every FP share is divided by the number of
    usable issue ports, and the other components are carried over unchanged.

    Args:
        function: Mapping with "instruction_count", "nonidle_elapsed_time",
            "cpiBase", "cpiMem", "cpiBranchPredictor" and the sixteen
            add/sub/mul/div counters (suffixes pd, sd, ss, ps), plus
            whatever the ``asohelper`` getters read. "time_won_back" and an
            "optimizations" list are needed when a gain is recorded.

    Returns:
        A deep copy of ``function``, updated only when the computed gain is
        positive ("time_won_back", "nonidle_elapsed_time", "cpiBase",
        "optimizations", "originalfunction").

    Raises:
        AssertionError: When one of the internal consistency checks fails.
    """
    optimizedfunction = copy.deepcopy(function)
    instruction_count = function["instruction_count"]
    nonidle_elapsed_time = function["nonidle_elapsed_time"]
    fp_addsub = asohelper.get_fp_addsub(function)
    fp_muldiv = asohelper.get_fp_muldiv(function)
    cpiBase = function["cpiBase"]
    cpiMem = function["cpiMem"]
    cpiBranchPredictor = function["cpiBranchPredictor"]
    cpiOther = nonidle_elapsed_time - cpiBase - cpiMem - cpiBranchPredictor
    fp_instructions = fp_addsub + fp_muldiv
    non_fp_instructions = instruction_count - fp_instructions

    # add instructions
    addpd = function["addpd"]  # packed double precision
    addsd = function["addsd"]  # double precision
    addss = function["addss"]  # single precision
    addps = function["addps"]  # packed single precision

    # sub instructions
    subpd = function["subpd"]  # packed double precision
    subsd = function["subsd"]  # double precision
    subss = function["subss"]  # single precision
    subps = function["subps"]  # packed single precision

    # mul instructions
    mulpd = function["mulpd"]  # packed double precision
    mulsd = function["mulsd"]  # double precision
    mulss = function["mulss"]  # single precision
    mulps = function["mulps"]  # packed single precision

    # div instructions
    divpd = function["divpd"]  # packed double precision
    divsd = function["divsd"]  # double precision
    divss = function["divss"]  # single precision
    divps = function["divps"]  # packed single precision

    # Check if the number of instructions is correct. addps was absent from
    # this sum although it is counted in the packed-single share below; it
    # is included now so the check covers all sixteen counters.
    # NOTE(review): assumes the asohelper FP totals include addps -- confirm.
    assert instruction_count == (
        non_fp_instructions
        + addpd + addsd + addss + addps
        + subpd + subsd + subss + subps
        + mulpd + mulsd + mulss + mulps
        + divpd + divsd + divss + divps
    )

    if instruction_count > 0:
        if fp_addsub == 0 or fp_muldiv == 0:
            # only add/sub or only mul/div: one issue port per fp-instruction
            n_ports = 1
        else:
            # the two issue ports can be used simultaneously
            n_ports = 2

        newtime = 0

        # already vectorized, double precision
        cpiBasePD = (
            (addpd + subpd + mulpd + divpd) / instruction_count) * cpiBase
        newtime += cpiBasePD / n_ports

        # vectorization SD -> PD
        cpiBaseSD = (
            (addsd + subsd + mulsd + divsd) / instruction_count) * cpiBase
        newtime += cpiBaseSD / (2 * n_ports)

        # already vectorized, single precision
        cpiBasePS = (
            (addps + subps + mulps + divps) / instruction_count) * cpiBase
        newtime += cpiBasePS / n_ports

        # vectorization SS -> PS
        cpiBaseSS = (
            (addss + subss + mulss + divss) / instruction_count) * cpiBase
        newtime += cpiBaseSS / (4 * n_ports)

        # non-FP instructions cannot be vectorized; their share is kept
        cpiBaseNonFP = (non_fp_instructions / instruction_count) * cpiBase
        newtime += cpiBaseNonFP

        # Everything accumulated so far is the new cpiBase component.
        new_cpiBase = newtime
        # add the branch, mem and other component
        newtime += cpiMem + cpiBranchPredictor + cpiOther

        # the sum of the cpiBase components should be the total cpiBase
        total = cpiBasePD + cpiBaseSD + cpiBasePS + cpiBaseSS + cpiBaseNonFP
        assert round(cpiBase) == round(total), "%s != %s" % (round(cpiBase), round(total),)

        time_gain = nonidle_elapsed_time - newtime
        if time_gain > 0:
            # Record the gain on the copy; the input object is kept
            # reachable under "originalfunction".
            optimizedfunction["time_won_back"] += time_gain
            optimizedfunction["nonidle_elapsed_time"] -= time_gain
            optimizedfunction["cpiBase"] = new_cpiBase
            optimizedfunction["optimizations"].append(
                dict(optimization="Vectorization", timegain=time_gain))
            optimizedfunction["originalfunction"] = function

        assert time_gain >= 0, "we should not gain negative time"

        if fp_instructions == 0:
            # without any floating point instruction the gain should be zero
            assert time_gain == 0, "%s != %s" % (time_gain, 0,)

    return optimizedfunction