예제 #1
0
        opt = zopt
    elif cc != None:
        opt = opt + ' UCC=' + cc + ' UCCFLAGS="' + ccf + outf + '"'
    else:
        opt = opt + ' UCCFLAGS="' + outf + '"'


#   print "opt = '%s'" % opt
    cmnd = 'make %s%scase N=%d urout=%s %s' % (pre, blas, N, rout, opt)
    cmnds = 'cd %s/tune/blas/level1/%s ; %s ' % (ATLdir, ARCH, cmnd)
    print "cmnds = '%s'" % (cmnds)
    fo = os.popen(cmnds, 'r')
    lines = fo.readlines()
    err = fo.close()

(IFKOdir, fko) = fkocmnd.GetFKOinfo()
OUTdir = IFKOdir + '/blas/assembly'

(ATLdir, ARCH) = fkocmnd.FindAtlas(IFKOdir)
print ARCH
print "ATLdir='%s', ARCH='%s'" % (ATLdir, ARCH)

# [time,mflop] = l1cmnd.l1time(ATLdir, ARCH, 'd', 'dot', 80000, 'dot1_x1y1.c')
# print "time=%f, mflop=%f" % (time,mflop)

#
# Defaults
#
N = 80000
pres = l1cmnd.GetDefaultPre()
l1routs = l1cmnd.GetDefaultBlas()
예제 #2
0
파일: ifko.py 프로젝트: rdolbeau/math-atlas
def ifko0(l1bla, pre, N, M=None, lda=None):
#
#  Tune the fko flags for the kernel source IFKOdir/blas/<pre><l1bla>.b by
#  running a fixed sequence of tuning steps.  Each step hands the running
#  flag string (KFLAGS) to a helper and takes back a performance figure (mf)
#  and an updated flag string; the figure and a label for the step are
#  appended to mflist / testlist in lock step.
#
#  Parameters:
#     l1bla : kernel name; used to build the source file name, to look up
#             the output directory via kernels.GetBlasPath, and passed on to
#             every timing/tuning helper
#     pre   : prefix prepended to l1bla in the source file name (presumably
#             the BLAS precision prefix -- TODO confirm)
#     N     : problem size handed unchanged to the timing/tuning helpers
#     M,lda : optional extra sizes, handed unchanged to the same helpers
#
#  Returns the tuple (res, KFLAGS, mf, tst, testlist, mflist):
#     res      : result of fkocmnd.GetOptVals for the final flags
#     KFLAGS   : final flag string (still contains the -o <outf> <rout> tail)
#     mf       : performance figure of the last step that produced one
#     tst      : result of cmnd.test on the generated fkorout.s
#     testlist : labels of the steps that were recorded, in order
#     mflist   : performance figure recorded for each label in testlist
#
#  Names read from module scope (not defined in this function): fkocmnd,
#  cmnd, kernels, URF, SB, skipOpt, forceOpt, opt, optT, the tuning helpers
#  (ifko_PathXform, ifko_Vec, ifko_writeNT, FindPFD, ifko_pftype, FindUR,
#  FindBET, FindRE) and isSV.  isSV is also WRITTEN here (see the global
#  statement below), so a call can change module state seen by later calls.
#
   (IFKOdir, fko) = fkocmnd.GetFKOinfo()
   (ATLdir, ARCH) = fkocmnd.FindAtlas(IFKOdir)
   rout =  IFKOdir + '/blas/' + pre + l1bla + '.b'
   #outf =  ATLdir + '/tune/blas/level1/' + l1bla.upper() + '/fkorout.s'
   outf =  ATLdir + kernels.GetBlasPath(l1bla) + '/fkorout.s'
#
#  Majedul: calling new info func, info represents the old list
#  new data: [npath, red2onePath, vecMethod, vpathinfo, arrtypes] at the end
#
#  i.e. entries 0..10 of newinfo are copied into the old-style `info` list
#  and entries 11..15 are unpacked into the five new values.
#
   #info = fkocmnd.info(fko, rout)

   newinfo = fkocmnd.NewInfo(fko, rout)
   info = [newinfo[i] for i in range(11) ]
   [npath, red1path, vecm, vpath, arrtypes] = [ newinfo[i] for i in range(11,16)]
   
#
#  ncache is forwarded to the path/vector/pftype helpers; vec is used below
#  only as a yes/no switch for attempting vectorization.
#
   ncache = info[0]
   vec = info[5]
   #(fparrs, fpsets, fpuses, fpurs) = fkocmnd.GetFPInfo(info)
   (fparrs, fpsets, fpuses, fpurs) = fkocmnd.GetFPInfo(newinfo)
#
#  NOTE(review): nfp, fpuses and fpurs are not read again in this function.
#
   nfp = len(fparrs)
#
#  Find out the default flags (it includes vector, default prefetch and unroll)
#
#  NOTE(review): the KFLAGS built in the next three statements is only
#  printed; it is overwritten by "KFLAGS = KF0" further down before anything
#  reads it, so the "-o ..." append here has no effect on the result.
#
#  NOTE(review): GetOptStdFlags is called with 5 positional arguments here
#  and for KFn, but with 6 for KFv below, where the 5th is SB (or 0) and the
#  6th is URF.  In the 5-argument calls URF therefore sits in the position
#  the 6-argument calls use for SB -- confirm against the helper's signature.
#
   #KFLAGS = fkocmnd.GetStandardFlags(fko, rout, pre) 
   KFLAGS = fkocmnd.GetOptStdFlags(fko, rout, pre, 1, URF) 
   print "\n   Default Flag = " + KFLAGS 
   KFLAGS = KFLAGS + " -o " + str(outf) + " " + rout
   mflist = []
   testlist = []
   #print KFLAGS
#
#  Majedul: default and vect case would not be same now. Vspec may be 
#  worse than NonVec case.
#  So, I will choose the best as the default for the later optimization
#

#
#  check best scalar xforms, delete any vector flag
#
   #j = KFLAGS.find("-V")
   #if j != -1 :
      #KFn = KFLAGS[0:j-1] + KFLAGS[j+2:]
#
#  find out best standard scalar flag 
#
   KFn = fkocmnd.GetOptStdFlags(fko, rout, pre, 0, URF) 
   KFn = KFn + " -o " + str(outf) + " " + rout
   #print KFn
#
#  standard flag without vect
#
#  This is the baseline: compile with KF0, time the result, and record it
#  under the label "default".  t0 is not used afterwards; only mf0 is kept.
#
   KF0 = KFn
   fkocmnd.callfko(fko, KF0)
   [t0,mf0] = cmnd.time(ATLdir, ARCH, pre, l1bla, N, M, lda, "fkorout.s", 
                        "gcc", "-x assembler-with-cpp", opt=opt)
   mflist.append(mf0)
   testlist.append("default") ## this is using std flags
   print "\n   Default Flag = " + KF0 

#
#  Finding the best path reduction option
#
#  Only attempted when more than one path was reported.  The result is
#  always recorded, but it replaces the baseline (mf0/KF0) only when its
#  figure is strictly higher.
#
   if npath > 1:
      [mfs, KFs] = ifko_PathXform(ATLdir, ARCH, KFn, ncache, fko, rout, pre,
                                 l1bla, N, M, lda,  npath, red1path)
      mflist.append(mfs)
      testlist.append("PathXform")
      if (mfs > mf0) :
         mf0 = mfs
         KF0 = KFs
#
#  Finding the best vector option with/without path reduction
#
#  isSV is declared global because the final else-branch below assigns it.
#  The two GetOptStdFlags calls differ only in the 5th argument: SB itself
#  when SB is truthy, the literal 0 otherwise.
#
   global isSV;
   if SB:
      KFv = fkocmnd.GetOptStdFlags(fko, rout, pre, 1, SB, URF)
   else:
      KFv = fkocmnd.GetOptStdFlags(fko, rout, pre, 1, 0, URF)
   
   print "\n   Standad Flag for Vect = " + KFv
   
   KFv = KFv + " -o " + str(outf) + " " + rout
#
#  Vectorization is timed only when vec is truthy and 'v' is not in skipOpt.
#  Outcome: adopt it when strictly faster; otherwise adopt it anyway when
#  forceOpt names 'sv', 'vrc' or 'vmmr'; otherwise keep the scalar flags and
#  clear isSV.
#  NOTE(review): isSV is left untouched when vec is false or when 'v' is
#  skipped, so the isSV checks further down still see its previous value in
#  those cases -- confirm that this is intended.
#
   if vec:
      if 'v' in skipOpt:
         print '\n   SKIPPING VECTORIZATION'
      else:
         [mfv, KFv] =  ifko_Vec(ATLdir, ARCH, KFv, ncache, fko, rout, pre, 
                                l1bla, N, M, lda, npath, vecm, vpath)
         mflist.append(mfv)
         testlist.append("vect")
         if (mfv > mf0) :
            mf0 = mfv
            KF0 = KFv
#
#        if we have forceOpt, we will keep vec even if it's not better
#
         elif  'sv' in forceOpt or 'vrc' in forceOpt or 'vmmr' in forceOpt:
            print '\n   FORCING VECTORIZATION'
            mf0 = mfv
            KF0 = KFv
         else:  # no vector is selected, skip the SB too  #
            isSV = 0
#
#  choose the better as the ref of later opt
#
#  From here on KFLAGS/mf are the running flags and figure that every later
#  step receives and replaces.
#
   KFLAGS = KF0
   mf = mf0
   
   print "\n   FLAGS so far =", fkocmnd.RemoveFilesFromFlags(l1bla, KFLAGS)

#
#  Previous code which is substituted by the above codes
#

#
#  Find performance of default case
#
#   j = KFLAGS.find("-V")
#   if j != -1 :
#      KFn = KFLAGS[0:j-1] + KFLAGS[j+2:]
#      fkocmnd.callfko(fko, KFn)
#      [t,mf] = l1cmnd.time(ATLdir, ARCH, pre, l1bla, N, "fkorout.s", 
#                           "gcc", "-x assembler-with-cpp", opt=opt)
#      mflist.append(mf)
#      testlist.append("default")
#      fkocmnd.callfko(fko, KFLAGS)
#      [t,mf] = l1cmnd.time(ATLdir, ARCH, pre, l1bla, N, "fkorout.s", 
#                           "gcc", "-x assembler-with-cpp", opt=opt)
#   else :
#      fkocmnd.callfko(fko, KFLAGS)
#      [t,mf] = l1cmnd.time(ATLdir, ARCH, pre, l1bla, N, "fkorout.s", 
#                           "gcc", "-x assembler-with-cpp", opt=opt)
#      testlist.append("default")
#      mflist.append(mf)
#   mflist.append(mf)
#   testlist.append("vect")
#
#  Eventually, want to try both -V and scalar, but for now, use -V whenever
#  possible

#
#  Find if we want to use cache-through writes on any arrays
#
#  Candidates are the arrays whose fpsets entry is positive.  ifko_writeNT
#  is called only when there is at least one candidate, but a "writeNT"
#  entry is recorded either way (with the unchanged mf when nothing ran).
#
   if 'wnt' in skipOpt:
      print '\n   SKIPPING WNT'
   else:
      n = len(fpsets)
      i = 0
      wnt = []
      while i < n:
         if fpsets[i] > 0 :
#        and fpuses[i] == 0:
            wnt.append(fparrs[i])
         i += 1
      if len(wnt) > 0:
         [mf,KFLAGS,wnt] = ifko_writeNT(ATLdir, ARCH, KFLAGS, fko, rout, pre,
                                     l1bla, N, M, lda, wnt)
      mflist.append(mf)
      testlist.append("writeNT") 
#
#  Find best PFD for each pfarr
#
#  The arrays are tuned one after another, each starting from the flags the
#  previous one returned.  Only the figure left after the last array is
#  recorded; if pfarrs is empty, mf is recorded unchanged.
#
   pfarrs = fparrs
   pfsets = fpsets
   for arr in pfarrs:
      [mf,KFLAGS] = FindPFD(ATLdir, ARCH, KFLAGS, fko, rout, pre,l1bla, N,M,lda,
                            info, arr)
   mflist.append(mf)
   testlist.append("pfdist")
   KFLAGS = fkocmnd.RemoveRedundantPrefFlags(KFLAGS, pfarrs)
#
#  Find best pf type
#
   [mf,KFLAGS] = ifko_pftype(ATLdir, ARCH, KFLAGS, ncache, fko, rout, pre, 
                             l1bla, N, M, lda, info, pfarrs, pfsets)
   mflist.append(mf)
   testlist.append("pftype")
   print "\n   FLAGS so far =", fkocmnd.RemoveFilesFromFlags(l1bla, KFLAGS)
   
#
#  Find best unroll
#
#  A truthy URF means the unroll factor was fixed by the caller, so the
#  search is skipped and no "unroll" entry is recorded.
#
   if URF:
      print '\n   SKIPPING UNROLL TUNNING : FORCED TO %d' %URF
   else:
      [mf,KFLAGS] = FindUR(ATLdir, ARCH, KFLAGS, fko, rout, pre, l1bla, N, M, 
                            lda, info)
      mflist.append(mf)
      testlist.append("unroll")

#
#  Find best bet for over speculation
#  FIXME: find out the -U and pass it to the function
#  FIXME: can't apply Over Spec if there is a memory write inside the loop
#
#  Runs only while isSV is truthy.  The kernel exclusions are substring
#  tests on l1bla, so any kernel whose name merely contains "sin" or "cos"
#  is excluded as well.
#
   
   if isSV:
      if l1bla.find("irk1amax") != -1:
         print '\n   SKIPPING STRONGER BET UNROLLING for IRK1AMAX' 
      elif l1bla.find("irk2amax") != -1:
         print '\n   SKIPPING STRONGER BET UNROLLING for IRK2AMAX' 
      elif l1bla.find("irk3amax") != -1:
         print '\n   SKIPPING STRONGER BET UNROLLING for IRK3AMAX' 
      elif l1bla.find("sin") != -1:
         print '\n   SKIPPING STRONGER BET UNROLLING for SIN' 
      elif l1bla.find("cos") != -1:
         print '\n   SKIPPING STRONGER BET UNROLLING for COS' 
      else:
         [mf,KFLAGS] = FindBET(ATLdir, ARCH, KFLAGS, fko, rout, pre, l1bla, N, 
                               M, lda)
         mflist.append(mf)
         testlist.append("OverSpec")

#
#  See if we can apply accumulator expansion
#
#   acc = fkocmnd.GetFPAccum(info)
#   nacc = len(acc)
#   if nacc > 0 and nacc < 3:
#      [mf,KFLAGS] = FindAE(ATLdir, ARCH, KFLAGS, fko, rout, pre, l1bla, N, acc)
#   mflist.append(mf)
#   testlist.append("accexpans")

#
#  Majedul: See if we can apply scalar expansion (accexpan + man/min expansion)
#
#  Skipped when 're' is in skipOpt, when isSV is truthy, or when the kernel
#  name contains "iamax".  Otherwise FindRE runs only for 1 or 2
#  accumulators, but an "rdexp" entry is recorded in that branch regardless.
#  acc is computed unconditionally because GetOptVals needs it at the end.
#
   acc = fkocmnd.GetFPAccum(info)
   nacc = len(acc)
   if 're' in skipOpt:
      print '\n   SKIPPING SCALAR EXPANSION'
   elif isSV:
      print '\n   SKIPPING SCALAR EXPANSION: NOT SUPPORTED WITH SV'
   elif l1bla.find("iamax") != -1:
      print '\n   SKIPPING SCALAR EXPANSION FOR IAMAX'
   else:
      if nacc > 0 and nacc < 3:
         [mf,KFLAGS] = FindRE(ATLdir, ARCH, KFLAGS, fko, rout, pre, l1bla, N, 
                              M, lda, acc)
      mflist.append(mf)
      testlist.append("rdexp")
#
#  Majedul: shifted it here to test
#
#
#  Find if we want to use cache-through writes on any arrays
#
#  The triple-quoted string below is a disabled copy of the writeNT / PFD /
#  pftype steps: as a bare string expression it is evaluated and discarded.
#  NOTE(review): it is stale relative to the live code above -- its
#  ifko_writeNT and ifko_pftype calls do not pass M and lda -- so it cannot
#  simply be re-enabled by removing the quotes.
#
   """if 'wnt' in skipOpt:
      print '\n   SKIPPING WNT'
   else:
      n = len(fpsets)
      i = 0
      wnt = []
      while i < n:
         if fpsets[i] > 0 :
#        and fpuses[i] == 0:
            wnt.append(fparrs[i])
         i += 1
      if len(wnt) > 0:
         [mf,KFLAGS,wnt] = ifko_writeNT(ATLdir, ARCH, KFLAGS, fko, rout, pre,
                                     l1bla, N, wnt)
      mflist.append(mf)
      testlist.append("writeNT") 
#
#  Find best PFD for each pfarr
#
   pfarrs = fparrs
   pfsets = fpsets
   for arr in pfarrs:
      [mf,KFLAGS] = FindPFD(ATLdir, ARCH, KFLAGS, fko, rout, pre,l1bla, N, M, 
                            lda, info, arr)
   mflist.append(mf)
   testlist.append("pfdist")
   KFLAGS = fkocmnd.RemoveRedundantPrefFlags(KFLAGS, pfarrs)
#
#  Find best pf type
#
   [mf,KFLAGS] = ifko_pftype(ATLdir, ARCH, KFLAGS, ncache, fko, rout, pre, 
                             l1bla, N, info, pfarrs, pfsets)
   mflist.append(mf)
   testlist.append("pftype")
   print "\n   FLAGS so far =", fkocmnd.RemoveFilesFromFlags(l1bla, KFLAGS)
   """
#
#  testing: re-tune the prefetch distance!
#  NOTE:  this re-tuning can be omitted just by enabling the comment
#         (i.e. removing the leading '#' from both '#"""' marker lines turns
#         the section between them into a discarded string)
#
   #"""
#
#  Put the prefetch distances back to their defaults, then repeat the
#  per-array PFD search on top of everything chosen so far.  The figure left
#  after the last array is recorded under the separate label "pfd2".
#
   #KFLAGS = fkocmnd.SetDefaultPFD(KFLAGS, info)
   KFLAGS = fkocmnd.SetDefaultPFD(KFLAGS, newinfo)
   #print "default PFD: ", KFLAGS
   print "\n   TUNING PFD AGAIN: "
   for arr in pfarrs:
      [mf,KFLAGS] = FindPFD(ATLdir, ARCH, KFLAGS, fko, rout, pre,l1bla, N, M, 
                            lda, info, arr)
   KFLAGS = fkocmnd.RemoveRedundantPrefFlags(KFLAGS, pfarrs)
#
# FIXME: it will create problem for the calculation of % of improvement
#
#   if 'pfdist' in testlist:
#      j = testlist.index('pfdist')
#      mflist[j] = mf
#   else:
#      mflist.append(mf)
#      testlist.append("pfdist")
   #KFLAGS = fkocmnd.RemoveRedundantPrefFlags(KFLAGS, pfarrs)

   mflist.append(mf)
   testlist.append("pfd2")
   
   #"""

#
#  Find performance of best case
#
#  The final flags are not re-timed here (that code is disabled below); the
#  figure printed and returned is whatever the last executed step left in mf.
#  cmnd.test is then run on fkorout.s with optT rather than opt.
#
#   fkocmnd.callfko(fko, KFLAGS)
#   [t,mf] = l1cmnd.time(ATLdir, ARCH, pre, l1bla, N, "fkorout.s", 
#                        "gcc", "-x assembler-with-cpp", opt=opt)
   print "\n\n   BEST FLAGS FOUND (%.2f) = %s" % (mf,
         fkocmnd.RemoveFilesFromFlags(l1bla, KFLAGS))
   res = fkocmnd.GetOptVals(KFLAGS, pfarrs, pfsets, acc)
   tst = cmnd.test(ATLdir, ARCH, pre, l1bla, N, M, lda, "fkorout.s",
                     cc="gcc", ccf="-x assembler-with-cpp", opt=optT)
   #tst = l1cmnd.silent_test(ATLdir, ARCH, pre, l1bla, N, "fkorout.s",
   #                     cc="gcc", ccf="-x assembler-with-cpp", opt=optT)

   return(res, KFLAGS, mf, tst, testlist, mflist)