Beispiel #1
0
def is_unfloppy(file,thresh):
  """Flag a job whose FLOP rate is low relative to its DRAM traffic.

  Loads the summed 'amd64_core'/'SSE_FLOPS', 'amd64_sock'/'DRAM' and
  'cpu'/'user' series from `file`, accumulates their per-interval rates
  over all hosts, averages them over the job and normalizes each by the
  hard-coded `peak` entry.

  Returns True when the normalized CPU value exceeds 0.5 and the
  normalized FLOP/DRAM ratio is below `thresh`, False otherwise.
  Returns None when the file cannot be loaded, is rejected by
  tspl_utils.checkjob, or has fewer than 2 hosts.
  """
  k1=['amd64_core','amd64_sock','cpu']
  k2=['SSE_FLOPS', 'DRAM',      'user']
  # NOTE(review): presumably peak FLOP rate, peak DRAM rate and a unit
  # scale for the CPU fraction -- confirm the units against the counters.
  peak=[ 2.3e9*16*2, 24e9, 1.]

  try:
    ts=tspl.TSPLSum(file,k1,k2)
  except tspl.TSPLException:
    return

  if not tspl_utils.checkjob(ts,3600,[x+1 for x in range(16)]): # 1 hour
    return
  elif ts.numhosts < 2: # At least 2 hosts
    print(ts.j.id + ': 1 host')
    return

  dt = numpy.diff(ts.t)
  nsteps = len(ts.t)-1
  gfloprate = numpy.zeros(nsteps)
  gdramrate = numpy.zeros(nsteps)
  gcpurate  = numpy.zeros(nsteps)
  for h in ts.j.hosts.keys():
    gfloprate += numpy.divide(numpy.diff(ts.data[0][h][0]),dt)
    gdramrate += numpy.divide(numpy.diff(ts.data[1][h][0]),dt)
    gcpurate  += numpy.divide(numpy.diff(ts.data[2][h][0]),dt)

  # Reduce once, after every host has been accumulated.  These lines used
  # to sit inside the loop, so the means were recomputed per host and were
  # never bound if the host dictionary was empty.
  mfr=scipy.stats.tmean(gfloprate)/ts.numhosts
  mdr=scipy.stats.tmean(gdramrate)/ts.numhosts
  mcr=scipy.stats.tmean(gcpurate)/(ts.numhosts*ts.wayness*100.)

  #print [ts.j.id,mfr/peak[0],mdr/peak[1],mcr/peak[2]]

  return bool( (mcr/peak[2] > 0.5 ) and
               (mfr/peak[0])/(mdr/peak[1]) < thresh )
def compute_imbalance(file,k1,k2,thresh,lariat_dict):
  """Return `file` if any host of the job looks imbalanced.

  For every host in ts.data[0] the per-interval rate of each of that
  host's series is computed, the std/mean ratio across those series is
  taken at each interval, and the ratios are averaged over time.  The
  first host whose |average| exceeds `thresh` causes the job id and the
  value to be printed and `file` to be returned.

  Returns None when no host exceeds the threshold, when the file cannot
  be loaded, when the job is rejected by tspl_utils.checkjob, has fewer
  than 2 hosts, or when the Lariat wayness is unknown (-1) or differs
  from ts.wayness.

  `lariat_dict` is passed to lariat_utils.LariatData as `olddata`; when
  it is None the Lariat data is looked up from analyze_conf.lariat_path.
  """
  try:
    ts=tspl.TSPLBase(file,k1,k2)
  except tspl.TSPLException:
    return
  except EOFError:
    print('End of file found reading: ' + file)
    return

  ignore_qs=['gpu','gpudev','vis','visdev']
  if not tspl_utils.checkjob(ts,3600,16,ignore_qs): # 1 hour, 16way only
    return
  elif ts.numhosts < 2: # At least 2 hosts
    print(ts.j.id + ': 1 host')
    return

  # Identity test: `== None` would dispatch to the argument's __eq__.
  if lariat_dict is None:
    ld=lariat_utils.LariatData(ts.j.id,end_epoch=ts.j.end_time,daysback=3,directory=analyze_conf.lariat_path)
  else:
    ld=lariat_utils.LariatData(ts.j.id,olddata=lariat_dict)

  # The two messages keep the double space the original
  # `print 'text ', value` statements produced.
  if ld.wayness == -1:
    print('Unknown wayness:  ' + str(ts.j.id))
    return
  elif ld.wayness != ts.wayness:
    print('Lariat and TACC Stats disagree about wayness. Skipping:  ' + str(ts.j.id))
    return

  # Only the FIRST interval is discarded.  The original comment claimed
  # "first and last", but range(1,len(tmid)) kept the final interval.
  # NOTE(review): confirm which of the two was intended.
  dt=numpy.diff(ts.t)[1:]
  nsteps=len(dt)

  for h in ts.data[0].keys():
    rate=[numpy.diff(v)[1:]/dt for v in ts.data[0][h]]

    mean=[]
    std=[]
    for j in range(nsteps):
      # Values of every series of this host at interval j
      column=[v[j] for v in rate]
      mean.append(scipy.stats.tmean(column))
      std.append(scipy.stats.tstd(column))

    ratio=numpy.divide(std,mean)

    var=scipy.stats.tmean(ratio)

    if abs(var) > thresh:
      print(ts.j.id + ': ' + str(var))
      return file
Beispiel #3
0
def main():
  """Dump the per-host rate of one key pair as CSV lines on stdout.

  Command line: optional positionals key1, key2 and filearg (file,
  directory or quoted glob pattern).  The file argument is echoed first.
  Each job that loads, passes tspl_utils.checkjob(ts,3600,16) and has at
  least 2 hosts then produces one line `jobid,host,midpoint,rate` per
  host and sampling interval.
  """
  parser = argparse.ArgumentParser(description='Dump CSV for a key pair for some jobs')
  parser.add_argument('key1', help='First key', nargs='?',
                      default='amd64_core')
  parser.add_argument('key2', help='Second key', nargs='?',
                      default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)

  # This used to be `print sys.argv[3]`, which raises IndexError whenever
  # one of the optional positionals is omitted.  n.filearg is the same
  # string when all three are given and falls back to the default otherwise.
  print(n.filearg)

  for file in filelist:
    try:
      ts=tspl.TSPLSum(file,[n.key1],[n.key2])
    except tspl.TSPLException:
      continue

    if not tspl_utils.checkjob(ts,3600,16):
      continue
    elif ts.numhosts < 2:
      print(ts.j.id + ': 1 host')
      continue

    # Midpoint of every sampling interval and its width
    tmid=(ts.t[:-1]+ts.t[1:])/2.0
    dt=numpy.diff(ts.t)

    for k in ts.j.hosts.keys():
      rate=numpy.divide(numpy.diff(ts.data[0][k][0]),dt)
      for t,r in zip(tmid,rate):
        print(','.join([ts.j.id,k,str(t),str(r)]))
Beispiel #4
0
def main():
  """Plot MemUsed minus AnonPages over time for every host of each job.

  Takes one optional positional argument (file, directory or quoted glob
  pattern).  Jobs that fail to load or are rejected by
  tspl_utils.checkjob(ts,3600,16) are skipped; accepted job ids are
  printed and one PNG per job is written to the working directory.
  """
  parser = argparse.ArgumentParser(description='Plot MemUsed-AnonPages for jobs')
  parser.add_argument('filearg',
                      help='File, directory, or quoted glob pattern',
                      nargs='?',default='jobs')
  args=parser.parse_args()

  for path in tspl_utils.getfilelist(args.filearg):
    try:
      ts=tspl.TSPLSum(path,['mem','mem'],['MemUsed','AnonPages'])
    except tspl.TSPLException:
      continue

    if not tspl_utils.checkjob(ts,3600,16):
      continue
    print(ts.j.id)

    fig=plt.figure()
    ax=fig.gca()
    ax.hold=True
    hours=ts.t/3600.
    for host in ts.j.hosts.keys():
      mem_used=ts.data[0][host][0]
      delta=mem_used-ts.data[1][host][0]
      # Shift the curve by the host's first MemUsed sample
      delta-=mem_used[0]
      ax.plot(hours,delta)

    unit=ts.j.get_schema(ts.k1[0])[ts.k2[0]].unit
    ax.set_ylabel('MemUsed - AnonPages ' + unit)
    ax.set_xlabel('Time (hr)')
    plt.suptitle(ts.title)

    fname='_'.join(['graph',ts.j.id,ts.k1[0],ts.k2[0]])+'.png'
    fig.savefig(fname)
    plt.close()
Beispiel #5
0
def main():
  """Write one 'MemUsed - AnonPages' graph per accepted job.

  The single optional command-line argument names a file, directory or
  quoted glob pattern.  For each job that loads and passes
  tspl_utils.checkjob(ts,3600,16) the job id is printed, one curve per
  host is drawn against time in hours, and the figure is saved as
  graph_<id>_<k1>_<k2>.png.
  """
  cli = argparse.ArgumentParser(description='Plot MemUsed-AnonPages for jobs')
  cli.add_argument('filearg', nargs='?', default='jobs',
                   help='File, directory, or quoted glob pattern')
  opts=cli.parse_args()
  job_files=tspl_utils.getfilelist(opts.filearg)

  for job_file in job_files:
    try:
      ts=tspl.TSPLSum(job_file,['mem','mem'],['MemUsed','AnonPages'])
    except tspl.TSPLException:
      continue

    accepted=tspl_utils.checkjob(ts,3600,16)
    if not accepted:
      continue
    print(ts.j.id)

    fig=plt.figure()
    ax=fig.gca()
    ax.hold=True
    for k in ts.j.hosts.keys():
      used=ts.data[0][k][0]
      anon=ts.data[1][k][0]
      curve=used-anon
      # Offset by the initial MemUsed value of this host
      curve-=used[0]
      ax.plot(ts.t/3600.,curve)

    schema=ts.j.get_schema(ts.k1[0])
    ax.set_ylabel('MemUsed - AnonPages ' + schema[ts.k2[0]].unit)
    ax.set_xlabel('Time (hr)')
    plt.suptitle(ts.title)

    out_name='graph_'+ts.j.id+'_'+ts.k1[0]+'_'+ts.k2[0]+'.png'
    fig.savefig(out_name)
    plt.close()
Beispiel #6
0
  def setup(self, job_data):
    """Load the time series for `job_data` and decide whether to analyze it.

    Resets the filter settings (aggregate, min_time, min_hosts,
    waynesses, ignore_qs) and `self.metric` (NaN), then builds `self.ts`
    from `job_data` using self.k1 / self.k2: a TSPLSum when
    `self.aggregate` is true, a TSPLBase otherwise.

    Returns True when the job loaded, passed tspl_utils.checkjob and has
    at least `self.min_hosts` hosts; False otherwise.
    """
    self.aggregate = True
    self.min_time = 3600
    self.min_hosts = 1
    self.waynesses=[x+1 for x in range(32)]
    self.ignore_qs = []

    self.metric = float("nan")
    try:
      if self.aggregate:
        self.ts=tspl.TSPLSum("",self.k1,self.k2,job_data=job_data)
      else:
        self.ts=tspl.TSPLBase("",self.k1,self.k2,job_data=job_data)
    except tspl.TSPLException:
      return False
    except EOFError:
      # The message used to concatenate `job_path`, a name that is not
      # defined in this method, so this handler raised NameError.
      # Identify the job by its `id` attribute when it has one.
      # NOTE(review): assumes job_data exposes `id` like ts.j does -- confirm.
      print('End of file found reading: ' +
            str(getattr(job_data, 'id', job_data)))
      return False

    if not tspl_utils.checkjob(self.ts,self.min_time,
                               self.waynesses,skip_queues=self.ignore_qs):
      return False
    elif self.ts.numhosts < self.min_hosts:
      return False
    else:
      return True
Beispiel #7
0
def main():
  """Print CSV rows with the rates of one or more key pairs per host.

  Options: -k1 and -k2 take one or more keys each, -f takes the file,
  directory or quoted glob pattern.  For every job that loads, passes
  tspl_utils.checkjob(ts,0,16) and has at least 2 hosts, one line
  `jobid,host,midpoint,rate...` is printed per host and interval, with
  one rate column per entry of ts.data.
  """
  parser = argparse.ArgumentParser(description='Dump CSV for a key pair for some jobs')
  parser.add_argument('-k1', help='Set first key',
                      nargs='+', type=str, default=['amd64_sock'])
  parser.add_argument('-k2', help='Set second key',
                      nargs='+', type=str, default=['DRAM'])
  parser.add_argument('-f', help='File, directory, or quoted glob pattern',
                      nargs=1, type=str, default=['jobs'])
  opts=parser.parse_args()

  for path in tspl_utils.getfilelist(opts.f[0]):
    try:
      ts=tspl.TSPLSum(path,opts.k1,opts.k2)
    except tspl.TSPLException:
      continue

    if not tspl_utils.checkjob(ts,0,16):
      continue
    if ts.numhosts < 2:
      print(ts.j.id + ': 1 host')
      continue

    tmid=(ts.t[:-1]+ts.t[1:])/2.0
    nseries=len(ts.data)

    for host in ts.j.hosts.keys():
      rates=[]
      for x in range(nseries):
        rates.append(numpy.divide(numpy.diff(ts.data[x][host][0]),
                                  numpy.diff(ts.t)))
      for i in range(len(tmid)):
        row=[ts.j.id,host,str(tmid[i])]
        row.extend(str(series[i]) for series in rates)
        print(','.join(row))
Beispiel #8
0
def isidle(file,thresh):
  """Report whether some host lags far behind the busiest host.

  For each of the three key pairs the element-wise maximum rate over all
  hosts is formed.  For every (key pair, host) the relative shortfall
  (max - rate) / max is summed and divided by the number of intervals.
  Returns True when the largest such value exceeds `thresh`, False
  otherwise.  Returns None when the file cannot be loaded, is rejected
  by tspl_utils.checkjob, or has fewer than 2 hosts.
  """
  k1=['amd64_core','amd64_sock','cpu']
  k2=['SSE_FLOPS', 'DRAM',      'user']
  try:
    ts=tspl.TSPLSum(file,k1,k2)
  except tspl.TSPLException:
    return

  if not tspl_utils.checkjob(ts,3600,[x+1 for x in range(16)]): # 1 hour
    return
  if ts.numhosts < 2: # At least 2 hosts
    print(ts.j.id + ': 1 host')
    return

  dt=numpy.diff(ts.t)
  nsteps=len(ts.t)-1

  sums=[]
  for i in range(len(k1)):
    # Rates of this key pair, one entry per host in dictionary order
    host_rates=[numpy.divide(numpy.diff(ts.data[i][h]),dt)
                for h in ts.j.hosts.keys()]

    maxrate=numpy.zeros(nsteps)
    for rate in host_rates:
      maxrate=numpy.maximum(rate,maxrate)

    for rate in host_rates:
      shortfall=numpy.divide(maxrate-rate,maxrate)
      sums.append(numpy.sum(shortfall)/nsteps)

  return bool(max(sums) > thresh)
def main():
    """Emit per-host rate samples of the selected key pairs as CSV.

    -k1 / -k2 select the keys (one or more each) and -f the file,
    directory or quoted glob pattern.  Jobs that fail to load, are
    rejected by tspl_utils.checkjob(ts, 0, 16) or have fewer than 2 hosts
    are skipped.  Each output line is `jobid,host,midpoint` followed by
    one rate per entry of ts.data.
    """
    cli = argparse.ArgumentParser(description="Dump CSV for a key pair for some jobs")
    cli.add_argument("-k1", help="Set first key", nargs="+", type=str, default=["amd64_sock"])
    cli.add_argument("-k2", help="Set second key", nargs="+", type=str, default=["DRAM"])
    cli.add_argument("-f", help="File, directory, or quoted glob pattern", nargs=1, type=str, default=["jobs"])
    args = cli.parse_args()

    for job_file in tspl_utils.getfilelist(args.f[0]):
        try:
            ts = tspl.TSPLSum(job_file, args.k1, args.k2)
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 0, 16):
            continue
        if ts.numhosts < 2:
            print(ts.j.id + ": 1 host")
            continue

        # Midpoints of the sampling intervals
        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

        for k in ts.j.hosts.keys():
            rates = [numpy.divide(numpy.diff(series[k][0]), numpy.diff(ts.t))
                     for series in ts.data]
            for i, t in enumerate(tmid):
                prefix = [ts.j.id, k, str(t)]
                columns = [str(r[i]) for r in rates]
                print(",".join(prefix + columns))
Beispiel #10
0
def main():
    """Plot a key pair for each job whose reduced rate passes a threshold.

    Positionals key1, key2 and filearg are optional.  -f loads the job
    with TSPLBase instead of TSPLSum, -m selects heatmap() instead of
    lineplot(), --max reduces with max instead of scipy.stats.tmean and
    -t sets the threshold.

    For every job that loads and passes tspl_utils.checkjob(ts, 3600, 16)
    the rate of each series yielded by `ts` is reduced, the reductions
    are reduced again into `m`, and the job is plotted when no threshold
    was given or `m` exceeds it.
    """
    parser = argparse.ArgumentParser(
        description='Plot a key pair for some jobs')
    parser.add_argument('-t', help='Threshold', metavar='thresh')
    parser.add_argument('key1',
                        help='First key',
                        nargs='?',
                        default='amd64_core')
    parser.add_argument('key2',
                        help='Second key',
                        nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg',
                        help='File, directory, or quoted'
                        ' glob pattern',
                        nargs='?',
                        default='jobs')
    parser.add_argument('-f', help='Set full mode', action='store_true')
    parser.add_argument('-m', help='Set heatmap mode', action='store_true')
    parser.add_argument('--max',
                        help='Use max instead of mean',
                        action='store_true')
    n = parser.parse_args()

    filelist = tspl_utils.getfilelist(n.filearg)

    func = max if n.max else scipy.stats.tmean

    for file in filelist:
        try:
            if n.f:
                full = '_full'
                ts = tspl.TSPLBase(file, [n.key1], [n.key2])
            else:
                full = ''
                ts = tspl.TSPLSum(file, [n.key1], [n.key2])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):
            continue

        dt = numpy.diff(ts.t)
        # One reduced rate per series yielded by ts
        reduction = [func(numpy.divide(numpy.diff(v), dt)) for v in ts]
        if not reduction:
            # No series at all: previously `m` was left unbound and the
            # comparison below raised NameError.
            continue
        # Reduce once after the loop instead of on every iteration.
        m = func(reduction)

        if not n.t or m > float(n.t):
            print(ts.j.id + ': ' + str(m))
            if n.m:
                heatmap(ts, n, m, full)
            else:
                lineplot(ts, n, m, full)
        else:
            print(ts.j.id + ': under threshold, ' + str(m) + ' < ' + n.t)
Beispiel #11
0
def main():
    """Plot raw and mean-adjusted rates of a key pair for each job.

    Positionals key1, key2 and filearg are optional; -f loads the job
    with TSPLBase instead of TSPLSum.  Jobs that fail to load, are
    rejected by tspl_utils.checkjob(ts, 3600, 16) or have fewer than 2
    hosts are skipped.  For each remaining job a two-panel figure is
    saved: the top panel shows every rate series, the bottom panel the
    same series minus its mean absolute value.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", help="Set full mode", action="store_true")
    parser.add_argument("key1", help="First key", nargs="?", default="amd64_core")
    parser.add_argument("key2", help="Second key", nargs="?", default="SSE_FLOPS")
    parser.add_argument("filearg", help="File, directory, or quoted" " glob pattern", nargs="?", default="jobs")

    n = parser.parse_args()
    filelist = tspl_utils.getfilelist(n.filearg)

    for file in filelist:
        try:
            if n.f:
                full = "_full"
                ts = tspl.TSPLBase(file, [n.key1], [n.key2])
            else:
                full = ""
                ts = tspl.TSPLSum(file, [n.key1], [n.key2])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
            continue
        elif ts.numhosts < 2:  # At least 2 hosts
            print(ts.j.id + ": 1 host")
            continue

        print(ts.j.id)

        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

        # A loop that copied the first series into an unused local `s`
        # was removed here; it had no effect on the output.

        fig, ax = plt.subplots(2, 1, figsize=(8, 6), dpi=80)
        ax[0].hold = True
        ax[1].hold = True
        xmin, xmax = [0.0, 0.0]
        xmin1, xmax1 = [0.0, 0.0]
        dt = numpy.diff(ts.t)
        for v in ts:
            # The rate is truncated to whole numbers by the int64 cast.
            rate = numpy.array(numpy.divide(numpy.diff(v), dt), dtype=numpy.int64)
            # Mean absolute rate (L1 norm divided by the sample count)
            d = numpy.linalg.norm(rate, ord=1) / float(len(rate))
            xmin, xmax = [min(xmin, min(rate)), max(xmax, max(rate))]
            xmin1, xmax1 = [min(xmin1, min(rate - d)), max(xmax1, max(rate - d))]
            ax[0].plot(tmid, rate)
            ax[1].plot(tmid, rate - d)

        xmin, xmax = tspl_utils.expand_range(xmin, xmax, 0.1)
        xmin1, xmax1 = tspl_utils.expand_range(xmin1, xmax1, 0.1)

        ax[0].set_ylim(bottom=xmin, top=xmax)
        ax[1].set_ylim(bottom=xmin1, top=xmax1)

        fname = "_".join(["graph", ts.j.id, ts.k1[0], ts.k2[0], "adjust" + full])
        fig.savefig(fname)
        plt.close()
Beispiel #12
0
def compute_imbalance(file,k1,k2,threshold,plot_flag,full_flag,ratios):
  """Record the imbalance statistic of one job and optionally plot it.

  The job is loaded with TSPLBase when `full_flag` is true and with
  TSPLSum otherwise.  Rates are computed for every series yielded by the
  loaded object, with the first interval dropped.  The statistic is the
  mean over time of std/mean across those series; it is stored as
  ratios[job id] = [statistic, owner] and printed.  When `plot_flag` is
  set and |statistic| exceeds `threshold`, plot_ratios is called.

  Always returns None.  Jobs that fail to load, are rejected by
  tspl_utils.checkjob or have fewer than 2 hosts are skipped silently
  or with a one-line message.
  """
  suffix='_full' if full_flag else ''
  loader=tspl.TSPLBase if full_flag else tspl.TSPLSum
  try:
    ts=loader(file,k1,k2)
  except tspl.TSPLException:
    return
  except EOFError:
    print('End of file found reading: ' + file)
    return

  # 1 hour, 16way only; GPU and visualization queues are passed as ignored
  if not tspl_utils.checkjob(ts,3600,16,['gpu','gpudev','vis','visdev']):
    return
  if ts.numhosts < 2: # At least 2 hosts
    print(ts.j.id + ': 1 host')
    return

  tmid=(ts.t[:-1]+ts.t[1:])/2.0
  rng=range(1,len(tmid)) # index 0 (the first interval) is discarded
  tmid=tmid[rng]
  nsteps=len(rng)
  dt=numpy.diff(ts.t)[rng]

  maxval=numpy.zeros(nsteps)
  minval=numpy.ones(nsteps)*1e100

  rate=[]
  for v in ts:
    r=numpy.divide(numpy.diff(v)[rng],dt)
    rate.append(r)
    maxval=numpy.maximum(maxval,r)
    minval=numpy.minimum(minval,r)

  mean=[]
  std=[]
  for j in range(nsteps):
    # Values of every series at interval j
    column=[r[j] for r in rate]
    mean.append(scipy.stats.tmean(column))
    std.append(scipy.stats.tstd(column))

  ratio=numpy.divide(std,mean)
  ratio2=numpy.divide(maxval-minval,maxval)

  var=scipy.stats.tmean(ratio) # mean of ratios is the threshold statistic

  # Save away a list of ratios per user
  ratios[ts.j.id]=[var,ts.owner]
  print(ts.j.id + ': ' + str(var))
  # If over the threshold, plot this job
  if plot_flag and abs(var) > threshold:
    fig,ax=plt.subplots(2,1,figsize=(8,8),dpi=80)
    plot_ratios(ts,tmid,ratio,ratio2,rate,var,fig,ax,suffix)
Beispiel #13
0
def compute_imbalance(file, k1, k2, threshold, plot_flag, full_flag, ratios):
    """Compute the std/mean imbalance statistic for one job file.

    Loads `file` (TSPLBase if `full_flag`, else TSPLSum), converts every
    series yielded by the loaded object into per-interval rates (the
    first interval is dropped), and averages std/mean across series over
    time.  The result is written to `ratios` under the job id together
    with ts.owner and printed.  plot_ratios is invoked when `plot_flag`
    is true and the absolute statistic exceeds `threshold`.

    Returns None.  Nothing is recorded for jobs that fail to load, are
    rejected by tspl_utils.checkjob or run on fewer than 2 hosts.
    """
    try:
        if full_flag:
            ts = tspl.TSPLBase(file, k1, k2)
        else:
            ts = tspl.TSPLSum(file, k1, k2)
    except tspl.TSPLException:
        return
    except EOFError:
        print('End of file found reading: ' + file)
        return
    full = '_full' if full_flag else ''

    skipped_queues = ['gpu', 'gpudev', 'vis', 'visdev']
    # 1 hour, 16way only
    if not tspl_utils.checkjob(ts, 3600, 16, skipped_queues):
        return
    if ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        return

    midpoints = (ts.t[:-1] + ts.t[1:]) / 2.0
    rng = range(1, len(midpoints))  # every interval except the first
    tmid = midpoints[rng]

    rate = [numpy.divide(numpy.diff(v)[rng], numpy.diff(ts.t)[rng])
            for v in ts]

    # Element-wise envelope of all series
    maxval = numpy.zeros(len(rng))
    minval = numpy.ones(len(rng)) * 1e100
    for series in rate:
        maxval = numpy.maximum(maxval, series)
        minval = numpy.minimum(minval, series)

    mean = []
    std = []
    for j in range(len(rng)):
        across = [series[j] for series in rate]
        mean.append(scipy.stats.tmean(across))
        std.append(scipy.stats.tstd(across))

    imbl = maxval - minval
    ratio = numpy.divide(std, mean)
    ratio2 = numpy.divide(imbl, maxval)

    # The mean of the std/mean ratios is the threshold statistic
    var = scipy.stats.tmean(ratio)

    # Keep the statistic and the job owner for later per-user reporting
    ratios[ts.j.id] = [var, ts.owner]
    print(ts.j.id + ': ' + str(var))
    if plot_flag and abs(var) > threshold:
        fig, ax = plt.subplots(2, 1, figsize=(8, 8), dpi=80)
        plot_ratios(ts, tmid, ratio, ratio2, rate, var, fig, ax, full)
Beispiel #14
0
def main():
    """Plot HT0/HT1/HT2 rates for jobs whose peak rate exceeds 2.0e9.

    Takes an optional file/directory/glob positional; -f is parsed but
    not used.  For each job that loads, passes
    tspl_utils.checkjob(ts, 3600, 16) and has at least 2 hosts, the rates
    of the three 'amd64_sock' HT series (indices 0-3 of each) are drawn
    per host, one color per host.  The figure is only saved when the
    largest rate seen exceeds 2.0e9; otherwise it is closed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', help='Set full mode', action='store_true')
    parser.add_argument('filearg',
                        help='File, directory, or quoted glob pattern',
                        nargs='?',
                        default='jobs')
    args = parser.parse_args()

    ht_keys = ['HT0', 'HT1', 'HT2']
    for path in tspl_utils.getfilelist(args.filearg):
        try:
            ts = tspl.TSPLBase(path, ['amd64_sock'] * 3, ht_keys)
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
            continue
        if ts.numhosts < 2:  # At least 2 hosts
            print(ts.j.id + ': 1 host')
            continue

        print(ts.j.id)
        hours = (ts.t[:-1] + ts.t[1:]) / 2.0 / 3600
        dt = numpy.diff(ts.t)

        fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=80)
        ax.hold = True
        xmin, xmax = 0., 0.
        c = Colors()
        for k in ts.j.hosts.keys():
            line_style = '-' + c.next()
            for i in range(3):
                for j in range(4):
                    rate = numpy.divide(numpy.diff(ts.data[i][k][j]), dt)
                    xmin = min(xmin, min(rate))
                    xmax = max(xmax, max(rate))
                    ax.plot(hours, rate, line_style)

        if not xmax > 2.0e9:
            # Nothing above the limit: discard the figure
            plt.close()
            continue
        print(ts.j.id + ' over limit: %(v)8.3f' % {'v': xmax})

        plt.suptitle(ts.title)
        xmin, xmax = tspl_utils.expand_range(xmin, xmax, .1)
        ax.set_ylim(bottom=xmin, top=xmax)

        fig.savefig('_'.join(['graph', ts.j.id, 'HT_rates']))
        plt.close()
Beispiel #15
0
def main():
    """Save a phase plot (rate[i+1] against rate[i]) for each job.

    Positionals key1, key2 and filearg are optional; -f loads the job
    with TSPLBase instead of TSPLSum and adds '_full' to the file name.
    Jobs that fail to load, are rejected by
    tspl_utils.checkjob(ts, 3600, 16) or have fewer than 2 hosts are
    skipped.  Both axes share the range spanned by all rates and zero.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', help='Set full mode', action='store_true')
    parser.add_argument('key1', help='First key', nargs='?',
                        default='amd64_core')
    parser.add_argument('key2', help='Second key', nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg',
                        help='File, directory, or quoted glob pattern',
                        nargs='?', default='jobs')
    args = parser.parse_args()

    for path in tspl_utils.getfilelist(args.filearg):
        try:
            if args.f:
                full = '_full'
                ts = tspl.TSPLBase(path, [args.key1], [args.key2])
            else:
                full = ''
                ts = tspl.TSPLSum(path, [args.key1], [args.key2])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
            continue
        if ts.numhosts < 2:  # At least 2 hosts
            print(ts.j.id + ': 1 host')
            continue

        print(ts.j.id)

        fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=80)
        lo, hi = 0., 0.
        for v in ts:
            rate = numpy.divide(numpy.diff(v), numpy.diff(ts.t))
            lo = min(lo, min(rate))
            hi = max(hi, max(rate))
            ax.hold = True
            # Successor on the x axis, predecessor on the y axis
            ax.plot(rate[1:], rate[:-1], '.')

        ax.set_ylim(bottom=lo, top=hi)
        ax.set_xlim(left=lo, right=hi)

        parts = ['graph', ts.j.id, ts.k1[0], ts.k2[0], 'phase' + full]
        fig.savefig('_'.join(parts))
        plt.close()
Beispiel #16
0
def main():
  """Graph the HT0-HT2 rates of jobs that exceed a 2.0e9 rate limit.

  The optional positional names a file, directory or quoted glob
  pattern; -f is accepted but not used.  Jobs that fail to load, are
  rejected by tspl_utils.checkjob(ts,3600,16) or have fewer than 2 hosts
  are skipped.  For the rest, the rate of series 0-3 of each of the
  three HT keys is plotted per host against time in hours; the graph is
  saved only if the largest rate exceeds 2.0e9.
  """
  cli = argparse.ArgumentParser()
  cli.add_argument('-f', help='Set full mode', action='store_true')
  cli.add_argument('filearg', help='File, directory, or quoted glob pattern',
                   nargs='?',default='jobs')
  opts=cli.parse_args()
  job_files=tspl_utils.getfilelist(opts.filearg)

  for job_file in job_files:
    try:
      ts=tspl.TSPLBase(job_file,['amd64_sock', 'amd64_sock', 'amd64_sock'],
                       ['HT0', 'HT1', 'HT2'])
    except tspl.TSPLException:
      continue

    if not tspl_utils.checkjob(ts,3600,16): # 1 hour, 16way only
      continue
    if ts.numhosts < 2: # At least 2 hosts
      print(ts.j.id + ': 1 host')
      continue

    print(ts.j.id)
    tmid=(ts.t[:-1]+ts.t[1:])/2.0
    dt=numpy.diff(ts.t)

    fig,ax=plt.subplots(1,1,figsize=(8,6),dpi=80)
    ax.hold=True
    xmin=0.
    xmax=0.
    palette=Colors()
    for host in ts.j.hosts.keys():
      col=palette.next() # one color per host
      for key_idx in range(3):
        for dev_idx in range(4):
          rate=numpy.divide(numpy.diff(ts.data[key_idx][host][dev_idx]),dt)
          xmin=min(xmin,min(rate))
          xmax=max(xmax,max(rate))
          ax.plot(tmid/3600,rate,'-'+col)

    over_limit=xmax > 2.0e9
    if not over_limit:
      plt.close()
      continue
    print(ts.j.id + ' over limit: %(v)8.3f' % {'v' : xmax})

    plt.suptitle(ts.title)
    xmin,xmax=tspl_utils.expand_range(xmin,xmax,.1)
    ax.set_ylim(bottom=xmin,top=xmax)

    fname='_'.join(['graph',ts.j.id,'HT_rates'])
    fig.savefig(fname)
    plt.close()
Beispiel #17
0
def do_check(f, jobs):
    """Store the Lariat `exc` value of the job in file `f` into `jobs`.

    The job is loaded as a summed 'amd64_core'/'SSE_FLOPS' series.
    Nothing is stored when loading raises tspl.TSPLException or when
    tspl_utils.checkjob(ts, 3600, range(1, 33)) rejects the job.
    `jobs` is updated in place, keyed by job id; the function returns
    None.
    """
    try:
        summary = tspl.TSPLSum(f, ['amd64_core'], ['SSE_FLOPS'])
    except tspl.TSPLException:
        return

    # 1 hour
    if tspl_utils.checkjob(summary, 3600, range(1, 33)):
        job = summary.j
        lariat = lariat_utils.LariatData(job.id, job.end_time,
                                         analyze_conf.lariat_path)
        jobs[job.id] = lariat.exc
Beispiel #18
0
def main():
  """Plot and save a key pair for jobs whose reduced rate passes a threshold.

  Positionals key1, key2 and filearg are optional.  -f loads the job
  with TSPLBase instead of TSPLSum, -m selects heatmap() instead of
  lineplot(), --max reduces with max instead of scipy.stats.tmean and -t
  sets the threshold.

  For every job that loads and passes tspl_utils.checkjob(ts,3600,16)
  the rate of each series yielded by `ts` is reduced, the reductions are
  reduced again into `m`, and the job is plotted and saved when no
  threshold was given or `m` exceeds it.
  """
  parser = argparse.ArgumentParser(description='Plot a key pair for some jobs')
  parser.add_argument('-t', help='Threshold', metavar='thresh')
  parser.add_argument('key1', help='First key', nargs='?',
                      default='amd64_core')
  parser.add_argument('key2', help='Second key', nargs='?',
                      default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  parser.add_argument('-f', help='Set full mode', action='store_true')
  parser.add_argument('-m', help='Set heatmap mode', action='store_true')
  parser.add_argument('--max', help='Use max instead of mean',
                      action='store_true')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)

  func=max if n.max else scipy.stats.tmean

  for file in filelist:
    try:
      if n.f:
        full='_full'
        ts=tspl.TSPLBase(file,[n.key1],[n.key2])
      else:
        full=''
        ts=tspl.TSPLSum(file,[n.key1],[n.key2])
    except tspl.TSPLException:
      continue

    if not tspl_utils.checkjob(ts,3600,16):
      continue

    dt=numpy.diff(ts.t)
    # One reduced rate per series yielded by ts
    reduction=[func(numpy.divide(numpy.diff(v),dt)) for v in ts]
    if not reduction:
      # No series at all: `m` used to be left unbound in this case.
      continue
    # Reduce once after the loop instead of on every iteration.
    m=func(reduction)

    if not n.t or m > float(n.t):
      print(ts.j.id + ': ' + str(m))
      if n.m:
        fig, fname = heatmap(ts,n,m,full)
      else:
        fig, fname = lineplot(ts,n,m,full)
      # Save only when a figure was produced for THIS job.  The save used
      # to run unconditionally after the if/else, so an under-threshold
      # job either raised NameError or re-saved the previous job's figure.
      fig.savefig(fname)
    else:
      print(ts.j.id + ': under threshold, ' + str(m) + ' < ' + n.t)
Beispiel #19
0
def do_check(f,jobs):
  """Record the Lariat `exc` value for the job stored in file `f`.

  The job is loaded as a summed 'amd64_core'/'SSE_FLOPS' series.  If
  loading raises tspl.TSPLException, or tspl_utils.checkjob rejects the
  job for the 3600 / waynesses 1..16 criteria, nothing is recorded.
  Otherwise jobs[job id] is set in place.  Returns None.
  """
  try:
    ts=tspl.TSPLSum(f,['amd64_core'],['SSE_FLOPS'])
  except tspl.TSPLException:
    return

  waynesses=[x+1 for x in range(16)]
  if tspl_utils.checkjob(ts,3600,waynesses): # 1 hour
    job=ts.j
    # The Lariat data location is hard-coded in this variant
    ld=lariat_utils.LariatData(job.id,job.end_time,
                               '/scratch/projects/lariatData')
    jobs[job.id]=ld.exc
Beispiel #20
0
def do_check(f,jobs):
  """Look up the Lariat `exc` value of one job and file it under its id.

  `f` is loaded as a summed 'amd64_core'/'SSE_FLOPS' series.  The
  function returns without touching `jobs` when loading raises
  tspl.TSPLException or when tspl_utils.checkjob(ts,3600,range(1,33))
  rejects the job.  Returns None in every case.
  """
  try:
    ts=tspl.TSPLSum(f,['amd64_core'],['SSE_FLOPS'])
  except tspl.TSPLException:
    return

  accepted=tspl_utils.checkjob(ts,3600,range(1,33)) # 1 hour
  if not accepted:
    return

  job_id=ts.j.id
  lariat=lariat_utils.LariatData(job_id,ts.j.end_time,
                                 analyze_conf.lariat_path)
  jobs[job_id]=lariat.exc
Beispiel #21
0
def main():
    """Plot jobs in which two key pairs are poorly correlated.

    Optional positionals: threshold (default 0.8), keya1/keya2,
    keyb1/keyb2 and filearg; -f loads the job with TSPLBase instead of
    TSPLSum.  For every job that loads and passes
    tspl_utils.checkjob(ts, 3600, 16) the value returned by pearson(ts)
    is printed and, when its absolute value is below the threshold,
    plot_correlation is called.
    """
    parser = argparse.ArgumentParser(description='Look for lack of correlation'
                                     ' between two key pairs/')
    # type=float lets argparse reject a non-numeric threshold up front
    # instead of failing inside the job loop after work has been done.
    parser.add_argument('threshold',
                        help='Threshold Pearson R',
                        nargs='?',
                        type=float,
                        default=0.8)
    parser.add_argument('keya1',
                        help='Key A1',
                        nargs='?',
                        default='amd64_core')
    parser.add_argument('keya2', help='Key A2', nargs='?', default='DCSF')
    parser.add_argument('keyb1',
                        help='Key B1',
                        nargs='?',
                        default='amd64_core')
    parser.add_argument('keyb2', help='Key B2', nargs='?', default='SSE_FLOPS')
    parser.add_argument('filearg',
                        help='File, directory, or quoted'
                        ' glob pattern',
                        nargs='?',
                        default='jobs')
    parser.add_argument('-f', help='Set full mode', action='store_true')
    n = parser.parse_args()

    filelist = tspl_utils.getfilelist(n.filearg)

    threshold = n.threshold
    k1 = [n.keya1, n.keyb1]
    k2 = [n.keya2, n.keyb2]

    for file in filelist:
        try:
            if n.f:
                full = '_full'
                ts = tspl.TSPLBase(file, k1, k2)
            else:
                full = ''
                ts = tspl.TSPLSum(file, k1, k2)
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):
            continue

        r = pearson(ts)
        print(ts.j.id + ': ' + str(r))
        if abs(r) < threshold:
            plot_correlation(ts, r, full)
Beispiel #22
0
def is_unfloppy(file,thresh):
  """Flag a job with a low FLOP rate relative to its memory traffic.

  Key lists and peak values are given per counter type ('amd64' and
  'intel_snb') and selected through ts.pmc_type.  For 'intel_snb' the
  first two series are both added to the FLOP rate.  The accumulated
  rates are averaged over the job, normalized by the `peak` entries and
  the two normalized rates are printed.

  Returns True when the normalized CPU value exceeds 0.5 and the
  normalized FLOP/memory ratio is below `thresh`, False otherwise.
  Returns None when the file cannot be loaded, is rejected by
  tspl_utils.checkjob, or has fewer than 2 hosts.
  """
  k1={'amd64' : ['amd64_core','amd64_sock','cpu'],
      'intel_snb' : [ 'intel_snb', 'intel_snb', 'intel_snb', 'cpu'],}
  k2={'amd64' : ['SSE_FLOPS', 'DRAM',      'user'],
      'intel_snb' : ['SIMD_D_256','SSE_D_ALL','LOAD_L1D_ALL','user'],}

  peak={'amd64' : [2.3e9*16*2, 24e9, 1.],
        'intel_snb' : [ 16*2.7e9*2, 16*2.7e9/2.*64., 1.],}

  try:
    ts=tspl.TSPLSum(file,k1,k2)
  except tspl.TSPLException:
    return

  if not tspl_utils.checkjob(ts,3600,range(1,33),
                             ['gpu','gpudev','vis','visdev']):
    return
  if ts.numhosts < 2: # At least 2 hosts
    print(ts.j.id + ': 1 host')
    return

  def host_rate(idx,host):
    # Per-interval rate of series `idx` on one host
    return numpy.divide(numpy.diff(ts.data[idx][host][0]),numpy.diff(ts.t))

  nsteps=len(ts.t)-1
  gfloprate = numpy.zeros(nsteps)
  gdramrate = numpy.zeros(nsteps)
  gcpurate  = numpy.zeros(nsteps)
  for h in ts.j.hosts.keys():
    if ts.pmc_type == 'amd64' :
      gfloprate += host_rate(0,h)
      gdramrate += host_rate(1,h)
      gcpurate  += host_rate(2,h)
    elif ts.pmc_type == 'intel_snb':
      gfloprate += host_rate(0,h)
      gfloprate += host_rate(1,h)
      gdramrate += host_rate(2,h)
      gcpurate  += host_rate(3,h)

  mfr=scipy.stats.tmean(gfloprate)/ts.numhosts
  mdr=scipy.stats.tmean(gdramrate)/ts.numhosts
  mcr=scipy.stats.tmean(gcpurate)/(ts.numhosts*ts.wayness*100.)

  norm=peak[ts.pmc_type]
  flop_frac=mfr/norm[0]
  dram_frac=mdr/norm[1]
  print('%s %s' % (flop_frac, dram_frac))

  return bool( (mcr/norm[2] > 0.5 ) and
               flop_frac/dram_frac < thresh )
def main():
    """Write per-host rates of the chosen key pairs to stdout as CSV.

    -k1 and -k2 each accept one or more keys; -f names the file,
    directory or quoted glob pattern.  A job is skipped when it fails to
    load, is rejected by tspl_utils.checkjob(ts, 0, 16) or has fewer than
    2 hosts.  Every printed line holds the job id, the host, the interval
    midpoint and one rate per entry of ts.data.
    """
    parser = argparse.ArgumentParser(
        description='Dump CSV for a key pair for some jobs')
    parser.add_argument('-k1', help='Set first key', nargs='+', type=str,
                        default=['amd64_sock'])
    parser.add_argument('-k2', help='Set second key', nargs='+', type=str,
                        default=['DRAM'])
    parser.add_argument('-f', help='File, directory, or quoted glob pattern',
                        nargs=1, type=str, default=['jobs'])
    cli = parser.parse_args()

    for job_path in tspl_utils.getfilelist(cli.f[0]):
        try:
            ts = tspl.TSPLSum(job_path, cli.k1, cli.k2)
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 0, 16):
            continue
        if ts.numhosts < 2:
            print(ts.j.id + ': 1 host')
            continue

        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
        series_count = len(ts.data)

        for host in ts.j.hosts.keys():
            rates = []
            for x in range(series_count):
                delta = numpy.diff(ts.data[x][host][0])
                rates.append(numpy.divide(delta, numpy.diff(ts.t)))

            for i in range(len(tmid)):
                fields = [ts.j.id, host, str(tmid[i])]
                for x in range(series_count):
                    fields.append(str(rates[x][i]))
                print(','.join(fields))
Beispiel #24
0
def main():
  """Produce a phase plot of a key pair's rate for each accepted job.

  Optional positionals: key1, key2 and filearg.  With -f the job is
  loaded through TSPLBase and '_full' is appended to the output name;
  otherwise TSPLSum is used.  Jobs that fail to load, are rejected by
  tspl_utils.checkjob(ts,3600,16) or have fewer than 2 hosts are
  skipped.  Each point pairs a rate sample (y) with the following
  sample (x).
  """
  cli = argparse.ArgumentParser()
  cli.add_argument('-f', help='Set full mode', action='store_true')
  cli.add_argument('key1', help='First key', nargs='?',
                   default='amd64_core')
  cli.add_argument('key2', help='Second key', nargs='?',
                   default='SSE_FLOPS')
  cli.add_argument('filearg', help='File, directory, or quoted glob pattern',
                   nargs='?',default='jobs')
  opts=cli.parse_args()
  job_files=tspl_utils.getfilelist(opts.filearg)

  for job_file in job_files:
    try:
      if opts.f:
        full='_full'
        ts=tspl.TSPLBase(job_file,[opts.key1],[opts.key2])
      else:
        full=''
        ts=tspl.TSPLSum(job_file,[opts.key1],[opts.key2])
    except tspl.TSPLException:
      continue

    if not tspl_utils.checkjob(ts,3600,16): # 1 hour, 16way only
      continue
    if ts.numhosts < 2: # At least 2 hosts
      print(ts.j.id + ': 1 host')
      continue

    print(ts.j.id)

    fig,ax=plt.subplots(1,1,figsize=(8,6),dpi=80)
    xmin=0.
    xmax=0.
    for series in ts:
      rate=numpy.divide(numpy.diff(series),numpy.diff(ts.t))
      xmin=min(xmin,min(rate))
      xmax=max(xmax,max(rate))
      ax.hold=True
      ax.plot(rate[1:],rate[:-1],'.')

    # Same limits on both axes
    ax.set_ylim(bottom=xmin,top=xmax)
    ax.set_xlim(left=xmin,right=xmax)

    name_parts=['graph',ts.j.id,ts.k1[0],ts.k2[0],'phase'+full]
    fig.savefig('_'.join(name_parts))
    plt.close()
Beispiel #25
0
def fit_step(fn,k1,k2,genplot=False,res={}):
  
  try:
    ts=tspl.TSPLSum(fn,k1,k2)
  except tspl.TSPLException as e:
    return
  
  ignore_qs=['gpu','gpudev','vis','visdev']
  if not tspl_utils.checkjob(ts,3600,range(1,33),ignore_qs):
    return
  elif ts.numhosts < 2: # At least 2 hosts
    print ts.j.id + ': 1 host'

  bad_hosts=tspl_utils.lost_data(ts)
  if len(bad_hosts) > 0:
    print ts.j.id, ': Detected hosts with bad data: ', bad_hosts
    return
    
  vals=[]
  for i in [x + 2 for x in range(ts.size-4)]:
    vals.append(compute_fit_params(ts,i))

  vals2=[]
  for v in vals:
    vals2.append([ b/a for (a,b) in v])

  arr=numpy.array(vals2)
  brr=numpy.transpose(arr)

  (m,n)=numpy.shape(brr)

  if genplot:
    fig,ax=plt.subplots(1,1,dpi=80)
    ax.hold=True
    for i in range(m):
      ax.semilogy(brr[i,:])
    fig.savefig('foo.pdf')
    plt.close()

  r=[]
  for i in range(m):
    jnd=numpy.argmin(brr[i,:])
    r.append((jnd,brr[i,jnd]))

  res[fn]=r
Beispiel #26
0
def main():

    parser = argparse.ArgumentParser(
        description='Dump CSV for a key pair for some jobs')
    parser.add_argument('key1',
                        help='First key',
                        nargs='?',
                        default='amd64_core')
    parser.add_argument('key2',
                        help='Second key',
                        nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg',
                        help='File, directory, or quoted'
                        ' glob pattern',
                        nargs='?',
                        default='jobs')
    n = parser.parse_args()

    filelist = tspl_utils.getfilelist(n.filearg)

    print sys.argv[3]

    for file in filelist:
        try:
            ts = tspl.TSPLSum(file, [n.key1], [n.key2])
        except tspl.TSPLException as e:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):
            continue
        elif ts.numhosts < 2:
            print ts.j.id + ': 1 host'
            continue

        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

        rate = {}
        for k in ts.j.hosts.keys():
            rate[k] = numpy.divide(numpy.diff(ts.data[0][k][0]),
                                   numpy.diff(ts.t))
            for i in range(len(tmid)):
                print ','.join([ts.j.id, k, str(tmid[i]), str(rate[k][i])])
Beispiel #27
0
def main():

  parser = argparse.ArgumentParser(description='Look for lack of correlation'
                                   ' between two key pairs/')
  parser.add_argument('threshold', help='Treshold Pearson R',
                      nargs='?', default=0.8)
  parser.add_argument('keya1', help='Key A1', nargs='?',
                      default='amd64_core')
  parser.add_argument('keya2', help='Key A2', nargs='?',
                      default='DCSF')
  parser.add_argument('keyb1', help='Key B1', nargs='?',
                      default='amd64_core')
  parser.add_argument('keyb2', help='Key B2', nargs='?',
                      default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  parser.add_argument('-f', help='Set full mode', action='store_true')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)

  threshold=n.threshold
  k1=[n.keya1, n.keyb1]
  k2=[n.keya2, n.keyb2]

  for file in filelist:
    try:
      if n.f:
        full='_full'
        ts=tspl.TSPLBase(file,k1,k2)
      else:
        full=''
        ts=tspl.TSPLSum(file,k1,k2)
    except tspl.TSPLException as e:
      continue

    if not tspl_utils.checkjob(ts,3600,16):
      continue
    
    r=pearson(ts)
    print ts.j.id + ': ' + str(r)
    if abs(r) < float(threshold) :
      plot_correlation(ts,r,full)
Beispiel #28
0
def isidle(file, thresh):
    k1 = {
        'amd64': ['amd64_core', 'amd64_sock', 'cpu'],
        'intel_snb': ['intel_snb', 'intel_snb', 'cpu'],
    }
    k2 = {
        'amd64': ['SSE_FLOPS', 'DRAM', 'user'],
        'intel_snb': ['SIMD_D_256', 'LOAD_L1D_ALL', 'user'],
    }
    try:
        ts = tspl.TSPLSum(file, k1, k2)
    except tspl.TSPLException as e:
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600, range(1, 33), ignore_qs):
        return
    elif ts.numhosts < 2:  # At least 2 hosts
        print ts.j.id + ': 1 host'
        return

    mr = []
    for i in range(len(k1)):
        maxrate = numpy.zeros(len(ts.t) - 1)
        for h in ts.j.hosts.keys():
            rate = numpy.divide(numpy.diff(ts.data[i][h]), numpy.diff(ts.t))
            maxrate = numpy.maximum(rate, maxrate)
        mr.append(maxrate)

    sums = []
    for i in range(len(k1)):
        for h in ts.j.hosts.keys():
            rate = numpy.divide(numpy.diff(ts.data[i][h]), numpy.diff(ts.t))
            sums.append(
                numpy.sum(numpy.divide(mr[i] - rate, mr[i])) / (len(ts.t) - 1))

    sums = [0. if math.isnan(x) else x for x in sums]

    if max(sums) > thresh:
        return True
    else:
        return False
Beispiel #29
0
def has_highbw(file,thresh):
  try:
    k1=['intel_snb_imc', 'intel_snb_imc']
    k2=['CAS_READS', 'CAS_WRITES']

    peak = 76.*1.e9
    
    try:
      ts=tspl.TSPLSum(file,k1,k2)
    except tspl.TSPLException as e:
      return

    ignore_qs=['gpu','gpudev','vis','visdev']
    if not tspl_utils.checkjob(ts,3600,range(1,33),ignore_qs): 
      return
    elif ts.numhosts < 2: # At least 2 hosts
      print ts.j.id + ': 1 host'
      return

    gdramrate = numpy.zeros(len(ts.t)-1)
    for h in ts.j.hosts.keys():
      gdramrate += numpy.divide(numpy.diff(64.*ts.assemble([0,1],h,0)),
                                numpy.diff(ts.t))
      
    mdr=scipy.stats.tmean(gdramrate)/ts.numhosts

    print mdr/peak

    #print [ts.j.id,mfr/peak[0],mdr/peak[1],mcr/peak[2]]

    if mdr/peak > thresh:
      return True
    else:
      return False
  except Exception as e:
    import sys
    exc_type, exc_obj, exc_tb = sys.exc_info()
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    print(exc_type, fname, exc_tb.tb_lineno)
    raise e
Beispiel #30
0
def isidle(file,thresh):
  k1={'amd64' : ['amd64_core','amd64_sock','cpu'],
      'intel_snb' : [ 'intel_snb', 'intel_snb', 'cpu'],}
  k2={'amd64' : ['SSE_FLOPS', 'DRAM',      'user'],
      'intel_snb' : ['SIMD_D_256','LOAD_L1D_ALL','user'],}
  try:
    ts=tspl.TSPLSum(file,k1,k2)
  except tspl.TSPLException as e:
    return

  ignore_qs=['gpu','gpudev','vis','visdev']
  if not tspl_utils.checkjob(ts,3600,range(1,33),ignore_qs):
    return
  elif ts.numhosts < 2: # At least 2 hosts
    print ts.j.id + ': 1 host'
    return


  mr=[]
  for i in range(len(k1)):
    maxrate=numpy.zeros(len(ts.t)-1)
    for h in ts.j.hosts.keys():
      rate=numpy.divide(numpy.diff(ts.data[i][h]),numpy.diff(ts.t))
      maxrate=numpy.maximum(rate,maxrate)
    mr.append(maxrate)


  sums=[]
  for i in range(len(k1)):
    for h in ts.j.hosts.keys():
      rate=numpy.divide(numpy.diff(ts.data[i][h]),numpy.diff(ts.t))
      sums.append(numpy.sum(numpy.divide(mr[i]-rate,mr[i]))/(len(ts.t)-1))

  sums = [0. if math.isnan(x) else x for x in sums]

  if max(sums) > thresh:
    return True
  else:
    return False
Beispiel #31
0
def fit_step(fn,k1,k2,genplot=False,res={}):
  
  try:
    ts=tspl.TSPLSum(fn,k1,k2)
  except tspl.TSPLException as e:
    return
  
  if not tspl_utils.checkjob(ts,3600,[x+1 for x in range(16)]): # 1 hour
    return
  elif ts.numhosts < 2: # At least 2 hosts
    print ts.j.id + ': 1 host'
    
  vals=[]
  for i in [x + 2 for x in range(ts.size-4)]:
    vals.append(compute_fit_params(ts,i))

  vals2=[]
  for v in vals:
    vals2.append([ b/a for (a,b) in v])

  arr=numpy.array(vals2)
  brr=numpy.transpose(arr)

  (m,n)=numpy.shape(brr)

  if genplot:
    fig,ax=plt.subplots(1,1,dpi=80)
    ax.hold=True
    for i in range(m):
      ax.semilogy(brr[i,:])
    fig.savefig('foo.pdf')
    plt.close()

  r=[]
  for i in range(m):
    jnd=numpy.argmin(brr[i,:])
    r.append((jnd,brr[i,jnd]))

  res[fn]=r
Beispiel #32
0
def master_plot(file,mode='lines',threshold=False,
                output_dir='.',prefix='graph',mintime=3600,wayness=16,
                header='Master',lariat_dict=None,wide=False,job_stats=None):
  k1={'amd64' :
      ['amd64_core','amd64_core','amd64_sock','lnet','lnet',
       'ib_sw','ib_sw','cpu'],
      'intel' : ['intel_pmc3', 'intel_pmc3', 'intel_pmc3', 
                 'lnet', 'lnet', 'ib_ext','ib_ext','cpu','mem','mem'],
      'intel_snb' : ['intel_snb_imc', 'intel_snb_imc', 'intel_snb', 
                     'lnet', 'lnet', 'ib_sw','ib_sw','cpu',
                     'intel_snb', 'intel_snb', 'mem', 'mem'],
      }
  
  k2={'amd64':
      ['SSE_FLOPS','DCSF','DRAM','rx_bytes','tx_bytes',
       'rx_bytes','tx_bytes','user'],
      'intel' : ['MEM_LOAD_RETIRED_L1D_HIT', 'FP_COMP_OPS_EXE_X87', 
                 'INSTRUCTIONS_RETIRED', 'rx_bytes','tx_bytes', 
                 'port_recv_data','port_xmit_data','user', 'MemUsed', 'AnonPages'],
      'intel_snb' : ['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL',
                     'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes','user',
                     'SSE_D_ALL', 'SIMD_D_256', 'MemUsed', 'AnonPages'],
      }

  try:
    print file
    ts=tspl.TSPLSum(file,k1,k2,job_stats)
  except tspl.TSPLException as e:
    return 
  
  ignore_qs=[]#'gpu','gpudev','vis','visdev']
  if not tspl_utils.checkjob(ts,mintime,wayness,ignore_qs):
    return

  if lariat_dict == None:
    ld=lariat_utils.LariatData(ts.j.id,end_epoch=ts.j.end_time,daysback=3,directory=analyze_conf.lariat_path)
  elif lariat_dict == "pass": ld = lariat_utils.LariatData(ts.j.id)
  else:
    ld=lariat_utils.LariatData(ts.j.id,olddata=lariat_dict)

    

  wayness=ts.wayness
  if ld.wayness != -1 and ld.wayness < ts.wayness:
    wayness=ld.wayness

  if wide:
    fig,ax=plt.subplots(6,2,figsize=(15.5,12),dpi=110)

    # Make 2-d array into 1-d, and reorder so that the left side is blank
    ax=my_utils.flatten(ax)
    ax_even=ax[0:12:2]
    ax_odd =ax[1:12:2]
    ax=ax_odd + ax_even
    
    for a in ax_even:
      a.axis('off')
  else:
    fig,ax=plt.subplots(6,1,figsize=(8,12),dpi=110)

  if mode == 'hist':
    plot=plot_thist
  elif mode == 'percentile':
    plot=plot_mmm
  else:
    plot=plot_lines

  if ts.pmc_type == 'intel_snb' :
    # Plot key 1
    plot(ax[0],ts,[8,9],3600.,1e9,
         ylabel='Total AVX +\nSSE Ginst/s')
    
    # Plot key 2
    plot(ax[1],ts,[0,1],3600.,1.0/64.0*1024.*1024.*1024.,
         ylabel='Total Mem BW GB/s')

    #Plot key 3
    #plot(ax[2],ts,[2],3600.,1.0/64.0*1e9, ylabel='L1 BW GB/s')
    plot(ax[2],ts,[10,-11],3600.,1024.0*1024.0*1024.0, ylabel='Memory Usage GB',
         do_rate=False)
  elif ts.pmc_type == 'intel':
    plot(ax[0],ts,[1],3600.,1e9,ylabel='FP Ginst/s')
    plot(ax[2],ts,[8,-9],3600.,1024.0*1024.0*1024.0, ylabel='Memory Usage GB',do_rate=False)
  else: 
    #Fix this to support the old amd plots
    print ts.pmc_type + ' not supported'
    return 

  # Plot lnet sum rate
  plot(ax[3],ts,[3,4],3600.,1024.**2,ylabel='Total lnet MB/s')

  # Plot remaining IB sum rate
  if ts.pmc_type == 'intel_snb' :
    plot(ax[4],ts,[5,6,-3,-4],3600.,1024.**2,ylabel='Total (ib_sw-lnet) MB/s') 
  elif ts.pmc_type == 'intel' :
    plot(ax[4],ts,[5,6,-3,-4],3600.,1024.**2,ylabel='Total (ib_ext-lnet) MB/s') 

  #Plot CPU user time
  plot(ax[5],ts,[7],3600.,wayness*100.,
       xlabel='Time (hr)',
       ylabel='Total cpu user\nfraction')
  
  print ts.j.id + ': '
  
  plt.subplots_adjust(hspace=0.35)
  if wide:
    left_text=header+'\n'+my_utils.summary_text(ld,ts)
    text_len=len(left_text.split('\n'))
    fontsize=ax[0].yaxis.label.get_size()
    linespacing=1.2
    fontrate=float(fontsize*linespacing)/72./15.5
    yloc=.8-fontrate*(text_len-1) # this doesn't quite work. fontrate is too
                                  # small by a small amount
    plt.figtext(.05,yloc,left_text,linespacing=linespacing)
    fname='_'.join([prefix,ts.j.id,ts.owner,'wide_master'])
  elif header != None:
    title=header+'\n'+ts.title
    if threshold:
      title+=', V: %(v)-6.1f' % {'v': threshold}
    title += '\n' + ld.title()
    plt.suptitle(title)
    fname='_'.join([prefix,ts.j.id,ts.owner,'master'])
  else:
    fname='_'.join([prefix,ts.j.id,ts.owner,'master'])

  if mode == 'hist':
    fname+='_hist'
  elif mode == 'percentile':
    fname+='_perc'
    

  plt.close()

  return fig, fname
def do_compute(file):
    try:
        ts = tspl.TSPLSum(
            file,
            ["intel_snb_imc", "intel_snb_imc", "intel_snb", "intel_snb", "intel_snb", "intel_snb", "intel_snb"],
            ["CAS_READS", "CAS_WRITES", "LOAD_L1D_ALL", "SIMD_D_256", "SSE_D_ALL", "STALLS", "CLOCKS_UNHALTED_CORE"],
        )

    except tspl.TSPLException as e:
        return

    if not tspl_utils.checkjob(ts, 0, 16):
        return
    elif ts.numhosts < 2:
        print ts.j.id + ": 1 host"
        return

    ignore_qs = ["gpu", "gpudev", "vis", "visdev"]
    if not tspl_utils.checkjob(ts, 3600.0, range(1, 33), ignore_qs):
        return

    ld = lariat_utils.LariatData(ts.j.id, ts.j.end_time, "/scratch/projects/lariatData")
    if ld.exc == "unknown":
        return

    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

    read_rate = numpy.zeros_like(tmid)
    write_rate = numpy.zeros_like(tmid)
    l1_rate = numpy.zeros_like(tmid)
    avx_rate = numpy.zeros_like(tmid)
    sse_rate = numpy.zeros_like(tmid)
    stall_rate = numpy.zeros_like(tmid)
    clock_rate = numpy.zeros_like(tmid)

    for host in ts.j.hosts.keys():
        read_rate += numpy.diff(ts.assemble([0], host, 0)) / numpy.diff(ts.t)
        write_rate += numpy.diff(ts.assemble([1], host, 0)) / numpy.diff(ts.t)
        l1_rate += numpy.diff(ts.assemble([2], host, 0)) / numpy.diff(ts.t)
        avx_rate += numpy.diff(ts.assemble([3], host, 0)) / numpy.diff(ts.t)
        sse_rate += numpy.diff(ts.assemble([4], host, 0)) / numpy.diff(ts.t)
        stall_rate += numpy.diff(ts.assemble([5], host, 0)) / numpy.diff(ts.t)
        clock_rate += numpy.diff(ts.assemble([6], host, 0)) / numpy.diff(ts.t)

    read_rate /= ts.numhosts
    write_rate /= ts.numhosts
    l1_rate /= ts.numhosts
    avx_rate /= ts.numhosts
    sse_rate /= ts.numhosts
    stall_rate /= ts.numhosts
    clock_rate /= ts.numhosts

    data_ratio = (read_rate + write_rate) / l1_rate
    flops = avx_rate + sse_rate
    flops_ratio = (flops - numpy.min(flops)) / (numpy.max(flops) - numpy.min(flops))
    stall_ratio = stall_rate / clock_rate

    mean_data_ratio = numpy.mean(data_ratio)
    mean_stall_ratio = numpy.mean(stall_ratio)
    mean_flops = numpy.mean(flops)

    ename = ld.exc.split("/")[-1]
    ename = ld.comp_name(ename, ld.equiv_patterns)
    mean_mem_rate = numpy.mean(read_rate + write_rate)
    if mean_mem_rate > 2e9:  # Put a print in here and investigate bad jobs
        return

    return ",".join(
        [ts.j.id, ts.owner, ename, str(mean_mem_rate), str(mean_stall_ratio), str(mean_data_ratio), str(mean_flops)]
    )
def main():

  parser = argparse.ArgumentParser(description='Look for high meta data rate'\
                                   ' to Lustre')
  parser.add_argument('-t', metavar='thresh',
                      help='Treshold metadata rate',
                      nargs=1, default=[100000.])
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')

  n=parser.parse_args()
  thresh=float(n.t[0])
  print thresh


  filelist=tspl_utils.getfilelist(n.filearg)

#  k1=['llite', 'llite', 'llite', 'llite', 'llite',
#      'llite', 'llite', 'llite', 'llite', 'llite',
#      'llite', 'llite', 'llite', 'llite', 'llite',
#      'llite', 'llite', 'llite', 'llite', 'llite',
#      'llite', 'llite', 'llite', 'llite', 'llite',
#      'llite']
#  k2=['open','close','mmap','seek','fsync','setattr',
#      'truncate','flock','getattr','statfs','alloc_inode',
#      'setxattr','getxattr',' listxattr',
#      'removexattr', 'inode_permission', 'readdir',
#      'create','lookup','link','unlink','symlink','mkdir',
#      'rmdir','mknod','rename',]
  k1=['llite', 'llite', 'llite', 'llite', 'llite',
      'llite', 'llite', 'llite', 'llite', 'llite',
      'llite', 'llite', 'llite', 'llite', 'llite',
      'llite', 'llite', 'llite', 'llite', 'llite',
      'llite', 'llite', 'llite', ]
  k2=['open','close','mmap','fsync','setattr',
      'truncate','flock','getattr','statfs','alloc_inode',
      'setxattr',' listxattr',
      'removexattr', 'readdir',
      'create','lookup','link','unlink','symlink','mkdir',
      'rmdir','mknod','rename',]

  for file in filelist:
    try:
      ts=tspl.TSPLSum(file,k1,k2)
            
    except tspl.TSPLException as e:
      continue

    if not tspl_utils.checkjob(ts,3600.,range(1,33)):
      continue

    tmid=(ts.t[:-1]+ts.t[1:])/2.0

    ld=lariat_utils.LariatData(ts.j.id,ts.j.end_time,'lariatData')
    
    meta_rate = numpy.zeros_like(tmid)

    for k in ts.j.hosts.keys():
      meta_rate +=numpy.diff(ts.assemble(range(0,len(k1)),k,0))/numpy.diff(ts.t)
      
    meta_rate  /= float(ts.numhosts)

    if numpy.max(meta_rate) > thresh:
      title=ts.title
      if ld.exc != 'unknown':
        title += ', E: ' + ld.exc.split('/')[-1]

      fig,ax=plt.subplots(1,1,figsize=(10,8),dpi=80)
      plt.subplots_adjust(hspace=0.35)
      plt.suptitle(title)

      markers = ('o','x','+','^','s','8','p',
                 'h','*','D','<','>','v','d','.')
          
      colors  = ('b','g','r','c','m','k','y')

      cnt=0
      for v in ts.data:
        for host in v:
          for vals in v[host]:
            rate=numpy.diff(vals)/numpy.diff(ts.t)
            c=colors[cnt % len(colors)]
            m=markers[cnt % len(markers)]
#            print cnt,(cnt % len(colors)), (cnt % len(markers)), k2[cnt], c, m
            
            ax.plot(tmid/3600., rate, marker=m,
                    markeredgecolor=c, linestyle='-', color=c,
                    markerfacecolor='None', label=k2[cnt])
            ax.hold=True
        cnt=cnt+1

      ax.set_ylabel('Meta Data Rate (op/s)')
      tspl_utils.adjust_yaxis_range(ax,0.1)

      handles,labels=ax.get_legend_handles_labels()
      new_handles={}
      for h,l in zip(handles,labels):
        new_handles[l]=h

      box = ax.get_position()
      ax.set_position([box.x0, box.y0, box.width * 0.9, box.height])
      ax.legend(new_handles.values(),new_handles.keys(),prop={'size':8},
                bbox_to_anchor=(1.05,1), borderaxespad=0., loc=2)

      fname='_'.join(['metadata',ts.j.id,ts.owner])

      fig.savefig(fname)
      plt.close()
Beispiel #35
0
def master_plot(file,mode='lines',threshold=False,
                output_dir='.',prefix='graph',mintime=3600,wayness=16,
                header='Master'):
  k1={'amd64' :
      ['amd64_core','amd64_core','amd64_sock','lnet','lnet',
       'ib_sw','ib_sw','cpu'],
      'intel' : ['intel_pmc3', 'intel_pmc3', 'intel_pmc3', 
                 'lnet', 'lnet', 'ib_sw','ib_sw','cpu']
      }
  
  k2={'amd64':
      ['SSE_FLOPS','DCSF','DRAM','rx_bytes','tx_bytes',
       'rx_bytes','tx_bytes','user'],
      'intel' : ['PMC3', 'PMC2', 'FIXED_CTR0',
                 'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes','user']
      }

  try:
    print file
    ts=tspl.TSPLSum(file,k1,k2)
  except tspl.TSPLException as e:
    return

  if not tspl_utils.checkjob(ts,mintime,wayness):
    return

  fig,ax=plt.subplots(6,1,figsize=(8,12),dpi=80)
  ax=my_utils.flatten(ax)

  if mode == 'hist':
    plot=plot_thist
  elif mode == 'percentile':
    plot=plot_mmm
  else:
    plot=plot_lines
  
  # Plot SSE FLOPS
  plot(ax[0],ts,[0],3600.)
    
  # Plot DCSF rate
  plot(ax[1],ts,[1],3600.,1e9)

  #Plot DRAM rate
  plot(ax[2],ts,[2],3600.,1e9)
  
  # Plot lnet sum rate
  plot(ax[3],ts,[3,4],3600.,1024.**2,ylabel='Total lnet MB/s')

  # Plot remaining IB sum rate
  plot(ax[4],ts,[5,6,-3,-4],3600.,1024.**2,ylabel='Total (ib_sw-lnet) MB/s') 

  #Plot CPU user time
  plot(ax[5],ts,[7],3600.,ts.wayness*100.,
       xlabel='Time (hr)',
       ylabel='Total cpu user\nfraction')
  
  print ts.j.id + ': '
  print 'cc'
  
  title=header+'\n'+ts.title
  if threshold:
    title+=', V: %(v)-8.3f' % {'v': threshold}
  ld=lariat_utils.LariatData(ts.j.id,ts.j.end_time,'/scratch/projects/lariatData')
  title += '\n' + ld.title()
  print 'dd'

  plt.suptitle(title)
  plt.subplots_adjust(hspace=0.35)

  fname='_'.join([prefix,ts.j.id,ts.owner,'master'])
  if mode == 'hist':
    fname+='_hist'
  elif mode == 'percentile':
    fname+='_perc'
    
  fig.savefig(output_dir+'/'+fname)
  plt.close()
def compute_imbalance(file, k1, k2, thresh, lariat_dict):
    try:
        ts = tspl.TSPLBase(file, k1, k2)
    except tspl.TSPLException as e:
        return
    except EOFError as e:
        print 'End of file found reading: ' + file
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600, 16, ignore_qs):  # 1 hour, 16way only
        return
    elif ts.numhosts < 2:  # At least 2 hosts
        print ts.j.id + ': 1 host'
        return

    if lariat_dict == None:
        ld = lariat_utils.LariatData(ts.j.id,
                                     end_epoch=ts.j.end_time,
                                     daysback=3,
                                     directory=analyze_conf.lariat_path)
    else:
        ld = lariat_utils.LariatData(ts.j.id, olddata=lariat_dict)

    if ld.wayness == -1:
        print 'Unknown wayness: ', ts.j.id
        return
    elif ld.wayness != ts.wayness:
        print 'Lariat and TACC Stats disagree about wayness. Skipping: ', ts.j.id
        return

    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
    rng = range(1, len(tmid))  # Throw out first and last
    tmid = tmid[rng]

    for h in ts.data[0].keys():
        host_data = ts.data[0][h]
        maxval = numpy.zeros(len(rng))
        minval = numpy.ones(len(rng)) * 1e100
        rate = []
        for v in host_data:
            rate.append(numpy.diff(v)[rng] / numpy.diff(ts.t)[rng])
            maxval = numpy.maximum(maxval, rate[-1])
            minval = numpy.minimum(minval, rate[-1])

        vals = []
        mean = []
        std = []
        for j in range(len(rng)):
            vals.append([])
            for v in rate:
                vals[j].append(v[j])
            mean.append(scipy.stats.tmean(vals[j]))
            std.append(scipy.stats.tstd(vals[j]))

        ratio = numpy.divide(std, mean)

        var = scipy.stats.tmean(ratio)

        if abs(var) > thresh:
            print ts.j.id + ': ' + str(var)
            return file
Beispiel #37
0
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('-f', help='Set full mode', action='store_true')
  parser.add_argument('key1', help='First key', nargs='?',
                      default='amd64_core')
  parser.add_argument('key2', help='Second key', nargs='?',
                      default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')

  n=parser.parse_args()
  filelist=tspl_utils.getfilelist(n.filearg)

  for file in filelist:
    try:
      if n.f:
        full='_full'
        ts=tspl.TSPLBase(file,[n.key1],[n.key2])
      else:
        full=''
        ts=tspl.TSPLSum(file,[n.key1],[n.key2])
    except tspl.TSPLException as e:
      continue
    
    if not tspl_utils.checkjob(ts,3600,16): # 1 hour, 16way only
      continue
    elif ts.numhosts < 2: # At least 2 hosts
      print ts.j.id + ': 1 host'
      continue

    print ts.j.id

    tmid=(ts.t[:-1]+ts.t[1:])/2.0

    s=[]
    for v in ts:
      s=v
      break

    fig,ax=plt.subplots(2,1,figsize=(8,6),dpi=80)
    ax[0].hold=True
    ax[1].hold=True
    xmin,xmax=[0.,0.]
    xmin1,xmax1=[0.,0.]
    dt=numpy.diff(ts.t)
    for v in ts:
      rate=numpy.array(numpy.divide(numpy.diff(v),dt),dtype=numpy.int64)
      d=numpy.linalg.norm(rate,ord=1)/float(len(rate))
      xmin,xmax=[min(xmin,min(rate)),max(xmax,max(rate))]
      xmin1,xmax1=[min(xmin1,min(rate-d)),max(xmax1,max(rate-d))]
      ax[0].plot(tmid,rate)
      ax[1].plot(tmid,rate-d)

    xmin,xmax=tspl_utils.expand_range(xmin,xmax,.1)
    xmin1,xmax1=tspl_utils.expand_range(xmin1,xmax1,.1)

    ax[0].set_ylim(bottom=xmin,top=xmax)
    ax[1].set_ylim(bottom=xmin1,top=xmax1)

    fname='_'.join(['graph',ts.j.id,ts.k1[0],ts.k2[0],'adjust'+full])
    fig.savefig(fname)
    plt.close()
def main():

    parser = argparse.ArgumentParser(description='Look for high meta data rate'\
                                     ' to Lustre')
    parser.add_argument('-t',
                        metavar='thresh',
                        help='Treshold metadata rate',
                        nargs=1,
                        default=[100000.])
    parser.add_argument('filearg',
                        help='File, directory, or quoted'
                        ' glob pattern',
                        nargs='?',
                        default='jobs')

    n = parser.parse_args()
    thresh = float(n.t[0])
    print thresh

    filelist = tspl_utils.getfilelist(n.filearg)

    #  k1=['llite', 'llite', 'llite', 'llite', 'llite',
    #      'llite', 'llite', 'llite', 'llite', 'llite',
    #      'llite', 'llite', 'llite', 'llite', 'llite',
    #      'llite', 'llite', 'llite', 'llite', 'llite',
    #      'llite', 'llite', 'llite', 'llite', 'llite',
    #      'llite']
    #  k2=['open','close','mmap','seek','fsync','setattr',
    #      'truncate','flock','getattr','statfs','alloc_inode',
    #      'setxattr','getxattr',' listxattr',
    #      'removexattr', 'inode_permission', 'readdir',
    #      'create','lookup','link','unlink','symlink','mkdir',
    #      'rmdir','mknod','rename',]
    k1 = [
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
        'llite',
    ]
    k2 = [
        'open',
        'close',
        'mmap',
        'fsync',
        'setattr',
        'truncate',
        'flock',
        'getattr',
        'statfs',
        'alloc_inode',
        'setxattr',
        ' listxattr',
        'removexattr',
        'readdir',
        'create',
        'lookup',
        'link',
        'unlink',
        'symlink',
        'mkdir',
        'rmdir',
        'mknod',
        'rename',
    ]

    for file in filelist:
        try:
            ts = tspl.TSPLSum(file, k1, k2)

        except tspl.TSPLException as e:
            continue

        if not tspl_utils.checkjob(ts, 3600., range(1, 33)):
            continue

        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

        ld = lariat_utils.LariatData(ts.j.id, ts.j.end_time, 'lariatData')

        meta_rate = numpy.zeros_like(tmid)

        for k in ts.j.hosts.keys():
            meta_rate += numpy.diff(ts.assemble(range(0, len(k1)), k,
                                                0)) / numpy.diff(ts.t)

        meta_rate /= float(ts.numhosts)

        if numpy.max(meta_rate) > thresh:
            title = ts.title
            if ld.exc != 'unknown':
                title += ', E: ' + ld.exc.split('/')[-1]

            fig, ax = plt.subplots(1, 1, figsize=(10, 8), dpi=80)
            plt.subplots_adjust(hspace=0.35)
            plt.suptitle(title)

            markers = ('o', 'x', '+', '^', 's', '8', 'p', 'h', '*', 'D', '<',
                       '>', 'v', 'd', '.')

            colors = ('b', 'g', 'r', 'c', 'm', 'k', 'y')

            cnt = 0
            for v in ts.data:
                for host in v:
                    for vals in v[host]:
                        rate = numpy.diff(vals) / numpy.diff(ts.t)
                        c = colors[cnt % len(colors)]
                        m = markers[cnt % len(markers)]
                        #            print cnt,(cnt % len(colors)), (cnt % len(markers)), k2[cnt], c, m

                        ax.plot(tmid / 3600.,
                                rate,
                                marker=m,
                                markeredgecolor=c,
                                linestyle='-',
                                color=c,
                                markerfacecolor='None',
                                label=k2[cnt])
                        ax.hold = True
                cnt = cnt + 1

            ax.set_ylabel('Meta Data Rate (op/s)')
            tspl_utils.adjust_yaxis_range(ax, 0.1)

            handles, labels = ax.get_legend_handles_labels()
            new_handles = {}
            for h, l in zip(handles, labels):
                new_handles[l] = h

            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 0.9, box.height])
            ax.legend(new_handles.values(),
                      new_handles.keys(),
                      prop={'size': 8},
                      bbox_to_anchor=(1.05, 1),
                      borderaxespad=0.,
                      loc=2)

            fname = '_'.join(['metadata', ts.j.id, ts.owner])

            fig.savefig(fname)
            plt.close()
Beispiel #39
0
def compute_ratio(file,lariat_dict=None):
  try:
    ts=tspl.TSPLSum(file,['intel_snb_imc', 'intel_snb_imc',
                          'intel_snb', 'intel_snb', 'intel_snb',
                          'intel_snb', 'intel_snb'],
                    ['CAS_READS', 'CAS_WRITES',
                     'LOAD_L1D_ALL', 'SIMD_D_256', 'SSE_D_ALL',
                     'STALLS', 'CLOCKS_UNHALTED_CORE'])
    
  except tspl.TSPLException as e:
    return

  ignore_qs=['gpu','gpudev','vis','visdev']
  if not tspl_utils.checkjob(ts,3600.,range(1,33),ignore_qs):
    return

  tmid=(ts.t[:-1]+ts.t[1:])/2.0

  if lariat_dict == None:
    ld=lariat_utils.LariatData(ts.j.id,end_epoch=ts.j.end_time,daysback=3,directory=analyze_conf.lariat_path)
  else:
    ld=lariat_utils.LariatData(ts.j.id,olddata=lariat_dict)

  if ld.exc == 'unknown' or ld.wayness != ts.wayness: # try loading older lariat
    ld=lariat_utils.LariatData(ts.j.id,end_epoch=ts.j.end_time,daysback=3,directory=analyze_conf.lariat_path,olddata=ld.ld)
  if ld.exc == 'unknown' or ld.wayness != ts.wayness: # Still nothing; return
    return

  read_rate  = numpy.zeros_like(tmid)
  write_rate = numpy.zeros_like(tmid)
  l1_rate    = numpy.zeros_like(tmid)
  avx_rate   = numpy.zeros_like(tmid)
  sse_rate   = numpy.zeros_like(tmid)
  stall_rate = numpy.zeros_like(tmid)
  clock_rate = numpy.zeros_like(tmid)


  for host in ts.j.hosts.keys():
    read_rate  += numpy.diff(ts.assemble([0],host,0))/numpy.diff(ts.t)
    write_rate += numpy.diff(ts.assemble([1],host,0))/numpy.diff(ts.t)
    l1_rate    += numpy.diff(ts.assemble([2],host,0))/numpy.diff(ts.t)
    avx_rate   += numpy.diff(ts.assemble([3],host,0))/numpy.diff(ts.t)
    sse_rate   += numpy.diff(ts.assemble([4],host,0))/numpy.diff(ts.t)
    stall_rate += numpy.diff(ts.assemble([5],host,0))/numpy.diff(ts.t)
    clock_rate += numpy.diff(ts.assemble([6],host,0))/numpy.diff(ts.t)

  if float(ts.numhosts*int(ts.wayness)*int(ld.threads)) == 0:
    print 'No tasks in', ts.j.id, ' skipping'
    return

  read_rate  /= float(ts.numhosts*int(ts.wayness)*int(ld.threads))
  write_rate /= float(ts.numhosts*int(ts.wayness)*int(ld.threads))
  l1_rate    /= float(ts.numhosts*int(ts.wayness)*int(ld.threads))
  avx_rate   /= float(ts.numhosts*int(ts.wayness)*int(ld.threads))
  sse_rate   /= float(ts.numhosts*int(ts.wayness)*int(ld.threads))
  stall_rate /= float(ts.numhosts*int(ts.wayness)*int(ld.threads))
  clock_rate /= float(ts.numhosts*int(ts.wayness)*int(ld.threads))
    

  try:
    data_ratio  = (read_rate+write_rate)/l1_rate
  except RuntimeWarning:
    print 'Division by zero, skipping:', ts.j.id
    return
  flops       = avx_rate+sse_rate
  try:
    flops_ratio = (flops-numpy.min(flops))/(numpy.max(flops)-numpy.min(flops))
  except RuntimeWarning:
    print 'Division by zero, skipping:', ts.j.id
    return
  try:
    stall_ratio = stall_rate/clock_rate
  except RuntimeWarning:
    print 'Division by zero, skipping:', ts.j.id
    return


  mean_data_ratio=numpy.mean(data_ratio)
  mean_stall_ratio=numpy.mean(stall_ratio)
  mean_mem_rate=numpy.mean(read_rate + write_rate)*64.0
  if mean_stall_ratio > 1.:
    return
  elif mean_mem_rate > 75.*1000000000./16.:
    return

  ename=ld.exc.split('/')[-1]
  ename=ld.comp_name(ename,ld.equiv_patterns)
##  if mean_mem_rate > 2e9: # Put a print in here and investigate bad jobs
##    return
  return (ts.j.id, ts.su, ename, mean_data_ratio, mean_stall_ratio, mean_mem_rate )
Beispiel #40
0
def get_data(file, mintime=1.0, wayness=range(1, 33), lariat_dict=None):
    """Load one job's counters and reduce them to host-averaged time series.

    Args:
        file: Job file handed to ``tspl.TSPLSum``.
        mintime: Minimum-runtime argument forwarded to ``tspl_utils.checkjob``.
        wayness: Accepted wayness value(s), forwarded to ``tspl_utils.checkjob``.
        lariat_dict: Previously loaded Lariat data.  When ``None`` the data is
            looked up under ``analyze_conf.lariat_path``.

    Returns:
        ``None`` when the file cannot be loaded (``tspl.TSPLException``) or
        when ``checkjob`` rejects the job.  Otherwise the tuple
        ``(ts, ld, tmid, read_rate, write_rate, stall_rate, clock_rate,
        avx_rate, sse_rate, inst_rate, meta_rate, l1_rate, l2_rate, l3_rate,
        load_rate, read_frac, stall_frac)``.  Every ``*_rate`` is the sum over
        hosts of ``diff(counter) / diff(ts.t)`` divided by ``ts.numhosts``.
    """
    # The position of a key in k1/k2 is the index later given to ts.assemble(),
    # so the groups are kept separate and their offsets derived from lengths.
    imc_keys = ["CAS_READS", "CAS_WRITES"]
    core_keys = ["STALLS", "CLOCKS_UNHALTED_CORE", "SSE_D_ALL", "SIMD_D_256"]
    meta_keys = [
        "open",
        "close",
        "mmap",
        "seek",
        "fsync",
        "setattr",
        "truncate",
        "flock",
        "getattr",
        "statfs",
        "alloc_inode",
        "setxattr",
        "getxattr",
        "listxattr",
        "removexattr",
        "inode_permission",
        "readdir",
        "create",
        "lookup",
        "link",
        "unlink",
        "symlink",
        "mkdir",
        "rmdir",
        "mknod",
        "rename",
    ]
    load_keys = [
        "LOAD_OPS_L1_HIT",
        "LOAD_OPS_L2_HIT",
        "LOAD_OPS_LLC_HIT",
        "LOAD_OPS_ALL",
        "INSTRUCTIONS_RETIRED",
    ]

    k1 = (
        ["intel_snb_imc"] * len(imc_keys)
        + ["intel_snb"] * len(core_keys)
        + ["llite"] * len(meta_keys)
        + ["intel_snb"] * len(load_keys)
    )
    k2 = imc_keys + core_keys + meta_keys + load_keys

    meta_first = len(imc_keys) + len(core_keys)
    load_first = meta_first + len(meta_keys)

    try:
        ts = tspl.TSPLSum(file, k1, k2)
    except tspl.TSPLException:
        return

    if not tspl_utils.checkjob(ts, mintime, wayness):
        return

    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
    dt = numpy.diff(ts.t)

    if lariat_dict is None:
        ld = lariat_utils.LariatData(
            ts.j.id,
            end_epoch=ts.j.end_time,
            daysback=3,
            directory=analyze_conf.lariat_path,
        )
    else:
        ld = lariat_utils.LariatData(ts.j.id, olddata=lariat_dict)

    # (name, key indices) for every series that is accumulated.
    selectors = [
        ("read", [0]),
        ("write", [1]),
        ("stall", [2]),
        ("clock", [3]),
        ("avx", [5]),  # SIMD_D_256
        ("sse", [4]),  # SSE_D_ALL
        ("inst", [load_first + 4]),
        # Only the llite keys: starting one slot earlier would pull the
        # SIMD_D_256 counter into the metadata rate.
        ("meta", range(meta_first, load_first)),
        ("l1", [load_first]),
        ("l2", [load_first + 1]),
        ("l3", [load_first + 2]),
        ("load", [load_first + 3]),
    ]

    rates = dict((name, numpy.zeros_like(tmid)) for name, _ in selectors)
    for host in ts.j.hosts.keys():
        for name, indices in selectors:
            rates[name] += numpy.diff(ts.assemble(indices, host, 0)) / dt

    nhosts = float(ts.numhosts)
    for name, _ in selectors:
        rates[name] /= nhosts

    read_rate = rates["read"]
    write_rate = rates["write"]
    stall_rate = rates["stall"]
    clock_rate = rates["clock"]

    # The +1 presumably keeps the denominator away from zero when there is no
    # memory traffic at all.
    read_frac = read_rate / (read_rate + write_rate + 1)
    stall_frac = stall_rate / clock_rate

    return (
        ts,
        ld,
        tmid,
        read_rate,
        write_rate,
        stall_rate,
        clock_rate,
        rates["avx"],
        rates["sse"],
        rates["inst"],
        rates["meta"],
        rates["l1"],
        rates["l2"],
        rates["l3"],
        rates["load"],
        read_frac,
        stall_frac,
    )
Beispiel #41
0
def getuser(file,user):
  """Summarise one job's resource usage if the job belongs to `user`.

  Args:
    file: Job file handed to tspl.TSPLSum.
    user: Owner name; jobs whose ts.owner differs are skipped.

  Returns:
    None when the file cannot be loaded (tspl.TSPLException), the owner does
    not match, or tspl_utils.checkjob rejects the job.  Otherwise a 23-tuple:
    ts.j.acct['end_time'], then (min, max, mean) for each of dram_rate,
    l1_rate, lnet_rate, ib_rate, user_rate, flops_rate and mem_usage in that
    order, then ts.j.id.  All series are averaged over hosts.

  Raises:
    Any other exception is reported on stdout (type, source file name, line)
    and then re-raised with its original traceback.
  """
  try:
    k1=['intel_snb_imc', 'intel_snb_imc', 'intel_snb',
        'lnet', 'lnet', 'ib_sw','ib_sw','cpu',
        'intel_snb', 'intel_snb', 'mem']
    k2=['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL',
        'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes','user',
        'SSE_D_ALL', 'SIMD_D_256', 'MemUsed']

    try:
      ts=tspl.TSPLSum(file,k1,k2)
    except tspl.TSPLException:
      return

    if ts.owner != user:
      return

    ignore_qs=['gpu','gpudev','vis','visdev']
    if not tspl_utils.checkjob(ts,1.,range(1,33),ignore_qs):
      return

    tmid=(ts.t[:-1]+ts.t[1:])/2.0
    dt=numpy.diff(ts.t)

    dram_rate  = numpy.zeros_like(tmid)
    l1_rate    = numpy.zeros_like(tmid)
    lnet_rate  = numpy.zeros_like(tmid)
    ib_rate    = numpy.zeros_like(tmid)
    user_rate  = numpy.zeros_like(tmid)
    flops_rate = numpy.zeros_like(tmid)
    mem_usage  = numpy.zeros_like(tmid)

    for host in ts.j.hosts.keys():
      dram_rate  += numpy.diff(ts.assemble([0,1],host,0))/dt
      l1_rate    += numpy.diff(ts.assemble([2],host,0))/dt
      lnet_rate  += numpy.diff(ts.assemble([3,4],host,0))/dt
      # Negative indices presumably subtract those keys (ib_sw minus lnet).
      ib_rate    += numpy.diff(ts.assemble([5,6,-3,-4],host,0))/dt
      user_rate  += numpy.diff(ts.assemble([7],host,0))/dt
      flops_rate += numpy.diff(ts.assemble([8,9],host,0))/dt
      # Memory is a level, not a counter: use the midpoint of adjacent samples.
      v           = ts.assemble([10],host,0)
      mem_usage  += (v[:-1]+v[1:])/2.0

    # Per-host averages; the extra factors rescale the raw counts
    # (presumably to GB/s, MB/s, fraction, Gflop/s and GB -- TODO confirm).
    dram_rate  /= float(ts.numhosts)*1024.*1024.*1024./64.
    l1_rate    /= float(ts.numhosts)*1024.*1024./64.
    lnet_rate  /= float(ts.numhosts)*1e6
    ib_rate    /= float(ts.numhosts)*1e6
    user_rate  /= float(ts.numhosts)*100.*ts.wayness
    flops_rate /= float(ts.numhosts)*1e9
    mem_usage  /= float(ts.numhosts)*(1024.*1024.*1024.)

    summary=[]
    for series in (dram_rate,l1_rate,lnet_rate,ib_rate,
                   user_rate,flops_rate,mem_usage):
      summary.extend([numpy.min(series),numpy.max(series),numpy.mean(series)])

    return tuple([ts.j.acct['end_time']] + summary + [ts.j.id])
  except Exception:
    import sys
    exc_type, _, exc_tb = sys.exc_info()
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    print(exc_type, fname, exc_tb.tb_lineno)
    # Bare raise keeps the traceback of the original failure.
    raise
Beispiel #42
0
def master_plot(file, threshold=False):
    k1 = [
        'amd64_core', 'amd64_core', 'amd64_sock', 'lnet', 'lnet', 'ib_sw',
        'ib_sw', 'cpu'
    ]
    k2 = [
        'SSE_FLOPS', 'DCSF', 'DRAM', 'rx_bytes', 'tx_bytes', 'rx_bytes',
        'tx_bytes', 'user'
    ]

    try:
        print file
        ts = tspl.TSPLSum(file, k1, k2)
    except tspl.TSPLException as e:
        return

    if not tspl_utils.checkjob(ts, 3600, 16):
        return
    elif ts.numhosts < 2:
        print ts.j.id + ': 1 host'
        return

    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

    fig, ax = plt.subplots(6, 1, figsize=(8, 12), dpi=80)

    # Plot flop rate
    ax[0].hold = True
    for k in ts.j.hosts.keys():
        h = ts.j.hosts[k]
        rate = numpy.divide(numpy.diff(ts.data[0][k][0]), numpy.diff(ts.t))
        ax[0].plot(tmid / 3600, rate)
    ax[0].set_ylabel('Total ' + ts.k1[0] + '\n' + ts.k2[0] + '/s')

    # Plot DCSF rate
    ax[1].hold = True
    for k in ts.j.hosts.keys():
        h = ts.j.hosts[k]
        rate = numpy.divide(numpy.diff(ts.data[1][k][0]), numpy.diff(ts.t))
        ax[1].plot(tmid / 3600, rate)
    ax[1].set_ylabel('Total ' + ts.k1[1] + '\n' + ts.k2[1] + '/s')

    #Plot DRAM rate
    ax[2].hold = True
    for k in ts.j.hosts.keys():
        h = ts.j.hosts[k]
        rate = numpy.divide(numpy.diff(ts.data[2][k][0]), numpy.diff(ts.t))
        ax[2].plot(tmid / 3600, rate)
    ax[2].set_ylabel('Total ' + ts.k1[2] + '\n' + ts.k2[2] + '/s')

    # Plot lnet sum rate
    ax[3].hold = True
    for k in ts.j.hosts.keys():
        h = ts.j.hosts[k]
        rate = numpy.divide(numpy.diff(ts.data[3][k][0] + ts.data[4][k][0]),
                            numpy.diff(ts.t))
        ax[3].plot(tmid / 3600, rate / (1024. * 1024.))
    ax[3].set_ylabel('Total lnet MB/s')

    # Plot remaining IB sum rate
    ax[4].hold = True
    for k in ts.j.hosts.keys():
        h = ts.j.hosts[k]
        v = ts.data[5][k][0] + ts.data[6][k][0] - (ts.data[3][k][0] +
                                                   ts.data[4][k][0])
        rate = numpy.divide(numpy.diff(v), numpy.diff(ts.t))
        ax[4].plot(tmid / 3600, rate / (1024 * 1024.))
    ax[4].set_ylabel('Total (ib_sw-lnet) MB/s')

    #Plot CPU user time
    ax[5].hold = True
    for k in ts.j.hosts.keys():
        h = ts.j.hosts[k]
        rate = numpy.divide(numpy.diff(ts.data[7][k][0] / 100 / ts.wayness),
                            numpy.diff(ts.t))
        ax[5].plot(tmid / 3600, rate)
    ax[5].set_ylabel('Total ' + ts.k1[7] + '\n' + ts.k2[7] + '/s')
    ax[5].set_xlabel('Time (hr)')

    print ts.j.id + ': '

    title = ts.title
    if threshold:
        title += ', V: %(v)-8.3f' % {'v': threshold}

    plt.suptitle(title)
    plt.subplots_adjust(hspace=0.35)
    for a in ax:
        tspl_utils.adjust_yaxis_range(a, 0.1)

    fname = '_'.join(['graph', ts.j.id, 'master'])
    fig.savefig(fname)
    plt.close()
Beispiel #43
0
def is_unfloppy(file, thresh):
    k1 = {
        'amd64': ['amd64_core', 'amd64_sock', 'cpu'],
        'intel_snb': ['intel_snb', 'intel_snb', 'intel_snb', 'cpu'],
    }
    k2 = {
        'amd64': ['SSE_FLOPS', 'DRAM', 'user'],
        'intel_snb': ['SIMD_D_256', 'SSE_D_ALL', 'LOAD_L1D_ALL', 'user'],
    }

    peak = {
        'amd64': [2.3e9 * 16 * 2, 24e9, 1.],
        'intel_snb': [16 * 2.7e9 * 2, 16 * 2.7e9 / 2. * 64., 1.],
    }

    try:
        ts = tspl.TSPLSum(file, k1, k2)
    except tspl.TSPLException as e:
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600, range(1, 33), ignore_qs):
        return
    elif ts.numhosts < 2:  # At least 2 hosts
        print ts.j.id + ': 1 host'
        return

    gfloprate = numpy.zeros(len(ts.t) - 1)
    gdramrate = numpy.zeros(len(ts.t) - 1)
    gcpurate = numpy.zeros(len(ts.t) - 1)
    for h in ts.j.hosts.keys():
        if ts.pmc_type == 'amd64':
            gfloprate += numpy.divide(numpy.diff(ts.data[0][h][0]),
                                      numpy.diff(ts.t))
            gdramrate += numpy.divide(numpy.diff(ts.data[1][h][0]),
                                      numpy.diff(ts.t))
            gcpurate += numpy.divide(numpy.diff(ts.data[2][h][0]),
                                     numpy.diff(ts.t))
        elif ts.pmc_type == 'intel_snb':
            gfloprate += numpy.divide(numpy.diff(ts.data[0][h][0]),
                                      numpy.diff(ts.t))
            gfloprate += numpy.divide(numpy.diff(ts.data[1][h][0]),
                                      numpy.diff(ts.t))
            gdramrate += numpy.divide(numpy.diff(ts.data[2][h][0]),
                                      numpy.diff(ts.t))
            gcpurate += numpy.divide(numpy.diff(ts.data[3][h][0]),
                                     numpy.diff(ts.t))

    mfr = scipy.stats.tmean(gfloprate) / ts.numhosts
    mdr = scipy.stats.tmean(gdramrate) / ts.numhosts
    mcr = scipy.stats.tmean(gcpurate) / (ts.numhosts * ts.wayness * 100.)

    print mfr / peak[ts.pmc_type][0], (mdr / peak[ts.pmc_type][1])

    # [ts.j.id,mfr/peak[0],mdr/peak[1],mcr/peak[2]
    #print 'mcr',mcr/peak[ts.pmc_type][2], (mfr/peak[ts.pmc_type][0])/(mdr/peak[ts.pmc_type][1])
    if ((mcr / peak[ts.pmc_type][2] > 0.5) and (mfr / peak[ts.pmc_type][0]) /
        (mdr / peak[ts.pmc_type][1]) < thresh):
        return True
    else:
        return False
Beispiel #44
0
def master_plot(file,
                mode='lines',
                threshold=False,
                output_dir='.',
                prefix='graph',
                mintime=3600,
                wayness=16,
                header='Master',
                lariat_dict=None,
                wide=False,
                job_stats=None):
    """Build the six-panel "master" figure for one job.

    Args:
        file: Job file handed to ``tspl.TSPLSum``.
        mode: ``'hist'`` selects ``plot_thist``, ``'percentile'`` selects
            ``plot_mmm``; anything else selects ``plot_lines``.
        threshold: When truthy, appended to the title as ``V: <value>``.
            Only used in the non-wide layout with a header.
        output_dir: Not used inside this function.
        prefix: First component of the returned file name.
        mintime: Forwarded to ``tspl_utils.checkjob``.
        wayness: Forwarded to ``tspl_utils.checkjob``; afterwards replaced by
            the job's own wayness (see below).
        header: First title line.  ``None`` suppresses the title in the
            non-wide layout.
        lariat_dict: ``None`` loads Lariat data from
            ``analyze_conf.lariat_path``; the string ``"pass"`` builds a
            ``LariatData`` from the job id alone; anything else is passed as
            ``olddata``.
        wide: When true, use a 6x2 grid whose second set of axes is switched
            off and carries a text summary instead of a title.
        job_stats: Forwarded to ``tspl.TSPLSum``.

    Returns:
        ``None`` when loading raises ``tspl.TSPLException``, when ``checkjob``
        rejects the job, or when ``ts.pmc_type`` is neither ``'intel_snb'``
        nor ``'intel'``.  Otherwise ``(fig, fname)``; the figure is not saved
        here.
    """
    # Device types (k1) and counter names (k2), one list per counter family.
    # List positions are the key indices passed to the plot helper below.
    k1 = {
        'amd64': [
            'amd64_core', 'amd64_core', 'amd64_sock', 'lnet', 'lnet', 'ib_sw',
            'ib_sw', 'cpu'
        ],
        'intel': [
            'intel_pmc3', 'intel_pmc3', 'intel_pmc3', 'lnet', 'lnet', 'ib_ext',
            'ib_ext', 'cpu', 'mem', 'mem'
        ],
        'intel_snb': [
            'intel_snb_imc', 'intel_snb_imc', 'intel_snb', 'lnet', 'lnet',
            'ib_sw', 'ib_sw', 'cpu', 'intel_snb', 'intel_snb', 'mem', 'mem'
        ],
    }

    k2 = {
        'amd64': [
            'SSE_FLOPS', 'DCSF', 'DRAM', 'rx_bytes', 'tx_bytes', 'rx_bytes',
            'tx_bytes', 'user'
        ],
        'intel': [
            'MEM_LOAD_RETIRED_L1D_HIT', 'FP_COMP_OPS_EXE_X87',
            'INSTRUCTIONS_RETIRED', 'rx_bytes', 'tx_bytes', 'port_recv_data',
            'port_xmit_data', 'user', 'MemUsed', 'AnonPages'
        ],
        'intel_snb': [
            'CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'rx_bytes', 'tx_bytes',
            'rx_bytes', 'tx_bytes', 'user', 'SSE_D_ALL', 'SIMD_D_256',
            'MemUsed', 'AnonPages'
        ],
    }

    try:
        print file
        ts = tspl.TSPLSum(file, k1, k2, job_stats)
    except tspl.TSPLException as e:
        return

    # No queues are excluded here; the commented list is the usual filter.
    ignore_qs = []  #'gpu','gpudev','vis','visdev']
    if not tspl_utils.checkjob(ts, mintime, wayness, ignore_qs):
        return

    if lariat_dict == None:
        ld = lariat_utils.LariatData(ts.j.id,
                                     end_epoch=ts.j.end_time,
                                     daysback=3,
                                     directory=analyze_conf.lariat_path)
    elif lariat_dict == "pass":
        ld = lariat_utils.LariatData(ts.j.id)
    else:
        ld = lariat_utils.LariatData(ts.j.id, olddata=lariat_dict)

    # From here on `wayness` is the value used to scale cpu user time: the
    # job's wayness, or Lariat's when that is known (!= -1) and smaller.
    wayness = ts.wayness
    if ld.wayness != -1 and ld.wayness < ts.wayness:
        wayness = ld.wayness

    if wide:
        fig, ax = plt.subplots(6, 2, figsize=(15.5, 12), dpi=110)

        # Make 2-d array into 1-d, and reorder so that the left side is blank
        # (assumes my_utils.flatten returns the axes row by row -- TODO confirm)
        ax = my_utils.flatten(ax)
        ax_even = ax[0:12:2]
        ax_odd = ax[1:12:2]
        ax = ax_odd + ax_even

        for a in ax_even:
            a.axis('off')
    else:
        fig, ax = plt.subplots(6, 1, figsize=(8, 12), dpi=110)

    # Pick the drawing helper; unknown modes fall back to line plots.
    if mode == 'hist':
        plot = plot_thist
    elif mode == 'percentile':
        plot = plot_mmm
    else:
        plot = plot_lines

    # Panels 0-2 depend on the counter family.  The helper's positional
    # arguments are (axis, ts, key indices, 3600., scale); negative indices
    # presumably subtract that key -- TODO confirm against the plot helpers.
    if ts.pmc_type == 'intel_snb':
        # Plot key 1
        plot(ax[0], ts, [8, 9], 3600., 1e9, ylabel='Total AVX +\nSSE Ginst/s')

        # Plot key 2
        plot(ax[1],
             ts, [0, 1],
             3600.,
             1.0 / 64.0 * 1024. * 1024. * 1024.,
             ylabel='Total Mem BW GB/s')

        #Plot key 3
        #plot(ax[2],ts,[2],3600.,1.0/64.0*1e9, ylabel='L1 BW GB/s')
        plot(ax[2],
             ts, [10, -11],
             3600.,
             1024.0 * 1024.0 * 1024.0,
             ylabel='Memory Usage GB',
             do_rate=False)
    elif ts.pmc_type == 'intel':
        # Note: ax[1] receives no plot for this counter family.
        plot(ax[0], ts, [1], 3600., 1e9, ylabel='FP Ginst/s')
        plot(ax[2],
             ts, [8, -9],
             3600.,
             1024.0 * 1024.0 * 1024.0,
             ylabel='Memory Usage GB',
             do_rate=False)
    else:
        #Fix this to support the old amd plots
        print ts.pmc_type + ' not supported'
        return

    # Plot lnet sum rate
    plot(ax[3], ts, [3, 4], 3600., 1024.**2, ylabel='Total lnet MB/s')

    # Plot remaining IB sum rate
    if ts.pmc_type == 'intel_snb':
        plot(ax[4],
             ts, [5, 6, -3, -4],
             3600.,
             1024.**2,
             ylabel='Total (ib_sw-lnet) MB/s')
    elif ts.pmc_type == 'intel':
        plot(ax[4],
             ts, [5, 6, -3, -4],
             3600.,
             1024.**2,
             ylabel='Total (ib_ext-lnet) MB/s')

    #Plot CPU user time
    plot(ax[5],
         ts, [7],
         3600.,
         wayness * 100.,
         xlabel='Time (hr)',
         ylabel='Total cpu user\nfraction')

    print ts.j.id + ': '

    plt.subplots_adjust(hspace=0.35)
    if wide:
        # Wide layout: summary text on the left instead of a title.  The
        # vertical position is estimated from the line count and font size.
        left_text = header + '\n' + my_utils.summary_text(ld, ts)
        text_len = len(left_text.split('\n'))
        fontsize = ax[0].yaxis.label.get_size()
        linespacing = 1.2
        fontrate = float(fontsize * linespacing) / 72. / 15.5
        yloc = .8 - fontrate * (text_len - 1
                                )  # this doesn't quite work. fontrate is too
        # small by a small amount
        plt.figtext(.05, yloc, left_text, linespacing=linespacing)
        fname = '_'.join([prefix, ts.j.id, ts.owner, 'wide_master'])
    elif header != None:
        title = header + '\n' + ts.title
        if threshold:
            title += ', V: %(v)-6.1f' % {'v': threshold}
        title += '\n' + ld.title()
        plt.suptitle(title)
        fname = '_'.join([prefix, ts.j.id, ts.owner, 'master'])
    else:
        fname = '_'.join([prefix, ts.j.id, ts.owner, 'master'])

    # Tag the file name with the plotting mode.
    if mode == 'hist':
        fname += '_hist'
    elif mode == 'percentile':
        fname += '_perc'

    # Closes the current pyplot figure; the Figure object itself is still
    # returned, presumably for the caller to save.
    plt.close()

    return fig, fname
Beispiel #45
0
def master_plot(file,threshold=False):
  k1=['amd64_core','amd64_core','amd64_sock','lnet','lnet','ib_sw','ib_sw',
      'cpu']
  k2=['SSE_FLOPS','DCSF','DRAM','rx_bytes','tx_bytes','rx_bytes','tx_bytes',
      'user']

  try:
    print file
    ts=tspl.TSPLSum(file,k1,k2)
  except tspl.TSPLException as e:
    return

  if not tspl_utils.checkjob(ts,3600,16):
    return
  elif ts.numhosts < 2:
    print ts.j.id + ': 1 host'
    return

  tmid=(ts.t[:-1]+ts.t[1:])/2.0
  

  fig,ax=plt.subplots(6,1,figsize=(8,12),dpi=80)
  
  # Plot flop rate
  ax[0].hold=True 
  for k in ts.j.hosts.keys():
    h=ts.j.hosts[k]
    rate=numpy.divide(numpy.diff(ts.data[0][k][0]),numpy.diff(ts.t))
    ax[0].plot(tmid/3600,rate)
  ax[0].set_ylabel('Total ' + ts.k1[0] + '\n' + ts.k2[0] + '/s')

  # Plot DCSF rate
  ax[1].hold=True
  for k in ts.j.hosts.keys():
    h=ts.j.hosts[k]
    rate=numpy.divide(numpy.diff(ts.data[1][k][0]),numpy.diff(ts.t))
    ax[1].plot(tmid/3600,rate)
  ax[1].set_ylabel('Total ' + ts.k1[1] + '\n' + ts.k2[1] + '/s')

  #Plot DRAM rate
  ax[2].hold=True
  for k in ts.j.hosts.keys():
    h=ts.j.hosts[k]
    rate=numpy.divide(numpy.diff(ts.data[2][k][0]),numpy.diff(ts.t))
    ax[2].plot(tmid/3600,rate)
  ax[2].set_ylabel('Total ' + ts.k1[2] + '\n' + ts.k2[2] + '/s')

  # Plot lnet sum rate
  ax[3].hold=True
  for k in ts.j.hosts.keys():
    h=ts.j.hosts[k]
    rate=numpy.divide(numpy.diff(ts.data[3][k][0]+ts.data[4][k][0]),
                      numpy.diff(ts.t))
    ax[3].plot(tmid/3600,rate/(1024.*1024.))
  ax[3].set_ylabel('Total lnet MB/s')

  # Plot remaining IB sum rate
  ax[4].hold=True
  for k in ts.j.hosts.keys():
    h=ts.j.hosts[k]
    v=ts.data[5][k][0]+ts.data[6][k][0]-(ts.data[3][k][0]+ts.data[4][k][0])
    rate=numpy.divide(numpy.diff(v),numpy.diff(ts.t))
    ax[4].plot(tmid/3600,rate/(1024*1024.))
  ax[4].set_ylabel('Total (ib_sw-lnet) MB/s')

  #Plot CPU user time
  ax[5].hold=True
  for k in ts.j.hosts.keys():
    h=ts.j.hosts[k]
    rate=numpy.divide(numpy.diff(ts.data[7][k][0]/100/ts.wayness),
                      numpy.diff(ts.t))
    ax[5].plot(tmid/3600,rate)
  ax[5].set_ylabel('Total ' + ts.k1[7] + '\n' + ts.k2[7] + '/s')
  ax[5].set_xlabel('Time (hr)')
  
  print ts.j.id + ': '

  title=ts.title
  if threshold:
    title+=', V: %(v)-8.3f' % {'v': threshold}

  plt.suptitle(title)
  plt.subplots_adjust(hspace=0.35)
  for a in ax:
    tspl_utils.adjust_yaxis_range(a,0.1)

  fname='_'.join(['graph',ts.j.id,'master'])
  fig.savefig(fname)
  plt.close()
Beispiel #46
0
def getqueue(file, queue):
    """Summarise one job's resource usage if the job ran in `queue`.

    Args:
        file: Job file handed to ``tspl.TSPLSum``.
        queue: Queue name; jobs whose ``ts.queue`` differs are skipped.

    Returns:
        ``None`` when the file cannot be loaded (``tspl.TSPLException``), the
        queue does not match, or ``tspl_utils.checkjob`` rejects the job.
        Otherwise a 23-tuple: ``ts.j.acct['end_time']``, then
        ``(min, max, mean)`` for each of dram_rate, l1_rate, lnet_rate,
        ib_rate, user_rate, flops_rate and mem_usage in that order, then
        ``ts.j.id``.  All series are averaged over hosts.

    Raises:
        Any other exception is reported on stdout (type, source file name,
        line) and then re-raised with its original traceback.
    """
    try:
        k1 = [
            'intel_snb_imc', 'intel_snb_imc', 'intel_snb', 'lnet', 'lnet',
            'ib_sw', 'ib_sw', 'cpu', 'intel_snb', 'intel_snb', 'mem'
        ]
        k2 = [
            'CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'rx_bytes', 'tx_bytes',
            'rx_bytes', 'tx_bytes', 'user', 'SSE_D_ALL', 'SIMD_D_256',
            'MemUsed'
        ]

        try:
            ts = tspl.TSPLSum(file, k1, k2)
        except tspl.TSPLException:
            return

        if ts.queue != queue:
            return

        if not tspl_utils.checkjob(ts, 1., range(1, 33)):
            return

        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
        dt = numpy.diff(ts.t)

        dram_rate = numpy.zeros_like(tmid)
        l1_rate = numpy.zeros_like(tmid)
        lnet_rate = numpy.zeros_like(tmid)
        ib_rate = numpy.zeros_like(tmid)
        user_rate = numpy.zeros_like(tmid)
        flops_rate = numpy.zeros_like(tmid)
        mem_usage = numpy.zeros_like(tmid)

        for host in ts.j.hosts.keys():
            dram_rate += numpy.diff(ts.assemble([0, 1], host, 0)) / dt
            l1_rate += numpy.diff(ts.assemble([2], host, 0)) / dt
            lnet_rate += numpy.diff(ts.assemble([3, 4], host, 0)) / dt
            # Negative indices presumably subtract those keys (ib_sw - lnet).
            ib_rate += numpy.diff(ts.assemble([5, 6, -3, -4], host, 0)) / dt
            user_rate += numpy.diff(ts.assemble([7], host, 0)) / dt
            flops_rate += numpy.diff(ts.assemble([8, 9], host, 0)) / dt
            # Memory is a level, not a counter: average adjacent samples.
            v = ts.assemble([10], host, 0)
            mem_usage += (v[:-1] + v[1:]) / 2.0

        # Per-host averages; the extra factors rescale the raw counts
        # (presumably to GB/s, MB/s, fraction, Gflop/s and GB -- TODO confirm).
        dram_rate /= float(ts.numhosts) * 1024. * 1024. * 1024. / 64.
        l1_rate /= float(ts.numhosts) * 1024. * 1024. / 64.
        lnet_rate /= float(ts.numhosts) * 1e6
        ib_rate /= float(ts.numhosts) * 1e6
        user_rate /= float(ts.numhosts) * 100. * ts.wayness
        flops_rate /= float(ts.numhosts) * 1e9
        mem_usage /= float(ts.numhosts) * (1024. * 1024. * 1024.)

        fields = [ts.j.acct['end_time']]
        for series in (dram_rate, l1_rate, lnet_rate, ib_rate, user_rate,
                       flops_rate, mem_usage):
            fields.append(numpy.min(series))
            fields.append(numpy.max(series))
            fields.append(numpy.mean(series))
        fields.append(ts.j.id)

        return tuple(fields)
    except Exception:
        import sys
        exc_type, _, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        # Bare raise keeps the traceback of the original failure.
        raise
Beispiel #47
0
def compute_ratio(file, lariat_dict=None):
    try:
        ts = tspl.TSPLSum(file, [
            'intel_snb_imc', 'intel_snb_imc', 'intel_snb', 'intel_snb',
            'intel_snb', 'intel_snb', 'intel_snb'
        ], [
            'CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'SIMD_D_256',
            'SSE_D_ALL', 'STALLS', 'CLOCKS_UNHALTED_CORE'
        ])

    except tspl.TSPLException as e:
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600., range(1, 33), ignore_qs):
        return

    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

    if lariat_dict == None:
        ld = lariat_utils.LariatData(ts.j.id,
                                     end_epoch=ts.j.end_time,
                                     daysback=3,
                                     directory=analyze_conf.lariat_path)
    else:
        ld = lariat_utils.LariatData(ts.j.id, olddata=lariat_dict)

    if ld.exc == 'unknown' or ld.wayness != ts.wayness:  # try loading older lariat
        ld = lariat_utils.LariatData(ts.j.id,
                                     end_epoch=ts.j.end_time,
                                     daysback=3,
                                     directory=analyze_conf.lariat_path,
                                     olddata=ld.ld)
    if ld.exc == 'unknown' or ld.wayness != ts.wayness:  # Still nothing; return
        return

    read_rate = numpy.zeros_like(tmid)
    write_rate = numpy.zeros_like(tmid)
    l1_rate = numpy.zeros_like(tmid)
    avx_rate = numpy.zeros_like(tmid)
    sse_rate = numpy.zeros_like(tmid)
    stall_rate = numpy.zeros_like(tmid)
    clock_rate = numpy.zeros_like(tmid)

    for host in ts.j.hosts.keys():
        read_rate += numpy.diff(ts.assemble([0], host, 0)) / numpy.diff(ts.t)
        write_rate += numpy.diff(ts.assemble([1], host, 0)) / numpy.diff(ts.t)
        l1_rate += numpy.diff(ts.assemble([2], host, 0)) / numpy.diff(ts.t)
        avx_rate += numpy.diff(ts.assemble([3], host, 0)) / numpy.diff(ts.t)
        sse_rate += numpy.diff(ts.assemble([4], host, 0)) / numpy.diff(ts.t)
        stall_rate += numpy.diff(ts.assemble([5], host, 0)) / numpy.diff(ts.t)
        clock_rate += numpy.diff(ts.assemble([6], host, 0)) / numpy.diff(ts.t)

    if float(ts.numhosts * int(ts.wayness) * int(ld.threads)) == 0:
        print 'No tasks in', ts.j.id, ' skipping'
        return

    read_rate /= float(ts.numhosts * int(ts.wayness) * int(ld.threads))
    write_rate /= float(ts.numhosts * int(ts.wayness) * int(ld.threads))
    l1_rate /= float(ts.numhosts * int(ts.wayness) * int(ld.threads))
    avx_rate /= float(ts.numhosts * int(ts.wayness) * int(ld.threads))
    sse_rate /= float(ts.numhosts * int(ts.wayness) * int(ld.threads))
    stall_rate /= float(ts.numhosts * int(ts.wayness) * int(ld.threads))
    clock_rate /= float(ts.numhosts * int(ts.wayness) * int(ld.threads))

    try:
        data_ratio = (read_rate + write_rate) / l1_rate
    except RuntimeWarning:
        print 'Division by zero, skipping:', ts.j.id
        return
    flops = avx_rate + sse_rate
    try:
        flops_ratio = (flops - numpy.min(flops)) / (numpy.max(flops) -
                                                    numpy.min(flops))
    except RuntimeWarning:
        print 'Division by zero, skipping:', ts.j.id
        return
    try:
        stall_ratio = stall_rate / clock_rate
    except RuntimeWarning:
        print 'Division by zero, skipping:', ts.j.id
        return

    mean_data_ratio = numpy.mean(data_ratio)
    mean_stall_ratio = numpy.mean(stall_ratio)
    mean_mem_rate = numpy.mean(read_rate + write_rate) * 64.0
    if mean_stall_ratio > 1.:
        return
    elif mean_mem_rate > 75. * 1000000000. / 16.:
        return

    ename = ld.exc.split('/')[-1]
    ename = ld.comp_name(ename, ld.equiv_patterns)
    ##  if mean_mem_rate > 2e9: # Put a print in here and investigate bad jobs
    ##    return
    return (ts.j.id, ts.su, ename, mean_data_ratio, mean_stall_ratio,
            mean_mem_rate)
Beispiel #48
0
def do_compute(file):
    """Summarise memory, stall and flop rates for one job file as a CSV line.

    Loads seven counters from ``file`` via ``tspl.TSPLSum`` (CAS_READS and
    CAS_WRITES from ``intel_snb_imc``; LOAD_L1D_ALL, SIMD_D_256, SSE_D_ALL,
    STALLS and CLOCKS_UNHALTED_CORE from ``intel_snb``), turns each into a
    rate per time interval, sums the rates over all hosts of the job and
    divides by the number of hosts.

    Args:
        file: Passed straight to ``tspl.TSPLSum``.
            NOTE(review): presumably the path of a job stats file -- confirm.

    Returns:
        A comma-separated string with, in order: job id, owner, executable
        name (after ``ld.comp_name``), mean (reads + writes) rate, mean
        STALLS / CLOCKS_UNHALTED_CORE ratio, mean (reads + writes) /
        LOAD_L1D_ALL ratio, and mean (SIMD_D_256 + SSE_D_ALL) rate.
        ``None`` when the job is skipped: the data cannot be loaded, either
        ``checkjob`` filter rejects it, it has fewer than two hosts, the
        Lariat executable is 'unknown', or the mean memory rate exceeds 2e9.
    """
    try:
        ts = tspl.TSPLSum(file, [
            'intel_snb_imc', 'intel_snb_imc', 'intel_snb', 'intel_snb',
            'intel_snb', 'intel_snb', 'intel_snb'
        ], [
            'CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'SIMD_D_256',
            'SSE_D_ALL', 'STALLS', 'CLOCKS_UNHALTED_CORE'
        ])
    except tspl.TSPLException:
        return

    if not tspl_utils.checkjob(ts, 0, 16):
        return
    elif ts.numhosts < 2:
        # Single-argument parenthesised print behaves the same on Python 2
        # and Python 3.
        print(ts.j.id + ': 1 host')
        return

    # NOTE(review): sibling calls in this file annotate 3600 as "1 hour";
    # the list is presumably a set of queues to ignore -- confirm against
    # tspl_utils.checkjob.
    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600., range(1, 33), ignore_qs):
        return

    ld = lariat_utils.LariatData(ts.j.id, ts.j.end_time,
                                 '/scratch/projects/lariatData')
    if ld.exc == 'unknown':
        return

    # Interval midpoints; used only as the shape/dtype template for the
    # per-interval rate accumulators.
    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
    # Interval lengths do not depend on host or counter, so compute once.
    dt = numpy.diff(ts.t)

    # One accumulator per requested counter, in the order given to TSPLSum.
    rates = [numpy.zeros_like(tmid) for _ in range(7)]

    for host in ts.j.hosts.keys():
        for idx, rate in enumerate(rates):
            # In-place add keeps accumulating into the same array object.
            rate += numpy.diff(ts.assemble([idx], host, 0)) / dt

    for rate in rates:
        rate /= ts.numhosts

    (read_rate, write_rate, l1_rate, avx_rate, sse_rate, stall_rate,
     clock_rate) = rates

    data_ratio = (read_rate + write_rate) / l1_rate
    flops = avx_rate + sse_rate
    stall_ratio = stall_rate / clock_rate

    mean_data_ratio = numpy.mean(data_ratio)
    mean_stall_ratio = numpy.mean(stall_ratio)
    mean_flops = numpy.mean(flops)

    ename = ld.exc.split('/')[-1]
    ename = ld.comp_name(ename, ld.equiv_patterns)
    mean_mem_rate = numpy.mean(read_rate + write_rate)
    if mean_mem_rate > 2e9:  # Put a print in here and investigate bad jobs
        return

    return ','.join([
        ts.j.id, ts.owner, ename,
        str(mean_mem_rate),
        str(mean_stall_ratio),
        str(mean_data_ratio),
        str(mean_flops)
    ])