def is_unfloppy(file,thresh):
    """Flag a CPU-busy job whose FLOP rate is low relative to its DRAM rate.

    Loads the 'SSE_FLOPS', 'DRAM' and 'user' counters for ``file`` via
    ``tspl.TSPLSum``, sums the per-interval rates over all hosts and divides
    the ``scipy.stats.tmean`` of each by the hard-coded ``peak`` figures.

    Returns True when the normalised CPU value exceeds 0.5 and the
    normalised FLOP/DRAM quotient is below ``thresh``, False otherwise.
    Returns None when the file cannot be loaded, ``tspl_utils.checkjob``
    rejects the job, or the job ran on fewer than two hosts.
    """
    counter_types = ['amd64_core', 'amd64_sock', 'cpu']
    counter_names = ['SSE_FLOPS', 'DRAM', 'user']
    peak = [2.3e9*16*2, 24e9, 1.]

    try:
        ts = tspl.TSPLSum(file, counter_types, counter_names)
    except tspl.TSPLException:
        return

    if not tspl_utils.checkjob(ts, 3600, list(range(1, 17))):  # 1 hour
        return
    if ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        return

    dt = numpy.diff(ts.t)
    intervals = len(ts.t) - 1
    flop_rate = numpy.zeros(intervals)
    dram_rate = numpy.zeros(intervals)
    cpu_rate = numpy.zeros(intervals)

    # Accumulate each counter's rate over every host of the job.
    for host in ts.j.hosts.keys():
        flop_rate += numpy.divide(numpy.diff(ts.data[0][host][0]), dt)
        dram_rate += numpy.divide(numpy.diff(ts.data[1][host][0]), dt)
        cpu_rate += numpy.divide(numpy.diff(ts.data[2][host][0]), dt)

    mean_flops = scipy.stats.tmean(flop_rate) / ts.numhosts
    mean_dram = scipy.stats.tmean(dram_rate) / ts.numhosts
    mean_cpu = scipy.stats.tmean(cpu_rate) / (ts.numhosts * ts.wayness * 100.)

    # Only evaluate the FLOP/DRAM quotient for jobs that pass the CPU test.
    if not (mean_cpu / peak[2] > 0.5):
        return False
    return bool((mean_flops / peak[0]) / (mean_dram / peak[1]) < thresh)
def compute_imbalance(file,k1,k2,thresh,lariat_dict):
    """Return ``file`` if any host shows imbalance above ``thresh``.

    The job is loaded with ``tspl.TSPLBase(file, k1, k2)``.  For every host
    in ``ts.data[0]`` the rate of each series on that host is computed, and
    for each sampling interval the ratio std/mean across those series is
    taken.  The ``scipy.stats.tmean`` of the ratios is the statistic that is
    compared (in absolute value) with ``thresh``.

    Parameters:
      file        -- job file handed to ``tspl.TSPLBase``.
      k1, k2      -- key lists handed to ``tspl.TSPLBase``.
      thresh      -- threshold for the per-host statistic.
      lariat_dict -- previously loaded Lariat data, or None to have
                     ``lariat_utils.LariatData`` look the job up itself.

    Returns ``file`` for the first host over the threshold (after printing
    the job id and statistic); otherwise None.  None is also returned when
    the file cannot be read, ``tspl_utils.checkjob`` rejects the job, the
    job has fewer than two hosts, or the Lariat wayness is unknown or
    disagrees with ``ts.wayness``.
    """
    try:
        ts = tspl.TSPLBase(file, k1, k2)
    except tspl.TSPLException:
        return
    except EOFError:
        print('End of file found reading: ' + file)
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600, 16, ignore_qs):  # 1 hour, 16way only
        return
    elif ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        return

    # Identity test: None is a singleton, and "== None" can be overridden
    # by the compared object's __eq__.
    if lariat_dict is None:
        ld = lariat_utils.LariatData(ts.j.id, end_epoch=ts.j.end_time,
                                     daysback=3,
                                     directory=analyze_conf.lariat_path)
    else:
        ld = lariat_utils.LariatData(ts.j.id, olddata=lariat_dict)

    if ld.wayness == -1:
        print('%s %s' % ('Unknown wayness: ', ts.j.id))
        return
    elif ld.wayness != ts.wayness:
        print('%s %s' % ('Lariat and TACC Stats disagree about wayness. '
                         'Skipping: ', ts.j.id))
        return

    # Index 0 (the first sampling interval) is excluded from the statistic.
    rng = range(1, len(ts.t) - 1)
    dt = numpy.diff(ts.t)[rng]

    for h in ts.data[0].keys():
        rate = [numpy.diff(v)[rng] / dt for v in ts.data[0][h]]

        mean = []
        std = []
        for j in range(len(rng)):
            # Values of every series on this host at interval j.
            column = [r[j] for r in rate]
            mean.append(scipy.stats.tmean(column))
            std.append(scipy.stats.tstd(column))

        var = scipy.stats.tmean(numpy.divide(std, mean))
        if abs(var) > thresh:
            print(ts.j.id + ': ' + str(var))
            return file
def main():
    """Dump per-host rates of one key pair as CSV lines on stdout.

    Command-line arguments (all optional positionals): ``key1``, ``key2``
    and ``filearg``.  Each file from ``tspl_utils.getfilelist`` is loaded
    with ``tspl.TSPLSum``; files that fail to load, jobs rejected by
    ``tspl_utils.checkjob`` and single-host jobs are skipped.  For the
    remaining jobs one line ``jobid,host,midpoint_time,rate`` is printed
    per host and sampling interval.
    """
    parser = argparse.ArgumentParser(
        description='Dump CSV for a key pair for some jobs')
    parser.add_argument('key1', help='First key', nargs='?',
                        default='amd64_core')
    parser.add_argument('key2', help='Second key', nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    n = parser.parse_args()

    filelist = tspl_utils.getfilelist(n.filearg)

    # Echo the parsed file argument.  Indexing sys.argv[3] directly raises
    # IndexError whenever the optional positionals are left at their
    # defaults; n.filearg is the same value when all three are supplied.
    print(n.filearg)

    for file in filelist:
        try:
            ts = tspl.TSPLSum(file, [n.key1], [n.key2])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):
            continue
        elif ts.numhosts < 2:
            print(ts.j.id + ': 1 host')
            continue

        # Midpoint of each sampling interval, matching the length of diff().
        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
        dt = numpy.diff(ts.t)

        for k in ts.j.hosts.keys():
            rate = numpy.divide(numpy.diff(ts.data[0][k][0]), dt)
            for i in range(len(tmid)):
                print(','.join([ts.j.id, k, str(tmid[i]), str(rate[i])]))
def main():
    """Plot MemUsed minus AnonPages over time for each qualifying job.

    Takes one optional positional argument ``filearg``.  Every file from
    ``tspl_utils.getfilelist`` is loaded with ``tspl.TSPLSum``; files that
    fail to load and jobs rejected by ``tspl_utils.checkjob`` are skipped.
    For each remaining job one curve per host is drawn and the figure is
    saved as ``graph_<jobid>_<key1>_<key2>.png``.
    """
    parser = argparse.ArgumentParser(
        description='Plot MemUsed-AnonPages for jobs')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    args = parser.parse_args()

    for path in tspl_utils.getfilelist(args.filearg):
        try:
            ts = tspl.TSPLSum(path, ['mem', 'mem'], ['MemUsed', 'AnonPages'])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):
            continue
        print(ts.j.id)

        fig = plt.figure()
        ax = fig.gca()
        ax.hold = True

        hours = ts.t / 3600.
        for host in ts.j.hosts.keys():
            mem_used = ts.data[0][host][0]
            anon_pages = ts.data[1][host][0]
            delta = mem_used - anon_pages
            # Shift by the host's first MemUsed sample.
            delta -= mem_used[0]
            ax.plot(hours, delta)

        unit = ts.j.get_schema(ts.k1[0])[ts.k2[0]].unit
        ax.set_ylabel('MemUsed - AnonPages ' + unit)
        ax.set_xlabel('Time (hr)')
        plt.suptitle(ts.title)

        fname = 'graph_' + ts.j.id + '_' + ts.k1[0] + '_' + ts.k2[0] + '.png'
        fig.savefig(fname)
        plt.close()
def setup(self, job_data):
    """Load the job's time series into ``self.ts`` and vet the job.

    Resets the filter attributes (``aggregate``, ``min_time``,
    ``min_hosts``, ``waynesses``, ``ignore_qs``) and ``metric`` to their
    defaults, then builds the series from ``job_data`` using ``self.k1``
    and ``self.k2``.

    Returns True when the series loads, ``tspl_utils.checkjob`` accepts it
    and the job has at least ``self.min_hosts`` hosts; False otherwise.
    """
    self.aggregate = True
    self.min_time = 3600
    self.min_hosts = 1
    self.waynesses = list(range(1, 33))
    self.ignore_qs = []
    self.metric = float("nan")

    loader = tspl.TSPLSum if self.aggregate else tspl.TSPLBase
    try:
        self.ts = loader("", self.k1, self.k2, job_data=job_data)
    except tspl.TSPLException:
        return False
    except EOFError:
        # NOTE(review): job_path is not defined inside this method; unless
        # a module-level job_path exists this line raises NameError.
        print('End of file found reading: ' + job_path)
        return False

    if not tspl_utils.checkjob(self.ts, self.min_time, self.waynesses,
                               skip_queues=self.ignore_qs):
        return False
    if self.ts.numhosts < self.min_hosts:
        return False
    return True
def main():
    """Dump per-host rates for one or more key pairs as CSV on stdout.

    Options: ``-k1`` and ``-k2`` (one or more keys each) and ``-f`` (file,
    directory or glob).  Each file is loaded with ``tspl.TSPLSum``; files
    that fail to load, jobs rejected by ``tspl_utils.checkjob`` and
    single-host jobs are skipped.  Every output line holds the job id, the
    host, the interval midpoint and one rate per key pair.
    """
    parser = argparse.ArgumentParser(
        description='Dump CSV for a key pair for some jobs')
    parser.add_argument('-k1', help='Set first key', nargs='+', type=str,
                        default=['amd64_sock'])
    parser.add_argument('-k2', help='Set second key', nargs='+', type=str,
                        default=['DRAM'])
    parser.add_argument('-f', help='File, directory, or quoted'
                        ' glob pattern', nargs=1, type=str, default=['jobs'])
    args = parser.parse_args()

    for path in tspl_utils.getfilelist(args.f[0]):
        try:
            ts = tspl.TSPLSum(path, args.k1, args.k2)
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 0, 16):
            continue
        if ts.numhosts < 2:
            print(ts.j.id + ': 1 host')
            continue

        midpoints = (ts.t[:-1] + ts.t[1:]) / 2.0
        dt = numpy.diff(ts.t)
        key_indices = range(len(ts.data))

        for host in ts.j.hosts.keys():
            rates = [numpy.divide(numpy.diff(ts.data[x][host][0]), dt)
                     for x in key_indices]
            for i in range(len(midpoints)):
                fields = [ts.j.id, host, str(midpoints[i])]
                fields.extend(str(rates[x][i]) for x in key_indices)
                print(','.join(fields))
def isidle(file,thresh):
    """Report whether some host of a job lags far behind the busiest host.

    For each of the three counters, the per-interval maximum rate over all
    hosts is computed (floored at zero).  For every host and counter the
    mean relative shortfall ``(max - rate) / max`` over the intervals is
    collected, and the largest of these is compared with ``thresh``.

    Returns True when the largest shortfall exceeds ``thresh``, False
    otherwise.  Returns None when the file cannot be loaded,
    ``tspl_utils.checkjob`` rejects the job, or the job has fewer than two
    hosts.
    """
    k1 = ['amd64_core', 'amd64_sock', 'cpu']
    k2 = ['SSE_FLOPS', 'DRAM', 'user']

    try:
        ts = tspl.TSPLSum(file, k1, k2)
    except tspl.TSPLException:
        return

    if not tspl_utils.checkjob(ts, 3600, [x+1 for x in range(16)]):  # 1 hour
        return
    elif ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        return

    dt = numpy.diff(ts.t)
    intervals = len(ts.t) - 1
    hosts = list(ts.j.hosts.keys())

    sums = []
    for i in range(len(k1)):
        # Compute each host's rate once and reuse it for both passes.
        rates = [numpy.divide(numpy.diff(ts.data[i][h]), dt) for h in hosts]

        maxrate = numpy.zeros(intervals)
        for rate in rates:
            maxrate = numpy.maximum(rate, maxrate)

        for rate in rates:
            sums.append(
                numpy.sum(numpy.divide(maxrate - rate, maxrate)) / intervals)

    # An interval where every host's rate is zero yields 0/0 = NaN, which
    # poisons the host's sum and makes max() order-dependent.  Treat such
    # sums as zero, as the other isidle variants in this code base do.
    sums = [0. if numpy.isnan(x) else x for x in sums]

    return bool(max(sums) > thresh)
def main():
    """Dump per-host rates for one or more key pairs as CSV on stdout.

    Options: ``-k1`` and ``-k2`` (one or more keys each) and ``-f`` (file,
    directory or glob).  Each file is loaded with ``tspl.TSPLSum``; files
    that fail to load, jobs rejected by ``tspl_utils.checkjob`` and
    single-host jobs are skipped.  Every output line holds the job id, the
    host, the interval midpoint and one rate per key pair.
    """
    parser = argparse.ArgumentParser(
        description="Dump CSV for a key pair for some jobs")
    parser.add_argument("-k1", help="Set first key", nargs="+", type=str,
                        default=["amd64_sock"])
    parser.add_argument("-k2", help="Set second key", nargs="+", type=str,
                        default=["DRAM"])
    parser.add_argument("-f", help="File, directory, or quoted"
                        " glob pattern", nargs=1, type=str, default=["jobs"])
    args = parser.parse_args()

    for path in tspl_utils.getfilelist(args.f[0]):
        try:
            ts = tspl.TSPLSum(path, args.k1, args.k2)
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 0, 16):
            continue
        if ts.numhosts < 2:
            print(ts.j.id + ": 1 host")
            continue

        midpoints = (ts.t[:-1] + ts.t[1:]) / 2.0
        dt = numpy.diff(ts.t)
        key_indices = range(len(ts.data))

        for host in ts.j.hosts.keys():
            rates = [numpy.divide(numpy.diff(ts.data[x][host][0]), dt)
                     for x in key_indices]
            for i in range(len(midpoints)):
                fields = [ts.j.id, host, str(midpoints[i])]
                fields.extend(str(rates[x][i]) for x in key_indices)
                print(",".join(fields))
def main():
    """Plot a key pair for every job whose reduced rate clears a threshold.

    Arguments: optional positionals ``key1``, ``key2``, ``filearg``;
    ``-t`` threshold; ``-f`` loads with ``tspl.TSPLBase`` instead of
    ``tspl.TSPLSum``; ``-m`` selects ``heatmap`` instead of ``lineplot``;
    ``--max`` reduces with ``max`` instead of ``scipy.stats.tmean``.

    Each series' rate is reduced, then the reductions are reduced again to
    a single value ``m``.  The job is plotted when no threshold was given
    or ``m`` exceeds it; otherwise an "under threshold" line is printed.
    """
    parser = argparse.ArgumentParser(
        description='Plot a key pair for some jobs')
    parser.add_argument('-t', help='Threshold', metavar='thresh')
    parser.add_argument('key1', help='First key', nargs='?',
                        default='amd64_core')
    parser.add_argument('key2', help='Second key', nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    parser.add_argument('-f', help='Set full mode', action='store_true')
    parser.add_argument('-m', help='Set heatmap mode', action='store_true')
    parser.add_argument('--max', help='Use max instead of mean',
                        action='store_true')
    n = parser.parse_args()

    filelist = tspl_utils.getfilelist(n.filearg)
    reduce_fn = max if n.max else scipy.stats.tmean

    for path in filelist:
        full = '_full' if n.f else ''
        loader = tspl.TSPLBase if n.f else tspl.TSPLSum
        try:
            ts = loader(path, [n.key1], [n.key2])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):
            continue

        # Reduce each series' rate, then reduce the reductions.
        per_series = []
        for series in ts:
            rate = numpy.divide(numpy.diff(series), numpy.diff(ts.t))
            per_series.append(reduce_fn(rate))
        m = reduce_fn(per_series)

        if n.t and not (m > float(n.t)):
            print(ts.j.id + ': under threshold, ' + str(m) + ' < ' + n.t)
            continue

        print(ts.j.id + ': ' + str(m))
        if n.m:
            heatmap(ts, n, m, full)
        else:
            lineplot(ts, n, m, full)
def main():
    """Plot raw and mean-shifted rates of one key pair for each job.

    Arguments: ``-f`` loads with ``tspl.TSPLBase`` instead of
    ``tspl.TSPLSum``; optional positionals ``key1``, ``key2``, ``filearg``.
    For each qualifying job a two-panel figure is produced: the top panel
    shows each series' rate (cast to int64), the bottom panel the same rate
    minus its L1 norm divided by its length.  The figure is saved under
    ``graph_<jobid>_<key1>_<key2>_adjust[_full]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", help="Set full mode", action="store_true")
    parser.add_argument("key1", help="First key", nargs="?",
                        default="amd64_core")
    parser.add_argument("key2", help="Second key", nargs="?",
                        default="SSE_FLOPS")
    parser.add_argument("filearg", help="File, directory, or quoted"
                        " glob pattern", nargs="?", default="jobs")
    n = parser.parse_args()

    for path in tspl_utils.getfilelist(n.filearg):
        full = "_full" if n.f else ""
        loader = tspl.TSPLBase if n.f else tspl.TSPLSum
        try:
            ts = loader(path, [n.key1], [n.key2])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
            continue
        if ts.numhosts < 2:  # At least 2 hosts
            print(ts.j.id + ": 1 host")
            continue

        print(ts.j.id)

        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

        # First series of the job; the value is not used further down but
        # the iteration is kept so the sequence of calls on ts is unchanged.
        s = []
        for v in ts:
            s = v
            break

        fig, ax = plt.subplots(2, 1, figsize=(8, 6), dpi=80)
        raw_ax, shifted_ax = ax[0], ax[1]
        raw_ax.hold = True
        shifted_ax.hold = True

        raw_lo, raw_hi = [0.0, 0.0]
        adj_lo, adj_hi = [0.0, 0.0]
        dt = numpy.diff(ts.t)

        for v in ts:
            rate = numpy.array(numpy.divide(numpy.diff(v), dt),
                               dtype=numpy.int64)
            d = numpy.linalg.norm(rate, ord=1) / float(len(rate))
            shifted = rate - d

            raw_lo = min(raw_lo, min(rate))
            raw_hi = max(raw_hi, max(rate))
            adj_lo = min(adj_lo, min(shifted))
            adj_hi = max(adj_hi, max(shifted))

            raw_ax.plot(tmid, rate)
            shifted_ax.plot(tmid, shifted)

        raw_lo, raw_hi = tspl_utils.expand_range(raw_lo, raw_hi, 0.1)
        adj_lo, adj_hi = tspl_utils.expand_range(adj_lo, adj_hi, 0.1)

        raw_ax.set_ylim(bottom=raw_lo, top=raw_hi)
        shifted_ax.set_ylim(bottom=adj_lo, top=adj_hi)

        fname = "_".join(["graph", ts.j.id, ts.k1[0], ts.k2[0],
                          "adjust" + full])
        fig.savefig(fname)
        plt.close()
def compute_imbalance(file,k1,k2,threshold,plot_flag,full_flag,ratios):
    """Compute a job's imbalance statistic, record it and optionally plot.

    The job is loaded with ``tspl.TSPLBase`` when ``full_flag`` is set and
    ``tspl.TSPLSum`` otherwise.  For each sampling interval (index 0 is
    skipped) the std/mean ratio across all series is computed; the
    ``scipy.stats.tmean`` of those ratios is the statistic.

    Side effects: stores ``[statistic, ts.owner]`` in ``ratios`` under the
    job id, prints the job id and statistic, and calls ``plot_ratios`` when
    ``plot_flag`` is set and the absolute statistic exceeds ``threshold``.

    Returns None in every case; the function exits early when the file
    cannot be read, ``tspl_utils.checkjob`` rejects the job, or the job has
    fewer than two hosts.
    """
    full = '_full' if full_flag else ''
    loader = tspl.TSPLBase if full_flag else tspl.TSPLSum
    try:
        ts = loader(file, k1, k2)
    except tspl.TSPLException:
        return
    except EOFError:
        print('End of file found reading: ' + file)
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600, 16, ignore_qs):  # 1 hour, 16way only
        return
    if ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        return

    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
    rng = range(1, len(tmid))  # interval indices kept; index 0 is dropped
    tmid = tmid[rng]
    dt = numpy.diff(ts.t)[rng]
    nsamples = len(rng)

    maxval = numpy.zeros(nsamples)
    minval = numpy.ones(nsamples) * 1e100
    rate = []
    for v in ts:
        current = numpy.divide(numpy.diff(v)[rng], dt)
        rate.append(current)
        maxval = numpy.maximum(maxval, current)
        minval = numpy.minimum(minval, current)

    mean = []
    std = []
    for j in range(nsamples):
        # Values of every series at interval j.
        column = [r[j] for r in rate]
        mean.append(scipy.stats.tmean(column))
        std.append(scipy.stats.tstd(column))

    ratio = numpy.divide(std, mean)
    ratio2 = numpy.divide(maxval - minval, maxval)

    var = scipy.stats.tmean(ratio)  # mean of ratios is the threshold statistic

    # Save away a list of ratios per user
    ratios[ts.j.id] = [var, ts.owner]

    print(ts.j.id + ': ' + str(var))

    # If over the threshold, plot this job
    if plot_flag and abs(var) > threshold:
        fig, ax = plt.subplots(2, 1, figsize=(8, 8), dpi=80)
        plot_ratios(ts, tmid, ratio, ratio2, rate, var, fig, ax, full)
def compute_imbalance(file, k1, k2, threshold, plot_flag, full_flag, ratios):
    """Compute a job's imbalance statistic, record it and optionally plot.

    The job is loaded with ``tspl.TSPLBase`` when ``full_flag`` is set and
    ``tspl.TSPLSum`` otherwise.  For each sampling interval (index 0 is
    skipped) the std/mean ratio across all series is computed; the
    ``scipy.stats.tmean`` of those ratios is the statistic.

    Side effects: stores ``[statistic, ts.owner]`` in ``ratios`` under the
    job id, prints the job id and statistic, and calls ``plot_ratios`` when
    ``plot_flag`` is set and the absolute statistic exceeds ``threshold``.

    Returns None in every case; the function exits early when the file
    cannot be read, ``tspl_utils.checkjob`` rejects the job, or the job has
    fewer than two hosts.
    """
    full = '_full' if full_flag else ''
    loader = tspl.TSPLBase if full_flag else tspl.TSPLSum
    try:
        ts = loader(file, k1, k2)
    except tspl.TSPLException:
        return
    except EOFError:
        print('End of file found reading: ' + file)
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600, 16, ignore_qs):  # 1 hour, 16way only
        return
    if ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        return

    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
    rng = range(1, len(tmid))  # interval indices kept; index 0 is dropped
    tmid = tmid[rng]
    dt = numpy.diff(ts.t)[rng]
    nsamples = len(rng)

    maxval = numpy.zeros(nsamples)
    minval = numpy.ones(nsamples) * 1e100
    rate = []
    for v in ts:
        current = numpy.divide(numpy.diff(v)[rng], dt)
        rate.append(current)
        maxval = numpy.maximum(maxval, current)
        minval = numpy.minimum(minval, current)

    mean = []
    std = []
    for j in range(nsamples):
        # Values of every series at interval j.
        column = [r[j] for r in rate]
        mean.append(scipy.stats.tmean(column))
        std.append(scipy.stats.tstd(column))

    ratio = numpy.divide(std, mean)
    ratio2 = numpy.divide(maxval - minval, maxval)

    var = scipy.stats.tmean(ratio)  # mean of ratios is the threshold statistic

    # Save away a list of ratios per user
    ratios[ts.j.id] = [var, ts.owner]

    print(ts.j.id + ': ' + str(var))

    # If over the threshold, plot this job
    if plot_flag and abs(var) > threshold:
        fig, ax = plt.subplots(2, 1, figsize=(8, 8), dpi=80)
        plot_ratios(ts, tmid, ratio, ratio2, rate, var, fig, ax, full)
def main():
    """Plot HT0/HT1/HT2 rates for jobs whose peak rate exceeds 2.0e9.

    Arguments: ``-f`` (accepted but not consulted here) and an optional
    positional ``filearg``.  Each file is loaded with ``tspl.TSPLBase``;
    files that fail to load, jobs rejected by ``tspl_utils.checkjob`` and
    single-host jobs are skipped.  One colour per host is taken from
    ``Colors()``.  The figure is saved as ``graph_<jobid>_HT_rates`` only
    when the largest rate seen exceeds 2.0e9; otherwise it is discarded.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', help='Set full mode', action='store_true')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    n = parser.parse_args()

    for path in tspl_utils.getfilelist(n.filearg):
        full = ''
        try:
            ts = tspl.TSPLBase(path,
                               ['amd64_sock', 'amd64_sock', 'amd64_sock'],
                               ['HT0', 'HT1', 'HT2'])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
            continue
        if ts.numhosts < 2:  # At least 2 hosts
            print(ts.j.id + ': 1 host')
            continue

        print(ts.j.id)
        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
        hours = tmid / 3600
        dt = numpy.diff(ts.t)

        fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=80)
        ax.hold = True
        lo, hi = [0., 0.]

        c = Colors()
        for k in ts.j.hosts.keys():
            h = ts.j.hosts[k]
            line_style = '-' + c.next()
            # Three key pairs, four series each, per host.
            for i in range(3):
                for j in range(4):
                    rate = numpy.divide(numpy.diff(ts.data[i][k][j]), dt)
                    lo = min(lo, min(rate))
                    hi = max(hi, max(rate))
                    ax.plot(hours, rate, line_style)

        if not (hi > 2.0e9):
            plt.close()
            continue
        print(ts.j.id + ' over limit: %(v)8.3f' % {'v': hi})

        plt.suptitle(ts.title)
        lo, hi = tspl_utils.expand_range(lo, hi, .1)
        ax.set_ylim(bottom=lo, top=hi)

        fname = '_'.join(['graph', ts.j.id, 'HT_rates'])
        fig.savefig(fname)
        plt.close()
def main():
    """Draw a phase plot (rate[i+1] against rate[i]) for each job.

    Arguments: ``-f`` loads with ``tspl.TSPLBase`` instead of
    ``tspl.TSPLSum``; optional positionals ``key1``, ``key2``, ``filearg``.
    Files that fail to load, jobs rejected by ``tspl_utils.checkjob`` and
    single-host jobs are skipped.  Both axes share the limits spanned by
    all rates (and zero).  The figure is saved under
    ``graph_<jobid>_<key1>_<key2>_phase[_full]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', help='Set full mode', action='store_true')
    parser.add_argument('key1', help='First key', nargs='?',
                        default='amd64_core')
    parser.add_argument('key2', help='Second key', nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    n = parser.parse_args()

    for path in tspl_utils.getfilelist(n.filearg):
        full = '_full' if n.f else ''
        loader = tspl.TSPLBase if n.f else tspl.TSPLSum
        try:
            ts = loader(path, [n.key1], [n.key2])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
            continue
        if ts.numhosts < 2:  # At least 2 hosts
            print(ts.j.id + ': 1 host')
            continue

        print(ts.j.id)

        fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=80)
        lo, hi = [0., 0.]

        for series in ts:
            rate = numpy.divide(numpy.diff(series), numpy.diff(ts.t))
            lo = min(lo, min(rate))
            hi = max(hi, max(rate))
            ax.hold = True
            # Each sample is plotted against its predecessor.
            ax.plot(rate[1:], rate[:-1], '.')

        ax.set_ylim(bottom=lo, top=hi)
        ax.set_xlim(left=lo, right=hi)

        fname = '_'.join(['graph', ts.j.id, ts.k1[0], ts.k2[0],
                          'phase' + full])
        fig.savefig(fname)
        plt.close()
def main():
    """Plot HT0/HT1/HT2 rates for jobs whose peak rate exceeds 2.0e9.

    Arguments: ``-f`` (accepted but not consulted here) and an optional
    positional ``filearg``.  Each file is loaded with ``tspl.TSPLBase``;
    files that fail to load, jobs rejected by ``tspl_utils.checkjob`` and
    single-host jobs are skipped.  One colour per host is taken from
    ``Colors()``.  The figure is saved as ``graph_<jobid>_HT_rates`` only
    when the largest rate seen exceeds 2.0e9; otherwise it is discarded.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', help='Set full mode', action='store_true')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    n = parser.parse_args()

    for path in tspl_utils.getfilelist(n.filearg):
        full = ''
        try:
            ts = tspl.TSPLBase(path,
                               ['amd64_sock', 'amd64_sock', 'amd64_sock'],
                               ['HT0', 'HT1', 'HT2'])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
            continue
        if ts.numhosts < 2:  # At least 2 hosts
            print(ts.j.id + ': 1 host')
            continue

        print(ts.j.id)
        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
        hours = tmid / 3600
        dt = numpy.diff(ts.t)

        fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=80)
        ax.hold = True
        lo, hi = [0., 0.]

        c = Colors()
        for k in ts.j.hosts.keys():
            h = ts.j.hosts[k]
            line_style = '-' + c.next()
            # Three key pairs, four series each, per host.
            for i in range(3):
                for j in range(4):
                    rate = numpy.divide(numpy.diff(ts.data[i][k][j]), dt)
                    lo = min(lo, min(rate))
                    hi = max(hi, max(rate))
                    ax.plot(hours, rate, line_style)

        if not (hi > 2.0e9):
            plt.close()
            continue
        print(ts.j.id + ' over limit: %(v)8.3f' % {'v' : hi})

        plt.suptitle(ts.title)
        lo, hi = tspl_utils.expand_range(lo, hi, .1)
        ax.set_ylim(bottom=lo, top=hi)

        fname = '_'.join(['graph', ts.j.id, 'HT_rates'])
        fig.savefig(fname)
        plt.close()
def do_check(f, jobs):
    """Store the Lariat ``exc`` value of a qualifying job in ``jobs``.

    ``f`` is loaded with ``tspl.TSPLSum`` for the amd64_core/SSE_FLOPS key
    pair.  Nothing is stored when loading raises ``tspl.TSPLException`` or
    ``tspl_utils.checkjob`` rejects the job.  Otherwise ``jobs`` gains an
    entry keyed by the job id.  Always returns None.
    """
    try:
        series = tspl.TSPLSum(f, ['amd64_core'], ['SSE_FLOPS'])
    except tspl.TSPLException:
        return

    allowed_waynesses = range(1, 33)
    if tspl_utils.checkjob(series, 3600, allowed_waynesses):  # 1 hour
        job = series.j
        lariat = lariat_utils.LariatData(job.id, job.end_time,
                                         analyze_conf.lariat_path)
        jobs[job.id] = lariat.exc
def main():
    """Plot and save a key pair for jobs whose reduced rate clears a threshold.

    Arguments: optional positionals ``key1``, ``key2``, ``filearg``;
    ``-t`` threshold; ``-f`` loads with ``tspl.TSPLBase`` instead of
    ``tspl.TSPLSum``; ``-m`` selects ``heatmap`` instead of ``lineplot``;
    ``--max`` reduces with ``max`` instead of ``scipy.stats.tmean``.

    Each series' rate is reduced, then the reductions are reduced again to
    a single value ``m``.  When no threshold was given or ``m`` exceeds it,
    the chosen plotting helper is called and the figure it returns is saved
    under the file name it returns.  Otherwise an "under threshold" line is
    printed and nothing is saved.
    """
    parser = argparse.ArgumentParser(
        description='Plot a key pair for some jobs')
    parser.add_argument('-t', help='Threshold', metavar='thresh')
    parser.add_argument('key1', help='First key', nargs='?',
                        default='amd64_core')
    parser.add_argument('key2', help='Second key', nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    parser.add_argument('-f', help='Set full mode', action='store_true')
    parser.add_argument('-m', help='Set heatmap mode', action='store_true')
    parser.add_argument('--max', help='Use max instead of mean',
                        action='store_true')
    n = parser.parse_args()

    filelist = tspl_utils.getfilelist(n.filearg)

    if n.max:
        func = max
    else:
        func = scipy.stats.tmean

    for file in filelist:
        try:
            if n.f:
                full = '_full'
                ts = tspl.TSPLBase(file, [n.key1], [n.key2])
            else:
                full = ''
                ts = tspl.TSPLSum(file, [n.key1], [n.key2])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):
            continue

        dt = numpy.diff(ts.t)
        # Reduce each series' rate, then reduce the reductions.
        reduction = [func(numpy.divide(numpy.diff(v), dt)) for v in ts]
        m = func(reduction)

        if not n.t or m > float(n.t):
            print(ts.j.id + ': ' + str(m))
            if n.m:
                fig, fname = heatmap(ts, n, m, full)
            else:
                fig, fname = lineplot(ts, n, m, full)
            # Save only when a figure was actually produced for this job.
            # Saving on the under-threshold path would hit an unbound name
            # or re-save the previous job's figure.
            fig.savefig(fname)
        else:
            print(ts.j.id + ': under threshold, ' + str(m) + ' < ' + n.t)
def do_check(f,jobs):
    """Store the Lariat ``exc`` value of a qualifying job in ``jobs``.

    ``f`` is loaded with ``tspl.TSPLSum`` for the amd64_core/SSE_FLOPS key
    pair.  Nothing is stored when loading raises ``tspl.TSPLException`` or
    ``tspl_utils.checkjob`` rejects the job.  Otherwise ``jobs`` gains an
    entry keyed by the job id, read from Lariat data under the hard-coded
    directory below.  Always returns None.
    """
    try:
        series = tspl.TSPLSum(f, ['amd64_core'], ['SSE_FLOPS'])
    except tspl.TSPLException:
        return

    allowed_waynesses = list(range(1, 17))
    if tspl_utils.checkjob(series, 3600, allowed_waynesses):  # 1 hour
        job = series.j
        lariat = lariat_utils.LariatData(job.id, job.end_time,
                                         '/scratch/projects/lariatData')
        jobs[job.id] = lariat.exc
def do_check(f,jobs):
    """Store the Lariat ``exc`` value of a qualifying job in ``jobs``.

    ``f`` is loaded with ``tspl.TSPLSum`` for the amd64_core/SSE_FLOPS key
    pair.  Nothing is stored when loading raises ``tspl.TSPLException`` or
    ``tspl_utils.checkjob`` rejects the job.  Otherwise ``jobs`` gains an
    entry keyed by the job id.  Always returns None.
    """
    try:
        series = tspl.TSPLSum(f, ['amd64_core'], ['SSE_FLOPS'])
    except tspl.TSPLException:
        return

    allowed_waynesses = range(1, 33)
    if tspl_utils.checkjob(series, 3600, allowed_waynesses):  # 1 hour
        job = series.j
        lariat = lariat_utils.LariatData(job.id, job.end_time,
                                         analyze_conf.lariat_path)
        jobs[job.id] = lariat.exc
def main():
    """Plot jobs whose two key pairs are weakly correlated.

    Optional positionals: ``threshold`` (default 0.8), ``keya1``,
    ``keya2``, ``keyb1``, ``keyb2`` and ``filearg``; ``-f`` loads with
    ``tspl.TSPLBase`` instead of ``tspl.TSPLSum``.  For every job accepted
    by ``tspl_utils.checkjob`` the value of ``pearson(ts)`` is printed, and
    ``plot_correlation`` is called when its absolute value is below the
    threshold.
    """
    parser = argparse.ArgumentParser(
        description='Look for lack of correlation'
        ' between two key pairs/')
    parser.add_argument('threshold', help='Treshold Pearson R',
                        nargs='?', default=0.8)
    parser.add_argument('keya1', help='Key A1', nargs='?',
                        default='amd64_core')
    parser.add_argument('keya2', help='Key A2', nargs='?', default='DCSF')
    parser.add_argument('keyb1', help='Key B1', nargs='?',
                        default='amd64_core')
    parser.add_argument('keyb2', help='Key B2', nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    parser.add_argument('-f', help='Set full mode', action='store_true')
    args = parser.parse_args()

    filelist = tspl_utils.getfilelist(args.filearg)
    threshold = args.threshold
    first_keys = [args.keya1, args.keyb1]
    second_keys = [args.keya2, args.keyb2]

    for path in filelist:
        full = '_full' if args.f else ''
        loader = tspl.TSPLBase if args.f else tspl.TSPLSum
        try:
            ts = loader(path, first_keys, second_keys)
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):
            continue

        r = pearson(ts)
        print(ts.j.id + ': ' + str(r))
        # The threshold arrives as a string when given on the command line.
        if abs(r) < float(threshold):
            plot_correlation(ts, r, full)
def is_unfloppy(file,thresh):
    """Flag a CPU-busy job whose FLOP rate is low relative to its memory rate.

    Key lists and peak figures are given per architecture ('amd64' and
    'intel_snb'); ``ts.pmc_type`` selects which set applies.  Per-interval
    rates are summed over all hosts (for 'intel_snb' the two FLOP counters
    are added together), averaged with ``scipy.stats.tmean`` and divided by
    the peak figures.  The two normalised FLOP and memory values are
    printed.

    Returns True when the normalised CPU value exceeds 0.5 and the
    FLOP/memory quotient is below ``thresh``, False otherwise.  Returns
    None when the file cannot be loaded, ``tspl_utils.checkjob`` rejects
    the job, or the job has fewer than two hosts.
    """
    k1 = {'amd64': ['amd64_core', 'amd64_sock', 'cpu'],
          'intel_snb': ['intel_snb', 'intel_snb', 'intel_snb', 'cpu'], }
    k2 = {'amd64': ['SSE_FLOPS', 'DRAM', 'user'],
          'intel_snb': ['SIMD_D_256', 'SSE_D_ALL', 'LOAD_L1D_ALL', 'user'], }
    peak = {'amd64': [2.3e9*16*2, 24e9, 1.],
            'intel_snb': [16*2.7e9*2, 16*2.7e9/2.*64., 1.], }

    try:
        ts = tspl.TSPLSum(file, k1, k2)
    except tspl.TSPLException:
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600, range(1, 33), ignore_qs):
        return
    if ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        return

    dt = numpy.diff(ts.t)
    intervals = len(ts.t) - 1
    flop_rate = numpy.zeros(intervals)
    mem_rate = numpy.zeros(intervals)
    cpu_rate = numpy.zeros(intervals)

    def host_rate(index, host):
        # Per-interval rate of counter `index` on `host`.
        return numpy.divide(numpy.diff(ts.data[index][host][0]), dt)

    for host in ts.j.hosts.keys():
        if ts.pmc_type == 'amd64':
            flop_rate += host_rate(0, host)
            mem_rate += host_rate(1, host)
            cpu_rate += host_rate(2, host)
        elif ts.pmc_type == 'intel_snb':
            flop_rate += host_rate(0, host)
            flop_rate += host_rate(1, host)
            mem_rate += host_rate(2, host)
            cpu_rate += host_rate(3, host)

    mean_flops = scipy.stats.tmean(flop_rate) / ts.numhosts
    mean_mem = scipy.stats.tmean(mem_rate) / ts.numhosts
    mean_cpu = scipy.stats.tmean(cpu_rate) / (ts.numhosts * ts.wayness * 100.)

    print('%s %s' % (mean_flops / peak[ts.pmc_type][0],
                     (mean_mem / peak[ts.pmc_type][1])))

    # Only evaluate the quotient for jobs that pass the CPU test.
    if not (mean_cpu / peak[ts.pmc_type][2] > 0.5):
        return False
    quotient = ((mean_flops / peak[ts.pmc_type][0])
                / (mean_mem / peak[ts.pmc_type][1]))
    return bool(quotient < thresh)
def main():
    """Dump per-host rates for one or more key pairs as CSV on stdout.

    Options: ``-k1`` and ``-k2`` (one or more keys each) and ``-f`` (file,
    directory or glob).  Each file is loaded with ``tspl.TSPLSum``; files
    that fail to load, jobs rejected by ``tspl_utils.checkjob`` and
    single-host jobs are skipped.  Every output line holds the job id, the
    host, the interval midpoint and one rate per key pair.
    """
    parser = argparse.ArgumentParser(
        description='Dump CSV for a key pair for some jobs')
    parser.add_argument('-k1', help='Set first key', nargs='+', type=str,
                        default=['amd64_sock'])
    parser.add_argument('-k2', help='Set second key', nargs='+', type=str,
                        default=['DRAM'])
    parser.add_argument('-f', help='File, directory, or quoted'
                        ' glob pattern', nargs=1, type=str, default=['jobs'])
    args = parser.parse_args()

    for path in tspl_utils.getfilelist(args.f[0]):
        try:
            ts = tspl.TSPLSum(path, args.k1, args.k2)
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 0, 16):
            continue
        if ts.numhosts < 2:
            print(ts.j.id + ': 1 host')
            continue

        midpoints = (ts.t[:-1] + ts.t[1:]) / 2.0
        dt = numpy.diff(ts.t)
        key_indices = range(len(ts.data))

        for host in ts.j.hosts.keys():
            rates = [numpy.divide(numpy.diff(ts.data[x][host][0]), dt)
                     for x in key_indices]
            for i in range(len(midpoints)):
                fields = [ts.j.id, host, str(midpoints[i])]
                fields.extend(str(rates[x][i]) for x in key_indices)
                print(','.join(fields))
def main():
    """Draw a phase plot (rate[i+1] against rate[i]) for each job.

    Arguments: ``-f`` loads with ``tspl.TSPLBase`` instead of
    ``tspl.TSPLSum``; optional positionals ``key1``, ``key2``, ``filearg``.
    Files that fail to load, jobs rejected by ``tspl_utils.checkjob`` and
    single-host jobs are skipped.  Both axes share the limits spanned by
    all rates (and zero).  The figure is saved under
    ``graph_<jobid>_<key1>_<key2>_phase[_full]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', help='Set full mode', action='store_true')
    parser.add_argument('key1', help='First key', nargs='?',
                        default='amd64_core')
    parser.add_argument('key2', help='Second key', nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    n = parser.parse_args()

    for path in tspl_utils.getfilelist(n.filearg):
        full = '_full' if n.f else ''
        loader = tspl.TSPLBase if n.f else tspl.TSPLSum
        try:
            ts = loader(path, [n.key1], [n.key2])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
            continue
        if ts.numhosts < 2:  # At least 2 hosts
            print(ts.j.id + ': 1 host')
            continue

        print(ts.j.id)

        fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=80)
        lo, hi = [0., 0.]

        for series in ts:
            rate = numpy.divide(numpy.diff(series), numpy.diff(ts.t))
            lo = min(lo, min(rate))
            hi = max(hi, max(rate))
            ax.hold = True
            # Each sample is plotted against its predecessor.
            ax.plot(rate[1:], rate[:-1], '.')

        ax.set_ylim(bottom=lo, top=hi)
        ax.set_xlim(left=lo, right=hi)

        fname = '_'.join(['graph', ts.j.id, ts.k1[0], ts.k2[0],
                          'phase' + full])
        fig.savefig(fname)
        plt.close()
def fit_step(fn,k1,k2,genplot=False,res=None):
    """Locate, per series, the step index with the smallest fit ratio.

    The job in ``fn`` is loaded with ``tspl.TSPLSum(fn, k1, k2)``.  For
    every index ``i`` from 2 to ``ts.size - 3`` ``compute_fit_params(ts, i)``
    is called; each ``(a, b)`` pair it yields is turned into ``b / a``.
    For each resulting series the index of the minimum ratio and the ratio
    itself are stored as a tuple.

    Parameters:
      fn       -- job file to analyse; also the key used in ``res``.
      k1, k2   -- key lists handed to ``tspl.TSPLSum``.
      genplot  -- when true, the ratio curves are drawn on a log scale and
                  saved to 'foo.pdf'.
      res      -- mapping that receives ``res[fn] = [(index, ratio), ...]``.
                  When omitted a fresh dict is used for this call only.

    Returns None.  Nothing is stored when the file cannot be loaded,
    ``tspl_utils.checkjob`` rejects the job, the job has fewer than two
    hosts, or ``tspl_utils.lost_data`` reports hosts with bad data.
    """
    # A literal {} default would be shared by every call that omits res.
    if res is None:
        res = {}

    try:
        ts = tspl.TSPLSum(fn, k1, k2)
    except tspl.TSPLException:
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600, range(1, 33), ignore_qs):
        return
    elif ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        # Skip the job, as the other analyses do after this message.
        return

    bad_hosts = tspl_utils.lost_data(ts)
    if len(bad_hosts) > 0:
        print('%s %s %s' % (ts.j.id, ': Detected hosts with bad data: ',
                            bad_hosts))
        return

    # Candidate step positions leave two samples on either side.
    vals = [compute_fit_params(ts, i) for i in range(2, ts.size - 2)]
    vals2 = [[b / a for (a, b) in v] for v in vals]

    # One row per series, one column per candidate step position.
    brr = numpy.transpose(numpy.array(vals2))
    (m, n) = numpy.shape(brr)

    if genplot:
        fig, ax = plt.subplots(1, 1, dpi=80)
        ax.hold = True
        for i in range(m):
            ax.semilogy(brr[i, :])
        fig.savefig('foo.pdf')
        plt.close()

    r = []
    for i in range(m):
        jnd = numpy.argmin(brr[i, :])
        r.append((jnd, brr[i, jnd]))

    res[fn] = r
def main():
    """Dump per-host rates of one key pair as CSV lines on stdout.

    Command-line arguments (all optional positionals): ``key1``, ``key2``
    and ``filearg``.  Each file from ``tspl_utils.getfilelist`` is loaded
    with ``tspl.TSPLSum``; files that fail to load, jobs rejected by
    ``tspl_utils.checkjob`` and single-host jobs are skipped.  For the
    remaining jobs one line ``jobid,host,midpoint_time,rate`` is printed
    per host and sampling interval.
    """
    parser = argparse.ArgumentParser(
        description='Dump CSV for a key pair for some jobs')
    parser.add_argument('key1', help='First key', nargs='?',
                        default='amd64_core')
    parser.add_argument('key2', help='Second key', nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    n = parser.parse_args()

    filelist = tspl_utils.getfilelist(n.filearg)

    # Echo the parsed file argument.  Indexing sys.argv[3] directly raises
    # IndexError whenever the optional positionals are left at their
    # defaults; n.filearg is the same value when all three are supplied.
    print(n.filearg)

    for file in filelist:
        try:
            ts = tspl.TSPLSum(file, [n.key1], [n.key2])
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):
            continue
        elif ts.numhosts < 2:
            print(ts.j.id + ': 1 host')
            continue

        # Midpoint of each sampling interval, matching the length of diff().
        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
        dt = numpy.diff(ts.t)

        for k in ts.j.hosts.keys():
            rate = numpy.divide(numpy.diff(ts.data[0][k][0]), dt)
            for i in range(len(tmid)):
                print(','.join([ts.j.id, k, str(tmid[i]), str(rate[i])]))
def main():
    """Plot jobs whose two key pairs are weakly correlated.

    Optional positionals: ``threshold`` (default 0.8), ``keya1``,
    ``keya2``, ``keyb1``, ``keyb2`` and ``filearg``; ``-f`` loads with
    ``tspl.TSPLBase`` instead of ``tspl.TSPLSum``.  For every job accepted
    by ``tspl_utils.checkjob`` the value of ``pearson(ts)`` is printed, and
    ``plot_correlation`` is called when its absolute value is below the
    threshold.
    """
    parser = argparse.ArgumentParser(
        description='Look for lack of correlation'
        ' between two key pairs/')
    parser.add_argument('threshold', help='Treshold Pearson R',
                        nargs='?', default=0.8)
    parser.add_argument('keya1', help='Key A1', nargs='?',
                        default='amd64_core')
    parser.add_argument('keya2', help='Key A2', nargs='?', default='DCSF')
    parser.add_argument('keyb1', help='Key B1', nargs='?',
                        default='amd64_core')
    parser.add_argument('keyb2', help='Key B2', nargs='?',
                        default='SSE_FLOPS')
    parser.add_argument('filearg', help='File, directory, or quoted'
                        ' glob pattern', nargs='?', default='jobs')
    parser.add_argument('-f', help='Set full mode', action='store_true')
    args = parser.parse_args()

    filelist = tspl_utils.getfilelist(args.filearg)
    threshold = args.threshold
    first_keys = [args.keya1, args.keyb1]
    second_keys = [args.keya2, args.keyb2]

    for path in filelist:
        full = '_full' if args.f else ''
        loader = tspl.TSPLBase if args.f else tspl.TSPLSum
        try:
            ts = loader(path, first_keys, second_keys)
        except tspl.TSPLException:
            continue

        if not tspl_utils.checkjob(ts, 3600, 16):
            continue

        r = pearson(ts)
        print(ts.j.id + ': ' + str(r))
        # The threshold arrives as a string when given on the command line.
        if abs(r) < float(threshold):
            plot_correlation(ts, r, full)
def isidle(file, thresh):
    """Report whether some host of a job lags far behind the busiest host.

    Key lists are given per architecture ('amd64' and 'intel_snb') and
    passed to ``tspl.TSPLSum``.  For each loaded counter the per-interval
    maximum rate over all hosts is computed (floored at zero).  For every
    host and counter the mean relative shortfall ``(max - rate) / max``
    over the intervals is collected; NaN results are treated as zero.  The
    largest shortfall is compared with ``thresh``.

    Returns True when the largest shortfall exceeds ``thresh``, False
    otherwise.  Returns None when the file cannot be loaded,
    ``tspl_utils.checkjob`` rejects the job, or the job has fewer than two
    hosts.
    """
    k1 = {
        'amd64': ['amd64_core', 'amd64_sock', 'cpu'],
        'intel_snb': ['intel_snb', 'intel_snb', 'cpu'],
    }
    k2 = {
        'amd64': ['SSE_FLOPS', 'DRAM', 'user'],
        'intel_snb': ['SIMD_D_256', 'LOAD_L1D_ALL', 'user'],
    }

    try:
        ts = tspl.TSPLSum(file, k1, k2)
    except tspl.TSPLException:
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600, range(1, 33), ignore_qs):
        return
    elif ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        return

    dt = numpy.diff(ts.t)
    intervals = len(ts.t) - 1
    hosts = list(ts.j.hosts.keys())

    sums = []
    # Iterate over the counters actually loaded.  len(k1) would be the
    # number of architectures in the dict (2), which silently skips the
    # third counter of each key list.
    for i in range(len(ts.data)):
        rates = [numpy.divide(numpy.diff(ts.data[i][h]), dt) for h in hosts]

        maxrate = numpy.zeros(intervals)
        for rate in rates:
            maxrate = numpy.maximum(rate, maxrate)

        for rate in rates:
            sums.append(
                numpy.sum(numpy.divide(maxrate - rate, maxrate)) / intervals)

    # 0/0 intervals make a sum NaN; count those as "no shortfall".
    sums = [0. if math.isnan(x) else x for x in sums]

    return bool(max(sums) > thresh)
def has_highbw(file,thresh):
    """Report whether a job's mean memory rate exceeds a share of peak.

    Loads the 'CAS_READS' and 'CAS_WRITES' counters via ``tspl.TSPLSum``,
    sums over all hosts the rate of 64 times ``ts.assemble([0, 1], h, 0)``,
    and averages it with ``scipy.stats.tmean`` per host.  The value divided
    by the hard-coded ``peak`` (76e9) is printed and compared with
    ``thresh``.

    Returns True when the normalised rate exceeds ``thresh``, False
    otherwise.  Returns None when the file cannot be loaded,
    ``tspl_utils.checkjob`` rejects the job, or the job has fewer than two
    hosts.

    Any other exception is reported (type, file name, line number) and
    then re-raised.
    """
    try:
        k1 = ['intel_snb_imc', 'intel_snb_imc']
        k2 = ['CAS_READS', 'CAS_WRITES']

        peak = 76.*1.e9

        try:
            ts = tspl.TSPLSum(file, k1, k2)
        except tspl.TSPLException:
            return

        ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
        if not tspl_utils.checkjob(ts, 3600, range(1, 33), ignore_qs):
            return
        elif ts.numhosts < 2:  # At least 2 hosts
            print(ts.j.id + ': 1 host')
            return

        dt = numpy.diff(ts.t)
        gdramrate = numpy.zeros(len(ts.t) - 1)
        for h in ts.j.hosts.keys():
            gdramrate += numpy.divide(
                numpy.diff(64.*ts.assemble([0, 1], h, 0)), dt)

        mdr = scipy.stats.tmean(gdramrate) / ts.numhosts

        print(mdr/peak)

        return bool(mdr/peak > thresh)
    except Exception:
        import sys
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        # Bare raise keeps the original traceback; "raise e" would restart
        # it from this line under Python 2.
        raise
def isidle(file,thresh):
    """Report whether some host of a job lags far behind the busiest host.

    Key lists are given per architecture ('amd64' and 'intel_snb') and
    passed to ``tspl.TSPLSum``.  For each loaded counter the per-interval
    maximum rate over all hosts is computed (floored at zero).  For every
    host and counter the mean relative shortfall ``(max - rate) / max``
    over the intervals is collected; NaN results are treated as zero.  The
    largest shortfall is compared with ``thresh``.

    Returns True when the largest shortfall exceeds ``thresh``, False
    otherwise.  Returns None when the file cannot be loaded,
    ``tspl_utils.checkjob`` rejects the job, or the job has fewer than two
    hosts.
    """
    k1 = {'amd64': ['amd64_core', 'amd64_sock', 'cpu'],
          'intel_snb': ['intel_snb', 'intel_snb', 'cpu'], }
    k2 = {'amd64': ['SSE_FLOPS', 'DRAM', 'user'],
          'intel_snb': ['SIMD_D_256', 'LOAD_L1D_ALL', 'user'], }

    try:
        ts = tspl.TSPLSum(file, k1, k2)
    except tspl.TSPLException:
        return

    ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600, range(1, 33), ignore_qs):
        return
    elif ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        return

    dt = numpy.diff(ts.t)
    intervals = len(ts.t) - 1
    hosts = list(ts.j.hosts.keys())

    sums = []
    # Iterate over the counters actually loaded.  len(k1) would be the
    # number of architectures in the dict (2), which silently skips the
    # third counter of each key list.
    for i in range(len(ts.data)):
        rates = [numpy.divide(numpy.diff(ts.data[i][h]), dt) for h in hosts]

        maxrate = numpy.zeros(intervals)
        for rate in rates:
            maxrate = numpy.maximum(rate, maxrate)

        for rate in rates:
            sums.append(
                numpy.sum(numpy.divide(maxrate - rate, maxrate)) / intervals)

    # 0/0 intervals make a sum NaN; count those as "no shortfall".
    sums = [0. if math.isnan(x) else x for x in sums]

    return bool(max(sums) > thresh)
def fit_step(fn,k1,k2,genplot=False,res=None):
    """Locate, per series, the step index with the smallest fit ratio.

    The job in ``fn`` is loaded with ``tspl.TSPLSum(fn, k1, k2)``.  For
    every index ``i`` from 2 to ``ts.size - 3`` ``compute_fit_params(ts, i)``
    is called; each ``(a, b)`` pair it yields is turned into ``b / a``.
    For each resulting series the index of the minimum ratio and the ratio
    itself are stored as a tuple.

    Parameters:
      fn       -- job file to analyse; also the key used in ``res``.
      k1, k2   -- key lists handed to ``tspl.TSPLSum``.
      genplot  -- when true, the ratio curves are drawn on a log scale and
                  saved to 'foo.pdf'.
      res      -- mapping that receives ``res[fn] = [(index, ratio), ...]``.
                  When omitted a fresh dict is used for this call only.

    Returns None.  Nothing is stored when the file cannot be loaded,
    ``tspl_utils.checkjob`` rejects the job, or the job has fewer than two
    hosts.
    """
    # A literal {} default would be shared by every call that omits res.
    if res is None:
        res = {}

    try:
        ts = tspl.TSPLSum(fn, k1, k2)
    except tspl.TSPLException:
        return

    if not tspl_utils.checkjob(ts, 3600, [x+1 for x in range(16)]):  # 1 hour
        return
    elif ts.numhosts < 2:  # At least 2 hosts
        print(ts.j.id + ': 1 host')
        # Skip the job, as the other analyses do after this message.
        return

    # Candidate step positions leave two samples on either side.
    vals = [compute_fit_params(ts, i) for i in range(2, ts.size - 2)]
    vals2 = [[b / a for (a, b) in v] for v in vals]

    # One row per series, one column per candidate step position.
    brr = numpy.transpose(numpy.array(vals2))
    (m, n) = numpy.shape(brr)

    if genplot:
        fig, ax = plt.subplots(1, 1, dpi=80)
        ax.hold = True
        for i in range(m):
            ax.semilogy(brr[i, :])
        fig.savefig('foo.pdf')
        plt.close()

    r = []
    for i in range(m):
        jnd = numpy.argmin(brr[i, :])
        r.append((jnd, brr[i, jnd]))

    res[fn] = r
def master_plot(file,mode='lines',threshold=False, output_dir='.',prefix='graph',mintime=3600,wayness=16, header='Master',lariat_dict=None,wide=False,job_stats=None): k1={'amd64' : ['amd64_core','amd64_core','amd64_sock','lnet','lnet', 'ib_sw','ib_sw','cpu'], 'intel' : ['intel_pmc3', 'intel_pmc3', 'intel_pmc3', 'lnet', 'lnet', 'ib_ext','ib_ext','cpu','mem','mem'], 'intel_snb' : ['intel_snb_imc', 'intel_snb_imc', 'intel_snb', 'lnet', 'lnet', 'ib_sw','ib_sw','cpu', 'intel_snb', 'intel_snb', 'mem', 'mem'], } k2={'amd64': ['SSE_FLOPS','DCSF','DRAM','rx_bytes','tx_bytes', 'rx_bytes','tx_bytes','user'], 'intel' : ['MEM_LOAD_RETIRED_L1D_HIT', 'FP_COMP_OPS_EXE_X87', 'INSTRUCTIONS_RETIRED', 'rx_bytes','tx_bytes', 'port_recv_data','port_xmit_data','user', 'MemUsed', 'AnonPages'], 'intel_snb' : ['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes','user', 'SSE_D_ALL', 'SIMD_D_256', 'MemUsed', 'AnonPages'], } try: print file ts=tspl.TSPLSum(file,k1,k2,job_stats) except tspl.TSPLException as e: return ignore_qs=[]#'gpu','gpudev','vis','visdev'] if not tspl_utils.checkjob(ts,mintime,wayness,ignore_qs): return if lariat_dict == None: ld=lariat_utils.LariatData(ts.j.id,end_epoch=ts.j.end_time,daysback=3,directory=analyze_conf.lariat_path) elif lariat_dict == "pass": ld = lariat_utils.LariatData(ts.j.id) else: ld=lariat_utils.LariatData(ts.j.id,olddata=lariat_dict) wayness=ts.wayness if ld.wayness != -1 and ld.wayness < ts.wayness: wayness=ld.wayness if wide: fig,ax=plt.subplots(6,2,figsize=(15.5,12),dpi=110) # Make 2-d array into 1-d, and reorder so that the left side is blank ax=my_utils.flatten(ax) ax_even=ax[0:12:2] ax_odd =ax[1:12:2] ax=ax_odd + ax_even for a in ax_even: a.axis('off') else: fig,ax=plt.subplots(6,1,figsize=(8,12),dpi=110) if mode == 'hist': plot=plot_thist elif mode == 'percentile': plot=plot_mmm else: plot=plot_lines if ts.pmc_type == 'intel_snb' : # Plot key 1 plot(ax[0],ts,[8,9],3600.,1e9, ylabel='Total AVX +\nSSE Ginst/s') 
# Plot key 2 plot(ax[1],ts,[0,1],3600.,1.0/64.0*1024.*1024.*1024., ylabel='Total Mem BW GB/s') #Plot key 3 #plot(ax[2],ts,[2],3600.,1.0/64.0*1e9, ylabel='L1 BW GB/s') plot(ax[2],ts,[10,-11],3600.,1024.0*1024.0*1024.0, ylabel='Memory Usage GB', do_rate=False) elif ts.pmc_type == 'intel': plot(ax[0],ts,[1],3600.,1e9,ylabel='FP Ginst/s') plot(ax[2],ts,[8,-9],3600.,1024.0*1024.0*1024.0, ylabel='Memory Usage GB',do_rate=False) else: #Fix this to support the old amd plots print ts.pmc_type + ' not supported' return # Plot lnet sum rate plot(ax[3],ts,[3,4],3600.,1024.**2,ylabel='Total lnet MB/s') # Plot remaining IB sum rate if ts.pmc_type == 'intel_snb' : plot(ax[4],ts,[5,6,-3,-4],3600.,1024.**2,ylabel='Total (ib_sw-lnet) MB/s') elif ts.pmc_type == 'intel' : plot(ax[4],ts,[5,6,-3,-4],3600.,1024.**2,ylabel='Total (ib_ext-lnet) MB/s') #Plot CPU user time plot(ax[5],ts,[7],3600.,wayness*100., xlabel='Time (hr)', ylabel='Total cpu user\nfraction') print ts.j.id + ': ' plt.subplots_adjust(hspace=0.35) if wide: left_text=header+'\n'+my_utils.summary_text(ld,ts) text_len=len(left_text.split('\n')) fontsize=ax[0].yaxis.label.get_size() linespacing=1.2 fontrate=float(fontsize*linespacing)/72./15.5 yloc=.8-fontrate*(text_len-1) # this doesn't quite work. fontrate is too # small by a small amount plt.figtext(.05,yloc,left_text,linespacing=linespacing) fname='_'.join([prefix,ts.j.id,ts.owner,'wide_master']) elif header != None: title=header+'\n'+ts.title if threshold: title+=', V: %(v)-6.1f' % {'v': threshold} title += '\n' + ld.title() plt.suptitle(title) fname='_'.join([prefix,ts.j.id,ts.owner,'master']) else: fname='_'.join([prefix,ts.j.id,ts.owner,'master']) if mode == 'hist': fname+='_hist' elif mode == 'percentile': fname+='_perc' plt.close() return fig, fname
def do_compute(file): try: ts = tspl.TSPLSum( file, ["intel_snb_imc", "intel_snb_imc", "intel_snb", "intel_snb", "intel_snb", "intel_snb", "intel_snb"], ["CAS_READS", "CAS_WRITES", "LOAD_L1D_ALL", "SIMD_D_256", "SSE_D_ALL", "STALLS", "CLOCKS_UNHALTED_CORE"], ) except tspl.TSPLException as e: return if not tspl_utils.checkjob(ts, 0, 16): return elif ts.numhosts < 2: print ts.j.id + ": 1 host" return ignore_qs = ["gpu", "gpudev", "vis", "visdev"] if not tspl_utils.checkjob(ts, 3600.0, range(1, 33), ignore_qs): return ld = lariat_utils.LariatData(ts.j.id, ts.j.end_time, "/scratch/projects/lariatData") if ld.exc == "unknown": return tmid = (ts.t[:-1] + ts.t[1:]) / 2.0 read_rate = numpy.zeros_like(tmid) write_rate = numpy.zeros_like(tmid) l1_rate = numpy.zeros_like(tmid) avx_rate = numpy.zeros_like(tmid) sse_rate = numpy.zeros_like(tmid) stall_rate = numpy.zeros_like(tmid) clock_rate = numpy.zeros_like(tmid) for host in ts.j.hosts.keys(): read_rate += numpy.diff(ts.assemble([0], host, 0)) / numpy.diff(ts.t) write_rate += numpy.diff(ts.assemble([1], host, 0)) / numpy.diff(ts.t) l1_rate += numpy.diff(ts.assemble([2], host, 0)) / numpy.diff(ts.t) avx_rate += numpy.diff(ts.assemble([3], host, 0)) / numpy.diff(ts.t) sse_rate += numpy.diff(ts.assemble([4], host, 0)) / numpy.diff(ts.t) stall_rate += numpy.diff(ts.assemble([5], host, 0)) / numpy.diff(ts.t) clock_rate += numpy.diff(ts.assemble([6], host, 0)) / numpy.diff(ts.t) read_rate /= ts.numhosts write_rate /= ts.numhosts l1_rate /= ts.numhosts avx_rate /= ts.numhosts sse_rate /= ts.numhosts stall_rate /= ts.numhosts clock_rate /= ts.numhosts data_ratio = (read_rate + write_rate) / l1_rate flops = avx_rate + sse_rate flops_ratio = (flops - numpy.min(flops)) / (numpy.max(flops) - numpy.min(flops)) stall_ratio = stall_rate / clock_rate mean_data_ratio = numpy.mean(data_ratio) mean_stall_ratio = numpy.mean(stall_ratio) mean_flops = numpy.mean(flops) ename = ld.exc.split("/")[-1] ename = ld.comp_name(ename, ld.equiv_patterns) 
mean_mem_rate = numpy.mean(read_rate + write_rate) if mean_mem_rate > 2e9: # Put a print in here and investigate bad jobs return return ",".join( [ts.j.id, ts.owner, ename, str(mean_mem_rate), str(mean_stall_ratio), str(mean_data_ratio), str(mean_flops)] )
def main(): parser = argparse.ArgumentParser(description='Look for high meta data rate'\ ' to Lustre') parser.add_argument('-t', metavar='thresh', help='Treshold metadata rate', nargs=1, default=[100000.]) parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?',default='jobs') n=parser.parse_args() thresh=float(n.t[0]) print thresh filelist=tspl_utils.getfilelist(n.filearg) # k1=['llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite'] # k2=['open','close','mmap','seek','fsync','setattr', # 'truncate','flock','getattr','statfs','alloc_inode', # 'setxattr','getxattr',' listxattr', # 'removexattr', 'inode_permission', 'readdir', # 'create','lookup','link','unlink','symlink','mkdir', # 'rmdir','mknod','rename',] k1=['llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', ] k2=['open','close','mmap','fsync','setattr', 'truncate','flock','getattr','statfs','alloc_inode', 'setxattr',' listxattr', 'removexattr', 'readdir', 'create','lookup','link','unlink','symlink','mkdir', 'rmdir','mknod','rename',] for file in filelist: try: ts=tspl.TSPLSum(file,k1,k2) except tspl.TSPLException as e: continue if not tspl_utils.checkjob(ts,3600.,range(1,33)): continue tmid=(ts.t[:-1]+ts.t[1:])/2.0 ld=lariat_utils.LariatData(ts.j.id,ts.j.end_time,'lariatData') meta_rate = numpy.zeros_like(tmid) for k in ts.j.hosts.keys(): meta_rate +=numpy.diff(ts.assemble(range(0,len(k1)),k,0))/numpy.diff(ts.t) meta_rate /= float(ts.numhosts) if numpy.max(meta_rate) > thresh: title=ts.title if ld.exc != 'unknown': title += ', E: ' + ld.exc.split('/')[-1] fig,ax=plt.subplots(1,1,figsize=(10,8),dpi=80) plt.subplots_adjust(hspace=0.35) 
plt.suptitle(title) markers = ('o','x','+','^','s','8','p', 'h','*','D','<','>','v','d','.') colors = ('b','g','r','c','m','k','y') cnt=0 for v in ts.data: for host in v: for vals in v[host]: rate=numpy.diff(vals)/numpy.diff(ts.t) c=colors[cnt % len(colors)] m=markers[cnt % len(markers)] # print cnt,(cnt % len(colors)), (cnt % len(markers)), k2[cnt], c, m ax.plot(tmid/3600., rate, marker=m, markeredgecolor=c, linestyle='-', color=c, markerfacecolor='None', label=k2[cnt]) ax.hold=True cnt=cnt+1 ax.set_ylabel('Meta Data Rate (op/s)') tspl_utils.adjust_yaxis_range(ax,0.1) handles,labels=ax.get_legend_handles_labels() new_handles={} for h,l in zip(handles,labels): new_handles[l]=h box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.9, box.height]) ax.legend(new_handles.values(),new_handles.keys(),prop={'size':8}, bbox_to_anchor=(1.05,1), borderaxespad=0., loc=2) fname='_'.join(['metadata',ts.j.id,ts.owner]) fig.savefig(fname) plt.close()
def master_plot(file,mode='lines',threshold=False, output_dir='.',prefix='graph',mintime=3600,wayness=16, header='Master'): k1={'amd64' : ['amd64_core','amd64_core','amd64_sock','lnet','lnet', 'ib_sw','ib_sw','cpu'], 'intel' : ['intel_pmc3', 'intel_pmc3', 'intel_pmc3', 'lnet', 'lnet', 'ib_sw','ib_sw','cpu'] } k2={'amd64': ['SSE_FLOPS','DCSF','DRAM','rx_bytes','tx_bytes', 'rx_bytes','tx_bytes','user'], 'intel' : ['PMC3', 'PMC2', 'FIXED_CTR0', 'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes','user'] } try: print file ts=tspl.TSPLSum(file,k1,k2) except tspl.TSPLException as e: return if not tspl_utils.checkjob(ts,mintime,wayness): return fig,ax=plt.subplots(6,1,figsize=(8,12),dpi=80) ax=my_utils.flatten(ax) if mode == 'hist': plot=plot_thist elif mode == 'percentile': plot=plot_mmm else: plot=plot_lines # Plot SSE FLOPS plot(ax[0],ts,[0],3600.) # Plot DCSF rate plot(ax[1],ts,[1],3600.,1e9) #Plot DRAM rate plot(ax[2],ts,[2],3600.,1e9) # Plot lnet sum rate plot(ax[3],ts,[3,4],3600.,1024.**2,ylabel='Total lnet MB/s') # Plot remaining IB sum rate plot(ax[4],ts,[5,6,-3,-4],3600.,1024.**2,ylabel='Total (ib_sw-lnet) MB/s') #Plot CPU user time plot(ax[5],ts,[7],3600.,ts.wayness*100., xlabel='Time (hr)', ylabel='Total cpu user\nfraction') print ts.j.id + ': ' print 'cc' title=header+'\n'+ts.title if threshold: title+=', V: %(v)-8.3f' % {'v': threshold} ld=lariat_utils.LariatData(ts.j.id,ts.j.end_time,'/scratch/projects/lariatData') title += '\n' + ld.title() print 'dd' plt.suptitle(title) plt.subplots_adjust(hspace=0.35) fname='_'.join([prefix,ts.j.id,ts.owner,'master']) if mode == 'hist': fname+='_hist' elif mode == 'percentile': fname+='_perc' fig.savefig(output_dir+'/'+fname) plt.close()
def compute_imbalance(file, k1, k2, thresh, lariat_dict): try: ts = tspl.TSPLBase(file, k1, k2) except tspl.TSPLException as e: return except EOFError as e: print 'End of file found reading: ' + file return ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev'] if not tspl_utils.checkjob(ts, 3600, 16, ignore_qs): # 1 hour, 16way only return elif ts.numhosts < 2: # At least 2 hosts print ts.j.id + ': 1 host' return if lariat_dict == None: ld = lariat_utils.LariatData(ts.j.id, end_epoch=ts.j.end_time, daysback=3, directory=analyze_conf.lariat_path) else: ld = lariat_utils.LariatData(ts.j.id, olddata=lariat_dict) if ld.wayness == -1: print 'Unknown wayness: ', ts.j.id return elif ld.wayness != ts.wayness: print 'Lariat and TACC Stats disagree about wayness. Skipping: ', ts.j.id return tmid = (ts.t[:-1] + ts.t[1:]) / 2.0 rng = range(1, len(tmid)) # Throw out first and last tmid = tmid[rng] for h in ts.data[0].keys(): host_data = ts.data[0][h] maxval = numpy.zeros(len(rng)) minval = numpy.ones(len(rng)) * 1e100 rate = [] for v in host_data: rate.append(numpy.diff(v)[rng] / numpy.diff(ts.t)[rng]) maxval = numpy.maximum(maxval, rate[-1]) minval = numpy.minimum(minval, rate[-1]) vals = [] mean = [] std = [] for j in range(len(rng)): vals.append([]) for v in rate: vals[j].append(v[j]) mean.append(scipy.stats.tmean(vals[j])) std.append(scipy.stats.tstd(vals[j])) ratio = numpy.divide(std, mean) var = scipy.stats.tmean(ratio) if abs(var) > thresh: print ts.j.id + ': ' + str(var) return file
def main(): parser = argparse.ArgumentParser() parser.add_argument('-f', help='Set full mode', action='store_true') parser.add_argument('key1', help='First key', nargs='?', default='amd64_core') parser.add_argument('key2', help='Second key', nargs='?', default='SSE_FLOPS') parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?',default='jobs') n=parser.parse_args() filelist=tspl_utils.getfilelist(n.filearg) for file in filelist: try: if n.f: full='_full' ts=tspl.TSPLBase(file,[n.key1],[n.key2]) else: full='' ts=tspl.TSPLSum(file,[n.key1],[n.key2]) except tspl.TSPLException as e: continue if not tspl_utils.checkjob(ts,3600,16): # 1 hour, 16way only continue elif ts.numhosts < 2: # At least 2 hosts print ts.j.id + ': 1 host' continue print ts.j.id tmid=(ts.t[:-1]+ts.t[1:])/2.0 s=[] for v in ts: s=v break fig,ax=plt.subplots(2,1,figsize=(8,6),dpi=80) ax[0].hold=True ax[1].hold=True xmin,xmax=[0.,0.] xmin1,xmax1=[0.,0.] dt=numpy.diff(ts.t) for v in ts: rate=numpy.array(numpy.divide(numpy.diff(v),dt),dtype=numpy.int64) d=numpy.linalg.norm(rate,ord=1)/float(len(rate)) xmin,xmax=[min(xmin,min(rate)),max(xmax,max(rate))] xmin1,xmax1=[min(xmin1,min(rate-d)),max(xmax1,max(rate-d))] ax[0].plot(tmid,rate) ax[1].plot(tmid,rate-d) xmin,xmax=tspl_utils.expand_range(xmin,xmax,.1) xmin1,xmax1=tspl_utils.expand_range(xmin1,xmax1,.1) ax[0].set_ylim(bottom=xmin,top=xmax) ax[1].set_ylim(bottom=xmin1,top=xmax1) fname='_'.join(['graph',ts.j.id,ts.k1[0],ts.k2[0],'adjust'+full]) fig.savefig(fname) plt.close()
def main(): parser = argparse.ArgumentParser(description='Look for high meta data rate'\ ' to Lustre') parser.add_argument('-t', metavar='thresh', help='Treshold metadata rate', nargs=1, default=[100000.]) parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?', default='jobs') n = parser.parse_args() thresh = float(n.t[0]) print thresh filelist = tspl_utils.getfilelist(n.filearg) # k1=['llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite'] # k2=['open','close','mmap','seek','fsync','setattr', # 'truncate','flock','getattr','statfs','alloc_inode', # 'setxattr','getxattr',' listxattr', # 'removexattr', 'inode_permission', 'readdir', # 'create','lookup','link','unlink','symlink','mkdir', # 'rmdir','mknod','rename',] k1 = [ 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', ] k2 = [ 'open', 'close', 'mmap', 'fsync', 'setattr', 'truncate', 'flock', 'getattr', 'statfs', 'alloc_inode', 'setxattr', ' listxattr', 'removexattr', 'readdir', 'create', 'lookup', 'link', 'unlink', 'symlink', 'mkdir', 'rmdir', 'mknod', 'rename', ] for file in filelist: try: ts = tspl.TSPLSum(file, k1, k2) except tspl.TSPLException as e: continue if not tspl_utils.checkjob(ts, 3600., range(1, 33)): continue tmid = (ts.t[:-1] + ts.t[1:]) / 2.0 ld = lariat_utils.LariatData(ts.j.id, ts.j.end_time, 'lariatData') meta_rate = numpy.zeros_like(tmid) for k in ts.j.hosts.keys(): meta_rate += numpy.diff(ts.assemble(range(0, len(k1)), k, 0)) / numpy.diff(ts.t) meta_rate /= float(ts.numhosts) if numpy.max(meta_rate) > thresh: title = ts.title if ld.exc != 'unknown': title += ', E: ' + ld.exc.split('/')[-1] fig, ax = plt.subplots(1, 1, 
figsize=(10, 8), dpi=80) plt.subplots_adjust(hspace=0.35) plt.suptitle(title) markers = ('o', 'x', '+', '^', 's', '8', 'p', 'h', '*', 'D', '<', '>', 'v', 'd', '.') colors = ('b', 'g', 'r', 'c', 'm', 'k', 'y') cnt = 0 for v in ts.data: for host in v: for vals in v[host]: rate = numpy.diff(vals) / numpy.diff(ts.t) c = colors[cnt % len(colors)] m = markers[cnt % len(markers)] # print cnt,(cnt % len(colors)), (cnt % len(markers)), k2[cnt], c, m ax.plot(tmid / 3600., rate, marker=m, markeredgecolor=c, linestyle='-', color=c, markerfacecolor='None', label=k2[cnt]) ax.hold = True cnt = cnt + 1 ax.set_ylabel('Meta Data Rate (op/s)') tspl_utils.adjust_yaxis_range(ax, 0.1) handles, labels = ax.get_legend_handles_labels() new_handles = {} for h, l in zip(handles, labels): new_handles[l] = h box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.9, box.height]) ax.legend(new_handles.values(), new_handles.keys(), prop={'size': 8}, bbox_to_anchor=(1.05, 1), borderaxespad=0., loc=2) fname = '_'.join(['metadata', ts.j.id, ts.owner]) fig.savefig(fname) plt.close()
def compute_ratio(file,lariat_dict=None): try: ts=tspl.TSPLSum(file,['intel_snb_imc', 'intel_snb_imc', 'intel_snb', 'intel_snb', 'intel_snb', 'intel_snb', 'intel_snb'], ['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'SIMD_D_256', 'SSE_D_ALL', 'STALLS', 'CLOCKS_UNHALTED_CORE']) except tspl.TSPLException as e: return ignore_qs=['gpu','gpudev','vis','visdev'] if not tspl_utils.checkjob(ts,3600.,range(1,33),ignore_qs): return tmid=(ts.t[:-1]+ts.t[1:])/2.0 if lariat_dict == None: ld=lariat_utils.LariatData(ts.j.id,end_epoch=ts.j.end_time,daysback=3,directory=analyze_conf.lariat_path) else: ld=lariat_utils.LariatData(ts.j.id,olddata=lariat_dict) if ld.exc == 'unknown' or ld.wayness != ts.wayness: # try loading older lariat ld=lariat_utils.LariatData(ts.j.id,end_epoch=ts.j.end_time,daysback=3,directory=analyze_conf.lariat_path,olddata=ld.ld) if ld.exc == 'unknown' or ld.wayness != ts.wayness: # Still nothing; return return read_rate = numpy.zeros_like(tmid) write_rate = numpy.zeros_like(tmid) l1_rate = numpy.zeros_like(tmid) avx_rate = numpy.zeros_like(tmid) sse_rate = numpy.zeros_like(tmid) stall_rate = numpy.zeros_like(tmid) clock_rate = numpy.zeros_like(tmid) for host in ts.j.hosts.keys(): read_rate += numpy.diff(ts.assemble([0],host,0))/numpy.diff(ts.t) write_rate += numpy.diff(ts.assemble([1],host,0))/numpy.diff(ts.t) l1_rate += numpy.diff(ts.assemble([2],host,0))/numpy.diff(ts.t) avx_rate += numpy.diff(ts.assemble([3],host,0))/numpy.diff(ts.t) sse_rate += numpy.diff(ts.assemble([4],host,0))/numpy.diff(ts.t) stall_rate += numpy.diff(ts.assemble([5],host,0))/numpy.diff(ts.t) clock_rate += numpy.diff(ts.assemble([6],host,0))/numpy.diff(ts.t) if float(ts.numhosts*int(ts.wayness)*int(ld.threads)) == 0: print 'No tasks in', ts.j.id, ' skipping' return read_rate /= float(ts.numhosts*int(ts.wayness)*int(ld.threads)) write_rate /= float(ts.numhosts*int(ts.wayness)*int(ld.threads)) l1_rate /= float(ts.numhosts*int(ts.wayness)*int(ld.threads)) avx_rate /= 
float(ts.numhosts*int(ts.wayness)*int(ld.threads)) sse_rate /= float(ts.numhosts*int(ts.wayness)*int(ld.threads)) stall_rate /= float(ts.numhosts*int(ts.wayness)*int(ld.threads)) clock_rate /= float(ts.numhosts*int(ts.wayness)*int(ld.threads)) try: data_ratio = (read_rate+write_rate)/l1_rate except RuntimeWarning: print 'Division by zero, skipping:', ts.j.id return flops = avx_rate+sse_rate try: flops_ratio = (flops-numpy.min(flops))/(numpy.max(flops)-numpy.min(flops)) except RuntimeWarning: print 'Division by zero, skipping:', ts.j.id return try: stall_ratio = stall_rate/clock_rate except RuntimeWarning: print 'Division by zero, skipping:', ts.j.id return mean_data_ratio=numpy.mean(data_ratio) mean_stall_ratio=numpy.mean(stall_ratio) mean_mem_rate=numpy.mean(read_rate + write_rate)*64.0 if mean_stall_ratio > 1.: return elif mean_mem_rate > 75.*1000000000./16.: return ename=ld.exc.split('/')[-1] ename=ld.comp_name(ename,ld.equiv_patterns) ## if mean_mem_rate > 2e9: # Put a print in here and investigate bad jobs ## return return (ts.j.id, ts.su, ename, mean_data_ratio, mean_stall_ratio, mean_mem_rate )
def get_data(file, mintime=1.0, wayness=range(1, 33), lariat_dict=None):
    """Load a job and return host-averaged rates of a fixed counter set.

    Counter layout (index into the requested key lists):
      0-1   CAS_READS, CAS_WRITES (intel_snb_imc)
      2-5   STALLS, CLOCKS_UNHALTED_CORE, SSE_D_ALL, SIMD_D_256
      6-31  llite metadata operations ('open' ... 'rename')
      32-36 LOAD_OPS_L1_HIT, LOAD_OPS_L2_HIT, LOAD_OPS_LLC_HIT, LOAD_OPS_ALL,
            INSTRUCTIONS_RETIRED

    Arguments:
      file        -- job data file handed to tspl.TSPLSum
      mintime, wayness -- forwarded to tspl_utils.checkjob
      lariat_dict -- previously loaded lariat data, or None to look the job
                     up under analyze_conf.lariat_path

    Returns the tuple ``(ts, ld, tmid, read_rate, write_rate, stall_rate,
    clock_rate, avx_rate, sse_rate, inst_rate, meta_rate, l1_rate, l2_rate,
    l3_rate, load_rate, read_frac, stall_frac)``, or None when the job cannot
    be loaded or fails tspl_utils.checkjob.
    """
    llite_ops = [
        "open", "close", "mmap", "seek", "fsync", "setattr", "truncate",
        "flock", "getattr", "statfs", "alloc_inode", "setxattr", "getxattr",
        " listxattr", "removexattr", "inode_permission", "readdir", "create",
        "lookup", "link", "unlink", "symlink", "mkdir", "rmdir", "mknod",
        "rename",
    ]
    try:
        ts = tspl.TSPLSum(
            file,
            ["intel_snb_imc", "intel_snb_imc",
             "intel_snb", "intel_snb", "intel_snb", "intel_snb"]
            + ["llite"] * len(llite_ops)
            + ["intel_snb"] * 5,
            ["CAS_READS", "CAS_WRITES",
             "STALLS", "CLOCKS_UNHALTED_CORE", "SSE_D_ALL", "SIMD_D_256"]
            + llite_ops
            + ["LOAD_OPS_L1_HIT", "LOAD_OPS_L2_HIT", "LOAD_OPS_LLC_HIT",
               "LOAD_OPS_ALL", "INSTRUCTIONS_RETIRED"],
        )
    except tspl.TSPLException:
        return

    if not tspl_utils.checkjob(ts, mintime, wayness):
        return

    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
    dt = numpy.diff(ts.t)

    if lariat_dict is None:
        ld = lariat_utils.LariatData(ts.j.id, end_epoch=ts.j.end_time,
                                     daysback=3,
                                     directory=analyze_conf.lariat_path)
    else:
        ld = lariat_utils.LariatData(ts.j.id, olddata=lariat_dict)

    # Counter indices behind each returned rate, in return order.  The
    # metadata rate covers the llite counters 6..31 only; it used to start at
    # 5 and therefore included the SIMD_D_256 instruction counter.
    first_llite = 6
    selections = [
        [0],                                                   # read
        [1],                                                   # write
        [2],                                                   # stall
        [3],                                                   # clock
        [5],                                                   # avx
        [4],                                                   # sse
        [36],                                                  # inst
        range(first_llite, first_llite + len(llite_ops)),      # meta
        [32],                                                  # l1
        [33],                                                  # l2
        [34],                                                  # l3
        [35],                                                  # load
    ]
    rates = [numpy.zeros_like(tmid) for _ in selections]

    for k in ts.j.hosts.keys():
        for acc, keys in zip(rates, selections):
            acc += numpy.diff(ts.assemble(keys, k, 0)) / dt

    for acc in rates:
        acc /= float(ts.numhosts)

    (read_rate, write_rate, stall_rate, clock_rate, avx_rate, sse_rate,
     inst_rate, meta_rate, l1_rate, l2_rate, l3_rate, load_rate) = rates

    # The +1 keeps the denominator non-zero when there is no memory traffic.
    read_frac = read_rate / (read_rate + write_rate + 1)
    stall_frac = stall_rate / clock_rate

    return (
        ts, ld, tmid,
        read_rate, write_rate, stall_rate, clock_rate,
        avx_rate, sse_rate, inst_rate, meta_rate,
        l1_rate, l2_rate, l3_rate, load_rate,
        read_frac, stall_frac,
    )
def getuser(file,user):
    """Return min/max/mean resource statistics of a job owned by ``user``.

    Host-averaged series are built for DRAM traffic, L1 loads, lnet traffic,
    IB traffic net of lnet, CPU user fraction, SSE+AVX instruction rate and
    memory usage, each divided by a fixed scale factor.

    Arguments:
      file -- job data file handed to tspl.TSPLSum
      user -- owner name; jobs of other owners are skipped

    Returns ``(end_time, min, max, mean of dram, l1, lnet, ib, user, flops,
    mem_usage ..., job id)`` -- 23 entries -- or None when the job cannot be
    loaded, belongs to another owner, or fails tspl_utils.checkjob.  Any
    other exception is reported on stdout and re-raised.
    """
    try:
        k1=['intel_snb_imc', 'intel_snb_imc', 'intel_snb',
            'lnet', 'lnet', 'ib_sw','ib_sw','cpu',
            'intel_snb', 'intel_snb', 'mem']
        k2=['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL',
            'rx_bytes','tx_bytes', 'rx_bytes','tx_bytes','user',
            'SSE_D_ALL', 'SIMD_D_256', 'MemUsed']

        try:
            ts=tspl.TSPLSum(file,k1,k2)
        except tspl.TSPLException:
            return

        if ts.owner != user:
            return

        ignore_qs=['gpu','gpudev','vis','visdev']
        if not tspl_utils.checkjob(ts,1.,range(1,33),ignore_qs):
            return

        tmid=(ts.t[:-1]+ts.t[1:])/2.0
        dt=numpy.diff(ts.t)

        dram_rate  = numpy.zeros_like(tmid)
        l1_rate    = numpy.zeros_like(tmid)
        lnet_rate  = numpy.zeros_like(tmid)
        ib_rate    = numpy.zeros_like(tmid)
        user_rate  = numpy.zeros_like(tmid)
        flops_rate = numpy.zeros_like(tmid)
        mem_usage  = numpy.zeros_like(tmid)

        for host in ts.j.hosts.keys():
            dram_rate  += numpy.diff(ts.assemble([0,1],host,0))/dt
            l1_rate    += numpy.diff(ts.assemble([2],host,0))/dt
            lnet_rate  += numpy.diff(ts.assemble([3,4],host,0))/dt
            ib_rate    += numpy.diff(ts.assemble([5,6,-3,-4],host,0))/dt
            user_rate  += numpy.diff(ts.assemble([7],host,0))/dt
            flops_rate += numpy.diff(ts.assemble([8,9],host,0))/dt
            # Memory usage is a level, not a counter: use the mid-interval
            # average instead of a rate.
            v = ts.assemble([10],host,0)
            mem_usage  += (v[:-1]+v[1:])/2.0

        nhosts=float(ts.numhosts)
        dram_rate  /= nhosts*1024.*1024.*1024./64.
        l1_rate    /= nhosts*1024.*1024./64.
        lnet_rate  /= nhosts*1e6
        ib_rate    /= nhosts*1e6
        user_rate  /= nhosts*100.*ts.wayness
        flops_rate /= nhosts*1e9
        mem_usage  /= nhosts*(1024.*1024.*1024.)

        summary=[ts.j.acct['end_time']]
        for series in (dram_rate,l1_rate,lnet_rate,ib_rate,
                       user_rate,flops_rate,mem_usage):
            summary.extend((numpy.min(series),
                            numpy.max(series),
                            numpy.mean(series)))
        summary.append(ts.j.id)

        return tuple(summary)
    except Exception:
        import sys
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        # A bare raise keeps the original traceback; "raise e" would restart
        # the traceback at this line.
        raise
def master_plot(file, threshold=False): k1 = [ 'amd64_core', 'amd64_core', 'amd64_sock', 'lnet', 'lnet', 'ib_sw', 'ib_sw', 'cpu' ] k2 = [ 'SSE_FLOPS', 'DCSF', 'DRAM', 'rx_bytes', 'tx_bytes', 'rx_bytes', 'tx_bytes', 'user' ] try: print file ts = tspl.TSPLSum(file, k1, k2) except tspl.TSPLException as e: return if not tspl_utils.checkjob(ts, 3600, 16): return elif ts.numhosts < 2: print ts.j.id + ': 1 host' return tmid = (ts.t[:-1] + ts.t[1:]) / 2.0 fig, ax = plt.subplots(6, 1, figsize=(8, 12), dpi=80) # Plot flop rate ax[0].hold = True for k in ts.j.hosts.keys(): h = ts.j.hosts[k] rate = numpy.divide(numpy.diff(ts.data[0][k][0]), numpy.diff(ts.t)) ax[0].plot(tmid / 3600, rate) ax[0].set_ylabel('Total ' + ts.k1[0] + '\n' + ts.k2[0] + '/s') # Plot DCSF rate ax[1].hold = True for k in ts.j.hosts.keys(): h = ts.j.hosts[k] rate = numpy.divide(numpy.diff(ts.data[1][k][0]), numpy.diff(ts.t)) ax[1].plot(tmid / 3600, rate) ax[1].set_ylabel('Total ' + ts.k1[1] + '\n' + ts.k2[1] + '/s') #Plot DRAM rate ax[2].hold = True for k in ts.j.hosts.keys(): h = ts.j.hosts[k] rate = numpy.divide(numpy.diff(ts.data[2][k][0]), numpy.diff(ts.t)) ax[2].plot(tmid / 3600, rate) ax[2].set_ylabel('Total ' + ts.k1[2] + '\n' + ts.k2[2] + '/s') # Plot lnet sum rate ax[3].hold = True for k in ts.j.hosts.keys(): h = ts.j.hosts[k] rate = numpy.divide(numpy.diff(ts.data[3][k][0] + ts.data[4][k][0]), numpy.diff(ts.t)) ax[3].plot(tmid / 3600, rate / (1024. 
* 1024.)) ax[3].set_ylabel('Total lnet MB/s') # Plot remaining IB sum rate ax[4].hold = True for k in ts.j.hosts.keys(): h = ts.j.hosts[k] v = ts.data[5][k][0] + ts.data[6][k][0] - (ts.data[3][k][0] + ts.data[4][k][0]) rate = numpy.divide(numpy.diff(v), numpy.diff(ts.t)) ax[4].plot(tmid / 3600, rate / (1024 * 1024.)) ax[4].set_ylabel('Total (ib_sw-lnet) MB/s') #Plot CPU user time ax[5].hold = True for k in ts.j.hosts.keys(): h = ts.j.hosts[k] rate = numpy.divide(numpy.diff(ts.data[7][k][0] / 100 / ts.wayness), numpy.diff(ts.t)) ax[5].plot(tmid / 3600, rate) ax[5].set_ylabel('Total ' + ts.k1[7] + '\n' + ts.k2[7] + '/s') ax[5].set_xlabel('Time (hr)') print ts.j.id + ': ' title = ts.title if threshold: title += ', V: %(v)-8.3f' % {'v': threshold} plt.suptitle(title) plt.subplots_adjust(hspace=0.35) for a in ax: tspl_utils.adjust_yaxis_range(a, 0.1) fname = '_'.join(['graph', ts.j.id, 'master']) fig.savefig(fname) plt.close()
def is_unfloppy(file, thresh): k1 = { 'amd64': ['amd64_core', 'amd64_sock', 'cpu'], 'intel_snb': ['intel_snb', 'intel_snb', 'intel_snb', 'cpu'], } k2 = { 'amd64': ['SSE_FLOPS', 'DRAM', 'user'], 'intel_snb': ['SIMD_D_256', 'SSE_D_ALL', 'LOAD_L1D_ALL', 'user'], } peak = { 'amd64': [2.3e9 * 16 * 2, 24e9, 1.], 'intel_snb': [16 * 2.7e9 * 2, 16 * 2.7e9 / 2. * 64., 1.], } try: ts = tspl.TSPLSum(file, k1, k2) except tspl.TSPLException as e: return ignore_qs = ['gpu', 'gpudev', 'vis', 'visdev'] if not tspl_utils.checkjob(ts, 3600, range(1, 33), ignore_qs): return elif ts.numhosts < 2: # At least 2 hosts print ts.j.id + ': 1 host' return gfloprate = numpy.zeros(len(ts.t) - 1) gdramrate = numpy.zeros(len(ts.t) - 1) gcpurate = numpy.zeros(len(ts.t) - 1) for h in ts.j.hosts.keys(): if ts.pmc_type == 'amd64': gfloprate += numpy.divide(numpy.diff(ts.data[0][h][0]), numpy.diff(ts.t)) gdramrate += numpy.divide(numpy.diff(ts.data[1][h][0]), numpy.diff(ts.t)) gcpurate += numpy.divide(numpy.diff(ts.data[2][h][0]), numpy.diff(ts.t)) elif ts.pmc_type == 'intel_snb': gfloprate += numpy.divide(numpy.diff(ts.data[0][h][0]), numpy.diff(ts.t)) gfloprate += numpy.divide(numpy.diff(ts.data[1][h][0]), numpy.diff(ts.t)) gdramrate += numpy.divide(numpy.diff(ts.data[2][h][0]), numpy.diff(ts.t)) gcpurate += numpy.divide(numpy.diff(ts.data[3][h][0]), numpy.diff(ts.t)) mfr = scipy.stats.tmean(gfloprate) / ts.numhosts mdr = scipy.stats.tmean(gdramrate) / ts.numhosts mcr = scipy.stats.tmean(gcpurate) / (ts.numhosts * ts.wayness * 100.) print mfr / peak[ts.pmc_type][0], (mdr / peak[ts.pmc_type][1]) # [ts.j.id,mfr/peak[0],mdr/peak[1],mcr/peak[2] #print 'mcr',mcr/peak[ts.pmc_type][2], (mfr/peak[ts.pmc_type][0])/(mdr/peak[ts.pmc_type][1]) if ((mcr / peak[ts.pmc_type][2] > 0.5) and (mfr / peak[ts.pmc_type][0]) / (mdr / peak[ts.pmc_type][1]) < thresh): return True else: return False
def master_plot(file, mode='lines', threshold=False, output_dir='.',
                prefix='graph', mintime=3600, wayness=16, header='Master',
                lariat_dict=None, wide=False, job_stats=None):
    """Build the six-panel "master" figure for one job.

    Arguments:
      file        -- job file handed to tspl.TSPLSum
      mode        -- 'hist' selects plot_thist, 'percentile' selects
                     plot_mmm, anything else selects plot_lines
      threshold   -- when truthy, appended to the title as ', V: ...'
                     (non-wide layout with a header only)
      output_dir  -- not used by this function
      prefix      -- first component of the returned file name
      mintime     -- passed to tspl_utils.checkjob
      wayness     -- passed to tspl_utils.checkjob
      header      -- text placed above the job title; None suppresses it
      lariat_dict -- None: load Lariat data from analyze_conf.lariat_path;
                     "pass": construct LariatData from the job id alone;
                     anything else: used as LariatData's olddata
      wide        -- use a 6x2 grid with summary text on the blank left side
      job_stats   -- passed to tspl.TSPLSum

    Returns (fig, fname), where fname has neither directory nor extension,
    or None when the job cannot be loaded, is rejected by checkjob, or has
    a pmc_type other than 'intel_snb' / 'intel'.  The figure is closed in
    pyplot before it is returned; saving it is left to the caller.
    """
    k1 = {
        'amd64': [
            'amd64_core', 'amd64_core', 'amd64_sock', 'lnet', 'lnet',
            'ib_sw', 'ib_sw', 'cpu'
        ],
        'intel': [
            'intel_pmc3', 'intel_pmc3', 'intel_pmc3', 'lnet', 'lnet',
            'ib_ext', 'ib_ext', 'cpu', 'mem', 'mem'
        ],
        'intel_snb': [
            'intel_snb_imc', 'intel_snb_imc', 'intel_snb', 'lnet', 'lnet',
            'ib_sw', 'ib_sw', 'cpu', 'intel_snb', 'intel_snb', 'mem', 'mem'
        ],
    }
    k2 = {
        'amd64': [
            'SSE_FLOPS', 'DCSF', 'DRAM', 'rx_bytes', 'tx_bytes', 'rx_bytes',
            'tx_bytes', 'user'
        ],
        'intel': [
            'MEM_LOAD_RETIRED_L1D_HIT', 'FP_COMP_OPS_EXE_X87',
            'INSTRUCTIONS_RETIRED', 'rx_bytes', 'tx_bytes', 'port_recv_data',
            'port_xmit_data', 'user', 'MemUsed', 'AnonPages'
        ],
        'intel_snb': [
            'CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'rx_bytes', 'tx_bytes',
            'rx_bytes', 'tx_bytes', 'user', 'SSE_D_ALL', 'SIMD_D_256',
            'MemUsed', 'AnonPages'
        ],
    }

    try:
        print(file)
        ts = tspl.TSPLSum(file, k1, k2, job_stats)
    except tspl.TSPLException:
        return

    ignore_qs = []  # 'gpu','gpudev','vis','visdev']
    if not tspl_utils.checkjob(ts, mintime, wayness, ignore_qs):
        return

    if lariat_dict is None:
        ld = lariat_utils.LariatData(ts.j.id,
                                     end_epoch=ts.j.end_time,
                                     daysback=3,
                                     directory=analyze_conf.lariat_path)
    elif lariat_dict == "pass":
        ld = lariat_utils.LariatData(ts.j.id)
    else:
        ld = lariat_utils.LariatData(ts.j.id, olddata=lariat_dict)

    # Use the smaller of the two wayness values when Lariat knows one.
    wayness = ts.wayness
    if ld.wayness != -1 and ld.wayness < ts.wayness:
        wayness = ld.wayness

    # Reject unsupported counter types before a figure exists, so that the
    # early return cannot leave an open figure behind in pyplot.
    # Fix this to support the old amd plots
    if ts.pmc_type not in ('intel_snb', 'intel'):
        print(ts.pmc_type + ' not supported')
        return

    if wide:
        fig, ax = plt.subplots(6, 2, figsize=(15.5, 12), dpi=110)
        # Make 2-d array into 1-d, and reorder so that the left side is blank
        ax = my_utils.flatten(ax)
        ax_even = ax[0:12:2]
        ax_odd = ax[1:12:2]
        ax = ax_odd + ax_even
        for a in ax_even:
            a.axis('off')
    else:
        fig, ax = plt.subplots(6, 1, figsize=(8, 12), dpi=110)

    if mode == 'hist':
        plot = plot_thist
    elif mode == 'percentile':
        plot = plot_mmm
    else:
        plot = plot_lines

    if ts.pmc_type == 'intel_snb':
        # Plot key 1
        plot(ax[0], ts, [8, 9], 3600., 1e9,
             ylabel='Total AVX +\nSSE Ginst/s')
        # Plot key 2
        plot(ax[1], ts, [0, 1], 3600., 1.0 / 64.0 * 1024. * 1024. * 1024.,
             ylabel='Total Mem BW GB/s')
        # Plot key 3 (an L1 bandwidth panel used to live here)
        plot(ax[2], ts, [10, -11], 3600., 1024.0 * 1024.0 * 1024.0,
             ylabel='Memory Usage GB', do_rate=False)
    else:  # 'intel'; ax[1] is left empty for this type
        plot(ax[0], ts, [1], 3600., 1e9, ylabel='FP Ginst/s')
        plot(ax[2], ts, [8, -9], 3600., 1024.0 * 1024.0 * 1024.0,
             ylabel='Memory Usage GB', do_rate=False)

    # Plot lnet sum rate
    plot(ax[3], ts, [3, 4], 3600., 1024.**2, ylabel='Total lnet MB/s')

    # Plot remaining IB sum rate
    if ts.pmc_type == 'intel_snb':
        plot(ax[4], ts, [5, 6, -3, -4], 3600., 1024.**2,
             ylabel='Total (ib_sw-lnet) MB/s')
    else:
        plot(ax[4], ts, [5, 6, -3, -4], 3600., 1024.**2,
             ylabel='Total (ib_ext-lnet) MB/s')

    # Plot CPU user time
    plot(ax[5], ts, [7], 3600., wayness * 100.,
         xlabel='Time (hr)',
         ylabel='Total cpu user\nfraction')

    print(ts.j.id + ': ')

    plt.subplots_adjust(hspace=0.35)
    if wide:
        summary = my_utils.summary_text(ld, ts)
        # header=None is accepted by the narrow layout, so accept it here
        # too instead of failing on None + str.
        left_text = summary if header is None else header + '\n' + summary
        text_len = len(left_text.split('\n'))
        fontsize = ax[0].yaxis.label.get_size()
        linespacing = 1.2
        fontrate = float(fontsize * linespacing) / 72. / 15.5
        # this doesn't quite work. fontrate is too small by a small amount
        yloc = .8 - fontrate * (text_len - 1)
        plt.figtext(.05, yloc, left_text, linespacing=linespacing)
        fname = '_'.join([prefix, ts.j.id, ts.owner, 'wide_master'])
    elif header is not None:
        title = header + '\n' + ts.title
        if threshold:
            title += ', V: %(v)-6.1f' % {'v': threshold}
        title += '\n' + ld.title()
        plt.suptitle(title)
        fname = '_'.join([prefix, ts.j.id, ts.owner, 'master'])
    else:
        fname = '_'.join([prefix, ts.j.id, ts.owner, 'master'])

    if mode == 'hist':
        fname += '_hist'
    elif mode == 'percentile':
        fname += '_perc'

    # Close this figure explicitly rather than whichever one is current.
    plt.close(fig)

    return fig, fname
def master_plot(file,threshold=False):
    """Plot six per-host rate panels for one job and save the figure.

    Arguments:
      file      -- job file handed to tspl.TSPLSum
      threshold -- when truthy, appended to the figure title as ', V: ...'

    The figure is written via fig.savefig to 'graph_<job id>_master' and
    then closed.  Nothing is returned.  The function returns early when the
    job cannot be loaded, is rejected by tspl_utils.checkjob, or ran on
    fewer than two hosts.
    """
    k1=['amd64_core','amd64_core','amd64_sock','lnet','lnet','ib_sw','ib_sw',
        'cpu']
    k2=['SSE_FLOPS','DCSF','DRAM','rx_bytes','tx_bytes','rx_bytes','tx_bytes',
        'user']

    try:
        print(file)
        ts=tspl.TSPLSum(file,k1,k2)
    except tspl.TSPLException:
        return

    if not tspl_utils.checkjob(ts,3600,16):
        return
    if ts.numhosts < 2:
        print(ts.j.id + ': 1 host')
        return

    tmid=(ts.t[:-1]+ts.t[1:])/2.0
    hours=tmid/3600

    fig,ax=plt.subplots(6,1,figsize=(8,12),dpi=80)

    def draw(axis,series_of,scale=None):
        # One curve per host: finite-difference rate of that host's series,
        # optionally divided by scale before plotting.
        axis.hold=True
        for host in ts.j.hosts.keys():
            rate=numpy.divide(numpy.diff(series_of(host)),numpy.diff(ts.t))
            if scale is not None:
                rate=rate/scale
            axis.plot(hours,rate)

    def counter_label(index):
        return 'Total ' + ts.k1[index] + '\n' + ts.k2[index] + '/s'

    def lnet_total(host):
        return ts.data[3][host][0]+ts.data[4][host][0]

    def ib_total(host):
        return ts.data[5][host][0]+ts.data[6][host][0]

    # Panels 0-2: flop rate, DCSF rate, DRAM rate
    for panel in range(3):
        draw(ax[panel],lambda host,panel=panel: ts.data[panel][host][0])
        ax[panel].set_ylabel(counter_label(panel))

    # Plot lnet sum rate
    draw(ax[3],lnet_total,1024.*1024.)
    ax[3].set_ylabel('Total lnet MB/s')

    # Plot remaining IB sum rate
    draw(ax[4],lambda host: ib_total(host)-lnet_total(host),1024*1024.)
    ax[4].set_ylabel('Total (ib_sw-lnet) MB/s')

    # Plot CPU user time
    draw(ax[5],lambda host: ts.data[7][host][0]/100/ts.wayness)
    ax[5].set_ylabel(counter_label(7))
    ax[5].set_xlabel('Time (hr)')

    print(ts.j.id + ': ')

    title=ts.title
    if threshold:
        title+=', V: %(v)-8.3f' % {'v': threshold}
    plt.suptitle(title)
    plt.subplots_adjust(hspace=0.35)

    for axis in ax:
        tspl_utils.adjust_yaxis_range(axis,0.1)

    fname='_'.join(['graph',ts.j.id,'master'])
    fig.savefig(fname)
    plt.close()
def getqueue(file, queue):
    """Summarise one job's counter rates if it ran in the given queue.

    Arguments:
      file  -- job file handed to tspl.TSPLSum
      queue -- queue name; jobs whose ts.queue differs are skipped

    Returns None when the job cannot be loaded, ran in another queue, or is
    rejected by tspl_utils.checkjob.  Otherwise returns a 23-tuple:

      (end_time,
       min, max, mean of dram_rate,
       min, max, mean of l1_rate,
       min, max, mean of lnet_rate,
       min, max, mean of ib_rate,
       min, max, mean of user_rate,
       min, max, mean of flops_rate,
       min, max, mean of mem_usage,
       job id)

    Each series is averaged over hosts and scaled by the constants below.
    Any other exception is reported on stdout (type, file name, line number)
    and re-raised.
    """
    try:
        k1 = [
            'intel_snb_imc', 'intel_snb_imc', 'intel_snb', 'lnet', 'lnet',
            'ib_sw', 'ib_sw', 'cpu', 'intel_snb', 'intel_snb', 'mem'
        ]
        k2 = [
            'CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'rx_bytes', 'tx_bytes',
            'rx_bytes', 'tx_bytes', 'user', 'SSE_D_ALL', 'SIMD_D_256',
            'MemUsed'
        ]

        try:
            ts = tspl.TSPLSum(file, k1, k2)
        except tspl.TSPLException:
            return

        if ts.queue != queue:
            return

        if not tspl_utils.checkjob(ts, 1., range(1, 33)):
            return

        tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
        dt = numpy.diff(ts.t)

        dram_rate = numpy.zeros_like(tmid)
        l1_rate = numpy.zeros_like(tmid)
        lnet_rate = numpy.zeros_like(tmid)
        ib_rate = numpy.zeros_like(tmid)
        user_rate = numpy.zeros_like(tmid)
        flops_rate = numpy.zeros_like(tmid)
        mem_usage = numpy.zeros_like(tmid)

        for host in ts.j.hosts.keys():
            dram_rate += numpy.diff(ts.assemble([0, 1], host, 0)) / dt
            l1_rate += numpy.diff(ts.assemble([2], host, 0)) / dt
            lnet_rate += numpy.diff(ts.assemble([3, 4], host, 0)) / dt
            ib_rate += numpy.diff(ts.assemble([5, 6, -3, -4], host, 0)) / dt
            user_rate += numpy.diff(ts.assemble([7], host, 0)) / dt
            flops_rate += numpy.diff(ts.assemble([8, 9], host, 0)) / dt
            # Memory usage is a level, not a counter: take interval midpoints.
            v = ts.assemble([10], host, 0)
            mem_usage += (v[:-1] + v[1:]) / 2.0

        # Average over hosts and rescale.
        # NOTE(review): the /64. factors look like a 64-byte line size and
        # the 1024 powers like GiB / MiB conversions -- confirm units.
        nhosts = float(ts.numhosts)
        dram_rate /= nhosts * 1024. * 1024. * 1024. / 64.
        l1_rate /= nhosts * 1024. * 1024. / 64.
        lnet_rate /= nhosts * 1e6
        ib_rate /= nhosts * 1e6
        user_rate /= nhosts * 100. * ts.wayness
        flops_rate /= nhosts * 1e9
        mem_usage /= nhosts * (1024. * 1024. * 1024.)

        summary = [ts.j.acct['end_time']]
        for series in (dram_rate, l1_rate, lnet_rate, ib_rate, user_rate,
                       flops_rate, mem_usage):
            summary.extend((numpy.min(series), numpy.max(series),
                            numpy.mean(series)))
        summary.append(ts.j.id)
        return tuple(summary)
    except Exception:
        import sys
        exc_type, _, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        # Bare raise keeps the original traceback; "raise e" would restart
        # it from this line under Python 2.
        raise
def compute_ratio(file, lariat_dict=None):
    """Compute mean memory/L1 and stall/clock ratios for one job.

    Arguments:
      file        -- job file handed to tspl.TSPLSum
      lariat_dict -- previously loaded Lariat data used as olddata; when
                     None the data is loaded from analyze_conf.lariat_path

    Returns (job id, ts.su, executable name, mean data ratio, mean stall
    ratio, mean memory rate), or None when the job is skipped: it cannot be
    loaded, fails tspl_utils.checkjob, has no usable Lariat record, has zero
    tasks, a division raised RuntimeWarning, or the means are out of range.
    """
    event_types = ['intel_snb_imc', 'intel_snb_imc', 'intel_snb',
                   'intel_snb', 'intel_snb', 'intel_snb', 'intel_snb']
    event_names = ['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'SIMD_D_256',
                   'SSE_D_ALL', 'STALLS', 'CLOCKS_UNHALTED_CORE']
    try:
        ts = tspl.TSPLSum(file, event_types, event_names)
    except tspl.TSPLException:
        return

    skipped_queues = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600., range(1, 33), skipped_queues):
        return

    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

    if lariat_dict is None:
        ld = lariat_utils.LariatData(ts.j.id,
                                     end_epoch=ts.j.end_time,
                                     daysback=3,
                                     directory=analyze_conf.lariat_path)
    else:
        ld = lariat_utils.LariatData(ts.j.id, olddata=lariat_dict)

    def unusable(record):
        return record.exc == 'unknown' or record.wayness != ts.wayness

    if unusable(ld):
        # try loading older lariat
        ld = lariat_utils.LariatData(ts.j.id,
                                     end_epoch=ts.j.end_time,
                                     daysback=3,
                                     directory=analyze_conf.lariat_path,
                                     olddata=ld.ld)
        if unusable(ld):
            # Still nothing; return
            return

    # One accumulator per requested event, in the order passed to TSPLSum.
    rates = [numpy.zeros_like(tmid) for _ in event_names]
    dt = numpy.diff(ts.t)
    for host in ts.j.hosts.keys():
        for index, rate in enumerate(rates):
            rate += numpy.diff(ts.assemble([index], host, 0)) / dt

    ntasks = float(ts.numhosts * int(ts.wayness) * int(ld.threads))
    if ntasks == 0:
        print('%s %s %s' % ('No tasks in', ts.j.id, ' skipping'))
        return

    for rate in rates:
        rate /= ntasks  # in-place, so the list entries are updated

    (read_rate, write_rate, l1_rate, avx_rate, sse_rate, stall_rate,
     clock_rate) = rates

    # NOTE(review): the RuntimeWarning handlers below can only fire if
    # warnings are escalated to exceptions outside this function -- confirm.
    try:
        data_ratio = (read_rate + write_rate) / l1_rate
    except RuntimeWarning:
        print('%s %s' % ('Division by zero, skipping:', ts.j.id))
        return

    flops = avx_rate + sse_rate
    try:
        # Not used afterwards; evaluated only so that a failure here skips
        # the job.
        flops_ratio = ((flops - numpy.min(flops)) /
                       (numpy.max(flops) - numpy.min(flops)))
    except RuntimeWarning:
        print('%s %s' % ('Division by zero, skipping:', ts.j.id))
        return

    try:
        stall_ratio = stall_rate / clock_rate
    except RuntimeWarning:
        print('%s %s' % ('Division by zero, skipping:', ts.j.id))
        return

    mean_data_ratio = numpy.mean(data_ratio)
    mean_stall_ratio = numpy.mean(stall_ratio)
    mean_mem_rate = numpy.mean(read_rate + write_rate) * 64.0

    # Discard jobs whose means fall outside the accepted range.
    if mean_stall_ratio > 1. or mean_mem_rate > 75. * 1000000000. / 16.:
        return

    ename = ld.comp_name(ld.exc.split('/')[-1], ld.equiv_patterns)

    ##  if mean_mem_rate > 2e9: # Put a print in here and investigate bad jobs
    ##    return
    return (ts.j.id, ts.su, ename, mean_data_ratio, mean_stall_ratio,
            mean_mem_rate)
def do_compute(file):
    """Produce one comma-separated summary line for a job.

    Arguments:
      file -- job file handed to tspl.TSPLSum

    Returns 'job id,owner,executable,mean memory rate,mean stall ratio,
    mean data ratio,mean flops' as a single string.  Returns None when the
    job cannot be loaded, fails either tspl_utils.checkjob call, ran on
    fewer than two hosts, has an unknown Lariat executable, or its mean
    memory rate exceeds 2e9.
    """
    event_types = ['intel_snb_imc', 'intel_snb_imc', 'intel_snb',
                   'intel_snb', 'intel_snb', 'intel_snb', 'intel_snb']
    event_names = ['CAS_READS', 'CAS_WRITES', 'LOAD_L1D_ALL', 'SIMD_D_256',
                   'SSE_D_ALL', 'STALLS', 'CLOCKS_UNHALTED_CORE']
    try:
        ts = tspl.TSPLSum(file, event_types, event_names)
    except tspl.TSPLException:
        return

    if not tspl_utils.checkjob(ts, 0, 16):
        return
    if ts.numhosts < 2:
        print(ts.j.id + ': 1 host')
        return

    skipped_queues = ['gpu', 'gpudev', 'vis', 'visdev']
    if not tspl_utils.checkjob(ts, 3600., range(1, 33), skipped_queues):
        return

    ld = lariat_utils.LariatData(ts.j.id, ts.j.end_time,
                                 '/scratch/projects/lariatData')
    if ld.exc == 'unknown':
        return

    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

    # One accumulator per requested event, in the order passed to TSPLSum.
    rates = [numpy.zeros_like(tmid) for _ in event_names]
    dt = numpy.diff(ts.t)
    for host in ts.j.hosts.keys():
        for index, rate in enumerate(rates):
            rate += numpy.diff(ts.assemble([index], host, 0)) / dt

    for rate in rates:
        rate /= ts.numhosts  # in-place, so the list entries are updated

    (read_rate, write_rate, l1_rate, avx_rate, sse_rate, stall_rate,
     clock_rate) = rates

    data_ratio = (read_rate + write_rate) / l1_rate
    flops = avx_rate + sse_rate
    # Not used afterwards; the expression is still evaluated.
    flops_ratio = ((flops - numpy.min(flops)) /
                   (numpy.max(flops) - numpy.min(flops)))
    stall_ratio = stall_rate / clock_rate

    mean_data_ratio = numpy.mean(data_ratio)
    mean_stall_ratio = numpy.mean(stall_ratio)
    mean_flops = numpy.mean(flops)

    ename = ld.comp_name(ld.exc.split('/')[-1], ld.equiv_patterns)

    mean_mem_rate = numpy.mean(read_rate + write_rate)
    if mean_mem_rate > 2e9:  # Put a print in here and investigate bad jobs
        return

    fields = [ts.j.id, ts.owner, ename]
    fields.extend(str(value) for value in (mean_mem_rate, mean_stall_ratio,
                                           mean_data_ratio, mean_flops))
    return ','.join(fields)