def get_cpi(dir):
    """Return the CPI (cycles per instruction) Sniper reported for a results directory.

    Parameters:
        dir: directory holding the Sniper results; passed to
            ``sniper_lib.get_results(0, dir)``.

    Returns:
        The CPI as a float.  ``0.0`` is returned (after printing an error
        message) when the results cannot be read or when no instructions
        were recorded.

    The CPI is computed as ``barrier.global_time[0] * fs_to_cycles_cores[i]
    / core.instructions[i]``; when several cores are present the value of the
    last core that executed instructions is returned.
    """
    try:
        r = sniper_lib.get_results(0, dir)
    except ValueError:
        msg.PrintMsg('\nERROR: Can\'t get sniper results for:\n ' + dir)
        return 0.0

    stats = r['results']
    if stats['ncores'] != 1:
        msg.PrintMsgPlus(
            'Warning: Sniper only supports single-threaded SimPoints')

    # This code works with both Sniper/SniperLite
    #
    instrs = stats['core.instructions']
    fs_to_cycles = stats['fs_to_cycles_cores']
    fs = stats['barrier.global_time']

    # Validate the instruction counts *before* dividing by them: the
    # division used to run first and raised ZeroDivisionError for exactly
    # the case this check is meant to report.
    if sum(instrs) <= 0:
        msg.PrintMsgPlus(
            '\nERROR: No cycles found in Sniper output for pinball:\n ' + dir)
        return 0.0

    cpi = 0.0
    for i, count in enumerate(instrs):
        # Skip cores that executed nothing; they carry no CPI information
        # and would otherwise divide by zero.
        if count > 0:
            cpi = (float(fs[0]) * fs_to_cycles[i]) / count
    return cpi
def __init__(self, resultsdir = '.'):
    """Load ``sim.memorytracker`` and the matching Sniper statistics.

    Parameters:
        resultsdir: directory containing ``sim.memorytracker`` and the
            Sniper results read through ``sniper_lib.get_results``.

    Raises:
        IOError: if ``sim.memorytracker`` is not present in *resultsdir*.
        ValueError: if a line of the file starts with none of the record
            tags ``W``, ``F`` or ``S``.

    Populates ``self.functions`` (from ``F`` records), ``self.hitwheres``
    (from the ``W`` record), ``self.sites`` / ``self.siteids`` (from ``S``
    records), and the ``*_global`` / ``*_unknown`` counters.  The "unknown"
    counters start out equal to the global totals and have every amount
    attributed to an allocation site subtracted from them.
    """
    filename = os.path.join(resultsdir, 'sim.memorytracker')
    if not os.path.exists(filename):
        raise IOError('Cannot find output file %s' % filename)

    results = sniper_lib.get_results(resultsdir = resultsdir)
    config = results['config']
    stats = results['results']

    # Totals per hit location, keyed by the text after 'loads-where-' /
    # 'stores-where-' in the statistic name.
    self.hitwhere_load_global = dict([
        (k.split('-', 3)[3], sum(v))
        for k, v in stats.items() if k.startswith('L1-D.loads-where-')
    ])
    self.hitwhere_load_unknown = self.hitwhere_load_global.copy()
    self.hitwhere_store_global = dict([
        (k.split('-', 3)[3], sum(v))
        for k, v in stats.items() if k.startswith('L1-D.stores-where-')
    ])
    self.hitwhere_store_unknown = self.hitwhere_store_global.copy()

    # Evictions are counted at the cache level given by perf_model/cache/levels.
    llc_level = int(sniper_config.get_config(config, 'perf_model/cache/levels'))
    self.evicts_global = sum([
        sum(v) for k, v in stats.items()
        if re.match('L%d.evict-.$' % llc_level, k)
    ])
    self.evicts_unknown = self.evicts_global

    self.functions = {}
    self.sites = {}
    self.siteids = {}

    # 'with' guarantees the file is closed, also when a malformed line
    # raises part-way through parsing.
    with open(filename) as fp:
        for line in fp:
            if line.startswith('W\t'):
                self.hitwheres = line.strip().split('\t')[1].strip(',').split(',')
            elif line.startswith('F\t'):
                _, eip, name, location = line.strip().split('\t')
                self.functions[eip] = Function(eip, name, location)
            elif line.startswith('S\t'):
                fields = line.strip().split('\t')
                siteid = fields[1]
                stack = self.collapseStack(fields[2].strip(':').split(':'))
                site = {
                    'numallocations': 0,
                    'totalallocated': 0,
                    'hitwhereload': {},
                    'hitwherestore': {},
                    'evictedby': {}
                }
                for data in fields[3:]:
                    key, value = data.split('=')
                    if key == 'num-allocations':
                        site['numallocations'] = long(value)
                    elif key == 'total-allocated':
                        site['totalallocated'] = long(value)
                    elif key == 'hit-where':
                        # Materialise as a list: it is traversed twice below
                        # (once for loads, once for stores).
                        entries = [ item.split(':') for item in value.strip(',').split(',') ]
                        # Entries are prefixed 'L' for loads and 'S' for stores.
                        site['hitwhereload'] = dict([
                            (s[1:], long(v)) for s, v in entries if s.startswith('L')
                        ])
                        for k, v in site['hitwhereload'].items():
                            self.hitwhere_load_unknown[k] -= v
                        site['hitwherestore'] = dict([
                            (s[1:], long(v)) for s, v in entries if s.startswith('S')
                        ])
                        for k, v in site['hitwherestore'].items():
                            self.hitwhere_store_unknown[k] -= v
                    elif key == 'evicted-by':
                        site['evictedby'] = dict([
                            (s, long(v))
                            for s, v in [ item.split(':') for item in value.strip(',').split(',') ]
                        ])
                        self.evicts_unknown -= sum(site['evictedby'].values())
                self.siteids[siteid] = stack
                # Several site ids can collapse onto the same stack; merge them.
                if stack in self.sites:
                    self.sites[stack].update(**site)
                else:
                    self.sites[stack] = AllocationSite(stack, **site)
            else:
                raise ValueError('Invalid format %s' % line)
def generate_simout(jobid = None, resultsdir = None, output = sys.stdout, silent = False): try: res = sniper_lib.get_results(jobid = jobid, resultsdir = resultsdir) except (KeyError, ValueError), e: if not silent: print 'Failed to generated sim.out:', e return
def computeAccuracy(benchmark, pinpoint, pinball_name): output_dir = output_dir_base.format(sys.argv[1], benchmark, pinpoint) try: output_res = sniper_lib.get_results(jobid=None, resultsdir=output_dir) except (KeyError, ValueError), e: print 'Failed to read stats:', e return 0
def __init__(self, jobid = '', resultsdir = '', config = None, stats = None, data = None, partial = None):
    """Store Sniper results on the instance and parse them.

    When *data* is truthy it is used as the raw results; otherwise the
    results are fetched with ``sniper_lib.get_results`` using the remaining
    arguments.  The ``'results'`` and ``'config'`` entries are stored as
    ``self.stats`` and ``self.config`` before ``self.parse()`` is called.
    """
    if not data:
        data = sniper_lib.get_results(jobid = jobid, resultsdir = resultsdir, config = config, stats = stats, partial = partial)
    self.stats = data['results']
    self.config = data['config']
    self.parse()
def getTotalInstructionCount():
    """Return the instruction count summed over all entries of the metric.

    Uses the module-level ``config`` and ``stats`` objects and requests only
    the ``performance_model.instruction_count`` metric.
    """
    metric = "performance_model.instruction_count"
    results = sniper_lib.get_results(
        config=config, stats=stats, metrics=(metric, ))
    return sum(results["results"][metric])
def __init__(self, resultsdir = '.'):
    """Load ``sim.rtntracefull`` and build the call tree with its totals.

    Parameters:
        resultsdir: directory containing ``sim.rtntracefull`` and the
            Sniper results read through ``sniper_lib.get_results``.

    Raises:
        IOError: if ``sim.rtntracefull`` is not present in *resultsdir*.

    The first line of the trace is a tab-separated header.  Lines starting
    with ``:`` describe a function (eip, name, location); every other line
    is a call stack (``:``-separated eips) followed by one integer per
    header column.
    """
    filename = os.path.join(resultsdir, 'sim.rtntracefull')
    if not os.path.exists(filename):
        raise IOError('Cannot find trace file %s' % filename)

    results = sniper_lib.get_results(resultsdir = resultsdir)
    config = results['config']
    stats = results['results']

    freq = 1e9 * float(sniper_config.get_config(config, 'perf_model/core/frequency'))
    self.fs_to_cycles = freq / 1e15

    self.functions = {}
    self.calls = {}
    self.children = collections.defaultdict(set)
    self.roots = set()
    self.totals = {}

    # 'with' guarantees the trace file is closed, also when parsing fails.
    with open(filename) as fp:
        self.headers = fp.readline().strip().split('\t')
        for line in fp:
            if line.startswith(':'):
                eip, name, location = line.strip().split('\t')
                eip = eip[1:]  # drop the leading ':'
                self.functions[eip] = Function(eip, name, location)
            else:
                fields = line.strip().split('\t')
                stack = fields[0].split(':')
                eip = stack[-1]
                stack = ':'.join(map(self.translateEip, stack))
                data = dict(zip(self.headers[1:], map(long, fields[1:])))
                if stack in self.calls:
                    self.calls[stack].add(data)
                else:
                    self.calls[stack] = Call(str(self.functions[eip]), eip, stack, data)
                    parent = stack.rpartition(':')[0]
                    self.children[parent].add(stack)

    # Roots are the calls that are nobody's child.
    self.roots = set(self.calls.keys())
    for parent in self.calls:
        for child in self.children[parent]:
            self.roots.remove(child)

    # Construct a list of calls where each child is ordered before its parent.
    calls_ordered = collections.deque()
    calls_tovisit = collections.deque(self.roots)
    while calls_tovisit:
        stack = calls_tovisit.pop()
        calls_ordered.appendleft(stack)
        calls_tovisit.extend(self.children[stack])

    # Now implement a non-recursive version of buildTotal, which requires that each
    # function's children have been visited before processing the parent,
    # by visiting calls_ordered in left-to-right order.
    for stack in calls_ordered:
        self.calls[stack].buildTotal(self)

    ncores = int(config['general/total_cores'])
    self.totals['total_coretime'] = ncores * stats['barrier.global_time'][0]
def getInstructionCount(intervalstr):
    """Return the summed instruction count for one partial interval.

    *intervalstr* is forwarded as the ``partial`` argument of
    ``sniper_lib.get_results``; the module-level ``config`` and ``stats``
    objects are used, and only ``performance_model.instruction_count`` is
    requested.
    """
    metric = "performance_model.instruction_count"
    results = sniper_lib.get_results(
        config=config, stats=stats, partial=intervalstr, metrics=(metric, ))
    return sum(results["results"][metric])
def createJSONData(resultsdir, outputdir, verbose = False):
    """Write ``basicstats.txt`` with headline statistics of a Sniper run.

    Parameters:
        resultsdir: directory holding the Sniper results.
        outputdir: directory in which ``basicstats.txt`` is created.
        verbose: accepted for interface compatibility; not used here.

    Returns None.  Nothing is written when the results cannot be loaded.

    Raises:
        ValueError: if the results contain neither barrier nor
            performance-model begin/end times.

    The file contains a single ``basicstats = {...};`` assignment whose
    value is the JSON encoding of the collected data, including a
    pre-rendered HTML table under the ``html`` key.
    """
    try:
        res = sniper_lib.get_results(resultsdir = resultsdir)
    except Exception:
        # Best effort: a run without readable results simply gets no
        # summary.  KeyboardInterrupt/SystemExit are no longer swallowed.
        return

    results = res['results']
    config = res['config']
    ncores = int(config['general/total_cores'])

    if 'fs_to_cycles_cores' in results:
        cycles_scale = results['fs_to_cycles_cores'][0]
    else:
        cycles_scale = 1.

    if 'barrier.global_time_begin' in results:
        # Most accurate: ask the barrier
        time_begin = results['barrier.global_time_begin'][0]
        time_end = results['barrier.global_time_end'][0]
    elif 'performance_model.elapsed_time_end' in results:
        # Guess based on core that has the latest time (future wakeup is less common than sleep on futex)
        # (This branch used to test 'self.stats', which does not exist in a
        # module-level function and raised NameError.)
        time_begin = max(results['performance_model.elapsed_time_begin'])
        time_end = max(results['performance_model.elapsed_time_end'])
    else:
        raise ValueError('No timing statistics found in %s' % resultsdir)

    ninstrs = sum(results['performance_model.instruction_count'])
    cycles = cycles_scale * (time_end - time_begin)

    def format_mpki(value):
        # Events per 1000 instructions.
        return '%.3f' % (1000. * value / float(ninstrs))

    data = {
        'ncores': ncores,
        'ninstrs': format_number(ninstrs),
        'time': format_number(time_end - time_begin, suffixes = ['fs', 'ps', 'ns', 'µs', 'ms', 's']),
        'cycles': format_number(cycles),
        'ipc': '%.3f' % (ninstrs / float(cycles) / ncores),
        'branchmis': format_mpki(sum(results['branch_predictor.num-incorrect'])),
        'dram': format_mpki(sum(results['dram.reads']) + sum(results['dram.writes'])),
    }
    for cache in [ 'L1-I', 'L1-D' ] + [ 'L%u'%l for l in range(2, 5) ]:
        if '%s.loads' % cache in results:
            data['cache.%s' % cache] = format_mpki(sum(results['%s.load-misses'%cache]) + sum(results['%s.store-misses-I'%cache]))

    data['html'] = '''\
<table>
<tr><th>Cores</th><td>%(ncores)d</td> <th>Branch MPKI</th><td>%(branchmis)s</td></tr>
<tr><th>Instructions</th><td>%(ninstrs)s</td> <th>L1-I MPKI</th><td>%(cache.L1-I)s</td></tr>
<tr><th>IPC</th><td>%(ipc)s</td> <th>L1-D MPKI</th><td>%(cache.L1-D)s</td></tr>
<tr><th>Cycles</th><td>%(cycles)s</td> <th>L2 MPKI</th><td>%(cache.L2)s</td></tr>
<tr><th>Time</th><td>%(time)s</td> <th>DRAM APKI</th><td>%(dram)s</td></tr>
</table>
''' % data

    # 'with' closes the file even if serialisation fails.
    with open(os.path.join(outputdir,'basicstats.txt'), "w") as jsonfile:
        jsonfile.write("basicstats = "+json.dumps(data)+";\n")
def computeCoverage(benchmark, pinpoint, pinball_name): baseline_dir = output_dir_base.format('baseline', benchmark, pinpoint) #baseline_dir = output_dir_base.format('baseline_noninclusive', benchmark, pinpoint) output_dir = output_dir_base.format(sys.argv[1], benchmark, pinpoint) try: baseline_res = sniper_lib.get_results(jobid=None, resultsdir=baseline_dir) except (KeyError, ValueError), e: print 'Failed to read stats:', e, pinpoint return 0
def computeSpeedup(benchmark): #baseline_dir = output_dir_base.format('baseline_1B', benchmark) baseline_dir = output_dir_base.format('baseline', benchmark) #baseline_dir = output_dir_base.format('baseline_250M', benchmark) output_dir = output_dir_base.format(sys.argv[1], benchmark) try: baseline_res = sniper_lib.get_results(jobid=None, resultsdir=baseline_dir) except (KeyError, ValueError), e: print 'Failed to read stats:', e return 0
def generate_cheetah(jobid = None, resultsdir = '.', partial = None, outputbase = 'cheetah', title = None, yscale = (0, 50), logy = False, diff = False):
    """Plot the ``cheetah.*`` statistics of a Sniper run with gnuplot.

    Parameters:
        jobid, resultsdir, partial: forwarded to ``sniper_lib.get_results``.
        outputbase: path prefix; ``<outputbase>.input`` is written and the
            gnuplot script in it produces ``<basename>.png``.
        title: plot title, or None for no title.
        yscale: ``(min, max)`` pair for the y range.
        logy: use a logarithmic y axis when true.
        diff: when true, write the difference with the previous point
            instead of the value itself.

    Raises:
        ValueError: if the results contain no ``cheetah.*`` statistics.

    gnuplot is run from the directory part of *outputbase* (or ``.``).
    """
    res = sniper_lib.get_results(jobid = jobid, resultsdir = resultsdir, partial = partial)
    data = dict([ (k.split('.')[1], v) for k, v in res['results'].items() if k.startswith('cheetah.') ])
    if not data:
        # max() below would fail with an unhelpful message on empty input.
        raise ValueError('No cheetah.* statistics found in results')

    def grouping_sortkey(grouping):
        if grouping == 'global':
            return -99999 # Start with total
        elif grouping == 'local':
            return +99999 # End with per-thread
        else:
            return -int(grouping.split('-')[-1]) # Big groups (close to total) to small groups (close to per-thread)

    def grouping_title(grouping):
        return grouping

    GROUPINGS = sorted(data.keys(), key = grouping_sortkey)
    xmax = 1 << max(map(len, data.values()))

    # The script must be completely flushed and closed before gnuplot reads
    # it; 'with' guarantees that instead of relying on 'del' and reference
    # counting.
    with open(outputbase + '.input', 'w') as o:
        o.write('''\
set fontpath "/usr/share/fonts/truetype/freefont"
set terminal png font "FreeSans,15" size 500,350 linewidth 2 rounded
set output "%s.png"
%s
set key top right
set logscale x 2
set xrange [:%f]
set xtics nomirror out ("1 KB" 1024, "32 KB" 32768, "1 MB" 1048576, "32 MB" 33554432, "1 GB" 1073741824, "32 GB" 34359738368.)
set xtics add autofreq
set mxtics default
%s
set yrange [%f:%f]
set ytics nomirror
set format y "%%.1f%%%%"
plot %s
''' % (os.path.basename(outputbase),
       'set title "%s"' % title if title else 'unset title',
       xmax,
       'set logscale y' if logy else '',
       yscale[0], yscale[1],
       ', '.join([ "'-' using 1:(100*$2) with linespoints title '%s'" % grouping_title(grouping) for grouping in GROUPINGS ])
      ))

        # One inline data section per grouping, each terminated by 'e'.
        for grouping in GROUPINGS:
            last = 1
            total = data[grouping][0]  # entry 0 is the denominator for all others
            for size, value in enumerate(data[grouping]):
                if size == 0 or value == 0:
                    continue
                value = 1 - value / float(total)
                o.write('%d %f\n' % (1 << size, last-value if diff else value))
                last = value
            o.write('e\n')

    subprocess.Popen([ 'gnuplot', '%s.input' % os.path.basename(outputbase) ], cwd = os.path.dirname(outputbase) or '.').communicate()
# Fragment of a command-line script, collapsed onto one physical line.  Its
# beginning (the loop that supplies `o` and `a`) and the body of format_ns
# are outside this view.
#
# Visible behaviour:
#   * '-d' sets resultsdir, '-j' sets jobid (converted with long()), and
#     '--partial' must contain ':' (otherwise a hint is written to stderr and
#     usage() is called) and is split on ':' into `partial`.
#   * Leftover positional `args` trigger usage() and sys.exit(-1).
#   * Results are loaded with sniper_lib.get_results(jobid, resultsdir,
#     partial=partial); the core count and the cache level named by
#     'perf_model/cache/levels' are read from the config.
#   * llc_components collects, in sorted order, the part after the first '.'
#     of every stat name containing '.uncore-time-'.
#   * format_num renders a value as a right-aligned 8-character integer.
if o == '-d': resultsdir = a if o == '-j': jobid = long(a) if o == '--partial': if ':' not in a: sys.stderr.write('--partial=<from>:<to>\n') usage() partial = a.split(':') if args: usage() sys.exit(-1) results = sniper_lib.get_results(jobid, resultsdir, partial = partial) config = results['config'] stats = results['results'] ncores = int(config['general/total_cores']) llc_number = int(config['perf_model/cache/levels']) llc_name = 'L%d' % llc_number llc_components = [ name.split('.', 1)[1] for name in sorted(stats.keys()) if '.uncore-time-' in name ] totaltime = stats['%s.uncore-totaltime' % llc_name] requests = stats['%s.uncore-requests' % llc_name] sums = [ 0 for core in range(ncores) ] def format_num(v): return '%8d' % v def format_ns(v):
# periodic(self, time, time_delta) -- periodic statistics hook (Kalman/DVFS variant).
#
# The method is collapsed onto a few physical lines, so its original
# indentation is not recoverable here; the nesting described below is the
# apparent one -- TODO confirm against the properly formatted file.
# `time_delta` is not used by the visible code.
#
# Visible steps:
#   1. If self.max_snapshots is set and self.num_snapshots exceeds it:
#      halve num_snapshots, delete every other 'periodic-<t>' snapshot with
#      sim.util.db_delete and double self.interval.
#   2. When time >= self.next_interval: increment num_snapshots and write a
#      snapshot named 'periodic-<num_snapshots * interval>' via sim.stats.write.
#   3. Load the delta between the two most recent snapshots through
#      sniper_lib.get_results(partial=...) and read core.instructions and
#      the performance_model.* idle/non-idle/elapsed/cycle lists from it.
#   4. Compute self.IdleTimePerc[core] = idle / elapsed per core and append
#      one row each to the CoreIns, IdleTime and IdleTimePerc files under
#      sim.config.output_dir; print the raw lists.
#   5. Run '<sniper_path>kalman/build/kalman-test' via os.system on the
#      CoreIns file, then re-read its output file and append, from its last
#      line only, the float in column 5*core+3 to self.predicted_CoreIns[core].
#   6. If self.DVFS: write the time and each core's frequency to self.fd,
#      set the frequency to 3000 on even snapshot numbers and 1500 on odd
#      ones, and write the IPC derived from self.stats deltas.
#   7. Write a per-interval report to 'stats/p<num_snapshots>.out' with
#      gen_simout.generate_simout and advance self.next_interval.
#
# NOTE(review): self.Rfile_kalmanOut is opened twice and never closed in the
#   visible code.
# NOTE(review): the 'periodic-%d - periodic-%d' print passes
#   (num_snapshots - 1) without multiplying by self.interval, unlike the
#   snapshot names used elsewhere -- looks unintended, confirm.
# NOTE(review): IdleTimePercentage and BusyTimePercentage are assigned but
#   not read anywhere in the visible code.
def periodic(self, time, time_delta): if self.max_snapshots and self.num_snapshots > self.max_snapshots: self.num_snapshots /= 2 for t in range(self.interval, time, self.interval * 2): sim.util.db_delete('periodic-%d' % t) self.interval *= 2 if time >= self.next_interval: self.num_snapshots += 1 sim.stats.write('periodic-%d' % (self.num_snapshots * self.interval)) ################################################# ######### snapshot_last ######################### snapshot_last = sniper_lib.get_results( resultsdir=sim.config.output_dir, partial=('periodic-%d' % ((self.num_snapshots - 1) * self.interval), 'periodic-%d' % ((self.num_snapshots) * self.interval)))['results'] CoreIns = snapshot_last['core.instructions'] IdleTime = snapshot_last['performance_model.idle_elapsed_time'] BusyTime = snapshot_last['performance_model.nonidle_elapsed_time'] CycleCount = snapshot_last['performance_model.cycle_count'] TotalTime = snapshot_last['performance_model.elapsed_time'] for core in range(sim.config.ncores): self.IdleTimePerc[core] = float(IdleTime[core]) / float( TotalTime[core]) IdleTimePercentage = IdleTime[0] / TotalTime[0] BusyTimePercentage = BusyTime[0] / TotalTime[ 0] #just for the first core ##############CoreIns############################################################## self.Wfile_CoreIns = file( os.path.join(sim.config.output_dir, self.filename_CoreIns), 'a') self.Wfile_CoreIns.write('\n') self.Wfile_CoreIns.write('%d\t' % (self.num_snapshots)) self.Wfile_CoreIns.write('CoreIns') for core in range(sim.config.ncores): self.Wfile_CoreIns.write("\t%s" % CoreIns[core]) self.Wfile_CoreIns.close() ##############IdleTime############################################################## self.Wfile_IdleTime = file( os.path.join(sim.config.output_dir, self.filename_IdleTime), 'a') self.Wfile_IdleTime.write('\n') self.Wfile_IdleTime.write('%d\t' % (self.num_snapshots)) self.Wfile_IdleTime.write('IdleTime') for core in range(sim.config.ncores): self.Wfile_IdleTime.write("\t%s" % 
IdleTime[core]) self.Wfile_IdleTime.close() ##############IdleTimePerc############################################################## self.Wfile_IdleTimePerc = file( os.path.join(sim.config.output_dir, self.filename_IdleTimePerc), 'a') self.Wfile_IdleTimePerc.write('\n') self.Wfile_IdleTimePerc.write('%d\t' % (self.num_snapshots)) self.Wfile_IdleTimePerc.write('IdleTimePerc') for core in range(sim.config.ncores): self.Wfile_IdleTimePerc.write("\t%f" % self.IdleTimePerc[core]) self.Wfile_IdleTimePerc.close() ######################################################################################## print '\nperiodic-%d - periodic-%d\n' % ( (self.num_snapshots * self.interval), ((self.num_snapshots - 1))) print 'CoreIns= %s' % CoreIns print 'IdleTime= %s' % IdleTime print 'BusyTime= %s' % BusyTime print 'CycleCount= %s' % CycleCount print 'TotalTime= %s' % TotalTime ############################################################## ################# Kalman ##################################### #self.predicted_CoreIns[core][self.num_snapshots-1] os.system( "%skalman/build/kalman-test %s %s" % (sniper_path, os.path.join(sim.config.output_dir, self.filename_CoreIns), os.path.join(sim.config.output_dir, self.filename_kalmanOut))) ############################################################## ############### Get Kalman predicted results ################ self.Rfile_kalmanOut = file( os.path.join(sim.config.output_dir, self.filename_kalmanOut), 'r') total_line_count = -1 for line in self.Rfile_kalmanOut: total_line_count += 1 self.Rfile_kalmanOut = file( os.path.join(sim.config.output_dir, self.filename_kalmanOut), 'r') line_count = -1 for line in self.Rfile_kalmanOut: line_splitted = line.split('\t') line_count += 1 if (line_count >= 2) and ( line_count == total_line_count ): #stats_lines (we dont read last line, it just has the predicted value not the future one) for core in range(sim.config.ncores): self.predicted_CoreIns[core].append( float(line_splitted[5 * core + 3])) 
#offset=4 in the file ############################################################## ################# DVFS ####################################### if self.DVFS: self.fd.write('%u' % (time / 1e6)) # Time in ns for core in range(sim.config.ncores): # detailed-only IPC self.fd.write(' f: %.2fGHz' % (sim.dvfs.get_frequency(core) / 1000.0)) #raw_input() #if (self.IdleTimePerc[core]>0.4): #if (core==0): if (self.num_snapshots % 2 == 0): #if (self.predicted_CoreIns[core][self.num_snapshots-1] < 2000.0) : sim.dvfs.set_frequency(core, 3000) else: sim.dvfs.set_frequency(core, 1500) print "predicted: " print self.predicted_CoreIns[core][self.num_snapshots - 1] cycles = (self.stats['time'][core].delta - self.stats['ffwd_time'][core].delta ) * sim.dvfs.get_frequency( core) / 1e9 # convert fs to cycles instrs = self.stats['instrs'][core].delta ipc = instrs / (cycles or 1) # Avoid division by zero #self.fd.write(' %.3f' % ipc) # include fast-forward IPCs cycles = self.stats['time'][ core].delta * sim.dvfs.get_frequency( core) / 1e9 # convert fs to cycles instrs = self.stats['coreinstrs'][core].delta ipc = instrs / (cycles or 1) self.fd.write(' %.3f' % ipc) self.fd.write('\n') ############################################################## #os.system("/home/milad/sniper/tools/dumpstats.py --partial periodic-%d:periodic-%d | grep power.Core"%( ((self.num_snapshots-1) *self.interval), ((self.num_snapshots) *self.interval ) ) ) #raw_input(); #getch self.fd.write('periodic-%d' % (self.num_snapshots * self.interval)) gen_simout.generate_simout( resultsdir=sim.config.output_dir, partial=('periodic-%d' % ((self.num_snapshots - 1) * self.interval), 'periodic-%d' % ((self.num_snapshots) * self.interval)), output=open( os.path.join(sim.config.output_dir, 'stats/p%d.out' % self.num_snapshots), 'w'), silent=True) #gen_simout.generate_simout(resultsdir = sim.config.output_dir, partial = ('periodic-10000000000','periodic-20000000000'), output = open(os.path.join(sim.config.output_dir, 
'stats/p%d.out'%self.num_snapshots), 'w'), silent = True) self.next_interval += self.interval
# periodic(self, time, time_delta) -- periodic statistics hook (stall/CPI variant).
#
# The method is collapsed onto a few physical lines, so its original
# indentation is not recoverable here; the nesting described below is the
# apparent one -- TODO confirm against the properly formatted file.
# `time_delta` is not used by the visible code.
#
# Visible steps:
#   1. If self.max_snapshots is set and self.num_snapshots exceeds it:
#      halve num_snapshots, delete every other 'periodic-<t>' snapshot with
#      sim.util.db_delete and double self.interval.
#   2. When time >= self.next_interval: increment num_snapshots and write a
#      snapshot named 'periodic-<num_snapshots * interval>' via sim.stats.write.
#   3. Load two result sets with sniper_lib.get_results(partial=...):
#      snapshot_last (previous snapshot -> current) and snapshot_all
#      ('periodic-0' -> current).
#   4. For every name in stall_list, add the per-core 'interval_timer.<name>'
#      value from snapshot_last into stall[] and from snapshot_all into
#      stall_total_time[].
#   5. Per core, divide stall, futex, L3 uncore and non-idle values by the
#      elapsed time of the interval (TotalTime); cpu_busy_time is
#      1 - (idle fraction + stall fraction).  The values are printed.
#   6. Append one row per snapshot to the CoreIns, IdleTime, IdleTimePerc,
#      IPC, CPI, stall_time, cpu_busy_time, L3_uncore_time and
#      timing_summary files under sim.config.output_dir.  IPC is
#      instructions / ((frequency / 1000.0) * (interval / 1000000)); CPI is
#      1 / IPC, or 0 when the core executed no instructions.
#   7. Write a per-interval report to 'stats/p<num_snapshots>.out' with
#      gen_simout.generate_simout, write an IPC value and the snapshot name
#      to self.fd, and advance self.next_interval.
#
# NOTE(review): self.IdleTimePerc is computed twice with the same formula.
# NOTE(review): the final cycles/instrs/ipc statements use `core` after the
#   per-core loops have ended, so presumably only the last core is reported
#   -- confirm whether a loop was intended.
# NOTE(review): the 'periodic-%d - periodic-%d' print passes
#   (num_snapshots - 1) without multiplying by self.interval -- looks
#   unintended, confirm.
# NOTE(review): (self.interval / 1000000) is an integer division under
#   Python 2 if self.interval is an int; IPC[core] can then be 0 while
#   CoreIns[core] is not, making 1.0 / IPC[core] fail -- confirm.
def periodic(self, time, time_delta): if self.max_snapshots and self.num_snapshots > self.max_snapshots: self.num_snapshots /= 2 for t in range(self.interval, time, self.interval * 2): sim.util.db_delete('periodic-%d' % t) self.interval *= 2 if time >= self.next_interval: self.num_snapshots += 1 sim.stats.write('periodic-%d' % (self.num_snapshots * self.interval)) ################################################# ######### snapshot_last ######################### snapshot_last = sniper_lib.get_results( resultsdir=sim.config.output_dir, partial=('periodic-%d' % ((self.num_snapshots - 1) * self.interval), 'periodic-%d' % ((self.num_snapshots) * self.interval)))['results'] snapshot_all = sniper_lib.get_results( resultsdir=sim.config.output_dir, partial=('periodic-%d' % (0 * self.interval), 'periodic-%d' % ((self.num_snapshots) * self.interval)))['results'] CoreIns = snapshot_last['core.instructions'] IdleTime = snapshot_last['performance_model.idle_elapsed_time'] BusyTime = snapshot_last['performance_model.nonidle_elapsed_time'] CycleCount = snapshot_last['performance_model.cycle_count'] TotalTime = snapshot_last['performance_model.elapsed_time'] Futex = snapshot_last['performance_model.cpiSyncFutex'] L3_uncore_total_time = snapshot_last['L3.uncore-totaltime'] nonidle_elapsed_total_time = snapshot_last[ 'performance_model.nonidle_elapsed_time'] cpu_base_time = snapshot_last['interval_timer.cpiBase'] stall_list = ['cpiBranchPredictor','cpiDataCachecache-remote','cpiDataCachedram','cpiDataCachedram-cache','cpiDataCachedram-local',\ 'cpiDataCachedram-remote','cpiDataCacheL1','cpiDataCacheL1_S','cpiDataCacheL1I',\ 'cpiDataCacheL2','cpiDataCacheL2_S','cpiDataCacheL3','cpiDataCacheL3_S','cpiDataCachemiss',\ 'cpiDataCachenuca-cache','cpiDataCachepredicate-false','cpiDataCacheprefetch-no-mapping','cpiDataCacheunknown',\ 'cpiInstructionCachecache-remote','cpiInstructionCachedram','cpiInstructionCachedram-cache',\ 
'cpiInstructionCachedram-local','cpiInstructionCachedram-remote','cpiInstructionCacheL1','cpiInstructionCacheL1_S',\ 'cpiInstructionCacheL1I','cpiInstructionCacheL2','cpiInstructionCacheL2_S','cpiInstructionCacheL3',\ 'cpiInstructionCacheL3_S','cpiInstructionCachemiss','cpiInstructionCachenuca-cache',\ 'cpiInstructionCachepredicate-false','cpiInstructionCacheprefetch-no-mapping',\ 'cpiInstructionCacheunknown','cpiLongLatency','cpiSerialization'] for core in range(sim.config.ncores): self.IdleTimePerc[core] = float(IdleTime[core]) / float( TotalTime[core]) IPC = [] CPI = [] stall = [] stall_cpi = [] stall_time = [ ] #contains percentage of stall time/total time of the period cpu_busy_time = [] futex_time = [] stall_total_time = [] #contains counter time for for all the run L3_uncore_time = [] nonidle_elapsed_time = [] for core in range(sim.config.ncores): stall.append(0) stall_cpi.append(0.0) stall_time.append(0.0) IPC.append(0.0) CPI.append(0.0) cpu_busy_time.append(0.0) futex_time.append(0.0) stall_total_time.append(0) L3_uncore_time.append(0) nonidle_elapsed_time.append(0.0) for name in stall_list: stall_temp = snapshot_last['interval_timer.%s' % name] stall_temp_all = snapshot_all['interval_timer.%s' % name] for core in range(sim.config.ncores): stall[core] += stall_temp[core] stall_total_time[core] += stall_temp_all[core] for core in range(sim.config.ncores): #stall_cpi[core] = float(stall[core]) / (CoreIns[core]*sim.dvfs.get_frequency(core) ) stall_time[core] = float(stall[core]) / TotalTime[core] cpu_busy_time[core] = 1 - (self.IdleTimePerc[core] + stall_time[core]) #cpu_busy_time[core]= cpu_base_time[core] futex_time[core] = float(Futex[core]) / TotalTime[core] L3_uncore_time[core] = float( L3_uncore_total_time[core]) / TotalTime[core] nonidle_elapsed_time[core] = float( nonidle_elapsed_total_time[core]) / TotalTime[core] print 'stall[%s]=%s\n' % (core, stall[core]) # print 'cpi_stall[%s]=%s\n'%(core,stall_cpi[core]) print 'time_stall[%s]=%s\n' % (core, 
stall_time[core]) print 'idletimePerc[%s]=%s\n' % (core, self.IdleTimePerc[core]) print 'non-stall-busy-time[%s]=%s\n' % ( core, 1 - (self.IdleTimePerc[core] + stall_time[core])) for core in range(sim.config.ncores): self.IdleTimePerc[core] = float(IdleTime[core]) / float( TotalTime[core]) IdleTimePercentage = IdleTime[0] / TotalTime[0] BusyTimePercentage = BusyTime[0] / TotalTime[ 0] #just for the first core ##############CoreIns############################################################## self.Wfile_CoreIns = file( os.path.join(sim.config.output_dir, self.filename_CoreIns), 'a') self.Wfile_CoreIns.write('\n') self.Wfile_CoreIns.write('%d\t' % (self.num_snapshots)) self.Wfile_CoreIns.write('CoreIns') for core in range(sim.config.ncores): self.Wfile_CoreIns.write("\t%s" % CoreIns[core]) self.Wfile_CoreIns.close() ##############IdleTime############################################################## self.Wfile_IdleTime = file( os.path.join(sim.config.output_dir, self.filename_IdleTime), 'a') self.Wfile_IdleTime.write('\n') self.Wfile_IdleTime.write('%d\t' % (self.num_snapshots)) self.Wfile_IdleTime.write('IdleTime') for core in range(sim.config.ncores): self.Wfile_IdleTime.write("\t%s" % IdleTime[core]) self.Wfile_IdleTime.close() ##############IdleTimePerc############################################################## self.Wfile_IdleTimePerc = file( os.path.join(sim.config.output_dir, self.filename_IdleTimePerc), 'a') self.Wfile_IdleTimePerc.write('\n') self.Wfile_IdleTimePerc.write('%d\t' % (self.num_snapshots)) self.Wfile_IdleTimePerc.write('IdleTimePerc') for core in range(sim.config.ncores): self.Wfile_IdleTimePerc.write("\t%f" % self.IdleTimePerc[core]) self.Wfile_IdleTimePerc.close() ######################################################################################## #for core in range(sim.config.ncores): # print ((sim.dvfs.get_frequency(core)/1000.0)*(self.interval/1000000)) #raw_input() ############## IPC 
############################################################## self.Wfile_IPC = file( os.path.join(sim.config.output_dir, self.filename_IPC), 'a') self.Wfile_IPC.write('\n') self.Wfile_IPC.write('%d\t' % (self.num_snapshots)) self.Wfile_IPC.write('IPC') for core in range(sim.config.ncores): self.Wfile_IPC.write( "\t%f" % (int(CoreIns[core]) / ((sim.dvfs.get_frequency(core) / 1000.0) * (self.interval / 1000000)))) IPC[core] = (int(CoreIns[core]) / ((sim.dvfs.get_frequency(core) / 1000.0) * (self.interval / 1000000))) self.Wfile_IPC.close() ############## CPI ############################################################## self.Wfile_CPI = file( os.path.join(sim.config.output_dir, self.filename_CPI), 'a') self.Wfile_CPI.write('\n') self.Wfile_CPI.write('%d\t' % (self.num_snapshots)) self.Wfile_CPI.write('CPI') for core in range(sim.config.ncores): if (CoreIns[core] == 0): self.Wfile_CPI.write("\t0") CPI[core] = 0.0 else: #self.Wfile_CPI.write("\t%f"%(float(cpu_base_time[core])/CoreIns[core])) #CPI[core]= float(cpu_base_time[core])/CoreIns[core] self.Wfile_CPI.write("\t%f" % (1.0 / IPC[core])) CPI[core] = 1.0 / IPC[core] # if ((int(CoreIns[core])/((sim.dvfs.get_frequency(core)/1000.0)*(self.interval/1000000))) == 0): # self.Wfile_CPI.write ("\tinf") # else: self.Wfile_CPI.write("\t%f"%(1/(int(CoreIns[core])/((sim.dvfs.get_frequency(core)/1000.0)*(self.interval/1000000))))) self.Wfile_CPI.close() ############## stall_time ############################################################## self.Wfile_stall_time = file( os.path.join(sim.config.output_dir, self.filename_stall_time), 'a') self.Wfile_stall_time.write('\n') self.Wfile_stall_time.write('%d\t' % (self.num_snapshots)) self.Wfile_stall_time.write('stall_time') for core in range(sim.config.ncores): self.Wfile_stall_time.write("\t%s" % stall_time[core]) self.Wfile_stall_time.close() ############## cpu_busy_time ############################################################## just the (busy_time - stalls) 
self.Wfile_cpu_busy_time = file( os.path.join(sim.config.output_dir, self.filename_cpu_busy_time), 'a') self.Wfile_cpu_busy_time.write('\n') self.Wfile_cpu_busy_time.write('%d\t' % (self.num_snapshots)) self.Wfile_cpu_busy_time.write('cpu_busy_time') for core in range(sim.config.ncores): self.Wfile_cpu_busy_time.write("\t%s" % cpu_busy_time[core]) self.Wfile_cpu_busy_time.close() ##############L3_uncore_time############################################################## self.Wfile_L3_uncore_time = file( os.path.join(sim.config.output_dir, self.filename_L3_uncore_time), 'a') self.Wfile_L3_uncore_time.write('\n') self.Wfile_L3_uncore_time.write('%d\t' % (self.num_snapshots)) self.Wfile_L3_uncore_time.write('L3_uncore_time') for core in range(sim.config.ncores): self.Wfile_L3_uncore_time.write("\t%s" % L3_uncore_time[core]) self.Wfile_L3_uncore_time.close() ############## timing_summary ############################################################## just the (busy_time - stalls) self.Wfile_timing_summary = file( os.path.join(sim.config.output_dir, self.filename_timing_summary), 'a') self.Wfile_timing_summary.write('\n') self.Wfile_timing_summary.write('%d\t' % (self.num_snapshots)) self.Wfile_timing_summary.write('timing_summary') for core in range(sim.config.ncores): self.Wfile_timing_summary.write( "\t----\t%s\t%s\t1\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (CoreIns[core], IPC[core], (1 - self.IdleTimePerc[core]), cpu_busy_time[core], stall_time[core], L3_uncore_time[core], futex_time[core], cpu_base_time[core], stall_total_time[core])) self.Wfile_timing_summary.close() ################################ CacheMiss ######################################## #periodic stat in stats file, #it also generates cachemiss files in the output gen_simout.generate_simout( resultsdir=sim.config.output_dir, partial=('periodic-%d' % ((self.num_snapshots - 1) * self.interval), 'periodic-%d' % ((self.num_snapshots) * self.interval)), output=open( os.path.join(sim.config.output_dir, 'stats/p%d.out' 
% self.num_snapshots), 'w'), silent=True) ######################################################################################## print '\nperiodic-%d - periodic-%d\n' % ( (self.num_snapshots * self.interval), ((self.num_snapshots - 1))) cycles = ( self.stats['time'][core].delta - self.stats['ffwd_time'][core].delta ) * sim.dvfs.get_frequency(core) / 1e9 # convert fs to cycles instrs = self.stats['instrs'][core].delta ipc = instrs / (cycles or 1) # Avoid division by zero #self.fd.write(' %.3f' % ipc) # include fast-forward IPCs cycles = self.stats['time'][core].delta * sim.dvfs.get_frequency( core) / 1e9 # convert fs to cycles instrs = self.stats['coreinstrs'][core].delta ipc = instrs / (cycles or 1) self.fd.write(' %.3f' % ipc) self.fd.write('\n') self.fd.write('periodic-%d' % (self.num_snapshots * self.interval)) self.next_interval += self.interval
def get_stats(line_count, benchmark_dir, num_core, mode, mode_param1,
              mode_param2):
    """Collect the per-core feature lists for one periodic snapshot window.

    The Sniper statistics recorded between the markers
    ``periodic-<(line_count - 1) * interval>`` and
    ``periodic-<line_count * interval>`` are loaded from ``benchmark_dir``
    through ``sniper_lib.get_results`` (``interval`` is a module-level name).
    TLB, branch-predictor, cache and DRAM rates are derived from the raw
    counters and stored back into the loaded ``results`` dict, and the
    per-core frequency, IPC, idle-time share and L3 uncore-time share are
    computed here.

    Args:
        line_count: Index of the snapshot window to read.
        benchmark_dir: Results directory handed to ``sniper_lib.get_results``.
        num_core: Number of cores to process.
        mode: ``'OFFLINE'`` or ``'ONLINE'``.
        mode_param1: OFFLINE: split line holding the previous frequencies;
            the value for core ``c`` is read from index ``2 * c + 2``.
            ONLINE: per-core frequencies, indexed directly by core.
        mode_param2: OFFLINE: split line holding the current frequencies
            (same layout as ``mode_param1``).  ONLINE: not read.

    Returns:
        ``(feature_list, freq_class)`` in OFFLINE mode, where
        ``freq_class[c]`` is ``UL.translate_freq_to_class`` applied to the
        current frequency of core ``c``; ``feature_list`` alone in ONLINE
        mode.  ``feature_list`` is a list of per-core sequences in a fixed
        order; any entry equal to ``inf`` is overwritten with ``10.0``
        in place.

    Raises:
        ValueError: If ``mode`` is neither ``'OFFLINE'`` nor ``'ONLINE'``.
    """
    # Fail early and explicitly: with any other mode the frequencies stay at
    # 0.0 and the IPC computation below divides by zero.
    if mode not in ('OFFLINE', 'ONLINE'):
        raise ValueError(
            "get_stats: unsupported mode %r (expected 'OFFLINE' or 'ONLINE')"
            % (mode,))

    print("stats for %s" % line_count)

    freq = [0.0] * num_core
    freq_class = [-1] * num_core
    last_freq = [0.0] * num_core
    L3_uncore_time = [0.0] * num_core
    IPC = [0.0] * num_core
    IdleTimePerc = [0.0] * num_core

    window = ('periodic-%d' % ((line_count - 1) * interval),
              'periodic-%d' % (line_count * interval))
    results = sniper_lib.get_results(
        resultsdir=benchmark_dir, partial=window)['results']

    CoreIns = results['core.instructions']
    IdleTime = results['performance_model.idle_elapsed_time']
    BusyTime = results['performance_model.nonidle_elapsed_time']
    CycleCount = results['performance_model.cycle_count']
    TotalTime = results['performance_model.elapsed_time']
    Futex = results['performance_model.cpiSyncFutex']
    L3_uncore_total_time = results['L3.uncore-totaltime']
    cpu_base_time = results['interval_timer.cpiBase']
    instruction_count = results['performance_model.instruction_count']

    contribution_features = [
        results['interval_timer.cpContr_branch'],
        results['interval_timer.cpContr_fp_addsub'],
        results['interval_timer.cpContr_fp_muldiv'],
    ]
    uop_features = [
        results['interval_timer.uop_branch'],
        results['interval_timer.uop_fp_addsub'],
        results['interval_timer.uop_fp_muldiv'],
        results['interval_timer.uop_generic'],
        results['interval_timer.uop_load'],
        results['interval_timer.uop_store'],
    ]

    def store_misses(cache):
        # Use '<cache>.store-misses-I' when present, else
        # '<cache>.store-misses' (the fallback key is looked up either way).
        return results.get('%s.store-misses-I' % cache,
                           results['%s.store-misses' % cache])

    ########## tlb ##########
    tlbs = ('itlb', 'dtlb', 'stlb')
    for tlb in tlbs:
        misses = results['%s.miss' % tlb]
        results['%s.missrate' % tlb] = [
            100 * a / float(b or 1)
            for a, b in zip(misses, results['%s.access' % tlb])
        ]
        results['%s.mpki' % tlb] = [
            1000 * a / float(b or 1)
            for a, b in zip(misses, instruction_count)
        ]

    ######## branch_predictor ##########
    bp_incorrect = results['branch_predictor.num-incorrect']
    bp_correct = results['branch_predictor.num-correct']
    results['branch_predictor.missrate'] = [
        100 * float(bp_incorrect[core]) /
        ((bp_correct[core] + bp_incorrect[core]) or 1)
        for core in range(num_core)
    ]
    results['branch_predictor.mpki'] = [
        1000 * float(bp_incorrect[core]) / (instruction_count[core] or 1)
        for core in range(num_core)
    ]

    ######## cache ##########
    allcaches = ['L1-I', 'L1-D'] + ['L%u' % level for level in range(2, 5)]
    existcaches = [c for c in allcaches if '%s.loads' % c in results]
    for c in existcaches:
        results['%s.accesses' % c] = [
            sum(pair)
            for pair in zip(results['%s.loads' % c], results['%s.stores' % c])
        ]
        results['%s.misses' % c] = [
            sum(pair)
            for pair in zip(results['%s.load-misses' % c], store_misses(c))
        ]
        # A cache with zero accesses/instructions yields inf here; those
        # entries are rewritten to 10.0 before the features are returned.
        results['%s.missrate' % c] = [
            100 * a / float(b) if b else float('inf')
            for a, b in zip(results['%s.misses' % c],
                            results['%s.accesses' % c])
        ]
        results['%s.mpki' % c] = [
            1000 * a / float(b) if b else float('inf')
            for a, b in zip(results['%s.misses' % c], instruction_count)
        ]

    ######### dram ##########
    # Stored back into `results` only; not part of the feature vector.
    results['dram.accesses'] = [
        sum(pair) for pair in zip(results['dram.reads'],
                                  results['dram.writes'])
    ]
    results['dram.avglatency'] = [
        a / b if b else float('inf')
        for a, b in zip(results['dram.total-access-latency'],
                        results['dram.accesses'])
    ]

    ######### per-core frequency and time ratios ##########
    for core in range(num_core):
        if mode == 'OFFLINE':
            last_freq[core] = float(mode_param1[2 * core + 2])
            freq[core] = float(mode_param2[2 * core + 2])
            freq_class[core] = UL.translate_freq_to_class(int(freq[core]))
        else:  # 'ONLINE': the same value serves as current and last freq
            freq[core] = mode_param1[core]
            last_freq[core] = mode_param1[core]

        L3_uncore_time[core] = float(
            L3_uncore_total_time[core]) / TotalTime[core]
        # NOTE(review): `interval / 1000000` truncates under Python 2 when
        # `interval` is an int, and a zero frequency or a zero quotient makes
        # this raise ZeroDivisionError.  The formula is left unchanged so the
        # values stay comparable with previously generated data.
        IPC[core] = (int(CoreIns[core]) / ((freq[core] / 1000.0) *
                                           (interval / 1000000)))
        IdleTimePerc[core] = float(IdleTime[core]) / float(TotalTime[core])
        if IdleTimePerc[core] < 0:
            IdleTimePerc[core] = 0.0

    ######### feature vector ##########
    # The order below is the layout returned to callers; do not reorder.
    feature_list = [
        CoreIns, IPC, IdleTimePerc, IdleTime, BusyTime, CycleCount,
        cpu_base_time, L3_uncore_time, Futex,  # TotalTime is not included
    ]
    for tlb in tlbs:
        feature_list += [
            results['%s.missrate' % tlb],
            results['%s.miss' % tlb],
            results['%s.access' % tlb],
            results['%s.mpki' % tlb],
        ]
    feature_list += [
        results['branch_predictor.missrate'],
        bp_incorrect,
        bp_correct,
        results['branch_predictor.mpki'],
    ]
    for c in ('L1-I', 'L1-D', 'L2', 'L3'):
        loads = results['%s.loads' % c]
        # NOTE(review): `loads` appears twice and '<cache>.misses' never
        # does, exactly as in the original list.  It looks like a copy/paste
        # slip, but it is preserved because consumers presumably depend on
        # the vector length and order -- confirm before changing.
        feature_list += [
            results['%s.missrate' % c],
            results['%s.accesses' % c],
            loads,
            loads,
            results['%s.stores' % c],
            results['%s.load-misses' % c],
            store_misses(c),
        ]
    feature_list += contribution_features
    feature_list += uop_features
    feature_list.append(last_freq)

    # Replace infinite rates with a finite sentinel, in place.
    inf = float('inf')
    for core in range(num_core):
        for feature in feature_list:
            if feature[core] == inf:
                feature[core] = 10.0

    if mode == 'OFFLINE':
        # The frequency class is the label that accompanies the features.
        return feature_list, freq_class
    # ONLINE: no label; per the original note the frequency is added later,
    # when the features are passed to the predictor.
    return feature_list
def periodic(self, time, time_delta): if self.max_snapshots and self.num_snapshots > self.max_snapshots: self.num_snapshots /= 2 for t in range(self.interval, time, self.interval * 2): sim.util.db_delete('periodic-%d' % t) self.interval *= 2 if time >= self.next_interval: self.num_snapshots += 1 sim.stats.write('periodic-%d' % (self.num_snapshots * self.interval)) ################################################# ######### snapshot_last ######################### snapshot_last = sniper_lib.get_results( resultsdir=sim.config.output_dir, partial=('periodic-%d' % ((self.num_snapshots - 1) * self.interval), 'periodic-%d' % ((self.num_snapshots) * self.interval)))['results'] snapshot_all = sniper_lib.get_results( resultsdir=sim.config.output_dir, partial=('periodic-%d' % (0 * self.interval), 'periodic-%d' % ((self.num_snapshots) * self.interval)))['results'] CoreIns = snapshot_last['core.instructions'] IdleTime = snapshot_last['performance_model.idle_elapsed_time'] BusyTime = snapshot_last['performance_model.nonidle_elapsed_time'] CycleCount = snapshot_last['performance_model.cycle_count'] TotalTime = snapshot_last['performance_model.elapsed_time'] Futex = snapshot_last['performance_model.cpiSyncFutex'] L3_uncore_total_time = snapshot_last['L3.uncore-totaltime'] nonidle_elapsed_total_time = snapshot_last[ 'performance_model.nonidle_elapsed_time'] cpu_base_time = snapshot_last['interval_timer.cpiBase'] stall_list = ['cpiBranchPredictor','cpiDataCachecache-remote','cpiDataCachedram','cpiDataCachedram-cache','cpiDataCachedram-local',\ 'cpiDataCachedram-remote','cpiDataCacheL1','cpiDataCacheL1_S','cpiDataCacheL1I',\ 'cpiDataCacheL2','cpiDataCacheL2_S','cpiDataCacheL3','cpiDataCacheL3_S','cpiDataCachemiss',\ 'cpiDataCachenuca-cache','cpiDataCachepredicate-false','cpiDataCacheprefetch-no-mapping','cpiDataCacheunknown',\ 'cpiInstructionCachecache-remote','cpiInstructionCachedram','cpiInstructionCachedram-cache',\ 
'cpiInstructionCachedram-local','cpiInstructionCachedram-remote','cpiInstructionCacheL1','cpiInstructionCacheL1_S',\ 'cpiInstructionCacheL1I','cpiInstructionCacheL2','cpiInstructionCacheL2_S','cpiInstructionCacheL3',\ 'cpiInstructionCacheL3_S','cpiInstructionCachemiss','cpiInstructionCachenuca-cache',\ 'cpiInstructionCachepredicate-false','cpiInstructionCacheprefetch-no-mapping',\ 'cpiInstructionCacheunknown','cpiLongLatency','cpiSerialization'] for core in range(sim.config.ncores): self.IdleTimePerc[core] = float(IdleTime[core]) / float( TotalTime[core]) IPC = [] CPI = [] stall = [] stall_cpi = [] stall_time = [ ] #contains percentage of stall time/total time of the period cpu_busy_time = [] futex_time = [] stall_total_time = [] #contains counter time for for all the run L3_uncore_time = [] nonidle_elapsed_time = [] for core in range(sim.config.ncores): stall.append(0) stall_cpi.append(0.0) stall_time.append(0.0) IPC.append(0.0) CPI.append(0.0) cpu_busy_time.append(0.0) futex_time.append(0.0) stall_total_time.append(0) L3_uncore_time.append(0) nonidle_elapsed_time.append(0.0) for name in stall_list: stall_temp = snapshot_last['interval_timer.%s' % name] stall_temp_all = snapshot_all['interval_timer.%s' % name] for core in range(sim.config.ncores): stall[core] += stall_temp[core] stall_total_time[core] += stall_temp_all[core] for core in range(sim.config.ncores): #stall_cpi[core] = float(stall[core]) / (CoreIns[core]*sim.dvfs.get_frequency(core) ) stall_time[core] = float(stall[core]) / TotalTime[core] cpu_busy_time[core] = 1 - (self.IdleTimePerc[core] + stall_time[core]) #cpu_busy_time[core]= cpu_base_time[core] futex_time[core] = float(Futex[core]) / TotalTime[core] L3_uncore_time[core] = float( L3_uncore_total_time[core]) / TotalTime[core] nonidle_elapsed_time[core] = float( nonidle_elapsed_total_time[core]) / TotalTime[core] print 'stall[%s]=%s\n' % (core, stall[core]) # print 'cpi_stall[%s]=%s\n'%(core,stall_cpi[core]) print 'time_stall[%s]=%s\n' % (core, 
stall_time[core]) print 'idletimePerc[%s]=%s\n' % (core, self.IdleTimePerc[core]) print 'non-stall-busy-time[%s]=%s\n' % ( core, 1 - (self.IdleTimePerc[core] + stall_time[core])) for core in range(sim.config.ncores): self.IdleTimePerc[core] = float(IdleTime[core]) / float( TotalTime[core]) IdleTimePercentage = IdleTime[0] / TotalTime[0] BusyTimePercentage = BusyTime[0] / TotalTime[ 0] #just for the first core ##############CoreIns############################################################## self.Wfile_CoreIns = file( os.path.join(sim.config.output_dir, self.filename_CoreIns), 'a') self.Wfile_CoreIns.write('\n') self.Wfile_CoreIns.write('%d\t' % (self.num_snapshots)) self.Wfile_CoreIns.write('CoreIns') for core in range(sim.config.ncores): self.Wfile_CoreIns.write("\t%s" % CoreIns[core]) self.Wfile_CoreIns.close() ##############IdleTime############################################################## self.Wfile_IdleTime = file( os.path.join(sim.config.output_dir, self.filename_IdleTime), 'a') self.Wfile_IdleTime.write('\n') self.Wfile_IdleTime.write('%d\t' % (self.num_snapshots)) self.Wfile_IdleTime.write('IdleTime') for core in range(sim.config.ncores): self.Wfile_IdleTime.write("\t%s" % IdleTime[core]) self.Wfile_IdleTime.close() ##############IdleTimePerc############################################################## self.Wfile_IdleTimePerc = file( os.path.join(sim.config.output_dir, self.filename_IdleTimePerc), 'a') self.Wfile_IdleTimePerc.write('\n') self.Wfile_IdleTimePerc.write('%d\t' % (self.num_snapshots)) self.Wfile_IdleTimePerc.write('IdleTimePerc') for core in range(sim.config.ncores): self.Wfile_IdleTimePerc.write("\t%f" % self.IdleTimePerc[core]) self.Wfile_IdleTimePerc.close() ######################################################################################## #for core in range(sim.config.ncores): # print ((sim.dvfs.get_frequency(core)/1000.0)*(self.interval/1000000)) #raw_input() ############## IPC 
############################################################## self.Wfile_IPC = file( os.path.join(sim.config.output_dir, self.filename_IPC), 'a') self.Wfile_IPC.write('\n') self.Wfile_IPC.write('%d\t' % (self.num_snapshots)) self.Wfile_IPC.write('IPC') for core in range(sim.config.ncores): self.Wfile_IPC.write( "\t%f" % (int(CoreIns[core]) / ((sim.dvfs.get_frequency(core) / 1000.0) * (self.interval / 1000000)))) IPC[core] = (int(CoreIns[core]) / ((sim.dvfs.get_frequency(core) / 1000.0) * (self.interval / 1000000))) self.Wfile_IPC.close() ############## CPI ############################################################## self.Wfile_CPI = file( os.path.join(sim.config.output_dir, self.filename_CPI), 'a') self.Wfile_CPI.write('\n') self.Wfile_CPI.write('%d\t' % (self.num_snapshots)) self.Wfile_CPI.write('CPI') for core in range(sim.config.ncores): if (CoreIns[core] == 0): self.Wfile_CPI.write("\t0") CPI[core] = 0.0 else: #self.Wfile_CPI.write("\t%f"%(float(cpu_base_time[core])/CoreIns[core])) #CPI[core]= float(cpu_base_time[core])/CoreIns[core] self.Wfile_CPI.write("\t%f" % (1.0 / IPC[core])) CPI[core] = 1.0 / IPC[core] # if ((int(CoreIns[core])/((sim.dvfs.get_frequency(core)/1000.0)*(self.interval/1000000))) == 0): # self.Wfile_CPI.write ("\tinf") # else: self.Wfile_CPI.write("\t%f"%(1/(int(CoreIns[core])/((sim.dvfs.get_frequency(core)/1000.0)*(self.interval/1000000))))) self.Wfile_CPI.close() ############## stall_time ############################################################## self.Wfile_stall_time = file( os.path.join(sim.config.output_dir, self.filename_stall_time), 'a') self.Wfile_stall_time.write('\n') self.Wfile_stall_time.write('%d\t' % (self.num_snapshots)) self.Wfile_stall_time.write('stall_time') for core in range(sim.config.ncores): self.Wfile_stall_time.write("\t%s" % stall_time[core]) self.Wfile_stall_time.close() ############## cpu_busy_time ############################################################## just the (busy_time - stalls) 
self.Wfile_cpu_busy_time = file( os.path.join(sim.config.output_dir, self.filename_cpu_busy_time), 'a') self.Wfile_cpu_busy_time.write('\n') self.Wfile_cpu_busy_time.write('%d\t' % (self.num_snapshots)) self.Wfile_cpu_busy_time.write('cpu_busy_time') for core in range(sim.config.ncores): self.Wfile_cpu_busy_time.write("\t%s" % cpu_busy_time[core]) self.Wfile_cpu_busy_time.close() ##############L3_uncore_time############################################################## self.Wfile_L3_uncore_time = file( os.path.join(sim.config.output_dir, self.filename_L3_uncore_time), 'a') self.Wfile_L3_uncore_time.write('\n') self.Wfile_L3_uncore_time.write('%d\t' % (self.num_snapshots)) self.Wfile_L3_uncore_time.write('L3_uncore_time') for core in range(sim.config.ncores): self.Wfile_L3_uncore_time.write("\t%s" % L3_uncore_time[core]) self.Wfile_L3_uncore_time.close() ############## timing_summary ############################################################## just the (busy_time - stalls) self.Wfile_timing_summary = file( os.path.join(sim.config.output_dir, self.filename_timing_summary), 'a') self.Wfile_timing_summary.write('\n') self.Wfile_timing_summary.write('%d\t' % (self.num_snapshots)) self.Wfile_timing_summary.write('timing_summary') for core in range(sim.config.ncores): self.Wfile_timing_summary.write( "\t----\t%s\t%s\t1\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (CoreIns[core], IPC[core], (1 - self.IdleTimePerc[core]), cpu_busy_time[core], stall_time[core], L3_uncore_time[core], futex_time[core], cpu_base_time[core], stall_total_time[core])) self.Wfile_timing_summary.close() ################################ CacheMiss ######################################## #periodic stat in stats file, #it also generates cachemiss files in the output gen_simout.generate_simout( resultsdir=sim.config.output_dir, partial=('periodic-%d' % ((self.num_snapshots - 1) * self.interval), 'periodic-%d' % ((self.num_snapshots) * self.interval)), output=open( os.path.join(sim.config.output_dir, 'stats/p%d.out' 
% self.num_snapshots), 'w'), silent=True) ######################################################################################## print '\nperiodic-%d - periodic-%d\n' % ( (self.num_snapshots * self.interval), ((self.num_snapshots - 1))) print 'CoreIns= %s' % CoreIns print 'IdleTime= %s' % IdleTime print 'BusyTime= %s' % BusyTime print 'CycleCount= %s' % CycleCount print 'TotalTime= %s' % TotalTime ############################################################## ################# Kalman ##################################### #self.predicted_CoreIns[core][self.num_snapshots-1] os.system ("%skalman/build/kalman-test %s %s"%(sniper_path, os.path.join(sim.config.output_dir, self.filename_CoreIns),\ os.path.join(sim.config.output_dir, self.filename_kalmanOut_CoreIns))) os.system ("%skalman/build/kalman-test %s %s"%(sniper_path, os.path.join(sim.config.output_dir, self.filename_CPI),\ os.path.join(sim.config.output_dir, self.filename_kalmanOut_CPI))) ''' os.system ("%skalman/build/kalman-test %s %s"%(sniper_path, os.path.join(sim.config.output_dir, self.filename_IdleTimePerc),\ os.path.join(sim.config.output_dir, self.filename_kalmanOut_IdleTimePerc))) os.system ("%skalman/build/kalman-test %s %s"%(sniper_path, os.path.join(sim.config.output_dir, self.filename_IPC),\ os.path.join(sim.config.output_dir, self.filename_kalmanOut_IPC))) os.system ("%skalman/build/kalman-test %s %s"%(sniper_path, os.path.join(sim.config.output_dir, self.filename_cpu_busy_time),\ os.path.join(sim.config.output_dir, self.filename_kalmanOut_cpu_busy_time))) os.system ("%skalman/build/kalman-test %s %s"%(sniper_path, os.path.join(sim.config.output_dir, self.filename_stall_time),\ os.path.join(sim.config.output_dir, self.filename_kalmanOut_stall_time))) os.system ("%skalman/build/kalman-test %s %s"%(sniper_path, os.path.join(sim.config.output_dir, self.filename_L3_uncore_time),\ os.path.join(sim.config.output_dir, self.filename_kalmanOut_L3_uncore_time))) ''' #os.system ("%skalman/build/kalman-test 
%s %s"%(sniper_path, os.path.join(sim.config.output_dir, self.filename_Workload), os.path.join(sim.config.output_dir, self.filename_kalmanOut_Workload))) ############################################################## ############### Get Kalman predicted results ################ ######### kalmanOut_CoreIns self.Rfile_kalmanOut_CoreIns = file( os.path.join(sim.config.output_dir, self.filename_kalmanOut_CoreIns), 'r') total_line_count = -1 for line in self.Rfile_kalmanOut_CoreIns: total_line_count += 1 self.Rfile_kalmanOut_CoreIns = file( os.path.join(sim.config.output_dir, self.filename_kalmanOut_CoreIns), 'r') line_count = -1 for line in self.Rfile_kalmanOut_CoreIns: line_splitted = line.split('\t') line_count += 1 if (line_count >= 2) and (line_count == total_line_count): for core in range(sim.config.ncores): self.predicted_CoreIns[core].append( float(line_splitted[5 * core + 3])) #offset=3 in the file ''' #note: self.predicted_CoreIns[core][self.num_snapshots-1] shows the last prediction for core= ... 
######## kalmanOut_IdleTimePerc self.Rfile_kalmanOut_IdleTimePerc = file(os.path.join(sim.config.output_dir, self.filename_kalmanOut_IdleTimePerc), 'r') line_count=-1 for line in self.Rfile_kalmanOut_IdleTimePerc: line_splitted = line.split('\t') line_count+=1 if (line_count>=2) and (line_count == total_line_count): for core in range(sim.config.ncores): self.predicted_IdleTimePerc[core].append(float(line_splitted[5*core+3])) #offset=3 in the file ######## kalmanOut_IPC self.Rfile_kalmanOut_IPC = file(os.path.join(sim.config.output_dir, self.filename_kalmanOut_IPC), 'r') line_count=-1 for line in self.Rfile_kalmanOut_IPC: line_splitted = line.split('\t') line_count+=1 if (line_count>=2) and (line_count == total_line_count): for core in range(sim.config.ncores): self.predicted_IPC[core].append(float(line_splitted[5*core+3])) #offset=3 in the file ######## kalmanOut_cpu_busy_time self.Rfile_kalmanOut_cpu_busy_time = file(os.path.join(sim.config.output_dir, self.filename_kalmanOut_cpu_busy_time), 'r') line_count=-1 for line in self.Rfile_kalmanOut_cpu_busy_time: line_splitted = line.split('\t') line_count+=1 if (line_count>=2) and (line_count == total_line_count): for core in range(sim.config.ncores): self.predicted_cpu_busy_time[core].append(float(line_splitted[5*core+3])) #offset=3 in the file ######## kalmanOut_stall_time self.Rfile_kalmanOut_stall_time = file(os.path.join(sim.config.output_dir, self.filename_kalmanOut_stall_time), 'r') line_count=-1 for line in self.Rfile_kalmanOut_stall_time: line_splitted = line.split('\t') line_count+=1 if (line_count>=2) and (line_count == total_line_count): for core in range(sim.config.ncores): self.predicted_stall_time[core].append(float(line_splitted[5*core+3])) #offset=3 in the file ''' ######## kalmanOut_CPI self.Rfile_kalmanOut_CPI = file( os.path.join(sim.config.output_dir, self.filename_kalmanOut_CPI), 'r') line_count = -1 for line in self.Rfile_kalmanOut_CPI: line_splitted = line.split('\t') line_count += 1 if 
(line_count >= 2) and (line_count == total_line_count): for core in range(sim.config.ncores): self.predicted_CPI[core].append( float(line_splitted[5 * core + 3])) #offset=3 in the file ''' ######## kalmanOut_L3_uncore_time self.Rfile_kalmanOut_L3_uncore_time = file(os.path.join(sim.config.output_dir, self.filename_kalmanOut_L3_uncore_time), 'r') line_count=-1 for line in self.Rfile_kalmanOut_L3_uncore_time: line_splitted = line.split('\t') line_count+=1 if (line_count>=2) and (line_count == total_line_count): for core in range(sim.config.ncores): self.predicted_L3_uncore_time[core].append(float(line_splitted[5*core+3])) #offset=3 in the file ''' ######## kalmanOut_Workload #self.Rfile_kalmanOut_Workload = file(os.path.join(sim.config.output_dir, self.filename_kalmanOut_Workload), 'r') #line_count=-1 #for line in self.Rfile_kalmanOut_Workload: # print line # line_splitted = line.split('\t') # line_count+=1 # print line_count # print total_line_count # print "YEYEYEYEYE" # if (line_count>=2) and (line_count == total_line_count-1):# -1 # print "yes" # for core in range(sim.config.ncores): # self.predicted_Workload[core].append(float(line_splitted[5*core+3])) #offset=3 in the file print "last predictions for core 0: " print "CoreIns: %s" % self.predicted_CoreIns[0][self.num_snapshots - 1] #print "IdleTimePerc: %s"%self.predicted_IdleTimePerc[0][self.num_snapshots-1] #print "IPC: %s"%self.predicted_IPC[0][self.num_snapshots-1] #print "cpu_cpu_busy_time: %s"%self.predicted_cpu_busy_time[0][self.num_snapshots-1] #print "cpu_stall_time: %s"%self.predicted_stall_time[0][self.num_snapshots-1] #print "L3_uncore_time: %s"%self.predicted_L3_uncore_time[0][self.num_snapshots-1] print "CPI: %s" % self.predicted_CPI[0][self.num_snapshots - 1] #print "Workload: %s"%self.predicted_Workload[0][self.num_snapshots-2] #raw_input() ############################################################## ################# DVFS ####################################### if self.DVFS: FreqList = [ 
'1000', '1100', '1200', '1300', '1400', '1500', '1600', '1700', '1800', '1900', '2000' ] AllowedPerformanceLoss = 0.50 freq_H = 2000.0 T = self.interval self.fd.write('%u' % (time / 1e6)) # Time in ns #currrent_freq = sim.dvfs.get_frequency(0) #currrent_freq -= 100 #self.TotalWork+=1 for core in range(sim.config.ncores): freq_P = sim.dvfs.get_frequency(core) CPI_P = CPI[core] I_P = CoreIns[core] self.T_delay[core] += (I_P * (freq_H / freq_P - 1) * CPI_P) / freq_H self.T_ref[core] += (I_P * CPI_P) / freq_P #self.T_delay[core] += (cpu_busy_time[core]*(freq_H/freq_P-1))/freq_H #self.T_ref[core] += (cpu_busy_time[core])/freq_P #self.T_delay[core] += (((freq_H/freq_P)-1)/freq_P)*(cpu_busy_time[core]/freq_H + L3_uncore_time[core]/freq_P) #multiplied by T #self.T_ref[core] += ((cpu_busy_time[core] + L3_uncore_time[core])/freq_P*freq_P) #multiplied by T #self.T_delay[core] += ((freq_H/freq_P)-1) * ( (freq_P/freq_H)*cpu_busy_time[core] + L3_uncore_time[core] ) #self.T_ref[core] += cpu_busy_time[core] + L3_uncore_time[core] #self.T_delay[core] += ((freq_H/freq_P)-1) * ( (freq_P/freq_H)*cpu_busy_time[core] + stall_time[core] ) #self.T_ref[core] += cpu_busy_time[core] + stall_time[core] #self.T_delay[core] += ((freq_H/freq_P)-1) * ( (freq_P/freq_H)*cpu_busy_time[core] ) #self.T_ref[core] += cpu_busy_time[core] if self.T_ref[core] == 0: self.PF[core] = 0 else: self.PF[core] = self.T_delay[core] / self.T_ref[core] print "PF[%d]=%s" % (core, self.PF[core]) print "PF[%d]=%s, delay=%s, ref=%s, freq=%s" % ( core, self.PF[core], self.T_delay[core], self.T_ref[core], freq_P) #raw_input() #self.TotalInstDone[core]+= CoreIns[core]*(2000.0/sim.dvfs.get_frequency(core)) self.TotalInstDone[core] += CoreIns[core] if (self.TotalInstDone[core] != 0): self.InstLoss[core] = CoreIns[core] * ( freq_H / freq_P - 1) self.InstDone[core] = CoreIns[core] self.InstLossRate[core] = ( 1 - (sim.dvfs.get_frequency(core) / 2000.0)) self.Inst_for_freq_H[core] = CoreIns[core] * (freq_H / freq_P) 
self.TotalInst_for_freq_H[ core] += self.Inst_for_freq_H[core] self.TotallInstLoss[core] += CoreIns[core] * ( 1 - (sim.dvfs.get_frequency(core) / 2000.0)) self.TotallInstDone[core] += CoreIns[core] self.TotalInstLossRate[core] = self.TotallInstLoss[ core] / self.TotalInstDone[core] #self.ExpectedWorkDone[core]+=(2000/sim.dvfs.get_frequency(core)) #ExpectedRemainedWork[core]=TotalWork-ExpectedWorkDone[core] self.fd.write(' f: %.2fGHz' % (sim.dvfs.get_frequency(core) / 1000.0)) #raw_input() ##############SummaryDVFS############################################################## self.Wfile_SummaryDVFS = file( os.path.join(sim.config.output_dir, self.filename_SummaryDVFS), 'a') self.Wfile_SummaryDVFS.write('\n') self.Wfile_SummaryDVFS.write('%d\t' % (self.num_snapshots)) self.Wfile_SummaryDVFS.write('CoreIns') for core in range(sim.config.ncores): self.Wfile_SummaryDVFS.write("\t\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t"%(core,sim.dvfs.get_frequency(core),self.InstDone[core],\ self.PredictedInstDone[core],self.Inst_for_freq_H[core],self.PredictedInst_for_freq_H[core],self.InstLoss[core],\ self.PredictedInstLoss[core],self.InstLossRate[core],self.PredictedInstLossRate[core],self.TotallInstDone[core],\ self.PredictedTotalInstDone[core],self.TotalInstLossRate[core],self.PredictedTotalInstLossRate[core],self.PF[core],self.PF_next[core])) self.Wfile_SummaryDVFS.close() ######################################################################################## for core in range(sim.config.ncores): if (self.num_snapshots > 2): freq_set = 0 for freq in FreqList: #from low freq to high freq freq_P_next = float(freq) if (freq_set == 0): CPI_P_next = self.predicted_CPI[core][ self.num_snapshots - 1] I_P_next = self.predicted_CoreIns[core][ self.num_snapshots - 1] #cpu_busy_time_next = self.predicted_cpu_busy_time[core][self.num_snapshots-1] #L3_uncore_time_next = self.predicted_L3_uncore_time[core][self.num_snapshots-1] #stall_time_next = 
self.predicted_stall_time[core][self.num_snapshots-1] self.T_delay_next[core] = ( I_P_next * (freq_H / freq_P_next - 1) * CPI_P_next) / freq_H + self.T_delay[core] self.T_ref_next[core] = ( I_P_next * CPI_P_next ) / freq_P_next + self.T_ref[core] #self.T_delay_next[core] = (int(self.predicted_cpu_busy_time[core][self.num_snapshots-1])*(freq_H/freq_P_next-1))/freq_H + self.T_delay[core] #self.T_ref_next[core] = int(self.predicted_cpu_busy_time[core][self.num_snapshots-1])/freq_P_next + self.T_ref[core] #self.T_delay_next[core] = (((freq_H/freq_P_next)-1)/freq_P_next)*(cpu_busy_time_next/freq_H + L3_uncore_time_next/freq_P_next)+self.T_delay[core] #self.T_ref_next[core] = ((cpu_busy_time_next + L3_uncore_time_next)/freq_P_next*freq_P_next)+self.T_ref[core] #self.T_delay_next[core] = ((freq_H/freq_P_next)-1) * ( (freq_P_next/freq_H)*cpu_busy_time_next + L3_uncore_time_next ) + self.T_delay[core] #self.T_ref_next[core] = cpu_busy_time_next + L3_uncore_time_next + self.T_ref[core] #self.T_delay_next[core] = ((freq_H/freq_P_next)-1) * ( (freq_P_next/freq_H)*cpu_busy_time_next + stall_time_next ) + self.T_delay[core] #self.T_ref_next[core] = cpu_busy_time_next + stall_time_next + self.T_ref[core] #self.T_delay_next[core] = ((freq_H/freq_P_next)-1) * ( (freq_P_next/freq_H)*cpu_busy_time_next ) + self.T_delay[core] #self.T_ref_next[core] = cpu_busy_time_next + self.T_ref[core] if self.T_ref_next[core] == 0: self.PF_next[core] = 0 else: self.PF_next[core] = self.T_delay_next[ core] / self.T_ref_next[core] print "PF_next[%d]=%s,delay=%s,ref=%s,freq_next=%s" % ( core, self.PF_next[core], self.T_delay_next[core], self.T_ref_next[core], freq_P_next) #raw_input() self.PredictedInstLoss[core] = int( self.predicted_CoreIns[core][ self.num_snapshots - 1]) * (freq_H / freq_P_next - 1) self.PredictedInstDone[ core] = self.predicted_CoreIns[core][ self.num_snapshots - 1] self.PredictedInst_for_freq_H[ core] = self.predicted_CoreIns[core][ self.num_snapshots - 1] * (freq_H / 
freq_P_next) if self.PredictedInstDone[core] == 0: self.PredictedInstLossRate[core] = 0 else: self.PredictedInstLossRate[core] = float( self.PredictedInstLoss[core] ) / self.PredictedInstDone[core] self.PredictedTotalInst_for_freq_H[ core] = self.PredictedInst_for_freq_H[ core] + self.TotalInst_for_freq_H[core] self.PredictedTotalInstLoss[ core] = self.PredictedInstLoss[ core] + self.TotallInstLoss[core] self.PredictedTotalInstDone[ core] = self.PredictedInstDone[ core] + self.TotalInstDone[core] if (self.PredictedTotalInstDone[core] == 0): self.PredictedTotalInstLossRate[core] = 0 else: self.PredictedTotalInstLossRate[ core] = float( self.PredictedTotalInstLoss[core] ) / self.PredictedTotalInstDone[core] #if (self.PredictedTotalInstLossRate[core] < AllowedPerformanceLoss): if (self.PF_next[core] < AllowedPerformanceLoss): sim.dvfs.set_frequency( core, int(freq_P_next)) freq_set = 1 if ( freq_set == 0 ): #any of the frequencies not proper. we use the highest not to let the loss increase sim.dvfs.set_frequency(core, int(freq_H)) #if (core==0)or(core==4)or(core==8)or(core==12): #if (core==0): # sim.dvfs.set_frequency(core,5000) cycles = (self.stats['time'][core].delta - self.stats['ffwd_time'][core].delta ) * sim.dvfs.get_frequency( core) / 1e9 # convert fs to cycles instrs = self.stats['instrs'][core].delta ipc = instrs / (cycles or 1) # Avoid division by zero #self.fd.write(' %.3f' % ipc) # include fast-forward IPCs cycles = self.stats['time'][ core].delta * sim.dvfs.get_frequency( core) / 1e9 # convert fs to cycles instrs = self.stats['coreinstrs'][core].delta ipc = instrs / (cycles or 1) self.fd.write(' %.3f' % ipc) self.fd.write('\n') #raw_input() ############################################################## #os.system("/home/milad/sniper/tools/dumpstats.py --partial periodic-%d:periodic-%d | grep power.Core"%( ((self.num_snapshots-1) *self.interval), ((self.num_snapshots) *self.interval ) ) ) #raw_input(); #getch self.fd.write('periodic-%d' % 
(self.num_snapshots * self.interval)) #periodic stat in stats file #gen_simout.generate_simout(resultsdir = sim.config.output_dir, partial = ('periodic-%d' % ((self.num_snapshots-1) *self.interval), 'periodic-%d' % ((self.num_snapshots) *self.interval ) ), output = open(os.path.join(sim.config.output_dir, 'stats/p%d.out'%self.num_snapshots), 'w'), silent = True) #gen_simout.generate_simout(resultsdir = sim.config.output_dir, partial = ('periodic-10000000000','periodic-20000000000'), output = open(os.path.join(sim.config.output_dir, 'stats/p%d.out'%self.num_snapshots), 'w'), silent = True) self.next_interval += self.interval
svg.paint_box((xpos(lid)-.075, -.2), (size+.15, y+1+.4), color = '#dddddd', zorder = 2, root = tile_root(lid)) size = 0 y += 1 if is_mesh: for lid in range(0, max_id+1, concentration): svg.paint_box((xpos(lid)-.075, -.2), (concentration+.15, y+.4), color = '#dddddd', zorder = 2, root = tile_root(lid)) y += 1 if is_mesh: y *= height else: y = 0 if is_mesh: results = sniper_lib.get_results(resultsdir = resultsdir, jobid = jobid)['results'] if 'dram-queue.total-time-used' in results \ and 'network.shmem-1.mesh.link-up.num-requests' in results: import gridcolors time0 = max(results['performance_model.elapsed_time']) def util2color(utilization): return '#%02x%02x%02x' % gridcolors.colorscale(utilization) OFFSET_Y = y SCALE_X = .6 BOXSIZE = .2 for y in range(height): for x in range(width): for c in range(concentration):
def computeCoverage(benchmark, pinpoint, pinball_name):
    """Return the percentage of baseline L2 load misses removed by a run.

    The baseline results are read from the 'baseline' configuration and the
    compared results from the configuration named by ``sys.argv[1]``; both
    directories are built with the module-level ``output_dir_base`` template.

    Args:
        benchmark: benchmark name substituted into ``output_dir_base``.
        pinpoint: pinpoint identifier substituted into ``output_dir_base``
            and echoed in the failure message.
        pinball_name: unused by this function.

    Returns:
        ``100 * (baseline - output) / baseline`` computed on element 0 of
        'L2.load-misses'.  Returns 0 when either result set cannot be read
        (KeyError/ValueError from ``sniper_lib.get_results``), and 0.0 when
        the baseline has no misses, because coverage is undefined there.
    """
    baseline_dir = output_dir_base.format('baseline', benchmark, pinpoint)
    output_dir = output_dir_base.format(sys.argv[1], benchmark, pinpoint)

    # Load the baseline first, then the run under test; give up on the first
    # directory whose stats cannot be read.
    loaded = []
    for resultsdir in (baseline_dir, output_dir):
        try:
            loaded.append(sniper_lib.get_results(jobid=None, resultsdir=resultsdir))
        except (KeyError, ValueError) as e:
            print('Failed to read stats: %s %s' % (e, pinpoint))
            return 0
    baseline_res, output_res = loaded

    # Only core 0's L2 load misses are compared.
    baseline_misses = baseline_res['results']['L2.load-misses'][0]
    output_misses = output_res['results']['L2.load-misses'][0]

    if baseline_misses == 0:
        # Nothing to cover: avoid dividing by zero.
        return 0.0
    return 100 * (baseline_misses - output_misses) / float(baseline_misses)
def print_diff(parmsort = None, restype = 'results', resultdirs = [], partial = None, print_alldiffs = True, print_average = False, average_nz = True):
    """Print a table comparing statistics across several result directories.

    Each directory in ``resultdirs`` is loaded through
    ``sniper_lib.get_results`` and the ``restype`` entry of what it returns is
    compared key by key.  A row is printed only when ``max_diff`` (defined
    elsewhere in this file) reports a non-zero difference or forces printing.

    Args:
        parmsort: 'abs' sorts rows by descending absolute difference,
            'percent' by descending absolute percentage difference; any other
            value keeps the case-insensitive key order.
        restype: key selecting which part of the loaded results to compare.
        resultdirs: result directories to compare (never mutated here).
        partial: forwarded unchanged to ``sniper_lib.get_results``.
        print_alldiffs: when true, print one difference column per entry of
            ``get_diffs``; otherwise print the max-%-err and max-abs-err
            columns.
        print_average: when true, list-valued statistics are reduced to one
            averaged row instead of one row per list element.
        average_nz: when averaging, divide by the number of list positions
            that are non-zero in at least one result set rather than by the
            list length.
    """
    jobs = []
    stats = {}
    maxkeylen = -1
    resultstoprint = []
    max_cores = 0

    for resultdir in resultdirs:
        res = sniper_lib.get_results(resultsdir = resultdir, partial = partial)
        stats[resultdir] = res[restype]
        jobs.append(resultdir)

    # Find all key names and, for list-valued statistics, the longest list
    keyinfo = {}
    for jobstats in stats.values():
        for key, value in jobstats.items():
            length = len(value) if isinstance(value, list) else 0
            keyinfo[key] = max(keyinfo.get(key, 0), length)

    def get_element(statkey, key, core):
        data = stats[statkey].get(key)
        if data and isinstance(data, list) and len(data) > core:
            return data[core]
        return None

    def get_average(statkey, key):
        data = stats[statkey].get(key)
        if not (data and isinstance(data, list)):
            return None
        if average_nz:
            # Count positions for which this statistic is non-zero in at least
            # one of the results.  Result sets lacking the key (or holding a
            # scalar) are skipped instead of raising KeyError, and shorter
            # lists no longer hide the extra positions of longer ones.
            percore = [s[key] for s in stats.values() if isinstance(s.get(key), list)]
            ncores = max(len(values) for values in percore)
            cnt = 0
            for core in range(ncores):
                if any(values[core] for values in percore if core < len(values)):
                    cnt += 1
            cnt = cnt or 1
        else:
            cnt = len(data)
        return long(sum(data) / float(cnt))

    for key, length in sorted(keyinfo.items(), key = lambda item: item[0].lower()):
        if length > 0:
            for core in range(1 if print_average else length):
                if print_average:
                    values = [get_average(statkey, key) for statkey in jobs]
                else:
                    values = [get_element(statkey, key, core) for statkey in jobs]
                if not any(values):
                    continue
                diff, max_percent_diff, forceprint = max_diff(values)
                diffs = get_diffs(values)
                if forceprint or diff != 0:
                    maxkeylen = max(len(key), maxkeylen) # Consider this key for the maximum key character length
                    resultstoprint.append((key, core, values, diff, max_percent_diff, diffs))
                    max_cores = max(max_cores, core)
        else:
            diff, max_percent_diff, forceprint = max_diff([jobstats.get(key, None) for jobstats in stats.values()])
            diffs = get_diffs([stats[statkey].get(key, None) for statkey in jobs])
            if forceprint or diff != 0:
                maxkeylen = max(len(key), maxkeylen) # Consider this key for the maximum key character length
                data = [stats[statkey].get(key) for statkey in jobs]
                resultstoprint.append((key, None, data, diff, max_percent_diff, diffs))

    # Header row: one column per job, followed by the difference columns.
    # Fields are joined with single spaces, matching the spacing of the
    # comma-terminated print statements this replaces.
    header = ['%*s ' % (maxkeylen+5, '')]
    for statkey in jobs:
        header.append('%12s' % (('%s'%statkey)[-12:]))
    if print_alldiffs:
        for statkey in jobs[1:]:
            header.append(' '*max(0, 11 - len(str(statkey))) + u'\u0394'.encode('utf8') + str(statkey)[-11:])
    else:
        header.append('%12s' % 'max-%-err')
        header.append('%12s' % 'max-abs-err')
    print(' '.join(header))

    if parmsort == 'abs':
        resultstoprint = sorted(resultstoprint, key = lambda x: abs(x[3]), reverse = True)
    elif parmsort == 'percent':
        resultstoprint = sorted(resultstoprint, key = lambda x: abs(x[4]), reverse = True)

    # Iterate through the collected data items and print them out
    corewidth = len(str(max_cores))
    for (key, core, datalist, abs_diff, percent_diff, diffs) in resultstoprint:
        if core is None:
            label = '%-*s %s =' % (maxkeylen, key, ' '*corewidth)
        elif print_average:
            label = '%-*s[*] =' % (maxkeylen, key)
        else:
            label = '%-*s[%*u] =' % (maxkeylen, key, corewidth, core)
        fields = [label]
        for d in datalist:
            fields.append(' ----' if d is None else format_value(d))
        if print_alldiffs:
            for d in diffs:
                fields.append(format_diff(d))
        else:
            fields.append(format_percent(percent_diff))
            fields.append('%12.3g' % abs_diff)
        print(' '.join(fields))
def getInstructionCount(intervalstr):
    """Return the summed instruction count for the partial interval ``intervalstr``.

    Only the "performance_model.instruction_count" metric is requested from
    ``sniper_lib.get_results`` (using the module-level ``config`` and
    ``stats``); the values of that metric are added together.
    """
    metric = "performance_model.instruction_count"
    partial_results = sniper_lib.get_results(
        config = config,
        stats = stats,
        partial = intervalstr,
        metrics = (metric,),
    )
    return sum(partial_results["results"][metric])
if is_mesh: for lid in range(0, max_id + 1, concentration): svg.paint_box((xpos(lid) - .075, -.2), (concentration + .15, y + .4), color='#dddddd', zorder=2, root=tile_root(lid)) y += 1 if is_mesh: y *= height else: y = 0 if is_mesh: results = sniper_lib.get_results(resultsdir=resultsdir, jobid=jobid)['results'] if 'dram-queue.total-time-used' in results \ and 'network.shmem-1.mesh.link-up.num-requests' in results: import gridcolors time0 = max(results['performance_model.elapsed_time']) def util2color(utilization): return '#%02x%02x%02x' % gridcolors.colorscale(utilization) OFFSET_Y = y SCALE_X = .6 BOXSIZE = .2 for y in range(height): for x in range(width): for c in range(concentration):
def print_diff(parmsort = None, restype = 'results', resultdirs = [], partial = None, print_alldiffs = True, print_average = False):
    """Print a table comparing statistics across several result directories.

    Every directory in ``resultdirs`` is loaded with
    ``sniper_lib.get_results`` (``partial`` is forwarded) and the ``restype``
    part of each is compared key by key.  Rows are kept only when
    ``max_diff`` reports a non-zero difference or forces printing.
    ``parmsort`` ('abs' or 'percent') re-orders the rows by descending
    absolute or percentage difference; ``print_alldiffs`` selects per-job
    difference columns over the two max-error columns; ``print_average``
    collapses list-valued statistics into a single averaged row.
    """
    jobs = []
    stats = {}
    for resultdir in resultdirs:
        loaded = sniper_lib.get_results(resultsdir = resultdir, partial = partial)
        stats[resultdir] = loaded[restype]
        jobs.append(resultdir)

    # Find all key names and maximum lengths (0 marks a non-list statistic)
    keyinfo = {}
    for jobstats in stats.values():
        for name, value in jobstats.items():
            if type(value) is list:
                width = len(value)
            else:
                width = 0
            keyinfo[name] = max(keyinfo.get(name, 0), width)

    def get_element(statkey, key, core):
        data = stats[statkey].get(key)
        if not data or type(data) is not list or len(data) <= core:
            return None
        return data[core]

    def get_average(statkey, key):
        data = stats[statkey].get(key)
        if not data or type(data) is not list:
            return None
        return long(sum(data) / float(len(data)))

    maxkeylen = -1
    max_cores = 0
    resultstoprint = []
    for key, length in sorted(keyinfo.items(), key = lambda item: item[0].lower()):
        if length <= 0:
            # Scalar statistic: a single row without a core index
            diff, max_percent_diff, forceprint = max_diff([jobstats.get(key, None) for jobstats in stats.values()])
            diffs = get_diffs([stats[statkey].get(key, None) for statkey in jobs])
            if forceprint or diff != 0:
                maxkeylen = max(len(key), maxkeylen) # Consider this key for the maximum key character length
                data = [stats[statkey].get(key) for statkey in jobs]
                resultstoprint.append((key, None, data, diff, max_percent_diff, diffs))
            continue

        ncores = 1 if print_average else length
        for core in range(ncores):
            if print_average:
                values = [get_average(statkey, key) for statkey in jobs]
            else:
                values = [get_element(statkey, key, core) for statkey in jobs]
            if not any(values):
                continue
            diff, max_percent_diff, forceprint = max_diff(values)
            diffs = get_diffs(values)
            if forceprint or diff != 0:
                maxkeylen = max(len(key), maxkeylen) # Consider this key for the maximum key character length
                resultstoprint.append((key, core, values, diff, max_percent_diff, diffs))
                max_cores = max(max_cores, core)

    # Header row; fields are separated by single spaces exactly as the
    # comma-terminated print statements produced them.
    header = ['%*s ' % (maxkeylen+5, '')]
    header.extend('%12s' % (('%s'%statkey)[-12:]) for statkey in jobs)
    if print_alldiffs:
        for statkey in jobs[1:]:
            header.append(' '*max(0, 11 - len(str(statkey))) + u'\u0394'.encode('utf8') + str(statkey)[-11:])
    else:
        header.extend(['%12s' % 'max-%-err', '%12s' % 'max-abs-err'])
    print(' '.join(header))

    if parmsort == 'abs':
        resultstoprint.sort(key = lambda row: abs(row[3]), reverse = True)
    elif parmsort == 'percent':
        resultstoprint.sort(key = lambda row: abs(row[4]), reverse = True)

    # Iterate through the collected data items and print them out
    corewidth = len(str(max_cores))
    for (key, core, datalist, abs_diff, percent_diff, diffs) in resultstoprint:
        if core is None:
            fields = ['%-*s %s =' % (maxkeylen, key, ' '*corewidth)]
        elif print_average:
            fields = ['%-*s[*] =' % (maxkeylen, key)]
        else:
            fields = ['%-*s[%*u] =' % (maxkeylen, key, corewidth, core)]
        for d in datalist:
            if d is None:
                fields.append(' ----')
            else:
                fields.append(format_value(d))
        if print_alldiffs:
            fields.extend(format_diff(d) for d in diffs)
        else:
            fields.append(format_percent(percent_diff))
            fields.append('%12.3g' % abs_diff)
        print(' '.join(fields))
def createJSONData(interval, num_intervals, resultsdir, outputdir, verbose=False):
    """Write the topology SVG and per-interval sparkline data for a run.

    Creates ``<outputdir>/levels/topology``, renders ``topo.svg`` into it via
    ``gen_topology.gen_topology`` and writes ``topology.txt`` containing
    ``topology = <json>``.  The JSON maps '<component>-<id>' to a dict with an
    'info' label and a 'sparkdata' list holding one entry per interval:
    IPC for cores, MPKI for master caches and APKI for DRAM controllers.

    Args:
        interval: distance between two periodic snapshots; interval ``i``
            spans 'periodic-<i*interval>' to 'periodic-<(i+1)*interval>'.
        num_intervals: number of consecutive intervals to process.
        resultsdir: directory holding 'sim.cfg' and the statistics.
        outputdir: directory under which 'levels/topology' is created.
        verbose: unused by this function.

    Raises:
        ValueError: when the results contain neither barrier global times nor
            per-core elapsed times.
    """
    topodir = os.path.join(outputdir, 'levels', 'topology')
    mkdir_p(topodir)

    # Close every file deterministically instead of relying on finalisation.
    with open(os.path.join(topodir, 'topo.svg'), 'w') as svgfile:
        gen_topology.gen_topology(resultsdir=resultsdir, outputobj=svgfile, format='svg', embedded=True)

    with open(os.path.join(resultsdir, 'sim.cfg')) as cfgfile:
        config = sniper_config.parse_config(cfgfile.read())
    ncores = int(config['general/total_cores'])
    stats = sniper_stats.SniperStats(resultsdir)

    topology = list(stats.get_topology())
    ids = collections.defaultdict(dict)
    for name, lid, mid in topology:
        ids[name][int(lid)] = int(mid)
    # Converted with int() like the ids above: these values are used both as
    # list indices and with '%d' below.
    dramcntlrs = [int(lid) for (name, lid, mid) in topology if name == 'dram-cntlr']

    caches = ['L1-I', 'L1-D', 'L2', 'L3', 'L4', 'dram-cache']
    data = {}
    for core in range(ncores):
        for name in ['core', 'dram-cntlr'] + caches:
            data['%s-%d' % (name, core)] = {'info': '', 'sparkdata': []}

    for i in range(num_intervals):
        results = sniper_lib.get_results(
            config=config,
            stats=stats,
            partial=('periodic-' + str(i * interval), 'periodic-' + str((i + 1) * interval)))['results']

        if 'barrier.global_time_begin' in results:
            # Most accurate: ask the barrier
            results['time_begin'] = results['barrier.global_time_begin'][0]
            results['time_end'] = results['barrier.global_time_end'][0]
        elif 'performance_model.elapsed_time_end' in results:
            # Guess based on core that has the latest time (future wakeup is less common than sleep on futex)
            results['time_begin'] = max(results['performance_model.elapsed_time_begin'])
            results['time_end'] = max(results['performance_model.elapsed_time_end'])
        else:
            raise ValueError(
                'Need either performance_model.elapsed_time or barrier.global_time, simulation is probably too old'
            )

        for core in range(ncores):
            if 'fs_to_cycles_cores' in results:
                cycles_scale = results['fs_to_cycles_cores'][core]
            else:
                cycles_scale = 1.
            cycles = cycles_scale * (results['time_end'] - results['time_begin'])
            ninstrs = results['performance_model.instruction_count'][core]
            # An interval with no elapsed time has no meaningful IPC; report 0
            # rather than dividing by zero.
            ipc = ninstrs / cycles if cycles else 0.
            data['core-%d' % core]['sparkdata'].append('%.3f' % ipc)
            data['core-%d' % core]['info'] = 'IPC (core-%d)' % core

            for cache in caches:
                if cache not in ids:
                    # Cache level does not exist
                    continue
                if ids[cache][core] != core:
                    # Non-master cache
                    continue
                if '%s.loads' % cache in results:
                    # Sum misses and instruction counts over all cores sharing this cache
                    misses = 0
                    shared_instrs = 0
                    for _core in range(ncores):
                        if ids[cache][_core] == ids[cache][core]:
                            misses += results['%s.load-misses' % cache][_core] + results['%s.store-misses-I' % cache][_core]
                            shared_instrs += results['performance_model.instruction_count'][_core]
                    data['%s-%d' % (cache, core)]['sparkdata'].append('%.3f' % (1000. * misses / float(shared_instrs or 1.)))
                    data['%s-%d' % (cache, core)]['info'] = 'MPKI (%s-%d)' % (cache, core)

        for dramcntlr in dramcntlrs:
            ninstrs = sum(results['performance_model.instruction_count'])
            if ninstrs == 0:
                # FIXME ninstrs should not be zero while we are accessing dram
                data['dram-cntlr-%d' % dramcntlr]['sparkdata'].append(0.)
            else:
                data['dram-cntlr-%d' % dramcntlr]['sparkdata'].append(
                    '%.3f' % (1000. * (results['dram.reads'][dramcntlr] + results['dram.writes'][dramcntlr]) / (ninstrs or 1.)))
            data['dram-cntlr-%d' % dramcntlr]['info'] = 'APKI (dram-cntlr-%d)' % dramcntlr

    with open(os.path.join(topodir, 'topology.txt'), "w") as jsonfile:
        jsonfile.write('topology = %s' % json.dumps(data))
def createJSONData(interval, num_intervals, resultsdir, outputdir, verbose = False):
    """Write the topology SVG and per-interval sparkline data for a run.

    Creates ``<outputdir>/levels/topology``, renders ``topo.svg`` into it via
    ``gen_topology.gen_topology`` and writes ``topology.txt`` containing
    ``topology = <json>``.  The JSON maps '<component>-<id>' to a dict with an
    'info' label and a 'sparkdata' list holding one entry per interval:
    IPC for cores, MPKI for master caches and APKI for DRAM controllers.

    Args:
        interval: distance between two periodic snapshots; interval ``i``
            spans 'periodic-<i*interval>' to 'periodic-<(i+1)*interval>'.
        num_intervals: number of consecutive intervals to process.
        resultsdir: directory holding 'sim.cfg' and the statistics.
        outputdir: directory under which 'levels/topology' is created.
        verbose: unused by this function.

    Raises:
        ValueError: when the results contain neither barrier global times nor
            per-core elapsed times.
    """
    topodir = os.path.join(outputdir, 'levels', 'topology')
    mkdir_p(topodir)

    # Close every file deterministically instead of relying on finalisation.
    with open(os.path.join(topodir, 'topo.svg'), 'w') as svgfile:
        gen_topology.gen_topology(resultsdir = resultsdir, outputobj = svgfile, format = 'svg', embedded = True)

    with open(os.path.join(resultsdir, 'sim.cfg')) as cfgfile:
        config = sniper_config.parse_config(cfgfile.read())
    ncores = int(config['general/total_cores'])
    stats = sniper_stats.SniperStats(resultsdir)

    topology = list(stats.get_topology())
    ids = collections.defaultdict(dict)
    for name, lid, mid in topology:
        ids[name][int(lid)] = int(mid)
    # Converted with int() like the ids above: these values are used both as
    # list indices and with '%d' below.
    dramcntlrs = [ int(lid) for (name, lid, mid) in topology if name == 'dram-cntlr' ]

    caches = [ 'L1-I', 'L1-D', 'L2', 'L3', 'L4', 'dram-cache' ]
    data = {}
    for core in range(ncores):
        for name in ['core', 'dram-cntlr'] + caches:
            data['%s-%d' % (name, core)] = {'info': '', 'sparkdata': []}

    for i in range(num_intervals):
        begin_marker = 'periodic-' + str(i * interval)
        end_marker = 'periodic-' + str((i + 1) * interval)
        results = sniper_lib.get_results(config = config, stats = stats, partial = (begin_marker, end_marker))['results']

        if 'barrier.global_time_begin' in results:
            # Most accurate: ask the barrier
            results['time_begin'] = results['barrier.global_time_begin'][0]
            results['time_end'] = results['barrier.global_time_end'][0]
        elif 'performance_model.elapsed_time_end' in results:
            # Guess based on core that has the latest time (future wakeup is less common than sleep on futex)
            results['time_begin'] = max(results['performance_model.elapsed_time_begin'])
            results['time_end'] = max(results['performance_model.elapsed_time_end'])
        else:
            raise ValueError('Need either performance_model.elapsed_time or barrier.global_time, simulation is probably too old')

        for core in range(ncores):
            if 'fs_to_cycles_cores' in results:
                cycles_scale = results['fs_to_cycles_cores'][core]
            else:
                cycles_scale = 1.
            cycles = cycles_scale * (results['time_end'] - results['time_begin'])
            ninstrs = results['performance_model.instruction_count'][core]
            # An interval with no elapsed time has no meaningful IPC; report 0
            # rather than dividing by zero.
            ipc = ninstrs / cycles if cycles else 0.
            data['core-%d' % core]['sparkdata'].append('%.3f' % ipc)
            data['core-%d' % core]['info'] = 'IPC (core-%d)' % core

            for cache in caches:
                if cache not in ids:
                    # Cache level does not exist
                    continue
                if ids[cache][core] != core:
                    # Non-master cache
                    continue
                if '%s.loads' % cache in results:
                    # Sum misses and instruction counts over all cores sharing this cache
                    misses = 0
                    shared_instrs = 0
                    for _core in range(ncores):
                        if ids[cache][_core] == ids[cache][core]:
                            misses += results['%s.load-misses'%cache][_core] + results['%s.store-misses-I'%cache][_core]
                            shared_instrs += results['performance_model.instruction_count'][_core]
                    data['%s-%d' % (cache, core)]['sparkdata'].append('%.3f' % (1000. * misses / float(shared_instrs or 1.)))
                    data['%s-%d' % (cache, core)]['info'] = 'MPKI (%s-%d)' % (cache, core)

        for dramcntlr in dramcntlrs:
            ninstrs = sum(results['performance_model.instruction_count'])
            if ninstrs == 0:
                # FIXME ninstrs should not be zero while we are accessing dram
                data['dram-cntlr-%d' % dramcntlr]['sparkdata'].append(0.)
            else:
                data['dram-cntlr-%d' % dramcntlr]['sparkdata'].append('%.3f' % (1000. * (results['dram.reads'][dramcntlr] + results['dram.writes'][dramcntlr]) / (ninstrs or 1.)))
            data['dram-cntlr-%d' % dramcntlr]['info'] = 'APKI (dram-cntlr-%d)' % dramcntlr

    with open(os.path.join(topodir, 'topology.txt'), "w") as jsonfile:
        jsonfile.write('topology = %s' % json.dumps(data))
def print_diff(parmsort=None, restype="results", resultdirs=[], partial=None, print_alldiffs=True, print_average=False):
    """Compare the statistics of several result directories and print a table.

    Each entry of ``resultdirs`` is read with ``sniper_lib.get_results``
    (passing ``partial`` through) and its ``restype`` section is compared
    against the others, key by key.  A row is emitted only when ``max_diff``
    returns a non-zero difference or requests printing.  ``parmsort`` may be
    "abs" or "percent" to order rows by descending absolute or percentage
    difference.  ``print_alldiffs`` chooses per-job difference columns instead
    of the max-%-err / max-abs-err pair, and ``print_average`` prints one
    averaged row per list-valued statistic instead of one row per element.
    """
    jobs = []
    stats = {}
    for resultdir in resultdirs:
        stats[resultdir] = sniper_lib.get_results(resultsdir=resultdir, partial=partial)[restype]
        jobs.append(resultdir)

    # Find all key names and maximum lengths (non-list statistics count as 0)
    keyinfo = {}
    for per_job in stats.values():
        for statname, statvalue in per_job.items():
            nelems = len(statvalue) if type(statvalue) is list else 0
            keyinfo[statname] = max(keyinfo.get(statname, 0), nelems)

    def get_element(statkey, key, core):
        data = stats[statkey].get(key)
        usable = data and type(data) is list and len(data) > core
        return data[core] if usable else None

    def get_average(statkey, key):
        data = stats[statkey].get(key)
        usable = data and type(data) is list and len(data) > 0
        return long(sum(data) / float(len(data))) if usable else None

    def collect(key, core, values):
        # Gather the values of one row and its differences
        diff, max_percent_diff, forceprint = max_diff(values)
        return diff, max_percent_diff, forceprint, get_diffs(values)

    maxkeylen = -1
    max_cores = 0
    resultstoprint = []
    ordered_keys = sorted(keyinfo.items(), key=lambda pair: pair[0].lower())
    for key, length in ordered_keys:
        if length > 0:
            for core in range(1 if print_average else length):
                if print_average:
                    values = [get_average(statkey, key) for statkey in jobs]
                else:
                    values = [get_element(statkey, key, core) for statkey in jobs]
                if any(values):
                    diff, max_percent_diff, forceprint, diffs = collect(key, core, values)
                    if forceprint or diff != 0:
                        maxkeylen = max(len(key), maxkeylen)  # Consider this key for the maximum key character length
                        resultstoprint.append((key, core, values, diff, max_percent_diff, diffs))
                        max_cores = max(max_cores, core)
        else:
            diff, max_percent_diff, forceprint = max_diff([per_job.get(key, None) for per_job in stats.values()])
            diffs = get_diffs([stats[statkey].get(key, None) for statkey in jobs])
            if forceprint or diff != 0:
                maxkeylen = max(len(key), maxkeylen)  # Consider this key for the maximum key character length
                data = [stats[statkey].get(key) for statkey in jobs]
                resultstoprint.append((key, None, data, diff, max_percent_diff, diffs))

    # Header row; a single space separates the fields, as the trailing-comma
    # print statements did.
    columns = ["%*s " % (maxkeylen + 5, "")]
    for statkey in jobs:
        columns.append("%12s" % (("%s" % statkey)[-12:]))
    if print_alldiffs:
        for statkey in jobs[1:]:
            columns.append(" " * max(0, 11 - len(str(statkey))) + u"\u0394".encode("utf8") + str(statkey)[-11:])
    else:
        columns.append("%12s" % "max-%-err")
        columns.append("%12s" % "max-abs-err")
    print(" ".join(columns))

    sort_column = {"abs": 3, "percent": 4}.get(parmsort)
    if sort_column is not None:
        resultstoprint = sorted(resultstoprint, key=lambda row: abs(row[sort_column]), reverse=True)

    # Iterate through the collected data items and print them out
    index_width = len(str(max_cores))
    for (key, core, datalist, abs_diff, percent_diff, diffs) in resultstoprint:
        if core is not None:
            if print_average:
                columns = ["%-*s[*] =" % (maxkeylen, key)]
            else:
                columns = ["%-*s[%*u] =" % (maxkeylen, key, index_width, core)]
        else:
            columns = ["%-*s %s =" % (maxkeylen, key, " " * index_width)]
        columns.extend(" ----" if d is None else format_value(d) for d in datalist)
        if print_alldiffs:
            columns.extend(format_diff(d) for d in diffs)
        else:
            columns.append(format_percent(percent_diff))
            columns.append("%12.3g" % abs_diff)
        print(" ".join(columns))
def get_results(self, **kwds):
    """Call ``sniper_lib.get_results`` with this object as its ``stats`` argument.

    All keyword arguments are forwarded unchanged and the call's return value
    is returned as-is.
    """
    results = sniper_lib.get_results(stats = self, **kwds)
    return results
def generate_cheetah(jobid=None, resultsdir='.', partial=None, outputbase='cheetah', title=None, yscale=(0, 50), logy=False, diff=False):
    """Plot the 'cheetah.*' statistics of a run with gnuplot.

    Writes a gnuplot script to ``<outputbase>.input`` (with the data inline)
    and runs ``gnuplot`` on it from the directory of ``outputbase``; the
    script directs its output to ``<basename of outputbase>.png``.

    Args:
        jobid, resultsdir, partial: forwarded to ``sniper_lib.get_results``.
        outputbase: path prefix for the '.input' script and the '.png' plot.
        title: plot title; the title is unset when this is empty or None.
        yscale: (min, max) pair used for the y range.
        logy: use a logarithmic y axis when true.
        diff: when true, write ``last - value`` per point instead of
            ``value``, where ``last`` is the previously written value.
    """
    res = sniper_lib.get_results(jobid=jobid, resultsdir=resultsdir, partial=partial)
    # Keep only 'cheetah.<grouping>' statistics, keyed by the grouping name
    data = {}
    for k, v in res['results'].items():
        if k.startswith('cheetah.'):
            data[k.split('.')[1]] = v

    def grouping_sortkey(grouping):
        if grouping == 'global':
            return -99999  # Start with total
        elif grouping == 'local':
            return +99999  # End with per-thread
        else:
            # Big groups (close to total) to small groups (close to per-thread)
            return -int(grouping.split('-')[-1])

    def grouping_title(grouping):
        return grouping

    GROUPINGS = sorted(data.keys(), key=grouping_sortkey)
    xmax = 1 << max(len(values) for values in data.values())

    plots = ', '.join([
        "'-' using 1:(100*$2) with linespoints title '%s'" % grouping_title(grouping)
        for grouping in GROUPINGS
    ])

    # The script must be completely written and closed before gnuplot reads
    # it, so use a with-block rather than depending on `del` to finalise the
    # file object.
    with open(outputbase + '.input', 'w') as o:
        o.write('''\
set fontpath "/usr/share/fonts/truetype/freefont"
set terminal png font "FreeSans,15" size 500,350 linewidth 2 rounded
set output "%s.png"
%s
set key top right
set logscale x 2
set xrange [:%f]
set xtics nomirror out ("1 KB" 1024, "32 KB" 32768, "1 MB" 1048576, "32 MB" 33554432, "1 GB" 1073741824, "32 GB" 34359738368.)
set xtics add autofreq
set mxtics default
%s
set yrange [%f:%f]
set ytics nomirror
set format y "%%.1f%%%%"
plot %s
''' % (os.path.basename(outputbase),
       'set title "%s"' % title if title else 'unset title',
       xmax,
       'set logscale y' if logy else '',
       yscale[0], yscale[1],
       plots))

        for grouping in GROUPINGS:
            last = 1
            # Element 0 is the reference every later element is divided by
            total = data[grouping][0]
            for size, value in enumerate(data[grouping]):
                if size == 0 or value == 0:
                    continue
                value = 1 - value / float(total)
                o.write('%d %f\n' % (1 << size, last - value if diff else value))
                last = value
            # 'e' terminates one inline data block
            o.write('e\n')

    subprocess.Popen(
        ['gnuplot', '%s.input' % os.path.basename(outputbase)],
        cwd=os.path.dirname(outputbase) or '.').communicate()
def getTotalInstructionCount():
    """Return the summed "performance_model.instruction_count" of the whole run.

    Requests only that metric from ``sniper_lib.get_results`` (using the
    module-level ``config`` and ``stats``, with no partial interval) and adds
    up its values.
    """
    metric = "performance_model.instruction_count"
    full_results = sniper_lib.get_results(
        config = config,
        stats = stats,
        metrics = (metric,),
    )
    return sum(full_results["results"][metric])