def format_config(name, lid):
  """Return the configured size of cache `name` (e.g. 'L1-D') for entity `lid`,
  formatted as a human-readable string, or '' for unknown component names."""
  # Map every supported component name directly to its sim.cfg size key.
  size_keys = {
    'L1-I': 'perf_model/l1_icache/cache_size',
    'L1-D': 'perf_model/l1_dcache/cache_size',
    'L2':   'perf_model/l2_cache/cache_size',
    'L3':   'perf_model/l3_cache/cache_size',
    'L4':   'perf_model/l4_cache/cache_size',
    'dram-cache': 'perf_model/dram/cache/cache_size',
  }
  key = size_keys.get(name)
  if key is None:
    return ''
  # Config values are in KB; format_size() expects bytes.
  kbytes = long(sniper_config.get_config(config, key, lid))
  return sniper_lib.format_size(1024 * kbytes, digits = 0)
def parse_results_from_dir(resultsdir, partial=None, metrics=None):
  """Collect all statistics for one simulation output directory.

  Returns a list of (name, core, value) tuples; core == -1 marks a
  global (non-per-core) statistic. Raises SniperResultsException when
  no sim.cfg is present.
  """
  results = []

  ## sim.cfg: core count and per-core frequencies
  cfgfile = os.path.join(resultsdir, "sim.cfg")
  if not os.path.exists(cfgfile):
    raise SniperResultsException("No valid configuration found")
  simcfg = sniper_config.parse_config(open(cfgfile).read())
  ncores = int(simcfg["general/total_cores"])
  results.append(("ncores", -1, ncores))
  for idx in range(ncores):
    freq = 1e9 * float(sniper_config.get_config(simcfg, "perf_model/core/frequency", idx))
    results.append(("corefreq", idx, freq))

  ## sim.info (or legacy graphite.out): wall time and memory usage
  siminfo = None
  for fname in ("sim.info", "graphite.out"):
    path = os.path.join(resultsdir, fname)
    if os.path.exists(path):
      siminfo = eval(open(path).read())
      break
  # If we're called from inside run-graphite, sim.info may not yet exist
  if siminfo:
    results.append(("walltime", -1, siminfo["t_elapsed"]))
    results.append(("vmem", -1, siminfo["vmem"]))

  ## sim.stats between the chosen marker pair
  if partial:
    k1, k2 = partial[:2]
  else:
    k1, k2 = "roi-begin", "roi-end"
  stats = sniper_stats.SniperStats(resultsdir)
  results += stats.parse_stats((k1, k2), ncores, metrics=metrics)

  ## Derived ROI aggregates (full-ROI runs only)
  if not partial:
    walltime = [v for k, _, v in results if k == "time.walltime"]
    instrs = [v for k, _, v in results if k == "core.instructions"]
    if walltime and instrs:
      seconds = walltime[0] / 1e6  # microseconds -> seconds
      total_instrs = sum(instrs)
      results.append(("roi.walltime", -1, seconds))
      results.append(("roi.instrs", -1, total_instrs))
      results.append(("roi.ipstotal", -1, total_instrs / seconds))
      results.append(("roi.ipscore", -1, total_instrs / (seconds * ncores)))

  ## power.py: defines a `power` dict when McPAT post-processing has run
  power = {}
  powerfile = os.path.join(resultsdir, "power.py")
  if os.path.exists(powerfile):
    exec(open(powerfile).read())
    for key, value in power.items():
      results.append(("power.%s" % key, -1, value))

  return results
def __init__(self, resultsdir = '.'): filename = os.path.join(resultsdir, 'sim.memorytracker') if not os.path.exists(filename): raise IOError('Cannot find output file %s' % filename) results = sniper_lib.get_results(resultsdir = resultsdir) config = results['config'] stats = results['results'] self.hitwhere_load_global = dict([ (k.split('-', 3)[3], sum(v)) for k, v in stats.items() if k.startswith('L1-D.loads-where-') ]) self.hitwhere_load_unknown = self.hitwhere_load_global.copy() self.hitwhere_store_global = dict([ (k.split('-', 3)[3], sum(v)) for k, v in stats.items() if k.startswith('L1-D.stores-where-') ]) self.hitwhere_store_unknown = self.hitwhere_store_global.copy() llc_level = int(sniper_config.get_config(config, 'perf_model/cache/levels')) self.evicts_global = sum([ sum(v) for k, v in stats.items() if re.match('L%d.evict-.$' % llc_level, k) ]) self.evicts_unknown = self.evicts_global self.functions = {} self.sites = {} self.siteids = {} fp = open(filename) for line in fp: if line.startswith('W\t'): self.hitwheres = line.strip().split('\t')[1].strip(',').split(',') elif line.startswith('F\t'): _, eip, name, location = line.strip().split('\t') self.functions[eip] = Function(eip, name, location) elif line.startswith('S\t'): line = line.strip().split('\t') siteid = line[1] stack = line[2].strip(':').split(':') stack = self.collapseStack(stack) results = { 'numallocations': 0, 'totalallocated': 0, 'hitwhereload': {}, 'hitwherestore': {}, 'evictedby': {} } for data in line[3:]: key, value = data.split('=') if key == 'num-allocations': results['numallocations'] = long(value) if key == 'total-allocated': results['totalallocated'] = long(value) elif key == 'hit-where': entries = map(lambda s: s.split(':'), value.strip(',').split(',')) results['hitwhereload'] = dict([ (s[1:], long(v)) for s, v in entries if s.startswith('L') ]) for k, v in results['hitwhereload'].items(): self.hitwhere_load_unknown[k] -= v results['hitwherestore'] = dict([ (s[1:], long(v)) for s, v 
in entries if s.startswith('S') ]) for k, v in results['hitwherestore'].items(): self.hitwhere_store_unknown[k] -= v elif key == 'evicted-by': results['evictedby'] = dict(map(lambda (s, v): (s, long(v)), map(lambda s: s.split(':'), value.strip(',').split(',')))) self.evicts_unknown -= sum(results['evictedby'].values()) self.siteids[siteid] = stack if stack in self.sites: self.sites[stack].update(**results) else: self.sites[stack] = AllocationSite(stack, **results) else: raise ValueError('Invalid format %s' % line)
def parse_results_from_dir(resultsdir, partial=None, metrics=None):
  """Gather all statistics for a run in `resultsdir` as (name, core, value)
  tuples; core == -1 denotes a global statistic."""
  out = []

  ## sim.cfg
  cfgpath = os.path.join(resultsdir, 'sim.cfg')
  if not os.path.exists(cfgpath):
    raise SniperResultsException("No valid configuration found")
  simcfg = sniper_config.parse_config(open(cfgpath).read())
  ncores = int(simcfg['general/total_cores'])
  out.append(('ncores', -1, ncores))
  out.extend([
    ('corefreq', idx, 1e9 * float(sniper_config.get_config(simcfg, 'perf_model/core/frequency', idx)))
    for idx in range(ncores)
  ])

  ## sim.info or graphite.out
  infopath = os.path.join(resultsdir, 'sim.info')
  legacypath = os.path.join(resultsdir, 'graphite.out')
  if os.path.exists(infopath):
    siminfo = eval(open(infopath).read())
  elif os.path.exists(legacypath):
    siminfo = eval(open(legacypath).read())
  else:
    # sim.info may not yet exist when called from inside run-graphite
    siminfo = None
  if siminfo:
    out.append(('walltime', -1, siminfo['t_elapsed']))
    out.append(('vmem', -1, siminfo['vmem']))

  ## sim.stats
  k1, k2 = partial[:2] if partial else ('roi-begin', 'roi-end')
  stats = sniper_stats.SniperStats(resultsdir)
  out += stats.parse_stats((k1, k2), ncores, metrics=metrics)

  ## Derived ROI metrics, only meaningful for a full ROI run
  if not partial:
    walltimes = [v for k, _, v in out if k == 'time.walltime']
    instrcounts = [v for k, _, v in out if k == 'core.instructions']
    if walltimes and instrcounts:
      t_sec = walltimes[0] / 1e6  # microseconds -> seconds
      n_instr = sum(instrcounts)
      out.append(('roi.walltime', -1, t_sec))
      out.append(('roi.instrs', -1, n_instr))
      out.append(('roi.ipstotal', -1, n_instr / t_sec))
      out.append(('roi.ipscore', -1, n_instr / (t_sec * ncores)))

  ## power.py defines a `power` dict when McPAT has been run
  power = {}
  powerfile = os.path.join(resultsdir, 'power.py')
  if os.path.exists(powerfile):
    exec(open(powerfile).read())
    for key, value in power.items():
      out.append(('power.%s' % key, -1, value))

  return out
def __init__(self, resultsdir = '.'):
  """Load sim.rtntracefull and build the routine call tree.

  Parses function records (':'-prefixed) and per-callstack counter rows,
  merges duplicate stacks, links children to parents, then computes
  inclusive totals bottom-up and the total core time.
  """
  filename = os.path.join(resultsdir, 'sim.rtntracefull')
  if not os.path.exists(filename):
    raise IOError('Cannot find trace file %s' % filename)
  results = sniper_lib.get_results(resultsdir = resultsdir)
  config = results['config']
  stats = results['results']
  freq = 1e9 * float(sniper_config.get_config(config, 'perf_model/core/frequency'))
  self.fs_to_cycles = freq / 1e15
  self.functions = {}
  self.calls = {}
  self.children = collections.defaultdict(set)
  self.roots = set()
  self.totals = {}
  fp = open(filename)
  # First line is the tab-separated column header
  self.headers = fp.readline().strip().split('\t')
  for rawline in fp:
    if rawline.startswith(':'):
      # Function definition record: ':<eip>\t<name>\t<location>'
      eip, funcname, location = rawline.strip().split('\t')
      eip = eip[1:]  # drop the leading ':'
      self.functions[eip] = Function(eip, funcname, location)
      continue
    # Data record: '<eip:eip:...>\t<counter>\t<counter>...'
    fields = rawline.strip().split('\t')
    stack = fields[0].split(':')
    eip = stack[-1]
    stack = ':'.join(map(self.translateEip, stack))
    data = dict(zip(self.headers[1:], map(long, fields[1:])))
    if stack in self.calls:
      # Same call stack seen again (e.g. from another thread): merge counters
      self.calls[stack].add(data)
    else:
      self.calls[stack] = Call(str(self.functions[eip]), eip, stack, data)
      parent = stack.rpartition(':')[0]
      self.children[parent].add(stack)
  # Roots are the stacks that are nobody's child
  self.roots = set(self.calls.keys())
  for parent in self.calls:
    for child in self.children[parent]:
      self.roots.remove(child)
  # Construct a list of calls where each child is ordered before its parent.
  calls_ordered = collections.deque()
  calls_tovisit = collections.deque(self.roots)
  while calls_tovisit:
    stack = calls_tovisit.pop()
    calls_ordered.appendleft(stack)
    calls_tovisit.extend(self.children[stack])
  # Non-recursive buildTotal: since every child precedes its parent in
  # calls_ordered, a single left-to-right pass suffices.
  for stack in calls_ordered:
    self.calls[stack].buildTotal(self)
  ncores = int(config['general/total_cores'])
  self.totals['total_coretime'] = ncores * stats['barrier.global_time'][0]
def format_config(name, lid): caches = { 'L1-I': 'l1_icache', 'L1-D': 'l1_dcache', 'L2': 'l2_cache', 'L3': 'l3_cache', 'L4': 'l4_cache' } if name in caches: value = sniper_config.get_config( config, 'perf_model/%s/cache_size' % caches[name], lid) return sniper_lib.format_size(1024 * long(value), digits=0) elif name == 'dram-cache': value = sniper_config.get_config( config, 'perf_model/dram/cache/cache_size', lid) return sniper_lib.format_size(1024 * long(value), digits=0) else: return ''
def __init__(self, resultsdir = '.'):
  """Load sim.rtntracefull and build the routine call tree.

  Variant that only needs the configuration (not the stats database):
  parses function records and per-callstack counter rows, links children
  to parents, and computes inclusive totals bottom-up.
  """
  filename = os.path.join(resultsdir, 'sim.rtntracefull')
  if not os.path.exists(filename):
    raise IOError('Cannot find trace file %s' % filename)
  config = sniper_lib.get_config(resultsdir = resultsdir)
  freq = 1e9 * float(sniper_config.get_config(config, 'perf_model/core/frequency'))
  self.fs_to_cycles = freq / 1e15
  self.functions = {}
  self.calls = {}
  self.children = collections.defaultdict(set)
  self.roots = set()
  self.totals = {}
  trace = open(filename)
  # First line holds the tab-separated column names
  self.headers = trace.readline().strip().split('\t')
  for record in trace:
    if record.startswith(':'):
      # Function definition record: ':<eip>\t<name>\t<location>'
      eip, fname, floc = record.strip().split('\t')
      eip = eip[1:]  # strip the ':' marker
      self.functions[eip] = Function(eip, fname, floc)
      continue
    # Counter record: '<eip:eip:...>\t<value>...'
    cols = record.strip().split('\t')
    stack = cols[0].split(':')
    eip = stack[-1]
    stack = ':'.join(map(self.translateEip, stack))
    data = dict(zip(self.headers[1:], map(long, cols[1:])))
    if stack in self.calls:
      # Duplicate stack: accumulate into the existing Call
      self.calls[stack].add(data)
    else:
      self.calls[stack] = Call(str(self.functions[eip]), eip, stack, data)
      parent = stack.rpartition(':')[0]
      self.children[parent].add(stack)
  # A root is any stack that never appears as a child
  self.roots = set(self.calls.keys())
  for parent in self.calls:
    for child in self.children[parent]:
      self.roots.remove(child)
  # Construct a list of calls where each child is ordered before its parent.
  calls_ordered = collections.deque()
  calls_tovisit = collections.deque(self.roots)
  while calls_tovisit:
    stack = calls_tovisit.pop()
    calls_ordered.appendleft(stack)
    calls_tovisit.extend(self.children[stack])
  # Non-recursive buildTotal: every child was placed before its parent,
  # so one left-to-right pass over calls_ordered is sufficient.
  for stack in calls_ordered:
    self.calls[stack].buildTotal(self)
def stats_process(config, results):
  """Turn a flat (name, core, value) result list into a statistics dict.

  Per-core values become lists indexed by core; core == -1 entries stay
  scalar. Also derives interval begin/end times, L1 miss latency, lock
  contention, fs-to-cycles factors, emulated cycle counts and IPC.
  """
  ncores = int(config['general/total_cores'])
  stats = {}
  for name, core, value in results:
    if core == -1:
      # Global statistic: stored as a plain scalar
      stats[name] = value
      continue
    if name not in stats:
      stats[name] = [0]*ncores
    percore = stats[name]
    if core < len(percore):
      percore[core] = value
    else:
      # Core index beyond the current list: zero-pad, then append
      percore += [0] * (core - len(percore)) + [value]
  # Figure out when the interval of time, represented by partial, actually
  # begins/ends. Since cores can account for time in chunks, per-core time
  # can be both before (``wakeup at future time X'') or after (``sleep
  # until woken up'') the current time.
  if 'barrier.global_time_begin' in stats:
    # Most accurate: ask the barrier
    t_begin = stats['barrier.global_time_begin'][0]
    t_end = stats['barrier.global_time_end'][0]
    stats.update({'global.time_begin': t_begin, 'global.time_end': t_end, 'global.time': t_end - t_begin})
  elif 'performance_model.elapsed_time_begin' in stats:
    # Guess based on the core with the latest time (future wakeup is less
    # common than sleep on futex)
    t_begin = max(stats['performance_model.elapsed_time_begin'])
    t_end = max(stats['performance_model.elapsed_time_end'])
    stats.update({'global.time_begin': t_begin, 'global.time_end': t_end, 'global.time': t_end - t_begin})
  # Derived statistics
  try:
    l1access = sum(stats['L1-D.load-misses']) + sum(stats['L1-D.store-misses'])
    l1time = sum(stats['L1-D.total-latency'])
    stats['l1misslat'] = l1time / float(l1access or 1)
  except KeyError:
    pass
  contended = sum(stats.get('pthread.pthread_mutex_lock_contended', [0]))
  acquired = sum(stats.get('pthread.pthread_mutex_lock_count', [0]))
  stats['pthread_locks_contended'] = float(contended) / (acquired or 1)
  # Femtosecond-to-cycles conversion factor, per core
  freqs = [ 1e9 * float(sniper_config.get_config(config, 'perf_model/core/frequency', idx)) for idx in range(ncores) ]
  stats['fs_to_cycles_cores'] = map(lambda f: f / 1e15, freqs)
  # Backwards-compatible scalar for core 0; heterogeneous configurations
  # need fs_to_cycles_cores instead
  stats['fs_to_cycles'] = stats['fs_to_cycles_cores'][0]
  # DVFS-enabled runs: emulate cycle_count assuming constant (initial) frequency
  if 'performance_model.elapsed_time' in stats and 'performance_model.cycle_count' not in stats:
    stats['performance_model.cycle_count'] = [ stats['fs_to_cycles_cores'][idx] * stats['performance_model.elapsed_time'][idx] for idx in range(ncores) ]
  if 'thread.nonidle_elapsed_time' in stats and 'thread.nonidle_cycle_count' not in stats:
    stats['thread.nonidle_cycle_count'] = [ long(stats['fs_to_cycles'] * t) for t in stats['thread.nonidle_elapsed_time'] ]
  # IPC (the 1e16 fallback avoids division by zero when no cycles were counted)
  stats['ipc'] = sum(stats.get('performance_model.instruction_count', [0])) / float(sum(stats.get('performance_model.cycle_count', [0])) or 1e16)
  return stats
def stats_process(config, results):
  """Convert a flat (name, core, value) list into a statistics dict.

  Older variant without thread.nonidle_cycle_count emulation. Per-core
  values become lists indexed by core; core == -1 entries stay scalar.
  """
  ncores = int(config['general/total_cores'])
  stats = {}
  for name, core, value in results:
    if core == -1:
      # Global statistic: plain scalar
      stats[name] = value
    else:
      series = stats.get(name)
      if series is None:
        series = stats[name] = [0]*ncores
      if core < len(series):
        series[core] = value
      else:
        # Core index past the end: zero-pad the gap, then append
        series += [0] * (core - len(series)) + [value]
  # Figure out when the interval of time, represented by partial, actually
  # begins/ends. Since cores can account for time in chunks, per-core time
  # can be both before (``wakeup at future time X'') or after (``sleep
  # until woken up'') the current time.
  if 'barrier.global_time_begin' in stats:
    # Most accurate: ask the barrier
    begin = stats['barrier.global_time_begin'][0]
    end = stats['barrier.global_time_end'][0]
    stats.update({'global.time_begin': begin, 'global.time_end': end, 'global.time': end - begin})
  elif 'performance_model.elapsed_time_begin' in stats:
    # Guess based on the core with the latest time (future wakeup is less
    # common than sleep on futex)
    begin = max(stats['performance_model.elapsed_time_begin'])
    end = max(stats['performance_model.elapsed_time_end'])
    stats.update({'global.time_begin': begin, 'global.time_end': end, 'global.time': end - begin})
  # Computed statistics
  try:
    l1access = sum(stats['L1-D.load-misses']) + sum(stats['L1-D.store-misses'])
    l1time = sum(stats['L1-D.total-latency'])
    stats['l1misslat'] = l1time / float(l1access or 1)
  except KeyError:
    pass
  n_contended = sum(stats.get('pthread.pthread_mutex_lock_contended', [0]))
  n_locks = sum(stats.get('pthread.pthread_mutex_lock_count', [0]))
  stats['pthread_locks_contended'] = float(n_contended) / (n_locks or 1)
  # Femtosecond-to-cycles conversion factors, one per core
  freq = [ 1e9 * float(sniper_config.get_config(config, 'perf_model/core/frequency', idx)) for idx in range(ncores) ]
  stats['fs_to_cycles_cores'] = map(lambda f: f / 1e15, freq)
  # Backwards-compatible value for core 0; heterogeneous configurations
  # need fs_to_cycles_cores instead
  stats['fs_to_cycles'] = stats['fs_to_cycles_cores'][0]
  # DVFS-enabled runs: emulate cycle_count assuming constant (initial) frequency
  if 'performance_model.elapsed_time' in stats and 'performance_model.cycle_count' not in stats:
    stats['performance_model.cycle_count'] = [ stats['fs_to_cycles_cores'][idx] * stats['performance_model.elapsed_time'][idx] for idx in range(ncores) ]
  # IPC (1e16 fallback divisor avoids division by zero)
  stats['ipc'] = sum(stats.get('performance_model.instruction_count', [0])) / float(sum(stats.get('performance_model.cycle_count', [0])) or 1e16)
  return stats
print >> outputobj, '''\ <svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d"> <g style="stroke-width:.025in; fill:none"> ''' % (self.size_x + 2*self.margin_x, self.size_y + 2*self.margin_y) for order, svg in sorted(self.items, reverse = True): print >> outputobj, svg print >> outputobj, '''\ </g> </svg> ''' svg = Svg() ymax = None is_mesh = (sniper_config.get_config(config, 'network/memory_model_1') == 'emesh_hop_by_hop') if is_mesh: ncores = int(config['general/total_cores']) dimensions = int(sniper_config.get_config(config, 'network/emesh_hop_by_hop/dimensions')) concentration = int(sniper_config.get_config(config, 'network/emesh_hop_by_hop/concentration')) if dimensions == 1: width, height = int(math.ceil(1.0 * ncores / concentration)), 1 else: if config.get('network/emesh_hop_by_hop/size'): width, height = map(int, sniper_config.get_config(config, 'network/emesh_hop_by_hop/size').split(':')) else: width = int(math.sqrt(ncores / concentration)) height = int(math.ceil(1.0 * ncores / concentration / width)) assert width * height * concentration == ncores def lid_tile_root(lid):
def parse(self):
    """Build the per-core CPI stack from self.stats.

    Populates self.data[core][component] (values in cycles) plus the
    bookkeeping attributes ncores, cores, instrs, times, cycles_scale and
    fastforward_scale. Raises ValueError when no .cpi counters exist
    (i.e. the run did not use the interval core model).
    """
    ncores = int(self.config['general/total_cores'])
    # Prefer the detailed-model instruction count; when it is all zeros
    # fall back to core.instructions (e.g. cache-only / one-IPC runs).
    instrs = self.stats['performance_model.instruction_count'] if sum( self.stats['performance_model.instruction_count'] ) else self.stats['core.instructions']
    try:
      times = self.stats['performance_model.elapsed_time']
      cycles_scale = self.stats['fs_to_cycles_cores']
    except KeyError:
      # On error, assume that we are using the pre-DVFS version
      times = self.stats['performance_model.cycle_count']
      cycles_scale = [1. for idx in range(ncores)]
    time0_begin = self.stats['global.time_begin']
    time0_end = self.stats['global.time_end']
    # Re-derive per-core elapsed time relative to the interval start
    times = [ self.stats['performance_model.elapsed_time_end'][core] - time0_begin for core in range(ncores) ]
    # TODO: The below is needed for sampling. We're currently set up to work properly with the one-IPC model using in combination with --cache-only
    #if self.stats.get('fastforward_performance_model.fastforwarded_time', [0])[0]:
    #  fastforward_scale = times[0] / (times[0] - self.stats['fastforward_performance_model.fastforwarded_time'][0])
    #  fastforward_extrapolate = True
    #  times = [ t-f for t, f in zip(times, self.stats['fastforward_performance_model.fastforwarded_time']) ]
    #else:
    #  fastforward_scale = 1.
    #  fastforward_extrapolate = False
    if 'performance_model.cpiFastforwardTime' in self.stats:
      del self.stats['performance_model.cpiFastforwardTime']
    # Fast-forward extrapolation is disabled (see TODO above)
    fastforward_scale = 1.
    fastforward_extrapolate = False
    # data[core][component] accumulates cycles; defaultdict(long) starts each at 0
    data = collections.defaultdict(lambda: collections.defaultdict(long))
    for key, values in self.stats.items():
      if '.cpi' in key:
        if key.startswith('thread.'):
          # Ignore per-thread statistics
          continue
        if key.startswith('fastforward_timer.') and fastforward_extrapolate:
          continue
        # NOTE: `key` is rebound here to the component name after '.cpi'
        key = key.split('.cpi')[1]
        for core in range(ncores):
          data[core][key] += values[core] * cycles_scale[core]
    if not data:
      raise ValueError('No .cpi data found, simulation did not use the interval core model')
    # Split up cpiBase into 1/issue and path dependencies
    for core in range(ncores):
      if data[core].get('SyncMemAccess', 0) == data[core].get('SyncPthreadBarrier', 0):
        # Work around a bug in iGraphite where SyncMemAccess wrongly copied from SyncPthreadBarrier
        # Since SyncMemAccess usually isn't very big anyway, setting it to zero should be accurate enough
        # For simulations with a fixed version of iGraphite, the changes of SyncMemAccess being identical to
        # SyncPthreadBarrier, down to the last femtosecond, are slim, so this code shouldn't trigger
        data[core]['SyncMemAccess'] = 0
      if data[core].get('StartTime') == None and 'performance_model.idle_elapsed_time' in self.stats:
        # Fix a bug whereby the start time was not being reported in the CPI stacks correctly
        # NOTE(review): cycles_scale is a per-core *list*; multiplying it by a
        # long repeats the list and the subtraction below would then raise.
        # cycles_scale[core] was probably intended -- confirm before relying
        # on this branch.
        data[core]['StartTime'] = cycles_scale * self.stats['performance_model.idle_elapsed_time'][core] - \
                                  data[core]['SyncFutex'] - data[core]['SyncPthreadMutex'] - \
                                  data[core]['SyncPthreadCond'] - data[core]['SyncPthreadBarrier'] - \
                                  data[core]['Recv']
      # Critical path accounting
      # (loop-invariant table; defined inside the per-core loop in the original)
      cpContrMap = {
        # critical path components
        'interval_timer.cpContr_generic': 'PathInt',
        'interval_timer.cpContr_store': 'PathStore',
        'interval_timer.cpContr_load_other': 'PathLoadX',
        'interval_timer.cpContr_branch': 'PathBranch',
        'interval_timer.cpContr_load_l1': 'DataCacheL1',
        'interval_timer.cpContr_load_l2': 'DataCacheL2',
        'interval_timer.cpContr_load_l3': 'DataCacheL3',
        'interval_timer.cpContr_fp_addsub': 'PathFP',
        'interval_timer.cpContr_fp_muldiv': 'PathFP',
        # issue ports
        'interval_timer.cpContr_port0': 'PathP0',
        'interval_timer.cpContr_port1': 'PathP1',
        'interval_timer.cpContr_port2': 'PathP2',
        'interval_timer.cpContr_port34': 'PathP34',
        'interval_timer.cpContr_port5': 'PathP5',
        'interval_timer.cpContr_port05': 'PathP05',
        'interval_timer.cpContr_port015': 'PathP015',
      }
      # Warn about any critical-path counters we don't know how to map
      for k in self.stats:
        if k.startswith('interval_timer.cpContr_'):
          if k not in cpContrMap.keys():
            print 'Missing in cpContrMap: ', k
      # Keep 1/width as base CPI component, break down the remainder according to critical path contributors
      BaseBest = instrs[core] / float(sniper_config.get_config(self.config, 'perf_model/core/interval_timer/dispatch_width', core))
      BaseAct = data[core]['Base']
      BaseCp = BaseAct - BaseBest
      # `or 1` guards against division by zero when Base is 0
      scale = BaseCp / (BaseAct or 1)
      for cpName, cpiName in cpContrMap.items():
        val = float(self.stats.get(cpName, [0] * ncores)[core]) / 1e6
        data[core]['Base'] -= val * scale
        data[core][cpiName] = data[core].get(cpiName, 0) + val * scale
      # Issue width
      for key, values in self.stats.items():
        if key.startswith('interval_timer.detailed-cpiBase-'):
          if 'DispatchWidth' in key:
            if 'DispatchRate' not in key:
              # We already accounted for DispatchRate above, don't do it twice
              data[core]['Base'] -= values[core]
              data[core]['Issue'] = data[core].get('Issue', 0) + values[core]
      # Fix up large cpiSync fractions that started before but ended inside our interval
      time0_me = 'performance_model.elapsed_time_begin' in self.stats and self.stats['performance_model.elapsed_time_begin'][core] or 0
      if time0_me < time0_begin:
        time0_extra = time0_begin - time0_me
        # Number of cycles that weren't accounted for when starting this interval
        cycles_extra = time0_extra * cycles_scale[core]
        # Components that could be the cause of cycles_extra. It should be just one, but if there's many, we'll have to guess
        sync_components = dict([ (key, value) for key, value in data[core].items() if (key.startswith('Sync') or key == 'StartTime') and value > cycles_extra ])
        sync_total = sum(sync_components.values())
        for key, value in sync_components.items():
          data[core][key] -= cycles_extra * value / float(sync_total)
      # Whatever is left after all components is load imbalance
      data[core]['Imbalance'] = cycles_scale[core] * max(times) - sum(data[core].values())
    self.data = data
    self.ncores = ncores
    self.cores = range(ncores)
    self.instrs = instrs
    self.times = times
    self.cycles_scale = cycles_scale
    self.fastforward_scale = fastforward_scale
def parse(self):
    """Build the per-core CPI stack from self.stats (fast-forward-aware variant).

    Populates self.data[core][component] (values in cycles) plus ncores,
    cores, instrs, times, cycles_scale and fastforward_scale. Raises
    ValueError when no .cpi counters exist.
    """
    ncores = int(self.config['general/total_cores'])
    instrs = self.stats['performance_model.instruction_count']
    try:
      times = self.stats['performance_model.elapsed_time']
      cycles_scale = self.stats['fs_to_cycles_cores']
    except KeyError:
      # On error, assume that we are using the pre-DVFS version
      times = self.stats['performance_model.cycle_count']
      cycles_scale = [ 1. for idx in range(ncores) ]
    time0_begin = self.stats['global.time_begin']
    time0_end = self.stats['global.time_end']
    # Re-derive per-core elapsed time relative to the interval start
    times = [ self.stats['performance_model.elapsed_time_end'][core] - time0_begin for core in range(ncores) ]
    # Compensate for fast-forwarded (unsimulated) time, scaling results up accordingly
    if self.stats.get('fastforward_performance_model.fastforwarded_time', [0])[0]:
      fastforward_scale = times[0] / (times[0] - self.stats['fastforward_performance_model.fastforwarded_time'][0])
      times = [ t-f for t, f in zip(times, self.stats['fastforward_performance_model.fastforwarded_time']) ]
    else:
      fastforward_scale = 1.
    if 'performance_model.cpiFastforwardTime' in self.stats:
      del self.stats['performance_model.cpiFastforwardTime']
    # NOTE: inner dicts are plain defaultdicts with no factory, so missing
    # component keys raise rather than default to 0 (unlike the sibling
    # variant that uses defaultdict(long))
    data = collections.defaultdict(collections.defaultdict)
    for key, values in self.stats.items():
      if '.cpi' in key:
        if key.startswith('thread.'):
          # Ignore per-thread statistics
          continue
        # NOTE: `key` is rebound to the component name after '.cpi'
        key = key.split('.cpi')[1]
        for core in range(ncores):
          data[core][key] = values[core] * cycles_scale[core]
    if not data:
      raise ValueError('No .cpi data found, simulation did not use the interval core model')
    # Split up cpiBase into 1/issue and path dependencies
    for core in range(ncores):
      if data[core].get('SyncMemAccess', 0) == data[core].get('SyncPthreadBarrier', 0):
        # Work around a bug in iGraphite where SyncMemAccess wrongly copied from SyncPthreadBarrier
        # Since SyncMemAccess usually isn't very big anyway, setting it to zero should be accurate enough
        # For simulations with a fixed version of iGraphite, the changes of SyncMemAccess being identical to
        # SyncPthreadBarrier, down to the last femtosecond, are slim, so this code shouldn't trigger
        data[core]['SyncMemAccess'] = 0
      if data[core].get('StartTime') == None and 'performance_model.idle_elapsed_time' in self.stats:
        # Fix a bug whereby the start time was not being reported in the CPI stacks correctly
        # NOTE(review): cycles_scale is a per-core *list*; multiplying it by a
        # long repeats the list and the subtraction would then raise.
        # cycles_scale[core] was probably intended -- confirm before relying
        # on this branch.
        data[core]['StartTime'] = cycles_scale * self.stats['performance_model.idle_elapsed_time'][core] - \
                                  data[core]['SyncFutex'] - data[core]['SyncPthreadMutex'] - \
                                  data[core]['SyncPthreadCond'] - data[core]['SyncPthreadBarrier'] - \
                                  data[core]['Recv']
      # Critical path accounting
      # (loop-invariant table; defined inside the per-core loop in the original)
      cpContrMap = {
        # critical path components
        'interval_timer.cpContr_generic': 'PathInt',
        'interval_timer.cpContr_store': 'PathStore',
        'interval_timer.cpContr_load_other': 'PathLoadX',
        'interval_timer.cpContr_branch': 'PathBranch',
        'interval_timer.cpContr_load_l1': 'DataCacheL1',
        'interval_timer.cpContr_load_l2': 'DataCacheL2',
        'interval_timer.cpContr_load_l3': 'DataCacheL3',
        'interval_timer.cpContr_fp_addsub': 'PathFP',
        'interval_timer.cpContr_fp_muldiv': 'PathFP',
        # issue ports
        'interval_timer.cpContr_port0': 'PathP0',
        'interval_timer.cpContr_port1': 'PathP1',
        'interval_timer.cpContr_port2': 'PathP2',
        'interval_timer.cpContr_port34': 'PathP34',
        'interval_timer.cpContr_port5': 'PathP5',
        'interval_timer.cpContr_port05': 'PathP05',
        'interval_timer.cpContr_port015': 'PathP015',
      }
      # Warn about any critical-path counters we don't know how to map
      for k in self.stats:
        if k.startswith('interval_timer.cpContr_'):
          if k not in cpContrMap.keys():
            print 'Missing in cpContrMap: ', k
      # Keep 1/width as base CPI component, break down the remainder according to critical path contributors
      BaseBest = instrs[core] / float(sniper_config.get_config(self.config, 'perf_model/core/interval_timer/dispatch_width', core))
      BaseAct = data[core]['Base']
      BaseCp = BaseAct - BaseBest
      # `or 1` guards against division by zero when Base is 0
      scale = BaseCp / (BaseAct or 1)
      for cpName, cpiName in cpContrMap.items():
        val = float(self.stats.get(cpName, [0]*ncores)[core]) / 1e6
        data[core]['Base'] -= val * scale
        data[core][cpiName] = data[core].get(cpiName, 0) + val * scale
      # Issue width
      for key, values in self.stats.items():
        if key.startswith('interval_timer.detailed-cpiBase-'):
          if 'DispatchWidth' in key:
            if 'DispatchRate' not in key:
              # We already accounted for DispatchRate above, don't do it twice
              data[core]['Base'] -= values[core]
              data[core]['Issue'] = data[core].get('Issue', 0) + values[core]
      # Fix up large cpiSync fractions that started before but ended inside our interval
      time0_me = 'performance_model.elapsed_time_begin' in self.stats and self.stats['performance_model.elapsed_time_begin'][core] or 0
      if time0_me < time0_begin:
        time0_extra = time0_begin - time0_me
        # Number of cycles that weren't accounted for when starting this interval
        cycles_extra = time0_extra * cycles_scale[core]
        # Components that could be the cause of cycles_extra. It should be just one, but if there's many, we'll have to guess
        sync_components = dict([ (key, value) for key, value in data[core].items() if (key.startswith('Sync') or key == 'StartTime') and value > cycles_extra ])
        sync_total = sum(sync_components.values())
        for key, value in sync_components.items():
          data[core][key] -= cycles_extra*value/float(sync_total)
      # Whatever is left after all components is load imbalance
      data[core]['Imbalance'] = cycles_scale[core] * max(times) - sum(data[core].values())
    self.data = data
    self.ncores = ncores
    self.cores = range(ncores)
    self.instrs = instrs
    self.times = times
    self.cycles_scale = cycles_scale
    self.fastforward_scale = fastforward_scale
''' print >> outputobj, '''\ <svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d"> <g style="stroke-width:.025in; fill:none"> ''' % (self.size_x + 2 * self.margin_x, self.size_y + 2 * self.margin_y) for order, svg in sorted(self.items, reverse=True): print >> outputobj, svg print >> outputobj, '''\ </g> </svg> ''' svg = Svg() ymax = None is_mesh = (sniper_config.get_config( config, 'network/memory_model_1') == 'emesh_hop_by_hop') if is_mesh: ncores = int(config['general/total_cores']) dimensions = int( sniper_config.get_config( config, 'network/emesh_hop_by_hop/dimensions')) concentration = int( sniper_config.get_config( config, 'network/emesh_hop_by_hop/concentration')) if dimensions == 1: width, height = int(math.ceil(1.0 * ncores / concentration)), 1 else: if config.get('network/emesh_hop_by_hop/size'): width, height = map( int, sniper_config.get_config(