Example #1
0
 def format_config(name, lid):
   """Return the formatted cache size configured for component `name`.

   `name` is either one of the cache labels below or 'dram-cache'; `lid` is
   passed through to sniper_config.get_config as the index argument. Any
   other name yields an empty string.
   """
   section_by_label = {
     'L1-I': 'l1_icache',
     'L1-D': 'l1_dcache',
     'L2': 'l2_cache',
     'L3': 'l3_cache',
     'L4': 'l4_cache',
   }
   if name in section_by_label:
     option = 'perf_model/%s/cache_size' % section_by_label[name]
   elif name == 'dram-cache':
     option = 'perf_model/dram/cache/cache_size'
   else:
     return ''
   # The configured value is multiplied by 1024 before formatting
   # (presumably it is expressed in KB -- confirm against the config schema).
   raw_size = sniper_config.get_config(config, option, lid)
   return sniper_lib.format_size(1024 * long(raw_size), digits = 0)
Example #2
0
def parse_results_from_dir(resultsdir, partial=None, metrics=None):
    """Collect simulation results from an output directory.

    Reads ``sim.cfg``, then (when present) ``sim.info`` or ``graphite.out``,
    the statistics exposed by ``sniper_stats.SniperStats`` and finally
    ``power.py``.

    Args:
        resultsdir: directory containing the simulation output files.
        partial: optional sequence whose first two items name the begin and
            end statistics markers; defaults to ``roi-begin``/``roi-end``.
        metrics: passed through unchanged to ``SniperStats.parse_stats``.

    Returns:
        A list of ``(key, core, value)`` tuples; ``core`` is ``-1`` for
        entries that are not tied to a single core.

    Raises:
        SniperResultsException: if ``sim.cfg`` does not exist.
    """
    results = []

    ## sim.cfg
    simcfg = os.path.join(resultsdir, "sim.cfg")
    if not os.path.exists(simcfg):
        raise SniperResultsException("No valid configuration found")
    # Use a context manager so the handle is closed deterministically.
    with open(simcfg) as fp:
        simcfg = sniper_config.parse_config(fp.read())
    ncores = int(simcfg["general/total_cores"])

    results += [("ncores", -1, ncores)]
    # The configured frequency is scaled by 1e9 (presumably GHz -> Hz).
    results += [
        ("corefreq", idx, 1e9 * float(sniper_config.get_config(simcfg, "perf_model/core/frequency", idx)))
        for idx in range(ncores)
    ]

    ## sim.info or graphite.out
    siminfo = None
    for candidate in ("sim.info", "graphite.out"):
        infofile = os.path.join(resultsdir, candidate)
        if os.path.exists(infofile):
            with open(infofile) as fp:
                # NOTE(security): eval() executes whatever the file contains;
                # only use this on result directories you trust.
                siminfo = eval(fp.read())
            break
    if siminfo:
        # If we're called from inside run-graphite, sim.info may not yet exist
        results.append(("walltime", -1, siminfo["t_elapsed"]))
        results.append(("vmem", -1, siminfo["vmem"]))

    ## sim.stats
    if partial:
        k1, k2 = partial[:2]
    else:
        k1, k2 = "roi-begin", "roi-end"

    stats = sniper_stats.SniperStats(resultsdir)
    results += stats.parse_stats((k1, k2), ncores, metrics=metrics)

    if not partial:
        walltime = [v for k, _, v in results if k == "time.walltime"]
        instrs = [v for k, _, v in results if k == "core.instructions"]
        if walltime and instrs:
            walltime = walltime[0] / 1e6  # microseconds -> seconds
            instrs = sum(instrs)
            results.append(("roi.walltime", -1, walltime))
            results.append(("roi.instrs", -1, instrs))
            results.append(("roi.ipstotal", -1, instrs / walltime))
            results.append(("roi.ipscore", -1, instrs / (walltime * ncores)))

    ## power.py
    powerfile = os.path.join(resultsdir, "power.py")
    if os.path.exists(powerfile):
        with open(powerfile) as fp:
            powercode = fp.read()
        # Execute into an explicit namespace: rebinding a function local
        # through a bare exec only works on Python 2 and silently leaves
        # `power` empty on Python 3.  Module globals stay readable.
        # NOTE(security): exec() runs arbitrary code from the results dir.
        namespace = {"power": {}}
        exec(powercode, globals(), namespace)
        for key, value in namespace["power"].items():
            results.append(("power.%s" % key, -1, value))

    return results
  def __init__(self, resultsdir = '.'):
    """Load per-allocation-site memory statistics from `resultsdir`.

    Reads the simulation results through sniper_lib.get_results and then
    parses the tab-separated `sim.memorytracker` file, which holds three
    record types selected by their first field:
      W  list of hit-where names          -> self.hitwheres
      F  function (eip, name, location)   -> self.functions
      S  allocation site with key=value statistics -> self.sites / self.siteids

    The *_unknown attributes start as copies of the global totals and are
    reduced by every value attributed to a site.

    Raises:
      IOError: when `sim.memorytracker` does not exist.
      ValueError: when a line starts with an unrecognized record type.
    """
    filename = os.path.join(resultsdir, 'sim.memorytracker')
    if not os.path.exists(filename):
      raise IOError('Cannot find output file %s' % filename)

    results = sniper_lib.get_results(resultsdir = resultsdir)
    config = results['config']
    stats = results['results']

    # Keys are the part of the stat name after 'L1-D.loads-where-' / 'L1-D.stores-where-'
    self.hitwhere_load_global = dict([ (k.split('-', 3)[3], sum(v)) for k, v in stats.items() if k.startswith('L1-D.loads-where-') ])
    self.hitwhere_load_unknown = self.hitwhere_load_global.copy()
    self.hitwhere_store_global = dict([ (k.split('-', 3)[3], sum(v)) for k, v in stats.items() if k.startswith('L1-D.stores-where-') ])
    self.hitwhere_store_unknown = self.hitwhere_store_global.copy()

    # Evictions are summed over the cache level given by perf_model/cache/levels
    llc_level = int(sniper_config.get_config(config, 'perf_model/cache/levels'))
    self.evicts_global = sum([ sum(v) for k, v in stats.items() if re.match('L%d.evict-.$' % llc_level, k) ])
    self.evicts_unknown = self.evicts_global

    self.functions = {}
    self.sites = {}
    self.siteids = {}

    # Context manager: the original left the file handle open.
    with open(filename) as fp:
      for line in fp:
        if line.startswith('W\t'):
          self.hitwheres = line.strip().split('\t')[1].strip(',').split(',')
        elif line.startswith('F\t'):
          _, eip, name, location = line.strip().split('\t')
          self.functions[eip] = Function(eip, name, location)
        elif line.startswith('S\t'):
          fields = line.strip().split('\t')
          siteid = fields[1]
          stack = fields[2].strip(':').split(':')
          stack = self.collapseStack(stack)
          site = { 'numallocations': 0, 'totalallocated': 0, 'hitwhereload': {}, 'hitwherestore': {}, 'evictedby': {} }
          for data in fields[3:]:
            key, value = data.split('=')
            if key == 'num-allocations':
              site['numallocations'] = long(value)
            elif key == 'total-allocated':
              site['totalallocated'] = long(value)
            elif key == 'hit-where':
              # Materialize the list: it is traversed twice (loads, then stores)
              entries = [ entry.split(':') for entry in value.strip(',').split(',') ]
              # First character selects load ('L') or store ('S'); the rest is the hit-where name
              site['hitwhereload'] = dict([ (s[1:], long(v)) for s, v in entries if s.startswith('L') ])
              for k, v in site['hitwhereload'].items():
                self.hitwhere_load_unknown[k] -= v
              site['hitwherestore'] = dict([ (s[1:], long(v)) for s, v in entries if s.startswith('S') ])
              for k, v in site['hitwherestore'].items():
                self.hitwhere_store_unknown[k] -= v
            elif key == 'evicted-by':
              pairs = [ entry.split(':') for entry in value.strip(',').split(',') ]
              site['evictedby'] = dict([ (s, long(v)) for s, v in pairs ])
              self.evicts_unknown -= sum(site['evictedby'].values())
          self.siteids[siteid] = stack
          # Several site ids can collapse to the same stack: merge their statistics
          if stack in self.sites:
            self.sites[stack].update(**site)
          else:
            self.sites[stack] = AllocationSite(stack, **site)
        else:
          raise ValueError('Invalid format %s' % line)
Example #4
0
def parse_results_from_dir(resultsdir, partial=None, metrics=None):
    """Collect simulation results from an output directory.

    Reads ``sim.cfg``, then (when present) ``sim.info`` or ``graphite.out``,
    the statistics exposed by ``sniper_stats.SniperStats`` and finally
    ``power.py``.

    Args:
        resultsdir: directory containing the simulation output files.
        partial: optional sequence whose first two items name the begin and
            end statistics markers; defaults to ``roi-begin``/``roi-end``.
        metrics: passed through unchanged to ``SniperStats.parse_stats``.

    Returns:
        A list of ``(key, core, value)`` tuples; ``core`` is ``-1`` for
        entries that are not tied to a single core.

    Raises:
        SniperResultsException: if ``sim.cfg`` does not exist.
    """
    results = []

    ## sim.cfg
    simcfg = os.path.join(resultsdir, 'sim.cfg')
    if not os.path.exists(simcfg):
        raise SniperResultsException("No valid configuration found")
    # Use a context manager so the handle is closed deterministically.
    with open(simcfg) as fp:
        simcfg = sniper_config.parse_config(fp.read())
    ncores = int(simcfg['general/total_cores'])

    results += [('ncores', -1, ncores)]
    # The configured frequency is scaled by 1e9 (presumably GHz -> Hz).
    results += [('corefreq', idx, 1e9 * float(
        sniper_config.get_config(simcfg, 'perf_model/core/frequency', idx)))
                for idx in range(ncores)]

    ## sim.info or graphite.out
    siminfo = None
    for candidate in ('sim.info', 'graphite.out'):
        infofile = os.path.join(resultsdir, candidate)
        if os.path.exists(infofile):
            with open(infofile) as fp:
                # NOTE(security): eval() executes whatever the file contains;
                # only use this on result directories you trust.
                siminfo = eval(fp.read())
            break
    if siminfo:
        # If we're called from inside run-graphite, sim.info may not yet exist
        results.append(('walltime', -1, siminfo['t_elapsed']))
        results.append(('vmem', -1, siminfo['vmem']))

    ## sim.stats
    if partial:
        k1, k2 = partial[:2]
    else:
        k1, k2 = 'roi-begin', 'roi-end'

    stats = sniper_stats.SniperStats(resultsdir)
    results += stats.parse_stats((k1, k2), ncores, metrics=metrics)

    if not partial:
        walltime = [v for k, _, v in results if k == 'time.walltime']
        instrs = [v for k, _, v in results if k == 'core.instructions']
        if walltime and instrs:
            walltime = walltime[0] / 1e6  # microseconds -> seconds
            instrs = sum(instrs)
            results.append(('roi.walltime', -1, walltime))
            results.append(('roi.instrs', -1, instrs))
            results.append(('roi.ipstotal', -1, instrs / walltime))
            results.append(('roi.ipscore', -1, instrs / (walltime * ncores)))

    ## power.py
    powerfile = os.path.join(resultsdir, 'power.py')
    if os.path.exists(powerfile):
        with open(powerfile) as fp:
            powercode = fp.read()
        # Execute into an explicit namespace: a bare exec() cannot rebind the
        # function-local `power` on Python 3, which silently dropped every
        # power.* entry.  Module globals stay readable.
        # NOTE(security): exec() runs arbitrary code from the results dir.
        namespace = {'power': {}}
        exec(powercode, globals(), namespace)
        for key, value in namespace['power'].items():
            results.append(('power.%s' % key, -1, value))

    return results
  def __init__(self, resultsdir = '.'):
    """Load the call-stack profile stored in `sim.rtntracefull`.

    The first line of the trace holds tab-separated column headers. Lines
    starting with ':' define a function (eip, name, location); every other
    line holds a ':'-separated call stack followed by one integer per
    header column.

    Populates self.functions, self.calls (keyed by translated stack),
    self.children (parent stack -> child stacks), self.roots, self.headers,
    self.fs_to_cycles and self.totals['total_coretime'].

    Raises:
      IOError: when `sim.rtntracefull` does not exist.
    """
    filename = os.path.join(resultsdir, 'sim.rtntracefull')
    if not os.path.exists(filename):
      raise IOError('Cannot find trace file %s' % filename)

    results = sniper_lib.get_results(resultsdir = resultsdir)
    config = results['config']
    stats = results['results']
    # Frequency is scaled by 1e9 and then divided by 1e15
    # (presumably GHz -> cycles per femtosecond; confirm units).
    freq = 1e9 * float(sniper_config.get_config(config, 'perf_model/core/frequency'))
    self.fs_to_cycles = freq / 1e15

    self.functions = {}
    self.calls = {}
    self.children = collections.defaultdict(set)
    self.roots = set()
    self.totals = {}

    # Context manager: the original never closed the trace file.
    with open(filename) as fp:
      self.headers = fp.readline().strip().split('\t')

      for line in fp:
        if line.startswith(':'):
          eip, name, location = line.strip().split('\t')
          eip = eip[1:]
          self.functions[eip] = Function(eip, name, location)
        else:
          fields = line.strip().split('\t')
          stack = fields[0].split(':')
          eip = stack[-1]
          stack = ':'.join(map(self.translateEip, stack))
          data = dict(zip(self.headers[1:], map(long, fields[1:])))
          if stack in self.calls:
            self.calls[stack].add(data)
          else:
            self.calls[stack] = Call(str(self.functions[eip]), eip, stack, data)
            # The parent is the stack without its last frame
            parent = stack.rpartition(':')[0]
            self.children[parent].add(stack)

    # Roots are the calls that are nobody's child
    self.roots = set(self.calls.keys())
    for parent in self.calls:
      for child in self.children[parent]:
        self.roots.remove(child)

    # Construct a list of calls where each child is ordered before its parent.
    calls_ordered = collections.deque()
    calls_tovisit = collections.deque(self.roots)
    while calls_tovisit:
      stack = calls_tovisit.pop()
      calls_ordered.appendleft(stack)
      calls_tovisit.extend(self.children[stack])
    # Now implement a non-recursive version of buildTotal, which requires that each
    # function's children have been visited before processing the parent,
    # by visiting calls_ordered in left-to-right order.
    for stack in calls_ordered:
      self.calls[stack].buildTotal(self)

    ncores = int(config['general/total_cores'])
    self.totals['total_coretime'] = ncores * stats['barrier.global_time'][0]
 def format_config(name, lid):
     """Return the formatted cache size configured for component `name`.

     `name` is one of the cache labels below or 'dram-cache'; `lid` is handed
     to sniper_config.get_config as the index argument. Unknown names give ''.
     """
     if name == 'dram-cache':
         option = 'perf_model/dram/cache/cache_size'
     else:
         section = {
             'L1-I': 'l1_icache',
             'L1-D': 'l1_dcache',
             'L2': 'l2_cache',
             'L3': 'l3_cache',
             'L4': 'l4_cache',
         }.get(name)
         if section is None:
             return ''
         option = 'perf_model/%s/cache_size' % section
     # The configured value is multiplied by 1024 before formatting
     # (presumably it is expressed in KB -- confirm against the config schema).
     raw_size = sniper_config.get_config(config, option, lid)
     return sniper_lib.format_size(1024 * long(raw_size), digits=0)
Example #7
0
  def __init__(self, resultsdir = '.'):
    """Load the call-stack profile stored in `sim.rtntracefull`.

    The first line of the trace holds tab-separated column headers. Lines
    starting with ':' define a function (eip, name, location); every other
    line holds a ':'-separated call stack followed by one integer per
    header column.

    Populates self.functions, self.calls (keyed by translated stack),
    self.children (parent stack -> child stacks), self.roots, self.headers
    and self.fs_to_cycles; self.totals is initialized empty.

    Raises:
      IOError: when `sim.rtntracefull` does not exist.
    """
    filename = os.path.join(resultsdir, 'sim.rtntracefull')
    if not os.path.exists(filename):
      raise IOError('Cannot find trace file %s' % filename)

    config = sniper_lib.get_config(resultsdir = resultsdir)
    # Frequency is scaled by 1e9 and then divided by 1e15
    # (presumably GHz -> cycles per femtosecond; confirm units).
    freq = 1e9 * float(sniper_config.get_config(config, 'perf_model/core/frequency'))
    self.fs_to_cycles = freq / 1e15

    self.functions = {}
    self.calls = {}
    self.children = collections.defaultdict(set)
    self.roots = set()
    self.totals = {}

    # Context manager: the original never closed the trace file.
    with open(filename) as fp:
      self.headers = fp.readline().strip().split('\t')

      for line in fp:
        if line.startswith(':'):
          eip, name, location = line.strip().split('\t')
          eip = eip[1:]
          self.functions[eip] = Function(eip, name, location)
        else:
          fields = line.strip().split('\t')
          stack = fields[0].split(':')
          eip = stack[-1]
          stack = ':'.join(map(self.translateEip, stack))
          data = dict(zip(self.headers[1:], map(long, fields[1:])))
          if stack in self.calls:
            self.calls[stack].add(data)
          else:
            self.calls[stack] = Call(str(self.functions[eip]), eip, stack, data)
            # The parent is the stack without its last frame
            parent = stack.rpartition(':')[0]
            self.children[parent].add(stack)

    # Roots are the calls that are nobody's child
    self.roots = set(self.calls.keys())
    for parent in self.calls:
      for child in self.children[parent]:
        self.roots.remove(child)

    # Construct a list of calls where each child is ordered before its parent.
    calls_ordered = collections.deque()
    calls_tovisit = collections.deque(self.roots)
    while calls_tovisit:
      stack = calls_tovisit.pop()
      calls_ordered.appendleft(stack)
      calls_tovisit.extend(self.children[stack])
    # Now implement a non-recursive version of buildTotal, which requires that each
    # function's children have been visited before processing the parent,
    # by visiting calls_ordered in left-to-right order.
    for stack in calls_ordered:
      self.calls[stack].buildTotal(self)
Example #8
0
def stats_process(config, results):
  """Turn a flat result list into a stats dictionary and add derived metrics.

  Args:
    config: configuration mapping; 'general/total_cores' and the per-core
      'perf_model/core/frequency' values are read from it.
    results: iterable of (key, core, value) tuples. Entries with core == -1
      are stored as scalars, all others as per-core lists.

  Returns:
    A dict mapping each key to its value (scalar or per-core list), extended
    with 'global.time*', 'l1misslat', 'pthread_locks_contended',
    'fs_to_cycles_cores', 'fs_to_cycles', emulated cycle counts and 'ipc'
    where the required inputs are available.
  """
  ncores = int(config['general/total_cores'])
  stats = {}
  for key, core, value in results:
    if core == -1:
      stats[key] = value
    else:
      if key not in stats:
        stats[key] = [0]*ncores
      if core < len(stats[key]):
        stats[key][core] = value
      else:
        # Index beyond the current list: pad the gap with zeros
        nskipped = core - len(stats[key])
        stats[key] += [0]*nskipped + [value]
  # Figure out when the interval of time, represented by partial, actually begins/ends
  # Since cores can account for time in chunks, per-core time can be
  # both before (``wakeup at future time X'') or after (``sleep until woken up'')
  # the current time.
  if 'barrier.global_time_begin' in stats:
    # Most accurate: ask the barrier
    time0_begin = stats['barrier.global_time_begin'][0]
    time0_end = stats['barrier.global_time_end'][0]
    stats.update({'global.time_begin': time0_begin, 'global.time_end': time0_end, 'global.time': time0_end - time0_begin})
  elif 'performance_model.elapsed_time_begin' in stats:
    # Guess based on core that has the latest time (future wakeup is less common than sleep on futex)
    time0_begin = max(stats['performance_model.elapsed_time_begin'])
    time0_end = max(stats['performance_model.elapsed_time_end'])
    stats.update({'global.time_begin': time0_begin, 'global.time_end': time0_end, 'global.time': time0_end - time0_begin})
  # add computed stats
  try:
    l1access = sum(stats['L1-D.load-misses']) + sum(stats['L1-D.store-misses'])
    l1time = sum(stats['L1-D.total-latency'])
    stats['l1misslat'] = l1time / float(l1access or 1)
  except KeyError:
    # L1-D statistics are optional; skip the derived latency when absent
    pass
  stats['pthread_locks_contended'] = float(sum(stats.get('pthread.pthread_mutex_lock_contended', [0]))) / (sum(stats.get('pthread.pthread_mutex_lock_count', [0])) or 1)
  # femtosecond to cycles conversion
  freq = [ 1e9 * float(sniper_config.get_config(config, 'perf_model/core/frequency', idx)) for idx in range(ncores) ]
  # Build a real list: the result is indexed below, which a lazy map() object does not support
  stats['fs_to_cycles_cores'] = [ f / 1e15 for f in freq ]
  # Backwards compatible version returning fs_to_cycles for core 0, for heterogeneous configurations fs_to_cycles_cores needs to be used
  stats['fs_to_cycles'] = stats['fs_to_cycles_cores'][0]
  # DVFS-enabled runs: emulate cycle_count assuming constant (initial) frequency
  if 'performance_model.elapsed_time' in stats and 'performance_model.cycle_count' not in stats:
    stats['performance_model.cycle_count'] = [ stats['fs_to_cycles_cores'][idx] * stats['performance_model.elapsed_time'][idx] for idx in range(ncores) ]
  if 'thread.nonidle_elapsed_time' in stats and 'thread.nonidle_cycle_count' not in stats:
    stats['thread.nonidle_cycle_count'] = [ long(stats['fs_to_cycles'] * t) for t in stats['thread.nonidle_elapsed_time'] ]
  # IPC
  stats['ipc'] = sum(stats.get('performance_model.instruction_count', [0])) / float(sum(stats.get('performance_model.cycle_count', [0])) or 1e16)

  return stats
Example #9
0
def stats_process(config, results):
  """Turn a flat result list into a stats dictionary and add derived metrics.

  Args:
    config: configuration mapping; 'general/total_cores' and the per-core
      'perf_model/core/frequency' values are read from it.
    results: iterable of (key, core, value) tuples. Entries with core == -1
      are stored as scalars, all others as per-core lists.

  Returns:
    A dict mapping each key to its value (scalar or per-core list), extended
    with 'global.time*', 'l1misslat', 'pthread_locks_contended',
    'fs_to_cycles_cores', 'fs_to_cycles', an emulated cycle count and 'ipc'
    where the required inputs are available.
  """
  ncores = int(config['general/total_cores'])
  stats = {}
  for key, core, value in results:
    if core == -1:
      stats[key] = value
    else:
      if key not in stats:
        stats[key] = [0]*ncores
      if core < len(stats[key]):
        stats[key][core] = value
      else:
        # Index beyond the current list: pad the gap with zeros
        nskipped = core - len(stats[key])
        stats[key] += [0]*nskipped + [value]
  # Figure out when the interval of time, represented by partial, actually begins/ends
  # Since cores can account for time in chunks, per-core time can be
  # both before (``wakeup at future time X'') or after (``sleep until woken up'')
  # the current time.
  if 'barrier.global_time_begin' in stats:
    # Most accurate: ask the barrier
    time0_begin = stats['barrier.global_time_begin'][0]
    time0_end = stats['barrier.global_time_end'][0]
    stats.update({'global.time_begin': time0_begin, 'global.time_end': time0_end, 'global.time': time0_end - time0_begin})
  elif 'performance_model.elapsed_time_begin' in stats:
    # Guess based on core that has the latest time (future wakeup is less common than sleep on futex)
    time0_begin = max(stats['performance_model.elapsed_time_begin'])
    time0_end = max(stats['performance_model.elapsed_time_end'])
    stats.update({'global.time_begin': time0_begin, 'global.time_end': time0_end, 'global.time': time0_end - time0_begin})
  # add computed stats
  try:
    l1access = sum(stats['L1-D.load-misses']) + sum(stats['L1-D.store-misses'])
    l1time = sum(stats['L1-D.total-latency'])
    stats['l1misslat'] = l1time / float(l1access or 1)
  except KeyError:
    # L1-D statistics are optional; skip the derived latency when absent
    pass
  stats['pthread_locks_contended'] = float(sum(stats.get('pthread.pthread_mutex_lock_contended', [0]))) / (sum(stats.get('pthread.pthread_mutex_lock_count', [0])) or 1)
  # femtosecond to cycles conversion
  freq = [ 1e9 * float(sniper_config.get_config(config, 'perf_model/core/frequency', idx)) for idx in range(ncores) ]
  # Build a real list: the result is indexed below, which a lazy map() object does not support
  stats['fs_to_cycles_cores'] = [ f / 1e15 for f in freq ]
  # Backwards compatible version returning fs_to_cycles for core 0, for heterogeneous configurations fs_to_cycles_cores needs to be used
  stats['fs_to_cycles'] = stats['fs_to_cycles_cores'][0]
  # DVFS-enabled runs: emulate cycle_count assuming constant (initial) frequency
  if 'performance_model.elapsed_time' in stats and 'performance_model.cycle_count' not in stats:
    stats['performance_model.cycle_count'] = [ stats['fs_to_cycles_cores'][idx] * stats['performance_model.elapsed_time'][idx] for idx in range(ncores) ]
  # IPC
  stats['ipc'] = sum(stats.get('performance_model.instruction_count', [0])) / float(sum(stats.get('performance_model.cycle_count', [0])) or 1e16)

  return stats
Example #10
0
        print >> outputobj, '''\
<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d">
<g style="stroke-width:.025in; fill:none">
  ''' % (self.size_x + 2*self.margin_x, self.size_y + 2*self.margin_y)
        for order, svg in sorted(self.items, reverse = True):
          print >> outputobj, svg
        print >> outputobj, '''\
</g>
</svg>
  '''

    svg = Svg()
    ymax = None


    is_mesh = (sniper_config.get_config(config, 'network/memory_model_1') == 'emesh_hop_by_hop')
    if is_mesh:
      ncores = int(config['general/total_cores'])
      dimensions = int(sniper_config.get_config(config, 'network/emesh_hop_by_hop/dimensions'))
      concentration = int(sniper_config.get_config(config, 'network/emesh_hop_by_hop/concentration'))
      if dimensions == 1:
        width, height = int(math.ceil(1.0 * ncores / concentration)), 1
      else:
        if config.get('network/emesh_hop_by_hop/size'):
          width, height = map(int, sniper_config.get_config(config, 'network/emesh_hop_by_hop/size').split(':'))
        else:
          width = int(math.sqrt(ncores / concentration))
          height = int(math.ceil(1.0 * ncores / concentration / width))
      assert width * height * concentration == ncores

      def lid_tile_root(lid):
Example #11
0
    def parse(self):
        """Build the per-core CPI stack from ``self.stats`` and ``self.config``.

        Every statistic whose name contains ``.cpi`` (except ``thread.*``
        entries) is accumulated per core, scaled by that core's
        ``cycles_scale`` factor, and then post-processed: the ``Base``
        component is split according to the critical-path contributors,
        dispatch-width stalls are moved to ``Issue``, oversized sync
        components are trimmed and an ``Imbalance`` component is added.

        Sets ``self.data``, ``self.ncores``, ``self.cores``, ``self.instrs``,
        ``self.times``, ``self.cycles_scale`` and ``self.fastforward_scale``.

        Raises:
            ValueError: if the statistics contain no ``.cpi`` entries.
        """
        ncores = int(self.config['general/total_cores'])
        instrs = self.stats['performance_model.instruction_count']
        if not sum(instrs):
            # Fall back when the performance model reported no instructions
            instrs = self.stats['core.instructions']
        try:
            # `times` is recomputed below; this lookup only decides which
            # scale factors apply.
            times = self.stats['performance_model.elapsed_time']
            cycles_scale = self.stats['fs_to_cycles_cores']
        except KeyError:
            # On error, assume that we are using the pre-DVFS version
            times = self.stats['performance_model.cycle_count']
            cycles_scale = [1. for idx in range(ncores)]
        time0_begin = self.stats['global.time_begin']
        time0_end = self.stats['global.time_end']
        times = [
            self.stats['performance_model.elapsed_time_end'][core] - time0_begin
            for core in range(ncores)
        ]

        # TODO: The below is needed for sampling. We're currently set up to work properly with the one-IPC model using in combination with --cache-only
        #if self.stats.get('fastforward_performance_model.fastforwarded_time', [0])[0]:
        #  fastforward_scale = times[0] / (times[0] - self.stats['fastforward_performance_model.fastforwarded_time'][0])
        #  fastforward_extrapolate = True
        #  times = [ t-f for t, f in zip(times, self.stats['fastforward_performance_model.fastforwarded_time']) ]
        #else:
        #  fastforward_scale = 1.
        #  fastforward_extrapolate = False
        if 'performance_model.cpiFastforwardTime' in self.stats:
            del self.stats['performance_model.cpiFastforwardTime']
        fastforward_scale = 1.
        fastforward_extrapolate = False

        data = collections.defaultdict(lambda: collections.defaultdict(long))
        for key, values in self.stats.items():
            if '.cpi' in key:
                if key.startswith('thread.'):
                    # Ignore per-thread statistics
                    continue
                if key.startswith('fastforward_timer.') and fastforward_extrapolate:
                    continue
                # Component name is whatever follows '.cpi' in the stat name
                key = key.split('.cpi')[1]
                for core in range(ncores):
                    data[core][key] += values[core] * cycles_scale[core]

        if not data:
            raise ValueError(
                'No .cpi data found, simulation did not use the interval core model'
            )

        # Critical path accounting: stat name -> CPI component it is charged to.
        # Loop-invariant, so it is built once instead of once per core.
        cpContrMap = {
            # critical path components
            'interval_timer.cpContr_generic': 'PathInt',
            'interval_timer.cpContr_store': 'PathStore',
            'interval_timer.cpContr_load_other': 'PathLoadX',
            'interval_timer.cpContr_branch': 'PathBranch',
            'interval_timer.cpContr_load_l1': 'DataCacheL1',
            'interval_timer.cpContr_load_l2': 'DataCacheL2',
            'interval_timer.cpContr_load_l3': 'DataCacheL3',
            'interval_timer.cpContr_fp_addsub': 'PathFP',
            'interval_timer.cpContr_fp_muldiv': 'PathFP',
            # issue ports
            'interval_timer.cpContr_port0': 'PathP0',
            'interval_timer.cpContr_port1': 'PathP1',
            'interval_timer.cpContr_port2': 'PathP2',
            'interval_timer.cpContr_port34': 'PathP34',
            'interval_timer.cpContr_port5': 'PathP5',
            'interval_timer.cpContr_port05': 'PathP05',
            'interval_timer.cpContr_port015': 'PathP015',
        }

        # Split up cpiBase into 1/issue and path dependencies
        for core in range(ncores):
            if data[core].get('SyncMemAccess', 0) == data[core].get('SyncPthreadBarrier', 0):
                # Work around a bug in iGraphite where SyncMemAccess wrongly copied from SyncPthreadBarrier
                # Since SyncMemAccess usually isn't very big anyway, setting it to zero should be accurate enough
                # For simulations with a fixed version of iGraphite, the chances of SyncMemAccess being identical to
                #   SyncPthreadBarrier, down to the last femtosecond, are slim, so this code shouldn't trigger
                data[core]['SyncMemAccess'] = 0
            if data[core].get('StartTime') is None and 'performance_model.idle_elapsed_time' in self.stats:
                # Fix a bug whereby the start time was not being reported in the CPI stacks correctly.
                # Scale by this core's factor: multiplying by the whole cycles_scale list was a bug.
                data[core]['StartTime'] = cycles_scale[core] * self.stats['performance_model.idle_elapsed_time'][core] - \
                                          data[core]['SyncFutex']       - data[core]['SyncPthreadMutex']    - \
                                          data[core]['SyncPthreadCond'] - data[core]['SyncPthreadBarrier']  - \
                                          data[core]['Recv']
            for k in self.stats:
                if k.startswith('interval_timer.cpContr_'):
                    if k not in cpContrMap:
                        # Same output as the former print statement (two spaces before the key)
                        print('Missing in cpContrMap:  %s' % k)
            # Keep 1/width as base CPI component, break down the remainder according to critical path contributors
            BaseBest = instrs[core] / float(
                sniper_config.get_config(
                    self.config,
                    'perf_model/core/interval_timer/dispatch_width', core))
            BaseAct = data[core]['Base']
            BaseCp = BaseAct - BaseBest
            scale = BaseCp / (BaseAct or 1)
            for cpName, cpiName in cpContrMap.items():
                val = float(self.stats.get(cpName, [0] * ncores)[core]) / 1e6
                data[core]['Base'] -= val * scale
                data[core][cpiName] = data[core].get(cpiName, 0) + val * scale
            # Issue width
            for key, values in self.stats.items():
                if key.startswith('interval_timer.detailed-cpiBase-'):
                    if 'DispatchWidth' in key:
                        if 'DispatchRate' not in key:  # We already accounted for DispatchRate above, don't do it twice
                            data[core]['Base'] -= values[core]
                            data[core]['Issue'] = data[core].get('Issue', 0) + values[core]
            # Fix up large cpiSync fractions that started before but ended inside our interval
            if 'performance_model.elapsed_time_begin' in self.stats:
                time0_me = self.stats['performance_model.elapsed_time_begin'][core]
            else:
                time0_me = 0
            if time0_me < time0_begin:
                time0_extra = time0_begin - time0_me
                #    Number of cycles that weren't accounted for when starting this interval
                cycles_extra = time0_extra * cycles_scale[core]
                #    Components that could be the cause of cycles_extra. It should be just one, but if there's many, we'll have to guess
                sync_components = dict([
                    (key, value) for key, value in data[core].items()
                    if (key.startswith('Sync') or key == 'StartTime')
                    and value > cycles_extra
                ])
                sync_total = sum(sync_components.values())
                # Remove the extra cycles proportionally to each candidate's size
                for key, value in sync_components.items():
                    data[core][key] -= cycles_extra * value / float(sync_total)
            data[core]['Imbalance'] = cycles_scale[core] * max(times) - sum(data[core].values())

        self.data = data
        self.ncores = ncores
        self.cores = range(ncores)
        self.instrs = instrs
        self.times = times
        self.cycles_scale = cycles_scale
        self.fastforward_scale = fastforward_scale
Example #12
0
  def parse(self):
    """Build the per-core CPI stack from self.stats and self.config.

    Every statistic whose name contains '.cpi' (except 'thread.*' entries)
    is stored per core, scaled by that core's cycles_scale factor, and then
    post-processed: the 'Base' component is split according to the
    critical-path contributors, dispatch-width stalls are moved to 'Issue',
    oversized sync components are trimmed and 'Imbalance' is added.

    Sets self.data, self.ncores, self.cores, self.instrs, self.times,
    self.cycles_scale and self.fastforward_scale.

    Raises:
      ValueError: if the statistics contain no '.cpi' entries.
    """
    ncores = int(self.config['general/total_cores'])
    instrs = self.stats['performance_model.instruction_count']
    try:
      # `times` is recomputed below; this lookup only decides which scale factors apply
      times = self.stats['performance_model.elapsed_time']
      cycles_scale = self.stats['fs_to_cycles_cores']
    except KeyError:
      # On error, assume that we are using the pre-DVFS version
      times = self.stats['performance_model.cycle_count']
      cycles_scale = [ 1. for idx in range(ncores) ]
    time0_begin = self.stats['global.time_begin']
    time0_end = self.stats['global.time_end']
    times = [ self.stats['performance_model.elapsed_time_end'][core] - time0_begin for core in range(ncores) ]

    fastforwarded = self.stats.get('fastforward_performance_model.fastforwarded_time', [0])
    if fastforwarded[0]:
      # Force float division: with integer statistics the ratio was truncated
      fastforward_scale = float(times[0]) / (times[0] - fastforwarded[0])
      times = [ t-f for t, f in zip(times, fastforwarded) ]
    else:
      fastforward_scale = 1.
    if 'performance_model.cpiFastforwardTime' in self.stats:
      del self.stats['performance_model.cpiFastforwardTime']


    # NOTE: the inner defaultdict has no default factory, so it behaves like a plain dict
    data = collections.defaultdict(collections.defaultdict)
    for key, values in self.stats.items():
      if '.cpi' in key:
        if key.startswith('thread.'):
          # Ignore per-thread statistics
          continue
        # Component name is whatever follows '.cpi' in the stat name
        key = key.split('.cpi')[1]
        for core in range(ncores):
          data[core][key] = values[core] * cycles_scale[core]

    if not data:
      raise ValueError('No .cpi data found, simulation did not use the interval core model')

    # Critical path accounting: stat name -> CPI component it is charged to.
    # Loop-invariant, so it is built once instead of once per core.
    cpContrMap = {
      # critical path components
      'interval_timer.cpContr_generic': 'PathInt',
      'interval_timer.cpContr_store': 'PathStore',
      'interval_timer.cpContr_load_other': 'PathLoadX',
      'interval_timer.cpContr_branch': 'PathBranch',
      'interval_timer.cpContr_load_l1': 'DataCacheL1',
      'interval_timer.cpContr_load_l2': 'DataCacheL2',
      'interval_timer.cpContr_load_l3': 'DataCacheL3',
      'interval_timer.cpContr_fp_addsub': 'PathFP',
      'interval_timer.cpContr_fp_muldiv': 'PathFP',
      # issue ports
      'interval_timer.cpContr_port0': 'PathP0',
      'interval_timer.cpContr_port1': 'PathP1',
      'interval_timer.cpContr_port2': 'PathP2',
      'interval_timer.cpContr_port34': 'PathP34',
      'interval_timer.cpContr_port5': 'PathP5',
      'interval_timer.cpContr_port05': 'PathP05',
      'interval_timer.cpContr_port015': 'PathP015',
    }

    # Split up cpiBase into 1/issue and path dependencies
    for core in range(ncores):
      if data[core].get('SyncMemAccess', 0) == data[core].get('SyncPthreadBarrier', 0):
        # Work around a bug in iGraphite where SyncMemAccess wrongly copied from SyncPthreadBarrier
        # Since SyncMemAccess usually isn't very big anyway, setting it to zero should be accurate enough
        # For simulations with a fixed version of iGraphite, the chances of SyncMemAccess being identical to
        #   SyncPthreadBarrier, down to the last femtosecond, are slim, so this code shouldn't trigger
        data[core]['SyncMemAccess'] = 0
      if data[core].get('StartTime') is None and 'performance_model.idle_elapsed_time' in self.stats:
        # Fix a bug whereby the start time was not being reported in the CPI stacks correctly.
        # Scale by this core's factor (multiplying by the whole cycles_scale list was a bug)
        # and treat sync components that were never reported as zero.
        data[core]['StartTime'] = cycles_scale[core] * self.stats['performance_model.idle_elapsed_time'][core] - \
                                  data[core].get('SyncFutex', 0)       - data[core].get('SyncPthreadMutex', 0)    - \
                                  data[core].get('SyncPthreadCond', 0) - data[core].get('SyncPthreadBarrier', 0)  - \
                                  data[core].get('Recv', 0)
      for k in self.stats:
        if k.startswith('interval_timer.cpContr_'):
          if k not in cpContrMap:
            # Same output as the former print statement (two spaces before the key)
            print('Missing in cpContrMap:  %s' % k)
      # Keep 1/width as base CPI component, break down the remainder according to critical path contributors
      BaseBest = instrs[core] / float(sniper_config.get_config(self.config, 'perf_model/core/interval_timer/dispatch_width', core))
      BaseAct = data[core]['Base']
      BaseCp = BaseAct - BaseBest
      scale = BaseCp / (BaseAct or 1)
      for cpName, cpiName in cpContrMap.items():
        val = float(self.stats.get(cpName, [0]*ncores)[core]) / 1e6
        data[core]['Base'] -= val * scale
        data[core][cpiName] = data[core].get(cpiName, 0) + val * scale
      # Issue width
      for key, values in self.stats.items():
        if key.startswith('interval_timer.detailed-cpiBase-'):
          if 'DispatchWidth' in key:
            if 'DispatchRate' not in key: # We already accounted for DispatchRate above, don't do it twice
              data[core]['Base'] -= values[core]
              data[core]['Issue'] = data[core].get('Issue', 0) + values[core]
      # Fix up large cpiSync fractions that started before but ended inside our interval
      if 'performance_model.elapsed_time_begin' in self.stats:
        time0_me = self.stats['performance_model.elapsed_time_begin'][core]
      else:
        time0_me = 0
      if time0_me < time0_begin:
        time0_extra = time0_begin - time0_me
        #    Number of cycles that weren't accounted for when starting this interval
        cycles_extra = time0_extra * cycles_scale[core]
        #    Components that could be the cause of cycles_extra. It should be just one, but if there's many, we'll have to guess
        sync_components = dict([ (key, value) for key, value in data[core].items() if (key.startswith('Sync') or key == 'StartTime') and value > cycles_extra ])
        sync_total = sum(sync_components.values())
        # Remove the extra cycles proportionally to each candidate's size
        for key, value in sync_components.items():
          data[core][key] -= cycles_extra*value/float(sync_total)
      data[core]['Imbalance'] = cycles_scale[core] * max(times) - sum(data[core].values())

    self.data = data
    self.ncores = ncores
    self.cores = range(ncores)
    self.instrs = instrs
    self.times = times
    self.cycles_scale = cycles_scale
    self.fastforward_scale = fastforward_scale
'''
                print >> outputobj, '''\
<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d">
<g style="stroke-width:.025in; fill:none">
  ''' % (self.size_x + 2 * self.margin_x, self.size_y + 2 * self.margin_y)
                for order, svg in sorted(self.items, reverse=True):
                    print >> outputobj, svg
                print >> outputobj, '''\
</g>
</svg>
  '''

        svg = Svg()
        ymax = None

        is_mesh = (sniper_config.get_config(
            config, 'network/memory_model_1') == 'emesh_hop_by_hop')
        if is_mesh:
            ncores = int(config['general/total_cores'])
            dimensions = int(
                sniper_config.get_config(
                    config, 'network/emesh_hop_by_hop/dimensions'))
            concentration = int(
                sniper_config.get_config(
                    config, 'network/emesh_hop_by_hop/concentration'))
            if dimensions == 1:
                width, height = int(math.ceil(1.0 * ncores / concentration)), 1
            else:
                if config.get('network/emesh_hop_by_hop/size'):
                    width, height = map(
                        int,
                        sniper_config.get_config(