Esempio n. 1
0
    def __init__(self, arch, scale, variant):
        '''Set up the NAMD check: environment, job size, sanity and
        performance expressions.

        :arg arch: label that only appears in the test description.
        :arg scale: ``'small'`` picks the small job geometry; every other
            value picks the large one.
        :arg variant: label that only appears in the test description.
        '''
        system_name = self.current_system.name

        self.descr = f'NAMD check ({arch}, {variant})'
        self.valid_prog_environs = (
            ['cpeIntel'] if system_name == 'pilatus' else ['builtin']
        )
        self.modules = ['NAMD']
        self.executable = 'namd2'

        # Point the sources directory at NAMD/prod below the system's
        # resources directory
        self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                       'NAMD', 'prod')

        self.use_multithreading = True
        self.num_tasks_per_core = 2

        # On Eiger a no-smp NAMD version is the default, hence the
        # 128 tasks per node there; elsewhere one task per node is used
        if system_name in ['eiger', 'pilatus']:
            self.num_tasks = 768 if scale == 'small' else 2048
            self.num_tasks_per_node = 128
        else:
            self.num_tasks = 6 if scale == 'small' else 16
            self.num_tasks_per_node = 1

        # Sanity: exactly 50 TIMING lines and a mean energy that stays
        # within 2720 of the reference value
        expected_energy = -2451359.5
        mean_energy = sn.avg(
            sn.extractall(r'ENERGY:([ \t]+\S+){10}[ \t]+(?P<energy>\S+)',
                          self.stdout, 'energy', float))
        timing_lines = sn.extractall(r'TIMING: (?P<step_num>\S+)  CPU:',
                                     self.stdout, 'step_num')
        self.sanity_patterns = sn.all([
            sn.assert_eq(sn.count(timing_lines), 50),
            sn.assert_lt(sn.abs(mean_energy - expected_energy), 2720)
        ])

        # Performance: mean of the days/ns figures on the benchmark lines
        benchmark_values = sn.extractall(
            r'Info: Benchmark time: \S+ CPUs \S+ '
            r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
            self.stdout, 'days_ns', float)
        self.perf_patterns = {'days_ns': sn.avg(benchmark_values)}

        self.maintainers = ['CB', 'LM']
        self.tags = {'scs', 'external-resources'}
        self.extra_resources = {'switches': {'num_switches': 1}}
Esempio n. 2
0
    def test_avg(self):
        '''Check ``sn.avg`` on a list, on ranges and on an empty list.'''
        # Plain list
        self.assertEqual(2.5, evaluate(sn.avg([1, 2, 3, 4])))

        # Lazily produced values (a range object)
        self.assertEqual(2.5, evaluate(sn.avg(range(1, 5))))

        # Container holding a single element
        self.assertEqual(1, evaluate(sn.avg(range(1, 2))))

        # Empty container: the expression is built first, only its
        # evaluation is expected to raise
        empty_avg = sn.avg([])
        with self.assertRaises(SanityError):
            evaluate(empty_avg)
Esempio n. 3
0
    def __init__(self, version, variant):
        '''Set up the NAMD check for the given version and variant.

        :arg version: label used in the test name and description.
        :arg variant: label used in the test name and description.
        '''
        super().__init__()
        self.name = 'namd_%s_%s_check' % (version, variant)
        self.descr = 'NAMD check (%s, %s)' % (version, variant)

        self.valid_prog_environs = ['PrgEnv-intel']

        self.modules = ['NAMD']

        # Reset sources dir relative to the SCS apps prefix
        self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                       'NAMD', 'prod')
        self.executable = 'namd2'

        self.use_multithreading = True
        self.num_tasks_per_core = 2

        # One task per node everywhere; only the task count depends on
        # the system
        if self.current_system.name == 'dom':
            self.num_tasks = 6
            self.num_tasks_per_node = 1
        else:
            self.num_tasks = 16
            self.num_tasks_per_node = 1

        # Sanity: exactly 50 TIMING lines and a mean energy within 2720 of
        # the reference value
        energy = sn.avg(
            sn.extractall(r'ENERGY:(\s+\S+){10}\s+(?P<energy>\S+)',
                          self.stdout, 'energy', float))
        energy_reference = -2451359.5
        energy_diff = sn.abs(energy - energy_reference)
        self.sanity_patterns = sn.all([
            sn.assert_eq(
                sn.count(
                    sn.extractall(r'TIMING: (?P<step_num>\S+)  CPU:',
                                  self.stdout, 'step_num')), 50),
            sn.assert_lt(energy_diff, 2720)
        ])

        # Raw strings: '\S' inside a normal string literal is an invalid
        # escape sequence and triggers a warning at compile time
        self.perf_patterns = {
            'days_ns':
            sn.avg(
                sn.extractall(
                    r'Info: Benchmark time: \S+ CPUs \S+ '
                    r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
                    self.stdout, 'days_ns', float))
        }

        self.maintainers = ['CB', 'LM']
        self.tags = {'scs'}
        self.strict_check = False
        self.extra_resources = {'switches': {'num_switches': 1}}
Esempio n. 4
0
def test_avg():
    '''Check ``sn.avg`` on a list, on ranges and on an empty list.'''
    # (input, expected mean): a plain list, a lazily produced range and a
    # container with a single element
    cases = [
        ([1, 2, 3, 4], 2.5),
        (range(1, 5), 2.5),
        (range(1, 2), 1),
    ]
    for values, expected in cases:
        assert expected == sn.evaluate(sn.avg(values))

    # Averaging an empty container must fail
    with pytest.raises(SanityError):
        sn.evaluate(sn.avg([]))
Esempio n. 5
0
       def __init__(self, name, *args, **kwargs):
          '''Initialise a test that repeats its run ``self.multi_rep`` times.

          The parent class is initialised with the name
          ``'<name>_<multi_rep>runs'`` (or ``'<multi_rep>runs'`` when ``name``
          is empty); ``*args`` and ``**kwargs`` are forwarded unchanged.
          Afterwards the time limit is scaled by ``self.multi_rep`` and, when
          the attributes ``multirun_san_pat`` / ``multirun_perf_pat`` exist,
          the sanity and performance patterns are derived from them.

          :arg name: base name of the test, may be the empty string.
          '''
          # Compare by value: identity comparison against a string literal
          # relies on interning and raises a SyntaxWarning on Python >= 3.8
          if name != '':
             name += '_'
          super().__init__('{0}{1}runs'.format(name, self.multi_rep),
                           *args, **kwargs)

          # Scale the assumed runtime.  Going through the total number of
          # seconds keeps minutes and seconds below 60 (carrying seconds
          # into the minutes field alone could push it to 60 or more).
          hours, minutes, seconds = self.time_limit
          total_seconds = int(
             (hours*3600 + minutes*60 + seconds) * self.multi_rep)
          total_minutes, seconds = divmod(total_seconds, 60)
          hours, minutes = divmod(total_minutes, 60)
          self.time_limit = (hours, minutes, seconds)

          # Check that the sanity pattern was found #multi_rep times
          if hasattr(self, 'multirun_san_pat'):
             self.sanity_patterns = sn.assert_eq(sn.count(
                sn.findall(*self.multirun_san_pat)), self.multi_rep)

          # Create the result values: first the average and then every
          # single element (to be stored) under '<key>_<run>'
          if hasattr(self, 'multirun_perf_pat'):
             self.perf_patterns = {}
             for key, pattern_args in self.multirun_perf_pat.items():
                self.perf_patterns[key] = sn.avg(
                   sn.extractall(*pattern_args))
                for run in range(self.multi_rep):
                   self.perf_patterns['{0}_{1}'.format(key, run)] = (
                      sn.extractall(*pattern_args)[run])
Esempio n. 6
0
def nsys_report_cudaMemcpy_pct(self):
    '''Report the ``CUDA API`` Time (%) spent in cudaMemcpy as printed by
    the tool: the mean over all matching stdout lines, rounded to one
    decimal.

    .. code-block::

      > job.stdout

      # CUDA API Statistics (nanoseconds)
      #
      # Time(%)      Total Time       Calls         Average         Minimum
      # -------  --------------  ----------  --------------  --------------
      #    44.9       309427138         378        818590.3            9709
      #    ****
      #    40.6       279978449           2     139989224.5           24173
      #     9.5        65562201         308        212864.3             738
      #     4.9        33820196         306        110523.5            2812
      #     0.1          704223          36         19561.8            9305
      # ....
      #         Maximum  Name
      #  --------------  ------------------
      #        11665852  cudaMemcpy
      #       279954276  cudaMemcpyToSymbol
      #         3382747  cudaFree
      #          591094  cudaMalloc
      #           34042  cudaLaunch
    '''
    # First column is the percentage, five more columns follow, then the
    # exact name 'cudaMemcpy' closes the line
    pattern = (
        r'^\s+(?P<pctg>\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+cudaMemcpy\s+$'
    )
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
Esempio n. 7
0
def nsys_report_DtoH_KiB(self):
    '''Reports ``[CUDA memcpy DtoH]`` Memory Operation (KiB) measured by the
    tool: the mean of the ``Total`` column over all matching stdout lines,
    rounded to one decimal.

    .. code-block::

      > job.stdout
      # CUDA Memory Operation Statistics (KiB)
      #
      #             Total      Operations            Average            Minimum
      # -----------------  --------------  -----------------  -----------------
      #         1530313.0             296             5170.0              0.055
      #           16500.0              84              196.4             62.500
      #           *******
      # ...
      #            Maximum  Name
      #  -----------------  -------------------
      #            81250.0  [CUDA memcpy HtoD]
      #              250.0  [CUDA memcpy DtoH]
    '''
    # The decimal point is escaped: a bare '.' matches any character, so
    # the group could previously capture text that is not a decimal number
    regex = (r'^\s+(?P<KiB>\d+\.\d+)\s+\d+\s+\S+\s+\S+\s+\S+\s+'
             r'\[CUDA memcpy DtoH\]\s+$')
    result = sn.round(sn.avg(sn.extractall(regex, self.stdout, 'KiB', float)),
                      1)
    return result
Esempio n. 8
0
def nsys_report_computeIAD_pct(self):
    '''Report the ``CUDA Kernel`` Time (%) of computeIAD as printed by the
    tool: the mean over all matching stdout lines, rounded to one decimal.

    NOTE(review): the pattern used below expects ``::computeIAD<`` on the
    same line as the percentage, which the sample layout shown here does
    not contain -- confirm the sample against current tool output.

    .. code-block::

      > job.stdout
      # CUDA Kernel Statistics (nanoseconds)
      #
      # Time(%)      Total Time   Instances         Average         Minimum
      # -------  --------------  ----------  --------------  --------------
      #    49.7        69968829           6      11661471.5        11507063
      #    26.4        37101887           6       6183647.8         6047175
      #    ****
      #    24.0        33719758          24       1404989.9         1371531
      # ...
      #         Maximum  Name
      #  --------------  ------------------
      #        11827539  computeMomentumAndEnergyIAD
      #         6678078  computeIAD
      #         1459594  density
    '''
    # First token on the line is the percentage; the kernel name appears
    # later on the same line
    pattern = r'^\s+(?P<pctg>\S+).*::computeIAD<'
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
Esempio n. 9
0
    def __init__(self, prg_envs):
        '''Set up the SPEC ACCEL check.

        :arg prg_envs: programming environments the check is valid for.
            ``self.benchmarks`` and ``self.exec_times`` are indexed with
            each of them.
        '''
        self.maintainers = ['SK']
        self.tags = {'diagnostic', 'external-resources'}

        self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_prog_environs = prg_envs
        self.modules = ['craype-accel-nvidia60']

        self.configs = {
            'PrgEnv-gnu': 'cscs-gnu',
            'PrgEnv-cray': 'cscs-cray',
            'PrgEnv-pgi': 'cscs-pgi',
        }

        # Copy the suite from the resources directory and install it
        # before the build step
        app_source = os.path.join(self.current_system.resourcesdir,
                                  'SPEC_ACCELv1.2')
        copy_cmd = 'cp -r %s/* .' % app_source
        self.prebuild_cmd = [copy_cmd, './install.sh -d . -f']

        # Only prebuild_cmd is really wanted, but a build system without
        # any action is not supported, so a single file is compiled with
        # '-c' as a harmless placeholder
        self.build_system = 'SingleSource'
        self.sourcepath = './benchspec/ACCEL/353.clvrleaf/src/timer_c.c'
        self.build_system.cflags = ['-c']

        # Per environment: benchmark name -> reference tuple built from the
        # matching entry of exec_times
        references = {}
        for env in self.valid_prog_environs:
            names_and_times = zip(self.benchmarks[env], self.exec_times[env])
            references[env] = {
                bench_name: (rt, None, 0.1, 'Seconds')
                for bench_name, rt in names_and_times
            }
        self.refs = references

        self.num_tasks = 1
        self.num_tasks_per_node = 1
        self.time_limit = (0, 30, 0)

        self.executable = 'runspec'

        # First log file matching the glob is the one that gets parsed
        outfile = sn.getitem(sn.glob('result/ACCEL.*.log'), 0)

        # Per environment: every benchmark must report 'Success'
        sanity_by_env = {}
        for env in self.valid_prog_environs:
            checks = [sn.assert_found(r'Success.*%s' % bn, outfile)
                      for bn in self.benchmarks[env]]
            sanity_by_env[env] = sn.all(checks)
        self.sanity_patterns_ = sanity_by_env

        # Per environment: mean runtime of every benchmark
        perf_by_env = {}
        for env in self.valid_prog_environs:
            perf_by_env[env] = {
                bench_name: sn.avg(
                    sn.extractall(
                        r'Success.*%s.*runtime=(?P<rt>[0-9.]+)' % bench_name,
                        outfile, 'rt', float))
                for bench_name in self.benchmarks[env]
            }
        self.perf_patterns_ = perf_by_env
 def density_ns(self):
     '''Return the mean ``end - begin`` difference, rounded to 0 decimals,
     of the 'density' entries found in the ``.csv`` counterpart of
     ``self.metric_file`` inside the stage directory.
     '''
     pattern = self.set_regex('density')
     csv_name = self.metric_file.replace(".txt", ".csv")
     csv_report = os.path.join(self.stagedir, csv_name)
     starts = sn.extractall(pattern, csv_report, 'begin', int)
     stops = sn.extractall(pattern, csv_report, 'end', int)
     # Pair every begin value with its end value
     durations = [stop - start for start, stop in zip(starts, stops)]
     return sn.round(sn.avg(durations), 0)
Esempio n. 11
0
    def __init__(self, benchmark, part, num_tasks, num_tasks_per_node):
        """Configure a WRF benchmark run that uses pre-downloaded files.

        Meant to be subclassed by a test decorated with

            @rfm.parameterized_test(*scaling_config())

        Args:
            benchmark: str, key in BENCHMARKS
            part: the single entry placed in `valid_systems`
            num_tasks: total number of tasks
            num_tasks_per_node: tasks per node
            (see `modules.reframe_extras.scaling_config()` for the last
            three)
        """
        rsl_log = 'rsl.error.0000'

        self.benchmark = benchmark
        self.valid_systems = [part]
        self.valid_prog_environs = ['wrf']

        # Job geometry
        self.num_tasks = num_tasks
        self.num_tasks_per_node = num_tasks_per_node
        self.num_nodes = int(self.num_tasks / self.num_tasks_per_node)
        self.time_limit = '3h'  # TODO: change in child classes?

        # NB: sourcesdir and readonly_files are deliberately NOT set, the
        # files get symlinked in by the test itself
        self.benchdir = os.path.join(self.prefix, 'downloads', self.benchmark)

        self.executable = 'wrf.exe'
        self.executable_opts = []
        self.keep_files = [rsl_log]

        self.sanity_patterns = sn.all([
            sn.assert_found(r'wrf: SUCCESS COMPLETE WRF', rsl_log),
        ])

        # Per-benchmark constants needed for the gflops figure
        constants = TIMING_CONSTANTS[self.benchmark]
        self.model_timestep = constants['model_timestep']
        self.gflops_factor = TIMING_CONSTANTS[self.benchmark]['gflops_factor']

        wall_clock = sn.extractsingle(r'^real\s+(\d+m[\d.]+s)$', self.stderr,
                                      1, parse_time_cmd)
        mean_timing = sn.avg(sn.sanity_function(extract_timings)(rsl_log))
        self.perf_patterns = {
            'runtime_real': wall_clock,
            'gflops':
            (self.model_timestep / mean_timing) * self.gflops_factor
        }
        self.reference = {
            '*': {
                'runtime_real': (0, None, None, 's'),
                'gflops': (0, None, None, '/s'),
            }
        }

        # Tag the test with its benchmark and its size
        extra_tags = {
            self.benchmark,
            'num_procs=%i' % self.num_tasks,
            'num_nodes=%i' % self.num_nodes, 'run'
        }
        self.tags |= extra_tags
Esempio n. 12
0
    def __init__(self, variant):
        '''Set up the distributed TensorFlow/Horovod training check.

        :arg variant: ``'small'`` runs 8 tasks and is also valid on
            ``dom:gpu``; any other value runs 32 tasks on ``daint:gpu``
            only.
        '''
        tfshortver = '1.14'
        is_small = variant == 'small'

        self.descr = 'Distributed training with TensorFlow and Horovod'
        self.tags = {'production'}
        self.maintainers = ['MS', 'RS']

        self.valid_prog_environs = ['PrgEnv-gnu']
        self.sourcesdir = 'https://github.com/tensorflow/benchmarks'
        self.modules = ['Horovod/0.16.4-CrayGNU-19.06-tf-%s.0' % tfshortver]

        # Systems, task count and references all depend on the variant
        if is_small:
            self.valid_systems = ['daint:gpu', 'dom:gpu']
            self.num_tasks = 8
            self.reference = {
                'dom:gpu': {
                    'throughput': (1133.6, None, 0.05, 'images/s'),
                },
                'daint:gpu': {
                    'throughput': (1134.8, None, 0.05, 'images/s')
                },
            }
        else:
            self.valid_systems = ['daint:gpu']
            self.num_tasks = 32
            self.reference = {
                'daint:gpu': {
                    'throughput': (4403.0, None, 0.05, 'images/s')
                },
            }

        self.num_tasks_per_node = 1
        self.num_cpus_per_task = 12

        # Performance: mean of all reported totals
        throughputs = sn.extractall(
            r'total images/sec:\s+(?P<throughput>\S+)',
            self.stdout, 'throughput', float)
        self.perf_patterns = {'throughput': sn.avg(throughputs)}

        # Sanity: one 'total images/sec:' line per task
        total_lines = sn.findall(r'total images/sec:', self.stdout)
        self.sanity_patterns = sn.assert_eq(sn.count(total_lines),
                                            self.num_tasks)

        # Switch to the benchmarks branch matching the TensorFlow version
        self.pre_run = ['git checkout cnn_tf_v%s_compatible' % tfshortver]
        self.variables = {
            'NCCL_DEBUG': 'INFO',
            'NCCL_IB_HCA': 'ipogif0',
            'NCCL_IB_CUDA_SUPPORT': '1',
            'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
        }
        self.executable = 'python'
        self.executable_opts = [
            'scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py',
            '--model inception3',
            '--batch_size 64',
            '--variable_update horovod',
            '--log_dir ./logs',
            '--train_dir ./checkpoints',
        ]
Esempio n. 13
0
    def __init__(self, arch, flavor):
        '''Set up the NAMD check for the given architecture and flavor.

        :arg arch: label used in the test description.
        :arg flavor: selects the module ``namd-<flavor>``.  The valid
            programming environments are only set for the ``'multicore'``
            and ``'verbs'`` flavors.
        '''
        super().__init__()
        self.descr = 'NAMD check (%s)' % (arch)

        # Both supported flavors use the same environments; any other
        # flavor leaves valid_prog_environs untouched
        if flavor in ('multicore', 'verbs'):
            self.valid_prog_environs = ['intel-2016.4', 'intel-2018.3']

        self.modules = ['namd-%s' % flavor]

        # Reset sources dir relative to the SCS apps prefix
        self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                       'NAMD')
        self.executable = 'namd2'
        self.use_multithreading = True

        # Sanity: exactly 25 TIMING lines and a mean energy within 2720 of
        # the reference value
        energy = sn.avg(
            sn.extractall(r'^ENERGY:(\s+\S+){10}\s+(?P<energy>\S+)',
                          self.stdout, 'energy', float))
        energy_reference = -2451359.5
        energy_diff = sn.abs(energy - energy_reference)
        self.sanity_patterns = sn.all([
            sn.assert_eq(
                sn.count(
                    sn.extractall(r'TIMING: (?P<step_num>\S+)  CPU:',
                                  self.stdout, 'step_num')), 25),
            sn.assert_lt(energy_diff, 2720)
        ])

        # Raw strings: '\S' inside a normal string literal is an invalid
        # escape sequence and triggers a warning at compile time
        self.perf_patterns = {
            'days_ns':
            sn.avg(
                sn.extractall(
                    r'Info: Benchmark time: \S+ CPUs \S+ '
                    r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
                    self.stdout, 'days_ns', float))
        }

        self.maintainers = ['CB', 'LM']
        self.tags = {'scs', 'external-resources'}
        self.strict_check = False
        self.extra_resources = {'switches': {'num_switches': 1}}
Esempio n. 14
0
    def setting_variables(self):
        '''Set every attribute of the NAMD 2.13 CUDA apoa1 benchmark:
        systems, environment, command line, job geometry, sanity and
        performance expressions.
        '''
        self.descr = 'NAMD 2.13 CUDA version benchmark apoa1'

        self.valid_systems = ['ibex:batch_mpi']

        self.valid_prog_environs = ['gpustack_builtin']

        self.sourcesdir = '../src/namd'

        self.modules = ['namd']
        #/2.13/cuda10-verbs-smp-icc17

        # Print some diagnostics about the environment before the run
        self.prerun_cmds = [
            'module list', 'which namd2', 'hostname', 'echo $MODULEPATH'
        ]
        #['export SLURM_CPU_BIND_TYPE=sockets','export SLURM_CPU_BIND_VERBOSE=verbose']

        self.executable = 'namd2'
        self.executable_opts = [
            '+p8', '+devices', '0,1,2,3,4,5,6,7', '+idlepoll',
            '+setcpuaffinity', 'apoa1.namd'
        ]

        # Job script attributes

        self.time_limit = '1h'
        self.num_tasks = 1
        self.num_tasks_per_node = 1
        self.num_gpus_per_node = 8
        self.num_cpus_per_task = 8
        self.extra_resources = {'constraint': {'type': 'v100'}}

        # Sanity: exactly 25 TIMING lines in the output
        self.sanity_patterns = sn.assert_eq(
            sn.count(
                sn.extractall(r'TIMING: (?P<step_num>\S+)  CPU:', self.stdout,
                              'step_num')), 25)

        # Raw strings: '\S' inside a normal string literal is an invalid
        # escape sequence and triggers a warning at compile time
        self.perf_patterns = {
            'days_ns':
            sn.avg(
                sn.extractall(
                    r'Info: Benchmark time: \S+ CPUs \S+ '
                    r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
                    self.stdout, 'days_ns', float))
        }

        self.reference = {
            'ibex': {
                'days_ns': (0.037, None, 0.1, None)
            },
        }

        self.tags = {'namd', 'acceptance'}

        # initials or email of the maintainer
        self.maintainers = ['MS']
Esempio n. 15
0
 def __init__(self):
     '''Set up a single-task check that must print ``PASSED`` and that
     reports the mean of the values listed in the ``"series"`` output.
     '''
     self.valid_systems = ['daint:gpu', 'dom:gpu']
     self.valid_prog_environs = ['builtin']
     self.modules = ['CMake', 'Boost']
     self.num_tasks = 1

     self.sanity_patterns = sn.assert_found(r'PASSED', self.stdout)

     # The text captured between the brackets is turned into Python
     # values with ast.literal_eval and then averaged
     parse_literal = sn.sanity_function(ast.literal_eval)
     series_text = sn.extractsingle(r'"series" : \[(?P<wall_times>.+)\]',
                                    self.stdout, 'wall_times')
     self.perf_patterns = {'wall_time': sn.avg(parse_literal(series_text))}
Esempio n. 16
0
 def setup_perf_vars(self):
     '''Set the ``days_ns`` performance pattern and the references that
     correspond to ``self.arch`` and ``self.scale``.
     '''
     def within_5_percent(days_per_ns):
         # Reference entry for one partition
         return {'days_ns': (days_per_ns, None, 0.05, 'days/ns')}

     benchmark_values = sn.extractall(
         r'Info: Benchmark time: \S+ CPUs \S+ '
         r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
         self.stdout, 'days_ns', float)
     self.perf_patterns = {'days_ns': sn.avg(benchmark_values)}

     # Expected days/ns per partition: 'gpu' versus every other arch,
     # 'small' versus every other scale
     if self.arch == 'gpu':
         if self.scale == 'small':
             expected = {'dom:gpu': 0.15, 'daint:gpu': 0.15}
         else:
             expected = {'daint:gpu': 0.07}
     elif self.scale == 'small':
         expected = {
             'dom:mc': 0.51,
             'daint:mc': 0.51,
             'eiger:mc': 0.12,
             'pilatus:mc': 0.12,
         }
     else:
         expected = {
             'daint:mc': 0.28,
             'eiger:mc': 0.05,
             'pilatus:mc': 0.05,
         }

     self.reference = {
         partition: within_5_percent(value)
         for partition, value in expected.items()
     }
Esempio n. 17
0
 def validate_energy(self):
     '''Return the sanity expression: exactly 50 TIMING lines in stdout
     and a mean energy that is within 2720 of the reference value.
     '''
     expected_energy = -2451359.5
     energies = sn.extractall(
         r'ENERGY:([ \t]+\S+){10}[ \t]+(?P<energy>\S+)',
         self.stdout, 'energy', float)
     deviation = sn.abs(sn.avg(energies) - expected_energy)

     timing_lines = sn.extractall(r'TIMING: (?P<step_num>\S+)  CPU:',
                                  self.stdout, 'step_num')
     step_check = sn.assert_eq(sn.count(timing_lines), 50)
     energy_check = sn.assert_lt(deviation, 2720)
     return sn.all([step_check, energy_check])
Esempio n. 18
0
def nvprof_report_DtoH_pct(self):
    '''Report the ``[CUDA memcpy DtoH]`` Time(%) as printed by the tool:
    the mean over all matching stdout lines, rounded to one decimal.

    .. code-block::

      > job.stdout (Name: [CUDA memcpy DtoH])
      # Time(%)    Time   Calls       Avg       Min      Max
      # 2.80%  1.3194ms      44  29.986us  29.855us 30.528us [CUDA memcpy DtoH]
      # 1.34%  1.7667ms      44  40.152us  39.519us 41.887us [CUDA memcpy DtoH]
      # ^^^^
    '''
    # The percentage is the first token on a line that ends with the
    # operation name
    pattern = r'^\s+\s+(?P<pctg>\S+)%.*\[CUDA memcpy DtoH\]$'
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
Esempio n. 19
0
def nvprof_report_HtoD_pct(self):
    '''Report the ``[CUDA memcpy HtoD]`` Time(%) as printed by the tool:
    the mean over all matching stdout lines, rounded to one decimal.

    .. code-block::

      > job.stdout (Name: [CUDA memcpy HtoD])
      #             Type  Time(%)      Time Calls       Avg   Min       Max
      #  GPU activities:   48.57%  22.849ms   162  141.04us 896ns  1.6108ms
      #  GPU activities:   56.12%  74.108ms   162  457.45us 928ns  5.8896ms
      #                    ^^^^^
    '''
    # Only lines that start with the 'GPU activities:' label and end with
    # the operation name are considered
    pattern = r'^\s+GPU activities:\s+(?P<pctg>\S+)%.*\[CUDA memcpy HtoD\]$'
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
Esempio n. 20
0
    def __init__(self):
        '''Set up the distributed TensorFlow/Horovod training check for
        ``daint:gpu`` and ``dom:gpu``.
        '''
        super().__init__()
        tfshortver = '1.11'

        self.descr = 'Distributed training with TensorFlow and Horovod'
        self.tags = {'production'}
        self.maintainers = ['MS', 'RS']

        self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_prog_environs = ['PrgEnv-gnu']
        self.sourcesdir = 'https://github.com/tensorflow/benchmarks'
        self.modules = ['Horovod/0.15.0-CrayGNU-18.08-tf-%s.0' % tfshortver]

        self.reference = {
            'dom:gpu': {
                'throughput': (1133.6, None, 0.05, 'images/s'),
            },
            'daint:gpu': {
                'throughput': (4403.0, None, 0.05, 'images/s')
            },
        }

        # Performance: mean of all reported totals
        throughputs = sn.extractall(
            r'total images/sec:\s+(?P<throughput>\S+)',
            self.stdout, 'throughput', float)
        self.perf_patterns = {'throughput': sn.avg(throughputs)}

        # NOTE(review): '[\S+\s+]' is a character class that matches one
        # single character; confirm this is what was intended
        self.sanity_patterns = sn.assert_found(
            r'[\S+\s+] INFO NET\/IB : Using interface ipogif0'
            r' for sideband communication', self.stdout)

        # One task per node; the task count is only defined for the two
        # known systems
        self.num_tasks_per_node = 1
        system_name = self.current_system.name
        tasks_by_system = {'dom': 8, 'daint': 32}
        if system_name in tasks_by_system:
            self.num_tasks = tasks_by_system[system_name]

        # Switch to the benchmarks branch matching the TensorFlow version
        self.pre_run = ['git checkout cnn_tf_v%s_compatible' % tfshortver]
        self.variables = {
            'NCCL_DEBUG': 'INFO',
            'NCCL_IB_HCA': 'ipogif0',
            'NCCL_IB_CUDA_SUPPORT': '1',
            'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
        }
        self.executable = 'python'
        self.executable_opts = [
            'scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py',
            '--model inception3', '--batch_size 64',
            '--variable_update horovod', '--log_dir ./logs',
            '--train_dir ./checkpoints'
        ]
Esempio n. 21
0
def nvprof_report_momentumEnergy_pct(self):
    '''Report the ``CUDA Kernel`` Time (%) of computeMomentumAndEnergyIAD
    as printed by the tool: the mean over all matching stdout lines,
    rounded to one decimal.

    .. code-block::

      > job.stdout
      # (where Name = sphexa::sph::cuda::kernels::computeMomentumAndEnergyIAD)
      # Time(%)     Time  Calls   Avg       Min       Max  Name
      # 28.25%  13.288ms  4  3.3220ms  3.1001ms  3.4955ms  void ...
      # 21.63%  28.565ms  4  7.1414ms  6.6616ms  7.4616ms  void ...
      # ^^^^^
    '''
    # The percentage is the first token; the kernel name appears further
    # along the same line
    pattern = r'^\s+(?P<pctg>\S+)%.*::computeMomentumAndEnergyIAD<'
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
Esempio n. 22
0
def nvprof_report_computeIAD_pct(self):
    '''Report the ``CUDA Kernel`` Time (%) of computeIAD as printed by the
    tool: the mean over all matching stdout lines, rounded to one decimal.

    .. code-block::

      > job.stdout
      # (where Name = sphexa::sph::cuda::kernels::computeIAD)
      # Time(%)     Time  Calls       Avg       Min       Max  Name
      # 12.62%  5.9380ms      4  1.4845ms  1.3352ms  1.6593ms  void ...
      # 10.54%  13.915ms      4  3.4788ms  3.3458ms  3.7058ms  void ...
      # ^^^^^
    '''
    # The percentage is the first token; the kernel name appears further
    # along the same line
    pattern = r'^\s+(?P<pctg>\S+)%.*::computeIAD<'
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
Esempio n. 23
0
def nvprof_report_cudaMemcpy_pct(self):
    '''Report the ``CUDA API`` Time (%) of lines mentioning cudaMemcpy as
    printed by the tool: the mean over all matching stdout lines, rounded
    to one decimal.

    .. code-block::

      > job.stdout (where Name = cudaMemcpy|cudaMemcpyToSymbol)
      #           Time(%) Total Time Calls   Average   Minimum   Maximum  Name
      # API calls: 74.37%   219.93ms     2  109.96ms  20.433us  219.90ms  ...
      #            18.32%   54.169ms   204  265.53us  11.398us  3.5624ms  ...
      # API calls: 54.65%   222.03ms     2  111.02ms  20.502us  222.01ms  ...
      #            34.88%   141.73ms   204  694.76us  21.168us  7.5486ms  ...
    '''
    # Any line containing 'cudaMemcpy' qualifies, so cudaMemcpyToSymbol
    # lines are picked up as well
    pattern = r'^.*?\s+(?P<pctg>\S+)%.*cudaMemcpy.*$'
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
Esempio n. 24
0
def pw_perf_patterns(obj):
    '''Return a dict with the minimum, the mean (rounded to one decimal)
    and the maximum of the last hardware counter column that the tool
    prints for computeMomentumAndEnergyIAD in ``obj.stderr``.

    .. code-block::

     collector                       time time (%)   PAPI_REF_CYC   PAPI_L2_DCM
     --------------------------------------------------------------------------
     computeMomentumAndEnergyIAD   0.6816   100.00     1770550470       2438527
                                                                        ^^^^^^^

    '''
    pattern = r'^computeMomentumAndEnergyIAD\s+\S+\s+\S+\s+\S+\s+(?P<hwc>\d+)$'

    def counters():
        # A fresh extraction for every statistic
        return sn.extractall(pattern, obj.stderr, 'hwc', int)

    return {
        'papiwrap_hwc_min': sn.min(counters()),
        'papiwrap_hwc_avg': sn.round(sn.avg(counters()), 1),
        'papiwrap_hwc_max': sn.max(counters()),
    }
 def __init__(self):
     '''Set up the stream CPU benchmark check for ``pi2:cpu``.'''
     self.descr = ('stream/5.10-intel-19.0.5 benchmark CPU check '
                   'RunOnlyRegressionTest')
     self.maintainers = ['blacknail']
     self.tags = {'benchmark', 'pro', 'stream', 'node_health'}

     self.valid_systems = ['pi2:cpu']
     self.valid_prog_environs = ['*']
     self.modules = ['stream/5.10-intel-19.0.5']
     # self.sourcesdir = None

     # Job geometry: 40 single-cpu tasks on one node, no time limit
     self.num_tasks = 40
     self.num_tasks_per_node = 40
     self.num_cpus_per_task = 1
     self.time_limit = None

     # The benchmark is run 8 times with a 3 second pause after each run
     self.executable = 'for i in `seq 1 8`; do stream_c.exe; sleep 3; done'

     self.sanity_patterns = sn.assert_found(
         r'Solution Validates: avg error less than', self.stdout)

     # Performance: mean of the first figure on every 'Triad:' line
     triad_rates = sn.extractiter(r'Triad:\s+(?P<triad>\S+)\s+\S+',
                                  self.stdout, 'triad', float)
     self.perf_patterns = {'triad': sn.avg(triad_rates)}
     self.reference = {'pi2:cpu': {'triad': (112640, 0, None, 'MB/s')}}
Esempio n. 26
0
def nsys_report_HtoD_pct(self):
    '''Report the ``[CUDA memcpy HtoD]`` Time(%) as printed by the tool:
    the mean over all matching stdout lines, rounded to one decimal.

    .. code-block::

      > job.stdout
      # CUDA Memory Operation Statistics (nanoseconds)
      #
      # Time(%)      Total Time  Operations         Average  ...
      # -------  --------------  ----------  --------------  ...
      #    99.1       154400354         296        521622.8  ...
      #    ****
      #
      #             Minimum         Maximum  Name
      #      --------------  --------------  -------------------
      #                 896         8496291  [CUDA memcpy HtoD]
    '''
    # First column is the percentage, five more columns follow, then the
    # operation name closes the line
    pattern = (r'^\s+(?P<pctg_nsec>\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+'
               r'\[CUDA memcpy HtoD\]\s+$')
    percentages = sn.extractall(pattern, self.stdout, 'pctg_nsec', float)
    return sn.round(sn.avg(percentages), 1)
Esempio n. 27
0
def nsys_report_DtoH_pct(self):
    '''Report the ``[CUDA memcpy DtoH]`` Time(%) as printed by the tool:
    the mean over all matching stdout lines, rounded to one decimal.

    .. code-block::

      > job.stdout
      # CUDA Memory Operation Statistics (nanoseconds)
      #
      # Time(%)      Total Time  Operations         Average  ...
      # -------  --------------  ----------  --------------  ...
      #     0.9         1385579          84         16495.0  ...
      #    ****
      #
      #             Minimum         Maximum  Name
      #      --------------  --------------  -------------------
      #                6144           21312  [CUDA memcpy DtoH]
    '''
    # First column is the percentage, five more columns follow, then the
    # operation name closes the line
    pattern = (r'^\s+(?P<pctg_nsec>\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+'
               r'\[CUDA memcpy DtoH\]\s+$')
    percentages = sn.extractall(pattern, self.stdout, 'pctg_nsec', float)
    return sn.round(sn.avg(percentages), 1)
 def __init__(self):
     '''Set up the Intel MKL mp_linpack CPU benchmark check for
     ``pi2:cpu``.
     '''
     self.descr = ('linpack intel-mkl/2019.3.199-intel-19.0.4 benchmark CPU check '
                   'RunOnlyRegressionTest')
     self.valid_systems = ['pi2:cpu']
     self.valid_prog_environs = ['*']
     self.modules = ["intel-parallel-studio"]
     # The benchmark directory that ships with the MKL installation
     self.sourcesdir = (
         "/lustre/opt/cascadelake/linux-centos7-cascadelake/intel-19.0.4/"
         "intel-mkl-2019.3.199-fwha3ldpm5qbymzf45nzfpaehfztqwms/"
         "mkl/benchmarks/mp_linpack"
     )
     # Job geometry: 40 single-cpu tasks on one node, no time limit
     self.num_cpus_per_task = 1
     self.num_tasks = 40
     self.num_tasks_per_node = 40
     self.time_limit = None
     self.executable = './runme_intel64_static'
     # Both messages have to show up in the output
     self.sanity_patterns = sn.and_(
         sn.assert_found(r'1 tests completed and passed residual checks',
                         self.stdout),
         sn.assert_found(r'End of Tests.', self.stdout))
     # Raw string: '\s' and '\S' inside a normal string literal are
     # invalid escape sequences and trigger a warning at compile time
     self.perf_patterns = {
         'perf': sn.avg(sn.extractiter(
             r'WR00L2L2\s+\S+\s+\S+\s+\S+\s+1\s+\S+\s+(?P<perf>\S+)',
             self.stdout, 'perf', float))
     }
     self.reference = {
         'pi2:cpu': {
             'perf': (2000, 0, None, 'GFlops')
         }
     }
     self.maintainers = ['blacknail']
     # NOTE(review): the 'stream' tag on a linpack check looks like a
     # copy-paste leftover -- confirm before changing it
     self.tags = {'benchmark', 'pro', 'stream', 'node_health'}
Esempio n. 29
0
 def wall_time(self):
     '''Return the mean of the values listed between the brackets of the
     ``"series"`` entry in stdout.
     '''
     # ast.literal_eval turns the captured text into Python values
     parse_literal = sn.deferrable(ast.literal_eval)
     series_text = sn.extractsingle(r'"series" : \[(?P<wall_times>.+)\]',
                                    self.stdout, 'wall_times')
     return sn.avg(parse_literal(series_text))
Esempio n. 30
0
 def days_ns(self):
     '''Return the mean of the days/ns figures found on the
     ``Info: Benchmark time:`` lines of stdout.
     '''
     pattern = (r'Info: Benchmark time: \S+ CPUs \S+ '
                r's/step (?P<days_ns>\S+) days/ns \S+ MB memory')
     benchmark_values = sn.extractall(pattern, self.stdout, 'days_ns', float)
     return sn.avg(benchmark_values)