def __init__(self, arch, scale, variant):
    '''Set up the NAMD check for the given architecture, scale and variant.

    :arg arch: architecture label; only used in the test description.
    :arg scale: ``'small'`` selects the smaller task counts, any other
        value selects the larger ones.
    :arg variant: variant label; only used in the test description.
    '''
    system_name = self.current_system.name
    on_eiger_like = system_name in ['eiger', 'pilatus']

    self.descr = f'NAMD check ({arch}, {variant})'
    self.valid_prog_environs = (
        ['cpeIntel'] if system_name == 'pilatus' else ['builtin']
    )
    self.modules = ['NAMD']

    # Reset sources dir relative to the SCS apps prefix
    self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                   'NAMD', 'prod')
    self.executable = 'namd2'
    self.use_multithreading = True
    self.num_tasks_per_core = 2

    # (num_tasks, num_tasks_per_node) per scale and system family.
    # On Eiger a no-smp NAMD version is the default
    if scale == 'small':
        task_layout = (768, 128) if on_eiger_like else (6, 1)
    else:
        task_layout = (2048, 128) if on_eiger_like else (16, 1)

    self.num_tasks, self.num_tasks_per_node = task_layout

    # Sanity: 50 TIMING lines and an average energy close to the reference
    energy_values = sn.extractall(
        r'ENERGY:([ \t]+\S+){10}[ \t]+(?P<energy>\S+)',
        self.stdout, 'energy', float)
    energy_reference = -2451359.5
    energy_diff = sn.abs(sn.avg(energy_values) - energy_reference)
    timing_steps = sn.extractall(r'TIMING: (?P<step_num>\S+) CPU:',
                                 self.stdout, 'step_num')
    self.sanity_patterns = sn.all([
        sn.assert_eq(sn.count(timing_steps), 50),
        sn.assert_lt(energy_diff, 2720)
    ])

    # Performance: average of all reported days/ns values
    benchmark_times = sn.extractall(
        r'Info: Benchmark time: \S+ CPUs \S+ '
        r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
        self.stdout, 'days_ns', float)
    self.perf_patterns = {'days_ns': sn.avg(benchmark_times)}

    self.maintainers = ['CB', 'LM']
    self.tags = {'scs', 'external-resources'}
    self.extra_resources = {'switches': {'num_switches': 1}}
def test_avg(self):
    '''Check ``sn.avg`` on a list, on ranges and on an empty container.'''
    cases = [
        # plain list
        (2.5, [1, 2, 3, 4]),
        # Check result when passing a generator
        (2.5, range(1, 5)),
        # Check with single element container
        (1, range(1, 2)),
    ]
    for expected, values in cases:
        self.assertEqual(expected, evaluate(sn.avg(values)))

    # Check with empty container: the error must surface on evaluation
    empty_avg = sn.avg([])
    with self.assertRaises(SanityError):
        evaluate(empty_avg)
def __init__(self, version, variant):
    '''Set up the NAMD check for the given version and variant.

    :arg version: label used in the test name and description.
    :arg variant: label used in the test name and description.
    '''
    super().__init__()
    self.name = 'namd_%s_%s_check' % (version, variant)
    self.descr = 'NAMD check (%s, %s)' % (version, variant)
    self.valid_prog_environs = ['PrgEnv-intel']
    self.modules = ['NAMD']

    # Reset sources dir relative to the SCS apps prefix
    self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                   'NAMD', 'prod')
    self.executable = 'namd2'
    self.use_multithreading = True
    self.num_tasks_per_core = 2
    if self.current_system.name == 'dom':
        self.num_tasks = 6
        self.num_tasks_per_node = 1
    else:
        self.num_tasks = 16
        self.num_tasks_per_node = 1

    # Sanity: 50 TIMING lines and an average energy close to the reference
    energy = sn.avg(
        sn.extractall(r'ENERGY:(\s+\S+){10}\s+(?P<energy>\S+)',
                      self.stdout, 'energy', float))
    energy_reference = -2451359.5
    energy_diff = sn.abs(energy - energy_reference)
    self.sanity_patterns = sn.all([
        sn.assert_eq(
            sn.count(
                sn.extractall(r'TIMING: (?P<step_num>\S+) CPU:',
                              self.stdout, 'step_num')),
            50),
        sn.assert_lt(energy_diff, 2720)
    ])

    # Raw strings: `\S` is not a valid escape in a normal string literal,
    # so the non-raw form triggers an invalid-escape warning.
    self.perf_patterns = {
        'days_ns': sn.avg(
            sn.extractall(
                r'Info: Benchmark time: \S+ CPUs \S+ '
                r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
                self.stdout, 'days_ns', float))
    }
    self.maintainers = ['CB', 'LM']
    self.tags = {'scs'}
    self.strict_check = False
    self.extra_resources = {'switches': {'num_switches': 1}}
def test_avg():
    '''Check ``sn.avg`` on a list, on ranges and on an empty container.'''
    cases = [
        # plain list
        (2.5, [1, 2, 3, 4]),
        # Check result when passing a generator
        (2.5, range(1, 5)),
        # Check with single element container
        (1, range(1, 2)),
    ]
    for expected, values in cases:
        assert expected == sn.evaluate(sn.avg(values))

    # Check with empty container
    with pytest.raises(SanityError):
        sn.evaluate(sn.avg([]))
def __init__(self, name, *args, **kwargs):
    '''Initialise a test that repeats its run ``self.multi_rep`` times.

    The parent initialiser is called with the name
    ``'<name>_<multi_rep>runs'`` (or ``'<multi_rep>runs'`` when ``name``
    is empty). Afterwards the time limit is scaled by ``multi_rep`` and,
    if the attributes exist, sanity and performance patterns are derived
    from ``multirun_san_pat`` and ``multirun_perf_pat``.

    :arg name: name prefix; may be the empty string.
    :arg args: forwarded to the parent ``__init__``.
    :arg kwargs: forwarded to the parent ``__init__``.
    '''
    # Compare by value: `is not ''` tests object identity, which is an
    # implementation detail for strings.
    if name != '':
        name += '_'

    super().__init__('{0}{1}runs'.format(name, self.multi_rep),
                     *args, **kwargs)

    # Scale the assumed runtime. Going through total seconds keeps the
    # minutes and seconds fields below 60; carrying field by field could
    # leave minutes >= 60 once the carry from the seconds was added.
    # Assumes time_limit is an (hours, minutes, seconds) tuple of ints.
    total_seconds = (self.time_limit[0] * 3600 +
                     self.time_limit[1] * 60 +
                     self.time_limit[2]) * self.multi_rep
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    self.time_limit = (hours, minutes, seconds)

    # Check that the sanity pattern was found exactly multi_rep times
    if hasattr(self, 'multirun_san_pat'):
        self.sanity_patterns = sn.assert_eq(
            sn.count(sn.findall(*self.multirun_san_pat)),
            self.multi_rep)

    # Create the list of result values: first the average and
    # then all single elements (to be stored)
    if hasattr(self, 'multirun_perf_pat'):
        self.perf_patterns = {}
        for key, pattern_args in self.multirun_perf_pat.items():
            self.perf_patterns[key] = sn.avg(
                sn.extractall(*pattern_args))
            for run in range(self.multi_rep):
                self.perf_patterns[key + '_{}'.format(run)] = (
                    sn.extractall(*pattern_args)[run])
def nsys_report_cudaMemcpy_pct(self):
    '''Return the ``CUDA API`` Time (%) of cudaMemcpy reported by the tool.

    All matching lines in the job's stdout are averaged and the result
    is rounded to one decimal.

    .. code-block::

      > job.stdout
      # CUDA API Statistics (nanoseconds)
      #
      # Time(%)      Total Time       Calls         Average         Minimum
      # -------  --------------  ----------  --------------  --------------
      #    44.9       309427138         378        818590.3            9709
      #    ****
      #    40.6       279978449           2     139989224.5           24173
      #     9.5        65562201         308        212864.3             738
      #     4.9        33820196         306        110523.5            2812
      #     0.1          704223          36         19561.8            9305
      # ....
      #        Maximum  Name
      # --------------  ------------------
      #       11665852  cudaMemcpy
      #      279954276  cudaMemcpyToSymbol
      #        3382747  cudaFree
      #         591094  cudaMalloc
      #          34042  cudaLaunch
    '''
    # First column is the percentage; the line must end with the bare
    # name `cudaMemcpy` followed by whitespace
    pattern = (
        r'^\s+(?P<pctg>\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+cudaMemcpy\s+$'
    )
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
def nsys_report_DtoH_KiB(self):
    '''Return the ``[CUDA memcpy DtoH]`` Memory Operation total (KiB).

    All matching lines in the job's stdout are averaged and the result
    is rounded to one decimal.

    .. code-block::

      > job.stdout
      # CUDA Memory Operation Statistics (KiB)
      #
      #             Total      Operations            Average            Minimum
      # -----------------  --------------  -----------------  -----------------
      #         1530313.0             296             5170.0              0.055
      #           16500.0              84              196.4             62.500
      #           *******
      # ...
      #           Maximum  Name
      # -----------------  -------------------
      #           81250.0  [CUDA memcpy HtoD]
      #             250.0  [CUDA memcpy DtoH]
    '''
    # The decimal point is escaped so that only `<digits>.<digits>` is
    # accepted as the total; a bare `.` would match any character.
    regex = (r'^\s+(?P<KiB>\d+\.\d+)\s+\d+\s+\S+\s+\S+\s+\S+\s+'
             r'\[CUDA memcpy DtoH\]\s+$')
    totals = sn.extractall(regex, self.stdout, 'KiB', float)
    return sn.round(sn.avg(totals), 1)
def nsys_report_computeIAD_pct(self):
    '''Return the ``CUDA Kernel`` Time (%) of computeIAD reported by the tool.

    All matching lines in the job's stdout are averaged and the result
    is rounded to one decimal.

    .. code-block::

      > job.stdout
      # CUDA Kernel Statistics (nanoseconds)
      #
      # Time(%)      Total Time   Instances         Average         Minimum
      # -------  --------------  ----------  --------------  --------------
      #    49.7        69968829           6      11661471.5        11507063
      #    26.4        37101887           6       6183647.8         6047175
      #    ****
      #    24.0        33719758          24       1404989.9         1371531
      # ...
      #        Maximum  Name
      # --------------  ------------------
      #       11827539  computeMomentumAndEnergyIAD
      #        6678078  computeIAD
      #        1459594  density
    '''
    # NOTE(review): the sample above shows a bare kernel name, whereas the
    # regex requires `::computeIAD<` on the same line as the percentage;
    # confirm against real tool output.
    pattern = (r'^\s+(?P<pctg>\S+).*::computeIAD<')
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
def __init__(self, prg_envs):
    '''Set up the SPEC ACCEL check for the given programming environments.

    :arg prg_envs: programming environment names; stored as
        ``valid_prog_environs`` and used as keys into ``self.benchmarks``
        and ``self.exec_times``.
    '''
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = prg_envs
    self.modules = ['craype-accel-nvidia60']
    self.configs = {
        'PrgEnv-gnu': 'cscs-gnu',
        'PrgEnv-cray': 'cscs-cray',
        'PrgEnv-pgi': 'cscs-pgi',
    }

    spec_dir = os.path.join(self.current_system.resourcesdir,
                            'SPEC_ACCELv1.2')
    self.prebuild_cmd = [
        f'cp -r {spec_dir}/* .',
        './install.sh -d . -f'
    ]

    # Only prebuild_cmd is really wanted here, but a build system with
    # no action is not supported, so it is given something harmless to
    # compile.
    self.build_system = 'SingleSource'
    self.sourcepath = './benchspec/ACCEL/353.clvrleaf/src/timer_c.c'
    self.build_system.cflags = ['-c']

    self.num_tasks = 1
    self.num_tasks_per_node = 1
    self.time_limit = (0, 30, 0)
    self.executable = 'runspec'

    # First log file matching the glob
    log_file = sn.getitem(sn.glob('result/ACCEL.*.log'), 0)

    # Per-environment references, sanity checks and performance patterns
    refs = {}
    sanity_by_env = {}
    perf_by_env = {}
    for env in self.valid_prog_environs:
        names = self.benchmarks[env]
        refs[env] = {
            name: (runtime, None, 0.1, 'Seconds')
            for name, runtime in zip(names, self.exec_times[env])
        }
        sanity_by_env[env] = sn.all([
            sn.assert_found(r'Success.*%s' % name, log_file)
            for name in names
        ])
        perf_by_env[env] = {
            name: sn.avg(
                sn.extractall(
                    r'Success.*%s.*runtime=(?P<rt>[0-9.]+)' % name,
                    log_file, 'rt', float))
            for name in names
        }

    self.refs = refs
    self.sanity_patterns_ = sanity_by_env
    self.perf_patterns_ = perf_by_env

    self.maintainers = ['SK']
    self.tags = {'diagnostic', 'external-resources'}
def density_ns(self):
    '''Return the mean ``end - begin`` difference for ``density``.

    The ``begin`` and ``end`` groups are extracted as integers from the
    ``.csv`` counterpart of ``self.metric_file`` in the stage directory;
    the mean of the pairwise differences is rounded to 0 decimals.
    '''
    pattern = self.set_regex('density')
    csv_name = self.metric_file.replace(".txt", ".csv")
    report = os.path.join(self.stagedir, csv_name)
    starts = sn.extractall(pattern, report, 'begin', int)
    stops = sn.extractall(pattern, report, 'end', int)
    durations = [stop - start for start, stop in zip(starts, stops)]
    return sn.round(sn.avg(durations), 0)
def __init__(self, benchmark, part, num_tasks, num_tasks_per_node):
    """Run a WRF benchmark using pre-downloaded files.

    Should be subclassed by a test decorated with
    ``@rfm.parameterized_test(*scaling_config())``.

    Args:
        benchmark: str, key in BENCHMARKS; also used here as key into
            TIMING_CONSTANTS and as the name of the download directory.
        part: the single entry placed in ``valid_systems``.
        num_tasks: total number of tasks.
        num_tasks_per_node: number of tasks per node.

    For the last three see `modules.reframe_extras.scaling_config()`.
    """
    self.benchmark = benchmark
    self.num_tasks = num_tasks
    self.num_tasks_per_node = num_tasks_per_node
    self.num_nodes = int(self.num_tasks / self.num_tasks_per_node)
    self.time_limit = '3h'  # TODO: change in child classes?

    self.benchdir = os.path.join(self.prefix, 'downloads', self.benchmark)
    # NB we do NOT set sourcesdir or readonly_files as we want to
    # symlink in files ourselves

    self.valid_systems = [part]
    self.valid_prog_environs = ['wrf']

    self.executable = 'wrf.exe'
    self.executable_opts = []
    self.keep_files = ['rsl.error.0000']

    self.sanity_patterns = sn.all([
        sn.assert_found(r'wrf: SUCCESS COMPLETE WRF', 'rsl.error.0000'),
    ])

    constants = TIMING_CONSTANTS[self.benchmark]
    self.model_timestep = constants['model_timestep']
    self.gflops_factor = constants['gflops_factor']

    # Wall time parsed from the `real` line written to stderr
    runtime_real = sn.extractsingle(r'^real\s+(\d+m[\d.]+s)$',
                                    self.stderr, 1, parse_time_cmd)
    # gflops = model timestep / mean extracted timing * benchmark factor
    step_timings = sn.sanity_function(extract_timings)('rsl.error.0000')
    gflops = ((self.model_timestep / sn.avg(step_timings))
              * self.gflops_factor)
    self.perf_patterns = {
        'runtime_real': runtime_real,
        'gflops': gflops
    }
    self.reference = {
        '*': {
            'runtime_real': (0, None, None, 's'),
            'gflops': (0, None, None, '/s'),
        }
    }
    self.tags |= {
        self.benchmark,
        'num_procs=%i' % self.num_tasks,
        'num_nodes=%i' % self.num_nodes,
        'run'
    }
def __init__(self, variant):
    '''Set up the distributed TensorFlow/Horovod training check.

    :arg variant: ``'small'`` runs 8 tasks and is also valid on
        ``dom:gpu``; any other value runs 32 tasks on ``daint:gpu`` only.
    '''
    tf_version = '1.14'
    is_small = variant == 'small'

    self.descr = 'Distributed training with TensorFlow and Horovod'
    self.valid_prog_environs = ['PrgEnv-gnu']
    self.sourcesdir = 'https://github.com/tensorflow/benchmarks'
    self.modules = [f'Horovod/0.16.4-CrayGNU-19.06-tf-{tf_version}.0']

    if is_small:
        self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.num_tasks = 8
        self.reference = {
            'dom:gpu': {
                'throughput': (1133.6, None, 0.05, 'images/s'),
            },
            'daint:gpu': {
                'throughput': (1134.8, None, 0.05, 'images/s')
            },
        }
    else:
        self.valid_systems = ['daint:gpu']
        self.num_tasks = 32
        self.reference = {
            'daint:gpu': {
                'throughput': (4403.0, None, 0.05, 'images/s')
            },
        }

    self.num_tasks_per_node = 1
    self.num_cpus_per_task = 12

    # Performance: mean of all reported throughput values
    throughputs = sn.extractall(
        r'total images/sec:\s+(?P<throughput>\S+)',
        self.stdout, 'throughput', float)
    self.perf_patterns = {'throughput': sn.avg(throughputs)}

    # Sanity: one throughput line per task
    throughput_lines = sn.findall(r'total images/sec:', self.stdout)
    self.sanity_patterns = sn.assert_eq(sn.count(throughput_lines),
                                        self.num_tasks)

    self.pre_run = [f'git checkout cnn_tf_v{tf_version}_compatible']
    self.variables = {
        'NCCL_DEBUG': 'INFO',
        'NCCL_IB_HCA': 'ipogif0',
        'NCCL_IB_CUDA_SUPPORT': '1',
        'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
    }
    self.executable = 'python'
    self.executable_opts = [
        'scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py',
        '--model inception3',
        '--batch_size 64',
        '--variable_update horovod',
        '--log_dir ./logs',
        '--train_dir ./checkpoints'
    ]
    self.tags = {'production'}
    self.maintainers = ['MS', 'RS']
def __init__(self, arch, flavor):
    '''Set up the NAMD check for the given architecture and flavor.

    :arg arch: architecture label; only used in the test description.
    :arg flavor: NAMD build flavor; selects the ``namd-<flavor>`` module.
        ``valid_prog_environs`` is only set here for ``'multicore'``
        and ``'verbs'``.
    '''
    super().__init__()
    self.descr = 'NAMD check (%s)' % (arch)

    # Both flavors use the same programming environments
    if flavor in ('multicore', 'verbs'):
        self.valid_prog_environs = ['intel-2016.4', 'intel-2018.3']

    self.modules = ['namd-%s' % flavor]

    # Reset sources dir relative to the SCS apps prefix
    self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                   'NAMD')
    self.executable = 'namd2'
    self.use_multithreading = True

    # Sanity: 25 TIMING lines and an average energy close to the reference
    energy = sn.avg(
        sn.extractall(r'^ENERGY:(\s+\S+){10}\s+(?P<energy>\S+)',
                      self.stdout, 'energy', float))
    energy_reference = -2451359.5
    energy_diff = sn.abs(energy - energy_reference)
    self.sanity_patterns = sn.all([
        sn.assert_eq(
            sn.count(
                sn.extractall(r'TIMING: (?P<step_num>\S+) CPU:',
                              self.stdout, 'step_num')),
            25),
        sn.assert_lt(energy_diff, 2720)
    ])

    # Raw strings: `\S` is not a valid escape in a normal string literal,
    # so the non-raw form triggers an invalid-escape warning.
    self.perf_patterns = {
        'days_ns': sn.avg(
            sn.extractall(
                r'Info: Benchmark time: \S+ CPUs \S+ '
                r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
                self.stdout, 'days_ns', float))
    }
    self.maintainers = ['CB', 'LM']
    self.tags = {'scs', 'external-resources'}
    self.strict_check = False
    self.extra_resources = {'switches': {'num_switches': 1}}
def setting_variables(self):
    '''Configure the NAMD 2.13 CUDA ``apoa1`` benchmark.

    Sets the job layout (1 task, 8 CPUs, 8 GPUs per node), the sanity
    check (25 TIMING lines in stdout) and the ``days_ns`` performance
    pattern with its reference.
    '''
    self.descr = 'NAMD 2.13 CUDA version benchmark apoa1'
    self.valid_systems = ['ibex:batch_mpi']
    self.valid_prog_environs = ['gpustack_builtin']
    self.sourcesdir = '../src/namd'
    self.modules = ['namd']  # /2.13/cuda10-verbs-smp-icc17
    self.prerun_cmds = [
        'module list', 'which namd2', 'hostname', 'echo $MODULEPATH'
    ]
    # Previously considered prerun commands:
    # ['export SLURM_CPU_BIND_TYPE=sockets',
    #  'export SLURM_CPU_BIND_VERBOSE=verbose']
    self.executable = 'namd2'
    self.executable_opts = [
        '+p8', '+devices', '0,1,2,3,4,5,6,7', '+idlepoll',
        '+setcpuaffinity', 'apoa1.namd'
    ]

    # Job script attributes
    self.time_limit = '1h'
    self.num_tasks = 1
    self.num_tasks_per_node = 1
    self.num_gpus_per_node = 8
    self.num_cpus_per_task = 8
    self.extra_resources = {'constraint': {'type': 'v100'}}

    self.sanity_patterns = sn.assert_eq(
        sn.count(
            sn.extractall(r'TIMING: (?P<step_num>\S+) CPU:',
                          self.stdout, 'step_num')),
        25)

    # Raw strings: `\S` is not a valid escape in a normal string literal,
    # so the non-raw form triggers an invalid-escape warning.
    self.perf_patterns = {
        'days_ns': sn.avg(
            sn.extractall(
                r'Info: Benchmark time: \S+ CPUs \S+ '
                r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
                self.stdout, 'days_ns', float))
    }
    self.reference = {
        'ibex': {
            'days_ns': (0.037, None, 0.1, None)
        },
    }
    self.tags = {'namd', 'acceptance'}

    # initials or email of the maintainer
    self.maintainers = ['MS']
def __init__(self):
    '''Set up a single-task check that reports an average wall time.

    The run passes when ``PASSED`` is found in stdout. The ``wall_time``
    metric is the average of the values listed in the ``"series"`` entry
    of stdout.
    '''
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = ['builtin']
    self.modules = ['CMake', 'Boost']
    self.num_tasks = 1
    self.sanity_patterns = sn.assert_found(r'PASSED', self.stdout)

    # Deferred wrapper so the series text is only parsed on evaluation
    deferred_literal_eval = sn.sanity_function(ast.literal_eval)
    series_text = sn.extractsingle(r'"series" : \[(?P<wall_times>.+)\]',
                                   self.stdout, 'wall_times')
    self.perf_patterns = {
        'wall_time': sn.avg(deferred_literal_eval(series_text))
    }
def setup_perf_vars(self):
    '''Set the ``days_ns`` performance pattern and its references.

    The reference table depends on ``self.arch`` (``'gpu'`` or anything
    else) and ``self.scale`` (``'small'`` or anything else). Every entry
    uses no lower threshold, a 5% upper threshold and unit ``days/ns``.
    '''
    benchmark_times = sn.extractall(
        r'Info: Benchmark time: \S+ CPUs \S+ '
        r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
        self.stdout, 'days_ns', float)
    self.perf_patterns = {'days_ns': sn.avg(benchmark_times)}

    # Expected days/ns per system partition
    if self.arch == 'gpu':
        if self.scale == 'small':
            expected = {'dom:gpu': 0.15, 'daint:gpu': 0.15}
        else:
            expected = {'daint:gpu': 0.07}
    elif self.scale == 'small':
        expected = {
            'dom:mc': 0.51,
            'daint:mc': 0.51,
            'eiger:mc': 0.12,
            'pilatus:mc': 0.12,
        }
    else:
        expected = {
            'daint:mc': 0.28,
            'eiger:mc': 0.05,
            'pilatus:mc': 0.05,
        }

    self.reference = {
        partition: {'days_ns': (value, None, 0.05, 'days/ns')}
        for partition, value in expected.items()
    }
def validate_energy(self):
    '''Return the sanity expression for the NAMD run.

    It requires exactly 50 ``TIMING:`` lines in stdout and an average
    extracted energy within 2720 of the reference value.
    '''
    energy_reference = -2451359.5
    energy_values = sn.extractall(
        r'ENERGY:([ \t]+\S+){10}[ \t]+(?P<energy>\S+)',
        self.stdout, 'energy', float)
    energy_diff = sn.abs(sn.avg(energy_values) - energy_reference)

    timing_steps = sn.extractall(r'TIMING: (?P<step_num>\S+) CPU:',
                                 self.stdout, 'step_num')
    return sn.all([
        sn.assert_eq(sn.count(timing_steps), 50),
        sn.assert_lt(energy_diff, 2720)
    ])
def nvprof_report_DtoH_pct(self):
    '''Return the ``[CUDA memcpy DtoH]`` Time(%) reported by the tool.

    All matching lines in the job's stdout are averaged and the result
    is rounded to one decimal.

    .. code-block::

      > job.stdout (Name: [CUDA memcpy DtoH])
      # Time(%)      Time  Calls       Avg       Min       Max
      #   2.80%  1.3194ms     44  29.986us  29.855us  30.528us  [CUDA memcpy DtoH]
      #   1.34%  1.7667ms     44  40.152us  39.519us  41.887us  [CUDA memcpy DtoH]
      #   ^^^^
    '''
    # The percentage is the token right before the first `%` on a line
    # ending with the DtoH tag
    pattern = r'^\s+\s+(?P<pctg>\S+)%.*\[CUDA memcpy DtoH\]$'
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
def nvprof_report_HtoD_pct(self):
    '''Return the ``[CUDA memcpy HtoD]`` Time(%) reported by the tool.

    All matching lines in the job's stdout are averaged and the result
    is rounded to one decimal.

    .. code-block::

      > job.stdout (Name: [CUDA memcpy HtoD])
      #            Type  Time(%)      Time  Calls       Avg    Min       Max
      # GPU activities:   48.57%  22.849ms    162  141.04us  896ns  1.6108ms
      # GPU activities:   56.12%  74.108ms    162  457.45us  928ns  5.8896ms
      #                   ^^^^^
    '''
    # Only `GPU activities:` lines ending with the HtoD tag are considered
    pattern = r'^\s+GPU activities:\s+(?P<pctg>\S+)%.*\[CUDA memcpy HtoD\]$'
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
def __init__(self):
    '''Set up the distributed TensorFlow/Horovod training check.

    The number of tasks is 8 on ``dom`` and 32 on ``daint``; it is not
    set here for any other system.
    '''
    super().__init__()
    tf_version = '1.11'

    self.descr = 'Distributed training with TensorFlow and Horovod'
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = ['PrgEnv-gnu']
    self.sourcesdir = 'https://github.com/tensorflow/benchmarks'
    self.modules = [f'Horovod/0.15.0-CrayGNU-18.08-tf-{tf_version}.0']
    self.reference = {
        'dom:gpu': {
            'throughput': (1133.6, None, 0.05, 'images/s'),
        },
        'daint:gpu': {
            'throughput': (4403.0, None, 0.05, 'images/s')
        },
    }

    # Performance: mean of all reported throughput values
    throughputs = sn.extractall(
        r'total images/sec:\s+(?P<throughput>\S+)',
        self.stdout, 'throughput', float)
    self.perf_patterns = {'throughput': sn.avg(throughputs)}

    # NOTE(review): `[\S+\s+]` is a character class that matches a single
    # character, not a bracketed field - confirm this is intended.
    self.sanity_patterns = sn.assert_found(
        r'[\S+\s+] INFO NET\/IB : Using interface ipogif0'
        r' for sideband communication', self.stdout)

    self.num_tasks_per_node = 1
    tasks_by_system = {'dom': 8, 'daint': 32}
    system_name = self.current_system.name
    if system_name in tasks_by_system:
        self.num_tasks = tasks_by_system[system_name]

    self.pre_run = [f'git checkout cnn_tf_v{tf_version}_compatible']
    self.variables = {
        'NCCL_DEBUG': 'INFO',
        'NCCL_IB_HCA': 'ipogif0',
        'NCCL_IB_CUDA_SUPPORT': '1',
        'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
    }
    self.executable = 'python'
    self.executable_opts = [
        'scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py',
        '--model inception3',
        '--batch_size 64',
        '--variable_update horovod',
        '--log_dir ./logs',
        '--train_dir ./checkpoints'
    ]
    self.tags = {'production'}
    self.maintainers = ['MS', 'RS']
def nvprof_report_momentumEnergy_pct(self):
    '''Return the ``CUDA Kernel`` Time (%) of computeMomentumAndEnergyIAD.

    All matching lines in the job's stdout are averaged and the result
    is rounded to one decimal.

    .. code-block::

      > job.stdout
      # (where Name = sphexa::sph::cuda::kernels::computeMomentumAndEnergyIAD)
      # Time(%)      Time  Calls       Avg       Min       Max  Name
      #  28.25%  13.288ms      4  3.3220ms  3.1001ms  3.4955ms  void ...
      #  21.63%  28.565ms      4  7.1414ms  6.6616ms  7.4616ms  void ...
      #  ^^^^^
    '''
    # Percentage token before `%` on lines naming the templated kernel
    pattern = (r'^\s+(?P<pctg>\S+)%.*::computeMomentumAndEnergyIAD<')
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
def nvprof_report_computeIAD_pct(self):
    '''Return the ``CUDA Kernel`` Time (%) of computeIAD reported by the tool.

    All matching lines in the job's stdout are averaged and the result
    is rounded to one decimal.

    .. code-block::

      > job.stdout
      # (where Name = sphexa::sph::cuda::kernels::computeIAD)
      # Time(%)      Time  Calls       Avg       Min       Max  Name
      #  12.62%  5.9380ms      4  1.4845ms  1.3352ms  1.6593ms  void ...
      #  10.54%  13.915ms      4  3.4788ms  3.3458ms  3.7058ms  void ...
      #  ^^^^^
    '''
    # Percentage token before `%` on lines naming the templated kernel
    pattern = (r'^\s+(?P<pctg>\S+)%.*::computeIAD<')
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
def nvprof_report_cudaMemcpy_pct(self):
    '''Return the ``CUDA API`` Time (%) of cudaMemcpy reported by the tool.

    All matching lines in the job's stdout are averaged and the result
    is rounded to one decimal.

    .. code-block::

      > job.stdout (where Name = cudaMemcpy|cudaMemcpyToSymbol)
      #            Time(%)  Total Time  Calls   Average   Minimum   Maximum  Name
      # API calls:  74.37%    219.93ms      2  109.96ms  20.433us  219.90ms  ...
      #             18.32%    54.169ms    204  265.53us  11.398us  3.5624ms  ...
      # API calls:  54.65%    222.03ms      2  111.02ms  20.502us  222.01ms  ...
      #             34.88%    141.73ms    204  694.76us  21.168us  7.5486ms  ...
    '''
    # Any line containing `cudaMemcpy` after a percentage is accepted,
    # with or without a leading label
    pattern = r'^.*?\s+(?P<pctg>\S+)%.*cudaMemcpy.*$'
    percentages = sn.extractall(pattern, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
def pw_perf_patterns(obj):
    '''Return min/avg/max of a hardware counter reported by the tool.

    The counter is the trailing integer column of the
    ``computeMomentumAndEnergyIAD`` lines found in ``obj.stderr``. The
    average is rounded to one decimal.

    .. code-block::

      collector                       time time (%)   PAPI_REF_CYC   PAPI_L2_DCM
      --------------------------------------------------------------------------
      computeMomentumAndEnergyIAD   0.6816   100.00     1770550470       2438527
                                                                         ^^^^^^^

    :arg obj: object providing the ``stderr`` attribute to parse.
    :returns: dict with the keys ``papiwrap_hwc_min``,
        ``papiwrap_hwc_avg`` and ``papiwrap_hwc_max``.
    '''
    pattern = (
        r'^computeMomentumAndEnergyIAD\s+\S+\s+\S+\s+\S+\s+(?P<hwc>\d+)$'
    )
    counters = sn.extractall(pattern, obj.stderr, 'hwc', int)
    return {
        'papiwrap_hwc_min': sn.min(counters),
        'papiwrap_hwc_avg': sn.round(sn.avg(counters), 1),
        'papiwrap_hwc_max': sn.max(counters),
    }
def __init__(self):
    '''Set up the STREAM CPU check.

    The executable is a shell loop that runs ``stream_c.exe`` eight
    times; the ``triad`` metric is the average of all ``Triad:`` values
    found in stdout.
    '''
    self.descr = ('stream/5.10-intel-19.0.5 benchmark CPU check '
                  'RunOnlyRegressionTest')
    self.valid_systems = ['pi2:cpu']
    self.valid_prog_environs = ['*']
    # NOTE: sourcesdir is deliberately left untouched here
    # (previously: self.sourcesdir = None)
    self.modules = ['stream/5.10-intel-19.0.5']

    # Job layout
    self.num_tasks = 40
    self.num_tasks_per_node = 40
    self.num_cpus_per_task = 1
    self.time_limit = None

    self.executable = 'for i in `seq 1 8`; do stream_c.exe; sleep 3; done'
    self.sanity_patterns = sn.assert_found(
        r'Solution Validates: avg error less than', self.stdout)

    triad_rates = sn.extractiter(r'Triad:\s+(?P<triad>\S+)\s+\S+',
                                 self.stdout, 'triad', float)
    self.perf_patterns = {'triad': sn.avg(triad_rates)}
    self.reference = {
        'pi2:cpu': {
            'triad': (112640, 0, None, 'MB/s')
        }
    }
    self.maintainers = ['blacknail']
    self.tags = {'benchmark', 'pro', 'stream', 'node_health'}
def nsys_report_HtoD_pct(self):
    '''Return the ``[CUDA memcpy HtoD]`` Time(%) reported by the tool.

    All matching lines in the job's stdout are averaged and the result
    is rounded to one decimal.

    .. code-block::

      > job.stdout
      # CUDA Memory Operation Statistics (nanoseconds)
      #
      # Time(%)      Total Time  Operations         Average  ...
      # -------  --------------  ----------  --------------  ...
      #    99.1       154400354         296        521622.8  ...
      #    ****
      #
      #         Minimum         Maximum  Name
      #  --------------  --------------  -------------------
      #             896         8496291  [CUDA memcpy HtoD]
    '''
    # First of six columns is the percentage; the line ends with the tag
    pattern = (r'^\s+(?P<pctg_nsec>\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+'
               r'\[CUDA memcpy HtoD\]\s+$')
    percentages = sn.extractall(pattern, self.stdout, 'pctg_nsec', float)
    return sn.round(sn.avg(percentages), 1)
def nsys_report_DtoH_pct(self):
    '''Return the ``[CUDA memcpy DtoH]`` Time(%) reported by the tool.

    All matching lines in the job's stdout are averaged and the result
    is rounded to one decimal.

    .. code-block::

      > job.stdout
      # CUDA Memory Operation Statistics (nanoseconds)
      #
      # Time(%)      Total Time  Operations         Average  ...
      # -------  --------------  ----------  --------------  ...
      #     0.9         1385579          84         16495.0  ...
      #    ****
      #
      #         Minimum         Maximum  Name
      #  --------------  --------------  -------------------
      #            6144           21312  [CUDA memcpy DtoH]
    '''
    # First of six columns is the percentage; the line ends with the tag
    pattern = (r'^\s+(?P<pctg_nsec>\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+'
               r'\[CUDA memcpy DtoH\]\s+$')
    percentages = sn.extractall(pattern, self.stdout, 'pctg_nsec', float)
    return sn.round(sn.avg(percentages), 1)
def __init__(self):
    '''Set up the Intel MKL MP LINPACK CPU check.

    The run passes when both the residual-check and end-of-tests
    messages are found in stdout. The ``perf`` metric is the average of
    the last captured column of the matching ``WR00L2L2`` result lines.
    '''
    self.descr = ('linpack intel-mkl/2019.3.199-intel-19.0.4 benchmark '
                  'CPU check '
                  'RunOnlyRegressionTest')
    self.valid_systems = ['pi2:cpu']
    self.valid_prog_environs = ['*']
    self.modules = ["intel-parallel-studio"]
    self.sourcesdir = (
        "/lustre/opt/cascadelake/linux-centos7-cascadelake/intel-19.0.4/"
        "intel-mkl-2019.3.199-fwha3ldpm5qbymzf45nzfpaehfztqwms/"
        "mkl/benchmarks/mp_linpack"
    )
    self.num_cpus_per_task = 1
    self.num_tasks = 40
    self.num_tasks_per_node = 40
    self.time_limit = None
    self.executable = './runme_intel64_static'
    self.sanity_patterns = sn.and_(
        sn.assert_found(r'1 tests completed and passed residual checks',
                        self.stdout),
        sn.assert_found(r'End of Tests.', self.stdout))

    # Raw string: `\s` and `\S` are not valid escapes in a normal string
    # literal, so the non-raw form triggers an invalid-escape warning.
    self.perf_patterns = {
        'perf': sn.avg(
            sn.extractiter(
                r'WR00L2L2\s+\S+\s+\S+\s+\S+\s+1\s+\S+\s+(?P<perf>\S+)',
                self.stdout, 'perf', float))
    }
    self.reference = {
        'pi2:cpu': {
            'perf': (2000, 0, None, 'GFlops')
        }
    }
    self.maintainers = ['blacknail']
    self.tags = {'benchmark', 'pro', 'stream', 'node_health'}
def wall_time(self):
    '''Return the average of the values in the ``"series"`` entry of stdout.

    The text between the square brackets is extracted and parsed with a
    deferred ``ast.literal_eval`` before averaging.
    '''
    deferred_literal_eval = sn.deferrable(ast.literal_eval)
    series_text = sn.extractsingle(r'"series" : \[(?P<wall_times>.+)\]',
                                   self.stdout, 'wall_times')
    return sn.avg(deferred_literal_eval(series_text))
def days_ns(self):
    '''Return the average ``days/ns`` over all benchmark lines in stdout.'''
    pattern = (r'Info: Benchmark time: \S+ CPUs \S+ '
               r's/step (?P<days_ns>\S+) days/ns \S+ MB memory')
    benchmark_times = sn.extractall(pattern, self.stdout, 'days_ns', float)
    return sn.avg(benchmark_times)