def gpu_usage_sanity(self):
    '''Check that the job report section of the output is plausible.

    The following conditions are combined:

    - at least one node is reported;
    - every reported node also shows up in a ``[<node>] GPU <n>(OK)`` line
      printed by the burn app;
    - for every report line, ``burn_time / <reported time>`` does not exceed
      the reported usage percentage;
    - the smallest reported time is at least ``burn_time``.

    The reported node names are stored in ``self.nodes_reported``.
    '''

    # Nodes for which the burn app printed an OK line
    ok_line = r'^\s*\[([^\]]*)\]\s*GPU\s*\d+\(OK\)'
    burn_nodes = set(sn.extractall(ok_line, self.stdout, 1))

    # Job report line: <node> <usage> % <n> MiB <d>:<d>:<d>
    # NOTE(review): only the last field of the time stamp is captured.
    report_line = r'^\s*(\w*)\s*(\d+)\s*%\s*\d+\s*MiB\s*\d+:\d+:(\d+)'
    self.nodes_reported = sn.extractall(report_line, self.stdout, 1)
    usage = sn.extractall(report_line, self.stdout, 2, int)
    time_reported = sn.extractall(report_line, self.stdout, 3, int)

    def usage_covers_burn(reported_time, reported_usage):
        return self.burn_time / reported_time <= reported_usage

    checks = [
        sn.assert_ge(sn.count(self.nodes_reported), 1),
        set(self.nodes_reported).issubset(burn_nodes),
        sn.all(map(usage_covers_burn, time_reported, usage)),
        sn.assert_ge(sn.min(time_reported), self.burn_time)
    ]
    return sn.all(checks)
def __init__(self):
    '''Configure the GPU burn test.

    Sets the valid systems and environments, the ``Make`` build using
    ``makefile.cuda``, the executable and its options, and the performance
    patterns: ``perf`` is the minimum and ``temp`` the maximum of the values
    extracted from the ``[...] GPU <n>(...): <perf> GF/s <temp> Celsius``
    lines of the standard output.
    '''
    self.valid_systems = ['cannon:local-gpu', 'cannon:gpu_test',
                          'fasse:fasse_gpu', 'test:gpu']
    self.descr = 'GPU burn test'
    self.valid_prog_environs = ['gpu']
    self.executable_opts = ['-d', '40']
    self.build_system = 'Make'
    self.build_system.makefile = 'makefile.cuda'
    self.executable = './gpu_burn.x'
    patt = (r'^\s*\[[^\]]*\]\s*GPU\s+\d+\(\S*\):\s+(?P<perf>\S*)\s+GF\/s'
            r'\s+(?P<temp>\S*)\s+Celsius')
    self.perf_patterns = {
        'perf': sn.min(sn.extractall(patt, self.stdout, 'perf', float)),
        'temp': sn.max(sn.extractall(patt, self.stdout, 'temp', float)),
    }
    self.reference = {
        'cannon:local-gpu': {
            'perf': (6200, -0.10, None, 'Gflop/s per gpu'),
        },
        'cannon:gpu_test': {
            'perf': (6200, -0.10, None, 'Gflop/s per gpu'),
        },
        'test:gpu': {
            'perf': (4115, None, None, 'Gflop/s per gpu'),
        },
        # A dict literal keeps only the last value of a repeated key, so the
        # wildcard 'perf' and 'temp' references must live in a single '*'
        # entry; otherwise the 'perf' fallback is silently discarded.
        '*': {
            'perf': (4115, None, None, 'Gflop/s per gpu'),
            'temp': (0, None, None, 'degC'),
        },
    }
def __init__(self):
    '''Set up the Cray LibSci GPU dgemm check.

    The test compiles ``dgemm_simple.c`` as a single source file, requires
    three consecutive ``4096`` fields in the output for sanity, and reports
    the maximum GPU and CPU values found in the result lines.
    '''
    super().__init__()
    self.descr = 'Test Cray LibSci on the GPU (dgemm with libsci alloc)'
    self.build_system = 'SingleSource'
    self.sourcesdir = None
    self.sourcepath = ('$CRAY_LIBSCI_ACC_DIR/examples/examples/c_simple/'
                       'dgemm_simple.c')
    self.sanity_patterns = sn.assert_found(r'(4096\s+){3}', self.stdout)

    # Three integer columns followed by the GPU and CPU columns
    flops_line = r'(\s+\d+){3}\s+(?P<gpu_flops>\S+)\s+(?P<cpu_flops>\S+)\s+'

    def best(column):
        return sn.max(sn.extractall(flops_line, self.stdout, column, float))

    self.perf_patterns = {
        'dgemm_gpu': best('gpu_flops'),
        'dgemm_cpu': best('cpu_flops'),
    }

    # Both partitions share the same reference values
    self.reference = {
        partition: {
            'dgemm_gpu': (2264.0, -0.05, None, 'GFLop/s'),
            'dgemm_cpu': (45.0, -0.05, None, 'GFLop/s'),
        }
        for partition in ('daint:gpu', 'dom:gpu')
    }
def __init__(self, name, *args, **kwargs):
    '''Initialise a check that repeats its run ``self.multi_rep`` times.

    :arg name: base name of the check; when non-empty it is joined with an
        underscore to the ``<multi_rep>runs`` suffix.
    :arg args: positional arguments forwarded to the parent constructor.
    :arg kwargs: keyword arguments forwarded to the parent constructor.

    After the parent constructor has run:

    - ``self.time_limit`` (an ``(hours, minutes, seconds)`` triple) is
      multiplied by ``self.multi_rep``;
    - if ``self.multirun_san_pat`` exists, the sanity check requires exactly
      ``multi_rep`` matches of that pattern;
    - if ``self.multirun_perf_pat`` exists, each key gets one performance
      entry with the average and one ``<key>_<run>`` entry per run.
    '''
    # Compare by value: ``is not ''`` tests object identity, which is
    # implementation-dependent for strings.
    if name != '':
        name += '_'

    super().__init__('{0}{1}runs'.format(name, self.multi_rep),
                     *args, **kwargs)

    # Scale the assumed runtime. Working in total seconds and splitting with
    # divmod keeps minutes and seconds below 60 for any repetition count.
    total_seconds = (self.time_limit[0] * 3600 +
                     self.time_limit[1] * 60 +
                     self.time_limit[2]) * self.multi_rep
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    self.time_limit = (hours, minutes, seconds)

    # Check that the sanity pattern was found multi_rep times
    if hasattr(self, 'multirun_san_pat'):
        self.sanity_patterns = sn.assert_eq(
            sn.count(sn.findall(*self.multirun_san_pat)), self.multi_rep)

    # Create the result values: first the average and then every single
    # element (to be stored)
    if hasattr(self, 'multirun_perf_pat'):
        self.perf_patterns = {}
        for key, pattern_args in list(self.multirun_perf_pat.items()):
            self.perf_patterns[key] = sn.avg(sn.extractall(*pattern_args))
            for run in range(self.multi_rep):
                self.perf_patterns[key + '_{}'.format(run)] = (
                    sn.extractall(*pattern_args)[run])
def mpip_perf_patterns(obj, reg):
    '''Build one of the mpiP performance expressions.

    The name of the mpiP report is taken from the
    ``mpiP: Storing mpiP output in [...]`` line of ``obj.stdout``. The
    values come from the ``MPI Time (seconds)`` table of that report:

    .. code-block::

      -----------------------------------
      @--- MPI Time (seconds) -----------
      -----------------------------------
      Task    AppTime    MPITime     MPI%
         0        8.6      0.121     1.40   <-- min
         1        8.6      0.157     1.82
         2        8.6       5.92    68.84   <-- max
         *       25.8        6.2    24.02   <---
      => NonMPI= AppTime - MPITime

    :arg obj: object providing ``stdout`` and ``num_tasks``.
    :arg reg: selects the expression to build:

        * ``1``: MPITime of the ``*`` row divided by ``num_tasks``
          (mpip_avg_mpi_time)
        * ``2``: AppTime of the ``*`` row divided by ``num_tasks``
          (mpip_avg_app_time)
        * ``3``: MPI% of the ``*`` row (%mpip_avg_mpi_time)
        * ``4``: ``100 - MPI%`` of the ``*`` row (%nonmpi)
        * ``5``: maximum MPI% over the table rows
        * ``6``: minimum MPI% over the table rows

    :returns: the selected expression.
    :raises ValueError: if ``reg`` is none of the values above.
    '''
    rpt = sn.extractsingle(r'^mpiP: Storing mpiP output in \[(?P<rpt>.*)\]',
                           obj.stdout, 'rpt', str)
    regex_star = r'^\s+\*\s+(?P<appt>\S+)\s+(?P<mpit>\S+)\s+(?P<pct>\S+)$'
    regex_minmax = (r'^\s+(?P<mpirk>\S+)\s+(?P<appt>\S+)\s+(?P<mpit>\S+)\s+'
                    r'(?P<pct>\S+)$')

    def star_row(column):
        return sn.extractsingle(regex_star, rpt, column, float)

    def all_rows_pct():
        return sn.extractall(regex_minmax, rpt, 'pct', float)

    if reg == 1:
        # mpip_avg_mpi_time
        return sn.round(star_row('mpit') / obj.num_tasks, 2)

    if reg == 2:
        # mpip_avg_app_time
        return sn.round(star_row('appt') / obj.num_tasks, 2)

    if reg == 3:
        # %mpip_avg_mpi_time
        return star_row('pct')

    if reg == 4:
        # %nonmpi
        return sn.round(100 - star_row('pct'), 2)

    if reg == 5:
        # %mpip_avg_mpi_time_max
        return sn.max(all_rows_pct())

    if reg == 6:
        # %mpip_avg_mpi_time_min
        return sn.min(all_rows_pct())

    raise ValueError('unknown region id in mpip_perf_patterns')
def set_sanity(self):
    '''Compare the MPI_T variables and categories in the report with the
    reference files matching the detected ANL base version.

    The extracted lists are stored in ``self.ref_control_vars``,
    ``self.run_control_vars``, ``self.ref_cat_vars`` and
    ``self.run_cat_vars``; ``self.sanity_patterns`` requires the reference
    and runtime lists to be equal.
    '''
    report = os.path.join(self.stagedir, self.rpt)

    # {{{ 0/ MPICH version:
    # MPI VERSION    : CRAY MPICH version 7.7.15 (ANL base 3.2)
    # MPI VERSION    : CRAY MPICH version 8.0.16.17 (ANL base 3.3)
    # MPI VERSION    : CRAY MPICH version 8.1.4.31 (ANL base 3.4a2)
    version_regex = (
        r'^MPI VERSION\s+: CRAY MPICH version \S+ \(ANL base (\S+)\)'
    )
    anl_base = sn.extractsingle(version_regex, report, 1)
    reference_files = {
        '3.2': {
            'control': 'mpit_control_vars_32.ref',
            'categories': 'mpit_categories_32.ref',
        },
        '3.3': {
            'control': 'mpit_control_vars_33.ref',
            'categories': 'mpit_categories_33.ref',
        },
        '3.4a2': {
            'control': 'mpit_control_vars_34a2.ref',
            'categories': 'mpit_categories_34a2.ref',
        },
    }
    # The version has to be known now in order to pick the reference files
    version_refs = reference_files[sn.evaluate(anl_base)]
    # }}}

    # {{{ 1/ MPI Control Variables: MPIR_...
    # --- extract reference data:
    control_ref = os.path.join(self.stagedir, version_refs['control'])
    self.ref_control_vars = sorted(
        sn.extractall(r'^(?P<vars>MPIR\S+)$', control_ref, 'vars'))
    # --- extract runtime data:
    self.run_control_vars = sorted(
        sn.extractall(r'^\t(?P<vars>MPIR\S+)\t', report, 'vars'))
    # --- debug with:"grep -P '\tMPIR+\S*\t' rpt | awk '{print $1}' | sort"
    # }}}

    # {{{ 2/ MPI Category:
    # --- extract reference data:
    categories_ref = os.path.join(self.stagedir, version_refs['categories'])
    reference_lines = sorted(
        sn.extractall(r'^(?P<category>.*)$', categories_ref, 'category'))
    # Drop empty lines
    self.ref_cat_vars = [line for line in reference_lines if line]
    # --- extract runtime data:
    category_regex = (
        r'^(?P<category>Category \w+ has \d+ control variables, \d+'
        r' performance variables, \d+ subcategories)'
    )
    self.run_cat_vars = sorted(
        sn.extractall(category_regex, report, 'category'))
    # --- debug with:"grep Category rpt | sort"
    # }}}

    # {{{ 3/ Extracted lists can be compared (when sorted):
    control_vars_match = sn.assert_eq(
        self.ref_control_vars, self.run_control_vars,
        msg='sanity1 "mpit_control_vars.ref" failed')
    categories_match = sn.assert_eq(
        self.ref_cat_vars, self.run_cat_vars,
        msg='sanity2 "mpit_categories.ref" failed')
    self.sanity_patterns = sn.all([control_vars_match, categories_match])
    # }}}
def density_ns(self):
    '''Return the rounded mean of ``end - begin`` over all ``density``
    matches in the ``.csv`` counterpart of ``self.metric_file``.
    '''
    pattern = self.set_regex('density')
    csv_name = self.metric_file.replace(".txt", ".csv")
    csv_report = os.path.join(self.stagedir, csv_name)
    starts = sn.extractall(pattern, csv_report, 'begin', int)
    stops = sn.extractall(pattern, csv_report, 'end', int)
    durations = [stop - start for start, stop in zip(starts, stops)]
    return sn.round(sn.avg(durations), 0)
def __init__(self, arch, scale, variant):
    '''Configure the NAMD check.

    :arg arch: used only in the description.
    :arg scale: ``'small'`` selects the smaller task counts; any other
        value selects the larger ones.
    :arg variant: used only in the description.

    Sanity requires 50 ``TIMING:`` lines and an average energy within 2720
    of the reference value. The performance value ``days_ns`` is the average
    of the ``days/ns`` figures in the benchmark lines.
    '''
    self.descr = f'NAMD check ({arch}, {variant})'
    system_name = self.current_system.name
    self.valid_prog_environs = (
        ['cpeIntel'] if system_name == 'pilatus' else ['builtin']
    )
    self.modules = ['NAMD']

    # Reset sources dir relative to the SCS apps prefix
    self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                   'NAMD', 'prod')
    self.executable = 'namd2'
    self.use_multithreading = True
    self.num_tasks_per_core = 2

    # Job layout as (num_tasks, num_tasks_per_node).
    # On Eiger a no-smp NAMD version is the default
    if system_name in ['eiger', 'pilatus']:
        layout = (768, 128) if scale == 'small' else (2048, 128)
    else:
        layout = (6, 1) if scale == 'small' else (16, 1)

    self.num_tasks, self.num_tasks_per_node = layout

    energy_values = sn.extractall(
        r'ENERGY:([ \t]+\S+){10}[ \t]+(?P<energy>\S+)',
        self.stdout, 'energy', float)
    energy_reference = -2451359.5
    energy_diff = sn.abs(sn.avg(energy_values) - energy_reference)

    timing_steps = sn.extractall(r'TIMING: (?P<step_num>\S+)  CPU:',
                                 self.stdout, 'step_num')
    self.sanity_patterns = sn.all([
        sn.assert_eq(sn.count(timing_steps), 50),
        sn.assert_lt(energy_diff, 2720)
    ])

    benchmark_values = sn.extractall(
        r'Info: Benchmark time: \S+ CPUs \S+ '
        r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
        self.stdout, 'days_ns', float)
    self.perf_patterns = {'days_ns': sn.avg(benchmark_values)}

    self.maintainers = ['CB', 'LM']
    self.tags = {'scs', 'external-resources'}
    self.extra_resources = {'switches': {'num_switches': 1}}
def test_extractall_error(self):
    '''Evaluating ``extractall`` with bad arguments raises ``SanityError``.

    The patterns are raw strings so that the backslash-d sequence reaches
    the regex engine without relying on Python's handling of unknown string
    escapes.
    '''
    # File 'foo.txt' (presumably non-existent)
    self.assertRaises(SanityError, evaluate,
                      sn.extractall(r'Step: (\d+)', 'foo.txt', 1))

    # Conversion with ``int`` and no group selected
    self.assertRaises(
        SanityError, evaluate,
        sn.extractall(r'Step: (?P<no>\d+)', self.tempfile, conv=int))

    # Group index 2, while the pattern defines a single group
    self.assertRaises(SanityError, evaluate,
                      sn.extractall(r'Step: (\d+)', self.tempfile, 2))

    # Group name 'foo', while the pattern only defines 'no'
    self.assertRaises(
        SanityError, evaluate,
        sn.extractall(r'Step: (?P<no>\d+)', self.tempfile, 'foo'))
def setting_variables(self):
    '''Configure the NAMD CUDA ``apoa1`` benchmark.

    Sanity requires 25 ``TIMING:`` lines in the standard output. The
    performance value ``days_ns`` is the average of the ``days/ns`` figures
    in the benchmark lines.
    '''
    self.descr = 'NAMD 2.13 CUDA version benchmark apoa1'
    self.valid_systems = ['ibex:batch_mpi']
    self.valid_prog_environs = ['gpustack_builtin']
    self.sourcesdir = '../src/namd'
    self.modules = ['namd']  # /2.13/cuda10-verbs-smp-icc17

    # Diagnostics printed before the run
    self.prerun_cmds = [
        'module list', 'which namd2', 'hostname', 'echo $MODULEPATH'
    ]
    self.executable = 'namd2'
    self.executable_opts = (
        '+p8 +devices 0,1,2,3,4,5,6,7 +idlepoll +setcpuaffinity apoa1.namd'
    ).split()

    # Job script attributes
    self.time_limit = '1h'
    self.num_tasks = 1
    self.num_tasks_per_node = 1
    self.num_gpus_per_node = 8
    self.num_cpus_per_task = 8
    self.extra_resources = {'constraint': {'type': 'v100'}}

    self.sanity_patterns = sn.assert_eq(
        sn.count(
            sn.extractall(r'TIMING: (?P<step_num>\S+)  CPU:',
                          self.stdout, 'step_num')), 25)

    # Raw strings: the backslash-S sequences are regex tokens, not string
    # escapes.
    self.perf_patterns = {
        'days_ns': sn.avg(
            sn.extractall(
                r'Info: Benchmark time: \S+ CPUs \S+ '
                r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
                self.stdout, 'days_ns', float))
    }
    self.reference = {
        'ibex': {
            'days_ns': (0.037, None, 0.1, None)
        },
    }
    self.tags = {'namd', 'acceptance'}

    # initials or email of the maintainer
    self.maintainers = ['MS']
def ipc_rk0(obj):
    '''Reports the ``IPC`` (instructions per cycle) for rank 0

    ``PAPI_TOT_INS`` and ``PAPI_TOT_CYC`` values are read from the
    ``METRIC 0`` lines of ``obj.rpt_otf2`` and divided pairwise; the largest
    ratio, rounded to 6 digits, is returned.
    '''
    ins_regex = (r'^METRIC\s+0\s+.*Values: \(\"PAPI_TOT_INS\" <0>; UINT64;'
                 r'\s+(?P<ins>\d+)\)')
    cyc_regex = (r'^METRIC\s+0\s+.*Values:.*\(\"PAPI_TOT_CYC\" <1>; UINT64;'
                 r'\s+(?P<cyc>\d+)\)')
    instructions = sn.extractall(ins_regex, obj.rpt_otf2, 'ins', float)
    cycles = sn.extractall(cyc_regex, obj.rpt_otf2, 'cyc', float)
    ratios = [ins / cyc for ins, cyc in zip(instructions, cycles)]
    return sn.round(max(ratios), 6)
def test_extractall_error(tempfile):
    '''Each malformed ``extractall`` request must raise ``SanityError``
    when evaluated.
    '''
    failing_requests = [
        # File 'foo.txt' (presumably non-existent)
        ((r'Step: (\d+)', 'foo.txt', 1), {}),
        # Conversion with ``int`` and no group selected
        ((r'Step: (?P<no>\d+)', tempfile), {'conv': int}),
        # Group index 2, while the pattern defines a single group
        ((r'Step: (\d+)', tempfile, 2), {}),
        # Group name 'foo', while the pattern only defines 'no'
        ((r'Step: (?P<no>\d+)', tempfile, 'foo'), {}),
    ]
    for args, kwargs in failing_requests:
        with pytest.raises(SanityError):
            sn.evaluate(sn.extractall(*args, **kwargs))
def __init__(self):
    '''Configure the GPU burn test.

    ``perf`` is the minimum and ``temp`` the maximum of the values extracted
    from the ``[...] GPU <n>(...): <perf> GF/s <temp> Celsius`` lines of the
    standard output. Every listed partition has its own ``perf`` reference;
    ``temp`` has a single wildcard reference.
    '''
    self.valid_systems = [
        'daint:gpu', 'dom:gpu', 'arolla:cn', 'tsa:cn', 'ault:amdv100',
        'ault:intelv100', 'ault:amda100', 'ault:amdvega'
    ]
    self.descr = 'GPU burn test'
    self.valid_prog_environs = ['PrgEnv-gnu']
    self.exclusive_access = True
    self.executable_opts = ['-d', '40']
    self.build_system = 'Make'
    self.executable = './gpu_burn.x'
    self.num_tasks = 0
    self.num_tasks_per_node = 1
    self.sanity_patterns = self.assert_num_tasks()

    burn_line = (
        r'^\s*\[[^\]]*\]\s*GPU\s+\d+\(\S*\):\s+(?P<perf>\S*)\s+GF\/s'
        r'\s+(?P<temp>\S*)\s+Celsius'
    )
    perf_values = sn.extractall(burn_line, self.stdout, 'perf', float)
    temp_values = sn.extractall(burn_line, self.stdout, 'temp', float)
    self.perf_patterns = {
        'perf': sn.min(perf_values),
        'temp': sn.max(temp_values),
    }

    # Reference performance per partition
    perf_by_partition = {
        'dom:gpu': 4115,
        'daint:gpu': 4115,
        'arolla:cn': 5861,
        'tsa:cn': 5861,
        'ault:amda100': 15000,
        'ault:amdv100': 5500,
        'ault:intelv100': 5500,
        'ault:amdvega': 3450,
    }
    reference = {
        partition: {'perf': (gflops, -0.10, None, 'Gflop/s')}
        for partition, gflops in perf_by_partition.items()
    }
    reference['*'] = {'temp': (0, None, None, 'degC')}
    self.reference = reference

    self.maintainers = ['AJ', 'TM']
    self.tags = {'diagnostic', 'benchmark', 'craype'}
def report_nid_with_smallest_flops(self):
    '''Append the name of the slowest node to the unit of the references.

    The ``[<nid>] GPU <n>(OK): <flops> GF/s`` lines of the output file are
    parsed into ``self.nids`` and ``self.flops``. The node with the smallest
    value is put into the unit string of every reference whose key does not
    end in ``:temp``.
    '''
    line_regex = r'\[(\S+)\] GPU\s+\d\(OK\): (\d+) GF/s'
    report = os.path.join(self.stagedir, sn.evaluate(self.stdout))
    self.nids = sn.extractall(line_regex, report, 1)
    self.flops = sn.extractall(line_regex, report, 2, float)

    # Find the position of the smallest value and patch the units of the
    # reference dictionary with the matching node
    flops_values = self.flops.evaluate()
    slowest = flops_values.index(min(flops_values))
    unit = f'GF/s ({self.nids[slowest]})'
    for key, ref in self.reference.items():
        if key.endswith(':temp'):
            continue

        self.reference[key] = (*ref[:3], unit)
def validate_energy(self):
    '''Require 50 ``TIMING:`` lines and an average energy within 2720 of
    the reference value.
    '''
    energy_reference = -2451359.5
    energy_values = sn.extractall(
        r'ENERGY:([ \t]+\S+){10}[ \t]+(?P<energy>\S+)',
        self.stdout, 'energy', float)
    energy_diff = sn.abs(sn.avg(energy_values) - energy_reference)

    timing_steps = sn.extractall(r'TIMING: (?P<step_num>\S+)  CPU:',
                                 self.stdout, 'step_num')
    return sn.all([
        sn.assert_eq(sn.count(timing_steps), 50),
        sn.assert_lt(energy_diff, 2720)
    ])
def set_perf_patterns(self):
    '''Report the lowest performance and the highest temperature.

    Both values are read from the ``GF/s`` and ``Celsius`` columns of the
    per-GPU lines in the standard output.
    '''
    gpu_line = (
        r'^\s*\[[^\]]*\]\s*GPU\s+\d+\(\S*\):\s+(?P<perf>\S*)\s+GF\/s'
        r'\s+(?P<temp>\S*)\s+Celsius'
    )

    def column(name):
        return sn.extractall(gpu_line, self.stdout, name, float)

    self.perf_patterns = {
        'perf': sn.min(column('perf')),
        'temp': sn.max(column('temp')),
    }
def __init__(self, version, variant):
    '''Configure the NAMD check.

    :arg version: used in the name and description of the check.
    :arg variant: used in the name and description of the check.

    Sanity requires 50 ``TIMING:`` lines and an average energy within 2720
    of the reference value. The performance value ``days_ns`` is the average
    of the ``days/ns`` figures in the benchmark lines.
    '''
    super().__init__()
    self.name = 'namd_%s_%s_check' % (version, variant)
    self.descr = 'NAMD check (%s, %s)' % (version, variant)
    self.valid_prog_environs = ['PrgEnv-intel']
    self.modules = ['NAMD']

    # Reset sources dir relative to the SCS apps prefix
    self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                   'NAMD', 'prod')
    self.executable = 'namd2'
    self.use_multithreading = True
    self.num_tasks_per_core = 2
    if self.current_system.name == 'dom':
        self.num_tasks = 6
        self.num_tasks_per_node = 1
    else:
        self.num_tasks = 16
        self.num_tasks_per_node = 1

    energy = sn.avg(
        sn.extractall(r'ENERGY:(\s+\S+){10}\s+(?P<energy>\S+)',
                      self.stdout, 'energy', float))
    energy_reference = -2451359.5
    energy_diff = sn.abs(energy - energy_reference)
    self.sanity_patterns = sn.all([
        sn.assert_eq(
            sn.count(
                sn.extractall(r'TIMING: (?P<step_num>\S+)  CPU:',
                              self.stdout, 'step_num')), 50),
        sn.assert_lt(energy_diff, 2720)
    ])

    # Raw strings: the backslash-S sequences are regex tokens, not string
    # escapes.
    self.perf_patterns = {
        'days_ns': sn.avg(
            sn.extractall(
                r'Info: Benchmark time: \S+ CPUs \S+ '
                r's/step (?P<days_ns>\S+) days/ns \S+ MB memory',
                self.stdout, 'days_ns', float))
    }
    self.maintainers = ['CB', 'LM']
    self.tags = {'scs'}
    self.strict_check = False
    self.extra_resources = {'switches': {'num_switches': 1}}
def do_sanity_check(self):
    '''Check the per-node output of the pointer chase run.

    The number of distinct nodes reporting ``self.num_gpus_per_node``
    devices, and the number of distinct nodes reporting completion, must
    both equal ``self.job.num_tasks``. The check is evaluated immediately.
    '''
    # Nodes that found the expected number of devices. They are kept in
    # case the perf functions need them later.
    found_regex = (
        rf'^\s*\[([^\]]*)\]\s*Found {self.num_gpus_per_node} device\(s\).'
    )
    self.my_nodes = set(sn.extractall(found_regex, self.stdout, 1))

    # Nodes that made it to the end
    done_regex = r'^\s*\[([^\]]*)\]\s*Pointer chase complete.'
    finished_nodes = set(sn.extractall(done_regex, self.stdout, 1))
    nodes_at_end = len(finished_nodes)

    all_found = sn.assert_eq(self.job.num_tasks, len(self.my_nodes))
    all_done = sn.assert_eq(self.job.num_tasks, nodes_at_end)
    return sn.evaluate(sn.assert_eq(all_found, all_done))
def seconds_elaps(self):
    '''Reports the elapsed time in seconds from the code's internal timer

    .. code-block::

      === Total time for iteration(0) 3.61153s

    reports: * Elapsed: 3.6115 s

    The per-iteration times are summed and rounded to 4 digits. If the sum
    is not positive, ``1`` is returned instead.
    '''
    regex = r'^=== Total time for iteration\(\d+\)\s+(?P<sec>\d+\D\d+)s'
    iteration_times = sn.extractall(regex, self.stdout, 'sec', float)
    elapsed = sn.round(sn.sum(iteration_times), 4)
    return elapsed if elapsed > 0 else 1
def do_sanity_check(self):
    '''Check the per-node output of the pointer chase run.

    The number of distinct nodes reporting ``self.num_gpus_per_node``
    devices, and the number of distinct nodes reporting completion, must
    both equal ``self.job.num_tasks``.
    '''
    # Nodes that found the expected number of devices
    found_regex = (
        rf'^\s*\[([^\]]*)\]\s*Found {self.num_gpus_per_node} device\(s\).'
    )
    my_nodes = set(sn.extractall(found_regex, self.stdout, 1))

    # Nodes that made it to the end
    done_regex = r'^\s*\[([^\]]*)\]\s*Pointer chase complete.'
    finished_nodes = set(sn.extractall(done_regex, self.stdout, 1))
    nodes_at_end = len(finished_nodes)

    all_found = sn.assert_eq(self.job.num_tasks, sn.count(my_nodes))
    all_done = sn.assert_eq(self.job.num_tasks, nodes_at_end)
    return sn.assert_eq(all_found, all_done)
def test_extractall(tempfile):
    '''``extractall`` yields the step numbers 1, 2, ... for a numeric
    group, a named group, and a named group converted with ``int``.
    '''
    pattern = r'Step: (?P<no>\d+)'
    cases = [
        # (extra extractall arguments, type of the expected values)
        ((1,), str),          # numeric group
        (('no',), str),       # named group
        (('no', int), int),   # convert function
    ]
    for extract_args, expected_type in cases:
        res = sn.evaluate(sn.extractall(pattern, tempfile, *extract_args))
        for step, value in enumerate(res, start=1):
            assert expected_type(step) == value
def set_perf_patterns(self):
    '''Set the performance patterns.

    One entry is created for each of the host-device (h2d), device-host
    (d2h) and device-device (d2d) transfers; each is the minimum of the
    values matched by the corresponding transfer pattern.
    '''
    self.perf_patterns = {
        xfer: sn.min(sn.extractall(self._xfer_pattern(xfer),
                                   self.stdout, 1, float))
        for xfer in ('h2d', 'd2h', 'd2d')
    }
def nsys_report_cudaMemcpy_pct(self):
    '''Reports the ``CUDA API`` Time (%) of ``cudaMemcpy`` measured by the
    tool, averaged over all matching lines and rounded to one digit.

    .. code-block::

      > job.stdout
      # CUDA API Statistics (nanoseconds)
      #
      # Time(%)      Total Time       Calls         Average         Minimum
      # -------  --------------  ----------  --------------  --------------
      #    44.9       309427138         378        818590.3            9709
      #    ****
      #    40.6       279978449           2     139989224.5           24173
      #     9.5        65562201         308        212864.3             738
      #     4.9        33820196         306        110523.5            2812
      #     0.1          704223          36         19561.8            9305
      #
      # ....
      #        Maximum  Name
      # --------------  ------------------
      #       11665852  cudaMemcpy
      #      279954276  cudaMemcpyToSymbol
      #        3382747  cudaFree
      #         591094  cudaMalloc
      #          34042  cudaLaunch
    '''
    # First column of the line whose last column is exactly ``cudaMemcpy``
    memcpy_line = (
        r'^\s+(?P<pctg>\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+cudaMemcpy\s+$'
    )
    percentages = sn.extractall(memcpy_line, self.stdout, 'pctg', float)
    return sn.round(sn.avg(percentages), 1)
def __init__(self):
    '''Configure the CP2K ``H2O-256`` check.

    Sanity requires the ``PROGRAM STOPPED IN`` line, 10 occurrences of
    ``STEP NUM`` and a last reported total energy within ``1e-4`` of the
    reference. The performance value ``time`` is read from the ``CP2K``
    timing line.
    '''
    self.valid_prog_environs = ['builtin']
    self.executable = 'cp2k.psmp'
    self.executable_opts = ['H2O-256.inp']

    # ``item=-1`` selects the last energy line
    energy_regex = (
        r'\s+ENERGY\| Total FORCE_EVAL \( QS \) '
        r'energy \(a\.u\.\):\s+(?P<energy>\S+)'
    )
    energy = sn.extractsingle(energy_regex, self.stdout, 'energy', float,
                              item=-1)
    energy_reference = -4404.2323
    energy_diff = sn.abs(energy - energy_reference)

    steps = sn.extractall(r'(?P<step_count>STEP NUM)',
                          self.stdout, 'step_count')
    self.sanity_patterns = sn.all([
        sn.assert_found(r'PROGRAM STOPPED IN', self.stdout),
        sn.assert_eq(sn.count(steps), 10),
        sn.assert_lt(energy_diff, 1e-4)
    ])

    timing_regex = r'^ CP2K(\s+[\d\.]+){4}\s+(?P<perf>\S+)'
    self.perf_patterns = {
        'time': sn.extractsingle(timing_regex, self.stdout, 'perf', float)
    }
    self.maintainers = ['LM']
    self.tags = {'scs'}
    self.strict_check = False
    self.modules = ['CP2K']
    self.extra_resources = {'switches': {'num_switches': 1}}
def __init__(self, prg_envs):
    '''Configure the SPEC ACCEL check.

    :arg prg_envs: programming environments the check is valid for.

    For every environment, ``self.refs``, ``self.sanity_patterns_`` and
    ``self.perf_patterns_`` hold the per-benchmark references, sanity
    checks and performance expressions, built from ``self.benchmarks`` and
    ``self.exec_times``.
    '''
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = prg_envs
    self.modules = ['craype-accel-nvidia60']
    self.configs = {
        'PrgEnv-gnu': 'cscs-gnu',
        'PrgEnv-cray': 'cscs-cray',
        'PrgEnv-pgi': 'cscs-pgi',
    }

    app_source = os.path.join(self.current_system.resourcesdir,
                              'SPEC_ACCELv1.2')
    self.prebuild_cmd = [
        'cp -r %s/* .' % app_source,
        './install.sh -d . -f'
    ]

    # Only the prebuild commands are really needed, but a build system
    # without any action is not supported, so a small source file is
    # compiled instead.
    self.build_system = 'SingleSource'
    self.sourcepath = './benchspec/ACCEL/353.clvrleaf/src/timer_c.c'
    self.build_system.cflags = ['-c']

    # Reference runtime of each benchmark, per environment
    refs = {}
    for env in self.valid_prog_environs:
        env_refs = {}
        for bench_name, rt in zip(self.benchmarks[env],
                                  self.exec_times[env]):
            env_refs[bench_name] = (rt, None, 0.1, 'Seconds')

        refs[env] = env_refs

    self.refs = refs

    self.num_tasks = 1
    self.num_tasks_per_node = 1
    self.time_limit = (0, 30, 0)
    self.executable = 'runspec'

    # First log file found under ``result/``
    outfile = sn.getitem(sn.glob('result/ACCEL.*.log'), 0)

    sanity_by_env = {}
    for env in self.valid_prog_environs:
        found = [sn.assert_found(r'Success.*%s' % bn, outfile)
                 for bn in self.benchmarks[env]]
        sanity_by_env[env] = sn.all(found)

    self.sanity_patterns_ = sanity_by_env

    perf_by_env = {}
    for env in self.valid_prog_environs:
        env_perf = {}
        for bench_name in self.benchmarks[env]:
            runtimes = sn.extractall(
                r'Success.*%s.*runtime=(?P<rt>[0-9.]+)' % bench_name,
                outfile, 'rt', float)
            env_perf[bench_name] = sn.avg(runtimes)

        perf_by_env[env] = env_perf

    self.perf_patterns_ = perf_by_env

    self.maintainers = ['SK']
    self.tags = {'diagnostic', 'external-resources'}
def set_sanity_patterns(self):
    '''Require 100 ``Random:`` values, each bounded by 90 and 100.'''
    numbers = sn.extractall(r'Random: (?P<number>\S+)',
                            self.stdout, 'number', float)

    def in_bounds(value):
        return sn.assert_bounded(value, 90, 100)

    has_all_numbers = sn.assert_eq(sn.count(numbers), 100)
    all_in_bounds = sn.all(sn.map(in_bounds, numbers))
    self.sanity_patterns = sn.all([has_all_numbers, all_in_bounds])
def __init__(self):
    '''Configure the MPI memory-eating check.

    Sanity requires ``oom-kill`` or ``Killed`` in the standard error.
    ``max_cn_memory`` reports ``self.reference_meminfo``;
    ``max_allocated_memory`` is the largest ``using: <n> GB`` value in the
    standard output and is compared against ``self.reference_meminfo``.
    '''
    super().__init__()
    self.maintainers = ['JG']
    self.valid_systems += ['eiger:mc', 'pilatus:mc']
    self.time_limit = '5m'
    self.sourcepath = 'eatmemory_mpi.c'
    self.tags.add('mem')
    self.executable_opts = ['100%']
    self.sanity_patterns = sn.assert_found(r'(oom-kill)|(Killed)',
                                           self.stderr)
    # {{{ perf
    using_regex = (
        r'^Eating \d+ MB\/mpi \*\d+mpi = -\d+ MB memory from \/proc\/'
        r'meminfo: total: \d+ GB, free: \d+ GB, avail: \d+ GB, using:'
        r' (\d+) GB'
    )
    allocated = sn.extractall(using_regex, self.stdout, 1, int)
    self.perf_patterns = {
        'max_cn_memory': sn.getattr(self, 'reference_meminfo'),
        'max_allocated_memory': sn.max(allocated),
    }

    no_limit = (0, None, None, 'GB')
    allocated_reference = (sn.getattr(self, 'reference_meminfo'),
                           -0.05, None, 'GB')
    self.reference = {
        '*': {
            'max_cn_memory': no_limit,
            'max_allocated_memory': allocated_reference,
        }
    }
    # }}}
def test_extractall(self):
    '''``extractall`` yields the step numbers 1, 2, ... for a numeric
    group, a named group, and a named group converted with ``int``.

    The patterns are raw strings so that the backslash-d sequence reaches
    the regex engine without relying on Python's handling of unknown string
    escapes.
    '''
    # Check numeric groups
    res = evaluate(sn.extractall(r'Step: (?P<no>\d+)', self.tempfile, 1))
    for expected, v in enumerate(res, start=1):
        self.assertEqual(str(expected), v)

    # Check named groups
    res = evaluate(sn.extractall(r'Step: (?P<no>\d+)', self.tempfile, 'no'))
    for expected, v in enumerate(res, start=1):
        self.assertEqual(str(expected), v)

    # Check convert function
    res = evaluate(
        sn.extractall(r'Step: (?P<no>\d+)', self.tempfile, 'no', int))
    for expected, v in enumerate(res, start=1):
        self.assertEqual(expected, v)
def validate(self):
    '''Check the number of tested nodes and register per-node performance.

    The host names are taken from the ``<hostname>: Time for <n> DGEMM
    operations`` lines of the standard output; their count must equal
    ``self.job.num_tasks``. For every host name a performance pattern is
    added, together with a reference when ``self.arch_refs`` has an entry
    for ``<arch>@<num_cpus_per_task>c``.

    :returns: ``True`` (a failed node-count check raises instead).
    '''
    # Local import so that the block does not depend on the file's imports
    import re

    # FIXME: This is currently complicated due to GH #2334
    all_tested_nodes = sn.evaluate(
        sn.extractall(
            r'(?P<hostname>\S+):\s+Time for \d+ DGEMM operations',
            self.stdout, 'hostname'))
    num_tested_nodes = len(all_tested_nodes)
    failure_msg = ('Requested %s node(s), but found %s node(s)' %
                   (self.job.num_tasks, num_tested_nodes))
    sn.evaluate(
        sn.assert_eq(num_tested_nodes, self.job.num_tasks,
                     msg=failure_msg))

    pname = self.current_partition.fullname
    arch = self.current_partition.processor.arch

    # The reference key does not depend on the host, so build it once
    key = f'{arch}@{self.num_cpus_per_task}c'
    for hostname in all_tested_nodes:
        if key in self.arch_refs:
            self.reference[f'{pname}:{hostname}'] = self.arch_refs[key]

        # Escape the host name: characters such as '.' are regex
        # metacharacters and must be matched literally.
        self.perf_patterns[hostname] = sn.extractsingle(
            fr'{re.escape(hostname)}:\s+Avg\. performance\s+:\s+'
            fr'(?P<gflops>\S+)\sGflop/s',
            self.stdout, 'gflops', float)

    return True
def scorep_inclusivepct_energy(obj):
    '''Reports % of elapsed time (inclusive) for the MomentumAndEnergy
    function (small scale job)

    .. code-block::

      > sqpatch_048mpi_001omp_125n_10steps_1000000cycles/rpt.exclusive
      0.0193958 (0.0009252%) sqpatch.exe
      1.39647 (0.06661%)       + main
      ...
      714.135 (34.063%)   |   + ...
               *******
        _ZN6sphexa3sph31computeMomentumAndEnergyIADImplIdNS_13 ...
        ParticlesDataIdEEEEvRKNS_4TaskERT0_
      0.205453 (0.0098%)  |   +
        _ZN6sphexa3sph15computeTimestepIdNS0_21TimestepPress2ndOrderIdNS_13
        ... ParticlesDataIdEEEES4_EEvRKSt6vectorINS_4TaskESaIS7_EERT1_
      201.685 (9.62%)     |   |   + MPI_Allreduce

    The percentages of all matching lines in ``obj.rpt_inclusive`` are
    summed and rounded to 2 digits. If building the expression fails, the
    error is reported through ``printer`` and ``0`` is returned.
    '''
    # NOTE(review): the pattern spells ``momentumAndEnergyIAD`` with a
    # lowercase ``m``, while the sample above shows
    # ``computeMomentumAndEnergyIAD`` - confirm against a current report.
    regex = r'^\d+.\d+\s+\((?P<pct>\d+.\d+).*momentumAndEnergyIAD'
    try:
        percentages = sn.extractall(regex, obj.rpt_inclusive, 'pct', float)
        return sn.round(sn.sum(percentages), 2)
    except Exception as e:
        printer.error(f'scorep_inclusivepct_energy failed: {e}')
        return 0
def __init__(self, **kwargs):
    '''Configure the OpenACC gemm example.

    :arg kwargs: keyword arguments forwarded to the parent constructor.

    Sanity requires exactly 3 occurrences of ``success`` in the standard
    output.
    '''
    super().__init__('gemm_example', **kwargs)
    self.sourcepath = 'gemm/'
    self.executable = './gemm/gemm.openacc'
    self.num_cpus_per_task = 12

    # One OpenMP thread per allocated cpu
    self.variables = {'OMP_NUM_THREADS': str(self.num_cpus_per_task)}

    successes = sn.extractall('success', self.stdout)
    self.sanity_patterns = sn.assert_eq(3, sn.count(successes))
def __init__(self, **kwargs):
    '''Configure the OpenACC image pipeline example.

    :arg kwargs: keyword arguments forwarded to the parent constructor.

    Sanity requires that the names found in the ``Time (<name>):`` lines of
    the standard output are exactly ``original``, ``blocked``, ``update``,
    ``pipelined`` and ``multi``.
    '''
    super().__init__('image_pipeline_example', **kwargs)
    self.sourcepath = 'image-pipeline/'
    self.valid_prog_environs = ['PrgEnv-pgi']

    # We need to reload the PGI compiler here, cos OpenCV loads PrgEnv-gnu
    self.modules = ['craype-accel-nvidia60', 'OpenCV', 'pgi']
    self.executable = './image-pipeline/filter.x'
    self.executable_opts = ['image-pipeline/california-1751455_1280.jpg',
                            'image-pipeline/output.jpg']

    # Raw string: the backslash sequences are regex tokens, not string
    # escapes.
    self.sanity_patterns = sn.assert_eq(
        {'original', 'blocked', 'update', 'pipelined', 'multi'},
        dset(sn.extractall(r'Time \((\S+)\):.*', self.stdout, 1)))