def test_assert_true(self):
    """Truthy values pass ``sn.assert_true``; falsy values raise ``SanityError``."""
    # Every truthy argument must be accepted.
    for truthy in (True, 1, [1], range(1)):
        assert sn.assert_true(truthy)

    # Every falsy argument must raise, naming the offending value.
    falsy_cases = [
        (False, 'False is not True'),
        (0, '0 is not True'),
        ([], r'\[\] is not True'),
        (range(0), r'range\(.+\) is not True'),
    ]
    for value, pattern in falsy_cases:
        with pytest.raises(SanityError, match=pattern):
            sn.evaluate(sn.assert_true(value))

    # A user-supplied message overrides the default one.
    with pytest.raises(SanityError, match='not true'):
        sn.evaluate(sn.assert_true(0, msg='not true'))
class _T0(rfm.RegressionTest):
    # NOTE(review): this class appears to be defined inside a pytest
    # fixture -- the class-level statements at the bottom close over a
    # ``request`` object; confirm against the enclosing fixture.
    valid_systems = ['*']
    valid_prog_environs = ['*']
    sourcepath = 'hello.c'
    executable = 'echo'
    sanity_patterns = sn.assert_true(1)

    def check_and_skip(self):
        # Unconditionally skip; used to exercise skip handling in hooks.
        self.skip_if(True)

    # Attach the hook manually based on the request.param
    when, stage = request.param.split('_', maxsplit=1)
    hook = rfm.run_before if when == 'pre' else rfm.run_after
    check_and_skip = hook(stage)(check_and_skip)
def test_assert_true_with_deferrables(self):
    """Deferred truthy values pass; deferred falsy values raise ``SanityError``."""
    self.assertTrue(sn.assert_true(make_deferrable(True)))
    self.assertTrue(sn.assert_true(make_deferrable(1)))
    self.assertTrue(sn.assert_true(make_deferrable([1])))
    self.assertRaisesRegex(SanityError, 'False is not True', evaluate,
                           sn.assert_true(make_deferrable(False)))
    self.assertRaisesRegex(SanityError, '0 is not True', evaluate,
                           sn.assert_true(make_deferrable(0)))
    # Fix: use a raw string -- '\[' is an invalid escape sequence in a
    # plain string literal (SyntaxWarning on modern Python); the regex
    # itself is unchanged.
    self.assertRaisesRegex(SanityError, r'\[\] is not True', evaluate,
                           sn.assert_true(make_deferrable([])))
def test_assert_true_with_deferrables():
    """Same checks as ``test_assert_true`` but going through ``sn.defer``."""
    for truthy in (True, 1, [1]):
        assert sn.assert_true(sn.defer(truthy))

    for value, pattern in ((False, 'False is not True'),
                           (0, '0 is not True'),
                           ([], r'\[\] is not True')):
        with pytest.raises(SanityError, match=pattern):
            sn.evaluate(sn.assert_true(sn.defer(value)))
def validate_fixture_resolution(self):
    """Validate how fixtures f0..f4 were resolved across scopes/actions."""
    return sn.all([
        # Access all the fixtures with a fork action.
        sn.assert_eq(
            (self.f0.data + self.f1.data + self.f2.data + self.f3.data),
            4),
        # Assert that only one fixture is resolved with join action.
        sn.assert_eq(sn.len(self.f4), 1),
        # Assert that the fixtures with join and fork actions resolve to
        # the same instance for the same scope.
        sn.assert_eq(self.f4[0], self.f0),
        # Assert that there are only 4 underlying fixture instances.
        sn.assert_eq(
            sn.len({self.f0, self.f1, self.f2, self.f3, *self.f4}), 4),
        # Assert is_fixture() function
        sn.assert_true(self.f0.is_fixture()),
        sn.assert_false(self.is_fixture())
    ])
def __init__(self, mpi_task):
    """Configure the nvprof/CUDA tool-validation check for *mpi_task* ranks."""
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-gnu']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'gpu'}
    # }}}
    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'nvprof'
    self.tool_mf = 'nvhpc'
    tc_ver = '20.08'
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    self.tool_modules = {
        'PrgEnv-gnu': [f'CrayGNU/.{tc_ver}', 'craype-accel-nvidia60',
                       self.tool_mf],
    }
    self.build_system = 'Make'
    self.build_system.makefile = 'Makefile'
    self.build_system.nvcc = 'nvcc'
    self.build_system.cxx = 'CC'
    self.build_system.max_concurrency = 2
    # The tool itself is launched; the real binary is passed as an option.
    self.executable = self.tool
    self.target_executable = 'mpi+omp+cuda'
    self.build_system.options = [
        self.target_executable, 'MPICXX=CC', 'SRCDIR=.', 'BUILDDIR=.',
        'BINDIR=.', 'CUDA_PATH=$CUDATOOLKIT_HOME',
        # The makefile adds -DUSE_MPI
        # 'CXXFLAGS=',
    ]
    self.postbuild_cmds = [f'mv {self.target_executable}.app '
                           f'{self.target_executable}']
    # }}}
    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    self.name = 'sphexa_nvprofcuda_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps)
    self.num_tasks_per_node = 1
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 1
    self.use_multithreading = False
    self.exclusive = True
    self.time_limit = '15m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        # 'COMPUTE_PROFILE': '',
        # 'PMI_NO_FORK': '1',
    }
    self.tool_opts = ''
    # self.tool_opts = r'-o nvprof.output.%h.%p'
    self.executable_opts = [
        self.tool_opts, f'./{self.target_executable}',
        f'-n {self.cubeside}', f'-s {self.steps}', '2>&1']
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.summary_rpt = 'summary.rpt'
    # Reminder: NVreg_RestrictProfilingToAdminUsers=0 (RFC-16) needed
    # since cuda/10.1
    self.postrun_cmds = ['cat /etc/modprobe.d/nvidia.conf']
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} --version &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
    ]
    # }}}
    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version:
        sn.assert_true(sphsnv.nvprof_version(self)),
        # check the summary report:
        sn.assert_found('NVPROF is profiling process', self.stdout),
    ])
    # }}}
    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}
    # {{{ perf_patterns:
    basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    tool_perf_patterns = sn.evaluate(sphsnv.nvprof_perf_patterns(self))
    self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
    # }}}
    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    # tool's reference (zero lower bound, i.e. report-only metrics):
    myzero_k = (0, None, None, 'KiB')
    myzero_p = (0, None, None, '%')
    self.reference['*:%cudaMemcpy'] = myzero_p
    self.reference['*:%CUDA_memcpy_HtoD_time'] = myzero_p
    self.reference['*:%CUDA_memcpy_DtoH_time'] = myzero_p
    self.reference['*:CUDA_memcpy_HtoD_KiB'] = myzero_k
    self.reference['*:CUDA_memcpy_DtoH_KiB'] = myzero_k
    self.reference['*:%computeMomentumAndEnergyIAD'] = myzero_p
    self.reference['*:%computeIAD'] = myzero_p
    # }}}
class Eggs(rfm.RunOnlyRegressionTest):
    """Run-only check that consumes a ``Bacon`` fixture."""

    executable = 'echo'
    sanity_patterns = sn.assert_true(1)
    eggs = fixture(Bacon)
def __init__(self, mpi_task):
    """Configure the Scalasca sampling+tracing check for *mpi_task* ranks."""
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi',
                                'PrgEnv-cray']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}
    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'scalasca'
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    tool_ver = '2.5'
    tc_ver = '20.08'
    self.tool_modules = {
        'PrgEnv-gnu': [f'Scalasca/{tool_ver}-CrayGNU-{tc_ver}'],
        'PrgEnv-intel': [f'Scalasca/{tool_ver}-CrayIntel-{tc_ver}'],
        'PrgEnv-cray': [f'Scalasca/{tool_ver}-CrayCCE-{tc_ver}'],
        'PrgEnv-pgi': [f'Scalasca/{tool_ver}-CrayPGI-{tc_ver}'],
    }
    self.prgenv_flags = {
        'PrgEnv-gnu': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                       '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-intel': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                         '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-cray': ['-I.', '-I./include', '-std=c++17', '-g', '-Ofast',
                        '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-pgi': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                       '-DUSE_MPI', '-DNDEBUG'],
    }
    self.build_system = 'SingleSource'
    # Build through the scorep instrumentation wrapper:
    self.build_system.cxx = 'scorep --mpp=mpi --nocompiler CC'
    self.sourcepath = f'{self.testname}.cpp'
    self.executable = f'./{self.testname}.exe'
    # }}}
    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    cycles = cycles_dict[mpi_task]
    self.name = \
        'sphexa_scalascaS+T_{}_{:03d}mpi_{:03d}omp_{}n_{}steps_{}cycles'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps, cycles)
    self.num_tasks_per_node = 24
    # {{{ ht:
    # self.num_tasks_per_node = mpitask if mpitask < 36 else 36  # noht
    # self.use_multithreading = False  # noht
    # self.num_tasks_per_core = 1  # noht
    # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
    # self.use_multithreading = True  # ht
    # self.num_tasks_per_core = 2  # ht
    # }}}
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 2
    self.use_multithreading = True
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'SCOREP_ENABLE_PROFILING': 'false',
        'SCOREP_ENABLE_TRACING': 'true',
        'SCOREP_ENABLE_UNWINDING': 'true',
        'SCOREP_SAMPLING_EVENTS': 'perf_cycles@%s' % cycles,
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
        # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
        # empty SCOREP_SAMPLING_EVENTS will profile mpi calls only:
        # ok: 'SCOREP_SAMPLING_EVENTS': '',
        # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_TIMER': 'clock_gettime',
        # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '1',
        # 'SCOREP_VERBOSE': 'true',
        # To avoid "No free memory page available":
        'SCOREP_TOTAL_MEMORY': '1G',
        # Advanced performance metrics:
        'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
    }
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'info.rpt'
    self.rpt = 'rpt'
    # must use scorep.score:
    self.score_rpt = '%s.postproc' % self.rpt
    self.stat_rpt = 'scorep_%s_%s_trace/trace.stat' % \
        (self.testname, self.num_tasks)
    # self.rpt_inclusive = '%s.inclusive' % self.rpt
    # self.rpt_exclusive = '%s.exclusive' % self.rpt
    # self.cubetool = 'cube_calltree'
    self.executable_opts = [f'-n {self.cubeside}', f'-s {self.steps}',
                            '2>&1']
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} -V &> {self.version_rpt}',
        f'scorep --version >> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
        f'which scorep >> {self.which_rpt}',
        # f'which {self.cubetool} >> {self.which_rpt}',
        f'scorep-info config-summary &> {self.info_rpt}',
    ]
    # NOTE(review): ``cubetree`` is assigned but only referenced from
    # commented-out code in this variant.
    cubetree = 'cube_calltree -m time -p -t 1'
    # -m metricname -- print out values for the metric <metricname>
    # -i -- calculate inclusive values instead of exclusive
    # -t treshold -- print out only call path with a value larger
    #    than <treshold>%
    # -p -- diplay percent value
    self.postrun_cmds = [
        # can't test directly from vampir gui, dumping tracefile content:
        'otf2-print scorep_*_trace/traces.otf2 > %s' % self.rpt
        # 'otf2-print scorep-*/traces.otf2 > %s' % self.rpt
    ]
    # }}}
    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        sn.assert_true(sphsscorep.scorep_version(self)),
        sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        # check the report:
        sn.assert_eq(sphsscorep.program_begin_count(self), self.num_tasks),
        sn.assert_eq(sphsscorep.program_end_count(self), self.num_tasks),
        # check the summary report:
        # sn.assert_found(r'^S=C=A=N: \S+ complete\.', self.stderr)
    ])
    # }}}
    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}
    # {{{ perf_patterns:
    basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    # tool: scalasca
    tool_perf_patterns = sn.evaluate(sphssca.rpt_trace_stats_d(self))
    self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
    # tool: scorep
    self.perf_patterns.update({
        'max_ipc_rk0': sphsscorep.ipc_rk0(self),
        'max_rumaxrss_rk0': sphsscorep.ru_maxrss_rk0(self),
    })
    # }}}
    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    # tool
    myzero_n = (0, None, None, 'count')
    myzero_ipc = (0, None, None, 'ins/cyc')
    myzero_kb = (0, None, None, 'kilobytes')
    # tool
    self.reference['*:mpi_latesender'] = myzero_n
    self.reference['*:mpi_latesender_wo'] = myzero_n
    self.reference['*:mpi_latereceiver'] = myzero_n
    self.reference['*:mpi_wait_nxn'] = myzero_n
    self.reference['*:max_ipc_rk0'] = myzero_ipc
    self.reference['*:max_rumaxrss_rk0'] = myzero_kb
def __init__(self, mpi_task):
    """Configure the Score-P sampling+profiling check for *mpi_task* ranks."""
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi',
                                'PrgEnv-cray']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}
    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'scorep'
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    tool_ver = '6.0'
    tc_ver = '20.08'
    self.tool_modules = {
        'PrgEnv-gnu': [f'Score-P/{tool_ver}-CrayGNU-{tc_ver}'],
        'PrgEnv-intel': [f'Score-P/{tool_ver}-CrayIntel-{tc_ver}'],
        'PrgEnv-cray': [f'Score-P/{tool_ver}-CrayCCE-{tc_ver}'],
        'PrgEnv-pgi': [f'Score-P/{tool_ver}-CrayPGI-{tc_ver}'],
    }
    self.prgenv_flags = {
        'PrgEnv-gnu': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                       '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-intel': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                         '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-cray': ['-I.', '-I./include', '-std=c++17', '-g', '-Ofast',
                        '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-pgi': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                       '-DUSE_MPI', '-DNDEBUG'],
    }
    self.build_system = 'SingleSource'
    # Build through the scorep instrumentation wrapper:
    self.build_system.cxx = 'scorep --mpp=mpi --nocompiler CC'
    self.sourcepath = f'{self.testname}.cpp'
    self.executable = f'./{self.testname}.exe'
    # }}}
    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    cycles = cycles_dict[mpi_task]
    self.name = \
        'sphexa_scorepS+P_{}_{:03d}mpi_{:03d}omp_{}n_{}steps_{}cycles'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps, cycles)
    self.num_tasks_per_node = 24
    # {{{ ht:
    # self.num_tasks_per_node = mpitask if mpitask < 36 else 36  # noht
    # self.use_multithreading = False  # noht
    # self.num_tasks_per_core = 1  # noht
    # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
    # self.use_multithreading = True  # ht
    # self.num_tasks_per_core = 2  # ht
    # }}}
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 2
    self.use_multithreading = True
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'SCOREP_ENABLE_PROFILING': 'true',
        'SCOREP_ENABLE_TRACING': 'false',
        'SCOREP_ENABLE_UNWINDING': 'true',
        'SCOREP_SAMPLING_EVENTS': 'perf_cycles@%s' % cycles,
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
        # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
        # empty SCOREP_SAMPLING_EVENTS will profile mpi calls only:
        # ok: 'SCOREP_SAMPLING_EVENTS': '',
        # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_TIMER': 'clock_gettime',
        # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '1',
        # 'SCOREP_VERBOSE': 'true',
        # 'SCOREP_TOTAL_MEMORY': '1G',
    }
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'info.rpt'
    self.rpt = 'rpt'
    self.rpt_inclusive = '%s.inclusive' % self.rpt
    self.rpt_exclusive = '%s.exclusive' % self.rpt
    self.executable_opts = [f'-n {self.cubeside}', f'-s {self.steps}',
                            '2>&1']
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} --version &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
        'scorep-info config-summary &> %s' % self.info_rpt,
    ]
    cubetree = 'cube_calltree -m time -p -t 1'
    # -m metricname -- print out values for the metric <metricname>
    # -i -- calculate inclusive values instead of exclusive
    # -t treshold -- print out only call path with a value larger
    #    than <treshold>%
    # -p -- diplay percent value
    self.postrun_cmds = [
        # working around memory crash in scorep-score:
        '(scorep-score -r scorep-*/profile.cubex ;rm -f core*) > %s' \
        % self.rpt,
        '(%s scorep-*/profile.cubex ;rm -f core*) >> %s' \
        % (cubetree, self.rpt_exclusive),
        '(%s -i scorep-*/profile.cubex ;rm -f core*) >> %s' \
        % (cubetree, self.rpt_inclusive),
    ]
    # }}}
    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        sn.assert_true(sphsscorep.scorep_version(self)),
        sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        # check the summary report:
        sn.assert_found(r'Estimated aggregate size of event trace',
                        self.rpt)
    ])
    # }}}
    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}
    # {{{ perf_patterns:
    self.perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    # tool
    self.perf_patterns.update({
        'scorep_elapsed': sphsscorep.scorep_elapsed(self),
        '%scorep_USR': sphsscorep.scorep_usr_pct(self),
        '%scorep_MPI': sphsscorep.scorep_mpi_pct(self),
        'scorep_top1': sphsscorep.scorep_top1_pct(self),
        '%scorep_Energy_exclusive':
            sphsscorep.scorep_exclusivepct_energy(self),
        '%scorep_Energy_inclusive':
            sphsscorep.scorep_inclusivepct_energy(self),
    })
    # }}}
    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    # tool:
    self.reference['*:scorep_elapsed'] = (0, None, None, 's')
    self.reference['*:%scorep_USR'] = (0, None, None, '%')
    self.reference['*:%scorep_MPI'] = (0, None, None, '%')
    top1_name = sphsscorep.scorep_top1_name(self)
    # TODO: self.reference['*:scorep_top1'] = (0, None, None, top1_name)
    self.reference['*:scorep_top1'] = (0, None, None, '')
    self.reference['*:%scorep_Energy_exclusive'] = (0, None, None, '%')
    self.reference['*:%scorep_Energy_inclusive'] = (0, None, None, '%')
    # }}}
class T0(BaseTest):
    # Trivially-true sanity check; the test only needs to run successfully.
    sanity_patterns = sn.assert_true(1)
def assert_foo(self):
    """Check that ``foo`` equals 3 and ``ham`` is truthy."""
    conditions = [
        sn.assert_eq(self.foo, 3),
        sn.assert_true(self.ham),
    ]
    return sn.all(conditions)
def __init__(self):
    """Configure the sedov Score-P profiling check.

    Bug fix: ``cubetree`` was only defined in a commented-out line but is
    interpolated by the f-strings building ``postrun_cmds`` below, which
    raised ``NameError`` at test instantiation.  It is now assigned.
    """
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['*']
    self.valid_systems = ['*']
    self.modules = ['Score-P']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}
    # {{{ compile
    self.testname = 'sedov'
    self.tool = 'scorep'
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'info.rpt'
    self.prebuild_cmds = [
        f'{self.tool} --version &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
        f'which vampir >> {self.which_rpt}',
        f'scorep-info config-summary &> {self.info_rpt}',
    ]
    # }}}
    # {{{ run
    self.variables = {
        # 'CRAYPE_LINK_TYPE': 'dynamic',
        # 'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'SCOREP_ENABLE_PROFILING': 'true',
        'SCOREP_ENABLE_TRACING': 'false',
        'SCOREP_ENABLE_UNWINDING': 'true',
        # assumes self.cycles is set by a subclass/parameter -- confirm:
        'SCOREP_SAMPLING_EVENTS': f'perf_cycles@{self.cycles}',
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
        # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
        # empty SCOREP_SAMPLING_EVENTS will profile mpi calls only:
        # ok: 'SCOREP_SAMPLING_EVENTS': '',
        # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_TIMER': 'clock_gettime',
        # 'SCOREP_VERBOSE': 'true',
        'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '10',
        'SCOREP_TOTAL_MEMORY': '1G',
        # 'PATH': f'{tool_path}:{cube_path}:$PATH',
    }
    self.rpt = 'rpt'
    self.rpt_score = 'scorep-score.rpt'
    self.rpt_inclusive = 'cube_calltree_inclusive.rpt'
    self.rpt_exclusive = 'cube_calltree_exclusive.rpt'
    self.rpt_otf2 = 'otf2-print.rpt'
    # FIX: previously commented out, but referenced by the f-strings below.
    cubetree = 'cube_calltree -m time -p -t 1'
    # -m metricname -- print out values for the metric <metricname>
    # -i -- calculate inclusive values instead of exclusive
    # -t treshold -- print out only call path with a value larger
    #    than <treshold>%
    # -p -- diplay percent value
    self.postrun_cmds += [
        f'# -------------------------------------------------------------',
        # working around memory crash in scorep-score:
        f'(scorep-score -r scorep-*/profile.cubex ;rm -f core*) >'
        f'{self.rpt_score}',
        # exclusive time (+ workaround memory crash):
        f'({cubetree} scorep-*/profile.cubex ;rm -f core*) &>'
        f' {self.rpt_exclusive}',
        # inclusive time (+ workaround memory crash):
        f'({cubetree} -i scorep-*/profile.cubex ;rm -f core*) &>'
        f' {self.rpt_inclusive}',
        f'# -------------------------------------------------------------',
    ]
    # }}}
    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        # sn.assert_true(sphsscorep.scorep_assert_version(self)),
        sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        # check the summary report:
        sn.assert_found(r'Estimated aggregate size of event trace',
                        self.rpt_score)
    ])
class MyOtherTest(MyTest):
    '''Test both syntaxes are incompatible.'''

    # Trivially-true sanity check; only instantiation behavior matters here.
    sanity_patterns = sn.assert_true(1)
def __init__(self, mpi_task):
    """Configure the Score-P + CUDA tracing check for *mpi_task* ranks."""
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-gnu']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'gpu'}
    # }}}
    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'scorep'
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    tool_ver = '6.0'
    tc_ver = '20.08'
    self.tool_modules = {
        'PrgEnv-gnu': [f'Score-P/{tool_ver}-CrayGNU-{tc_ver}-cuda'],
    }
    self.build_system = 'Make'
    self.build_system.makefile = 'Makefile'
    self.build_system.nvcc = 'nvcc'
    self.build_system.cxx = 'CC'
    self.build_system.max_concurrency = 2
    self.sourcepath = f'{self.testname}.cpp'
    self.executable = f'./{self.testname}.exe'
    self.target_executable = 'mpi+omp+cuda'
    # NOTE(review): the next two assignments overwrite the plain CC/nvcc
    # compilers set above with the scorep instrumentation wrappers.
    self.build_system.cxx = 'scorep --mpp=mpi --cuda --nocompiler CC'
    self.build_system.nvcc = 'scorep --cuda --nocompiler nvcc'
    self.build_system.options = [
        self.target_executable, f'MPICXX="{self.build_system.cxx}"',
        'SRCDIR=.', 'BUILDDIR=.', 'BINDIR=.', 'CXXFLAGS=-std=c++14',
        'CUDA_PATH=$CUDATOOLKIT_HOME',
        # The makefile adds -DUSE_MPI
    ]
    self.postbuild_cmds = [
        f'mv {self.target_executable}.app '
        f'{self.executable}'
    ]
    # }}}
    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    # cycles = cycles_dict[mpi_task]
    self.name = \
        'sphexa_scorep+cuda_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps)
    self.num_tasks_per_node = 1
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 1
    self.use_multithreading = False
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'SCOREP_ENABLE_PROFILING': 'false',
        'SCOREP_ENABLE_TRACING': 'true',
        'SCOREP_CUDA_ENABLE': 'yes',
        'SCOREP_ENABLE_UNWINDING': 'true',
        # 'SCOREP_SAMPLING_EVENTS': 'perf_cycles@%s' % cycles,
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
        # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
        # empty SCOREP_SAMPLING_EVENTS will profile mpi calls only:
        # ok: 'SCOREP_SAMPLING_EVENTS': '',
        # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_TIMER': 'clock_gettime',
        # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '1',
        # 'SCOREP_VERBOSE': 'true',
        # 'SCOREP_TOTAL_MEMORY': '1G',
    }
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'info.rpt'
    self.executable_opts = [f'-n {self.cubeside}', f'-s {self.steps}',
                            '2>&1']
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} --version &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
        'scorep-info config-summary &> %s' % self.info_rpt,
    ]
    # }}}
    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        sn.assert_true(sphsscorep.scorep_version(self)),
        sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        sn.assert_true(sphsscorep.scorep_info_cuda_support(self)),
    ])
    # }}}
    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}
    # {{{ perf_patterns:
    basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    self.perf_patterns = {**basic_perf_patterns}
    # }}}
    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
def __init__(self, mpi_task):
    """Configure the Extrae tool-validation check for *mpi_task* ranks."""
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-gnu']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}
    # {{{ compile
    self.testname = 'sqpatch'
    self.prebuild_cmds = ['module rm xalt']
    self.prgenv_flags = {
        'PrgEnv-gnu': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
            '-DNDEBUG'
        ],
    }
    # ---------------------------------------------------------------- tool
    self.tool = 'tool.sh'
    tool_ver = '3.8.1'
    tc_ver = '20.08'
    self.tool_modules = {
        'PrgEnv-gnu': [f'Extrae/{tool_ver}-CrayGNU-{tc_ver}'],
    }
    # ---------------------------------------------------------------- tool
    self.build_system = 'SingleSource'
    self.build_system.cxx = 'CC'
    self.sourcepath = '%s.cpp' % self.testname
    # The wrapper script is what gets launched; the real binary is moved
    # aside in prerun_cmds below.
    self.executable = self.tool
    self.target_executable = './%s.exe' % self.testname
    # {{{ openmp:
    # 'PrgEnv-intel': ['-qopenmp'],
    # 'PrgEnv-gnu': ['-fopenmp'],
    # 'PrgEnv-pgi': ['-mp'],
    # 'PrgEnv-cray_classic': ['-homp'],
    # 'PrgEnv-cray': ['-fopenmp'],
    # # '-homp' if lang == 'F90' else '-fopenmp',
    # }}}
    # }}}
    # {{{ run
    ompthread = 1
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    self.name = \
        'sphexa_extrae_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps)
    self.num_tasks = mpi_task
    self.num_tasks_per_node = 24  # 72
    # {{{ ht:
    # self.num_tasks_per_node = mpitask if mpitask < 36 else 36  # noht
    # self.use_multithreading = False  # noht
    # self.num_tasks_per_core = 1  # noht
    # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
    # self.use_multithreading = True  # ht
    # self.num_tasks_per_core = 2  # ht
    # }}}
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 2
    self.use_multithreading = True
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
    }
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.rpt = 'rpt'
    # NOTE(review): overrides the 'tool.sh' set in the compile section.
    self.tool = './tool.sh'
    self.executable = self.tool
    self.executable_opts = [f'-- -n {self.cubeside}', f'-s {self.steps}',
                            '2>&1']
    self.xml1 = '$EBROOTEXTRAE/share/example/MPI/extrae.xml'
    self.xml2 = 'extrae.xml'
    self.patch = 'extrae.xml.patch'
    self.version_file = 'extrae_version.h'
    self.prerun_cmds = [
        'module rm xalt',
        # tool version
        'cp $EBROOTEXTRAE/include/extrae_version.h %s' % self.version_file,
        # will launch ./tool.sh myexe myexe_args:
        'mv %s %s' % (self.executable, self.target_executable),
        # .xml
        'echo %s &> %s' % (self.xml1, self.which_rpt),
        'patch -i %s %s -o %s' % (self.patch, self.xml1, self.xml2),
        # .sh
        'echo -e \'%s\' >> %s' % (sphsextrae.create_sh(self), self.tool),
        'chmod u+x %s' % (self.tool),
    ]
    self.prv = '%s.prv' % self.target_executable[2:]  # stripping './'
    self.postrun_cmds = [
        'stats-wrapper.sh %s -comms_histo' % self.prv,
    ]
    self.rpt_mpistats = '%s.comms.dat' % self.target_executable
    # }}}
    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool version:
        sn.assert_true(sphsextrae.extrae_version(self)),
        # check the summary report:
        sn.assert_found(
            r'Congratulations! %s has been generated.' % self.prv,
            self.stdout),
    ])
    # }}}
    # {{{ performance
    # {{{ internal timers
    # use linux date as timer:
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}
    # {{{ perf_patterns:
    basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    tool_perf_patterns = sn.evaluate(sphsextrae.rpt_mpistats(self))
    self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
    # }}}
    # {{{ reference:
    basic_reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    self.reference = basic_reference
    # tool's reference
    myzero = (0, None, None, '')
    myzero_p = (0, None, None, '%')
    self.reference['*:num_comms_0-10B'] = myzero
    self.reference['*:num_comms_10B-100B'] = myzero
    self.reference['*:num_comms_100B-1KB'] = myzero
    self.reference['*:num_comms_1KB-10KB'] = myzero
    self.reference['*:num_comms_10KB-100KB'] = myzero
    self.reference['*:num_comms_100KB-1MB'] = myzero
    self.reference['*:num_comms_1MB-10MB'] = myzero
    self.reference['*:num_comms_10MB'] = myzero
    #
    self.reference['*:%_of_bytes_sent_0-10B'] = myzero_p
    self.reference['*:%_of_bytes_sent_10B-100B'] = myzero_p
    self.reference['*:%_of_bytes_sent_100B-1KB'] = myzero_p
    self.reference['*:%_of_bytes_sent_1KB-10KB'] = myzero_p
    self.reference['*:%_of_bytes_sent_10KB-100KB'] = myzero_p
    self.reference['*:%_of_bytes_sent_100KB-1MB'] = myzero_p
    self.reference['*:%_of_bytes_sent_1MB-10MB'] = myzero_p
    self.reference['*:%_of_bytes_sent_10MB'] = myzero_p
def __init__(self):
    """Configure the sedov Scalasca analysis check.

    Bug fix: ``cubetree`` was only defined in a commented-out line, yet it
    is interpolated by the f-strings building ``postrun_cmds`` below (the
    interpolation happens even though the resulting shell lines are ``#``
    comments), which raised ``NameError`` at test instantiation.  It is
    now assigned.
    """
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['*']
    self.valid_systems = ['*']
    self.modules = ['Scalasca']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}
    # {{{ compile
    self.testname = 'sedov'
    self.executable = 'mpi+omp'
    self.tool = 'scalasca'
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'info.rpt'
    self.cubetool = 'cube_calltree'
    self.prebuild_cmds = [
        f'{self.tool} -V &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
        f'scorep --version >> {self.version_rpt}',
        f'which scorep >> {self.which_rpt}',
        f'which cube_remap2 >> {self.which_rpt}',
        f'which cube_dump >> {self.which_rpt}',
        f'which {self.cubetool} >> {self.which_rpt}',
        # f'which vampir >> {self.which_rpt}',
        f'scorep-info config-summary &> {self.info_rpt}',
        f'# step1: prepare executable with: scalasca -instrument (skin)',
        f'# step2: run executable with: scalasca -analyze (scan)',
        f'# step3: explore report with: scalasca -examine (square)',
        f'# step4: get calltree with: cube_calltree'
    ]
    # }}}
    # {{{ run
    self.variables = {
        # 'SCOREP_ENABLE_UNWINDING': 'true',
        # 'SCOREP_SAMPLING_EVENTS': f'perf_cycles@{self.cycles}',
        # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '10',
        'SCOREP_TOTAL_MEMORY': '1G',
        # 'SCOREP_TIMER': 'gettimeofday',
        'SCAN_ANALYZE_OPTS': '--time-correct',
    }
    self.rpt = 'rpt'
    self.rpt_score = 'scorep-score.rpt'
    self.rpt_exclusive = 'cube_calltree_exclusive.rpt'
    self.rpt_inclusive = 'cube_calltree_inclusive.rpt'
    # FIX: previously commented out, but referenced by the f-strings below.
    cubetree = 'cube_calltree -m time -p -t 1'
    self.postrun_cmds += [
        '# {{{ --- Postprocessing steps: ---',
        f'# -------------------------------------------------------------',
        '# profile.cubex - scalasca -examine -s = square -> scorep.score:',
        f'# ({self.rpt_score} is used by sanity checks)',
        f'scalasca -examine -s scorep_*sum/profile.cubex &> {self.rpt}',
        f'cp scorep_*_sum/scorep.score {self.rpt_score}',
        '# --------------------------------------------------------------',
        '# transform metric tree into metric hierarchy with remap2',
        '# profile.cubex - cube_remap2 (slow) -> summary.cubex: ',
        f'# time -p cube_remap2 -d -o summary.cubex */profile.cubex',
        f'# scorep-score summary.cubex &> {self.rpt_score}',
        '# --------------------------------------------------------------',
        '# exclusive time: summary.cubex - cubetree -> rpt_exclusive:',
        f'# ({cubetree} scorep_*_sum/summary.cubex ;rm -f core*) &>'
        f' {self.rpt_exclusive}',
        '# --------------------------------------------------------------',
        '# inclusive time: summary.cubex - cubetree -i -> rpt_inclusive:',
        f'# ({cubetree} -i scorep_*_sum/summary.cubex ;rm -f core*) &>'
        f' {self.rpt_inclusive}',
        '# -m metricname -- print out values for the metric <metricname>',
        '# -i -- calculate inclusive values instead of',
        '# exclusive',
        '# -t treshold -- print out only call path with a value larger',
        '# than <treshold>%',
        '# -p -- diplay percent value',
        '# --------------------------------------------------------- }}}',
    ]
    # }}}
    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        # sn.assert_true(sphsscorep.scorep_assert_version(self)),
        sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        # check the tool report:
        sn.assert_found(r'Estimated aggregate size of event trace',
                        self.rpt_score),
        sn.assert_found(r'^S=C=A=N: \S+ complete\.', self.stderr)
    ])
def sanity_check_download(self): return sanity.assert_true(os.path.exists("xthi"))
def __init__(self, mpi_task):
    """Intel Advisor tool-validation check for the SPH-EXA ``sqpatch`` case.

    :param mpi_task: number of MPI ranks; also keys the cube side /
        steps lookup tables (``cubeside_dict`` / ``steps_dict``,
        defined elsewhere in this file).
    """
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = [
        'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-cray'
    ]
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}

    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'advixe-cl'
    self.modules = ['advisor']
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    self.tool_v = '2020_update2'
    tc_ver = '20.08'
    # per-environment toolchain + tool module pairs:
    self.tool_modules = {
        'PrgEnv-gnu': [f'CrayGNU/.{tc_ver}',
                       f'{self.modules[0]}/{self.tool_v}'],
        'PrgEnv-intel': [f'CrayIntel/.{tc_ver}',
                         f'{self.modules[0]}/{self.tool_v}'],
        'PrgEnv-cray': [f'CrayCCE/.{tc_ver}',
                        f'{self.modules[0]}/{self.tool_v}'],
        'PrgEnv-pgi': [f'CrayPGI/.{tc_ver}',
                       f'{self.modules[0]}/{self.tool_v}'],
    }
    self.prgenv_flags = {
        'PrgEnv-gnu': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-intel': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-cray': [
            '-I.', '-I./include', '-std=c++17', '-g', '-Ofast', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-pgi': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
            '-DNDEBUG'
        ],
    }
    self.build_system = 'SingleSource'
    # self.build_system.cxx = 'CC'
    self.sourcepath = f'{self.testname}.cpp'
    # the tool wraps the real executable (renamed in postbuild_cmds):
    self.executable = self.tool
    self.target_executable = f'./{self.testname}.exe'
    self.postbuild_cmds = [f'mv {self.tool} {self.target_executable}']
    # }}}

    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    self.name = 'sphexa_advisor_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'.format(
        self.testname, mpi_task, ompthread, self.cubeside, self.steps)
    self.num_tasks_per_node = 24
    # {{{ ht:
    # self.num_tasks_per_node = mpitask if mpitask < 36 else 36   # noht
    # self.use_multithreading = False  # noht
    # self.num_tasks_per_core = 1      # noht

    # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
    # self.use_multithreading = True # ht
    # self.num_tasks_per_core = 2    # ht
    # }}}
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 2
    self.use_multithreading = True
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        # to avoid core when reporting (venv/jenkins):
        'LANG': 'C',
        'LC_ALL': 'C',
    }
    self.dir_rpt = 'rpt'
    self.tool_opts = '--collect=survey --search-dir src:rp=. ' \
                     '--data-limit=0 --no-auto-finalize --trace-mpi ' \
                     '--project-dir=%s -- ' % self.dir_rpt
    self.executable_opts = [
        self.tool_opts, self.target_executable, f'-n {self.cubeside}',
        f'-s {self.steps}', '2>&1'
    ]
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.summary_rpt = 'summary.rpt'
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} --version >> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
    ]
    self.postrun_cmds = [
        f'cd {self.dir_rpt} ;ln -s nid?????.000 e000 ;cd -',
        f'{self.tool} --report=survey --project-dir={self.dir_rpt} '
        f'&> {self.summary_rpt}',
    ]
    # }}}

    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version:
        sn.assert_true(sphsintel.advisor_version(self)),
        # check the summary report:
        sn.assert_found(r'advixe: This data has been saved',
                        self.summary_rpt),
    ])
    # }}}

    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}
    # {{{ perf_patterns:
    self.perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    # tool
    self.perf_patterns.update({
        'advisor_elapsed': sphsintel.advisor_elapsed(self),
        'advisor_loop1_line': sphsintel.advisor_loop1_line(self),
    })
    # }}}
    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    # tool:
    self.reference['*:advisor_elapsed'] = (0, None, None, 's')
    # TODO: fix loop1_fname to avoid error with --report-file:
    # "Object of type '_DeferredExpression' is not JSON serializable"
    # loop1_fname = sphsintel.advisor_loop1_filename(self)
    loop1_fname = ''
    self.reference['*:advisor_loop1_line'] = (0, None, None, loop1_fname)
    # }}}
def __init__(self, mpitask, steps, cycles, rumetric):
    """Score-P (OpenACC, PGI) tool-validation check for ``sqpatch``.

    :param mpitask: number of MPI ranks (1 rank per node); keys the
        weak-scaling ``size_dict`` lookup.
    :param steps: number of simulation steps, passed as ``-s``.
    :param cycles: perf sampling period; sampling enabled only if > 0.
    :param rumetric: value for the ``SCOREP_METRIC_RUSAGE`` variable.
    """
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-pgi']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'gpu', 'openacc'}
    # }}}

    # {{{ compile
    self.testname = 'sqpatch'
    self.prebuild_cmds = ['module rm xalt']
    self.prgenv_flags = {
        'PrgEnv-pgi': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DNDEBUG',
            '-DUSE_MPI', '-DUSE_ACC', '-DUSE_STD_MATH_IN_KERNELS',
            '-acc', '-ta=tesla:managed,cc60'
        ],
        # -mp
    }
    # ---------------------------------------------------------------- tool
    self.modules = ['craype-accel-nvidia60']
    tc_ver = '19.10'
    tool_ver = '6.0'
    postproc_tool_ver = '4ef9d3f'
    postproc_tool_serial = 'otf-profiler'
    self.postproc_tool = 'otf-profiler-mpi'
    self.tool_modules = {
        'PrgEnv-pgi': ['Score-P/%s-CrayPGI-%s' % (tool_ver, tc_ver)]
    }
    # ---------------------------------------------------------------- tool
    self.build_system = 'SingleSource'
    # compile through the Score-P instrumentation wrapper:
    self.build_system.cxx = 'scorep-CC'
    self.sourcepath = '%s.cpp' % self.testname
    self.executable = '%s.exe' % self.testname
    # {{{ openmp:
    # 'PrgEnv-intel': ['-qopenmp'],
    # 'PrgEnv-gnu': ['-fopenmp'],
    # 'PrgEnv-pgi': ['-mp'],
    # 'PrgEnv-cray_classic': ['-homp'],
    # 'PrgEnv-cray': ['-fopenmp'],
    # # '-homp' if lang == 'F90' else '-fopenmp',
    # }}}
    # }}}

    # {{{ run
    ompthread = 1
    # weak scaling = 10^6 p/cn:
    size_dict = {1: 100, 2: 126, 4: 159, 8: 200, 16: 252, 32: 318,
                 64: 400, 128: 504, 256: 635}
    cubesize = size_dict[mpitask]
    self.name = \
        'openacc_scorepT_{}_{:03d}mpi_{:03d}omp_{}n_{}steps_{}cycles_{}'. \
        format(self.testname, mpitask, ompthread, cubesize, steps,
               cycles, rumetric)
    self.num_tasks = mpitask
    self.num_tasks_per_node = 1
    # {{{ ht:
    # self.num_tasks_per_node = mpitask if mpitask < 36 else 36   # noht
    # self.use_multithreading = False  # noht
    # self.num_tasks_per_core = 1      # noht

    # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
    # self.use_multithreading = True # ht
    # self.num_tasks_per_core = 2    # ht
    # }}}
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 1
    self.use_multithreading = False
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'SCOREP_OPENACC_ENABLE': 'yes',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'SCOREP_WRAPPER_INSTRUMENTER_FLAGS': '"--mpp=mpi --openacc"',
        'SCOREP_ENABLE_PROFILING': 'false',
        'SCOREP_ENABLE_TRACING': 'true',
        'SCOREP_FILTERING_FILE': 'myfilt',
        'SCOREP_VERBOSE': 'true',
        # Needed to avoid "No free memory page available"
        'SCOREP_TOTAL_MEMORY': '1G',
        # Adding some performance metrics:
        # http://scorepci.pages.jsc.fz-juelich.de/scorep-pipelines/docs/
        # => scorep-6.0/html/measurement.html#rusage_counters
        # => https://vampir.eu/public/files/pdf/spcheatsheet_letter.pdf
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss,ru_utime',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_METRIC_RUSAGE': '',
        'SCOREP_METRIC_RUSAGE': rumetric,
        'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
    }
    self.rusage_name = sn.evaluate(sphsscacc.otf2cli_metric_name(self))
    if cycles > 0:
        self.variables['SCOREP_SAMPLING_EVENTS'] \
            = 'perf_cycles@%s' % cycles

    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'scorep-info.rpt'
    self.rpt = 'rpt'
    self.rpt_jsn = 'result.json'
    self.rpt_inclusive = '%s.inclusive' % self.rpt
    self.rpt_exclusive = '%s.exclusive' % self.rpt
    self.tool = 'scorep'
    self.executable_opts = ['-n %s' % cubesize, '-s %s' % steps]
    self.prerun_cmds = [
        'module rm xalt',
        '%s --version &> %s' % (self.tool, self.version_rpt),
        'which %s &> %s' % (self.tool, self.which_rpt),
        'scorep-info config-summary &> %s' % self.info_rpt,
    ]
    self.postrun_cmds = [
        # otf-profiler is needed for postprocessing but i managed to
        # compile only gnu version => removing CubeLib to avoid conflict
        # with CrayPGI:
        'module rm CubeLib',
        'module load otf2_cli_profile/%s-CrayGNU-%s' %
        (postproc_tool_ver, tc_ver),
        # report post-processing tools version
        '%s --version' % postproc_tool_serial,  # OTF-Profiler version 2.0.0
        'which %s %s' % (postproc_tool_serial, self.postproc_tool),
        # create result.json performance report from tracefile
        # see otf_profiler method (@run_after)
    ]
    # }}}

    # {{{ sanity
    # sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        sn.assert_true(sphsscorep.scorep_version(self)),
        # Needed when using papi counters:
        # sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
    ])
    # }}}

    # {{{ performance
    # {{{ internal timers
    # use linux date as timer:
    self.prerun_cmds += ['echo starttime=`date +%s`']
    # }}}
    # {{{ perf_patterns:
    basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    tool_perf_patterns = sn.evaluate(sphsscacc.otf2cli_perf_patterns(self))
    self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
    # }}}
    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    # NOTE(review): this second assignment overwrites the basic scoped
    # reference above (unlike perf_patterns, which are merged) — confirm
    # whether otf2cli_tool_reference already includes the basic entries
    # or whether the two dicts should be merged here.
    self.reference = sn.evaluate(sphsscacc.otf2cli_tool_reference(self))
def assert_foo(self): return sn.all([ sn.assert_eq(self.foo, 3), sn.assert_true(self.ham), sn.assert_eq(self.spam.eggs.bacon, 10) ])
def __init__(self): self.valid_systems = [] self.valid_prog_environs = [] self.sanity_patterns = sn.assert_true(1)
class Bacon(rfm.RunOnlyRegressionTest): bacon = variable(int, value=-1) executable = 'echo' sanity_patterns = sn.assert_true(1)
class MyTest(rfm.RunOnlyRegressionTest): executable = 'echo' valid_prog_environs = ['*'] valid_systems = ['*'] sourcesdir = None sanity_patterns = sn.assert_true(1)
def validate_download(self): return sn.assert_true(os.path.exists('osu-micro-benchmarks-5.6.2'))
def __init__(self, mpi_task):
    """Intel Inspector tool-validation check for the SPH-EXA ``sqpatch``
    case.

    :param mpi_task: number of MPI ranks; also keys the cube side /
        steps lookup tables (``cubeside_dict`` / ``steps_dict``,
        defined elsewhere in this file).
    """
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = [
        'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-cray'
    ]
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}

    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'inspxe-cl'
    self.modules = ['inspector']
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    self.tool_v = '2020_update2'
    tc_ver = '20.08'
    # per-environment toolchain + tool module pairs:
    self.tool_modules = {
        'PrgEnv-gnu': [f'CrayGNU/.{tc_ver}',
                       f'{self.modules[0]}/{self.tool_v}'],
        'PrgEnv-intel': [f'CrayIntel/.{tc_ver}',
                         f'{self.modules[0]}/{self.tool_v}'],
        'PrgEnv-cray': [f'CrayCCE/.{tc_ver}',
                        f'{self.modules[0]}/{self.tool_v}'],
        'PrgEnv-pgi': [f'CrayPGI/.{tc_ver}',
                       f'{self.modules[0]}/{self.tool_v}'],
    }
    self.prgenv_flags = {
        'PrgEnv-gnu': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-intel': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-cray': [
            '-I.', '-I./include', '-std=c++17', '-g', '-Ofast', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-pgi': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
            '-DNDEBUG'
        ],
    }
    self.build_system = 'SingleSource'
    # self.build_system.cxx = 'CC'
    self.sourcepath = f'{self.testname}.cpp'
    # the tool wraps the real executable (renamed in postbuild_cmds):
    self.executable = self.tool
    self.target_executable = f'./{self.testname}.exe'
    self.postbuild_cmds = [f'mv {self.tool} {self.target_executable}']
    # }}}

    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    self.name = 'sphexa_inspector_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps)
    self.num_tasks_per_node = 24
    # {{{ ht:
    # self.num_tasks_per_node = mpitask if mpitask < 36 else 36   # noht
    # self.use_multithreading = False  # noht
    # self.num_tasks_per_core = 1      # noht

    # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
    # self.use_multithreading = True # ht
    # self.num_tasks_per_core = 2    # ht
    # }}}
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 2
    self.use_multithreading = True
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
    }
    self.dir_rpt = 'rpt'
    self.tool_opts = '-collect mi1 -trace-mpi -no-auto-finalize -r %s' \
        % self.dir_rpt
    self.executable_opts = [
        self.tool_opts, self.target_executable, f'-n {self.cubeside}',
        f'-s {self.steps}', '2>&1'
    ]
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.summary_rpt = 'summary.rpt'
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} --version >> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
    ]
    self.postrun_cmds = [
        f'{self.tool} -r {self.dir_rpt}.* -report=summary '
        f'&> {self.summary_rpt}',
        # '%s -report=problems &> %s' % (self.tool, self.problems_rpt),
        # '%s -report=observations &> %s' %
        # (self.tool, self.observations_rpt),
    ]
    # }}}

    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version:
        sn.assert_true(sphsintel.inspector_version(self)),
        # check the summary report:
        sn.assert_found(r'\d new problem\(s\) found', self.summary_rpt),
    ])
    # }}}

    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}
    # {{{ perf_patterns:
    self.perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    # tool
    self.perf_patterns.update({
        'Memory not deallocated': sphsintel.inspector_not_deallocated(self),
        # 'Memory leak': sphsintel.inspector_leak(self),
    })
    # }}}
    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    # tool
    self.reference['*:Memory not deallocated'] = (0, None, None, '')
    # }}}
def __init__(self, mpi_task, cubeside):
    """Cray perftools-lite-gpu tool-validation check for ``sqpatch``
    (CUDA build).

    :param mpi_task: number of MPI ranks (1 rank per node); keys the
        ``steps_dict`` lookup (defined elsewhere in this file).
    :param cubeside: cube side of the simulation domain (``-n``).
    """
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-gnu']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools'}
    # }}}

    # {{{ compile
    self.testname = 'sqpatch'
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    tc_ver = '20.08'
    self.modules = ['craype-accel-nvidia60', 'perftools-base']
    self.tool = 'pat_report'
    self.tool_modules = {
        'PrgEnv-gnu': [f'CrayGNU/.{tc_ver}', 'perftools-lite-gpu'],
    }
    self.build_system = 'Make'
    self.build_system.makefile = 'Makefile'
    self.build_system.nvcc = 'nvcc'
    self.build_system.cxx = 'CC'
    self.build_system.max_concurrency = 2
    self.sourcepath = f'{self.testname}.cpp'
    self.executable = f'./{self.testname}.exe'
    self.target_executable = 'mpi+omp+cuda'
    self.build_system.options = [
        self.target_executable, f'MPICXX="{self.build_system.cxx}"',
        'SRCDIR=.', 'BUILDDIR=.', 'BINDIR=.', 'CXXFLAGS=-std=c++14',
        'CUDA_PATH=$CUDATOOLKIT_HOME',
        # The makefile adds -DUSE_MPI
    ]
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.rpt = 'RUNTIME.rpt'
    self.postbuild_cmds = [
        f'mv {self.target_executable}.app {self.executable}',
        f'{self.tool} -V &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
    ]
    # }}}

    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    self.cubeside = cubeside
    self.steps = steps_dict[mpi_task]
    self.name = \
        'sphexa_perftools-gpu-cuda_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps)
    self.num_tasks_per_node = 1
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 1
    self.use_multithreading = False
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
    }
    self.executable_opts = [
        f'-n {self.cubeside}', f'-s {self.steps}', '2>&1'
    ]
    self.prerun_cmds = ['module rm xalt']
    # copy the tool's runtime report next to the job output:
    self.postrun_cmds = [
        f'cp {self.executable}+*/rpt-files/RUNTIME.rpt {self.rpt}'
    ]
    # }}}

    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        sn.assert_true(sphsptlgpu.tool_version(self)),
    ])
    # }}}

    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}
    # {{{ perf_patterns:
    basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    tool_perf_patterns = sn.evaluate(sphsptlgpu.tool_perf_patterns(self))
    self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
    # }}}
    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    # tool's reference
    myzero_p = (0, None, None, '%')
    myzero_mb = (0, None, None, 'MiBytes')
    self.reference['*:host_time%'] = myzero_p
    self.reference['*:device_time%'] = myzero_p
    self.reference['*:acc_copyin'] = myzero_mb
    self.reference['*:acc_copyout'] = myzero_mb
    # }}}
class Test0(rfm.RegressionTest): valid_systems = ['sys0:p0', 'sys0:p1'] valid_prog_environs = ['e0', 'e1'] executable = 'echo' sanity_patterns = sn.assert_true(1)
def __init__(self):
    """Scalasca tool-validation check for the SPH-EXA ``sedov`` test case.

    Records tool versions/paths at build time, configures the Score-P
    runtime (with unwinding/sampling enabled), post-processes the
    Scalasca reports after the run and validates both the job output
    and the generated score report.
    """
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['*']
    self.valid_systems = ['*']
    self.modules = ['Scalasca']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}

    # {{{ compile
    self.testname = 'sedov'
    self.tool = 'scalasca'
    # report files consumed later by sanity checks:
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'info.rpt'
    self.cubetool = 'cube_calltree'
    self.prebuild_cmds = [
        f'{self.tool} -V &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
        f'scorep --version >> {self.version_rpt}',
        f'which scorep >> {self.which_rpt}',
        # f'which vampir >> {self.which_rpt}',
        f'which {self.cubetool} >> {self.which_rpt}',
        f'scorep-info config-summary &> {self.info_rpt}',
        f'# step1: prepare executable with: scalasca -instrument (skin)',
        f'# step2: run executable with: scalasca -analyze (scan)',
        f'# step3: explore report with: scalasca -examine (square)',
        f'# step4: get calltree with: cube_calltree'
    ]
    # }}}

    # {{{ run
    self.variables = {
        'SCOREP_ENABLE_UNWINDING': 'true',
        'SCOREP_SAMPLING_EVENTS': f'perf_cycles@{self.cycles}',
        'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '10',
        'SCOREP_TOTAL_MEMORY': '1G',
    }
    self.rpt = 'rpt'
    self.rpt_score = 'scorep-score.rpt'
    self.rpt_exclusive = 'cube_calltree_exclusive.rpt'
    self.rpt_inclusive = 'cube_calltree_inclusive.rpt'
    # BUGFIX: `cubetree` was commented out but is interpolated by the
    # f-strings in `postrun_cmds` below (the shell commands generating
    # the exclusive/inclusive reports), raising NameError at setup time.
    cubetree = 'cube_calltree -m time -p -t 1'
    # -m metricname -- print out values for the metric <metricname>
    # -i -- calculate inclusive values instead of exclusive
    # -t treshold -- print out only call path with a value larger
    #                than <treshold>%
    # -p -- diplay percent value
    self.postrun_cmds += [
        f'# -------------------------------------------------------------',
        # generate summary.cubex from profile.cubex with: scalasca -examine
        # (it will report scoring too)
        f'{self.tool} -examine -s scorep_*sum/profile.cubex &> {self.rpt}',
        # rpt will always be written to scorep.score, not into self.rpt
        f'rm -f core*',
        # this file is used for sanity checks:
        f'cp scorep_*_sum/scorep.score {self.rpt_score}',
        # exclusive time:
        f'({cubetree} scorep_*_sum/summary.cubex ;rm -f core*) &>'
        f' {self.rpt_exclusive}',
        # inclusive time:
        f'({cubetree} -i scorep_*_sum/summary.cubex ;rm -f core*) &>'
        f' {self.rpt_inclusive}',
        f'# -------------------------------------------------------------',
    ]
    # }}}

    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        # sn.assert_true(sphsscorep.scorep_assert_version(self)),
        sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        # check the tool report:
        sn.assert_found(r'Estimated aggregate size of event trace',
                        self.rpt_score),
        sn.assert_found(r'^S=C=A=N: \S+ complete\.', self.stderr)
    ])
    # }}}
def set_sanity_gpu(self):
    # {{{
    '''Build the deferred sanity checks from the cuda-gdb log files.

    This method runs sanity checks on the following logs:

    - info cuda devices

    .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_devices.log
      :lines: 1-3

    - info cuda kernels

    .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_kernels.log
      :lines: 5-7

    - info cuda threads

    .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_threads.log
      :lines: 1-5, 458-459

    - navigate between cuda kernels/blocks/threads/

    .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_navigate.log
      :lines: 5-6, 17-18, 33-34
      :emphasize-lines: 1, 3, 5

    - inspect variables (std::vector)

    .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_std_vector.log
      :lines: 1-25
      :emphasize-lines: 4

    - inspect variables (int*)

    .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_const_int.log
      :lines: 6-37
      :emphasize-lines: 17
    '''
    # }}}
    # gpu_specs collects the extracted values, gpu_specs_bool the
    # per-item (value == reference) deferred comparisons used for sanity:
    self.gpu_specs = {}
    self.gpu_specs_bool = {}
    ref_gpu_specs = {}
    ref_gpu_specs['P100'] = {}
    ref_gpu_specs['V100'] = {}
    # {{{ info_devices.log:
    # Dev PCI Bus/Dev ID Name Description SM Type SMs Warps/SM Lanes/Warp
    # Max Regs/Lane Active SMs Mask
    # * 0 88:00.0 Tesla V100-SXM2-16GB GV100GL-A sm_70 80 64 ...
    #   ^^^^ ^^^^^ ^^ ^^
    # 32 256 0x000000000000ffffffffffffffffffff
    # ^^
    self.rpt = os.path.join(self.stagedir, self.log_devices)
    # reference values per GPU model (overwrites the empty dicts above):
    ref_gpu_specs = {
        'V100': {
            'capability': 'sm_70',
            'sms': 80,
            'WarpsPerSM': 64,
            'LanesPerWarp': 32,  # = warpSize
            'max_threads_per_sm': 2048,
            'max_threads_per_device': 163840,
        },
        'P100': {
            'capability': 'sm_60',
            'sms': 56,
            'WarpsPerSM': 64,
            'LanesPerWarp': 32,  # = warpSize
            'max_threads_per_sm': 2048,
            'max_threads_per_device': 114688,
        },
    }
    regex = (r'Tesla (?P<gpu_name>\S+)-\S+-\S+\s+\S+\s+(?P<cap>sm_\d+)\s+'
             r'(?P<sms>\d+)\s+(?P<WarpsPerSM>\d+)\s+(?P<LanesPerWarp>\d+)')
    # --- get gpu_name (V100 or P100):
    gpu_name = sn.evaluate(sn.extractsingle(regex, self.rpt, 'gpu_name'))
    # --- get capability (True means that extracted value matches ref):
    res = sn.extractsingle(regex, self.rpt, 'cap')
    self.gpu_specs['capability'] = res
    self.gpu_specs_bool['capability'] = \
        (res == ref_gpu_specs[gpu_name]['capability'])
    # --- get sms:
    res = sn.extractsingle(regex, self.rpt, 'sms', int)
    self.gpu_specs['sms'] = res
    self.gpu_specs_bool['sms'] = (res == ref_gpu_specs[gpu_name]['sms'])
    # --- get WarpsPerSM:
    res = sn.extractsingle(regex, self.rpt, 'WarpsPerSM', int)
    self.gpu_specs['WarpsPerSM'] = res
    self.gpu_specs_bool['WarpsPerSM'] = \
        (res == ref_gpu_specs[gpu_name]['WarpsPerSM'])
    # --- get LanesPerWarp|warpSize:
    res = sn.extractsingle(regex, self.rpt, 'LanesPerWarp', int)
    self.gpu_specs['LanesPerWarp'] = res
    self.gpu_specs_bool['LanesPerWarp'] = \
        (res == ref_gpu_specs[gpu_name]['LanesPerWarp'])
    # --- threads_per_sm <= LanesPerWarp * WarpsPerSM
    res = self.gpu_specs['LanesPerWarp'] * self.gpu_specs['WarpsPerSM']
    self.gpu_specs['max_threads_per_sm'] = res
    self.gpu_specs_bool['max_threads_per_sm'] = \
        (res == ref_gpu_specs[gpu_name]['max_threads_per_sm'])
    # --- threads_per_device <= threads_per_sm * sms
    res = self.gpu_specs['sms'] * self.gpu_specs['max_threads_per_sm']
    self.gpu_specs['max_threads_per_device'] = res
    self.gpu_specs_bool['max_threads_per_device'] = \
        (res == ref_gpu_specs[gpu_name]['max_threads_per_device'])
    # --- max_np of 1gpu = f(max_threads_per_device) where np = cube_size^3
    import math
    self.gpu_specs['max_cubesz'] = sn.defer(
        math.ceil(pow(sn.evaluate(res), 1 / 3)))
    # }}}
    # {{{ info_kernels.log:
    # Kernel Parent Dev Grid Status SMs Mask GridDim BlockDim Invocation
    # * 0 - 0 3 Active 0x (106,1,1) (256,1,1) ...::density<double>(n=27000,
    #   ^^^^^^^ ^^^^^^^ ^^^^^
    # ---------------------------------------------------------------------
    self.log = os.path.join(self.stagedir, self.log_kernels)
    regex = (r'\*.*Active \S+ \((?P<grid_x>\d+),(?P<grid_y>\d+),'
             r'(?P<grid_z>\d+)\)\s+\((?P<block_x>\d+),(?P<block_y>\d+),'
             r'(?P<block_z>\d+)\).*\(n=(?P<np>\d+), ')
    grid_x = sn.extractsingle(regex, self.log, 'grid_x', int)
    grid_y = sn.extractsingle(regex, self.log, 'grid_y', int)
    grid_z = sn.extractsingle(regex, self.log, 'grid_z', int)
    block_x = sn.extractsingle(regex, self.log, 'block_x', int)
    block_y = sn.extractsingle(regex, self.log, 'block_y', int)
    block_z = sn.extractsingle(regex, self.log, 'block_z', int)
    np = sn.extractsingle(regex, self.log, 'np', int)
    self.kernel_grid = grid_x * grid_y * grid_z
    self.kernel_block = block_x * block_y * block_z
    self.kernel_np = np
    # NOTE(review): `math` is imported a second time here (harmless,
    # but redundant with the import above).
    import math
    self.gpu_specs['cubesz'] = \
        sn.defer(math.ceil(pow(sn.evaluate(self.kernel_np), 1/3)))
    # {{{ TODO:tuple
    # https://github.com/eth-cscs/reframe/blob/master/cscs-checks/
    # prgenv/affinity_check.py#L38
    # regex=(r'\*.*Active \S+ (?P<griddim>\(\d+,\d+,\d+\))\s+(?P<blockdim>'
    #        r'\(\d+,\d+,\d+\)).*\(n=(?P<np>\d+), ')
    # from functools import reduce
    # self.res = reduce(lambda x, y: x*y, list(res))
    # sn.extractsingle(regex, self.stdout, 'nrgy',
    #                  conv=lambda x: int(x.replace(',', '')))
    # res: ('(', '1', '0', '6', ',', '1', ',', '1', ')')
    # }}}
    # }}}
    # {{{ info_threads.log:
    # BlockIdx ThreadIdx To BlockIdx ThreadIdx Count Virtual PC Filename L
    # Kernel 0
    # * (0,0,0) (0,0,0) (1,0,0) (63,0,0) 320 0x0... ../cudaDensity.cu 27
    #   (1,0,0) (64,0,0) (1,0,0) (95,0,0) 32 0x0... ../cudaDensity.cu 26
    # etc... sum(^^^)
    # ---------------------------------------------------------------------
    self.log = os.path.join(self.stagedir, self.log_threads)
    regex = r'(\(\S+\)\s+){4}(?P<nth>\d+)\s+0x'
    self.threads_np = sn.sum(sn.extractall(regex, self.log, 'nth', int))
    # }}}
    # {{{ info_navigate.log:
    # gridDim=(106,1,1) blockDim=(256,1,1) blockIdx=(0,0,0) \
    # threadIdx=(0,0,0) warpSize=32 thid=0
    # kernel 0 grid 3 block (0,0,0) thread (0,0,0) device 0 sm 0 warp 0 ...
    # --
    # gridDim=(106,1,1) blockDim=(256,1,1) blockIdx=(105,0,0)
    # threadIdx=(255,0,0) warpSize=32 thid=27135
    # kernel 0 grid 3 block (105,0,0) thread (255,0,0) device 0 sm 43 ...
    # --
    # gridDim=(106,1,1) blockDim=(256,1,1) blockIdx=(55,0,0)
    # threadIdx=(255,0,0) warpSize=32 thid=14335
    # kernel 0 grid 3 block (55,0,0) thread (255,0,0) device 0 sm 55 ...
    # ---------------------------------------------------------------------
    self.log = os.path.join(self.stagedir, self.log_navigate)
    regex = r'^gridDim.*warpSize=\d+ thid=(?P<th>\d+)$'
    self.thids = sn.extractall(regex, self.log, 'th', int)
    # }}}
    # {{{ info_std_vector.log:
    # --- get vector length(True means that extracted value matches ref):
    self.rpt = os.path.join(self.stagedir, self.log_stdvector)
    # std::vector of length 27000, capacity 27000
    regex = r'std::vector of length (?P<vec_len1>\d+),'
    res = sn.extractsingle(regex, self.rpt, 'vec_len1', int)
    self.gpu_specs['vec_len1'] = res
    self.gpu_specs_bool['vec_len1'] = (res == self.cubesize**3)
    # Vector size = 27000 (pvector)
    regex = r'^Vector size = (?P<vec_len2>\d+)$'
    res = sn.extractsingle(regex, self.rpt, 'vec_len2', int)
    self.gpu_specs['vec_len2'] = res
    self.gpu_specs_bool['vec_len2'] = (res == self.cubesize**3)
    # }}}
    # {{{ --- sanity_patterns:
    self.sanity_patterns = sn.all([
        sn.assert_true(self.gpu_specs_bool['capability']),
        sn.assert_true(self.gpu_specs_bool['sms']),
        sn.assert_true(self.gpu_specs_bool['WarpsPerSM']),
        sn.assert_true(self.gpu_specs_bool['LanesPerWarp']),
        sn.assert_true(self.gpu_specs_bool['max_threads_per_sm']),
        sn.assert_true(self.gpu_specs_bool['max_threads_per_device']),
        sn.assert_true(self.gpu_specs_bool['vec_len1']),
        sn.assert_true(self.gpu_specs_bool['vec_len2']),
        # NO: sn.assert_true(self.gpu_specs_bool),
    ])
    # }}}
def __init__(self, mpi_task): # {{{ pe self.descr = 'Tool validation' self.valid_prog_environs = [ 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-cray' ] # self.valid_systems = ['daint:gpu', 'dom:gpu'] self.valid_systems = ['*'] self.maintainers = ['JG'] self.tags = {'sph', 'hpctools', 'cpu'} # }}} # {{{ compile self.testname = 'sqpatch' self.tool = 'vtune' self.modules = ['vtune_profiler'] self.prebuild_cmds = ['module rm xalt', 'module list -t'] self.tool_v = '2020_update2' tc_ver = '20.08' self.tool_modules = { 'PrgEnv-gnu': [f'CrayGNU/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'], 'PrgEnv-intel': [f'CrayIntel/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'], 'PrgEnv-cray': [f'CrayCCE/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'], 'PrgEnv-pgi': [f'CrayPGI/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'], } self.prgenv_flags = { 'PrgEnv-gnu': [ '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI', '-DNDEBUG' ], 'PrgEnv-intel': [ '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI', '-DNDEBUG' ], 'PrgEnv-cray': [ '-I.', '-I./include', '-std=c++17', '-g', '-Ofast', '-DUSE_MPI', '-DNDEBUG' ], 'PrgEnv-pgi': [ '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI', '-DNDEBUG' ], } self.build_system = 'SingleSource' # self.build_system.cxx = 'CC' self.sourcepath = f'{self.testname}.cpp' self.executable = self.tool self.target_executable = f'./{self.testname}.exe' self.postbuild_cmds = [f'mv {self.tool} {self.target_executable}'] # }}} # {{{ run ompthread = 1 self.num_tasks = mpi_task self.cubeside = cubeside_dict[mpi_task] self.steps = steps_dict[mpi_task] self.name = 'sphexa_vtune_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'.format( self.testname, mpi_task, ompthread, self.cubeside, self.steps) self.num_tasks_per_node = 24 # {{{ ht: # self.num_tasks_per_node = mpitask if mpitask < 36 else 36 # noht # self.use_multithreading = False # noht # self.num_tasks_per_core = 1 # noht # self.num_tasks_per_node = mpitask if mpitask < 72 else 72 # 
self.use_multithreading = True # ht # self.num_tasks_per_core = 2 # ht # }}} self.num_cpus_per_task = ompthread self.num_tasks_per_core = 2 self.use_multithreading = True self.exclusive = True self.time_limit = '10m' self.variables = { 'CRAYPE_LINK_TYPE': 'dynamic', 'OMP_NUM_THREADS': str(self.num_cpus_per_task), } self.dir_rpt = 'rpt' collect = 'hotspots' self.tool_opts = '-trace-mpi -collect %s -r ./%s -data-limit=0' % \ (collect, self.dir_rpt) # example dir: rpt.nid00032 self.executable_opts = [ self.tool_opts, '%s' % self.target_executable, f'-n {self.cubeside}', f'-s {self.steps}', '2>&1' ] self.version_rpt = 'version.rpt' self.which_rpt = 'which.rpt' self.summary_rpt = 'summary.rpt' self.srcfile_rpt = 'srcfile.rpt' self.prerun_cmds = [ 'module rm xalt', f'{self.tool} --version >> {self.version_rpt}', f'which {self.tool} &> {self.which_rpt}', ] column = ('"CPU Time:Self,CPU Time:Effective Time:Self,' 'CPU Time:Spin Time:Self,CPU Time:Overhead Time:Self"') self.postrun_cmds = [ # summary rpt: TODO: for ... 
# '%s -R hotspots -r %s* -column="CPU Time:Self" &> %s' % # (self.tool, self.dir_rpt, self.summary_rpt), # csv report: 'for vtdir in %s.nid* ;do %s -R hotspots -r $vtdir/*.vtune ' '-group-by=function -format=csv -csv-delimiter=semicolon ' '-column=%s &>$vtdir.csv ;done' % (self.dir_rpt, self.tool, column), # keep as reminder: # '%s cat /proc/sys/kernel/perf_event_paranoid &> %s' % # (run_cmd, self.paranoid_rpt), # 'cd %s ;ln -s nid*.000 e000 ;cd -' % self.dir_rpt, # '%s --report=survey --project-dir=%s &> %s' % # (self.tool, self.dir_rpt, self.summary_rpt), 'cp *_job.out %s' % self.dir_rpt, ] # }}} # {{{ sanity self.sanity_patterns = sn.all([ # check the job output: sn.assert_found(r'Total time for iteration\(0\)', self.stdout), # check the tool's version: sn.assert_true(sphsintel.vtune_version(self)), # check the summary report: sn.assert_found(r'vtune: Executing actions 100 % done', self.stdout) ]) # }}} # {{{ performance # {{{ internal timers self.prerun_cmds += ['echo starttime=`date +%s`'] self.postrun_cmds += ['echo stoptime=`date +%s`']