def __init__(self, mpi_task):
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi',
                                'PrgEnv-cray']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}

    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'scalasca'
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    tool_ver = '2.5'
    tc_ver = '20.08'
    self.tool_modules = {
        'PrgEnv-gnu': [f'Scalasca/{tool_ver}-CrayGNU-{tc_ver}'],
        'PrgEnv-intel': [f'Scalasca/{tool_ver}-CrayIntel-{tc_ver}'],
        'PrgEnv-cray': [f'Scalasca/{tool_ver}-CrayCCE-{tc_ver}'],
        'PrgEnv-pgi': [f'Scalasca/{tool_ver}-CrayPGI-{tc_ver}'],
    }
    self.prgenv_flags = {
        'PrgEnv-gnu': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                       '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-intel': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                         '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-cray': ['-I.', '-I./include', '-std=c++17', '-g', '-Ofast',
                        '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-pgi': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                       '-DUSE_MPI', '-DNDEBUG'],
    }
    self.build_system = 'SingleSource'
    self.build_system.cxx = 'scorep --mpp=mpi --nocompiler CC'
    self.sourcepath = f'{self.testname}.cpp'
    self.executable = f'./{self.testname}.exe'
    # }}}

    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    # cubeside_dict, steps_dict and cycles_dict are module-level dictionaries
    # keyed by the number of MPI tasks (see the sketch after this
    # constructor):
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    cycles = cycles_dict[mpi_task]
    self.name = \
        'sphexa_scalascaS+T_{}_{:03d}mpi_{:03d}omp_{}n_{}steps_{}cycles'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps, cycles)
    self.num_tasks_per_node = 24
    # {{{ ht:
    # self.num_tasks_per_node = mpitask if mpitask < 36 else 36  # noht
    # self.use_multithreading = False  # noht
    # self.num_tasks_per_core = 1  # noht

    # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
    # self.use_multithreading = True  # ht
    # self.num_tasks_per_core = 2  # ht
    # }}}
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 2
    self.use_multithreading = True
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'SCOREP_ENABLE_PROFILING': 'false',
        'SCOREP_ENABLE_TRACING': 'true',
        'SCOREP_ENABLE_UNWINDING': 'true',
        'SCOREP_SAMPLING_EVENTS': 'perf_cycles@%s' % cycles,
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
        # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
        # An empty SCOREP_SAMPLING_EVENTS will profile MPI calls only:
        # ok: 'SCOREP_SAMPLING_EVENTS': '',
        # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_TIMER': 'clock_gettime',
        # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '1',
        # 'SCOREP_VERBOSE': 'true',
        # To avoid "No free memory page available":
        'SCOREP_TOTAL_MEMORY': '1G',
        # Advanced performance metrics:
        'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
    }
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'info.rpt'
    self.rpt = 'rpt'
    # must use scorep.score:
    self.score_rpt = '%s.postproc' % self.rpt
    self.stat_rpt = 'scorep_%s_%s_trace/trace.stat' % \
        (self.testname, self.num_tasks)
    # self.rpt_inclusive = '%s.inclusive' % self.rpt
    # self.rpt_exclusive = '%s.exclusive' % self.rpt
    # self.cubetool = 'cube_calltree'
    self.executable_opts = [f'-n {self.cubeside}', f'-s {self.steps}',
                            '2>&1']
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} -V &> {self.version_rpt}',
        f'scorep --version >> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
        f'which scorep >> {self.which_rpt}',
        # f'which {self.cubetool} >> {self.which_rpt}',
        f'scorep-info config-summary &> {self.info_rpt}',
    ]
    cubetree = 'cube_calltree -m time -p -t 1'
    # -m metricname -- print out values for the metric <metricname>
    # -i            -- calculate inclusive values instead of exclusive
    # -t threshold  -- print out only call paths with a value larger
    #                  than <threshold>%
    # -p            -- display percent values
    self.postrun_cmds = [
        # can't test directly from the vampir gui, dumping the tracefile
        # content instead:
        'otf2-print scorep_*_trace/traces.otf2 > %s' % self.rpt
        # 'otf2-print scorep-*/traces.otf2 > %s' % self.rpt
    ]
    # }}}

    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        sn.assert_true(sphsscorep.scorep_version(self)),
        sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        # check the report:
        sn.assert_eq(sphsscorep.program_begin_count(self), self.num_tasks),
        sn.assert_eq(sphsscorep.program_end_count(self), self.num_tasks),
        # check the summary report:
        # sn.assert_found(r'^S=C=A=N: \S+ complete\.', self.stderr)
    ])
    # }}}

    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}

    # {{{ perf_patterns:
    basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    # tool: scalasca
    tool_perf_patterns = sn.evaluate(sphssca.rpt_trace_stats_d(self))
    self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
    # tool: scorep
    self.perf_patterns.update({
        'max_ipc_rk0': sphsscorep.ipc_rk0(self),
        'max_rumaxrss_rk0': sphsscorep.ru_maxrss_rk0(self),
    })
    # }}}

    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    # tool
    myzero_n = (0, None, None, 'count')
    myzero_ipc = (0, None, None, 'ins/cyc')
    myzero_kb = (0, None, None, 'kilobytes')
    # tool
    self.reference['*:mpi_latesender'] = myzero_n
    self.reference['*:mpi_latesender_wo'] = myzero_n
    self.reference['*:mpi_latereceiver'] = myzero_n
    self.reference['*:mpi_wait_nxn'] = myzero_n
    self.reference['*:max_ipc_rk0'] = myzero_ipc
    self.reference['*:max_rumaxrss_rk0'] = myzero_kb
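
# NOTE: hedged sketch, not part of the original check. The constructors in
# this file index three module-level dictionaries (cubeside_dict, steps_dict,
# cycles_dict) by the number of MPI tasks; they are defined elsewhere in the
# check and map the job size to the cube side (-n), the number of iterations
# (-s) and the perf_cycles sampling period. Their assumed shape, with
# placeholder (not authoritative) values:
#
# cubeside_dict = {24: 100, 48: 126, 96: 159}            # -n per job size
# steps_dict = {24: 1, 48: 1, 96: 1}                      # -s iterations
# cycles_dict = {24: 5000000, 48: 5000000, 96: 5000000}   # perf_cycles@<n>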
def __init__(self, mpitask, steps, cycles, rumetric):
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-pgi']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'gpu', 'openacc'}
    # }}}

    # {{{ compile
    self.testname = 'sqpatch'
    self.prebuild_cmds = ['module rm xalt']
    self.prgenv_flags = {
        'PrgEnv-pgi': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DNDEBUG',
            '-DUSE_MPI', '-DUSE_ACC', '-DUSE_STD_MATH_IN_KERNELS',
            '-acc', '-ta=tesla:managed,cc60'
        ],
        # -mp
    }
    # ---------------------------------------------------------------- tool
    self.modules = ['craype-accel-nvidia60']
    tc_ver = '19.10'
    tool_ver = '6.0'
    postproc_tool_ver = '4ef9d3f'
    postproc_tool_serial = 'otf-profiler'
    self.postproc_tool = 'otf-profiler-mpi'
    self.tool_modules = {
        'PrgEnv-pgi': ['Score-P/%s-CrayPGI-%s' % (tool_ver, tc_ver)]
    }
    # ---------------------------------------------------------------- tool
    self.build_system = 'SingleSource'
    self.build_system.cxx = 'scorep-CC'
    self.sourcepath = '%s.cpp' % self.testname
    self.executable = '%s.exe' % self.testname
    # {{{ openmp:
    # 'PrgEnv-intel': ['-qopenmp'],
    # 'PrgEnv-gnu': ['-fopenmp'],
    # 'PrgEnv-pgi': ['-mp'],
    # 'PrgEnv-cray_classic': ['-homp'],
    # 'PrgEnv-cray': ['-fopenmp'],
    # # '-homp' if lang == 'F90' else '-fopenmp',
    # }}}
    # }}}

    # {{{ run
    ompthread = 1
    # weak scaling = 10^6 p/cn:
    size_dict = {1: 100, 2: 126, 4: 159, 8: 200, 16: 252, 32: 318,
                 64: 400, 128: 504, 256: 635}
    cubesize = size_dict[mpitask]
    self.name = \
        'openacc_scorepT_{}_{:03d}mpi_{:03d}omp_{}n_{}steps_{}cycles_{}'. \
        format(self.testname, mpitask, ompthread, cubesize, steps, cycles,
               rumetric)
    self.num_tasks = mpitask
    self.num_tasks_per_node = 1
    # {{{ ht:
    # self.num_tasks_per_node = mpitask if mpitask < 36 else 36  # noht
    # self.use_multithreading = False  # noht
    # self.num_tasks_per_core = 1  # noht

    # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
    # self.use_multithreading = True  # ht
    # self.num_tasks_per_core = 2  # ht
    # }}}
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 1
    self.use_multithreading = False
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'SCOREP_OPENACC_ENABLE': 'yes',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'SCOREP_WRAPPER_INSTRUMENTER_FLAGS': '"--mpp=mpi --openacc"',
        'SCOREP_ENABLE_PROFILING': 'false',
        'SCOREP_ENABLE_TRACING': 'true',
        'SCOREP_FILTERING_FILE': 'myfilt',
        'SCOREP_VERBOSE': 'true',
        # Needed to avoid "No free memory page available"
        'SCOREP_TOTAL_MEMORY': '1G',
        # Adding some performance metrics:
        # http://scorepci.pages.jsc.fz-juelich.de/scorep-pipelines/docs/
        # => scorep-6.0/html/measurement.html#rusage_counters
        # => https://vampir.eu/public/files/pdf/spcheatsheet_letter.pdf
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss,ru_utime',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_METRIC_RUSAGE': '',
        'SCOREP_METRIC_RUSAGE': rumetric,
        'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
    }
    self.rusage_name = sn.evaluate(sphsscacc.otf2cli_metric_name(self))
    if cycles > 0:
        self.variables['SCOREP_SAMPLING_EVENTS'] = \
            'perf_cycles@%s' % cycles
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'scorep-info.rpt'
    self.rpt = 'rpt'
    self.rpt_jsn = 'result.json'
    self.rpt_inclusive = '%s.inclusive' % self.rpt
    self.rpt_exclusive = '%s.exclusive' % self.rpt
    self.tool = 'scorep'
    self.executable_opts = ['-n %s' % cubesize, '-s %s' % steps]
    self.prerun_cmds = [
        'module rm xalt',
        '%s --version &> %s' % (self.tool, self.version_rpt),
        'which %s &> %s' % (self.tool, self.which_rpt),
        'scorep-info config-summary &> %s' % self.info_rpt,
    ]
    self.postrun_cmds = [
        # otf-profiler is needed for post-processing but only a GNU build is
        # available => remove CubeLib to avoid a conflict with CrayPGI:
        'module rm CubeLib',
        'module load otf2_cli_profile/%s-CrayGNU-%s' % (postproc_tool_ver,
                                                        tc_ver),
        # report the post-processing tool versions:
        '%s --version' % postproc_tool_serial,  # OTF-Profiler version 2.0.0
        'which %s %s' % (postproc_tool_serial, self.postproc_tool),
        # create the result.json performance report from the tracefile,
        # see the otf_profiler method (@run_after)
    ]
    # }}}

    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        sn.assert_true(sphsscorep.scorep_version(self)),
        # Needed when using papi counters:
        # sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
    ])
    # }}}

    # {{{ performance
    # {{{ internal timers
    # use linux date as timer:
    self.prerun_cmds += ['echo starttime=`date +%s`']
    # }}}

    # {{{ perf_patterns:
    basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    tool_perf_patterns = sn.evaluate(sphsscacc.otf2cli_perf_patterns(self))
    self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
    # }}}

    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    self.reference = sn.evaluate(sphsscacc.otf2cli_tool_reference(self))
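
# NOTE: hedged sketch, not part of the original check. SCOREP_FILTERING_FILE
# above points to a file named 'myfilt' that has to exist in the stage
# directory; its content is not shown here. A minimal runtime filter using
# the standard Score-P region-name filter syntax (MPI events are not affected
# by region filtering) might look like the string below; the helper name and
# the INCLUDE pattern are illustrative only:
def write_example_scorep_filter(path='myfilt'):
    '''Hypothetical helper: write a minimal Score-P runtime filter file.'''
    content = (
        'SCOREP_REGION_NAMES_BEGIN\n'
        '  EXCLUDE *\n'
        '  INCLUDE *sph*\n'
        'SCOREP_REGION_NAMES_END\n'
    )
    with open(path, 'w') as fp:
        fp.write(content)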
def __init__(self, mpi_task):
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi',
                                'PrgEnv-cray']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}

    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'scorep'
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    tool_ver = '6.0'
    tc_ver = '20.08'
    self.tool_modules = {
        'PrgEnv-gnu': [f'Score-P/{tool_ver}-CrayGNU-{tc_ver}'],
        'PrgEnv-intel': [f'Score-P/{tool_ver}-CrayIntel-{tc_ver}'],
        'PrgEnv-cray': [f'Score-P/{tool_ver}-CrayCCE-{tc_ver}'],
        'PrgEnv-pgi': [f'Score-P/{tool_ver}-CrayPGI-{tc_ver}'],
    }
    self.prgenv_flags = {
        'PrgEnv-gnu': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                       '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-intel': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                         '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-cray': ['-I.', '-I./include', '-std=c++17', '-g', '-Ofast',
                        '-DUSE_MPI', '-DNDEBUG'],
        'PrgEnv-pgi': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                       '-DUSE_MPI', '-DNDEBUG'],
    }
    self.build_system = 'SingleSource'
    self.build_system.cxx = 'scorep --mpp=mpi --nocompiler CC'
    self.sourcepath = f'{self.testname}.cpp'
    self.executable = f'./{self.testname}.exe'
    # }}}

    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    cycles = cycles_dict[mpi_task]
    self.name = \
        'sphexa_scorepS+P_{}_{:03d}mpi_{:03d}omp_{}n_{}steps_{}cycles'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps, cycles)
    self.num_tasks_per_node = 24
    # {{{ ht:
    # self.num_tasks_per_node = mpitask if mpitask < 36 else 36  # noht
    # self.use_multithreading = False  # noht
    # self.num_tasks_per_core = 1  # noht

    # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
    # self.use_multithreading = True  # ht
    # self.num_tasks_per_core = 2  # ht
    # }}}
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 2
    self.use_multithreading = True
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'SCOREP_ENABLE_PROFILING': 'true',
        'SCOREP_ENABLE_TRACING': 'false',
        'SCOREP_ENABLE_UNWINDING': 'true',
        'SCOREP_SAMPLING_EVENTS': 'perf_cycles@%s' % cycles,
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
        # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
        # An empty SCOREP_SAMPLING_EVENTS will profile MPI calls only:
        # ok: 'SCOREP_SAMPLING_EVENTS': '',
        # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_TIMER': 'clock_gettime',
        # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '1',
        # 'SCOREP_VERBOSE': 'true',
        # 'SCOREP_TOTAL_MEMORY': '1G',
    }
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'info.rpt'
    self.rpt = 'rpt'
    self.rpt_inclusive = '%s.inclusive' % self.rpt
    self.rpt_exclusive = '%s.exclusive' % self.rpt
    self.executable_opts = [f'-n {self.cubeside}', f'-s {self.steps}',
                            '2>&1']
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} --version &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
        'scorep-info config-summary &> %s' % self.info_rpt,
    ]
    cubetree = 'cube_calltree -m time -p -t 1'
    # -m metricname -- print out values for the metric <metricname>
    # -i            -- calculate inclusive values instead of exclusive
    # -t threshold  -- print out only call paths with a value larger
    #                  than <threshold>%
    # -p            -- display percent values
    self.postrun_cmds = [
        # working around a memory crash in scorep-score:
        '(scorep-score -r scorep-*/profile.cubex ;rm -f core*) > %s'
        % self.rpt,
        '(%s scorep-*/profile.cubex ;rm -f core*) >> %s'
        % (cubetree, self.rpt_exclusive),
        '(%s -i scorep-*/profile.cubex ;rm -f core*) >> %s'
        % (cubetree, self.rpt_inclusive),
    ]
    # }}}

    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        sn.assert_true(sphsscorep.scorep_version(self)),
        sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        # check the summary report:
        sn.assert_found(r'Estimated aggregate size of event trace', self.rpt)
    ])
    # }}}

    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}

    # {{{ perf_patterns:
    self.perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    # tool
    self.perf_patterns.update({
        'scorep_elapsed': sphsscorep.scorep_elapsed(self),
        '%scorep_USR': sphsscorep.scorep_usr_pct(self),
        '%scorep_MPI': sphsscorep.scorep_mpi_pct(self),
        'scorep_top1': sphsscorep.scorep_top1_pct(self),
        '%scorep_Energy_exclusive':
            sphsscorep.scorep_exclusivepct_energy(self),
        '%scorep_Energy_inclusive':
            sphsscorep.scorep_inclusivepct_energy(self),
    })
    # }}}

    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    # tool:
    self.reference['*:scorep_elapsed'] = (0, None, None, 's')
    self.reference['*:%scorep_USR'] = (0, None, None, '%')
    self.reference['*:%scorep_MPI'] = (0, None, None, '%')
    top1_name = sphsscorep.scorep_top1_name(self)
    # TODO: self.reference['*:scorep_top1'] = (0, None, None, top1_name)
    self.reference['*:scorep_top1'] = (0, None, None, '')
    self.reference['*:%scorep_Energy_exclusive'] = (0, None, None, '%')
    self.reference['*:%scorep_Energy_inclusive'] = (0, None, None, '%')
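
# NOTE: hedged sketch, not the code of the sphsscorep helpers used above.
# The sanity block only asserts that the scorep-score summary landed in
# self.rpt; a deferrable helper in the same style could extract the estimate
# itself, assuming a report line such as
# "Estimated aggregate size of event trace:                   40GB":
def scorep_trace_size_estimate(obj):
    '''Hypothetical helper: return the estimated trace size (e.g. '40GB')
    from the scorep-score report (obj.rpt).'''
    regex = r'^Estimated aggregate size of event trace:\s+(?P<size>\S+)'
    return sn.extractsingle(regex, obj.rpt, 'size', str)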
def __init__(self, mpi_task):
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = ['PrgEnv-gnu']
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'gpu'}
    # }}}

    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'scorep'
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    tool_ver = '6.0'
    tc_ver = '20.08'
    self.tool_modules = {
        'PrgEnv-gnu': [f'Score-P/{tool_ver}-CrayGNU-{tc_ver}-cuda'],
    }
    self.build_system = 'Make'
    self.build_system.makefile = 'Makefile'
    self.build_system.nvcc = 'nvcc'
    self.build_system.cxx = 'CC'
    self.build_system.max_concurrency = 2
    self.sourcepath = f'{self.testname}.cpp'
    self.executable = f'./{self.testname}.exe'
    self.target_executable = 'mpi+omp+cuda'
    self.build_system.cxx = 'scorep --mpp=mpi --cuda --nocompiler CC'
    self.build_system.nvcc = 'scorep --cuda --nocompiler nvcc'
    self.build_system.options = [
        self.target_executable,
        f'MPICXX="{self.build_system.cxx}"',
        'SRCDIR=.', 'BUILDDIR=.', 'BINDIR=.',
        'CXXFLAGS=-std=c++14',
        'CUDA_PATH=$CUDATOOLKIT_HOME',
        # The makefile adds -DUSE_MPI
    ]
    self.postbuild_cmds = [
        f'mv {self.target_executable}.app {self.executable}'
    ]
    # }}}

    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    # cycles = cycles_dict[mpi_task]
    self.name = \
        'sphexa_scorep+cuda_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps)
    self.num_tasks_per_node = 1
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 1
    self.use_multithreading = False
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'SCOREP_ENABLE_PROFILING': 'false',
        'SCOREP_ENABLE_TRACING': 'true',
        'SCOREP_CUDA_ENABLE': 'yes',
        'SCOREP_ENABLE_UNWINDING': 'true',
        # 'SCOREP_SAMPLING_EVENTS': 'perf_cycles@%s' % cycles,
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
        # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
        # empty SCOREP_SAMPLING_EVENTS will profile mpi calls only:
        # ok: 'SCOREP_SAMPLING_EVENTS': '',
        # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_TIMER': 'clock_gettime',
        # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '1',
        # 'SCOREP_VERBOSE': 'true',
        # 'SCOREP_TOTAL_MEMORY': '1G',
    }
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'info.rpt'
    self.executable_opts = [
        f'-n {self.cubeside}', f'-s {self.steps}', '2>&1'
    ]
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} --version &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
        'scorep-info config-summary &> %s' % self.info_rpt,
    ]
    # }}}

    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        sn.assert_true(sphsscorep.scorep_version(self)),
        sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        sn.assert_true(sphsscorep.scorep_info_cuda_support(self)),
    ])
    # }}}

    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}

    # {{{ perf_patterns:
    basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    self.perf_patterns = {**basic_perf_patterns}
    # }}}

    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
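
# NOTE: hedged sketch, not the original sphsscorep implementation. The sanity
# block above calls sphsscorep.scorep_info_cuda_support(self); a check of the
# same flavour could grep the 'scorep-info config-summary' report, assuming
# it contains a line similar to "CUDA support:  yes":
def scorep_info_cuda_support_sketch(obj):
    '''Hypothetical helper: deferred check that the Score-P installation
    reports CUDA support in the config summary (obj.info_rpt); it can be
    placed directly inside sn.all([...]).'''
    return sn.assert_found(r'CUDA support:\s+yes', obj.info_rpt)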
def __init__(self, mpi_task):
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = [
        'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-cray'
    ]
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}

    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'scorep'
    self.prebuild_cmds = ['module rm xalt', 'module list -t']
    tool_ver = '6.0'
    tc_ver = '20.08'
    self.tool_modules = {
        'PrgEnv-gnu': [f'Score-P/{tool_ver}-CrayGNU-{tc_ver}'],
        'PrgEnv-intel': [f'Score-P/{tool_ver}-CrayIntel-{tc_ver}'],
        'PrgEnv-cray': [f'Score-P/{tool_ver}-CrayCCE-{tc_ver}'],
        'PrgEnv-pgi': [f'Score-P/{tool_ver}-CrayPGI-{tc_ver}'],
    }
    self.prgenv_flags = {
        'PrgEnv-gnu': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-intel': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-cray': [
            '-I.', '-I./include', '-std=c++17', '-g', '-Ofast', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-pgi': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
            '-DNDEBUG'
        ],
    }
    self.build_system = 'SingleSource'
    self.build_system.cxx = 'scorep --mpp=mpi --nocompiler CC'
    self.sourcepath = f'{self.testname}.cpp'
    self.executable = f'./{self.testname}.exe'
    # }}}

    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    cycles = cycles_dict[mpi_task]
    self.name = \
        'sphexa_scorepS+T_{}_{:03d}mpi_{:03d}omp_{}n_{}steps_{}cycles'. \
        format(self.testname, mpi_task, ompthread, self.cubeside,
               self.steps, cycles)
    self.num_tasks_per_node = 24
    # {{{ ht:
    # self.num_tasks_per_node = mpitask if mpitask < 36 else 36  # noht
    # self.use_multithreading = False  # noht
    # self.num_tasks_per_core = 1  # noht

    # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
    # self.use_multithreading = True  # ht
    # self.num_tasks_per_core = 2  # ht
    # }}}
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 2
    self.use_multithreading = True
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'SCOREP_ENABLE_PROFILING': 'false',
        'SCOREP_ENABLE_TRACING': 'true',
        'SCOREP_ENABLE_UNWINDING': 'true',
        'SCOREP_SAMPLING_EVENTS': 'perf_cycles@%s' % cycles,
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
        # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
        # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
        # empty SCOREP_SAMPLING_EVENTS will profile mpi calls only:
        # ok: 'SCOREP_SAMPLING_EVENTS': '',
        # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
        # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        # 'SCOREP_TIMER': 'clock_gettime',
        # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '1',
        # 'SCOREP_VERBOSE': 'true',
        # ---
        # Needed to avoid "No free memory page available"
        # (SCOREP_TOTAL_MEMORY=16384000 bytes)
        'SCOREP_TOTAL_MEMORY': '1G',
        # ---
        # adding some metrics to test my sanity_functions:
        'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
        'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
    }
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.info_rpt = 'scorep-info.rpt'
    self.rpt = 'rpt'
    self.rpt_inclusive = '%s.inclusive' % self.rpt
    self.rpt_exclusive = '%s.exclusive' % self.rpt
    self.executable_opts = [
        f'-n {self.cubeside}', f'-s {self.steps}', '2>&1'
    ]
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} --version &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
        'scorep-info config-summary &> %s' % self.info_rpt,
    ]
    self.postrun_cmds = [
        # can't test directly from vampir gui:
        'otf2-print scorep-*/traces.otf2 > %s' % self.rpt
    ]
    # }}}

    # {{{ sanity
    self.sanity_patterns = sn.all([
        # check the job output:
        sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
        # check the tool's version and configuration:
        sn.assert_true(sphsscorep.scorep_version(self)),
        sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
        sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
        sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        # check the report:
        sn.assert_eq(sphsscorep.program_begin_count(self), self.num_tasks),
        sn.assert_eq(sphsscorep.program_end_count(self), self.num_tasks),
        # TODO: create derived metric (ipc) in cube
    ])
    # }}}

    # {{{ performance
    # {{{ internal timers
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.postrun_cmds += ['echo stoptime=`date +%s`']
    # }}}

    # {{{ perf_patterns:
    self.perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
    # tool
    self.perf_patterns.update({
        'max_ipc_rk0': sphsscorep.ipc_rk0(self),
        'max_rumaxrss_rk0': sphsscorep.ru_maxrss_rk0(self),
    })
    # }}}

    # {{{ reference:
    self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
    # tool:
    self.reference['*:max_ipc_rk0'] = (0, None, None, 'ins/cyc')
    self.reference['*:max_rumaxrss_rk0'] = (0, None, None, 'kilobytes')
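
# NOTE: hedged sketch, not the original sphsscorep implementation. The report
# check above compares program_begin_count/program_end_count against
# self.num_tasks; counting such records in the otf2-print dump could look
# like this, assuming one PROGRAM_BEGIN line per MPI rank in self.rpt:
def program_begin_count_sketch(obj):
    '''Hypothetical helper: number of PROGRAM_BEGIN records in the trace
    dump produced by otf2-print.'''
    return sn.count(sn.findall(r'^PROGRAM_BEGIN\b', obj.rpt))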