def test_assert_true(self):
    """``assert_true`` accepts any truthy value and raises ``SanityError``
    (with a descriptive message) for any falsy one.
    """
    # Any truthy value passes straight through.
    for truthy in (True, 1, [1], range(1)):
        assert sn.assert_true(truthy)

    # Each falsy value raises with a message naming the offending value.
    falsy_cases = (
        (False, 'False is not True'),
        (0, '0 is not True'),
        ([], r'\[\] is not True'),
        (range(0), r'range\(.+\) is not True'),
    )
    for value, pattern in falsy_cases:
        with pytest.raises(SanityError, match=pattern):
            sn.evaluate(sn.assert_true(value))

    # A user-supplied ``msg`` replaces the default error message.
    with pytest.raises(SanityError, match='not true'):
        sn.evaluate(sn.assert_true(0, msg='not true'))
Beispiel #2
0
        # NOTE(review): this is a fragment of a pytest fixture body --
        # ``request`` below comes from the enclosing fixture, which is not
        # visible here.
        class _T0(rfm.RegressionTest):
            # Run on every system/environment; sanity trivially passes.
            valid_systems = ['*']
            valid_prog_environs = ['*']
            sourcepath = 'hello.c'
            executable = 'echo'
            sanity_patterns = sn.assert_true(1)

            def check_and_skip(self):
                # Unconditionally skip the test whenever this hook fires.
                self.skip_if(True)

            # Attach the hook manually based on the request.param
            # (e.g. 'pre_setup' -> when='pre', stage='setup')
            when, stage = request.param.split('_', maxsplit=1)
            hook = rfm.run_before if when == 'pre' else rfm.run_after
            check_and_skip = hook(stage)(check_and_skip)
Beispiel #3
0
 def test_assert_true_with_deferrables(self):
     """``assert_true`` works on deferred values: truthy ones pass and
     falsy ones raise ``SanityError`` on evaluation.
     """
     self.assertTrue(sn.assert_true(make_deferrable(True)))
     self.assertTrue(sn.assert_true(make_deferrable(1)))
     self.assertTrue(sn.assert_true(make_deferrable([1])))
     self.assertRaisesRegex(SanityError, 'False is not True', evaluate,
                            sn.assert_true(make_deferrable(False)))
     self.assertRaisesRegex(SanityError, '0 is not True', evaluate,
                            sn.assert_true(make_deferrable(0)))
     # Fix: the regex must be a raw string -- '\[' in a plain literal is an
     # invalid escape sequence (DeprecationWarning, and a SyntaxWarning in
     # newer Python versions).
     self.assertRaisesRegex(SanityError, r'\[\] is not True', evaluate,
                            sn.assert_true(make_deferrable([])))
Beispiel #4
0
def test_assert_true_with_deferrables():
    """``assert_true`` on deferred expressions: truthy deferrals pass,
    falsy deferrals raise ``SanityError`` when evaluated.
    """
    # Deferred truthy values are accepted.
    for truthy in (True, 1, [1]):
        assert sn.assert_true(sn.defer(truthy))

    # Deferred falsy values raise with a message naming the value.
    for value, pattern in ((False, 'False is not True'),
                           (0, '0 is not True'),
                           ([], r'\[\] is not True')):
        with pytest.raises(SanityError, match=pattern):
            sn.evaluate(sn.assert_true(sn.defer(value)))
Beispiel #5
0
    def validate_fixture_resolution(self):
        """Sanity-check how the ``f0``-``f4`` fixtures were resolved."""
        # All four fork-action fixtures are accessible and their data sums
        # to 4.
        fork_data_sum = sn.assert_eq(
            (self.f0.data + self.f1.data + self.f2.data + self.f3.data), 4
        )

        # The join-action fixture resolves to exactly one instance.
        join_resolves_one = sn.assert_eq(sn.len(self.f4), 1)

        # Join and fork actions resolve to the same instance for the same
        # scope.
        join_matches_fork = sn.assert_eq(self.f4[0], self.f0)

        # Only 4 distinct underlying fixture instances exist overall.
        four_unique = sn.assert_eq(
            sn.len({self.f0, self.f1, self.f2, self.f3, *self.f4}), 4
        )

        return sn.all([
            fork_data_sum,
            join_resolves_one,
            join_matches_fork,
            four_unique,
            # is_fixture() distinguishes fixtures from regular tests.
            sn.assert_true(self.f0.is_fixture()),
            sn.assert_false(self.is_fixture())
        ])
Beispiel #6
0
    def __init__(self, mpi_task):
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = ['PrgEnv-gnu']
        # self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_systems = ['*']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'gpu'}
        # }}}

        # {{{ compile
        self.testname = 'sqpatch'
        self.tool = 'nvprof'
        self.tool_mf = 'nvhpc'
        tc_ver = '20.08'
        self.prebuild_cmds = ['module rm xalt', 'module list -t']
        self.tool_modules = {
            'PrgEnv-gnu': [f'CrayGNU/.{tc_ver}', 'craype-accel-nvidia60',
                           self.tool_mf],
        }
        self.build_system = 'Make'
        self.build_system.makefile = 'Makefile'
        self.build_system.nvcc = 'nvcc'
        self.build_system.cxx = 'CC'
        self.build_system.max_concurrency = 2
        self.executable = self.tool
        self.target_executable = 'mpi+omp+cuda'
        self.build_system.options = [
            self.target_executable, 'MPICXX=CC', 'SRCDIR=.', 'BUILDDIR=.',
            'BINDIR=.', 'CUDA_PATH=$CUDATOOLKIT_HOME',
            # The makefile adds -DUSE_MPI
            # 'CXXFLAGS=',
        ]
        self.postbuild_cmds = [f'mv {self.target_executable}.app '
                               f'{self.target_executable}']
        # }}}

        # {{{ run
        ompthread = 1
        self.num_tasks = mpi_task
        self.cubeside = cubeside_dict[mpi_task]
        self.steps = steps_dict[mpi_task]
        self.name = 'sphexa_nvprofcuda_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
            format(self.testname, mpi_task, ompthread, self.cubeside,
                   self.steps)
        self.num_tasks_per_node = 1
        self.num_cpus_per_task = ompthread
        self.num_tasks_per_core = 1
        self.use_multithreading = False
        self.exclusive = True
        self.time_limit = '15m'
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
            # 'COMPUTE_PROFILE': '',
            # 'PMI_NO_FORK': '1',
        }
        self.tool_opts = ''
        # self.tool_opts = r'-o nvprof.output.%h.%p'
        self.executable_opts = [
            self.tool_opts, f'./{self.target_executable}',
            f'-n {self.cubeside}', f'-s {self.steps}', '2>&1']
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.summary_rpt = 'summary.rpt'
        # Reminder: NVreg_RestrictProfilingToAdminUsers=0 (RFC-16) needed since
        # cuda/10.1
        self.postrun_cmds = ['cat /etc/modprobe.d/nvidia.conf']
        self.prerun_cmds = [
            'module rm xalt',
            f'{self.tool} --version &> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
        ]
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version:
            sn.assert_true(sphsnv.nvprof_version(self)),
            # check the summary report:
            sn.assert_found('NVPROF is profiling process', self.stdout),
        ])
        # }}}

        # {{{ performance
        # {{{ internal timers
        self.prerun_cmds += ['echo starttime=`date +%s`']
        self.postrun_cmds += ['echo stoptime=`date +%s`']
        # }}}

        # {{{ perf_patterns:
        basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
        tool_perf_patterns = sn.evaluate(sphsnv.nvprof_perf_patterns(self))
        self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
        # }}}

        # {{{ reference:
        self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
        # tool's reference
        myzero_k = (0, None, None, 'KiB')
        myzero_p = (0, None, None, '%')
        self.reference['*:%cudaMemcpy'] = myzero_p
        self.reference['*:%CUDA_memcpy_HtoD_time'] = myzero_p
        self.reference['*:%CUDA_memcpy_DtoH_time'] = myzero_p
        self.reference['*:CUDA_memcpy_HtoD_KiB'] = myzero_k
        self.reference['*:CUDA_memcpy_DtoH_KiB'] = myzero_k
        self.reference['*:%computeMomentumAndEnergyIAD'] = myzero_p
        self.reference['*:%computeIAD'] = myzero_p
Beispiel #7
0
class Eggs(rfm.RunOnlyRegressionTest):
    eggs = fixture(Bacon)
    executable = 'echo'
    sanity_patterns = sn.assert_true(1)
    def __init__(self, mpi_task):
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi',
                                    'PrgEnv-cray']
        # self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_systems = ['*']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'cpu'}
        # }}}

        # {{{ compile
        self.testname = 'sqpatch'
        self.tool = 'scalasca'
        self.prebuild_cmds = ['module rm xalt', 'module list -t']
        tool_ver = '2.5'
        tc_ver = '20.08'
        self.tool_modules = {
            'PrgEnv-gnu': [f'Scalasca/{tool_ver}-CrayGNU-{tc_ver}'],
            'PrgEnv-intel': [f'Scalasca/{tool_ver}-CrayIntel-{tc_ver}'],
            'PrgEnv-cray': [f'Scalasca/{tool_ver}-CrayCCE-{tc_ver}'],
            'PrgEnv-pgi': [f'Scalasca/{tool_ver}-CrayPGI-{tc_ver}'],
        }
        self.prgenv_flags = {
            'PrgEnv-gnu': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                           '-DUSE_MPI', '-DNDEBUG'],
            'PrgEnv-intel': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                             '-DUSE_MPI', '-DNDEBUG'],
            'PrgEnv-cray': ['-I.', '-I./include', '-std=c++17', '-g', '-Ofast',
                            '-DUSE_MPI', '-DNDEBUG'],
            'PrgEnv-pgi': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                           '-DUSE_MPI', '-DNDEBUG'],
        }
        self.build_system = 'SingleSource'
        self.build_system.cxx = 'scorep --mpp=mpi --nocompiler CC'
        self.sourcepath = f'{self.testname}.cpp'
        self.executable = f'./{self.testname}.exe'
        # }}}

        # {{{ run
        ompthread = 1
        self.num_tasks = mpi_task
        self.cubeside = cubeside_dict[mpi_task]
        self.steps = steps_dict[mpi_task]
        cycles = cycles_dict[mpi_task]
        self.name = \
            'sphexa_scalascaS+T_{}_{:03d}mpi_{:03d}omp_{}n_{}steps_{}cycles'. \
            format(self.testname, mpi_task, ompthread, self.cubeside,
                   self.steps, cycles)
        self.num_tasks_per_node = 24
# {{{ ht:
        # self.num_tasks_per_node = mpitask if mpitask < 36 else 36   # noht
        # self.use_multithreading = False  # noht
        # self.num_tasks_per_core = 1      # noht

        # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
        # self.use_multithreading = True # ht
        # self.num_tasks_per_core = 2    # ht
# }}}
        self.num_cpus_per_task = ompthread
        self.num_tasks_per_core = 2
        self.use_multithreading = True
        self.exclusive = True
        self.time_limit = '10m'
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
            'SCOREP_ENABLE_PROFILING': 'false',
            'SCOREP_ENABLE_TRACING': 'true',
            'SCOREP_ENABLE_UNWINDING': 'true',
            'SCOREP_SAMPLING_EVENTS': 'perf_cycles@%s' % cycles,
            # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
            # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
            # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
            # empty SCOREP_SAMPLING_EVENTS will profile mpi calls only:
            # ok: 'SCOREP_SAMPLING_EVENTS': '',
            # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
            # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
            # 'SCOREP_TIMER': 'clock_gettime',
            # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '1',
            # 'SCOREP_VERBOSE': 'true',
            # To avoid "No free memory page available":
            'SCOREP_TOTAL_MEMORY': '1G',
            # Advanced performance metrics:
            'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
            'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
        }
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.info_rpt = 'info.rpt'
        self.rpt = 'rpt'
        # must use scorep.score:
        self.score_rpt = '%s.postproc' % self.rpt
        self.stat_rpt = 'scorep_%s_%s_trace/trace.stat' % \
                        (self.testname, self.num_tasks)
        # self.rpt_inclusive = '%s.inclusive' % self.rpt
        # self.rpt_exclusive = '%s.exclusive' % self.rpt
        # self.cubetool = 'cube_calltree'
        self.executable_opts = [
            f'-n {self.cubeside}', f'-s {self.steps}', '2>&1']
        self.prerun_cmds = [
            'module rm xalt',
            f'{self.tool} -V &> {self.version_rpt}',
            f'scorep --version >> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
            f'which scorep >> {self.which_rpt}',
            # f'which {self.cubetool} >> {self.which_rpt}',
            f'scorep-info config-summary &> {self.info_rpt}',
        ]
        cubetree = 'cube_calltree -m time -p -t 1'
        # -m metricname -- print out values for the metric <metricname>
        # -i            -- calculate inclusive values instead of exclusive
        # -t treshold   -- print out only call path with a value larger
        #                  than <treshold>%
        # -p            -- diplay percent value
        self.postrun_cmds = [
            # can't test directly from vampir gui, dumping tracefile content:
            'otf2-print scorep_*_trace/traces.otf2 > %s' % self.rpt
            # 'otf2-print scorep-*/traces.otf2 > %s' % self.rpt
        ]
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version and configuration:
            sn.assert_true(sphsscorep.scorep_version(self)),
            sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
            sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
            sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
            # check the report:
            sn.assert_eq(sphsscorep.program_begin_count(self), self.num_tasks),
            sn.assert_eq(sphsscorep.program_end_count(self), self.num_tasks),
            # check the summary report:
            # sn.assert_found(r'^S=C=A=N: \S+ complete\.', self.stderr)
        ])
        # }}}

        # {{{ performance
        # {{{ internal timers
        self.prerun_cmds += ['echo starttime=`date +%s`']
        self.postrun_cmds += ['echo stoptime=`date +%s`']
        # }}}

        # {{{ perf_patterns:
        basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
        # tool: scalasca
        tool_perf_patterns = sn.evaluate(sphssca.rpt_trace_stats_d(self))
        self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
        # tool: scorep
        self.perf_patterns.update({
            'max_ipc_rk0': sphsscorep.ipc_rk0(self),
            'max_rumaxrss_rk0': sphsscorep.ru_maxrss_rk0(self),
        })
        # }}}

        # {{{ reference:
        self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
        # tool
        myzero_n = (0, None, None, 'count')
        myzero_ipc = (0, None, None, 'ins/cyc')
        myzero_kb = (0, None, None, 'kilobytes')
        # tool
        self.reference['*:mpi_latesender'] = myzero_n
        self.reference['*:mpi_latesender_wo'] = myzero_n
        self.reference['*:mpi_latereceiver'] = myzero_n
        self.reference['*:mpi_wait_nxn'] = myzero_n
        self.reference['*:max_ipc_rk0'] = myzero_ipc
        self.reference['*:max_rumaxrss_rk0'] = myzero_kb
Beispiel #9
0
    def __init__(self, mpi_task):
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi',
                                    'PrgEnv-cray']
        # self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_systems = ['*']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'cpu'}
        # }}}

        # {{{ compile
        self.testname = 'sqpatch'
        self.tool = 'scorep'
        self.prebuild_cmds = ['module rm xalt', 'module list -t']
        tool_ver = '6.0'
        tc_ver = '20.08'
        self.tool_modules = {
            'PrgEnv-gnu': [f'Score-P/{tool_ver}-CrayGNU-{tc_ver}'],
            'PrgEnv-intel': [f'Score-P/{tool_ver}-CrayIntel-{tc_ver}'],
            'PrgEnv-cray': [f'Score-P/{tool_ver}-CrayCCE-{tc_ver}'],
            'PrgEnv-pgi': [f'Score-P/{tool_ver}-CrayPGI-{tc_ver}'],
        }
        self.prgenv_flags = {
            'PrgEnv-gnu': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                           '-DUSE_MPI', '-DNDEBUG'],
            'PrgEnv-intel': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                             '-DUSE_MPI', '-DNDEBUG'],
            'PrgEnv-cray': ['-I.', '-I./include', '-std=c++17', '-g', '-Ofast',
                            '-DUSE_MPI', '-DNDEBUG'],
            'PrgEnv-pgi': ['-I.', '-I./include', '-std=c++14', '-g', '-O3',
                           '-DUSE_MPI', '-DNDEBUG'],
        }
        self.build_system = 'SingleSource'
        self.build_system.cxx = 'scorep --mpp=mpi --nocompiler CC'
        self.sourcepath = f'{self.testname}.cpp'
        self.executable = f'./{self.testname}.exe'
        # }}}

        # {{{ run
        ompthread = 1
        self.num_tasks = mpi_task
        self.cubeside = cubeside_dict[mpi_task]
        self.steps = steps_dict[mpi_task]
        cycles = cycles_dict[mpi_task]
        self.name = \
            'sphexa_scorepS+P_{}_{:03d}mpi_{:03d}omp_{}n_{}steps_{}cycles'. \
            format(self.testname, mpi_task, ompthread, self.cubeside,
                   self.steps, cycles)
        self.num_tasks_per_node = 24
# {{{ ht:
        # self.num_tasks_per_node = mpitask if mpitask < 36 else 36   # noht
        # self.use_multithreading = False  # noht
        # self.num_tasks_per_core = 1      # noht

        # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
        # self.use_multithreading = True # ht
        # self.num_tasks_per_core = 2    # ht
# }}}
        self.num_cpus_per_task = ompthread
        self.num_tasks_per_core = 2
        self.use_multithreading = True
        self.exclusive = True
        self.time_limit = '10m'
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
            'SCOREP_ENABLE_PROFILING': 'true',
            'SCOREP_ENABLE_TRACING': 'false',
            'SCOREP_ENABLE_UNWINDING': 'true',
            'SCOREP_SAMPLING_EVENTS': 'perf_cycles@%s' % cycles,
            # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
            # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
            # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
            # empty SCOREP_SAMPLING_EVENTS will profile mpi calls only:
            # ok: 'SCOREP_SAMPLING_EVENTS': '',
            # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
            # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
            # 'SCOREP_TIMER': 'clock_gettime',
            # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '1',
            # 'SCOREP_VERBOSE': 'true',
            # 'SCOREP_TOTAL_MEMORY': '1G',
        }
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.info_rpt = 'info.rpt'
        self.rpt = 'rpt'
        self.rpt_inclusive = '%s.inclusive' % self.rpt
        self.rpt_exclusive = '%s.exclusive' % self.rpt
        self.executable_opts = [
            f'-n {self.cubeside}', f'-s {self.steps}', '2>&1']
        self.prerun_cmds = [
            'module rm xalt',
            f'{self.tool} --version &> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
            'scorep-info config-summary &> %s' % self.info_rpt,
        ]
        cubetree = 'cube_calltree -m time -p -t 1'
        # -m metricname -- print out values for the metric <metricname>
        # -i            -- calculate inclusive values instead of exclusive
        # -t treshold   -- print out only call path with a value larger
        #                  than <treshold>%
        # -p            -- diplay percent value
        self.postrun_cmds = [
            # working around memory crash in scorep-score:
            '(scorep-score -r scorep-*/profile.cubex ;rm -f core*) > %s' \
            % self.rpt,
            '(%s    scorep-*/profile.cubex ;rm -f core*) >> %s' \
            % (cubetree, self.rpt_exclusive),
            '(%s -i scorep-*/profile.cubex ;rm -f core*) >> %s' \
            % (cubetree, self.rpt_inclusive),
        ]
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version and configuration:
            sn.assert_true(sphsscorep.scorep_version(self)),
            sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
            sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
            sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
            # check the summary report:
            sn.assert_found(r'Estimated aggregate size of event trace',
                            self.rpt)
        ])
        # }}}

        # {{{ performance
        # {{{ internal timers
        self.prerun_cmds += ['echo starttime=`date +%s`']
        self.postrun_cmds += ['echo stoptime=`date +%s`']
        # }}}

        # {{{ perf_patterns:
        self.perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
        # tool
        self.perf_patterns.update({
            'scorep_elapsed': sphsscorep.scorep_elapsed(self),
            '%scorep_USR': sphsscorep.scorep_usr_pct(self),
            '%scorep_MPI': sphsscorep.scorep_mpi_pct(self),
            'scorep_top1': sphsscorep.scorep_top1_pct(self),
            '%scorep_Energy_exclusive':
            sphsscorep.scorep_exclusivepct_energy(self),
            '%scorep_Energy_inclusive':
            sphsscorep.scorep_inclusivepct_energy(self),
        })
        # }}}

        # {{{ reference:
        self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
        # tool:
        self.reference['*:scorep_elapsed'] = (0, None, None, 's')
        self.reference['*:%scorep_USR'] = (0, None, None, '%')
        self.reference['*:%scorep_MPI'] = (0, None, None, '%')
        top1_name = sphsscorep.scorep_top1_name(self)
        # TODO: self.reference['*:scorep_top1'] = (0, None, None, top1_name)
        self.reference['*:scorep_top1'] = (0, None, None, '')
        self.reference['*:%scorep_Energy_exclusive'] = (0, None, None, '%')
        self.reference['*:%scorep_Energy_inclusive'] = (0, None, None, '%')
Beispiel #10
0
class T0(BaseTest):
    """Minimal test variant whose sanity check trivially succeeds."""
    # Deferred assertion on a truthy constant, so sanity always passes.
    sanity_patterns = sn.assert_true(1)
Beispiel #11
0
 def assert_foo(self):
     """Check that ``foo`` equals 3 and that ``ham`` is truthy."""
     foo_is_three = sn.assert_eq(self.foo, 3)
     ham_is_truthy = sn.assert_true(self.ham)
     return sn.all([foo_is_three, ham_is_truthy])
Beispiel #12
0
    def __init__(self):
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = ['*']
        self.valid_systems = ['*']
        self.modules = ['Score-P']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'cpu'}
        # }}}

        # {{{ compile
        self.testname = 'sedov'
        self.tool = 'scorep'
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.info_rpt = 'info.rpt'
        self.prebuild_cmds = [
            f'{self.tool} --version &> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
            f'which vampir >> {self.which_rpt}',
            f'scorep-info config-summary &> {self.info_rpt}',
        ]
        # }}}

        # {{{ run
        self.variables = {
            # 'CRAYPE_LINK_TYPE': 'dynamic',
            # 'OMP_NUM_THREADS': str(self.num_cpus_per_task),
            'SCOREP_ENABLE_PROFILING': 'true',
            'SCOREP_ENABLE_TRACING': 'false',
            'SCOREP_ENABLE_UNWINDING': 'true',
            'SCOREP_SAMPLING_EVENTS': f'perf_cycles@{self.cycles}',
            # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
            # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
            # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
            # empty SCOREP_SAMPLING_EVENTS will profile mpi calls only:
            # ok: 'SCOREP_SAMPLING_EVENTS': '',
            # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
            # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
            # 'SCOREP_TIMER': 'clock_gettime',
            # 'SCOREP_VERBOSE': 'true',
            'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '10',
            'SCOREP_TOTAL_MEMORY': '1G',
            # 'PATH': f'{tool_path}:{cube_path}:$PATH',
        }
        self.rpt = 'rpt'
        self.rpt_score = 'scorep-score.rpt'
        self.rpt_inclusive = 'cube_calltree_inclusive.rpt'
        self.rpt_exclusive = 'cube_calltree_exclusive.rpt'
        self.rpt_otf2 = 'otf2-print.rpt'
        #
        cubetree = 'cube_calltree -m time -p -t 1'
        # -m metricname -- print out values for the metric <metricname>
        # -i            -- calculate inclusive values instead of exclusive
        # -t treshold   -- print out only call path with a value larger
        #                  than <treshold>%
        # -p            -- diplay percent value
        self.postrun_cmds += [
            f'# -------------------------------------------------------------',
            # working around memory crash in scorep-score:
            f'(scorep-score -r scorep-*/profile.cubex ;rm -f core*) >'
            f'{self.rpt_score}',
            # exclusive time (+ workaround memory crash):
            f'({cubetree} scorep-*/profile.cubex ;rm -f core*) &>'
            f' {self.rpt_exclusive}',
            # inclusive time (+ workaround memory crash):
            f'({cubetree} -i scorep-*/profile.cubex ;rm -f core*) &>'
            f' {self.rpt_inclusive}',
            f'# -------------------------------------------------------------',
        ]
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version and configuration:
            # sn.assert_true(sphsscorep.scorep_assert_version(self)),
            sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
            sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
            sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
            # check the summary report:
            sn.assert_found(r'Estimated aggregate size of event trace',
                            self.rpt_score)
        ])
 class MyOtherTest(MyTest):
     '''Test that both syntaxes are incompatible.'''
     # Deferred sanity expression on a truthy constant (always passes).
     sanity_patterns = sn.assert_true(1)
Beispiel #14
0
    def __init__(self, mpi_task):
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = ['PrgEnv-gnu']
        # self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_systems = ['*']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'gpu'}
        # }}}

        # {{{ compile
        self.testname = 'sqpatch'
        self.tool = 'scorep'
        self.prebuild_cmds = ['module rm xalt', 'module list -t']
        tool_ver = '6.0'
        tc_ver = '20.08'
        self.tool_modules = {
            'PrgEnv-gnu': [f'Score-P/{tool_ver}-CrayGNU-{tc_ver}-cuda'],
        }
        self.build_system = 'Make'
        self.build_system.makefile = 'Makefile'
        self.build_system.nvcc = 'nvcc'
        self.build_system.cxx = 'CC'
        self.build_system.max_concurrency = 2
        self.sourcepath = f'{self.testname}.cpp'
        self.executable = f'./{self.testname}.exe'
        self.target_executable = 'mpi+omp+cuda'
        self.build_system.cxx = 'scorep --mpp=mpi --cuda --nocompiler CC'
        self.build_system.nvcc = 'scorep --cuda --nocompiler nvcc'
        self.build_system.options = [
            self.target_executable,
            f'MPICXX="{self.build_system.cxx}"',
            'SRCDIR=.',
            'BUILDDIR=.',
            'BINDIR=.',
            'CXXFLAGS=-std=c++14',
            'CUDA_PATH=$CUDATOOLKIT_HOME',
            # The makefile adds -DUSE_MPI
        ]
        self.postbuild_cmds = [
            f'mv {self.target_executable}.app '
            f'{self.executable}'
        ]
        # }}}

        # {{{ run
        ompthread = 1
        self.num_tasks = mpi_task
        self.cubeside = cubeside_dict[mpi_task]
        self.steps = steps_dict[mpi_task]
        # cycles = cycles_dict[mpi_task]
        self.name = \
            'sphexa_scorep+cuda_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
            format(self.testname, mpi_task, ompthread, self.cubeside,
                   self.steps)
        self.num_tasks_per_node = 1
        self.num_cpus_per_task = ompthread
        self.num_tasks_per_core = 1
        self.use_multithreading = False
        self.exclusive = True
        self.time_limit = '10m'
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
            'SCOREP_ENABLE_PROFILING': 'false',
            'SCOREP_ENABLE_TRACING': 'true',
            'SCOREP_CUDA_ENABLE': 'yes',
            'SCOREP_ENABLE_UNWINDING': 'true',
            # 'SCOREP_SAMPLING_EVENTS': 'perf_cycles@%s' % cycles,
            # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@1000000',
            # 'SCOREP_SAMPLING_EVENTS': 'PAPI_TOT_CYC@%s' % cycles,
            # export SCOREP_SAMPLING_EVENTS=PAPI_TOT_CYC@1000000
            # empty SCOREP_SAMPLING_EVENTS will profile mpi calls only:
            # ok: 'SCOREP_SAMPLING_EVENTS': '',
            # 'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
            # 'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
            # 'SCOREP_TIMER': 'clock_gettime',
            # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '1',
            # 'SCOREP_VERBOSE': 'true',
            # 'SCOREP_TOTAL_MEMORY': '1G',
        }
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.info_rpt = 'info.rpt'
        self.executable_opts = [
            f'-n {self.cubeside}', f'-s {self.steps}', '2>&1'
        ]
        self.prerun_cmds = [
            'module rm xalt',
            f'{self.tool} --version &> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
            'scorep-info config-summary &> %s' % self.info_rpt,
        ]
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version and configuration:
            sn.assert_true(sphsscorep.scorep_version(self)),
            sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
            sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
            sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
            sn.assert_true(sphsscorep.scorep_info_cuda_support(self)),
        ])
        # }}}

        # {{{ performance
        # {{{ internal timers
        self.prerun_cmds += ['echo starttime=`date +%s`']
        self.postrun_cmds += ['echo stoptime=`date +%s`']
        # }}}

        # {{{ perf_patterns:
        basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
        self.perf_patterns = {**basic_perf_patterns}
        # }}}

        # {{{ reference:
        self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
Beispiel #15
0
    def __init__(self, mpi_task):
        """Set up an Extrae tracing check of the SPH-EXA sqpatch mini-app.

        Args:
            mpi_task: number of MPI ranks; also used to look up the cube
                side and step count in the module-level ``cubeside_dict``
                and ``steps_dict`` (defined elsewhere in this file).

        The test builds ``sqpatch.cpp`` with PrgEnv-gnu, launches it via a
        generated ``tool.sh`` Extrae wrapper, and post-processes the
        resulting ``.prv`` trace with ``stats-wrapper.sh`` to collect MPI
        communication histograms.
        """
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = ['PrgEnv-gnu']
        # self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_systems = ['*']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'cpu'}
        # }}}

        # {{{ compile
        self.testname = 'sqpatch'
        self.prebuild_cmds = ['module rm xalt']
        self.prgenv_flags = {
            'PrgEnv-gnu': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
                '-DNDEBUG'
            ],
        }
        # ---------------------------------------------------------------- tool
        self.tool = 'tool.sh'
        tool_ver = '3.8.1'
        tc_ver = '20.08'
        self.tool_modules = {
            'PrgEnv-gnu': [f'Extrae/{tool_ver}-CrayGNU-{tc_ver}'],
        }
        # ---------------------------------------------------------------- tool
        self.build_system = 'SingleSource'
        self.build_system.cxx = 'CC'
        self.sourcepath = '%s.cpp' % self.testname
        self.executable = self.tool
        self.target_executable = './%s.exe' % self.testname
        # {{{ openmp:
        # 'PrgEnv-intel': ['-qopenmp'],
        # 'PrgEnv-gnu': ['-fopenmp'],
        # 'PrgEnv-pgi': ['-mp'],
        # 'PrgEnv-cray_classic': ['-homp'],
        # 'PrgEnv-cray': ['-fopenmp'],
        # # '-homp' if lang == 'F90' else '-fopenmp',
        # }}}
        # }}}

        # {{{ run
        ompthread = 1
        self.cubeside = cubeside_dict[mpi_task]
        self.steps = steps_dict[mpi_task]
        self.name = \
            'sphexa_extrae_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
            format(self.testname, mpi_task, ompthread, self.cubeside,
                   self.steps)
        self.num_tasks = mpi_task
        self.num_tasks_per_node = 24  # 72
        # {{{ ht:
        # self.num_tasks_per_node = mpitask if mpitask < 36 else 36   # noht
        # self.use_multithreading = False  # noht
        # self.num_tasks_per_core = 1      # noht

        # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
        # self.use_multithreading = True # ht
        # self.num_tasks_per_core = 2    # ht
        # }}}
        self.num_cpus_per_task = ompthread
        self.num_tasks_per_core = 2
        self.use_multithreading = True
        self.exclusive = True
        self.time_limit = '10m'
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        }
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.rpt = 'rpt'
        # NOTE(review): self.tool and self.executable are deliberately
        # re-assigned here (first set in the compile section above); the
        # run stage uses the './tool.sh' wrapper as the launched command.
        self.tool = './tool.sh'
        self.executable = self.tool
        self.executable_opts = [
            f'-- -n {self.cubeside}', f'-s {self.steps}', '2>&1'
        ]
        # Extrae config: patch the stock example XML into a local copy.
        self.xml1 = '$EBROOTEXTRAE/share/example/MPI/extrae.xml'
        self.xml2 = 'extrae.xml'
        self.patch = 'extrae.xml.patch'
        self.version_file = 'extrae_version.h'
        self.prerun_cmds = [
            'module rm xalt',
            # tool version
            'cp $EBROOTEXTRAE/include/extrae_version.h %s' % self.version_file,
            # will launch ./tool.sh myexe myexe_args:
            'mv %s %s' % (self.executable, self.target_executable),
            # .xml
            'echo %s &> %s' % (self.xml1, self.which_rpt),
            'patch -i %s %s -o %s' % (self.patch, self.xml1, self.xml2),
            # .sh
            'echo -e \'%s\' >> %s' % (sphsextrae.create_sh(self), self.tool),
            'chmod u+x %s' % (self.tool),
        ]
        self.prv = '%s.prv' % self.target_executable[2:]  # stripping './'
        self.postrun_cmds = [
            'stats-wrapper.sh %s -comms_histo' % self.prv,
        ]
        self.rpt_mpistats = '%s.comms.dat' % self.target_executable
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool version:
            sn.assert_true(sphsextrae.extrae_version(self)),
            # check the summary report:
            sn.assert_found(
                r'Congratulations! %s has been generated.' % self.prv,
                self.stdout),
        ])
        # }}}

        # {{{  performance
        # {{{ internal timers
        # use linux date as timer:
        self.prerun_cmds += ['echo starttime=`date +%s`']
        self.postrun_cmds += ['echo stoptime=`date +%s`']
        # }}}

        # {{{ perf_patterns:
        basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
        tool_perf_patterns = sn.evaluate(sphsextrae.rpt_mpistats(self))
        self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
        # }}}

        # {{{ reference:
        basic_reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
        self.reference = basic_reference
        # tool's reference
        # Zero-valued baselines with no lower/upper bounds: these entries
        # only record the measured values, they never fail the run.
        myzero = (0, None, None, '')
        myzero_p = (0, None, None, '%')
        self.reference['*:num_comms_0-10B'] = myzero
        self.reference['*:num_comms_10B-100B'] = myzero
        self.reference['*:num_comms_100B-1KB'] = myzero
        self.reference['*:num_comms_1KB-10KB'] = myzero
        self.reference['*:num_comms_10KB-100KB'] = myzero
        self.reference['*:num_comms_100KB-1MB'] = myzero
        self.reference['*:num_comms_1MB-10MB'] = myzero
        self.reference['*:num_comms_10MB'] = myzero
        #
        self.reference['*:%_of_bytes_sent_0-10B'] = myzero_p
        self.reference['*:%_of_bytes_sent_10B-100B'] = myzero_p
        self.reference['*:%_of_bytes_sent_100B-1KB'] = myzero_p
        self.reference['*:%_of_bytes_sent_1KB-10KB'] = myzero_p
        self.reference['*:%_of_bytes_sent_10KB-100KB'] = myzero_p
        self.reference['*:%_of_bytes_sent_100KB-1MB'] = myzero_p
        self.reference['*:%_of_bytes_sent_1MB-10MB'] = myzero_p
        self.reference['*:%_of_bytes_sent_10MB'] = myzero_p
Beispiel #16
0
    def __init__(self):
        """Set up a Scalasca (Score-P) analysis check of the sedov mini-app.

        Runs the pre-built ``mpi+omp`` executable under Scalasca, then
        post-processes the ``profile.cubex`` report with
        ``scalasca -examine`` (and, commented out, ``cube_remap2`` /
        ``cube_calltree``) to produce the score report used by the sanity
        checks.
        """
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = ['*']
        self.valid_systems = ['*']
        self.modules = ['Scalasca']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'cpu'}
        # }}}

        # {{{ compile
        self.testname = 'sedov'
        self.executable = 'mpi+omp'
        self.tool = 'scalasca'
        # Report files produced at build time (tool versions/locations):
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.info_rpt = 'info.rpt'
        self.cubetool = 'cube_calltree'
        self.prebuild_cmds = [
            f'{self.tool} -V &> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
            f'scorep --version >> {self.version_rpt}',
            f'which scorep >> {self.which_rpt}',
            f'which cube_remap2 >> {self.which_rpt}',
            f'which cube_dump >> {self.which_rpt}',
            f'which {self.cubetool} >> {self.which_rpt}',
            # f'which vampir >> {self.which_rpt}',
            f'scorep-info config-summary &> {self.info_rpt}',
            f'# step1: prepare executable with: scalasca -instrument (skin)',
            f'# step2: run executable with: scalasca -analyze (scan)',
            f'# step3: explore report with: scalasca -examine (square)',
            f'# step4: get calltree with: cube_calltree'
        ]
        # }}}

        # {{{ run
        self.variables = {
            # 'SCOREP_ENABLE_UNWINDING': 'true',
            # 'SCOREP_SAMPLING_EVENTS': f'perf_cycles@{self.cycles}',
            # 'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '10',
            'SCOREP_TOTAL_MEMORY': '1G',
            # 'SCOREP_TIMER': 'gettimeofday',
            'SCAN_ANALYZE_OPTS': '--time-correct',
        }
        self.rpt = 'rpt'
        self.rpt_score = 'scorep-score.rpt'
        self.rpt_exclusive = 'cube_calltree_exclusive.rpt'
        self.rpt_inclusive = 'cube_calltree_inclusive.rpt'
        #
        cubetree = 'cube_calltree -m time -p -t 1'
        self.postrun_cmds += [
            '# {{{ --- Postprocessing steps: ---',
            f'# -------------------------------------------------------------',
            '# profile.cubex - scalasca -examine -s = square -> scorep.score:',
            f'# ({self.rpt_score} is used by sanity checks)',
            f'scalasca -examine -s scorep_*sum/profile.cubex &> {self.rpt}',
            f'cp scorep_*_sum/scorep.score {self.rpt_score}',
            '# --------------------------------------------------------------',
            '# transform metric tree into metric hierarchy with remap2',
            '# profile.cubex - cube_remap2 (slow)          -> summary.cubex: ',
            f'# time -p cube_remap2 -d -o summary.cubex */profile.cubex',
            f'# scorep-score summary.cubex &> {self.rpt_score}',
            '# --------------------------------------------------------------',
            '# exclusive time: summary.cubex - cubetree -> rpt_exclusive:',
            f'# ({cubetree} scorep_*_sum/summary.cubex ;rm -f core*) &>'
            f' {self.rpt_exclusive}',
            '# --------------------------------------------------------------',
            '# inclusive time: summary.cubex - cubetree -i -> rpt_inclusive:',
            f'# ({cubetree} -i scorep_*_sum/summary.cubex ;rm -f core*) &>'
            f' {self.rpt_inclusive}',
            '# -m metricname -- print out values for the metric <metricname>',
            '# -i            -- calculate inclusive values instead of',
            '#                  exclusive',
            '# -t treshold   -- print out only call path with a value larger',
            '#                  than <treshold>%',
            '# -p            -- diplay percent value',
            '# --------------------------------------------------------- }}}',
        ]
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version and configuration:
            # sn.assert_true(sphsscorep.scorep_assert_version(self)),
            sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
            sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
            sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
            # check the tool report:
            sn.assert_found(r'Estimated aggregate size of event trace',
                            self.rpt_score),
            sn.assert_found(r'^S=C=A=N: \S+ complete\.', self.stderr)
        ])
Beispiel #17
0
    def sanity_check_download(self):
        """Deferred sanity check: the ``xthi`` source must exist locally."""
        xthi_present = os.path.exists("xthi")
        return sanity.assert_true(xthi_present)
Beispiel #18
0
    def __init__(self, mpi_task):
        """Set up an Intel Advisor survey check of the sqpatch mini-app.

        Args:
            mpi_task: number of MPI ranks; also keys into the module-level
                ``cubeside_dict`` and ``steps_dict`` lookup tables.

        The executable is launched through ``advixe-cl --collect=survey``
        and the collected project is summarized with ``--report=survey``
        into ``summary.rpt`` for the sanity/performance checks.
        """
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = [
            'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-cray'
        ]
        # self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_systems = ['*']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'cpu'}
        # }}}

        # {{{ compile
        self.testname = 'sqpatch'
        self.tool = 'advixe-cl'
        self.modules = ['advisor']
        self.prebuild_cmds = ['module rm xalt', 'module list -t']
        self.tool_v = '2020_update2'
        tc_ver = '20.08'
        # Per-environment module chains: toolchain module + Advisor module.
        self.tool_modules = {
            'PrgEnv-gnu':
            [f'CrayGNU/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
            'PrgEnv-intel':
            [f'CrayIntel/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
            'PrgEnv-cray':
            [f'CrayCCE/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
            'PrgEnv-pgi':
            [f'CrayPGI/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
        }
        self.prgenv_flags = {
            'PrgEnv-gnu': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
                '-DNDEBUG'
            ],
            'PrgEnv-intel': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
                '-DNDEBUG'
            ],
            'PrgEnv-cray': [
                '-I.', '-I./include', '-std=c++17', '-g', '-Ofast',
                '-DUSE_MPI', '-DNDEBUG'
            ],
            'PrgEnv-pgi': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
                '-DNDEBUG'
            ],
        }
        self.build_system = 'SingleSource'
        # self.build_system.cxx = 'CC'
        self.sourcepath = f'{self.testname}.cpp'
        # The build produces a binary named after the tool; it is renamed
        # to target_executable so the tool can wrap the real executable.
        self.executable = self.tool
        self.target_executable = f'./{self.testname}.exe'
        self.postbuild_cmds = [f'mv {self.tool} {self.target_executable}']
        # }}}

        # {{{ run
        ompthread = 1
        self.num_tasks = mpi_task
        self.cubeside = cubeside_dict[mpi_task]
        self.steps = steps_dict[mpi_task]
        self.name = 'sphexa_advisor_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'.format(
            self.testname, mpi_task, ompthread, self.cubeside, self.steps)
        self.num_tasks_per_node = 24
        # {{{ ht:
        # self.num_tasks_per_node = mpitask if mpitask < 36 else 36   # noht
        # self.use_multithreading = False  # noht
        # self.num_tasks_per_core = 1      # noht

        # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
        # self.use_multithreading = True # ht
        # self.num_tasks_per_core = 2    # ht
        # }}}
        self.num_cpus_per_task = ompthread
        self.num_tasks_per_core = 2
        self.use_multithreading = True
        self.exclusive = True
        self.time_limit = '10m'
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
            # to avoid core when reporting (venv/jenkins):
            'LANG': 'C',
            'LC_ALL': 'C',
        }
        self.dir_rpt = 'rpt'
        self.tool_opts = '--collect=survey --search-dir src:rp=. ' \
                         '--data-limit=0 --no-auto-finalize --trace-mpi ' \
                         '--project-dir=%s -- ' % self.dir_rpt
        self.executable_opts = [
            self.tool_opts, self.target_executable, f'-n {self.cubeside}',
            f'-s {self.steps}', '2>&1'
        ]
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.summary_rpt = 'summary.rpt'
        self.prerun_cmds = [
            'module rm xalt',
            f'{self.tool} --version >> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
        ]
        self.postrun_cmds = [
            # symlink the per-node result dir so the report step finds it:
            f'cd {self.dir_rpt} ;ln -s nid?????.000 e000 ;cd -',
            f'{self.tool} --report=survey --project-dir={self.dir_rpt} '
            f'&> {self.summary_rpt}',
        ]
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version:
            sn.assert_true(sphsintel.advisor_version(self)),
            # check the summary report:
            sn.assert_found(r'advixe: This data has been saved',
                            self.summary_rpt),
        ])
        # }}}

        # {{{ performance
        # {{{ internal timers
        self.prerun_cmds += ['echo starttime=`date +%s`']
        self.postrun_cmds += ['echo stoptime=`date +%s`']
        # }}}

        # {{{ perf_patterns:
        self.perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
        # tool
        self.perf_patterns.update({
            'advisor_elapsed':
            sphsintel.advisor_elapsed(self),
            'advisor_loop1_line':
            sphsintel.advisor_loop1_line(self),
        })
        # }}}

        # {{{ reference:
        self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
        # tool:
        self.reference['*:advisor_elapsed'] = (0, None, None, 's')
        # TODO: fix loop1_fname to avoid error with --report-file:
        # "Object of type '_DeferredExpression' is not JSON serializable"
        # loop1_fname = sphsintel.advisor_loop1_filename(self)
        loop1_fname = ''
        self.reference['*:advisor_loop1_line'] = (0, None, None, loop1_fname)
Beispiel #19
0
    def __init__(self, mpitask, steps, cycles, rumetric):
        """Set up a Score-P tracing check of the OpenACC sqpatch mini-app.

        Args:
            mpitask: number of MPI ranks; keys into the weak-scaling
                ``size_dict`` below (roughly 10^6 particles per node).
            steps: number of simulation steps passed to the executable.
            cycles: sampling period; when > 0, enables
                ``SCOREP_SAMPLING_EVENTS=perf_cycles@<cycles>``.
            rumetric: value for ``SCOREP_METRIC_RUSAGE`` (rusage counters).

        The trace is post-processed with otf2_cli_profile
        (``otf-profiler``) to produce a JSON performance report.
        """
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = ['PrgEnv-pgi']
        # self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_systems = ['*']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'gpu', 'openacc'}
        # }}}

        # {{{ compile
        self.testname = 'sqpatch'
        self.prebuild_cmds = ['module rm xalt']
        self.prgenv_flags = {
            'PrgEnv-pgi': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DNDEBUG',
                '-DUSE_MPI', '-DUSE_ACC', '-DUSE_STD_MATH_IN_KERNELS', '-acc',
                '-ta=tesla:managed,cc60'
            ],  # -mp
        }
        # ---------------------------------------------------------------- tool
        self.modules = ['craype-accel-nvidia60']
        tc_ver = '19.10'
        tool_ver = '6.0'
        postproc_tool_ver = '4ef9d3f'
        postproc_tool_serial = 'otf-profiler'
        self.postproc_tool = 'otf-profiler-mpi'
        self.tool_modules = {
            'PrgEnv-pgi': ['Score-P/%s-CrayPGI-%s' % (tool_ver, tc_ver)]
        }
        # ---------------------------------------------------------------- tool
        self.build_system = 'SingleSource'
        # Build through the Score-P compiler wrapper for instrumentation:
        self.build_system.cxx = 'scorep-CC'
        self.sourcepath = '%s.cpp' % self.testname
        self.executable = '%s.exe' % self.testname
        # {{{ openmp:
        # 'PrgEnv-intel': ['-qopenmp'],
        # 'PrgEnv-gnu': ['-fopenmp'],
        # 'PrgEnv-pgi': ['-mp'],
        # 'PrgEnv-cray_classic': ['-homp'],
        # 'PrgEnv-cray': ['-fopenmp'],
        # # '-homp' if lang == 'F90' else '-fopenmp',
        # }}}
        # }}}

        # {{{ run
        ompthread = 1
        # weak scaling = 10^6 p/cn:
        size_dict = {
            1: 100,
            2: 126,
            4: 159,
            8: 200,
            16: 252,
            32: 318,
            64: 400,
            128: 504,
            256: 635
        }
        cubesize = size_dict[mpitask]
        self.name = \
            'openacc_scorepT_{}_{:03d}mpi_{:03d}omp_{}n_{}steps_{}cycles_{}'. \
            format(self.testname, mpitask, ompthread, cubesize, steps, cycles,
                   rumetric)
        self.num_tasks = mpitask
        self.num_tasks_per_node = 1
        # {{{ ht:
        # self.num_tasks_per_node = mpitask if mpitask < 36 else 36   # noht
        # self.use_multithreading = False  # noht
        # self.num_tasks_per_core = 1      # noht

        # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
        # self.use_multithreading = True # ht
        # self.num_tasks_per_core = 2    # ht
        # }}}
        self.num_cpus_per_task = ompthread
        self.num_tasks_per_core = 1
        self.use_multithreading = False
        self.exclusive = True
        self.time_limit = '10m'
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'SCOREP_OPENACC_ENABLE': 'yes',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
            'SCOREP_WRAPPER_INSTRUMENTER_FLAGS': '"--mpp=mpi --openacc"',
            'SCOREP_ENABLE_PROFILING': 'false',
            'SCOREP_ENABLE_TRACING': 'true',
            'SCOREP_FILTERING_FILE': 'myfilt',
            'SCOREP_VERBOSE': 'true',
            # Needed to avoid "No free memory page available"
            'SCOREP_TOTAL_MEMORY': '1G',
            # Adding some performance metrics:
            # http://scorepci.pages.jsc.fz-juelich.de/scorep-pipelines/docs/
            # => scorep-6.0/html/measurement.html#rusage_counters
            # => https://vampir.eu/public/files/pdf/spcheatsheet_letter.pdf
            #   'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
            #   'SCOREP_METRIC_RUSAGE': 'ru_maxrss,ru_utime',
            #   'SCOREP_METRIC_RUSAGE': 'ru_maxrss',
            #   'SCOREP_METRIC_RUSAGE': '',
            'SCOREP_METRIC_RUSAGE': rumetric,
            'SCOREP_METRIC_PAPI': 'PAPI_TOT_INS,PAPI_TOT_CYC',
        }
        self.rusage_name = sn.evaluate(sphsscacc.otf2cli_metric_name(self))
        if cycles > 0:
            self.variables['SCOREP_SAMPLING_EVENTS'] \
                = 'perf_cycles@%s' % cycles

        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.info_rpt = 'scorep-info.rpt'
        self.rpt = 'rpt'
        self.rpt_jsn = 'result.json'
        self.rpt_inclusive = '%s.inclusive' % self.rpt
        self.rpt_exclusive = '%s.exclusive' % self.rpt
        self.tool = 'scorep'
        self.executable_opts = ['-n %s' % cubesize, '-s %s' % steps]
        self.prerun_cmds = [
            'module rm xalt',
            '%s --version &> %s' % (self.tool, self.version_rpt),
            'which %s &> %s' % (self.tool, self.which_rpt),
            'scorep-info config-summary &> %s' % self.info_rpt,
        ]
        self.postrun_cmds = [
            # otf-profiler is needed for postprocessing but i managed to
            # compile only gnu version => removing CubeLib to avoid conflict
            # with CrayPGI:
            'module rm CubeLib',
            'module load otf2_cli_profile/%s-CrayGNU-%s' %
            (postproc_tool_ver, tc_ver),
            # report post-processing tools version
            '%s --version' % postproc_tool_serial,
            # OTF-Profiler version 2.0.0
            'which %s %s' % (postproc_tool_serial, self.postproc_tool),
            # create result.json performance report from tracefile
            # see otf_profiler method (@run_after)
        ]
        # }}}

        # {{{ sanity
        # sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version and configuration:
            sn.assert_true(sphsscorep.scorep_version(self)),
            # Needed when using papi counters:
            # sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
            sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
            sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
        ])
        # }}}

        # {{{ performance
        # {{{ internal timers
        # use linux date as timer:
        self.prerun_cmds += ['echo starttime=`date +%s`']
        # }}}

        # {{{ perf_patterns:
        basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
        tool_perf_patterns = sn.evaluate(sphsscacc.otf2cli_perf_patterns(self))
        self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
        # }}}

        # {{{ reference:
        # NOTE(review): the second assignment overwrites the basic reference
        # set on the previous line. Confirm otf2cli_tool_reference() reads
        # self.reference and extends it; otherwise the two dicts should be
        # merged (cf. the {**basic, **tool} pattern used for perf_patterns).
        self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
        self.reference = sn.evaluate(sphsscacc.otf2cli_tool_reference(self))
Beispiel #20
0
 def assert_foo(self):
     """Aggregate sanity condition over ``foo``, ``ham`` and
     ``spam.eggs.bacon``.
     """
     conditions = [
         sn.assert_eq(self.foo, 3),
         sn.assert_true(self.ham),
         sn.assert_eq(self.spam.eggs.bacon, 10),
     ]
     return sn.all(conditions)
Beispiel #21
0
 def __init__(self):
     """Minimal test: no target systems/environments, trivially-true sanity."""
     # A constant-true deferred sanity check.
     self.sanity_patterns = sn.assert_true(1)
     self.valid_prog_environs = []
     self.valid_systems = []
Beispiel #22
0
class Bacon(rfm.RunOnlyRegressionTest):
    """Run-only check that merely echoes; ``bacon`` defaults to -1."""

    executable = 'echo'
    sanity_patterns = sn.assert_true(1)
    # Integer test variable, overridable from the command line.
    bacon = variable(int, value=-1)
 class MyTest(rfm.RunOnlyRegressionTest):
     """Run-only echo test valid on every system/environment, no sources."""

     sourcesdir = None
     executable = 'echo'
     valid_systems = ['*']
     valid_prog_environs = ['*']
     sanity_patterns = sn.assert_true(1)
Beispiel #24
0
 def validate_download(self):
     """Deferred sanity check: the OSU benchmarks tree must be unpacked."""
     extracted = os.path.exists('osu-micro-benchmarks-5.6.2')
     return sn.assert_true(extracted)
Beispiel #25
0
    def __init__(self, mpi_task):
        """Set up an Intel Inspector memory check of the sqpatch mini-app.

        Args:
            mpi_task: number of MPI ranks; also keys into the module-level
                ``cubeside_dict`` and ``steps_dict`` lookup tables.

        The executable is launched through ``inspxe-cl -collect mi1``
        (memory-error analysis) and the result dirs are summarized into
        ``summary.rpt`` for the sanity/performance checks.
        """
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = [
            'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-cray'
        ]
        # self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_systems = ['*']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'cpu'}
        # }}}

        # {{{ compile
        self.testname = 'sqpatch'
        self.tool = 'inspxe-cl'
        self.modules = ['inspector']
        self.prebuild_cmds = ['module rm xalt', 'module list -t']
        self.tool_v = '2020_update2'
        tc_ver = '20.08'
        # Per-environment module chains: toolchain module + Inspector module.
        self.tool_modules = {
            'PrgEnv-gnu':
            [f'CrayGNU/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
            'PrgEnv-intel':
            [f'CrayIntel/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
            'PrgEnv-cray':
            [f'CrayCCE/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
            'PrgEnv-pgi':
            [f'CrayPGI/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
        }
        self.prgenv_flags = {
            'PrgEnv-gnu': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
                '-DNDEBUG'
            ],
            'PrgEnv-intel': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
                '-DNDEBUG'
            ],
            'PrgEnv-cray': [
                '-I.', '-I./include', '-std=c++17', '-g', '-Ofast',
                '-DUSE_MPI', '-DNDEBUG'
            ],
            'PrgEnv-pgi': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
                '-DNDEBUG'
            ],
        }
        self.build_system = 'SingleSource'
        # self.build_system.cxx = 'CC'
        self.sourcepath = f'{self.testname}.cpp'
        # The build produces a binary named after the tool; it is renamed
        # to target_executable so the tool can wrap the real executable.
        self.executable = self.tool
        self.target_executable = f'./{self.testname}.exe'
        self.postbuild_cmds = [f'mv {self.tool} {self.target_executable}']
        # }}}

        # {{{ run
        ompthread = 1
        self.num_tasks = mpi_task
        self.cubeside = cubeside_dict[mpi_task]
        self.steps = steps_dict[mpi_task]
        self.name = 'sphexa_inspector_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
            format(self.testname, mpi_task, ompthread, self.cubeside,
                   self.steps)
        self.num_tasks_per_node = 24
        # {{{ ht:
        # self.num_tasks_per_node = mpitask if mpitask < 36 else 36   # noht
        # self.use_multithreading = False  # noht
        # self.num_tasks_per_core = 1      # noht

        # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
        # self.use_multithreading = True # ht
        # self.num_tasks_per_core = 2    # ht
        # }}}
        self.num_cpus_per_task = ompthread
        self.num_tasks_per_core = 2
        self.use_multithreading = True
        self.exclusive = True
        self.time_limit = '10m'
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        }
        self.dir_rpt = 'rpt'
        self.tool_opts = '-collect mi1 -trace-mpi -no-auto-finalize -r %s' \
            % self.dir_rpt
        self.executable_opts = [
            self.tool_opts, self.target_executable, f'-n {self.cubeside}',
            f'-s {self.steps}', '2>&1'
        ]
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.summary_rpt = 'summary.rpt'
        self.prerun_cmds = [
            'module rm xalt',
            f'{self.tool} --version >> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
        ]
        self.postrun_cmds = [
            f'{self.tool} -r {self.dir_rpt}.* -report=summary '
            f'&> {self.summary_rpt}',
            # '%s -report=problems &> %s' % (self.tool, self.problems_rpt),
            # '%s -report=observations &> %s' %
            # (self.tool, self.observations_rpt),
        ]
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version:
            sn.assert_true(sphsintel.inspector_version(self)),
            # check the summary report:
            sn.assert_found(r'\d new problem\(s\) found', self.summary_rpt),
        ])
        # }}}

        # {{{ performance
        # {{{ internal timers
        self.prerun_cmds += ['echo starttime=`date +%s`']
        self.postrun_cmds += ['echo stoptime=`date +%s`']
        # }}}

        # {{{ perf_patterns:
        self.perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
        # tool
        self.perf_patterns.update({
            'Memory not deallocated':
            sphsintel.inspector_not_deallocated(self),
            # 'Memory leak': sphsintel.inspector_leak(self),
        })
        # }}}

        # {{{ reference:
        self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
        # tool
        self.reference['*:Memory not deallocated'] = (0, None, None, '')
Beispiel #26
0
    def __init__(self, mpi_task, cubeside):
        """Set up a perftools-lite-gpu check of the CUDA sqpatch mini-app.

        Args:
            mpi_task: number of MPI ranks; also keys into the module-level
                ``steps_dict`` lookup table.
            cubeside: cube side (problem size) passed to the executable.

        Builds the ``mpi+omp+cuda`` Makefile target under the
        perftools-lite-gpu instrumentation and checks/reads the generated
        ``RUNTIME.rpt`` report.
        """
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = ['PrgEnv-gnu']
        # self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_systems = ['*']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools'}
        # }}}

        # {{{ compile
        self.testname = 'sqpatch'
        self.prebuild_cmds = ['module rm xalt', 'module list -t']
        tc_ver = '20.08'
        self.modules = ['craype-accel-nvidia60', 'perftools-base']
        self.tool = 'pat_report'
        self.tool_modules = {
            'PrgEnv-gnu': [f'CrayGNU/.{tc_ver}', 'perftools-lite-gpu'],
        }
        self.build_system = 'Make'
        self.build_system.makefile = 'Makefile'
        self.build_system.nvcc = 'nvcc'
        self.build_system.cxx = 'CC'
        self.build_system.max_concurrency = 2
        self.sourcepath = f'{self.testname}.cpp'
        self.executable = f'./{self.testname}.exe'
        self.target_executable = 'mpi+omp+cuda'
        self.build_system.options = [
            self.target_executable,
            f'MPICXX="{self.build_system.cxx}"',
            'SRCDIR=.',
            'BUILDDIR=.',
            'BINDIR=.',
            'CXXFLAGS=-std=c++14',
            'CUDA_PATH=$CUDATOOLKIT_HOME',
            # The makefile adds -DUSE_MPI
        ]
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.rpt = 'RUNTIME.rpt'
        self.postbuild_cmds = [
            # rename the Makefile's .app output to the expected executable:
            f'mv {self.target_executable}.app '
            f'{self.executable}',
            f'{self.tool} -V &> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
        ]
        # }}}

        # {{{ run
        ompthread = 1
        self.num_tasks = mpi_task
        self.cubeside = cubeside
        self.steps = steps_dict[mpi_task]
        self.name = \
            'sphexa_perftools-gpu-cuda_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'. \
            format(self.testname, mpi_task, ompthread, self.cubeside,
                   self.steps)
        self.num_tasks_per_node = 1
        self.num_cpus_per_task = ompthread
        self.num_tasks_per_core = 1
        self.use_multithreading = False
        self.exclusive = True
        self.time_limit = '10m'
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        }
        self.executable_opts = [
            f'-n {self.cubeside}', f'-s {self.steps}', '2>&1'
        ]
        self.prerun_cmds = ['module rm xalt']
        self.postrun_cmds = [
            # copy the tool-generated report next to the job output:
            f'cp {self.executable}+*/rpt-files/RUNTIME.rpt {self.rpt}'
        ]
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            sn.assert_true(sphsptlgpu.tool_version(self)),
        ])
        # }}}

        # {{{ performance
        # {{{ internal timers
        self.prerun_cmds += ['echo starttime=`date +%s`']
        self.postrun_cmds += ['echo stoptime=`date +%s`']
        # }}}

        # {{{ perf_patterns:
        basic_perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
        tool_perf_patterns = sn.evaluate(sphsptlgpu.tool_perf_patterns(self))
        self.perf_patterns = {**basic_perf_patterns, **tool_perf_patterns}
        # }}}

        # {{{ reference:
        self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
        # tool's reference
        # Zero-valued baselines with no bounds: record-only metrics.
        myzero_p = (0, None, None, '%')
        myzero_mb = (0, None, None, 'MiBytes')
        self.reference['*:host_time%'] = myzero_p
        self.reference['*:device_time%'] = myzero_p
        self.reference['*:acc_copyin'] = myzero_mb
        self.reference['*:acc_copyout'] = myzero_mb
Beispiel #27
0
 class Test0(rfm.RegressionTest):
     """Minimal regression check: run ``echo`` with a trivially-true sanity.

     Valid on both partitions of ``sys0`` and under both programming
     environments; the sanity expression always evaluates to true, so the
     check passes whenever the job itself runs.
     """

     # Trivially-true deferred expression: the check succeeds if it runs.
     sanity_patterns = sn.assert_true(1)
     executable = 'echo'
     valid_prog_environs = ['e0', 'e1']
     valid_systems = ['sys0:p0', 'sys0:p1']
Beispiel #28
0
    def __init__(self):
        """Configure the Scalasca (Score-P) tool-validation test.

        The test instruments and runs the ``sedov`` SPH-EXA case under
        ``scalasca``, then post-processes the resulting ``profile.cubex``
        with ``scalasca -examine`` and ``cube_calltree``.  Sanity checks
        (set at the end) validate both the job output and the reports the
        tool produces.
        """
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = ['*']
        self.valid_systems = ['*']
        self.modules = ['Scalasca']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'cpu'}
        # }}}

        # {{{ compile
        # Report filenames written at build time; read back by the sanity
        # helpers (sphsscorep.*) below.
        self.testname = 'sedov'
        self.tool = 'scalasca'
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.info_rpt = 'info.rpt'
        self.cubetool = 'cube_calltree'
        self.prebuild_cmds = [
            f'{self.tool} -V &> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
            f'scorep --version >> {self.version_rpt}',
            f'which scorep >> {self.which_rpt}',
            # f'which vampir >> {self.which_rpt}',
            f'which {self.cubetool} >> {self.which_rpt}',
            f'scorep-info config-summary &> {self.info_rpt}',
            f'# step1: prepare executable with: scalasca -instrument (skin)',
            f'# step2: run executable with: scalasca -analyze (scan)',
            f'# step3: explore report with: scalasca -examine (square)',
            f'# step4: get calltree with: cube_calltree'
        ]
        # }}}

        # {{{ run
        # Score-P runtime configuration via environment variables.
        # NOTE(review): self.cycles is not set in this method — presumably a
        # test parameter/attribute defined elsewhere in the class; confirm.
        self.variables = {
            'SCOREP_ENABLE_UNWINDING': 'true',
            'SCOREP_SAMPLING_EVENTS': f'perf_cycles@{self.cycles}',
            'SCOREP_PROFILING_MAX_CALLPATH_DEPTH': '10',
            'SCOREP_TOTAL_MEMORY': '1G',
        }
        self.rpt = 'rpt'
        self.rpt_score = 'scorep-score.rpt'
        self.rpt_exclusive = 'cube_calltree_exclusive.rpt'
        self.rpt_inclusive = 'cube_calltree_inclusive.rpt'
        #
        cubetree = 'cube_calltree -m time -p -t 1'
        # -m metricname -- print out values for the metric <metricname>
        # -i            -- calculate inclusive values instead of exclusive
        # -t treshold   -- print out only call path with a value larger
        #                  than <treshold>%
        # -p            -- diplay percent value
        # Append (+=) rather than assign: postrun_cmds is a framework-level
        # attribute that may already hold commands set elsewhere.
        self.postrun_cmds += [
            f'# -------------------------------------------------------------',
            # generate summary.cubex from profile.cubex with: scalasca -examine
            # (it will report scoring too)
            f'{self.tool} -examine -s scorep_*sum/profile.cubex &> {self.rpt}',
            # rpt will always be written to scorep.score, not into self.rpt
            f'rm -f core*',
            # this file is used for sanity checks:
            f'cp scorep_*_sum/scorep.score {self.rpt_score}',
            # exclusive time:
            f'({cubetree} scorep_*_sum/summary.cubex ;rm -f core*) &>'
            f' {self.rpt_exclusive}',
            # inclusive time:
            f'({cubetree} -i scorep_*_sum/summary.cubex ;rm -f core*) &>'
            f' {self.rpt_inclusive}',
            f'# -------------------------------------------------------------',
        ]
        # }}}

        # {{{ sanity
        # All conditions must hold: job output, Score-P build configuration
        # and the reports produced by the tool.
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version and configuration:
            # sn.assert_true(sphsscorep.scorep_assert_version(self)),
            sn.assert_true(sphsscorep.scorep_info_papi_support(self)),
            sn.assert_true(sphsscorep.scorep_info_perf_support(self)),
            sn.assert_true(sphsscorep.scorep_info_unwinding_support(self)),
            # check the tool report:
            sn.assert_found(r'Estimated aggregate size of event trace',
                            self.rpt_score),
            sn.assert_found(r'^S=C=A=N: \S+ complete\.', self.stderr)
        ])
Beispiel #29
0
    def set_sanity_gpu(self):
        # {{{
        '''
        This method runs sanity checks on the following logs:

        - info cuda devices

        .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_devices.log
          :lines: 1-3

        - info cuda kernels

        .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_kernels.log
          :lines: 5-7

        - info cuda threads

        .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_threads.log
          :lines: 1-5, 458-459

        - navigate between cuda kernels/blocks/threads/

        .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_navigate.log
          :lines: 5-6, 17-18, 33-34
          :emphasize-lines: 1, 3, 5

        - inspect variables (std::vector)

        .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_std_vector.log
          :lines: 1-25
          :emphasize-lines: 4

        - inspect variables (int*)

        .. literalinclude:: ../../reframechecks/debug/res/cuda-gdb/info_const_int.log
          :lines: 6-37
          :emphasize-lines: 17
        '''
        # }}}
        # Local import hoisted here: the original imported math twice
        # mid-function (before each cube-size computation).
        import math

        self.gpu_specs = {}       # extracted (deferred) GPU properties
        self.gpu_specs_bool = {}  # True iff extracted value matches ref
        # {{{ info_devices.log:
        #   Dev PCI Bus/Dev ID Name Description SM Type SMs Warps/SM Lanes/Warp
        #    Max Regs/Lane Active SMs Mask
        # *   0   88:00.0 Tesla V100-SXM2-16GB   GV100GL-A   sm_70  80 64 ...
        #                       ^^^^                         ^^^^^  ^^ ^^
        #               32 256 0x000000000000ffffffffffffffffffff
        #               ^^
        self.rpt = os.path.join(self.stagedir, self.log_devices)
        # Reference hardware specs, keyed by gpu_name extracted below.
        # (The original pre-initialized ref_gpu_specs with empty dicts that
        # this assignment immediately overwrote — dead code, removed.)
        ref_gpu_specs = {
            'V100': {
                'capability': 'sm_70',
                'sms': 80,
                'WarpsPerSM': 64,
                'LanesPerWarp': 32,  # = warpSize
                'max_threads_per_sm': 2048,
                'max_threads_per_device': 163840,
            },
            'P100': {
                'capability': 'sm_60',
                'sms': 56,
                'WarpsPerSM': 64,
                'LanesPerWarp': 32,  # = warpSize
                'max_threads_per_sm': 2048,
                'max_threads_per_device': 114688,
            },
        }
        regex = (r'Tesla (?P<gpu_name>\S+)-\S+-\S+\s+\S+\s+(?P<cap>sm_\d+)\s+'
                 r'(?P<sms>\d+)\s+(?P<WarpsPerSM>\d+)\s+(?P<LanesPerWarp>\d+)')
        # --- get gpu_name (V100 or P100):
        gpu_name = sn.evaluate(sn.extractsingle(regex, self.rpt, 'gpu_name'))
        # --- get capability (True means that extracted value matches ref):
        res = sn.extractsingle(regex, self.rpt, 'cap')
        self.gpu_specs['capability'] = res
        self.gpu_specs_bool['capability'] = \
            (res == ref_gpu_specs[gpu_name]['capability'])
        # --- get sms:
        res = sn.extractsingle(regex, self.rpt, 'sms', int)
        self.gpu_specs['sms'] = res
        self.gpu_specs_bool['sms'] = (res == ref_gpu_specs[gpu_name]['sms'])
        # --- get WarpsPerSM:
        res = sn.extractsingle(regex, self.rpt, 'WarpsPerSM', int)
        self.gpu_specs['WarpsPerSM'] = res
        self.gpu_specs_bool['WarpsPerSM'] = \
            (res == ref_gpu_specs[gpu_name]['WarpsPerSM'])
        # --- get LanesPerWarp|warpSize:
        res = sn.extractsingle(regex, self.rpt, 'LanesPerWarp', int)
        self.gpu_specs['LanesPerWarp'] = res
        self.gpu_specs_bool['LanesPerWarp'] = \
            (res == ref_gpu_specs[gpu_name]['LanesPerWarp'])
        # --- threads_per_sm <= LanesPerWarp * WarpsPerSM
        res = self.gpu_specs['LanesPerWarp'] * self.gpu_specs['WarpsPerSM']
        self.gpu_specs['max_threads_per_sm'] = res
        self.gpu_specs_bool['max_threads_per_sm'] = \
            (res == ref_gpu_specs[gpu_name]['max_threads_per_sm'])
        # --- threads_per_device <= threads_per_sm * sms
        res = self.gpu_specs['sms'] * self.gpu_specs['max_threads_per_sm']
        self.gpu_specs['max_threads_per_device'] = res
        self.gpu_specs_bool['max_threads_per_device'] = \
            (res == ref_gpu_specs[gpu_name]['max_threads_per_device'])
        # --- max_np of 1gpu = f(max_threads_per_device) where np = cube_size^3
        self.gpu_specs['max_cubesz'] = sn.defer(
            math.ceil(pow(sn.evaluate(res), 1 / 3)))
        # }}}

        # {{{ info_kernels.log:
        # Kernel Parent Dev Grid Status SMs Mask   GridDim  BlockDim Invocation
        # * 0 - 0 3 Active 0x (106,1,1) (256,1,1) ...::density<double>(n=27000,
        #                      ^^^^^^^   ^^^^^^^                         ^^^^^
        # ---------------------------------------------------------------------
        self.log = os.path.join(self.stagedir, self.log_kernels)
        regex = (r'\*.*Active \S+ \((?P<grid_x>\d+),(?P<grid_y>\d+),'
                 r'(?P<grid_z>\d+)\)\s+\((?P<block_x>\d+),(?P<block_y>\d+),'
                 r'(?P<block_z>\d+)\).*\(n=(?P<np>\d+), ')
        grid_x = sn.extractsingle(regex, self.log, 'grid_x', int)
        grid_y = sn.extractsingle(regex, self.log, 'grid_y', int)
        grid_z = sn.extractsingle(regex, self.log, 'grid_z', int)
        block_x = sn.extractsingle(regex, self.log, 'block_x', int)
        block_y = sn.extractsingle(regex, self.log, 'block_y', int)
        block_z = sn.extractsingle(regex, self.log, 'block_z', int)
        # Renamed from `np` to avoid shadowing the conventional numpy alias:
        num_particles = sn.extractsingle(regex, self.log, 'np', int)
        self.kernel_grid = grid_x * grid_y * grid_z
        self.kernel_block = block_x * block_y * block_z
        self.kernel_np = num_particles
        self.gpu_specs['cubesz'] = \
            sn.defer(math.ceil(pow(sn.evaluate(self.kernel_np), 1/3)))

        # {{{ TODO:tuple
        # https://github.com/eth-cscs/reframe/blob/master/cscs-checks/
        # prgenv/affinity_check.py#L38
        # regex=(r'\*.*Active \S+ (?P<griddim>\(\d+,\d+,\d+\))\s+(?P<blockdim>'
        #        r'\(\d+,\d+,\d+\)).*\(n=(?P<np>\d+), ')
        # from functools import reduce
        # self.res  = reduce(lambda x, y: x*y, list(res))
        # sn.extractsingle(regex, self.stdout, 'nrgy',
        #   conv=lambda x: int(x.replace(',', '')))
        # res: ('(', '1', '0', '6', ',', '1', ',', '1', ')')
        # }}}
        # }}}

        # {{{ info_threads.log:
        # BlockIdx ThreadIdx To BlockIdx ThreadIdx Count Virtual PC Filename L
        # Kernel 0
        # * (0,0,0) (0,0,0)  (1,0,0) (63,0,0) 320 0x0... ../cudaDensity.cu 27
        #   (1,0,0) (64,0,0) (1,0,0) (95,0,0)  32 0x0... ../cudaDensity.cu 26
        #   etc...                        sum(^^^)
        # ---------------------------------------------------------------------
        self.log = os.path.join(self.stagedir, self.log_threads)
        regex = r'(\(\S+\)\s+){4}(?P<nth>\d+)\s+0x'
        self.threads_np = sn.sum(sn.extractall(regex, self.log, 'nth', int))
        # }}}

        # {{{ info_navigate.log:
        # gridDim=(106,1,1) blockDim=(256,1,1) blockIdx=(0,0,0) \
        # threadIdx=(0,0,0) warpSize=32 thid=0
        # kernel 0 grid 3 block (0,0,0) thread (0,0,0) device 0 sm 0 warp 0 ...
        # --
        # gridDim=(106,1,1) blockDim=(256,1,1) blockIdx=(105,0,0)
        # threadIdx=(255,0,0) warpSize=32 thid=27135
        # kernel 0 grid 3 block (105,0,0) thread (255,0,0) device 0 sm 43 ...
        # --
        # gridDim=(106,1,1) blockDim=(256,1,1) blockIdx=(55,0,0)
        # threadIdx=(255,0,0) warpSize=32 thid=14335
        # kernel 0 grid 3 block (55,0,0) thread (255,0,0) device 0 sm 55 ...
        # ---------------------------------------------------------------------
        self.log = os.path.join(self.stagedir, self.log_navigate)
        regex = r'^gridDim.*warpSize=\d+ thid=(?P<th>\d+)$'
        self.thids = sn.extractall(regex, self.log, 'th', int)
        # }}}

        # {{{ info_std_vector.log:
        # --- get vector length(True means that extracted value matches ref):
        # NOTE(review): self.cubesize is not set in this method — presumably
        # set by the calling test; confirm.
        self.rpt = os.path.join(self.stagedir, self.log_stdvector)
        # std::vector of length 27000, capacity 27000
        regex = r'std::vector of length (?P<vec_len1>\d+),'
        res = sn.extractsingle(regex, self.rpt, 'vec_len1', int)
        self.gpu_specs['vec_len1'] = res
        self.gpu_specs_bool['vec_len1'] = (res == self.cubesize**3)
        # Vector size = 27000 (pvector)
        regex = r'^Vector size = (?P<vec_len2>\d+)$'
        res = sn.extractsingle(regex, self.rpt, 'vec_len2', int)
        self.gpu_specs['vec_len2'] = res
        self.gpu_specs_bool['vec_len2'] = (res == self.cubesize**3)
        # }}}

        # {{{ --- sanity_patterns:
        self.sanity_patterns = sn.all([
            sn.assert_true(self.gpu_specs_bool['capability']),
            sn.assert_true(self.gpu_specs_bool['sms']),
            sn.assert_true(self.gpu_specs_bool['WarpsPerSM']),
            sn.assert_true(self.gpu_specs_bool['LanesPerWarp']),
            sn.assert_true(self.gpu_specs_bool['max_threads_per_sm']),
            sn.assert_true(self.gpu_specs_bool['max_threads_per_device']),
            sn.assert_true(self.gpu_specs_bool['vec_len1']),
            sn.assert_true(self.gpu_specs_bool['vec_len2']),
            # NO: sn.assert_true(self.gpu_specs_bool),
        ])
Beispiel #30
0
    def __init__(self, mpi_task):
        # {{{ pe
        self.descr = 'Tool validation'
        self.valid_prog_environs = [
            'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-cray'
        ]
        # self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_systems = ['*']
        self.maintainers = ['JG']
        self.tags = {'sph', 'hpctools', 'cpu'}
        # }}}

        # {{{ compile
        self.testname = 'sqpatch'
        self.tool = 'vtune'
        self.modules = ['vtune_profiler']
        self.prebuild_cmds = ['module rm xalt', 'module list -t']
        self.tool_v = '2020_update2'
        tc_ver = '20.08'
        self.tool_modules = {
            'PrgEnv-gnu':
            [f'CrayGNU/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
            'PrgEnv-intel':
            [f'CrayIntel/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
            'PrgEnv-cray':
            [f'CrayCCE/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
            'PrgEnv-pgi':
            [f'CrayPGI/.{tc_ver}', f'{self.modules[0]}/{self.tool_v}'],
        }
        self.prgenv_flags = {
            'PrgEnv-gnu': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
                '-DNDEBUG'
            ],
            'PrgEnv-intel': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
                '-DNDEBUG'
            ],
            'PrgEnv-cray': [
                '-I.', '-I./include', '-std=c++17', '-g', '-Ofast',
                '-DUSE_MPI', '-DNDEBUG'
            ],
            'PrgEnv-pgi': [
                '-I.', '-I./include', '-std=c++14', '-g', '-O3', '-DUSE_MPI',
                '-DNDEBUG'
            ],
        }
        self.build_system = 'SingleSource'
        # self.build_system.cxx = 'CC'
        self.sourcepath = f'{self.testname}.cpp'
        self.executable = self.tool
        self.target_executable = f'./{self.testname}.exe'
        self.postbuild_cmds = [f'mv {self.tool} {self.target_executable}']
        # }}}

        # {{{ run
        ompthread = 1
        self.num_tasks = mpi_task
        self.cubeside = cubeside_dict[mpi_task]
        self.steps = steps_dict[mpi_task]
        self.name = 'sphexa_vtune_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'.format(
            self.testname, mpi_task, ompthread, self.cubeside, self.steps)
        self.num_tasks_per_node = 24
        # {{{ ht:
        # self.num_tasks_per_node = mpitask if mpitask < 36 else 36   # noht
        # self.use_multithreading = False  # noht
        # self.num_tasks_per_core = 1      # noht

        # self.num_tasks_per_node = mpitask if mpitask < 72 else 72
        # self.use_multithreading = True # ht
        # self.num_tasks_per_core = 2    # ht
        # }}}
        self.num_cpus_per_task = ompthread
        self.num_tasks_per_core = 2
        self.use_multithreading = True
        self.exclusive = True
        self.time_limit = '10m'
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        }
        self.dir_rpt = 'rpt'
        collect = 'hotspots'
        self.tool_opts = '-trace-mpi -collect %s -r ./%s -data-limit=0' % \
            (collect, self.dir_rpt)  # example dir: rpt.nid00032
        self.executable_opts = [
            self.tool_opts,
            '%s' % self.target_executable, f'-n {self.cubeside}',
            f'-s {self.steps}', '2>&1'
        ]
        self.version_rpt = 'version.rpt'
        self.which_rpt = 'which.rpt'
        self.summary_rpt = 'summary.rpt'
        self.srcfile_rpt = 'srcfile.rpt'
        self.prerun_cmds = [
            'module rm xalt',
            f'{self.tool} --version >> {self.version_rpt}',
            f'which {self.tool} &> {self.which_rpt}',
        ]
        column = ('"CPU Time:Self,CPU Time:Effective Time:Self,'
                  'CPU Time:Spin Time:Self,CPU Time:Overhead Time:Self"')
        self.postrun_cmds = [
            # summary rpt: TODO: for ...
            # '%s -R hotspots -r %s* -column="CPU Time:Self" &> %s' %
            # (self.tool, self.dir_rpt, self.summary_rpt),
            # csv report:
            'for vtdir in %s.nid* ;do %s -R hotspots -r $vtdir/*.vtune '
            '-group-by=function -format=csv -csv-delimiter=semicolon '
            '-column=%s &>$vtdir.csv ;done' %
            (self.dir_rpt, self.tool, column),
            # keep as reminder:
            # '%s cat /proc/sys/kernel/perf_event_paranoid &> %s' %
            # (run_cmd, self.paranoid_rpt),
            # 'cd %s ;ln -s nid*.000 e000 ;cd -' % self.dir_rpt,
            # '%s --report=survey --project-dir=%s &> %s' %
            # (self.tool, self.dir_rpt, self.summary_rpt),
            'cp *_job.out %s' % self.dir_rpt,
        ]
        # }}}

        # {{{ sanity
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'Total time for iteration\(0\)', self.stdout),
            # check the tool's version:
            sn.assert_true(sphsintel.vtune_version(self)),
            # check the summary report:
            sn.assert_found(r'vtune: Executing actions 100 % done',
                            self.stdout)
        ])
        # }}}

        # {{{ performance
        # {{{ internal timers
        self.prerun_cmds += ['echo starttime=`date +%s`']
        self.postrun_cmds += ['echo stoptime=`date +%s`']