Exemple #1
0
 def __init__(self):
     '''Set up the check that runs the memory eater with ``100%``.

     Sanity passes when ``oom-kill`` or ``Killed`` is found in stderr.
     Two performance variables are defined: the ``reference_meminfo``
     attribute of this test and the largest ``using: N GB`` figure
     extracted from stdout.
     '''
     super().__init__()
     self.sourcepath = 'eatmemory_mpi.c'
     self.executable_opts = ['100%']
     self.time_limit = '5m'
     self.valid_systems += ['eiger:mc', 'pilatus:mc']
     self.tags.add('mem')
     self.maintainers = ['JG']
     self.sanity_patterns = sn.assert_found(r'(oom-kill)|(Killed)',
                                            self.stderr)
     # {{{ perf
     # The only capture group is the "using" figure of each report line.
     meminfo_line = (
         r'^Eating \d+ MB\/mpi \*\d+mpi = -\d+ MB memory '
         r'from \/proc\/meminfo: total: \d+ GB, free: \d+ GB, '
         r'avail: \d+ GB, using: (\d+) GB'
     )
     used_gb = sn.extractall(meminfo_line, self.stdout, 1, int)
     self.perf_patterns = {
         'max_cn_memory': sn.getattr(self, 'reference_meminfo'),
         'max_allocated_memory': sn.max(used_gb),
     }
     unbounded = (0, None, None, 'GB')
     # The reference value itself is looked up through sn.getattr, so it
     # follows whatever ``reference_meminfo`` holds on this test.
     allocated_ref = (
         sn.getattr(self, 'reference_meminfo'), -0.05, None, 'GB'
     )
     self.reference = {
         '*': {
             'max_cn_memory': unbounded,
             'max_allocated_memory': allocated_ref,
         },
     }
Exemple #2
0
    def __init__(self, variant, lang, linkage):
        '''Configure a Hello World compile-and-run check.

        :arg variant: not used inside this constructor.
        :arg lang: key into ``lang_names`` (``'c'``, ``'cpp'`` or
            ``'f90'``) used to build the test description.
        :arg linkage: stored on the test and exported as
            ``CRAYPE_LINK_TYPE``.
        '''
        self.linkage = linkage
        self.variables = {'CRAYPE_LINK_TYPE': linkage}
        self.prgenv_flags = {}
        self.lang_names = {'c': 'C', 'cpp': 'C++', 'f90': 'Fortran 90'}
        self.descr = f'{self.lang_names[lang]} Hello World'
        self.sourcepath = 'hello_world'
        self.build_system = 'SingleSource'
        self.valid_systems = ['ubelix:compute', 'ubelix:gpu']
        self.valid_prog_environs = ['foss', 'intel']
        self.maintainers = ['VH', 'EK']
        self.tags = {'production', 'prgenv'}
        self.compilation_time_seconds = None

        # Capture groups of the greeting line, in order of appearance.
        thread_id, thread_count, proc_rank, proc_count = 1, 2, 3, 4
        greeting = (r'Hello World from thread \s*(\d+) out of \s*(\d+) '
                    r'from process \s*(\d+) out of \s*(\d+)')
        matches = sn.findall(greeting, self.stdout)
        expected_ranks = sn.getattr(self, 'num_tasks')
        expected_threads = sn.getattr(self, 'num_cpus_per_task')

        def field(match, index):
            return int(match.group(index))

        # One check on the number of greetings, then six checks applied to
        # every greeting line.
        count_check = [
            sn.assert_eq(sn.count(matches),
                         expected_ranks * expected_threads)
        ]
        per_line_checks = [
            sn.map(lambda m: sn.assert_lt(field(m, thread_id),
                                          field(m, thread_count)), matches),
            sn.map(lambda m: sn.assert_lt(field(m, proc_rank),
                                          field(m, proc_count)), matches),
            sn.map(lambda m: sn.assert_lt(field(m, thread_id),
                                          expected_threads), matches),
            sn.map(lambda m: sn.assert_eq(field(m, thread_count),
                                          expected_threads), matches),
            sn.map(lambda m: sn.assert_lt(field(m, proc_rank),
                                          expected_ranks), matches),
            sn.map(lambda m: sn.assert_eq(field(m, proc_count),
                                          expected_ranks), matches),
        ]
        self.sanity_patterns = sn.all(sn.chain(count_check, *per_line_checks))

        build_time = sn.getattr(self, 'compilation_time_seconds')
        self.perf_patterns = {'compilation_time': build_time}
        self.reference = {'*': {'compilation_time': (60, None, 0.1, 's')}}
Exemple #3
0
 def set_compiler_flags_and_variables(self):
     '''Point the test at the ERT sources and set the build flags.

     Sets ``sourcesdir``, ``readonly_files``, ``sourcepath``, the
     ``SingleSource`` build system with its ``cppflags``/``cxxflags``
     and the pre-build commands.

     Raises ``KeyError`` if the current environment has no entry in
     ``prgenv_flags`` (only ``PrgEnv-gnu`` is listed).
     '''
     toolkit_path = (
         'roofline',
         'cs-roofline-toolkit.git',
         'Empirical_Roofline_Tool-1.1.0',
     )
     self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                    *toolkit_path)
     self.readonly_files = [
         'Batch', 'Config', 'Drivers', 'ert', 'ert_cscs.py',
         'ERT_Users_Manual.pdf', 'Kernels', 'Plot', 'Python', 'README.md',
         'Results', 'Scripts',
     ]
     # No makefile is provided upstream, so both translation units are
     # passed through sourcepath to stay close to the official build
     # script:
     # https://bitbucket.org/berkeleylab/cs-roofline-toolkit/src/master/
     # Empirical_Roofline_Tool-1.1.0/Python/ert_core.py#lines-279
     self.sourcepath = 'Kernels/kernel1.cxx Drivers/driver1.cxx'
     self.build_system = 'SingleSource'
     # Test parameters that end up as preprocessor definitions.
     trials_min = sn.getattr(self, 'ert_trials_min')
     precision = sn.getattr(self, 'ert_precision')
     flop = sn.getattr(self, 'ert_flop')
     preprocessor_flags = [
         '-I./Kernels',
         f'-DERT_FLOP={flop}',
         '-DERT_ALIGN=32',
         '-DERT_MEMORY_MAX=1073741824',
         '-DERT_MPI=True',
         '-DERT_OPENMP=True',
         '-DERT_WORKING_SET_MIN=1',
         '-DERT_WSS_MULT=1.1',
         f'-D{precision}',
         f'-DERT_TRIALS_MIN={trials_min}',
         # '-DERT_INTEL' is intentionally left out; kept as a reminder.
     ]
     self.build_system.cppflags = preprocessor_flags
     self.prgenv_flags = {'PrgEnv-gnu': ['-fopenmp', '-O3']}
     self.build_system.cxxflags = self.prgenv_flags[
         self.current_environ.name
     ]
     self.prebuild_cmds = ['module list', 'which gcc']
Exemple #4
0
 def set_run_cmds(self):
     '''Wrap the executable in a shell ``for`` loop and post-process.

     The loop header goes into ``prerun_cmds``, the executable options
     redirect each iteration's output to ``try.00$ii``, and
     ``postrun_cmds`` closes the loop and pipes the collected output
     through the ``Scripts`` helpers.

     NOTE(review): ``repeat`` is not defined in this method; it is
     presumably a module-level name - confirm.
     '''
     # Usage: ./exe gpu_blocks gpu_threads
     gpu_blocks = sn.getattr(self, 'ert_gpu_blocks')
     gpu_threads = sn.getattr(self, 'ert_gpu_threads')
     loop_header = f'for ii in `seq {repeat}`;do'
     self.prerun_cmds += [loop_header]
     self.executable_opts = [f'{gpu_blocks} {gpu_threads} &> try.00$ii']
     loop_footer_and_postprocessing = [
         'done',
         'cat try.00* | ./Scripts/preprocess.py > pre',
         './Scripts/maximum.py < pre > max',
         './Scripts/summary.py < max > sum',
     ]
     self.postrun_cmds += loop_footer_and_postprocessing
Exemple #5
0
    def setup(self, partition, environ, **job_opts):
        '''Install sanity/performance checks, then run the parent setup.

        :arg partition: forwarded unchanged to ``super().setup``.
        :arg environ: forwarded unchanged to ``super().setup``.
        :arg job_opts: forwarded unchanged to ``super().setup``.
        '''
        greeting = (r'Hello World from thread \s*(\d+) out of \s*(\d+) '
                    r'from process \s*(\d+) out of \s*(\d+)')
        matches = sn.findall(greeting, self.stdout)

        def field(match, index):
            return int(match.group(index))

        # The expected number of greetings is computed right here, whereas
        # the per-line checks read num_tasks / num_cpus_per_task from self
        # only when their lambdas are called.
        expected_lines = self.num_tasks * self.num_cpus_per_task
        count_check = [sn.assert_eq(sn.count(matches), expected_lines)]
        per_line_checks = (
            # thread id < number of threads
            sn.map(lambda m: sn.assert_lt(field(m, 1), field(m, 2)),
                   matches),
            # rank < number of ranks
            sn.map(lambda m: sn.assert_lt(field(m, 3), field(m, 4)),
                   matches),
            sn.map(lambda m: sn.assert_lt(field(m, 1),
                                          self.num_cpus_per_task), matches),
            sn.map(lambda m: sn.assert_eq(field(m, 2),
                                          self.num_cpus_per_task), matches),
            sn.map(lambda m: sn.assert_lt(field(m, 3), self.num_tasks),
                   matches),
            sn.map(lambda m: sn.assert_eq(field(m, 4), self.num_tasks),
                   matches),
        )
        self.sanity_patterns = sn.all(sn.chain(count_check, *per_line_checks))

        build_time = sn.getattr(self, 'compilation_time_seconds')
        self.perf_patterns = {'compilation_time': build_time}
        self.reference = {'*': {'compilation_time': (60, None, 0.1)}}
        super().setup(partition, environ, **job_opts)
Exemple #6
0
 def set_compiler_flags_and_variables(self):
     '''Point the test at the ERT sources and set up the nvcc build.

     Sets ``sourcesdir``, ``readonly_files``, ``sourcepath``, the
     ``SingleSource`` build system (compiler ``nvcc``) with its
     ``cppflags``/``cxxflags`` and the pre-build commands.

     Raises ``KeyError`` if the current environment has no entry in
     ``prgenv_flags`` (only ``PrgEnv-gnu`` is listed).
     '''
     toolkit_path = (
         'roofline',
         'cs-roofline-toolkit.git',
         'Empirical_Roofline_Tool-1.1.0',
     )
     self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                    *toolkit_path)
     self.readonly_files = [
         'Batch', 'Config', 'Drivers', 'ert', 'ert_cscs.py',
         'ERT_Users_Manual.pdf', 'Kernels', 'Plot', 'Python', 'README.md',
         'Results', 'Scripts',
     ]
     self.sourcepath = 'Kernels/kernel1.cxx Drivers/driver1.cxx'
     self.build_system = 'SingleSource'
     # Test parameters that end up in the compiler command line.
     gpu_arch = sn.getattr(self, 'cap')
     trials_min = sn.getattr(self, 'ert_trials_min')
     precision = sn.getattr(self, 'ert_precision')
     flop = sn.getattr(self, 'ert_flop')
     preprocessor_flags = [
         '-I./Kernels',
         '-DERT_ALIGN=32',
         '-DERT_MEMORY_MAX=1073741824',
         '-DERT_WORKING_SET_MIN=128',
         '-DERT_WSS_MULT=1.1',
         '-DERT_GPU',
         f'-DERT_TRIALS_MIN={trials_min}',
         f'-D{precision}',
         f'-DERT_FLOP={flop}',
     ]
     self.build_system.cppflags = preprocessor_flags
     self.prgenv_flags = {
         'PrgEnv-gnu': ['-O3', '-x cu', f'-arch={gpu_arch}'],
     }
     self.build_system.cxx = 'nvcc'
     environ_name = self.current_environ.name
     self.build_system.cxxflags = self.prgenv_flags[environ_name]
     self.prebuild_cmds = ['module list', 'which gcc', 'which nvcc']
Exemple #7
0
 def __init__(self):
     '''Configure a ``hostname`` run whose output is counted.

     Sanity requires the number of stdout lines that consist solely of
     ``nid<digits>`` to equal the ``num_tasks`` attribute.
     '''
     self.executable = 'hostname'
     self.valid_systems = ['daint:gpu', 'daint:mc']
     self.valid_prog_environs = ['cray']
     self.num_tasks_per_node = 1
     # NOTE(review): num_tasks = 0 presumably asks the framework to pick
     # the task count itself - confirm against the ReFrame docs.
     self.num_tasks = 0
     node_lines = sn.findall(r'^nid\d+$', self.stdout)
     expected_tasks = sn.getattr(self, 'num_tasks')
     self.sanity_patterns = sn.assert_eq(expected_tasks,
                                         sn.count(node_lines))
 def __init__(self):
     '''Configure a ``hostname`` run whose output is counted.

     Sanity requires the number of ``nid<digits>`` occurrences in stdout
     to equal the ``num_tasks`` attribute. No source directory is used.
     '''
     self.executable = 'hostname'
     self.sourcesdir = None
     self.valid_systems = ['daint:gpu', 'daint:mc']
     self.valid_prog_environs = ['PrgEnv-cray']
     self.maintainers = ['you-can-type-your-email-here']
     self.tags = {'tutorial'}
     self.num_tasks_per_node = 1
     # NOTE(review): num_tasks = 0 presumably asks the framework to pick
     # the task count itself - confirm against the ReFrame docs.
     self.num_tasks = 0
     node_names = sn.findall(r'nid\d+', self.stdout)
     expected_tasks = sn.getattr(self, 'num_tasks')
     self.sanity_patterns = sn.assert_eq(expected_tasks,
                                         sn.count(node_names))
Exemple #9
0
    def assert_hello_world(self):
        '''Build the sanity expression for the Hello World output.

        Every ``Hello, World from thread T out of NT from process R out
        of NR`` line found in stdout is checked against the test's
        ``num_tasks`` and ``num_cpus_per_task`` attributes, and the total
        number of such lines must equal their product.

        :returns: the combined ``sn.all`` expression.
        '''
        # Capture groups of the greeting line, in order of appearance.
        thread_id, thread_count, proc_rank, proc_count = 1, 2, 3, 4
        greeting = (r'Hello, World from thread \s*(\d+) out of \s*(\d+) '
                    r'from process \s*(\d+) out of \s*(\d+)')
        matches = sn.findall(greeting, self.stdout)
        expected_ranks = sn.getattr(self, 'num_tasks')
        expected_threads = sn.getattr(self, 'num_cpus_per_task')

        def field(match, index):
            return int(match.group(index))

        count_check = [
            sn.assert_eq(sn.count(matches),
                         expected_ranks * expected_threads)
        ]
        per_line_checks = [
            sn.map(lambda m: sn.assert_lt(field(m, thread_id),
                                          field(m, thread_count)), matches),
            sn.map(lambda m: sn.assert_lt(field(m, proc_rank),
                                          field(m, proc_count)), matches),
            sn.map(lambda m: sn.assert_lt(field(m, thread_id),
                                          expected_threads), matches),
            sn.map(lambda m: sn.assert_eq(field(m, thread_count),
                                          expected_threads), matches),
            sn.map(lambda m: sn.assert_lt(field(m, proc_rank),
                                          expected_ranks), matches),
            sn.map(lambda m: sn.assert_eq(field(m, proc_count),
                                          expected_ranks), matches),
        ]
        return sn.all(sn.chain(count_check, *per_line_checks))
Exemple #10
0
 def assert_count_gpus(self):
     '''Check the number of GPU report lines printed to stdout.

     There must be one ``Found N gpu(s)`` line per job task and one
     ``Kernel launch latency`` line per task and GPU
     (``job.num_tasks * num_gpus_per_node``).

     :returns: the combined ``sn.all`` expression.
     '''
     found_lines = sn.findall(r'\[\S+\] Found \d+ gpu\(s\)', self.stdout)
     found_check = sn.assert_eq(sn.count(found_lines),
                                sn.getattr(self.job, 'num_tasks'))

     latency_lines = sn.findall(
         r'\[\S+\] \[gpu \d+\] Kernel launch latency: \S+ us',
         self.stdout
     )
     # Unlike the check above, this product is computed immediately from
     # the current attribute values.
     expected_latency_lines = self.job.num_tasks * self.num_gpus_per_node
     latency_check = sn.assert_eq(sn.count(latency_lines),
                                  expected_latency_lines)
     return sn.all([found_check, latency_check])
Exemple #11
0
    def eval_sanity(self):
        '''Validate the per-task output files and the greasy log file.

        The checks are evaluated immediately with ``sn.evaluate`` in this
        order:

        1. the number of ``output-*`` files in the stage directory equals
           ``num_greasy_tasks``;
        2. every output file contains the expected number of
           ``Hello, World`` lines and each line is consistent with
           ``nranks_per_worker`` and ``num_cpus_per_task``;
        3. the greasy log file reports completion and a summary with all
           tasks OK.

        :returns: ``True`` if no check failed.
        '''
        output_files = [
            fname for fname in os.listdir(self.stagedir)
            if fname.startswith('output-')
        ]
        num_greasy_tasks = len(output_files)
        failure_msg = (f'Requested {self.num_greasy_tasks} task(s), but '
                       f'executed only {num_greasy_tasks} task(s)')
        sn.evaluate(
            sn.assert_eq(num_greasy_tasks,
                         self.num_greasy_tasks,
                         msg=failure_msg))
        num_tasks = sn.getattr(self, 'nranks_per_worker')
        num_cpus_per_task = sn.getattr(self, 'num_cpus_per_task')

        def tid(match):
            return int(match.group(1))

        def num_threads(match):
            return int(match.group(2))

        def rank(match):
            return int(match.group(3))

        def num_ranks(match):
            return int(match.group(4))

        for output_file in output_files:
            # NOTE(review): output_file is a bare name from os.listdir();
            # this presumably relies on the working directory being the
            # stage directory when the check runs - confirm.
            result = sn.findall(
                r'Hello, World from thread \s*(\d+) out '
                r'of \s*(\d+) from process \s*(\d+) out of '
                r'\s*(\d+)', output_file)

            failure_msg = (f'Found {sn.count(result)} Hello, World... '
                           f'pattern(s) but expected '
                           f'{num_tasks * num_cpus_per_task} pattern(s) '
                           f'inside the output file {output_file}')
            sn.evaluate(
                sn.assert_eq(sn.count(result),
                             num_tasks * num_cpus_per_task,
                             msg=failure_msg))

            # The lambdas below close over output_file and result; that is
            # safe because they are evaluated within this same iteration.
            sn.evaluate(
                sn.all(
                    sn.chain(
                        sn.map(
                            lambda x: sn.assert_lt(
                                tid(x),
                                num_threads(x),
                                msg=(f'Thread id {tid(x)} is not lower '
                                     f'than the number of threads '
                                     f'{num_threads(x)} in output file '
                                     f'{output_file}')), result),
                        sn.map(
                            lambda x: sn.assert_lt(
                                rank(x),
                                num_ranks(x),
                                msg=(f'Rank id {rank(x)} is not lower than '
                                     f'the number of ranks {num_ranks(x)} '
                                     f'in output file {output_file}')),
                            result),
                        sn.map(
                            lambda x: sn.assert_lt(
                                tid(x),
                                self.num_cpus_per_task,
                                msg=(f'Thread id {tid(x)} is not lower '
                                     f'than the number of cpus per task '
                                     f'{self.num_cpus_per_task} in output '
                                     f'file {output_file}')), result),
                        sn.map(
                            lambda x: sn.assert_eq(
                                num_threads(x),
                                num_cpus_per_task,
                                msg=(f'Found {num_threads(x)} threads '
                                     f'rather than '
                                     f'{self.num_cpus_per_task} in output '
                                     f'file {output_file}')), result),
                        sn.map(
                            lambda x: sn.assert_lt(
                                rank(x),
                                num_tasks,
                                msg=(f'Rank id {rank(x)} is not lower than '
                                     f'the number of ranks '
                                     f'{self.nranks_per_worker} in output '
                                     f'file {output_file}')), result),
                        sn.map(
                            lambda x: sn.assert_eq(
                                num_ranks(x),
                                num_tasks,
                                msg=(f'Number of ranks {num_ranks(x)} is not '
                                     f'equal to {self.nranks_per_worker} in '
                                     f'output file {output_file}')), result))))
        sn.evaluate(sn.assert_found(r'Finished greasing', self.greasy_logfile))
        sn.evaluate(
            sn.assert_found((f'INFO: Summary of {self.num_greasy_tasks} '
                             f'tasks: '
                             f'{self.num_greasy_tasks} OK, '
                             f'0 FAILED, '
                             f'0 CANCELLED, '
                             fr'0 INVALID\.'), self.greasy_logfile))

        return True
Exemple #12
0
    def assert_num_gpus(self):
        '''Check that a ``Test passed`` line was printed once per task.

        :returns: an ``sn.assert_eq`` expression comparing the number of
            ``[...] Test passed`` lines in stdout with the job's
            ``num_tasks``.
        '''
        passed_lines = sn.findall(r'^\s*\[[^\]]*\]\s*Test passed',
                                  self.stdout)
        expected_tasks = sn.getattr(self.job, 'num_tasks')
        return sn.assert_eq(sn.count(passed_lines), expected_tasks)
Exemple #13
0
 def b(self):
     '''Return the ``sn.getattr`` lookup of this object's ``_b``.'''
     attr_name = '_b'
     return sn.getattr(self, attr_name)
Exemple #14
0
    def __init__(self, variant, lang, linkage):
        '''Configure a Hello World compile-and-run check.

        :arg variant: not used inside this constructor.
        :arg lang: key into ``lang_names`` (``'c'``, ``'cpp'`` or
            ``'f90'``) used to build the test description.
        :arg linkage: stored on the test and exported as
            ``CRAYPE_LINK_TYPE``; ``'static'`` disables every programming
            environment on ``kesch``.
        '''
        self.linkage = linkage
        self.variables = {'CRAYPE_LINK_TYPE': linkage}
        self.prgenv_flags = {}
        self.lang_names = {'c': 'C', 'cpp': 'C++', 'f90': 'Fortran 90'}
        self.descr = self.lang_names[lang] + ' Hello World'
        self.sourcepath = 'hello_world'
        self.build_system = 'SingleSource'
        self.maintainers = ['VH', 'EK']
        self.tags = {'production', 'craype'}
        self.valid_systems = [
            'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'kesch:cn',
            'tiger:gpu', 'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn'
        ]
        self.valid_prog_environs = [
            'PrgEnv-cray', 'PrgEnv-cray_classic', 'PrgEnv-intel', 'PrgEnv-gnu',
            'PrgEnv-pgi', 'PrgEnv-gnu-nocuda', 'PrgEnv-pgi-nocuda'
        ]

        system_name = self.current_system.name
        if system_name in ('kesch', 'arolla', 'tsa'):
            self.exclusive_access = True

        # Static linking is not tested on kesch: emptying the environment
        # list leaves nothing for this variant to run with there.
        if system_name == 'kesch' and linkage == 'static':
            self.valid_prog_environs = []

        self.compilation_time_seconds = None

        # Capture groups of the greeting line, in order of appearance.
        thread_id, thread_count, proc_rank, proc_count = 1, 2, 3, 4
        greeting = (r'Hello World from thread \s*(\d+) out of \s*(\d+) '
                    r'from process \s*(\d+) out of \s*(\d+)')
        matches = sn.findall(greeting, self.stdout)
        expected_ranks = sn.getattr(self, 'num_tasks')
        expected_threads = sn.getattr(self, 'num_cpus_per_task')

        def field(match, index):
            return int(match.group(index))

        # One check on the number of greetings, then six checks applied to
        # every greeting line.
        count_check = [
            sn.assert_eq(sn.count(matches),
                         expected_ranks * expected_threads)
        ]
        per_line_checks = [
            sn.map(lambda m: sn.assert_lt(field(m, thread_id),
                                          field(m, thread_count)), matches),
            sn.map(lambda m: sn.assert_lt(field(m, proc_rank),
                                          field(m, proc_count)), matches),
            sn.map(lambda m: sn.assert_lt(field(m, thread_id),
                                          expected_threads), matches),
            sn.map(lambda m: sn.assert_eq(field(m, thread_count),
                                          expected_threads), matches),
            sn.map(lambda m: sn.assert_lt(field(m, proc_rank),
                                          expected_ranks), matches),
            sn.map(lambda m: sn.assert_eq(field(m, proc_count),
                                          expected_ranks), matches),
        ]
        self.sanity_patterns = sn.all(sn.chain(count_check, *per_line_checks))

        build_time = sn.getattr(self, 'compilation_time_seconds')
        self.perf_patterns = {'compilation_time': build_time}
        self.reference = {'*': {'compilation_time': (60, None, 0.1, 's')}}
Exemple #15
0
 def set_sanity_patterns(self):
     '''Require one ``nid<digits>`` stdout line per task.

     Only lines consisting solely of ``nid`` followed by digits are
     counted; the count is compared with the ``num_tasks`` attribute.
     '''
     node_lines = sn.findall(r'^nid\d+$', self.stdout)
     expected_tasks = sn.getattr(self, 'num_tasks')
     self.sanity_patterns = sn.assert_eq(expected_tasks,
                                         sn.count(node_lines))
 def capture_build_time(self):
     '''Expose ``compilation_time_seconds`` as a performance variable.

     The variable is named ``compilation_time`` and gets the same
     reference tuple on every system (``'*'``).
     '''
     build_time = sn.getattr(self, 'compilation_time_seconds')
     self.perf_patterns = {'compilation_time': build_time}
     # NOTE(review): the tuple presumably reads (value, lower threshold,
     # upper threshold, unit) - confirm against the ReFrame docs.
     compilation_ref = (60, None, 0.1, 's')
     self.reference = {'*': {'compilation_time': compilation_ref}}