def __init__(self):
    '''MPI memory-eater check: allocate until the kernel OOM killer fires.'''
    super().__init__()
    self.maintainers = ['JG']
    self.valid_systems += ['eiger:mc', 'pilatus:mc']
    self.time_limit = '5m'
    self.sourcepath = 'eatmemory_mpi.c'
    self.tags.add('mem')
    # Ask the tool for all of the node's memory; the run is expected to be
    # terminated by the OOM killer, which the sanity check looks for.
    self.executable_opts = ['100%']
    self.sanity_patterns = sn.assert_found(r'(oom-kill)|(Killed)', self.stderr)
    # {{{ perf
    mem_regex = (r'^Eating \d+ MB\/mpi \*\d+mpi = -\d+ MB memory from \/proc\/'
                 r'meminfo: total: \d+ GB, free: \d+ GB, avail: \d+ GB, using:'
                 r' (\d+) GB')
    self.perf_patterns = {
        'max_cn_memory': sn.getattr(self, 'reference_meminfo'),
        'max_allocated_memory': sn.max(
            sn.extractall(mem_regex, self.stdout, 1, int)),
    }
    unbounded = (0, None, None, 'GB')
    self.reference = {
        '*': {
            'max_cn_memory': unbounded,
            'max_allocated_memory': (
                sn.getattr(self, 'reference_meminfo'), -0.05, None, 'GB'),
        }
    }
def __init__(self, variant, lang, linkage):
    '''Hello-World compile/run check (ubelix variant).'''
    self.linkage = linkage
    self.variables = {'CRAYPE_LINK_TYPE': linkage}
    self.prgenv_flags = {}
    self.lang_names = {'c': 'C', 'cpp': 'C++', 'f90': 'Fortran 90'}
    self.descr = f'{self.lang_names[lang]} Hello World'
    self.sourcepath = 'hello_world'
    self.build_system = 'SingleSource'
    self.valid_systems = ['ubelix:compute', 'ubelix:gpu']
    self.valid_prog_environs = ['foss', 'intel']
    self.compilation_time_seconds = None
    hello = sn.findall(
        r'Hello World from thread \s*(\d+) out '
        r'of \s*(\d+) from process \s*(\d+) out of '
        r'\s*(\d+)', self.stdout)
    n_tasks = sn.getattr(self, 'num_tasks')
    n_cpus = sn.getattr(self, 'num_cpus_per_task')

    def grp(match, idx):
        # Capture group ``idx`` of a hello-world line as an integer.
        return int(match.group(idx))

    # Every printed (thread, process) pair must be consistent with the
    # requested task/thread geometry.
    self.sanity_patterns = sn.all(
        sn.chain(
            [sn.assert_eq(sn.count(hello), n_tasks * n_cpus)],
            sn.map(lambda m: sn.assert_lt(grp(m, 1), grp(m, 2)), hello),
            sn.map(lambda m: sn.assert_lt(grp(m, 3), grp(m, 4)), hello),
            sn.map(lambda m: sn.assert_lt(grp(m, 1), n_cpus), hello),
            sn.map(lambda m: sn.assert_eq(grp(m, 2), n_cpus), hello),
            sn.map(lambda m: sn.assert_lt(grp(m, 3), n_tasks), hello),
            sn.map(lambda m: sn.assert_eq(grp(m, 4), n_tasks), hello),
        ))
    self.perf_patterns = {
        'compilation_time': sn.getattr(self, 'compilation_time_seconds')
    }
    self.reference = {'*': {'compilation_time': (60, None, 0.1, 's')}}
    self.maintainers = ['VH', 'EK']
    self.tags = {'production', 'prgenv'}
def set_compiler_flags_and_variables(self):
    '''Point the check at the ERT sources and configure the CPU build.'''
    self.sourcesdir = os.path.join(
        self.current_system.resourcesdir,
        'roofline',
        'cs-roofline-toolkit.git',
        'Empirical_Roofline_Tool-1.1.0',
    )
    # Toolkit files that must stay untouched in the stage directory.
    self.readonly_files = [
        'Batch', 'Config', 'Drivers', 'ert', 'ert_cscs.py',
        'ERT_Users_Manual.pdf', 'Kernels', 'Plot', 'Python', 'README.md',
        'Results', 'Scripts',
    ]
    # Using a sourcepath trick here to remain close to the way building is
    # executed in the official repo script (no makefile provided):
    # https://bitbucket.org/berkeleylab/cs-roofline-toolkit/src/master/
    # Empirical_Roofline_Tool-1.1.0/Python/ert_core.py#lines-279
    self.sourcepath = 'Kernels/kernel1.cxx Drivers/driver1.cxx'
    self.build_system = 'SingleSource'
    # Test parameters (deferred lookups on the test instance):
    trials_min = sn.getattr(self, 'ert_trials_min')
    precision = sn.getattr(self, 'ert_precision')
    flops = sn.getattr(self, 'ert_flop')
    self.build_system.cppflags = [
        '-I./Kernels',
        f'-DERT_FLOP={flops}',
        '-DERT_ALIGN=32',
        '-DERT_MEMORY_MAX=1073741824',
        '-DERT_MPI=True',
        '-DERT_OPENMP=True',
        '-DERT_WORKING_SET_MIN=1',
        '-DERT_WSS_MULT=1.1',
        f'-D{precision}',
        f'-DERT_TRIALS_MIN={trials_min}',
        # keeping as reminder
        # '-DERT_INTEL',
    ]
    self.prgenv_flags = {
        'PrgEnv-gnu': ['-fopenmp', '-O3'],
    }
    self.build_system.cxxflags = self.prgenv_flags[self.current_environ.name]
    self.prebuild_cmds = ['module list', 'which gcc']
def set_run_cmds(self):
    '''Wrap the executable in a shell loop that repeats the measurement.'''
    # Usage: ./exe gpu_blocks gpu_threads
    gpu_blocks = sn.getattr(self, 'ert_gpu_blocks')
    gpu_threads = sn.getattr(self, 'ert_gpu_threads')
    # NOTE(review): ``repeat`` is not defined in this method — presumably a
    # module-level constant; confirm before refactoring.
    self.prerun_cmds += [f'for ii in `seq {repeat}`;do']
    self.executable_opts = [
        f'{gpu_blocks} {gpu_threads} &> try.00$ii'
    ]
    # Close the loop, then post-process all per-iteration outputs with the
    # toolkit's own scripts.
    self.postrun_cmds += [
        'done',
        'cat try.00* | ./Scripts/preprocess.py > pre',
        './Scripts/maximum.py < pre > max',
        './Scripts/summary.py < max > sum',
    ]
def setup(self, partition, environ, **job_opts):
    '''Install sanity/performance patterns, then run the standard setup.

    The sanity check verifies that every (thread, process) pair printed by
    the hello-world binary is consistent with ``num_tasks`` and
    ``num_cpus_per_task``.
    '''
    result = sn.findall(
        r'Hello World from thread \s*(\d+) out '
        r'of \s*(\d+) from process \s*(\d+) out of '
        r'\s*(\d+)', self.stdout)
    self.sanity_patterns = sn.all(
        sn.chain(
            [
                sn.assert_eq(sn.count(result),
                             self.num_tasks * self.num_cpus_per_task)
            ],
            # thread id < number of threads
            sn.map(
                lambda x: sn.assert_lt(int(x.group(1)), int(x.group(2))),
                result),
            # rank < number of ranks
            sn.map(
                lambda x: sn.assert_lt(int(x.group(3)), int(x.group(4))),
                result),
            sn.map(
                lambda x: sn.assert_lt(int(x.group(1)),
                                       self.num_cpus_per_task), result),
            sn.map(
                lambda x: sn.assert_eq(int(x.group(2)),
                                       self.num_cpus_per_task), result),
            sn.map(lambda x: sn.assert_lt(int(x.group(3)), self.num_tasks),
                   result),
            sn.map(lambda x: sn.assert_eq(int(x.group(4)), self.num_tasks),
                   result),
        ))
    self.perf_patterns = {
        'compilation_time': sn.getattr(self, 'compilation_time_seconds')
    }
    # Fix: add the missing measurement unit ('s') so the reference tuple is
    # consistent with the other hello-world checks in this file.
    self.reference = {'*': {'compilation_time': (60, None, 0.1, 's')}}
    super().setup(partition, environ, **job_opts)
def set_compiler_flags_and_variables(self):
    '''Point the check at the ERT sources and configure the CUDA build.'''
    self.sourcesdir = os.path.join(
        self.current_system.resourcesdir,
        'roofline',
        'cs-roofline-toolkit.git',
        'Empirical_Roofline_Tool-1.1.0',
    )
    # Toolkit files that must stay untouched in the stage directory.
    self.readonly_files = [
        'Batch', 'Config', 'Drivers', 'ert', 'ert_cscs.py',
        'ERT_Users_Manual.pdf', 'Kernels', 'Plot', 'Python', 'README.md',
        'Results', 'Scripts',
    ]
    self.sourcepath = 'Kernels/kernel1.cxx Drivers/driver1.cxx'
    self.build_system = 'SingleSource'
    # Test parameters (deferred lookups on the test instance):
    gpu_cap = sn.getattr(self, 'cap')
    trials_min = sn.getattr(self, 'ert_trials_min')
    precision = sn.getattr(self, 'ert_precision')
    flops = sn.getattr(self, 'ert_flop')
    self.build_system.cppflags = [
        '-I./Kernels',
        '-DERT_ALIGN=32',
        '-DERT_MEMORY_MAX=1073741824',
        '-DERT_WORKING_SET_MIN=128',
        '-DERT_WSS_MULT=1.1',
        '-DERT_GPU',
        f'-DERT_TRIALS_MIN={trials_min}',
        f'-D{precision}',
        f'-DERT_FLOP={flops}',
    ]
    self.prgenv_flags = {
        'PrgEnv-gnu': ['-O3', '-x cu', f'-arch={gpu_cap}'],
    }
    # Sources are compiled with nvcc as CUDA (-x cu) for the given capability.
    self.build_system.cxx = 'nvcc'
    self.build_system.cxxflags = self.prgenv_flags[self.current_environ.name]
    self.prebuild_cmds = ['module list', 'which gcc', 'which nvcc']
def __init__(self):
    '''Run ``hostname`` once per node and count the reported node names.'''
    self.valid_systems = ['daint:gpu', 'daint:mc']
    self.valid_prog_environs = ['cray']
    self.executable = 'hostname'
    # num_tasks == 0 requests flexible node allocation from the scheduler.
    self.num_tasks = 0
    self.num_tasks_per_node = 1
    node_lines = sn.findall(r'^nid\d+$', self.stdout)
    self.sanity_patterns = sn.assert_eq(
        sn.getattr(self, 'num_tasks'), sn.count(node_lines))
def __init__(self):
    '''Tutorial check: one ``hostname`` task per node, count the outputs.'''
    self.valid_systems = ['daint:gpu', 'daint:mc']
    self.valid_prog_environs = ['PrgEnv-cray']
    self.executable = 'hostname'
    # No sources needed: this check only runs a system binary.
    self.sourcesdir = None
    # num_tasks == 0 requests flexible node allocation from the scheduler.
    self.num_tasks = 0
    self.num_tasks_per_node = 1
    node_lines = sn.findall(r'nid\d+', self.stdout)
    self.sanity_patterns = sn.assert_eq(
        sn.getattr(self, 'num_tasks'), sn.count(node_lines))
    self.maintainers = ['you-can-type-your-email-here']
    self.tags = {'tutorial'}
def assert_hello_world(self):
    '''Deferred sanity: each hello-world line must agree with the task and
    thread counts requested for this check.'''
    matches = sn.findall(
        r'Hello, World from thread \s*(\d+) out '
        r'of \s*(\d+) from process \s*(\d+) out of '
        r'\s*(\d+)', self.stdout)
    n_tasks = sn.getattr(self, 'num_tasks')
    n_cpus = sn.getattr(self, 'num_cpus_per_task')

    def grp(match, idx):
        # Capture group ``idx`` of a hello-world line as an integer.
        return int(match.group(idx))

    return sn.all(
        sn.chain(
            # Exactly one line per (task, thread) pair.
            [sn.assert_eq(sn.count(matches), n_tasks * n_cpus)],
            sn.map(lambda m: sn.assert_lt(grp(m, 1), grp(m, 2)), matches),
            sn.map(lambda m: sn.assert_lt(grp(m, 3), grp(m, 4)), matches),
            sn.map(lambda m: sn.assert_lt(grp(m, 1), n_cpus), matches),
            sn.map(lambda m: sn.assert_eq(grp(m, 2), n_cpus), matches),
            sn.map(lambda m: sn.assert_lt(grp(m, 3), n_tasks), matches),
            sn.map(lambda m: sn.assert_eq(grp(m, 4), n_tasks), matches),
        ))
def assert_count_gpus(self):
    '''Assert GPU count is consistent.'''
    found_lines = sn.count(
        sn.findall(r'\[\S+\] Found \d+ gpu\(s\)', self.stdout))
    latency_lines = sn.count(
        sn.findall(
            r'\[\S+\] \[gpu \d+\] Kernel launch '
            r'latency: \S+ us', self.stdout))
    return sn.all([
        # One 'Found N gpu(s)' line per task...
        sn.assert_eq(found_lines, sn.getattr(self.job, 'num_tasks')),
        # ...and one latency line per (task, gpu) pair.
        sn.assert_eq(latency_lines,
                     self.job.num_tasks * self.num_gpus_per_node)
    ])
def eval_sanity(self):
    '''Validate a greasy run.

    Checks that one ``output-*`` file exists per requested greasy task, that
    every output file contains consistent hello-world lines, and that the
    greasy log reports a clean summary.

    :returns: ``True`` on success; a failing check raises through
        ``sn.evaluate``.
    '''
    # Fix: dropped a dead ``output_files = []`` assignment that was
    # immediately overwritten.
    output_files = [
        file for file in os.listdir(self.stagedir)
        if file.startswith('output-')
    ]
    num_greasy_tasks = len(output_files)
    failure_msg = (f'Requested {self.num_greasy_tasks} task(s), but '
                   f'executed only {num_greasy_tasks} tasks(s)')
    sn.evaluate(
        sn.assert_eq(num_greasy_tasks, self.num_greasy_tasks,
                     msg=failure_msg))
    num_tasks = sn.getattr(self, 'nranks_per_worker')
    num_cpus_per_task = sn.getattr(self, 'num_cpus_per_task')

    def tid(match):
        # Thread id (capture group 1).
        return int(match.group(1))

    def num_threads(match):
        # Reported thread count (capture group 2).
        return int(match.group(2))

    def rank(match):
        # MPI rank (capture group 3).
        return int(match.group(3))

    def num_ranks(match):
        # Reported rank count (capture group 4).
        return int(match.group(4))

    for output_file in output_files:
        # Fix: ``os.listdir`` returns bare file names; resolve them inside
        # the stage directory, since sanity checking does not necessarily
        # run with ``self.stagedir`` as the working directory.
        output_path = os.path.join(self.stagedir, output_file)
        result = sn.findall(
            r'Hello, World from thread \s*(\d+) out '
            r'of \s*(\d+) from process \s*(\d+) out of '
            r'\s*(\d+)', output_path)
        failure_msg = (f'Found {sn.count(result)} Hello, World... '
                       f'pattern(s) but expected '
                       f'{num_tasks * num_cpus_per_task} pattern(s) '
                       f'inside the output file {output_file}')
        sn.evaluate(
            sn.assert_eq(sn.count(result), num_tasks * num_cpus_per_task,
                         msg=failure_msg))
        sn.evaluate(
            sn.all(
                sn.chain(
                    sn.map(
                        lambda x: sn.assert_lt(
                            tid(x), num_threads(x),
                            # Fix: message now describes the thread-id
                            # check instead of a bogus thread count claim.
                            msg=(f'Thread id {tid(x)} is not lower than '
                                 f'the number of threads '
                                 f'{num_threads(x)}')), result),
                    sn.map(
                        lambda x: sn.assert_lt(
                            rank(x), num_ranks(x),
                            msg=(f'Rank id {rank(x)} is not lower than the '
                                 f'number of ranks {self.nranks_per_worker} '
                                 f'in output file')), result),
                    sn.map(
                        lambda x: sn.assert_lt(
                            tid(x), self.num_cpus_per_task,
                            # Fix: was mislabelled 'Rank id'; this checks
                            # the thread id.
                            msg=(f'Thread id {tid(x)} is not lower than '
                                 f'the number of cpus per task '
                                 f'{self.num_cpus_per_task} in output '
                                 f'file {output_file}')), result),
                    sn.map(
                        lambda x: sn.assert_eq(
                            num_threads(x), num_cpus_per_task,
                            msg=(f'Found {num_threads(x)} threads rather '
                                 f'than {self.num_cpus_per_task} in output '
                                 f'file {output_file}')), result),
                    sn.map(
                        lambda x: sn.assert_lt(
                            rank(x), num_tasks,
                            # Fix: copy-pasted message talked about threads
                            # and cpus-per-task; this checks the rank.
                            msg=(f'Rank id {rank(x)} is not lower than the '
                                 f'number of ranks '
                                 f'{self.nranks_per_worker} in output file '
                                 f'{output_file}')), result),
                    sn.map(
                        lambda x: sn.assert_eq(
                            num_ranks(x), num_tasks,
                            msg=(f'Number of ranks {num_ranks(x)} is not '
                                 f'equal to {self.nranks_per_worker} in '
                                 f'output file {output_file}')), result))))

    sn.evaluate(sn.assert_found(r'Finished greasing', self.greasy_logfile))
    sn.evaluate(
        sn.assert_found((f'INFO: Summary of {self.num_greasy_tasks} '
                         f'tasks: '
                         f'{self.num_greasy_tasks} OK, '
                         f'0 FAILED, '
                         f'0 CANCELLED, '
                         fr'0 INVALID\.'), self.greasy_logfile))
    return True
def assert_num_gpus(self):
    '''Check that every task reported a ``Test passed`` line.'''
    passed = sn.count(
        sn.findall(r'^\s*\[[^\]]*\]\s*Test passed', self.stdout))
    return sn.assert_eq(passed, sn.getattr(self.job, 'num_tasks'))
def b(self):
    '''Deferred accessor for the private ``_b`` attribute.'''
    value = sn.getattr(self, '_b')
    return value
def __init__(self, variant, lang, linkage):
    '''Hello-World compile/run check for the CSCS systems.'''
    self.linkage = linkage
    self.variables = {'CRAYPE_LINK_TYPE': linkage}
    self.prgenv_flags = {}
    self.lang_names = {'c': 'C', 'cpp': 'C++', 'f90': 'Fortran 90'}
    self.descr = f'{self.lang_names[lang]} Hello World'
    self.sourcepath = 'hello_world'
    self.build_system = 'SingleSource'
    self.valid_systems = [
        'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'kesch:cn',
        'tiger:gpu', 'arolla:cn', 'arolla:pn', 'tsa:cn', 'tsa:pn'
    ]
    self.valid_prog_environs = [
        'PrgEnv-cray', 'PrgEnv-cray_classic', 'PrgEnv-intel', 'PrgEnv-gnu',
        'PrgEnv-pgi', 'PrgEnv-gnu-nocuda', 'PrgEnv-pgi-nocuda'
    ]
    sysname = self.current_system.name
    if sysname in ['kesch', 'arolla', 'tsa']:
        self.exclusive_access = True

    # Static linking is not supported on kesch: disable every environment
    # so the static variant is skipped there.
    if sysname in ['kesch'] and linkage == 'static':
        self.valid_prog_environs = []

    self.compilation_time_seconds = None
    hello = sn.findall(
        r'Hello World from thread \s*(\d+) out '
        r'of \s*(\d+) from process \s*(\d+) out of '
        r'\s*(\d+)', self.stdout)
    n_tasks = sn.getattr(self, 'num_tasks')
    n_cpus = sn.getattr(self, 'num_cpus_per_task')

    def grp(match, idx):
        # Capture group ``idx`` of a hello-world line as an integer.
        return int(match.group(idx))

    # Every printed (thread, process) pair must be consistent with the
    # requested task/thread geometry.
    self.sanity_patterns = sn.all(
        sn.chain(
            [sn.assert_eq(sn.count(hello), n_tasks * n_cpus)],
            sn.map(lambda m: sn.assert_lt(grp(m, 1), grp(m, 2)), hello),
            sn.map(lambda m: sn.assert_lt(grp(m, 3), grp(m, 4)), hello),
            sn.map(lambda m: sn.assert_lt(grp(m, 1), n_cpus), hello),
            sn.map(lambda m: sn.assert_eq(grp(m, 2), n_cpus), hello),
            sn.map(lambda m: sn.assert_lt(grp(m, 3), n_tasks), hello),
            sn.map(lambda m: sn.assert_eq(grp(m, 4), n_tasks), hello),
        ))
    self.perf_patterns = {
        'compilation_time': sn.getattr(self, 'compilation_time_seconds')
    }
    self.reference = {'*': {'compilation_time': (60, None, 0.1, 's')}}
    self.maintainers = ['VH', 'EK']
    self.tags = {'production', 'craype'}
def set_sanity_patterns(self):
    '''Expect exactly one ``nidNNNN`` hostname line per task.'''
    hostnames = sn.findall(r'^nid\d+$', self.stdout)
    self.sanity_patterns = sn.assert_eq(
        sn.getattr(self, 'num_tasks'), sn.count(hostnames))
def capture_build_time(self):
    '''Report the measured compilation time against a 60 s reference.'''
    self.perf_patterns = {
        'compilation_time': sn.getattr(self, 'compilation_time_seconds')
    }
    # 60 s nominal; no lower bound, upper bound +10%.
    self.reference = {'*': {'compilation_time': (60, None, 0.1, 's')}}