def assert_count_gpus(self):
    '''Check that every task found its GPUs and timed each kernel launch.'''
    gpus_found = sn.assert_eq(
        sn.count(sn.findall(r'\[\S+\] Found \d+ gpu\(s\)', self.stdout)),
        self.num_tasks_assigned
    )
    launches_timed = sn.assert_eq(
        sn.count(sn.findall(r'\[\S+\] \[gpu \d+\] Kernel launch '
                            r'latency: \S+ us', self.stdout)),
        self.num_tasks_assigned * self.num_gpus_per_node
    )
    return sn.all([gpus_found, launches_timed])
def test_sanity_multiple_patterns(dummytest, sanity_file, dummy_gpu_exec_ctx):
    '''Sanity passes only when the pattern count matches exactly.'''
    sanity_file.write_text('result1 = success\n'
                           'result2 = success\n')

    # Simulate a pure sanity test: drop the performance patterns entirely.
    dummytest.perf_patterns = None

    # Exactly two matches exist, so requiring two must pass...
    dummytest.sanity_patterns = sn.assert_eq(
        sn.count(sn.findall(r'result\d = success', sanity_file)), 2)
    _run_sanity(dummytest, *dummy_gpu_exec_ctx, skip_perf=True)

    # ...while requiring three must fail.
    dummytest.sanity_patterns = sn.assert_eq(
        sn.count(sn.findall(r'result\d = success', sanity_file)), 3)
    with pytest.raises(SanityError):
        _run_sanity(dummytest, *dummy_gpu_exec_ctx, skip_perf=True)
def assert_count_gpus(self):
    '''Assert GPU count is consistent.'''
    detect_re = r'\[\S+\] Found \d+ gpu\(s\)'
    latency_re = (r'\[\S+\] \[gpu \d+\] Kernel launch '
                  r'latency: \S+ us')
    return sn.all([
        # One detection line per job task (deferred lookup of num_tasks).
        sn.assert_eq(sn.count(sn.findall(detect_re, self.stdout)),
                     sn.getattr(self.job, 'num_tasks')),
        # One latency line per GPU of every task.
        sn.assert_eq(sn.count(sn.findall(latency_re, self.stdout)),
                     self.job.num_tasks * self.num_gpus_per_node)
    ])
def __init__(self):
    '''Flexible CUDA memtest check built from the upstream tarball.'''
    super().__init__()
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = ['PrgEnv-cray']
    self.descr = 'Flexible Cuda Memtest'
    self.maintainers = ['TM', 'VK']
    self.num_tasks_per_node = 1
    self.num_tasks = 0          # flexible: run on all available nodes
    self.num_gpus_per_node = 1
    self.modules = ['cudatoolkit']
    self.sourcesdir = None
    src_url = ('https://downloads.sourceforge.net/project/cudagpumemtest/'
               'cuda_memtest-1.2.3.tar.gz')
    self.prebuild_cmd = [
        f'wget {src_url}',
        'tar -xzf cuda_memtest-1.2.3.tar.gz --strip-components=1'
    ]
    self.executable = 'cuda_memtest_sm20'
    self.executable_opts = ['--disable_test', '6', '--num_passes', '1']
    # Tests 6 and 9 are disabled above; every other test id must report
    # finishing once per assigned task.
    valid_test_ids = set(range(11)) - {6, 9}
    assert_finished_tests = [
        sn.assert_eq(
            sn.count(sn.findall(f'Test{test_id} finished', self.stdout)),
            self.num_tasks_assigned)
        for test_id in valid_test_ids
    ]
    self.sanity_patterns = sn.all([
        *assert_finished_tests,
        sn.assert_not_found('(?i)ERROR', self.stdout),
        sn.assert_not_found('(?i)ERROR', self.stderr)
    ])
def __init__(self):
    '''Memory-stride bandwidth microbenchmark (single task).'''
    super().__init__()
    self.sourcepath = 'strides.cpp'
    self.build_system = 'SingleSource'
    self.valid_systems = ['daint:gpu', 'dom:gpu', 'daint:mc', 'dom:mc']
    self.valid_prog_environs = ['PrgEnv-gnu']
    self.num_tasks = 1
    self.num_tasks_per_node = 1
    # One 'bandwidth' line is expected per assigned task.
    num_bw_lines = sn.count(sn.findall(r'bandwidth', self.stdout))
    self.sanity_patterns = sn.assert_eq(num_bw_lines,
                                        self.num_tasks_assigned)
    self.perf_patterns = {
        'bandwidth': sn.extractsingle(r'bandwidth: (?P<bw>\S+) GB/s',
                                      self.stdout, 'bw', float)
    }
    # Core counts per partition.
    self.system_num_cpus = {
        'daint:mc': 72,
        'daint:gpu': 24,
        'dom:mc': 72,
        'dom:gpu': 24,
    }
    self.maintainers = ['SK']
    self.tags = {'benchmark', 'diagnostic'}
def __init__(self):
    '''Flexible CUDA memtest built from a patched upstream tarball.'''
    self.valid_systems = ['daint:gpu', 'dom:gpu', 'tiger:gpu']
    self.valid_prog_environs = ['PrgEnv-cray']
    self.descr = 'Flexible CUDA Memtest'
    self.maintainers = ['TM', 'SK']
    self.num_tasks_per_node = 1
    self.num_tasks = 0          # flexible: run on all available nodes
    self.num_gpus_per_node = 1
    self.modules = ['cudatoolkit']
    src_url = ('https://downloads.sourceforge.net/project/cudagpumemtest/'
               'cuda_memtest-1.2.3.tar.gz')
    self.prebuild_cmd = [
        f'wget {src_url}',
        'tar -xzf cuda_memtest-1.2.3.tar.gz',
        'cd cuda_memtest-1.2.3',
        'patch -p1 < ../cuda_memtest-1.2.3.patch'
    ]
    self.build_system = 'Make'
    self.executable = './cuda_memtest-1.2.3/cuda_memtest'
    self.executable_opts = ['--disable_test', '6', '--num_passes', '1']
    # Tests 6 and 9 are disabled above; every other test id must report
    # finishing once per assigned task.
    valid_test_ids = set(range(11)) - {6, 9}
    assert_finished_tests = [
        sn.assert_eq(
            sn.count(sn.findall(f'Test{test_id} finished', self.stdout)),
            self.num_tasks_assigned)
        for test_id in valid_test_ids
    ]
    self.sanity_patterns = sn.all([
        *assert_finished_tests,
        sn.assert_not_found('(?i)ERROR', self.stdout),
        sn.assert_not_found('(?i)ERROR', self.stderr)
    ])
    self.tags = {'diagnostic', 'ops', 'craype'}
def __init__(self):
    '''Memory-stride bandwidth microbenchmark (FASRC clusters).'''
    self.sourcepath = 'strides.cpp'
    self.build_system = 'SingleSource'
    self.valid_systems = [
        'cannon:local', 'cannon:local-gpu', 'cannon:gpu_test',
        'cannon:test', 'fasse:fasse', 'test:rc-testing'
    ]
    self.valid_prog_environs = ['builtin', 'gnu', 'gpu', 'intel']
    self.build_system.cxxflags = ['-std=c++11', '-lpthread']
    self.num_tasks = 1
    self.num_tasks_per_node = 1
    # One 'bandwidth' line is expected per assigned task.
    num_bw_lines = sn.count(sn.findall(r'bandwidth', self.stdout))
    self.sanity_patterns = sn.assert_eq(num_bw_lines,
                                        self.num_tasks_assigned)
    self.perf_patterns = {
        'bandwidth': sn.extractsingle(r'bandwidth: (?P<bw>\S+) GB/s',
                                      self.stdout, 'bw', float)
    }
    # Core counts per partition; '*' is the fallback entry.
    self.system_num_cpus = {
        'cannon:local': 48,
        'cannon:local-gpu': 32,
        'cannon:gpu_test': 16,
        'cannon:test': 48,
        'fasse:fasse': 48,
        'test:rc-testing': 36,
        '*': 32,
    }
def __init__(self):
    '''OpenFOAM-Extend interMixingFoam dambreak tutorial check.'''
    super().__init__()
    self.descr = ('OpenFOAM-Extend check of interMixingFoam: '
                  'dambreak tutorial')
    # The output must mention the air phase fraction exactly 2944 times.
    num_fraction_lines = sn.count(
        sn.findall(r'Air phase volume fraction', self.stdout))
    self.sanity_patterns = sn.assert_eq(num_fraction_lines, 2944)
def program_begin_count(obj):
    '''Report the number of ``PROGRAM_BEGIN`` records in the otf2 file
    (trace validation).
    '''
    return sn.count(sn.findall(r'^(?P<wl>PROGRAM_BEGIN)\s+', obj.rpt))
def setup(self, partition, environ, **job_opts):
    '''Configure the per-partition task layout and the perf/sanity checks.'''
    if partition.fullname in ['daint:gpu', 'dom:gpu']:
        self.num_tasks_per_node = 2
        self.num_cpus_per_task = 12
    else:
        self.num_tasks_per_node = 4
        self.num_cpus_per_task = 18

    # Flexible test: normalise the extracted GFLOP/s by the node count so
    # a single reference value applies to any job size.
    num_nodes = self.num_tasks_assigned / self.num_tasks_per_node
    gflops = sn.extractsingle(
        r'HPCG result is VALID with a GFLOP\/s rating of:\s*'
        r'(?P<perf>\S+)', self.outfile_lazy, 'perf', float)
    self.perf_patterns = {'gflops': gflops / num_nodes}

    self.sanity_patterns = sn.all([
        sn.assert_eq(4, sn.count(sn.findall(r'PASSED', self.outfile_lazy))),
        # The node-count division above only makes sense if the task count
        # is an exact multiple of tasks per node.
        sn.assert_eq(0, self.num_tasks_assigned % self.num_tasks_per_node)
    ])
    super().setup(partition, environ, **job_opts)
def __init__(self, kernel_version):
    '''Shared-memory bandwidth check (``kernel_version`` set by subclasses).'''
    super().__init__()
    self.sourcepath = 'shmem.cu'
    self.build_system = 'SingleSource'
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = ['PrgEnv-gnu']
    self.num_tasks = 0          # flexible: run on all available nodes
    self.num_tasks_per_node = 1
    # Each task prints two 'Bandwidth' lines.
    self.sanity_patterns = sn.assert_eq(
        sn.count(sn.findall(r'Bandwidth', self.stdout)),
        self.num_tasks_assigned * 2)
    self.perf_patterns = {
        'bandwidth': sn.extractsingle(
            r'Bandwidth\(double\) (?P<bw>\S+) GB/s',
            self.stdout, 'bw', float)
    }
    # theoretical limit:
    # 8 [B/cycle] * 1.328 [GHz] * 16 [bankwidth] * 56 [SM] = 9520 GB/s
    #
    # BUG FIX: the upper threshold must be a positive fraction; the
    # original `1. - 9520/8850` evaluates to about -0.076, which puts the
    # upper bound below the lower bound and makes the check unsatisfiable.
    # The sibling check (craype variant) uses the correct form.
    self.reference = {
        'dom:gpu': {
            'bandwidth': (8850, -0.01, 9520 / 8850. - 1, 'GB/s')
        },
        'daint:gpu': {
            'bandwidth': (8850, -0.01, 9520 / 8850. - 1, 'GB/s')
        },
    }
    self.maintainers = ['SK']
    self.tags = {'benchmark', 'diagnostic'}
def test_sanity_failure_noassert(self):
    '''A bare ``findall`` with no matches must raise ``SanityError``.'''
    self.test.sanity_patterns = sn.findall(r'result = success',
                                           self.output_file.name)
    self.output_file.write('result = failure\n')
    self.output_file.close()
    with pytest.raises(SanityError):
        self.test.check_sanity()
def count_successful_burns(self):
    '''Set the sanity patterns to count the number of successful burns.'''
    ok_re = r'^\s*\[[^\]]*\]\s*GPU\s*\d+\(OK\)'
    num_ok = sn.count(sn.findall(ok_re, self.stdout))
    return sn.assert_eq(num_ok, self.num_tasks_assigned)
def setup(self, partition, environ, **job_opts):
    '''Install sanity/perf patterns and delegate to the base ``setup``.'''
    # Each output line carries four numbers, captured as groups:
    # (1) thread id, (2) thread count, (3) rank, (4) rank count.
    result = sn.findall(
        r'Hello World from thread \s*(\d+) out '
        r'of \s*(\d+) from process \s*(\d+) out of '
        r'\s*(\d+)', self.stdout)
    self.sanity_patterns = sn.all(
        sn.chain(
            [
                # One greeting per thread of every task.
                sn.assert_eq(sn.count(result),
                             self.num_tasks * self.num_cpus_per_task)
            ],
            # Each id must be below its reported count...
            sn.map(
                lambda x: sn.assert_lt(int(x.group(1)), int(x.group(2))),
                result),
            sn.map(
                lambda x: sn.assert_lt(int(x.group(3)), int(x.group(4))),
                result),
            # ...and ids/counts must agree with the job configuration.
            # NOTE: the lambdas read self.num_* lazily, at sanity-check
            # time, not at setup time.
            sn.map(
                lambda x: sn.assert_lt(int(x.group(1)),
                                       self.num_cpus_per_task), result),
            sn.map(
                lambda x: sn.assert_eq(int(x.group(2)),
                                       self.num_cpus_per_task), result),
            sn.map(lambda x: sn.assert_lt(int(x.group(3)), self.num_tasks),
                   result),
            sn.map(lambda x: sn.assert_eq(int(x.group(4)), self.num_tasks),
                   result),
        ))
    # Compilation time is measured elsewhere and reported as a metric.
    self.perf_patterns = {
        'compilation_time': sn.getattr(self, 'compilation_time_seconds')
    }
    self.reference = {'*': {'compilation_time': (60, None, 0.1)}}
    super().setup(partition, environ, **job_opts)
def __init__(self, name, *args, **kwargs):
    '''Wrap a check so that it is repeated ``self.multi_rep`` times.

    Scales the time limit, requires the sanity pattern once per
    repetition, and records both the average and every individual
    performance value.
    '''
    # BUG FIX: the original used `name is not ''`, which tests object
    # identity rather than equality (and raises a SyntaxWarning on modern
    # CPython). Equality is what is meant here.
    if name != '':
        name += '_'
    super().__init__('{0}{1}runs'.format(name, self.multi_rep),
                     *args, **kwargs)

    # Scale the assumed runtime (hours, minutes, seconds), carrying the
    # overflow from seconds into minutes and from minutes into hours.
    self.time_limit = (
        self.time_limit[0] * self.multi_rep +
        int((self.time_limit[1] * self.multi_rep) / 60),
        (self.time_limit[1] * self.multi_rep) % 60 +
        int((self.time_limit[2] * self.multi_rep) / 60),
        (self.time_limit[2] * self.multi_rep) % 60
    )

    # Check that the sanity pattern appears once per repetition.
    if hasattr(self, 'multirun_san_pat'):
        self.sanity_patterns = sn.assert_eq(
            sn.count(sn.findall(*self.multirun_san_pat)), self.multi_rep)

    # Create the list of result values: first the average and then all
    # single elements (to be stored).
    if hasattr(self, 'multirun_perf_pat'):
        self.perf_patterns = {}
        for key in self.multirun_perf_pat:
            self.perf_patterns[key] = sn.avg(
                sn.extractall(*self.multirun_perf_pat[key]))
            for run in range(self.multi_rep):
                self.perf_patterns['{}_{}'.format(key, run)] = \
                    sn.extractall(*self.multirun_perf_pat[key])[run]
def __init__(self):
    '''CUDA shared-memory bandwidth check (craype variant).'''
    self.sourcepath = 'shmem.cu'
    self.build_system = 'SingleSource'
    self.valid_systems = ['daint:gpu', 'dom:gpu', 'tiger:gpu']
    self.valid_prog_environs = ['PrgEnv-gnu']
    self.num_tasks = 0          # flexible: run on all available nodes
    self.num_tasks_per_node = 1
    self.num_gpus_per_node = 1
    if self.current_system.name in {'daint', 'dom', 'tiger'}:
        self.modules = ['craype-accel-nvidia60']
    # Each task prints two 'Bandwidth' lines.
    self.sanity_patterns = sn.assert_eq(
        sn.count(sn.findall(r'Bandwidth', self.stdout)),
        self.num_tasks_assigned * 2)
    self.perf_patterns = {
        'bandwidth':
        sn.extractsingle(r'Bandwidth\(double\) (?P<bw>\S+) GB/s',
                         self.stdout, 'bw', float)
    }
    # theoretical limit for P100:
    # 8 [B/cycle] * 1.328 [GHz] * 16 [bankwidth] * 56 [SM] = 9520 GB/s
    bw_ref = (8850, -0.01, 9520 / 8850. - 1, 'GB/s')
    self.reference = {
        'dom:gpu': {'bandwidth': bw_ref},
        'daint:gpu': {'bandwidth': bw_ref}
    }
    self.maintainers = ['SK']
    self.tags = {'benchmark', 'diagnostic', 'craype'}
def test_sanity_multiple_patterns(self):
    '''Sanity passes when the pattern count matches, fails otherwise.'''
    self.output_file.write('result1 = success\n')
    self.output_file.write('result2 = success\n')
    self.output_file.close()

    # Simulate a pure sanity test; invalidate the reference values
    self.test.reference = {}

    # Two matches exist, so requiring two succeeds...
    self.test.sanity_patterns = sn.assert_eq(
        sn.count(sn.findall(r'result\d = success', self.output_file.name)),
        2)
    self.test.check_sanity()

    # ...while requiring three must fail.
    self.test.sanity_patterns = sn.assert_eq(
        sn.count(sn.findall(r'result\d = success', self.output_file.name)),
        3)
    self.assertRaises(SanityError, self.test.check_sanity)
def __init__(self, variant, lang, linkage):
    '''Hello-world check parameterized over language and linkage type.'''
    self.linkage = linkage
    self.variables = {'CRAYPE_LINK_TYPE': linkage}
    self.prgenv_flags = {}
    self.lang_names = {'c': 'C', 'cpp': 'C++', 'f90': 'Fortran 90'}
    self.descr = self.lang_names[lang] + ' Hello World'
    self.sourcepath = 'hello_world'
    self.build_system = 'SingleSource'
    self.valid_systems = ['ubelix:compute', 'ubelix:gpu']
    self.valid_prog_environs = ['foss', 'intel']
    # Filled in elsewhere; exposed below as a performance metric.
    self.compilation_time_seconds = None
    # Each output line carries four numbers, captured as groups:
    # (1) thread id, (2) thread count, (3) rank, (4) rank count.
    result = sn.findall(
        r'Hello World from thread \s*(\d+) out '
        r'of \s*(\d+) from process \s*(\d+) out of '
        r'\s*(\d+)', self.stdout)
    num_tasks = sn.getattr(self, 'num_tasks')
    num_cpus_per_task = sn.getattr(self, 'num_cpus_per_task')

    # Small helpers naming the regex capture groups.
    def tid(match):
        return int(match.group(1))

    def num_threads(match):
        return int(match.group(2))

    def rank(match):
        return int(match.group(3))

    def num_ranks(match):
        return int(match.group(4))

    self.sanity_patterns = sn.all(
        sn.chain(
            [
                # One greeting per thread of every task.
                sn.assert_eq(sn.count(result),
                             num_tasks * num_cpus_per_task)
            ],
            # Each id must be below its reported count...
            sn.map(lambda x: sn.assert_lt(tid(x), num_threads(x)), result),
            sn.map(lambda x: sn.assert_lt(rank(x), num_ranks(x)), result),
            # ...and both must agree with the job configuration.
            sn.map(lambda x: sn.assert_lt(tid(x), num_cpus_per_task),
                   result),
            sn.map(
                lambda x: sn.assert_eq(num_threads(x), num_cpus_per_task),
                result),
            sn.map(lambda x: sn.assert_lt(rank(x), num_tasks), result),
            sn.map(lambda x: sn.assert_eq(num_ranks(x), num_tasks),
                   result),
        ))
    self.perf_patterns = {
        'compilation_time': sn.getattr(self, 'compilation_time_seconds')
    }
    self.reference = {'*': {'compilation_time': (60, None, 0.1, 's')}}
    self.maintainers = ['VH', 'EK']
    self.tags = {'production', 'prgenv'}
def __init__(self):
    '''Flexible hostname check: every allocated node must report its nid.'''
    self.valid_systems = ['daint:gpu', 'daint:mc']
    self.valid_prog_environs = ['cray']
    self.executable = 'hostname'
    self.num_tasks = 0          # flexible: one task per node
    self.num_tasks_per_node = 1
    num_nids = sn.count(sn.findall(r'^nid\d+$', self.stdout))
    self.sanity_patterns = sn.assert_eq(
        sn.getattr(self, 'num_tasks'), num_nids)
def __init__(self, **kwargs):
    '''OpenFOAM-Extend interMixingFoam dambreak tutorial check.'''
    super().__init__(
        'interMixingFoam',
        # BUG FIX: corrected the 'OpenFOA-Extend' typo in the description
        # (the sibling check spells it 'OpenFOAM-Extend').
        'OpenFOAM-Extend check of interMixingFoam: dambreak tutorial',
        **kwargs)
    # The output must mention the air phase fraction exactly 2944 times.
    self.sanity_patterns = sn.assert_eq(
        sn.count(sn.findall(r'Air phase volume fraction', self.stdout)),
        2944)
def __init__(self):
    '''OpenFOAM interMixingFoam dambreak tutorial check.'''
    super().__init__()
    self.descr = 'OpenFOAM check of interMixingFoam: dambreak tutorial'
    fraction_count = sn.count(sn.findall(
        '(?P<line>Air phase volume fraction)', self.stdout))
    self.sanity_patterns = sn.all([
        # 2534 fraction reports plus a final 'End' marker are expected.
        sn.assert_eq(fraction_count, 2534),
        sn.assert_found(r'^\s*[Ee]nd', self.stdout)
    ])
def __init__(self, exec_mode):
    '''FFTW benchmark; ``exec_mode`` selects the 'nompi' or MPI variant.'''
    self.sourcepath = 'fftw_benchmark.c'
    self.build_system = 'SingleSource'
    self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu']
    self.modules = ['cray-fftw']
    self.num_tasks_per_node = 12
    self.num_gpus_per_node = 0
    # Exactly one 'execution time' summary line is expected.
    self.sanity_patterns = sn.assert_eq(
        sn.count(sn.findall(r'execution time', self.stdout)), 1)
    self.build_system.cflags = ['-O2']
    if self.current_system.name == 'kesch':
        self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
        # kesch needs explicit FFTW include and link flags.
        self.build_system.cflags += [
            '-I$FFTW_INC', '-L$FFTW_DIR', '-lfftw3'
        ]
    elif self.current_system.name in {'daint', 'dom', 'tiger'}:
        self.valid_prog_environs = [
            'PrgEnv-cray', 'PrgEnv-pgi', 'PrgEnv-gnu'
        ]
    self.perf_patterns = {
        'fftw_exec_time':
        sn.extractsingle(r'execution time:\s+(?P<exec_time>\S+)',
                         self.stdout, 'exec_time', float),
    }
    if exec_mode == 'nompi':
        # Single-node variant: 12 tasks, smaller problem, tight tolerance.
        # NOTE(review): executable argument semantics are defined by
        # fftw_benchmark.c -- confirm before changing them.
        self.num_tasks = 12
        self.executable_opts = ['72 12 1000 0']
        self.reference = {
            'dom:gpu': {
                'fftw_exec_time': (0.55, None, 0.05, 's'),
            },
            'daint:gpu': {
                'fftw_exec_time': (0.55, None, 0.05, 's'),
            },
            'kesch:cn': {
                'fftw_exec_time': (0.61, None, 0.05, 's'),
            }
        }
    else:
        # Multi-node variant: 72 tasks with a much looser tolerance.
        self.num_tasks = 72
        self.executable_opts = ['144 72 200 1']
        self.reference = {
            'dom:gpu': {
                'fftw_exec_time': (0.47, None, 0.50, 's'),
            },
            'daint:gpu': {
                'fftw_exec_time': (0.47, None, 0.50, 's'),
            },
            'kesch:cn': {
                'fftw_exec_time': (1.58, None, 0.50, 's'),
            }
        }
    self.maintainers = ['AJ']
    self.tags = {'benchmark', 'scs', 'craype'}
def __init__(self):
    '''GPU burn stress test: each GPU must report OK and its GF/s rate.'''
    super().__init__()
    self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
    self.descr = 'GPU burn test'
    self.valid_prog_environs = ['PrgEnv-gnu']
    if self.current_system.name == 'kesch':
        self.exclusive_access = True
        self.modules = ['craype-accel-nvidia35']
        # NOTE: The first option indicates the precision (-d for double)
        # while the second is the time (in secs) to run the test.
        # For multi-gpu nodes, we run the gpu burn test for more
        # time to get reliable measurements.
        self.executable_opts = ['-d', '40']
        self.num_gpus_per_node = 16
        gpu_arch = '37'
    else:
        self.modules = ['craype-accel-nvidia60']
        self.executable_opts = ['-d', '20']
        self.num_gpus_per_node = 1
        gpu_arch = '60'
    self.sourcepath = 'gpu_burn.cu'
    self.build_system = 'SingleSource'
    # Compile for the exact GPU architecture selected above.
    self.build_system.cxxflags = [
        '-arch=compute_%s' % gpu_arch, '-code=sm_%s' % gpu_arch
    ]
    self.build_system.ldflags = ['-lcuda', '-lcublas', '-lnvidia-ml']
    # The number of 'OK' lines must match the number of assigned tasks.
    self.sanity_patterns = sn.assert_eq(
        sn.count(sn.findall('OK', self.stdout)), self.num_tasks_assigned)
    # Report the slowest GPU's GF/s figure as the performance value.
    self.perf_patterns = {
        'perf': sn.min(
            sn.extractall(r'GPU\s+\d+\(\S*\): (?P<perf>\S*) GF\/s',
                          self.stdout, 'perf', float))
    }
    self.reference = {
        'dom:gpu': {
            'perf': (4115, -0.10, None)
        },
        'daint:gpu': {
            'perf': (4115, -0.10, None)
        },
        'kesch:cn': {
            'perf': (950, -0.10, None)
        }
    }
    self.num_tasks = 0          # flexible: run on all available nodes
    self.num_tasks_per_node = 1
    self.maintainers = ['AJ', 'VK', 'TM']
    self.tags = {'diagnostic', 'benchmark'}
def test_findall(self):
    '''``findall`` must return every match and preserve regex groups.'''
    # BUG FIX: use raw strings for the regexes; '\d' inside a plain string
    # literal is an invalid escape sequence (DeprecationWarning since
    # Python 3.6, slated to become an error). The runtime pattern text is
    # unchanged.
    res = evaluate(sn.findall(r'Step: \d+', self.tempfile))
    self.assertEqual(3, len(res))
    res = evaluate(sn.findall(r'Step:.*', self.tempfile))
    self.assertEqual(3, len(res))
    res = evaluate(sn.findall(r'Step: [12]', self.tempfile))
    self.assertEqual(2, len(res))

    # Check the matches
    for expected, match in zip(['Step: 1', 'Step: 2'], res):
        self.assertEqual(expected, match.group(0))

    # Check groups
    res = evaluate(sn.findall(r'Step: (?P<no>\d+)', self.tempfile))
    for step, match in enumerate(res, start=1):
        self.assertEqual(step, int(match.group(1)))
        self.assertEqual(step, int(match.group('no')))
def validate_passed(self):
    '''Sanity: factorization valid, four PASSED lines, even task split.'''
    checks = [
        sn.assert_not_found(
            r'invalid because the ratio', self.outfile_lazy,
            msg='number of processes assigned could not be factorized'),
        sn.assert_eq(4, sn.count(sn.findall(r'PASSED', self.outfile_lazy))),
        sn.assert_eq(0, self.num_tasks_assigned % self.num_tasks_per_node),
    ]
    return sn.all(checks)
def test_findall(tempfile):
    '''``sn.findall`` returns all matches and preserves regex groups.'''
    matches = sn.evaluate(sn.findall(r'Step: \d+', tempfile))
    assert len(matches) == 3
    matches = sn.evaluate(sn.findall('Step:.*', tempfile))
    assert len(matches) == 3
    matches = sn.evaluate(sn.findall('Step: [12]', tempfile))
    assert len(matches) == 2

    # Verify the matched text itself
    for expected, m in zip(['Step: 1', 'Step: 2'], matches):
        assert m.group(0) == expected

    # Verify numbered and named groups
    matches = sn.evaluate(sn.findall(r'Step: (?P<no>\d+)', tempfile))
    for step, m in enumerate(matches, start=1):
        assert int(m.group(1)) == step
        assert int(m.group('no')) == step
def __init__(self):
    '''Compile-and-run check for a 16-thread hello-world program.'''
    self.valid_systems = ['*']
    self.valid_prog_environs = ['*']
    self.sourcepath = 'hello_threads.cpp'
    self.executable_opts = ['16']
    self.build_system = 'SingleSource'
    self.build_system.cxxflags = ['-std=c++11', '-Wall']
    # Each of the 16 threads must print exactly one greeting.
    self.sanity_patterns = sn.assert_eq(
        sn.len(sn.findall(r'\[\s?\d+\] Hello, World\!', self.stdout)),
        16)
def __init__(self):
    '''Distributed TensorFlow training check driven through ipyparallel.'''
    self.descr = 'Distributed training with TensorFlow using ipyparallel'
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = ['PrgEnv-gnu']
    cray_cdt_version = osext.cray_cdt_version()
    # FIXME: The following will not be needed after the Daint upgrade
    if self.current_system.name == 'dom':
        self.modules = [
            'ipcmagic',
            f'Horovod/0.21.0-CrayGNU-{cray_cdt_version}-tf-2.4.0'
        ]
    else:
        self.modules = [
            'ipcmagic', 'Horovod/0.19.1-CrayGNU-20.08-tf-2.2.0'
        ]
    self.num_tasks = 2
    self.num_tasks_per_node = 1
    self.executable = 'ipython'
    self.executable_opts = ['tf-hvd-sgd-ipc-tf2.py']
    # Sanity: at least one node id appears and the first two differ,
    # i.e. the two tasks landed on two distinct nodes.
    nids = sn.extractall(r'nid(?P<nid>\d+)', self.stdout, 'nid', str)
    self.sanity_patterns = sn.all(
        [sn.assert_ne(nids, []), sn.assert_ne(nids[0], nids[1])])
    self.reference = {
        'daint:gpu': {
            'slope': (2.0, -0.1, 0.1, None),
            'offset': (0.0, -0.1, 0.1, None),
            'retries': (0, None, None, None),
            'time': (10, None, None, 's'),
        },
        'dom:gpu': {
            'slope': (2.0, -0.1, 0.1, None),
            'offset': (0.0, -0.1, 0.1, None),
            'retries': (0, None, None, None),
            'time': (10, None, None, 's'),
        }
    }
    self.perf_patterns = {
        'slope': sn.extractsingle(r'slope=(?P<slope>\S+)', self.stdout,
                                  'slope', float),
        'offset': sn.extractsingle(r'offset=(?P<offset>\S+)', self.stdout,
                                   'offset', float),
        # retries = 4 minus the number of 'IPCluster is already running'
        # messages seen in the output.
        'retries': 4 - sn.count(sn.findall(r'IPCluster is already running',
                                           self.stdout)),
        'time': sn.extractsingle(
            r'IPCluster is ready\!\s+'
            r'\((?P<time>\d+) seconds\)', self.stdout, 'time', float)
    }
    self.maintainers = ['RS', 'TR']
    self.tags = {'production'}
def __init__(self, variant):
    '''TensorFlow + Horovod benchmark; ``variant`` picks the job size.'''
    self.descr = 'Distributed training with TensorFlow and Horovod'
    self.valid_systems = ['daint:gpu']
    self.valid_prog_environs = ['PrgEnv-gnu']
    tfshortver = '1.14'
    self.sourcesdir = 'https://github.com/tensorflow/benchmarks'
    self.modules = ['Horovod/0.16.4-CrayGNU-19.06-tf-%s.0' % tfshortver]
    if variant == 'small':
        # Small variant also runs on dom, with 8 tasks.
        self.valid_systems += ['dom:gpu']
        self.num_tasks = 8
        self.reference = {
            'dom:gpu': {
                'throughput': (1133.6, None, 0.05, 'images/s'),
            },
            'daint:gpu': {
                'throughput': (1134.8, None, 0.05, 'images/s')
            },
        }
    else:
        self.num_tasks = 32
        self.reference = {
            'daint:gpu': {
                'throughput': (4403.0, None, 0.05, 'images/s')
            },
        }
    self.num_tasks_per_node = 1
    self.num_cpus_per_task = 12
    # Average throughput over all reported 'total images/sec' lines.
    self.perf_patterns = {
        'throughput': sn.avg(
            sn.extractall(r'total images/sec:\s+(?P<throughput>\S+)',
                          self.stdout, 'throughput', float))
    }
    # Every task must report its throughput exactly once.
    self.sanity_patterns = sn.assert_eq(
        sn.count(sn.findall(r'total images/sec:', self.stdout)),
        self.num_tasks)
    # Pin the benchmarks repo to the branch matching the TF version.
    self.pre_run = ['git checkout cnn_tf_v%s_compatible' % tfshortver]
    self.variables = {
        'NCCL_DEBUG': 'INFO',
        'NCCL_IB_HCA': 'ipogif0',
        'NCCL_IB_CUDA_SUPPORT': '1',
        'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
    }
    self.executable = 'python'
    self.executable_opts = [
        'scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py',
        '--model inception3', '--batch_size 64',
        '--variable_update horovod', '--log_dir ./logs',
        '--train_dir ./checkpoints'
    ]
    self.tags = {'production'}
    self.maintainers = ['MS', 'RS']
def __init__(self):
    '''Check that an environment variable is expanded for every task.'''
    super().__init__()
    self.num_tasks = 2
    self.valid_systems = [
        'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'kesch:cn',
        'kesch:pn'
    ]
    self.executable = '/bin/echo'
    self.executable_opts = ['$MY_VAR']
    self.variables = {'MY_VAR': 'TEST123456!'}
    # The expanded value must be echoed once per task.
    self.sanity_patterns = sn.assert_eq(
        self.num_tasks,
        sn.count(sn.findall(r'TEST123456!', self.stdout)))