def cdt2008_pgi_workaround(self):
    """Work around a CUDA toolkit lookup problem with PGI on CDT 20.08.

    When running under PrgEnv-pgi on CDT 20.08, point ``CUDA_HOME`` at
    the Cray-provided toolkit; on any other configuration this is a
    no-op.
    """
    cdt = os_ext.cray_cdt_version()
    if not cdt:
        # Not a Cray CDT system; nothing to do.
        return

    using_pgi = self.current_environ.name == 'PrgEnv-pgi'
    if using_pgi and cdt == '20.08':
        self.variables.update({'CUDA_HOME': '$CUDATOOLKIT_HOME'})
def test_cray_cdt_version_unknown_fmt(tmp_path, monkeypatch):
    """An rcfile that lacks the CDT header line yields ``None``."""
    # Mock up a CDT file with unrecognizable contents
    rcfile = tmp_path / 'rcfile'
    rcfile.write_text('random stuff')
    monkeypatch.setenv('MODULERCFILE', str(rcfile))
    assert os_ext.cray_cdt_version() is None
def test_cray_cdt_version(tmp_path, monkeypatch):
    """The CDT version is extracted from a well-formed modulerc header."""
    # Mock up a CDT file with a valid '#%Module CDT <version>' header
    rcfile = tmp_path / 'rcfile'
    rcfile.write_text('#%Module CDT 20.06\nblah blah\n')
    monkeypatch.setenv('MODULERCFILE', str(rcfile))
    assert os_ext.cray_cdt_version() == '20.06'
def cdt2006_workaround_intel(self):
    """Work around a netCDF pkg-config issue with Intel on CDT 20.06.

    Loads the parallel netCDF module and symlinks its pkg-config file
    under the name the build expects, prepending the working directory
    to ``PKG_CONFIG_PATH`` so it is picked up.
    """
    if self.current_environ.name != 'PrgEnv-intel':
        return

    # Checked second so the CDT lookup is skipped for other environments,
    # matching the original short-circuit order.
    if os_ext.cray_cdt_version() != '20.06':
        return

    self.modules += ['cray-netcdf-hdf5parallel']
    self.prebuild_cmds = [
        'ln -s $CRAY_NETCDF_HDF5PARALLEL_PREFIX/lib/pkgconfig/'
        'netcdf-cxx4_parallel.pc netcdf_c++4_parallel.pc'
    ]
    self.variables['PKG_CONFIG_PATH'] = '.:$PKG_CONFIG_PATH'
def cdt2006_workaround_dynamic(self):
    """Work around a dynamic-linking issue with GNU on CDT 20.06.

    Switches the linker to LLD (via a CCE-provided clang toolchain on
    ``PATH``) and loads GCC 9, which the ``-fuse-ld=lld`` option
    requires.
    """
    needs_workaround = (os_ext.cray_cdt_version() == '20.06' and
                        self.linkage == 'dynamic' and
                        self.current_environ.name == 'PrgEnv-gnu')
    if not needs_workaround:
        return

    self.variables['PATH'] = (
        '/opt/cray/pe/cce/10.0.1/cce-clang/x86_64/bin:$PATH')
    self.prgenv_flags[self.current_environ.name] += ['-fuse-ld=lld']
    # GCC >= 9 is required for the -fuse-ld=lld option; our CUDA-friendly
    # CDT uses GCC 8 as default, so load a newer compiler explicitly.
    self.modules += ['gcc/9.3.0']
def __init__(self):
    """Configure the TensorFlow + ipyparallel distributed-training check.

    Runs an IPython script on two nodes and verifies via the sanity
    patterns that the two ranks landed on distinct nodes.
    """
    self.descr = 'Distributed training with TensorFlow using ipyparallel'
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = ['PrgEnv-gnu']

    # FIXME: The following will not be needed after the Daint upgrade
    cdt = os_ext.cray_cdt_version() or '19.10'
    self.modules = [
        'ipcmagic',
        f'Horovod/0.19.1-CrayGNU-{cdt}-tf-2.2.0'
    ]

    self.num_tasks = 2
    self.num_tasks_per_node = 1
    self.executable = 'ipython'
    self.executable_opts = ['tf-hvd-sgd-ipc-tf2.py']

    # The two tasks must run on different nodes: extract all node ids
    # and require the first two to differ.
    nids = sn.extractall(r'nid(?P<nid>\d+)', self.stdout, 'nid', str)
    self.sanity_patterns = sn.all([
        sn.assert_ne(nids, []),
        sn.assert_ne(nids[0], nids[1]),
    ])

    # Both partitions share identical reference values.
    perf_ref = {
        'slope': (2.0, -0.1, 0.1, None),
        'offset': (0.0, -0.1, 0.1, None),
        'retries': (0, None, None, None),
        'time': (10, None, None, 's'),
    }
    self.reference = {
        'daint:gpu': dict(perf_ref),
        'dom:gpu': dict(perf_ref),
    }

    self.perf_patterns = {
        'slope': sn.extractsingle(r'slope=(?P<slope>\S+)',
                                  self.stdout, 'slope', float),
        'offset': sn.extractsingle(r'offset=(?P<offset>\S+)',
                                   self.stdout, 'offset', float),
        'retries': 4 - sn.count(
            sn.findall(r'IPCluster is already running', self.stdout)),
        'time': sn.extractsingle(
            r'IPCluster is ready\!\s+\((?P<time>\d+) seconds\)',
            self.stdout, 'time', float),
    }
    self.maintainers = ['RS', 'TR']
    self.tags = {'production'}
def __init__(self, boostver):
    """Configure the Boost-with-Python-bindings hello-world check.

    :param boostver: Boost version string used to select the
        ``Boost/<ver>-CrayGNU-<cdt>-python3`` module.
    """
    self.descr = f'Test for Boost {boostver} with Python bindings'
    self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc']
    self.valid_prog_environs = ['builtin']

    # NOTE(review): cray_cdt_version() may return None on non-CDT
    # systems, which would produce a module name containing 'None';
    # other checks in this suite fall back to a default version --
    # confirm whether a fallback is needed here too.
    cdt_version = os_ext.cray_cdt_version()
    self.modules = [f'Boost/{boostver}-CrayGNU-{cdt_version}-python3']

    # Fixed: this was an f-string with no placeholders (ruff F541).
    self.executable = 'python3 hello.py'
    self.sanity_patterns = sn.assert_found('hello, world', self.stdout)

    # Query the interpreter for '<major><minor>' (e.g. '38') to pick the
    # matching boost_python library name.
    version_cmd = ('python3 -c \'import sys; '
                   'ver=sys.version_info; '
                   'print(f"{ver.major}{ver.minor}")\'')
    self.variables = {
        'PYTHON_INCLUDE': '$(python3-config --includes)',
        'PYTHON_BOOST_LIB': f'boost_python$({version_cmd})'
    }
    self.maintainers = ['JB', 'AJ']
    self.tags = {'scs', 'production'}
def cdt2006_cpp_workaround(self):
    """Load a pinned cray-hdf5 module for the C++ variant on CDT 20.06."""
    on_cdt2006 = os_ext.cray_cdt_version() == '20.06'
    if on_cdt2006 and self.lang == 'cpp':
        self.modules += ['cray-hdf5/1.10.6.1']
def cdt2008_pgi_workaround(self):
    """Point ``CUDA_HOME`` at the Cray toolkit for PGI on CDT 20.08.

    Applies only on the ``daint``/``dom`` systems; a no-op everywhere
    else.
    """
    if self.current_environ.name != 'PrgEnv-pgi':
        return

    # Guards kept in the original evaluation order so the CDT lookup is
    # short-circuited for other environments.
    if osx.cray_cdt_version() != '20.08':
        return

    if self.current_system.name not in ['daint', 'dom']:
        return

    self.variables['CUDA_HOME'] = '$CUDATOOLKIT_HOME'
def test_cray_cdt_version_no_such_file(tmp_path, monkeypatch):
    """A nonexistent modulerc file yields ``None``."""
    # Point MODULERCFILE at a path that was never created.
    missing_rcfile = tmp_path / 'rcfile'
    monkeypatch.setenv('MODULERCFILE', str(missing_rcfile))
    assert os_ext.cray_cdt_version() is None
def __init__(self, variant):
    """Configure the Horovod/TF2 synthetic benchmark for *variant*.

    :param variant: ``'small'`` runs 8 tasks (daint + dom); any other
        value runs 32 tasks on daint only.
    """
    self.descr = 'Distributed training with TensorFlow2 and Horovod'
    self.valid_systems = ['daint:gpu']
    self.valid_prog_environs = ['builtin']

    # FIXME: The following will not be needed after the Daint upgrade
    cdt = os_ext.cray_cdt_version() or '19.10'
    self.modules = [f'Horovod/0.19.1-CrayGNU-{cdt}-tf-2.2.0']

    self.sourcesdir = None
    self.num_tasks_per_node = 1
    self.num_cpus_per_task = 12

    if variant == 'small':
        self.valid_systems += ['dom:gpu']
        self.num_tasks = 8
        # Identical references for both systems at this scale.
        small_ref = {
            'throughput': (1712, -0.05, None, 'images/s'),
            'throughput_per_gpu': (214, -0.05, None, 'images/s'),
        }
        self.reference = {
            'dom:gpu': dict(small_ref),
            'daint:gpu': dict(small_ref),
        }
    else:
        self.num_tasks = 32
        self.reference = {
            'daint:gpu': {
                'throughput': (6848, -0.05, None, 'images/s'),
                'throughput_per_gpu': (214, -0.05, None, 'images/s'),
            },
        }

    self.perf_patterns = {
        'throughput': sn.extractsingle(
            rf'Total img/sec on {self.num_tasks} GPU\(s\): '
            rf'(?P<throughput>\S+) \S+',
            self.stdout, 'throughput', float),
        'throughput_per_gpu': sn.extractsingle(
            r'Img/sec per GPU: (?P<throughput_per_gpu>\S+) \S+',
            self.stdout, 'throughput_per_gpu', float),
    }

    model = 'InceptionV3'
    batch_size = 64
    self.sanity_patterns = sn.all([
        sn.assert_found(rf'Model: {model}', self.stdout),
        sn.assert_found(rf'Batch size: {batch_size}', self.stdout),
    ])

    self.variables = {
        'NCCL_DEBUG': 'INFO',
        'NCCL_IB_HCA': 'ipogif0',
        'NCCL_IB_CUDA_SUPPORT': '1',
        'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
    }

    # Fetch the upstream benchmark script at a pinned revision.
    self.prerun_cmds = [
        'wget https://raw.githubusercontent.com/horovod/'
        'horovod/842d1075e8440f15e84364f494645c28bf20c3ae/'
        'examples/tensorflow2_synthetic_benchmark.py'
    ]
    self.executable = 'python'
    self.executable_opts = [
        'tensorflow2_synthetic_benchmark.py',
        f'--model {model}',
        f'--batch-size {batch_size}',
        '--num-iters 5',
        '--num-batches-per-iter 5',
        '--num-warmup-batches 5',
    ]
    self.tags = {'production'}
    self.maintainers = ['RS', 'TR']