Code Example #1
File: hdf5_compile_run.py Project: victorusu/reframe
def cdt_2105_skip(self):
    # cray-hdf5 is supported only on PrgEnv-nvidia for cdt >= 21.05
    if self.current_environ.name == 'PrgEnv-nvidia':
        self.skip_if(
            osext.cray_cdt_version() < '21.05',
            "cray-hdf5 is not supported for cdt < 21.05 on PrgEnv-nvidia")
    elif self.current_environ.name == 'PrgEnv-pgi':
        self.skip_if(
            osext.cray_cdt_version() >= '21.05',
            "cray-hdf5 is not supported for cdt >= 21.05 on PrgEnv-pgi")
Code Example #2
    def cdt2008_pgi_workaround(self):
        cdt = osext.cray_cdt_version()
        if not cdt:
            return

        if (self.current_environ.name == 'PrgEnv-pgi' and cdt == '20.08'):
            self.variables.update({'CUDA_HOME': '$CUDATOOLKIT_HOME'})
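
The early return makes the workaround a no-op when cray_cdt_version() returns None, i.e. when no CDT is installed. The same guard also matters for the ordered comparisons used elsewhere (e.g. cdt <= '20.11' in Code Example #3), which would raise a TypeError on None.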
Code Example #3
def skip_modules(self):
    # FIXME: These modules should be fixed in later releases
    cdt = osext.cray_cdt_version()
    if (cdt and cdt <= '20.11' and self.cray_module in {
            'cray-petsc-complex', 'cray-petsc-complex-64', 'cudatoolkit'
    }):
        self.valid_systems = []
Code Example #4
File: test_utility.py Project: victorusu/reframe
def test_cray_cdt_version_unknown_fmt(tmp_path, monkeypatch):
    # Mock up a CDT file
    rcfile = tmp_path / 'rcfile'
    with open(rcfile, 'w') as fp:
        fp.write('random stuff')

    monkeypatch.setenv('MODULERCFILE', str(rcfile))
    assert osext.cray_cdt_version() is None
Code Example #5
File: test_utility.py Project: victorusu/reframe
def test_cray_cdt_version(tmp_path, monkeypatch):
    # Mock up a CDT file
    rcfile = tmp_path / 'rcfile'
    with open(rcfile, 'w') as fp:
        fp.write('#%Module CDT 20.06\nblah blah\n')

    monkeypatch.setenv('MODULERCFILE', str(rcfile))
    assert osext.cray_cdt_version() == '20.06'
Code Example #6
def cdt2006_workaround_intel(self):
    if (self.current_environ.name == 'PrgEnv-intel'
            and osext.cray_cdt_version() == '20.06'):
        self.modules += ['cray-netcdf-hdf5parallel']
        self.prebuild_cmds = [
            'ln -s $CRAY_NETCDF_HDF5PARALLEL_PREFIX/lib/pkgconfig/'
            'netcdf-cxx4_parallel.pc netcdf_c++4_parallel.pc'
        ]
        self.variables['PKG_CONFIG_PATH'] = '.:$PKG_CONFIG_PATH'
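
The prebuild command symlinks the parallel netcdf-cxx4 pkg-config file into the working directory under the name the build expects (netcdf_c++4_parallel.pc), and prepending '.' to PKG_CONFIG_PATH lets pkg-config find it there.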
Code Example #7
    def cdt2006_workaround_dynamic(self):
        if (osext.cray_cdt_version() == '20.06' and self.linkage == 'dynamic'
                and self.current_environ.name == 'PrgEnv-gnu'):
            self.variables['PATH'] = (
                '/opt/cray/pe/cce/10.0.1/cce-clang/x86_64/bin:$PATH')
            self.prgenv_flags[self.current_environ.name] += ['-fuse-ld=lld']

            # GCC >= 9 is required for the above option; our CUDA-friendly CDT
            # uses GCC 8 as default.
            self.modules += ['gcc/9.3.0']
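
This workaround puts the LLVM linker shipped with CCE 10.0.1 on the PATH and selects it with -fuse-ld=lld; as the comment explains, the default GCC 8 of this CDT does not accept that option, so a gcc/9.3.0 module is loaded as well.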
Code Example #8
    def __init__(self):
        super().__init__()
        self.valid_systems = ['daint:login', 'dom:login']

        # FIXME: These modules should be fixed in later releases
        cdt = osext.cray_cdt_version()
        if (cdt and cdt <= '20.11' and self.cray_module in {
                'cray-petsc-complex', 'cray-petsc-complex-64', 'cudatoolkit'
        }):
            self.valid_systems = []
Code Example #9
File: check_ipcmagic.py Project: toxa81/reframe
    def __init__(self):
        self.descr = 'Distributed training with TensorFlow using ipyparallel'
        self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_prog_environs = ['PrgEnv-gnu']
        cray_cdt_version = osext.cray_cdt_version()
        # FIXME: The following will not be needed after the Daint upgrade
        if self.current_system.name == 'dom':
            self.modules = [
                'ipcmagic',
                f'Horovod/0.21.0-CrayGNU-{cray_cdt_version}-tf-2.4.0'
            ]
        else:
            self.modules = [
                'ipcmagic', 'Horovod/0.19.1-CrayGNU-20.08-tf-2.2.0'
            ]

        self.num_tasks = 2
        self.num_tasks_per_node = 1
        self.executable = 'ipython'
        self.executable_opts = ['tf-hvd-sgd-ipc-tf2.py']
        nids = sn.extractall(r'nid(?P<nid>\d+)', self.stdout, 'nid', str)
        self.sanity_patterns = sn.all(
            [sn.assert_ne(nids, []),
             sn.assert_ne(nids[0], nids[1])])
        self.reference = {
            'daint:gpu': {
                'slope': (2.0, -0.1, 0.1, None),
                'offset': (0.0, -0.1, 0.1, None),
                'retries': (0, None, None, None),
                'time': (10, None, None, 's'),
            },
            'dom:gpu': {
                'slope': (2.0, -0.1, 0.1, None),
                'offset': (0.0, -0.1, 0.1, None),
                'retries': (0, None, None, None),
                'time': (10, None, None, 's'),
            }
        }
        self.perf_patterns = {
            'slope':
            sn.extractsingle(r'slope=(?P<slope>\S+)', self.stdout, 'slope',
                             float),
            'offset':
            sn.extractsingle(r'offset=(?P<offset>\S+)', self.stdout, 'offset',
                             float),
            'retries':
            4 -
            sn.count(sn.findall(r'IPCluster is already running', self.stdout)),
            'time':
            sn.extractsingle(
                r'IPCluster is ready\!\s+'
                r'\((?P<time>\d+) seconds\)', self.stdout, 'time', float)
        }
        self.maintainers = ['RS', 'TR']
        self.tags = {'production'}
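
On dom, the CDT version is interpolated directly into the module name (Horovod/0.21.0-CrayGNU-<cdt>-tf-2.4.0), so the test automatically loads the Horovod build matching the installed CDT, while daint falls back to a fixed 20.08 build until the upgrade mentioned in the FIXME.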
Code Example #10
    def cdt_pgi_workaround(self):
        cdt = osext.cray_cdt_version()
        if not cdt:
            return

        if cdt == '20.08':
            self.build_system.fflags += [
                'CUDA_HOME=$CUDATOOLKIT_HOME', '-Mcuda=cuda10.2'
            ]
        else:
            # FIXME: PGI 20.x does not support CUDA 11, see case #275674
            self.modules += ['cudatoolkit/10.2.89_3.29-7.0.2.1_3.5__g67354b4']
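
With CDT 20.08 the PGI compiler is explicitly pointed at CUDA 10.2 through its build flags; on any other CDT version, a pinned cudatoolkit 10.2 module is loaded instead because, per the FIXME, PGI 20.x cannot build against the CUDA 11 that newer CDTs provide.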
Code Example #11
File: environ_check.py Project: toxa81/reframe
    def __init__(self):
        super().__init__()
        self.valid_systems = ['daint:login', 'dom:login']

        # FIXME: These modules should be fixed in later releases;
        # gcc was already fixed in 20.11
        cdt = osext.cray_cdt_version()
        if ((cdt and cdt <= '20.11' and self.cray_module
             in ['cray-petsc-complex', 'cray-petsc-complex-64', 'cudatoolkit'])
                or (cdt and cdt < '20.11' and self.cray_module == 'gcc')):
            self.valid_systems = []
Code Example #12
File: test_utility.py Project: victorusu/reframe
def test_cray_cdt_version_no_such_file(tmp_path, monkeypatch):
    # Mock up a CDT file
    rcfile = tmp_path / 'rcfile'
    monkeypatch.setenv('MODULERCFILE', str(rcfile))
    assert osext.cray_cdt_version() is None
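
Taken together, Code Examples #4, #5 and #12 pin down the observable behaviour of osext.cray_cdt_version(): it reads the file named by the MODULERCFILE environment variable, extracts the version from a '#%Module CDT <version>' header line, and returns None when the file is missing or has an unrecognized format. A minimal sketch consistent with these tests (not the actual ReFrame implementation, which may differ in default lookup paths and error handling) could look like this:

import os
import re


def cray_cdt_version():
    # Hypothetical sketch mirroring only the behaviour exercised by the
    # tests above; the real ReFrame may also consult default locations.
    rcfile = os.environ.get('MODULERCFILE')
    if not rcfile or not os.path.exists(rcfile):
        return None

    with open(rcfile) as fp:
        # Expect a header line such as '#%Module CDT 20.06'
        match = re.search(r'#%Module\s+CDT\s+(\S+)', fp.read())

    return match.group(1) if match else None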
Code Example #13
def cdt_2105_workaround(self):
    # FIXME: The mkl libraries are not found in cdt 21.05, CASE #285117
    if osext.cray_cdt_version() == '21.05':
        self.build_system.ldflags += [
            '-L/opt/intel/oneapi/mkl/latest/lib/intel64/'
        ]
Code Example #14
    def __init__(self, model, mpi_task):
        self.descr = 'Distributed training with Pytorch and Horovod'
        self.valid_systems = ['daint:gpu']
        if mpi_task < 20:
            self.valid_systems += ['dom:gpu']

        self.valid_prog_environs = ['builtin']
        cray_cdt_version = osext.cray_cdt_version()
        self.modules = [f'Horovod/0.19.5-CrayGNU-{cray_cdt_version}-pt-1.6.0']
        self.num_tasks_per_node = 1
        self.num_cpus_per_task = 12
        self.num_tasks = mpi_task
        batch_size = 64
        self.variables = {
            'NCCL_DEBUG': 'INFO',
            'NCCL_IB_HCA': 'ipogif0',
            'NCCL_IB_CUDA_SUPPORT': '1',
            'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
        }
        hash = 'master'
        git_url = f'https://raw.githubusercontent.com/horovod/horovod/{hash}/examples/pytorch'  # noqa: E501
        git_src = 'pytorch_synthetic_benchmark.py'
        self.prerun_cmds = [f'wget {git_url}/{git_src}']

        if model == 'inception_v3':
            self.prerun_cmds += [
                'python3 -m venv --system-site-packages myvenv',
                'source myvenv/bin/activate',
                'pip install scipy',
                'sed -i "s-output = model(data)-output, aux = model(data)-"'
                f' {git_src}',
                'sed -i "s-data = torch.randn(args.batch_size, 3, 224, 224)-'
                f'data = torch.randn(args.batch_size, 3, 299, 299)-"'
                f' {git_src}'
            ]

        self.executable = 'python'
        self.executable_opts = [
            git_src,
            f'--model {model}',
            f'--batch-size {batch_size}',
            '--num-iters 5',
            '--num-batches-per-iter 5'
        ]
        self.tags = {'production'}
        self.maintainers = ['RS', 'HM']
        self.sanity_patterns = sn.all([
            sn.assert_found(rf'Model: {model}', self.stdout),
            sn.assert_found(rf'Batch size: {batch_size}', self.stdout)
        ])
        self.perf_patterns = {
            'throughput_per_gpu': sn.extractsingle(
                r'Img/sec per GPU: (?P<throughput_per_gpu>\S+) \S+',
                self.stdout, 'throughput_per_gpu', float
            ),
            'throughput_per_job': sn.extractsingle(
                r'Total img/sec on \d+ GPU\(s\): (?P<throughput>\S+) \S+',
                self.stdout, 'throughput', float
            ),
        }
        ref_per_gpu = 131 if model == 'inception_v3' else 201
        ref_per_job = ref_per_gpu * mpi_task
        self.reference = {
            'dom:gpu': {
                'throughput_per_gpu': (ref_per_gpu, -0.1, None, 'images/s'),
                'throughput_per_job': (ref_per_job, -0.1, None, 'images/s'),
            },
            'daint:gpu': {
                'throughput_per_gpu': (ref_per_gpu, -0.1, None, 'images/s'),
                'throughput_per_job': (ref_per_job, -0.1, None, 'images/s'),
            }
        }
Code Example #15
File: cuda_aware_mpi.py Project: toxa81/reframe
def cdt2008_pgi_workaround(self):
    if (self.current_environ.name == 'PrgEnv-pgi'
            and osext.cray_cdt_version() == '20.08'
            and self.current_system.name in ['daint', 'dom']):
        self.variables['CUDA_HOME'] = '$CUDATOOLKIT_HOME'