Example #1
    def get_schedsystem_config(self, descr):
        # Handle the special shortcuts first
        if descr == 'nativeslurm':
            return getscheduler('slurm'), getlauncher('srun')

        if descr == 'local':
            return getscheduler('local'), getlauncher('local')

        try:
            sched_descr, launcher_descr = descr.split('+')
        except ValueError:
            raise ValueError('invalid syntax for the '
                             'scheduling system: %s' % descr) from None

        return getscheduler(sched_descr), getlauncher(launcher_descr)
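For reference, a quick sketch of how this parser might be invoked; the `'slurm+mpirun'` descriptor and the `settings` object below are hypothetical stand-ins for whatever object defines this method:

    # Hypothetical call: split a 'sched+launcher' descriptor into the
    # corresponding scheduler and launcher classes.
    sched_cls, launcher_cls = settings.get_schedsystem_config('slurm+mpirun')

    # The shortcuts resolve the same way:
    assert settings.get_schedsystem_config('nativeslurm') == \
        (getscheduler('slurm'), getlauncher('srun'))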
Example #2
    def _setup_job(self, **job_opts):
        """Setup the job related to this check."""

        self.logger.debug('setting up the job descriptor')

        msg = 'job scheduler backend: {0}'
        self.logger.debug(
            msg.format('local' if self.is_local else
                       self._current_partition.scheduler.registered_name))

        # num_gpus_per_node is a managed resource
        if self.num_gpus_per_node > 0:
            self.extra_resources.setdefault(
                '_rfm_gpu', {'num_gpus_per_node': self.num_gpus_per_node})

        if self.local:
            scheduler_type = getscheduler('local')
            launcher_type = getlauncher('local')
        else:
            scheduler_type = self._current_partition.scheduler
            launcher_type = self._current_partition.launcher

        job_name = '%s_%s_%s_%s' % (
            self.name, self._sanitize_basename(self._current_system.name),
            self._sanitize_basename(self._current_partition.name),
            self._sanitize_basename(self._current_environ.name))
        job_script_filename = os.path.join(self._stagedir, job_name + '.sh')

        self._job = scheduler_type(
            name=job_name,
            command=' '.join([self.executable] + self.executable_opts),
            launcher=launcher_type(),
            environs=[
                self._current_partition.local_env, self._current_environ
            ],
            workdir=self._stagedir,
            num_tasks=self.num_tasks,
            num_tasks_per_node=self.num_tasks_per_node,
            num_tasks_per_core=self.num_tasks_per_core,
            num_tasks_per_socket=self.num_tasks_per_socket,
            num_cpus_per_task=self.num_cpus_per_task,
            use_smt=self.use_multithreading,
            time_limit=self.time_limit,
            script_filename=job_script_filename,
            stdout=self._stdout,
            stderr=self._stderr,
            pre_run=self.pre_run,
            post_run=self.post_run,
            sched_exclusive_access=self.exclusive_access,
            **job_opts)

        # Get job options from managed resources and prepend them to
        # job_opts. We want any user supplied options to be able to
        # override those set by the framework.
        resources_opts = []
        for r, v in self.extra_resources.items():
            resources_opts.extend(self._current_partition.get_resource(r, **v))

        self._job.options = (self._current_partition.access + resources_opts +
                             self._job.options)
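For context on the `get_resource()` call above: a partition resource is essentially a named set of option templates that get formatted with the values stored in `extra_resources`. A minimal stand-in for the idea (not ReFrame's actual implementation):

    def get_resource(option_templates, **values):
        # Format each option template with the supplied resource values,
        # e.g. '--gres=gpu:{num_gpus_per_node}' -> '--gres=gpu:2'.
        return [opt.format(**values) for opt in option_templates]

    get_resource(['--gres=gpu:{num_gpus_per_node}'], num_gpus_per_node=2)
    # -> ['--gres=gpu:2']

The generated options are prepended so that any user-supplied job options can still override them.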
Example #3
def _setup_fake_check():
    # A bit hacky, but we don't want to run a full test every time
    test = _FakeCheck()
    test._job = Job.create(
        getscheduler('local')(),
        getlauncher('local')(), 'fakejob')
    test.job._completion_time = time.time()
    return test
Example #4
    def setup(self, partition, environ, **job_opts):
        if partition.fullname == 'daint:gpu':
            self.num_tasks = 48
            self.num_tasks_per_node = 12
        else:
            self.num_tasks = 72
            self.num_tasks_per_node = 18

        super().setup(partition, environ, **job_opts)
        # The job launcher has to be changed since the `start_analytics`
        # script is not used with srun.
        self.job.launcher = getlauncher('local')()
Example #5

    def setUp(self):
        self.workdir = tempfile.mkdtemp(dir='unittests')
        slurm_scheduler = getscheduler('slurm')
        self.testjob = slurm_scheduler(
            name='testjob',
            launcher=getlauncher('local')(),
            workdir=self.workdir,
            script_filename=os.path.join(self.workdir, 'testjob.sh'),
            stdout=os.path.join(self.workdir, 'testjob.out'),
            stderr=os.path.join(self.workdir, 'testjob.err'))
        # Monkey-patch `_get_all_nodes` to simulate the extraction of
        # Slurm nodes through the use of `scontrol show`
        self.testjob._get_all_nodes = self.create_dummy_nodes
        self.testjob._sched_flex_alloc_tasks = 'all'
        self.testjob._num_tasks_per_node = 4
        self.testjob._num_tasks = 0
Example #6
    def _setup_job(self, **job_opts):
        """Setup the job related to this check."""

        self.logger.debug('setting up the job descriptor')

        msg = 'job scheduler backend: {0}'
        self.logger.debug(
            msg.format('local' if self.is_local else
                       self._current_partition.scheduler.registered_name))

        # num_gpus_per_node is a managed resource
        if self.num_gpus_per_node > 0:
            self.extra_resources.setdefault(
                '_rfm_gpu', {'num_gpus_per_node': self.num_gpus_per_node}
            )

        if self.local:
            scheduler_type = getscheduler('local')
            launcher_type = getlauncher('local')
        else:
            scheduler_type = self._current_partition.scheduler
            launcher_type = self._current_partition.launcher

        self._job = scheduler_type(
            name='rfm_%s_job' % self.name,
            launcher=launcher_type(),
            workdir=self._stagedir,
            num_tasks=self.num_tasks,
            num_tasks_per_node=self.num_tasks_per_node,
            num_tasks_per_core=self.num_tasks_per_core,
            num_tasks_per_socket=self.num_tasks_per_socket,
            num_cpus_per_task=self.num_cpus_per_task,
            use_smt=self.use_multithreading,
            time_limit=self.time_limit,
            sched_access=self._current_partition.access,
            sched_exclusive_access=self.exclusive_access,
            **job_opts)

        # Get job options from managed resources and prepend them to
        # job_opts. We want any user supplied options to be able to
        # override those set by the framework.
        resources_opts = []
        for r, v in self.extra_resources.items():
            resources_opts.extend(
                self._current_partition.get_resource(r, **v))

        self._job.options = resources_opts + self._job.options
Example #7
    def setUp(self):
        # Monkey patch scheduler to simulate retrieval of nodes from Slurm
        patched_sched = getscheduler('slurm')()
        patched_sched.allnodes = self.create_dummy_nodes
        patched_sched._get_default_partition = lambda: 'pdef'

        self.workdir = tempfile.mkdtemp(dir='unittests')
        self.testjob = Job.create(
            patched_sched,
            getlauncher('local')(),
            name='testjob',
            workdir=self.workdir,
            script_filename=os.path.join(self.workdir, 'testjob.sh'),
            stdout=os.path.join(self.workdir, 'testjob.out'),
            stderr=os.path.join(self.workdir, 'testjob.err'))
        self.testjob._sched_flex_alloc_nodes = 'all'
        self.testjob.num_tasks_per_node = 4
        self.testjob.num_tasks = 0
Example #8

    def setUp(self):
        self.workdir = tempfile.mkdtemp(dir='unittests')
        slurm_scheduler = getscheduler('slurm')
        self.testjob = slurm_scheduler(
            name='testjob',
            command='hostname',
            launcher=getlauncher('local')(),
            environs=[Environment(name='foo')],
            workdir=self.workdir,
            script_filename=os.path.join(self.workdir, 'testjob.sh'),
            stdout=os.path.join(self.workdir, 'testjob.out'),
            stderr=os.path.join(self.workdir, 'testjob.err'))
        self.builder = BashScriptBuilder()
        # Monkey-patch `_get_reservation_nodes` to simulate the extraction
        # of Slurm nodes through the use of `scontrol show`
        self.testjob._get_reservation_nodes = self.create_dummy_nodes
        self.testjob._num_tasks_per_node = 4
        self.testjob._num_tasks = 0
Example #9
File: spec.py Project: smoors/reframe
    def setup(self, partition, environ, **job_opts):
        self.pre_run = ['source ./shrc', 'mv %s config' %
                        self.configs[environ.name]]
        self.executable_opts = ['--config=%s' % self.configs[environ.name],
                                '--platform NVIDIA',
                                '--tune=base',
                                '--device GPU'] + self.benchmarks[environ.name]
        self.reference = {
            'dom:gpu':   self.refs[environ.name],
            'daint:gpu': self.refs[environ.name]
        }

        self.sanity_patterns = self.sanity_patterns_[environ.name]
        self.perf_patterns = self.perf_patterns_[environ.name]

        super().setup(partition, environ, **job_opts)
        # The job launcher has to be changed since the `runspec`
        # script is not used with srun.
        self.job.launcher = getlauncher('local')()
Example #10
    def prepare_run(self):
        if self.current_partition.fullname in ['daint:gpu', 'dom:gpu']:
            num_workers = 12
            exec_cores = 3
        else:
            num_workers = 36
            exec_cores = 9

        self.variables = {
            'SPARK_WORKER_CORES': '%s' % num_workers,
            'SPARK_LOCAL_DIRS': '"/tmp"',
        }
        self.executable = (
            'spark-submit --conf spark.default.parallelism=%s '
            '--conf spark.executor.cores=%s --conf spark.executor.memory=15g '
            '--master $SPARKURL --class org.apache.spark.examples.SparkPi '
            '$EBROOTSPARK/examples/jars/spark-examples_2.11-2.3.1.jar 10000;' %
            (num_workers, exec_cores))
        # The job launcher has to be changed since the `spark-submit`
        # script is not used with srun.
        self.job.launcher = getlauncher('local')()
Example #11

    def launcher(self):
        return getlauncher('local')(['--foo'])
Example #12

    def launcher(self):
        return getlauncher('srunalloc')(options=['--foo'])
Example #13

    def launcher(self):
        return getlauncher('mpiexec')(options=['--foo'])
Example #14

    def launcher(self):
        return launchers.LauncherWrapper(
            getlauncher('alps')(options=['--foo']), 'ddt', ['--offline'])
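`LauncherWrapper` simply prefixes the wrapped launcher's command line with another program, here the DDT debugger in offline mode. A rough stand-alone illustration of the wrapping idea (not ReFrame's class):

    def wrap(launch_cmd, wrapper, wrapper_opts):
        # Prepend the wrapper program and its options to the launcher
        # command, e.g. ['aprun', '--foo'] becomes
        # ['ddt', '--offline', 'aprun', '--foo'].
        return [wrapper, *wrapper_opts, *launch_cmd]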
Example #15

    def launcher(self):
        return getlauncher(self.launcher_name)()
Example #16

    def launcher(self):
        return getlauncher('alps')(options=['--foo'])
Example #17

    def launcher(self):
        return getlauncher('ssh')()
Example #18

    def launcher(self):
        return getlauncher('srun')()
Example #19

    def launcher(self):
        return launchers.LauncherWrapper(
            getlauncher('alps')(), 'ddt', ['--offline'])
Example #20

    def launcher(self):
        return getlauncher('srunalloc')()
Example #21

    def launcher(self):
        return getlauncher('alps')()
Example #22

    def launcher(self):
        return getlauncher('mpiexec')()
Example #23

    def set_launcher(self):
        # The job launcher has to be changed since the `start_analytics`
        # script is not used with srun.
        self.job.launcher = getlauncher('local')()
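In ReFrame 3.x-style tests, a method like this is typically registered as a pipeline hook so that it runs once the job object exists; a sketch, assuming the `rfm.run_after` decorator and a hypothetical test class (with `getlauncher` imported as in the examples above):

    import reframe as rfm

    class AnalyticsCheck(rfm.RunOnlyRegressionTest):
        ...

        @rfm.run_after('setup')
        def set_launcher(self):
            # `start_analytics` drives its own process placement, so
            # replace the partition launcher (e.g. srun) with the local one.
            self.job.launcher = getlauncher('local')()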
Example #24
    def compile(self):
        """The compilation phase of the regression test pipeline.

        :raises reframe.core.exceptions.ReframeError: In case of errors.
        """
        if not self._current_environ:
            raise PipelineError('no programming environment set')

        # Copy the check's resources to the stage directory
        if self.sourcesdir:
            try:
                commonpath = os.path.commonpath(
                    [self.sourcesdir, self.sourcepath])
            except ValueError:
                commonpath = None

            if commonpath:
                self.logger.warn(
                    "sourcepath `%s' seems to be a subdirectory of "
                    "sourcesdir `%s', but it will be interpreted "
                    "as relative to it." % (self.sourcepath, self.sourcesdir))

            if os_ext.is_url(self.sourcesdir):
                self._clone_to_stagedir(self.sourcesdir)
            else:
                self._copy_to_stagedir(
                    os.path.join(self._prefix, self.sourcesdir))

        # Verify the sourcepath and determine the sourcepath in the stagedir
        if (os.path.isabs(self.sourcepath)
                or os.path.normpath(self.sourcepath).startswith('..')):
            raise PipelineError(
                'self.sourcepath is an absolute path or does not point to a '
                'subfolder or a file contained in self.sourcesdir: ' +
                self.sourcepath)

        staged_sourcepath = os.path.join(self._stagedir, self.sourcepath)
        self.logger.debug('Staged sourcepath: %s' % staged_sourcepath)
        if os.path.isdir(staged_sourcepath):
            if not self.build_system:
                # Try to guess the build system
                cmakelists = os.path.join(staged_sourcepath, 'CMakeLists.txt')
                configure_ac = os.path.join(staged_sourcepath, 'configure.ac')
                configure_in = os.path.join(staged_sourcepath, 'configure.in')
                if os.path.exists(cmakelists):
                    self.build_system = 'CMake'
                    self.build_system.builddir = 'rfm_build'
                elif (os.path.exists(configure_ac)
                      or os.path.exists(configure_in)):
                    self.build_system = 'Autotools'
                    self.build_system.builddir = 'rfm_build'
                else:
                    self.build_system = 'Make'

            self.build_system.srcdir = self.sourcepath
        else:
            if not self.build_system:
                self.build_system = 'SingleSource'

            self.build_system.srcfile = self.sourcepath
            self.build_system.executable = self.executable

        # Prepare build job
        build_commands = [
            *self.prebuild_cmd,
            *self.build_system.emit_build_commands(self._current_environ),
            *self.postbuild_cmd
        ]
        environs = [
            self._current_partition.local_env, self._current_environ,
            self._user_environ
        ]
        self._build_job = getscheduler('local')(
            name='rfm_%s_build' % self.name,
            launcher=getlauncher('local')(),
            workdir=self._stagedir)

        with os_ext.change_dir(self._stagedir):
            try:
                self._build_job.prepare(build_commands,
                                        environs,
                                        login=True,
                                        trap_errors=True)
            except OSError as e:
                raise PipelineError('failed to prepare build job') from e

            self._build_job.submit()
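The `emit_build_commands()` call above is what turns the declarative build-system settings into shell commands. As a rough sketch of that contract (the exact commands vary by backend and ReFrame version), a `Make`-like backend might behave as follows:

    def emit_build_commands(srcdir=None, makefile=None, max_jobs=1):
        # Illustrative stand-in only: assemble a 'make' invocation from
        # the configured attributes; a real backend would also inject the
        # current environment's CC/CXX and flags.
        cmd = ['make', '-j', str(max_jobs)]
        if makefile:
            cmd += ['-f', makefile]
        if srcdir:
            cmd += ['-C', srcdir]
        return [' '.join(cmd)]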
Example #25

    def launcher(self):
        return getlauncher('local')()