Example #1
def setUp(self):
    self.job = Job.create(FakeJobScheduler(),
                          self.launcher,
                          name='fake_job',
                          script_filename='fake_script',
                          stdout='fake_stdout',
                          stderr='fake_stderr',
                          sched_account='fake_account',
                          sched_partition='fake_partition',
                          sched_reservation='fake_reservation',
                          sched_nodelist='mynode',
                          sched_exclude_nodelist='fake_exclude_nodelist',
                          sched_exclusive_access='fake_exclude_access',
                          sched_options=['--fake'])
    self.job.num_tasks = 4
    self.job.num_tasks_per_node = 2
    self.job.num_tasks_per_core = 1
    self.job.num_tasks_per_socket = 1
    self.job.num_cpus_per_task = 2
    self.job.use_smt = True
    self.job.time_limit = '10m'
    self.job.options += ['--gres=gpu:4', '#DW jobdw anything']
    self.job.launcher.options = ['--foo']
    self.minimal_job = Job.create(FakeJobScheduler(),
                                  self.launcher,
                                  name='fake_job')
    self.minimal_job.launcher.options = ['--foo']
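
For context, Job.create is a factory: besides the scheduler and launcher
instances, only 'name' is required here and everything else falls back to a
default, which is what minimal_job above exercises. A minimal sketch of the
same call against the real local backend, assuming the ReFrame 3.x import
paths used implicitly by the later examples on this page:

from reframe.core.backends import getlauncher, getscheduler
from reframe.core.schedulers import Job

# Only the required arguments; all sched_* settings and task counts
# keep their defaults.
job = Job.create(getscheduler('local')(),
                 getlauncher('local')(),
                 name='minimal_job')
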
Example #2
def getallnodes(state='all', jobs_cli_options=None):
    rt = runtime.runtime()
    nodes = {}
    for part in rt.system.partitions:
        # This job will not be submitted; it is used only to filter
        # the nodes based on the partition configuration.
        dummy_job = Job.create(part.scheduler,
                               part.launcher_type(),
                               name='placeholder-job',
                               sched_access=part.access,
                               sched_options=jobs_cli_options)

        available_nodes = part.scheduler.allnodes()
        available_nodes = part.scheduler.filternodes(dummy_job,
                                                     available_nodes)
        getlogger().debug(
            f'Total available nodes for {part.name}: {len(available_nodes)}'
        )

        if state.casefold() != 'all':
            available_nodes = {n for n in available_nodes
                               if n.in_state(state)}
            getlogger().debug(
                f'[F] Selecting nodes in state {state!r}: '
                f'available nodes now: {len(available_nodes)}'
            )

        nodes[part.fullname] = [n.name for n in available_nodes]

    return nodes
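
getallnodes reads the partitions from runtime.runtime(), so it must be
called inside an initialized ReFrame runtime. A hedged usage sketch (the
state and CLI options below are illustrative, not required values):

# Assumes a ReFrame runtime has already been initialized elsewhere.
nodes = getallnodes(state='idle', jobs_cli_options=['--account=myproject'])
for partition, nodenames in nodes.items():
    print(f'{partition}: {len(nodenames)} node(s) selected')
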
Example #3
def _remote_detect(part):
    def _emit_script(job, env):
        launcher_cmd = job.launcher.run_command(job)
        commands = [
            './bootstrap.sh',
            f'{launcher_cmd} ./bin/reframe --detect-host-topology=topo.json'
        ]
        job.prepare(commands, env, trap_errors=True)

    getlogger().info(
        f'Detecting topology of remote partition {part.fullname!r}: '
        f'this may take some time...')
    topo_info = {}
    try:
        prefix = runtime.runtime().get_option('general/0/remote_workdir')
        with _copy_reframe(prefix) as dirname:
            with osext.change_dir(dirname):
                job = Job.create(part.scheduler,
                                 part.launcher_type(),
                                 name='rfm-detect-job',
                                 sched_access=part.access)
                _emit_script(job, [part.local_env])
                getlogger().debug('submitting detection script')
                _log_contents(job.script_filename)
                job.submit()
                job.wait()
                getlogger().debug('job finished')
                _log_contents(job.stdout)
                _log_contents(job.stderr)
                topo_info = json.loads(_contents('topo.json'))
    except Exception as e:
        getlogger().warning(f'failed to retrieve remote processor info: {e}')

    return topo_info
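
The prepare/submit/wait calls above are the generic Job life cycle and are
not specific to remote detection. A minimal sketch of the same sequence with
the local backend (the command and job name are illustrative):

from reframe.core.backends import getlauncher, getscheduler
from reframe.core.schedulers import Job

job = Job.create(getscheduler('local')(),
                 getlauncher('local')(),
                 name='probe-job')
job.prepare(['hostname'], [], trap_errors=True)  # generates the job script
job.submit()
job.wait()
print(job.stdout, job.exitcode)
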
Example #4
def _setup_fake_check():
    # A bit hacky, but we don't want to run a full test every time
    test = _FakeCheck()
    test._job = Job.create(
        getscheduler('local')(),
        getlauncher('local')(), 'fakejob')
    test.job._completion_time = time.time()
    return test
Example #5
def _make_job(**jobargs):
    return Job.create(scheduler(),
                      launcher(),
                      name='testjob',
                      workdir=tmp_path,
                      script_filename=str(tmp_path / 'job.sh'),
                      stdout=str(tmp_path / 'job.out'),
                      stderr=str(tmp_path / 'job.err'),
                      **jobargs)
Example #6
def fake_check():
    class _FakeCheck(rfm.RegressionTest):
        pass

    # A bit hacky, but we don't want to run a full test every time
    test = _FakeCheck()
    test._job = Job.create(
        getscheduler('local')(),
        getlauncher('local')(), 'fakejob')
    test.job._completion_time = time.time()
    return test
Example #7
def _make_flexible_job(flex_type, **jobargs):
    ret = Job.create(slurm_scheduler_patched,
                     getlauncher('local')(),
                     name='testjob',
                     workdir=tmp_path,
                     script_filename=str(tmp_path / 'job.sh'),
                     stdout=str(tmp_path / 'job.out'),
                     stderr=str(tmp_path / 'job.err'),
                     sched_flex_alloc_nodes=flex_type,
                     **jobargs)
    ret.num_tasks = 0
    ret.num_tasks_per_node = 4
    return ret
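
Note on the flexible job above: setting num_tasks to 0 enables ReFrame's
flexible node allocation, where the Slurm backend scales the job to however
many nodes satisfy the sched_flex_alloc_nodes policy (e.g. 'all' or 'idle'),
placing num_tasks_per_node tasks on each selected node.
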
Example #8
def setUp(self):
    self.workdir = tempfile.mkdtemp(dir='unittests')
    self.testjob = Job.create(
        self.scheduler,
        self.launcher,
        name='testjob',
        workdir=self.workdir,
        script_filename=os_ext.mkstemp_path(dir=self.workdir,
                                            suffix='.sh'),
        stdout=os_ext.mkstemp_path(dir=self.workdir, suffix='.out'),
        stderr=os_ext.mkstemp_path(dir=self.workdir, suffix='.err'),
    )
    self.environs = [Environment(name='foo', modules=['testmod_foo'])]
    self.pre_run = ['echo prerun']
    self.post_run = ['echo postrun']
    self.parallel_cmd = 'hostname'
Example #9
def setUp(self):
    # Monkey-patch the scheduler to simulate retrieval of nodes from Slurm
    patched_sched = getscheduler('slurm')()
    patched_sched.allnodes = self.create_dummy_nodes
    patched_sched._get_default_partition = lambda: 'pdef'

    self.workdir = tempfile.mkdtemp(dir='unittests')
    self.testjob = Job.create(
        patched_sched,
        getlauncher('local')(),
        name='testjob',
        workdir=self.workdir,
        script_filename=os.path.join(self.workdir, 'testjob.sh'),
        stdout=os.path.join(self.workdir, 'testjob.out'),
        stderr=os.path.join(self.workdir, 'testjob.err'))
    self.testjob._sched_flex_alloc_nodes = 'all'
    self.testjob.num_tasks_per_node = 4
    self.testjob.num_tasks = 0
Example #10
def fake_check():
    class _FakeCheck(rfm.RegressionTest):
        pass

    @sn.deferrable
    def error():
        raise BaseException

    # A bit hacky, but we don't want to run a full test every time
    test = _FakeCheck()
    test._job = Job.create(
        getscheduler('local')(),
        getlauncher('local')(), 'fakejob')
    test.job._completion_time = time.time()
    test.job._jobid = 12345
    test.job._nodelist = ['localhost']
    test.custom = 'hello extras'
    test.custom_list = ['custom', 3.0, ['hello', 'world']]
    test.custom_dict = {'a': 1, 'b': 2}
    test.deferred = sn.defer('hello')
    test.deferred_error = error()
    return test
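
error() above does not raise when called: sn.deferrable wraps the function
into a deferred expression that fails only when evaluated. A small sketch of
that behavior, assuming sn is reframe.utility.sanity as in ReFrame itself:

import reframe.utility.sanity as sn

@sn.deferrable
def boom():
    raise BaseException('raised only on evaluation')

expr = boom()          # no exception yet; expr is a deferred expression
try:
    sn.evaluate(expr)  # evaluating it triggers the wrapped call
except BaseException as exc:
    print(exc)
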
Example #11
def fake_check():
    class _FakeCheck(rfm.RegressionTest):
        param = parameter(range(3), loggable=True, fmt=lambda x: 10 * x)
        custom = variable(str, value='hello extras', loggable=True)
        custom_list = variable(list,
                               value=['custom', 3.0, ['hello', 'world']],
                               loggable=True)
        custom_dict = variable(dict, value={'a': 1, 'b': 2}, loggable=True)

        # x is a loggable variable that is left undefined. We want to make
        # sure that logging does not crash and simply reports it as
        # undefined.
        x = variable(str, loggable=True)

    # A bit hacky, but we don't want to run a full test every time
    test = _FakeCheck(variant_num=1)
    test._job = Job.create(
        getscheduler('local')(),
        getlauncher('local')(), 'fakejob')
    test.job._completion_time = time.time()
    test.job._jobid = 12345
    test.job._nodelist = ['localhost']
    return test
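
A note on the parameter above: variant_num=1 instantiates the test with the
second value of param (that is, 1), while the fmt callable only changes how
the value is rendered in the logs (here, presumably, as 10); it does not
affect the value the test itself sees.
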
Example #12
def _make_job(launcher, *args, **kwargs):
    return Job.create(FakeJobScheduler(), launcher, 'fake_job', *args,
                      **kwargs)
Example #13
def make_job(self, *args, **kwargs):
    return Job(*args, **kwargs)
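
Unlike every other example on this page, this helper instantiates Job
directly instead of going through the Job.create factory, which only works
on versions where Job itself is concrete; with current ReFrame the factory
form shown in the examples above is the one to use.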