def setUp(self):
    self.job = Job.create(FakeJobScheduler(),
                          self.launcher,
                          name='fake_job',
                          script_filename='fake_script',
                          stdout='fake_stdout',
                          stderr='fake_stderr',
                          sched_account='fake_account',
                          sched_partition='fake_partition',
                          sched_reservation='fake_reservation',
                          sched_nodelist='mynode',
                          sched_exclude_nodelist='fake_exclude_nodelist',
                          sched_exclusive_access='fake_exclude_access',
                          sched_options=['--fake'])
    self.job.num_tasks = 4
    self.job.num_tasks_per_node = 2
    self.job.num_tasks_per_core = 1
    self.job.num_tasks_per_socket = 1
    self.job.num_cpus_per_task = 2
    self.job.use_smt = True
    self.job.time_limit = '10m'
    self.job.options += ['--gres=gpu:4', '#DW jobdw anything']
    self.job.launcher.options = ['--foo']
    self.minimal_job = Job.create(FakeJobScheduler(),
                                  self.launcher,
                                  name='fake_job')
    self.minimal_job.launcher.options = ['--foo']
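# A hedged sketch of the kind of assertion this setUp() typically feeds;
# the test name and the exact expected command are illustrative assumptions,
# not actual expected values from these tests:
#
#   def test_run_command(self):
#       cmd = self.job.launcher.run_command(self.job)
#       assert '--foo' in cmd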
def getallnodes(state='all', jobs_cli_options=None):
    rt = runtime.runtime()
    nodes = {}
    for part in rt.system.partitions:
        # This job will not be submitted; it is used only to filter
        # the nodes based on the partition configuration
        dummy_job = Job.create(part.scheduler,
                               part.launcher_type(),
                               name='placeholder-job',
                               sched_access=part.access,
                               sched_options=jobs_cli_options)
        available_nodes = part.scheduler.allnodes()
        available_nodes = part.scheduler.filternodes(dummy_job,
                                                     available_nodes)
        getlogger().debug(
            f'Total available nodes for {part.name}: {len(available_nodes)}'
        )
        if state.casefold() != 'all':
            available_nodes = {n for n in available_nodes
                               if n.in_state(state)}
            getlogger().debug(
                f'[F] Selecting nodes in state {state!r}: '
                f'available nodes now: {len(available_nodes)}'
            )

        nodes[part.fullname] = [n.name for n in available_nodes]

    return nodes
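# Hedged usage sketch for getallnodes(); the partition and node names in the
# expected value are illustrative assumptions, not output of a real system:
#
#   nodes = getallnodes(state='idle', jobs_cli_options=['--account=xyz'])
#   # e.g. {'cluster:gpu': ['nid00001', 'nid00002'], 'cluster:mc': []}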
def _remote_detect(part):
    def _emit_script(job, env):
        launcher_cmd = job.launcher.run_command(job)
        commands = [
            './bootstrap.sh',
            f'{launcher_cmd} ./bin/reframe --detect-host-topology=topo.json'
        ]
        job.prepare(commands, env, trap_errors=True)

    getlogger().info(
        f'Detecting topology of remote partition {part.fullname!r}: '
        f'this may take some time...'
    )
    topo_info = {}
    try:
        prefix = runtime.runtime().get_option('general/0/remote_workdir')
        with _copy_reframe(prefix) as dirname:
            with osext.change_dir(dirname):
                job = Job.create(part.scheduler,
                                 part.launcher_type(),
                                 name='rfm-detect-job',
                                 sched_access=part.access)
                _emit_script(job, [part.local_env])
                getlogger().debug('submitting detection script')
                _log_contents(job.script_filename)
                job.submit()
                job.wait()
                getlogger().debug('job finished')
                _log_contents(job.stdout)
                _log_contents(job.stderr)
                topo_info = json.loads(_contents('topo.json'))
    except Exception as e:
        getlogger().warning(f'failed to retrieve remote processor info: {e}')

    return topo_info
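# The `_contents` and `_log_contents` helpers used above are defined
# elsewhere in the module; a minimal sketch of what they are assumed to do,
# assuming `os` and `getlogger` are already imported here:

def _contents(filename):
    '''Return the contents of a file (assumed helper).'''
    with open(filename) as fp:
        return fp.read()


def _log_contents(filename):
    '''Log a file's contents at debug level (assumed helper).'''
    filename = os.path.abspath(filename)
    getlogger().debug(f'--- {filename} ---\n'
                      f'{_contents(filename)}\n'
                      f'--- {filename} ---')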
def _setup_fake_check():
    # A bit hacky, but we don't want to run a full test every time
    test = _FakeCheck()
    test._job = Job.create(getscheduler('local')(),
                           getlauncher('local')(),
                           'fakejob')
    test.job._completion_time = time.time()
    return test
def _make_job(**jobargs):
    return Job.create(scheduler(), launcher(),
                      name='testjob',
                      workdir=tmp_path,
                      script_filename=str(tmp_path / 'job.sh'),
                      stdout=str(tmp_path / 'job.out'),
                      stderr=str(tmp_path / 'job.err'),
                      **jobargs)
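# Hedged usage sketch: `scheduler`, `launcher` and `tmp_path` are assumed to
# be pytest fixtures in scope, as the signature above suggests; the calls
# below are illustrative, not part of the original suite:
#
#   job = _make_job(sched_account='account1')
#   job.prepare(['hostname'], [])
#   job.submit()
#   job.wait()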
def fake_check():
    class _FakeCheck(rfm.RegressionTest):
        pass

    # A bit hacky, but we don't want to run a full test every time
    test = _FakeCheck()
    test._job = Job.create(getscheduler('local')(),
                           getlauncher('local')(),
                           'fakejob')
    test.job._completion_time = time.time()
    return test
def _make_flexible_job(flex_type, **jobargs):
    ret = Job.create(slurm_scheduler_patched,
                     getlauncher('local')(),
                     name='testjob',
                     workdir=tmp_path,
                     script_filename=str(tmp_path / 'job.sh'),
                     stdout=str(tmp_path / 'job.out'),
                     stderr=str(tmp_path / 'job.err'),
                     sched_flex_alloc_nodes=flex_type,
                     **jobargs)
    ret.num_tasks = 0
    ret.num_tasks_per_node = 4
    return ret
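# A hedged example of how the helper above might be exercised; the test name
# is an illustrative assumption and `slurm_scheduler_patched` / `tmp_path`
# are assumed to be fixtures available in this module:

def test_flexible_job_sketch():
    job = _make_flexible_job('all')
    # Flexible node allocation requests zero tasks up front and lets the
    # scheduler backend determine the node count
    assert job.num_tasks == 0
    assert job.num_tasks_per_node == 4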
def setUp(self):
    self.workdir = tempfile.mkdtemp(dir='unittests')
    self.testjob = Job.create(
        self.scheduler, self.launcher,
        name='testjob',
        workdir=self.workdir,
        script_filename=os_ext.mkstemp_path(dir=self.workdir, suffix='.sh'),
        stdout=os_ext.mkstemp_path(dir=self.workdir, suffix='.out'),
        stderr=os_ext.mkstemp_path(dir=self.workdir, suffix='.err'),
    )
    self.environs = [Environment(name='foo', modules=['testmod_foo'])]
    self.pre_run = ['echo prerun']
    self.post_run = ['echo postrun']
    self.parallel_cmd = 'hostname'
def setUp(self):
    # Monkey patch scheduler to simulate retrieval of nodes from Slurm
    patched_sched = getscheduler('slurm')()
    patched_sched.allnodes = self.create_dummy_nodes
    patched_sched._get_default_partition = lambda: 'pdef'

    self.workdir = tempfile.mkdtemp(dir='unittests')
    self.testjob = Job.create(
        patched_sched, getlauncher('local')(),
        name='testjob',
        workdir=self.workdir,
        script_filename=os.path.join(self.workdir, 'testjob.sh'),
        stdout=os.path.join(self.workdir, 'testjob.out'),
        stderr=os.path.join(self.workdir, 'testjob.err')
    )
    self.testjob._sched_flex_alloc_nodes = 'all'
    self.testjob.num_tasks_per_node = 4
    self.testjob.num_tasks = 0
def fake_check():
    class _FakeCheck(rfm.RegressionTest):
        pass

    @sn.deferrable
    def error():
        raise BaseException

    # A bit hacky, but we don't want to run a full test every time
    test = _FakeCheck()
    test._job = Job.create(getscheduler('local')(),
                           getlauncher('local')(),
                           'fakejob')
    test.job._completion_time = time.time()
    test.job._jobid = 12345
    test.job._nodelist = ['localhost']
    test.custom = 'hello extras'
    test.custom_list = ['custom', 3.0, ['hello', 'world']]
    test.custom_dict = {'a': 1, 'b': 2}
    test.deferred = sn.defer('hello')
    test.deferred_error = error()
    return test
def fake_check():
    class _FakeCheck(rfm.RegressionTest):
        param = parameter(range(3), loggable=True, fmt=lambda x: 10 * x)
        custom = variable(str, value='hello extras', loggable=True)
        custom_list = variable(list,
                               value=['custom', 3.0, ['hello', 'world']],
                               loggable=True)
        custom_dict = variable(dict, value={'a': 1, 'b': 2}, loggable=True)

        # x is a loggable variable that is left undefined. We want to make
        # sure that logging does not crash and simply reports it as
        # undefined.
        x = variable(str, loggable=True)

    # A bit hacky, but we don't want to run a full test every time
    test = _FakeCheck(variant_num=1)
    test._job = Job.create(getscheduler('local')(),
                           getlauncher('local')(),
                           'fakejob')
    test.job._completion_time = time.time()
    test.job._jobid = 12345
    test.job._nodelist = ['localhost']
    return test
def _make_job(launcher, *args, **kwargs):
    return Job.create(FakeJobScheduler(), launcher, 'fake_job',
                      *args, **kwargs)
def make_job(self, *args, **kwargs):
    return Job(*args, **kwargs)
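# Note: unlike the helpers above, which go through the Job.create() factory,
# this method instantiates Job directly with whatever the caller passes in.
# A hedged usage sketch; the keyword arguments below simply mirror those
# accepted by the other job constructors in this section and are
# illustrative only:
#
#   job = self.make_job(name='testjob',
#                       workdir=self.workdir,
#                       script_filename='testjob.sh')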