def test_22_from_batch_command(self):
    """Test that `from_batch_command` produces a valid jobspec"""
    jobid = job.submit(
        self.fh, JobspecV1.from_batch_command("#!/bin/sh\nsleep 0", "nested sleep")
    )
    self.assertGreater(jobid, 0)
    # test that a shebang is required
    with self.assertRaises(ValueError):
        job.submit(
            self.fh,
            JobspecV1.from_batch_command("sleep 0", "nested sleep with no shebang"),
        )
def test_00_null_submit(self):
    with self.assertRaises(EnvironmentError) as error:
        job.submit(ffi.NULL, self.basic_jobspec)
    self.assertEqual(error.exception.errno, errno.EINVAL)

    with self.assertRaises(EnvironmentError) as error:
        job.submit_get_id(ffi.NULL)
    self.assertEqual(error.exception.errno, errno.EINVAL)

    with self.assertRaises(EnvironmentError) as error:
        job.submit(self.fh, ffi.NULL)
    self.assertEqual(error.exception.errno, errno.EINVAL)
def test_00_null_submit(self):
    with self.assertRaises(EnvironmentError) as error:
        job.submit(ffi.NULL, self.jobspec)
    self.assertEqual(error.exception.errno, errno.EINVAL)

    with self.assertRaises(EnvironmentError) as error:
        job.submit_get_id(ffi.NULL)
    self.assertEqual(error.exception.errno, errno.EINVAL)

    with self.assertRaises(EnvironmentError) as error:
        job.submit(self.fh, ffi.NULL)
    self.assertEqual(error.exception.errno, errno.EINVAL)
def test_15_job_cancel(self):
    self.sleep_jobspec = JobspecV1.from_command(["sleep", "1000"])
    jobid = job.submit(self.fh, self.sleep_jobspec, waitable=True)
    job.cancel(self.fh, jobid)
    fut = job.wait_async(self.fh, jobid=jobid).wait_for(5.0)
    return_id, success, errmsg = fut.get_status()
    self.assertEqual(return_id, jobid)
    self.assertFalse(success)
def test_20_003_job_event_watch_sync(self):
    jobid = job.submit(self.fh, JobspecV1.from_command(["sleep", "0"]))
    self.assertTrue(jobid > 0)
    future = job.event_watch_async(self.fh, jobid)
    self.assertIsInstance(future, job.JobEventWatchFuture)
    event = future.get_event()
    self.assertIsInstance(event, job.EventLogEvent)
    self.assertEqual(event.name, "submit")
    future.cancel()
def test_13_job_kvs(self):
    jobid = job.submit(self.fh, self.basic_jobspec, waitable=True)
    job.wait(self.fh, jobid=jobid)
    for job_kvs_dir in [
        job.job_kvs(self.fh, jobid),
        job.job_kvs_guest(self.fh, jobid),
    ]:
        self.assertTrue(isinstance(job_kvs_dir, flux.kvs.KVSDir))
        self.assertTrue(flux.kvs.exists(self.fh, job_kvs_dir.path))
        self.assertTrue(flux.kvs.isdir(self.fh, job_kvs_dir.path))
def test_16_job_kill(self):
    self.sleep_jobspec = JobspecV1.from_command(["sleep", "1000"])
    jobid = job.submit(self.fh, self.sleep_jobspec, waitable=True)

    # Wait for shell to fully start to avoid delay in signal
    job.event_wait(self.fh, jobid, name="start")
    job.event_wait(
        self.fh, jobid, name="shell.start", eventlog="guest.exec.eventlog"
    )

    job.kill(self.fh, jobid, signum=signal.SIGKILL)
    fut = job.wait_async(self.fh, jobid=jobid).wait_for(5.0)
    return_id, success, errmsg = fut.get_status()
    self.assertEqual(return_id, jobid)
    self.assertFalse(success)
def test_20_004_job_event_watch(self):
    jobid = job.submit(self.fh, JobspecV1.from_command(["sleep", "0"]))
    self.assertTrue(jobid > 0)
    events = []
    for event in job.event_watch(self.fh, jobid):
        self.assertIsInstance(event, job.EventLogEvent)
        self.assertTrue(hasattr(event, "timestamp"))
        self.assertTrue(hasattr(event, "name"))
        self.assertTrue(hasattr(event, "context"))
        self.assertIs(type(event.timestamp), float)
        self.assertIs(type(event.name), str)
        self.assertIs(type(event.context), dict)
        events.append(event.name)
    self.assertEqual(len(events), 10)
def test_20_006_job_event_wait(self):
    jobid = job.submit(self.fh, JobspecV1.from_command(["sleep", "0"]))
    self.assertTrue(jobid > 0)
    event = job.event_wait(self.fh, jobid, "start")
    self.assertIsInstance(event, job.EventLogEvent)
    self.assertEqual(event.name, "start")
    event = job.event_wait(
        self.fh, jobid, "shell.init", eventlog="guest.exec.eventlog"
    )
    self.assertIsInstance(event, job.EventLogEvent)
    self.assertEqual(event.name, "shell.init")
    event = job.event_wait(self.fh, jobid, "clean")
    self.assertIsInstance(event, job.EventLogEvent)
    self.assertEqual(event.name, "clean")
    with self.assertRaises(OSError):
        job.event_wait(self.fh, jobid, "foo")
def test_20_007_job_event_wait_exception(self):
    event = None
    jobid = job.submit(
        self.fh, JobspecV1.from_command(["sleep", "0"], num_tasks=128)
    )
    self.assertTrue(jobid > 0)
    try:
        event = job.event_wait(self.fh, jobid, "start")
    except job.JobException as err:
        self.assertEqual(err.severity, 0)
        self.assertEqual(err.type, "alloc")
        self.assertGreater(err.timestamp, 0.0)
    self.assertIs(event, None)
    try:
        event = job.event_wait(self.fh, jobid, "start", raiseJobException=False)
    except OSError as err:
        self.assertEqual(err.errno, errno.ENODATA)
    self.assertIs(event, None)
def test_20_005_job_event_watch_with_cancel(self):
    jobid = job.submit(
        self.fh, JobspecV1.from_command(["sleep", "3"]), waitable=True
    )
    self.assertTrue(jobid > 0)
    events = []
    future = job.event_watch_async(self.fh, jobid)
    while True:
        event = future.get_event()
        if event is None:
            break
        if event.name == "start":
            future.cancel()
        events.append(event.name)
    self.assertEqual(event, None)
    # Should have less than the expected number of events due to cancel
    self.assertLess(len(events), 8)
    job.cancel(self.fh, jobid)
    job.wait(self.fh, jobid)
def test_20_005_1_job_event_watch_with_cancel_stop_true(self):
    jobid = job.submit(
        self.fh, JobspecV1.from_command(["sleep", "3"]), waitable=True
    )
    self.assertTrue(jobid > 0)
    events = []
    future = job.event_watch_async(self.fh, jobid)

    def cb(future, events):
        event = future.get_event()
        if event.name == "start":
            future.cancel(stop=True)
        events.append(event.name)

    future.then(cb, events)
    rc = self.fh.reactor_run()

    # Last event should be "start"
    self.assertEqual(events[-1], "start")
    job.cancel(self.fh, jobid)
    job.wait(self.fh, jobid)
def test_20_001_job_event_watch_async(self):
    myarg = dict(a=1, b=2)
    events = []

    def cb(future, arg):
        self.assertEqual(arg, myarg)
        event = future.get_event()
        if event is None:
            future.get_flux().reactor_stop()
            return
        self.assertIsInstance(event, job.EventLogEvent)
        events.append(event.name)

    jobid = job.submit(self.fh, JobspecV1.from_command(["sleep", "0"]))
    self.assertTrue(jobid > 0)
    future = job.event_watch_async(self.fh, jobid)
    self.assertIsInstance(future, job.JobEventWatchFuture)
    future.then(cb, myarg)
    rc = self.fh.reactor_run()
    self.assertGreaterEqual(rc, 0)
    self.assertEqual(len(events), 10)
    self.assertEqual(events[0], "submit")
    self.assertEqual(events[-1], "clean")
def test_20_002_job_event_watch_no_autoreset(self):
    jobid = job.submit(self.fh, JobspecV1.from_command(["sleep", "0"]))
    self.assertTrue(jobid > 0)
    future = job.event_watch_async(self.fh, jobid)
    self.assertIsInstance(future, job.JobEventWatchFuture)

    # First event should be "submit"
    event = future.get_event(autoreset=False)
    self.assertIsInstance(event, job.EventLogEvent)
    self.assertEqual(event.name, "submit")

    # get_event() again with no reset returns same event:
    event = future.get_event(autoreset=False)
    self.assertIsInstance(event, job.EventLogEvent)
    self.assertEqual(event.name, "submit")

    # reset, then get_event() should get next event
    future.reset()
    event = future.get_event(autoreset=False)
    self.assertIsInstance(event, job.EventLogEvent)
    self.assertEqual(event.name, "validate")

    future.cancel()
def work(self, units):
    # overload the base class work method

    from flux import job as flux_job

    self.advance(units, rps.AGENT_SCHEDULING, publish=True, push=False)

    for unit in units:

        #
        # FIXME: transfer from executor
        # self._cu_environment = self._populate_cu_environment()

        jd = json.dumps(ru.read_json('/home/merzky/projects/flux/spec.json'))
        jid = flux_job.submit(self._flux, jd)
        unit['flux_id'] = jid

        # publish without state changes - those are retroactively applied
        # based on flux event timestamps.
        # TODO: apply some bulking, submission is not really fast.
        #       But in the end performance is determined by flux now, so
        #       communication only affects timeliness of state updates.
        self._q.put(unit)
def test_02_sync_submit(self):
    jobid = job.submit(self.fh, self.basic_jobspec)
    self.assertGreater(jobid, 0)
def test_01_nonstring_submit(self):
    with self.assertRaises(TypeError):
        job.submit(self.fh, 0)
def test_02_sync_submit(self):
    jobid = job.submit(self.fh, self.jobspec)
    self.assertGreater(jobid, 0)
def test_32_job_result(self):
    result = {}
    ids = []

    def cb(future, jobid):
        result[jobid] = future

    ids.append(job.submit(self.fh, JobspecV1.from_command(["true"])))
    ids.append(job.submit(self.fh, JobspecV1.from_command(["false"])))
    ids.append(job.submit(self.fh, JobspecV1.from_command(["nosuchprog"])))
    ids.append(job.submit(self.fh, JobspecV1.from_command(["sleep", "120"])))
    # Submit held job so we can cancel before RUN state
    ids.append(job.submit(self.fh, JobspecV1.from_command(["true"]), urgency=0))
    job.cancel(self.fh, ids[4])

    for jobid in ids:
        flux.job.result_async(self.fh, jobid).then(cb, jobid)

    def cancel_on_start(future, jobid):
        event = future.get_event()
        if event is None:
            return
        if event.name == "shell.start":
            job.cancel(self.fh, jobid)
            future.cancel()

    job.event_watch_async(self.fh, ids[3], eventlog="guest.exec.eventlog").then(
        cancel_on_start, ids[3]
    )

    self.fh.reactor_run()
    self.assertEqual(len(result.keys()), len(ids))

    self.addTypeEqualityFunc(JobInfo, self.assertJobInfoEqual)
    self.assertEqual(
        result[ids[0]].get_info(),
        JobInfo({
            "id": ids[0],
            "result": flux.constants.FLUX_JOB_RESULT_COMPLETED,
            "t_start": 1.0,
            "t_run": 2.0,
            "t_cleanup": 3.0,
            "waitstatus": 0,
            "exception_occurred": False,
        }),
    )
    self.assertEqual(
        result[ids[1]].get_info(),
        JobInfo({
            "id": ids[1],
            "result": flux.constants.FLUX_JOB_RESULT_FAILED,
            "t_submit": 1.0,
            "t_run": 2.0,
            "t_cleanup": 3.0,
            "waitstatus": 256,
            "exception_occurred": False,
        }),
    )
    self.assertEqual(
        result[ids[2]].get_info(),
        JobInfo({
            "id": ids[2],
            "result": flux.constants.FLUX_JOB_RESULT_FAILED,
            "t_submit": 1.0,
            "t_run": 2.0,
            "t_cleanup": 3.0,
            "waitstatus": 32512,
            "exception_occurred": True,
            "exception_type": "exec",
            "exception_note": "task 0.*: start failed: nosuchprog: "
            "No such file or directory",
            "exception_severity": 0,
        }),
    )
    self.assertEqual(
        result[ids[3]].get_info(),
        JobInfo({
            "id": ids[3],
            "result": flux.constants.FLUX_JOB_RESULT_CANCELED,
            "t_submit": 1.0,
            "t_run": 2.0,
            "t_cleanup": 3.0,
            "waitstatus": 36608,  # 143<<8
            "exception_occurred": True,
            "exception_type": "cancel",
            "exception_note": "",
            "exception_severity": 0,
        }),
    )
    self.assertEqual(
        result[ids[4]].get_info(),
        JobInfo({
            "id": ids[4],
            "result": flux.constants.FLUX_JOB_RESULT_CANCELED,
            "t_submit": 0.0,
            "exception_occurred": True,
            "exception_type": "cancel",
            "exception_note": "",
            "exception_severity": 0,
        }),
    )

    # synchronous job.result() test
    self.assertEqual(job.result(self.fh, ids[3]), result[ids[3]].get_info())
def test_23_from_nest_command(self):
    """Test that `from_nest_command` produces a valid jobspec"""
    jobid = job.submit(self.fh, JobspecV1.from_nest_command(["sleep", "0"]))
    self.assertGreater(jobid, 0)
def submit(self, args):
    """
    Submit job, constructing jobspec from args.
    Returns jobid.
    """
    if not args.command:
        raise ValueError("job command and arguments are missing")

    jobspec = JobspecV1.from_command(
        args.command,
        num_tasks=args.ntasks,
        cores_per_task=args.cores_per_task,
        gpus_per_task=args.gpus_per_task,
        num_nodes=args.nodes,
    )
    jobspec.cwd = os.getcwd()
    jobspec.environment = dict(os.environ)
    if args.time_limit is not None:
        jobspec.duration = args.time_limit
    if args.job_name is not None:
        jobspec.setattr("system.job.name", args.job_name)
    if args.input is not None:
        jobspec.setattr_shell_option("input.stdin.type", "file")
        jobspec.setattr_shell_option("input.stdin.path", args.input)
    if args.output is not None:
        jobspec.setattr_shell_option("output.stdout.type", "file")
        jobspec.setattr_shell_option("output.stdout.path", args.output)
        if args.label_io:
            jobspec.setattr_shell_option("output.stdout.label", True)
    if args.error is not None:
        jobspec.setattr_shell_option("output.stderr.type", "file")
        jobspec.setattr_shell_option("output.stderr.path", args.error)
        if args.label_io:
            jobspec.setattr_shell_option("output.stderr.label", True)
    if args.setopt is not None:
        for kv in args.setopt:
            # Split into key, val with a default for 1 if no val given:
            key, val = (kv.split("=", 1) + [1])[:2]
            try:
                val = json.loads(val)
            except:
                pass
            jobspec.setattr_shell_option(key, val)
    if args.setattr is not None:
        for kv in args.setattr:
            tmp = kv.split("=", 1)
            if len(tmp) != 2:
                raise ValueError("--setattr: Missing value for attr " + kv)
            key = tmp[0]
            try:
                val = json.loads(tmp[1])
            except:
                val = tmp[1]
            jobspec.setattr(key, val)

    arg_debug = False
    arg_waitable = False
    if args.flags is not None:
        for tmp in args.flags:
            for flag in tmp.split(","):
                if flag == "debug":
                    arg_debug = True
                elif flag == "waitable":
                    arg_waitable = True
                else:
                    raise ValueError("--flags: Unknown flag " + flag)

    if args.dry_run:
        print(jobspec.dumps(), file=sys.stdout)
        sys.exit(0)

    h = flux.Flux()
    return job.submit(
        h,
        jobspec.dumps(),
        priority=args.priority,
        waitable=arg_waitable,
        debug=arg_debug,
    )
if len(sys.argv) != 2:
    njobs = 10
else:
    njobs = int(sys.argv[1])

# Open connection to broker and subscribe to state notifications
h = flux.Flux()
h.event_subscribe("job-state")

# Submit several test jobs, building dictionary by jobid,
# where each entry contains a list of job states
# N.B. no notification is provided for the NEW state
jobspec = JobspecV1.from_command(["hostname"])
jobs = {}
for i in range(njobs):
    jobid = job.submit(h, jobspec)
    jobs[jobid] = ["NEW"]

# Process events until all jobs have reached INACTIVE state.
while not all_inactive(jobs):
    event = h.event_recv()
    parse_notification(jobs, event)

# Verify that each job advanced through the expected set of states, in order
for jobid in jobs:
    if cmp(jobs[jobid], expected_states) != 0:
        print("{}: {}: {}".format("bad state list", jobid, jobs[jobid]))
        sys.exit(1)

# Unsubscribe from state notifications and close connection to broker.
h.event_unsubscribe("job-state")
def submit(self, args):
    """
    Submit job, constructing jobspec from args.
    Returns jobid.
    """
    jobspec = self.init_jobspec(args)
    jobspec.cwd = os.getcwd()
    jobspec.environment = get_filtered_environment(args.env)
    if args.time_limit is not None:
        jobspec.duration = args.time_limit
    if args.job_name is not None:
        jobspec.setattr("system.job.name", args.job_name)
    if args.input is not None:
        jobspec.stdin = args.input
    if args.output is not None and args.output not in ["none", "kvs"]:
        jobspec.stdout = args.output
        if args.label_io:
            jobspec.setattr_shell_option("output.stdout.label", True)
    if args.error is not None:
        jobspec.stderr = args.error
        if args.label_io:
            jobspec.setattr_shell_option("output.stderr.label", True)
    if args.setopt is not None:
        for keyval in args.setopt:
            # Split into key, val with a default for 1 if no val given:
            key, val = (keyval.split("=", 1) + [1])[:2]
            try:
                val = json.loads(val)
            except (json.JSONDecodeError, TypeError):
                pass
            jobspec.setattr_shell_option(key, val)
    if args.debug_emulate:
        debugged.set_mpir_being_debugged(1)
    if debugged.get_mpir_being_debugged() == 1:
        # if stop-tasks-in-exec is present, overwrite
        jobspec.setattr_shell_option("stop-tasks-in-exec", json.loads("1"))
    if args.setattr is not None:
        for keyval in args.setattr:
            tmp = keyval.split("=", 1)
            if len(tmp) != 2:
                raise ValueError("--setattr: Missing value for attr " + keyval)
            key = tmp[0]
            try:
                val = json.loads(tmp[1])
            except (json.JSONDecodeError, TypeError):
                val = tmp[1]
            jobspec.setattr(key, val)

    arg_debug = False
    arg_waitable = False
    if args.flags is not None:
        for tmp in args.flags:
            for flag in tmp.split(","):
                if flag == "debug":
                    arg_debug = True
                elif flag == "waitable":
                    arg_waitable = True
                else:
                    raise ValueError("--flags: Unknown flag " + flag)

    if args.dry_run:
        print(jobspec.dumps(), file=sys.stdout)
        sys.exit(0)

    flux_handle = flux.Flux()
    jobid = job.submit(
        flux_handle,
        jobspec.dumps(),
        priority=args.priority,
        waitable=arg_waitable,
        debug=arg_debug,
    )
    return JobID(jobid)
    num_nodes=2, cores_per_task=2)
compute_jobspec.cwd = os.getcwd()
compute_jobspec.environment = dict(os.environ)

# create bad jobspec that will fail
bad_jobspec = JobspecV1.from_command(["/bin/false"])

jobs = []
flags = flux.constants.FLUX_JOB_WAITABLE

# submit jobs
for i in range(njobs):
    if i < njobs / 2:
        jobid = flux.job.submit(h, compute_jobspec, flags=flags)
        print("submit: {} compute.py".format(jobid))
    else:
        jobid = job.submit(h, bad_jobspec, flags=flags)
        print("submit: {} bad_jobspec".format(jobid))
    jobs.append(jobid)

# wait for each job in turn by jobid
for jobid in jobs:
    result = job.wait(h, jobid)
    if result.success:
        print("wait: {} Success".format(result.jobid))
    else:
        print("wait: {} Error: {}".format(result.jobid, result.errstr))

# vim: tabstop=4 shiftwidth=4 expandtab
def test_08_jobspec_submit(self):
    jobspec = Jobspec.from_yaml_stream(self.basic_jobspec)
    jobid = job.submit(self.fh, jobspec)
    self.assertGreater(jobid, 0)
def submit(self, args):
    """
    Submit job, constructing jobspec from args.
    Returns jobid.
    """
    if not args.command:
        raise ValueError("job command and arguments are missing")

    jobspec = JobSpec(
        args.command,
        num_tasks=args.ntasks,
        cores_per_task=args.cores_per_task,
        gpus_per_task=args.gpus_per_task,
        num_nodes=args.nodes,
    )
    jobspec.set_cwd(os.getcwd())
    jobspec.set_environment(dict(os.environ))
    if args.time_limit is not None:
        jobspec.set_duration(args.time_limit)
    if args.input is not None:
        jobspec.setattr_shopt("input.stdin.type", "file")
        jobspec.setattr_shopt("input.stdin.path", args.input)
    if args.output is not None:
        jobspec.setattr_shopt("output.stdout.type", "file")
        jobspec.setattr_shopt("output.stdout.path", args.output)
        if args.label_io:
            jobspec.setattr_shopt("output.stdout.label", True)
    if args.error is not None:
        jobspec.setattr_shopt("output.stderr.type", "file")
        jobspec.setattr_shopt("output.stderr.path", args.error)
        if args.label_io:
            jobspec.setattr_shopt("output.stderr.label", True)
    if args.setopt is not None:
        for kv in args.setopt:
            # Split into key, val with a default for 1 if no val given:
            key, val = (kv.split("=", 1) + [1])[:2]
            try:
                val = json.loads(val)
            except:
                pass
            jobspec.setattr_shopt(key, val)
    if args.setattr is not None:
        for kv in args.setattr:
            tmp = kv.split("=", 1)
            if len(tmp) != 2:
                raise ValueError("--setattr: Missing value for attr " + kv)
            key = tmp[0]
            try:
                val = json.loads(tmp[1])
            except:
                val = tmp[1]
            jobspec.setattr(key, val)

    if args.dry_run:
        print(jobspec.dumps(), file=sys.stdout)
        sys.exit(0)

    h = flux.Flux()
    flags = 0
    if args.debug:
        flags = flux.constants.FLUX_JOB_DEBUG
    return job.submit(h, jobspec.dumps(), priority=args.priority, flags=flags)
if len(sys.argv) != 2:
    njobs = 10
else:
    njobs = int(sys.argv[1])

# Open connection to broker
h = flux.Flux()

# Submit njobs test jobs (half will fail)
jobspec = JobspecV1.from_command(["/bin/true"])
jobspec_fail = JobspecV1.from_command(["/bin/false"])
jobs = []
for i in range(njobs):
    if i < njobs / 2:
        jobid = job.submit(h, jobspec, waitable=True)
        print("submit: {} /bin/true".format(jobid))
    else:
        jobid = job.submit(h, jobspec_fail, waitable=True)
        print("submit: {} /bin/false".format(jobid))
    jobs.append(jobid)

# Wait for each job in turn
for jobid in jobs:
    result = job.wait(h, jobid)
    if result.success:
        print("wait: {} Success".format(result.jobid))
    else:
        print("wait: {} Error: {}".format(result.jobid, result.errstr))

# vim: tabstop=4 shiftwidth=4 expandtab
if len(sys.argv) != 2:
    njobs = 10
else:
    njobs = int(sys.argv[1])

# Open connection to broker and subscribe to state notifications
h = flux.Flux()
h.event_subscribe("job-state")

# Submit several test jobs, building dictionary by jobid,
# where each entry contains a list of job states
# N.B. no notification is provided for the NEW state
jobspec = make_jobspec()
jobs = {}
for i in range(njobs):
    jobid = job.submit(h, jobspec)
    jobs[jobid] = ["NEW"]

# Process events until all jobs have reached INACTIVE state.
while not all_inactive(jobs):
    event = h.event_recv()
    parse_notification(jobs, event)

# Verify that each job advanced through the expected set of states, in order
for jobid in jobs:
    if cmp(jobs[jobid], expected_states) != 0:
        print("{}: {}: {}".format("bad state list", jobid, jobs[jobid]))
        sys.exit(1)

# Unsubscribe from state notifications and close connection to broker.
h.event_unsubscribe("job-state")