def test_13_job_kvs(self):
    """A completed job's main and guest KVS directories exist and are dirs."""
    jobid = job.submit(self.fh, self.basic_jobspec, waitable=True)
    job.wait(self.fh, jobid=jobid)
    # check both the primary job KVS dir and the guest namespace dir
    kvs_dirs = (
        job.job_kvs(self.fh, jobid),
        job.job_kvs_guest(self.fh, jobid),
    )
    for kvs_dir in kvs_dirs:
        self.assertIsInstance(kvs_dir, flux.kvs.KVSDir)
        self.assertTrue(flux.kvs.exists(self.fh, kvs_dir.path))
        self.assertTrue(flux.kvs.isdir(self.fh, kvs_dir.path))
def main():
    """Bulk-submit trivial jobs asynchronously, reap them all, save timing."""
    implementation = "bulksubmit"
    start_time = time.perf_counter()
    args = setup_parser().parse_args()

    # open connection to broker
    h = flux.Flux()

    # jobspec for a trivial single-task command
    compute_jobspec = job.JobspecV1.from_command(
        command=["true"], num_tasks=1, num_nodes=1, cores_per_task=1
    )
    compute_jobspec.cwd = os.getcwd()

    # fire off every submission asynchronously; submit_cb handles responses
    for _ in range(args.jobcount):
        job.submit_async(h, compute_jobspec, waitable=True).then(submit_cb)
    if h.reactor_run(h.get_reactor(), 0) < 0:
        h.fatal_error("reactor start failed")

    # reap each job in completion order, reporting any failures
    for _ in range(args.jobcount):
        jobid, success, errstr = job.wait(h)
        if not success:
            print("wait: {} Error: {}".format(jobid, errstr))

    total_time = time.perf_counter() - start_time
    print("Total seconds: {}".format(total_time))
    utils.save_timing_data(args.jobcount, total_time, implementation)
def test_20_005_job_event_watch_with_cancel(self):
    """Canceling an event watch truncates the synchronous event stream."""
    jobid = job.submit(
        self.fh, JobspecV1.from_command(["sleep", "3"]), waitable=True
    )
    self.assertTrue(jobid > 0)
    future = job.event_watch_async(self.fh, jobid)
    events = []
    # consume events until the stream ends (get_event returns None)
    event = future.get_event()
    while event is not None:
        if event.name == "start":
            # cancel mid-stream; remaining events should not arrive
            future.cancel()
        events.append(event.name)
        event = future.get_event()
    self.assertEqual(event, None)
    # Should have less than the expected number of events due to cancel
    self.assertLess(len(events), 8)
    job.cancel(self.fh, jobid)
    job.wait(self.fh, jobid)
def test_20_005_1_job_event_watch_with_cancel_stop_true(self):
    """Canceling an event watch with stop=True stops the reactor.

    The callback cancels the watch as soon as it sees the "start"
    event, so "start" must be the last event collected.
    """
    jobid = job.submit(
        self.fh, JobspecV1.from_command(["sleep", "3"]), waitable=True
    )
    self.assertTrue(jobid > 0)
    events = []
    future = job.event_watch_async(self.fh, jobid)

    def cb(future, events):
        event = future.get_event()
        if event.name == "start":
            # stop=True: stop the reactor once the watch is canceled
            future.cancel(stop=True)
        events.append(event.name)

    future.then(cb, events)
    # return code intentionally ignored (was bound to an unused local)
    self.fh.reactor_run()
    # Last event should be "start"
    self.assertEqual(events[-1], "start")
    job.cancel(self.fh, jobid)
    job.wait(self.fh, jobid)
num_nodes=2, cores_per_task=2) compute_jobspec.cwd = os.getcwd() compute_jobspec.environment = dict(os.environ) # create bad jobspec that will fail bad_jobspec = JobspecV1.from_command(["/bin/false"]) jobs = [] flags = flux.constants.FLUX_JOB_WAITABLE # submit jobs for i in range(njobs): if i < njobs / 2: jobid = flux.job.submit(h, compute_jobspec, flags=flags) print("submit: {} compute_jobspec".format(jobid)) else: jobid = flux.job.submit(h, bad_jobspec, flags=flags) print("submit: {} bad_jobspec".format(jobid)) jobs.append(jobid) # wait for each job to complete for jobid in jobs: result = job.wait(h) if result.success: print("wait: {} Success".format(result.jobid)) else: print("wait: {} Error: {}".format(result.jobid, result.errstr)) # vim: tabstop=4 shiftwidth=4 expandtab
# Job count comes from argv; default to 10 when not given
njobs = int(sys.argv[1]) if len(sys.argv) == 2 else 10

# Open connection to broker
h = flux.Flux()

# Submit njobs test jobs (half will fail)
jobspec = JobspecV1.from_command(["/bin/true"])
jobspec_fail = JobspecV1.from_command(["/bin/false"])
jobs = []
for i in range(njobs):
    # first half succeed, second half fail
    if i < njobs / 2:
        jobid = job.submit(h, jobspec, waitable=True)
        print("submit: {} /bin/true".format(jobid))
    else:
        jobid = job.submit(h, jobspec_fail, waitable=True)
        print("submit: {} /bin/false".format(jobid))
    jobs.append(jobid)

# Wait for each job in turn, reporting success or the error string
for jobid in jobs:
    result = job.wait(h, jobid)
    if result.success:
        print("wait: {} Success".format(result.jobid))
    else:
        print("wait: {} Error: {}".format(result.jobid, result.errstr))

# vim: tabstop=4 shiftwidth=4 expandtab
# create jobspec for compute.py
compute_jobspec = JobspecV1.from_command(
    command=["./compute.py", "5"], num_tasks=4, num_nodes=2, cores_per_task=2
)
compute_jobspec.cwd = os.getcwd()
compute_jobspec.environment = dict(os.environ)
flags = flux.constants.FLUX_JOB_WAITABLE

# Sliding-window submission: keep at most window_size jobs in flight
done = 0
running = 0
while done < njobs:
    # top up the window while there is both room and work remaining
    if running < window_size and done + running < njobs:
        jobid = flux.job.submit(h, compute_jobspec, flags=flags)
        print("submit: {}".format(jobid))
        running += 1
    # drain one job once the window is full or everything is submitted
    if running == window_size or done + running == njobs:
        jobid, success, errstr = job.wait(h)
        if success:
            print("wait: {} Success".format(jobid))
        else:
            print("wait: {} Error: {}".format(jobid, errstr))
        done += 1
        running -= 1

# vim: tabstop=4 shiftwidth=4 expandtab
# asynchronously submit jobspec files from a directory
log("Starting...")
for path in sys.argv[1:]:
    with open(path) as jobspec:
        job.submit_async(h, jobspec.read(), waitable=True).then(submit_cb)
if h.reactor_run() < 0:
    h.fatal_error("reactor start failed")

total = len(jobs)
dt = time.time() - t0
jps = len(jobs) / dt
log("submitted {0} jobs in {1:.2f}s. {2:.2f}job/s".format(total, dt, jps))

# reap completions in any order, updating the progress bar as we go
for count in range(1, total + 1):
    job.wait(h)
    if count == 1:
        log("First job finished in about {0:.3f}s".format(time.time() - t0))
    suffix = "({0:.1f} job/s)".format(count / (time.time() - t0))
    progress(count / total, length=58, suffix=suffix)

dt = time.time() - t0
log("Ran {0} jobs in {1:.1f}s. {2:.1f} job/s".format(total, dt, total / dt))

# vi: ts=4 sw=4 expandtab