Esempio n. 1
0
 def test_13_job_kvs(self):
     """Check the KVS directories returned for a job that has been waited on.

     Submits ``self.basic_jobspec`` as a waitable job, waits for it, and then
     verifies that ``job.job_kvs`` and ``job.job_kvs_guest`` each return a
     ``flux.kvs.KVSDir`` whose ``path`` exists in the KVS and is a directory.
     """
     jobid = job.submit(self.fh, self.basic_jobspec, waitable=True)
     job.wait(self.fh, jobid=jobid)
     kvs_dirs = (
         job.job_kvs(self.fh, jobid),
         job.job_kvs_guest(self.fh, jobid),
     )
     for job_kvs_dir in kvs_dirs:
         # assertIsInstance reports the offending type when the check fails,
         # which assertTrue(isinstance(...)) does not.
         self.assertIsInstance(job_kvs_dir, flux.kvs.KVSDir)
         self.assertTrue(flux.kvs.exists(self.fh, job_kvs_dir.path))
         self.assertTrue(flux.kvs.isdir(self.fh, job_kvs_dir.path))
def main():
    """Submit ``args.jobcount`` trivial jobs asynchronously and time the run.

    Parses the command line with ``setup_parser()``, submits one waitable
    ``true`` job per requested count through ``job.submit_async`` (each
    returned future is chained to ``submit_cb``), runs the reactor, and then
    calls ``job.wait`` once per submitted job, printing any failure.  The
    elapsed time measured with ``time.perf_counter()`` is printed and handed
    to ``utils.save_timing_data`` together with the implementation label.
    """
    implementation = "bulksubmit"
    start_time = time.perf_counter()
    args = setup_parser().parse_args()
    # open connection to broker
    h = flux.Flux()
    # create jobspec for the no-op "true" command
    compute_jobspec = job.JobspecV1.from_command(
        command=["true"], num_tasks=1, num_nodes=1, cores_per_task=1
    )
    compute_jobspec.cwd = os.getcwd()
    for _ in range(args.jobcount):
        job.submit_async(h, compute_jobspec, waitable=True).then(submit_cb)
    # Rely on reactor_run()'s defaults instead of passing the handle's own
    # reactor and a zero flags value positionally.
    if h.reactor_run() < 0:
        h.fatal_error("reactor start failed")
    # One wait per submitted job; no jobid is given, so completions are taken
    # in whatever order job.wait() hands them back.
    for _ in range(args.jobcount):
        jobid, success, errstr = job.wait(h)
        if not success:
            print("wait: {} Error: {}".format(jobid, errstr))
    total_time = time.perf_counter() - start_time
    print("Total seconds: {}".format(total_time))
    utils.save_timing_data(args.jobcount, total_time, implementation)
Esempio n. 3
0
 def test_20_005_job_event_watch_with_cancel(self):
     """Cancel an event watch part-way through and check it ends early.

     Submits a waitable ``sleep 3`` job, reads events from
     ``job.event_watch_async`` one at a time, and calls ``future.cancel()``
     when the ``start`` event is seen.  Reading continues until
     ``get_event()`` returns ``None``; the test then checks that fewer than
     8 event names were collected, and finally cancels and waits on the job.
     """
     jobid = job.submit(
         self.fh, JobspecV1.from_command(["sleep", "3"]), waitable=True
     )
     self.assertGreater(jobid, 0)
     events = []
     future = job.event_watch_async(self.fh, jobid)
     while True:
         event = future.get_event()
         if event is None:
             break
         if event.name == "start":
             # Keep draining afterwards: the loop only ends on a None event.
             future.cancel()
         events.append(event.name)
     self.assertIsNone(event)
     # Should have less than the expected number of events due to cancel
     self.assertLess(len(events), 8)
     job.cancel(self.fh, jobid)
     job.wait(self.fh, jobid)
Esempio n. 4
0
    def test_20_005_1_job_event_watch_with_cancel_stop_true(self):
        """Cancel an event watch with ``stop=True`` from a reactor callback.

        Submits a waitable ``sleep 3`` job and registers a callback on the
        ``job.event_watch_async`` future.  The callback records each event
        name and calls ``future.cancel(stop=True)`` on the ``start`` event.
        After the reactor returns, the test expects ``start`` to be the last
        recorded event, then cancels and waits on the job.
        """
        jobid = job.submit(
            self.fh, JobspecV1.from_command(["sleep", "3"]), waitable=True
        )
        self.assertGreater(jobid, 0)
        events = []
        future = job.event_watch_async(self.fh, jobid)

        def cb(future, events):
            event = future.get_event()
            if event.name == "start":
                future.cancel(stop=True)
            events.append(event.name)

        future.then(cb, events)
        # The return value is not inspected by this test.
        self.fh.reactor_run()

        # Last event should be "start"
        self.assertEqual(events[-1], "start")
        job.cancel(self.fh, jobid)
        job.wait(self.fh, jobid)
Esempio n. 5
0
                                         num_nodes=2,
                                         cores_per_task=2)
compute_jobspec.cwd = os.getcwd()
compute_jobspec.environment = dict(os.environ)

# second jobspec, running /bin/false, used for the jobs that should fail
bad_jobspec = JobspecV1.from_command(["/bin/false"])

flags = flux.constants.FLUX_JOB_WAITABLE
jobs = []

# submit njobs jobs: indices below njobs / 2 use compute_jobspec,
# the remaining ones use bad_jobspec
for i in range(njobs):
    if i >= njobs / 2:
        jobid = flux.job.submit(h, bad_jobspec, flags=flags)
        print("submit: {} bad_jobspec".format(jobid))
    else:
        jobid = flux.job.submit(h, compute_jobspec, flags=flags)
        print("submit: {} compute_jobspec".format(jobid))
    jobs.append(jobid)

# call job.wait() once per submitted job; no jobid is passed, so the id
# reported is the one carried by the returned result
for jobid in jobs:
    result = job.wait(h)
    if not result.success:
        print("wait: {} Error: {}".format(result.jobid, result.errstr))
    else:
        print("wait: {} Success".format(result.jobid))

# vim: tabstop=4 shiftwidth=4 expandtab
Esempio n. 6
0
# Job count comes from the single command-line argument; default is 10
njobs = int(sys.argv[1]) if len(sys.argv) == 2 else 10

# Open connection to broker
h = flux.Flux()

# Submit njobs test jobs (half will fail)
jobspec = JobspecV1.from_command(["/bin/true"])
jobspec_fail = JobspecV1.from_command(["/bin/false"])
jobs = []
for i in range(njobs):
    if i >= njobs / 2:
        jobid = job.submit(h, jobspec_fail, waitable=True)
        print("submit: {} /bin/false".format(jobid))
    else:
        jobid = job.submit(h, jobspec, waitable=True)
        print("submit: {} /bin/true".format(jobid))
    jobs.append(jobid)

# Wait for each job in turn, in submission order
for jobid in jobs:
    result = job.wait(h, jobid)
    if not result.success:
        print("wait: {} Error: {}".format(result.jobid, result.errstr))
    else:
        print("wait: {} Success".format(result.jobid))

# vim: tabstop=4 shiftwidth=4 expandtab
# create jobspec for compute.py
compute_jobspec = JobspecV1.from_command(
    command=["./compute.py", "5"],
    num_tasks=4,
    num_nodes=2,
    cores_per_task=2,
)
compute_jobspec.cwd = os.getcwd()
compute_jobspec.environment = dict(os.environ)

flags = flux.constants.FLUX_JOB_WAITABLE
done, running = 0, 0

# submit jobs, keep [window_size] jobs running
while done < njobs:
    # submit one more job while the window has room and jobs remain
    if running < window_size and done + running < njobs:
        jobid = flux.job.submit(h, compute_jobspec, flags=flags)
        print("submit: {}".format(jobid))
        running += 1

    # wait for one job once the window is full or every job has been submitted
    if running == window_size or done + running == njobs:
        jobid, success, errstr = job.wait(h)
        if not success:
            print("wait: {} Error: {}".format(jobid, errstr))
        else:
            print("wait: {} Success".format(jobid))
        done, running = done + 1, running - 1

# vim: tabstop=4 shiftwidth=4 expandtab

# asynchronously submit every jobspec file named on the command line;
# each submission future is chained to submit_cb
log("Starting...")
for file in sys.argv[1:]:
    with open(file) as jobspec:
        job.submit_async(h, jobspec.read(), waitable=True).then(submit_cb)

if h.reactor_run() < 0:
    h.fatal_error("reactor start failed")

# report submission throughput measured from t0
total = len(jobs)
dt = time.time() - t0
jps = total / dt
log("submitted {0} jobs in {1:.2f}s. {2:.2f}job/s".format(total, dt, jps))

count = 0
for count in range(1, total + 1):
    # wait for jobs to complete in any order
    job.wait(h)
    if count == 1:
        log("First job finished in about {0:.3f}s".format(time.time() - t0))
    suffix = "({0:.1f} job/s)".format(count / (time.time() - t0))
    progress(count / total, length=58, suffix=suffix)

# overall throughput, again measured from t0
dt = time.time() - t0
log("Ran {0} jobs in {1:.1f}s. {2:.1f} job/s".format(total, dt, total / dt))

# vi: ts=4 sw=4 expandtab