Beispiel #1
0
def main():
    """Submit a batch of jobs through a FluxExecutor and report each outcome.

    The optional positional ``njobs`` argument (default 10) is the total
    number of jobs: the first ``njobs // 2`` run ``./compute.py 10`` and the
    remainder run ``/bin/false``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("njobs", nargs="?", type=int, default=10)
    total = cli.parse_args().njobs
    split_at = total // 2

    # jobspec for compute.py, run with the caller's cwd and environment
    compute_jobspec = JobspecV1.from_command(
        command=["./compute.py", "10"],
        num_tasks=4,
        num_nodes=2,
        cores_per_task=2,
    )
    compute_jobspec.cwd = os.getcwd()
    compute_jobspec.environment = dict(os.environ)
    bad_jobspec = JobspecV1.from_command(["/bin/false"])

    with FluxExecutor() as executor:
        futures = []
        # first half: compute jobs
        for _ in range(split_at):
            submitted = executor.submit(compute_jobspec)
            futures.append(submitted)
            print(f"submit: {id(submitted)} compute_jobspec")
        # second half: /bin/false jobs
        for _ in range(split_at, total):
            submitted = executor.submit(bad_jobspec)
            futures.append(submitted)
            print(f"submit: {id(submitted)} bad_jobspec")

        # report on the futures in submission order
        for fut in futures:
            error = fut.exception()
            if error is not None:
                print(f"wait: {id(fut)} Error: job raised error {error}")
                continue
            status = fut.result()
            if status == 0:
                print(f"wait: {id(fut)} Success")
            else:
                print(f"wait: {id(fut)} Error: job returned exit code {status}")
Beispiel #2
0
 def test_22_from_batch_command(self):
     """Test that `from_batch_command` produces a valid jobspec"""
     batch_spec = JobspecV1.from_batch_command(
         "#!/bin/sh\nsleep 0", "nested sleep"
     )
     jobid = job.submit(self.fh, batch_spec)
     self.assertGreater(jobid, 0)
     # a script without a shebang line must be rejected with ValueError
     with self.assertRaises(ValueError):
         no_shebang = JobspecV1.from_batch_command(
             "sleep 0", "nested sleep with no shebang"
         )
         job.submit(self.fh, no_shebang)
 def submitJob(self):
     """Submit a waitable ``sleep 0`` jobspec on ``self.fh``.

     The jobspec requests 2 tasks on 1 node with 1 core per task and carries
     the current working directory and process environment.
     """
     spec = JobspecV1.from_command(
         command=["sleep", "0"],
         num_tasks=2,
         num_nodes=1,
         cores_per_task=1,
     )
     spec.cwd = os.getcwd()
     spec.environment = dict(os.environ)
     flux.job.submit(self.fh, spec, waitable=True)
Beispiel #4
0
 def test_submit_after_shutdown(self):
     """Check that ``submit`` raises RuntimeError once the executor is shut down."""
     closed = FluxExecutor()
     closed.shutdown(wait=True)
     # a well-formed jobspec is refused ...
     self.assertRaises(
         RuntimeError,
         lambda: closed.submit(JobspecV1.from_command(["true"])),
     )
     # ... and so is a bogus argument
     self.assertRaises(RuntimeError, closed.submit, None)
Beispiel #5
0
    def init_jobspec(self, args):
        """Build the batch jobspec described by the parsed arguments ``args``.

        Reads ``SCRIPT``, ``nslots``, ``cores_per_slot``, ``gpus_per_slot``,
        ``nodes``, ``broker_opts`` and ``output`` from ``args``.

        Raises:
            ValueError: if ``args.nslots`` is falsy.
        """
        # With no script (reading from stdin), "flux" stands in as arg[0]
        command = args.SCRIPT or ["flux"]

        if not args.nslots:
            raise ValueError("Number of slots to allocate must be specified")

        jobspec = JobspecV1.from_command(
            command=command,
            num_tasks=args.nslots,
            cores_per_task=args.cores_per_slot,
            gpus_per_task=args.gpus_per_slot,
            num_nodes=args.nodes,
        )

        #  One flux-broker per node
        jobspec.setattr_shell_option("per-resource.type", "node")

        #  Embed the script contents and the broker options in the jobspec
        script = self.read_script(args)
        jobspec.setattr("system.batch.script", script)
        broker_opts = list_split(args.broker_opts)
        jobspec.setattr("system.batch.broker-opts", broker_opts)

        #  Without an explicit --output, stdout goes to flux-{{id}}.out
        if not args.output:
            default_output = (
                ("output.stdout.type", "file"),
                ("output.stdout.path", "flux-{{id}}.out"),
            )
            for option, value in default_output:
                jobspec.setattr_shell_option(option, value)
        return jobspec
Beispiel #6
0
 def test_as_completed(self):
     """Three ``true`` jobs each yield result 0 and no exception via ``as_completed``."""
     with FluxExecutor() as executor:
         spec = JobspecV1.from_command(["true"])
         pending = [executor.submit(spec) for _ in range(3)]
         for finished in cf.as_completed(pending):
             # the future is already done, so a zero timeout must suffice
             self.assertEqual(finished.result(timeout=0), 0)
             self.assertIsNone(finished.exception())
Beispiel #7
0
 def test_exception_completion(self):
     """Check which exception events complete a future.

     A severity-1 exception event leaves the future pending; a severity-0
     exception event completes it with a ``JobException``.
     """
     jobspec = JobspecV1.from_command(["false"])
     worker = _FluxExecutorThread(
         threading.Event(), threading.Event(), collections.deque(), 0.01, (), {}
     )
     fut = FluxExecutorFuture(threading.get_ident())

     def deliver_exception(severity):
         # Feed one "exception" event of the given severity to the thread's
         # private event handler on behalf of `fut`.
         payload = {
             "name": "exception",
             "timestamp": 0,
             "context": {"severity": severity, "type": "foobar"},
         }
         watch = ShamJobEventWatchFuture(EventLogEvent(payload))
         worker._FluxExecutorThread__event_update(watch, fut)

     self.assertFalse(fut.done())
     fut._set_event(EventLogEvent({"name": "start", "timestamp": 0}))
     self.assertFalse(fut.done())

     deliver_exception(1)
     self.assertFalse(fut.done())

     deliver_exception(0)
     self.assertTrue(fut.done())
     self.assertIsInstance(fut.exception(), JobException)
Beispiel #8
0
 def test_exception_event(self):
     """A job for a nonexistent program ends in a JobException and fires the "exception" callback."""
     with FluxExecutor() as executor:
         callback_ran = threading.Event()
         bogus_spec = JobspecV1.from_command(["/not/a/real/app"])
         future = executor.submit(bogus_spec)
         future.add_event_callback(
             "exception", lambda fut, event: callback_ran.set()
         )
         self.assertIsInstance(future.exception(), JobException)
         self.assertTrue(callback_ran.is_set())
Beispiel #9
0
def create_test_jobspec(args):
    """Create a test jobspec from parsed arguments.

    ``args.command`` defaults to ``["true"]`` (and is updated in place) when
    empty. Each ``KEY[=VAL]`` in ``args.setopt`` becomes a shell option and
    each ``KEY=VAL`` in ``args.setattr`` becomes a jobspec attribute; values
    are decoded as JSON when possible and otherwise used verbatim. Unless
    ``args.exec`` is set, ``system.exec.test.run_duration`` is set to
    ``args.runtime``.

    Raises:
        ValueError: if a ``--setattr`` entry has no ``=VAL`` part.
    """
    if not args.command:
        args.command = ["true"]
    jobspec = JobspecV1.from_command(args.command)

    #  Shell options: a missing value defaults to the integer 1
    for keyval in args.setopt if args.setopt is not None else ():
        key, sep, val = keyval.partition("=")
        if not sep:
            val = 1
        try:
            val = json.loads(val)
        except (json.JSONDecodeError, TypeError):
            # not valid JSON (or the non-string default): keep as is
            pass
        jobspec.setattr_shell_option(key, val)

    #  Jobspec attributes: a value is mandatory
    for keyval in args.setattr if args.setattr is not None else ():
        key, sep, raw = keyval.partition("=")
        if not sep:
            raise ValueError("--setattr: Missing value for attr " + keyval)
        try:
            val = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            val = raw
        jobspec.setattr(key, val)

    if not args.exec:
        jobspec.setattr("system.exec.test.run_duration", args.runtime)

    return jobspec
Beispiel #10
0
 def test_broken_executor(self):
     """With the broken flag set, ``submit`` and ``attach`` raise RuntimeError."""
     broken_pattern = "Executor is broken.*"
     with FluxExecutor() as executor:
         # flag the executor as broken by hand
         executor._broken_event.set()
         with self.assertRaisesRegex(RuntimeError, broken_pattern):
             executor.submit(JobspecV1.from_command(["/not/a/real/app"]))
         with self.assertRaisesRegex(RuntimeError, broken_pattern):
             executor.attach(25979)
Beispiel #11
0
 def test_15_job_cancel(self):
     """A cancelled waitable job is reported by wait as unsuccessful."""
     self.sleep_jobspec = JobspecV1.from_command(["sleep", "1000"])
     jobid = job.submit(self.fh, self.sleep_jobspec, waitable=True)
     job.cancel(self.fh, jobid)
     # give the wait up to 5 seconds to resolve
     waited = job.wait_async(self.fh, jobid=jobid).wait_for(5.0)
     return_id, success, _errmsg = waited.get_status()
     self.assertEqual(return_id, jobid)
     self.assertFalse(success)
Beispiel #12
0
 def test_executor_event_callbacks(self):
     """Callbacks registered for every executor event see all expected events."""
     pending_names = {"start", "finish", "depend", "priority", "free"}

     def mark_seen(fut, event):
         # tick off an event name once its callback has run
         pending_names.discard(event.name)

     with FluxExecutor() as executor:
         future = executor.submit(JobspecV1.from_command(["false"]))
         for event_name in executor.EVENTS:
             future.add_event_callback(event_name, mark_seen)
     # the executor has shut down; every expected event must have been seen
     self.assertFalse(pending_names)
Beispiel #13
0
 def test_20_003_job_event_watch_sync(self):
     """The first event from ``event_watch_async`` is the "submit" event."""
     spec = JobspecV1.from_command(["sleep", "0"])
     jobid = job.submit(self.fh, spec)
     self.assertTrue(jobid > 0)
     watcher = job.event_watch_async(self.fh, jobid)
     self.assertIsInstance(watcher, job.JobEventWatchFuture)
     first = watcher.get_event()
     self.assertIsInstance(first, job.EventLogEvent)
     self.assertEqual(first.name, "submit")
     # stop watching; the remaining events are not needed
     watcher.cancel()
Beispiel #14
0
def main():
    """Submit compute and io-forwarding jobs and attach event callbacks to each.

    The required positional argument ``N`` is the total number of jobs: the
    first ``N // 2`` run ``./compute.py 10`` and the rest run
    ``./io-forwarding.py 10``. ``event_callback`` is registered for every
    event in ``executor.EVENTS`` on every future.
    """
    parser = argparse.ArgumentParser(
        description=(
            "submit and wait for the completion of N bundles, "
            "each consisting of compute and io-forwarding jobs"
        )
    )
    parser.add_argument(
        "njobs",
        metavar="N",
        type=int,
        help="the number of bundles to submit and wait",
    )
    total = parser.parse_args().njobs
    split_at = total // 2

    def build_jobspec(script, tasks, cores):
        # 3-node jobspec that runs `script 10` with the caller's cwd and environment
        spec = JobspecV1.from_command(
            command=[script, "10"],
            num_tasks=tasks,
            num_nodes=3,
            cores_per_task=cores,
        )
        spec.cwd = os.getcwd()
        spec.environment = dict(os.environ)
        return spec

    compute_jobreq = build_jobspec("./compute.py", 6, 2)
    io_jobreq = build_jobspec("./io-forwarding.py", 3, 1)

    with FluxExecutor() as executor:
        futures = []
        for _ in range(split_at):
            futures.append(executor.submit(compute_jobreq))
        for _ in range(split_at, total):
            futures.append(executor.submit(io_jobreq))
        print("bookkeeper: all jobs submitted")
        # the same callback is used for every event here, though each event
        # could be given its own
        for fut in futures:
            for event_name in executor.EVENTS:
                fut.add_event_callback(event_name, event_callback)
        print("bookkeeper: waiting until all jobs complete")
    # leaving the context manager waits for the executor to complete all futures
    print("bookkeeper: all jobs completed")
Beispiel #15
0
 def test_wait(self):
     """Exercise ``concurrent.futures.wait`` with each ``return_when`` mode."""
     with FluxExecutor(threads=3) as executor:
         spec = JobspecV1.from_command(["false"])
         submitted = [executor.submit(spec) for _ in range(3)]
         # early-return modes: only the completed set is checked
         for mode in (cf.FIRST_COMPLETED, cf.FIRST_EXCEPTION):
             finished, _pending = cf.wait(submitted, return_when=mode)
             self._check_done(finished)
         # default mode: nothing may be left pending
         finished, pending = cf.wait(submitted)
         self._check_done(finished)
         self.assertEqual(len(pending), 0)
Beispiel #16
0
    def init_jobspec(self, args):
        """Build a jobspec for ``args.command`` with the requested resources.

        Reads ``command``, ``ntasks``, ``cores_per_task``, ``gpus_per_task``
        and ``nodes`` from ``args``.

        Raises:
            ValueError: if ``args.command`` is empty or missing a value.
        """
        command = args.command
        if not command:
            raise ValueError("job command and arguments are missing")

        resources = {
            "num_tasks": args.ntasks,
            "cores_per_task": args.cores_per_task,
            "gpus_per_task": args.gpus_per_task,
            "num_nodes": args.nodes,
        }
        return JobspecV1.from_command(command, **resources)
Beispiel #17
0
 def test_submit_after_shutdown(self):
     """After shutdown, ``submit`` and ``attach`` raise RuntimeError and the broken flag stays clear."""
     closed = FluxExecutor()
     closed.shutdown(wait=True)
     # submit: both a well-formed jobspec and a bogus argument are refused
     self.assertRaises(
         RuntimeError,
         lambda: closed.submit(JobspecV1.from_command(["true"])),
     )
     self.assertRaises(RuntimeError, closed.submit, None)
     # attach: likewise for a jobid and a bogus argument
     for target in (5, None):
         with self.assertRaises(RuntimeError):
             closed.attach(target)
     self.assertFalse(closed._broken_event.is_set())
Beispiel #18
0
 def test_failed_submit(self):
     """A ``false`` job gets a jobid, fires its jobid callback and returns 1.

     The job itself is submitted successfully; it is the command that fails,
     so the future carries result 1 and no exception.
     """
     with FluxExecutor(thread_name_prefix="foobar") as executor:
         jobspec = JobspecV1.from_command(["false"])
         # Create the event BEFORE registering the callback: the lambda
         # closes over `event`, and if the callback ran before the name was
         # bound it would fail on an unbound free variable.
         event = threading.Event()
         future = executor.submit(jobspec).add_jobid_callback(
             lambda future: event.set()
         )
         jobid = future.jobid()
         self.assertGreater(jobid, 0)
         self.assertTrue(event.is_set())
         self.assertEqual(future.result(), 1)
         self.assertIsNone(future.exception())
Beispiel #19
0
 def test_bad_submit_arguments(self):
     """send bad arguments to ``flux.job.submit``"""
     queue = collections.deque()
     stop = threading.Event()
     worker = _FluxExecutorThread(stop, queue, 0.01, (), {})
     futures = [FluxExecutorFuture(threading.get_ident()) for _ in range(5)]
     jobspec = JobspecV1.from_command(["false"])
     # queue one submission per future, each with an unsupported keyword
     for fut in futures:
         queue.append(((jobspec,), {"not_an_arg": 42}, fut))
     stop.set()
     # run the thread body inline, in the calling thread
     worker.run()
     self.assertFalse(queue)
     self.assertEqual(0, worker._FluxExecutorThread__remaining_flux_futures)
     for fut in futures:
         self.assertIsInstance(fut.exception(), TypeError)
Beispiel #20
0
    def test_16_job_kill(self):
        """A job killed with SIGKILL is reported by wait as unsuccessful."""
        self.sleep_jobspec = JobspecV1.from_command(["sleep", "1000"])
        jobid = job.submit(self.fh, self.sleep_jobspec, waitable=True)

        #  Let the shell start fully first, to avoid a delay in signal delivery
        job.event_wait(self.fh, jobid, name="start")
        job.event_wait(
            self.fh,
            jobid,
            name="shell.start",
            eventlog="guest.exec.eventlog",
        )

        job.kill(self.fh, jobid, signum=signal.SIGKILL)
        waited = job.wait_async(self.fh, jobid=jobid).wait_for(5.0)
        return_id, success, _errmsg = waited.get_status()
        self.assertEqual(return_id, jobid)
        self.assertFalse(success)
Beispiel #21
0
 def test_20_004_job_event_watch(self):
     """``event_watch`` yields 10 well-formed events for a ``sleep 0`` job."""
     jobid = job.submit(self.fh, JobspecV1.from_command(["sleep", "0"]))
     self.assertTrue(jobid > 0)
     # attribute name -> exact type each event must carry
     expected_types = (("timestamp", float), ("name", str), ("context", dict))
     seen = []
     for event in job.event_watch(self.fh, jobid):
         self.assertIsInstance(event, job.EventLogEvent)
         for attr, _ in expected_types:
             self.assertTrue(hasattr(event, attr))
         for attr, exact_type in expected_types:
             self.assertIs(type(getattr(event, attr)), exact_type)
         seen.append(event.name)
     self.assertEqual(len(seen), 10)
Beispiel #22
0
 def test_as_completed(self):
     """Submitted ``true`` jobs, and futures re-attached to them, all yield result 0."""
     with FluxExecutor() as executor:
         spec = JobspecV1.from_command(["true"])
         submitted = [executor.submit(spec) for _ in range(3)]
         reattached = []
         for done in cf.as_completed(submitted):
             self.assertEqual(done.result(timeout=0), 0)
             self.assertIsNone(done.exception())
             # attach a second future to the same, already finished job
             twin = executor.attach(done.jobid())
             self.assertEqual(done.jobid(), twin.jobid())
             reattached.append(twin)
         for twin in cf.as_completed(reattached):
             self.assertEqual(twin.result(timeout=0), 0)
             self.assertIsNone(twin.exception())
     self.assertFalse(executor._broken_event.is_set())
Beispiel #23
0
 def test_cancel(self):
     """Cancel freshly submitted futures and check both possible outcomes."""
     with FluxExecutor() as executor:
         spec = JobspecV1.from_command(["false"])
         for _ in range(3):
             future = executor.submit(spec)
             if not future.cancel():
                 # too late to cancel: the job ran and `false` returned 1
                 self.assertEqual(future.result(), 1)
                 self.assertIsNone(future.exception())
                 continue
             # cancelled: the future is neither running nor queryable
             self.assertFalse(future.running())
             self.assertTrue(future.cancelled())
             for query in (future.jobid, future.exception):
                 with self.assertRaises(cf.CancelledError):
                     query()
Beispiel #24
0
    def init_jobspec(self, args):
        """Build a nested-instance jobspec via ``JobspecV1.from_nest_command``.

        Reads ``COMMAND``, ``nslots``, ``cores_per_slot``, ``gpus_per_slot``,
        ``nodes`` and ``broker_opts`` from ``args``. The ``pty`` shell option
        is enabled when stdin is a terminal.

        Raises:
            ValueError: if ``args.nslots`` is falsy.
        """
        if not args.nslots:
            raise ValueError("Number of slots to allocate must be specified")

        nest_kwargs = {
            "command": args.COMMAND,
            "num_slots": args.nslots,
            "cores_per_slot": args.cores_per_slot,
            "gpus_per_slot": args.gpus_per_slot,
            "num_nodes": args.nodes,
            "broker_opts": list_split(args.broker_opts),
        }
        jobspec = JobspecV1.from_nest_command(**nest_kwargs)

        interactive = sys.stdin.isatty()
        if interactive:
            jobspec.setattr_shell_option("pty", True)
        return jobspec
Beispiel #25
0
 def test_cancel(self):
     """Futures cancelled before the thread runs raise CancelledError on query."""
     queue = collections.deque()
     stop = threading.Event()
     jobspec = JobspecV1.from_command(["false"])
     worker = _FluxExecutorThread(stop, queue, 0.01, (), {})
     futures = [FluxExecutorFuture(threading.get_ident()) for _ in range(5)]
     # queue each submission, then cancel it straight away
     for fut in futures:
         queue.append(((jobspec,), {}, fut))
         fut.cancel()
     stop.set()
     # run the thread body inline, in the calling thread
     worker.run()
     for fut in futures:
         for query in (fut.result, fut.jobid):
             with self.assertRaises(cf.CancelledError):
                 query()
Beispiel #26
0
 def test_cancel_attach(self):
     """Cancel futures attached to an existing job and check both outcomes."""
     with FluxExecutor() as executor:
         spec = JobspecV1.from_command(["true"])
         jobid = executor.submit(spec).jobid()
         for _ in range(3):
             attached = executor.attach(jobid)
             if not attached.cancel():
                 # too late to cancel: the job's result comes through
                 self.assertEqual(attached.result(), 0)
                 self.assertIsNone(attached.exception())
                 continue
             self.assertFalse(attached.running())
             self.assertTrue(attached.cancelled())
             # the jobid is still available on a cancelled attached future
             self.assertEqual(attached.jobid(), jobid)
             with self.assertRaises(cf.CancelledError):
                 attached.exception()
     self.assertFalse(executor._broken_event.is_set())
Beispiel #27
0
 def test_20_006_job_event_wait(self):
     """``event_wait`` returns the named event and raises OSError for an unknown one."""
     jobid = job.submit(self.fh, JobspecV1.from_command(["sleep", "0"]))
     self.assertTrue(jobid > 0)
     # (event name, extra keyword arguments for event_wait), in wait order
     waits = (
         ("start", {}),
         ("shell.init", {"eventlog": "guest.exec.eventlog"}),
         ("clean", {}),
     )
     for wanted, extra in waits:
         event = job.event_wait(self.fh, jobid, wanted, **extra)
         self.assertIsInstance(event, job.EventLogEvent)
         self.assertEqual(event.name, wanted)
     with self.assertRaises(OSError):
         job.event_wait(self.fh, jobid, "foo")
Beispiel #28
0
    def init_jobspec(self, args):
        """Build a jobspec that runs ``flux broker`` with the given options and command.

        Reads ``nslots``, ``broker_opts``, ``COMMAND``, ``cores_per_slot``,
        ``gpus_per_slot`` and ``nodes`` from ``args``. The ``pty`` shell
        option is enabled when stdin is a terminal.

        Raises:
            ValueError: if ``args.nslots`` is falsy.
        """
        if not args.nslots:
            raise ValueError("Number of slots to allocate must be specified")

        # flux broker [broker options...] [command...]
        command = ["flux", "broker"]
        command.extend(list_split(args.broker_opts))
        command.extend(args.COMMAND)

        jobspec = JobspecV1.from_command(
            command=command,
            num_tasks=args.nslots,
            cores_per_task=args.cores_per_slot,
            gpus_per_task=args.gpus_per_slot,
            num_nodes=args.nodes,
        )
        jobspec.setattr_shell_option("per-resource.type", "node")

        interactive = sys.stdin.isatty()
        if interactive:
            jobspec.setattr_shell_option("pty", True)
        return jobspec
Beispiel #29
0
 def test_20_007_job_event_wait_exception(self):
     """Waiting for "start" on a 128-task job must not return an event.

     With default arguments a ``JobException`` (severity 0, type "alloc") is
     accepted; with ``raiseJobException=False`` an ``OSError`` with errno
     ENODATA is accepted.
     """
     event = None
     big_spec = JobspecV1.from_command(["sleep", "0"], num_tasks=128)
     jobid = job.submit(self.fh, big_spec)
     self.assertTrue(jobid > 0)

     # default mode: the job exception surfaces as JobException
     try:
         event = job.event_wait(self.fh, jobid, "start")
     except job.JobException as job_err:
         self.assertEqual(job_err.severity, 0)
         self.assertEqual(job_err.type, "alloc")
         self.assertGreater(job_err.timestamp, 0.0)
     self.assertIs(event, None)

     # raiseJobException=False: the failure surfaces as OSError instead
     try:
         event = job.event_wait(self.fh, jobid, "start", raiseJobException=False)
     except OSError as os_err:
         self.assertEqual(os_err.errno, errno.ENODATA)
     self.assertIs(event, None)
Beispiel #30
0
 def test_20_005_job_event_watch_with_cancel(self):
     """Cancelling an event watch at "start" ends the stream early."""
     spec = JobspecV1.from_command(["sleep", "3"])
     jobid = job.submit(self.fh, spec, waitable=True)
     self.assertTrue(jobid > 0)
     events = []
     future = job.event_watch_async(self.fh, jobid)
     # drain events until the watch reports None
     event = future.get_event()
     while event is not None:
         if event.name == "start":
             future.cancel()
         events.append(event.name)
         event = future.get_event()
     self.assertEqual(event, None)
     # Should have less than the expected number of events due to cancel
     self.assertLess(len(events), 8)
     # clean up the still-running job
     job.cancel(self.fh, jobid)
     job.wait(self.fh, jobid)