Example #1
0
    def exec_watch_cb(self, future, args, jobid, label=""):
        """React to the next event from a job's guest.exec.eventlog.

        Nothing happens until the "shell.init" event is seen.  At that
        point a watch on the job's "guest.output" eventlog is started with
        self.output_watch_cb as its callback, and the watch on this
        eventlog is cancelled.

        Args:
            future: watch future; must provide get_event() and cancel().
            args: forwarded unchanged to output_watch_cb.
            jobid: id of the job whose eventlogs are being watched.
            label: forwarded unchanged to output_watch_cb.
        """
        event = future.get_event()
        if not event or event.name != "shell.init":
            return

        #  shell.init has been posted, so it is now safe to start
        #   watching the output eventlog:
        output_watch = job.event_watch_async(
            self.flux_handle, jobid, eventlog="guest.output"
        )
        output_watch.then(self.output_watch_cb, args, jobid, label)

        #  Nothing further is needed from this eventlog
        future.cancel()
Example #2
0
    def submit_cb(self, future, args, label=""):
        """Handle the completion of one job submission.

        On success the job id is printed unless args.quiet is set.  If an
        OSError is raised while obtaining or printing the id, the error is
        written to stderr prefixed with label, self.exitcode is set to 1,
        progress is updated with submit_failed=True and the callback
        returns.

        When args.wait or args.watch is set, a watch on the job's main
        eventlog is started with self.event_watch_cb as its callback.
        Otherwise only the progress display, if there is one, is updated.

        Args:
            future: submission future; must provide get_id().
            args: options object; quiet, wait and watch are read here.
            label: prefix for error messages, also forwarded to
                event_watch_cb.
        """
        try:
            jobid = JobID(future.get_id())
            if not args.quiet:
                print(jobid)
        except OSError as exc:
            print(f"{label}{exc}", file=sys.stderr)
            self.exitcode = 1
            self.progress_update(submit_failed=True)
            return

        if not (args.wait or args.watch):
            #  Nobody is waiting on the job: only the submission rate
            #   needs to be refreshed
            if self.progress:
                self.progress.update(jps=self.jobs_per_sec())
            return

        #  The user asked to wait for or watch all jobs, so start watching
        #   the main eventlog.  A little per-job state is carried along so
        #   that exceptions raised before the job is running can be
        #   handled properly.
        jobinfo = {"id": jobid, "state": "submit"}
        main_watch = job.event_watch_async(self.flux_handle, jobid)
        main_watch.then(self.event_watch_cb, args, jobinfo, label)
        self.progress_update(jobinfo, submit=True)
Example #3
0
    def event_watch_cb(self, future, args, jobinfo, label=""):
        """Process one event from a job's main eventlog.

        Progress is updated for every callback, including the final one
        where no event is returned.  Three events are acted upon:

        * "exception": a job still in the "submit" state is marked
          "failed" and a zero self.exitcode is raised to 1; the exception
          type and note are always printed to stderr.
        * "start" (only when args.watch is set): the job is marked
          "running" and a watch on its guest.exec.eventlog is started
          with self.exec_watch_cb as its callback.
        * "finish": the job is marked "done" and self.exitcode is raised
          to the job's exit code if that is larger.

        Args:
            future: watch future; must provide get_event().
            args: options object; watch and verbose are read here.
            jobinfo: per-job dict with "id" and "state" keys; "state" is
                updated in place.
            label: forwarded unchanged to exec_watch_cb.
        """
        jobid = jobinfo["id"]
        event = future.get_event()
        self.progress_update(jobinfo, event=event)
        if event is None:
            return

        if event.name == "exception":
            #  A job that was still pending failed to execute:
            #   treat it as a failure with exitcode = 1
            if jobinfo["state"] == "submit":
                jobinfo["state"] = "failed"
                if self.exitcode == 0:
                    self.exitcode = 1

            #  Print a human readable error:
            exception_type = event.context["type"]
            note = event.context["note"]
            message = f"{jobid}: exception: type={exception_type} note={note}"
            print(message, file=sys.stderr)
            return

        if event.name == "start" and args.watch:
            #  The --watch option was provided: follow the exec eventlog
            jobinfo["state"] = "running"
            exec_watch = job.event_watch_async(
                self.flux_handle, jobid, eventlog="guest.exec.eventlog"
            )
            exec_watch.then(self.exec_watch_cb, args, jobid, label)
            return

        if event.name == "finish":
            #  Collect exit status and adjust self.exitcode if necessary:
            jobinfo["state"] = "done"
            status = self.status_to_exitcode(event.context["status"])
            if args.verbose:
                print(f"{jobid}: complete: status={status}", file=sys.stderr)
            if status > self.exitcode:
                self.exitcode = status
Example #4
0
 def test_20_003_job_event_watch_sync(self):
     #  Fetch the first event of a freshly submitted job by calling
     #   get_event() directly on the watch future (no reactor callback).
     spec = JobspecV1.from_command(["sleep", "0"])
     jobid = job.submit(self.fh, spec)
     self.assertTrue(jobid > 0)

     watcher = job.event_watch_async(self.fh, jobid)
     self.assertIsInstance(watcher, job.JobEventWatchFuture)

     #  The first event in the main eventlog should be "submit"
     first = watcher.get_event()
     self.assertIsInstance(first, job.EventLogEvent)
     self.assertEqual(first.name, "submit")

     watcher.cancel()
Example #5
0
 def test_20_005_job_event_watch_with_cancel(self):
     #  Cancel the event watch as soon as "start" is seen and check that
     #   the stream then ends early with a None event.
     spec = JobspecV1.from_command(["sleep", "3"])
     jobid = job.submit(self.fh, spec, waitable=True)
     self.assertTrue(jobid > 0)

     seen = []
     watcher = job.event_watch_async(self.fh, jobid)
     event = watcher.get_event()
     while event is not None:
         if event.name == "start":
             watcher.cancel()
         seen.append(event.name)
         event = watcher.get_event()

     #  The loop only ends once get_event() has returned None
     self.assertEqual(event, None)
     # Should have less than the expected number of events due to cancel
     self.assertLess(len(seen), 8)

     #  Clean up the still-running job
     job.cancel(self.fh, jobid)
     job.wait(self.fh, jobid)
Example #6
0
    def test_20_005_1_job_event_watch_with_cancel_stop_true(self):
        #  Cancel the watch with stop=True from inside the callback when
        #   "start" is seen; "start" must then be the last event recorded.
        spec = JobspecV1.from_command(["sleep", "3"])
        jobid = job.submit(self.fh, spec, waitable=True)
        self.assertTrue(jobid > 0)

        names = []

        def on_event(watch, seen):
            event = watch.get_event()
            if event.name == "start":
                watch.cancel(stop=True)
            seen.append(event.name)

        watcher = job.event_watch_async(self.fh, jobid)
        watcher.then(on_event, names)
        self.fh.reactor_run()

        # Last event should be "start"
        self.assertEqual(names[-1], "start")

        #  Clean up the still-running job
        job.cancel(self.fh, jobid)
        job.wait(self.fh, jobid)
Example #7
0
    def test_20_002_job_event_watch_no_autoreset(self):
        #  With autoreset=False, get_event() keeps returning the same
        #   event until reset() is called explicitly on the future.
        spec = JobspecV1.from_command(["sleep", "0"])
        jobid = job.submit(self.fh, spec)
        self.assertTrue(jobid > 0)

        watcher = job.event_watch_async(self.fh, jobid)
        self.assertIsInstance(watcher, job.JobEventWatchFuture)

        def current_event_name():
            #  Fetch the current event without resetting the future
            event = watcher.get_event(autoreset=False)
            self.assertIsInstance(event, job.EventLogEvent)
            return event.name

        # First event should be "submit"
        self.assertEqual(current_event_name(), "submit")

        # get_event() again with no reset returns same event:
        self.assertEqual(current_event_name(), "submit")

        # reset, then get_event() should get next event
        watcher.reset()
        self.assertEqual(current_event_name(), "validate")

        watcher.cancel()
Example #8
0
    def test_20_001_job_event_watch_async(self):
        #  Watch the whole main eventlog of a short job through a reactor
        #   callback and check the extra argument and the event sequence.
        expected_arg = {"a": 1, "b": 2}
        names = []

        def on_event(watch, arg):
            self.assertEqual(arg, expected_arg)
            event = watch.get_event()
            if event is not None:
                self.assertIsInstance(event, job.EventLogEvent)
                names.append(event.name)
            else:
                #  No more events: leave the reactor loop
                watch.get_flux().reactor_stop()

        spec = JobspecV1.from_command(["sleep", "0"])
        jobid = job.submit(self.fh, spec)
        self.assertTrue(jobid > 0)

        watcher = job.event_watch_async(self.fh, jobid)
        self.assertIsInstance(watcher, job.JobEventWatchFuture)
        watcher.then(on_event, expected_arg)

        self.assertGreaterEqual(self.fh.reactor_run(), 0)
        self.assertEqual(len(names), 10)
        self.assertEqual(names[0], "submit")
        self.assertEqual(names[-1], "clean")
Example #9
0
    def test_32_job_result(self):
        #  Submit five jobs, collect a result future for each one via
        #   flux.job.result_async(), run the reactor, and compare each
        #   future's get_info() against an expected JobInfo.  The jobs are:
        #     ids[0]  "true"        ids[1]  "false"     ids[2]  "nosuchprog"
        #     ids[3]  "sleep 120"   (canceled once "shell.start" is seen)
        #     ids[4]  "true" submitted with urgency=0, canceled right away
        #
        #  NOTE(review): the t_* values in the expected JobInfo objects
        #   (0.0, 1.0, 2.0, 3.0) look like placeholders; presumably
        #   assertJobInfoEqual does not compare timestamps exactly -- confirm.
        result = {}
        ids = []

        #  Store the fulfilled result future under its job id
        def cb(future, jobid):
            result[jobid] = future

        ids.append(job.submit(self.fh, JobspecV1.from_command(["true"])))
        ids.append(job.submit(self.fh, JobspecV1.from_command(["false"])))
        ids.append(job.submit(self.fh, JobspecV1.from_command(["nosuchprog"])))
        ids.append(
            job.submit(self.fh, JobspecV1.from_command(["sleep", "120"])))

        # Submit held job so we can cancel before RUN state
        ids.append(
            job.submit(self.fh, JobspecV1.from_command(["true"]), urgency=0))
        job.cancel(self.fh, ids[4])

        for jobid in ids:
            flux.job.result_async(self.fh, jobid).then(cb, jobid)

        #  Cancel the job, and this watch, once "shell.start" shows up
        #   in the watched eventlog
        def cancel_on_start(future, jobid):
            event = future.get_event()
            if event is None:
                return
            if event.name == "shell.start":
                job.cancel(self.fh, jobid)
                future.cancel()

        job.event_watch_async(self.fh, ids[3],
                              eventlog="guest.exec.eventlog").then(
                                  cancel_on_start, ids[3])

        self.fh.reactor_run()
        #  Every submitted job must have produced a result future
        self.assertEqual(len(result.keys()), len(ids))

        #  Have assertEqual() compare JobInfo objects with assertJobInfoEqual
        self.addTypeEqualityFunc(JobInfo, self.assertJobInfoEqual)

        #  NOTE(review): this first expectation uses the key "t_start"
        #   while every other one below uses "t_submit" -- possibly a typo;
        #   verify against the keys JobInfo accepts.
        self.assertEqual(
            result[ids[0]].get_info(),
            JobInfo({
                "id": ids[0],
                "result": flux.constants.FLUX_JOB_RESULT_COMPLETED,
                "t_start": 1.0,
                "t_run": 2.0,
                "t_cleanup": 3.0,
                "waitstatus": 0,
                "exception_occurred": False,
            }),
        )
        self.assertEqual(
            result[ids[1]].get_info(),
            JobInfo({
                "id": ids[1],
                "result": flux.constants.FLUX_JOB_RESULT_FAILED,
                "t_submit": 1.0,
                "t_run": 2.0,
                "t_cleanup": 3.0,
                "waitstatus": 256,
                "exception_occurred": False,
            }),
        )
        self.assertEqual(
            result[ids[2]].get_info(),
            JobInfo({
                "id": ids[2],
                "result": flux.constants.FLUX_JOB_RESULT_FAILED,
                "t_submit": 1.0,
                "t_run": 2.0,
                "t_cleanup": 3.0,
                "waitstatus": 32512,
                "exception_occurred": True,
                "exception_type": "exec",
                "exception_note": "task 0.*: start failed: nosuchprog: "
                "No such file or directory",
                "exception_severity": 0,
            }),
        )
        self.assertEqual(
            result[ids[3]].get_info(),
            JobInfo({
                "id": ids[3],
                "result": flux.constants.FLUX_JOB_RESULT_CANCELED,
                "t_submit": 1.0,
                "t_run": 2.0,
                "t_cleanup": 3.0,
                "waitstatus": 36608,  # 143<<8
                "exception_occurred": True,
                "exception_type": "cancel",
                "exception_note": "",
                "exception_severity": 0,
            }),
        )
        #  The held job was canceled before running, so no t_run,
        #   t_cleanup or waitstatus is expected for it
        self.assertEqual(
            result[ids[4]].get_info(),
            JobInfo({
                "id": ids[4],
                "result": flux.constants.FLUX_JOB_RESULT_CANCELED,
                "t_submit": 0.0,
                "exception_occurred": True,
                "exception_type": "cancel",
                "exception_note": "",
                "exception_severity": 0,
            }),
        )

        # synchronous job.result() test
        self.assertEqual(job.result(self.fh, ids[3]),
                         result[ids[3]].get_info())
Example #10
0
 def handle_submit(self, args, jobid):
     """Record a newly submitted job and start watching its eventlog.

     The current wall-clock time is stored under "t_submit" in
     self.jobs[jobid], then an asynchronous event watch is started with
     self.event_cb as its callback.

     Args:
         args: forwarded unchanged to event_cb.
         jobid: id of the job that was just submitted.
     """
     submitted_at = time.time()
     self.jobs[jobid] = {"t_submit": submitted_at}
     job.event_watch_async(self.handle, jobid).then(
         self.event_cb, args, jobid
     )