Example #1
0
    def submit(self, job: Job) -> None:
        """See :func:`~psij.JobExecutor.submit`."""
        logger.info('Job %s: submitting', job.id)
        self._ensure_work_dir()
        assert (job.spec)

        job.executor = self
        context = self._create_script_context(job)

        # assumes job ids are unique
        submit_file_path = self.work_directory / (job.id + '.job')
        with submit_file_path.open('w') as submit_file:
            self.generate_submit_script(job, context, submit_file)
        try:
            logger.debug('Job %s: running submit command', job.id)
            out = self._run_command(
                self.get_submit_command(job, submit_file_path))
            logger.debug('Job %s: submit command ouput: %s', job.id, out)
            job._native_id = self.job_id_from_submit_output(out)
            logger.info('Job %s: native id: %s', job.id, job.native_id)
            self._set_job_status(
                job,
                JobStatus(JobState.QUEUED,
                          metadata={'native_id': job.native_id}))
        except subprocess.CalledProcessError as ex:
            raise SubmitException(ex.output) from None

        self._queue_poll_thread.register_job(job)
Example #2
0
    def _jobid_cb(self, job: Job, fut: flux.job.FluxExecutorFuture) -> None:
        """Callback triggered when Flux jobid is ready.

        Fetch the jobid, set it on the psij.Job, and set the the job to QUEUED.
        """
        job._native_id = fut.jobid()
        job_status = JobStatus(JobState.QUEUED, time=time.time())
        self._set_job_status(job, job_status)
Example #3
0
    def submit(self, job: Job) -> None:
        """
        Submits the specified :class:`~psij.Job` to be run locally.

        Successful return of this method indicates that the job has been started locally and all
        changes in the job status, including failures, are reported using notifications. If the job
        specification is invalid, an :class:`~psij.InvalidJobException` is thrown. If
        the actual submission fails for reasons outside the validity of the job,
        a :class:`~psij.SubmitException` is thrown.

        :param job: The job to be submitted.
        """
        spec = job.spec
        if not spec:
            raise InvalidJobException('Missing specification')
        job.executor = self

        p = _ChildProcessEntry(
            job, self, self._get_launcher(self._get_launcher_name(spec)))
        assert p.launcher
        args = p.launcher.get_launch_command(job)

        try:
            with job._status_cv:
                if job.status.state == JobState.CANCELED:
                    raise SubmitException('Job canceled')
            logger.debug('Running %s,  out=%s, err=%s', args, spec.stdout_path,
                         spec.stderr_path)
            p.process = subprocess.Popen(args,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.STDOUT,
                                         close_fds=True,
                                         cwd=spec.directory,
                                         env=_get_env(spec))
            self._reaper.register(p)
            job._native_id = p.process.pid
            self._set_job_status(
                job,
                JobStatus(JobState.QUEUED,
                          time=time.time(),
                          metadata={'nativeId': job._native_id}))
            self._set_job_status(job,
                                 JobStatus(JobState.ACTIVE, time=time.time()))
        except Exception as ex:
            raise SubmitException('Failed to submit job', exception=ex)
Example #4
0
    def attach(self, job: Job, native_id: str) -> None:
        """Attaches a job to a native job.

        Attempts to connect `job` to a native job with `native_id` such that the job correctly
        reflects updates to the status of the native job. If the native job was previously
        submitted using this executor (hence having an *exit code file* and a *script output file*),
        the executor will attempt to retrieve the exit code and errors from the job. Otherwise, it
        may be impossible for the executor to distinguish between a failed and successfully
        completed job.

        Parameters
        ----------
        job
            The PSI/J job to attach.
        native_id
            The id of the batch scheduler job to attach to.
        """
        job._native_id = native_id
        job.executor = self
        self._queue_poll_thread.register_job(job)