コード例 #1
0
ファイル: chain.py プロジェクト: jefemagril/fermipy
    def check_links_status(self,
                           fail_running=False,
                           fail_pending=False):
        """Summarize the job status of every `Link` in this `Chain`.

        Each link in ``self._links`` is asked for the status of the job
        whose key is built by `JobDetails.make_fullkey` from the link's
        ``full_linkname``.  The answers are tallied in a
        `JobStatusVector` and the tally is collapsed into a single flag.

        Parameters
        ----------
        fail_running : `bool`
            If True, consider running jobs as failed

        fail_pending : `bool`
            If True, consider pending jobs as failed

        Returns
        -------
        status : `JobStatus`
            Job status flag that summarizes the status of all the jobs.
        """
        # The same two switches are forwarded unchanged to every link.
        check_opts = dict(fail_running=fail_running,
                          fail_pending=fail_pending)

        tally = JobStatusVector()
        for a_link in self._links.values():
            fullkey = JobDetails.make_fullkey(a_link.full_linkname)
            tally[a_link.check_job_status(fullkey, **check_opts)] += 1

        return tally.get_status()
コード例 #2
0
    def check_links_status(self, fail_running=False, fail_pending=False):
        """Return one status flag summarizing the jobs of all the links.

        For every `Link` held in ``self._links`` the job key is built
        with `JobDetails.make_fullkey` from the link's ``full_linkname``
        and the link reports that job's status.  The statuses are counted
        in a `JobStatusVector`, whose `get_status` gives the result.

        Parameters
        ----------
        fail_running : `bool`
            If True, consider running jobs as failed

        fail_pending : `bool`
            If True, consider pending jobs as failed

        Returns
        -------
        status : `JobStatus`
            Job status flag that summarizes the status of all the jobs.
        """
        counts = JobStatusVector()
        for member in self._links.values():
            job_key = JobDetails.make_fullkey(member.full_linkname)
            reported = member.check_job_status(
                job_key,
                fail_running=fail_running,
                fail_pending=fail_pending,
            )
            counts[reported] += 1
        return counts.get_status()
コード例 #3
0
ファイル: scatter_gather.py プロジェクト: tuoyl/fermipy
    def _check_link_completion(self,
                               link,
                               fail_pending=False,
                               fail_running=False):
        """Refresh and count the status of the jobs dispatched by a link.

        Every entry of ``link.jobs`` whose key does not contain
        `JobDetails.topkey` is re-checked through
        ``self._interface.check_job``.  The refreshed details are stored
        back into ``link.jobs`` and reported to ``link._set_status_self``.

        Parameters
        ----------
        link : `Link`
            Object whose ``jobs`` mapping is inspected and updated.

        fail_pending : `bool`
            If True, consider pending jobs as failed

        fail_running : `bool`
            If True, consider running jobs as failed

        Returns
        -------
        status_vect : `JobStatusVector`
            Vector that summarize the number of jobs in various states.
        """
        counts = JobStatusVector()

        for key, details in link.jobs.items():
            # Keys that contain the top-level marker are skipped entirely.
            if job_key_is_top(key) if False else key.find(JobDetails.topkey) >= 0:
                continue

            details.status = self._interface.check_job(details)

            # Optionally demote jobs that have not finished yet to failed.
            treat_as_failed = (
                (details.status == JobStatus.pending and fail_pending) or
                (details.status == JobStatus.running and fail_running)
            )
            if treat_as_failed:
                details.status = JobStatus.failed

            counts[details.status] += 1
            link.jobs[key] = details
            link._set_status_self(details.jobkey, details.status)

        return counts
コード例 #4
0
ファイル: scatter_gather.py プロジェクト: tuoyl/fermipy
    def _invoke(self, argv, stream=sys.stdout, resubmit_failed=False):
        """Invoke this object to perform a particular action.

        The action is read from the parsed command line arguments.  An
        action that is not in `ACTIONS` is reported on `sys.stderr`.

        Parameters
        ----------

        argv : list
            List of command line arguments, passed to helper classes

        stream : `file`
            Stream that this function will print to,
            must have 'write' function.

        resubmit_failed : bool
            Resubmit failed jobs.

        Returns
        -------
        status_vect : `JobStatusVector`
            Vector that summarize the number of jobs in various states.
            The 'skip' action is the exception: it returns the bare
            `JobStatus.no_job` flag instead of a vector.  An action that
            none of the branches below handles returns an empty vector.

        """
        args = self._run_argparser(argv)

        if args.action not in ACTIONS:
            sys.stderr.write("Unrecognized action %s, options are %s\n" %
                             (args.action, ACTIONS))

        if args.action == 'skip':
            return JobStatus.no_job
        elif args.action in ['run', 'resubmit', 'check_status', 'config']:
            self._job_configs = self.build_job_configs(args.__dict__)

        self._interface._dry_run = args.dry_run

        # Bind the result before dispatching: without this, an action that
        # no branch handles would reach the return statement with
        # `status_vect` unbound and raise UnboundLocalError.
        status_vect = JobStatusVector()

        if args.action == 'run':
            status_vect = self.run_jobs(stream,
                                        resubmit_failed=resubmit_failed)
        elif args.action == 'resubmit':
            status_vect = self.resubmit(stream,
                                        resubmit_failed=resubmit_failed)
        elif args.action == 'check_status':
            self._build_job_dict()
            status_vect = self.check_status(stream)
        elif args.action == 'config':
            # Configuring only builds the job dictionary; it is reported
            # as a single job that is done.
            self._build_job_dict()
            status_vect[JobStatus.done] += 1

        return status_vect
コード例 #5
0
ファイル: scatter_gather.py プロジェクト: tuoyl/fermipy
    def print_update(self, stream=sys.stdout, job_stats=None):
        """Write a summary of the number of jobs in each state.

        Parameters
        ----------
        stream : `file`
            Stream that the summary is written to,
            must have 'write' function.

        job_stats : `JobStatusVector`
            Counts to report.  If None, the counts are tallied from the
            jobs of ``self._scatter_link``, leaving out the jobs whose
            status is `JobStatus.no_job`.
        """
        if job_stats is None:
            job_stats = JobStatusVector()
            for details in list(self._scatter_link.jobs.values()):
                if details.status != JobStatus.no_job:
                    job_stats[details.status] += 1

        # The first line reports the total, which is an attribute of the
        # vector rather than one of its indexed entries.
        stream.write("Status :\n  Total  : %i\n  Unknown: %i\n" %
                     (job_stats.n_total, job_stats[JobStatus.unknown]))

        # The remaining lines each report a pair of indexed entries.
        paired_rows = (
            ("  Not Ready: %i\n  Ready: %i\n",
             JobStatus.not_ready, JobStatus.ready),
            ("  Pending: %i\n  Running: %i\n",
             JobStatus.pending, JobStatus.running),
            ("  Done: %i\n  Failed: %i\n",
             JobStatus.done, JobStatus.failed),
        )
        for template, left, right in paired_rows:
            stream.write(template % (job_stats[left], job_stats[right]))
コード例 #6
0
ファイル: scatter_gather.py プロジェクト: tuoyl/fermipy
    def check_status(self,
                     stream=sys.stdout,
                     check_once=False,
                     fail_pending=False,
                     fail_running=False,
                     no_wait=False,
                     do_print=True,
                     write_status=False):
        """Poll the jobs of the scatter link until none is left unfinished.

        Each pass refreshes the job states with `_check_link_completion`
        on ``self._scatter_link``.  Polling stops after a single pass if
        ``check_once``, ``no_wait`` or ``self.args['check_status_once']``
        is set, before the second pass if ``self.args['dry_run']`` is
        set, and otherwise once the done and failed counts add up to the
        total.  Between passes the loop sleeps for
        ``self.args['job_check_sleep']`` seconds.

        Parameters
        ----------
        stream : `file`
            Stream that this function will print to,
            must have 'write' function.

        check_once : bool
            Check status once and exit loop.

        fail_pending : `bool`
            If True, consider pending jobs as failed

        fail_running : `bool`
            If True, consider running jobs as failed

        no_wait : bool
            Do not sleep before checking jobs.

        do_print : bool
            Print summary stats.

        write_status : bool
            Write the status to the log file.

        Returns
        -------
        status_vect : `JobStatusVector`
            Vector that summarize the number of jobs in various states.
        """
        # Progress marks go to stdout only when the report itself is sent
        # somewhere else and more than a single check was requested.
        if not check_once and stream != sys.stdout:
            sys.stdout.write('Checking status (%is): ' %
                             self.args['job_check_sleep'])
            sys.stdout.flush()

        status_vect = JobStatusVector()
        is_first_pass = True
        while True:
            if is_first_pass:
                # The first pass checks immediately, without sleeping.
                is_first_pass = False
            elif self.args['dry_run']:
                break
            elif not no_wait:
                stream.write("Sleeping %.0f seconds between status checks\n" %
                             self.args['job_check_sleep'])
                if stream != sys.stdout:
                    sys.stdout.write('.')
                    sys.stdout.flush()
                time.sleep(self.args['job_check_sleep'])

            status_vect = self._check_link_completion(self._scatter_link,
                                                      fail_pending,
                                                      fail_running)

            # A single check was requested: report it and stop polling.
            if self.args['check_status_once'] or check_once or no_wait:
                if do_print:
                    self.print_update(stream, status_vect)
                break

            if self.args['print_update'] and do_print:
                self.print_update(stream, status_vect)

            if self._job_archive is not None:
                self._job_archive.write_table_file()

            # Stop once every job is counted as either done or failed.
            n_finished = status_vect.n_done + status_vect.n_failed
            if n_finished == status_vect.n_total:
                break

        status = status_vect.get_status()
        any_failed = status in [JobStatus.failed, JobStatus.partial_failed]

        if any_failed and do_print:
            self.print_update(stream, status_vect)
            self.print_failed(stream)
        if write_status:
            # The log gets the status flag on failure and 0 otherwise.
            self._write_status_to_log(status if any_failed else 0, stream)

        self._set_status_self(status=status)
        if not check_once and stream != sys.stdout:
            sys.stdout.write("! %s\n" % (JOB_STATUS_STRINGS[status]))

        if self._job_archive is not None:
            self._job_archive.write_table_file()

        return status_vect
コード例 #7
0
ファイル: scatter_gather.py プロジェクト: jefemagril/fermipy
    def check_status(self, stream=sys.stdout,
                     check_once=False,
                     fail_pending=False, fail_running=False,
                     no_wait=False, do_print=True,
                     write_status=False):
        """Loop over status checks of the jobs of the scatter link.

        On every iteration `_check_link_completion` is called on
        ``self._scatter_link``.  The loop ends after one iteration if
        ``check_once``, ``no_wait`` or ``self.args['check_status_once']``
        is set, before the second iteration if ``self.args['dry_run']``
        is set, and otherwise when the number of done plus failed jobs
        equals the total.  The loop sleeps for
        ``self.args['job_check_sleep']`` seconds between iterations.

        Parameters
        ----------
        stream : `file`
            Stream that this function will print to,
            must have 'write' function.

        check_once : bool
            Check status once and exit loop.

        fail_pending : `bool`
            If True, consider pending jobs as failed

        fail_running : `bool`
            If True, consider running jobs as failed

        no_wait : bool
            Do not sleep before checking jobs.

        do_print : bool
            Print summary stats.

        write_status : bool
            Write the status to the log file.

        Returns
        -------
        status_vect : `JobStatusVector`
            Vector that summarize the number of jobs in various states.
        """
        # When the report goes to another stream, stdout still gets a
        # short progress line, unless only one check was asked for.
        if not check_once and stream != sys.stdout:
            sys.stdout.write('Checking status (%is): ' %
                             self.args['job_check_sleep'])
            sys.stdout.flush()

        status_vect = JobStatusVector()
        n_passes = 0
        while True:
            n_passes += 1
            if n_passes == 1:
                # No sleeping ahead of the very first check.
                pass
            elif self.args['dry_run']:
                break
            elif not no_wait:
                stream.write("Sleeping %.0f seconds between status checks\n" %
                             self.args['job_check_sleep'])
                if stream != sys.stdout:
                    sys.stdout.write('.')
                    sys.stdout.flush()
                time.sleep(self.args['job_check_sleep'])

            status_vect = self._check_link_completion(
                self._scatter_link, fail_pending, fail_running)

            # One-shot modes print the summary (if wanted) and leave.
            if self.args['check_status_once'] or check_once or no_wait:
                if do_print:
                    self.print_update(stream, status_vect)
                break

            if self.args['print_update'] and do_print:
                self.print_update(stream, status_vect)

            if self._job_archive is not None:
                self._job_archive.write_table_file()

            # Nothing is left to wait for when all jobs are done or failed.
            n_settled = status_vect.n_done + status_vect.n_failed
            if n_settled == status_vect.n_total:
                break

        status = status_vect.get_status()
        failure_seen = status in [JobStatus.failed, JobStatus.partial_failed]

        if failure_seen and do_print:
            self.print_update(stream, status_vect)
            self.print_failed(stream)
        if write_status:
            # Failures log the status flag itself; anything else logs 0.
            logged_value = status if failure_seen else 0
            self._write_status_to_log(logged_value, stream)

        self._set_status_self(status=status)
        if not check_once and stream != sys.stdout:
            sys.stdout.write("! %s\n" % (JOB_STATUS_STRINGS[status]))

        if self._job_archive is not None:
            self._job_archive.write_table_file()

        return status_vect