예제 #1
0
파일: chain.py 프로젝트: jefemagril/fermipy
    def check_links_status(self,
                           fail_running=False,
                           fail_pending=False):
        """Summarize the job status of every `Link` in this `Chain`.

        Each link is asked for the status of its own job, the answers
        are tallied in a `JobStatusVector`, and the vector's combined
        status is returned.

        Parameters
        ----------
        fail_running : `bool`
            If True, consider running jobs as failed

        fail_pending : `bool`
            If True, consider pending jobs as failed

        Returns
        -------
        status : `JobStatus`
            Job status flag that summarizes the status of all the jobs.

        """
        tally = JobStatusVector()
        for current in self._links.values():
            # Build the lookup key from the link's full name.
            fullkey = JobDetails.make_fullkey(current.full_linkname)
            tally[current.check_job_status(
                fullkey,
                fail_running=fail_running,
                fail_pending=fail_pending)] += 1
        return tally.get_status()
예제 #2
0
파일: link.py 프로젝트: labsaha/fermipy
    def run_with_log(self,
                     dry_run=False,
                     stage_files=True,
                     resubmit_failed=False):
        """Run this link with its output sent to a pre-defined logfile.

        The logfile path is read from the job record stored in
        `self.jobs` under this link's full key.  The directory that
        holds the logfile is created when it does not exist yet, and
        the logfile is closed as soon as `self.run` returns or raises.

        Parameters
        -----------
        dry_run : bool
            Print command but do not run it.

        stage_files : bool
            Copy files to and from scratch staging area.

        resubmit_failed : bool
            Flag for sub-classes to resubmit failed jobs.

        Raises
        ------
        OSError
            If the logfile directory cannot be created or the logfile
            cannot be opened for writing.

        """
        fullkey = JobDetails.make_fullkey(self.full_linkname)
        logfile = self.jobs[fullkey].logfile
        odir = os.path.dirname(logfile)
        # A bare filename has no directory part; nothing to create then.
        if odir:
            try:
                os.makedirs(odir)
            except OSError:
                # An already existing directory is fine; any other
                # failure (permissions, bad path) must not be hidden.
                if not os.path.isdir(odir):
                    raise
        # The context manager guarantees the log is flushed and closed
        # even when the run raises.
        with open(logfile, 'w') as ostream:
            self.run(ostream, dry_run, stage_files, resubmit_failed)
예제 #3
0
    def check_links_status(self, fail_running=False, fail_pending=False):
        """Return one status flag summarizing all links of this `Chain`.

        The status of the job run from each `Link` object is counted in
        a `JobStatusVector`; the summary status of that vector is the
        return value.

        Parameters
        ----------
        fail_running : `bool`
            If True, consider running jobs as failed

        fail_pending : `bool`
            If True, consider pending jobs as failed

        Returns
        -------
        status : `JobStatus`
            Job status flag that summarizes the status of all the jobs.

        """
        options = dict(fail_running=fail_running, fail_pending=fail_pending)
        counts = JobStatusVector()
        for member in self._links.values():
            # Build the lookup key from the link's full name.
            member_key = JobDetails.make_fullkey(member.full_linkname)
            member_status = member.check_job_status(member_key, **options)
            counts[member_status] += 1
        return counts.get_status()
예제 #4
0
파일: link.py 프로젝트: jefemagril/fermipy
 def _register_self(self, logfile, key=JobDetails.topkey, status=JobStatus.unknown):
     """Register this link's job record, or update its status.

     This method does not run the link.  If a record already exists in
     `self.jobs` under the full key built from `self.full_linkname`
     and `key`, only its `status` attribute is overwritten.  Otherwise
     a new record is created through `self._register_job`, using
     `self.args`, `logfile` and `status`.

     Parameters
     ----------
     logfile : str
         Logfile path passed on to `self._register_job`; used only
         when no record exists yet.

     key : str
         Job key, combined with the link name into the full key.

     status : int
         Status to store on the record.
     """
     fullkey = JobDetails.make_fullkey(self.full_linkname, key)
     if fullkey in self.jobs:
         self.jobs[fullkey].status = status
     else:
         self._register_job(key, self.args, logfile, status)
예제 #5
0
파일: link.py 프로젝트: jefemagril/fermipy
 def _set_status_self(self, key=JobDetails.topkey, status=JobStatus.unknown):
     """Store `status` for this link's job.

     A job already present in `self.jobs` gets its status overwritten
     and, when a job archive is attached, is re-registered there.  A
     job that is not present yet is created through
     `self._register_self` with a placeholder logfile name.
     """
     fullkey = JobDetails.make_fullkey(self.full_linkname, key)
     if fullkey not in self.jobs:
         # Unknown job: create the record with a dummy logfile.
         self._register_self('dummy.log', key, status)
         return
     details = self.jobs[fullkey]
     details.status = status
     if self._job_archive:
         self._job_archive.register_job(details)
예제 #6
0
파일: link.py 프로젝트: labsaha/fermipy
 def _set_status_self(self,
                      key=JobDetails.topkey,
                      status=JobStatus.unknown):
     """Set the status of this link's job in `self.jobs`.

     When the job is already known, its status is replaced and the
     record is passed to the `JobArchive` if one is present.  When it
     is not known, it is registered through `self._register_self`
     using a placeholder logfile name.
     """
     fullkey = JobDetails.make_fullkey(self.full_linkname, key)
     known = fullkey in self.jobs
     if not known:
         # No record yet: register one with a dummy logfile.
         self._register_self('dummy.log', key, status)
         return
     record = self.jobs[fullkey]
     record.status = status
     if self._job_archive:
         self._job_archive.register_job(record)
예제 #7
0
파일: link.py 프로젝트: labsaha/fermipy
 def _register_self(self,
                    logfile,
                    key=JobDetails.topkey,
                    status=JobStatus.unknown):
     """Register this link's job record, or update its status.

     This method does not run the link.  If a record already exists in
     `self.jobs` under the full key built from `self.full_linkname`
     and `key`, only its `status` attribute is overwritten.  Otherwise
     a new record is created through `self._register_job`, using
     `self.args`, `logfile` and `status`.

     Parameters
     ----------
     logfile : str
         Logfile path passed on to `self._register_job`; used only
         when no record exists yet.

     key : str
         Job key, combined with the link name into the full key.

     status : int
         Status to store on the record.
     """
     fullkey = JobDetails.make_fullkey(self.full_linkname, key)
     if fullkey in self.jobs:
         self.jobs[fullkey].status = status
     else:
         self._register_job(key, self.args, logfile, status)
예제 #8
0
파일: chain.py 프로젝트: jefemagril/fermipy
 def print_status(self, indent="", recurse=False):
     """Print one status row per `Link` in this `Chain`.

     A header row is printed first.  Each following row shows the link
     name, the status of the link's own job and the status of the jobs
     it runs.  With `recurse` set, links that themselves provide
     `print_status` are expanded below their row with a deeper indent.
     """
     row = "%s%30s : %15s : %20s"
     print(row % (indent, "Linkname", "Link Status", "Jobs Status"))
     for link in self._links.values():
         # Links offering check_status report their jobs' status
         # directly; for the others the string table is used below.
         status_vect = None
         if hasattr(link, 'check_status'):
             status_vect = link.check_status(stream=sys.stdout,
                                             no_wait=True,
                                             do_print=False)
         key = JobDetails.make_fullkey(link.full_linkname)
         link_status = JOB_STATUS_STRINGS[link.check_job_status(key)]
         jobs_status = status_vect
         if jobs_status is None:
             jobs_status = JOB_STATUS_STRINGS[link.check_jobs_status()]
         print(row % (indent, link.linkname, link_status, jobs_status))
         if hasattr(link, 'print_status') and recurse:
             print("----------   %30s    -----------" % link.linkname)
             link.print_status(indent + "  ", recurse=True)
             print("------------------------------------------------")
예제 #9
0
파일: pipeline.py 프로젝트: fermiPy/dmpipe
    def preconfigure(self, config_yaml):
        """Run any links needed to build files that are used in
        _map_arguments.

        The yaml configuration is loaded, a 'prepare-targets' link is
        set up from it and, unless that link's job is already done, the
        link is run.  Calling this again after it has completed is a
        no-op.

        Parameters
        ----------
        config_yaml : str
            Path of the yaml configuration file passed to `load_yaml`.

        Raises
        ------
        ValueError
            If the 'prepare-targets' link has no jobs.
        """
        if self._preconfigured:
            return

        config_dict = load_yaml(config_yaml)
        ttype = config_dict.get('ttype')
        self.link_prefix = "%s." % ttype

        # Simulation names come from the 'sims' section, plus 'random'
        # when the configuration has a 'random' entry.
        sim_names = list(config_dict.get('sims', {}).keys())
        if 'random' in config_dict:
            sim_names.append('random')

        self._set_link('prepare-targets',
                       PrepareTargets,
                       ttype=ttype,
                       rosters=config_dict.get('rosters'),
                       spatial_models=config_dict.get('spatial_models'),
                       alias_dict=config_dict.get('alias_dict', None),
                       sims=sim_names,
                       config=config_dict.get('config_template', None))
        link = self['prepare-targets']

        key = JobDetails.make_fullkey(link.full_linkname)
        if not link.jobs:
            raise ValueError("No Jobs")

        link_status = link.check_job_status(key)
        if link_status != JobStatus.done:
            # A failed earlier attempt is cleaned up before the rerun.
            if link_status == JobStatus.failed:
                link.clean_jobs()
            link.run_with_log()
        self._preconfigured = True
예제 #10
0
 def print_status(self, indent="", recurse=False):
     """Print a job-status summary table for the links of this `Chain`.

     The table starts with a header row, followed by one row per link
     giving its name, the status of its own job and the status of its
     jobs.  If `recurse` is true, links that have a `print_status`
     method print their own table underneath, indented further.
     """
     columns = "%s%30s : %15s : %20s"
     header = (indent, "Linkname", "Link Status", "Jobs Status")
     print(columns % header)
     for link in self._links.values():
         if hasattr(link, 'check_status'):
             status_vect = link.check_status(
                 stream=sys.stdout, no_wait=True, do_print=False)
         else:
             status_vect = None
         key = JobDetails.make_fullkey(link.full_linkname)
         link_status = JOB_STATUS_STRINGS[link.check_job_status(key)]
         # Fall back to the status-string table when the link did not
         # report a status of its own.
         if status_vect is not None:
             jobs_status = status_vect
         else:
             jobs_status = JOB_STATUS_STRINGS[link.check_jobs_status()]
         fields = (indent, link.linkname, link_status, jobs_status)
         print(columns % fields)
         if hasattr(link, 'print_status') and recurse:
             print("----------   %30s    -----------" % link.linkname)
             link.print_status(indent + "  ", recurse=True)
             print("------------------------------------------------")
예제 #11
0
파일: link.py 프로젝트: jefemagril/fermipy
    def run_with_log(self, dry_run=False, stage_files=True, resubmit_failed=False):
        """Run this link with its output sent to a pre-defined logfile.

        The logfile path is read from the job record stored in
        `self.jobs` under this link's full key.  The directory that
        holds the logfile is created when it does not exist yet, and
        the logfile is closed as soon as `self.run` returns or raises.

        Parameters
        -----------
        dry_run : bool
            Print command but do not run it.

        stage_files : bool
            Copy files to and from scratch staging area.

        resubmit_failed : bool
            Flag for sub-classes to resubmit failed jobs.

        Raises
        ------
        OSError
            If the logfile directory cannot be created or the logfile
            cannot be opened for writing.

        """
        fullkey = JobDetails.make_fullkey(self.full_linkname)
        logfile = self.jobs[fullkey].logfile
        odir = os.path.dirname(logfile)
        # A bare filename has no directory part; nothing to create then.
        if odir:
            try:
                os.makedirs(odir)
            except OSError:
                # An already existing directory is fine; any other
                # failure (permissions, bad path) must not be hidden.
                if not os.path.isdir(odir):
                    raise
        # The context manager guarantees the log is flushed and closed
        # even when the run raises.
        with open(logfile, 'w') as ostream:
            self.run(ostream, dry_run, stage_files, resubmit_failed)
예제 #12
0
파일: chain.py 프로젝트: jefemagril/fermipy
    def _run_chain(self,
                   stream=sys.stdout,
                   dry_run=False,
                   stage_files=True,
                   force_run=False,
                   resubmit_failed=False):
        """Run all the links in the chain

        Links are visited in the iteration order of `self._links`.  A
        link is skipped or run depending on its current job status and
        on `force_run` / `resubmit_failed`.  Execution stops at the
        first link whose jobs end up failed or partially failed.
        Afterwards the overall chain status is printed, written to
        `stream` and stored on this chain's own job record.

        Parameters
        -----------
        stream : `file`
            Stream to print to,
            Must have 'write' function

        dry_run : bool
            Print commands but do not run them

        stage_files : bool
            Stage files to and from the scratch area

        force_run : bool
            Run jobs, even if they are marked as done

        resubmit_failed : bool
            Resubmit failed jobs

        """
        self._set_links_job_archive()
        failed = False

        # The scratch-file mappings are computed whenever a file stage is
        # configured; directories are only created and inputs only copied
        # when staging was requested.
        if self._file_stage is not None:
            input_file_mapping, output_file_mapping = self._map_scratch_files(
                self.sub_files)
            if stage_files:
                self._file_stage.make_scratch_dirs(input_file_mapping, dry_run)
                self._file_stage.make_scratch_dirs(
                    output_file_mapping, dry_run)
                self._stage_input_files(input_file_mapping, dry_run)

        for link in self._links.values():
            # Each link logs to logs/<full_linkname>.log and is handed to
            # its _archive_self with an 'unknown' status before the check.
            logfile = os.path.join('logs', "%s.log" % link.full_linkname)
            link._archive_self(logfile, status=JobStatus.unknown)
            key = JobDetails.make_fullkey(link.full_linkname)
            # Links that provide check_status are polled once without
            # waiting (presumably to refresh their job status -- confirm).
            if hasattr(link, 'check_status'):
                link.check_status(stream, no_wait=True,
                                  check_once=True, do_print=False)
            else:
                pass
            link_status = link.check_job_status(key)
            # Skip rules: done links run only with force_run; running links
            # run with force_run or resubmit_failed; failed or partially
            # failed links run only with resubmit_failed.  Any other status
            # falls through and the link is run.
            if link_status in [JobStatus.done]:
                if not force_run:
                    print ("Skipping done link", link.full_linkname)
                    continue
            elif link_status in [JobStatus.running]:
                if not force_run and not resubmit_failed:
                    print ("Skipping running link", link.full_linkname)
                    continue
            elif link_status in [JobStatus.failed,
                                 JobStatus.partial_failed]:
                if not resubmit_failed:
                    print ("Skipping failed link", link.full_linkname)
                    continue
            print ("Running link ", link.full_linkname)
            # Links are always run with stage_files=False; input staging
            # was already done above at chain level.
            link.run_with_log(dry_run=dry_run, stage_files=False,
                              resubmit_failed=resubmit_failed)
            # Record the resulting jobs status on the link itself.
            link_status = link.check_jobs_status()
            link._set_status_self(status=link_status)
            if link_status in [JobStatus.failed, JobStatus.partial_failed]:
                print ("Stoping chain execution at failed link %s" %
                       link.full_linkname)
                failed = True
                break
# NOTE(review): disabled resubmission branch for partially failed links,
# kept as found.
#            elif link_status in [JobStatus.partial_failed]:
#                print ("Resubmitting partially failed link %s" %
#                       link.full_linkname)
#                link.run_with_log(dry_run=dry_run, stage_files=False,
#                                  resubmit_failed=resubmit_failed)
#                link_status = link.check_jobs_status()
#                link._set_status_self(status=link_status)
#                if link_status in [JobStatus.partial_failed]:
#                    print ("Stoping chain execution: resubmission failed %s" %
#                           link.full_linkname)
#                    failed = True
#                    break

        # Outputs are copied back only when staging was requested and no
        # link failed during this run.
        if self._file_stage is not None and stage_files and not failed:
            self._stage_output_files(output_file_mapping, dry_run)

        chain_status = self.check_links_status()
        print ("Chain status: %s" % (JOB_STATUS_STRINGS[chain_status]))
        # NOTE(review): 5 is presumably the numeric value of JobStatus.done;
        # confirm and replace the literal with the named constant.
        if chain_status == 5:
            job_status = 0
        else:
            job_status = -1
        # 0 is written for the status matched above, -1 for anything else.
        self._write_status_to_log(job_status, stream)
        self._set_status_self(status=chain_status)

        if self._job_archive:
            self._job_archive.file_archive.update_file_status()
            self._job_archive.write_table_file()
예제 #13
0
    def _run_chain(self,
                   stream=sys.stdout,
                   dry_run=False,
                   stage_files=True,
                   force_run=False,
                   resubmit_failed=False):
        """Run all the links in the chain

        Links are visited in the iteration order of `self._links`.  A
        link is skipped or run depending on its current job status and
        on `force_run` / `resubmit_failed`.  Execution stops at the
        first link whose jobs end up failed or partially failed.
        Afterwards the overall chain status is printed, written to
        `stream` and stored on this chain's own job record.

        Parameters
        -----------
        stream : `file`
            Stream to print to,
            Must have 'write' function

        dry_run : bool
            Print commands but do not run them

        stage_files : bool
            Stage files to and from the scratch area

        force_run : bool
            Run jobs, even if they are marked as done

        resubmit_failed : bool
            Resubmit failed jobs

        """
        self._set_links_job_archive()
        failed = False

        # The scratch-file mappings are computed whenever a file stage is
        # configured; directories are only created and inputs only copied
        # when staging was requested.
        if self._file_stage is not None:
            input_file_mapping, output_file_mapping = self._map_scratch_files(
                self.sub_files)
            if stage_files:
                self._file_stage.make_scratch_dirs(input_file_mapping, dry_run)
                self._file_stage.make_scratch_dirs(output_file_mapping,
                                                   dry_run)
                self._stage_input_files(input_file_mapping, dry_run)

        for link in self._links.values():
            # Each link logs to logs/<full_linkname>.log and is handed to
            # its _archive_self with an 'unknown' status before the check.
            logfile = os.path.join('logs', "%s.log" % link.full_linkname)
            link._archive_self(logfile, status=JobStatus.unknown)
            key = JobDetails.make_fullkey(link.full_linkname)
            # Links that provide check_status are polled once without
            # waiting (presumably to refresh their job status -- confirm).
            if hasattr(link, 'check_status'):
                link.check_status(stream,
                                  no_wait=True,
                                  check_once=True,
                                  do_print=False)
            else:
                pass
            link_status = link.check_job_status(key)
            # Skip rules: done links run only with force_run; running links
            # run with force_run or resubmit_failed; failed or partially
            # failed links run only with resubmit_failed.  Any other status
            # falls through and the link is run.
            if link_status in [JobStatus.done]:
                if not force_run:
                    print("Skipping done link", link.full_linkname)
                    continue
            elif link_status in [JobStatus.running]:
                if not force_run and not resubmit_failed:
                    print("Skipping running link", link.full_linkname)
                    continue
            elif link_status in [JobStatus.failed, JobStatus.partial_failed]:
                if not resubmit_failed:
                    print("Skipping failed link", link.full_linkname)
                    continue
            print("Running link ", link.full_linkname)
            # Links are always run with stage_files=False; input staging
            # was already done above at chain level.
            link.run_with_log(dry_run=dry_run,
                              stage_files=False,
                              resubmit_failed=resubmit_failed)
            # Record the resulting jobs status on the link itself.
            link_status = link.check_jobs_status()
            link._set_status_self(status=link_status)
            if link_status in [JobStatus.failed, JobStatus.partial_failed]:
                print("Stoping chain execution at failed link %s" %
                      link.full_linkname)
                failed = True
                break
# NOTE(review): disabled resubmission branch for partially failed links,
# kept as found.
#            elif link_status in [JobStatus.partial_failed]:
#                print ("Resubmitting partially failed link %s" %
#                       link.full_linkname)
#                link.run_with_log(dry_run=dry_run, stage_files=False,
#                                  resubmit_failed=resubmit_failed)
#                link_status = link.check_jobs_status()
#                link._set_status_self(status=link_status)
#                if link_status in [JobStatus.partial_failed]:
#                    print ("Stoping chain execution: resubmission failed %s" %
#                           link.full_linkname)
#                    failed = True
#                    break

        # Outputs are copied back only when staging was requested and no
        # link failed during this run.
        if self._file_stage is not None and stage_files and not failed:
            self._stage_output_files(output_file_mapping, dry_run)

        chain_status = self.check_links_status()
        print("Chain status: %s" % (JOB_STATUS_STRINGS[chain_status]))
        # NOTE(review): 5 is presumably the numeric value of JobStatus.done;
        # confirm and replace the literal with the named constant.
        if chain_status == 5:
            job_status = 0
        else:
            job_status = -1
        # 0 is written for the status matched above, -1 for anything else.
        self._write_status_to_log(job_status, stream)
        self._set_status_self(status=chain_status)

        if self._job_archive:
            self._job_archive.file_archive.update_file_status()
            self._job_archive.write_table_file()