コード例 #1
0
ファイル: scatter_gather.py プロジェクト: tburnett/fermipy
    def build_job_dict(self):
        """Build a dictionary of `JobDetails` objects for the internal `Link`.

        Registers a top-level ('__top__') job for this link if one is not
        already present, then registers one scatter job per entry in
        `self._job_configs` on the internal scatter `Link`.
        """
        # Dry runs are marked 'unknown' so they are never treated as runnable.
        if self.args['dry_run']:
            status = JobStatus.unknown
        else:
            status = JobStatus.not_ready

        # FIX: dict.has_key() was removed in Python 3 -- use the 'in'
        # operator instead.
        # NOTE(review): the membership test uses the literal '__top__' but
        # the job is stored under job_details.fullkey -- confirm those keys
        # actually match, otherwise the top job is re-created on each call.
        if '__top__' not in self.jobs:
            job_details = JobDetails(jobname=self.linkname,
                                     jobkey='__top__',
                                     appname=self.appname,
                                     logfile="%s_top.log" % self.linkname,
                                     job_config=self.args,
                                     timestamp=get_timestamp(),
                                     file_dict=copy.deepcopy(self.files),
                                     sub_file_dict=copy.deepcopy(self.sub_files),
                                     status=status)
            self.jobs[job_details.fullkey] = job_details

        for jobkey, job_config in sorted(self._job_configs.items()):
            full_job_config = self._merge_config(job_config)
            # Fills in the 'logfile' entry of full_job_config in place.
            ScatterGather._make_scatter_logfile_name(jobkey, self.linkname,
                                                     full_job_config)
            logfile = full_job_config.get('logfile')
            self._scatter_link.register_job(key=jobkey,
                                            job_config=full_job_config,
                                            logfile=logfile,
                                            status=status)
コード例 #2
0
ファイル: link.py プロジェクト: labsaha/fermipy
    def _create_job_details(self, key, job_config, logfile, status):
        """Create a `JobDetails` for a single job

        Parameters
        ----------

        key : str
            Key used to identify this particular job

        job_config : dict
            Dictionary with arguments passed to this particular job

        logfile : str
            Name of the associated log file

        status : int
            Current status of the job

        Returns
        -------
        job_details : `fermipy.jobs.JobDetails`
            Object with the details about a particular job.

        """
        self.update_args(job_config)
        # Gather the constructor arguments in one place, then build.
        details_kwargs = dict(jobname=self.full_linkname,
                              jobkey=key,
                              appname=self.appname,
                              logfile=logfile,
                              job_config=job_config,
                              timestamp=get_timestamp(),
                              file_dict=copy.deepcopy(self.files),
                              sub_file_dict=copy.deepcopy(self.sub_files),
                              status=status)
        return JobDetails(**details_kwargs)
コード例 #3
0
    def check_links_status(self, fail_running=False, fail_pending=False):
        """Check the status of all the jobs run from the `Link` objects
        in this `Chain` and return a status flag that summarizes that.

        Parameters
        ----------
        fail_running : `bool`
            If True, consider running jobs as failed

        fail_pending : `bool`
            If True, consider pending jobs as failed

        Returns
        -------
        status : `JobStatus`
            Job status flag that summarizes the status of all the jobs.
        """
        counts = JobStatusVector()
        for chain_link in self._links.values():
            full_key = JobDetails.make_fullkey(chain_link.full_linkname)
            one_status = chain_link.check_job_status(
                full_key,
                fail_running=fail_running,
                fail_pending=fail_pending)
            counts[one_status] += 1
        return counts.get_status()
コード例 #4
0
ファイル: link.py プロジェクト: labsaha/fermipy
    def run_with_log(self,
                     dry_run=False,
                     stage_files=True,
                     resubmit_failed=False):
        """Runs this link with output sent to a pre-defined logfile

        Parameters
        ----------
        dry_run : bool
            Print command but do not run it.

        stage_files : bool
            Copy files to and from scratch staging area.

        resubmit_failed : bool
            Flag for sub-classes to resubmit failed jobs.

        """
        fullkey = JobDetails.make_fullkey(self.full_linkname)
        job_details = self.jobs[fullkey]
        odir = os.path.dirname(job_details.logfile)
        # The log directory may already exist; any other OSError (e.g.
        # permissions) will surface again when the file is opened below.
        try:
            os.makedirs(odir)
        except OSError:
            pass
        # FIX: use a context manager so the log-file handle is closed
        # (and its buffers flushed) even if self.run() raises.
        with open(job_details.logfile, 'w') as ostream:
            self.run(ostream, dry_run, stage_files, resubmit_failed)
コード例 #5
0
ファイル: chain.py プロジェクト: jefemagril/fermipy
    def check_links_status(self,
                           fail_running=False,
                           fail_pending=False):
        """Check the status of all the jobs run from the `Link` objects
        in this `Chain` and return a status flag that summarizes that.

        Parameters
        ----------
        fail_running : `bool`
            If True, consider running jobs as failed

        fail_pending : `bool`
            If True, consider pending jobs as failed

        Returns
        -------
        status : `JobStatus`
            Job status flag that summarizes the status of all the jobs.
        """
        # Tally one entry per link, then collapse the tally to one flag.
        vect = JobStatusVector()
        for sub_link in self._links.values():
            vect[sub_link.check_job_status(
                JobDetails.make_fullkey(sub_link.full_linkname),
                fail_running=fail_running,
                fail_pending=fail_pending)] += 1
        return vect.get_status()
コード例 #6
0
ファイル: link.py プロジェクト: jefemagril/fermipy
 def _register_self(self, logfile, key=JobDetails.topkey, status=JobStatus.unknown):
     """Record this link's own job in self.jobs under `key`.

     If the job is already registered, only its status is updated;
     otherwise a new job entry is created pointing at `logfile`.
     """
     full_key = JobDetails.make_fullkey(self.full_linkname, key)
     try:
         self.jobs[full_key].status = status
     except KeyError:
         self._register_job(key, self.args, logfile, status)
コード例 #7
0
ファイル: link.py プロジェクト: jefemagril/fermipy
 def _set_status_self(self, key=JobDetails.topkey, status=JobStatus.unknown):
     """Set the status of this job, both in self.jobs and
     in the `JobArchive` if it is present."""
     full_key = JobDetails.make_fullkey(self.full_linkname, key)
     if full_key not in self.jobs:
         # Not registered yet: create the entry with a placeholder logfile.
         self._register_self('dummy.log', key, status)
         return
     self.jobs[full_key].status = status
     if self._job_archive:
         self._job_archive.register_job(self.jobs[full_key])
コード例 #8
0
ファイル: test_job_archive.py プロジェクト: tburnett/fermipy
def test_job_details():
    """Round-trip a `JobDetails` object through table serialization."""
    params = dict(dbkey=-1,
                  jobname='test',
                  jobkey='dummy',
                  appname='test',
                  logfile='test.log',
                  job_config=str({}),
                  timestamp=0,
                  infile_ids=[3, 4],
                  outfile_ids=[6, 7],
                  rm_ids=[3],
                  status=JobStatus.no_job)
    job = JobDetails(**params)
    table, table_ids = JobDetails.make_tables({job.dbkey: job})
    job2 = JobDetails.make_dict(table, table_ids)[job.dbkey]

    # These fields must survive the tables round trip unchanged.
    for attr in ('jobname', 'dbkey', 'logfile', 'status'):
        assert getattr(job, attr) == getattr(job2, attr)
コード例 #9
0
def test_job_details():
    """Check that a `JobDetails` survives conversion to and from tables."""
    job = JobDetails(dbkey=-1,
                     jobname='test',
                     jobkey='dummy',
                     appname='test',
                     logfile='test.log',
                     job_config=str({}),
                     timestamp=0,
                     infile_ids=[3, 4],
                     outfile_ids=[6, 7],
                     rm_ids=[3],
                     status=JobStatus.no_job)
    table, table_ids = JobDetails.make_tables({job.dbkey: job})
    restored = JobDetails.make_dict(table, table_ids)
    job2 = restored[job.dbkey]

    # Compare the round-tripped fields in one shot.
    expected = (job.jobname, job.dbkey, job.logfile, job.status)
    actual = (job2.jobname, job2.dbkey, job2.logfile, job2.status)
    assert expected == actual
コード例 #10
0
ファイル: link.py プロジェクト: labsaha/fermipy
 def _set_status_self(self,
                      key=JobDetails.topkey,
                      status=JobStatus.unknown):
     """Set the status of this job, both in self.jobs and
     in the `JobArchive` if it is present."""
     full_key = JobDetails.make_fullkey(self.full_linkname, key)
     try:
         job = self.jobs[full_key]
     except KeyError:
         # First time we see this key: register it with a stand-in logfile.
         self._register_self('dummy.log', key, status)
         return
     job.status = status
     if self._job_archive:
         self._job_archive.register_job(job)
コード例 #11
0
ファイル: link.py プロジェクト: labsaha/fermipy
 def _register_self(self,
                    logfile,
                    key=JobDetails.topkey,
                    status=JobStatus.unknown):
     """Record this link's own job in self.jobs under `key`,
     updating only the status if it is already registered."""
     full_key = JobDetails.make_fullkey(self.full_linkname, key)
     already_known = full_key in self.jobs
     if already_known:
         self.jobs[full_key].status = status
     else:
         self._register_job(key, self.args, logfile, status)
コード例 #12
0
def test_job_details():
    """ Test that we can build a `JobDetails` object """

    config = dict(dbkey=-1,
                  jobname='test',
                  jobkey='dummy',
                  appname='test',
                  logfile='test.log',
                  job_config=str({}),
                  timestamp=0,
                  infile_ids=[3, 4],
                  outfile_ids=[6, 7],
                  rm_ids=[3],
                  status=JobStatus.no_job)
    job = JobDetails(**config)
    table, table_ids = JobDetails.make_tables({job.dbkey: job})
    job2 = JobDetails.make_dict(table)[job.dbkey]

    # Each stored field must compare equal after the round trip.
    for field in ('jobname', 'dbkey', 'logfile', 'status'):
        assert_str_eq(getattr(job, field), getattr(job2, field))
コード例 #13
0
ファイル: chain.py プロジェクト: jefemagril/fermipy
 def print_status(self, indent="", recurse=False):
     """Print a summary of the job status for each `Link` in this `Chain`"""
     print("%s%30s : %15s : %20s" %
           (indent, "Linkname", "Link Status", "Jobs Status"))
     for sub_link in self._links.values():
         # Links that expose check_status (e.g. sub-chains) are queried
         # directly; for the rest we fall back to check_jobs_status below.
         status_vect = None
         if hasattr(sub_link, 'check_status'):
             status_vect = sub_link.check_status(stream=sys.stdout,
                                                 no_wait=True,
                                                 do_print=False)
         full_key = JobDetails.make_fullkey(sub_link.full_linkname)
         link_status = JOB_STATUS_STRINGS[sub_link.check_job_status(full_key)]
         if status_vect is not None:
             jobs_status = status_vect
         else:
             jobs_status = JOB_STATUS_STRINGS[sub_link.check_jobs_status()]
         print("%s%30s : %15s : %20s" %
               (indent, sub_link.linkname, link_status, jobs_status))
         if recurse and hasattr(sub_link, 'print_status'):
             print("----------   %30s    -----------" % sub_link.linkname)
             sub_link.print_status(indent + "  ", recurse=True)
             print("------------------------------------------------")
コード例 #14
0
ファイル: pipeline.py プロジェクト: fermiPy/dmpipe
    def preconfigure(self, config_yaml):
        """ Run any links needed to build files
        that are used in _map_arguments """
        if self._preconfigured:
            return

        config_dict = load_yaml(config_yaml)
        ttype = config_dict.get('ttype')
        self.link_prefix = "%s." % ttype

        # The simulation list is the declared sims plus the optional
        # 'random' entry.
        sims = config_dict.get('sims', {})
        sim_names = list(sims.keys())
        if 'random' in config_dict:
            sim_names.append('random')

        self._set_link('prepare-targets',
                       PrepareTargets,
                       ttype=ttype,
                       rosters=config_dict.get('rosters'),
                       spatial_models=config_dict.get('spatial_models'),
                       alias_dict=config_dict.get('alias_dict', None),
                       sims=sim_names,
                       config=config_dict.get('config_template', None))
        link = self['prepare-targets']

        key = JobDetails.make_fullkey(link.full_linkname)
        if not link.jobs:
            raise ValueError("No Jobs")
        link_status = link.check_job_status(key)
        if link_status == JobStatus.done:
            self._preconfigured = True
            return
        # Failed runs are cleaned before re-running.
        if link_status == JobStatus.failed:
            link.clean_jobs()
        link.run_with_log()
        self._preconfigured = True
コード例 #15
0
 def print_status(self, indent="", recurse=False):
     """Print a summary of the job status for each `Link` in this `Chain`"""
     # One shared row format for the header and every link line.
     row_fmt = "%s%30s : %15s : %20s"
     print(row_fmt % (indent, "Linkname", "Link Status", "Jobs Status"))
     for link in self._links.values():
         if hasattr(link, 'check_status'):
             status_vect = link.check_status(stream=sys.stdout,
                                             no_wait=True,
                                             do_print=False)
         else:
             status_vect = None
         key = JobDetails.make_fullkey(link.full_linkname)
         link_status = JOB_STATUS_STRINGS[link.check_job_status(key)]
         jobs_status = (status_vect if status_vect is not None
                        else JOB_STATUS_STRINGS[link.check_jobs_status()])
         print(row_fmt % (indent, link.linkname, link_status, jobs_status))
         if hasattr(link, 'print_status') and recurse:
             print("----------   %30s    -----------" % link.linkname)
             link.print_status(indent + "  ", recurse=True)
             print("------------------------------------------------")
コード例 #16
0
ファイル: link.py プロジェクト: jefemagril/fermipy
    def run_with_log(self, dry_run=False, stage_files=True, resubmit_failed=False):
        """Runs this link with output sent to a pre-defined logfile

        Parameters
        ----------
        dry_run : bool
            Print command but do not run it.

        stage_files : bool
            Copy files to and from scratch staging area.

        resubmit_failed : bool
            Flag for sub-classes to resubmit failed jobs.

        """
        fullkey = JobDetails.make_fullkey(self.full_linkname)
        job_details = self.jobs[fullkey]
        odir = os.path.dirname(job_details.logfile)
        # The log directory may already exist; any other OSError will
        # surface again when the file is opened below.
        try:
            os.makedirs(odir)
        except OSError:
            pass
        # FIX: context manager guarantees the log file is closed and
        # flushed even if self.run() raises.
        with open(job_details.logfile, 'w') as ostream:
            self.run(ostream, dry_run, stage_files, resubmit_failed)
コード例 #17
0
ファイル: chain.py プロジェクト: jefemagril/fermipy
    def _run_chain(self,
                   stream=sys.stdout,
                   dry_run=False,
                   stage_files=True,
                   force_run=False,
                   resubmit_failed=False):
        """Run all the links in the chain

        Parameters
        ----------
        stream : `file`
            Stream to print to,
            Must have 'write' function

        dry_run : bool
            Print commands but do not run them

        stage_files : bool
            Stage files to and from the scratch area

        force_run : bool
            Run jobs, even if they are marked as done

        resubmit_failed : bool
            Resubmit failed jobs

        """
        self._set_links_job_archive()
        failed = False

        # Map the chain's files to the scratch area and stage the inputs.
        # NOTE: output_file_mapping is only bound inside this branch; the
        # output-staging call below is guarded by the same condition.
        if self._file_stage is not None:
            input_file_mapping, output_file_mapping = self._map_scratch_files(
                self.sub_files)
            if stage_files:
                self._file_stage.make_scratch_dirs(input_file_mapping, dry_run)
                self._file_stage.make_scratch_dirs(
                    output_file_mapping, dry_run)
                self._stage_input_files(input_file_mapping, dry_run)

        for link in self._links.values():
            # Register this link's own job; status stays 'unknown' until
            # it is actually checked below.
            logfile = os.path.join('logs', "%s.log" % link.full_linkname)
            link._archive_self(logfile, status=JobStatus.unknown)
            key = JobDetails.make_fullkey(link.full_linkname)
            # Links that expose check_status (e.g. sub-chains) get their
            # cached state refreshed before we query the job status.
            if hasattr(link, 'check_status'):
                link.check_status(stream, no_wait=True,
                                  check_once=True, do_print=False)
            else:
                pass
            link_status = link.check_job_status(key)
            # Decide whether to (re)run this link based on its status and
            # the force_run / resubmit_failed flags.
            if link_status in [JobStatus.done]:
                if not force_run:
                    print ("Skipping done link", link.full_linkname)
                    continue
            elif link_status in [JobStatus.running]:
                if not force_run and not resubmit_failed:
                    print ("Skipping running link", link.full_linkname)
                    continue
            elif link_status in [JobStatus.failed,
                                 JobStatus.partial_failed]:
                if not resubmit_failed:
                    print ("Skipping failed link", link.full_linkname)
                    continue
            print ("Running link ", link.full_linkname)
            # Staging was handled once at chain level, so stage_files=False.
            link.run_with_log(dry_run=dry_run, stage_files=False,
                              resubmit_failed=resubmit_failed)
            link_status = link.check_jobs_status()
            link._set_status_self(status=link_status)
            # Any failure stops the chain here; the remaining links are
            # never started.
            if link_status in [JobStatus.failed, JobStatus.partial_failed]:
                print ("Stoping chain execution at failed link %s" %
                       link.full_linkname)
                failed = True
                break
#            elif link_status in [JobStatus.partial_failed]:
#                print ("Resubmitting partially failed link %s" %
#                       link.full_linkname)
#                link.run_with_log(dry_run=dry_run, stage_files=False,
#                                  resubmit_failed=resubmit_failed)
#                link_status = link.check_jobs_status()
#                link._set_status_self(status=link_status)
#                if link_status in [JobStatus.partial_failed]:
#                    print ("Stoping chain execution: resubmission failed %s" %
#                           link.full_linkname)
#                    failed = True
#                    break

        if self._file_stage is not None and stage_files and not failed:
            self._stage_output_files(output_file_mapping, dry_run)

        chain_status = self.check_links_status()
        print ("Chain status: %s" % (JOB_STATUS_STRINGS[chain_status]))
        # NOTE(review): 5 looks like a hard-coded JobStatus value
        # (presumably JobStatus.done) -- confirm and replace with the
        # enum constant.
        if chain_status == 5:
            job_status = 0
        else:
            job_status = -1
        self._write_status_to_log(job_status, stream)
        self._set_status_self(status=chain_status)

        if self._job_archive:
            self._job_archive.file_archive.update_file_status()
            self._job_archive.write_table_file()
コード例 #18
0
    def _run_chain(self,
                   stream=sys.stdout,
                   dry_run=False,
                   stage_files=True,
                   force_run=False,
                   resubmit_failed=False):
        """Run all the links in the chain

        Parameters
        ----------
        stream : `file`
            Stream to print to,
            Must have 'write' function

        dry_run : bool
            Print commands but do not run them

        stage_files : bool
            Stage files to and from the scratch area

        force_run : bool
            Run jobs, even if they are marked as done

        resubmit_failed : bool
            Resubmit failed jobs

        """
        self._set_links_job_archive()
        failed = False

        # Map the chain's files to the scratch area and stage the inputs.
        # NOTE: output_file_mapping is only bound inside this branch; the
        # output-staging call below is guarded by the same condition.
        if self._file_stage is not None:
            input_file_mapping, output_file_mapping = self._map_scratch_files(
                self.sub_files)
            if stage_files:
                self._file_stage.make_scratch_dirs(input_file_mapping, dry_run)
                self._file_stage.make_scratch_dirs(output_file_mapping,
                                                   dry_run)
                self._stage_input_files(input_file_mapping, dry_run)

        for link in self._links.values():
            # Register this link's own job; status stays 'unknown' until
            # it is actually checked below.
            logfile = os.path.join('logs', "%s.log" % link.full_linkname)
            link._archive_self(logfile, status=JobStatus.unknown)
            key = JobDetails.make_fullkey(link.full_linkname)
            # Links that expose check_status (e.g. sub-chains) get their
            # cached state refreshed before we query the job status.
            if hasattr(link, 'check_status'):
                link.check_status(stream,
                                  no_wait=True,
                                  check_once=True,
                                  do_print=False)
            else:
                pass
            link_status = link.check_job_status(key)
            # Decide whether to (re)run this link based on its status and
            # the force_run / resubmit_failed flags.
            if link_status in [JobStatus.done]:
                if not force_run:
                    print("Skipping done link", link.full_linkname)
                    continue
            elif link_status in [JobStatus.running]:
                if not force_run and not resubmit_failed:
                    print("Skipping running link", link.full_linkname)
                    continue
            elif link_status in [JobStatus.failed, JobStatus.partial_failed]:
                if not resubmit_failed:
                    print("Skipping failed link", link.full_linkname)
                    continue
            print("Running link ", link.full_linkname)
            # Staging was handled once at chain level, so stage_files=False.
            link.run_with_log(dry_run=dry_run,
                              stage_files=False,
                              resubmit_failed=resubmit_failed)
            link_status = link.check_jobs_status()
            link._set_status_self(status=link_status)
            # Any failure stops the chain here; the remaining links are
            # never started.
            if link_status in [JobStatus.failed, JobStatus.partial_failed]:
                print("Stoping chain execution at failed link %s" %
                      link.full_linkname)
                failed = True
                break
#            elif link_status in [JobStatus.partial_failed]:
#                print ("Resubmitting partially failed link %s" %
#                       link.full_linkname)
#                link.run_with_log(dry_run=dry_run, stage_files=False,
#                                  resubmit_failed=resubmit_failed)
#                link_status = link.check_jobs_status()
#                link._set_status_self(status=link_status)
#                if link_status in [JobStatus.partial_failed]:
#                    print ("Stoping chain execution: resubmission failed %s" %
#                           link.full_linkname)
#                    failed = True
#                    break

        if self._file_stage is not None and stage_files and not failed:
            self._stage_output_files(output_file_mapping, dry_run)

        chain_status = self.check_links_status()
        print("Chain status: %s" % (JOB_STATUS_STRINGS[chain_status]))
        # NOTE(review): 5 looks like a hard-coded JobStatus value
        # (presumably JobStatus.done) -- confirm and replace with the
        # enum constant.
        if chain_status == 5:
            job_status = 0
        else:
            job_status = -1
        self._write_status_to_log(job_status, stream)
        self._set_status_self(status=chain_status)

        if self._job_archive:
            self._job_archive.file_archive.update_file_status()
            self._job_archive.write_table_file()