def _init_configs(self, section, default_fs_option, default_section, init_kwargs):
    cfg = Config.instance()

    # get the proper section
    if not section:
        section = cfg.get_expanded("target", default_fs_option)

    # try to read it and fill configs to pass to the file system and the remote file interface
    fs_config = {}
    fi_config = {}
    if isinstance(section, six.string_types):
        # when set, the section must exist
        if not cfg.has_section(section):
            raise Exception(
                "law config has no section '{}' to read {} options".format(
                    section, self.__class__.__name__))

        # extend options of sections other than the default one with its values
        self._update_section_defaults(default_section, section)

        # read the configs from the section for both the file system and remote interface
        fs_config = self.parse_config(section)
        fi_config = self.file_interface_cls.parse_config(section)

    # update both configs with init kwargs
    fs_config = merge_dicts(fs_config, init_kwargs, deep=True)
    fi_config = merge_dicts(fi_config, init_kwargs, deep=True)

    return section, fs_config, fi_config

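# Illustration (not part of the library code): the precedence implemented by
# _init_configs() is "config section first, init kwargs win". The deep_merge
# helper below is a hypothetical stand-in for law.util.merge_dicts(..., deep=True),
# used only to make the ordering visible; all option names and values are made up.
def deep_merge(*dicts):
    # merge dicts left to right; later values win, nested dicts are merged recursively
    merged = {}
    for d in dicts:
        for key, value in d.items():
            if isinstance(value, dict) and isinstance(merged.get(key), dict):
                merged[key] = deep_merge(merged[key], value)
            else:
                merged[key] = value
    return merged


fs_config = {"base": "root://eos.cern.ch", "retries": 1}  # as parsed from the config section
init_kwargs = {"retries": 3}  # as passed at instantiation
print(deep_merge(fs_config, init_kwargs))
# -> {'base': 'root://eos.cern.ch', 'retries': 3}
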
def arc_create_job_file_factory(self, **kwargs):
    # job file factory config priority: config file < class defaults < kwargs
    get_prefixed_config = self.workflow_proxy.get_prefixed_config
    cfg = {
        "dir": get_prefixed_config("job", "job_file_dir"),
        "mkdtemp": get_prefixed_config("job", "job_file_dir_mkdtemp", type=bool),
        "cleanup": get_prefixed_config("job", "job_file_dir_cleanup", type=bool),
    }
    kwargs = merge_dicts(cfg, self.arc_job_file_factory_defaults, kwargs)

    return ARCJobFileFactory(**kwargs)

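# Illustration (assumption, not the exact law implementation): the prefixed
# config lookup above typically checks a workflow-specific option first (e.g.
# "arc_job_file_dir" in the [job] section) and falls back to the generic
# option ("job_file_dir"). A sketch using the standard-library configparser:
import configparser


def get_prefixed_config_sketch(cfg, section, option, prefix="arc", type=None):
    # try the prefixed option first, then fall back to the plain option
    for opt in ("{}_{}".format(prefix, option), option):
        if cfg.has_option(section, opt):
            return cfg.getboolean(section, opt) if type is bool else cfg.get(section, opt)
    return None


cfg = configparser.ConfigParser()
cfg.read_string("[job]\njob_file_dir = /tmp/law_jobs\narc_job_file_dir_cleanup = True\n")
print(get_prefixed_config_sketch(cfg, "job", "job_file_dir"))  # /tmp/law_jobs
print(get_prefixed_config_sketch(cfg, "job", "job_file_dir_cleanup", type=bool))  # True
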
def submit_jobs(self, job_files, **kwargs):
    task = self.task

    # prepare objects for dumping intermediate submission data
    dump_freq = self._get_task_attribute("dump_intermediate_submission_data")()
    if dump_freq and not is_number(dump_freq):
        dump_freq = 50

    # progress callback to inform the scheduler
    def progress_callback(i, job_id):
        job_num = i + 1

        # some job managers respond with a list of job ids per submission (e.g. htcondor, slurm)
        # batched submission is not yet supported, so get the first id
        if isinstance(job_id, list):
            job_id = job_id[0]

        # set the job id early
        self.submission_data.jobs[job_num]["job_id"] = job_id

        # log a message every 25 jobs
        if job_num in (1, len(job_files)) or job_num % 25 == 0:
            task.publish_message("submitted {}/{} job(s)".format(
                job_num, len(job_files)))

        # dump intermediate submission data with a certain frequency
        if dump_freq and job_num % dump_freq == 0:
            self.dump_submission_data()

    # get job kwargs for submission and merge with passed kwargs
    submit_kwargs = self._get_job_kwargs("submit")
    submit_kwargs = merge_dicts(submit_kwargs, kwargs)

    return self.job_manager.submit_batch(job_files, retries=3, threads=task.threads,
        callback=progress_callback, **submit_kwargs)

def _submit(self, job_files, **kwargs):
    task = self.task

    # job_files is an ordered mapping job_num -> {"job": PATH, "log": PATH/None}, get keys and
    # values for faster lookup by numeric index
    job_nums = list(job_files.keys())
    job_files = [f["job"] for f in six.itervalues(job_files)]

    # prepare objects for dumping intermediate submission data
    dump_freq = self._get_task_attribute("dump_intermediate_submission_data", True)()
    if dump_freq and not is_number(dump_freq):
        dump_freq = 50

    # get job kwargs for submission and merge with passed kwargs
    submit_kwargs = self._get_job_kwargs("submit")
    submit_kwargs = merge_dicts(submit_kwargs, kwargs)

    # progress callback to inform the scheduler
    def progress_callback(i, job_id):
        job_num = job_nums[i]

        # some job managers respond with a list of job ids per submission (e.g. htcondor, slurm),
        # so get the first id as long as batched submission is not yet supported
        if isinstance(job_id, list) and not self.job_manager.chunk_size_submit:
            job_id = job_id[0]

        # set the job id early
        self.submission_data.jobs[job_num]["job_id"] = job_id

        # log a message every 25 jobs
        if i in (0, len(job_files) - 1) or (i + 1) % 25 == 0:
            task.publish_message("submitted {}/{} job(s)".format(i + 1, len(job_files)))

        # dump intermediate submission data with a certain frequency
        if dump_freq and (i + 1) % dump_freq == 0:
            self.dump_submission_data()

    return self.job_manager.submit_batch(job_files, retries=3, threads=task.threads,
        callback=progress_callback, **submit_kwargs)

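# Standalone sketch of the callback contract used by _submit() above: the job
# manager calls back with the submission index and the returned job id, which
# may be a list when jobs are submitted in chunks. All ids below are made up.
submitted_ids = {}
total_jobs = 3


def example_callback(i, job_id):
    # normalize list-shaped ids when chunked submission is not supported
    if isinstance(job_id, list):
        job_id = job_id[0]
    submitted_ids[i] = job_id

    # report progress for the first, last, and every 25th job, as above
    if i in (0, total_jobs - 1) or (i + 1) % 25 == 0:
        print("submitted {}/{} job(s)".format(i + 1, total_jobs))


for i, job_id in enumerate([["123.0", "123.1"], "124.0", "125.0"]):
    example_callback(i, job_id)
# submitted_ids -> {0: '123.0', 1: '124.0', 2: '125.0'}
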
def arc_create_job_manager(self, **kwargs):
    kwargs = merge_dicts(self.arc_job_manager_defaults, kwargs)

    return ARCJobManager(**kwargs)

def lsf_create_job_file_factory(self, **kwargs):
    # job file factory config priority: kwargs > class defaults
    kwargs = merge_dicts({}, self.lsf_job_file_factory_defaults, kwargs)

    return LSFJobFileFactory(**kwargs)

def lsf_create_job_manager(self, **kwargs):
    kwargs = merge_dicts(self.lsf_job_manager_defaults, kwargs)

    return LSFJobManager(**kwargs)

def htcondor_create_job_file_factory(self, **kwargs):
    # job file factory config priority: kwargs > class defaults
    kwargs = merge_dicts({}, self.htcondor_job_file_factory_defaults, kwargs)

    return HTCondorJobFileFactory(**kwargs)

def htcondor_create_job_manager(self, **kwargs):
    kwargs = merge_dicts(self.htcondor_job_manager_defaults, kwargs)

    return HTCondorJobManager(**kwargs)

def glite_create_job_manager(self, **kwargs):
    kwargs = merge_dicts(self.glite_job_manager_defaults, kwargs)

    return GLiteJobManager(**kwargs)

def glite_create_job_file_factory(self, **kwargs):
    # job file factory config priority: kwargs > class defaults
    kwargs = merge_dicts({}, self.glite_job_file_factory_defaults, kwargs)

    return self.glite_job_file_factory_cls()(**kwargs)

def glite_create_job_manager(self, **kwargs):
    kwargs = merge_dicts(self.glite_job_manager_defaults, kwargs)

    return self.glite_job_manager_cls()(**kwargs)

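# Hypothetical sketch of how a concrete workflow could adjust the defaults
# consumed by the factory methods above; the contrib loading call and base
# class follow law's documented conventions, while the attribute values are
# made up for illustration and would need to match the job manager / file
# factory signatures in use.
import law

law.contrib.load("glite")


class MyGLiteWorkflow(law.glite.GLiteWorkflow):

    # picked up by glite_create_job_manager() via merge_dicts
    glite_job_manager_defaults = {"threads": 4}

    # picked up by glite_create_job_file_factory() via merge_dicts
    glite_job_file_factory_defaults = {"cleanup": False}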