def filter_is_done(self, tasks):
    """
    Yield (task, job_info) for each task whose Slurm job has finished.

    :param tasks: tasks that have been submitted to the job manager.
    :yields: (task, parsed_job_info) for every task whose sacct State is in
        FAILED_STATES or COMPLETED_STATES.
    :raises AssertionError: if sacct reports a State that is neither
        pending, failed, nor completed.
    """
    # drm_jobID can be None if submission failed, so skip those tasks.
    job_ids = [t.drm_jobID for t in tasks if t.drm_jobID is not None]
    if job_ids:
        # sacct can be flaky; retry with exponential backoff.
        job_infos = retry_call(
            do_sacct,
            fargs=[job_ids],
            delay=10,
            tries=10,
            backoff=2,
            max_delay=60,
            logger=tasks[0].workflow.log,
        )
        for task in tasks:
            if task.drm_jobID in job_infos:
                job_info = job_infos[task.drm_jobID]
                if job_info['State'] in FAILED_STATES + COMPLETED_STATES:
                    # Reuse the record already in hand instead of a second
                    # dict lookup (the original looked it up twice).
                    job_info = parse_sacct(job_info, tasks[0].workflow.log)
                    yield task, job_info
                else:
                    assert job_info['State'] in PENDING_STATES, \
                        'Invalid job state: `%s` for %s drm_job_id=%s' % (
                            job_info['State'], task, task.drm_jobID)
def submit_job(self, task):
    """Submit *task* to Slurm via sbatch and mark it as submitted."""
    # Clear out stale stdout/stderr files left over from a previous attempt.
    for stale_path in (task.output_stdout_path, task.output_stderr_path):
        if os.path.exists(stale_path):
            os.unlink(stale_path)

    # sbatch can fail transiently; retry with exponential backoff.
    task.drm_jobID = retry_call(
        sbatch,
        fargs=[task],
        delay=10,
        tries=10,
        backoff=2,
        max_delay=60,
        logger=task.log,
    )
    task.status = TaskStatus.submitted
def submit_job(self, task):
    """Submit *task* via sbatch, first removing any stale output files.

    :raises NotImplementedError: if the task requests custom environment
        variables, which this DRM does not support.
    """
    if task.environment_variables is not None:
        raise NotImplementedError

    # Delete leftover stdout/stderr from an earlier run of this task.
    for stale_path in (task.output_stdout_path, task.output_stderr_path):
        if os.path.exists(stale_path):
            os.unlink(stale_path)

    # sbatch can fail transiently; retry with exponential backoff.
    task.drm_jobID = retry_call(
        sbatch,
        fargs=[task],
        delay=10,
        tries=10,
        backoff=2,
        max_delay=60,
        logger=task.log,
    )
    task.status = TaskStatus.submitted
def drm_statuses(self, tasks, log_errors=True):
    """
    Look up the current Slurm state of each submitted task.

    :param tasks: tasks that have been submitted to the job manager
    :param log_errors: accepted for interface compatibility; currently unused.
    :returns: (dict) task.drm_jobID -> drm_status
    """
    submitted_ids = [t.drm_jobID for t in tasks if t.drm_jobID is not None]
    if not submitted_ids:
        return {}

    # sacct can be flaky; retry with exponential backoff.
    job_infos = retry_call(
        do_sacct,
        fargs=[submitted_ids],
        delay=10,
        tries=10,
        backoff=2,
        max_delay=60,
        logger=tasks[0].workflow.log,
    )
    # Fall back to a sentinel state when sacct returned nothing for a job.
    return {
        task.drm_jobID: job_infos.get(task.drm_jobID, {}).get('State', 'UNK_JOB_STATE')
        for task in tasks
    }
def drm_statuses(self, tasks):
    """
    Fetch the Kubernetes status record for each task's job.

    :param tasks: submitted tasks; must be non-empty, since tasks[0]
        supplies the logger passed to retry_call.
    :returns: (dict) job-name label -> raw status record from kstatus.
    """
    job_ids = [task.drm_jobID for task in tasks]

    # kstatus can fail transiently; retry with exponential backoff.
    response = retry_call(
        kstatus,
        fargs=[job_ids],
        delay=10,
        tries=10,
        backoff=2,
        max_delay=60,
        logger=tasks[0].workflow.log,
    )

    # NOTE(review): kstatus appears to return a list-style payload under
    # "items" when queried with multiple ids and a bare record otherwise —
    # confirm against the kstatus implementation.
    if len(job_ids) > 1:
        records = response["items"]
    else:
        records = [response]

    return {record["metadata"]["labels"]["job-name"]: record for record in records}
def filter_is_done(self, tasks):
    """
    Yield a dictionary of Slurm job metadata for each task that has completed.
    """
    # A task has no drm_jobID when its submission failed; ignore those.
    job_ids = [t.drm_jobID for t in tasks if t.drm_jobID is not None]
    if not job_ids:
        return

    # sacct can be flaky; retry with exponential backoff.
    job_infos = retry_call(
        do_sacct,
        fargs=[job_ids],
        delay=10,
        tries=10,
        backoff=2,
        max_delay=60,
        logger=tasks[0].workflow.log,
    )
    for task in tasks:
        if task.drm_jobID not in job_infos:
            continue
        state = job_infos[task.drm_jobID]['State']
        if state in FAILED_STATES + COMPLETED_STATES:
            yield task, parse_sacct(job_infos[task.drm_jobID], tasks[0].workflow.log)
        else:
            assert state in PENDING_STATES, \
                'Invalid job state: `%s` for %s drm_job_id=%s' % (
                    state, task, task.drm_jobID)
def drm_statuses(self, tasks):
    """
    :param tasks: tasks that have been submitted to the job manager
    :returns: (dict) task.drm_jobID -> drm_status
    """
    submitted_ids = [t.drm_jobID for t in tasks if t.drm_jobID is not None]
    if not submitted_ids:
        return {}

    # sacct can be flaky; retry with exponential backoff.
    job_infos = retry_call(
        do_sacct,
        fargs=[submitted_ids],
        delay=10,
        tries=10,
        backoff=2,
        max_delay=60,
        logger=tasks[0].workflow.log,
    )

    def state_of(task):
        # Sentinel state for jobs sacct knows nothing about.
        return job_infos.get(task.drm_jobID, {}).get('State', 'UNK_JOB_STATE')

    return {task.drm_jobID: state_of(task) for task in tasks}