def _wait_for_jobs(self): while True: with self.lock: if not self.wait: return active_jobs = self.active_jobs self.active_jobs = list() for active_job in active_jobs: exitcode = active_job.process.poll() if exitcode is None: # job not yet finished self.active_jobs.append(active_job) elif exitcode == 0: # job finished successfully os.remove(active_job.jobscript) self.finish_job(active_job.job) active_job.callback(active_job.job) else: # job failed os.remove(active_job.jobscript) self.print_job_error(active_job.job) print_exception( ClusterJobException(active_job.job, self.dag.jobid(active_job.job), active_job.jobscript), self.workflow.linemaps) active_job.error_callback(active_job.job) time.sleep(1)
def _wait_for_jobs(self): while True: with self.lock: if not self.wait: return active_jobs = self.active_jobs self.active_jobs = list() for active_job in active_jobs: if os.path.exists(active_job.jobfinished): os.remove(active_job.jobfinished) os.remove(active_job.jobscript) self.finish_job(active_job.job) active_job.callback(active_job.job) elif os.path.exists(active_job.jobfailed): os.remove(active_job.jobfailed) os.remove(active_job.jobscript) self.print_job_error(active_job.job) print_exception( ClusterJobException(active_job.job, self.dag.jobid(active_job.job), active_job.jobscript), self.workflow.linemaps) active_job.error_callback(active_job.job) else: self.active_jobs.append(active_job) time.sleep(1)
def _wait_for_jobs(self):
    """Query the DRMAA session for all active jobs until waiting is switched off.

    Each round is performed while holding ``self.lock`` and returns from the
    method as soon as ``self.wait`` is falsy. Otherwise ``self.active_jobs``
    is replaced by a fresh list and ``self.session.wait`` is called for every
    previously active entry with ``drmaa.Session.TIMEOUT_NO_WAIT``:

    * ``ExitTimeoutException`` -- the entry is put back into
      ``self.active_jobs``.
    * any other exception -- it is reported as a ``WorkflowError`` through
      ``print_exception``, the job script is deleted and the entry's
      ``error_callback`` is invoked with the job.
    * a result with ``hasExited`` set and ``exitStatus == 0`` -- the job
      script is deleted, ``self.finish_job`` is called and the entry's
      ``callback`` is invoked with the job.
    * any other result -- the job script is deleted, the error is reported
      via ``self.print_job_error`` and ``print_exception``, and the entry's
      ``error_callback`` is invoked with the job.

    Between rounds the method sleeps for one second.
    """
    import drmaa

    while True:
        with self.lock:
            if not self.wait:
                return
            pending = self.active_jobs
            self.active_jobs = []
            for entry in pending:
                try:
                    outcome = self.session.wait(
                        entry.jobid, drmaa.Session.TIMEOUT_NO_WAIT
                    )
                except drmaa.errors.ExitTimeoutException:
                    # The non-blocking wait timed out: the job is still active.
                    self.active_jobs.append(entry)
                    continue
                except (drmaa.errors.InternalException, Exception) as err:
                    problem = WorkflowError("DRMAA Error: {}".format(err))
                    print_exception(problem, self.workflow.linemaps)
                    os.remove(entry.jobscript)
                    entry.error_callback(entry.job)
                    continue

                # The session returned a result, so the job is over.
                os.remove(entry.jobscript)
                if not (outcome.hasExited and outcome.exitStatus == 0):
                    self.print_job_error(entry.job)
                    failure = ClusterJobException(
                        entry.job, self.dag.jobid(entry.job), entry.jobscript
                    )
                    print_exception(failure, self.workflow.linemaps)
                    entry.error_callback(entry.job)
                    continue

                self.finish_job(entry.job)
                entry.callback(entry.job)
        time.sleep(1)
def _wait_for_job( self, job, callback, error_callback, jobscript, jobfinished, jobfailed): while True: if os.path.exists(jobfinished): os.remove(jobfinished) os.remove(jobscript) self.finish_job(job) callback(job) return if os.path.exists(jobfailed): os.remove(jobfailed) os.remove(jobscript) print_exception( ClusterJobException(job, self.dag.jobid(job), self.get_jobscript(job)), self.workflow.linemaps) error_callback(job) return time.sleep(1)