def check_watched_items(self):
    """
    Called by the monitor thread to look at each watched job and deal
    with state changes.

    For every watched job state (cjs) this parses the HTCondor user log
    (only when it has grown since the last pass), derives running /
    complete / failed flags, pushes finished or failed jobs onto the
    work queue, and rebuilds ``self.watched`` with the jobs that are
    still in flight.
    """
    new_watched = []
    for cjs in self.watched:
        job_id = cjs.job_id
        galaxy_id_tag = cjs.job_wrapper.get_id_tag()
        try:
            # Cheap short-circuit: if the user log has not grown there is
            # nothing new to parse — keep watching and move on.
            if os.stat(cjs.user_log).st_size == cjs.user_log_size:
                new_watched.append(cjs)
                continue
            # s1/s4/s7/s5/s9 presumably map to condor event codes
            # 001 (execute), 004 (evicted), 007 (shadow exception),
            # 005 (terminated), 009 (aborted) — confirm against
            # summarize_condor_log.
            s1, s4, s7, s5, s9, log_size = summarize_condor_log(
                cjs.user_log, job_id)
            job_running = s1 and not (s4 or s7)
            job_complete = s5
            job_failed = s9
            cjs.user_log_size = log_size
        except Exception:  # so we don't kill the monitor thread
            log.exception("(%s/%s) Unable to check job status" % (galaxy_id_tag, job_id))
            log.warning("(%s/%s) job will now be errored" % (galaxy_id_tag, job_id))
            cjs.fail_message = "Cluster could not complete job"
            self.work_queue.put((self.fail_job, cjs))
            continue
        if job_running and not cjs.running:
            log.debug("(%s/%s) job is now running" % (galaxy_id_tag, job_id))
            cjs.job_wrapper.change_state(model.Job.states.RUNNING)
        if not job_running and cjs.running:
            log.debug("(%s/%s) job has stopped running" % (galaxy_id_tag, job_id))
            # Will switching from RUNNING to QUEUED confuse Galaxy?
            # cjs.job_wrapper.change_state( model.Job.states.QUEUED )
        if job_complete:
            # Deleted jobs are dropped from the watch list without being
            # finished; everything else is handed to finish_job.
            if cjs.job_wrapper.get_state() != model.Job.states.DELETED:
                external_metadata = not asbool(
                    cjs.job_wrapper.job_destination.params.get(
                        "embed_metadata_in_job", True))
                if external_metadata:
                    self._handle_metadata_externally(
                        cjs.job_wrapper, resolve_requirements=True)
                log.debug("(%s/%s) job has completed" % (galaxy_id_tag, job_id))
                self.work_queue.put((self.finish_job, cjs))
            continue
        if job_failed:
            log.debug("(%s/%s) job failed" % (galaxy_id_tag, job_id))
            cjs.failed = True
            self.work_queue.put((self.finish_job, cjs))
            continue
        # BUG FIX: was "cjs.runnning" (triple-n typo), which set a dead
        # attribute and left cjs.running stale, so the state-transition
        # checks above never saw the updated flag on later passes.
        cjs.running = job_running
        new_watched.append(cjs)
    # Replace the watch list with the updated version
    self.watched = new_watched
def check_watched_items( self ):
    """
    Called by the monitor thread to look at each watched job and deal
    with state changes.

    Skips jobs whose condor user log has not grown, otherwise parses the
    log to derive running / complete / failed flags, queues finished or
    failed jobs for the work queue, and rebuilds ``self.watched`` with
    the jobs still in flight.
    """
    new_watched = []
    for cjs in self.watched:
        job_id = cjs.job_id
        galaxy_id_tag = cjs.job_wrapper.get_id_tag()
        try:
            # Unchanged log size means no new events — keep watching.
            if os.stat( cjs.user_log ).st_size == cjs.user_log_size:
                new_watched.append( cjs )
                continue
            # s1/s4/s7/s5/s9 presumably map to condor event codes
            # 001 (execute), 004 (evicted), 007 (shadow exception),
            # 005 (terminated), 009 (aborted) — confirm against
            # summarize_condor_log.
            s1, s4, s7, s5, s9, log_size = summarize_condor_log(cjs.user_log, job_id)
            job_running = s1 and not (s4 or s7)
            job_complete = s5
            job_failed = s9
            cjs.user_log_size = log_size
        except Exception:  # so we don't kill the monitor thread
            log.exception( "(%s/%s) Unable to check job status" % ( galaxy_id_tag, job_id ) )
            log.warning( "(%s/%s) job will now be errored" % ( galaxy_id_tag, job_id ) )
            cjs.fail_message = "Cluster could not complete job"
            self.work_queue.put( ( self.fail_job, cjs ) )
            continue
        if job_running and not cjs.running:
            log.debug( "(%s/%s) job is now running" % ( galaxy_id_tag, job_id ) )
            cjs.job_wrapper.change_state( model.Job.states.RUNNING )
        if not job_running and cjs.running:
            log.debug( "(%s/%s) job has stopped running" % ( galaxy_id_tag, job_id ) )
            # Will switching from RUNNING to QUEUED confuse Galaxy?
            # cjs.job_wrapper.change_state( model.Job.states.QUEUED )
        if job_complete:
            # Deleted jobs fall off the watch list without being
            # finished; everything else goes to finish_job.
            if cjs.job_wrapper.get_state() != model.Job.states.DELETED:
                external_metadata = not asbool( cjs.job_wrapper.job_destination.params.get( "embed_metadata_in_job", True) )
                if external_metadata:
                    self._handle_metadata_externally( cjs.job_wrapper, resolve_requirements=True )
                log.debug( "(%s/%s) job has completed" % ( galaxy_id_tag, job_id ) )
                self.work_queue.put( ( self.finish_job, cjs ) )
            continue
        if job_failed:
            log.debug( "(%s/%s) job failed" % ( galaxy_id_tag, job_id ) )
            cjs.failed = True
            self.work_queue.put( ( self.finish_job, cjs ) )
            continue
        # BUG FIX: was "cjs.runnning" (triple-n typo), which set a dead
        # attribute and left cjs.running stale, so the state-transition
        # checks above never saw the updated flag on later passes.
        cjs.running = job_running
        new_watched.append( cjs )
    # Replace the watch list with the updated version
    self.watched = new_watched