Example #1
 def check_watched_items(self):
     """
     Called by the monitor thread to look at each watched job and deal
     with state changes.
     """
     new_watched = []
     for cjs in self.watched:
         job_id = cjs.job_id
         galaxy_id_tag = cjs.job_wrapper.get_id_tag()
         try:
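             # Skip this job if its user log has not grown since the last
             # check; no new Condor events means no state change to process.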
             if os.stat(cjs.user_log).st_size == cjs.user_log_size:
                 new_watched.append(cjs)
                 continue
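             # The s<N> flags correspond to HTCondor user-log event codes:
             # 001 execute, 004 evicted, 007 shadow exception,
             # 005 terminated, 009 aborted.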
             s1, s4, s7, s5, s9, log_size = summarize_condor_log(
                 cjs.user_log, job_id)
             job_running = s1 and not (s4 or s7)
             job_complete = s5
             job_failed = s9
             cjs.user_log_size = log_size
         except Exception:
             # so we don't kill the monitor thread
             log.exception("(%s/%s) Unable to check job status" %
                           (galaxy_id_tag, job_id))
             log.warning("(%s/%s) job will now be errored" %
                         (galaxy_id_tag, job_id))
             cjs.fail_message = "Cluster could not complete job"
             self.work_queue.put((self.fail_job, cjs))
             continue
         if job_running and not cjs.running:
             log.debug("(%s/%s) job is now running" %
                       (galaxy_id_tag, job_id))
             cjs.job_wrapper.change_state(model.Job.states.RUNNING)
         if not job_running and cjs.running:
             log.debug("(%s/%s) job has stopped running" %
                       (galaxy_id_tag, job_id))
             # Will switching from RUNNING to QUEUED confuse Galaxy?
             # cjs.job_wrapper.change_state( model.Job.states.QUEUED )
         if job_complete:
             if cjs.job_wrapper.get_state() != model.Job.states.DELETED:
                 external_metadata = not asbool(
                     cjs.job_wrapper.job_destination.params.get(
                         "embed_metadata_in_job", True))
                 if external_metadata:
                     self._handle_metadata_externally(
                         cjs.job_wrapper, resolve_requirements=True)
                 log.debug("(%s/%s) job has completed" %
                           (galaxy_id_tag, job_id))
                 self.work_queue.put((self.finish_job, cjs))
             continue
         if job_failed:
             log.debug("(%s/%s) job failed" % (galaxy_id_tag, job_id))
             cjs.failed = True
             self.work_queue.put((self.finish_job, cjs))
             continue
         cjs.running = job_running
         new_watched.append(cjs)
     # Replace the watch list with the updated version
     self.watched = new_watched
Example #2
 def check_watched_items( self ):
     """
     Called by the monitor thread to look at each watched job and deal
     with state changes.
     """
     new_watched = []
     for cjs in self.watched:
         job_id = cjs.job_id
         galaxy_id_tag = cjs.job_wrapper.get_id_tag()
         try:
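             # Skip this job if its user log has not grown since the last
             # check; no new Condor events means no state change to process.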
             if os.stat( cjs.user_log ).st_size == cjs.user_log_size:
                 new_watched.append( cjs )
                 continue
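             # The s<N> flags correspond to HTCondor user-log event codes:
             # 001 execute, 004 evicted, 007 shadow exception,
             # 005 terminated, 009 aborted.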
             s1, s4, s7, s5, s9, log_size = summarize_condor_log(cjs.user_log, job_id)
             job_running = s1 and not (s4 or s7)
             job_complete = s5
             job_failed = s9
             cjs.user_log_size = log_size
         except Exception:
             # so we don't kill the monitor thread
             log.exception( "(%s/%s) Unable to check job status" % ( galaxy_id_tag, job_id ) )
             log.warning( "(%s/%s) job will now be errored" % ( galaxy_id_tag, job_id ) )
             cjs.fail_message = "Cluster could not complete job"
             self.work_queue.put( ( self.fail_job, cjs ) )
             continue
         if job_running and not cjs.running:
             log.debug( "(%s/%s) job is now running" % ( galaxy_id_tag, job_id ) )
             cjs.job_wrapper.change_state( model.Job.states.RUNNING )
         if not job_running and cjs.running:
             log.debug( "(%s/%s) job has stopped running" % ( galaxy_id_tag, job_id ) )
             # Will switching from RUNNING to QUEUED confuse Galaxy?
             # cjs.job_wrapper.change_state( model.Job.states.QUEUED )
         if job_complete:
             if cjs.job_wrapper.get_state() != model.Job.states.DELETED:
                 external_metadata = not asbool( cjs.job_wrapper.job_destination.params.get( "embed_metadata_in_job", True) )
                 if external_metadata:
                     self._handle_metadata_externally( cjs.job_wrapper, resolve_requirements=True )
                 log.debug( "(%s/%s) job has completed" % ( galaxy_id_tag, job_id ) )
                 self.work_queue.put( ( self.finish_job, cjs ) )
             continue
         if job_failed:
             log.debug( "(%s/%s) job failed" % ( galaxy_id_tag, job_id ) )
             cjs.failed = True
             self.work_queue.put( ( self.finish_job, cjs ) )
             continue
         cjs.running = job_running
         new_watched.append( cjs )
     # Replace the watch list with the updated version
     self.watched = new_watched