class TaskRegistry(Registry):
    """Registry of tasks that is serviced by a background "GangaTasks" thread.

    ``startup()`` creates and starts the thread, ``_thread_main()`` is the
    thread body and ``stop()`` asks the thread to terminate.
    """

    def __init__(self, name, doc, dirty_flush_counter=10, update_index_time=30):
        """Initialise the registry; the background thread is not started here.

        All four arguments are handed on to ``Registry.__init__``.
        """
        # Forward the caller's values. The literals 10 and 30 used to be
        # hard-coded in this call, which silently ignored both arguments.
        super(TaskRegistry, self).__init__(
            name, doc,
            dirty_flush_counter=dirty_flush_counter,
            update_index_time=update_index_time)
        self._main_thread = None

    def getProxy(self):
        """Return a new TaskRegistrySliceProxy over a slice backed by this registry."""
        this_slice = TaskRegistrySlice(self.name)
        this_slice.objects = self
        return TaskRegistrySliceProxy(this_slice)

    def getIndexCache(self, obj):
        """Build the index-cache dictionary for ``obj``.

        The result holds 'status', 'id' and 'name' (each only when present in
        the object's node data) plus one 'display:<column>' entry for every
        display column of a temporary TaskRegistrySlice.

        Raises:
            Exception: if ``obj.getNodeData()`` is None.
        """
        node_data = obj.getNodeData()
        if node_data is None:
            raise Exception("Currently don't support Index Caching")
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if cv in node_data:
                c[cv] = obj.getNodeAttribute(cv)
        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _thread_main(self):
        """Main loop of the background thread (internal).

        Registers the task runtime handlers, waits for the "jobs" registry and
        for monitoring to be enabled (or forced through
        config['ForceTaskMonitoring']), starts every task once and then
        services the tasks every config['TaskLoopFrequency'] seconds until the
        thread is asked to stop.
        """
        # Add runtime handlers for all the taskified applications, since now
        # all the backends are loaded
        from Ganga.GPIDev.Adapters.ApplicationRuntimeHandlers import allHandlers
        from .TaskApplication import handler_map
        for basename, name in handler_map:
            for backend in allHandlers.getAllBackends(basename):
                allHandlers.add(name, backend,
                                allHandlers.get(basename, backend))

        from Ganga.Core.GangaRepository import getRegistry
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        while True:
            # Imported on every pass so the local name is rebound to whatever
            # Ganga.Core holds at that moment.
            from Ganga.Core import monitoring_component
            if (monitoring_component is not None
                    and monitoring_component.enabled) \
                    or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid]._getWriteAccess()
                self[tid].startup()
            except RegistryError:
                # no write access to this task: leave it alone
                continue
            except Exception as err:
                logger.error(
                    "Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        from Ganga.GPIDev.Lib.Tasks import ITask

        # Main loop
        while self._main_thread is not None \
                and not self._main_thread.should_stop():

            # For each task try to run it
            if config['ForceTaskMonitoring'] or monitoring_component.enabled:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    try:
                        if isType(self[tid], ITask):
                            # for new ITasks, always need write access
                            self[tid]._getWriteAccess()
                            p = self[tid]
                        elif self[tid].status in ["running", "running/pause"]:
                            self[tid]._getWriteAccess()
                            p = self[tid]
                        elif self[tid].status == 'completed' and (
                                self[tid].n_status('ready')
                                or self[tid].n_status('running')):
                            # '==' rather than 'is': string identity is an
                            # implementation detail and is not a reliable
                            # way to compare values.
                            self[tid].updateStatus()
                            continue
                        else:
                            continue
                    except RegistryError:
                        # could not acquire lock
                        continue

                    if self._main_thread.should_stop():
                        break

                    try:
                        if isType(self[tid], ITask):
                            # for new ITasks, always call update()
                            p.update()
                        else:
                            # TODO: Make this user-configurable and add better
                            # error message
                            if (p.n_status("failed") * 100.0
                                    / (20 + p.n_status("completed")) > 20):
                                p.pause()
                                logger.error(
                                    "Task %s paused - %i jobs have failed while only %i jobs have completed successfully."
                                    % (p.name, p.n_status("failed"),
                                       p.n_status("completed")))
                                logger.error(
                                    "Please investigate the cause of the failing jobs and then remove the previously failed jobs using job.remove()"
                                )
                                logger.error(
                                    "You can then continue to run this task with tasks(%i).run()"
                                    % p.id)
                                continue
                            numjobs = p.submitJobs()
                            if numjobs > 0:
                                self._flush([p])
                            # finalise any required transforms
                            p.finaliseTransforms()
                            p.updateStatus()

                    except Exception as x:
                        logger.error(
                            "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused."
                            % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_,
                                                       traceback_)))
                        p.pause()

                    if self._main_thread.should_stop():
                        break

                if self._main_thread.should_stop():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds"
                         % str(config['TaskLoopFrequency']))

            # Sleep for config['TaskLoopFrequency'] seconds in 10 ms slices so
            # that a stop request is noticed quickly.
            for _ in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._main_thread.should_stop():
                    break
                time.sleep(0.01)

    def startup(self):
        """Start the registry, then the background thread running _thread_main()."""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks",
                                        target=self._thread_main)
        self._main_thread.start()

    def shutdown(self):
        """Shut the registry down by delegating to the base class."""
        super(TaskRegistry, self).shutdown()

    def stop(self):
        """Ask the background thread, if one was created, to stop."""
        if self._main_thread is not None:
            self._main_thread.stop()
class TaskRegistry(Registry):
    """Registry of tasks that is serviced by a background "GangaTasks" thread.

    A single slice and proxy over the registry are created once in
    ``__init__`` and handed out by ``getSlice()`` / ``getProxy()``.
    ``startup()`` starts the monitoring thread and a RegistryFlusher.
    """

    def __init__(self, name, doc, dirty_flush_counter=10, update_index_time=30):
        """Initialise the registry and build its stored slice and proxy.

        All four arguments are handed on to ``Registry.__init__``.
        """
        super(TaskRegistry, self).__init__(
            name, doc,
            dirty_flush_counter=dirty_flush_counter,
            update_index_time=update_index_time)

        self._main_thread = None

        self.stored_slice = TaskRegistrySlice(self.name)
        self.stored_slice.objects = self
        self.stored_proxy = TaskRegistrySliceProxy(self.stored_slice)

    def getSlice(self):
        """Return the slice created in __init__."""
        return self.stored_slice

    def getProxy(self):
        """Return the slice proxy created in __init__."""
        return self.stored_proxy

    def getIndexCache(self, obj):
        """Build the index-cache dictionary for ``obj``.

        The result holds 'status', 'id' and 'name' (each only when present in
        ``obj._data``) plus one 'display:<column>' entry for every display
        column of a temporary TaskRegistrySlice.

        Raises:
            Exception: if ``obj._data`` is None.
        """
        if obj._data is None:
            raise Exception("Currently don't support Index Caching")
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if cv in obj._data:
                c[cv] = getattr(obj, cv)
        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _thread_main(self):
        """Main loop of the background thread (internal).

        Waits for the "jobs" registry and for monitoring to be enabled (or
        forced through config['ForceTaskMonitoring']), starts every task once
        and then calls ``update()`` on each task every
        config['TaskLoopFrequency'] seconds until the thread is asked to stop.
        """
        from Ganga.Core.GangaRepository import getRegistry
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        while True:
            # Imported on every pass so the local name is rebound to whatever
            # Ganga.Core holds at that moment.
            from Ganga.Core import monitoring_component
            if (monitoring_component is not None
                    and monitoring_component.enabled) \
                    or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid].startup()
            except Exception as err:
                logger.error(
                    "Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        # Main loop
        while self._main_thread is not None \
                and not self._main_thread.should_stop():

            # If monitoring is enabled (or forced for Tasks) loop over each
            # one and update
            if (config['ForceTaskMonitoring'] or monitoring_component.enabled) \
                    and not config['disableTaskMon']:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    # Reset for every task: if the lookup below fails, the
                    # handler must not touch an unbound name or pause the
                    # task left over from the previous iteration.
                    p = None
                    try:
                        p = self[tid]
                        p.update()

                    except Exception as x:
                        if p is None:
                            logger.error(
                                "Exception occurred in task monitoring loop while retrieving task %s: %s %s"
                                % (str(tid), x.__class__, x))
                        else:
                            logger.error(
                                "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused."
                                % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_,
                                                       traceback_)))
                        if p is not None:
                            p.pause()

                    if self._main_thread.should_stop():
                        break

                if self._main_thread.should_stop():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds"
                         % str(config['TaskLoopFrequency']))

            # Sleep for config['TaskLoopFrequency'] seconds in 10 ms slices so
            # that a stop request is noticed quickly.
            for _ in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._main_thread.should_stop():
                    break
                time.sleep(0.01)

    def startup(self):
        """Start the registry, the monitoring thread and the registry flusher."""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks",
                                        target=self._thread_main)
        self._main_thread.start()

        # create a registry flusher
        self.flush_thread = RegistryFlusher(self)
        self.flush_thread.start()

    def shutdown(self):
        """Join the flusher thread, if any, then shut the registry down."""
        # Within this class flush_thread is only assigned by startup(), so
        # tolerate a shutdown() that arrives without a preceding startup().
        flusher = getattr(self, 'flush_thread', None)
        if flusher is not None:
            flusher.join()
        super(TaskRegistry, self).shutdown()

    def stop(self):
        """Ask the monitoring thread, if one was created, to stop and wait for it."""
        if self._main_thread is not None:
            self._main_thread.stop()
            self._main_thread.join()
class TaskRegistry(Registry):
    """Registry of tasks that is serviced by a background "GangaTasks" thread.

    A single slice and proxy over the registry are created once in
    ``__init__`` and handed out by ``getSlice()`` / ``getProxy()``.
    ``startup()`` starts the monitoring thread and a RegistryFlusher.
    """

    def __init__(self, name, doc):
        """Initialise the registry and build its stored slice and proxy.

        ``name`` and ``doc`` are handed on to ``Registry.__init__``.
        """
        super(TaskRegistry, self).__init__(name, doc)

        self._main_thread = None

        self.stored_slice = TaskRegistrySlice(self.name)
        self.stored_slice.objects = self
        self.stored_proxy = TaskRegistrySliceProxy(self.stored_slice)

    def getSlice(self):
        """Return the slice created in __init__."""
        return self.stored_slice

    def getProxy(self):
        """Return the slice proxy created in __init__."""
        return self.stored_proxy

    def getIndexCache(self, obj):
        """Build the index-cache dictionary for ``obj``.

        The result holds 'status', 'id' and 'name' (each only when ``obj`` has
        such an attribute) plus one 'display:<column>' entry for every display
        column of a temporary TaskRegistrySlice.
        """
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if hasattr(obj, cv):
                c[cv] = getattr(obj, cv)
        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _thread_main(self):
        """Main loop of the background thread (internal).

        Waits for the "jobs" registry and for monitoring to be enabled (or
        forced through config['ForceTaskMonitoring']), starts every task once
        and then calls ``update()`` on each task every
        config['TaskLoopFrequency'] seconds until the thread is asked to stop.
        """
        from Ganga.Core.GangaRepository import getRegistry
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        while True:
            # Imported on every pass so the local name is rebound to whatever
            # Ganga.Core holds at that moment.
            from Ganga.Core import monitoring_component
            if (monitoring_component is not None
                    and monitoring_component.enabled) \
                    or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid].startup()
            except Exception as err:
                logger.error(
                    "Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        # Main loop
        while self._main_thread is not None \
                and not self._main_thread.should_stop():

            # If monitoring is enabled (or forced for Tasks) loop over each
            # one and update
            if (config['ForceTaskMonitoring'] or monitoring_component.enabled) \
                    and not config['disableTaskMon']:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    # Reset for every task: if the lookup below fails, the
                    # handler must not touch an unbound name or pause the
                    # task left over from the previous iteration.
                    p = None
                    try:
                        p = self[tid]
                        p.update()

                    except Exception as x:
                        if p is None:
                            logger.error(
                                "Exception occurred in task monitoring loop while retrieving task %s: %s %s"
                                % (str(tid), x.__class__, x))
                        else:
                            logger.error(
                                "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused."
                                % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_,
                                                       traceback_)))
                        if p is not None:
                            p.pause()

                    if self._main_thread.should_stop():
                        break

                if self._main_thread.should_stop():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds"
                         % str(config['TaskLoopFrequency']))

            # Sleep for config['TaskLoopFrequency'] seconds in 10 ms slices so
            # that a stop request is noticed quickly.
            for _ in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._main_thread.should_stop():
                    break
                time.sleep(0.01)

    def startup(self):
        """Start the registry, the monitoring thread and the registry flusher."""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks",
                                        target=self._thread_main)
        self._main_thread.start()

        # create a registry flusher
        self.flush_thread = RegistryFlusher(self, 'TaskRegistryFlusher')
        self.flush_thread.start()

    def shutdown(self):
        """Join the flusher thread, if any, then shut the registry down."""
        # Within this class flush_thread is only assigned by startup(), so
        # tolerate a shutdown() that arrives without a preceding startup().
        flusher = getattr(self, 'flush_thread', None)
        if flusher is not None:
            flusher.join()
        super(TaskRegistry, self).shutdown()

    def stop(self):
        """Ask the monitoring thread, if one was created, to stop and wait for it."""
        if self._main_thread is not None:
            self._main_thread.stop()
            self._main_thread.join()
class TaskRegistry(Registry):
    """Registry of tasks that is serviced by a background "GangaTasks" thread.

    ``startup()`` creates and starts the thread, ``_thread_main()`` is the
    thread body and ``stop()`` asks the thread to terminate.
    """

    def __init__(self, name, doc, dirty_flush_counter=10, update_index_time=30):
        """Initialise the registry; the background thread is not started here.

        All four arguments are handed on to ``Registry.__init__``.
        """
        # Forward the caller's values. The literals 10 and 30 used to be
        # hard-coded in this call, which silently ignored both arguments.
        super(TaskRegistry, self).__init__(
            name, doc,
            dirty_flush_counter=dirty_flush_counter,
            update_index_time=update_index_time)
        self._main_thread = None

    def getProxy(self):
        """Return a new TaskRegistrySliceProxy over a slice backed by this registry."""
        this_slice = TaskRegistrySlice(self.name)
        this_slice.objects = self
        return TaskRegistrySliceProxy(this_slice)

    def getIndexCache(self, obj):
        """Build the index-cache dictionary for ``obj``.

        The result holds 'status', 'id' and 'name' (each only when present in
        the object's node data) plus one 'display:<column>' entry for every
        display column of a temporary TaskRegistrySlice.

        Raises:
            Exception: if ``obj.getNodeData()`` is None.
        """
        node_data = obj.getNodeData()
        if node_data is None:
            raise Exception("Currently don't support Index Caching")
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if cv in node_data:
                c[cv] = obj.getNodeAttribute(cv)
        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _thread_main(self):
        """Main loop of the background thread (internal).

        Registers the task runtime handlers, waits for the "jobs" registry and
        for monitoring to be enabled (or forced through
        config['ForceTaskMonitoring']), starts every task once and then
        services the tasks every config['TaskLoopFrequency'] seconds until the
        thread is asked to stop.
        """
        # Add runtime handlers for all the taskified applications, since now
        # all the backends are loaded
        from Ganga.GPIDev.Adapters.ApplicationRuntimeHandlers import allHandlers
        from .TaskApplication import handler_map
        for basename, name in handler_map:
            for backend in allHandlers.getAllBackends(basename):
                allHandlers.add(name, backend,
                                allHandlers.get(basename, backend))

        from Ganga.Core.GangaRepository import getRegistry
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        while True:
            # Imported on every pass so the local name is rebound to whatever
            # Ganga.Core holds at that moment.
            from Ganga.Core import monitoring_component
            if (monitoring_component is not None
                    and monitoring_component.enabled) \
                    or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid]._getWriteAccess()
                self[tid].startup()
            except RegistryError:
                # no write access to this task: leave it alone
                continue
            except Exception as err:
                logger.error(
                    "Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        from Ganga.GPIDev.Lib.Tasks import ITask

        # Main loop
        while self._main_thread is not None \
                and not self._main_thread.should_stop():

            # For each task try to run it
            if config['ForceTaskMonitoring'] or monitoring_component.enabled:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    try:
                        if isType(self[tid], ITask):
                            # for new ITasks, always need write access
                            self[tid]._getWriteAccess()
                            p = self[tid]
                        elif self[tid].status in ["running", "running/pause"]:
                            self[tid]._getWriteAccess()
                            p = self[tid]
                        elif self[tid].status == 'completed' and (
                                self[tid].n_status('ready')
                                or self[tid].n_status('running')):
                            # '==' rather than 'is': string identity is an
                            # implementation detail and is not a reliable
                            # way to compare values.
                            self[tid].updateStatus()
                            continue
                        else:
                            continue
                    except RegistryError:
                        # could not acquire lock
                        continue

                    if self._main_thread.should_stop():
                        break

                    try:
                        if isType(self[tid], ITask):
                            # for new ITasks, always call update()
                            p.update()
                        else:
                            # TODO: Make this user-configurable and add better
                            # error message
                            if (p.n_status("failed") * 100.0
                                    / (20 + p.n_status("completed")) > 20):
                                p.pause()
                                logger.error(
                                    "Task %s paused - %i jobs have failed while only %i jobs have completed successfully."
                                    % (p.name, p.n_status("failed"),
                                       p.n_status("completed")))
                                logger.error(
                                    "Please investigate the cause of the failing jobs and then remove the previously failed jobs using job.remove()"
                                )
                                logger.error(
                                    "You can then continue to run this task with tasks(%i).run()"
                                    % p.id)
                                continue
                            numjobs = p.submitJobs()
                            if numjobs > 0:
                                self._flush([p])
                            # finalise any required transforms
                            p.finaliseTransforms()
                            p.updateStatus()

                    except Exception as x:
                        logger.error(
                            "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused."
                            % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_,
                                                       traceback_)))
                        p.pause()

                    if self._main_thread.should_stop():
                        break

                if self._main_thread.should_stop():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds"
                         % str(config['TaskLoopFrequency']))

            # Sleep for config['TaskLoopFrequency'] seconds in 10 ms slices so
            # that a stop request is noticed quickly.
            for _ in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._main_thread.should_stop():
                    break
                time.sleep(0.01)

    def startup(self):
        """Start the registry, then the background thread running _thread_main()."""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks",
                                        target=self._thread_main)
        self._main_thread.start()

    def shutdown(self):
        """Shut the registry down by delegating to the base class."""
        super(TaskRegistry, self).shutdown()

    def stop(self):
        """Ask the background thread, if one was created, to stop."""
        if self._main_thread is not None:
            self._main_thread.stop()
for subjob in job.subjobs: try: process_subjob(job,subjob) except: raise logger.warning('Exception in process_subjob:') logger.warning(sys.exc_info()[0]) logger.warning(sys.exc_info()[1]) if test_paused(): break test_sleep(30) else: logger.warning('No jobs to monitor. Exiting now.') #Stop plotting and summarizing thread. pt.stop() ct.stop() paused = test_paused() if not paused: test.state = 'completed' test.endtime = datetime.now() test.save() logger.info('Test state updated to %s'%(test.state)) logger.info('Disconnected from DB') if not paused: try: logger.info('Killing leftover "submitted" jobs')
while (test_active() and not test_paused()): test = Test.objects.get(pk=testid) # logger.info('HC Copy Thread: TOP OF MAIN LOOP') for job in jobs: if not test_active() or test_paused() or ct.should_stop(): break copyJob(job) test_sleep(10) logger.info('Main thread exited.') else: logger.warning('No jobs to monitor. Exiting now.') #Stop thread. ct.stop() paused = test_paused() if not paused: test.state = 'completed' test.endtime = datetime.now() test.save() logger.info('Test state updated to %s'%(test.state)) logger.info('Disconnected from DB') if not paused: try: logger.info('Killing leftover "submitted" jobs') jobs.select(status='submitted').kill()
process_subjob(job, subjob) except: logger.warning('Exception in process_subjob:') logger.warning(sys.exc_info()[0]) logger.warning(sys.exc_info()[1]) if test_paused(): break test_sleep(20) print '.', else: noJobs = True logger.warning('No jobs to monitor. Exiting now.') #Stop plotting and summarizing thread. pt.stop() paused = test_paused() if not paused: if noJobs: test.state = 'error' else: test.state = "completed" test.endtime = datetime.now() test.save() logger.info('Test state updated to %s' % (test.state)) logger.info('Disconnected from DB') if not paused: