def sleep_until_state(j, timeout=None, state='completed', break_states=None, sleep_period=1, verbose=False):
    '''
    Wait until the (master) job reaches the specified state.

    Args:
        j: job (or job proxy) to wait on; a subjob is resolved to its master
        timeout: maximum seconds to wait; defaults to config['timeout']
        state: target status string to wait for
        break_states: optional list of statuses that abort the wait early
        sleep_period: seconds between polls
        verbose: log the job status while waiting

    Returns:
        True: if the state has been reached in the given timeout period
        False: timeout occured or a break state has been reached
    If break_states is specified, the call terminates when job enters in
    one of these state, returning False
    '''
    from Ganga.GPIDev.Base.Proxy import stripProxy
    j = stripProxy(j)
    # Monitoring operates on master jobs, so always poll the master.
    if j.master is not None:
        j = j.master
    if timeout is None:
        timeout = config['timeout']
    from time import sleep
    from Ganga.Core import monitoring_component
    from Ganga.Core.GangaRepository import getRegistryProxy
    jobs = getRegistryProxy('jobs')
    current_status = None
    while j.status != state and timeout > 0:
        if not monitoring_component.isEnabled():
            # Monitoring loop not running: trigger a one-off pass for this job only.
            monitoring_component.runMonitoring(jobs=jobs.select(j.id, j.id), _loadCredentials=False)
        else:
            # Monitoring loop is running: reset its counters so it polls immediately.
            monitoring_component.alive = True
            monitoring_component.enabled = True
            monitoring_component.steps = -1
            monitoring_component.__updateTimeStamp = 0
            monitoring_component.__sleepCounter = -0.5
        if verbose and j.status != current_status:
            logger.info("Job %s: status = %s" % (str(j.id), str(j.status)))
        if current_status is None:
            current_status = j.status
        # isinstance instead of the exact-type comparison type(...) == type([])
        if isinstance(break_states, list) and j.status in break_states:
            logger.info("Job finished with status: %s" % j.status)
            return False
        sleep(sleep_period)
        timeout -= sleep_period
        logger.debug("Status: %s" % j.status)
    logger.info("Job finished with status: %s" % j.status)
    logger.info("Timeout: %s" % str(timeout))
    try:
        # Best-effort lock refresh; never fail the wait because of it,
        # but do not silently hide the error either (was a bare except).
        j._getRegistry().updateLocksNow()
    except Exception as err:
        logger.debug("Failed to update registry locks: %s" % str(err))
    return j.status == state
def sleep_until_state(j, timeout=None, state='completed', break_states=None, sleep_period=1, verbose=False):
    '''
    Wait until the (master) job reaches the specified state.

    Args:
        j: job (or job proxy) to wait on; a subjob is resolved to its master
        timeout: maximum seconds to wait; defaults to config['timeout']
        state: target status string to wait for
        break_states: optional list of statuses that abort the wait early
        sleep_period: seconds between polls
        verbose: log the job status while waiting

    Returns:
        True: if the state has been reached in the given timeout period
        False: timeout occured or a break state has been reached
    If break_states is specified, the call terminates when job enters in
    one of these state, returning False
    '''
    from Ganga.GPIDev.Base.Proxy import stripProxy
    j = stripProxy(j)
    # Monitoring operates on master jobs, so always poll the master.
    if j.master is not None:
        j = j.master
    if timeout is None:
        timeout = config['timeout']
    from time import sleep
    from Ganga.Core import monitoring_component
    from Ganga.Core.GangaRepository import getRegistryProxy
    jobs = getRegistryProxy('jobs')
    current_status = None
    while j.status != state and timeout > 0:
        if not monitoring_component.isEnabled():
            # Monitoring loop not running: trigger a one-off pass for this job only.
            monitoring_component.runMonitoring(jobs=jobs.select(j.id, j.id))
        else:
            # Monitoring loop is running: reset its counters so it polls immediately.
            monitoring_component.alive = True
            monitoring_component.enabled = True
            monitoring_component.steps = -1
            monitoring_component.__updateTimeStamp = 0
            monitoring_component.__sleepCounter = -0.5
        if verbose and j.status != current_status:
            logger.info("Job %s: status = %s" % (str(j.id), str(j.status)))
        if current_status is None:
            current_status = j.status
        # isinstance instead of the exact-type comparison type(...) == type([])
        if isinstance(break_states, list) and j.status in break_states:
            logger.info("Job finished with status: %s" % j.status)
            return False
        sleep(sleep_period)
        timeout -= sleep_period
        logger.debug("Status: %s" % j.status)
    logger.info("Job finished with status: %s" % j.status)
    logger.info("Timeout: %s" % str(timeout))
    try:
        # Best-effort lock refresh; never fail the wait because of it,
        # but do not silently hide the error either (was a bare except).
        j._getRegistry().updateLocksNow()
    except Exception as err:
        logger.debug("Failed to update registry locks: %s" % str(err))
    return j.status == state
def sleep_until_state(j, timeout=None, state='completed', break_states=None, sleep_period=1, verbose=False):
    '''
    Wait until the job reaches the specified state.

    Args:
        j: job (or job proxy) to wait on
        timeout: maximum seconds to wait; defaults to config['timeout']
        state: target status string to wait for
        break_states: optional list of statuses that abort the wait early
        sleep_period: seconds between polls
        verbose: log the job status while waiting

    Returns:
        True: if the state has been reached in the given timeout period
        False: timeout occured or a break state has been reached
    If break_states is specified, the call terminates when job enters in
    one of these state, returning False
    '''
    if timeout is None:
        timeout = config['timeout']
    from time import sleep
    from Ganga.Core import monitoring_component
    from Ganga.GPI import jobs
    current_status = None
    while j.status != state and timeout > 0:
        if not monitoring_component.isEnabled():
            # Monitoring loop not running: trigger a one-off pass for this job only.
            monitoring_component.runMonitoring(jobs[j.id])
        else:
            # Monitoring loop is running: reset its counters so it polls immediately.
            monitoring_component.alive = True
            monitoring_component.enabled = True
            monitoring_component.steps = -1
            monitoring_component.__updateTimeStamp = 0
            monitoring_component.__sleepCounter = -0.5
        if verbose and j.status != current_status:
            # BUGFIX: logger.info(j.id, j.status) passed a non-string as the
            # format string and silently dropped/misused j.status; use a
            # proper lazy %-style logging call instead.
            logger.info("Job %s: status = %s", j.id, j.status)
        if current_status is None:
            current_status = j.status
        # isinstance instead of the exact-type comparison type(...) == type([])
        if isinstance(break_states, list) and j.status in break_states:
            logger.info("Job finished with status: %s" % j.status)
            return False
        sleep(sleep_period)
        timeout -= sleep_period
        logger.debug("Status: %s" % j.status)
    logger.info("Job finished with status: %s" % j.status)
    logger.info("Timeout: %s" % str(timeout))
    return j.status == state
def master_updateMonitoringInformation(jobs):
    """
    Update monitoring information for jobs: jobs is a list of jobs in this
    backend which require monitoring (either 'submitted' or 'running' state).
    The jobs list never contains the subjobs.
    The default implementation iterates over subjobs and calls
    updateMonitoringInformation().
    """
    ## Have to import here so it's actually defined
    from Ganga.Core import monitoring_component
    logger.debug("Running Monitoring for Jobs: %s" % str([j.getFQID('.') for j in jobs]))
    ## Only process 10 files from the backend at once
    blocks_of_size = 10
    ## Separate different backends implicitly
    simple_jobs = {}
    # FIXME Add some check for (sub)jobs which are in a transient state but
    # are not locked by an active session of ganga
    for j in jobs:
        ## All subjobs should have same backend
        if len(j.subjobs) > 0:
            monitorable_subjobs = [sj for sj in j.subjobs if sj.status in ['submitted', 'running']]
            logger.debug('Monitoring subjobs: %s', repr([sj._repr() for sj in monitorable_subjobs]))
            if not monitorable_subjobs:
                continue
            stripProxy(j)._getWriteAccess()
            # Chunk the monitorable subjobs so the backend is queried for at
            # most blocks_of_size subjobs per call (slicing replaces the
            # manual append/reset loop).
            monitorable_blocks = [monitorable_subjobs[i:i + blocks_of_size]
                                  for i in range(0, len(monitorable_subjobs), blocks_of_size)]
            for this_block in monitorable_blocks:
                # Stop early when the monitoring component is missing or has
                # been disabled (parentheses make the and/or precedence explicit).
                if (monitoring_component and not monitoring_component.isEnabled(False)) or not monitoring_component:
                    break
                try:
                    j.backend.updateMonitoringInformation(this_block)
                except Exception as err:
                    logger.error("Monitoring Error: %s" % str(err))
            j.updateMasterJobStatus()
            stripProxy(j)._setDirty()
        else:
            # Jobs without subjobs are grouped by backend so each backend is
            # polled once with the whole list.
            backend_name = j.backend.__class__.__name__
            simple_jobs.setdefault(backend_name, []).append(j)
    if simple_jobs:
        for this_backend in simple_jobs:
            logger.debug('Monitoring jobs: %s', repr([jj._repr() for jj in simple_jobs[this_backend]]))
            for this_job in simple_jobs[this_backend]:
                stripProxy(this_job)._getWriteAccess()
            simple_jobs[this_backend][0].backend.updateMonitoringInformation(simple_jobs[this_backend])
            for this_job in simple_jobs[this_backend]:
                stripProxy(this_job)._setDirty()
    logger.debug("Finished Monitoring request")
def master_updateMonitoringInformation(jobs):
    """
    Update monitoring information for jobs: jobs is a list of jobs in this
    backend which require monitoring (either 'submitted' or 'running' state).
    The jobs list never contains the subjobs.
    The default implementation iterates over subjobs and calls
    updateMonitoringInformation().
    """
    ## Have to import here so it's actually defined
    from Ganga.Core import monitoring_component
    logger.debug("Running Monitoring for Jobs: %s" % str([j.getFQID('.') for j in jobs]))
    ## Only process 10 files from the backend at once
    #blocks_of_size = 10
    try:
        # BUGFIX: the module is Ganga.Utility.Config (was misspelled
        # Ganga.Utilities.Config), which made this import always fail and
        # silently forced the fallback block size of 5.
        from Ganga.Utility.Config import getConfig
        blocks_of_size = getConfig('PollThread')['numParallelJobs']
    except Exception as err:
        logger.debug("Problem with PollThread Config, defaulting to block size of 5 in master_updateMon...")
        logger.debug("Error: %s" % str(err))
        blocks_of_size = 5
    ## Separate different backends implicitly
    simple_jobs = {}
    # FIXME Add some check for (sub)jobs which are in a transient state but
    # are not locked by an active session of ganga
    for j in jobs:
        ## All subjobs should have same backend
        if len(j.subjobs) > 0:
            monitorable_subjobs = []
            if isType(j.subjobs, SubJobXMLList):
                # Use the on-disk cache to decide which subjobs to monitor
                # without loading every subjob from the repository.
                cache = j.subjobs.getAllCachedData()
                for sj_id in range(0, len(j.subjobs)):
                    if cache[sj_id]['status'] in ['submitted', 'running']:
                        if j.subjobs.isLoaded(sj_id):
                            ## SJ may have changed from cache in memory
                            this_sj = j.subjobs(sj_id)
                            if this_sj.status in ['submitted', 'running']:
                                monitorable_subjobs.append(this_sj)
                        else:
                            monitorable_subjobs.append(j.subjobs(sj_id))
            else:
                for sj in j.subjobs:
                    if sj.status in ['submitted', 'running']:
                        monitorable_subjobs.append(sj)
            if not monitorable_subjobs:
                continue
            stripProxy(j)._getWriteAccess()
            # Chunk the monitorable subjobs so the backend is queried for at
            # most blocks_of_size subjobs per call.
            monitorable_blocks = [monitorable_subjobs[i:i + blocks_of_size]
                                  for i in range(0, len(monitorable_subjobs), blocks_of_size)]
            for this_block in monitorable_blocks:
                # Stop early when the monitoring component is missing or has
                # been disabled (parentheses make the and/or precedence explicit).
                if (monitoring_component and not monitoring_component.isEnabled(False)) or not monitoring_component:
                    break
                try:
                    j.backend.updateMonitoringInformation(this_block)
                except Exception as err:
                    logger.error("Monitoring Error: %s" % str(err))
            j.updateMasterJobStatus()  ## NB ONLY THE MASTER JOB IS KNOWN TO THE JOB REPO!!!
            stripProxy(j)._setDirty()
        else:
            # Jobs without subjobs are grouped by backend so each backend is
            # polled once with the whole list.
            backend_name = j.backend.__class__.__name__
            simple_jobs.setdefault(backend_name, []).append(j)
    if simple_jobs:
        for this_backend in simple_jobs:
            logger.debug('Monitoring jobs: %s', repr([jj._repr() for jj in simple_jobs[this_backend]]))
            for this_job in simple_jobs[this_backend]:
                stripProxy(this_job)._getWriteAccess()
            simple_jobs[this_backend][0].backend.updateMonitoringInformation(simple_jobs[this_backend])
            for this_job in simple_jobs[this_backend]:
                stripProxy(this_job)._setDirty()
    logger.debug("Finished Monitoring request")
def master_updateMonitoringInformation(jobs):
    """
    Update monitoring information for jobs: jobs is a list of jobs in this
    backend which require monitoring (either 'submitted' or 'running' state).
    The jobs list never contains the subjobs.
    The default implementation iterates over subjobs and calls
    updateMonitoringInformation().
    """
    from Ganga.Core import monitoring_component
    # Remember whether monitoring was enabled when we started; used below to
    # abort mid-run only if monitoring has since been switched off.
    was_monitoring_running = monitoring_component and monitoring_component.isEnabled(False)
    logger.debug("Running Monitoring for Jobs: %s" % [j.getFQID(".") for j in jobs])
    ## Only process 10 files from the backend at once
    # blocks_of_size = 10
    try:
        from Ganga.Utility.Config import getConfig
        blocks_of_size = getConfig("PollThread")["numParallelJobs"]
    except Exception as err:
        # Fall back to a conservative block size if the config is unavailable.
        logger.debug("Problem with PollThread Config, defaulting to block size of 5 in master_updateMon...")
        logger.debug("Error: %s" % err)
        blocks_of_size = 5
    ## Separate different backends implicitly
    simple_jobs = {}
    # FIXME Add some check for (sub)jobs which are in a transient state but
    # are not locked by an active session of ganga
    for j in jobs:
        ## All subjobs should have same backend
        if len(j.subjobs) > 0:
            # logger.info("Looking for sj")
            # Collect the ids (not the objects) of subjobs worth monitoring.
            monitorable_subjob_ids = []
            if isType(j.subjobs, SubJobXMLList):
                # Use the on-disk status cache to avoid loading every subjob.
                cache = j.subjobs.getAllCachedData()
                for sj_id in range(0, len(j.subjobs)):
                    if cache[sj_id]["status"] in ["submitted", "running"]:
                        if j.subjobs.isLoaded(sj_id):
                            ## SJ may have changed from cache in memory
                            this_sj = j.subjobs(sj_id)
                            if this_sj.status in ["submitted", "running"]:
                                monitorable_subjob_ids.append(sj_id)
                        else:
                            monitorable_subjob_ids.append(sj_id)
            else:
                for sj in j.subjobs:
                    if sj.status in ["submitted", "running"]:
                        monitorable_subjob_ids.append(sj.id)
            # logger.info('Monitoring subjobs: %s', monitorable_subjob_ids)
            if not monitorable_subjob_ids:
                continue
            # logger.info("Dividing")
            # Split the ids into chunks of at most blocks_of_size each.
            monitorable_blocks = []
            temp_block = []
            for this_sj_id in monitorable_subjob_ids:
                temp_block.append(this_sj_id)
                if len(temp_block) == blocks_of_size:
                    monitorable_blocks.append(temp_block)
                    temp_block = []
            if temp_block:
                monitorable_blocks.append(temp_block)
                temp_block = []
            for this_block in monitorable_blocks:
                # If the monitoring function was running at the start of the function but has since stopped, break.
                if (
                    was_monitoring_running
                    and monitoring_component
                    and not monitoring_component.isEnabled(False)
                    or not monitoring_component
                ):
                    break
                try:
                    # Resolve ids to subjob objects only now, one block at a time.
                    subjobs_to_monitor = []
                    for sj_id in this_block:
                        subjobs_to_monitor.append(j.subjobs[sj_id])
                    j.backend.updateMonitoringInformation(subjobs_to_monitor)
                except Exception as err:
                    logger.error("Monitoring Error: %s" % err)
            j.updateMasterJobStatus()
        else:
            # Jobs without subjobs: group by backend so each backend is polled
            # once with the whole list.
            backend_name = getName(j.backend)
            if backend_name not in simple_jobs:
                simple_jobs[backend_name] = []
            simple_jobs[backend_name].append(j)
    if len(simple_jobs) > 0:
        for this_backend in simple_jobs.keys():
            logger.debug("Monitoring jobs: %s", repr([jj._repr() for jj in simple_jobs[this_backend]]))
            stripProxy(simple_jobs[this_backend][0].backend).updateMonitoringInformation(simple_jobs[this_backend])
    logger.debug("Finished Monitoring request")
def master_updateMonitoringInformation(jobs):
    """
    Update monitoring information for jobs: jobs is a list of jobs in this
    backend which require monitoring (either 'submitted' or 'running' state).
    The jobs list never contains the subjobs.
    The default implementation iterates over subjobs and calls
    updateMonitoringInformation().
    """
    ## Have to import here so it's actually defined
    from Ganga.Core import monitoring_component
    logger.debug("Running Monitoring for Jobs: %s" % str([j.getFQID('.') for j in jobs]))
    try:
        from Ganga.Utility.Config import getConfig
        blocks_of_size = getConfig('PollThread')['numParallelJobs']
    except Exception as err:
        # Fall back to a conservative block size if the config is unavailable.
        logger.debug("Problem with PollThread Config, defaulting to block size of 5 in master_updateMon...")
        logger.debug("Error: %s" % str(err))
        blocks_of_size = 5
    ## Separate different backends implicitly
    simple_jobs = {}
    # FIXME Add some check for (sub)jobs which are in a transient state but
    # are not locked by an active session of ganga
    for j in jobs:
        ## All subjobs should have same backend
        if len(j.subjobs) > 0:
            # Collect the ids (not the objects) of subjobs worth monitoring.
            monitorable_subjob_ids = []
            if isType(j.subjobs, SubJobXMLList):
                # Use the on-disk status cache to avoid loading every subjob.
                cache = j.subjobs.getAllCachedData()
                for sj_id in range(0, len(j.subjobs)):
                    if cache[sj_id]['status'] in ['submitted', 'running']:
                        if j.subjobs.isLoaded(sj_id):
                            ## SJ may have changed from cache in memory
                            this_sj = j.subjobs(sj_id)
                            if this_sj.status in ['submitted', 'running']:
                                monitorable_subjob_ids.append(sj_id)
                        else:
                            monitorable_subjob_ids.append(sj_id)
            else:
                for sj in j.subjobs:
                    if sj.status in ['submitted', 'running']:
                        monitorable_subjob_ids.append(sj.id)
            if not monitorable_subjob_ids:
                continue
            stripProxy(j)._getWriteAccess()
            # Chunk the ids so the backend is queried for at most
            # blocks_of_size subjobs per call.
            monitorable_blocks = [monitorable_subjob_ids[i:i + blocks_of_size]
                                  for i in range(0, len(monitorable_subjob_ids), blocks_of_size)]
            for this_block in monitorable_blocks:
                # Stop early when the monitoring component is missing or has
                # been disabled (parentheses make the and/or precedence explicit).
                if (monitoring_component and not monitoring_component.isEnabled(False)) or not monitoring_component:
                    break
                try:
                    # Resolve ids to subjob objects only now, one block at a time.
                    subjobs_to_monitor = []
                    for sj_id in this_block:
                        subjobs_to_monitor.append(j.subjobs[sj_id])
                    stripProxy(j.backend).updateMonitoringInformation(subjobs_to_monitor)
                except Exception as err:
                    logger.error("Monitoring Error: %s" % str(err))
            j.updateMasterJobStatus()  ## NB ONLY THE MASTER JOB IS KNOWN TO THE JOB REPO!!!
            stripProxy(j)._setDirty()
        else:
            # Jobs without subjobs are grouped by backend so each backend is
            # polled once with the whole list.
            backend_name = getName(j.backend)
            simple_jobs.setdefault(backend_name, []).append(j)
    if simple_jobs:
        for this_backend in simple_jobs:
            logger.debug('Monitoring jobs: %s', repr([jj._repr() for jj in simple_jobs[this_backend]]))
            for this_job in simple_jobs[this_backend]:
                stripProxy(this_job)._getWriteAccess()
            stripProxy(simple_jobs[this_backend][0].backend).updateMonitoringInformation(simple_jobs[this_backend])
            for this_job in simple_jobs[this_backend]:
                stripProxy(this_job)._setDirty()
    logger.debug("Finished Monitoring request")
def master_updateMonitoringInformation(jobs):
    """
    Update monitoring information for jobs: jobs is a list of jobs in this
    backend which require monitoring (either 'submitted' or 'running' state).
    The jobs list never contains the subjobs.
    The default implementation iterates over subjobs and calls
    updateMonitoringInformation().
    """
    from Ganga.Core import monitoring_component
    # Remember whether monitoring was enabled when we started; used below to
    # abort mid-run only if monitoring has since been switched off.
    was_monitoring_running = monitoring_component and monitoring_component.isEnabled(False)
    logger.debug("Running Monitoring for Jobs: %s" % [j.getFQID('.') for j in jobs])
    ## Only process 10 files from the backend at once
    #blocks_of_size = 10
    poll_config = getConfig('PollThread')
    try:
        blocks_of_size = poll_config['numParallelJobs']
    except Exception as err:
        # Fall back to a conservative block size if the option is missing.
        logger.debug("Problem with PollThread Config, defaulting to block size of 5 in master_updateMon...")
        logger.debug("Error: %s" % err)
        blocks_of_size = 5
    ## Separate different backends implicitly
    simple_jobs = {}
    # When enabled, backend polling is dispatched onto the internal queues
    # instead of being run synchronously in this thread.
    multiThreadMon = poll_config['enable_multiThreadMon']
    # FIXME Add some check for (sub)jobs which are in a transient state but
    # are not locked by an active session of ganga
    queues = getQueues()
    for j in jobs:
        ## All subjobs should have same backend
        if len(j.subjobs) > 0:
            #logger.info("Looking for sj")
            # Collect the ids (not the objects) of subjobs worth monitoring.
            monitorable_subjob_ids = []
            if isType(j.subjobs, SubJobXMLList):
                # Use the on-disk status cache to avoid loading every subjob.
                cache = j.subjobs.getAllCachedData()
                for sj_id in range(0, len(j.subjobs)):
                    if cache[sj_id]['status'] in ['submitted', 'running']:
                        if j.subjobs.isLoaded(sj_id):
                            ## SJ may have changed from cache in memory
                            this_sj = j.subjobs(sj_id)
                            if this_sj.status in ['submitted', 'running']:
                                monitorable_subjob_ids.append(sj_id)
                        else:
                            monitorable_subjob_ids.append(sj_id)
            else:
                for sj in j.subjobs:
                    if sj.status in ['submitted', 'running']:
                        monitorable_subjob_ids.append(sj.id)
            #logger.info('Monitoring subjobs: %s', monitorable_subjob_ids)
            if not monitorable_subjob_ids:
                continue
            #logger.info("Dividing")
            # Split the ids into chunks of at most blocks_of_size each.
            monitorable_blocks = []
            temp_block = []
            for this_sj_id in monitorable_subjob_ids:
                temp_block.append(this_sj_id)
                if len(temp_block) == blocks_of_size:
                    monitorable_blocks.append(temp_block)
                    temp_block = []
            if temp_block:
                monitorable_blocks.append(temp_block)
                temp_block = []
            for this_block in monitorable_blocks:
                # If the monitoring function was running at the start of the function but has since stopped, break.
                if was_monitoring_running and monitoring_component and not monitoring_component.isEnabled(False) or not monitoring_component:
                    break
                try:
                    # Resolve ids to subjob objects only now, one block at a time.
                    subjobs_to_monitor = []
                    for sj_id in this_block:
                        subjobs_to_monitor.append(j.subjobs[sj_id])
                    if multiThreadMon:
                        # NOTE(review): when the queue is already at the worker
                        # limit this block is silently skipped (no else branch)
                        # — confirm that is intentional.
                        if queues.totalNumIntThreads() < getConfig("Queues")['NumWorkerThreads']:
                            queues._addSystem(j.backend.updateMonitoringInformation, args=(subjobs_to_monitor,), name="Backend Monitor")
                    else:
                        j.backend.updateMonitoringInformation(subjobs_to_monitor)
                except Exception as err:
                    logger.error("Monitoring Error: %s" % err)
            j.updateMasterJobStatus()
        else:
            # Jobs without subjobs: group by backend so each backend is polled
            # once with the whole list.
            backend_name = getName(j.backend)
            if backend_name not in simple_jobs:
                simple_jobs[backend_name] = []
            simple_jobs[backend_name].append(j)
    if len(simple_jobs) > 0:
        for this_backend in simple_jobs.keys():
            logger.debug('Monitoring jobs: %s', repr([jj._repr() for jj in simple_jobs[this_backend]]))
            if multiThreadMon:
                # NOTE(review): same silent skip as above when the queue is full.
                if queues.totalNumIntThreads() < getConfig("Queues")['NumWorkerThreads']:
                    queues._addSystem(stripProxy(simple_jobs[this_backend][0].backend).updateMonitoringInformation, args=(simple_jobs[this_backend],), name="Backend Monitor")
            else:
                stripProxy(simple_jobs[this_backend][0].backend).updateMonitoringInformation(simple_jobs[this_backend])
    logger.debug("Finished Monitoring request")
    if not multiThreadMon:
        return
    # Busy-wait until no "Backend Monitor" task remains on the monitoring
    # threadpool, polling once per second.
    # NOTE(review): if worker_status() ever returns an empty list this loop
    # never terminates, and it sleeps once more even after the last task
    # finishes — confirm against the threadpool implementation.
    loop = True
    while loop:
        for stat in queues._monitoring_threadpool.worker_status():
            loop = False
            if stat[0] is not None and stat[0].startswith("Backend Monitor"):
                loop = True
                break
        time.sleep(1.)