def queueNewRequests(self, queue): """Get requests from regMgr and queue to workqueue""" self.logger.info("Contacting Request manager for more work") work = 0 workLoads = [] if queue.params['DrainMode']: self.logger.info('Draining queue: Skip requesting work from ReqMgr') return 0 try: workLoads = self.getAvailableRequests() except Exception as ex: traceMsg = traceback.format_exc() msg = "Error contacting RequestManager: %s" % traceMsg self.logger.warning(msg) return 0 for team, reqName, workLoadUrl in workLoads: try: try: Lexicon.couchurl(workLoadUrl) except Exception as ex: # can throw many errors e.g. AttributeError, AssertionError etc. # check its not a local file if not os.path.exists(workLoadUrl): error = WorkQueueWMSpecError(None, "Workflow url validation error: %s" % str(ex)) raise error self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl)) units = queue.queueWork(workLoadUrl, request=reqName, team=team) self.logdb.delete(reqName, "error", this_thread=True) except TERMINAL_EXCEPTIONS as ex: # fatal error - report back to ReqMgr self.logger.error('Permanent failure processing request "%s": %s' % (reqName, str(ex))) self.logger.info("Marking request %s as failed in ReqMgr" % reqName) self.reportRequestStatus(reqName, 'Failed', message=str(ex)) continue except (IOError, socket.error, CouchError, CouchConnectionError) as ex: # temporary problem - try again later msg = 'Error processing request "%s": will try again later.' % reqName msg += '\nError: "%s"' % str(ex) self.logger.info(msg) self.logdb.post(reqName, msg, 'error') continue except Exception as ex: # Log exception as it isnt a communication problem msg = 'Error processing request "%s": will try again later.' % reqName msg += '\nSee log for details.\nError: "%s"' % str(ex) self.logger.exception('Unknown error processing %s' % reqName) self.logdb.post(reqName, msg, 'error') continue self.logger.info('%s units(s) queued for "%s"' % (units, reqName)) work += units self.logger.info("%s element(s) obtained from RequestManager" % work) return work
def queueNewRequests(self, queue): """Get requests from regMgr and queue to workqueue""" self.logger.info("Contacting Request manager for more work") work = 0 workLoads = [] try: workLoads = self.getAvailableRequests() except Exception as ex: traceMsg = traceback.format_exc() msg = "Error contacting RequestManager: %s" % traceMsg self.logger.warning(msg) return 0 for team, reqName, workLoadUrl in workLoads: try: try: Lexicon.couchurl(workLoadUrl) except Exception as ex: # can throw many errors e.g. AttributeError, AssertionError etc. # check its not a local file if not os.path.exists(workLoadUrl): error = WorkQueueWMSpecError( None, "Workflow url validation error: %s" % str(ex)) raise error self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl)) units = queue.queueWork(workLoadUrl, request=reqName, team=team) self.logdb.delete(reqName, "error", this_thread=True, agent=False) except TERMINAL_EXCEPTIONS as ex: # fatal error - report back to ReqMgr self.logger.error( 'Permanent failure processing request "%s": %s' % (reqName, str(ex))) self.logger.info("Marking request %s as failed in ReqMgr" % reqName) self.reportRequestStatus(reqName, 'Failed', message=str(ex)) continue except (IOError, socket.error, CouchError, CouchConnectionError) as ex: # temporary problem - try again later msg = 'Error processing request "%s": will try again later.' % reqName msg += '\nError: "%s"' % str(ex) self.logger.info(msg) self.logdb.post(reqName, msg, 'error') continue except Exception as ex: # Log exception as it isnt a communication problem msg = 'Error processing request "%s": will try again later.' % reqName msg += '\nSee log for details.\nError: "%s"' % str(ex) self.logger.exception('Unknown error processing %s' % reqName) self.logdb.post(reqName, msg, 'error') continue self.logger.info('%s units(s) queued for "%s"' % (units, reqName)) work += units self.logger.info("%s element(s) obtained from RequestManager" % work) return work
class WorkQueueReqMgrInterface(): """Helper class for ReqMgr interaction""" def __init__(self, **kwargs): if not kwargs.get('logger'): import logging kwargs['logger'] = logging self.logger = kwargs['logger'] self.reqMgr = RequestManager(kwargs) self.previous_state = {} def __call__(self, queue): """Synchronize WorkQueue and RequestManager""" msg = '' try: # pull in new work work = self.queueNewRequests(queue) msg += "New Work: %d\n" % work except Exception: self.logger.exception("Error caught during RequestManager pull") try: # get additional open-running work extraWork = self.addNewElementsToOpenRequests(queue) msg += "Work added: %d\n" % extraWork except Exception: self.logger.exception("Error caught during RequestManager split") try: # report back to ReqMgr uptodate_elements = self.report(queue) msg += "Updated ReqMgr status for: %s\n" % ", ".join( [x['RequestName'] for x in uptodate_elements]) except: self.logger.exception("Error caught during RequestManager update") else: try: # Delete finished requests from WorkQueue self.deleteFinishedWork(queue, uptodate_elements) except: self.logger.exception("Error caught during work deletion") queue.backend.recordTaskActivity('reqmgr_sync', msg) def queueNewRequests(self, queue): """Get requests from regMgr and queue to workqueue""" self.logger.info("Contacting Request manager for more work") work = 0 workLoads = [] if queue.params['DrainMode']: self.logger.info( 'Draining queue: Skip requesting work from ReqMgr') return 0 try: workLoads = self.getAvailableRequests(*queue.params['Teams']) except Exception, ex: msg = "Error contacting RequestManager: %s" % str(ex) self.logger.warning(msg) return 0 for team, reqName, workLoadUrl in workLoads: # try: # self.reportRequestStatus(reqName, "negotiating") # except Exception, ex: # self.logger.error(""" # Unable to update ReqMgr state to negotiating: %s # Ignoring this request: %s""" % (str(ex), reqName)) # continue try: try: Lexicon.couchurl(workLoadUrl) except Exception, ex: # can throw many errors e.g. AttributeError, AssertionError etc. # check its not a local file if not os.path.exists(workLoadUrl): error = WorkQueueWMSpecError( None, "Workflow url validation error: %s" % str(ex)) raise error self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl)) units = queue.queueWork(workLoadUrl, request=reqName, team=team) except (WorkQueueWMSpecError, WorkQueueNoWorkError), ex: # fatal error - report back to ReqMgr self.logger.info( 'Permanent failure processing request "%s": %s' % (reqName, str(ex))) self.logger.info("Marking request %s as failed in ReqMgr" % reqName) self.reportRequestStatus(reqName, 'Failed', message=str(ex)) continue
def queueNewRequests(self, queue): """Get requests from regMgr and queue to workqueue""" self.logger.info("Contacting Request manager for more work") work = 0 workLoads = [] if queue.params['DrainMode']: self.logger.info( 'Draining queue: Skip requesting work from ReqMgr') return 0 try: workLoads = self.getAvailableRequests(queue.params['Teams']) except Exception as ex: traceMsg = traceback.format_exc() msg = "Error contacting RequestManager: %s" % traceMsg self.logger.warning(msg) return 0 for team, reqName, workLoadUrl in workLoads: # try: # self.reportRequestStatus(reqName, "negotiating") # except Exception, ex: # self.logger.error(""" # Unable to update ReqMgr state to negotiating: %s # Ignoring this request: %s""" % (str(ex), reqName)) # continue try: try: Lexicon.couchurl(workLoadUrl) except Exception as ex: # can throw many errors e.g. AttributeError, AssertionError etc. # check its not a local file if not os.path.exists(workLoadUrl): error = WorkQueueWMSpecError( None, "Workflow url validation error: %s" % str(ex)) raise error self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl)) units = queue.queueWork(workLoadUrl, request=reqName, team=team) self.logdb.delete(reqName, "error", this_thread=True) except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex: # fatal error - report back to ReqMgr self.logger.info( 'Permanent failure processing request "%s": %s' % (reqName, str(ex))) self.logger.info("Marking request %s as failed in ReqMgr" % reqName) self.reportRequestStatus(reqName, 'Failed', message=str(ex)) continue except (IOError, socket.error, CouchError, CouchConnectionError) as ex: # temporary problem - try again later msg = 'Error processing request "%s": will try again later.' \ '\nError: "%s"' % (reqName, str(ex)) self.logger.info(msg) self.logdb.post(reqName, msg, 'error') continue except Exception as ex: # Log exception as it isnt a communication problem msg = 'Error processing request "%s": will try again later.' \ '\nSee log for details.\nError: "%s"' % (reqName, str(ex)) self.logger.exception('Unknown error processing %s' % reqName) self.logdb.post(reqName, msg, 'error') continue try: self.reportRequestStatus(reqName, "acquired") except Exception as ex: self.logger.warning("Unable to update ReqMgr state: %s" % str(ex)) self.logger.warning('Will try again later') self.logger.info('%s units(s) queued for "%s"' % (units, reqName)) work += units self.logger.info("%s element(s) obtained from RequestManager" % work) return work
def queueNewRequests(self, queue): """Get requests from regMgr and queue to workqueue""" self.logger.info("Contacting Request manager for more work") work = 0 workLoads = [] if queue.params['DrainMode']: self.logger.info('Draining queue: Skip requesting work from ReqMgr') return 0 try: workLoads = self.getAvailableRequests(queue.params['Teams']) except Exception as ex: traceMsg = traceback.format_exc() msg = "Error contacting RequestManager: %s" % traceMsg self.logger.warning(msg) return 0 for team, reqName, workLoadUrl in workLoads: # try: # self.reportRequestStatus(reqName, "negotiating") # except Exception, ex: # self.logger.error(""" # Unable to update ReqMgr state to negotiating: %s # Ignoring this request: %s""" % (str(ex), reqName)) # continue try: try: Lexicon.couchurl(workLoadUrl) except Exception as ex: # can throw many errors e.g. AttributeError, AssertionError etc. # check its not a local file if not os.path.exists(workLoadUrl): error = WorkQueueWMSpecError(None, "Workflow url validation error: %s" % str(ex)) raise error self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl)) units = queue.queueWork(workLoadUrl, request = reqName, team = team) except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex: # fatal error - report back to ReqMgr self.logger.info('Permanent failure processing request "%s": %s' % (reqName, str(ex))) self.logger.info("Marking request %s as failed in ReqMgr" % reqName) self.reportRequestStatus(reqName, 'Failed', message = str(ex)) continue except (IOError, socket.error, CouchError, CouchConnectionError) as ex: # temporary problem - try again later msg = 'Error processing request "%s": will try again later.' \ '\nError: "%s"' % (reqName, str(ex)) self.logger.info(msg) self.sendMessage(reqName, msg, 'error') continue except Exception as ex: # Log exception as it isnt a communication problem msg = 'Error processing request "%s": will try again later.' \ '\nSee log for details.\nError: "%s"' % (reqName, str(ex)) self.logger.exception('Unknown error processing %s' % reqName) self.sendMessage(reqName, msg, 'error') continue try: if self.reqmgr2Only: self.reqMgr2.updateRequestStatus(reqName, "acquired") else: self.markAcquired(reqName, queue.params.get('QueueURL', 'No Queue')) except Exception as ex: self.logger.warning("Unable to update ReqMgr state: %s" % str(ex)) self.logger.warning('Will try again later') self.logger.info('%s units(s) queued for "%s"' % (units, reqName)) work += units self.logger.info("%s element(s) obtained from RequestManager" % work) return work