예제 #1
0
class RequestManagerTest(RESTBaseUnitTest):
    """
    Test RequestMgr Service client
    It will start RequestMgr RESTService
    Server DB is whatever env is set     
    
    This checks whether DS call makes without error and return the results.
    This test only test service call returns without error.
    The correctness of each function is tested in test/python/RequestManager_t/RequestMgr_t.py
    
    """
    def initialize(self):
        self.couchDBName = "reqmgr_t_0"
        self.config = RequestManagerConfig(
                'WMCore.HTTPFrontEnd.RequestManager.ReqMgrRESTModel')
        dbUrl = os.environ.get("DATABASE", None)
        self.config.setDBUrl(dbUrl)        
        self.config.setFormatter('WMCore.WebTools.RESTFormatter')
        self.config.setupRequestConfig()
        self.config.setupCouchDatabase(dbName = self.couchDBName)
        self.config.setPort(8899)
        self.schemaModules = ["WMCore.RequestManager.RequestDB"]
                
        
    def setUp(self):
        RESTBaseUnitTest.setUp(self)
        self.testInit.setupCouch("%s" % self.couchDBName, "GroupUser", "ConfigCache", "ReqMgr")
        self.testInit.setupCouch("%s_wmstats" % self.couchDBName, "WMStats")
        # logging stuff from TestInit is broken, setting myself
        l = logging.getLogger()
        l.setLevel(logging.DEBUG)
        self.params = {}
        self.params['endpoint'] = self.config.getServerUrl()
        self.reqService = RequestManagerDS(self.params)
        self.jsonSender = JSONRequests(self.config.getServerUrl())

        userName     = '******'
        groupName    = 'Li'
        teamName     = 'Tang'
        schema = utils.getAndSetupSchema(self,
                                         userName = userName,
                                         groupName = groupName,
                                         teamName = teamName)
        schema['ConfigCacheID'] = self.createConfig()
        schema['CouchDBName'] = self.couchDBName
        schema['CouchWorkloadDBName'] = self.couchDBName

        try:
            r = self.jsonSender.put('request', schema)
            try:
                self.requestName = r[0]['RequestName']
            except:
                self.requestName = r[0].values()[0]['RequestName']
        except Exception as ex:
            msg = traceback.format_exc()
            print("Exception during set up, reason: %s" % msg)
            raise ex

    def tearDown(self):
        self.config.deleteWorkloadCache()
        RESTBaseUnitTest.tearDown(self)
        self.testInit.tearDownCouch()

    def createConfig(self, bad = False):
        """
        _createConfig_

        Create a config of some sort that we can load out of ConfigCache
        """
        PSetTweak = {'process': {'outputModules_': ['ThisIsAName'],
                                 'ThisIsAName': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}}}}
        BadTweak  = {'process': {'outputModules_': ['ThisIsAName1', 'ThisIsAName2'],
                                 'ThisIsAName1': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}},
                                 'ThisIsAName2': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}}}}
        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = self.couchDBName)
        configCache.createUserGroup(groupname = "testGroup", username = '******')
        if bad:
            configCache.setPSetTweaks(PSetTweak = BadTweak)
        else:
            configCache.setPSetTweaks(PSetTweak = PSetTweak)
        configCache.save()
        return configCache.getCouchID()

    @attr("integration")
    def testA_RequestManagerService(self):
        requestName = self.requestName
        
        request = self.reqService.getRequest(requestName)
        # minimal test : it's return type  and the some value inside
        self.assertEqual(type(request), dict)
        self.assertTrue(len(request) > 0)
        
        # Test putTeam
        self.reqService.putTeam("team_usa")
        self.assertTrue('team_usa' in self.jsonSender.get('team')[0])
        
        self.jsonSender.put('assignment/%s/%s' % ("team_usa", requestName))
        
        request = self.reqService.getAssignment(teamName = "team_usa")
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)
       
        request = self.reqService.getAssignment(request = requestName)
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)
        
        self.reqService.sendMessage(requestName,"error")
        self.reqService.putWorkQueue(requestName, "http://test_url")
        self.reqService.reportRequestProgress(requestName,
                        percent_complete = 100, percent_success = 90)
        
        self.reqService.updateRequestStatus(requestName, "running-open")
예제 #2
0
class RequestManagerTest(RESTBaseUnitTest):
    """
    Test RequestMgr Service client
    It will start RequestMgr RESTService
    Server DB is whatever env is set     
    
    This checks whether DS call makes without error and return the results.
    This test only test service call returns without error.
    The correctness of each function is tested in test/python/RequestManager_t/RequestMgr_t.py
    
    """
    def initialize(self):
        self.couchDBName = "reqmgr_t_0"
        self.config = RequestManagerConfig(
            'WMCore.HTTPFrontEnd.RequestManager.ReqMgrRESTModel')
        dbUrl = os.environ.get("DATABASE", None)
        self.config.setDBUrl(dbUrl)
        self.config.setFormatter('WMCore.WebTools.RESTFormatter')
        self.config.setupRequestConfig()
        self.config.setupCouchDatabase(dbName=self.couchDBName)
        self.config.setPort(8899)
        self.schemaModules = ["WMCore.RequestManager.RequestDB"]

    def setUp(self):
        RESTBaseUnitTest.setUp(self)
        self.testInit.setupCouch("%s" % self.couchDBName, "GroupUser",
                                 "ConfigCache", "ReqMgr")
        self.testInit.setupCouch("%s_wmstats" % self.couchDBName, "WMStats")
        # logging stuff from TestInit is broken, setting myself
        l = logging.getLogger()
        l.setLevel(logging.DEBUG)
        self.params = {}
        self.params['endpoint'] = self.config.getServerUrl()
        self.reqService = RequestManagerDS(self.params)
        self.jsonSender = JSONRequests(self.config.getServerUrl())

        userName = '******'
        groupName = 'Li'
        teamName = 'Tang'
        schema = utils.getAndSetupSchema(self,
                                         userName=userName,
                                         groupName=groupName,
                                         teamName=teamName)
        schema['ConfigCacheID'] = self.createConfig()
        schema['CouchDBName'] = self.couchDBName
        schema['CouchWorkloadDBName'] = self.couchDBName

        try:
            r = self.jsonSender.put('request', schema)
            try:
                self.requestName = r[0]['RequestName']
            except:
                self.requestName = r[0].values()[0]['RequestName']
        except Exception as ex:
            msg = traceback.format_exc()
            print "Exception during set up, reason: %s" % msg
            raise ex

    def tearDown(self):
        self.config.deleteWorkloadCache()
        RESTBaseUnitTest.tearDown(self)
        self.testInit.tearDownCouch()

    def createConfig(self, bad=False):
        """
        _createConfig_

        Create a config of some sort that we can load out of ConfigCache
        """
        PSetTweak = {
            'process': {
                'outputModules_': ['ThisIsAName'],
                'ThisIsAName': {
                    'dataset': {
                        'dataTier': 'RECO',
                        'filterName': 'Filter'
                    }
                }
            }
        }
        BadTweak = {
            'process': {
                'outputModules_': ['ThisIsAName1', 'ThisIsAName2'],
                'ThisIsAName1': {
                    'dataset': {
                        'dataTier': 'RECO',
                        'filterName': 'Filter'
                    }
                },
                'ThisIsAName2': {
                    'dataset': {
                        'dataTier': 'RECO',
                        'filterName': 'Filter'
                    }
                }
            }
        }
        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName=self.couchDBName)
        configCache.createUserGroup(groupname="testGroup", username='******')
        if bad:
            configCache.setPSetTweaks(PSetTweak=BadTweak)
        else:
            configCache.setPSetTweaks(PSetTweak=PSetTweak)
        configCache.save()
        return configCache.getCouchID()

    @attr("integration")
    def testA_RequestManagerService(self):
        requestName = self.requestName

        request = self.reqService.getRequest(requestName)
        # minimal test : it's return type  and the some value inside
        self.assertEqual(type(request), dict)
        self.assertTrue(len(request) > 0)

        # Test putTeam
        self.reqService.putTeam("team_usa")
        self.assertTrue('team_usa' in self.jsonSender.get('team')[0])

        self.jsonSender.put('assignment/%s/%s' % ("team_usa", requestName))

        request = self.reqService.getAssignment(teamName="team_usa")
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)

        request = self.reqService.getAssignment(request=requestName)
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)

        self.reqService.sendMessage(requestName, "error")
        self.reqService.putWorkQueue(requestName, "http://test_url")
        self.reqService.reportRequestProgress(requestName,
                                              percent_complete=100,
                                              percent_success=90)

        self.reqService.updateRequestStatus(requestName, "running-open")
예제 #3
0
class WorkQueueReqMgrInterface():
    """Helper class for ReqMgr interaction"""
    def __init__(self, **kwargs):
        if not kwargs.get('logger'):
            import logging
            kwargs['logger'] = logging
        self.logger = kwargs['logger']
        self.reqMgr = RequestManager(kwargs)
        self.reqmgr2Only = kwargs.get("reqmgr2_only", False)
        #this will break all in one test
        self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))
        
        centralurl = kwargs.get("central_logdb_url", "")
        identifier = kwargs.get("log_reporter", "")
        
        # set the thread name before creat the log db. 
        # only sets that when it is not set already
        myThread = threading.currentThread()
        if myThread.getName() == "MainThread":
            myThread.setName(self.__class__.__name__)
            
        self.logdb = LogDB(centralurl, identifier, logger=self.logger)
        self.previous_state = {}

    def __call__(self, queue):
        """Synchronize WorkQueue and RequestManager"""
        msg = ''
        try:    # pull in new work
            work = self.queueNewRequests(queue)
            msg += "New Work: %d\n" % work
        except Exception:
            self.logger.exception("Error caught during RequestManager pull")

        try: # get additional open-running work
            extraWork = self.addNewElementsToOpenRequests(queue)
            msg += "Work added: %d\n" % extraWork
        except Exception:
            self.logger.exception("Error caught during RequestManager split")

        try:    # report back to ReqMgr
            uptodate_elements = self.report(queue)
            msg += "Updated ReqMgr status for: %s\n" % ", ".join([x['RequestName'] for x in uptodate_elements])
        except:
            self.logger.exception("Error caught during RequestManager update")
        else:
            try:    # Delete finished requests from WorkQueue
                self.deleteFinishedWork(queue, uptodate_elements)
            except:
                self.logger.exception("Error caught during work deletion")

        queue.backend.recordTaskActivity('reqmgr_sync', msg)

    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            workLoads = self.getAvailableRequests(queue.params['Teams'])
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
#            try:
#                self.reportRequestStatus(reqName, "negotiating")
#            except Exception, ex:
#                self.logger.error("""
#                    Unable to update ReqMgr state to negotiating: %s
#                    Ignoring this request: %s""" % (str(ex), reqName))
#                continue

            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex: # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check its not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(None, "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl, request = reqName, team = team)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - report back to ReqMgr
                self.logger.info('Permanent failure processing request "%s": %s' % (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" % reqName)
                self.reportRequestStatus(reqName, 'Failed', message = str(ex))
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            try:
                self.reportRequestStatus(reqName, "acquired")
            except Exception as ex:
                self.logger.warning("Unable to update ReqMgr state: %s" % str(ex))
                self.logger.warning('Will try again later')

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

            self.logger.info("%s element(s) obtained from RequestManager" % work)
        return work


    def report(self, queue):
        """Report queue status to ReqMgr."""
        new_state = {}
        uptodate_elements = []
        now = time.time()

        elements = queue.statusInbox(dictKey = "RequestName")
        if not elements:
            return new_state

        for ele in elements:
            ele = elements[ele][0] # 1 element tuple
            try:
                request = self.reqMgr2.getRequestByNames(ele['RequestName'])[ele['RequestName']]
                if request['RequestStatus'] in ('failed', 'completed', 'announced',
                                                'epic-FAILED', 'closed-out', 'rejected'):
                    # requests can be done in reqmgr but running in workqueue
                    # if request has been closed but agent cleanup actions
                    # haven't been run (or agent has been retired)
                    # Prune out obviously too old ones to avoid build up
                    if queue.params.get('reqmgrCompleteGraceTime', -1) > 0:
                        if (now - float(ele.updatetime)) > queue.params['reqmgrCompleteGraceTime']:
                            # have to check all elements are at least running and are old enough
                            request_elements = queue.statusInbox(WorkflowName = request['RequestName'])
                            if not any([x for x in request_elements if x['Status'] != 'Running' and not x.inEndState()]):
                                last_update = max([float(x.updatetime) for x in request_elements])
                                if (now - last_update) > queue.params['reqmgrCompleteGraceTime']:
                                    self.logger.info("Finishing request %s as it is done in reqmgr" % request['RequestName'])
                                    queue.doneWork(WorkflowName=request['RequestName'])
                                    continue
                    else:
                        pass # assume workqueue status will catch up later
                elif request['RequestStatus'] == 'aborted' or request['RequestStatus'] == 'force-complete':
                    queue.cancelWork(WorkflowName=request['RequestName'])
                # Check consistency of running-open/closed and the element closure status
                elif request['RequestStatus'] == 'running-open' and not ele.get('OpenForNewData', False):
                    self.reportRequestStatus(ele['RequestName'], 'running-closed')
                elif request['RequestStatus'] == 'running-closed' and ele.get('OpenForNewData', False):
                    queue.closeWork(ele['RequestName'])
                # update request status if necessary
                elif ele['Status'] not in self._reqMgrToWorkQueueStatus(request['RequestStatus']):
                    self.reportElement(ele)
                    
                uptodate_elements.append(ele)
            except Exception as ex:
                msg = 'Error talking to ReqMgr about request "%s": %s'
                traceMsg = traceback.format_exc()
                self.logger.error(msg % (ele['RequestName'], traceMsg))

        return uptodate_elements

    def deleteFinishedWork(self, queue, elements):
        """Delete work from queue that is finished in ReqMgr"""
        finished = []
        for element in elements:
            if self._workQueueToReqMgrStatus(element['Status']) in ('aborted', 'failed', 'completed', 'announced',
                                                         'epic-FAILED', 'closed-out', 'rejected') \
                                                         and element.inEndState():
                finished.append(element['RequestName'])
        return queue.deleteWorkflows(*finished)
    
    def _getRequestsByTeamsAndStatus(self, status, teams = []):
        """
        TODO: now it assumes one team per requests - check whether this assumption is correct
        Check whether we actually use the team for this.
        Also switch to byteamandstatus couch call instead of 
        """
        requests = self.reqMgr2.getRequestByStatus(status)
        #Then sort by Team name then sort by Priority
        #https://docs.python.org/2/howto/sorting.html
        if teams and len(teams) > 0:
            results = {}
            for reqName, value in requests.items():
                if value["Teams"][0] in teams:
                    results[reqName] = value
            return results 
        else:
            return requests    
            
    def getAvailableRequests(self, teams):
        """
        Get available requests for the given teams and sort by team and priority
        returns [(team, request_name, request_spec_url)]
        """
        
        tempResults = self._getRequestsByTeamsAndStatus("assigned", teams).values()
        filteredResults = []
        for request in tempResults:
            if "Teams" in request and len(request["Teams"]) == 1:
                filteredResults.append(request)
            else:
                msg = "no team or more than one team (%s) are assigined: %s" % (
                                    request.get("Teams", None), request["RequestName"])
                self.logger.error(msg)
                self.logdb.post(request["RequestName"], msg, 'error')
        filteredResults.sort(key = itemgetter('RequestPriority'), reverse = True)
        filteredResults.sort(key = lambda r: r["Teams"][0])
        
        results = [(x["Teams"][0], x["RequestName"], x["RequestWorkflow"]) for x in filteredResults]
        
        return results

    def reportRequestStatus(self, request, status, message = None):
        """Change state in RequestManager
           Optionally, take a message to append to the request
        """
        if message:
            self.logdb.post(request, str(message), 'info')
        reqmgrStatus = self._workQueueToReqMgrStatus(status)
        if reqmgrStatus: # only send known states
            try:
                # try reqmgr1 call if it fails
                self.reqMgr.reportRequestStatus(request, reqmgrStatus)
            except Exception as ex:
                # try reqmgr2 call
                msg = "%s : reqmgr2 request: %s" % (request, str(ex))
                self.logdb.post(request, msg, 'warning')
                self.reqMgr2.updateRequestStatus(request, reqmgrStatus)                

    def markAcquired(self, request, url = None):
        """Mark request acquired"""
        self.reqMgr.putWorkQueue(request, url)

    def _workQueueToReqMgrStatus(self, status):
        """Map WorkQueue Status to that reported to ReqMgr"""
        statusMapping = {'Acquired' : 'acquired',
                         'Running' : 'running-open',
                         'Failed' : 'failed',
                         'Canceled' : 'aborted',
                         'CancelRequested' : 'aborted',
                         'Done' : 'completed'
                         }
        if status in statusMapping:
            # if wq status passed convert to reqmgr status
            return statusMapping[status]
        elif status in REQUEST_STATE_LIST:
            # if reqmgr status passed return reqmgr status
            return status
        else:
            # unknown status
            return None

    def _reqMgrToWorkQueueStatus(self, status):
        """Map ReqMgr status to that in a WorkQueue element, it is not a 1-1 relation"""
        statusMapping = {'acquired': ['Acquired'],
                         'running' : ['Running'],
                         'running-open': ['Running'],
                         'running-closed': ['Running'],
                         'failed': ['Failed'],
                         'aborted': ['Canceled', 'CancelRequested'],
                         'force-complete': ['Canceled', 'CancelRequested'],
                         'completed': ['Done']}
        if status in statusMapping:
            return statusMapping[status]
        else:
            return []

    def reportElement(self, element):
        """Report element to ReqMgr"""
        self.reportRequestStatus(element['RequestName'], element['Status'])

    def addNewElementsToOpenRequests(self, queue):
        """Add new elements to open requests which are in running-open state, only works adding new blocks from the input dataset"""
        self.logger.info("Checking Request Manager for open requests and closing old ones")

        # First close any open inbox element which hasn't found anything new in a while
        queue.closeWork()
        self.report(queue)

        work = 0
        requests = []

        # Drain mode, don't pull any work into open requests. They will be closed if the queue stays in drain long enough
        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            requests = self._getRequestsByTeamsAndStatus("running-open", queue.params['Teams']).keys()
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for reqName in requests:
            try:
                self.logger.info("Processing request %s" % (reqName))
                units = queue.addWork(requestName = reqName)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - but at least it was split the first time. Log and skip.
                msg = 'Error adding further work to request "%s". Will try again later' \
                '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

        self.logger.info("%s element(s) added to open requests" % work)
        return work
예제 #4
0
class WorkQueueReqMgrInterface():
    """Helper class for ReqMgr interaction"""
    def __init__(self, **kwargs):
        if not kwargs.get('logger'):
            import logging
            kwargs['logger'] = logging
        self.logger = kwargs['logger']
        #TODO: (reqmgr2Only - remove this line when reqmgr is replaced)
        self.reqMgr = RequestManager(kwargs)
        #this will break all in one test
        self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))

        centralurl = kwargs.get("central_logdb_url", "")
        identifier = kwargs.get("log_reporter", "")

        # set the thread name before creat the log db.
        # only sets that when it is not set already
        myThread = threading.currentThread()
        if myThread.getName() == "MainThread":
            myThread.setName(self.__class__.__name__)

        self.logdb = LogDB(centralurl, identifier, logger=self.logger)
        self.previous_state = {}

    def __call__(self, queue):
        """Synchronize WorkQueue and RequestManager"""
        msg = ''
        try:  # pull in new work
            work = self.queueNewRequests(queue)
            msg += "New Work: %d\n" % work
        except Exception:
            self.logger.exception("Error caught during RequestManager pull")

        try:  # get additional open-running work
            extraWork = self.addNewElementsToOpenRequests(queue)
            msg += "Work added: %d\n" % extraWork
        except Exception:
            self.logger.exception("Error caught during RequestManager split")

        try:  # report back to ReqMgr
            uptodate_elements = self.report(queue)
            msg += "Updated ReqMgr status for: %s\n" % ", ".join(
                [x['RequestName'] for x in uptodate_elements])
        except Exception:
            self.logger.exception("Error caught during RequestManager update")
        else:
            try:  # Delete finished requests from WorkQueue
                self.deleteFinishedWork(queue, uptodate_elements)
            except Exception:
                self.logger.exception("Error caught during work deletion")

        queue.backend.recordTaskActivity('reqmgr_sync', msg)

    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        if queue.params['DrainMode']:
            self.logger.info(
                'Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            workLoads = self.getAvailableRequests(queue.params['Teams'])
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
            #            try:
            #                self.reportRequestStatus(reqName, "negotiating")
            #            except Exception, ex:
            #                self.logger.error("""
            #                    Unable to update ReqMgr state to negotiating: %s
            #                    Ignoring this request: %s""" % (str(ex), reqName))
            #                continue

            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check its not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(
                            None,
                            "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" %
                                 (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl,
                                        request=reqName,
                                        team=team)
                self.logdb.delete(reqName, "error", this_thread=True)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - report back to ReqMgr
                self.logger.info(
                    'Permanent failure processing request "%s": %s' %
                    (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" %
                                 reqName)
                self.reportRequestStatus(reqName, 'Failed', message=str(ex))
                continue
            except (IOError, socket.error, CouchError,
                    CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            try:
                self.reportRequestStatus(reqName, "acquired")
            except Exception as ex:
                self.logger.warning("Unable to update ReqMgr state: %s" %
                                    str(ex))
                self.logger.warning('Will try again later')

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

            self.logger.info("%s element(s) obtained from RequestManager" %
                             work)
        return work

    def report(self, queue):
        """Report queue status to ReqMgr."""
        new_state = {}
        uptodate_elements = []
        now = time.time()

        elements = queue.statusInbox(dictKey="RequestName")
        if not elements:
            return new_state

        for ele in elements:
            ele = elements[ele][0]  # 1 element tuple
            try:
                request = self.reqMgr2.getRequestByNames(ele['RequestName'])
                if not request:
                    msg = 'Failed to get request "%s" from ReqMgr2. Will try again later.' % ele[
                        'RequestName']
                    self.logger.warning(msg)
                    continue
                request = request[ele['RequestName']]
                if request['RequestStatus'] in ('failed', 'completed',
                                                'announced', 'epic-FAILED',
                                                'closed-out', 'rejected'):
                    # requests can be done in reqmgr but running in workqueue
                    # if request has been closed but agent cleanup actions
                    # haven't been run (or agent has been retired)
                    # Prune out obviously too old ones to avoid build up
                    if queue.params.get('reqmgrCompleteGraceTime', -1) > 0:
                        if (now - float(ele.updatetime)
                            ) > queue.params['reqmgrCompleteGraceTime']:
                            # have to check all elements are at least running and are old enough
                            request_elements = queue.statusInbox(
                                WorkflowName=request['RequestName'])
                            if not any([
                                    x for x in request_elements
                                    if x['Status'] != 'Running'
                                    and not x.inEndState()
                            ]):
                                last_update = max([
                                    float(x.updatetime)
                                    for x in request_elements
                                ])
                                if (
                                        now - last_update
                                ) > queue.params['reqmgrCompleteGraceTime']:
                                    self.logger.info(
                                        "Finishing request %s as it is done in reqmgr"
                                        % request['RequestName'])
                                    queue.doneWork(
                                        WorkflowName=request['RequestName'])
                                    continue
                    else:
                        pass  # assume workqueue status will catch up later
                elif request['RequestStatus'] == 'aborted' or request[
                        'RequestStatus'] == 'force-complete':
                    queue.cancelWork(WorkflowName=request['RequestName'])
                # Check consistency of running-open/closed and the element closure status
                elif request['RequestStatus'] == 'running-open' and not ele.get(
                        'OpenForNewData', False):
                    self.reportRequestStatus(ele['RequestName'],
                                             'running-closed')
                elif request['RequestStatus'] == 'running-closed' and ele.get(
                        'OpenForNewData', False):
                    queue.closeWork(ele['RequestName'])
                # update request status if necessary
                elif ele['Status'] not in self._reqMgrToWorkQueueStatus(
                        request['RequestStatus']):
                    self.reportElement(ele)

                uptodate_elements.append(ele)
            except Exception as ex:
                msg = 'Error talking to ReqMgr about request "%s": %s'
                traceMsg = traceback.format_exc()
                self.logger.error(msg % (ele['RequestName'], traceMsg))

        return uptodate_elements

    def deleteFinishedWork(self, queue, elements):
        """Delete work from queue that is finished in ReqMgr"""
        finished = []
        for element in elements:
            if self._workQueueToReqMgrStatus(element['Status']) in ('aborted', 'failed', 'completed', 'announced',
                                                         'epic-FAILED', 'closed-out', 'rejected') \
                                                         and element.inEndState():
                finished.append(element['RequestName'])
        return queue.deleteWorkflows(*finished)

    def _getRequestsByTeamsAndStatus(self, status, teams=[]):
        """
        TODO: now it assumes one team per requests - check whether this assumption is correct
        Check whether we actually use the team for this.
        Also switch to byteamandstatus couch call instead of 
        """
        requests = self.reqMgr2.getRequestByStatus(status)
        #Then sort by Team name then sort by Priority
        #https://docs.python.org/2/howto/sorting.html
        if teams and len(teams) > 0:
            results = {}
            for reqName, value in requests.items():
                if value["Teams"][0] in teams:
                    results[reqName] = value
            return results
        else:
            return requests

    def getAvailableRequests(self, teams):
        """
        Get available requests for the given teams and sort by team and priority
        returns [(team, request_name, request_spec_url)]
        """

        tempResults = self._getRequestsByTeamsAndStatus("assigned",
                                                        teams).values()
        filteredResults = []
        for request in tempResults:
            if "Teams" in request and len(request["Teams"]) == 1:
                filteredResults.append(request)
                self.logdb.delete(request["RequestName"],
                                  "error",
                                  this_thread=True)
            else:
                msg = "no team or more than one team (%s) are assigined: %s" % (
                    request.get("Teams", None), request["RequestName"])
                self.logger.error(msg)
                self.logdb.post(request["RequestName"], msg, 'error')
        filteredResults.sort(key=itemgetter('RequestPriority'), reverse=True)
        filteredResults.sort(key=lambda r: r["Teams"][0])

        results = [(x["Teams"][0], x["RequestName"], x["RequestWorkflow"])
                   for x in filteredResults]

        return results

    def reportRequestStatus(self, request, status, message=None):
        """Change state in RequestManager
           Optionally, take a message to append to the request
        """
        if message:
            self.logdb.post(request, str(message), 'info')
        reqmgrStatus = self._workQueueToReqMgrStatus(status)
        if reqmgrStatus:  # only send known states
            try:
                #TODO: try reqmgr1 call if it fails (reqmgr2Only - remove this line when reqmgr is replaced)
                self.reqMgr.reportRequestStatus(request, reqmgrStatus)
                # And replace with this (remove all Exceptins)
                #self.reqMgr2.updateRequestStatus(request, reqmgrStatus)
            except HTTPException as ex:
                # If we get an HTTPException of 404 means reqmgr2 request
                if ex.status == 404:
                    # try reqmgr2 call
                    msg = "%s : reqmgr2 request: %s" % (request, str(ex))
                    self.logdb.post(request, msg, 'info')
                    self.reqMgr2.updateRequestStatus(request, reqmgrStatus)
                else:
                    msg = "%s : fail to update status with HTTP error: %s" % (
                        request, str(ex))
                    self.logdb.post(request, msg, 'warning')
                    raise ex
            except Exception as ex:
                msg = "%s : fail to update status will try later: %s" % (
                    request, str(ex))
                self.logdb.post(request, msg, 'warning')
                raise ex

    def markAcquired(self, request, url=None):
        """Mark request acquired"""
        self.reqMgr.putWorkQueue(request, url)

    def _workQueueToReqMgrStatus(self, status):
        """Map WorkQueue Status to that reported to ReqMgr"""
        statusMapping = {
            'Acquired': 'acquired',
            'Running': 'running-open',
            'Failed': 'failed',
            'Canceled': 'aborted',
            'CancelRequested': 'aborted',
            'Done': 'completed'
        }
        if status in statusMapping:
            # if wq status passed convert to reqmgr status
            return statusMapping[status]
        elif status in REQUEST_STATE_LIST:
            # if reqmgr status passed return reqmgr status
            return status
        else:
            # unknown status
            return None

    def _reqMgrToWorkQueueStatus(self, status):
        """Map ReqMgr status to that in a WorkQueue element, it is not a 1-1 relation"""
        statusMapping = {
            'acquired': ['Acquired'],
            'running': ['Running'],
            'running-open': ['Running'],
            'running-closed': ['Running'],
            'failed': ['Failed'],
            'aborted': ['Canceled', 'CancelRequested'],
            'force-complete': ['Canceled', 'CancelRequested'],
            'completed': ['Done']
        }
        if status in statusMapping:
            return statusMapping[status]
        else:
            return []

    def reportElement(self, element):
        """Report element to ReqMgr"""
        self.reportRequestStatus(element['RequestName'], element['Status'])

    def addNewElementsToOpenRequests(self, queue):
        """Add new elements to open requests which are in running-open state, only works adding new blocks from the input dataset"""
        self.logger.info(
            "Checking Request Manager for open requests and closing old ones")

        # First close any open inbox element which hasn't found anything new in a while
        queue.closeWork()
        self.report(queue)

        work = 0
        requests = []

        # Drain mode, don't pull any work into open requests. They will be closed if the queue stays in drain long enough
        if queue.params['DrainMode']:
            self.logger.info(
                'Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            requests = self._getRequestsByTeamsAndStatus(
                "running-open", queue.params['Teams']).keys()
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for reqName in requests:
            try:
                self.logger.info("Processing request %s" % (reqName))
                units = queue.addWork(requestName=reqName)
                self.logdb.delete(request["RequestName"], 'error', True)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - but at least it was split the first time. Log and skip.
                msg = 'Error adding further work to request "%s". Will try again later' \
                '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except (IOError, socket.error, CouchError,
                    CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

        self.logger.info("%s element(s) added to open requests" % work)
        return work
예제 #5
0
class RequestManagerTest(RESTBaseUnitTest):
    """
    Test RequestMgr Service client
    It will start RequestMgr RESTService
    Server DB is whatever env is set     
    
    This checks whether DS call makes without error and return the results.
    This test only test service call returns without error.
    The correctness of each function is tested in test/python/RequestManager_t/RequestMgr_t.py
    """
    def initialize(self):
        self.couchDBName = "reqmgr_t_0"
        self.config = RequestManagerConfig(
                'WMCore.HTTPFrontEnd.RequestManager.ReqMgrRESTModel')
        dbUrl = os.environ.get("DATABASE", None)
        self.config.setDBUrl(dbUrl)        
        self.config.setFormatter('WMCore.WebTools.RESTFormatter')
        self.config.setupRequestConfig()
        self.config.setupCouchDatabase(dbName = self.couchDBName)
        self.config.setPort(8888)
        self.schemaModules = ["WMCore.RequestManager.RequestDB"]
        return
        
    def setUp(self):
        """
        setUP global values
        """
        RESTBaseUnitTest.setUp(self)
        self.testInit.setupCouch("%s" % self.couchDBName,
                                 "GroupUser", "ConfigCache")
        self.params = {}
        self.params['endpoint'] = self.config.getServerUrl()
        self.reqService = RequestManagerDS(self.params)
        self.jsonSender = JSONRequests(self.config.getServerUrl())
        self.requestSchema = getRequestSchema()
        self.jsonSender.put('group/PeopleLikeMe')
        self.jsonSender.put('user/[email protected]')
        self.jsonSender.put('group/PeopleLikeMe/me')
        self.jsonSender.put('version/CMSSW_3_5_8')
        r = self.jsonSender.put('request/' + self.requestSchema['RequestName'], 
                                self.requestSchema)
        self.requestName = r[0]['RequestName']
    
    def tearDown(self):
        self.config.deleteWorkloadCache()
        RESTBaseUnitTest.tearDown(self)
        self.testInit.tearDownCouch()

    @attr("integration")
    def testA_RequestManagerService(self):
        requestName = self.requestName
        
        request = self.reqService.getRequest(requestName)
        # minimal test : it's return type  and the some value inside
        self.assertEqual(type(request), dict)
        self.assertTrue(len(request) > 0)
        
        # Test putTeam
        self.reqService.putTeam("team_usa")
        self.assertTrue('team_usa' in self.jsonSender.get('team')[0])
        
        self.jsonSender.put('assignment/%s/%s' % ("team_usa", requestName))
        
        request = self.reqService.getAssignment(teamName = "team_usa")
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)
       
        request = self.reqService.getAssignment(request = requestName)
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)
        
        self.reqService.sendMessage(requestName,"error")
        self.reqService.putWorkQueue(requestName, "http://test_url")
        self.reqService.reportRequestProgress(requestName)
        self.reqService.reportRequestProgress(requestName,
                        percent_complete = 100, percent_success = 90)
        
        self.reqService.reportRequestStatus(requestName, "running")
        return