def testEmulator(self):
    """
    Check that turning the emulators on swaps every service wrapper to its
    emulated implementation, and that resetting restores the real ones.
    """
    emulatedExpectations = [
        (lambda: PhEDEx(),
         'WMQuality.Emulators.PhEDExClient.PhEDEx', 'PhEDEx'),
        (lambda: DBSReader(self.globalDBS),
         'WMQuality.Emulators.DBSClient.DBSReader', 'DBSReader'),
        (lambda: SiteDBJSON(),
         'WMQuality.Emulators.SiteDBClient.SiteDB', 'SiteDBJSON'),
        (lambda: RequestManager(),
         'WMQuality.Emulators.RequestManagerClient.RequestManager', 'RequestManager'),
    ]
    realExpectations = [
        (lambda: PhEDEx(),
         'WMCore.Services.PhEDEx.PhEDEx', 'PhEDEx'),
        (lambda: DBSReader(self.globalDBS),
         'WMCore.Services.DBS.DBS2Reader', 'DBS2Reader'),
        (lambda: SiteDBJSON(),
         'WMCore.Services.SiteDB.SiteDB', 'SiteDBJSON'),
        (lambda: RequestManager(),
         'WMCore.Services.RequestManager.RequestManager', 'RequestManager'),
    ]

    EmulatorHelper.setEmulators(True, True, True, True)
    for build, wrappedModule, className in emulatedExpectations:
        # a fresh instance per assertion, as in the original test
        self.assertEqual(build().wrapped.__module__, wrappedModule)
        self.assertEqual(build().__class__.__name__, className)

    EmulatorHelper.resetEmulators()
    for build, wrappedModule, className in realExpectations:
        self.assertEqual(build().wrapped.__module__, wrappedModule)
        self.assertEqual(build().__class__.__name__, className)
def getRequestInfoFromReqMgr(serviceURL):
    """
    get the request info from requestManager

    Queries the RequestManager at *serviceURL* for the known request names
    and the registered work queues.  On any service failure the error is
    logged and a formatted error payload is returned instead of raising.
    """
    service = RequestManager({"endpoint": serviceURL})
    try:
        baseResults = service.getRequestNames()
        urls = service.getWorkQueue()
    except Exception as ex:  # was py2-only 'except Exception, ex'
        logging.error(str(ex))
        # NOTE: "RequestManger" typo kept verbatim -- callers/monitors may
        # match on this exact message.
        return DFormatter.errorFormatter(serviceURL, "RequestManger Down")
    # NOTE(review): the success path appears truncated in this view --
    # baseResults/urls are fetched but not used here; confirm downstream.
def getSplitting(requestName):
    """
    Return the 'events_per_job' job-splitting parameter for the top level
    task of *requestName*.

    The request is looked up in ReqMgr, its pickled workload spec is fetched
    from the request's couch database and unpickled into a workload helper.
    """
    reqmgrUrl = 'https://cmsweb.cern.ch/reqmgr/reqMgr/'
    reqmgr = RequestManager(dict={'endpoint': reqmgrUrl})
    result = reqmgr.getRequest(requestName)
    workloadDB = Database(result['CouchWorkloadDBName'], result['CouchURL'])
    workloadPickle = workloadDB.getAttachment(requestName, 'spec')
    # NOTE: pickle.loads on couch-stored data -- assumed trusted input
    spec = pickle.loads(workloadPickle)
    workload = WMWorkloadHelper(spec)
    # fetch the top-level task once (was fetched twice; the unused
    # jobSplittingAlgorithm() lookup has been dropped)
    topTask = workload.getTopLevelTask()[0]
    params = topTask.jobSplittingParameters()
    return params['events_per_job']
def main():
    """
    Run the Block start policy over one hard-coded request's workload and
    print the resulting work units and rejected elements.
    """
    demPolicy = Block()
    reqmgr = RequestManager(dict={'endpoint': 'https://cmsweb.cern.ch/reqmgr/reqMgr'})
    # hoisted: the request name was duplicated in two call sites
    requestName = ('pdmvserv_HIG-Summer12DR53X-01392_T1_ES_PIC_MSS_1_v0'
                   '__130724_063344_7207')
    result = reqmgr.getRequest(requestName)
    workloadDB = Database(result['CouchWorkloadDBName'], result['CouchURL'])
    workloadPickle = workloadDB.getAttachment(requestName, 'spec')
    # NOTE: pickle.loads on couch-stored data -- assumed trusted input
    spec = pickle.loads(workloadPickle)
    workload = WMWorkloadHelper(spec)
    x, y = demPolicy(wmspec=workload, task=workload.getTopLevelTask()[0])
    # print() calls replace the py2-only print statements
    print(x)
    print(y)
def getSplitting(requestName):
    """
    Return the 'events_per_job' job-splitting parameter for the top level
    task of *requestName*.

    The request is looked up in ReqMgr, its pickled workload spec is fetched
    from the request's couch database and unpickled into a workload helper.
    """
    reqmgrUrl = 'https://cmsweb.cern.ch/reqmgr/reqMgr/'
    reqmgr = RequestManager(dict={'endpoint': reqmgrUrl})
    result = reqmgr.getRequest(requestName)
    workloadDB = Database(result['CouchWorkloadDBName'], result['CouchURL'])
    workloadPickle = workloadDB.getAttachment(requestName, 'spec')
    # NOTE: pickle.loads on couch-stored data -- assumed trusted input
    spec = pickle.loads(workloadPickle)
    workload = WMWorkloadHelper(spec)
    # fetch the top-level task once (was fetched twice; the unused
    # jobSplittingAlgorithm() lookup has been dropped)
    topTask = workload.getTopLevelTask()[0]
    params = topTask.jobSplittingParameters()
    return params['events_per_job']
def __init__(self, config):
    """
    Initialise class members
    """
    BaseWorkerThread.__init__(self)
    myThread = threading.currentThread()
    # DAOs bind to the thread-local logger/dbi installed by the agent harness
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=myThread.logger,
                                 dbinterface=myThread.dbi)
    self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=myThread.logger,
                                    dbinterface=myThread.dbi)
    self.config = config
    self.jobCacheDir = self.config.JobCreator.jobCacheDir

    if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
        # Get workqueue setup from config unless overridden
        if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
            self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
        else:
            from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
            self.workQueue = queueFromConfig(self.config)
    else:
        # no workqueue configured for this agent
        self.workQueue = None

    self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
    self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver,
                                               'useReqMgrForCompletionCheck',
                                               True)

    if not self.useReqMgrForCompletionCheck:
        # Tier0 style deployment: sets the local monitor summary couch db
        # and uses it as the "central" writer too
        self.requestLocalCouchDB = RequestDBWriter(
            self.config.AnalyticsDataCollector.localT0RequestDBURL,
            couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
        self.centralCouchDBWriter = self.requestLocalCouchDB
    else:
        self.centralCouchDBWriter = RequestDBWriter(
            self.config.AnalyticsDataCollector.centralRequestDBURL)
        self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
        # TODO: remove this when reqmgr2 replace reqmgr completely (reqmgr2Only)
        self.reqmgrSvc = RequestManager(
            {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

    # Load the cleanout state ID and save it
    stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
    self.stateID = stateIDDAO.execute("cleanout")
    return
def getSiteInfoFromReqMgr(serviceURL):
    """
    get agent info from request mgr

    Returns a one-element error list when the RequestManager at
    *serviceURL* cannot be reached; the error is logged (it was
    previously swallowed silently).
    """
    reqMgr = RequestManager({'endpoint': serviceURL})
    # get information from global queue.
    try:
        queues = reqMgr.getWorkQueue()
    except Exception as ex:  # was py2-only 'except Exception, ex'
        # log the failure, matching the sibling variants of this function
        logging.warning("Error: %s" % str(ex))
        errorInfo = {}
        errorInfo['site_name'] = serviceURL
        return [errorInfo]
def getAgentInfoFromReqMgr(serviceURL):
    """
    get agent info from request mgr

    Returns a one-element error list when the RequestManager at
    *serviceURL* cannot be reached; the error is logged (it was
    previously swallowed silently).
    """
    reqMgr = RequestManager({'endpoint': serviceURL})
    # get information from global queue.
    try:
        gQueues = reqMgr.getWorkQueue()
    except Exception as ex:  # was py2-only 'except Exception, ex'
        # log the failure, matching the sibling variants of this function
        logging.warning("Error: %s" % str(ex))
        errorInfo = {}
        errorInfo['url'] = serviceURL
        errorInfo['status'] = "Request Manager down: %s" % serviceURL
        errorInfo['acdc'] = 'N/A'
        return [errorInfo]
def getRequestInformationAndWorkload(requestName, reqmgrUrl):
    """
    _getRequestInformationAndWorkload_

    Look up *requestName* in the ReqMgr at *reqmgrUrl* and load its full
    pickled workload spec from the request's couch database.

    Returns a (workload helper, request info dict) tuple.
    """
    requestInfo = RequestManager(dict={'endpoint': reqmgrUrl}).getRequest(requestName)
    couchDB = Database(requestInfo['CouchWorkloadDBName'],
                       requestInfo['CouchURL'])
    pickledSpec = couchDB.getAttachment(requestName, 'spec')
    helper = WMWorkloadHelper(pickle.loads(pickledSpec))
    return helper, requestInfo
def getRequestInformationAndWorkload(requestName, reqmgrUrl):
    """
    _getRequestInformationAndWorkload_

    Fetch the ReqMgr record for *requestName* and reconstruct its workload
    from the pickled 'spec' attachment stored in couch.

    Returns a (workload helper, request info dict) tuple.
    """
    reqMgrClient = RequestManager(dict={'endpoint': reqmgrUrl})
    requestRecord = reqMgrClient.getRequest(requestName)
    specDB = Database(requestRecord['CouchWorkloadDBName'],
                      requestRecord['CouchURL'])
    rawSpec = specDB.getAttachment(requestName, 'spec')
    workloadHelper = WMWorkloadHelper(pickle.loads(rawSpec))
    return workloadHelper, requestRecord
def getAgentInfoFromReqMgr(serviceURL):
    """
    get agent info from request mgr

    When *serviceURL* is the literal "local", read the queues directly via
    getGlobalQueues(); otherwise ask the remote RequestManager.  On failure
    a one-element error list is returned.
    """
    reqMgr = RequestManager({'endpoint': serviceURL})
    try:
        if serviceURL.lower() == "local":
            gQueues = getGlobalQueues()
        else:
            gQueues = reqMgr.getWorkQueue()
    except Exception as ex:  # was py2-only 'except Exception, ex'
        logging.warning("Error: %s" % str(ex))
        errorInfo = {}
        errorInfo['url'] = serviceURL
        errorInfo['status'] = "Request Manager down: %s" % serviceURL
        errorInfo['acdc'] = 'N/A'
        return [errorInfo]
def getAgentInfoFromReqMgr(serviceURL):
    """
    get agent info from request mgr

    When *serviceURL* is the literal "local", read the queues directly via
    getGlobalQueues(); otherwise ask the remote RequestManager.  On failure
    a one-element error list is returned.
    """
    reqMgr = RequestManager({'endpoint': serviceURL})
    try:
        if serviceURL.lower() == "local":
            gQueues = getGlobalQueues()
        else:
            gQueues = reqMgr.getWorkQueue()
    except Exception as ex:  # was py2-only 'except Exception, ex'
        logging.warning("Error: %s" % str(ex))
        errorInfo = {}
        errorInfo['url'] = serviceURL
        errorInfo['status'] = "Request Manager down: %s" % serviceURL
        errorInfo['acdc'] = 'N/A'
        return [errorInfo]
def setup(self, parameters): """ Called at startup """ # set the connection for local couchDB call self.useReqMgrForCompletionCheck = getattr( self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True) self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0) self.wmstatsCouchDB = WMStatsWriter( self.config.TaskArchiver.localWMStatsURL, "WMStatsAgent") #TODO: we might need to use local db for Tier0 self.centralRequestDBReader = RequestDBReader( self.config.AnalyticsDataCollector.centralRequestDBURL, couchapp=self.config.AnalyticsDataCollector.RequestCouchApp) if self.useReqMgrForCompletionCheck: self.deletableState = "announced" self.centralRequestDBWriter = RequestDBWriter( self.config.AnalyticsDataCollector.centralRequestDBURL, couchapp=self.config.AnalyticsDataCollector.RequestCouchApp) if self.config.TaskArchiver.reqmgr2Only: self.reqmgr2Svc = ReqMgr( self.config.TaskArchiver.ReqMgr2ServiceURL) else: #TODO: remove this for reqmgr2 self.reqmgrSvc = RequestManager( {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL}) else: # Tier0 case self.deletableState = "completed" # use local for update self.centralRequestDBWriter = RequestDBWriter( self.config.AnalyticsDataCollector.localT0RequestDBURL, couchapp=self.config.AnalyticsDataCollector.RequestCouchApp) jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url'] jobDBName = self.config.JobStateMachine.couchDBName self.jobCouchdb = CouchServer(jobDBurl) self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName) self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName) statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName self.statsumdatabase = self.jobCouchdb.connectDatabase( statSummaryDBName)
def setup(self, parameters): """ Called at startup """ # set the connection for local couchDB call self.useReqMgrForCompletionCheck = getattr( self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True) self.wmstatsCouchDB = WMStatsWriter( self.config.TaskArchiver.localWMStatsURL) self.centralCouchDBReader = WMStatsReader( self.config.TaskArchiver.centralWMStatsURL) if self.useReqMgrForCompletionCheck: self.deletableStates = ["announced"] self.centralCouchDBWriter = WMStatsWriter( self.config.TaskArchiver.centralWMStatsURL) self.reqmgrSvc = RequestManager( {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL}) else: # Tier0 case self.deletableStates = ["completed"] self.centralCouchDBWriter = self.wmstatsCouchDB jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url'] jobDBName = self.config.JobStateMachine.couchDBName self.jobCouchdb = CouchServer(jobDBurl) self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName) self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName) statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName self.statsumdatabase = self.jobCouchdb.connectDatabase( statSummaryDBName)
def setUp(self):
    """
    Start the ReqMgr REST test fixture: couch databases, service clients,
    and one injected request whose name is stored on the instance.
    """
    RESTBaseUnitTest.setUp(self)
    self.testInit.setupCouch("%s" % self.couchDBName, "GroupUser",
                             "ConfigCache", "ReqMgr")
    self.testInit.setupCouch("%s_wmstats" % self.couchDBName, "WMStats")
    # logging stuff from TestInit is broken, setting myself
    l = logging.getLogger()
    l.setLevel(logging.DEBUG)
    self.params = {}
    self.params['endpoint'] = self.config.getServerUrl()
    self.reqService = RequestManagerDS(self.params)
    self.jsonSender = JSONRequests(self.config.getServerUrl())

    userName = '******'
    groupName = 'Li'
    teamName = 'Tang'
    schema = utils.getAndSetupSchema(self, userName=userName,
                                     groupName=groupName,
                                     teamName=teamName)
    schema['ConfigCacheID'] = self.createConfig()
    schema['CouchDBName'] = self.couchDBName
    schema['CouchWorkloadDBName'] = self.couchDBName
    try:
        r = self.jsonSender.put('request', schema)
        try:
            self.requestName = r[0]['RequestName']
        except Exception:  # narrowed from a bare 'except:'
            # older server responses nest the record one level deeper
            self.requestName = r[0].values()[0]['RequestName']
    except Exception:
        msg = traceback.format_exc()
        # print() replaces the py2-only print statement
        print("Exception during set up, reason: %s" % msg)
        # bare raise keeps the original traceback (was 'raise ex')
        raise
def setUp(self):
    """
    Start the ReqMgr REST test fixture: couch databases, service clients,
    and one injected request whose name is stored on the instance.
    """
    RESTBaseUnitTest.setUp(self)
    self.testInit.setupCouch("%s" % self.couchDBName, "GroupUser",
                             "ConfigCache", "ReqMgr")
    self.testInit.setupCouch("%s_wmstats" % self.couchDBName, "WMStats")
    # logging stuff from TestInit is broken, setting myself
    l = logging.getLogger()
    l.setLevel(logging.DEBUG)
    self.params = {}
    self.params['endpoint'] = self.config.getServerUrl()
    self.reqService = RequestManagerDS(self.params)
    self.jsonSender = JSONRequests(self.config.getServerUrl())

    userName = '******'
    groupName = 'Li'
    teamName = 'Tang'
    schema = utils.getAndSetupSchema(self, userName=userName,
                                     groupName=groupName,
                                     teamName=teamName)
    schema['ConfigCacheID'] = self.createConfig()
    schema['CouchDBName'] = self.couchDBName
    schema['CouchWorkloadDBName'] = self.couchDBName
    try:
        r = self.jsonSender.put('request', schema)
        try:
            self.requestName = r[0]['RequestName']
        except Exception:  # narrowed from a bare 'except:'
            # older server responses nest the record one level deeper
            self.requestName = r[0].values()[0]['RequestName']
    except Exception:
        msg = traceback.format_exc()
        print("Exception during set up, reason: %s" % msg)
        # bare raise keeps the original traceback (was 'raise ex')
        raise
def getSiteInfoFromReqMgr(serviceURL):
    """
    get agent info from request mgr

    When *serviceURL* is the literal "local", read the queues directly via
    getGlobalQueues(); otherwise ask the remote RequestManager.  On failure
    the error is logged and a one-element error list returned.
    """
    reqMgr = RequestManager({'endpoint': serviceURL})
    # get information from global queue.
    try:
        if serviceURL.lower() == "local":
            queues = getGlobalQueues()
            ###TODO: add back when GlobalMonitor spins out as a separate application
        else:
            queues = reqMgr.getWorkQueue()
    except Exception as ex:  # was py2-only 'except Exception, ex'
        logging.warning("Error: %s" % str(ex))
        errorInfo = {}
        errorInfo['site_name'] = serviceURL
        return [errorInfo]
def __init__(self, **kwargs):
    """
    Store a logger (falling back to the logging module itself when none is
    supplied), build the ReqMgr client from the remaining kwargs, and start
    with an empty previous-state cache.
    """
    loggerMissing = not kwargs.get('logger')
    if loggerMissing:
        import logging
        kwargs['logger'] = logging
    self.logger = kwargs['logger']
    self.reqMgr = RequestManager(kwargs)
    self.previous_state = {}
def getSiteInfoFromReqMgr(serviceURL):
    """
    get agent info from request mgr

    When *serviceURL* is the literal "local", read the queues directly via
    getGlobalQueues(); otherwise ask the remote RequestManager.  On failure
    the error is logged and a one-element error list returned.
    """
    reqMgr = RequestManager({'endpoint': serviceURL})
    # get information from global queue.
    try:
        if serviceURL.lower() == "local":
            queues = getGlobalQueues()
            ###TODO: add back when GlobalMonitor spins out as a separate application
        else:
            queues = reqMgr.getWorkQueue()
    except Exception as ex:  # was py2-only 'except Exception, ex'
        logging.warning("Error: %s" % str(ex))
        errorInfo = {}
        errorInfo['site_name'] = serviceURL
        return [errorInfo]
def getRequestInfoFromReqMgr(serviceURL):
    """
    get the request info from requestManager

    When *serviceURL* is the literal "local", call the overview/queue
    helpers directly; otherwise query the remote RequestManager.  On
    failure a formatted error payload is returned instead of raising.
    """
    ###TODO: add back when GlobalMonitor spins out as a separate application
    service = RequestManager({'endpoint': serviceURL})
    try:
        ### use request manager funtion directly
        ### TODO: remove this when GlobalMonitor spins out as a separate application
        if serviceURL.lower() == "local":
            baseResults = getOverview()
            urls = getGlobalQueues()
        else:
            baseResults = service.getRequestNames()
            urls = service.getWorkQueue()
    except Exception as ex:  # was py2-only 'except Exception, ex'
        logging.error(str(ex))
        # NOTE: "RequestManger" typo kept verbatim -- callers/monitors may
        # match on this exact message.
        return DFormatter.errorFormatter(serviceURL, "RequestManger Down")
def setUp(self):
    """
    setUP global values
    Database setUp is done in base class
    """
    self.couchDBName = "reqmgr_t_0"
    RESTBaseUnitTest.setUp(self)
    # provision the three couch databases this test suite relies on
    for suffix, couchapps in (("", ("ConfigCache", "ReqMgr")),
                              ("_wmstats", ("WMStats",)),
                              ("_acdc", ("ACDC", "GroupUser"))):
        self.testInit.setupCouch(self.couchDBName + suffix, *couchapps)
    reqMgrHost = self.config.getServerUrl()
    self.jsonSender = JSONRequests(reqMgrHost)
    self.params = {'endpoint': reqMgrHost}
    self.reqService = RequestManager(self.params)
def __init__(self, **kwargs):
    """
    Wire up the old- and new-API ReqMgr clients plus the central LogDB
    used for status reporting; start with an empty previous-state cache.
    """
    if not kwargs.get('logger'):
        import logging
        kwargs['logger'] = logging
    self.logger = kwargs['logger']
    # TODO: (reqmgr2Only - remove this line when reqmgr is replaced)
    self.reqMgr = RequestManager(kwargs)
    # this will break all in one test
    self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))

    centralurl = kwargs.get("central_logdb_url", "")
    identifier = kwargs.get("log_reporter", "")

    # Name the thread before creating the log db,
    # but only when it has not been named already.
    currentThread = threading.currentThread()
    if currentThread.getName() == "MainThread":
        currentThread.setName(self.__class__.__name__)

    self.logdb = LogDB(centralurl, identifier, logger=self.logger)
    self.previous_state = {}
def __init__(self, config):
    """
    Initialise class members
    """
    BaseWorkerThread.__init__(self)
    myThread = threading.currentThread()
    # DAOs bind to the thread-local logger/dbi installed by the agent harness
    self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi)
    self.dbsDaoFactory = DAOFactory(
        package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi
    )
    self.config = config
    self.jobCacheDir = self.config.JobCreator.jobCacheDir

    if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
        # Get workqueue setup from config unless overridden
        if hasattr(self.config.TaskArchiver, "WorkQueueParams"):
            self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
        else:
            from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
            self.workQueue = queueFromConfig(self.config)
    else:
        # no workqueue configured for this agent
        self.workQueue = None

    self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
    self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, "useReqMgrForCompletionCheck", True)

    if not self.useReqMgrForCompletionCheck:
        # Tier0 style deployment: sets the local monitor summary couch db
        # and uses it as the "central" writer too
        self.requestLocalCouchDB = RequestDBWriter(
            self.config.AnalyticsDataCollector.localT0RequestDBURL,
            couchapp=self.config.AnalyticsDataCollector.RequestCouchApp,
        )
        self.centralCouchDBWriter = self.requestLocalCouchDB
    else:
        self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)
        self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
        # TODO: remove this when reqmgr2 replace reqmgr completely (reqmgr2Only)
        self.reqmgrSvc = RequestManager({"endpoint": self.config.TaskArchiver.ReqMgrServiceURL})

    # Load the cleanout state ID and save it
    stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
    self.stateID = stateIDDAO.execute("cleanout")
    return
def setUp(self):
    """
    setUP global values
    """
    RESTBaseUnitTest.setUp(self)
    serverUrl = self.config.getServerUrl()
    self.params = {'endpoint': serverUrl}
    self.reqService = RequestManagerDS(self.params)
    self.jsonSender = JSONRequests(serverUrl)
    self.requestSchema = getRequestSchema()
    # register group, user, membership and software release, in order
    for path in ('group/PeopleLikeMe',
                 'user/[email protected]',
                 'group/PeopleLikeMe/me',
                 'version/CMSSW_3_5_8'):
        self.jsonSender.put(path)
    self.jsonSender.put('request/' + self.requestSchema['RequestName'],
                        self.requestSchema)
def __init__(self, config):
    """
    __init__

    Build the BossAir, ReqMgr and WorkQueue helpers and the WMBS DAOs this
    component uses to track and reprioritise workflows.
    """
    BaseWorkerThread.__init__(self)
    self.config = config
    self.bossAir = BossAirAPI(config=self.config)
    self.reqmgr = RequestManager({'endpoint': self.config.JobUpdater.reqMgrUrl})
    self.workqueue = WorkQueue(self.config.WorkQueueManager.couchurl,
                               self.config.WorkQueueManager.dbname)

    currentThread = threading.currentThread()
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=currentThread.logger,
                                 dbinterface=currentThread.dbi)
    # DAOs: list updatable workflows, push priority changes, count running jobs
    self.listWorkflowsDAO = self.daoFactory(classname="Workflow.ListForJobUpdater")
    self.updateWorkflowPrioDAO = self.daoFactory(classname="Workflow.UpdatePriority")
    self.executingJobsDAO = self.daoFactory(classname="Jobs.GetNumberOfJobsForWorkflowTaskStatus")
def __init__(self, **kwargs):
    """
    Wire up the old- and new-API ReqMgr clients plus the central LogDB
    used for status reporting; start with an empty previous-state cache.
    """
    if not kwargs.get('logger'):
        import logging
        kwargs['logger'] = logging
    self.logger = kwargs['logger']
    self.reqMgr = RequestManager(kwargs)
    self.reqmgr2Only = kwargs.get("reqmgr2_only", False)
    # this will break all in one test
    self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))

    centralurl = kwargs.get("central_logdb_url", "")
    identifier = kwargs.get("log_reporter", "")

    # Name the thread before creating the log db,
    # but only when it has not been named already.
    currentThread = threading.currentThread()
    if currentThread.getName() == "MainThread":
        currentThread.setName(self.__class__.__name__)

    self.logdb = LogDB(centralurl, identifier, logger=self.logger)
    self.previous_state = {}
def setup(self, parameters):
    """
    Called at startup
    """
    self.teamName = self.config.Agent.teamName
    # set the connection for local couchDB call
    self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver,
                                               'useReqMgrForCompletionCheck', True)
    # how long to wait after completion before archiving (0 = immediately)
    self.archiveDelayHours = getattr(self.config.TaskArchiver,
                                     'archiveDelayHours', 0)
    self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL,
                                        "WMStatsAgent")

    #TODO: we might need to use local db for Tier0
    self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                  couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

    if self.useReqMgrForCompletionCheck:
        # production deployment: workflows become deletable once announced
        self.deletableState = "announced"
        self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
        if self.config.TaskArchiver.reqmgr2Only:
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
        else:
            #TODO: remove this for reqmgr2
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
    else:
        # Tier0 case
        self.deletableState = "completed"
        # use local for update
        self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

    # strip credentials from the couch URL before connecting
    jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
    jobDBName = self.config.JobStateMachine.couchDBName
    self.jobCouchdb = CouchServer(jobDBurl)
    self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
    self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
    statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
    self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)
class RequestManagerTest(RESTBaseUnitTest):
    """
    Test RequestMgr Service client
    It will start RequestMgr RESTService
    Server DB is whatever env is set

    This checks whether DS call makes without error and return the results.
    This test only test service call returns without error.
    The correctness of each function is tested in test/python/RequestManager_t/RequestMgr_t.py
    """

    def initialize(self):
        # build the REST server configuration used by the base class
        self.couchDBName = "reqmgr_t_0"
        self.config = RequestManagerConfig(
            'WMCore.HTTPFrontEnd.RequestManager.ReqMgrRESTModel')
        dbUrl = os.environ.get("DATABASE", None)
        self.config.setDBUrl(dbUrl)
        self.config.setFormatter('WMCore.WebTools.RESTFormatter')
        self.config.setupRequestConfig()
        self.config.setupCouchDatabase(dbName=self.couchDBName)
        self.config.setPort(8899)
        self.schemaModules = ["WMCore.RequestManager.RequestDB"]

    def setUp(self):
        """Provision couch, service clients and inject one test request."""
        RESTBaseUnitTest.setUp(self)
        self.testInit.setupCouch("%s" % self.couchDBName, "GroupUser",
                                 "ConfigCache", "ReqMgr")
        self.testInit.setupCouch("%s_wmstats" % self.couchDBName, "WMStats")
        # logging stuff from TestInit is broken, setting myself
        l = logging.getLogger()
        l.setLevel(logging.DEBUG)
        self.params = {}
        self.params['endpoint'] = self.config.getServerUrl()
        self.reqService = RequestManagerDS(self.params)
        self.jsonSender = JSONRequests(self.config.getServerUrl())

        userName = '******'
        groupName = 'Li'
        teamName = 'Tang'
        schema = utils.getAndSetupSchema(self, userName=userName,
                                         groupName=groupName,
                                         teamName=teamName)
        schema['ConfigCacheID'] = self.createConfig()
        schema['CouchDBName'] = self.couchDBName
        schema['CouchWorkloadDBName'] = self.couchDBName
        try:
            r = self.jsonSender.put('request', schema)
            try:
                self.requestName = r[0]['RequestName']
            except Exception:  # narrowed from a bare 'except:'
                # older server responses nest the record one level deeper
                self.requestName = r[0].values()[0]['RequestName']
        except Exception:
            msg = traceback.format_exc()
            # print() replaces the py2-only print statement
            print("Exception during set up, reason: %s" % msg)
            # bare raise keeps the original traceback (was 'raise ex')
            raise

    def tearDown(self):
        self.config.deleteWorkloadCache()
        RESTBaseUnitTest.tearDown(self)
        self.testInit.tearDownCouch()

    def createConfig(self, bad=False):
        """
        _createConfig_

        Create a config of some sort that we can load out of ConfigCache
        """
        PSetTweak = {'process': {'outputModules_': ['ThisIsAName'],
                                 'ThisIsAName': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}}}}
        BadTweak = {'process': {'outputModules_': ['ThisIsAName1', 'ThisIsAName2'],
                                'ThisIsAName1': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}},
                                'ThisIsAName2': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}}}}
        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName=self.couchDBName)
        configCache.createUserGroup(groupname="testGroup", username='******')
        if bad:
            configCache.setPSetTweaks(PSetTweak=BadTweak)
        else:
            configCache.setPSetTweaks(PSetTweak=PSetTweak)
        configCache.save()
        return configCache.getCouchID()

    @attr("integration")
    def testA_RequestManagerService(self):
        requestName = self.requestName

        request = self.reqService.getRequest(requestName)
        # minimal test : it's return type and the some value inside
        self.assertEqual(type(request), dict)
        self.assertTrue(len(request) > 0)

        # Test putTeam
        self.reqService.putTeam("team_usa")
        self.assertTrue('team_usa' in self.jsonSender.get('team')[0])
        self.jsonSender.put('assignment/%s/%s' % ("team_usa", requestName))

        request = self.reqService.getAssignment(teamName="team_usa")
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)

        request = self.reqService.getAssignment(request=requestName)
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)

        self.reqService.sendMessage(requestName, "error")
        self.reqService.putWorkQueue(requestName, "http://test_url")
        self.reqService.reportRequestProgress(requestName,
                                              percent_complete=100,
                                              percent_success=90)
        self.reqService.updateRequestStatus(requestName, "running-open")
class ReqMgrTester(object):
    """
    Small command-line helper around a ReqMgr instance: query, create,
    delete requests and walk one request through its state transitions.
    All py2-only print statements have been converted to print() calls.
    """

    def __init__(self, reqMgrUrl):
        self.reqMgrUrl = reqMgrUrl
        self.restSender = JSONRequests(reqMgrUrl)
        d = dict(endpoint=self.reqMgrUrl)
        self.reqMgrService = RequestManager(d)

    def queryAllRequests(self):
        """
        Returns all requests stored at ReqMgr instance.
        """
        logging.info("Querying all requests at ReqMgr instance ...")
        r = self.reqMgrService.getRequestNames()
        print("Found %s requests:" % len(r))
        for req in r:
            print(req)

    def queryRequest(self, requestName):
        """
        Query a specific request according to the input argument.
        """
        urlQuery = "request/%s" % requestName
        logging.info("Querying request '%s'" % requestName)
        logging.info("Query: '%s':" % urlQuery)
        r = self.restSender.get(urlQuery)
        print(str(r))

    def createRequests(self, numRequests):
        """
        Inject new numRequests into ReqMgr instance.
        (see ReqMgr_t testE how to create a request)
        """
        logging.info("Creating %s new requests ..." % numRequests)
        schema = ReReco.getTestArguments()
        schema['RequestName'] = 'TestReReco'
        schema['RequestType'] = 'ReReco'
        schema['CmsPath'] = "/uscmst1/prod/sw/cms"
        schema['Requestor'] = '%s' % "zmaxa"
        schema['Group'] = '%s' % "DATAOPS"
        schema['BlockWhitelist'] = ['/dataset/dataset/dataset#alpha']
        schema['BlockBlacklist'] = ['/dataset/dataset/dataset#beta']
        schema['Campaign'] = 'MyTestCampaign'
        for i in range(numRequests):
            urlQuery = "request/testRequest"
            print("Query: '%s':" % urlQuery)
            print("Schema (request): '%s'" % schema)
            r = self.restSender.put(urlQuery, schema)
            # print("request creating response: ", r)
            print("created: ", r[0]["RequestName"])

    def deleteRequest(self, requestNames):
        """
        Delete requests specified in the input, more request
        names are comma-separated.
        """
        logging.info("Deleting requests ...")
        for reqName in requestNames.split(','):
            reqName = reqName.strip()
            urlQuery = "request/%s" % reqName
            logging.info("Deleting request (request_name): '%s'" % reqName)
            logging.info("Query: '%s':" % urlQuery)
            r = self.restSender.delete(urlQuery)

    def requestChangeStates(self, reqName):
        """
        Route the request (spec. by the request name) in the input through
        a series of possible request states.
        """
        logging.info("Changing state of request %s ..." % reqName)

        def changeState(requestName, urlQuery):
            logging.info("Query: '%s' ..." % urlQuery)
            r = self.restSender.put(urlQuery)
            r = self.restSender.get("request/%s" % requestName)
            # assert r[0]["RequestStatus"] == statusName
            logging.info("Querying modified request, new state: %s" %
                         r[0]["RequestStatus"])

        # once a request is created, it's in 'new' state
        # states transition has to be an allowed one as defined here:
        # WMCore/RequestManager/RequestDB/Settings/RequestStatus.py
        statesQueries = ["request/%s?status=%s" % (reqName, "testing-approved"),
                         "request/%s?status=%s" % (reqName, "testing"),
                         "request/%s?status=%s" % (reqName, "tested"),
                         "request/%s?status=%s" % (reqName, "assignment-approved"),
                         # create an assignment now
                         # need quote because of space in the team name
                         # (previous name - White Sox)
                         urllib.quote("assignment/%s/%s" % (TEAM_NAME, reqName))]
        for query in statesQueries:
            changeState(reqName, query)

    def setup(self):
        """
        Setup ReqMgr instance for dealing with requests - needs to create
        a user, group, SW releases entries, etc. as done in
        test/python/WMCore_t/RequestManager_t/ReqMgr_t.py
        """
        logging.info("ReqMgr setup ...")

        def doQuery(urlQuery):
            logging.info("Query: '%s' ..." % urlQuery)
            r = None
            try:
                r = self.restSender.put(urlQuery)
            except Exception as ex:
                print("exception")
                print(str(ex))
            print("response:", r)

        # NOTE: urllib.quote is py2; under py3 this is urllib.parse.quote
        queries = ["user/[email protected]",
                   "group/DATAOPS",
                   "group/DATAOPS/zmaxa",
                   urllib.quote("team/" + TEAM_NAME),
                   "version/%s" % "CMSSW_3_5_8"]
        for q in queries:
            doQuery(q)
        logging.info("ReqMgr setup finished, listing known users ...")
        q = "user/"
        r = self.restSender.get(q)
        print(r)
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch: raise an exception on couch failure instead of ignoring
    """

    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)
        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            # no WorkQueue configured; completeTasks skips the notification step
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver,
                                                   'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            #sets the local monitor summary couch db
            # Tier0 style setup: local request DB doubles as the central writer
            self.requestLocalCouchDB = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                       couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            #TODO: remove this when reqmgr2 replace reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

        #Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters=None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp) = self.getFinishedWorkflows()
            # set the data cache which can be used other thread (no ther thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            # roll back any open WMBS transaction before propagating
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()

        myThread.transaction.begin()
        #Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(classname="Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut=self.timeout)
        logging.info("Finished subscriptions updated")
        myThread.transaction.commit()

        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. combined those and make return
           finishedwfs - without LogCollect and CleanUp task
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp task
        """
        finishedWorkflowsDAO = self.daoFactory(classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(onlySecondary=True)

        # keep only the secondary (LogCollect/CleanUp) workflows that are
        # also fully finished
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:

        1. Notify the WorkQueue about finished subscriptions
        2. update dbsbuffer_workflow table with finished subscription
        """
        #Only delete those where the upload and notification succeeded
        logging.info("Found %d candidate workflows for completing: %s" % (len(finishedwfs), finishedwfs.keys()))
        # update the completed flag in dbsbuffer_workflow table so blocks can be closed
        # create updateDBSBufferWorkflowComplete DAO
        if len(finishedwfs) == 0:
            return
        completedWorkflowsDAO = self.dbsDaoFactory(classname="UpdateWorkflowsToCompleted")

        centralCouchAlive = True
        try:
            #TODO: need to enable when reqmgr2 -wmstats is ready
            #abortedWorkflows = self.reqmgrCouchDBWriter.getRequestByStatus(["aborted"], format = "dict");
            abortedWorkflows = self.centralCouchDBWriter.getRequestByStatus(["aborted"])
            logging.info("There are %d requests in 'aborted' status in central couch." % len(abortedWorkflows))
            forceCompleteWorkflows = self.centralCouchDBWriter.getRequestByStatus(["force-complete"])
            logging.info("List of 'force-complete' workflows in central couch: %s" % forceCompleteWorkflows)
        except Exception as ex:
            # central couch unreachable: skip this cycle rather than fail
            centralCouchAlive = False
            logging.error("we will try again when remote couch server comes back\n%s" % str(ex))

        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    #Notify the WorkQueue, if there is one
                    if self.workQueue != None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)

                    #Now we know the workflow as a whole is gone, we can delete the information from couch
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(workflow, "completed")
                        logging.info("status updated to completed %s" % workflow)

                    if workflow in abortedWorkflows:
                        #TODO: remove when reqmgr2-wmstats deployed
                        newState = "aborted-completed"
                    elif workflow in forceCompleteWorkflows:
                        newState = "completed"
                    else:
                        newState = None

                    if newState != None:
                        # update reqmgr workload document only request mgr is installed
                        if not self.useReqMgrForCompletionCheck:
                            # commented out untill all the agent is updated so every request have new state
                            # TODO: agent should be able to write reqmgr db diretly add the right group in
                            # reqmgr
                            self.requestLocalCouchDB.updateRequestStatus(workflow, newState)
                        else:
                            try:
                                #TODO: try reqmgr1 call if it fails (reqmgr2Only - remove this line when reqmgr is replaced)
                                logging.info("Updating status to '%s' in both oracle and couchdb ..." % newState)
                                self.reqmgrSvc.updateRequestStatus(workflow, newState)
                                #And replace with this - remove all the excption
                                #self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                            except httplib.HTTPException as ex:
                                # If we get an HTTPException of 404 means reqmgr2 request
                                if ex.status == 404:
                                    # try reqmgr2 call
                                    msg = "%s : reqmgr2 request: %s" % (workflow, str(ex))
                                    logging.warning(msg)
                                    self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                                else:
                                    msg = "%s : fail to update status %s with HTTP error: %s" % (workflow, newState, str(ex))
                                    logging.error(msg)
                                    raise ex

                        logging.info("status updated to '%s' : %s" % (newState, workflow))

                    completedWorkflowsDAO.execute([workflow])

                except TaskArchiverPollerException as ex:
                    #Something didn't go well when notifying the workqueue, abort!!!
                    logging.error("Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    #Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """
        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId=sub)
            except WorkQueueNoMatchingElements:
                #Subscription wasn't known to WorkQueue, feel free to clean up
                logging.debug("Local WorkQueue knows nothing about this subscription: %s" % sub)
                pass
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)
        return
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according the the given condition.
    1. Cleans local couch db when request is completed and reported to cental db.
       This will clean up local couchdb, local summary db, local queue
    2. Cleans old couchdoc which is created older than the time threshold
    """

    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # set the workqueue service for REST call
        self.config = config

    def setup(self, parameters):
        """
        Called at startup
        """
        self.teamName = self.config.Agent.teamName
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver,
                                                   'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL,
                                            "WMStatsAgent")

        #TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        if self.useReqMgrForCompletionCheck:
            # production agent: archive requests once they are 'announced'
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            if self.config.TaskArchiver.reqmgr2Only:
                self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            else:
                #TODO: remove this for reqmgr2
                self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        Get information from wmbs, workqueue and local couch.
          - It deletes old wmstats docs
          - Archive workflows
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted" % report)

            # archiving only workflows that I own (same team)
            logging.info("Getting requests in '%s' state for team '%s'" % (self.deletableState,
                                                                           self.teamName))
            # archiveDelayHours keeps requests around for a grace period
            endTime = int(time.time()) - self.archiveDelayHours * 3600
            wfs = self.centralRequestDBReader.getRequestByTeamAndStatus(self.teamName,
                                                                        self.deletableState)
            commonWfs = self.centralRequestDBReader.getRequestByStatusAndStartTime(self.deletableState,
                                                                                   False, endTime)
            deletableWorkflows = list(set(wfs) & set(commonWfs))
            logging.info("Ready to archive normal %s workflows" % len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows" % numUpdated)

            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows" % len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows" % numUpdated)

            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows" % len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows" % numUpdated)

        except Exception as ex:
            # best-effort poller cycle: log and retry next time
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")

    def archiveWorkflows(self, workflows, archiveState):
        # Archive each workflow whose local couch cleanup succeeds;
        # returns the number of workflows whose ReqMgr state was updated.
        updated = 0
        for workflowName in workflows:
            if self.cleanAllLocalCouchDB(workflowName):
                if self.useReqMgrForCompletionCheck:
                    if self.config.TaskArchiver.reqmgr2Only:
                        self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                    else:
                        self.reqmgrSvc.updateRequestStatus(workflowName, archiveState);
                    updated += 1
                    logging.debug("status updated to %s %s" % (archiveState, workflowName))
                else:
                    # Tier0: write straight to the (local) request DB
                    self.centralRequestDBWriter.updateRequestStatus(workflowName, archiveState)
        return updated

    def deleteWorkflowFromJobCouch(self, workflowName, db):
        """
        _deleteWorkflowFromCouch_

        If we are asked to delete the workflow from couch, delete it
        to clear up some space.

        Load the document IDs and revisions out of couch by workflowName,
        then order a delete on them.

        db is one of "JobDump", "FWJRDump", "SummaryStats", "WMStatsAgent";
        returns a dict with 'status' ('ok'/'warning'/'error') and 'message'.
        """
        if db == "JobDump":
            couchDB = self.jobsdatabase
            view = "jobsByWorkflowName"
        elif db == "FWJRDump":
            couchDB = self.fwjrdatabase
            view = "fwjrsByWorkflowName"
        elif db == "SummaryStats":
            couchDB = self.statsumdatabase
            view = None
        elif db == "WMStatsAgent":
            couchDB = self.wmstatsCouchDB.getDBInstance()
            view = "jobsByStatusWorkflow"

        if view == None:
            # single summary doc keyed by workflow name: delete it directly
            try:
                committed = couchDB.delete_doc(workflowName)
            except CouchNotFoundError as ex:
                return {'status': 'warning', 'message': "%s: %s" % (workflowName, str(ex))}
        else:
            # collect all docs for this workflow from the view, then bulk-delete
            options = {"startkey": [workflowName], "endkey": [workflowName, {}], "reduce": False}
            try:
                jobs = couchDB.loadView(db, view, options=options)['rows']
            except Exception as ex:
                errorMsg = "Error on loading jobs for %s" % workflowName
                logging.warning("%s/n%s" % (str(ex), errorMsg))
                return {'status': 'error', 'message': errorMsg}

            for j in jobs:
                doc = {}
                doc["_id"] = j['value']['id']
                doc["_rev"] = j['value']['rev']
                couchDB.queueDelete(doc)
            committed = couchDB.commit()

        if committed:
            #create the error report
            errorReport = {}
            deleted = 0
            status = "ok"
            for data in committed:
                if 'error' in data:
                    errorReport.setdefault(data['error'], 0)
                    errorReport[data['error']] += 1
                    status = "error"
                else:
                    deleted += 1
            return {'status': status, 'delete': deleted, 'message': errorReport}
        else:
            return {'status': 'warning', 'message': "no %s exist" % workflowName}

    def cleanAllLocalCouchDB(self, workflowName):
        # Delete the workflow's documents from all four local couch DBs;
        # returns False if any of the critical deletions reported an error.
        logging.info("Deleting %s from JobCouch" % workflowName)

        jobReport = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
        logging.debug("%s docs deleted from JobDump" % jobReport)

        fwjrReport = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
        logging.debug("%s docs deleted from FWJRDump" % fwjrReport)

        summaryReport = self.deleteWorkflowFromJobCouch(workflowName, "SummaryStats")
        logging.debug("%s docs deleted from SummaryStats" % summaryReport)

        wmstatsReport = self.deleteWorkflowFromJobCouch(workflowName, "WMStatsAgent")
        logging.debug("%s docs deleted from wmagent_summary" % wmstatsReport)

        # if one of the procedure fails return False
        # NOTE(review): summaryReport is deliberately(?) not checked here — confirm
        if (jobReport["status"] == "error" or fwjrReport["status"] == "error" or
                wmstatsReport["status"] == "error"):
            return False
        # other wise return True.
        return True
class WorkQueueReqMgrInterface():
    """
    Helper class for ReqMgr interaction.

    Synchronizes a local WorkQueue with RequestManager/ReqMgr2: pulls new
    assigned requests in, feeds extra blocks to running-open requests,
    reports element status back and deletes finished work.
    """

    def __init__(self, **kwargs):
        if not kwargs.get('logger'):
            import logging
            kwargs['logger'] = logging
        self.logger = kwargs['logger']
        #TODO: (reqmgr2Only - remove this line when reqmgr is replaced)
        self.reqMgr = RequestManager(kwargs)
        #this will break all in one test
        self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))
        centralurl = kwargs.get("central_logdb_url", "")
        identifier = kwargs.get("log_reporter", "")
        # set the thread name before creat the log db.
        # only sets that when it is not set already
        myThread = threading.currentThread()
        if myThread.getName() == "MainThread":
            myThread.setName(self.__class__.__name__)
        self.logdb = LogDB(centralurl, identifier, logger=self.logger)
        self.previous_state = {}

    def __call__(self, queue):
        """Synchronize WorkQueue and RequestManager"""
        msg = ''
        try:    # pull in new work
            work = self.queueNewRequests(queue)
            msg += "New Work: %d\n" % work
        except Exception:
            self.logger.exception("Error caught during RequestManager pull")

        try:    # get additional open-running work
            extraWork = self.addNewElementsToOpenRequests(queue)
            msg += "Work added: %d\n" % extraWork
        except Exception:
            self.logger.exception("Error caught during RequestManager split")

        try:    # report back to ReqMgr
            uptodate_elements = self.report(queue)
            msg += "Updated ReqMgr status for: %s\n" % ", ".join([x['RequestName'] for x in uptodate_elements])
        except Exception:
            self.logger.exception("Error caught during RequestManager update")
        else:
            try:    # Delete finished requests from WorkQueue
                self.deleteFinishedWork(queue, uptodate_elements)
            except Exception:
                self.logger.exception("Error caught during work deletion")

        queue.backend.recordTaskActivity('reqmgr_sync', msg)

    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            workLoads = self.getAvailableRequests(queue.params['Teams'])
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check its not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(None, "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl, request=reqName, team=team)
                self.logdb.delete(reqName, "error", this_thread=True)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - report back to ReqMgr
                self.logger.info('Permanent failure processing request "%s": %s' % (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" % reqName)
                self.reportRequestStatus(reqName, 'Failed', message=str(ex))
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                      '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                      '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            try:
                self.reportRequestStatus(reqName, "acquired")
            except Exception as ex:
                # state update failure is non-fatal: work is already queued
                self.logger.warning("Unable to update ReqMgr state: %s" % str(ex))
                self.logger.warning('Will try again later')

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

        self.logger.info("%s element(s) obtained from RequestManager" % work)
        return work

    def report(self, queue):
        """Report queue status to ReqMgr."""
        new_state = {}
        uptodate_elements = []
        now = time.time()

        elements = queue.statusInbox(dictKey="RequestName")
        if not elements:
            return new_state

        for ele in elements:
            ele = elements[ele][0]  # 1 element tuple
            try:
                request = self.reqMgr2.getRequestByNames(ele['RequestName'])
                if not request:
                    msg = 'Failed to get request "%s" from ReqMgr2. Will try again later.' % ele['RequestName']
                    self.logger.warning(msg)
                    continue
                request = request[ele['RequestName']]
                if request['RequestStatus'] in ('failed', 'completed', 'announced',
                                                'epic-FAILED', 'closed-out', 'rejected'):
                    # requests can be done in reqmgr but running in workqueue
                    # if request has been closed but agent cleanup actions
                    # haven't been run (or agent has been retired)
                    # Prune out obviously too old ones to avoid build up
                    if queue.params.get('reqmgrCompleteGraceTime', -1) > 0:
                        if (now - float(ele.updatetime)) > queue.params['reqmgrCompleteGraceTime']:
                            # have to check all elements are at least running and are old enough
                            request_elements = queue.statusInbox(WorkflowName=request['RequestName'])
                            if not any([x for x in request_elements
                                        if x['Status'] != 'Running' and not x.inEndState()]):
                                last_update = max([float(x.updatetime) for x in request_elements])
                                if (now - last_update) > queue.params['reqmgrCompleteGraceTime']:
                                    self.logger.info("Finishing request %s as it is done in reqmgr" % request['RequestName'])
                                    queue.doneWork(WorkflowName=request['RequestName'])
                                    continue
                    else:
                        pass  # assume workqueue status will catch up later
                elif request['RequestStatus'] == 'aborted' or request['RequestStatus'] == 'force-complete':
                    queue.cancelWork(WorkflowName=request['RequestName'])
                # Check consistency of running-open/closed and the element closure status
                elif request['RequestStatus'] == 'running-open' and not ele.get('OpenForNewData', False):
                    self.reportRequestStatus(ele['RequestName'], 'running-closed')
                elif request['RequestStatus'] == 'running-closed' and ele.get('OpenForNewData', False):
                    queue.closeWork(ele['RequestName'])
                # update request status if necessary
                elif ele['Status'] not in self._reqMgrToWorkQueueStatus(request['RequestStatus']):
                    self.reportElement(ele)

                uptodate_elements.append(ele)
            except Exception as ex:
                msg = 'Error talking to ReqMgr about request "%s": %s'
                traceMsg = traceback.format_exc()
                self.logger.error(msg % (ele['RequestName'], traceMsg))

        return uptodate_elements

    def deleteFinishedWork(self, queue, elements):
        """Delete work from queue that is finished in ReqMgr"""
        finished = []
        for element in elements:
            if self._workQueueToReqMgrStatus(element['Status']) in ('aborted', 'failed', 'completed', 'announced',
                                                                    'epic-FAILED', 'closed-out', 'rejected') \
                    and element.inEndState():
                finished.append(element['RequestName'])
        return queue.deleteWorkflows(*finished)

    def _getRequestsByTeamsAndStatus(self, status, teams=None):
        """
        TODO: now it assumes one team per requests - check whether this assumption is correct
        Check whether we actually use the team for this.
        Also switch to byteamandstatus couch call instead of

        Returns the requests in the given status, optionally filtered to
        those whose first team is in `teams`.
        """
        # NOTE: default changed from a mutable [] to None (same behavior,
        # avoids the shared-mutable-default pitfall)
        requests = self.reqMgr2.getRequestByStatus(status)
        #Then sort by Team name then sort by Priority
        #https://docs.python.org/2/howto/sorting.html
        if teams and len(teams) > 0:
            results = {}
            for reqName, value in requests.items():
                if value["Teams"][0] in teams:
                    results[reqName] = value
            return results
        else:
            return requests

    def getAvailableRequests(self, teams):
        """
        Get available requests for the given teams and sort by team and priority
        returns [(team, request_name, request_spec_url)]
        """
        tempResults = self._getRequestsByTeamsAndStatus("assigned", teams).values()
        filteredResults = []
        for request in tempResults:
            if "Teams" in request and len(request["Teams"]) == 1:
                filteredResults.append(request)
                self.logdb.delete(request["RequestName"], "error", this_thread=True)
            else:
                msg = "no team or more than one team (%s) are assigined: %s" % (
                    request.get("Teams", None), request["RequestName"])
                self.logger.error(msg)
                self.logdb.post(request["RequestName"], msg, 'error')
        # stable double sort: by priority (desc) first, then by team, so
        # results end up grouped by team, priority-ordered within a team
        filteredResults.sort(key=itemgetter('RequestPriority'), reverse=True)
        filteredResults.sort(key=lambda r: r["Teams"][0])

        results = [(x["Teams"][0], x["RequestName"], x["RequestWorkflow"]) for x in filteredResults]
        return results

    def reportRequestStatus(self, request, status, message=None):
        """Change state in RequestManager
           Optionally, take a message to append to the request
        """
        if message:
            self.logdb.post(request, str(message), 'info')
        reqmgrStatus = self._workQueueToReqMgrStatus(status)

        if reqmgrStatus:  # only send known states
            try:
                #TODO: try reqmgr1 call if it fails (reqmgr2Only - remove this line when reqmgr is replaced)
                self.reqMgr.reportRequestStatus(request, reqmgrStatus)
                # And replace with this (remove all Exceptins)
                #self.reqMgr2.updateRequestStatus(request, reqmgrStatus)
            except HTTPException as ex:
                # If we get an HTTPException of 404 means reqmgr2 request
                if ex.status == 404:
                    # try reqmgr2 call
                    msg = "%s : reqmgr2 request: %s" % (request, str(ex))
                    self.logdb.post(request, msg, 'info')
                    self.reqMgr2.updateRequestStatus(request, reqmgrStatus)
                else:
                    msg = "%s : fail to update status with HTTP error: %s" % (request, str(ex))
                    self.logdb.post(request, msg, 'warning')
                    raise ex
            except Exception as ex:
                msg = "%s : fail to update status will try later: %s" % (request, str(ex))
                self.logdb.post(request, msg, 'warning')
                raise ex

    def markAcquired(self, request, url=None):
        """Mark request acquired"""
        self.reqMgr.putWorkQueue(request, url)

    def _workQueueToReqMgrStatus(self, status):
        """Map WorkQueue Status to that reported to ReqMgr"""
        statusMapping = {'Acquired': 'acquired',
                         'Running': 'running-open',
                         'Failed': 'failed',
                         'Canceled': 'aborted',
                         'CancelRequested': 'aborted',
                         'Done': 'completed'}
        if status in statusMapping:
            # if wq status passed convert to reqmgr status
            return statusMapping[status]
        elif status in REQUEST_STATE_LIST:
            # if reqmgr status passed return reqmgr status
            return status
        else:
            # unknown status
            return None

    def _reqMgrToWorkQueueStatus(self, status):
        """Map ReqMgr status to that in a WorkQueue element, it is not a 1-1 relation"""
        statusMapping = {'acquired': ['Acquired'],
                         'running': ['Running'],
                         'running-open': ['Running'],
                         'running-closed': ['Running'],
                         'failed': ['Failed'],
                         'aborted': ['Canceled', 'CancelRequested'],
                         'force-complete': ['Canceled', 'CancelRequested'],
                         'completed': ['Done']}
        if status in statusMapping:
            return statusMapping[status]
        else:
            return []

    def reportElement(self, element):
        """Report element to ReqMgr"""
        self.reportRequestStatus(element['RequestName'], element['Status'])

    def addNewElementsToOpenRequests(self, queue):
        """Add new elements to open requests which are in running-open state, only works adding new blocks from the input dataset"""
        self.logger.info("Checking Request Manager for open requests and closing old ones")

        # First close any open inbox element which hasn't found anything new in a while
        queue.closeWork()
        self.report(queue)

        work = 0
        requests = []

        # Drain mode, don't pull any work into open requests. They will be closed if the queue stays in drain long enough
        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            requests = self._getRequestsByTeamsAndStatus("running-open", queue.params['Teams']).keys()
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for reqName in requests:
            try:
                self.logger.info("Processing request %s" % (reqName))
                units = queue.addWork(requestName=reqName)
                # BUG FIX: previously referenced undefined name `request`
                # (request["RequestName"]) which raised NameError and sent
                # every request down the generic-exception path
                self.logdb.delete(reqName, 'error', this_thread=True)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - but at least it was split the first time. Log and skip.
                msg = 'Error adding further work to request "%s". Will try again later' \
                      '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                      '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                      '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units

        self.logger.info("%s element(s) added to open requests" % work)
        return work
class WorkQueueReqMgrInterface():
    """Helper class for ReqMgr interaction.

    Pulls assigned/running-open requests from ReqMgr(2) into the local
    WorkQueue, reports queue element status back, and deletes finished work.
    """

    def __init__(self, **kwargs):
        # Fall back to the root logging module when no logger is supplied.
        if not kwargs.get('logger'):
            import logging
            kwargs['logger'] = logging
        self.logger = kwargs['logger']
        # reqmgr1 REST client; kwargs doubles as its configuration dict
        self.reqMgr = RequestManager(kwargs)
        self.reqmgr2Only = kwargs.get("reqmgr2_only", False)
        #this will break all in one test
        self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))
        centralurl = kwargs.get("central_logdb_url", "")
        identifier = kwargs.get("log_reporter", "")

        # set the thread name before creat the log db.
        # only sets that when it is not set already
        myThread = threading.currentThread()
        if myThread.getName() == "MainThread":
            myThread.setName(self.__class__.__name__)

        self.logdb = LogDB(centralurl, identifier, logger=self.logger)
        self.previous_state = {}

    def __call__(self, queue):
        """Synchronize WorkQueue and RequestManager.

        Each phase is individually guarded so one failure does not stop the
        others; a summary message is recorded as task activity at the end.
        """
        msg = ''
        try:  # pull in new work
            work = self.queueNewRequests(queue)
            msg += "New Work: %d\n" % work
        except Exception:
            self.logger.exception("Error caught during RequestManager pull")

        try:  # get additional open-running work
            extraWork = self.addNewElementsToOpenRequests(queue)
            msg += "Work added: %d\n" % extraWork
        except Exception:
            self.logger.exception("Error caught during RequestManager split")

        try:  # report back to ReqMgr
            uptodate_elements = self.report(queue)
            msg += "Updated ReqMgr status for: %s\n" % ", ".join([x['RequestName'] for x in uptodate_elements])
        # NOTE(review): bare except also traps SystemExit/KeyboardInterrupt;
        # consider narrowing to 'except Exception:'.
        except:
            self.logger.exception("Error caught during RequestManager update")
        else:
            # deletion only runs when the report phase succeeded
            try:  # Delete finished requests from WorkQueue
                self.deleteFinishedWork(queue, uptodate_elements)
            except:
                self.logger.exception("Error caught during work deletion")

        queue.backend.recordTaskActivity('reqmgr_sync', msg)

    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue.

        Returns the number of work units queued; 0 in drain mode or when
        ReqMgr cannot be contacted.
        """
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            workLoads = self.getAvailableRequests(queue.params['Teams'])
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
            # try:
            #    self.reportRequestStatus(reqName, "negotiating")
            # except Exception, ex:
            #    self.logger.error("""
            #        Unable to update ReqMgr state to negotiating: %s
            #        Ignoring this request: %s""" % (str(ex), reqName))
            #    continue
            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex:
                    # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check its not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(None, "Workflow url validation error: %s" % str(ex))
                        raise error
                self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl, request=reqName, team=team)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - report back to ReqMgr
                self.logger.info('Permanent failure processing request "%s": %s' % (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" % reqName)
                self.reportRequestStatus(reqName, 'Failed', message=str(ex))
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                      '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                      '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            try:
                self.reportRequestStatus(reqName, "acquired")
            except Exception as ex:
                # not fatal: element status will be re-reported later
                self.logger.warning("Unable to update ReqMgr state: %s" % str(ex))
                self.logger.warning('Will try again later')

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units
        self.logger.info("%s element(s) obtained from RequestManager" % work)
        return work

    def report(self, queue):
        """Report queue status to ReqMgr.

        Returns the list of inbox elements whose status was synchronized.
        NOTE(review): returns a dict ({}) when the inbox is empty but a list
        otherwise - callers only iterate, so both work, but the types differ.
        """
        new_state = {}
        uptodate_elements = []
        now = time.time()

        elements = queue.statusInbox(dictKey="RequestName")
        if not elements:
            return new_state

        for ele in elements:
            ele = elements[ele][0]  # 1 element tuple
            try:
                request = self.reqMgr2.getRequestByNames(ele['RequestName'])[ele['RequestName']]

                if request['RequestStatus'] in ('failed', 'completed', 'announced',
                                                'epic-FAILED', 'closed-out', 'rejected'):
                    # requests can be done in reqmgr but running in workqueue
                    # if request has been closed but agent cleanup actions
                    # haven't been run (or agent has been retired)
                    # Prune out obviously too old ones to avoid build up
                    if queue.params.get('reqmgrCompleteGraceTime', -1) > 0:
                        if (now - float(ele.updatetime)) > queue.params['reqmgrCompleteGraceTime']:
                            # have to check all elements are at least running and are old enough
                            request_elements = queue.statusInbox(WorkflowName=request['RequestName'])
                            if not any([x for x in request_elements if x['Status'] != 'Running' and not x.inEndState()]):
                                last_update = max([float(x.updatetime) for x in request_elements])
                                if (now - last_update) > queue.params['reqmgrCompleteGraceTime']:
                                    self.logger.info("Finishing request %s as it is done in reqmgr" % request['RequestName'])
                                    queue.doneWork(WorkflowName=request['RequestName'])
                                    continue
                    else:
                        pass  # assume workqueue status will catch up later
                elif request['RequestStatus'] == 'aborted' or request['RequestStatus'] == 'force-complete':
                    queue.cancelWork(WorkflowName=request['RequestName'])
                # Check consistency of running-open/closed and the element closure status
                elif request['RequestStatus'] == 'running-open' and not ele.get('OpenForNewData', False):
                    self.reportRequestStatus(ele['RequestName'], 'running-closed')
                elif request['RequestStatus'] == 'running-closed' and ele.get('OpenForNewData', False):
                    queue.closeWork(ele['RequestName'])
                # update request status if necessary
                elif ele['Status'] not in self._reqMgrToWorkQueueStatus(request['RequestStatus']):
                    self.reportElement(ele)

                uptodate_elements.append(ele)
            except Exception as ex:
                msg = 'Error talking to ReqMgr about request "%s": %s'
                traceMsg = traceback.format_exc()
                self.logger.error(msg % (ele['RequestName'], traceMsg))

        return uptodate_elements

    def deleteFinishedWork(self, queue, elements):
        """Delete work from queue that is finished in ReqMgr."""
        finished = []
        for element in elements:
            if self._workQueueToReqMgrStatus(element['Status']) in ('aborted', 'failed', 'completed', 'announced',
                                                                    'epic-FAILED', 'closed-out', 'rejected') \
                    and element.inEndState():
                finished.append(element['RequestName'])
        return queue.deleteWorkflows(*finished)

    # NOTE(review): mutable default argument 'teams=[]' - harmless here since
    # it is never mutated, but 'teams=None' would be safer.
    def _getRequestsByTeamsAndStatus(self, status, teams=[]):
        """
        TODO: now it assumes one team per requests - check whether this assumption is correct
        Check whether we actually use the team for this.
        Also switch to byteamandstatus couch call instead of
        """
        requests = self.reqMgr2.getRequestByStatus(status)
        #Then sort by Team name then sort by Priority
        #https://docs.python.org/2/howto/sorting.html
        if teams and len(teams) > 0:
            # keep only requests whose first (assumed only) team matches
            results = {}
            for reqName, value in requests.items():
                if value["Teams"][0] in teams:
                    results[reqName] = value
            return results
        else:
            return requests

    def getAvailableRequests(self, teams):
        """
        Get available requests for the given teams and sort by team and priority
        returns [(team, request_name, request_spec_url)]
        """
        tempResults = self._getRequestsByTeamsAndStatus("assigned", teams).values()
        filteredResults = []
        for request in tempResults:
            # only requests with exactly one assigned team are usable
            if "Teams" in request and len(request["Teams"]) == 1:
                filteredResults.append(request)
            else:
                msg = "no team or more than one team (%s) are assigined: %s" % (
                    request.get("Teams", None), request["RequestName"])
                self.logger.error(msg)
                self.logdb.post(request["RequestName"], msg, 'error')
        # two stable sorts: final order is by team, then by descending priority
        filteredResults.sort(key=itemgetter('RequestPriority'), reverse=True)
        filteredResults.sort(key=lambda r: r["Teams"][0])
        results = [(x["Teams"][0], x["RequestName"], x["RequestWorkflow"]) for x in filteredResults]
        return results

    def reportRequestStatus(self, request, status, message=None):
        """Change state in RequestManager
           Optionally, take a message to append to the request
        """
        if message:
            self.logdb.post(request, str(message), 'info')
        reqmgrStatus = self._workQueueToReqMgrStatus(status)

        if reqmgrStatus:  # only send known states
            try:
                # try reqmgr1 call if it fails
                self.reqMgr.reportRequestStatus(request, reqmgrStatus)
            except Exception as ex:
                # try reqmgr2 call
                msg = "%s : reqmgr2 request: %s" % (request, str(ex))
                self.logdb.post(request, msg, 'warning')
                self.reqMgr2.updateRequestStatus(request, reqmgrStatus)

    def markAcquired(self, request, url=None):
        """Mark request acquired"""
        self.reqMgr.putWorkQueue(request, url)

    def _workQueueToReqMgrStatus(self, status):
        """Map WorkQueue Status to that reported to ReqMgr.

        Returns the mapped status, the input when already a ReqMgr status,
        or None when unknown.
        """
        statusMapping = {'Acquired': 'acquired',
                         'Running': 'running-open',
                         'Failed': 'failed',
                         'Canceled': 'aborted',
                         'CancelRequested': 'aborted',
                         'Done': 'completed'
                        }
        if status in statusMapping:
            # if wq status passed convert to reqmgr status
            return statusMapping[status]
        elif status in REQUEST_STATE_LIST:
            # if reqmgr status passed return reqmgr status
            return status
        else:
            # unknown status
            return None

    def _reqMgrToWorkQueueStatus(self, status):
        """Map ReqMgr status to that in a WorkQueue element, it is not a 1-1 relation"""
        statusMapping = {'acquired': ['Acquired'],
                         'running': ['Running'],
                         'running-open': ['Running'],
                         'running-closed': ['Running'],
                         'failed': ['Failed'],
                         'aborted': ['Canceled', 'CancelRequested'],
                         'force-complete': ['Canceled', 'CancelRequested'],
                         'completed': ['Done']}
        if status in statusMapping:
            return statusMapping[status]
        else:
            return []

    def reportElement(self, element):
        """Report element to ReqMgr"""
        self.reportRequestStatus(element['RequestName'], element['Status'])

    def addNewElementsToOpenRequests(self, queue):
        """Add new elements to open requests which are in running-open state,
        only works adding new blocks from the input dataset.

        Returns the number of work units added.
        """
        self.logger.info("Checking Request Manager for open requests and closing old ones")

        # First close any open inbox element which hasn't found anything new in a while
        queue.closeWork()
        self.report(queue)

        work = 0
        requests = []

        # Drain mode, don't pull any work into open requests.
        # They will be closed if the queue stays in drain long enough
        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            requests = self._getRequestsByTeamsAndStatus("running-open", queue.params['Teams']).keys()
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for reqName in requests:
            try:
                self.logger.info("Processing request %s" % (reqName))
                units = queue.addWork(requestName=reqName)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - but at least it was split the first time. Log and skip.
                msg = 'Error adding further work to request "%s". Will try again later' \
                      '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                      '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isnt a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                      '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s units(s) queued for "%s"' % (units, reqName))
            work += units
        self.logger.info("%s element(s) added to open requests" % work)
        return work
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according the the given condition.
    1. Cleans local couch db when request is completed and reported to cental db.
       This will clean up local couchdb, local summary db, local queue
    2. Cleans old couchdoc which is created older than the time threshold
    """

    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # set the workqueue service for REST call
        self.config = config

    def setup(self, parameters):
        """
        Called at startup: wire up couch/ReqMgr connections and decide which
        request state makes a workflow deletable ("announced" when ReqMgr
        drives completion checks, "completed" for the Tier0 case).
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL, "WMStatsAgent")

        # TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        if self.useReqMgrForCompletionCheck:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            if self.config.TaskArchiver.reqmgr2Only:
                self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            else:
                # TODO: remove this for reqmgr2
                self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch:
        delete old wmstats docs, then archive deletable/aborted/rejected
        workflows.  All errors are logged and retried next polling cycle.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted" % report)

            logging.info("getting complete and announced requests")
            # only archive workflows older than the configured delay
            endTime = int(time.time()) - self.archiveDelayHours * 3600
            deletableWorkflows = self.centralRequestDBReader.getRequestByStatusAndStartTime(self.deletableState,
                                                                                            False, endTime)
            logging.info("Ready to archive normal %s workflows" % len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows" % numUpdated)

            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows" % len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows" % numUpdated)

            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows" % len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows" % numUpdated)
        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")

    def archiveWorkflows(self, workflows, archiveState):
        """Move each workflow whose local couch data could be deleted to
        archiveState; returns the number of ReqMgr status updates made
        (Tier0/local updates are not counted)."""
        updated = 0
        for workflowName in workflows:
            if self.cleanAllLocalCouchDB(workflowName):
                if self.useReqMgrForCompletionCheck:
                    if self.config.TaskArchiver.reqmgr2Only:
                        self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                    else:
                        self.reqmgrSvc.updateRequestStatus(workflowName, archiveState)
                    updated += 1
                    logging.debug("status updated to %s %s" % (archiveState, workflowName))
                else:
                    self.centralRequestDBWriter.updateRequestStatus(workflowName, archiveState)
        return updated

    def deleteWorkflowFromJobCouch(self, workflowName, db):
        """
        _deleteWorkflowFromCouch_

        If we are asked to delete the workflow from couch, delete it
        to clear up some space.

        Load the document IDs and revisions out of couch by workflowName,
        then order a delete on them.

        Returns a status dict {'status': ..., 'message': ...} (plus 'delete'
        count on the view-based path); the SummaryStats path returns None on
        success - callers only log that result.
        """
        if db == "JobDump":
            couchDB = self.jobsdatabase
            view = "jobsByWorkflowName"
        elif db == "FWJRDump":
            couchDB = self.fwjrdatabase
            view = "fwjrsByWorkflowName"
        elif db == "SummaryStats":
            couchDB = self.statsumdatabase
            view = None
        elif db == "WMStats":
            couchDB = self.wmstatsCouchDB.getDBInstance()
            view = "jobsByStatusWorkflow"
        else:
            # BUGFIX: an unknown db name used to fall through and raise an
            # UnboundLocalError on 'view' below; report it like other failures.
            errorMsg = "Unknown couch db identifier: %s" % db
            logging.warning(errorMsg)
            return {'status': 'error', 'message': errorMsg}

        if view is None:
            # SummaryStats: single doc keyed by workflow name
            try:
                committed = couchDB.delete_doc(workflowName)
            except CouchNotFoundError as ex:
                return {'status': 'warning', 'message': "%s: %s" % (workflowName, str(ex))}
        else:
            options = {"startkey": [workflowName], "endkey": [workflowName, {}], "reduce": False}
            try:
                jobs = couchDB.loadView(db, view, options=options)['rows']
            except Exception as ex:
                errorMsg = "Error on loading jobs for %s" % workflowName
                logging.warning("%s/n%s" % (str(ex), errorMsg))
                return {'status': 'error', 'message': errorMsg}

            # queue a bulk delete of every doc returned by the view
            for j in jobs:
                doc = {}
                doc["_id"] = j['value']['id']
                doc["_rev"] = j['value']['rev']
                couchDB.queueDelete(doc)

            committed = couchDB.commit()

            if committed:
                # create the error report
                errorReport = {}
                deleted = 0
                status = "ok"
                for data in committed:
                    if 'error' in data:
                        errorReport.setdefault(data['error'], 0)
                        errorReport[data['error']] += 1
                        status = "error"
                    else:
                        deleted += 1
                return {'status': status, 'delete': deleted, 'message': errorReport}
            else:
                return {'status': 'warning', 'message': "no %s exist" % workflowName}

    def cleanAllLocalCouchDB(self, workflowName):
        """Delete the workflow's docs from all local couch dbs; True if none
        of the checked deletions reported an error.

        NOTE(review): summaryReport's status is intentionally(?) not checked
        below - TODO confirm SummaryStats failures are really tolerable.
        """
        logging.info("Deleting %s from JobCouch" % workflowName)
        jobReport = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
        logging.debug("%s docs deleted from JobDump" % jobReport)

        fwjrReport = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
        logging.debug("%s docs deleted from FWJRDump" % fwjrReport)

        summaryReport = self.deleteWorkflowFromJobCouch(workflowName, "SummaryStats")
        logging.debug("%s docs deleted from SummaryStats" % summaryReport)

        wmstatsReport = self.deleteWorkflowFromJobCouch(workflowName, "WMStats")
        logging.debug("%s docs deleted from wmagent_summary" % wmstatsReport)

        # if one of the procedure fails return False
        if (jobReport["status"] == "error" or fwjrReport["status"] == "error"
                or wmstatsReport["status"] == "error"):
            return False
        # other wise return True.
        return True
class ReqMgrTester(object): def __init__(self, reqMgrUrl): self.reqMgrUrl = reqMgrUrl self.restSender = JSONRequests(reqMgrUrl) d = dict(endpoint = self.reqMgrUrl) self.reqMgrService = RequestManager(d) def queryAllRequests(self): """ Returns all requests stored at ReqMgr instance. """ logging.info("Querying all requests at ReqMgr instance ...") r = self.reqMgrService.getRequestNames() print "Found %s requests:" % len(r) for req in r: print req def queryRequest(self, requestName): """ Query a specific request according to the input argument. """ urlQuery = "request/%s" % requestName logging.info("Querying request '%s'" % requestName) logging.info("Query: '%s':" % urlQuery) r = self.restSender.get(urlQuery) print str(r) def createRequests(self, numRequests): """ Inject new numRequests into ReqMgr instance. (see ReqMgr_t testE how to create a request) """ logging.info("Creating %s new requests ..." % numRequests) schema = ReReco.getTestArguments() schema['RequestName'] = 'TestReReco' schema['RequestType'] = 'ReReco' schema['CmsPath'] = "/uscmst1/prod/sw/cms" schema['Requestor'] = '%s' % "zmaxa" schema['Group'] = '%s' % "DATAOPS" schema['BlockWhitelist'] = ['/dataset/dataset/dataset#alpha'] schema['BlockBlacklist'] = ['/dataset/dataset/dataset#beta'] schema['Campaign'] = 'MyTestCampaign' for i in range(numRequests): urlQuery = "request/testRequest" print "Query: '%s':" % urlQuery print "Schema (request): '%s'" % schema r = self.restSender.put(urlQuery, schema) # print "request creating response: ", r print "created: ", r[0]["RequestName"] def deleteRequest(self, requestNames): """ Delete requests specified in the input, more request names are comma-separated. 
""" logging.info("Deleting requests ...") for reqName in requestNames.split(','): reqName = reqName.strip() urlQuery = "request/%s" % reqName logging.info("Deleting request (request_name): '%s'" % reqName) logging.info("Query: '%s':" % urlQuery) r = self.restSender.delete(urlQuery) def requestChangeStates(self, reqName): """ Route the request (spec. by the request name) in the input through a series of possible request states. """ logging.info("Changing state of request %s ..." % reqName) def changeState(requestName, urlQuery): logging.info("Query: '%s' ..." % urlQuery) r = self.restSender.put(urlQuery) r = self.restSender.get("request/%s" % requestName) #assert r[0]["RequestStatus"] == statusName logging.info("Querying modified request, new state: %s" % r[0]["RequestStatus"]) # once a request is created, it's in 'new' state # states transition has to be an allowed one as defined here: # WMCore/RequestManager/RequestDB/Settings/RequestStatus.py statesQueries = ["request/%s?status=%s" % (reqName, "testing-approved"), "request/%s?status=%s" % (reqName, "testing"), "request/%s?status=%s" % (reqName, "tested"), "request/%s?status=%s" % (reqName, "assignment-approved"), # create an assignment now # need quote because of space in the team name # (previous name - White Sox) urllib.quote("assignment/%s/%s" % (TEAM_NAME, reqName))] for query in statesQueries: changeState(reqName, query) def setup(self): """ Setup ReqMgr instance for dealing with requests - needs to create a user, group, SW releases entries, etc. as done in test/python/WMCore_t/RequestManager_t/ReqMgr_t.py """ logging.info("ReqMgr setup ...") def doQuery(urlQuery): logging.info("Query: '%s' ..." 
% urlQuery) r = None try: r = self.restSender.put(urlQuery) except Exception as ex: print "exception" print str(ex) print "response:", r queries = ["user/[email protected]", "group/DATAOPS", "group/DATAOPS/zmaxa", urllib.quote("team/" + TEAM_NAME), "version/%s" % "CMSSW_3_5_8"] for q in queries: doQuery(q) logging.info("ReqMgr setup finished, listing known users ...") q = "user/" r = self.restSender.get(q) print r
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according the the given condition.
    1. Cleans local couch db when request is completed and reported to cental db.
       This will clean up local couchdb, local summary db, local queue
    2. Cleans old couchdoc which is created older than the time threshold
    """

    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # set the workqueue service for REST call
        self.config = config

    def setup(self, parameters):
        """
        Called at startup: wire up couch/ReqMgr connections and decide which
        request state makes a workflow deletable ("announced" when ReqMgr
        drives completion checks, "completed" for the Tier0 case).
        """
        self.teamName = self.config.Agent.teamName
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL, appName="WMStatsAgent")

        # TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        if self.useReqMgrForCompletionCheck:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            # TODO: remove this when reqmgr2 replace reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        Get information from wmbs, workqueue and local couch.
          - It deletes old wmstats docs
          - Archive workflows
        All errors are logged and retried next polling cycle.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted" % report)

            logging.info("Cleaning up the archived request docs")
            report = self.cleanAlreadyArchivedWorkflows()
            logging.info("%s archived workflows deleted" % report)

            # archiving only workflows that I own (same team)
            logging.info("Getting requests in '%s' state for team '%s'", self.deletableState, self.teamName)
            # only archive workflows older than the configured delay
            endTime = int(time.time()) - self.archiveDelayHours * 3600
            wfs = self.centralRequestDBReader.getRequestByTeamAndStatus(self.teamName, self.deletableState)
            commonWfs = self.centralRequestDBReader.getRequestByStatusAndStartTime(self.deletableState,
                                                                                   False, endTime)
            deletableWorkflows = list(set(wfs) & set(commonWfs))
            logging.info("Ready to archive normal %s workflows", len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows", numUpdated)

            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows", len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows", numUpdated)

            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows", len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows", numUpdated)
        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")

    def archiveWorkflows(self, workflows, archiveState):
        """Move each workflow whose local couch data could be deleted to
        archiveState; returns the number of ReqMgr status updates made
        (Tier0/local updates are not counted)."""
        updated = 0
        for workflowName in workflows:
            if self.cleanAllLocalCouchDB(workflowName):
                if self.useReqMgrForCompletionCheck:
                    try:
                        # TODO: try reqmgr1 call if it fails (reqmgr2Only - remove this line when reqmgr is replaced)
                        self.reqmgrSvc.updateRequestStatus(workflowName, archiveState)
                        # And replace with this - remove all the excption
                        # self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                    except HTTPException as ex:
                        # If we get an HTTPException of 404 means reqmgr2 request
                        if ex.status == 404:
                            # try reqmgr2 call
                            msg = "%s : reqmgr2 request: %s" % (workflowName, str(ex))
                            logging.warning(msg)
                            self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                        else:
                            msg = "%s : fail to update status with HTTP error: %s" % (workflowName, str(ex))
                            logging.error(msg)
                            raise ex

                    updated += 1
                    logging.debug("status updated to %s %s", archiveState, workflowName)
                else:
                    # tier0 update case
                    self.centralRequestDBWriter.updateRequestStatus(workflowName, archiveState)
        return updated

    def deleteWorkflowFromJobCouch(self, workflowName, db):
        """
        _deleteWorkflowFromCouch_

        If we are asked to delete the workflow from couch, delete it
        to clear up some space.

        Load the document IDs and revisions out of couch by workflowName,
        then order a delete on them.

        Returns a status dict {'status': ..., 'message': ...} (plus 'delete'
        count on the view-based path); the SummaryStats path returns None on
        success - callers only log that result.
        """
        options = {"startkey": [workflowName], "endkey": [workflowName, {}], "reduce": False}

        if db == "JobDump":
            couchDB = self.jobsdatabase
            view = "jobsByWorkflowName"
        elif db == "FWJRDump":
            couchDB = self.fwjrdatabase
            view = "fwjrsByWorkflowName"
        elif db == "SummaryStats":
            couchDB = self.statsumdatabase
            view = None
        elif db == "WMStatsAgent":
            couchDB = self.wmstatsCouchDB.getDBInstance()
            view = "allWorkflows"
            options = {"key": workflowName, "reduce": False}
        else:
            # BUGFIX: an unknown db name used to fall through and raise an
            # UnboundLocalError on 'view' below; report it like other failures.
            errorMsg = "Unknown couch db identifier: %s" % db
            logging.warning(errorMsg)
            return {'status': 'error', 'message': errorMsg}

        if view is None:
            # SummaryStats: single doc keyed by workflow name
            try:
                committed = couchDB.delete_doc(workflowName)
            except CouchNotFoundError as ex:
                return {'status': 'warning', 'message': "%s: %s" % (workflowName, str(ex))}
        else:
            try:
                jobs = couchDB.loadView(db, view, options=options)['rows']
            except Exception as ex:
                errorMsg = "Error on loading jobs for %s" % workflowName
                logging.warning("%s/n%s" % (str(ex), errorMsg))
                return {'status': 'error', 'message': errorMsg}

            # queue a bulk delete of every doc returned by the view
            for j in jobs:
                doc = {}
                doc["_id"] = j['value']['id']
                doc["_rev"] = j['value']['rev']
                couchDB.queueDelete(doc)

            committed = couchDB.commit()

            if committed:
                # create the error report
                errorReport = {}
                deleted = 0
                status = "ok"
                for data in committed:
                    if 'error' in data:
                        errorReport.setdefault(data['error'], 0)
                        errorReport[data['error']] += 1
                        status = "error"
                    else:
                        deleted += 1
                return {'status': status, 'delete': deleted, 'message': errorReport}
            else:
                return {'status': 'warning', 'message': "no %s exist" % workflowName}

    def cleanAllLocalCouchDB(self, workflowName):
        """Delete the workflow's docs from all local couch dbs; True if none
        of the checked deletions reported an error.

        NOTE(review): summaryReport's status is intentionally(?) not checked
        below - TODO confirm SummaryStats failures are really tolerable.
        """
        logging.info("Deleting %s from JobCouch" % workflowName)
        jobReport = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
        logging.debug("%s docs deleted from JobDump", jobReport)

        fwjrReport = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
        logging.debug("%s docs deleted from FWJRDump", fwjrReport)

        summaryReport = self.deleteWorkflowFromJobCouch(workflowName, "SummaryStats")
        logging.debug("%s docs deleted from SummaryStats", summaryReport)

        wmstatsReport = self.deleteWorkflowFromJobCouch(workflowName, "WMStatsAgent")
        logging.debug("%s docs deleted from wmagent_summary", wmstatsReport)

        # if one of the procedure fails return False
        if (jobReport["status"] == "error" or fwjrReport["status"] == "error"
                or wmstatsReport["status"] == "error"):
            return False
        # other wise return True.
        return True

    def cleanAlreadyArchivedWorkflows(self):
        """
        loop through the workflows in couchdb, if archived delete all the data in couchdb

        Returns the number of workflows whose local data was removed.
        """
        numDeletedRequests = 0
        try:
            localWMStats = self.wmstatsCouchDB.getDBInstance()
            options = {"group_level": 1, "reduce": True}

            results = localWMStats.loadView("WMStatsAgent", "allWorkflows", options=options)['rows']
            requestNames = [x['key'] for x in results]
            logging.info("There are %s workfows to check for archived status" % len(requestNames))

            workflowDict = self.centralRequestDBReader.getStatusAndTypeByRequest(requestNames)

            for request, value in workflowDict.items():
                if value[0].endswith("-archived"):
                    self.cleanAllLocalCouchDB(request)
                    numDeletedRequests += 1
        except Exception as ex:
            errorMsg = "Error on loading workflow list from wmagent_summary db"
            logging.warning("%s/n%s" % (errorMsg, str(ex)))

        return numDeletedRequests
class ReqMgrTester(object):
    # Thin driver around a ReqMgr instance used by this script: it issues
    # REST calls (via JSONRequests) and RequestManager service calls to
    # create, query, delete and state-transition requests.
    def __init__(self, reqMgrUrl):
        # reqMgrUrl: base URL of the ReqMgr REST endpoint
        self.reqMgrUrl = reqMgrUrl
        self.restSender = JSONRequests(reqMgrUrl)
        d = dict(endpoint = self.reqMgrUrl)
        self.reqMgrService = RequestManager(d)

    def queryAllRequests(self):
        """
        Returns all requests stored at ReqMgr instance.
        """
        logging.info("Querying all requests at ReqMgr instance ...")
        r = self.reqMgrService.getRequestNames()
        print "Found %s requests:" % len(r)
        for req in r:
            print req

    def queryRequest(self, requestName):
        """
        Query a specific request according to the input argument.
        """
        urlQuery = "request/%s" % requestName
        logging.info("Querying request '%s'" % requestName)
        logging.info("Query: '%s':" % urlQuery)
        r = self.restSender.get(urlQuery)
        print str(r)

    def createRequests(self, numRequests):
        """
        Inject new numRequests into ReqMgr instance.
        (see ReqMgr_t testE how to create a request)
        """
        logging.info("Creating %s new requests ..." % numRequests)
        # Build one ReReco test schema; the same schema is PUT numRequests times
        # (ReqMgr assigns each a unique RequestName, printed below).
        schema = ReReco.getTestArguments()
        schema['RequestName'] = 'TestReReco'
        schema['RequestType'] = 'ReReco'
        schema['CmsPath'] = "/uscmst1/prod/sw/cms"
        schema['Requestor'] = '%s' % "testinguser"
        schema['Group'] = '%s' % "PeopleLikeMe"
        schema['BlockWhitelist'] = ['/dataset/dataset/dataset#alpha']
        schema['BlockBlacklist'] = ['/dataset/dataset/dataset#beta']
        schema['Campaign'] = 'MyTestCampaign'
        for i in range(numRequests):
            urlQuery = "request/testRequest"
            logging.info("Query: '%s':" % urlQuery)
            r = self.restSender.put(urlQuery, schema)
            # print "request creating response: ", r
            print "created: ", r[0]["RequestName"]

    def deleteRequest(self, requestNames):
        """
        Delete requests specified in the input, more request names are
        comma-separated.
        """
        logging.info("Deleting requests ...")
        for reqName in requestNames.split(','):
            reqName = reqName.strip()
            urlQuery = "request/%s" % reqName
            logging.info("Deleting request (request_name): '%s'" % reqName)
            logging.info("Query: '%s':" % urlQuery)
            r = self.restSender.delete(urlQuery)

    def injectOpsClipboard(self, reqName, couchUrl, couchDbName):
        """
        Once a request reaches "ops-hold" state, it can be injected into
        CouchDB, application OpsClipboard, for further manipulation.
        Do this here with the reqName request.
        OpsClipboard.inject() method which does in the CouchDB injection is
        called from WMCore/HTTPFrontEnd/RequestManager/Assign.py
        handleAssignmentPage method (which currently, 2012-01, doesn't have
        any unittest nor REST API) (used only from the Assignment webpage)
        Works when running locally accessing CouchDB behind frontend:
        py test/data/ReqMgr/reqmgr-load_example_data.py -u https://localhost:2000/reqmgr/reqMgr/ \
            -t testinguser_120131_213320_2161 -i -o https://localhost:2000/couchdb/ \
            -a ops_clipboard
        """
        # find out campaign name associated with this request
        r = self.restSender.get("request/%s" % reqName)
        campaign = r[0]["Campaign"]
        logging.info("Campaign: %s" % campaign)
        requests = [{u"RequestName": reqName, u"CampaignName": campaign}]
        OpsClipboard.inject(couchUrl, couchDbName, *requests)

    def requestChangeStates(self, reqName, injectOpsClipboard, couchUrl, couchDbName):
        """
        Route the request (spec. by the request name) in the input through
        a series of possible request states.
        """
        logging.info("Changing state of request %s ..." % reqName)

        def changeState(requestName, urlQuery):
            # PUT the state change, then GET the request back to show the
            # resulting status.
            logging.info("Query: '%s' ..." % urlQuery)
            r = self.restSender.put(urlQuery)
            r = self.restSender.get("request/%s" % requestName)
            #assert r[0]["RequestStatus"] == statusName
            logging.info("Querying modified request, new state: %s" % r[0]["RequestStatus"])

        # once a request is created, it's in 'new' state
        # states transition has to be an allowed one as defined here:
        # WMCore/RequestManager/RequestDB/Settings/RequestStatus.py
        statesQueries = ["request/%s?status=%s" % (reqName, "testing-approved"),
                         "request/%s?status=%s" % (reqName, "testing"),
                         "request/%s?status=%s" % (reqName, "tested"),
                         "request/%s?status=%s" % (reqName, "assignment-approved"),
                         # create an assignment now
                         # need quote because of space in the team name
                         urllib.quote("assignment/%s/%s" % (TEAM_NAME, reqName)),
                         "request/%s?status=%s" % (reqName, "ops-hold")]
        for query in statesQueries:
            changeState(reqName, query)
        if injectOpsClipboard:
            self.injectOpsClipboard(reqName, couchUrl, couchDbName)

    def setup(self):
        """
        Setup ReqMgr instance for dealing with requests - needs to create
        a user, group, SW releases entries, etc. as done in
        test/python/WMCore_t/RequestManager_t/ReqMgr_t.py
        """
        logging.info("ReqMgr setup ...")

        def doQuery(urlQuery):
            # Best-effort PUT: failures are printed but do not abort setup.
            logging.info("Query: '%s' ..." % urlQuery)
            r = None
            try:
                r = self.restSender.put(urlQuery)
            except Exception as ex:
                print "exception"
                print str(ex)
            print "response:", r

        queries = ["user/[email protected]",
                   "group/PeopleLikeMe",
                   "group/PeopleLikeMe/testinguser",
                   urllib.quote("team/" + TEAM_NAME),
                   "version/%s" % "CMSSW_3_5_8"]
        for q in queries:
            doQuery(q)
        logging.info("ReqMgr setup finished, listing known users ...")
        q = "user/"
        r = self.restSender.get(q)
        print r
class JobUpdaterPoller(BaseWorkerThread):
    """
    _JobUpdaterPoller_

    Poller class for the JobUpdater: keeps the priorities known to the
    agent (local WorkQueue elements, WMBS workflows and running batch jobs)
    in sync with the request priorities stored in ReqMgr.
    """

    def __init__(self, config):
        """
        __init__

        :param config: agent configuration; the JobUpdater and
            WorkQueueManager sections are read here.
        """
        BaseWorkerThread.__init__(self)
        self.config = config

        # Interfaces to the batch system, ReqMgr and the local workqueue
        self.bossAir = BossAirAPI(config = self.config)
        self.reqmgr = RequestManager({'endpoint' : self.config.JobUpdater.reqMgrUrl})
        self.workqueue = WorkQueue(self.config.WorkQueueManager.couchurl,
                                   self.config.WorkQueueManager.dbname)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        self.listWorkflowsDAO = self.daoFactory(classname = "Workflow.ListForJobUpdater")
        self.updateWorkflowPrioDAO = self.daoFactory(classname = "Workflow.UpdatePriority")
        self.executingJobsDAO = self.daoFactory(classname = "Jobs.GetNumberOfJobsForWorkflowTaskStatus")

    def setup(self, parameters = None):
        """
        _setup_

        Nothing to initialize beyond __init__.
        """
        pass

    def terminate(self, parameters = None):
        """
        _terminate_

        Terminate gracefully.
        """
        pass

    def algorithm(self, parameters = None):
        """
        _algorithm_

        Single polling cycle: synchronize priorities with ReqMgr.
        """
        logging.info("Synchronizing priorities with ReqMgr...")
        self.synchronizeJobPriority()

    def _getCachedPriority(self, workflow, priorityCache):
        """
        Return the ReqMgr 'RequestPriority' for workflow, memoizing lookups
        in priorityCache. Returns None (after logging the error) when the
        ReqMgr query fails, so callers can skip the workflow.
        """
        if workflow not in priorityCache:
            try:
                priorityCache[workflow] = self.reqmgr.getRequest(workflow)['RequestPriority']
            # was "except Exception, ex" — modernized to match the rest of the file
            except Exception as ex:
                logging.error("Couldn't retrieve the priority of request %s" % workflow)
                logging.error("Error: %s" % ex)
                return None
        return priorityCache[workflow]

    def synchronizeJobPriority(self):
        """
        _synchronizeJobPriority_

        Check WMBS and WorkQueue for active workflows and compare with the
        ReqMgr for priority changes. If a priority change occurs
        then update the job priority in the batch system and
        the elements in the local queue that have not been injected yet.
        """
        # Update the priority of workflows that are not in WMBS and just in local queue
        priorityCache = {}
        workflowsToUpdate = {}
        workflowsToCheck = list(self.workqueue.getAvailableWorkflows())
        for workflow, priority in workflowsToCheck:
            reqMgrPriority = self._getCachedPriority(workflow, priorityCache)
            if reqMgrPriority is None:
                # ReqMgr lookup failed; try again on the next cycle
                continue
            if priority != reqMgrPriority:
                workflowsToUpdate[workflow] = reqMgrPriority
        for workflow in workflowsToUpdate:
            self.workqueue.updatePriority(workflow, workflowsToUpdate[workflow])

        # Check the workflows in WMBS
        priorityCache = {}
        workflowsToUpdateWMBS = {}
        workflowsToCheck = self.listWorkflowsDAO.execute()
        for workflowEntry in workflowsToCheck:
            workflow = workflowEntry['name']
            requestPriority = self._getCachedPriority(workflow, priorityCache)
            if requestPriority is None:
                continue
            if requestPriority != workflowEntry['workflow_priority']:
                # Update the workqueue priority for the Available elements
                self.workqueue.updatePriority(workflow, requestPriority)
                # Check if there are executing jobs for this particular task
                if self.executingJobsDAO.execute(workflow, workflowEntry['task']) > 0:
                    self.bossAir.updateJobInformation(workflow, workflowEntry['task'],
                                                      requestPriority = requestPriority,
                                                      taskPriority = workflowEntry['task_priority'])
                workflowsToUpdateWMBS[workflow] = requestPriority
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes
    requireCouch: raise an exception on couch failure instead of ignoring
    """
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        myThread = threading.currentThread()
        # DAO factories for WMBS and the DBS3 buffer
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        self.dbsDaoFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                        logger = myThread.logger,
                                        dbinterface = myThread.dbi)
        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            # No workqueue configured; notifyWorkQueue is skipped in completeTasks
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            #sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                       couchapp = self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB;
        else:
            self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            #TODO: remove this when reqmgr2 replace reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

        #Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname = "Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")
        return

    def terminate(self, params):
        """
        _terminate_
        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters = None):
        """
        _algorithm_
        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp) = self.getFinishedWorkflows()
            # set the data cache which can be used other thread (no ther thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            # Known WMCore error: roll back any open transaction and re-raise as-is
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
               and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            # Anything else: roll back and wrap in TaskArchiverPollerException
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
               and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)
        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_
        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        #Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(classname = "Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut = self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()
        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. combined those and make return
           finishedwfs - without LogCollect and CleanUp task
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp task
        """
        finishedWorkflowsDAO = self.daoFactory(classname = "Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(onlySecondary=True)
        # Keep only the LogCollect/CleanUp workflows that are also fully finished
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_
        This method will call several auxiliary methods to do the following:
        1. Notify the WorkQueue about finished subscriptions
        2. update dbsbuffer_workflow table with finished subscription
        """
        #Only delete those where the upload and notification succeeded
        logging.info("Found %d candidate workflows for completing: %s" % (len(finishedwfs),finishedwfs.keys()))
        # update the completed flag in dbsbuffer_workflow table so blocks can be closed
        # create updateDBSBufferWorkflowComplete DAO
        if len(finishedwfs) == 0:
            return
        completedWorkflowsDAO = self.dbsDaoFactory(classname = "UpdateWorkflowsToCompleted")

        centralCouchAlive = True
        try:
            #TODO: need to enable when reqmgr2 -wmstats is ready
            #abortedWorkflows = self.reqmgrCouchDBWriter.getRequestByStatus(["aborted"], format = "dict");
            abortedWorkflows = self.centralCouchDBWriter.getRequestByStatus(["aborted"])
            logging.info("There are %d requests in 'aborted' status in central couch." % len(abortedWorkflows))
            forceCompleteWorkflows = self.centralCouchDBWriter.getRequestByStatus(["force-complete"])
            logging.info("List of 'force-complete' workflows in central couch: %s" % forceCompleteWorkflows)
        except Exception as ex:
            # Central couch unreachable: skip this cycle rather than failing
            centralCouchAlive = False
            logging.error("we will try again when remote couch server comes back\n%s" % str(ex))

        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    #Notify the WorkQueue, if there is one
                    if self.workQueue != None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)

                    #Now we know the workflow as a whole is gone, we can delete the information from couch
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(workflow, "completed")
                        logging.info("status updated to completed %s" % workflow)

                    if workflow in abortedWorkflows:
                        #TODO: remove when reqmgr2-wmstats deployed
                        newState = "aborted-completed"
                    elif workflow in forceCompleteWorkflows:
                        newState = "completed"
                    else:
                        newState = None

                    if newState != None:
                        # update reqmgr workload document only request mgr is installed
                        if not self.useReqMgrForCompletionCheck:
                            # commented out untill all the agent is updated so every request have new state
                            # TODO: agent should be able to write reqmgr db diretly add the right group in
                            # reqmgr
                            self.requestLocalCouchDB.updateRequestStatus(workflow, newState)
                        else:
                            try:
                                #TODO: try reqmgr1 call if it fails (reqmgr2Only - remove this line when reqmgr is replaced)
                                logging.info("Updating status to '%s' in both oracle and couchdb ..." % newState)
                                self.reqmgrSvc.updateRequestStatus(workflow, newState)
                                #And replace with this - remove all the excption
                                #self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                            except httplib.HTTPException as ex:
                                # If we get an HTTPException of 404 means reqmgr2 request
                                if ex.status == 404:
                                    # try reqmgr2 call
                                    msg = "%s : reqmgr2 request: %s" % (workflow, str(ex))
                                    logging.warning(msg)
                                    self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                                else:
                                    msg = "%s : fail to update status %s with HTTP error: %s" % (workflow, newState, str(ex))
                                    logging.error(msg)
                                    raise ex
                        logging.info("status updated to '%s' : %s" % (newState, workflow))

                    completedWorkflowsDAO.execute([workflow])
                except TaskArchiverPollerException as ex:
                    #Something didn't go well when notifying the workqueue, abort!!!
                    logging.error("Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    #Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_
        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """
        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId = sub)
            except WorkQueueNoMatchingElements:
                #Subscription wasn't known to WorkQueue, feel free to clean up
                logging.info("Local WorkQueue knows nothing about this subscription: %s" % sub)
                pass
            except Exception as ex:
                # Any other workqueue failure aborts archiving for this workflow
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)
        return
class ReqMgrTest(RESTBaseUnitTest): """ Basic test for the ReqMgr services. Setup is done off-screen in RESTBaseUnitTest - this makes things confusing """ def setUp(self): """ setUP global values Database setUp is done in base class """ self.couchDBName = "reqmgr_t_0" RESTBaseUnitTest.setUp(self) self.testInit.setupCouch("%s" % self.couchDBName, "ConfigCache", "ReqMgr") self.testInit.setupCouch("%s_wmstats" % self.couchDBName, "WMStats") self.testInit.setupCouch("%s_acdc" % self.couchDBName, "ACDC", "GroupUser") reqMgrHost = self.config.getServerUrl() self.jsonSender = JSONRequests(reqMgrHost) self.params = {} self.params['endpoint'] = reqMgrHost self.reqService = RequestManager(self.params) def initialize(self): self.config = RequestManagerConfig( 'WMCore.HTTPFrontEnd.RequestManager.ReqMgrRESTModel') self.config.setFormatter('WMCore.WebTools.RESTFormatter') self.config.setupRequestConfig() self.config.setupCouchDatabase(dbName = self.couchDBName) self.config.setPort(12888) self.schemaModules = ["WMCore.RequestManager.RequestDB"] def tearDown(self): """ tearDown Tear down everything """ RESTBaseUnitTest.tearDown(self) self.testInit.tearDownCouch() def createConfig(self, bad = False): """ _createConfig_ Create a config of some sort that we can load out of ConfigCache """ PSetTweak = {'process': {'outputModules_': ['ThisIsAName'], 'ThisIsAName': {'dataset': {'dataTier': 'RECO', 'filterName': 'Filter'}}}} BadTweak = {'process': {'outputModules_': ['ThisIsAName1', 'ThisIsAName2'], 'ThisIsAName1': {'dataset': {'dataTier': 'RECO', 'filterName': 'Filter'}}, 'ThisIsAName2': {'dataset': {'dataTier': 'RECO', 'filterName': 'Filter'}}}} configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = self.couchDBName) configCache.createUserGroup(groupname = "testGroup", username = '******') if bad: configCache.setPSetTweaks(PSetTweak = BadTweak) else: configCache.setPSetTweaks(PSetTweak = PSetTweak) configCache.save() return configCache.getCouchID() @attr("integration") def 
testA_testBasicSetUp(self): """ _testBasicSetUp_ Moving the tests that were in the setUp category out of it, mostly because I want to make sure that they don't fail inside the setUp statement. """ if 'me' in self.jsonSender.get('user')[0]: self.jsonSender.delete('user/me') self.assertFalse('me' in self.jsonSender.get('user')[0]) self.assertEqual(self.jsonSender.put('user/[email protected]')[1], 200) self.assertTrue('me' in self.jsonSender.get('user')[0]) if 'PeopleLikeMe' in self.jsonSender.get('group')[0]: self.jsonSender.delete('group/PeopleLikeMe') self.assertFalse('PeopleLikeMe' in self.jsonSender.get('group')[0]) self.assertEqual(self.jsonSender.put('group/PeopleLikeMe')[1], 200) self.assertTrue( 'PeopleLikeMe' in self.jsonSender.get('group')[0]) self.jsonSender.put('group/PeopleLikeMe/me') users = self.jsonSender.get('group/PeopleLikeMe')[0]['users'] self.assertTrue('me' in users) groups = self.jsonSender.get('user/me')[0]['groups'] self.assertTrue('PeopleLikeMe' in groups) groups2 = self.jsonSender.get('group?user=me')[0] self.assertTrue('PeopleLikeMe' in groups2) if 'White Sox' in self.jsonSender.get('team')[0]: self.jsonSender.delete(urllib.quote('team/White Sox')) self.assertFalse('White Sox' in self.jsonSender.get('team')[0]) self.assertEqual(self.jsonSender.put(urllib.quote('team/White Sox'))[1], 200) self.assertTrue('White Sox' in self.jsonSender.get('team')[0]) # some foreign key stuff to deal with schema = utils.getSchema() version = "version/" + schema["CMSSWVersion"] self.assertTrue(self.jsonSender.put(version)[1] == 200) self.assertTrue(schema["CMSSWVersion"] in self.jsonSender.get('version')[0]) @attr("integration") def testB_ReReco(self): """ _ReReco_ Try a basic ReReco workflow """ schema = utils.getAndSetupSchema(self) schema['RequestNumEvents'] = 100 schema['SizePerEvent'] = 101 configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") 
self.doRequest(schema) def doRequest(self, schema): """ _doRequest_ Run all tests on a basic ReReco workflow """ requestName = schema['RequestName'] self.assertRaises(HTTPException, self.jsonSender.delete, 'request/%s' % requestName) result = self.jsonSender.put('request/%s' % (requestName), schema) self.assertEqual(result[1], 200) requestName = result[0]['RequestName'] self.assertEqual(self.jsonSender.get('request/%s' % requestName)[0]['RequestName'], requestName) self.jsonSender.put('request/%s?status=assignment-approved' % requestName) me = self.jsonSender.get('user/me')[0] self.assertTrue(requestName in me['requests']) self.assertEqual(self.jsonSender.put('request/%s?priority=5' % requestName)[1], 200) request = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(request['RequestPriority'], 5) # Check LFN Bases self.assertEqual(request['UnmergedLFNBase'], '/store/unmerged') self.assertEqual(request['MergedLFNBase'], '/store/data') # Check Num events self.assertEqual(request['RequestNumEvents'], 100) self.assertEqual(request['SizePerEvent'], 101) # only certain transitions allowed #self.assertEqual(self.jsonSender.put('request/%s?status=running' % requestName)[1], 400) self.assertRaises(HTTPException, self.jsonSender.put,'request/%s?status=running' % requestName) request = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(request['RequestStatus'], 'assignment-approved') self.assertTrue(self.jsonSender.put(urllib.quote('assignment/White Sox/%s' % requestName))[1] == 200) requestsAndSpecs = self.jsonSender.get(urllib.quote('assignment/White Sox'))[0] self.assertTrue(requestName in requestsAndSpecs[0]) workloadHelper = WMWorkloadHelper() workloadHelper.load(requestsAndSpecs[0][1]) self.assertEqual(workloadHelper.getOwner()['Requestor'], "me") self.assertEqual(self.jsonSender.get('assignment?request=%s'% requestName)[0], ['White Sox']) self.assertEqual(self.jsonSender.get('request/%s' % requestName)[0]['teams'], ['White Sox']) 
agentUrl = 'http://cmssrv96.fnal.gov/workqueue' self.jsonSender.put('workQueue/%s?url=%s'% (requestName, urllib.quote(agentUrl)) ) self.assertEqual(self.jsonSender.get('workQueue/%s' % requestName)[0][0], agentUrl) request = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(request['RequestStatus'], 'acquired') self.jsonSender.post('request/%s?events_written=10&files_merged=1' % requestName) self.jsonSender.post('request/%s?events_written=20&files_merged=2&percent_success=99.9' % requestName) request = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(len(request['RequestUpdates']), 2) self.assertEqual(request['RequestUpdates'][0]['files_merged'], 1) self.assertEqual(request['RequestUpdates'][1]['events_written'], 20) self.assertEqual(request['RequestUpdates'][1]['percent_success'], 99.9) message = "The sheriff is near" jsonMessage = json.dumps(message) self.jsonSender.put('message/%s' % requestName, message) messages = self.jsonSender.get('message/%s' % requestName) #self.assertEqual(messages[0][0][0], message) for status in ['running-open', 'running-closed', 'completed']: self.jsonSender.put('request/%s?status=%s' % (requestName, status)) # campaign self.jsonSender.put('campaign/%s' % 'TestCampaign') campaigns = self.jsonSender.get('campaign')[0] self.assertTrue('TestCampaign' in campaigns.keys()) self.jsonSender.put('campaign/%s/%s' % ('TestCampaign', requestName)) requestsInCampaign = self.jsonSender.get('campaign/%s' % 'TestCampaign')[0] self.assertTrue(requestName in requestsInCampaign.keys()) req = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(req['Campaign'], 'TestCampaign') self.jsonSender.delete('request/%s' % requestName) @attr("integration") def testC_404Errors(self): """ _404Errors_ Do some things that generate 404 errors. This should be limited to requests for objects that do not exist. 
""" badName = 'ThereIsNoWayThisNameShouldExist' # First, try to find a non-existent request # This should throw a 404 error. # The request name should not be in it self.checkForError(cls = 'request', badName = badName, exitCode = 404, message = 'Given requestName not found') # Now look for non-existent user self.checkForError(cls = 'user', badName = badName, exitCode = 404, message = 'Cannot find user') # Now try non-existent campaign self.checkForError(cls = 'campaign', badName = badName, exitCode = 404, message = "Cannot find campaign") # Now try invalid message # This raises a requestName error because it searches for the request self.checkForError(cls = 'message', badName = badName, exitCode = 404, message = "Given requestName not found", testEmpty = False) # Check for assignments (no teams or requests) # This raises a team error because it tries to load teams out first self.checkForError(cls = 'assignment', badName = badName, exitCode = 404, message = 'Cannot find team') @attr("integration") def testD_400Errors(self): """ _400Errors_ These are failures created by invalid input, such as sending args to a request when it doesn't accept any. They should generatore 400 Errors """ badName = 'ThereIsNoWayThisNameShouldExist' # Attempt to send arguments to a function that doesn't accept them. self.checkForError(cls = 'team', badName = badName, exitCode = 400, message = "Invalid input: Arguments added where none allowed") # Recheck for versions self.checkForError(cls = 'version', badName = badName, exitCode = 400, message = "Invalid input: Arguments added where none allowed") # Break the validation self.checkForError(cls = 'user', badName = '!', exitCode = 400, message = 'Invalid input: Input data failed validation') def checkForError(self, cls, badName, exitCode, message, testEmpty = True): """ _checkForError_ Generic function for checking for errors in JSON commands Does a basic check on type cls searching for name badName which hopefull does not exist. 
Checks to make sure that it exits with code exitCode, and that the error contains the string message. Also checks to make sure that name badName is NOT in the output testEmpty for those that don't handle calls to the main (i.e., who require an argument) """ raises = False # First assert that the test to be tested is empty if testEmpty: result = self.jsonSender.get(cls) self.assertTrue(type(result[0]) in [type([]), type({})]) # Next, test try: result = self.jsonSender.get('%s/%s' % (cls, badName)) except HTTPException as ex: raises = True self.assertEqual(ex.status, exitCode) self.assertTrue(message in ex.result) self.assertFalse(badName in ex.result) self.assertTrue(raises) @attr("integration") def testE_CheckStatusChanges(self): """ _CheckStatusChanges_ Check status changes for a single request. See whether we can move the request through the proper chain. Figure out what happens when we fail. """ myThread = threading.currentThread() userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") result = self.jsonSender.put('request/testRequest', schema) self.assertEqual(result[1], 200) requestName = result[0]['RequestName'] # There should only be one request in the DB result = GetRequest.requestID(requestName = requestName) self.assertEqual(result, 1) result = self.jsonSender.get('request/%s' % requestName) self.assertEqual(result[0]['Group'], groupName) self.assertEqual(result[0]['Requestor'], userName) # Let's see what we can do in terms of setting status self.changeStatusAndCheck(requestName = requestName, statusName = 'new') # Let's try an illegal status change, just for the hell of it raises = False try: self.jsonSender.put('request/%s?status=assigned' % requestName) except HTTPException as ex: raises = True 
self.assertEqual(ex.status, 403) self.assertTrue('Failed to change status' in ex.result) self.assertFalse(requestName in ex.result) self.assertTrue(raises) # Now, let's try a totally bogus status raises = False try: self.jsonSender.put('request/%s?status=bogus' % requestName) except HTTPException as ex: raises = True self.assertEqual(ex.status, 403) self.assertTrue('Failed to change status' in ex.result) self.assertFalse(requestName in ex.result) self.assertTrue(raises) # We should still be in new result = self.jsonSender.get('request/%s' % requestName) self.assertEqual(result[0]['RequestStatus'], 'new') # Let's go on in a full loop self.changeStatusAndCheck(requestName = requestName, statusName = 'testing-approved') self.changeStatusAndCheck(requestName = requestName, statusName = 'testing') self.changeStatusAndCheck(requestName = requestName, statusName = 'tested') self.changeStatusAndCheck(requestName = requestName, statusName = 'assignment-approved') # This should fail, as you cannot assign a request without a team raises = False try: self.changeStatusAndCheck(requestName = requestName, statusName = 'assigned') except HTTPException as ex: raises = True self.assertTrue('Cannot change status without a team' in ex.result) self.assertTrue(raises) self.jsonSender.put(urllib.quote('assignment/%s/%s' % (teamName, requestName))) self.changeStatusAndCheck(requestName = requestName, statusName = 'acquired') self.changeStatusAndCheck(requestName = requestName, statusName = 'running-open') self.changeStatusAndCheck(requestName = requestName, statusName = 'running-closed') self.changeStatusAndCheck(requestName = requestName, statusName = 'completed') self.changeStatusAndCheck(requestName = requestName, statusName = 'closed-out') def changeStatusAndCheck(self, requestName, statusName): """ _changeStatusAndCheck_ Change the status of a request and make sure that the request actually did it. 
""" self.jsonSender.put('request/%s?status=%s' % (requestName, statusName)) result = self.jsonSender.get('request/%s' % requestName) self.assertEqual(result[0]['RequestStatus'], statusName) def loadWorkload(self, requestName): """ _loadWorkload_ Load the workload from couch after we've saved it there. """ workload = WMWorkloadHelper() url = '%s/%s/%s/spec' % (os.environ['COUCHURL'], self.couchDBName, requestName) workload.load(url) return workload def testF_TestWhitelistBlacklist(self): """ _TestWhitelistBlacklist_ Test whether or not we can assign the block/run blacklist/whitelist """ userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) schema['RunWhitelist'] = [1, 2, 3] schema['RunBlacklist'] = [4, 5, 6] schema['BlockWhitelist'] = ['/dataset/dataset/dataset#alpha'] schema['BlockBlacklist'] = ['/dataset/dataset/dataset#beta'] configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") result = self.jsonSender.put('request/testRequest', schema) self.assertEqual(result[1], 200) requestName = result[0]['RequestName'] workload = self.loadWorkload(requestName = requestName) self.assertEqual(workload.data.tasks.DataProcessing.input.dataset.runs.whitelist, schema['RunWhitelist']) self.assertEqual(workload.data.tasks.DataProcessing.input.dataset.runs.blacklist, schema['RunBlacklist']) self.assertEqual(workload.data.tasks.DataProcessing.input.dataset.blocks.whitelist, schema['BlockWhitelist']) self.assertEqual(workload.data.tasks.DataProcessing.input.dataset.blocks.blacklist, schema['BlockBlacklist']) req = self.jsonSender.get('request/%s' % requestName) self.assertTrue('Site Blacklist' in req[0]) self.assertTrue('Site Whitelist' in req[0]) schema['BlockBlacklist'] = {'1': '/dataset/dataset/dataset#beta'} try: raises = False result = self.jsonSender.put('request/testRequest', 
schema) except HTTPException as ex: raises = True self.assertEqual(ex.status, 400) print(ex.result) self.assertTrue("Error in Workload Validation: Argument BlockBlacklist type is incorrect in schema." in ex.result) pass self.assertTrue(raises) schema['BlockBlacklist'] = ['/dataset/dataset/dataset#beta'] schema['RunWhitelist'] = {'1': '/dataset/dataset/dataset#beta'} try: raises = False result = self.jsonSender.put('request/testRequest', schema) except HTTPException as ex: raises = True self.assertEqual(ex.status, 400) self.assertTrue("Error in Workload Validation: Argument RunWhitelist type is incorrect in schema." in ex.result) pass self.assertTrue(raises) schema['RunWhitelist'] = ['hello', 'how', 'are', 'you'] try: raises = True result = self.jsonSender.put('request/testRequest', schema) except HTTPException as ex: raises = True self.assertEqual(ex.status, 400) self.assertTrue("Error in Workload Validation: Argument RunWhitelist doesn't pass validation." in ex.result) pass self.assertTrue(raises) def testG_AddDuplicateUser(self): """ _AddDuplicateUser_ Test and see if we get a sensible error when adding a duplicate user. """ userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) raises = False try: self.jsonSender.put('group/%s/%s' % (groupName, userName)) except HTTPException as ex: self.assertTrue("User/Group Already Linked in DB" in ex.result) self.assertEqual(ex.status, 400) raises = True self.assertTrue(raises) def testH_RemoveSoftwareVersion(self): """ _RemoveSoftwareVersion_ Remove the software version after submitting the request. See what that does. 
""" myThread = threading.currentThread() userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") result = self.jsonSender.put('request/testRequest', schema) self.assertEqual(result[1], 200) requestName = result[0]['RequestName'] req = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(req['SoftwareVersions'], [schema["CMSSWVersion"]]) # Delete software versions and make sure they're gone from the DB SoftwareManagement.removeSoftware(softwareName = schema["CMSSWVersion"], scramArch = schema["ScramArch"]) versions = myThread.dbi.processData("SELECT * FROM reqmgr_software")[0].fetchall() self.assertEqual(versions, []) assocs = myThread.dbi.processData("SELECT * FROM reqmgr_software_dependency")[0].fetchall() self.assertEqual(assocs, []) req = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(req['SoftwareVersions'], [schema["CMSSWVersion"]]) def testI_CheckConfigIDs(self): """ _CheckConfigIDs_ Check to see if we can pull out the ConfigIDs by request """ userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) # Set some versions schema['ProcessingVersion'] = '2012' schema['AcquisitionEra'] = 'ae2012' schema["PrimaryDataset"] = "ReallyFake" schema["RequestNumEvents"] = 100 configID = self.createConfig() schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") schema["ConfigCacheID"] = configID schema["InputDataset"] = '/MinimumBias/Run2010B-RelValRawSkim-v1/RAW' result = self.jsonSender.put('request/testRequest', schema) self.assertEqual(result[1], 200) requestName = result[0]['RequestName'] result = 
self.jsonSender.get('configIDs?prim=MinimumBias&proc=Run2010B-RelValRawSkim-v1&tier=RAW')[0] print(result) self.assertTrue(requestName in result.keys()) self.assertTrue(configID in result[requestName][0]) def testJ_CheckRequestCloning(self): myThread = threading.currentThread() userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") schema["AcquisitionEra"] = "NewEra" result = self.jsonSender.put("request", schema) self.assertEqual(result[1], 200) requestName = result[0]["RequestName"] acquisitionEra = result[0]["AcquisitionEra"] self.assertTrue(schema["AcquisitionEra"], acquisitionEra) # set some non-default priority # when cloning a request which had some non default priority, # the priority values were lost when creating a cloned request, the # default values were lost. Change it here to specifically catch this case. 
priority = 300 result = self.jsonSender.put("request/%s?priority=%s" % (requestName, priority)) self.assertEqual(result[1], 200) # get the original request from the server, although the variable result # shall have the same stuff in response = self.jsonSender.get("request/%s" % requestName) origRequest = response[0] self.assertEqual(origRequest["AcquisitionEra"], acquisitionEra) # test that the priority was correctly set in the brand-new request self.assertEqual(origRequest["RequestPriority"], priority) # test cloning not existing request self.assertRaises(HTTPException, self.jsonSender.put, "clone/%s" % "NotExistingRequestName") # correct attempt to clone the request # this is the new request, it'll have different name result = self.jsonSender.put("clone/%s" % requestName) # get the cloned request from the server respose = self.jsonSender.get("request/%s" % result[0]["RequestName"]) clonedRequest = respose[0] # these request arguments shall differ in the cloned request: toDiffer = ["RequestName", "RequestStatus"] for differ in toDiffer: self.assertNotEqual(origRequest[differ], clonedRequest[differ]) # check the desired status of the cloned request self.assertEqual(clonedRequest["RequestStatus"], "assignment-approved", "Cloned request status should be 'assignment-approved', not '%s'." 
% clonedRequest["RequestStatus"]) # don't care about these two (they will likely be the same in the unittest # since the brand new request injection as well as the cloning probably # happen at roughly the same time) toDiffer.extend(["RequestDate", "timeStamp", "RequestWorkflow"]) for differ in toDiffer: del origRequest[differ] del clonedRequest[differ] # check the request dictionaries self.assertEqual(len(origRequest), len(clonedRequest)) for k1, k2 in zip(sorted(origRequest.keys()), sorted(clonedRequest.keys())): msg = ("Request values: original: %s: %s cloned: %s: %s differ" % (k1, origRequest[k1], k2, clonedRequest[k2])) self.assertEqual(origRequest[k1], clonedRequest[k2], msg) def testK_CheckRequestFailsInjectionForbiddenInputArg(self): myThread = threading.currentThread() userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) from WMCore.HTTPFrontEnd.RequestManager.ReqMgrRESTModel import deprecatedRequestArgs for deprec in deprecatedRequestArgs: schema = utils.getSchema(groupName=groupName, userName=userName) schema[deprec] = "something" self.assertRaises(HTTPException, self.jsonSender.put, "request", schema) def setupACDCDatabase(self, collectionName, taskPath, user, group): """ _setupACDCDatabase_ Populate an ACDC database with bogus records associated to certain collection name, user and task path. 
""" acdcServer = CouchService(url = self.testInit.couchUrl, database = "%s_acdc" % self.couchDBName) owner = acdcServer.newOwner(group, user) testCollection = CouchCollection(database = self.testInit.couchDbName, url = self.testInit.couchUrl, name = collectionName) testCollection.setOwner(owner) testFileset = CouchFileset(database = self.testInit.couchDbName, url = self.testInit.couchUrl, name = taskPath) testCollection.addFileset(testFileset) testFiles = [] for _ in range(5): testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096), events = random.randint(1024, 4096)) testFiles.append(testFile) testFileset.add(testFiles) def testL_CascadeCloseOutAnnnouncement(self): """ _testL_CascadeCloseOutAnnouncement_ Test the cascade closeout REST call, also check that when announced a request deletes all ACDC records in the system. """ userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") result = self.jsonSender.put("request", schema)[0] originalRequest = result['RequestName'] self.setupACDCDatabase(originalRequest, "/%s/DataProcessing" % originalRequest, result['Requestor'], result['Group']) depth = 2 nReq = 3 requests = [originalRequest] def createChildrenRequest(parentRequest, i, nReq): createdRequests = [] resubSchema = utils.getResubmissionSchema(parentRequest, "/%s/DataProcessing" % parentRequest, groupName, userName) result = self.jsonSender.put("request", resubSchema)[0] requestName = result['RequestName'] self.setupACDCDatabase(requestName, "/%s/DataProcessing" % requestName, result['Requestor'], result['Group']) createdRequests.append(requestName) if i: for _ in range(nReq): createdRequests.extend(createChildrenRequest(requestName, i - 1, nReq)) return createdRequests 
requests.extend(createChildrenRequest(originalRequest, depth, nReq)) for request in requests: self.changeStatusAndCheck(request, 'assignment-approved') for request in requests: self.jsonSender.put("assignment?team=%s&requestName=%s" % (teamName, request)) for status in ['acquired', 'running-open', 'running-closed', 'completed']: for request in requests: self.changeStatusAndCheck(request, status) self.jsonSender.post('closeout?requestName=%s&cascade=True' % originalRequest) svc = CouchService(url = self.testInit.couchUrl, database = "%s_acdc" % self.couchDBName) owner = svc.newOwner(groupName, userName) for request in requests: result = self.jsonSender.get('request/%s' % request) self.assertEqual(result[0]['RequestStatus'], 'closed-out') testCollection = CouchCollection(database = self.testInit.couchDbName, url = self.testInit.couchUrl, name = request) testCollection.setOwner(owner) testCollection.populate() self.assertNotEqual(len(testCollection["filesets"]), 0) self.jsonSender.post('announce?requestName=%s&cascade=True' % originalRequest) for request in requests: result = self.jsonSender.get('request/%s' % request) self.assertEqual(result[0]['RequestStatus'], 'announced') testCollection = CouchCollection(database = self.testInit.couchDbName, url = self.testInit.couchUrl, name = request) testCollection.setOwner(owner) testCollection.populate() self.assertEqual(len(testCollection["filesets"]), 0) def testM_PutRequestStats(self): userName = '******' groupName = 'Bryant' teamName = 'Lakers' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") result = self.jsonSender.put("request", schema)[0] originalRequest = result['RequestName'] stats = {'total_jobs': 100, 'input_events': 100, 'input_lumis': 100, 'input_num_files': 100} result = 
self.reqService.putRequestStats(originalRequest, stats) self.assertEqual(result['RequestName'], originalRequest)
class RequestLifeCycleBase_t():
    """
    Drive a request through its full ReqMgr life cycle against a live server:
    inject configs -> inject request -> approve -> assign -> wait for the
    WorkQueue / agent to pick it up -> wait for completion -> close out.

    Tests are ordered by their numeric prefix (test05, test10, ...) and share
    state through class attributes, so each step depends on the previous one.
    Subclasses are expected to provide ``requestParams`` — TODO confirm, it is
    read in test05/test10 but never defined here.
    """
    # Shared, mutated-in-place state for the ordered test steps.
    request = None          # request dict returned by the server
    request_name = None     # 'RequestName' of the injected request
    workqueue = None        # WorkQueue client, set once the request is acquired
    # Server endpoint; overridable via environment for non-local runs.
    endpoint = os.environ.get('REQMGRBASEURL', 'https://localhost:8443')
    reqmgr = RequestManager({'endpoint': endpoint + '/reqmgr/reqMgr'})
    team = 'TestTeam'
    _failure_detected = False  # set by @recordException to short-circuit later steps

    @recordException
    def setUp(self):
        # If an earlier step already failed, don't bother running the rest.
        if self.__class__._failure_detected:
            raise nose.SkipTest
        # simple ping check - check reqmgr up
        tries = 0
        while True:
            try:
                # Only ping while no request has been injected yet.
                if not self.__class__.request:
                    self.__class__.reqmgr.getTeam()
                break
            except:
                # Best-effort retry: up to 3 attempts, 15s apart, then skip.
                tries += 1
                if tries >= 3:
                    raise nose.SkipTest("Unable to contact reqmgr")
                time.sleep(15)

    def _configCacheId(self, label):
        """Return config cache id for given config label"""
        key, cert = self.__class__.reqmgr['requests'].getKeyCert()
        configCache = ConfigCache(self.__class__.endpoint + '/couchdb',
                                  'reqmgr_config_cache', ckey=key, cert=cert)
        try:
            configCacheId = configCache.getIDFromLabel(label)
        except:
            # Treat any lookup failure as "not cached yet".
            configCacheId = None
        if configCacheId:
            return configCacheId
        # Not cached: load the config file from the test data area and upload it.
        # The following will fail if FWCore.ParameterSet not in PYTHONPATH
        from PSetTweaks.WMTweak import makeTweak
        configCache.createUserGroup('test', 'test')
        configDir = os.path.join(getTestBase(), '..', '..', 'test', 'data', 'configs')
        configCache.addConfig(os.path.join(configDir, label + '.py'))
        configCache.setLabel(label)
        configCache.setDescription(label)
        modPath = imp.find_module(label, [configDir])
        loadedConfig = imp.load_module(label, modPath[0], modPath[1], modPath[2])
        configCache.setPSetTweaks(
            makeTweak(loadedConfig.process).jsondictionary())
        configCache.save()
        return configCache.getIDFromLabel(label)

    def _convertLabelsToId(self, config):
        """
        Replace config-label values in *config* with their config cache ids,
        recursing into TaskN sub-dicts for multi-task (TaskChain-style) specs.
        Mutates and returns *config*.
        """
        fields = ['ProcConfigCacheID', 'Skim1ConfigCacheID',
                  'StepOneConfigCacheID', 'ConfigCacheID']
        for field in fields:
            if config.get(field):
                config[field] = self._configCacheId(config[field])
        for field in ['Task1', 'Task2', 'Task3', 'Task4']:
            if config.get(field):
                config[field] = self._convertLabelsToId(config[field])
        return config

    @attr("lifecycle")
    @recordException
    def test05InjectConfigs(self):
        """Inject configs to cache"""
        self.__class__.requestParams = self._convertLabelsToId(
            self.__class__.requestParams)

    @attr("lifecycle")
    @recordException
    def test10InjectRequest(self):
        """Can inject a request"""
        self.__class__.requestParams.setdefault('RequestString',
                                                self.__class__.__name__)
        tries = 0
        while True:
            try:
                # makeRequest returns a wrapper dict keyed by the Request class path.
                self.__class__.request = self.__class__.reqmgr.makeRequest(
                    **self.__class__.requestParams
                    )['WMCore.RequestManager.DataStructs.Request.Request']
                self.__class__.request_name = self.__class__.request[
                    'RequestName']
                break
            except:
                # Retry injection a few times before giving up for real.
                tries += 1
                if tries > 3:
                    raise
        self.assertTrue(self.__class__.request)
        self.assertTrue(self.__class__.request_name)
        print("Injected request %s" % self.__class__.request_name)
        # Re-fetch from the server to verify the request landed as 'new'.
        self.__class__.request = self.__class__.reqmgr.getRequest(
            self.__class__.request_name)
        self.assertEqual(self.__class__.request['RequestStatus'], 'new')

    @attr("lifecycle")
    @recordException
    def test20ApproveRequest(self):
        """Approve request"""
        self.__class__.reqmgr.reportRequestStatus(self.__class__.request_name,
                                                  'assignment-approved')
        self.__class__.request = self.__class__.reqmgr.getRequest(
            self.__class__.request_name)
        self.assertEqual(self.__class__.request['RequestStatus'],
                         'assignment-approved')

    # @attr("lifecycle")
    @recordException
    def test30AssignRequest(self):
        """Assign request"""
        self.__class__.reqmgr.assign(self.__class__.request_name,
                                     self.__class__.team, "Testing", "v1",
                                     MergedLFNBase='/store/temp',
                                     UnmergedLFNBase='/store/temp')
        self.__class__.request = self.reqmgr.getRequest(
            self.__class__.request_name)
        self.assertEqual(self.__class__.request['RequestStatus'], 'assigned')

    @attr("lifecycle")
    @recordException
    def test40WorkQueueAcquires(self):
        """WorkQueue picks up request"""
        if not self.__class__.request_name:
            raise nose.SkipTest
        start = time.time()
        # Poll (15s period, 20 min timeout) until the global workqueue reports
        # an element for this request in an active state.
        while True:
            workqueue = self.reqmgr.getWorkQueue(
                request=self.__class__.request_name)
            if workqueue:
                self.__class__.workqueue = WorkQueue(workqueue[0])
                self.__class__.request = self.__class__.reqmgr.getRequest(
                    self.__class__.request_name)
                self.assertTrue(
                    self.__class__.request['RequestStatus'] in ('acquired',
                                                                'running'))
                request = [x for x in self.__class__.workqueue.getJobStatusByRequest() if
                           x['request_name'] == self.__class__.request_name]
                if [x for x in request
                        if x['status'] in ('Available', 'Negotiating',
                                           'Acquired', 'Running')]:
                    break
            if start + (60 * 20) < time.time():
                raise RuntimeError('timeout waiting for workqueue to acquire')
            time.sleep(15)

    @attr("lifecycle")
    @recordException
    def test50AgentAcquires(self):
        """Elements acquired by agent"""
        # skip if request already running
        self.__class__.request = self.__class__.reqmgr.getRequest(
            self.__class__.request_name)
        if self.__class__.request['RequestStatus'] == 'running':
            raise nose.SkipTest
        start = time.time()
        while True:
            request = [x for x in self.__class__.workqueue.getJobStatusByRequest() if
                       x['request_name'] == self.__class__.request_name]
            if [x for x in request if x['status'] in ('Acquired', 'Running')]:
                break
            if start + (60 * 20) < time.time():
                raise RuntimeError('timeout waiting for agent to acquire')
            time.sleep(15)
        self.assertTrue(
            [x for x in request if x['status'] in ('Acquired', 'Running')])

    @attr("lifecycle")
    @recordException
    def test60RequestRunning(self):
        """Request running"""
        start = time.time()
        while True:
            request = [x for x in self.__class__.workqueue.getJobStatusByRequest() if
                       x['request_name'] == self.__class__.request_name]
            childQueue = [x for x in self.__class__.workqueue.getChildQueuesByRequest() if
                          x['request_name'] == self.__class__.request_name]
            if request and 'Running' in [x['status'] for x in request]:
                # A running request must be visible in some child (agent) queue.
                self.assertTrue(childQueue, "Running but can't get child queue")
                break
            if start + (60 * 20) < time.time():
                raise RuntimeError('timeout waiting for request to run')
            time.sleep(15)

    @attr("lifecycle")
    @recordException
    def test70WorkQueueFinished(self):
        """Request completed in workqueue"""
        start = time.time()
        while True:
            request = [x for x in self.__class__.workqueue.getJobStatusByRequest() if
                       x['request_name'] == self.__class__.request_name]
            # request deleted from wq shortly after finishing, so may not appear here
            if not request or request == [
                    x for x in request
                    if x['status'] in ('Done', 'Failed', 'Canceled')]:
                break
            if start + (60 * 20) < time.time():
                raise RuntimeError('timeout waiting for request to finish')
            time.sleep(15)

    @attr("lifecycle")
    @recordException
    def test80RequestFinished(self):
        """Request completed"""
        start = time.time()
        while True:
            self.__class__.request = self.__class__.reqmgr.getRequest(
                self.__class__.request_name)
            if self.__class__.request['RequestStatus'] in ('completed',
                                                           'failed',
                                                           'aborted'):
                break
            if start + (60 * 20) < time.time():
                raise RuntimeError('timeout waiting for request to finish')
            time.sleep(15)

    @attr("lifecycle")
    @recordException
    def test90RequestCloseOut(self):
        """Closeout request"""
        self.reqmgr.reportRequestStatus(self.__class__.request_name,
                                        "closed-out")
        self.__class__.request = self.__class__.reqmgr.getRequest(
            self.__class__.request_name)
        self.assertEqual('closed-out', self.__class__.request['RequestStatus'])
class RequestManagerTest(RESTBaseUnitTest):
    """
    Test RequestMgr Service client
    It will start RequestMgr RESTService
    Server DB is whatever env is set
    This checks whether DS call makes without error and return the results.
    This test only test service call returns without error.
    The correctness of each function is tested in test/python/RequestManager_t/RequestMgr_t.py
    """

    def initialize(self):
        """Configure the embedded ReqMgr REST server (DB from $DATABASE, port 8899)."""
        self.couchDBName = "reqmgr_t_0"
        self.config = RequestManagerConfig(
            'WMCore.HTTPFrontEnd.RequestManager.ReqMgrRESTModel')
        dbUrl = os.environ.get("DATABASE", None)
        self.config.setDBUrl(dbUrl)
        self.config.setFormatter('WMCore.WebTools.RESTFormatter')
        self.config.setupRequestConfig()
        self.config.setupCouchDatabase(dbName = self.couchDBName)
        self.config.setPort(8899)
        self.schemaModules = ["WMCore.RequestManager.RequestDB"]

    def setUp(self):
        """
        Start the REST server, set up couch databases and inject one request
        whose name is kept in ``self.requestName`` for the tests to use.
        """
        RESTBaseUnitTest.setUp(self)
        self.testInit.setupCouch("%s" % self.couchDBName,
                                 "GroupUser", "ConfigCache", "ReqMgr")
        self.testInit.setupCouch("%s_wmstats" % self.couchDBName, "WMStats")
        # logging stuff from TestInit is broken, setting myself
        rootLogger = logging.getLogger()
        rootLogger.setLevel(logging.DEBUG)
        self.params = {}
        self.params['endpoint'] = self.config.getServerUrl()
        self.reqService = RequestManagerDS(self.params)
        self.jsonSender = JSONRequests(self.config.getServerUrl())
        userName = '******'
        groupName = 'Li'
        teamName = 'Tang'
        schema = utils.getAndSetupSchema(self,
                                         userName = userName,
                                         groupName = groupName,
                                         teamName = teamName)
        schema['ConfigCacheID'] = self.createConfig()
        schema['CouchDBName'] = self.couchDBName
        schema['CouchWorkloadDBName'] = self.couchDBName
        try:
            r = self.jsonSender.put('request', schema)
            try:
                self.requestName = r[0]['RequestName']
            # The response shape differs between server versions: either the
            # request dict itself or a dict keyed by request name. Narrowed
            # from a bare except so KeyboardInterrupt/SystemExit pass through.
            except Exception:
                self.requestName = r[0].values()[0]['RequestName']
        except Exception:
            msg = traceback.format_exc()
            print("Exception during set up, reason: %s" % msg)
            # Bare raise preserves the original traceback ('raise ex' would
            # re-raise from here and lose where the failure really happened).
            raise

    def tearDown(self):
        """Drop the workload cache, stop the server and delete couch DBs."""
        self.config.deleteWorkloadCache()
        RESTBaseUnitTest.tearDown(self)
        self.testInit.tearDownCouch()

    def createConfig(self, bad = False):
        """
        _createConfig_

        Create a config of some sort that we can load out of ConfigCache.
        With bad=True the tweak declares two output modules (used by tests
        that need an invalid/ambiguous config). Returns the couch document id.
        """
        PSetTweak = {'process': {'outputModules_': ['ThisIsAName'],
                                 'ThisIsAName': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}}}}
        BadTweak = {'process': {'outputModules_': ['ThisIsAName1', 'ThisIsAName2'],
                                'ThisIsAName1': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}},
                                'ThisIsAName2': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}}}}
        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName = self.couchDBName)
        configCache.createUserGroup(groupname = "testGroup", username = '******')
        if bad:
            configCache.setPSetTweaks(PSetTweak = BadTweak)
        else:
            configCache.setPSetTweaks(PSetTweak = PSetTweak)
        configCache.save()
        return configCache.getCouchID()

    @attr("integration")
    def testA_RequestManagerService(self):
        """Smoke-test the RequestManager DS client calls against the live server."""
        requestName = self.requestName
        request = self.reqService.getRequest(requestName)
        # minimal test : it's return type and the some value inside
        self.assertEqual(type(request), dict)
        self.assertTrue(len(request) > 0)
        # Test putTeam
        self.reqService.putTeam("team_usa")
        self.assertTrue('team_usa' in self.jsonSender.get('team')[0])
        self.jsonSender.put('assignment/%s/%s' % ("team_usa", requestName))
        request = self.reqService.getAssignment(teamName = "team_usa")
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)
        request = self.reqService.getAssignment(request = requestName)
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)
        # Remaining calls are checked only for "does not raise".
        self.reqService.sendMessage(requestName, "error")
        self.reqService.putWorkQueue(requestName, "http://test_url")
        self.reqService.reportRequestProgress(requestName,
                                              percent_complete = 100,
                                              percent_success = 90)
        self.reqService.updateRequestStatus(requestName, "running-open")
def __init__(self, reqMgrUrl):
    """Remember the ReqMgr base URL and build the REST and service clients for it."""
    self.reqMgrUrl = reqMgrUrl
    self.restSender = JSONRequests(reqMgrUrl)
    self.reqMgrService = RequestManager({'endpoint': self.reqMgrUrl})
def __init__(self, reqMgrUrl):
    """Store the ReqMgr URL and create the JSON sender plus the service wrapper."""
    self.reqMgrUrl = reqMgrUrl
    self.restSender = JSONRequests(reqMgrUrl)
    endpointArgs = dict(endpoint = self.reqMgrUrl)
    self.reqMgrService = RequestManager(endpointArgs)
class RequestManagerTest(RESTBaseUnitTest):
    """
    Test RequestMgr Service client
    It will start RequestMgr RESTService
    Server DB is whatever env is set
    This checks whether DS call makes without error and return the results.
    This test only test service call returns without error.
    The correctness of each function is tested in test/python/RequestManager_t/RequestMgr_t.py
    """

    def initialize(self):
        # Configure the embedded ReqMgr REST server; DB comes from $DATABASE,
        # server listens on port 8888.
        self.couchDBName = "reqmgr_t_0"
        self.config = RequestManagerConfig(
            'WMCore.HTTPFrontEnd.RequestManager.ReqMgrRESTModel')
        dbUrl = os.environ.get("DATABASE", None)
        self.config.setDBUrl(dbUrl)
        self.config.setFormatter('WMCore.WebTools.RESTFormatter')
        self.config.setupRequestConfig()
        self.config.setupCouchDatabase(dbName = self.couchDBName)
        self.config.setPort(8888)
        self.schemaModules = ["WMCore.RequestManager.RequestDB"]
        return

    def setUp(self):
        """
        setUP global values
        """
        RESTBaseUnitTest.setUp(self)
        self.testInit.setupCouch("%s" % self.couchDBName,
                                 "GroupUser", "ConfigCache")
        self.params = {}
        self.params['endpoint'] = self.config.getServerUrl()
        self.reqService = RequestManagerDS(self.params)
        self.jsonSender = JSONRequests(self.config.getServerUrl())
        # Seed group/user/version foreign keys, then inject one request whose
        # name the tests use.  getRequestSchema is presumably a module-level
        # helper — defined outside this view.
        self.requestSchema = getRequestSchema()
        self.jsonSender.put('group/PeopleLikeMe')
        self.jsonSender.put('user/[email protected]')
        self.jsonSender.put('group/PeopleLikeMe/me')
        self.jsonSender.put('version/CMSSW_3_5_8')
        r = self.jsonSender.put('request/' + self.requestSchema['RequestName'],
                                self.requestSchema)
        self.requestName = r[0]['RequestName']

    def tearDown(self):
        # Drop the workload cache, stop the server, delete couch databases.
        self.config.deleteWorkloadCache()
        RESTBaseUnitTest.tearDown(self)
        self.testInit.tearDownCouch()

    @attr("integration")
    def testA_RequestManagerService(self):
        # Smoke-test: each DS client call must return without raising; only
        # return types / non-emptiness are asserted.
        requestName = self.requestName
        request = self.reqService.getRequest(requestName)
        # minimal test : it's return type and the some value inside
        self.assertEqual(type(request), dict)
        self.assertTrue(len(request) > 0)
        # Test putTeam
        self.reqService.putTeam("team_usa")
        self.assertTrue('team_usa' in self.jsonSender.get('team')[0])
        self.jsonSender.put('assignment/%s/%s' % ("team_usa", requestName))
        request = self.reqService.getAssignment(teamName = "team_usa")
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)
        request = self.reqService.getAssignment(request = requestName)
        self.assertEqual(type(request), list)
        self.assertTrue(len(request) > 0)
        # The remaining calls are only checked for "does not raise".
        self.reqService.sendMessage(requestName,"error")
        self.reqService.putWorkQueue(requestName, "http://test_url")
        self.reqService.reportRequestProgress(requestName)
        self.reqService.reportRequestProgress(requestName,
                                              percent_complete = 100,
                                              percent_success = 90)
        self.reqService.reportRequestStatus(requestName, "running")
        return
# Startup script: launch the WMStats data-collection service under CherryPy.
# Pulls requests from ReqMgr on a timer and pushes them into WMStats couch.
# NOTE(review): assumes a module-level `cfg` object already exists before this
# point (its WMStats section is selected here) — defined outside this view.
cfg = cfg.WMStats

import cherrypy
from WMCore.Services.RequestManager.RequestManager import RequestManager
from WMCore.Services.WorkQueue.WorkQueue import WorkQueue
from WMCore.CherryPyThread.PeriodicWorker import PeriodicWorker
from WMCore.WMStats.DataCollectTask import DataCollectTask
import logging

cherrypy.log.error_log.setLevel(logging.DEBUG)
cherrypy.log.access_log.setLevel(logging.DEBUG)
cherrypy.config["server.socket_port"] = cfg.port

#def sayHello(test):
#    print "Hello"
#PeriodicWorker(sayHello, 5)

# get reqmgr url from config
reqmgrSvc = RequestManager({'endpoint': cfg.reqmgrURL})
wqSvc = WorkQueue(cfg.globalQueueURL)
# NOTE(review): WMStatSevice (sic) and reqmgrDataFormat are not defined or
# imported in this view — presumably provided earlier in the file; verify,
# otherwise this script fails at runtime with NameError.
wmstatSvc = WMStatSevice(cfg.couchURL)
# Task pipeline: fetch requests -> reformat -> upload to WMStats.
reqmgrTask = DataCollectTask(reqmgrSvc.getRequest, reqmgrDataFormat, wmstatSvc.uploadData)
#reqmgrTask = DataCollectTask(reqmgrSvc.getRequestNames, lambda x: x, wmstatSvc.uploadData)
#wqTask = DataCollectTask(wqSvc.getTopLevelJobsByRequest, wqDataFormat, wmstatSvc.uploadData)
reqmgrWorker = PeriodicWorker(reqmgrTask, cfg.pollInterval)
#wqWorker = PeriodicWorker(wqTask, 200)
cherrypy.quickstart()
class ReqMgrTest(RESTBaseUnitTest): """ Basic test for the ReqMgr services. Setup is done off-screen in RESTBaseUnitTest - this makes things confusing """ def setUp(self): """ setUP global values Database setUp is done in base class """ self.couchDBName = "reqmgr_t_0" RESTBaseUnitTest.setUp(self) self.testInit.setupCouch("%s" % self.couchDBName, "ConfigCache", "ReqMgr") self.testInit.setupCouch("%s_wmstats" % self.couchDBName, "WMStats") self.testInit.setupCouch("%s_acdc" % self.couchDBName, "ACDC", "GroupUser") reqMgrHost = self.config.getServerUrl() self.jsonSender = JSONRequests(reqMgrHost) self.params = {} self.params['endpoint'] = reqMgrHost self.reqService = RequestManager(self.params) def initialize(self): self.config = RequestManagerConfig( 'WMCore.HTTPFrontEnd.RequestManager.ReqMgrRESTModel') self.config.setFormatter('WMCore.WebTools.RESTFormatter') self.config.setupRequestConfig() self.config.setupCouchDatabase(dbName = self.couchDBName) self.config.setPort(12888) self.schemaModules = ["WMCore.RequestManager.RequestDB"] def tearDown(self): """ tearDown Tear down everything """ RESTBaseUnitTest.tearDown(self) self.testInit.tearDownCouch() def createConfig(self, bad = False): """ _createConfig_ Create a config of some sort that we can load out of ConfigCache """ PSetTweak = {'process': {'outputModules_': ['ThisIsAName'], 'ThisIsAName': {'dataset': {'dataTier': 'RECO', 'filterName': 'Filter'}}}} BadTweak = {'process': {'outputModules_': ['ThisIsAName1', 'ThisIsAName2'], 'ThisIsAName1': {'dataset': {'dataTier': 'RECO', 'filterName': 'Filter'}}, 'ThisIsAName2': {'dataset': {'dataTier': 'RECO', 'filterName': 'Filter'}}}} configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = self.couchDBName) configCache.createUserGroup(groupname = "testGroup", username = '******') if bad: configCache.setPSetTweaks(PSetTweak = BadTweak) else: configCache.setPSetTweaks(PSetTweak = PSetTweak) configCache.save() return configCache.getCouchID() @attr("integration") def 
testA_testBasicSetUp(self): """ _testBasicSetUp_ Moving the tests that were in the setUp category out of it, mostly because I want to make sure that they don't fail inside the setUp statement. """ if 'me' in self.jsonSender.get('user')[0]: self.jsonSender.delete('user/me') self.assertFalse('me' in self.jsonSender.get('user')[0]) self.assertEqual(self.jsonSender.put('user/[email protected]')[1], 200) self.assertTrue('me' in self.jsonSender.get('user')[0]) if 'PeopleLikeMe' in self.jsonSender.get('group')[0]: self.jsonSender.delete('group/PeopleLikeMe') self.assertFalse('PeopleLikeMe' in self.jsonSender.get('group')[0]) self.assertEqual(self.jsonSender.put('group/PeopleLikeMe')[1], 200) self.assertTrue( 'PeopleLikeMe' in self.jsonSender.get('group')[0]) self.jsonSender.put('group/PeopleLikeMe/me') users = self.jsonSender.get('group/PeopleLikeMe')[0]['users'] self.assertTrue('me' in users) groups = self.jsonSender.get('user/me')[0]['groups'] self.assertTrue('PeopleLikeMe' in groups) groups2 = self.jsonSender.get('group?user=me')[0] self.assertTrue('PeopleLikeMe' in groups2) if 'White Sox' in self.jsonSender.get('team')[0]: self.jsonSender.delete(urllib.quote('team/White Sox')) self.assertFalse('White Sox' in self.jsonSender.get('team')[0]) self.assertEqual(self.jsonSender.put(urllib.quote('team/White Sox'))[1], 200) self.assertTrue('White Sox' in self.jsonSender.get('team')[0]) # some foreign key stuff to deal with schema = utils.getSchema() version = "version/" + schema["CMSSWVersion"] self.assertTrue(self.jsonSender.put(version)[1] == 200) self.assertTrue(schema["CMSSWVersion"] in self.jsonSender.get('version')[0]) @attr("integration") def testB_ReReco(self): """ _ReReco_ Try a basic ReReco workflow """ schema = utils.getAndSetupSchema(self) schema['RequestNumEvents'] = 100 schema['SizePerEvent'] = 101 configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") 
self.doRequest(schema) def doRequest(self, schema): """ _doRequest_ Run all tests on a basic ReReco workflow """ requestName = schema['RequestName'] self.assertRaises(HTTPException, self.jsonSender.delete, 'request/%s' % requestName) result = self.jsonSender.put('request/%s' % (requestName), schema) self.assertEqual(result[1], 200) requestName = result[0]['RequestName'] self.assertEqual(self.jsonSender.get('request/%s' % requestName)[0]['RequestName'], requestName) self.jsonSender.put('request/%s?status=assignment-approved' % requestName) me = self.jsonSender.get('user/me')[0] self.assertTrue(requestName in me['requests']) self.assertEqual(self.jsonSender.put('request/%s?priority=5' % requestName)[1], 200) request = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(request['RequestPriority'], 5) # Check LFN Bases self.assertEqual(request['UnmergedLFNBase'], '/store/unmerged') self.assertEqual(request['MergedLFNBase'], '/store/data') # Check Num events self.assertEqual(request['RequestNumEvents'], 100) self.assertEqual(request['SizePerEvent'], 101) # only certain transitions allowed #self.assertEqual(self.jsonSender.put('request/%s?status=running' % requestName)[1], 400) self.assertRaises(HTTPException, self.jsonSender.put,'request/%s?status=running' % requestName) request = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(request['RequestStatus'], 'assignment-approved') self.assertTrue(self.jsonSender.put(urllib.quote('assignment/White Sox/%s' % requestName))[1] == 200) requestsAndSpecs = self.jsonSender.get(urllib.quote('assignment/White Sox'))[0] self.assertTrue(requestName in requestsAndSpecs[0]) workloadHelper = WMWorkloadHelper() workloadHelper.load(requestsAndSpecs[0][1]) self.assertEqual(workloadHelper.getOwner()['Requestor'], "me") self.assertEqual(self.jsonSender.get('assignment?request=%s'% requestName)[0], ['White Sox']) self.assertEqual(self.jsonSender.get('request/%s' % requestName)[0]['teams'], ['White Sox']) 
agentUrl = 'http://cmssrv96.fnal.gov/workqueue' self.jsonSender.put('workQueue/%s?url=%s'% (requestName, urllib.quote(agentUrl)) ) self.assertEqual(self.jsonSender.get('workQueue/%s' % requestName)[0][0], agentUrl) request = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(request['RequestStatus'], 'acquired') self.jsonSender.post('request/%s?events_written=10&files_merged=1' % requestName) self.jsonSender.post('request/%s?events_written=20&files_merged=2&percent_success=99.9' % requestName) request = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(len(request['RequestUpdates']), 2) self.assertEqual(request['RequestUpdates'][0]['files_merged'], 1) self.assertEqual(request['RequestUpdates'][1]['events_written'], 20) self.assertEqual(request['RequestUpdates'][1]['percent_success'], 99.9) message = "The sheriff is near" jsonMessage = json.dumps(message) self.jsonSender.put('message/%s' % requestName, message) messages = self.jsonSender.get('message/%s' % requestName) #self.assertEqual(messages[0][0][0], message) for status in ['running-open', 'running-closed', 'completed']: self.jsonSender.put('request/%s?status=%s' % (requestName, status)) # campaign self.jsonSender.put('campaign/%s' % 'TestCampaign') campaigns = self.jsonSender.get('campaign')[0] self.assertTrue('TestCampaign' in campaigns.keys()) self.jsonSender.put('campaign/%s/%s' % ('TestCampaign', requestName)) requestsInCampaign = self.jsonSender.get('campaign/%s' % 'TestCampaign')[0] self.assertTrue(requestName in requestsInCampaign.keys()) req = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(req['Campaign'], 'TestCampaign') self.jsonSender.delete('request/%s' % requestName) @attr("integration") def testC_404Errors(self): """ _404Errors_ Do some things that generate 404 errors. This should be limited to requests for objects that do not exist. 
""" badName = 'ThereIsNoWayThisNameShouldExist' # First, try to find a non-existent request # This should throw a 404 error. # The request name should not be in it self.checkForError(cls = 'request', badName = badName, exitCode = 404, message = 'Given requestName not found') # Now look for non-existent user self.checkForError(cls = 'user', badName = badName, exitCode = 404, message = 'Cannot find user') # Now try non-existent campaign self.checkForError(cls = 'campaign', badName = badName, exitCode = 404, message = "Cannot find campaign") # Now try invalid message # This raises a requestName error because it searches for the request self.checkForError(cls = 'message', badName = badName, exitCode = 404, message = "Given requestName not found", testEmpty = False) # Check for assignments (no teams or requests) # This raises a team error because it tries to load teams out first self.checkForError(cls = 'assignment', badName = badName, exitCode = 404, message = 'Cannot find team') @attr("integration") def testD_400Errors(self): """ _400Errors_ These are failures created by invalid input, such as sending args to a request when it doesn't accept any. They should generatore 400 Errors """ badName = 'ThereIsNoWayThisNameShouldExist' # Attempt to send arguments to a function that doesn't accept them. self.checkForError(cls = 'team', badName = badName, exitCode = 400, message = "Invalid input: Arguments added where none allowed") # Recheck for versions self.checkForError(cls = 'version', badName = badName, exitCode = 400, message = "Invalid input: Arguments added where none allowed") # Break the validation self.checkForError(cls = 'user', badName = '!', exitCode = 400, message = 'Invalid input: Input data failed validation') def checkForError(self, cls, badName, exitCode, message, testEmpty = True): """ _checkForError_ Generic function for checking for errors in JSON commands Does a basic check on type cls searching for name badName which hopefull does not exist. 
Checks to make sure that it exits with code exitCode, and that the error contains the string message. Also checks to make sure that name badName is NOT in the output testEmpty for those that don't handle calls to the main (i.e., who require an argument) """ raises = False # First assert that the test to be tested is empty if testEmpty: result = self.jsonSender.get(cls) self.assertTrue(type(result[0]) in [type([]), type({})]) # Next, test try: result = self.jsonSender.get('%s/%s' % (cls, badName)) except HTTPException as ex: raises = True self.assertEqual(ex.status, exitCode) self.assertTrue(message in ex.result) self.assertFalse(badName in ex.result) self.assertTrue(raises) @attr("integration") def testE_CheckStatusChanges(self): """ _CheckStatusChanges_ Check status changes for a single request. See whether we can move the request through the proper chain. Figure out what happens when we fail. """ myThread = threading.currentThread() userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") result = self.jsonSender.put('request/testRequest', schema) self.assertEqual(result[1], 200) requestName = result[0]['RequestName'] # There should only be one request in the DB result = GetRequest.requestID(requestName = requestName) self.assertEqual(result, 1) result = self.jsonSender.get('request/%s' % requestName) self.assertEqual(result[0]['Group'], groupName) self.assertEqual(result[0]['Requestor'], userName) # Let's see what we can do in terms of setting status self.changeStatusAndCheck(requestName = requestName, statusName = 'new') # Let's try an illegal status change, just for the hell of it raises = False try: self.jsonSender.put('request/%s?status=negotiating' % requestName) except HTTPException as ex: raises = True 
self.assertEqual(ex.status, 403) self.assertTrue('Failed to change status' in ex.result) self.assertFalse(requestName in ex.result) self.assertTrue(raises) # Now, let's try a totally bogus status raises = False try: self.jsonSender.put('request/%s?status=bogus' % requestName) except HTTPException as ex: raises = True self.assertEqual(ex.status, 403) self.assertTrue('Failed to change status' in ex.result) self.assertFalse(requestName in ex.result) self.assertTrue(raises) # We should still be in new result = self.jsonSender.get('request/%s' % requestName) self.assertEqual(result[0]['RequestStatus'], 'new') # Let's go on in a full loop self.changeStatusAndCheck(requestName = requestName, statusName = 'testing-approved') self.changeStatusAndCheck(requestName = requestName, statusName = 'testing') self.changeStatusAndCheck(requestName = requestName, statusName = 'tested') self.changeStatusAndCheck(requestName = requestName, statusName = 'assignment-approved') # This should fail, as you cannot assign a request without a team raises = False try: self.changeStatusAndCheck(requestName = requestName, statusName = 'assigned') except HTTPException as ex: raises = True self.assertTrue('Cannot change status without a team' in ex.result) self.assertTrue(raises) self.jsonSender.put(urllib.quote('assignment/%s/%s' % (teamName, requestName))) self.changeStatusAndCheck(requestName = requestName, statusName = 'negotiating') self.changeStatusAndCheck(requestName = requestName, statusName = 'acquired') self.changeStatusAndCheck(requestName = requestName, statusName = 'running-open') self.changeStatusAndCheck(requestName = requestName, statusName = 'running-closed') self.changeStatusAndCheck(requestName = requestName, statusName = 'completed') self.changeStatusAndCheck(requestName = requestName, statusName = 'closed-out') def changeStatusAndCheck(self, requestName, statusName): """ _changeStatusAndCheck_ Change the status of a request and make sure that the request actually did it. 
""" self.jsonSender.put('request/%s?status=%s' % (requestName, statusName)) result = self.jsonSender.get('request/%s' % requestName) self.assertEqual(result[0]['RequestStatus'], statusName) def loadWorkload(self, requestName): """ _loadWorkload_ Load the workload from couch after we've saved it there. """ workload = WMWorkloadHelper() url = '%s/%s/%s/spec' % (os.environ['COUCHURL'], self.couchDBName, requestName) workload.load(url) return workload def testF_TestWhitelistBlacklist(self): """ _TestWhitelistBlacklist_ Test whether or not we can assign the block/run blacklist/whitelist """ userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) schema['RunWhitelist'] = [1, 2, 3] schema['RunBlacklist'] = [4, 5, 6] schema['BlockWhitelist'] = ['/dataset/dataset/dataset#alpha'] schema['BlockBlacklist'] = ['/dataset/dataset/dataset#beta'] configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") result = self.jsonSender.put('request/testRequest', schema) self.assertEqual(result[1], 200) requestName = result[0]['RequestName'] workload = self.loadWorkload(requestName = requestName) self.assertEqual(workload.data.tasks.DataProcessing.input.dataset.runs.whitelist, schema['RunWhitelist']) self.assertEqual(workload.data.tasks.DataProcessing.input.dataset.runs.blacklist, schema['RunBlacklist']) self.assertEqual(workload.data.tasks.DataProcessing.input.dataset.blocks.whitelist, schema['BlockWhitelist']) self.assertEqual(workload.data.tasks.DataProcessing.input.dataset.blocks.blacklist, schema['BlockBlacklist']) req = self.jsonSender.get('request/%s' % requestName) self.assertTrue('Site Blacklist' in req[0]) self.assertTrue('Site Whitelist' in req[0]) schema['BlockBlacklist'] = {'1': '/dataset/dataset/dataset#beta'} try: raises = False result = self.jsonSender.put('request/testRequest', 
schema) except HTTPException as ex: raises = True self.assertEqual(ex.status, 400) print ex.result self.assertTrue("Error in Workload Validation: Argument BlockBlacklist type is incorrect in schema." in ex.result) pass self.assertTrue(raises) schema['BlockBlacklist'] = ['/dataset/dataset/dataset#beta'] schema['RunWhitelist'] = {'1': '/dataset/dataset/dataset#beta'} try: raises = False result = self.jsonSender.put('request/testRequest', schema) except HTTPException as ex: raises = True self.assertEqual(ex.status, 400) self.assertTrue("Error in Workload Validation: Argument RunWhitelist type is incorrect in schema." in ex.result) pass self.assertTrue(raises) schema['RunWhitelist'] = ['hello', 'how', 'are', 'you'] try: raises = True result = self.jsonSender.put('request/testRequest', schema) except HTTPException as ex: raises = True self.assertEqual(ex.status, 400) self.assertTrue("Error in Workload Validation: Argument RunWhitelist doesn't pass validation." in ex.result) pass self.assertTrue(raises) def testG_AddDuplicateUser(self): """ _AddDuplicateUser_ Test and see if we get a sensible error when adding a duplicate user. """ userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) raises = False try: self.jsonSender.put('group/%s/%s' % (groupName, userName)) except HTTPException as ex: self.assertTrue("User/Group Already Linked in DB" in ex.result) self.assertEqual(ex.status, 400) raises = True self.assertTrue(raises) def testH_RemoveSoftwareVersion(self): """ _RemoveSoftwareVersion_ Remove the software version after submitting the request. See what that does. 
""" myThread = threading.currentThread() userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") result = self.jsonSender.put('request/testRequest', schema) self.assertEqual(result[1], 200) requestName = result[0]['RequestName'] req = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(req['SoftwareVersions'], [schema["CMSSWVersion"]]) # Delete software versions and make sure they're gone from the DB SoftwareManagement.removeSoftware(softwareName = schema["CMSSWVersion"], scramArch = schema["ScramArch"]) versions = myThread.dbi.processData("SELECT * FROM reqmgr_software")[0].fetchall() self.assertEqual(versions, []) assocs = myThread.dbi.processData("SELECT * FROM reqmgr_software_dependency")[0].fetchall() self.assertEqual(assocs, []) req = self.jsonSender.get('request/%s' % requestName)[0] self.assertEqual(req['SoftwareVersions'], [schema["CMSSWVersion"]]) def testI_CheckConfigIDs(self): """ _CheckConfigIDs_ Check to see if we can pull out the ConfigIDs by request """ userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) # Set some versions schema['ProcessingVersion'] = '2012' schema['AcquisitionEra'] = 'ae2012' schema["PrimaryDataset"] = "ReallyFake" schema["RequestNumEvents"] = 100 configID = self.createConfig() schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") schema["ConfigCacheID"] = configID schema["InputDataset"] = '/MinimumBias/Run2010B-RelValRawSkim-v1/RAW' result = self.jsonSender.put('request/testRequest', schema) self.assertEqual(result[1], 200) requestName = result[0]['RequestName'] result = 
self.jsonSender.get('configIDs?prim=MinimumBias&proc=Run2010B-RelValRawSkim-v1&tier=RAW')[0] print result self.assertTrue(requestName in result.keys()) self.assertTrue(configID in result[requestName][0]) def testJ_CheckRequestCloning(self): myThread = threading.currentThread() userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") schema["AcquisitionEra"] = "NewEra" result = self.jsonSender.put("request", schema) self.assertEqual(result[1], 200) requestName = result[0]["RequestName"] acquisitionEra = result[0]["AcquisitionEra"] self.assertTrue(schema["AcquisitionEra"], acquisitionEra) # set some non-default priority # when cloning a request which had some non default priority, # the priority values were lost when creating a cloned request, the # default values were lost. Change it here to specifically catch this case. 
priority = 300 result = self.jsonSender.put("request/%s?priority=%s" % (requestName, priority)) self.assertEqual(result[1], 200) # get the original request from the server, although the variable result # shall have the same stuff in response = self.jsonSender.get("request/%s" % requestName) origRequest = response[0] self.assertEquals(origRequest["AcquisitionEra"], acquisitionEra) # test that the priority was correctly set in the brand-new request self.assertEquals(origRequest["RequestPriority"], priority) # test cloning not existing request self.assertRaises(HTTPException, self.jsonSender.put, "clone/%s" % "NotExistingRequestName") # correct attempt to clone the request # this is the new request, it'll have different name result = self.jsonSender.put("clone/%s" % requestName) # get the cloned request from the server respose = self.jsonSender.get("request/%s" % result[0]["RequestName"]) clonedRequest = respose[0] # these request arguments shall differ in the cloned request: toDiffer = ["RequestName", "RequestStatus"] for differ in toDiffer: self.assertNotEqual(origRequest[differ], clonedRequest[differ]) # check the desired status of the cloned request self.assertEquals(clonedRequest["RequestStatus"], "assignment-approved", "Cloned request status should be 'assignment-approved', not '%s'." 
% clonedRequest["RequestStatus"]) # don't care about these two (they will likely be the same in the unittest # since the brand new request injection as well as the cloning probably # happen at roughly the same time) toDiffer.extend(["RequestDate", "timeStamp", "RequestWorkflow"]) for differ in toDiffer: del origRequest[differ] del clonedRequest[differ] # check the request dictionaries self.assertEquals(len(origRequest), len(clonedRequest)) for k1, k2 in zip(sorted(origRequest.keys()), sorted(clonedRequest.keys())): msg = ("Request values: original: %s: %s cloned: %s: %s differ" % (k1, origRequest[k1], k2, clonedRequest[k2])) self.assertEqual(origRequest[k1], clonedRequest[k2], msg) def testK_CheckRequestFailsInjectionForbiddenInputArg(self): myThread = threading.currentThread() userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) from WMCore.HTTPFrontEnd.RequestManager.ReqMgrRESTModel import deprecatedRequestArgs for deprec in deprecatedRequestArgs: schema = utils.getSchema(groupName=groupName, userName=userName) schema[deprec] = "something" self.assertRaises(HTTPException, self.jsonSender.put, "request", schema) def setupACDCDatabase(self, collectionName, taskPath, user, group): """ _setupACDCDatabase_ Populate an ACDC database with bogus records associated to certain collection name, user and task path. 
""" acdcServer = CouchService(url = self.testInit.couchUrl, database = "%s_acdc" % self.couchDBName) owner = acdcServer.newOwner(group, user) testCollection = CouchCollection(database = self.testInit.couchDbName, url = self.testInit.couchUrl, name = collectionName) testCollection.setOwner(owner) testFileset = CouchFileset(database = self.testInit.couchDbName, url = self.testInit.couchUrl, name = taskPath) testCollection.addFileset(testFileset) testFiles = [] for _ in range(5): testFile = File(lfn = makeUUID(), size = random.randint(1024, 4096), events = random.randint(1024, 4096)) testFiles.append(testFile) testFileset.add(testFiles) def testL_CascadeCloseOutAnnnouncement(self): """ _testL_CascadeCloseOutAnnouncement_ Test the cascade closeout REST call, also check that when announced a request deletes all ACDC records in the system. """ userName = '******' groupName = 'Li' teamName = 'Tang' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") result = self.jsonSender.put("request", schema)[0] originalRequest = result['RequestName'] self.setupACDCDatabase(originalRequest, "/%s/DataProcessing" % originalRequest, result['Requestor'], result['Group']) depth = 2 nReq = 3 requests = [originalRequest] def createChildrenRequest(parentRequest, i, nReq): createdRequests = [] resubSchema = utils.getResubmissionSchema(parentRequest, "/%s/DataProcessing" % parentRequest, groupName, userName) result = self.jsonSender.put("request", resubSchema)[0] requestName = result['RequestName'] self.setupACDCDatabase(requestName, "/%s/DataProcessing" % requestName, result['Requestor'], result['Group']) createdRequests.append(requestName) if i: for _ in range(nReq): createdRequests.extend(createChildrenRequest(requestName, i - 1, nReq)) return createdRequests 
requests.extend(createChildrenRequest(originalRequest, depth, nReq)) for request in requests: self.changeStatusAndCheck(request, 'assignment-approved') for request in requests: self.jsonSender.put("assignment?team=%s&requestName=%s" % (teamName, request)) for status in ['acquired', 'running-open', 'running-closed', 'completed']: for request in requests: self.changeStatusAndCheck(request, status) self.jsonSender.post('closeout?requestName=%s&cascade=True' % originalRequest) svc = CouchService(url = self.testInit.couchUrl, database = "%s_acdc" % self.couchDBName) owner = svc.newOwner(groupName, userName) for request in requests: result = self.jsonSender.get('request/%s' % request) self.assertEqual(result[0]['RequestStatus'], 'closed-out') testCollection = CouchCollection(database = self.testInit.couchDbName, url = self.testInit.couchUrl, name = request) testCollection.setOwner(owner) testCollection.populate() self.assertNotEqual(len(testCollection["filesets"]), 0) self.jsonSender.post('announce?requestName=%s&cascade=True' % originalRequest) for request in requests: result = self.jsonSender.get('request/%s' % request) self.assertEqual(result[0]['RequestStatus'], 'announced') testCollection = CouchCollection(database = self.testInit.couchDbName, url = self.testInit.couchUrl, name = request) testCollection.setOwner(owner) testCollection.populate() self.assertEqual(len(testCollection["filesets"]), 0) def testM_PutRequestStats(self): userName = '******' groupName = 'Bryant' teamName = 'Lakers' schema = utils.getAndSetupSchema(self, userName = userName, groupName = groupName, teamName = teamName) configID = self.createConfig() schema["ConfigCacheID"] = configID schema["CouchDBName"] = self.couchDBName schema["CouchURL"] = os.environ.get("COUCHURL") result = self.jsonSender.put("request", schema)[0] originalRequest = result['RequestName'] stats = {'total_jobs': 100, 'input_events': 100, 'input_lumis': 100, 'input_num_files': 100} result = 
self.reqService.putRequestStats(originalRequest, stats) self.assertEqual(result['RequestName'], originalRequest)