def getRequestInformationAndWorkload(requestName, reqmgrUrl, centralRequestDBURL): """ _getRequestInformationAndWorkload_ Retrieve the request information for assignment and the full pickled workload. """ wfDBReader = RequestDBReader(centralRequestDBURL, couchapp="ReqMgr") result = wfDBReader.getRequestByNames(requestName, True) workloadDB = Database(result[requestName]['CouchWorkloadDBName'], result[requestName]['CouchURL']) workloadPickle = workloadDB.getAttachment(requestName, 'spec') spec = pickle.loads(workloadPickle) workload = WMWorkloadHelper(spec) return workload, result[requestName]
def getRequestInformationAndWorkload(requestName, reqmgrUrl, centralRequestDBURL): """ _getRequestInformationAndWorkload_ Retrieve the request information for assignment and the full pickled workload. """ wfDBReader = RequestDBReader(centralRequestDBURL, couchapp = "ReqMgr") result = wfDBReader.getRequestByNames(requestName,True) workloadDB = Database(result[requestName]['CouchWorkloadDBName'], result[requestName]['CouchURL']) workloadPickle = workloadDB.getAttachment(requestName, 'spec') spec = pickle.loads(workloadPickle) workload = WMWorkloadHelper(spec) return workload, result[requestName]
def fetchWorkflowsSpec(config, listOfWfs): """ Fetch the workload of a list of workflows. Filter out only a few usefull keys """ if isinstance(listOfWfs, basestring): listOfWfs = [listOfWfs] wfDBReader = RequestDBReader(config.AnalyticsDataCollector.centralRequestDBURL, couchapp=config.AnalyticsDataCollector.RequestCouchApp) tempWfs = wfDBReader.getRequestByNames(listOfWfs, True) wfShortDict = {} for wf in listOfWfs: wfShortDict[wf] = filterKeys(tempWfs[wf]) return wfShortDict
def fetchWorkflowsSpec(config, listOfWfs): """ Fetch the workload of a list of workflows. Filter out only a few usefull keys """ if isinstance(listOfWfs, basestring): listOfWfs = [listOfWfs] wfDBReader = RequestDBReader(config.AnalyticsDataCollector.centralRequestDBURL, couchapp=config.AnalyticsDataCollector.RequestCouchApp) tempWfs = wfDBReader.getRequestByNames(listOfWfs, True) wfShortDict = {} for wf in listOfWfs: wfShortDict[wf] = filterKeys(tempWfs[wf]) return wfShortDict
def main(): """ It will either delete docs in couchdb for the workflow you have provided or it will loop over the final (or almost final) states and ask for your permission to delete them. """ args = sys.argv[1:] if not len(args) == 1: print "usage: python syncPrioReqMgrxGQ.py <text_file_with_the_workflow_names>" sys.exit(0) inputFile = args[0] with open(inputFile) as f: listWorkflows = [x.rstrip('\n') for x in f.readlines()] if 'WMAGENT_CONFIG' not in os.environ: os.environ[ 'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"]) wfDBReader = RequestDBReader( config.AnalyticsDataCollector.centralRequestDBURL, couchapp=config.AnalyticsDataCollector.RequestCouchApp) wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl) workflowsDict = wfDBReader.getRequestByNames(listWorkflows) for wf, details in workflowsDict.iteritems(): print "wf: %s and prio: %s" % (wf, details['RequestPriority']) wqDocs = wqBackend.getElements(WorkflowName=wf) docIds = [ elem._id for elem in wqDocs if elem['Status'] == 'Available' and elem['Priority'] != details['RequestPriority'] ] if docIds: print "Changing the priority of the following available docs: %s" % docIds wqBackend.updateElements(*docIds, Priority=details['RequestPriority']) else: print " there is nothing to update for this workflow."
class RequestDBTest(unittest.TestCase): """ """ def setUp(self): """ _setUp_ """ self.schema = [] self.couchApps = ["ReqMgr"] self.testInit = TestInitCouchApp('RequestDBServiceTest') self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=self.schema, useDefault=False) dbName = 'requsetdb_t' self.testInit.setupCouch(dbName, *self.couchApps) reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName) self.requestWriter = RequestDBWriter(reqDBURL) self.requestReader = RequestDBReader(reqDBURL) self.requestWriter.defaultStale = {} self.requestReader.defaultStale = {} return def tearDown(self): """ _tearDown_ Drop all the WMBS tables. """ self.testInit.tearDownCouch() def testRequestDBWriter(self): # test getWork schema = generate_reqmgr_schema(3) result = self.requestWriter.insertGenericRequest(schema[0]) self.assertEqual(len(result), 1, 'insert fail') self.assertEqual( self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "failed"), 'OK', 'update fail') self.assertEqual( self.requestWriter.updateRequestStatus("not_exist_schema", "assigned"), 'Error: document not found') result = self.requestWriter.updateRequestProperty( schema[0]['RequestName'], {'Teams': ['teamA']}) self.assertEqual( self.requestWriter.updateRequestProperty(schema[0]['RequestName'], {'Teams': ['teamA']}), 'OK', 'update fail') self.assertEqual( self.requestWriter.updateRequestProperty("not_exist_schema", {'Teams': 'teamA'}), 'Error: document not found') result = self.requestReader.getRequestByNames( [schema[0]['RequestName']]) self.assertEqual(len(result), 1, "should be 1") result = self.requestReader.getRequestByStatus(["failed"], False, 1) self.assertEqual(len(result), 1, "should be 1") result = self.requestReader.getStatusAndTypeByRequest( [schema[0]['RequestName']]) self.assertEqual(result[schema[0]['RequestName']][0], 'failed', "should be failed") result = self.requestWriter.insertGenericRequest(schema[1]) time.sleep(2) result = self.requestWriter.insertGenericRequest(schema[2]) endTime = int(time.time()) - 1 result = self.requestReader.getRequestByStatusAndEndTime( "new", False, endTime) self.assertEqual(len(result), 1, "should be 1") endTime = int(time.time()) + 1 result = self.requestReader.getRequestByStatusAndEndTime( "new", False, endTime) self.assertEqual(len(result), 2, "should be 2")
class WMStatsReader(object): #TODO need to get this from reqmgr api ACTIVE_STATUS = ["new", "assignment-approved", "assigned", "acquired", "running", "running-open", "running-closed", "failed", "force-complete", "completed", "closed-out", "announced", "aborted", "aborted-completed", "rejected"] T0_ACTIVE_STATUS = ["new", "Closed", "Merge", "Harvesting", "Processing Done", "AlcaSkim", "completed"] def __init__(self, couchURL, appName="WMStats", reqdbURL=None, reqdbCouchApp="ReqMgr"): self._sanitizeURL(couchURL) # set the connection for local couchDB call self._commonInit(couchURL, appName) if reqdbURL: self.reqDB = RequestDBReader(reqdbURL, reqdbCouchApp) else: self.reqDB = None def _sanitizeURL(self, couchURL): return sanitizeURL(couchURL)['url'] def _commonInit(self, couchURL, appName = "WMStats"): """ setting up comon variables for inherited class. inherited class should call this in their init function """ self.couchURL, self.dbName = splitCouchServiceURL(couchURL) self.couchServer = CouchServer(self.couchURL) self.couchDB = self.couchServer.connectDatabase(self.dbName, False) self.couchapp = appName self.defaultStale = {"stale": "update_after"} def setDefaultStaleOptions(self, options): if not options: options = {} if 'stale' not in options: options.update(self.defaultStale) return options def getLatestJobInfoByRequests(self, requestNames): jobInfoByRequestAndAgent = {} if len(requestNames) > 0: requestAndAgentKey = self._getRequestAndAgent(requestNames) jobDocIds = self._getLatestJobInfo(requestAndAgentKey) jobInfoByRequestAndAgent = self._getAllDocsByIDs(jobDocIds) return jobInfoByRequestAndAgent def _updateRequestInfoWithJobInfo(self, requestInfo): if len(requestInfo.keys()) != 0: jobInfoByRequestAndAgent = self.getLatestJobInfoByRequests(requestInfo.keys()) self._combineRequestAndJobData(requestInfo, jobInfoByRequestAndAgent) def _getCouchView(self, view, options, keys = []): options = self.setDefaultStaleOptions(options) if keys and isinstance(keys, str): keys = [keys] return self.couchDB.loadView(self.couchapp, view, options, keys) def _formatCouchData(self, data, key = "id"): result = {} for row in data['rows']: if 'error' in row: continue if "doc" in row: result[row[key]] = row["doc"] else: result[row[key]] = None return result def _combineRequestAndJobData(self, requestData, jobData): """ update the request data with job info requestData['AgentJobInfo'] = {'vocms234.cern.ch:9999': {"_id":"d1d11dfcb30e0ab47db42007cb6fb847", "_rev":"1-8abfaa2de822ed081cb8d174e3e2c003", "status":{"inWMBS":334,"success":381,"submitted":{"retry":2,"pending":2},"failure":{"exception":3}}, "agent_team":"testbed-integration","workflow":"amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731", "timestamp":1394738860,"sites":{"T2_CH_CERN_AI":{"submitted":{"retry":1,"pending":1}}, "T2_CH_CERN":{"success":6,"submitted":{"retry":1,"pending":1}}, "T2_DE_DESY":{"failure":{"exception":3},"success":375}}, "agent":"WMAgentCommissioning", "tasks": {"/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production": {"status":{"failure":{"exception":3},"success":331}, "sites":{"T2_DE_DESY": {"success":325,"wrappedTotalJobTime":11305908, "dataset":{},"failure":{"exception":3}, "cmsRunCPUPerformance":{"totalJobCPU":10869688.8, "totalEventCPU":10832426.7, "totalJobTime":11255865.9}, "inputEvents":0}, "T2_CH_CERN":{"success":6,"wrappedTotalJobTime":176573, "dataset":{}, "cmsRunCPUPerformance":{"totalJobCPU":167324.8, "totalEventCPU":166652.1, "totalJobTime":174975.7}, "inputEvents":0}}, "subscription_status":{"updated":1393108089, "finished":2, "total":2,"open":0}, "jobtype":"Production"}, "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput/ProductionRAWSIMoutputMergeLogCollect": {"jobtype":"LogCollect", "subscription_status":{"updated":1392885768, "finished":0, "total":1,"open":1}}, "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput": {"status":{"success":41,"submitted":{"retry":1,"pending":1}}, "sites":{"T2_DE_DESY":{"datasetStat":{"totalLumis":973,"events":97300,"size":105698406915}, "success":41,"wrappedTotalJobTime":9190, "dataset":{"/GluGluToHTohhTo4B_mH-350_mh-125_8TeV-pythia6-tauola/Summer12-OracleUpgrade_TEST_ALAN_HG1401-v1/GEN-SIM": {"totalLumis":973,"events":97300,"size":105698406915}}, "cmsRunCPUPerformance":{"totalJobCPU":548.92532,"totalEventCPU":27.449808,"totalJobTime":2909.92125}, "inputEvents":97300}, "T2_CH_CERN":{"submitted":{"retry":1,"pending":1}}}, "subscription_status":{"updated":1392885768,"finished":0,"total":1,"open":1}, "jobtype":"Merge"}, "agent_url":"vocms231.cern.ch:9999", "type":"agent_request"}} """ if jobData: for row in jobData["rows"]: # condition checks if documents are deleted between calls. # just ignore in that case if row["doc"]: jobInfo = requestData[row["doc"]["workflow"]] jobInfo.setdefault("AgentJobInfo", {}) jobInfo["AgentJobInfo"][row["doc"]["agent_url"]] = row["doc"] def _getRequestAndAgent(self, filterRequest = None): """ returns the [['request_name', 'agent_url'], ....] """ options = {} options["reduce"] = True options["group"] = True result = self._getCouchView("requestAgentUrl", options) if filterRequest == None: keys = [row['key'] for row in result["rows"]] else: keys = [row['key'] for row in result["rows"] if row['key'][0] in filterRequest] return keys def _getLatestJobInfo(self, keys): """ keys is [['request_name', 'agent_url'], ....] returns ids """ if len(keys) == 0: return [] options = {} options["reduce"] = True options["group"] = True result = self._getCouchView("latestRequest", options, keys) ids = [row['value']['id'] for row in result["rows"]] return ids def _getAllDocsByIDs(self, ids, include_docs = True): """ keys is [id, ....] returns document """ if len(ids) == 0: return None options = {} options["include_docs"] = include_docs result = self.couchDB.allDocs(options, ids) return result def _getAgentInfo(self): """ returns all the agents status on wmstats """ options = {} result = self._getCouchView("agentInfo", options) return result def agentsByTeam(self, ignoreDrain = True): """ return a dictionary like {team:#agents,...} """ result = self._getAgentInfo() response = dict() for agentInfo in result["rows"]: teams = agentInfo['value']['agent_team'].split(',') for team in teams: if team not in response.keys(): response[team] = 0 if ignoreDrain: if not agentInfo['value']['drain_mode']: for team in teams: response[team] += 1 else: for team in teams: response[team] += 1 return response def getServerInstance(self): return self.couchServer def getDBInstance(self): return self.couchDB def getRequestDBInstance(self): return self.reqDB def getHeartbeat(self): try: return self.couchDB.info(); except Exception as ex: return {'error_message': str(ex)} def getRequestByNames(self, requestNames, jobInfoFlag = False): """ To use this function reqDBURL need to be set when wmstats initialized. This will be deplicated so please don use this. """ requestInfo = self.reqDB.getRequestByNames(requestNames, True) if jobInfoFlag: # get request and agent info self._updateRequestInfoWithJobInfo(requestInfo) return requestInfo def getActiveData(self, jobInfoFlag = False): return self.getRequestByStatus(WMStatsReader.ACTIVE_STATUS, jobInfoFlag) def getT0ActiveData(self, jobInfoFlag = False): return self.getRequestByStatus(WMStatsReader.T0_ACTIVE_STATUS, jobInfoFlag) def getRequestByStatus(self, statusList, jobInfoFlag = False, limit = None, skip = None, legacyFormat = False): """ To use this function reqDBURL need to be set when wmstats initialized. This will be deplicated so please don use this. If legacyFormat is True convert data to old wmstats format from current reqmgr format. Shouldn't be set to True unless existing code breaks """ requestInfo = self.reqDB.getRequestByStatus(statusList, True, limit, skip) if legacyFormat: # convert the format to wmstas old format for requestName, doc in requestInfo.items(): requestInfo[requestName] = convertToLegacyFormat(doc) if jobInfoFlag: # get request and agent info self._updateRequestInfoWithJobInfo(requestInfo) return requestInfo def getRequestSummaryWithJobInfo(self, requestName): """ get request info with job status """ requestInfo = self.reqDB.getRequestByNames(requestName) self._updateRequestInfoWithJobInfo(requestInfo) return requestInfo def getArchivedRequests(self): """ get list of archived workflow in wmstats db. """ options = {"group_level": 1, "reduce": True} results = self.couchDB.loadView(self.couchapp, "allWorkflows", options = options)['rows'] requestNames = [x['key'] for x in results] workflowDict = self.reqDB.getStatusAndTypeByRequest(requestNames) archivedRequests = [] for request, value in workflowDict.items(): if value[0].endswith("-archived"): archivedRequests.append(request) return archivedRequests def isWorkflowCompletedWithLogCollectAndCleanUp(self, requestName): """ check whether workflow is completed including LogCollect and CleanUp tasks TODO: If the parent task all failed and next task are not created at all, It can't detect complete status. If the one of the task doesn't contain any jobs, it will return False """ requestInfo = self.getRequestSummaryWithJobInfo(requestName) reqInfoInstance = RequestInfo(requestInfo[requestName]) return reqInfoInstance.isWorkflowFinished()
class WMStatsReader(object): # TODO need to get this from reqmgr api ACTIVE_STATUS = ["new", "assignment-approved", "assigned", "acquired", "running", "running-open", "running-closed", "failed", "force-complete", "completed", "closed-out", "announced", "aborted", "aborted-completed", "rejected"] T0_ACTIVE_STATUS = ["new", "Closed", "Merge", "Harvesting", "Processing Done", "AlcaSkim", "completed"] def __init__(self, couchURL, appName="WMStats", reqdbURL=None, reqdbCouchApp="ReqMgr"): self._sanitizeURL(couchURL) # set the connection for local couchDB call self._commonInit(couchURL, appName) if reqdbURL: self.reqDB = RequestDBReader(reqdbURL, reqdbCouchApp) else: self.reqDB = None def _sanitizeURL(self, couchURL): return sanitizeURL(couchURL)['url'] def _commonInit(self, couchURL, appName="WMStats"): """ setting up comon variables for inherited class. inherited class should call this in their init function """ self.couchURL, self.dbName = splitCouchServiceURL(couchURL) self.couchServer = CouchServer(self.couchURL) self.couchDB = self.couchServer.connectDatabase(self.dbName, False) self.couchapp = appName self.defaultStale = {"stale": "update_after"} def setDefaultStaleOptions(self, options): if not options: options = {} if 'stale' not in options: options.update(self.defaultStale) return options def getLatestJobInfoByRequests(self, requestNames): jobInfoByRequestAndAgent = {} if len(requestNames) > 0: requestAndAgentKey = self._getRequestAndAgent(requestNames) jobInfoByRequestAndAgent = self._getLatestJobInfo(requestAndAgentKey) return jobInfoByRequestAndAgent def _updateRequestInfoWithJobInfo(self, requestInfo): if len(requestInfo.keys()) != 0: jobInfoByRequestAndAgent = self.getLatestJobInfoByRequests(requestInfo.keys()) self._combineRequestAndJobData(requestInfo, jobInfoByRequestAndAgent) def _getCouchView(self, view, options, keys=None): keys = keys or [] options = self.setDefaultStaleOptions(options) if keys and isinstance(keys, str): keys = [keys] return self.couchDB.loadView(self.couchapp, view, options, keys) def _formatCouchData(self, data, key="id"): result = {} for row in data['rows']: if 'error' in row: continue if "doc" in row: result[row[key]] = row["doc"] else: result[row[key]] = None return result def _combineRequestAndJobData(self, requestData, jobData): """ update the request data with job info requestData['AgentJobInfo'] = {'vocms234.cern.ch:9999': {"_id":"d1d11dfcb30e0ab47db42007cb6fb847", "_rev":"1-8abfaa2de822ed081cb8d174e3e2c003", "status":{"inWMBS":334,"success":381,"submitted":{"retry":2,"pending":2},"failure":{"exception":3}}, "agent_team":"testbed-integration","workflow":"amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731", "timestamp":1394738860,"sites":{"T2_CH_CERN_AI":{"submitted":{"retry":1,"pending":1}}, "T2_CH_CERN":{"success":6,"submitted":{"retry":1,"pending":1}}, "T2_DE_DESY":{"failure":{"exception":3},"success":375}}, "agent":"WMAgentCommissioning", "tasks": {"/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production": {"status":{"failure":{"exception":3},"success":331}, "sites":{"T2_DE_DESY": {"success":325,"wrappedTotalJobTime":11305908, "dataset":{},"failure":{"exception":3}, "cmsRunCPUPerformance":{"totalJobCPU":10869688.8, "totalEventCPU":10832426.7, "totalJobTime":11255865.9}, "inputEvents":0}, "T2_CH_CERN":{"success":6,"wrappedTotalJobTime":176573, "dataset":{}, "cmsRunCPUPerformance":{"totalJobCPU":167324.8, "totalEventCPU":166652.1, "totalJobTime":174975.7}, "inputEvents":0}}, "subscription_status":{"updated":1393108089, "finished":2, "total":2,"open":0}, "jobtype":"Production"}, "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput/ProductionRAWSIMoutputMergeLogCollect": {"jobtype":"LogCollect", "subscription_status":{"updated":1392885768, "finished":0, "total":1,"open":1}}, "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput": {"status":{"success":41,"submitted":{"retry":1,"pending":1}}, "sites":{"T2_DE_DESY":{"datasetStat":{"totalLumis":973,"events":97300,"size":105698406915}, "success":41,"wrappedTotalJobTime":9190, "dataset":{"/GluGluToHTohhTo4B_mH-350_mh-125_8TeV-pythia6-tauola/Summer12-OracleUpgrade_TEST_ALAN_HG1401-v1/GEN-SIM": {"totalLumis":973,"events":97300,"size":105698406915}}, "cmsRunCPUPerformance":{"totalJobCPU":548.92532,"totalEventCPU":27.449808,"totalJobTime":2909.92125}, "inputEvents":97300}, "T2_CH_CERN":{"submitted":{"retry":1,"pending":1}}}, "subscription_status":{"updated":1392885768,"finished":0,"total":1,"open":1}, "jobtype":"Merge"}, "agent_url":"vocms231.cern.ch:9999", "type":"agent_request"}} """ if jobData: for row in jobData["rows"]: # condition checks if documents are deleted between calls. # just ignore in that case if row["doc"]: jobInfo = requestData[row["doc"]["workflow"]] jobInfo.setdefault("AgentJobInfo", {}) jobInfo["AgentJobInfo"][row["doc"]["agent_url"]] = row["doc"] def _getRequestAndAgent(self, filterRequest=None): """ returns the [['request_name', 'agent_url'], ....] """ options = {} options["reduce"] = True options["group"] = True result = self._getCouchView("requestAgentUrl", options) if filterRequest is None: keys = [row['key'] for row in result["rows"]] else: keys = [row['key'] for row in result["rows"] if row['key'][0] in filterRequest] return keys def _getLatestJobInfo(self, keys): """ keys is [['request_name', 'agent_url'], ....] returns ids """ if len(keys) == 0: return [] options = {"include_docs": True} options["reduce"] = False result = self._getCouchView("latestRequest", options, keys) return result def _getAllDocsByIDs(self, ids, include_docs=True): """ keys is [id, ....] returns document """ if len(ids) == 0: return None options = {} options["include_docs"] = include_docs result = self.couchDB.allDocs(options, ids) return result def _getAgentInfo(self): """ returns all the agents status on wmstats """ options = {} result = self._getCouchView("agentInfo", options) return result def agentsByTeam(self, filterDrain=False): """ return a dictionary like {team:#agents,...} """ result = self._getAgentInfo() response = dict() for agentInfo in result["rows"]: #filtering empty string team = agentInfo['value']['agent_team'] if not team: continue response.setdefault(team, 0) if filterDrain: if not agentInfo['value'].get('drain_mode', False): response[team] += 1 else: response[team] += 1 return response def getServerInstance(self): return self.couchServer def getDBInstance(self): return self.couchDB def getRequestDBInstance(self): return self.reqDB def getHeartbeat(self): try: return self.couchDB.info() except Exception as ex: return {'error_message': str(ex)} def getRequestByNames(self, requestNames, jobInfoFlag=False): """ To use this function reqDBURL need to be set when wmstats initialized. This will be deplicated so please don use this. """ requestInfo = self.reqDB.getRequestByNames(requestNames, True) if jobInfoFlag: # get request and agent info self._updateRequestInfoWithJobInfo(requestInfo) return requestInfo def getActiveData(self, jobInfoFlag=False): return self.getRequestByStatus(WMStatsReader.ACTIVE_STATUS, jobInfoFlag) def getT0ActiveData(self, jobInfoFlag=False): return self.getRequestByStatus(WMStatsReader.T0_ACTIVE_STATUS, jobInfoFlag) def getRequestByStatus(self, statusList, jobInfoFlag=False, limit=None, skip=None, legacyFormat=False): """ To use this function reqDBURL need to be set when wmstats initialized. This will be deplicated so please don use this. If legacyFormat is True convert data to old wmstats format from current reqmgr format. Shouldn't be set to True unless existing code breaks """ requestInfo = self.reqDB.getRequestByStatus(statusList, True, limit, skip) if legacyFormat: # convert the format to wmstas old format for requestName, doc in requestInfo.items(): requestInfo[requestName] = convertToLegacyFormat(doc) if jobInfoFlag: # get request and agent info self._updateRequestInfoWithJobInfo(requestInfo) return requestInfo def getRequestSummaryWithJobInfo(self, requestName): """ get request info with job status """ requestInfo = self.reqDB.getRequestByNames(requestName) self._updateRequestInfoWithJobInfo(requestInfo) return requestInfo def getArchivedRequests(self): """ get list of archived workflow in wmstats db. """ options = {"group_level": 1, "reduce": True} results = self.couchDB.loadView(self.couchapp, "allWorkflows", options=options)['rows'] requestNames = [x['key'] for x in results] workflowDict = self.reqDB.getStatusAndTypeByRequest(requestNames) archivedRequests = [] for request, value in workflowDict.items(): if value[0].endswith("-archived"): archivedRequests.append(request) return archivedRequests def isWorkflowCompletedWithLogCollectAndCleanUp(self, requestName): """ check whether workflow is completed including LogCollect and CleanUp tasks TODO: If the parent task all failed and next task are not created at all, It can't detect complete status. If the one of the task doesn't contain any jobs, it will return False """ requestInfo = self.getRequestSummaryWithJobInfo(requestName) reqInfoInstance = RequestInfo(requestInfo[requestName]) return reqInfoInstance.isWorkflowFinished() def getTaskJobSummaryByRequest(self, requestName, sampleSize=1): options = {'reduce': True, 'group_level': 5, 'startkey': [requestName], 'endkey': [requestName, {}]} results = self.couchDB.loadView(self.couchapp, "jobsByStatusWorkflow", options=options) jobDetails = {} for row in results['rows']: # row["key"] = ['workflow', 'task', 'jobstatus', 'exitCode', 'site'] startKey = row["key"][:4] endKey = [] site = row["key"][4] if site: startKey.append(site) endKey.extend(startKey) endKey.append({}) numOfError = row["value"] jobInfo = self.jobDetailByTasks(startKey, endKey, numOfError, sampleSize) jobDetails = nestedDictUpdate(jobDetails, jobInfo) return jobDetails def jobDetailByTasks(self, startKey, endKey, numOfError, limit=1): options = {'include_docs': True, 'reduce': False, 'startkey': startKey, 'endkey': endKey, 'limit': limit} result = self.couchDB.loadView(self.couchapp, "jobsByStatusWorkflow", options=options) jobInfoDoc = {} for row in result['rows']: keys = row['key'] workflow = keys[0] task = keys[1] jobStatus = keys[2] exitCode = keys[3] site = keys[4] jobInfoDoc.setdefault(workflow, {}) jobInfoDoc[workflow].setdefault(task, {}) jobInfoDoc[workflow][task].setdefault(jobStatus, {}) jobInfoDoc[workflow][task][jobStatus].setdefault(exitCode, {}) jobInfoDoc[workflow][task][jobStatus][exitCode].setdefault(site, {}) finalStruct = jobInfoDoc[workflow][task][jobStatus][exitCode][site] finalStruct["errorCount"] = numOfError finalStruct.setdefault("samples", []) finalStruct["samples"].append(row["doc"]) return jobInfoDoc def getAllAgentRequestRevByID(self, agentURL): options = {"reduce": False} results = self.couchDB.loadView(self.couchapp, "byAgentURL", options=options, keys=[agentURL]) idRevMap = {} for row in results['rows']: idRevMap[row['id']] = row['value']['rev'] return idRevMap
def main(): """ It will either delete docs in couchdb for the workflow you have provided or it will loop over the final (or almost final) states and ask for your permission to delete them. """ wfName = sys.argv[1] if len(sys.argv) == 2 else [] if 'WMAGENT_CONFIG' not in os.environ: os.environ[ 'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"]) # Instantiating central services (couch stuff) # print "Central Couch URL : %s" % config.WorkloadSummary.couchurl # print "Central ReqMgr URL : %s\n" % config.AnalyticsDataCollector.centralRequestDBURL wfDBReader = RequestDBReader( config.AnalyticsDataCollector.centralRequestDBURL, couchapp=config.AnalyticsDataCollector.RequestCouchApp) # Central services wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl) wqInboxDB = Database('workqueue_inbox', config.WorkloadSummary.couchurl) # Local services localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl, db_name="workqueue_inbox") localWQInboxDB = Database('workqueue', config.WorkQueueManager.couchurl) statusList = [ "failed", "epic-FAILED", "completed", "closed-out", "announced", "aborted", "aborted-completed", "rejected", "normal-archived", "aborted-archived", "rejected-archived" ] for stat in final_status: # retrieve list of workflows in each status if not wfName: # options = {'include_docs': False} date_range = { 'startkey': [2015, 5, 15, 0, 0, 0], 'endkey': [2015, 5, 26, 0, 0, 0] } # finalWfs = wfDBReader.getRequestByCouchView("bydate", options, date_range) tempWfs = wfDBReader.getRequestByCouchView("bydate", date_range) #print "Found %d wfs in status: %s" %(len(finalWfs), stat) finalWfs = [] for wf, content in tempWfs.iteritems(): if content['RequestStatus'] in statusList: finalWfs.append(wf) print "Found %d wfs in not in active state" % len(finalWfs) else: finalWfs = [wfName] tempWfs = wfDBReader.getRequestByNames(wfName, True) print "Checking %s with status '%s'." % ( wfName, tempWfs[wfName]['RequestStatus']) wqDocs, wqInboxDocs = [], [] localWQDocs, localWQInboxDocs = [], [] for counter, wf in enumerate(finalWfs): if counter % 100 == 0: print "%d wfs queried ..." % counter # check whether there are workqueue docs wqDocIDs = wqBackend.getElements(WorkflowName=wf) if wqDocIDs: print "Found %d workqueue docs for %s, status %s" % ( len(wqDocIDs), wf, tempWfs[wf]['RequestStatus']) print wqDocIDs wqDocs.append(wqDocIDs) # check whether there are workqueue_inbox docs if wqInboxDB.documentExists(wf): print "Found workqueue_inbox doc for %s, status %s" % ( wf, tempWfs[wf]['RequestStatus']) # then retrieve the document wqInboxDoc = wqInboxDB.document(wf) wqInboxDocs.append(wqInboxDoc) # check local queue wqDocIDs = localWQBackend.getElements(WorkflowName=wf) if wqDocIDs: print "Found %d local workqueue docs for %s, status %s" % ( len(wqDocIDs), wf, tempWfs[wf]['RequestStatus']) print wqDocIDs localWQDocs.append(wqDocIDs) if localWQInboxDB.documentExists(wf): print "Found local workqueue_inbox doc for %s, status %s" % ( wf, tempWfs[wf]['RequestStatus']) wqInboxDoc = localWQInboxDB.document(wf) print wqInboxDoc localWQInboxDocs.append(wqInboxDoc) # TODO TODO TODO for the moment only deletes for a specific workflow if wfName: var = raw_input("\nCan we delete all these documents (Y/N)? ") if var == "Y": # deletes workqueue_inbox doc if wqInboxDoc: print "Deleting workqueue_inbox id %s and %s" % ( wqInboxDoc['_id'], wqInboxDoc['_rev']) wqInboxDB.delete_doc(wqInboxDoc['_id'], wqInboxDoc['_rev']) # deletes workqueue docs if wqDocIDs: print "Deleting workqueue docs %s" % wqDocIDs wqBackend.deleteElements( *[x for x in wqDocIDs if x['RequestName'] in wfName]) else: print "You are the boss, aborting it ...\n"
class WMStatsReader(): #TODO need to get this from reqmgr api ACTIVE_STATUS = [ "new", "assignment-approved", "assigned", "ops-hold", "negotiating", "acquired", "running", "running-open", "running-closed", "failed", "completed", "closed-out", "announced", "aborted", "rejected" ] def __init__(self, couchURL, reqdbURL=None, reqdbCouchApp="ReqMgr"): couchURL = sanitizeURL(couchURL)['url'] # set the connection for local couchDB call self._commonInit(couchURL) if reqdbURL: self.reqDB = RequestDBReader(reqdbURL) else: self.reqDB = None def _commonInit(self, couchURL, appName="WMStats"): """ setting up comon variables for inherited class. inherited class should call this in their init function """ self.couchURL, self.dbName = splitCouchServiceURL(couchURL) self.couchServer = CouchServer(self.couchURL) self.couchDB = self.couchServer.connectDatabase(self.dbName, False) self.couchapp = appName self.defaultStale = {"stale": "update_after"} def setDefaultStaleOptions(self, options): if not options: options = {} if 'stale' not in options: options.update(self.defaultStale) return options def getLatestJobInfoByRequests(self, requestNames): jobInfoByRequestAndAgent = {} if len(requestNames) > 0: requestAndAgentKey = self._getRequestAndAgent(requestNames) jobDocIds = self._getLatestJobInfo(requestAndAgentKey) jobInfoByRequestAndAgent = self._getAllDocsByIDs(jobDocIds) return jobInfoByRequestAndAgent def _updateRequestInfoWithJobInfo(self, requestInfo): if len(requestInfo.keys()) != 0: jobInfoByRequestAndAgent = self.getLatestJobInfoByRequests( requestInfo.keys()) self._combineRequestAndJobData(requestInfo, jobInfoByRequestAndAgent) def _getCouchView(self, view, options, keys=[]): options = self.setDefaultStaleOptions(options) if keys and type(keys) == str: keys = [keys] return self.couchDB.loadView(self.couchapp, view, options, keys) def _formatCouchData(self, data, key="id"): result = {} for row in data['rows']: if 'error' in row: continue if "doc" in row: result[row[key]] = row["doc"] else: result[row[key]] = None return result def _combineRequestAndJobData(self, requestData, jobData): """ update the request data with job info requestData['AgentJobInfo'] = {'vocms234.cern.ch:9999': {"_id":"d1d11dfcb30e0ab47db42007cb6fb847", "_rev":"1-8abfaa2de822ed081cb8d174e3e2c003", "status":{"inWMBS":334,"success":381,"submitted":{"retry":2,"pending":2},"failure":{"exception":3}}, "agent_team":"testbed-integration","workflow":"amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731", "timestamp":1394738860,"sites":{"T2_CH_CERN_AI":{"submitted":{"retry":1,"pending":1}}, "T2_CH_CERN":{"success":6,"submitted":{"retry":1,"pending":1}}, "T2_DE_DESY":{"failure":{"exception":3},"success":375}}, "agent":"WMAgentCommissioning", "tasks": {"/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production": {"status":{"failure":{"exception":3},"success":331}, "sites":{"T2_DE_DESY": {"success":325,"wrappedTotalJobTime":11305908, "dataset":{},"failure":{"exception":3}, "cmsRunCPUPerformance":{"totalJobCPU":10869688.8, "totalEventCPU":10832426.7, "totalJobTime":11255865.9}, "inputEvents":0}, "T2_CH_CERN":{"success":6,"wrappedTotalJobTime":176573, "dataset":{}, "cmsRunCPUPerformance":{"totalJobCPU":167324.8, "totalEventCPU":166652.1, "totalJobTime":174975.7}, "inputEvents":0}}, "subscription_status":{"updated":1393108089, "finished":2, "total":2,"open":0}, "jobtype":"Production"}, "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput/ProductionRAWSIMoutputMergeLogCollect": {"jobtype":"LogCollect", "subscription_status":{"updated":1392885768, "finished":0, "total":1,"open":1}}, "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput": {"status":{"success":41,"submitted":{"retry":1,"pending":1}}, "sites":{"T2_DE_DESY":{"datasetStat":{"totalLumis":973,"events":97300,"size":105698406915}, "success":41,"wrappedTotalJobTime":9190, "dataset":{"/GluGluToHTohhTo4B_mH-350_mh-125_8TeV-pythia6-tauola/Summer12-OracleUpgrade_TEST_ALAN_HG1401-v1/GEN-SIM": {"totalLumis":973,"events":97300,"size":105698406915}}, "cmsRunCPUPerformance":{"totalJobCPU":548.92532,"totalEventCPU":27.449808,"totalJobTime":2909.92125}, "inputEvents":97300}, "T2_CH_CERN":{"submitted":{"retry":1,"pending":1}}}, "subscription_status":{"updated":1392885768,"finished":0,"total":1,"open":1}, "jobtype":"Merge"}, "agent_url":"vocms231.cern.ch:9999", "type":"agent_request"}} """ if jobData: for row in jobData["rows"]: # condition checks if documents are deleted between calls. # just ignore in that case if row["doc"]: jobInfo = requestData[row["doc"]["workflow"]] jobInfo.setdefault("AgentJobInfo", {}) jobInfo["AgentJobInfo"][row["doc"] ["agent_url"]] = row["doc"] def _getRequestAndAgent(self, filterRequest=None): """ returns the [['request_name', 'agent_url'], ....] """ options = {} options["reduce"] = True options["group"] = True result = self._getCouchView("requestAgentUrl", options) if filterRequest == None: keys = [row['key'] for row in result["rows"]] else: keys = [ row['key'] for row in result["rows"] if row['key'][0] in filterRequest ] return keys def _getLatestJobInfo(self, keys): """ keys is [['request_name', 'agent_url'], ....] returns ids """ if len(keys) == 0: return [] options = {} options["reduce"] = True options["group"] = True result = self._getCouchView("latestRequest", options, keys) ids = [row['value']['id'] for row in result["rows"]] return ids def _getAllDocsByIDs(self, ids, include_docs=True): """ keys is [id, ....] returns document """ if len(ids) == 0: return None options = {} options["include_docs"] = include_docs result = self.couchDB.allDocs(options, ids) return result def _getAgentInfo(self): """ returns all the agents status on wmstats """ options = {} result = self._getCouchView("agentInfo", options) return result def agentsByTeam(self, ignoreDrain=True): """ return a dictionary like {team:#agents,...} """ result = self._getAgentInfo() response = dict() for agentInfo in result["rows"]: teams = agentInfo['value']['agent_team'].split(',') for team in teams: if team not in response.keys(): response[team] = 0 if ignoreDrain: if not agentInfo['value']['drain_mode']: for team in teams: response[team] += 1 else: for team in teams: response[team] += 1 return response def getDBInstance(self): return self.couchDB def getHeartbeat(self): try: return self.couchDB.info() except Exception as ex: return {'error_message': str(ex)} def getRequestByNames(self, requestNames, jobInfoFlag=False): """ To use this function reqDBURL need to be set when wmstats initialized. This will be deplicated so please don use this. """ requestInfo = self.reqDB.getRequestByNames(requestNames, True) if jobInfoFlag: # get request and agent info self._updateRequestInfoWithJobInfo(requestInfo) return requestInfo def getActiveData(self, jobInfoFlag=False): return self.getRequestByStatus(WMStatsReader.ACTIVE_STATUS, jobInfoFlag) def getRequestByStatus(self, statusList, jobInfoFlag=False, limit=None, skip=None, legacyFormat=False): """ To use this function reqDBURL need to be set when wmstats initialized. This will be deplicated so please don use this. If legacyFormat is True convert data to old wmstats format from current reqmgr format. Shouldn't be set to True unless existing code breaks """ requestInfo = self.reqDB.getRequestByStatus(statusList, True, limit, skip) if legacyFormat: # convert the format to wmstas old format for requestName, doc in requestInfo.items(): requestInfo[requestName] = convertToLegacyFormat(doc) if jobInfoFlag: # get request and agent info self._updateRequestInfoWithJobInfo(requestInfo) return requestInfo def getRequestSummaryWithJobInfo(self, requestName): """ get request info with job status """ requestInfo = self.reqDB.getRequestByNames(requestName) self._updateRequestInfoWithJobInfo(requestInfo) return requestInfo
class RequestDBTest(unittest.TestCase): """ """ def setUp(self): """ _setUp_ """ self.schema = [] self.couchApps = ["ReqMgr"] self.testInit = TestInitCouchApp('RequestDBServiceTest') self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = self.schema, useDefault = False) dbName = 'requsetdb_t' self.testInit.setupCouch(dbName, *self.couchApps) reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName) self.requestWriter = RequestDBWriter(reqDBURL) self.requestReader = RequestDBReader(reqDBURL) self.requestWriter.defaultStale = {} self.requestReader.defaultStale = {} return def tearDown(self): """ _tearDown_ Drop all the WMBS tables. """ self.testInit.tearDownCouch() def testRequestDBWriter(self): # test getWork schema = generate_reqmgr_schema(3) result = self.requestWriter.insertGenericRequest(schema[0]) self.assertEqual(len(result), 1, 'insert fail'); self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "failed"), 'OK', 'update fail') self.assertEqual(self.requestWriter.updateRequestStatus("not_exist_schema", "assigned"), 'Error: document not found') result = self.requestWriter.updateRequestProperty(schema[0]['RequestName'], {'Teams': ['teamA']}) self.assertEqual(self.requestWriter.updateRequestProperty(schema[0]['RequestName'], {'Teams': ['teamA']}), 'OK', 'update fail') self.assertEqual(self.requestWriter.updateRequestProperty("not_exist_schema", {'Teams': 'teamA'}), 'Error: document not found') result = self.requestReader.getRequestByNames([schema[0]['RequestName']]) self.assertEqual(len(result), 1, "should be 1") result = self.requestReader.getRequestByStatus(["failed"], False, 1) self.assertEqual(len(result), 1, "should be 1") result = self.requestReader.getStatusAndTypeByRequest([schema[0]['RequestName']]) self.assertEqual(result[schema[0]['RequestName']][0], 'failed', "should be failed") result = self.requestWriter.insertGenericRequest(schema[1]) time.sleep(2) result = self.requestWriter.insertGenericRequest(schema[2]) endTime = int(time.time()) - 1 result = self.requestReader.getRequestByStatusAndEndTime("new", False, endTime) self.assertEqual(len(result), 1, "should be 1") endTime = int(time.time()) + 1 result = self.requestReader.getRequestByStatusAndEndTime("new", False, endTime) self.assertEqual(len(result), 2, "should be 2")
def main(): """ It will either delete docs in couchdb for the workflow you have provided or it will loop over the final (or almost final) states and ask for your permission to delete them. """ wfName = sys.argv[1] if len(sys.argv) == 2 else [] if 'WMAGENT_CONFIG' not in os.environ: os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"]) # Instantiating central services (couch stuff) # print "Central Couch URL : %s" % config.WorkloadSummary.couchurl # print "Central ReqMgr URL : %s\n" % config.AnalyticsDataCollector.centralRequestDBURL wfDBReader = RequestDBReader(config.AnalyticsDataCollector.centralRequestDBURL, couchapp = config.AnalyticsDataCollector.RequestCouchApp) # Central services wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl) wqInboxDB = Database('workqueue_inbox', config.WorkloadSummary.couchurl) # Local services localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl, db_name = "workqueue_inbox") localWQInboxDB = Database('workqueue', config.WorkQueueManager.couchurl) statusList = ["failed", "epic-FAILED", "completed", "closed-out", "announced", "aborted", "aborted-completed", "rejected", "normal-archived", "aborted-archived", "rejected-archived"] for stat in final_status: # retrieve list of workflows in each status if not wfName: # options = {'include_docs': False} date_range = {'startkey': [2015,5,15,0,0,0], 'endkey': [2015,5,26,0,0,0]} # finalWfs = wfDBReader.getRequestByCouchView("bydate", options, date_range) tempWfs = wfDBReader.getRequestByCouchView("bydate", date_range) #print "Found %d wfs in status: %s" %(len(finalWfs), stat) finalWfs = [] for wf, content in tempWfs.iteritems(): if content['RequestStatus'] in statusList: finalWfs.append(wf) print "Found %d wfs in not in active state" % len(finalWfs) else: finalWfs = [wfName] tempWfs = wfDBReader.getRequestByNames(wfName, True) print "Checking %s with status '%s'." % (wfName, tempWfs[wfName]['RequestStatus']) wqDocs, wqInboxDocs = [], [] localWQDocs, localWQInboxDocs = [], [] for counter, wf in enumerate(finalWfs): if counter % 100 == 0: print "%d wfs queried ..." % counter # check whether there are workqueue docs wqDocIDs = wqBackend.getElements(WorkflowName = wf) if wqDocIDs: print "Found %d workqueue docs for %s, status %s" % (len(wqDocIDs), wf, tempWfs[wf]['RequestStatus']) print wqDocIDs wqDocs.append(wqDocIDs) # check whether there are workqueue_inbox docs if wqInboxDB.documentExists(wf): print "Found workqueue_inbox doc for %s, status %s" % (wf, tempWfs[wf]['RequestStatus']) # then retrieve the document wqInboxDoc = wqInboxDB.document(wf) wqInboxDocs.append(wqInboxDoc) # check local queue wqDocIDs = localWQBackend.getElements(WorkflowName = wf) if wqDocIDs: print "Found %d local workqueue docs for %s, status %s" % (len(wqDocIDs), wf, tempWfs[wf]['RequestStatus']) print wqDocIDs localWQDocs.append(wqDocIDs) if localWQInboxDB.documentExists(wf): print "Found local workqueue_inbox doc for %s, status %s" % (wf, tempWfs[wf]['RequestStatus']) wqInboxDoc = localWQInboxDB.document(wf) print wqInboxDoc localWQInboxDocs.append(wqInboxDoc) # TODO TODO TODO for the moment only deletes for a specific workflow if wfName: var = raw_input("\nCan we delete all these documents (Y/N)? ") if var == "Y": # deletes workqueue_inbox doc if wqInboxDoc: print "Deleting workqueue_inbox id %s and %s" % (wqInboxDoc['_id'], wqInboxDoc['_rev']) wqInboxDB.delete_doc(wqInboxDoc['_id'], wqInboxDoc['_rev']) # deletes workqueue docs if wqDocIDs: print "Deleting workqueue docs %s" % wqDocIDs wqBackend.deleteElements(*[x for x in wqDocIDs if x['RequestName'] in wfName]) else: print "You are the boss, aborting it ...\n"