def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl,
                            mcmUrl, mcmCert, mcmKey, tmpDir,
                            archived=False, log=logging):
    """
    Gather workflow statistics from WMStats, ReqMgr and McM, and store the
    summarized report in the wmMining CouchDB database.

    Note: log must be a logger-like object exposing .info()/.error();
    the old default of logging.info would break the log.info() calls below.
    """
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl, reqdbURL=reqmgrUrl, reqdbCouchApp="ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." %
             (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True

    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag,
                                          legacyFormat=True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except:
                    pass  # ReqMgr no longer has the workflow

                report[wf].update({'filterEfficiency': filterEfficiency,
                                   'runWhiteList': runWhiteList})

            if (oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or
                    oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown'):
                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" % (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID=prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry['step'] == 'announced':
                                dateString = entry['updater']['submission_date']
                                dt = datetime.strptime(dateString, '%Y-%m-%d-%H-%M')
                                report[wf].update({'mcmApprovalTime': time.mktime(dt.timetuple())})
                                found = True
                        if not found:
                            log.error("History found but no approval time for %s" % wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, dummy_exc_value, dummy_exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. May be transient and/or SSO problem." %
                                  (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. Unknown error." %
                                  (exc_type, prepID))

                    try:
                        mcmRequest = mcm.getRequest(prepID=prepID)
                        report[wf].update({'mcmTotalEvents': mcmRequest.get('total_events', 'NoMcMData')})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. May be transient and/or SSO problem." %
                                  (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. Unknown error." %
                                  (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf].get('priority', 0)
            requestType = requests[wf].get('request_type', 'Unknown')
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf].get('campaign', 'Unknown')
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" % wf)  # Should not happen but it does.

            # Remove a single task_ from the start of PREP ID if it exists
            if prep_id and prep_id.startswith('task_'):
                # str.replace returns a new string, so the result must be reassigned
                prep_id = prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except:
                # Sometimes is a list of lists, not just a list. Bail
                log.error("Could not decode outputdatasets: %s" % outputdatasets)

            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent, events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']  # note: the key really is spelled 'sucess' in the source data
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime': int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime': int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None
            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime': approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime': assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime': acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime': closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime': announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime': completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime': newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime': archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate': requestDate})
            except:
                pass

            report[wf].update({'priority': priority,
                               'status': finalStatus,
                               'type': requestType})
            report[wf].update({'totalLumis': targetLumis,
                               'totalEvents': targetEvents})
            report[wf].update({'campaign': campaign,
                               'prepID': prep_id,
                               'outputTier': outputTier})
            report[wf].update({'outputDatasets': outputdatasets,
                               'inputDataset': inputdataset})

            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage
                percentReported = report[wf]['eventPercents'].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({'eventProgress': eventProgress,
                               'lumiProgress': lumiProgress})

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    # log.debug("Workflow created: %s" % wf)
                    pass
                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    dummy = json.dumps(newCouchDoc)  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    # pprint.pformat returns the string; pprint.pprint would log None
                    log.error("Failed to queue document: %s\n" % pprint.pformat(newCouchDoc))

    log.info("%s: Finished gathering job information; waiting for the next cycle" % funcName)

    # Commit all changes to CouchDB
    couchdb.commit()
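
# A minimal driver sketch for gatherWMDataMiningStats (illustrative only, not
# part of the original module). All URLs, certificate paths and the temp
# directory are hypothetical placeholders; it also assumes the module defines
# the maxMCMCalls cap referenced above and imports the WMCore helpers
# (CouchServer, WMStatsReader, RequestInfoCollection, McM, splitCouchServiceURL).
def _exampleGatherStats():
    logging.basicConfig(level=logging.INFO)
    gatherWMDataMiningStats(
        wmstatsUrl='https://cmsweb.cern.ch/couchdb/wmstats',               # hypothetical
        reqmgrUrl='https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache',  # hypothetical
        wmMiningUrl='http://localhost:5984/wmdatamining',                  # hypothetical
        mcmUrl='https://cms-pdmv.cern.ch/mcm',                             # hypothetical
        mcmCert='/path/to/usercert.pem',                                   # placeholder
        mcmKey='/path/to/userkey.pem',                                     # placeholder
        tmpDir='/tmp/mcm',
        archived=False,
        log=logging)  # any logger-like object with .info()/.error() works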
class ConfigCache(WMObject):
    """
    _ConfigCache_

    The class that handles the upload and download of configCache
    artifacts from Couch
    """
    def __init__(self, dbURL, couchDBName=None, id=None, rev=None,
                 usePYCurl=False, ckey=None, cert=None, capath=None, detail=True):
        self.dbname = couchDBName
        self.dburl = dbURL
        self.detail = detail
        try:
            self.couchdb = CouchServer(self.dburl, usePYCurl=usePYCurl,
                                       ckey=ckey, cert=cert, capath=capath)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()
            self.database = self.couchdb.connectDatabase(self.dbname)
        except Exception as ex:
            msg = "Error connecting to couch: %s\n" % str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # local cache
        self.docs_cache = DocumentCache(self.database, self.detail)

        # UserGroup variables
        self.group = None
        self.owner = None

        # Internal data structure
        self.document = Document()
        self.attachments = {}
        self.document['type'] = "config"
        self.document['description'] = {}
        self.document['description']['config_label'] = None
        self.document['description']['config_desc'] = None

        if id != None:
            self.document['_id'] = id
        self.document['pset_tweak_details'] = None
        self.document['info'] = None
        self.document['config'] = None
        return

    def createDatabase(self):
        """
        _createDatabase_
        """
        database = self.couchdb.createDatabase(self.dbname)
        database.commit()
        return database

    def connectUserGroup(self, groupname, username):
        """
        _connectUserGroup_
        """
        self.group = Group(name=groupname)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.owner = makeUser(groupname, username,
                              couchUrl=self.dburl, couchDatabase=self.dbname)
        return

    def createUserGroup(self, groupname, username):
        """
        _createUserGroup_

        Create all the userGroup information
        """
        self.createGroup(name=groupname)
        self.createUser(username=username)
        return

    def createGroup(self, name):
        """
        _createGroup_

        Create Group for GroupUser
        """
        self.group = Group(name=name)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.group.create()
        return

    def setLabel(self, label):
        """
        _setLabel_

        Util to add a descriptive label to the configuration doc
        """
        self.document['description']['config_label'] = label

    def setDescription(self, desc):
        """
        _setDescription_

        Util to add a verbose description string to a configuration doc
        """
        self.document['description']['config_desc'] = desc

    @Decorators.requireGroup
    def createUser(self, username):
        self.owner = makeUser(self.group['name'], username,
                              couchUrl=self.dburl, couchDatabase=self.dbname)
        self.owner.create()
        self.owner.ownThis(self.document)
        return

    @Decorators.requireGroup
    @Decorators.requireUser
    def save(self):
        """
        _save_

        Save yourself! Save your internal document.
        """
        rawResults = self.database.commit(doc=self.document)
        # We should only be committing one document at a time;
        # if not, get the last one.
        try:
            commitResults = rawResults[-1]
            self.document["_rev"] = commitResults.get('rev')
            self.document["_id"] = commitResults.get('id')
        except KeyError as ex:
            msg = "Document returned from couch without ID or Revision\n"
            msg += "Document probably bad\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # Now do the attachments
        for attachName in self.attachments:
            self.saveAttachment(name=attachName,
                                attachment=self.attachments[attachName])
        return

    def saveAttachment(self, name, attachment):
        """
        _saveAttachment_

        Save an attachment to the document
        """
        retval = self.database.addAttachment(self.document["_id"],
                                             self.document["_rev"],
                                             attachment, name)
        if retval.get('ok', False) != True:
            # Then we have a problem
            msg = "Adding an attachment to document failed\n"
            msg += str(retval)
            msg += "ID: %s, Rev: %s" % (self.document["_id"], self.document["_rev"])
            logging.error(msg)
            raise ConfigCacheException(msg)

        self.document["_rev"] = retval['rev']
        self.document["_id"] = retval['id']
        self.attachments[name] = attachment
        return

    def loadDocument(self, configID):
        """
        _loadDocument_

        Load a document from the document cache given its couchID
        """
        self.document = self.docs_cache[configID]

    def loadByID(self, configID):
        """
        _loadByID_

        Load a document from the server given its couchID
        """
        try:
            self.document = self.database.document(id=configID)
            if 'owner' in self.document.keys():
                self.connectUserGroup(groupname=self.document['owner'].get('group', None),
                                      username=self.document['owner'].get('user', None))
            if '_attachments' in self.document.keys():
                # Then we need to load the attachments
                for key in self.document['_attachments'].keys():
                    self.loadAttachment(name=key)
        except CouchNotFoundError as ex:
            msg = "Document with id %s not found in couch\n" % (configID)
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        except Exception as ex:
            msg = "Error loading document from couch\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        return

    def loadAttachment(self, name, overwrite=True):
        """
        _loadAttachment_

        Load an attachment from the database and put it somewhere useful
        """
        attach = self.database.getAttachment(self.document["_id"], name)

        if not overwrite:
            if name in self.attachments.keys():
                logging.info("Attachment already exists, so we're skipping")
                return

        self.attachments[name] = attach
        return

    def loadByView(self, view, value):
        """
        _loadByView_

        Underlying code to load views
        """
        viewRes = self.database.loadView('ConfigCache', view, {}, [value])

        if len(viewRes['rows']) == 0:
            # Then we have a problem
            logging.error("Unable to load using view %s and value %s" % (view, str(value)))

        self.unwrapView(viewRes)
        self.loadByID(self.document["_id"])
        return

    def saveConfigToDisk(self, targetFile):
        """
        _saveConfigToDisk_

        Make sure we can save our config file to disk
        """
        config = self.getConfig()
        if not config:
            return

        # Write to a file
        f = open(targetFile, 'w')
        f.write(config)
        f.close()
        return

    def load(self):
        """
        _load_

        Figure out how to load
        """
        if self.document.get("_id", None) != None:
            # Then we should load by ID
            self.loadByID(self.document["_id"])
            return

        # Otherwise we have to load by view
        if not self.document.get('md5_hash', None) == None:
            # Then we have an md5_hash
            self.loadByView(view='config_by_md5hash',
                            value=self.document['md5_hash'])
        # TODO: Add more views as they become available.
        #elif not self.owner == None:
            # Then we have an owner
            #self.loadByView(view = 'config_by_owner', value = self.owner['name'])

    def unwrapView(self, view):
        """
        _unwrapView_

        Move view information into the main document
        """
        self.document["_id"] = view['rows'][0].get('id')
        self.document["_rev"] = view['rows'][0].get('value').get('_rev')

    def setPSetTweaks(self, PSetTweak):
        """
        _setPSetTweaks_

        Set the PSet tweak details for the config.
        """
        self.document['pset_tweak_details'] = PSetTweak
        return

    def getPSetTweaks(self):
        """
        _getPSetTweaks_

        Retrieve the PSet tweak details.
        """
        return self.document['pset_tweak_details']

    def getOutputModuleInfo(self):
        """
        _getOutputModuleInfo_

        Retrieve the dataset information for the config in the ConfigCache.
        """
        psetTweaks = self.getPSetTweaks()
        if not 'process' in psetTweaks.keys():
            raise ConfigCacheException("Could not find process field in PSet while getting output modules!")
        try:
            outputModuleNames = psetTweaks["process"]["outputModules_"]
        except KeyError as ex:
            msg = "Could not find outputModules_ in psetTweaks['process'] while getting output modules.\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(msg)

        results = {}
        for outputModuleName in outputModuleNames:
            try:
                outModule = psetTweaks["process"][outputModuleName]
            except KeyError:
                msg = "Could not find outputModule %s in psetTweaks['process']" % outputModuleName
                logging.error(msg)
                raise ConfigCacheException(msg)
            dataset = outModule.get("dataset", None)
            if dataset:
                results[outputModuleName] = {"dataTier": outModule["dataset"]["dataTier"],
                                             "filterName": outModule["dataset"]["filterName"]}
            else:
                results[outputModuleName] = {"dataTier": None,
                                             "filterName": None}

        return results

    def addConfig(self, newConfig, psetHash=None):
        """
        _addConfig_
        """
        # The newConfig parameter is a URL suitable for passing to urlopen.
        configString = urllib.urlopen(newConfig).read(-1)
        configMD5 = hashlib.md5(configString).hexdigest()

        self.document['md5_hash'] = configMD5
        self.document['pset_hash'] = psetHash
        self.attachments['configFile'] = configString
        return

    def getConfig(self):
        """
        _getConfig_

        Get the currently active config
        """
        return self.attachments.get('configFile', None)

    def getCouchID(self):
        """
        _getCouchID_

        Return the document's couchID
        """
        return self.document["_id"]

    def getCouchRev(self):
        """
        _getCouchRev_

        Return the document's couchRevision
        """
        return self.document["_rev"]

    @Decorators.requireGroup
    @Decorators.requireUser
    def delete(self):
        """
        _delete_

        Deletes the document with the current docid
        """
        if not self.document["_id"]:
            logging.error("Attempted to delete with no couch ID")

        # TODO: Delete without loading first
        try:
            self.database.queueDelete(self.document)
            self.database.commit()
        except Exception as ex:
            msg = "Error in deleting document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        return

    def getIDFromLabel(self, label):
        """
        _getIDFromLabel_

        Retrieve the ID of a config given its label.
        """
        results = self.database.loadView("ConfigCache", "config_by_label",
                                         {"startkey": label, "limit": 1})
        # Guard against an empty view result before indexing into it
        if results["rows"] and results["rows"][0]["key"] == label:
            return results["rows"][0]["value"]

        return None

    def listAllConfigsByLabel(self):
        """
        _listAllConfigsByLabel_

        Retrieve a list of all the configs in the config cache.  This is
        returned in the form of a dictionary that is keyed by label.
        """
        configs = {}
        results = self.database.loadView("ConfigCache", "config_by_label")

        for result in results["rows"]:
            configs[result["key"]] = result["value"]

        return configs

    def __str__(self):
        """
        Make something printable
        """
        return self.document.__str__()

    def validate(self, configID):
        try:
            #TODO: need to change to DataCache
            #self.loadDocument(configID = configID)
            self.loadByID(configID=configID)
        except Exception as ex:
            raise ConfigCacheException("Failure to load ConfigCache while validating workload: %s" % str(ex))

        if self.detail:
            duplicateCheck = {}
            try:
                outputModuleInfo = self.getOutputModuleInfo()
            except Exception as ex:
                # Something's gone wrong with trying to open the configCache
                msg = "Error in getting output modules from ConfigCache during workload validation. Check ConfigCache formatting!"
                raise ConfigCacheException("%s: %s" % (msg, str(ex)))
            for outputModule in outputModuleInfo.values():
                dataTier = outputModule.get('dataTier', None)
                filterName = outputModule.get('filterName', None)
                if not dataTier:
                    raise ConfigCacheException("No DataTier in output module.")

                # Add dataTier to duplicate dictionary
                if not dataTier in duplicateCheck.keys():
                    duplicateCheck[dataTier] = []
                if filterName in duplicateCheck[dataTier]:
                    # Then we've seen this combination before
                    raise ConfigCacheException("Duplicate dataTier/filterName combination.")
                else:
                    duplicateCheck[dataTier].append(filterName)

            return outputModuleInfo
        else:
            return True
class CouchWorkQueueElement(WorkQueueElement):
    """
    _CouchWorkQueueElement_
    """
    def __init__(self, couchDB, id=None, elementParams=None):
        elementParams = elementParams or {}
        WorkQueueElement.__init__(self, **elementParams)
        if id:
            self._id = id
        self._document = Document(id=id)
        self._couch = couchDB

    rev = property(
        lambda x: str(x._document[u'_rev']) if u'_rev' in x._document else x._document.__getitem__('_rev'),
        lambda x, newid: x._document.__setitem__('_rev', newid))
    timestamp = property(
        lambda x: str(x._document[u'timestamp']) if u'timestamp' in x._document else x._document.__getitem__('timestamp'))
    updatetime = property(
        lambda x: str(x._document[u'updatetime']) if u'updatetime' in x._document else 0)

    @classmethod
    def fromDocument(cls, couchDB, doc):
        """Create element from couch document"""
        elementParams = doc.pop('WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement')
        elementParams["CreationTime"] = doc.pop('timestamp')
        element = CouchWorkQueueElement(couchDB=couchDB,
                                        id=doc['_id'],
                                        elementParams=elementParams)
        element._document['_rev'] = doc.pop('_rev')
        element._document['timestamp'] = elementParams["CreationTime"]
        element._document['updatetime'] = doc.pop('updatetime')
        return element

    def save(self):
        """
        _save_
        """
        self.populateDocument()
        self._couch.queue(self._document)

    def load(self):
        """
        _load_

        Load the document representing this WQE
        """
        document = self._couch.document(self._document['_id'])
        self.update(document.pop('WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'))
        self._document['_rev'] = document.pop('_rev')
        self._document['timestamp'] = document.pop('timestamp', None)
        self._document['updatetime'] = document.pop('updatetime', None)
        return self

    def delete(self):
        """Delete element"""
        self.populateDocument()
        self._document.delete()
        self._couch.queue(self._document)

    def populateDocument(self):
        """Certain attributes shouldn't be stored"""
        self._document.update(self.__to_json__(None))
        now = time.time()
        self._document['updatetime'] = now
        self._document.setdefault('timestamp', now)
        if not self._document.get('_id') and self.id:
            self._document['_id'] = self.id
        attrs = ['WMSpec', 'Task']
        for attr in attrs:
            self._document['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'].pop(attr, None)
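
# A minimal usage sketch for CouchWorkQueueElement (illustrative only, not part
# of the original module). It assumes `db` is a connected CMSCouch Database,
# e.g. CouchServer('http://localhost:5984').connectDatabase('workqueue'), and
# the element parameters are placeholders. Note that save() only queues the
# document in the couch client; an explicit commit() flushes it to CouchDB.
def _exampleWorkQueueElementRoundTrip(db):
    element = CouchWorkQueueElement(db, id='example-element-id',
                                    elementParams={'RequestName': 'example_workflow'})
    element.save()   # queues the document in the couch client
    db.commit()      # flushes the queue to CouchDB

    # Reload via load(), or rebuild from a raw couch document with fromDocument()
    reloaded = CouchWorkQueueElement(db, id='example-element-id').load()
    rebuilt = CouchWorkQueueElement.fromDocument(db, db.document('example-element-id'))
    return reloaded, rebuilt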