Beispiel #1
0
def gatherWMDataMiningStats(wmstatsUrl,
                            reqmgrUrl,
                            wmMiningUrl,
                            mcmUrl,
                            mcmCert,
                            mcmKey,
                            tmpDir,
                            archived=False,
                            log=logging.info):
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl,
                            reqdbURL=reqmgrUrl,
                            reqdbCouchApp="ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)

    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." %
             (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = [
            'normal-archived', 'rejected-archived', 'aborted-archived'
        ]
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates,
                                          jobInfoFlag=jobInfoFlag,
                                          legacyFormat=True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({
                    'filterEfficiency': filterEfficiency,
                    'runWhiteList': runWhiteList
                })

            if oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or \
                oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown':

                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" %
                             (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID=prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry[
                                    'step'] == 'announced':
                                dateString = entry['updater'][
                                    'submission_date']
                                dt = datetime.strptime(dateString,
                                                       '%Y-%m-%d-%H-%M')
                                report[wf].update({
                                    'mcmApprovalTime':
                                    time.mktime(dt.timetuple())
                                })
                                found = True
                        if not found:
                            log.error(
                                "History found but no approval time for %s" %
                                wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, dummy_exc_value, dummy_exc_traceback = sys.exc_info(
                        )
                        log.error(
                            "%s getting history from McM for PREP ID %s. May be transient and/or SSO problem."
                            % (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting history from McM for PREP ID %s. Unknown error."
                            % (exc_type, prepID))

                    try:
                        mcmRequest = mcm.getRequest(prepID=prepID)
                        report[wf].update({
                            'mcmTotalEvents':
                            mcmRequest.get('total_events', 'NoMcMData')
                        })
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting request from McM for PREP ID %s. May be transient and/or SSO problem."
                            % (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting request from McM for PREP ID %s. Unknown error."
                            % (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf].get('priority', 0)
            requestType = requests[wf].get('request_type', 'Unknown')
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf].get('campaign', 'Unknown')
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" %
                          wf)  # Should not happen but it does.

            # Remove a single  task_ from the start of PREP ID if it exists
            if prep_id and prep_id.startswith('task_'):
                prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except:
                log.error(
                    "Could not decode outputdatasets: %s" % outputdatasets
                )  # Sometimes is a list of lists, not just a list. Bail
            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'

            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[
                wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent,
                                       events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime': int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[
                        wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime': int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None

            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime': approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime': assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime': acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime': closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime': announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime': completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime': newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime': archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate': requestDate})
            except:
                pass

            report[wf].update({
                'priority': priority,
                'status': finalStatus,
                'type': requestType
            })
            report[wf].update({
                'totalLumis': targetLumis,
                'totalEvents': targetEvents,
            })
            report[wf].update({
                'campaign': campaign,
                'prepID': prep_id,
                'outputTier': outputTier,
            })
            report[wf].update({
                'outputDatasets': outputdatasets,
                'inputDataset': inputdataset,
            })

            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [
                    1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100
            ]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]['eventPercents'].get(
                    percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({
                'eventProgress': eventProgress,
                'lumiProgress': lumiProgress,
            })

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    # log.debug("Workflow created: %s" % wf)
                    pass

                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    dummy = json.dumps(
                        newCouchDoc
                    )  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log.error("Failed to queue document:%s \n" %
                              pprint.pprint(newCouchDoc))

    log.info("%s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
Beispiel #2
0
class ConfigCache(WMObject):
    """
    _ConfigCache_

    The class that handles the upload and download of configCache
    artifacts from Couch
    """
    def __init__(self,
                 dbURL,
                 couchDBName=None,
                 id=None,
                 rev=None,
                 usePYCurl=False,
                 ckey=None,
                 cert=None,
                 capath=None,
                 detail=True):
        self.dbname = couchDBName
        self.dburl = dbURL
        self.detail = detail
        try:
            self.couchdb = CouchServer(self.dburl,
                                       usePYCurl=usePYCurl,
                                       ckey=ckey,
                                       cert=cert,
                                       capath=capath)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()

            self.database = self.couchdb.connectDatabase(self.dbname)
        except Exception as ex:
            msg = "Error connecting to couch: %s\n" % str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # local cache
        self.docs_cache = DocumentCache(self.database, self.detail)

        # UserGroup variables
        self.group = None
        self.owner = None

        # Internal data structure
        self.document = Document()
        self.attachments = {}
        self.document['type'] = "config"
        self.document['description'] = {}
        self.document['description']['config_label'] = None
        self.document['description']['config_desc'] = None

        if id != None:
            self.document['_id'] = id
        self.document['pset_tweak_details'] = None
        self.document['info'] = None
        self.document['config'] = None
        return

    def createDatabase(self):
        """
        _createDatabase_

        """
        database = self.couchdb.createDatabase(self.dbname)
        database.commit()
        return database

    def connectUserGroup(self, groupname, username):
        """
        _connectUserGroup_

        """
        self.group = Group(name=groupname)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.owner = makeUser(groupname,
                              username,
                              couchUrl=self.dburl,
                              couchDatabase=self.dbname)
        return

    def createUserGroup(self, groupname, username):
        """
        _createUserGroup_

        Create all the userGroup information
        """
        self.createGroup(name=groupname)
        self.createUser(username=username)
        return

    def createGroup(self, name):
        """
        _createGroup_

        Create Group for GroupUser
        """
        self.group = Group(name=name)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.group.create()
        return

    def setLabel(self, label):
        """
        _setLabel_

        Util to add a descriptive label to the configuration doc
        """
        self.document['description']['config_label'] = label

    def setDescription(self, desc):
        """
        _setDescription_

        Util to add a verbose description string to a configuration doc
        """
        self.document['description']['config_desc'] = desc

    @Decorators.requireGroup
    def createUser(self, username):
        self.owner = makeUser(self.group['name'],
                              username,
                              couchUrl=self.dburl,
                              couchDatabase=self.dbname)
        self.owner.create()
        self.owner.ownThis(self.document)
        return

    @Decorators.requireGroup
    @Decorators.requireUser
    def save(self):
        """
        _save_

        Save yourself!  Save your internal document.
        """
        rawResults = self.database.commit(doc=self.document)

        # We should only be committing one document at a time
        # if not, get the last one.

        try:
            commitResults = rawResults[-1]
            self.document["_rev"] = commitResults.get('rev')
            self.document["_id"] = commitResults.get('id')
        except KeyError as ex:
            msg = "Document returned from couch without ID or Revision\n"
            msg += "Document probably bad\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # Now do the attachments
        for attachName in self.attachments:
            self.saveAttachment(name=attachName,
                                attachment=self.attachments[attachName])

        return

    def saveAttachment(self, name, attachment):
        """
        _saveAttachment_

        Save an attachment to the document
        """

        retval = self.database.addAttachment(self.document["_id"],
                                             self.document["_rev"], attachment,
                                             name)

        if retval.get('ok', False) != True:
            # Then we have a problem
            msg = "Adding an attachment to document failed\n"
            msg += str(retval)
            msg += "ID: %s, Rev: %s" % (self.document["_id"],
                                        self.document["_rev"])
            logging.error(msg)
            raise ConfigCacheException(msg)

        self.document["_rev"] = retval['rev']
        self.document["_id"] = retval['id']
        self.attachments[name] = attachment

        return

    def loadDocument(self, configID):
        """
        _loadDocument_

        Load a document from the document cache given its couchID
        """
        self.document = self.docs_cache[configID]

    def loadByID(self, configID):
        """
        _loadByID_

        Load a document from the server given its couchID
        """
        try:
            self.document = self.database.document(id=configID)
            if 'owner' in self.document.keys():
                self.connectUserGroup(
                    groupname=self.document['owner'].get('group', None),
                    username=self.document['owner'].get('user', None))
            if '_attachments' in self.document.keys():
                # Then we need to load the attachments
                for key in self.document['_attachments'].keys():
                    self.loadAttachment(name=key)
        except CouchNotFoundError as ex:
            msg = "Document with id %s not found in couch\n" % (configID)
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        except Exception as ex:
            msg = "Error loading document from couch\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        return

    def loadAttachment(self, name, overwrite=True):
        """
        _loadAttachment_

        Load an attachment from the database and put it somewhere useful
        """

        attach = self.database.getAttachment(self.document["_id"], name)

        if not overwrite:
            if name in self.attachments.keys():
                logging.info("Attachment already exists, so we're skipping")
                return

        self.attachments[name] = attach

        return

    def loadByView(self, view, value):
        """
        _loadByView_

        Underlying code to load views
        """

        viewRes = self.database.loadView('ConfigCache', view, {}, [value])

        if len(viewRes['rows']) == 0:
            # Then we have a problem
            logging.error("Unable to load using view %s and value %s" %
                          (view, str(value)))

        self.unwrapView(viewRes)
        self.loadByID(self.document["_id"])
        return

    def saveConfigToDisk(self, targetFile):
        """
        _saveConfigToDisk_

        Make sure we can save our config file to disk
        """
        config = self.getConfig()
        if not config:
            return

        # Write to a file
        f = open(targetFile, 'w')
        f.write(config)
        f.close()
        return

    def load(self):
        """
        _load_

        Figure out how to load
        """

        if self.document.get("_id", None) != None:
            # Then we should load by ID
            self.loadByID(self.document["_id"])
            return

        # Otherwise we have to load by view

        if not self.document.get('md5_hash', None) == None:
            # Then we have an md5_hash
            self.loadByView(view='config_by_md5hash',
                            value=self.document['md5_hash'])
        # TODO: Add more views as they become available.

        #elif not self.owner == None:
        # Then we have an owner
        #self.loadByView(view = 'config_by_owner', value = self.owner['name'])

    def unwrapView(self, view):
        """
        _unwrapView_

        Move view information into the main document
        """

        self.document["_id"] = view['rows'][0].get('id')
        self.document["_rev"] = view['rows'][0].get('value').get('_rev')

    def setPSetTweaks(self, PSetTweak):
        """
        _setPSetTweaks_

        Set the PSet tweak details for the config.
        """
        self.document['pset_tweak_details'] = PSetTweak
        return

    def getPSetTweaks(self):
        """
        _getPSetTweaks_

        Retrieve the PSet tweak details.
        """
        return self.document['pset_tweak_details']

    def getOutputModuleInfo(self):
        """
        _getOutputModuleInfo_

        Retrieve the dataset information for the config in the ConfigCache.
        """
        psetTweaks = self.getPSetTweaks()
        if not 'process' in psetTweaks.keys():
            raise ConfigCacheException(
                "Could not find process field in PSet while getting output modules!"
            )
        try:
            outputModuleNames = psetTweaks["process"]["outputModules_"]
        except KeyError as ex:
            msg = "Could not find outputModules_ in psetTweaks['process'] while getting output modules.\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(msg)

        results = {}
        for outputModuleName in outputModuleNames:
            try:
                outModule = psetTweaks["process"][outputModuleName]
            except KeyError:
                msg = "Could not find outputModule %s in psetTweaks['process']" % outputModuleName
                logging.error(msg)
                raise ConfigCacheException(msg)
            dataset = outModule.get("dataset", None)
            if dataset:
                results[outputModuleName] = {
                    "dataTier": outModule["dataset"]["dataTier"],
                    "filterName": outModule["dataset"]["filterName"]
                }
            else:
                results[outputModuleName] = {
                    "dataTier": None,
                    "filterName": None
                }

        return results

    def addConfig(self, newConfig, psetHash=None):
        """
        _addConfig_


        """
        # The newConfig parameter is a URL suitable for passing to urlopen.
        configString = urllib.urlopen(newConfig).read(-1)
        configMD5 = hashlib.md5(configString).hexdigest()

        self.document['md5_hash'] = configMD5
        self.document['pset_hash'] = psetHash
        self.attachments['configFile'] = configString
        return

    def getConfig(self):
        """
        _getConfig_

        Get the currently active config
        """
        return self.attachments.get('configFile', None)

    def getCouchID(self):
        """
        _getCouchID_

        Return the document's couchID
        """

        return self.document["_id"]

    def getCouchRev(self):
        """
        _getCouchRev_

        Return the document's couchRevision
        """

        return self.document["_rev"]

    @Decorators.requireGroup
    @Decorators.requireUser
    def delete(self):
        """
        _delete_

        Deletes the document with the current docid
        """
        if not self.document["_id"]:
            logging.error("Attempted to delete with no couch ID")

        # TODO: Delete without loading first
        try:
            self.database.queueDelete(self.document)
            self.database.commit()
        except Exception as ex:
            msg = "Error in deleting document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        return

    def getIDFromLabel(self, label):
        """
        _getIDFromLabel_

        Retrieve the ID of a config given it's label.
        """
        results = self.database.loadView("ConfigCache", "config_by_label", {
            "startkey": label,
            "limit": 1
        })

        if results["rows"][0]["key"] == label:
            return results["rows"][0]["value"]

        return None

    def listAllConfigsByLabel(self):
        """
        _listAllConfigsByLabel_

        Retrieve a list of all the configs in the config cache.  This is
        returned in the form of a dictionary that is keyed by label.
        """
        configs = {}
        results = self.database.loadView("ConfigCache", "config_by_label")

        for result in results["rows"]:
            configs[result["key"]] = result["value"]

        return configs

    def __str__(self):
        """
        Make something printable

        """

        return self.document.__str__()

    def validate(self, configID):

        try:
            #TODO: need to change to DataCache
            #self.loadDocument(configID = configID)
            self.loadByID(configID=configID)
        except Exception as ex:
            raise ConfigCacheException(
                "Failure to load ConfigCache while validating workload: %s" %
                str(ex))

        if self.detail:
            duplicateCheck = {}
            try:
                outputModuleInfo = self.getOutputModuleInfo()
            except Exception as ex:
                # Something's gone wrong with trying to open the configCache
                msg = "Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!"
                raise ConfigCacheException("%s: %s" % (msg, str(ex)))
            for outputModule in outputModuleInfo.values():
                dataTier = outputModule.get('dataTier', None)
                filterName = outputModule.get('filterName', None)
                if not dataTier:
                    raise ConfigCacheException("No DataTier in output module.")

                # Add dataTier to duplicate dictionary
                if not dataTier in duplicateCheck.keys():
                    duplicateCheck[dataTier] = []
                if filterName in duplicateCheck[dataTier]:
                    # Then we've seen this combination before
                    raise ConfigCacheException(
                        "Duplicate dataTier/filterName combination.")
                else:
                    duplicateCheck[dataTier].append(filterName)
            return outputModuleInfo
        else:
            return True
Beispiel #3
0
class CouchWorkQueueElement(WorkQueueElement):
    """
    _CouchWorkQueueElement_

    """
    def __init__(self, couchDB, id = None, elementParams = None):
        elementParams = elementParams or {}
        WorkQueueElement.__init__(self, **elementParams)
        if id:
            self._id = id
        self._document = Document(id = id)
        self._couch = couchDB

    rev = property(
        lambda x: str(x._document[u'_rev']) if x._document.has_key(u'_rev') else x._document.__getitem__('_rev'),
        lambda x, newid: x._document.__setitem__('_rev', newid))
    timestamp = property(
        lambda x: str(x._document[u'timestamp']) if x._document.has_key(u'timestamp') else x._document.__getitem__('timestamp')
        )
    updatetime = property(
        lambda x: str(x._document[u'updatetime']) if x._document.has_key(u'updatetime') else 0
        )


    @classmethod
    def fromDocument(cls, couchDB, doc):
        """Create element from couch document"""
        element = CouchWorkQueueElement(couchDB = couchDB,
                                        id = doc['_id'],
                                        elementParams = doc.pop('WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement')
                                        )
        element._document['_rev'] = doc.pop('_rev')
        element._document['timestamp'] = doc.pop('timestamp')
        element._document['updatetime'] = doc.pop('updatetime')
        return element

    def save(self):
        """
        _save
        """
        self.populateDocument()
        self._couch.queue(self._document)

    def load(self):
        """
        _load_

        Load the document representing this WQE
        """
        document = self._couch.document(self._document['_id'])
        self.update(document.pop('WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'))
        self._document['_rev'] = document.pop('_rev')
        self._document['timestamp'] = document.pop('timestamp', None)
        self._document['updatetime'] = document.pop('updatetime', None)
        return self

    def delete(self):
        """Delete element"""
        self.populateDocument()
        self._document.delete()
        self._couch.queue(self._document)

    def populateDocument(self):
        """Certain attributed shouldn't be stored"""
        self._document.update(self.__to_json__(None))
        now = time.time()
        self._document['updatetime'] = now
        self._document.setdefault('timestamp', now)
        if not self._document.get('_id') and self.id:
            self._document['_id'] = self.id
        attrs = ['WMSpec', 'Task']
        for attr in attrs:
            self._document['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'].pop(attr, None)
class CouchWorkQueueElement(WorkQueueElement):
    """
    _CouchWorkQueueElement_

    """
    def __init__(self, couchDB, id=None, elementParams=None):
        elementParams = elementParams or {}
        WorkQueueElement.__init__(self, **elementParams)
        if id:
            self._id = id
        self._document = Document(id=id)
        self._couch = couchDB

    rev = property(
        lambda x: str(x._document[u'_rev'])
        if u'_rev' in x._document else x._document.__getitem__('_rev'),
        lambda x, newid: x._document.__setitem__('_rev', newid))
    timestamp = property(lambda x: str(x._document[u'timestamp'])
                         if u'timestamp' in x._document else x._document.
                         __getitem__('timestamp'))
    updatetime = property(lambda x: str(x._document[u'updatetime'])
                          if u'updatetime' in x._document else 0)

    @classmethod
    def fromDocument(cls, couchDB, doc):
        """Create element from couch document"""
        elementParams = doc.pop(
            'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement')
        elementParams["CreationTime"] = doc.pop('timestamp')
        element = CouchWorkQueueElement(couchDB=couchDB,
                                        id=doc['_id'],
                                        elementParams=elementParams)
        element._document['_rev'] = doc.pop('_rev')
        element._document['timestamp'] = elementParams["CreationTime"]
        element._document['updatetime'] = doc.pop('updatetime')
        return element

    def save(self):
        """
        _save
        """
        self.populateDocument()
        self._couch.queue(self._document)

    def load(self):
        """
        _load_

        Load the document representing this WQE
        """
        document = self._couch.document(self._document['_id'])
        self.update(
            document.pop(
                'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'
            ))
        self._document['_rev'] = document.pop('_rev')
        self._document['timestamp'] = document.pop('timestamp', None)
        self._document['updatetime'] = document.pop('updatetime', None)
        return self

    def delete(self):
        """Delete element"""
        self.populateDocument()
        self._document.delete()
        self._couch.queue(self._document)

    def populateDocument(self):
        """Certain attributed shouldn't be stored"""
        self._document.update(self.__to_json__(None))
        now = time.time()
        self._document['updatetime'] = now
        self._document.setdefault('timestamp', now)
        if not self._document.get('_id') and self.id:
            self._document['_id'] = self.id
        attrs = ['WMSpec', 'Task']
        for attr in attrs:
            self._document[
                'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'].pop(
                    attr, None)
class ConfigCache(WMObject):
    """
    _ConfigCache_

    The class that handles the upload and download of configCache
    artifacts from Couch
    """
    def __init__(self, dbURL, couchDBName = None, id = None, rev = None, usePYCurl = False, 
                 ckey = None, cert = None, capath = None, detail = True):
        self.dbname = couchDBName
        self.dburl  = dbURL
        self.detail = detail
        try:
            self.couchdb = CouchServer(self.dburl, usePYCurl=usePYCurl, ckey=ckey, cert=cert, capath=capath)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()

            self.database = self.couchdb.connectDatabase(self.dbname)
        except Exception as ex:
            msg = "Error connecting to couch: %s\n" % str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message = msg)

        # local cache
        self.docs_cache = DocumentCache(self.database, self.detail)

        # UserGroup variables
        self.group = None
        self.owner = None

        # Internal data structure
        self.document  = Document()
        self.attachments = {}
        self.document['type'] = "config"
        self.document['description'] = {}
        self.document['description']['config_label'] = None
        self.document['description']['config_desc'] = None

        if id != None:
            self.document['_id']                = id
        self.document['pset_tweak_details'] = None
        self.document['info']               = None
        self.document['config']             = None
        return

    def createDatabase(self):
        """
        _createDatabase_

        """
        database = self.couchdb.createDatabase(self.dbname)
        database.commit()
        return database

    def connectUserGroup(self, groupname, username):
        """
        _connectUserGroup_

        """
        self.group = Group(name = groupname)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.owner = makeUser(groupname, username,
                              couchUrl = self.dburl,
                              couchDatabase = self.dbname)
        return

    def createUserGroup(self, groupname, username):
        """
        _createUserGroup_

        Create all the userGroup information
        """
        self.createGroup(name = groupname)
        self.createUser(username = username)
        return

    def createGroup(self, name):
        """
        _createGroup_

        Create Group for GroupUser
        """
        self.group = Group(name = name)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.group.create()
        return

    def setLabel(self, label):
        """
        _setLabel_

        Util to add a descriptive label to the configuration doc
        """
        self.document['description']['config_label'] = label

    def setDescription(self, desc):
        """
        _setDescription_

        Util to add a verbose description string to a configuration doc
        """
        self.document['description']['config_desc'] = desc

    @Decorators.requireGroup
    def createUser(self, username):
        self.owner = makeUser(self.group['name'], username,
                              couchUrl = self.dburl,
                              couchDatabase = self.dbname)
        self.owner.create()
        self.owner.ownThis(self.document)
        return

    @Decorators.requireGroup
    @Decorators.requireUser
    def save(self):
        """
        _save_

        Save yourself!  Save your internal document.
        """
        rawResults = self.database.commit(doc = self.document)

        # We should only be committing one document at a time
        # if not, get the last one.

        try:
            commitResults = rawResults[-1]
            self.document["_rev"] = commitResults.get('rev')
            self.document["_id"]  = commitResults.get('id')
        except KeyError as ex:
            msg  = "Document returned from couch without ID or Revision\n"
            msg += "Document probably bad\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(message = msg)


        # Now do the attachments
        for attachName in self.attachments:
            self.saveAttachment(name = attachName,
                                attachment = self.attachments[attachName])


        return


    def saveAttachment(self, name, attachment):
        """
        _saveAttachment_

        Save an attachment to the document
        """


        retval = self.database.addAttachment(self.document["_id"],
                                             self.document["_rev"],
                                             attachment,
                                             name)

        if retval.get('ok', False) != True:
            # Then we have a problem
            msg = "Adding an attachment to document failed\n"
            msg += str(retval)
            msg += "ID: %s, Rev: %s" % (self.document["_id"], self.document["_rev"])
            logging.error(msg)
            raise ConfigCacheException(msg)

        self.document["_rev"] = retval['rev']
        self.document["_id"]  = retval['id']
        self.attachments[name] = attachment

        return


    def loadDocument(self, configID):
        """
        _loadDocument_

        Load a document from the document cache given its couchID
        """
        self.document = self.docs_cache[configID]

    def loadByID(self, configID):
        """
        _loadByID_

        Load a document from the server given its couchID
        """
        try:
            self.document = self.database.document(id = configID)
            if 'owner' in self.document.keys():
                self.connectUserGroup(groupname = self.document['owner'].get('group', None),
                                      username  = self.document['owner'].get('user', None))
            if '_attachments' in self.document.keys():
                # Then we need to load the attachments
                for key in self.document['_attachments'].keys():
                    self.loadAttachment(name = key)
        except CouchNotFoundError as ex:
            msg =  "Document with id %s not found in couch\n" % (configID)
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message = msg)
        except Exception as ex:
            msg =  "Error loading document from couch\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message = msg)

        return

    def loadAttachment(self, name, overwrite = True):
        """
        _loadAttachment_

        Load an attachment from the database and put it somewhere useful
        """


        attach = self.database.getAttachment(self.document["_id"], name)

        if not overwrite:
            if name in self.attachments.keys():
                logging.info("Attachment already exists, so we're skipping")
                return

        self.attachments[name] = attach

        return

    def loadByView(self, view, value):
        """
        _loadByView_

        Underlying code to load views
        """

        viewRes = self.database.loadView( 'ConfigCache', view, {}, [value] )

        if len(viewRes['rows']) == 0:
            # Then we have a problem
            logging.error("Unable to load using view %s and value %s" % (view, str(value)))

        self.unwrapView(viewRes)
        self.loadByID(self.document["_id"])
        return

    def saveConfigToDisk(self, targetFile):
        """
        _saveConfigToDisk_

        Make sure we can save our config file to disk
        """
        config = self.getConfig()
        if not config:
            return

        # Write to a file
        f = open(targetFile, 'w')
        f.write(config)
        f.close()
        return


    def load(self):
        """
        _load_

        Figure out how to load
        """

        if self.document.get("_id", None) != None:
            # Then we should load by ID
            self.loadByID(self.document["_id"])
            return

        # Otherwise we have to load by view

        if not self.document.get('md5_hash', None) == None:
            # Then we have an md5_hash
            self.loadByView(view = 'config_by_md5hash', value = self.document['md5_hash'])
        # TODO: Add more views as they become available.


        #elif not self.owner == None:
            # Then we have an owner
            #self.loadByView(view = 'config_by_owner', value = self.owner['name'])





    def unwrapView(self, view):
        """
        _unwrapView_

        Move view information into the main document
        """

        self.document["_id"]  = view['rows'][0].get('id')
        self.document["_rev"] = view['rows'][0].get('value').get('_rev')




    def setPSetTweaks(self, PSetTweak):
        """
        _setPSetTweaks_

        Set the PSet tweak details for the config.
        """
        self.document['pset_tweak_details'] = PSetTweak
        return

    def getPSetTweaks(self):
        """
        _getPSetTweaks_

        Retrieve the PSet tweak details.
        """
        return self.document['pset_tweak_details']

    def getOutputModuleInfo(self):
        """
        _getOutputModuleInfo_

        Retrieve the dataset information for the config in the ConfigCache.
        """
        psetTweaks = self.getPSetTweaks()
        if not 'process' in psetTweaks.keys():
            raise ConfigCacheException("Could not find process field in PSet while getting output modules!")
        try:
            outputModuleNames = psetTweaks["process"]["outputModules_"]
        except KeyError as ex:
            msg =  "Could not find outputModules_ in psetTweaks['process'] while getting output modules.\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(msg)

        results = {}
        for outputModuleName in outputModuleNames:
            try:
                outModule = psetTweaks["process"][outputModuleName]
            except KeyError:
                msg = "Could not find outputModule %s in psetTweaks['process']" % outputModuleName
                logging.error(msg)
                raise ConfigCacheException(msg)
            dataset = outModule.get("dataset", None)
            if dataset:
                results[outputModuleName] = {"dataTier": outModule["dataset"]["dataTier"],
                                             "filterName": outModule["dataset"]["filterName"]}
            else:
                results[outputModuleName] = {"dataTier": None, "filterName": None}

        return results


    def addConfig(self, newConfig, psetHash = None):
        """
        _addConfig_


        """
        # The newConfig parameter is a URL suitable for passing to urlopen.
        configString = urllib.urlopen(newConfig).read(-1)
        configMD5 = hashlib.md5(configString).hexdigest()

        self.document['md5_hash'] = configMD5
        self.document['pset_hash'] = psetHash
        self.attachments['configFile'] = configString
        return

    def getConfig(self):
        """
        _getConfig_

        Get the currently active config
        """
        return self.attachments.get('configFile', None)

    def getCouchID(self):
        """
        _getCouchID_

        Return the document's couchID
        """

        return self.document["_id"]


    def getCouchRev(self):
        """
        _getCouchRev_

        Return the document's couchRevision
        """


        return self.document["_rev"]


    @Decorators.requireGroup
    @Decorators.requireUser
    def delete(self):
        """
        _delete_

        Deletes the document with the current docid
        """
        if not self.document["_id"]:
            logging.error("Attempted to delete with no couch ID")


        # TODO: Delete without loading first
        try:
            self.database.queueDelete(self.document)
            self.database.commit()
        except Exception as ex:
            msg =  "Error in deleting document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message = msg)


        return

    def getIDFromLabel(self, label):
        """
        _getIDFromLabel_

        Retrieve the ID of a config given it's label.
        """
        results = self.database.loadView("ConfigCache", "config_by_label",
                                         {"startkey": label,
                                          "limit": 1})

        if results["rows"][0]["key"] == label:
            return results["rows"][0]["value"]

        return None

    def listAllConfigsByLabel(self):
        """
        _listAllConfigsByLabel_

        Retrieve a list of all the configs in the config cache.  This is
        returned in the form of a dictionary that is keyed by label.
        """
        configs = {}
        results = self.database.loadView("ConfigCache", "config_by_label")

        for result in results["rows"]:
            configs[result["key"]] = result["value"]

        return configs


    def __str__(self):
        """
        Make something printable

        """

        return self.document.__str__()
    
    def validate(self, configID):
        
        try:
            #TODO: need to change to DataCache
            #self.loadDocument(configID = configID)
            self.loadByID(configID = configID)
        except Exception as ex:
            raise ConfigCacheException("Failure to load ConfigCache while validating workload: %s" % str(ex))
        
        if self.detail:
            duplicateCheck = {}
            try:
                outputModuleInfo = self.getOutputModuleInfo()
            except Exception as ex:
                # Something's gone wrong with trying to open the configCache
                msg = "Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!"
                raise ConfigCacheException("%s: %s" % (msg, str(ex)))
            for outputModule in outputModuleInfo.values():
                dataTier   = outputModule.get('dataTier', None)
                filterName = outputModule.get('filterName', None)
                if not dataTier:
                    raise ConfigCacheException("No DataTier in output module.")
    
                # Add dataTier to duplicate dictionary
                if not dataTier in duplicateCheck.keys():
                    duplicateCheck[dataTier] = []
                if filterName in duplicateCheck[dataTier]:
                    # Then we've seen this combination before
                    raise ConfigCacheException("Duplicate dataTier/filterName combination.")
                else:
                    duplicateCheck[dataTier].append(filterName)            
            return outputModuleInfo
        else:
            return True
Beispiel #6
0
def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl,
                            mcmUrl, mcmCert, mcmKey, tmpDir,
                            archived = False, log = logging.info):

    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl, reqmgrUrl, reqdbCouchApp = "ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)

    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." % (
                  funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag = jobInfoFlag, legacyFormat = True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    #log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except:
                    pass # ReqMgr no longer has the workflow
                report[wf].update({'filterEfficiency':filterEfficiency, 'runWhiteList':runWhiteList})

            if ('mcmTotalEvents' not in oldCouchDoc or
                'mcmApprovalTime' not in oldCouchDoc or
                oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or
                oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown'):

                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" % (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID = prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime':'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry['step'] == 'announced':
                                dateString = entry['updater']['submission_date']
                                dt = datetime.strptime(dateString, '%Y-%m-%d-%H-%M')
                                report[wf].update({'mcmApprovalTime':time.mktime(dt.timetuple())})
                                found = True
                        if not found:
                            log.error("History found but no approval time for %s" % wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime':'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. May be transient and/or SSO problem." %
                            (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. Unknown error." %
                            (exc_type, prepID))

                    try:
                        mcmRequest = mcm.getRequest(prepID = prepID)
                        report[wf].update({'mcmTotalEvents': mcmRequest.get('total_events', 'NoMcMData')})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. May be transient and/or SSO problem." %
                            (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. Unknown error." %
                            (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf]['priority']
            requestType = requests[wf]['request_type']
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf]['campaign']
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" % wf) # Should not happen but it does.

            # Remove a single  task_ from the start of PREP ID if it exists
            if prep_id and prep_id.startswith('task_'):
                prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except:
                log.error("Could not decode outputdatasets: %s" % outputdatasets) # Sometimes is a list of lists, not just a list. Bail
            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'

            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis/targetLumis*100)
                if targetEvents:
                    eventPercent = min(eventPercent, events/targetEvents*100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime' : int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime' : int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None

            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime':approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime':assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime':acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime':closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime':announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime':completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime':newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime':archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate' : requestDate})
            except:
                pass

            report[wf].update({'priority':priority, 'status':finalStatus, 'type':requestType})
            report[wf].update({'totalLumis':targetLumis, 'totalEvents':targetEvents, })
            report[wf].update({'campaign' : campaign, 'prepID' : prep_id, 'outputTier' : outputTier, })
            report[wf].update({'outputDatasets' : outputdatasets, 'inputDataset' : inputdataset, })

            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]['eventPercents'].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({'eventProgress' : eventProgress, 'lumiProgress' : lumiProgress,  })

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    #log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    #log.debug("Workflow created: %s" % wf)
                    pass

                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    cjson.encode(newCouchDoc) # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log.error("Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log.info("%s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
Beispiel #7
0
def gatherWMDataMiningStats(
    wmstatsUrl, reqmgrUrl, wmMiningUrl, mcmUrl, mcmCert, mcmKey, tmpDir, archived=False, log=logging.info
):

    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl)

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)

    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log("INFO: %s: Getting job information from %s and %s. Please wait." % (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ["normal-archived", "rejected-archived", "aborted-archived"]
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log("INFO: %s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if not oldCouchDoc.has_key("filterEfficiency") or not oldCouchDoc.has_key("runWhiteList"):
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log("DEBUG: Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get("RunWhiteList", [])
                    filterEfficiency = rmDoc.get("FilterEfficiency", None)
                except:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({"filterEfficiency": filterEfficiency, "runWhiteList": runWhiteList})

            if not oldCouchDoc.has_key("mcmTotalEvents") or not oldCouchDoc.has_key("mcmApprovalTime"):
                prepID = oldCouchDoc.get("prepID", None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    nMCMCalls += 1
                    mcmHistory = mcm.getHistory(prepID=prepID)
                    mcmRequest = mcm.getRequest(prepID=prepID)
                    report[wf].update({"mcmTotalEvents": mcmRequest.get("total_events", "Unknown")})

                    if not oldCouchDoc.has_key("mcmApprovalTime"):
                        report[wf].update({"mcmApprovalTime": "Unknown"})
                    for entry in mcmHistory:
                        if entry["action"] == "set status" and entry["step"] == "announced":
                            dateString = entry["updater"]["submission_date"]
                            dt = datetime.strptime(dateString, "%Y-%m-%d-%H-%M")
                            report[wf].update({"mcmApprovalTime": time.mktime(dt.timetuple())})

            # Basic parameters of the workflow
            priority = requests[wf]["priority"]
            requestType = requests[wf]["request_type"]
            targetLumis = requests[wf].get("input_lumis", 0)
            targetEvents = requests[wf].get("input_events", 0)
            campaign = requests[wf]["campaign"]
            prep_id = requests[wf].get("prep_id", None)
            outputdatasets = requests[wf].get("outputdatasets", [])

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf]["inputdataset"]
            if isinstance(inputdataset, (list,)):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ""

            outputTier = "Unknown"
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if type(ds) == list:
                        outputTiers.append(ds[0].split("/")[-1])
                    else:
                        outputTiers.append(ds.split("/")[-1])
            except:
                log(
                    "ERROR: Could not decode outputdatasets: %s" % outputdatasets
                )  # Sometimes is a list of lists, not just a list. Bail
            if inputdataset:
                inputTier = inputdataset.split("/")[-1]
                if inputTier in ["GEN"]:
                    outputTier = "LHE"
                elif inputTier in ["RAW", "RECO"]:
                    outputTier = "AOD"
                elif inputTier in ["GEN-SIM"]:
                    outputTier = "AODSIM"
                elif "AODSIM" in outputTiers:
                    outputTier = "AODSIM"

            else:
                if len(outputTiers) == 1 and "GEN" in outputTiers:
                    if "STEP0ATCERN" in wf:
                        outputTier = "STEP0"
                    else:
                        outputTier = "FullGen"
                elif "GEN-SIM" in outputTiers and "AODSIM" in outputTiers and requestType == "TaskChain":
                    outputTier = "RelVal"
                elif "RECO" in outputTiers and requestType == "TaskChain":
                    outputTier = "RelVal"
                elif "GEN-SIM" in outputTiers:
                    outputTier = "GEN-SIM"
                elif "AODSIM" in outputTiers:
                    outputTier = "AODSIM"
                elif "RECO" in outputTiers:
                    outputTier = "AOD"
                elif "AOD" in outputTiers:
                    outputTier = "AOD"
                else:
                    outputTier = "GEN-SIM"

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get("events", 0)
                lumis = dsr.get("totalLumis", 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent, events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs["sucess"]
                totalJobs += jobs["created"]
            try:
                if totalJobs and not report[wf].get("firstJobTime", None):
                    report[wf].update({"firstJobTime": int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get("lastJobTime", None):
                    report[wf].update({"lastJobTime": int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None
            for status in requests[wf]["request_status"]:
                finalStatus = status["status"]
                if status["status"] == "new":
                    newTime = status["update_time"]
                if status["status"] == "assignment-approved":
                    approvedTime = status["update_time"]
                if status["status"] == "assigned":
                    assignedTime = status["update_time"]
                if status["status"] == "completed":
                    completedTime = status["update_time"]
                if status["status"] == "acquired":
                    acquireTime = status["update_time"]
                if status["status"] == "closed-out":
                    closeoutTime = status["update_time"]
                if status["status"] == "announced":
                    announcedTime = status["update_time"]
                if status["status"] == "normal-archived":
                    archivedTime = status["update_time"]

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get("approvedTime", None):
                report[wf].update({"approvedTime": approvedTime})
            if assignedTime and not report[wf].get("assignedTime", None):
                report[wf].update({"assignedTime": assignedTime})
            if acquireTime and not report[wf].get("acquireTime", None):
                report[wf].update({"acquireTime": acquireTime})
            if closeoutTime and not report[wf].get("closeoutTime", None):
                report[wf].update({"closeoutTime": closeoutTime})
            if announcedTime and not report[wf].get("announcedTime", None):
                report[wf].update({"announcedTime": announcedTime})
            if completedTime and not report[wf].get("completedTime", None):
                report[wf].update({"completedTime": completedTime})
            if newTime and not report[wf].get("newTime", None):
                report[wf].update({"newTime": newTime})
            if archivedTime and not report[wf].get("archivedTime", None):
                report[wf].update({"archivedTime": archivedTime})

            try:
                dt = requests[wf]["request_date"]
                requestDate = "%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d" % tuple(dt)
                report[wf].update({"requestDate": requestDate})
            except:
                pass

            report[wf].update({"priority": priority, "status": finalStatus, "type": requestType})
            report[wf].update({"totalLumis": targetLumis, "totalEvents": targetEvents})
            report[wf].update({"campaign": campaign, "prepID": prep_id, "outputTier": outputTier})
            report[wf].update({"outputDatasets": outputdatasets, "inputDataset": inputdataset})

            report[wf].setdefault("lumiPercents", {})
            report[wf].setdefault("eventPercents", {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]["lumiPercents"].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]["lumiPercents"][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]["eventPercents"].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]["eventPercents"][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({"eventProgress": eventProgress, "lumiProgress": lumiProgress})

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log("DEBUG: Workflow updated: %s" % wf)
                    pass
                else:
                    # log("DEBUG Workflow created: %s" % wf)
                    pass

                try:
                    newCouchDoc["updateTime"] = int(time.time())
                    report[wf]["updateTime"] = int(time.time())
                    cjson.encode(newCouchDoc)  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log("ERROR: Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log("INFO: %s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()