Example #1
0
def gatherWMDataMiningStats(wmstatsUrl,
                            reqmgrUrl,
                            wmMiningUrl,
                            mcmUrl,
                            mcmCert,
                            mcmKey,
                            tmpDir,
                            archived=False,
                            log=logging.info):
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl,
                            reqdbURL=reqmgrUrl,
                            reqdbCouchApp="ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)

    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." %
             (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = [
            'normal-archived', 'rejected-archived', 'aborted-archived'
        ]
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates,
                                          jobInfoFlag=jobInfoFlag,
                                          legacyFormat=True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({
                    'filterEfficiency': filterEfficiency,
                    'runWhiteList': runWhiteList
                })

            if oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or \
                oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown':

                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" %
                             (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID=prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry[
                                    'step'] == 'announced':
                                dateString = entry['updater'][
                                    'submission_date']
                                dt = datetime.strptime(dateString,
                                                       '%Y-%m-%d-%H-%M')
                                report[wf].update({
                                    'mcmApprovalTime':
                                    time.mktime(dt.timetuple())
                                })
                                found = True
                        if not found:
                            log.error(
                                "History found but no approval time for %s" %
                                wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, dummy_exc_value, dummy_exc_traceback = sys.exc_info(
                        )
                        log.error(
                            "%s getting history from McM for PREP ID %s. May be transient and/or SSO problem."
                            % (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting history from McM for PREP ID %s. Unknown error."
                            % (exc_type, prepID))

                    try:
                        mcmRequest = mcm.getRequest(prepID=prepID)
                        report[wf].update({
                            'mcmTotalEvents':
                            mcmRequest.get('total_events', 'NoMcMData')
                        })
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting request from McM for PREP ID %s. May be transient and/or SSO problem."
                            % (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting request from McM for PREP ID %s. Unknown error."
                            % (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf].get('priority', 0)
            requestType = requests[wf].get('request_type', 'Unknown')
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf].get('campaign', 'Unknown')
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" %
                          wf)  # Should not happen but it does.

            # Remove a single  task_ from the start of PREP ID if it exists
            if prep_id and prep_id.startswith('task_'):
                prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except:
                log.error(
                    "Could not decode outputdatasets: %s" % outputdatasets
                )  # Sometimes is a list of lists, not just a list. Bail
            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'

            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[
                wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent,
                                       events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime': int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[
                        wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime': int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None

            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime': approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime': assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime': acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime': closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime': announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime': completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime': newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime': archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate': requestDate})
            except:
                pass

            report[wf].update({
                'priority': priority,
                'status': finalStatus,
                'type': requestType
            })
            report[wf].update({
                'totalLumis': targetLumis,
                'totalEvents': targetEvents,
            })
            report[wf].update({
                'campaign': campaign,
                'prepID': prep_id,
                'outputTier': outputTier,
            })
            report[wf].update({
                'outputDatasets': outputdatasets,
                'inputDataset': inputdataset,
            })

            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [
                    1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100
            ]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]['eventPercents'].get(
                    percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({
                'eventProgress': eventProgress,
                'lumiProgress': lumiProgress,
            })

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    # log.debug("Workflow created: %s" % wf)
                    pass

                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    dummy = json.dumps(
                        newCouchDoc
                    )  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log.error("Failed to queue document:%s \n" %
                              pprint.pprint(newCouchDoc))

    log.info("%s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
Example #2
0
def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmminigUrl, archived = False, log = logging.info):
    
    server, database = splitCouchServiceURL(wmminigUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl)
    
    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)
    
    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"
    
    log("INFO: %s: Getting job information from %s and %s. Please wait." % (
                  funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag = jobInfoFlag)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log("INFO: %s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    for wf in result.keys():

        # Store a copy of the CouchDB document so we can compare later before updating
        if couchdb.documentExists(wf):
            oldCouchDoc = couchdb.document(wf)
            wfExists = True
        else:
            oldCouchDoc = CouchDoc(id=wf)
            wfExists = False

        newCouchDoc = copy.deepcopy(oldCouchDoc)
        ancientCouchDoc = copy.deepcopy(oldCouchDoc)
        report[wf] = oldCouchDoc
        # FIXME: remove report, only have two instances of couchDoc

        if not oldCouchDoc.has_key('filterEfficiency') or not oldCouchDoc.has_key('runWhiteList'):
            runWhiteList = []
            filterEfficiency = None
            try:
                #log("DEBUG: Looking up %s in ReqMgr" % wf)
                rmDoc = reqMgr.document(wf)
                runWhiteList = rmDoc.get('RunWhiteList', [])
                filterEfficiency = rmDoc.get('FilterEfficiency', None)
            except:
                pass # ReqMgr no longer has the workflow
            report[wf].update({'filterEfficiency':filterEfficiency, 'runWhiteList':runWhiteList})

        # Basic parameters of the workflow
        priority = requests[wf]['priority']
        requestType = requests[wf]['request_type']
        targetLumis = requests[wf].get('input_lumis', 0)
        targetEvents = requests[wf].get('input_events', 0)
        campaign = requests[wf]['campaign']
        prep_id = requests[wf].get('prep_id', None)
        outputdatasets = requests[wf].get('outputdatasets', [])

        # Can be an empty list, full list, empty string, or non-empty string!
        inputdataset = requests[wf]['inputdataset']
        if isinstance(inputdataset, (list,)):
            if inputdataset:
                inputdataset = inputdataset[0]
            else:
                inputdataset = ''

        outputTier = 'Unknown'
        try:
            outputTiers = []
            for ds in outputdatasets:
                if type(ds) == list:
                    outputTiers.append(ds[0].split('/')[-1])
                else:
                    outputTiers.append(ds.split('/')[-1])
        except:
            log("ERROR: Could not decode outputdatasets: %s" % outputdatasets) # Sometimes is a list of lists, not just a list. Bail
        if inputdataset:
            inputTier = inputdataset.split('/')[-1]
            if inputTier in ['GEN']:
                outputTier = 'LHE'
            elif inputTier in ['RAW', 'RECO']:
                outputTier = 'AOD'
            elif inputTier in ['GEN-SIM']:
                outputTier = 'AODSIM'
            elif 'AODSIM' in outputTiers:
                outputTier = 'AODSIM'

        else:
            if len(outputTiers) == 1 and 'GEN' in outputTiers:
                if 'STEP0ATCERN' in wf:
                    outputTier = 'STEP0'
                else:
                    outputTier = 'FullGen'
            elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                outputTier = 'RelVal'
            elif 'RECO' in outputTiers and requestType == 'TaskChain':
                outputTier = 'RelVal'
            elif 'GEN-SIM' in outputTiers:
                outputTier = 'GEN-SIM'
            elif 'AODSIM' in outputTiers:
                outputTier = 'AODSIM'
            elif 'RECO' in outputTiers:
                outputTier = 'AOD'
            elif 'AOD' in outputTiers:
                outputTier = 'AOD'
            else:
                outputTier = 'GEN-SIM'

        # Calculate completion ratios for events and lumi sections, take minimum for all datasets
        eventPercent = 200
        lumiPercent = 200
        datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
        for dataset in datasetReports:
            dsr = datasetReports[dataset].getReport()
            events = dsr.get('events', 0)
            lumis = dsr.get('totalLumis', 0)
            if targetLumis:
                lumiPercent = min(lumiPercent, lumis/targetLumis*100)
            if targetEvents:
                eventPercent = min(eventPercent, events/targetEvents*100)
        if eventPercent > 100:
            eventPercent = 0
        if lumiPercent > 100:
            lumiPercent = 0

        # Sum up all jobs across agents to see if we've run the first, last
        successJobs = 0
        totalJobs = 0
        for agent in result[wf]:
            jobs = result[wf][agent]
            successJobs += jobs['sucess']
            totalJobs += jobs['created']
        try:
            if totalJobs and not report[wf].get('firstJobTime', None):
                report[wf].update({'firstJobTime' : int(time.time())})
            if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                report[wf].update({'lastJobTime' : int(time.time())})
        except:
            pass

        # Figure out current status of workflow and transition times
        finalStatus = None
        newTime = None
        approvedTime = None
        assignedTime = None
        acquireTime = None
        completedTime = None
        closeoutTime = None
        announcedTime = None
        archivedTime = None
        requestDate = None
        for status in requests[wf]['request_status']:
            finalStatus = status['status']
            if status['status'] == 'new':
                newTime = status['update_time']
            if status['status'] == 'assignment-approved':
                approvedTime = status['update_time']
            if status['status'] == 'assigned':
                assignedTime = status['update_time']
            if status['status'] == 'completed':
                completedTime = status['update_time']
            if status['status'] == 'acquired':
                acquireTime = status['update_time']
            if status['status'] == 'closed-out':
                closeoutTime = status['update_time']
            if status['status'] == 'announced':
                announcedTime = status['update_time']
            if status['status'] == 'normal-archived':
                archivedTime = status['update_time']

        # Build or modify the report dictionary for the WF
        report.setdefault(wf, {})

        if approvedTime and not report[wf].get('approvedTime', None):
            report[wf].update({'approvedTime':approvedTime})
        if assignedTime and not report[wf].get('assignedTime', None):
            report[wf].update({'assignedTime':assignedTime})
        if acquireTime and not report[wf].get('acquireTime', None):
            report[wf].update({'acquireTime':acquireTime})
        if closeoutTime and not report[wf].get('closeoutTime', None):
            report[wf].update({'closeoutTime':closeoutTime})
        if announcedTime and not report[wf].get('announcedTime', None):
            report[wf].update({'announcedTime':announcedTime})
        if completedTime and not report[wf].get('completedTime', None):
            report[wf].update({'completedTime':completedTime})
        if newTime and not report[wf].get('newTime', None):
            report[wf].update({'newTime':newTime})
        if archivedTime and not report[wf].get('archivedTime', None):
            report[wf].update({'archivedTime':archivedTime})

        try:
            dt = requests[wf]['request_date']
            requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
            report[wf].update({'requestDate' : requestDate})
        except:
            pass

        report[wf].update({'priority':priority, 'status':finalStatus, 'type':requestType})
        report[wf].update({'totalLumis':targetLumis, 'totalEvents':targetEvents, })
        report[wf].update({'campaign' : campaign, 'prepID' : prep_id, 'outputTier' : outputTier, })
        report[wf].update({'outputDatasets' : outputdatasets, 'inputDataset' : inputdataset, })

        report[wf].setdefault('lumiPercents', {})
        report[wf].setdefault('eventPercents', {})
        lumiProgress = 0
        eventProgress = 0
        for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
            percent = str(percentage)
            percentReported = report[wf]['lumiPercents'].get(percent, None)
            if not percentReported and lumiPercent >= percentage:
                report[wf]['lumiPercents'][percent] = int(time.time())
            if lumiPercent >= percentage:
                lumiProgress = percentage

            percentReported = report[wf]['eventPercents'].get(percent, None)
            if not percentReported and eventPercent >= percentage:
                report[wf]['eventPercents'][percent] = int(time.time())
            if eventPercent >= percentage:
                eventProgress = percentage

        report[wf].update({'eventProgress' : eventProgress, 'lumiProgress' : lumiProgress,  })

        newCouchDoc.update(report[wf])

        # Queue the updated document for addition if it's changed.
        if ancientCouchDoc != newCouchDoc:
            if wfExists:
                #log("DEBUG: Workflow updated: %s" % wf)
                pass
            else:
                #log("DEBUG Workflow created: %s" % wf)
                pass

            try:
                newCouchDoc['updateTime'] = int(time.time())
                report[wf]['updateTime'] = int(time.time())
                cjson.encode(newCouchDoc) # Make sure it encodes before trying to queue
                couchdb.queue(newCouchDoc)
            except:
                log("ERROR: Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log("INFO: %s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
Example #3
0
def gatherWMDataMiningStats(
    wmstatsUrl, reqmgrUrl, wmMiningUrl, mcmUrl, mcmCert, mcmKey, tmpDir, archived=False, log=logging.info
):

    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl)

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)

    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log("INFO: %s: Getting job information from %s and %s. Please wait." % (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ["normal-archived", "rejected-archived", "aborted-archived"]
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log("INFO: %s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if not oldCouchDoc.has_key("filterEfficiency") or not oldCouchDoc.has_key("runWhiteList"):
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log("DEBUG: Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get("RunWhiteList", [])
                    filterEfficiency = rmDoc.get("FilterEfficiency", None)
                except:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({"filterEfficiency": filterEfficiency, "runWhiteList": runWhiteList})

            if not oldCouchDoc.has_key("mcmTotalEvents") or not oldCouchDoc.has_key("mcmApprovalTime"):
                prepID = oldCouchDoc.get("prepID", None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    nMCMCalls += 1
                    mcmHistory = mcm.getHistory(prepID=prepID)
                    mcmRequest = mcm.getRequest(prepID=prepID)
                    report[wf].update({"mcmTotalEvents": mcmRequest.get("total_events", "Unknown")})

                    if not oldCouchDoc.has_key("mcmApprovalTime"):
                        report[wf].update({"mcmApprovalTime": "Unknown"})
                    for entry in mcmHistory:
                        if entry["action"] == "set status" and entry["step"] == "announced":
                            dateString = entry["updater"]["submission_date"]
                            dt = datetime.strptime(dateString, "%Y-%m-%d-%H-%M")
                            report[wf].update({"mcmApprovalTime": time.mktime(dt.timetuple())})

            # Basic parameters of the workflow
            priority = requests[wf]["priority"]
            requestType = requests[wf]["request_type"]
            targetLumis = requests[wf].get("input_lumis", 0)
            targetEvents = requests[wf].get("input_events", 0)
            campaign = requests[wf]["campaign"]
            prep_id = requests[wf].get("prep_id", None)
            outputdatasets = requests[wf].get("outputdatasets", [])

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf]["inputdataset"]
            if isinstance(inputdataset, (list,)):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ""

            outputTier = "Unknown"
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if type(ds) == list:
                        outputTiers.append(ds[0].split("/")[-1])
                    else:
                        outputTiers.append(ds.split("/")[-1])
            except:
                log(
                    "ERROR: Could not decode outputdatasets: %s" % outputdatasets
                )  # Sometimes is a list of lists, not just a list. Bail
            if inputdataset:
                inputTier = inputdataset.split("/")[-1]
                if inputTier in ["GEN"]:
                    outputTier = "LHE"
                elif inputTier in ["RAW", "RECO"]:
                    outputTier = "AOD"
                elif inputTier in ["GEN-SIM"]:
                    outputTier = "AODSIM"
                elif "AODSIM" in outputTiers:
                    outputTier = "AODSIM"

            else:
                if len(outputTiers) == 1 and "GEN" in outputTiers:
                    if "STEP0ATCERN" in wf:
                        outputTier = "STEP0"
                    else:
                        outputTier = "FullGen"
                elif "GEN-SIM" in outputTiers and "AODSIM" in outputTiers and requestType == "TaskChain":
                    outputTier = "RelVal"
                elif "RECO" in outputTiers and requestType == "TaskChain":
                    outputTier = "RelVal"
                elif "GEN-SIM" in outputTiers:
                    outputTier = "GEN-SIM"
                elif "AODSIM" in outputTiers:
                    outputTier = "AODSIM"
                elif "RECO" in outputTiers:
                    outputTier = "AOD"
                elif "AOD" in outputTiers:
                    outputTier = "AOD"
                else:
                    outputTier = "GEN-SIM"

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get("events", 0)
                lumis = dsr.get("totalLumis", 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent, events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs["sucess"]
                totalJobs += jobs["created"]
            try:
                if totalJobs and not report[wf].get("firstJobTime", None):
                    report[wf].update({"firstJobTime": int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get("lastJobTime", None):
                    report[wf].update({"lastJobTime": int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None
            for status in requests[wf]["request_status"]:
                finalStatus = status["status"]
                if status["status"] == "new":
                    newTime = status["update_time"]
                if status["status"] == "assignment-approved":
                    approvedTime = status["update_time"]
                if status["status"] == "assigned":
                    assignedTime = status["update_time"]
                if status["status"] == "completed":
                    completedTime = status["update_time"]
                if status["status"] == "acquired":
                    acquireTime = status["update_time"]
                if status["status"] == "closed-out":
                    closeoutTime = status["update_time"]
                if status["status"] == "announced":
                    announcedTime = status["update_time"]
                if status["status"] == "normal-archived":
                    archivedTime = status["update_time"]

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get("approvedTime", None):
                report[wf].update({"approvedTime": approvedTime})
            if assignedTime and not report[wf].get("assignedTime", None):
                report[wf].update({"assignedTime": assignedTime})
            if acquireTime and not report[wf].get("acquireTime", None):
                report[wf].update({"acquireTime": acquireTime})
            if closeoutTime and not report[wf].get("closeoutTime", None):
                report[wf].update({"closeoutTime": closeoutTime})
            if announcedTime and not report[wf].get("announcedTime", None):
                report[wf].update({"announcedTime": announcedTime})
            if completedTime and not report[wf].get("completedTime", None):
                report[wf].update({"completedTime": completedTime})
            if newTime and not report[wf].get("newTime", None):
                report[wf].update({"newTime": newTime})
            if archivedTime and not report[wf].get("archivedTime", None):
                report[wf].update({"archivedTime": archivedTime})

            try:
                dt = requests[wf]["request_date"]
                requestDate = "%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d" % tuple(dt)
                report[wf].update({"requestDate": requestDate})
            except:
                pass

            report[wf].update({"priority": priority, "status": finalStatus, "type": requestType})
            report[wf].update({"totalLumis": targetLumis, "totalEvents": targetEvents})
            report[wf].update({"campaign": campaign, "prepID": prep_id, "outputTier": outputTier})
            report[wf].update({"outputDatasets": outputdatasets, "inputDataset": inputdataset})

            report[wf].setdefault("lumiPercents", {})
            report[wf].setdefault("eventPercents", {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]["lumiPercents"].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]["lumiPercents"][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]["eventPercents"].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]["eventPercents"][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({"eventProgress": eventProgress, "lumiProgress": lumiProgress})

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log("DEBUG: Workflow updated: %s" % wf)
                    pass
                else:
                    # log("DEBUG Workflow created: %s" % wf)
                    pass

                try:
                    newCouchDoc["updateTime"] = int(time.time())
                    report[wf]["updateTime"] = int(time.time())
                    cjson.encode(newCouchDoc)  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log("ERROR: Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log("INFO: %s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
Example #4
0
def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl,
                            mcmUrl, mcmCert, mcmKey, tmpDir,
                            archived = False, log = logging.info):

    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl, reqmgrUrl, reqdbCouchApp = "ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)

    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." % (
                  funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag = jobInfoFlag, legacyFormat = True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    #log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except:
                    pass # ReqMgr no longer has the workflow
                report[wf].update({'filterEfficiency':filterEfficiency, 'runWhiteList':runWhiteList})

            if ('mcmTotalEvents' not in oldCouchDoc or
                'mcmApprovalTime' not in oldCouchDoc or
                oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or
                oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown'):

                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" % (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID = prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime':'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry['step'] == 'announced':
                                dateString = entry['updater']['submission_date']
                                dt = datetime.strptime(dateString, '%Y-%m-%d-%H-%M')
                                report[wf].update({'mcmApprovalTime':time.mktime(dt.timetuple())})
                                found = True
                        if not found:
                            log.error("History found but no approval time for %s" % wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime':'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. May be transient and/or SSO problem." %
                            (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. Unknown error." %
                            (exc_type, prepID))

                    try:
                        mcmRequest = mcm.getRequest(prepID = prepID)
                        report[wf].update({'mcmTotalEvents': mcmRequest.get('total_events', 'NoMcMData')})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. May be transient and/or SSO problem." %
                            (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. Unknown error." %
                            (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf]['priority']
            requestType = requests[wf]['request_type']
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf]['campaign']
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" % wf) # Should not happen but it does.

            # Remove a single  task_ from the start of PREP ID if it exists
            if prep_id and prep_id.startswith('task_'):
                prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except:
                log.error("Could not decode outputdatasets: %s" % outputdatasets) # Sometimes is a list of lists, not just a list. Bail
            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'

            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis/targetLumis*100)
                if targetEvents:
                    eventPercent = min(eventPercent, events/targetEvents*100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime' : int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime' : int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None

            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime':approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime':assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime':acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime':closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime':announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime':completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime':newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime':archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate' : requestDate})
            except:
                pass

            report[wf].update({'priority':priority, 'status':finalStatus, 'type':requestType})
            report[wf].update({'totalLumis':targetLumis, 'totalEvents':targetEvents, })
            report[wf].update({'campaign' : campaign, 'prepID' : prep_id, 'outputTier' : outputTier, })
            report[wf].update({'outputDatasets' : outputdatasets, 'inputDataset' : inputdataset, })

            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]['eventPercents'].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({'eventProgress' : eventProgress, 'lumiProgress' : lumiProgress,  })

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    #log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    #log.debug("Workflow created: %s" % wf)
                    pass

                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    cjson.encode(newCouchDoc) # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log.error("Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log.info("%s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
Example #5
0
            wfExists = True
        else:
            oldCouchDoc = CouchDoc(id=wf)
            wfExists = False

        newCouchDoc = copy.deepcopy(oldCouchDoc)
        ancientCouchDoc = copy.deepcopy(oldCouchDoc)
        report[wf] = oldCouchDoc
        # FIXME: remove report, only have two instances of couchDoc

        if not oldCouchDoc.has_key('filterEfficiency') or not oldCouchDoc.has_key('runWhiteList'):
            runWhiteList = []
            filterEfficiency = None
            try:
                print "Looking up %s in ReqMgr" % wf
                rmDoc = reqMgr.document(wf)
                runWhiteList = rmDoc.get('RunWhiteList', [])
                filterEfficiency = rmDoc.get('FilterEfficiency', None)
            except:
                pass # ReqMgr no longer has the workflow
            report[wf].update({'filterEfficiency':filterEfficiency, 'runWhiteList':runWhiteList})

        # Basic parameters of the workflow
        priority = requests[wf]['priority']
        requestType = requests[wf]['request_type']
        targetLumis = requests[wf].get('input_lumis', 0)
        targetEvents = requests[wf].get('input_events', 0)
        campaign = requests[wf]['campaign']
        prep_id = requests[wf].get('prep_id', None)
        outputdatasets = requests[wf].get('outputdatasets', [])