Beispiel #1
0
    def moveToArchived(self, config):
        """
        gather active data statistics
        """

        testbedWMStats = WMStatsReader(config.wmstats_url,
                                       reqdbURL=config.reqmgrdb_url)
        reqdbWriter = RequestDBWriter(config.reqmgrdb_url)

        statusTransition = {
            "aborted": ["aborted-completed", "aborted-archived"],
            "rejected": ["rejected-archived"]
        }

        for status, nextStatusList in statusTransition.items():

            requests = testbedWMStats.getRequestByStatus([status],
                                                         jobInfoFlag=True,
                                                         legacyFormat=True)

            self.logger.info("checking %s workflows: %d" %
                             (status, len(requests)))

            if len(requests) > 0:

                requestCollection = RequestInfoCollection(requests)

                requestsDict = requestCollection.getData()
                numOfArchived = 0

                for requestName, requestInfo in requestsDict.items():

                    if requestInfo.getJobSummary().getTotalJobs() == 0:
                        for nextStatus in nextStatusList:
                            reqdbWriter.updateRequestStatus(
                                requestName, nextStatus)
                        numOfArchived += 1

                self.logger.info("Total %s-archieved: %d" %
                                 (status, numOfArchived))

        return
Beispiel #2
0
    def testWMStatsWriter(self):
        # test getWork
        schema = generate_reqmgr_schema()

        result = self.reqDBWriter.insertGenericRequest(schema[0])
        self.assertEquals(result[0]['ok'], True, 'insert fail')

        result = self.reqDBWriter.updateRequestStatus(schema[0]['RequestName'],
                                                      "failed")
        self.assertEquals(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestStatus("not_exist_schema",
                                                      "assigned")
        self.assertEquals(result, 'Error: document not found')

        result = self.reqDBWriter.updateRequestProperty(
            schema[0]['RequestName'], {"Teams": ['teamA']})
        self.assertEquals(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestProperty(
            "not_exist_schema", {"Teams": ['teamA']})
        self.assertEquals(result, 'Error: document not found')

        totalStats = {
            'TotalEstimatedJobs': 100,
            'TotalInputEvents': 1000,
            'TotalInputLumis': 1234,
            'TotalInputFiles': 5
        }
        result = self.reqDBWriter.updateRequestProperty(
            schema[0]['RequestName'], totalStats)
        self.assertEquals(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestProperty(
            schema[0]['RequestName'], totalStats)
        self.assertEquals(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestProperty(
            "not_exist_schema", totalStats)
        self.assertEquals(result, 'Error: document not found')

        spec1 = newWorkload(schema[0]['RequestName'])
        production = spec1.newTask("Production")
        production.setTaskType("Merge")
        production.setSiteWhitelist(['TEST_SITE'])
        properties = {
            "RequestPriority": spec1.priority(),
            'SiteWhitelist': spec1.getTopLevelTask()[0].siteWhitelist(),
            'OutputDatasets': spec1.listOutputDatasets()
        }
        result = self.reqDBWriter.updateRequestProperty(
            spec1.name(), properties)
        self.assertEquals(result, 'OK', 'update fail')

        spec2 = newWorkload("not_exist_schema")
        production = spec2.newTask("Production")
        production.setTaskType("Merge")
        properties = {
            "RequestPriority": spec2.priority(),
            'SiteWhitelist': spec2.getTopLevelTask()[0].siteWhitelist(),
            'OutputDatasets': spec2.listOutputDatasets()
        }
        result = self.reqDBWriter.updateRequestProperty(
            spec2.name(), properties)
        self.assertEquals(result, 'Error: document not found')

        requests = self.wmstatsReader.getRequestByStatus(["failed"],
                                                         jobInfoFlag=False,
                                                         legacyFormat=True)
        self.assertEquals(requests.keys(), [schema[0]['RequestName']])

        requestCollection = RequestInfoCollection(requests)
        result = requestCollection.getJSONData()
        self.assertEquals(result.keys(), [schema[0]['RequestName']])

        requests = self.wmstatsReader.getActiveData()
        self.assertEquals(requests.keys(), [schema[0]['RequestName']])
        requests = self.wmstatsReader.getRequestByStatus(["failed"])
        self.assertEquals(requests.keys(), [schema[0]['RequestName']])

        requests = self.wmstatsReader.getRequestSummaryWithJobInfo(
            schema[0]['RequestName'])
        self.assertEquals(requests.keys(), [schema[0]['RequestName']])
Beispiel #3
0
def gatherWMDataMiningStats(wmstatsUrl,
                            reqmgrUrl,
                            wmMiningUrl,
                            mcmUrl,
                            mcmCert,
                            mcmKey,
                            tmpDir,
                            archived=False,
                            log=logging.info):
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl,
                            reqdbURL=reqmgrUrl,
                            reqdbCouchApp="ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)

    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." %
             (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = [
            'normal-archived', 'rejected-archived', 'aborted-archived'
        ]
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates,
                                          jobInfoFlag=jobInfoFlag,
                                          legacyFormat=True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({
                    'filterEfficiency': filterEfficiency,
                    'runWhiteList': runWhiteList
                })

            if oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or \
                oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown':

                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" %
                             (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID=prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry[
                                    'step'] == 'announced':
                                dateString = entry['updater'][
                                    'submission_date']
                                dt = datetime.strptime(dateString,
                                                       '%Y-%m-%d-%H-%M')
                                report[wf].update({
                                    'mcmApprovalTime':
                                    time.mktime(dt.timetuple())
                                })
                                found = True
                        if not found:
                            log.error(
                                "History found but no approval time for %s" %
                                wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, dummy_exc_value, dummy_exc_traceback = sys.exc_info(
                        )
                        log.error(
                            "%s getting history from McM for PREP ID %s. May be transient and/or SSO problem."
                            % (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting history from McM for PREP ID %s. Unknown error."
                            % (exc_type, prepID))

                    try:
                        mcmRequest = mcm.getRequest(prepID=prepID)
                        report[wf].update({
                            'mcmTotalEvents':
                            mcmRequest.get('total_events', 'NoMcMData')
                        })
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting request from McM for PREP ID %s. May be transient and/or SSO problem."
                            % (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting request from McM for PREP ID %s. Unknown error."
                            % (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf].get('priority', 0)
            requestType = requests[wf].get('request_type', 'Unknown')
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf].get('campaign', 'Unknown')
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" %
                          wf)  # Should not happen but it does.

            # Remove a single  task_ from the start of PREP ID if it exists
            if prep_id and prep_id.startswith('task_'):
                prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except:
                log.error(
                    "Could not decode outputdatasets: %s" % outputdatasets
                )  # Sometimes is a list of lists, not just a list. Bail
            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'

            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[
                wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent,
                                       events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime': int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[
                        wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime': int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None

            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime': approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime': assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime': acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime': closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime': announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime': completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime': newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime': archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate': requestDate})
            except:
                pass

            report[wf].update({
                'priority': priority,
                'status': finalStatus,
                'type': requestType
            })
            report[wf].update({
                'totalLumis': targetLumis,
                'totalEvents': targetEvents,
            })
            report[wf].update({
                'campaign': campaign,
                'prepID': prep_id,
                'outputTier': outputTier,
            })
            report[wf].update({
                'outputDatasets': outputdatasets,
                'inputDataset': inputdataset,
            })

            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [
                    1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100
            ]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]['eventPercents'].get(
                    percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({
                'eventProgress': eventProgress,
                'lumiProgress': lumiProgress,
            })

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    # log.debug("Workflow created: %s" % wf)
                    pass

                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    dummy = json.dumps(
                        newCouchDoc
                    )  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log.error("Failed to queue document:%s \n" %
                              pprint.pprint(newCouchDoc))

    log.info("%s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
Beispiel #4
0
    def testWMStatsWriter(self):
        # test getWork
        schema = generate_reqmgr_schema()
        self.assertEquals(self.wmstatsWriter.insertRequest(schema[0]), 'OK',
                          'insert fail')
        self.assertEquals(
            self.wmstatsWriter.updateRequestStatus(schema[0]['RequestName'],
                                                   "failed"), 'OK',
            'update fail')
        self.assertEquals(
            self.wmstatsWriter.updateRequestStatus("not_exist_schema",
                                                   "assigned"),
            'ERROR: request not found - not_exist_schema')
        self.assertEquals(
            self.wmstatsWriter.updateTeam(schema[0]['RequestName'], 'teamA'),
            'OK', 'update fail')
        self.assertEquals(
            self.wmstatsWriter.updateTeam("not_exist_schema", 'teamA'),
            'ERROR: request not found - not_exist_schema')
        totalStats = {
            'total_jobs': 100,
            'input_events': 1000,
            'input_lumis': 1234,
            'input_num_files': 5
        }
        self.assertEquals(
            self.wmstatsWriter.insertTotalStats(schema[0]['RequestName'],
                                                totalStats), 'INSERTED',
            'update fail')
        self.assertEquals(
            self.wmstatsWriter.insertTotalStats(schema[0]['RequestName'],
                                                totalStats), 'UPDATED',
            'update fail')
        self.assertEquals(
            self.wmstatsWriter.insertTotalStats("not_exist_schema",
                                                totalStats),
            'ERROR: request not found - not_exist_schema')
        spec1 = newWorkload(schema[0]['RequestName'])
        production = spec1.newTask("Production")
        production.setTaskType("Merge")
        production.setSiteWhitelist(['TEST_SITE'])
        self.assertEquals(self.wmstatsWriter.updateFromWMSpec(spec1), 'OK',
                          'update fail')
        spec2 = newWorkload("not_exist_schema")
        production = spec2.newTask("Production")
        production.setTaskType("Merge")
        self.assertEquals(self.wmstatsWriter.updateFromWMSpec(spec2),
                          'ERROR: request not found - not_exist_schema')

        requests = self.wmstatsReader.getRequestByStatus(["failed"],
                                                         jobInfoFlag=False)
        self.assertEquals(requests.keys(), [schema[0]['RequestName']])

        requestCollection = RequestInfoCollection(requests)
        result = requestCollection.getJSONData()
        self.assertEquals(result.keys(), [schema[0]['RequestName']])

        requests = self.wmstatsReader.getActiveData()
        self.assertEquals(requests.keys(), [schema[0]['RequestName']])
        requests = self.wmstatsReader.workflowsByStatus(["failed"])
        self.assertEquals(requests, [schema[0]['RequestName']])
Beispiel #5
0
    couchdb = analyticsServer.connectDatabase(options.database)

    url = "https://cmsweb.cern.ch/couchdb/wmstats"
    WMStats = WMStatsReader(url)
    reqMgr = CouchServer('https://cmsweb.cern.ch/couchdb/').connectDatabase('reqmgr_workload_cache', False)
    print "Getting job information from %s. Please wait." % url

    if options.archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag = jobInfoFlag)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    print "Total %s requests retrieved\n" % len(result)

    report = {}
    for wf in result.keys():

        # Store a copy of the CouchDB document so we can compare later before updating
        if couchdb.documentExists(wf):
            oldCouchDoc = couchdb.document(wf)
            wfExists = True
        else:
            oldCouchDoc = CouchDoc(id=wf)
            wfExists = False