Beispiel #1
0
 def __init__(self, app, api, config, mount):
     """
     Set up the REST entity and a reader for the auxiliary CouchDB.

     The auxiliary database object is obtained from api.db_handler.get_db()
     using the name stored in config.couch_reqmgr_aux_db.
     """
     RESTEntity.__init__(self, app, api, config, mount)
     auxDB = api.db_handler.get_db(config.couch_reqmgr_aux_db)
     self.reqmgr_aux_db = auxDB
     # reader bound to the "ReqMgrAux" couchapp of that database
     self.reqmgr_aux_db_service = RequestDBReader(auxDB, couchapp="ReqMgrAux")
     self.setName()
Beispiel #2
0
 def __init__(self, couchURL, appName="WMStats", reqdbURL=None, reqdbCouchApp="ReqMgr"):
     """
     Connect to the couch database and, optionally, to the request database.

     couchURL is passed through the _sanitizeURL hook before it is handed,
     together with appName, to _commonInit.  When reqdbURL is set a
     RequestDBReader for it (using reqdbCouchApp) is stored in self.reqDB,
     otherwise self.reqDB is None.
     """
     # use the value returned by the hook: the bare call used to throw it
     # away, so the un-sanitized URL was what reached _commonInit
     couchURL = self._sanitizeURL(couchURL)
     # set the connection for local couchDB call
     self._commonInit(couchURL, appName)
     if reqdbURL:
         self.reqDB = RequestDBReader(reqdbURL, reqdbCouchApp)
     else:
         self.reqDB = None
Beispiel #3
0
 def __init__(self, couchURL, reqdbURL=None, reqdbCouchApp="ReqMgr"):
     """
     Connect to the couch database and, optionally, to the request database.

     The sanitized form of couchURL is handed to _commonInit.  When reqdbURL
     is set a RequestDBReader for it is stored in self.reqDB, otherwise
     self.reqDB is None.
     """
     couchURL = sanitizeURL(couchURL)['url']
     # set the connection for local couchDB call
     self._commonInit(couchURL)
     if reqdbURL:
         # forward the couchapp name; it used to be accepted and then ignored
         self.reqDB = RequestDBReader(reqdbURL, couchapp=reqdbCouchApp)
     else:
         self.reqDB = None
    def __init__(self, rest, config):
        """
        Create the service clients used by CouchDBCleanup.

        The rest argument is not used here; config supplies reqmgrdb_url,
        reqmgr2_url and acdc_url.
        """
        super(CouchDBCleanup, self).__init__(config)
        self.reqDB = RequestDBReader(config.reqmgrdb_url)
        self.reqmgrAux = ReqMgrAux(config.reqmgr2_url, logger=self.logger)
        # statuses that we want to keep the transfer documents
        self.transferStatuses = [
            "assigned",
            "staging",
            "staged",
            "acquired",
            "failed",
            "running-open",
            "running-closed",
        ]

        # the ACDC url carries both the couch server and the database name
        acdcServerURL, acdcDBName = splitCouchServiceURL(config.acdc_url)
        self.acdcService = CouchService(url=acdcServerURL, database=acdcDBName)
def getRequestInformationAndWorkload(requestName, reqmgrUrl, centralRequestDBURL):
    """
    _getRequestInformationAndWorkload_

    Fetch the request document from the central request database and load
    the pickled workload spec attached to it.

    reqmgrUrl is accepted but not used by this function.
    Returns a (WMWorkloadHelper, request document) tuple.
    """
    requestReader = RequestDBReader(centralRequestDBURL, couchapp="ReqMgr")
    requestInfo = requestReader.getRequestByNames(requestName, True)[requestName]
    specDB = Database(requestInfo['CouchWorkloadDBName'], requestInfo['CouchURL'])
    # NOTE(review): the 'spec' attachment is unpickled as-is, which is only
    # safe while the workload database content is trusted
    pickledSpec = specDB.getAttachment(requestName, 'spec')
    return WMWorkloadHelper(pickle.loads(pickledSpec)), requestInfo
def getRequestInformationAndWorkload(requestName, reqmgrUrl, centralRequestDBURL):
    """
    _getRequestInformationAndWorkload_

    Fetch the request document from the central request database and load
    the pickled workload spec attached to it.

    reqmgrUrl is accepted but not used by this function.
    Returns a (WMWorkloadHelper, request document) tuple.
    """
    requestReader = RequestDBReader(centralRequestDBURL, couchapp="ReqMgr")
    requestInfo = requestReader.getRequestByNames(requestName, True)[requestName]
    specDB = Database(requestInfo['CouchWorkloadDBName'], requestInfo['CouchURL'])
    # NOTE(review): the 'spec' attachment is unpickled as-is, which is only
    # safe while the workload database content is trusted
    pickledSpec = specDB.getAttachment(requestName, 'spec')
    return WMWorkloadHelper(pickle.loads(pickledSpec)), requestInfo
Beispiel #7
0
 def gatherActiveDataStats(self, config):
     """
     Refresh the cached latest-job data for requests in ACTIVE_STATUS.

     Nothing is fetched unless DataCache reports the cached job data as
     expired.  Any exception is logged through cherrypy and not re-raised.
     """
     try:
         if DataCache.islatestJobDataExpired():
             reqDB = RequestDBReader(config.requestDBURL)
             wmstatsDB = WMStatsReader(config.wmstatsURL)

             requestNames = reqDB.getRequestByStatus(ACTIVE_STATUS)
             jobData = wmstatsDB.getLatestJobInfoByRequests(requestNames)
             DataCache.setlatestJobData(jobData)

     except Exception as ex:
         # "except X as e" is valid on Python 2.6+ and 3; the former
         # "except X, e" spelling is a SyntaxError on Python 3
         cherrypy.log.error(str(ex))
Beispiel #8
0
    def __init__(self, rest, config):
        """
        Create the service clients and caches used by BuildParentLock.

        The rest argument is not used here; config supplies reqmgr2_url,
        dbs_url and reqmgrdb_url.
        """
        super(BuildParentLock, self).__init__(config)
        self.reqmgrAux = ReqMgrAux(config.reqmgr2_url, logger=self.logger)
        self.dbs = DBS3Reader(config.dbs_url)
        # cache of dbs lookups mapping input dataset to parent dataset
        self.dbsLookupCache = dict()
        # set of currently active datasets requiring parent dataset
        self.inputDatasetCache = set()
        self.reqDB = RequestDBReader(config.reqmgrdb_url)
        self.filterKeys = [
            'assignment-approved',
            'assigned',
            'staging',
            'staged',
            'failed',
            'acquired',
            'running-open',
            'running-closed',
            'force-complete',
            'completed',
            'closed-out',
        ]
Beispiel #9
0
    def setup(self, parameters):
        """
        Called at startup

        Creates the WMStats writer, the request DB reader and writer, the
        ReqMgr client (when useReqMgrForCompletionCheck is set) and the
        jobs, fwjrs and summary couch database connections.
        """
        archiverCfg = self.config.TaskArchiver
        self.useReqMgrForCompletionCheck = getattr(
            archiverCfg, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(archiverCfg, 'archiveDelayHours', 0)
        # set the connection for local couchDB call
        self.wmstatsCouchDB = WMStatsWriter(archiverCfg.localWMStatsURL,
                                            "WMStatsAgent")

        #TODO: we might need to use local db for Tier0
        analyticsCfg = self.config.AnalyticsDataCollector
        self.centralRequestDBReader = RequestDBReader(
            analyticsCfg.centralRequestDBURL,
            couchapp=analyticsCfg.RequestCouchApp)

        if not self.useReqMgrForCompletionCheck:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(
                analyticsCfg.localT0RequestDBURL,
                couchapp=analyticsCfg.RequestCouchApp)
        else:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(
                analyticsCfg.centralRequestDBURL,
                couchapp=analyticsCfg.RequestCouchApp)
            if archiverCfg.reqmgr2Only:
                self.reqmgr2Svc = ReqMgr(archiverCfg.ReqMgr2ServiceURL)
            else:
                #TODO: remove this for reqmgr2
                self.reqmgrSvc = RequestManager(
                    {'endpoint': archiverCfg.ReqMgrServiceURL})

        stateMachineCfg = self.config.JobStateMachine
        jobDBurl = sanitizeURL(stateMachineCfg.couchurl)['url']
        jobDBName = stateMachineCfg.couchDBName
        couchServer = CouchServer(jobDBurl)
        self.jobCouchdb = couchServer
        self.jobsdatabase = couchServer.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = couchServer.connectDatabase("%s/fwjrs" % jobDBName)

        self.statsumdatabase = couchServer.connectDatabase(
            stateMachineCfg.summaryStatsDBName)
Beispiel #10
0
    def gatherActiveDataStats(self, config):
        """
        Refresh the cached latest-job data for requests in ACTIVE_STATUS.

        Nothing is fetched unless DataCache reports the cached job data as
        expired.  Any exception is logged and not re-raised.
        """
        try:
            if not DataCache.islatestJobDataExpired():
                return
            requestReader = RequestDBReader(config.requestDBURL)
            statsReader = WMStatsReader(config.wmstatsURL)

            activeRequests = requestReader.getRequestByStatus(ACTIVE_STATUS)
            DataCache.setlatestJobData(
                statsReader.getLatestJobInfoByRequests(activeRequests))
        except Exception as ex:
            self.logger.error(str(ex))
        return
Beispiel #11
0
 def setup(self, parameters):
     """
     Called at startup

     Creates the WMStats writer, the request DB reader and writer, the
     RequestManager client (when useReqMgrForCompletionCheck is set) and
     the jobs, fwjrs and summary couch database connections.
     """
     archiverCfg = self.config.TaskArchiver
     self.useReqMgrForCompletionCheck = getattr(archiverCfg, 'useReqMgrForCompletionCheck', True)
     # set the connection for local couchDB call
     self.wmstatsCouchDB = WMStatsWriter(archiverCfg.localWMStatsURL)

     #TODO: we might need to use local db for Tier0
     analyticsCfg = self.config.AnalyticsDataCollector
     self.centralRequestDBReader = RequestDBReader(analyticsCfg.centralRequestDBURL,
                                                   couchapp=analyticsCfg.RequestCouchApp)

     if not self.useReqMgrForCompletionCheck:
         # Tier0 case
         self.deletableStates = ["completed"]
         # use local for update
         self.centralRequestDBWriter = RequestDBWriter(analyticsCfg.localT0RequestDBURL,
                                                       couchapp=analyticsCfg.RequestCouchApp)
     else:
         self.deletableStates = ["announced"]
         self.centralRequestDBWriter = RequestDBWriter(analyticsCfg.centralRequestDBURL,
                                                       couchapp=analyticsCfg.RequestCouchApp)
         #TODO: remove this for reqmgr2
         self.reqmgrSvc = RequestManager({'endpoint': archiverCfg.ReqMgrServiceURL})

     stateMachineCfg = self.config.JobStateMachine
     jobDBurl = sanitizeURL(stateMachineCfg.couchurl)['url']
     jobDBName = stateMachineCfg.couchDBName
     couchServer = CouchServer(jobDBurl)
     self.jobCouchdb = couchServer
     self.jobsdatabase = couchServer.connectDatabase("%s/jobs" % jobDBName)
     self.fwjrdatabase = couchServer.connectDatabase("%s/fwjrs" % jobDBName)

     self.statsumdatabase = couchServer.connectDatabase(stateMachineCfg.summaryStatsDBName)
Beispiel #12
0
def fetchWorkflowsSpec(config, listOfWfs):
    """
    Fetch the request documents of a list of workflows and keep only a
    few useful keys of each (as selected by filterKeys).

    A single workflow name given as a string is treated as a one-item list.
    Returns a dict keyed by workflow name.
    """
    workflowNames = [listOfWfs] if isinstance(listOfWfs, basestring) else listOfWfs

    analyticsCfg = config.AnalyticsDataCollector
    requestReader = RequestDBReader(analyticsCfg.centralRequestDBURL,
                                    couchapp=analyticsCfg.RequestCouchApp)
    fullDocs = requestReader.getRequestByNames(workflowNames, True)

    return dict((name, filterKeys(fullDocs[name])) for name in workflowNames)
Beispiel #13
0
 def __init__(self, couchURL, reqdbURL = None, reqdbCouchApp = "ReqMgr"):
     """
     Connect to the couch database and, optionally, to the request database.

     The sanitized form of couchURL is handed to _commonInit.  When reqdbURL
     is set a RequestDBReader for it is stored in self.reqDB, otherwise
     self.reqDB is None.
     """
     couchURL = sanitizeURL(couchURL)['url']
     # set the connection for local couchDB call
     self._commonInit(couchURL)
     if reqdbURL:
         # forward the couchapp name; it used to be accepted and then ignored
         self.reqDB = RequestDBReader(reqdbURL, couchapp=reqdbCouchApp)
     else:
         self.reqDB = None
Beispiel #14
0
def fetchWorkflowsSpec(config, listOfWfs):
    """
    Fetch the request documents of a list of workflows and keep only a
    few useful keys of each (as selected by filterKeys).

    A single workflow name given as a string is treated as a one-item list.
    Returns a dict keyed by workflow name.
    """
    workflowNames = [listOfWfs] if isinstance(listOfWfs, basestring) else listOfWfs

    analyticsCfg = config.AnalyticsDataCollector
    requestReader = RequestDBReader(analyticsCfg.centralRequestDBURL,
                                    couchapp=analyticsCfg.RequestCouchApp)
    fullDocs = requestReader.getRequestByNames(workflowNames, True)

    return dict((name, filterKeys(fullDocs[name])) for name in workflowNames)
Beispiel #15
0
 def __init__(self, couchURL, appName="WMStats", reqdbURL=None, reqdbCouchApp="ReqMgr"):
     """
     Connect to the couch database and, optionally, to the request database.

     couchURL is passed through the _sanitizeURL hook before it is handed,
     together with appName, to _commonInit.  When reqdbURL is set a
     RequestDBReader for it (using reqdbCouchApp) is stored in self.reqDB,
     otherwise self.reqDB is None.
     """
     # use the value returned by the hook: the bare call used to throw it
     # away, so the un-sanitized URL was what reached _commonInit
     couchURL = self._sanitizeURL(couchURL)
     # set the connection for local couchDB call
     self._commonInit(couchURL, appName)
     if reqdbURL:
         self.reqDB = RequestDBReader(reqdbURL, reqdbCouchApp)
     else:
         self.reqDB = None
Beispiel #16
0
 def setUp(self):
     """
     _setUp_

     Prepare a couch test database with the ReqMgr couchapp and attach a
     RequestDBWriter / RequestDBReader pair to it, both with an empty
     defaultStale option set.
     """
     self.schema = []
     self.couchApps = ["ReqMgr"]
     testInit = TestInitCouchApp('RequestDBServiceTest')
     self.testInit = testInit
     testInit.setLogging()
     testInit.setDatabaseConnection()
     testInit.setSchema(customModules=self.schema, useDefault=False)
     dbName = 'requsetdb_t'
     testInit.setupCouch(dbName, *self.couchApps)
     reqDBURL = "%s/%s" % (testInit.couchUrl, dbName)
     self.requestWriter = RequestDBWriter(reqDBURL)
     self.requestReader = RequestDBReader(reqDBURL)
     # each service gets its own empty dict
     for service in (self.requestWriter, self.requestReader):
         service.defaultStale = {}
     return
def main():
    """
    Align the priority of available workqueue elements with their requests.

    Reads workflow names (one per line) from the file given as the single
    command line argument, fetches the matching requests from the central
    request database and, for every workflow, updates the workqueue
    elements in 'Available' status whose Priority differs from the
    request's RequestPriority.
    """
    args = sys.argv[1:]
    if len(args) != 1:
        print("usage: python syncPrioReqMgrxGQ.py <text_file_with_the_workflow_names>")
        # exit status left at 0 so existing wrappers see no change
        sys.exit(0)
    inputFile = args[0]
    with open(inputFile) as f:
        # strip line endings / stray blanks and drop empty lines so that no
        # empty workflow name is sent to the request database
        listWorkflows = [name for name in (line.strip() for line in f) if name]

    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'

    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    wfDBReader = RequestDBReader(
        config.AnalyticsDataCollector.centralRequestDBURL,
        couchapp=config.AnalyticsDataCollector.RequestCouchApp)

    wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)

    workflowsDict = wfDBReader.getRequestByNames(listWorkflows)

    # items() and the call form of print behave the same on Python 2 and 3
    for wf, details in workflowsDict.items():
        requestPrio = details['RequestPriority']
        print("wf: %s and prio: %s" % (wf, requestPrio))
        wqDocs = wqBackend.getElements(WorkflowName=wf)
        docIds = [
            elem._id for elem in wqDocs
            if elem['Status'] == 'Available' and elem['Priority'] != requestPrio
        ]
        if docIds:
            print("Changing the priority of the following available docs: %s" % docIds)
            wqBackend.updateElements(*docIds, Priority=requestPrio)
        else:
            print(" there is nothing to update for this workflow.")
Beispiel #18
0
    def acdcCleanup(self, config):
        """
        Remove ACDC filesets of requests that reached a deletable state.

        Collection names known to the ACDC couch service are looked up in
        the request database ("byrequest" view); for those whose status is
        one of announced / rejected-archived / aborted-archived /
        normal-archived the filesets are removed.  Failures for a single
        request are logged and the loop continues.
        """

        reqDB = RequestDBReader(config.reqmgrdb_url)

        from WMCore.ACDC.CouchService import CouchService
        baseURL, acdcDB = splitCouchServiceURL(config.acdc_url)
        acdcService = CouchService(url=baseURL, database=acdcDB)
        originalRequests = acdcService.listCollectionNames()

        if len(originalRequests) == 0:
            return
        # filter requests
        results = reqDB._getCouchView("byrequest", {}, originalRequests)
        # check the status of the requests [announced, rejected-archived, aborted-archived, normal-archived]
        deleteStates = [
            "announced", "rejected-archived", "aborted-archived",
            "normal-archived"
        ]
        # the first element of each view value is compared against deleteStates
        filteredRequests = [row["key"] for row in results["rows"]
                            if row["value"][0] in deleteStates]

        total = 0
        for req in filteredRequests:
            try:
                deleted = acdcService.removeFilesetsByCollectionName(req)
                if deleted is None:
                    self.logger.warning("request alread deleted %s", req)
                else:
                    # total accumulates len(deleted) for every request processed
                    total += len(deleted)
                    self.logger.info("request %s deleted", req)
            except Exception as ex:
                self.logger.error(
                    "request deleted failed: will try again %s: %s", req,
                    str(ex))
        self.logger.info("total %s requests deleted", total)
        return
    def acdcCleanup(self, config):
        """
        Remove ACDC filesets of requests that reached a deletable state.

        Collection names known to the ACDC couch service are looked up in
        the request database ("byrequest" view); for those whose status is
        one of announced / rejected-archived / aborted-archived /
        normal-archived the filesets are removed.  Failures for a single
        request are logged and the loop continues.
        """

        reqDB = RequestDBReader(config.reqmgrdb_url)

        from WMCore.ACDC.CouchService import CouchService
        baseURL, acdcDB = splitCouchServiceURL(config.acdc_url)
        acdcService = CouchService(url=baseURL, database=acdcDB)
        originalRequests = acdcService.listCollectionNames()

        if len(originalRequests) == 0:
            return
        # filter requests
        results = reqDB._getCouchView("byrequest", {}, originalRequests)
        # check the status of the requests [announced, rejected-archived, aborted-archived, normal-archived]
        deleteStates = ["announced", "rejected-archived", "aborted-archived", "normal-archived"]
        # the first element of each view value is compared against deleteStates
        filteredRequests = [row["key"] for row in results["rows"]
                            if row["value"][0] in deleteStates]

        total = 0
        for req in filteredRequests:
            try:
                deleted = acdcService.removeFilesetsByCollectionName(req)
                if deleted is None:
                    self.logger.warning("request alread deleted %s", req)
                else:
                    # total accumulates len(deleted) for every request processed
                    total += len(deleted)
                    self.logger.info("request %s deleted", req)
            except Exception as ex:
                # a bare "except:" would also trap SystemExit/KeyboardInterrupt
                # and drop the reason of the failure
                self.logger.error("request deleted failed: will try again %s: %s", req, str(ex))
        self.logger.info("total %s requests deleted", total)
        return
Beispiel #20
0
 def setUp(self):
     """
     _setUp_

     Prepare a couch test database with the ReqMgr couchapp and attach a
     RequestDBWriter / RequestDBReader pair to it, both with an empty
     defaultStale option set.
     """
     self.schema = []
     self.couchApps = ["ReqMgr"]
     testInit = TestInitCouchApp("RequestDBServiceTest")
     self.testInit = testInit
     testInit.setLogging()
     testInit.setDatabaseConnection()
     testInit.setSchema(customModules=self.schema, useDefault=False)
     dbName = "requsetdb_t"
     testInit.setupCouch(dbName, *self.couchApps)
     reqDBURL = "%s/%s" % (testInit.couchUrl, dbName)
     self.requestWriter = RequestDBWriter(reqDBURL)
     self.requestReader = RequestDBReader(reqDBURL)
     # each service gets its own empty dict
     for service in (self.requestWriter, self.requestReader):
         service.defaultStale = {}
     return
def main():
    """
    Report, and optionally delete, workqueue documents of workflows.

    With exactly one command line argument (a workflow name) only that
    workflow is inspected and, after an interactive confirmation, its
    central workqueue_inbox document and central workqueue elements are
    deleted.  Without it, the workflows returned by the "bydate" view for a
    hard-coded key range whose RequestStatus is in statusList are inspected
    and the findings are only printed.
    """
    wfName = sys.argv[1] if len(sys.argv) == 2 else []

    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'

    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    # Instantiating central services (couch stuff)
    wfDBReader = RequestDBReader(config.AnalyticsDataCollector.centralRequestDBURL,
                                 couchapp=config.AnalyticsDataCollector.RequestCouchApp)

    # Central services
    wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
    wqInboxDB = Database('workqueue_inbox', config.WorkloadSummary.couchurl)

    # Local services
    # NOTE(review): the backend is opened on "workqueue_inbox" and the inbox
    # handle on 'workqueue'; the names look swapped - confirm before relying
    # on the "local" findings
    localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl, db_name="workqueue_inbox")
    localWQInboxDB = Database('workqueue', config.WorkQueueManager.couchurl)

    statusList = ["failed", "epic-FAILED", "completed", "closed-out",
                  "announced", "aborted", "aborted-completed", "rejected",
                  "normal-archived", "aborted-archived", "rejected-archived"]

    # Central documents of the workflow inspected last.  They are kept apart
    # from the local-queue results so that the delete step below can never
    # act on a local document, and they are defined even if nothing is found.
    centralWQDocs, centralInboxDoc = [], None

    # NOTE(review): final_status is not defined in this function and must come
    # from module scope; the loop variable is unused, so every pass repeats
    # the same queries - confirm whether statusList was intended here
    for stat in final_status:
        # retrieve list of workflows in each status
        if not wfName:
            date_range = {'startkey': [2015, 5, 15, 0, 0, 0], 'endkey': [2015, 5, 26, 0, 0, 0]}
            tempWfs = wfDBReader.getRequestByCouchView("bydate", date_range)
            finalWfs = [wf for wf, content in tempWfs.items()
                        if content['RequestStatus'] in statusList]
            print("Found %d wfs in not in active state" % len(finalWfs))
        else:
            finalWfs = [wfName]
            tempWfs = wfDBReader.getRequestByNames(wfName, True)
            print("Checking %s with status '%s'." % (wfName, tempWfs[wfName]['RequestStatus']))

        wqDocs, wqInboxDocs = [], []
        localWQDocs, localWQInboxDocs = [], []
        for counter, wf in enumerate(finalWfs):
            if counter % 100 == 0:
                print("%d wfs queried ..." % counter)
            wfStatus = tempWfs[wf]['RequestStatus']

            # check whether there are workqueue docs
            centralWQDocs = wqBackend.getElements(WorkflowName=wf)
            if centralWQDocs:
                print("Found %d workqueue docs for %s, status %s" % (len(centralWQDocs), wf, wfStatus))
                print(centralWQDocs)
                wqDocs.append(centralWQDocs)

            # check whether there are workqueue_inbox docs
            centralInboxDoc = None
            if wqInboxDB.documentExists(wf):
                print("Found workqueue_inbox doc for %s, status %s" % (wf, wfStatus))
                # then retrieve the document
                centralInboxDoc = wqInboxDB.document(wf)
                wqInboxDocs.append(centralInboxDoc)

            # check local queue
            localDocIDs = localWQBackend.getElements(WorkflowName=wf)
            if localDocIDs:
                print("Found %d local workqueue docs for %s, status %s" % (len(localDocIDs), wf, wfStatus))
                print(localDocIDs)
                localWQDocs.append(localDocIDs)
            if localWQInboxDB.documentExists(wf):
                print("Found local workqueue_inbox doc for %s, status %s" % (wf, wfStatus))
                localInboxDoc = localWQInboxDB.document(wf)
                print(localInboxDoc)
                localWQInboxDocs.append(localInboxDoc)

    # TODO TODO TODO for the moment only deletes for a specific workflow
    if wfName:
        var = raw_input("\nCan we delete all these documents (Y/N)? ")
        if var == "Y":
            # deletes workqueue_inbox doc
            if centralInboxDoc:
                print("Deleting workqueue_inbox id %s and %s" % (centralInboxDoc['_id'], centralInboxDoc['_rev']))
                wqInboxDB.delete_doc(centralInboxDoc['_id'], centralInboxDoc['_rev'])

            # deletes workqueue docs
            if centralWQDocs:
                print("Deleting workqueue docs %s" % centralWQDocs)
                # exact match: "in wfName" was a substring test on the name
                wqBackend.deleteElements(*[x for x in centralWQDocs if x['RequestName'] == wfName])
        else:
            print("You are the boss, aborting it ...\n")
Beispiel #22
0
class WMStatsReader(object):
    """
    Helper for querying a WMStats couch database, optionally combined with
    a reader for the request database.
    """
    # TODO need to get this from reqmgr api
    # request statuses queried by getActiveData()
    ACTIVE_STATUS = ["new",
                     "assignment-approved",
                     "assigned",
                     "acquired",
                     "running",
                     "running-open",
                     "running-closed",
                     "failed",
                     "force-complete",
                     "completed",
                     "closed-out",
                     "announced",
                     "aborted",
                     "aborted-completed",
                     "rejected"]

    # request statuses queried by getT0ActiveData()
    T0_ACTIVE_STATUS = ["new",
                        "Closed",
                        "Merge",
                        "Harvesting",
                        "Processing Done",
                        "AlcaSkim",
                        "completed"]

    def __init__(self, couchURL, appName="WMStats", reqdbURL=None, reqdbCouchApp="ReqMgr"):
        self._sanitizeURL(couchURL)
        # set the connection for local couchDB call
        self._commonInit(couchURL, appName)
        if reqdbURL:
            self.reqDB = RequestDBReader(reqdbURL, reqdbCouchApp)
        else:
            self.reqDB = None

    def _sanitizeURL(self, couchURL):
        """
        Return the 'url' entry of what sanitizeURL() produces for couchURL.
        """
        urlParts = sanitizeURL(couchURL)
        return urlParts['url']

    def _commonInit(self, couchURL, appName="WMStats"):
        """
        Set up the common variables for inherited classes.
        Inherited classes should call this in their init function.

        Splits couchURL into server URL and database name, connects to
        that database and records the couchapp name and the default stale
        option.
        """
        serverURL, databaseName = splitCouchServiceURL(couchURL)
        self.couchURL = serverURL
        self.dbName = databaseName
        self.couchServer = CouchServer(serverURL)
        self.couchDB = self.couchServer.connectDatabase(databaseName, False)
        self.defaultStale = {"stale": "update_after"}
        self.couchapp = appName

    def setDefaultStaleOptions(self, options):
        if not options:
            options = {}
        if 'stale' not in options:
            options.update(self.defaultStale)
        return options

    def getLatestJobInfoByRequests(self, requestNames):
        jobInfoByRequestAndAgent = {}

        if len(requestNames) > 0:
            requestAndAgentKey = self._getRequestAndAgent(requestNames)
            jobInfoByRequestAndAgent = self._getLatestJobInfo(requestAndAgentKey)
        return jobInfoByRequestAndAgent

    def _updateRequestInfoWithJobInfo(self, requestInfo):
        if len(requestInfo.keys()) != 0:
            jobInfoByRequestAndAgent = self.getLatestJobInfoByRequests(requestInfo.keys())
            self._combineRequestAndJobData(requestInfo, jobInfoByRequestAndAgent)

    def _getCouchView(self, view, options, keys=None):
        keys = keys or []
        options = self.setDefaultStaleOptions(options)

        if keys and isinstance(keys, str):
            keys = [keys]
        return self.couchDB.loadView(self.couchapp, view, options, keys)

    def _formatCouchData(self, data, key="id"):
        result = {}
        for row in data['rows']:
            if 'error' in row:
                continue
            if "doc" in row:
                result[row[key]] = row["doc"]
            else:
                result[row[key]] = None
        return result

    def _combineRequestAndJobData(self, requestData, jobData):
        """
        update the request data with job info
        requestData['AgentJobInfo'] = {'vocms234.cern.ch:9999': {"_id":"d1d11dfcb30e0ab47db42007cb6fb847",
        "_rev":"1-8abfaa2de822ed081cb8d174e3e2c003",
        "status":{"inWMBS":334,"success":381,"submitted":{"retry":2,"pending":2},"failure":{"exception":3}},
        "agent_team":"testbed-integration","workflow":"amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731",
        "timestamp":1394738860,"sites":{"T2_CH_CERN_AI":{"submitted":{"retry":1,"pending":1}},
        "T2_CH_CERN":{"success":6,"submitted":{"retry":1,"pending":1}},
        "T2_DE_DESY":{"failure":{"exception":3},"success":375}},
        "agent":"WMAgentCommissioning",
        "tasks":
           {"/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production":
            {"status":{"failure":{"exception":3},"success":331},
             "sites":{"T2_DE_DESY": {"success":325,"wrappedTotalJobTime":11305908,
                                     "dataset":{},"failure":{"exception":3},
                                     "cmsRunCPUPerformance":{"totalJobCPU":10869688.8,
                                                             "totalEventCPU":10832426.7,
                                                             "totalJobTime":11255865.9},
                                     "inputEvents":0},
                      "T2_CH_CERN":{"success":6,"wrappedTotalJobTime":176573,
                                    "dataset":{},
                                    "cmsRunCPUPerformance":{"totalJobCPU":167324.8,
                                                            "totalEventCPU":166652.1,
                                                            "totalJobTime":174975.7},
                                    "inputEvents":0}},
             "subscription_status":{"updated":1393108089, "finished":2, "total":2,"open":0},
             "jobtype":"Production"},
            "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput/ProductionRAWSIMoutputMergeLogCollect":
             {"jobtype":"LogCollect",
              "subscription_status":{"updated":1392885768,
              "finished":0,
              "total":1,"open":1}},
            "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput":
              {"status":{"success":41,"submitted":{"retry":1,"pending":1}},
                "sites":{"T2_DE_DESY":{"datasetStat":{"totalLumis":973,"events":97300,"size":105698406915},
                                       "success":41,"wrappedTotalJobTime":9190,
                                       "dataset":{"/GluGluToHTohhTo4B_mH-350_mh-125_8TeV-pythia6-tauola/Summer12-OracleUpgrade_TEST_ALAN_HG1401-v1/GEN-SIM":
                                                   {"totalLumis":973,"events":97300,"size":105698406915}},
                                       "cmsRunCPUPerformance":{"totalJobCPU":548.92532,"totalEventCPU":27.449808,"totalJobTime":2909.92125},
                                    "inputEvents":97300},
                         "T2_CH_CERN":{"submitted":{"retry":1,"pending":1}}},
                "subscription_status":{"updated":1392885768,"finished":0,"total":1,"open":1},
                "jobtype":"Merge"},
           "agent_url":"vocms231.cern.ch:9999",
           "type":"agent_request"}}
        """
        if jobData:
            for row in jobData["rows"]:
                # condition checks if documents are deleted between calls.
                # just ignore in that case
                if row["doc"]:
                    jobInfo = requestData[row["doc"]["workflow"]]
                    jobInfo.setdefault("AgentJobInfo", {})
                    jobInfo["AgentJobInfo"][row["doc"]["agent_url"]] = row["doc"]

    def _getRequestAndAgent(self, filterRequest=None):
        """
        returns the [['request_name', 'agent_url'], ....]
        """
        options = {}
        options["reduce"] = True
        options["group"] = True
        result = self._getCouchView("requestAgentUrl", options)

        if filterRequest is None:
            keys = [row['key'] for row in result["rows"]]
        else:
            keys = [row['key'] for row in result["rows"] if row['key'][0] in filterRequest]
        return keys

    def _getLatestJobInfo(self, keys):
        """
        keys is [['request_name', 'agent_url'], ....]
        returns ids
        """
        if len(keys) == 0:
            return []
        options = {"include_docs": True}
        options["reduce"] = False
        result = self._getCouchView("latestRequest", options, keys)
        return result

    def _getAllDocsByIDs(self, ids, include_docs=True):
        """
        keys is [id, ....]
        returns document
        """
        if len(ids) == 0:
            return None
        options = {}
        options["include_docs"] = include_docs
        result = self.couchDB.allDocs(options, ids)

        return result

    def _getAgentInfo(self):
        """
        returns all the agents status on wmstats
        """
        options = {}
        result = self._getCouchView("agentInfo", options)

        return result

    def agentsByTeam(self, filterDrain=False):
        """
        return a dictionary like {team:#agents,...}
        """
        result = self._getAgentInfo()
        response = dict()

        for agentInfo in result["rows"]:
            #filtering empty string
            team = agentInfo['value']['agent_team']
            if not team:
                continue

            response.setdefault(team, 0)
            if filterDrain:
                if not agentInfo['value'].get('drain_mode', False):
                    response[team] += 1
            else:
                response[team] += 1

        return response

    def getServerInstance(self):
        return self.couchServer

    def getDBInstance(self):
        return self.couchDB

    def getRequestDBInstance(self):
        return self.reqDB

    def getHeartbeat(self):
        try:
            return self.couchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getRequestByNames(self, requestNames, jobInfoFlag=False):
        """
        To use this function reqDBURL need to be set when wmstats initialized.
        This will be deplicated so please don use this.
        """
        requestInfo = self.reqDB.getRequestByNames(requestNames, True)

        if jobInfoFlag:
            # get request and agent info
            self._updateRequestInfoWithJobInfo(requestInfo)
        return requestInfo

    def getActiveData(self, jobInfoFlag=False):

        return self.getRequestByStatus(WMStatsReader.ACTIVE_STATUS, jobInfoFlag)

    def getT0ActiveData(self, jobInfoFlag=False):

        return self.getRequestByStatus(WMStatsReader.T0_ACTIVE_STATUS, jobInfoFlag)

    def getRequestByStatus(self, statusList, jobInfoFlag=False, limit=None, skip=None,
                           legacyFormat=False):
        """
        Fetch the request documents that are in one of the given statuses.

        To use this function reqdbURL needs to be set when wmstats is
        initialized. This will be deprecated, so please don't use it.

        :param statusList: statuses passed to reqDB.getRequestByStatus
        :param jobInfoFlag: when True the result is additionally updated
            through _updateRequestInfoWithJobInfo
        :param limit: forwarded to reqDB.getRequestByStatus
        :param skip: forwarded to reqDB.getRequestByStatus
        :param legacyFormat: when True every document is converted from the
            current reqmgr format to the old wmstats format. Shouldn't be set
            to True unless existing code breaks.
        :return: the (possibly converted / updated) request dictionary
        """
        requests = self.reqDB.getRequestByStatus(statusList, True, limit, skip)

        if legacyFormat:
            # replace each document in place by its old wmstats representation
            for name in list(requests):
                requests[name] = convertToLegacyFormat(requests[name])

        if jobInfoFlag:
            # add request and agent info in place
            self._updateRequestInfoWithJobInfo(requests)

        return requests

    def getRequestSummaryWithJobInfo(self, requestName):
        """
        Get the request info for requestName, updated with job information
        through _updateRequestInfoWithJobInfo.

        :param requestName: passed as is to reqDB.getRequestByNames
        :return: the updated result of reqDB.getRequestByNames
        """
        summary = self.reqDB.getRequestByNames(requestName)
        # the helper modifies the dictionary in place
        self._updateRequestInfoWithJobInfo(summary)
        return summary

    def getArchivedRequests(self):
        """
        Get the list of archived workflows that are present in the wmstats db.

        The workflow names come from the "allWorkflows" view (grouped at
        level 1); a workflow is reported when the first element returned for
        it by reqDB.getStatusAndTypeByRequest ends with "-archived".

        :return: list of request names
        """
        viewOptions = {"group_level": 1, "reduce": True}
        rows = self.couchDB.loadView(self.couchapp, "allWorkflows", options=viewOptions)['rows']

        statusByRequest = self.reqDB.getStatusAndTypeByRequest([row['key'] for row in rows])

        return [name for name, statusAndType in statusByRequest.items()
                if statusAndType[0].endswith("-archived")]

    def isWorkflowCompletedWithLogCollectAndCleanUp(self, requestName):
        """
        Check whether the workflow is completed, including its LogCollect and
        CleanUp tasks.

        TODO: If the parent tasks all failed and the next tasks were not
            created at all, the completed status can't be detected.
            If one of the tasks doesn't contain any jobs, it will return False

        :param requestName: name of the request to check
        :return: result of RequestInfo.isWorkflowFinished() for that request
        """
        summary = self.getRequestSummaryWithJobInfo(requestName)
        return RequestInfo(summary[requestName]).isWorkflowFinished()

    def getTaskJobSummaryByRequest(self, requestName, sampleSize=1):
        """
        Collect sample job documents for every grouped row of the
        "jobsByStatusWorkflow" view that belongs to requestName.

        :param requestName: workflow used as the first element of the view key
        :param sampleSize: maximum number of sample documents requested per
            grouped row (passed as 'limit' to jobDetailByTasks)
        :return: nested dictionary merged from the jobDetailByTasks results
        """
        viewOptions = {'reduce': True, 'group_level': 5, 'startkey': [requestName],
                       'endkey': [requestName, {}]}
        grouped = self.couchDB.loadView(self.couchapp, "jobsByStatusWorkflow", options=viewOptions)

        jobDetails = {}
        for row in grouped['rows']:
            # row["key"] = ['workflow', 'task', 'jobstatus', 'exitCode', 'site']
            rowKey = row["key"]
            rangeStart = rowKey[:4]
            # the site only narrows the key range when it is not empty
            if rowKey[4]:
                rangeStart.append(rowKey[4])
            rangeEnd = rangeStart + [{}]

            sampled = self.jobDetailByTasks(rangeStart, rangeEnd, row["value"], sampleSize)
            jobDetails = nestedDictUpdate(jobDetails, sampled)
        return jobDetails

    def jobDetailByTasks(self, startKey, endKey, numOfError, limit=1):
        """
        Load sample job documents from the "jobsByStatusWorkflow" view.

        :param startKey: 'startkey' of the view query
        :param endKey: 'endkey' of the view query
        :param numOfError: value stored as "errorCount" next to the samples
        :param limit: maximum number of rows requested from the view
        :return: dictionary nested as
            {workflow: {task: {jobStatus: {exitCode: {site:
                {"errorCount": numOfError, "samples": [doc, ...]}}}}}}
        """
        viewOptions = {'include_docs': True, 'reduce': False,
                       'startkey': startKey, 'endkey': endKey,
                       'limit': limit}
        rows = self.couchDB.loadView(self.couchapp, "jobsByStatusWorkflow", options=viewOptions)['rows']

        jobInfoDoc = {}
        for row in rows:
            key = row['key']
            # walk workflow -> task -> job status -> exit code -> site,
            # creating the intermediate levels on the way down
            node = jobInfoDoc
            for part in (key[0], key[1], key[2], key[3], key[4]):
                node = node.setdefault(part, {})
            node["errorCount"] = numOfError
            node.setdefault("samples", []).append(row["doc"])

        return jobInfoDoc

    def getAllAgentRequestRevByID(self, agentURL):
        """
        Map document id to revision for the rows of the "byAgentURL" view
        that match agentURL.

        :param agentURL: the single key the view is queried with
        :return: dictionary {docId: rev}
        """
        rows = self.couchDB.loadView(self.couchapp, "byAgentURL",
                                     options={"reduce": False}, keys=[agentURL])['rows']
        return dict((row['id'], row['value']['rev']) for row in rows)
Beispiel #23
0
class CouchDBCleanup(CherryPyPeriodicTask):
    """
    Periodic task that removes obsolete documents from CouchDB: ACDC
    collections of finished requests and TRANSFER documents of requests that
    are no longer in one of the transfer-relevant statuses.
    """

    def __init__(self, rest, config):
        """
        Set up the services used by the cleanup functions.

        :param rest: not used by this class
        :param config: configuration object providing reqmgrdb_url,
            reqmgr2_url and acdc_url
        """
        super(CouchDBCleanup, self).__init__(config)
        self.reqDB = RequestDBReader(config.reqmgrdb_url)
        self.reqmgrAux = ReqMgrAux(config.reqmgr2_url, logger=self.logger)
        # statuses that we want to keep the transfer documents
        self.transferStatuses = [
            "assigned", "staging", "staged", "acquired", "failed",
            "running-open", "running-closed"
        ]

        baseURL, acdcDB = splitCouchServiceURL(config.acdc_url)
        self.acdcService = CouchService(url=baseURL, database=acdcDB)

    def setConcurrentTasks(self, config):
        """
        Set the list of cleanup functions of this task, each paired with the
        duration configured for it (acdcCleanDuration / auxCleanDuration).
        """
        self.concurrentTasks = [{
            'func': self.acdcCleanup,
            'duration': config.acdcCleanDuration
        }, {
            'func': self.auxCouchCleanup,
            'duration': config.auxCleanDuration
        }]

    def auxCouchCleanup(self, config):
        """
        Cleanup TRANSFER documents from the reqmgr_auxiliary CouchDB.

        A transfer document is deleted when its workflow is not returned by
        the "bystatus" view for any of self.transferStatuses.
        The list of status can be expanded in the future

        :param config: not used by this function
        """
        self.logger.info("Fetching TRANSFER documents from CouchDB...")

        transferDocs = self.reqmgrAux.getTransferInfo("ALL_DOCS")
        if not transferDocs:
            self.logger.info(
                "  there are no transfer documents in the database.")
            return
        auxDocs = [row['workflowName'] for row in transferDocs]

        results = self.reqDB._getCouchView("bystatus", {},
                                           self.transferStatuses)
        # a set keeps the membership test below O(1) per transfer document
        # instead of scanning the whole list of active requests every time
        activeRequests = set(row["id"] for row in results["rows"])

        # now find transfer docs that are not active in the system
        staleDocs = [name for name in auxDocs if name not in activeRequests]
        self.logger.info("Found %d transfer documents to delete",
                         len(staleDocs))

        for wflowName in staleDocs:
            self.logger.info("Deleting transfer document: %s", wflowName)
            try:
                self.reqmgrAux.deleteConfigDoc("transferinfo", wflowName)
            except Exception as exc:
                # best effort: a failed deletion must not stop the others
                self.logger.warning(
                    "Failed to delete transfer doc: %s. Error: %s", wflowName,
                    str(exc))
        self.logger.info("Transfer documents cleanup completed.")

    def acdcCleanup(self, config):
        """
        Remove the ACDC collections of requests whose status (first element
        of the "byrequest" view value) is one of the delete states.

        :param config: not used by this function
        """
        self.logger.info("Fetching ACDC collection names...")
        originalRequests = self.acdcService.listCollectionNames()
        if not originalRequests:
            self.logger.info("  there are no collection documents to delete.")
            return

        # filter requests
        results = self.reqDB._getCouchView("byrequest", {}, originalRequests)
        # filter requests only in the following status
        deleteStates = [
            "announced", "rejected-archived", "aborted-archived",
            "normal-archived"
        ]
        filteredRequests = [row["key"] for row in results["rows"]
                            if row["value"][0] in deleteStates]

        total = 0
        for req in filteredRequests:
            try:
                self.logger.info("Removing ACDC collection for: %s", req)
                deleted = self.acdcService.removeFilesetsByCollectionName(req)
                if deleted is None:
                    self.logger.warning("  request '%s' already deleted", req)
                else:
                    total += len(deleted)
                    self.logger.info("request %s deleted", req)
            except Exception as ex:
                # keep going; the request is picked up again on the next run
                self.logger.error(
                    "Failed to delete request: %s, will try again later. Error: %s",
                    req, str(ex))
        self.logger.info("total %s requests deleted", total)
def main():
    """
    Report, and optionally delete, workqueue documents of finished workflows.

    With exactly one command line argument (a workflow name) the central and
    local workqueue / workqueue_inbox documents of that workflow are listed
    and, after an interactive confirmation, the documents found in the
    central workqueue and workqueue_inbox are deleted.

    Without an argument, the workflows returned by the "bydate" view for the
    hard-coded date range and whose status is in statusList are checked and
    their documents are only reported (nothing is deleted).
    """
    wfName = sys.argv[1] if len(sys.argv) == 2 else []

    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ[
            'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'

    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    # Instantiating central services (couch stuff)
    wfDBReader = RequestDBReader(
        config.AnalyticsDataCollector.centralRequestDBURL,
        couchapp=config.AnalyticsDataCollector.RequestCouchApp)

    # Central services
    wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
    wqInboxDB = Database('workqueue_inbox', config.WorkloadSummary.couchurl)

    # Local services
    localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl,
                                      db_name="workqueue_inbox")
    localWQInboxDB = Database('workqueue', config.WorkQueueManager.couchurl)

    statusList = [
        "failed", "epic-FAILED", "completed", "closed-out", "announced",
        "aborted", "aborted-completed", "rejected", "normal-archived",
        "aborted-archived", "rejected-archived"
    ]

    # NOTE: this used to be wrapped in a loop over an undefined name
    # (final_status) whose loop variable was never used; the lookup does not
    # depend on a single status, so it is done exactly once.
    if not wfName:
        date_range = {
            'startkey': [2015, 5, 15, 0, 0, 0],
            'endkey': [2015, 5, 26, 0, 0, 0]
        }
        tempWfs = wfDBReader.getRequestByCouchView("bydate", date_range)
        finalWfs = [wf for wf, content in tempWfs.items()
                    if content['RequestStatus'] in statusList]
        print("Found %d wfs in not in active state" % len(finalWfs))
    else:
        finalWfs = [wfName]
        tempWfs = wfDBReader.getRequestByNames(wfName, True)
        print("Checking %s with status '%s'." % (
            wfName, tempWfs[wfName]['RequestStatus']))

    wqDocs, wqInboxDocs = [], []
    localWQDocs, localWQInboxDocs = [], []
    for counter, wf in enumerate(finalWfs):
        if counter % 100 == 0:
            print("%d wfs queried ..." % counter)
        wfStatus = tempWfs[wf]['RequestStatus']

        # check whether there are workqueue docs
        centralElements = wqBackend.getElements(WorkflowName=wf)
        if centralElements:
            print("Found %d workqueue docs for %s, status %s" % (
                len(centralElements), wf, wfStatus))
            print(centralElements)
            wqDocs.append(centralElements)

        # check whether there are workqueue_inbox docs
        if wqInboxDB.documentExists(wf):
            print("Found workqueue_inbox doc for %s, status %s" % (
                wf, wfStatus))
            # then retrieve the document
            wqInboxDocs.append(wqInboxDB.document(wf))

        # check local queue
        localElements = localWQBackend.getElements(WorkflowName=wf)
        if localElements:
            print("Found %d local workqueue docs for %s, status %s" % (
                len(localElements), wf, wfStatus))
            print(localElements)
            localWQDocs.append(localElements)
        if localWQInboxDB.documentExists(wf):
            print("Found local workqueue_inbox doc for %s, status %s" % (
                wf, wfStatus))
            localInboxDoc = localWQInboxDB.document(wf)
            print(localInboxDoc)
            localWQInboxDocs.append(localInboxDoc)

    # TODO TODO TODO for the moment only deletes for a specific workflow
    if wfName:
        try:
            ask = raw_input  # Python 2
        except NameError:
            ask = input  # Python 3
        var = ask("\nCan we delete all these documents (Y/N)? ")
        if var == "Y":
            # deletes workqueue_inbox doc; only documents read from the
            # central inbox are used, so their _rev matches the database
            # they are deleted from
            for inboxDoc in wqInboxDocs:
                print("Deleting workqueue_inbox id %s and %s" % (
                    inboxDoc['_id'], inboxDoc['_rev']))
                wqInboxDB.delete_doc(inboxDoc['_id'], inboxDoc['_rev'])

            # deletes workqueue docs (central elements only, for the same
            # reason as above)
            toDelete = [x for elements in wqDocs for x in elements]
            if toDelete:
                print("Deleting workqueue docs %s" % (toDelete,))
                wqBackend.deleteElements(
                    *[x for x in toDelete if x['RequestName'] in wfName])
        else:
            print("You are the boss, aborting it ...\n")
Beispiel #25
0
class RequestDBTest(unittest.TestCase):
    """
    Tests of RequestDBWriter / RequestDBReader against a couch database that
    is set up with the ReqMgr couchapp.
    """
    def setUp(self):
        """
        _setUp_

        Create the test couch database and a writer and a reader pointing
        to it.
        """
        self.schema = []
        self.couchApps = ["ReqMgr"]
        self.testInit = TestInitCouchApp('RequestDBServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'requsetdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.requestWriter = RequestDBWriter(reqDBURL)
        self.requestReader = RequestDBReader(reqDBURL)
        # both services get an empty defaultStale option
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Tear down the couch database created in setUp.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        """
        Insert requests, update status and properties, then read them back.
        """
        writer = self.requestWriter
        reader = self.requestReader
        schema = generate_reqmgr_schema(3)

        inserted = writer.insertGenericRequest(schema[0])
        self.assertEqual(len(inserted), 1, 'insert fail')
        firstName = schema[0]['RequestName']

        # status updates: existing and missing document
        outcome = writer.updateRequestStatus(firstName, "failed")
        self.assertEqual(outcome, 'OK', 'update fail')
        outcome = writer.updateRequestStatus("not_exist_schema", "assigned")
        self.assertEqual(outcome, 'Error: document not found')

        # property updates: the same update is applied twice, only the
        # second outcome is checked
        writer.updateRequestProperty(firstName, {'Teams': ['teamA']})
        outcome = writer.updateRequestProperty(firstName, {'Teams': ['teamA']})
        self.assertEqual(outcome, 'OK', 'update fail')
        outcome = writer.updateRequestProperty("not_exist_schema",
                                               {'Teams': 'teamA'})
        self.assertEqual(outcome, 'Error: document not found')

        # read back by name and by status
        found = reader.getRequestByNames([firstName])
        self.assertEqual(len(found), 1, "should be 1")
        found = reader.getRequestByStatus(["failed"], False, 1)
        self.assertEqual(len(found), 1, "should be 1")

        found = reader.getStatusAndTypeByRequest([firstName])
        self.assertEqual(found[firstName][0], 'failed', "should be failed")

        # two more requests, inserted two seconds apart, so that an end time
        # can be chosen between them
        writer.insertGenericRequest(schema[1])
        time.sleep(2)
        writer.insertGenericRequest(schema[2])

        endTime = int(time.time()) - 1
        found = reader.getRequestByStatusAndEndTime("new", False, endTime)
        self.assertEqual(len(found), 1, "should be 1")

        endTime = int(time.time()) + 1
        found = reader.getRequestByStatusAndEndTime("new", False, endTime)
        self.assertEqual(len(found), 2, "should be 2")
Beispiel #26
0
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according to the given condition.
    1. Cleans local couch db when request is completed and reported to central db.
       This will clean up local couchdb, local summary db, local queue

    2. Cleans old couchdoc which is created older than the time threshold

    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # keep the configuration; all connections are created in setup()
        self.config = config

    def setup(self, parameters):
        """
        Called at startup: create the couch / reqmgr service objects used by
        algorithm().

        :param parameters: not used here
        """
        self.teamName = self.config.Agent.teamName
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL, appName="WMStatsAgent")

        #TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        if self.useReqMgrForCompletionCheck:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            #TODO: remove this when reqmgr2 replace reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        Get information from wmbs, workqueue and local couch.
          - It deletes old wmstats docs
          - Archive workflows

        Any exception is logged and swallowed so that the next cycle can
        retry.

        :param parameters: not used here
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted", report)

            logging.info("Cleaning up the archived request docs")
            report = self.cleanAlreadyArchivedWorkflows()
            logging.info("%s archived workflows deleted", report)

            # archiving only workflows that I own (same team)
            logging.info("Getting requests in '%s' state for team '%s'", self.deletableState,
                         self.teamName)
            endTime = int(time.time()) - self.archiveDelayHours * 3600
            wfs = self.centralRequestDBReader.getRequestByTeamAndStatus(self.teamName,
                                                                        self.deletableState)
            commonWfs = self.centralRequestDBReader.getRequestByStatusAndStartTime(self.deletableState,
                                                                                   False, endTime)
            # only workflows returned by both queries are archived
            deletableWorkflows = list(set(wfs) & set(commonWfs))
            logging.info("Ready to archive normal %s workflows", len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows", numUpdated)

            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows", len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows", numUpdated)

            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows", len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows", numUpdated)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")

    def archiveWorkflows(self, workflows, archiveState):
        """
        Clean the local couch data of each workflow and, when that succeeded,
        move the workflow to archiveState.

        :param workflows: iterable of workflow names
        :param archiveState: status to set, e.g. "normal-archived"
        :return: number of workflows updated through the ReqMgr services
            (updates done in the Tier0 branch are not counted)
        """
        updated = 0
        for workflowName in workflows:
            if self.cleanAllLocalCouchDB(workflowName):
                if self.useReqMgrForCompletionCheck:
                    try:
                        #TODO: try reqmgr1 call if it fails (reqmgr2Only - remove this line when reqmgr is replaced)
                        self.reqmgrSvc.updateRequestStatus(workflowName, archiveState)
                        #And replace with this - remove all the exception handling
                        #self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                    except HTTPException as ex:
                        # If we get an HTTPException of 404 means reqmgr2 request
                        if ex.status == 404:
                            # try reqmgr2 call
                            msg = "%s : reqmgr2 request: %s" % (workflowName, str(ex))
                            logging.warning(msg)
                            self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                        else:
                            msg = "%s : fail to update status with HTTP error: %s" % (workflowName, str(ex))
                            logging.error(msg)
                            # bare raise keeps the original traceback
                            raise

                    updated += 1
                    logging.debug("status updated to %s %s", archiveState, workflowName)
                else:
                    # tier0 update case
                    self.centralRequestDBWriter.updateRequestStatus(workflowName, archiveState)
        return updated

    def deleteWorkflowFromJobCouch(self, workflowName, db):
        """
        _deleteWorkflowFromCouch_

        If we are asked to delete the workflow from couch, delete it
        to clear up some space.

        Load the document IDs and revisions out of couch by workflowName,
        then order a delete on them.

        :param workflowName: name of the workflow to remove
        :param db: one of "JobDump", "FWJRDump", "SummaryStats" or
            "WMStatsAgent"; any other value yields an error report
        :return: dictionary with a 'status' of "ok", "warning" or "error",
            a 'message' and, when something was committed, a 'delete' count
        """
        options = {"startkey": [workflowName], "endkey": [workflowName, {}], "reduce": False}

        if db == "JobDump":
            couchDB = self.jobsdatabase
            view = "jobsByWorkflowName"
        elif db == "FWJRDump":
            couchDB = self.fwjrdatabase
            view = "fwjrsByWorkflowName"
        elif db == "SummaryStats":
            # no view here: the document id is the workflow name itself
            couchDB = self.statsumdatabase
            view = None
        elif db == "WMStatsAgent":
            couchDB = self.wmstatsCouchDB.getDBInstance()
            view = "allWorkflows"
            options = {"key": workflowName, "reduce": False}
        else:
            # without this branch couchDB / view would be unbound below
            errorMsg = "Unknown couch db '%s' for %s" % (db, workflowName)
            logging.warning(errorMsg)
            return {'status': 'error', 'message': errorMsg}

        if view is None:
            try:
                committed = couchDB.delete_doc(workflowName)
            except CouchNotFoundError as ex:
                return {'status': 'warning', 'message': "%s: %s" % (workflowName, str(ex))}
        else:
            try:
                jobs = couchDB.loadView(db, view, options=options)['rows']
            except Exception as ex:
                errorMsg = "Error on loading jobs for %s" % workflowName
                logging.warning("%s\n%s", str(ex), errorMsg)
                return {'status': 'error', 'message': errorMsg}

            for j in jobs:
                doc = {}
                doc["_id"] = j['value']['id']
                doc["_rev"] = j['value']['rev']
                couchDB.queueDelete(doc)
            committed = couchDB.commit()

        if committed:
            #create the error report
            errorReport = {}
            deleted = 0
            status = "ok"
            for data in committed:
                if 'error' in data:
                    errorReport.setdefault(data['error'], 0)
                    errorReport[data['error']] += 1
                    status = "error"
                else:
                    deleted += 1
            return {'status': status, 'delete': deleted, 'message': errorReport}
        else:
            return {'status': 'warning', 'message': "no %s exist" % workflowName}

    def cleanAllLocalCouchDB(self, workflowName):
        """
        Delete the documents of workflowName from JobDump, FWJRDump,
        SummaryStats and WMStatsAgent.

        :return: False when the JobDump, FWJRDump or WMStatsAgent deletion
            reported status "error" (the SummaryStats result is only logged),
            True otherwise
        """
        logging.info("Deleting %s from JobCouch", workflowName)

        jobReport = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
        logging.debug("%s docs deleted from JobDump", jobReport)

        fwjrReport = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
        logging.debug("%s docs deleted from FWJRDump", fwjrReport)

        summaryReport = self.deleteWorkflowFromJobCouch(workflowName, "SummaryStats")
        logging.debug("%s docs deleted from SummaryStats", summaryReport)

        wmstatsReport = self.deleteWorkflowFromJobCouch(workflowName, "WMStatsAgent")
        logging.debug("%s docs deleted from wmagent_summary", wmstatsReport)

        # if one of the procedure fails return False
        if (jobReport["status"] == "error" or fwjrReport["status"] == "error" or
                wmstatsReport["status"] == "error"):
            return False
        # other wise return True.
        return True

    def cleanAlreadyArchivedWorkflows(self):
        """
        loop through the workflows in couchdb, if archived delete all the data in couchdb

        :return: number of archived workflows a cleanup was attempted for;
            errors are logged and the count reached so far is returned
        """

        numDeletedRequests = 0
        try:
            localWMStats = self.wmstatsCouchDB.getDBInstance()
            options = {"group_level": 1, "reduce": True}

            results = localWMStats.loadView("WMStatsAgent", "allWorkflows", options=options)['rows']
            requestNames = [x['key'] for x in results]
            logging.info("There are %s workfows to check for archived status", len(requestNames))

            workflowDict = self.centralRequestDBReader.getStatusAndTypeByRequest(requestNames)

            for request, value in workflowDict.items():
                if value[0].endswith("-archived"):
                    self.cleanAllLocalCouchDB(request)
                    numDeletedRequests += 1

        except Exception as ex:
            errorMsg = "Error on loading workflow list from wmagent_summary db"
            logging.warning("%s\n%s", errorMsg, str(ex))

        return numDeletedRequests
Beispiel #27
0
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according to the given condition.
    1. Cleans local couch db when a request is completed and reported to the
       central db. This removes the workflow's documents from the local
       jobs, fwjrs, summary stats and wmstats databases.

    2. Cleans old couch docs which were created before the time threshold.
    """

    def __init__(self, config):
        """
        Keep a reference to the agent configuration; every service and
        database connection is created later in setup().
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Called at startup.

        Reads the TaskArchiver options and creates the couch / ReqMgr
        handles used by algorithm(). `parameters` is not used here.
        """
        self.teamName = self.config.Agent.teamName
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL,
                                            "WMStatsAgent")

        # TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        if self.useReqMgrForCompletionCheck:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            if self.config.TaskArchiver.reqmgr2Only:
                self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            else:
                # TODO: remove this for reqmgr2
                self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        One polling cycle:
          - It deletes old wmstats docs
          - Archive workflows (normal, aborted and rejected ones)

        Every exception is logged and swallowed so that the next cycle can
        retry. `parameters` is not used here.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted", report)

            # archiving only workflows that I own (same team)
            logging.info("Getting requests in '%s' state for team '%s'",
                         self.deletableState, self.teamName)
            endTime = int(time.time()) - self.archiveDelayHours * 3600
            wfs = self.centralRequestDBReader.getRequestByTeamAndStatus(self.teamName,
                                                                        self.deletableState)
            commonWfs = self.centralRequestDBReader.getRequestByStatusAndStartTime(self.deletableState,
                                                                                   False, endTime)
            # only workflows returned by both queries are archived
            deletableWorkflows = list(set(wfs) & set(commonWfs))
            logging.info("Ready to archive normal %s workflows", len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows", numUpdated)

            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows", len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows", numUpdated)

            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows", len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows", numUpdated)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")

    def archiveWorkflows(self, workflows, archiveState):
        """
        Clean the local couch data of each workflow and, when that succeeded,
        move the workflow to `archiveState`.

        :param workflows: iterable of workflow names
        :param archiveState: status to set, e.g. "normal-archived"
        :return: number of workflows whose status was updated through ReqMgr.
                 NOTE(review): updates done through centralRequestDBWriter
                 (Tier0 case) are not counted - confirm this is intended.
        """
        updated = 0
        for workflowName in workflows:
            if not self.cleanAllLocalCouchDB(workflowName):
                # local cleanup failed; leave the status alone for this cycle
                continue

            if self.useReqMgrForCompletionCheck:
                if self.config.TaskArchiver.reqmgr2Only:
                    self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                else:
                    self.reqmgrSvc.updateRequestStatus(workflowName, archiveState)
                updated += 1
                logging.debug("status updated to %s %s", archiveState, workflowName)
            else:
                self.centralRequestDBWriter.updateRequestStatus(workflowName, archiveState)
        return updated

    def deleteWorkflowFromJobCouch(self, workflowName, db):
        """
        _deleteWorkflowFromJobCouch_

        If we are asked to delete the workflow from couch, delete it
        to clear up some space.

        For the view based databases, load the document IDs and revisions
        out of couch by workflowName, then order a delete on them. For
        "SummaryStats" the document whose id is the workflow name is deleted
        directly.

        :param workflowName: name of the workflow to remove
        :param db: one of "JobDump", "FWJRDump", "SummaryStats", "WMStatsAgent"
        :return: dict with a 'status' of "ok", "warning" or "error", a
                 'message', and (when something was committed) a 'delete' count
        :raises ValueError: if `db` is not one of the names above
        """
        if db == "JobDump":
            couchDB = self.jobsdatabase
            view = "jobsByWorkflowName"
        elif db == "FWJRDump":
            couchDB = self.fwjrdatabase
            view = "fwjrsByWorkflowName"
        elif db == "SummaryStats":
            couchDB = self.statsumdatabase
            view = None
        elif db == "WMStatsAgent":
            couchDB = self.wmstatsCouchDB.getDBInstance()
            view = "jobsByStatusWorkflow"
        else:
            # fail with a clear message instead of an UnboundLocalError below
            raise ValueError("Unknown couch database name: %s" % db)

        if view is None:
            try:
                committed = couchDB.delete_doc(workflowName)
            except CouchNotFoundError as ex:
                return {'status': 'warning', 'message': "%s: %s" % (workflowName, str(ex))}
        else:
            options = {"startkey": [workflowName], "endkey": [workflowName, {}], "reduce": False}
            try:
                jobs = couchDB.loadView(db, view, options=options)['rows']
            except Exception as ex:
                errorMsg = "Error on loading jobs for %s" % workflowName
                logging.warning("%s\n%s", str(ex), errorMsg)
                return {'status': 'error', 'message': errorMsg}

            for j in jobs:
                couchDB.queueDelete({"_id": j['value']['id'], "_rev": j['value']['rev']})
            committed = couchDB.commit()

        if not committed:
            return {'status': 'warning', 'message': "no %s exist" % workflowName}

        # create the error report: count failures per error type
        errorReport = {}
        deleted = 0
        status = "ok"
        for data in committed:
            if 'error' in data:
                errorReport[data['error']] = errorReport.get(data['error'], 0) + 1
                status = "error"
            else:
                deleted += 1
        return {'status': status, 'delete': deleted, 'message': errorReport}

    def cleanAllLocalCouchDB(self, workflowName):
        """
        Delete the workflow's documents from all local couch databases.

        :param workflowName: name of the workflow to remove
        :return: False if the JobDump, FWJRDump or wmstats deletion reported
                 "error", True otherwise. The SummaryStats result is only
                 logged and does not influence the return value.
        """
        logging.info("Deleting %s from JobCouch", workflowName)

        jobReport = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
        logging.debug("%s docs deleted from JobDump", jobReport)

        fwjrReport = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
        logging.debug("%s docs deleted from FWJRDump", fwjrReport)

        summaryReport = self.deleteWorkflowFromJobCouch(workflowName, "SummaryStats")
        logging.debug("%s docs deleted from SummaryStats", summaryReport)

        wmstatsReport = self.deleteWorkflowFromJobCouch(workflowName, "WMStatsAgent")
        logging.debug("%s docs deleted from wmagent_summary", wmstatsReport)

        # if one of the procedure fails return False, otherwise True
        reports = (jobReport, fwjrReport, wmstatsReport)
        return not any(rep["status"] == "error" for rep in reports)
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according to the given condition.
    1. Cleans local couch db when a request is completed and reported to the
       central db. This removes the workflow's documents from the local
       jobs, fwjrs, summary stats and wmstats databases.

    2. Cleans old couch docs which were created before the time threshold.
    """

    def __init__(self, config):
        """
        Keep a reference to the agent configuration; every service and
        database connection is created later in setup().
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Called at startup.

        Reads the TaskArchiver options and creates the couch / ReqMgr
        handles used by algorithm(). `parameters` is not used here.
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL,
                                            "WMStatsAgent")

        # TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        if self.useReqMgrForCompletionCheck:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            if self.config.TaskArchiver.reqmgr2Only:
                self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            else:
                # TODO: remove this for reqmgr2
                self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        One polling cycle: delete old wmstats docs, then archive the
        deletable, aborted-completed and rejected workflows.

        Every exception is logged and swallowed so that the next cycle can
        retry. `parameters` is not used here.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted", report)
            logging.info("getting complete and announced requests")

            endTime = int(time.time()) - self.archiveDelayHours * 3600
            deletableWorkflows = self.centralRequestDBReader.getRequestByStatusAndStartTime(self.deletableState,
                                                                                            False, endTime)
            logging.info("Ready to archive normal %s workflows", len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows", numUpdated)

            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows", len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows", numUpdated)

            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows", len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows", numUpdated)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")

    def archiveWorkflows(self, workflows, archiveState):
        """
        Clean the local couch data of each workflow and, when that succeeded,
        move the workflow to `archiveState`.

        :param workflows: iterable of workflow names
        :param archiveState: status to set, e.g. "normal-archived"
        :return: number of workflows whose status was updated through ReqMgr.
                 NOTE(review): updates done through centralRequestDBWriter
                 (Tier0 case) are not counted - confirm this is intended.
        """
        updated = 0
        for workflowName in workflows:
            if not self.cleanAllLocalCouchDB(workflowName):
                # local cleanup failed; leave the status alone for this cycle
                continue

            if self.useReqMgrForCompletionCheck:
                if self.config.TaskArchiver.reqmgr2Only:
                    self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                else:
                    self.reqmgrSvc.updateRequestStatus(workflowName, archiveState)
                updated += 1
                logging.debug("status updated to %s %s", archiveState, workflowName)
            else:
                self.centralRequestDBWriter.updateRequestStatus(workflowName, archiveState)
        return updated

    def deleteWorkflowFromJobCouch(self, workflowName, db):
        """
        _deleteWorkflowFromJobCouch_

        If we are asked to delete the workflow from couch, delete it
        to clear up some space.

        For the view based databases, load the document IDs and revisions
        out of couch by workflowName, then order a delete on them. For
        "SummaryStats" the document whose id is the workflow name is deleted
        directly.

        :param workflowName: name of the workflow to remove
        :param db: one of "JobDump", "FWJRDump", "SummaryStats", "WMStats"
        :return: dict with a 'status' of "ok", "warning" or "error", a
                 'message', and (when something was committed) a 'delete' count
        :raises ValueError: if `db` is not one of the names above
        """
        if db == "JobDump":
            couchDB = self.jobsdatabase
            view = "jobsByWorkflowName"
        elif db == "FWJRDump":
            couchDB = self.fwjrdatabase
            view = "fwjrsByWorkflowName"
        elif db == "SummaryStats":
            couchDB = self.statsumdatabase
            view = None
        elif db == "WMStats":
            # NOTE(review): setup() creates the WMStatsWriter with app name
            # "WMStatsAgent" while the view below is loaded from design doc
            # "WMStats" - confirm that this design doc exists in the local db.
            couchDB = self.wmstatsCouchDB.getDBInstance()
            view = "jobsByStatusWorkflow"
        else:
            # fail with a clear message instead of an UnboundLocalError below
            raise ValueError("Unknown couch database name: %s" % db)

        if view is None:
            try:
                committed = couchDB.delete_doc(workflowName)
            except CouchNotFoundError as ex:
                return {'status': 'warning', 'message': "%s: %s" % (workflowName, str(ex))}
        else:
            options = {"startkey": [workflowName], "endkey": [workflowName, {}], "reduce": False}
            try:
                jobs = couchDB.loadView(db, view, options=options)['rows']
            except Exception as ex:
                errorMsg = "Error on loading jobs for %s" % workflowName
                logging.warning("%s\n%s", str(ex), errorMsg)
                return {'status': 'error', 'message': errorMsg}

            for j in jobs:
                couchDB.queueDelete({"_id": j['value']['id'], "_rev": j['value']['rev']})
            committed = couchDB.commit()

        if not committed:
            return {'status': 'warning', 'message': "no %s exist" % workflowName}

        # create the error report: count failures per error type
        errorReport = {}
        deleted = 0
        status = "ok"
        for data in committed:
            if 'error' in data:
                errorReport[data['error']] = errorReport.get(data['error'], 0) + 1
                status = "error"
            else:
                deleted += 1
        return {'status': status, 'delete': deleted, 'message': errorReport}

    def cleanAllLocalCouchDB(self, workflowName):
        """
        Delete the workflow's documents from all local couch databases.

        :param workflowName: name of the workflow to remove
        :return: False if the JobDump, FWJRDump or wmstats deletion reported
                 "error", True otherwise. The SummaryStats result is only
                 logged and does not influence the return value.
        """
        logging.info("Deleting %s from JobCouch", workflowName)

        jobReport = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
        logging.debug("%s docs deleted from JobDump", jobReport)

        fwjrReport = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
        logging.debug("%s docs deleted from FWJRDump", fwjrReport)

        summaryReport = self.deleteWorkflowFromJobCouch(workflowName, "SummaryStats")
        logging.debug("%s docs deleted from SummaryStats", summaryReport)

        wmstatsReport = self.deleteWorkflowFromJobCouch(workflowName, "WMStats")
        logging.debug("%s docs deleted from wmagent_summary", wmstatsReport)

        # if one of the procedure fails return False, otherwise True
        reports = (jobReport, fwjrReport, wmstatsReport)
        return not any(rep["status"] == "error" for rep in reports)
        
Beispiel #29
0
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according to the given condition.
    1. Cleans local couch db when a request is completed and reported to the
       central db.

    2. Cleans old couch docs which were created before the time threshold.
    """

    def __init__(self, config):
        """
        Keep a reference to the agent configuration; every service and
        database connection is created later in setup().
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Called at startup.

        Reads the TaskArchiver options and creates the couch / ReqMgr
        handles used by algorithm(). `parameters` is not used here.
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL)

        # TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        if self.useReqMgrForCompletionCheck:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            if self.config.TaskArchiver.reqmgr2Only:
                self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            else:
                # TODO: remove this for reqmgr2
                self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        One polling cycle: delete old wmstats docs, then archive the
        deletable, aborted-completed and rejected workflows.

        Relies on self.archiveWorkflows(), which is not defined in the part
        of the class shown here. Every exception is logged and swallowed so
        that the next cycle can retry. `parameters` is not used here.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted", report)
            logging.info("getting complete and announced requests")

            endTime = int(time.time()) - self.archiveDelayHours * 3600
            deletableWorkflows = self.centralRequestDBReader.getRequestByStatusAndStartTime(self.deletableState,
                                                                                            False, endTime)
            logging.info("Ready to archive normal %s workflows", len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows", numUpdated)

            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows", len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows", numUpdated)

            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows", len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows", numUpdated)

        # "as" form is valid on both Python 2.6+ and Python 3
        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
Beispiel #30
0
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up local couch db according to the given condition.
    1. Cleans local couch db when a request is completed and reported to the
       central db.

    2. Cleans old couch docs which were created before the time threshold.
    """

    def __init__(self, config):
        """
        Keep a reference to the agent configuration; every service and
        database connection is created later in setup().
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Called at startup.

        Reads the TaskArchiver options and creates the couch / ReqMgr
        handles used by algorithm(). `parameters` is not used here.
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(
            self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.wmstatsCouchDB = WMStatsWriter(
            self.config.TaskArchiver.localWMStatsURL)

        # TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(
            self.config.AnalyticsDataCollector.centralRequestDBURL,
            couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        if self.useReqMgrForCompletionCheck:
            self.deletableStates = ["announced"]
            self.centralRequestDBWriter = RequestDBWriter(
                self.config.AnalyticsDataCollector.centralRequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            # TODO: remove this for reqmgr2
            self.reqmgrSvc = RequestManager(
                {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableStates = ["completed"]
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(
                self.config.AnalyticsDataCollector.localT0RequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" %
                                                            jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" %
                                                            jobDBName)

        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(
            statSummaryDBName)

    def algorithm(self, parameters):
        """
        One polling cycle: delete old wmstats docs, then archive the
        deletable, aborted-completed and rejected workflows.

        Relies on self.archiveWorkflows(), which is not defined in the part
        of the class shown here. Every exception is logged and swallowed so
        that the next cycle can retry. `parameters` is not used here.
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(
                self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted", report)
            logging.info("getting complete and announced requests")

            deletableWorkflows = self.centralRequestDBReader.getRequestByStatus(
                self.deletableStates)

            logging.info("Ready to archive normal %s workflows",
                         len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows,
                                               "normal-archived")
            logging.info("archive normal %s workflows", numUpdated)

            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(
                ["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows",
                         len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows,
                                               "aborted-archived")
            logging.info("archive aborted %s workflows", numUpdated)

            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(
                ["rejected"])
            logging.info("Ready to archive rejected %s workflows",
                         len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows,
                                               "rejected-archived")
            logging.info("archive rejected %s workflows", numUpdated)

        # "as" form is valid on both Python 2.6+ and Python 3
        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
Beispiel #31
0
class WMStatsReader(object):
    """
    Read-side client for a WMStats couch database.

    Views are queried through the couch app named ``appName``.  Request
    documents are fetched through ``self.reqDB`` (a RequestDBReader), which is
    only created when ``reqdbURL`` is passed to the constructor; the methods
    that use ``self.reqDB`` fail if it was not set.
    """

    # TODO need to get this from reqmgr api
    ACTIVE_STATUS = ["new",
                     "assignment-approved",
                     "assigned",
                     "acquired",
                     "running",
                     "running-open",
                     "running-closed",
                     "failed",
                     "force-complete",
                     "completed",
                     "closed-out",
                     "announced",
                     "aborted",
                     "aborted-completed",
                     "rejected"]

    T0_ACTIVE_STATUS = ["new",
                        "Closed",
                        "Merge",
                        "Harvesting",
                        "Processing Done",
                        "AlcaSkim",
                        "completed"]

    def __init__(self, couchURL, appName="WMStats", reqdbURL=None, reqdbCouchApp="ReqMgr"):
        """
        :param couchURL: URL of the wmstats couch database (server + db name).
        :param appName: couch app used for the view queries.
        :param reqdbURL: optional URL of the request database; when empty,
                         ``self.reqDB`` is None.
        :param reqdbCouchApp: couch app handed to RequestDBReader.
        """
        # Keep the value returned by the hook: the previous code called
        # _sanitizeURL() and threw the result away, so the un-sanitized URL
        # was used.  Subclasses can still override _sanitizeURL to opt out.
        couchURL = self._sanitizeURL(couchURL)
        # set the connection for local couchDB call
        self._commonInit(couchURL, appName)
        if reqdbURL:
            self.reqDB = RequestDBReader(reqdbURL, reqdbCouchApp)
        else:
            self.reqDB = None

    def _sanitizeURL(self, couchURL):
        """Return the 'url' entry of sanitizeURL(couchURL)."""
        return sanitizeURL(couchURL)['url']

    def _commonInit(self, couchURL, appName="WMStats"):
        """
        Set up the common variables for inherited classes.
        Inherited classes should call this in their init function.
        """
        self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
        self.couchapp = appName
        self.defaultStale = {"stale": "update_after"}

    def setDefaultStaleOptions(self, options):
        """
        Return ``options`` with ``self.defaultStale`` merged in when the
        caller did not set 'stale' itself.  A non-empty ``options`` dict is
        updated in place; a falsy one is replaced by a new dict.
        """
        if not options:
            options = {}
        if 'stale' not in options:
            options.update(self.defaultStale)
        return options

    def getLatestJobInfoByRequests(self, requestNames):
        """
        Return the latest job-info documents for the given request names.

        :return: {} when ``requestNames`` is empty, otherwise whatever
                 _getAllDocsByIDs returns (None when no document id matched).
        """
        jobInfoByRequestAndAgent = {}

        if requestNames:
            requestAndAgentKey = self._getRequestAndAgent(requestNames)
            jobDocIds = self._getLatestJobInfo(requestAndAgentKey)
            jobInfoByRequestAndAgent = self._getAllDocsByIDs(jobDocIds)
        return jobInfoByRequestAndAgent

    def _updateRequestInfoWithJobInfo(self, requestInfo):
        """Attach the latest agent job info to every request in requestInfo (in place)."""
        if requestInfo:
            jobInfoByRequestAndAgent = self.getLatestJobInfoByRequests(requestInfo.keys())
            self._combineRequestAndJobData(requestInfo, jobInfoByRequestAndAgent)

    def _getCouchView(self, view, options, keys=None):
        """
        Load ``view`` from the configured couch app.

        :param options: couch view options; the default 'stale' setting is
                        added when missing.
        :param keys: list of keys, or a single string key; None means no keys.
        """
        # None instead of a mutable [] default; the view still receives a list.
        if keys is None:
            keys = []
        options = self.setDefaultStaleOptions(options)

        if keys and isinstance(keys, str):
            keys = [keys]
        return self.couchDB.loadView(self.couchapp, view, options, keys)

    def _formatCouchData(self, data, key="id"):
        """
        Map row[key] -> row["doc"] (None when the row has no "doc") for every
        row of a couch result, skipping rows that carry an 'error' entry.
        """
        result = {}
        for row in data['rows']:
            if 'error' in row:
                continue
            result[row[key]] = row["doc"] if "doc" in row else None
        return result

    def _combineRequestAndJobData(self, requestData, jobData):
        """
        update the request data with job info
        requestData['AgentJobInfo'] = {'vocms234.cern.ch:9999': {"_id":"d1d11dfcb30e0ab47db42007cb6fb847",
        "_rev":"1-8abfaa2de822ed081cb8d174e3e2c003",
        "status":{"inWMBS":334,"success":381,"submitted":{"retry":2,"pending":2},"failure":{"exception":3}},
        "agent_team":"testbed-integration","workflow":"amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731",
        "timestamp":1394738860,"sites":{"T2_CH_CERN_AI":{"submitted":{"retry":1,"pending":1}},
        "T2_CH_CERN":{"success":6,"submitted":{"retry":1,"pending":1}},
        "T2_DE_DESY":{"failure":{"exception":3},"success":375}},
        "agent":"WMAgentCommissioning",
        "tasks":
           {"/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production":
            {"status":{"failure":{"exception":3},"success":331},
             "sites":{"T2_DE_DESY": {"success":325,"wrappedTotalJobTime":11305908,
                                     "dataset":{},"failure":{"exception":3},
                                     "cmsRunCPUPerformance":{"totalJobCPU":10869688.8,
                                                             "totalEventCPU":10832426.7,
                                                             "totalJobTime":11255865.9},
                                     "inputEvents":0},
                      "T2_CH_CERN":{"success":6,"wrappedTotalJobTime":176573,
                                    "dataset":{},
                                    "cmsRunCPUPerformance":{"totalJobCPU":167324.8,
                                                            "totalEventCPU":166652.1,
                                                            "totalJobTime":174975.7},
                                    "inputEvents":0}},
             "subscription_status":{"updated":1393108089, "finished":2, "total":2,"open":0},
             "jobtype":"Production"},
            "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput/ProductionRAWSIMoutputMergeLogCollect":
             {"jobtype":"LogCollect",
              "subscription_status":{"updated":1392885768,
              "finished":0,
              "total":1,"open":1}},
            "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput":
              {"status":{"success":41,"submitted":{"retry":1,"pending":1}},
                "sites":{"T2_DE_DESY":{"datasetStat":{"totalLumis":973,"events":97300,"size":105698406915},
                                       "success":41,"wrappedTotalJobTime":9190,
                                       "dataset":{"/GluGluToHTohhTo4B_mH-350_mh-125_8TeV-pythia6-tauola/Summer12-OracleUpgrade_TEST_ALAN_HG1401-v1/GEN-SIM":
                                                   {"totalLumis":973,"events":97300,"size":105698406915}},
                                       "cmsRunCPUPerformance":{"totalJobCPU":548.92532,"totalEventCPU":27.449808,"totalJobTime":2909.92125},
                                    "inputEvents":97300},
                         "T2_CH_CERN":{"submitted":{"retry":1,"pending":1}}},
                "subscription_status":{"updated":1392885768,"finished":0,"total":1,"open":1},
                "jobtype":"Merge"},
           "agent_url":"vocms231.cern.ch:9999",
           "type":"agent_request"}}

        requestData is modified in place; nothing is returned.  A falsy
        jobData leaves requestData untouched.
        """
        if not jobData:
            return
        for row in jobData["rows"]:
            # Documents can be deleted between the two couch calls: such rows
            # either carry a null "doc" or no "doc" key at all (error rows).
            # Just ignore them in that case.
            doc = row.get("doc")
            if not doc:
                continue
            jobInfo = requestData[doc["workflow"]]
            jobInfo.setdefault("AgentJobInfo", {})
            jobInfo["AgentJobInfo"][doc["agent_url"]] = doc

    def _getRequestAndAgent(self, filterRequest=None):
        """
        returns the [['request_name', 'agent_url'], ....]
        When filterRequest is given, only keys whose request name is in it
        are returned.
        """
        options = {"reduce": True, "group": True}
        result = self._getCouchView("requestAgentUrl", options)

        if filterRequest is None:
            return [row['key'] for row in result["rows"]]
        return [row['key'] for row in result["rows"] if row['key'][0] in filterRequest]

    def _getLatestJobInfo(self, keys):
        """
        keys is [['request_name', 'agent_url'], ....]
        returns ids
        """
        if not keys:
            return []
        options = {"reduce": True, "group": True}
        result = self._getCouchView("latestRequest", options, keys)
        return [row['value']['id'] for row in result["rows"]]

    def _getAllDocsByIDs(self, ids, include_docs=True):
        """
        ids is [id, ....]
        returns the couch allDocs result, or None when ids is empty
        """
        if not ids:
            return None
        options = {"include_docs": include_docs}
        return self.couchDB.allDocs(options, ids)

    def _getAgentInfo(self):
        """
        returns all the agents status on wmstats
        """
        return self._getCouchView("agentInfo", {})

    def agentsByTeam(self, ignoreDrain=True):
        """
        return a dictionary like {team:#agents,...}

        Every team seen is listed.  With ignoreDrain=True an agent whose
        'drain_mode' is set contributes 0 to its teams.
        """
        result = self._getAgentInfo()
        response = {}
        for agentInfo in result["rows"]:
            agent = agentInfo['value']
            teams = agent['agent_team'].split(',')
            # 'drain_mode' is only consulted when draining agents are ignored
            counted = not (ignoreDrain and agent['drain_mode'])
            for team in teams:
                response.setdefault(team, 0)
                if counted:
                    response[team] += 1
        return response

    def getServerInstance(self):
        """Return the CouchServer handle."""
        return self.couchServer

    def getDBInstance(self):
        """Return the wmstats couch database handle."""
        return self.couchDB

    def getRequestDBInstance(self):
        """Return the RequestDBReader, or None when no reqdbURL was given."""
        return self.reqDB

    def getHeartbeat(self):
        """Return couchDB.info(), or {'error_message': ...} when that call fails."""
        try:
            return self.couchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getRequestByNames(self, requestNames, jobInfoFlag=False):
        """
        To use this function reqdbURL needs to be set when wmstats is initialized.
        This will be deprecated, so please don't use it.
        """
        requestInfo = self.reqDB.getRequestByNames(requestNames, True)

        if jobInfoFlag:
            # get request and agent info
            self._updateRequestInfoWithJobInfo(requestInfo)
        return requestInfo

    def getActiveData(self, jobInfoFlag=False):
        """Return the requests whose status is in ACTIVE_STATUS."""
        return self.getRequestByStatus(WMStatsReader.ACTIVE_STATUS, jobInfoFlag)

    def getT0ActiveData(self, jobInfoFlag=False):
        """Return the requests whose status is in T0_ACTIVE_STATUS."""
        return self.getRequestByStatus(WMStatsReader.T0_ACTIVE_STATUS, jobInfoFlag)

    def getRequestByStatus(self, statusList, jobInfoFlag=False, limit=None, skip=None,
                           legacyFormat=False):
        """
        To use this function reqdbURL needs to be set when wmstats is initialized.
        This will be deprecated, so please don't use it.
        If legacyFormat is True convert data to old wmstats format from current reqmgr format.
        Shouldn't be set to True unless existing code breaks
        """
        requestInfo = self.reqDB.getRequestByStatus(statusList, True, limit, skip)

        if legacyFormat:
            # convert the format to wmstats old format; iterate over a
            # snapshot because the dict is rewritten while looping
            for requestName, doc in list(requestInfo.items()):
                requestInfo[requestName] = convertToLegacyFormat(doc)

        if jobInfoFlag:
            # get request and agent info
            self._updateRequestInfoWithJobInfo(requestInfo)
        return requestInfo

    def getRequestSummaryWithJobInfo(self, requestName):
        """
        get request info with job status
        """
        requestInfo = self.reqDB.getRequestByNames(requestName)
        self._updateRequestInfoWithJobInfo(requestInfo)
        return requestInfo

    def getArchivedRequests(self):
        """
        get list of archived workflow in wmstats db.
        """
        options = {"group_level": 1, "reduce": True}

        results = self.couchDB.loadView(self.couchapp, "allWorkflows", options=options)['rows']
        requestNames = [x['key'] for x in results]

        workflowDict = self.reqDB.getStatusAndTypeByRequest(requestNames)
        # value[0] is the request status
        return [request for request, value in workflowDict.items()
                if value[0].endswith("-archived")]

    def isWorkflowCompletedWithLogCollectAndCleanUp(self, requestName):
        """
        check whether workflow is completed including LogCollect and CleanUp tasks
        TODO: If the parent tasks all failed and the next tasks are not created at all,
            it can't detect the complete status.
            If one of the tasks doesn't contain any jobs, it will return False
        """
        requestInfo = self.getRequestSummaryWithJobInfo(requestName)
        reqInfoInstance = RequestInfo(requestInfo[requestName])
        return reqInfoInstance.isWorkflowFinished()
Beispiel #32
0
class AuxBaseAPI(RESTEntity):
    """
    Shared REST entity for documents kept in the ReqMgr auxiliary couch
    database.  Provides the get, post, put and delete handlers; concrete
    entities only have to define the document name through setName().
    """
    def __init__(self, app, api, config, mount):
        RESTEntity.__init__(self, app, api, config, mount)
        # handle to the auxiliary couch database, plus a reader used for views
        auxDB = api.db_handler.get_db(config.couch_reqmgr_aux_db)
        self.reqmgr_aux_db = auxDB
        self.reqmgr_aux_db_service = RequestDBReader(auxDB,
                                                     couchapp="ReqMgrAux")
        self.setName()

    def setName(self):
        """Hook for subclasses: must assign the couch document name to self.name."""
        raise NotImplementedError(
            "Couch document id(name) should be specified. i.e. self.name='software'"
        )

    def validate(self, apiobj, method, api, param, safe):
        """
        Move a single positional argument, when exactly one is present, into
        safe.kwargs as "subName".  Any other argument count is left untouched.
        """
        if len(param.args) == 1:
            safe.kwargs["subName"] = param.args.pop(0)

    @restcall(formats=[('text/plain', PrettyJSONFormat()),
                       ('application/json', JSONFormat())])
    def get(self, subName=None):
        """
        Return the self.name document without its "_id" and "_rev" fields.

        subName selects the "<name>_<subName>" document instead; the special
        value "all_docs" (case-insensitive) returns every document of this
        config type.  Raises NoSuchInstance when the document is not found.
        """
        try:
            if subName and subName.lower() == "all_docs":
                return rows(self._getAllDocs())
            docName = "%s_%s" % (self.name, subName) if subName else self.name
            sw = self.reqmgr_aux_db.document(docName)
            for couchField in ("_id", "_rev"):
                del sw[couchField]
        except CouchNotFoundError:
            raise NoSuchInstance

        return rows([sw])

    def _getAllDocs(self):
        """
        Fetch, through the "byconfig" view keyed on self.name, all documents
        of this config type.  A CouchError is logged and turned into HTTP 404.
        """
        try:
            allDocs = self.reqmgr_aux_db_service.getRequestByCouchView(
                "byconfig", {"include_docs": True}, [self.name])
        except CouchError as ex:
            msg = "ERROR: Failed to fetch ALL_DOCS for ConfigType: %s. Reason: %s" % (
                self.name, str(ex))
            cherrypy.log(msg)
            raise cherrypy.HTTPError(404, msg)

        return allDocs.values()

    @restcall(formats=[('application/json', JSONFormat())])
    def post(self, subName=None):
        """
        Create a document from the JSON request body, tagged with
        ConfigType=self.name.  Raises MissingPostData on an empty body.
        """
        data = cherrypy.request.body.read()
        if not data:
            raise MissingPostData()
        doc = json.loads(data)

        docName = "%s_%s" % (self.name, subName) if subName else self.name

        doc["ConfigType"] = self.name
        return self.reqmgr_aux_db.commitOne(Document(docName, doc))

    @restcall(formats=[('text/plain', PrettyJSONFormat()),
                       ('application/json', JSONFormat())])
    def put(self, subName=None):
        """
        Replace the document identified by self.name and subName with the
        JSON request body; the client is expected to send the whole entity.
        Only the stored "_rev" and "ConfigType" are carried over.  When the
        document does not exist yet it is created.

        Since every couch document carries a revision number, these PUT
        calls are not idempotent.
        """
        data = cherrypy.request.body.read()
        if not data:
            raise MissingPostData()
        propertyDict = json.loads(data)

        docName = "%s_%s" % (self.name, subName) if subName else self.name

        try:
            existDoc = self.reqmgr_aux_db.document(docName)
            # start from a skeleton holding only the couch bookkeeping fields
            docId = existDoc['_id']
            carriedOver = {
                '_rev': existDoc['_rev'],
                'ConfigType': existDoc['ConfigType']
            }
            newDoc = Document(docId, inputDict=carriedOver)
            newDoc.update(propertyDict)
            result = self.reqmgr_aux_db.commitOne(newDoc)
        except CouchNotFoundError:
            cherrypy.log("Document %s not found. Creating one." % docName)
            doc = Document(docName, propertyDict)
            doc.update({'ConfigType': self.name})
            result = self.reqmgr_aux_db.commitOne(doc)

        return result

    @restcall(formats=[('application/json', JSONFormat())])
    def delete(self, subName):
        """
        Delete the "<name>_<subName>" document from ReqMgrAux.
        Returns the couch response, or None when the deletion failed
        (the failure is logged, not raised).
        """
        docName = "%s_%s" % (self.name, subName)
        try:
            return self.reqmgr_aux_db.delete_doc(docName)
        except (CouchError, CouchNotFoundError) as ex:
            msg = "ERROR: failed to delete document: %s\nReason: %s" % (
                docName, str(ex))
            cherrypy.log(msg)
            return None
Beispiel #33
0
class RequestDBTest(unittest.TestCase):
    """
    Round-trip test of RequestDBWriter and RequestDBReader against a scratch
    couch database created by TestInitCouchApp.
    """
    def setUp(self):
        """
        Create the scratch couch database with the ReqMgr couch app and build
        a writer and a reader on top of it.
        """
        self.schema = []
        self.couchApps = ["ReqMgr"]
        self.testInit = TestInitCouchApp('RequestDBServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'requsetdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.requestWriter = RequestDBWriter(reqDBURL)
        self.requestReader = RequestDBReader(reqDBURL)
        # no default 'stale' option for either client
        for client in (self.requestWriter, self.requestReader):
            client.defaultStale = {}
        return

    def tearDown(self):
        """
        Tear down the couch set-up created in setUp.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        """
        Insert, update and read back requests through the writer and reader.
        """
        writer = self.requestWriter
        reader = self.requestReader

        schema = generate_reqmgr_schema(3)
        firstName = schema[0]['RequestName']

        # insertion of a single request
        inserted = writer.insertGenericRequest(schema[0])
        self.assertEqual(len(inserted), 1, 'insert fail')

        # status updates on an existing and on a missing document
        self.assertEqual(writer.updateRequestStatus(firstName, "failed"),
                         'OK', 'update fail')
        self.assertEqual(writer.updateRequestStatus("not_exist_schema", "assigned"),
                         'Error: document not found')

        # property updates; the first call's outcome is not checked
        writer.updateRequestProperty(firstName, {'Teams': ['teamA']})
        self.assertEqual(writer.updateRequestProperty(firstName, {'Teams': ['teamA']}),
                         'OK', 'update fail')
        self.assertEqual(writer.updateRequestProperty("not_exist_schema", {'Teams': 'teamA'}),
                         'Error: document not found')

        # read the request back by name and by status
        byName = reader.getRequestByNames([firstName])
        self.assertEqual(len(byName), 1, "should be 1")
        byStatus = reader.getRequestByStatus(["failed"], False, 1)
        self.assertEqual(len(byStatus), 1, "should be 1")

        statusAndType = reader.getStatusAndTypeByRequest([firstName])
        self.assertEqual(statusAndType[firstName][0], 'failed', "should be failed")

        # two more requests, inserted two seconds apart, to test the end-time cut
        writer.insertGenericRequest(schema[1])
        time.sleep(2)
        writer.insertGenericRequest(schema[2])

        endTime = int(time.time()) - 1
        beforeCut = reader.getRequestByStatusAndEndTime("new", False, endTime)
        self.assertEqual(len(beforeCut), 1, "should be 1")

        endTime = int(time.time()) + 1
        afterCut = reader.getRequestByStatusAndEndTime("new", False, endTime)
        self.assertEqual(len(afterCut), 2, "should be 2")
Beispiel #34
0
class BuildParentLock(CherryPyPeriodicTask):
    """
    Periodic task that collects the parent datasets of active requests
    returned by the "requestsincludeparents" couch view and publishes them as
    the 'parentlocks' list through ReqMgrAux.
    """
    def __init__(self, rest, config):

        super(BuildParentLock, self).__init__(config)
        self.reqmgrAux = ReqMgrAux(config.reqmgr2_url, logger=self.logger)
        self.dbs = DBS3Reader(config.dbs_url)
        # cache of dbs lookups mapping input dataset to parent dataset
        self.dbsLookupCache = {}
        # set of currently active datasets requiring parent dataset, as of the
        # last update that was fully written to the auxiliary database
        self.inputDatasetCache = set()
        self.reqDB = RequestDBReader(config.reqmgrdb_url)
        self.filterKeys = [
            'assignment-approved', 'assigned', 'staging', 'staged', 'failed',
            'acquired', 'running-open', 'running-closed', 'force-complete',
            'completed', 'closed-out'
        ]

    def setConcurrentTasks(self, config):
        """
        Register fetchIncludeParentsRequests as the task to run every
        config.updateParentsInterval.
        """
        self.concurrentTasks = [{
            'func': self.fetchIncludeParentsRequests,
            'duration': config.updateParentsInterval
        }]

    def _resolveParentDatasets(self, datasets):
        """
        Look up the parent of every dataset, first in the local cache and
        then in DBS.

        :return: (set of parent datasets, True if any DBS lookup failed)
        """
        setParents = set()
        incompleteParentage = False
        for dset in datasets:
            if dset in self.dbsLookupCache:
                setParents.add(self.dbsLookupCache[dset])
                self.logger.info(
                    "Resolved parentage via lookup cache for: %s", dset)
                continue
            try:
                res = self.dbs.listDatasetParents(dset)
            except Exception as exc:
                self.logger.warning(
                    "Failed to resolve parentage for: %s. Error: %s",
                    dset, str(exc))
                incompleteParentage = True
                continue
            self.logger.info("Resolved parentage via DBS for: %s", res)
            if res:
                setParents.add(res[0]['parent_dataset'])
                self.dbsLookupCache[dset] = res[0]['parent_dataset']
        return setParents, incompleteParentage

    def fetchIncludeParentsRequests(self, config):
        """
        Fetch active requests from the "requestsincludeparents" couch view that
        have IncludeParents=True, find parents of each dataset and send to
        reqmgr2 auxiliary database.

        The set of input datasets is only remembered in inputDatasetCache once
        the complete parent list has been written successfully.  Previously it
        was remembered up front, so after a failed or partial update the next
        cycle saw "no change" and never retried.

        :param config: not used by this method.
        :return: None
        """
        self.logger.info("Executing parent lock cherrypy thread")

        # query couch view to find datasets for workflows requiring parent datasets
        # only returning requests with the statuses in filterKeys
        try:
            results = self.reqDB._getCouchView("requestsincludeparents", {},
                                               self.filterKeys)
        except Exception as ex:
            self.logger.error(
                "Error retrieving requests including parent datasets from couchdb."
            )
            self.logger.error("Error: %s", str(ex))
            return

        setDsets = set(row["value"] for row in results["rows"])
        self.logger.info(
            "Found %d unique datasets requiring the parent dataset",
            len(setDsets))

        # nothing changed since the last fully successful update
        if setDsets == self.inputDatasetCache:
            self.logger.info(
                "No new parent datasets need locked. Skipping update of auxiliary database."
            )
            return

        self.logger.info("Found new parent dataset locks to update.")
        setParents, incompleteParentage = self._resolveParentDatasets(setDsets)

        if not incompleteParentage:
            if self.reqmgrAux.updateParentLocks(
                    {'parentlocks': list(setParents)}):
                # commit the cache only now, so that a failure is retried
                self.inputDatasetCache = setDsets.copy()
                self.logger.info(
                    "Parentage lookup complete and auxiliary database updated."
                )
            else:
                self.logger.info(
                    "Error updating parentage document. Using stale data until next cycle."
                )
            return

        # some lookups failed: don't replace any data for the moment, simply
        # add the new parents to what is already locked; the cache is left
        # untouched so the missing parents are looked up again next cycle
        previousData = self.reqmgrAux.getParentLocks()
        # check to see if response from aux db has been populated
        if previousData and 'parentlocks' in previousData[0]:
            setParents = setParents | set(previousData[0]['parentlocks'])
            self.reqmgrAux.updateParentLocks({'parentlocks': list(setParents)})
            self.logger.info(
                "Parentage lookup complete (with errors) and auxiliary database updated."
            )
        else:
            self.logger.info(
                "Parent locks not returned from auxiliary database. Skipping parentage update."
            )

        return
Beispiel #35
0
class WMStatsReader():
    """
    Helper for querying the WMStats couch application (views, documents by
    id, agent information) and, when a request database URL is supplied at
    construction time, request documents through a RequestDBReader.
    """

    # TODO: need to get this from reqmgr api
    # Request statuses that getActiveData() hands to getRequestByStatus().
    ACTIVE_STATUS = [
        "new", "assignment-approved", "assigned", "ops-hold", "negotiating",
        "acquired", "running", "running-open", "running-closed", "failed",
        "completed", "closed-out", "announced", "aborted", "rejected"
    ]

    def __init__(self, couchURL, reqdbURL=None, reqdbCouchApp="ReqMgr"):
        """
        Set up the WMStats couch connection and, optionally, the request
        database reader.

        :param couchURL: URL of the WMStats couch database; it is sanitized
                         before being used for the connection
        :param reqdbURL: URL of the request database. When it is not given
                         self.reqDB is None, so methods going through
                         self.reqDB cannot be used
        :param reqdbCouchApp: couchapp name handed to the request DB reader
        """
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        self._commonInit(couchURL)
        if reqdbURL:
            # forward the couchapp name: it used to be accepted by this
            # constructor but never passed on to the reader
            self.reqDB = RequestDBReader(reqdbURL, couchapp=reqdbCouchApp)
        else:
            self.reqDB = None

    def _commonInit(self, couchURL, appName="WMStats"):
        """
        Set up the attributes shared with inheriting classes; inheriting
        classes should call this from their own init function.

        :param couchURL: couch service URL, split by splitCouchServiceURL
                         into the server URL and the database name
        :param appName: couchapp name used when loading views
        """
        serverURL, databaseName = splitCouchServiceURL(couchURL)
        self.couchURL = serverURL
        self.dbName = databaseName
        self.couchServer = CouchServer(serverURL)
        self.couchDB = self.couchServer.connectDatabase(databaseName, False)
        self.couchapp = appName
        # stale option merged into view queries that do not set their own
        self.defaultStale = {"stale": "update_after"}

    def setDefaultStaleOptions(self, options):
        if not options:
            options = {}
        if 'stale' not in options:
            options.update(self.defaultStale)
        return options

    def getLatestJobInfoByRequests(self, requestNames):
        jobInfoByRequestAndAgent = {}

        if len(requestNames) > 0:
            requestAndAgentKey = self._getRequestAndAgent(requestNames)
            jobDocIds = self._getLatestJobInfo(requestAndAgentKey)
            jobInfoByRequestAndAgent = self._getAllDocsByIDs(jobDocIds)
        return jobInfoByRequestAndAgent

    def _updateRequestInfoWithJobInfo(self, requestInfo):
        if len(requestInfo.keys()) != 0:
            jobInfoByRequestAndAgent = self.getLatestJobInfoByRequests(
                requestInfo.keys())
            self._combineRequestAndJobData(requestInfo,
                                           jobInfoByRequestAndAgent)

    def _getCouchView(self, view, options, keys=[]):

        options = self.setDefaultStaleOptions(options)

        if keys and type(keys) == str:
            keys = [keys]
        return self.couchDB.loadView(self.couchapp, view, options, keys)

    def _formatCouchData(self, data, key="id"):
        result = {}
        for row in data['rows']:
            if 'error' in row:
                continue
            if "doc" in row:
                result[row[key]] = row["doc"]
            else:
                result[row[key]] = None
        return result

    def _combineRequestAndJobData(self, requestData, jobData):
        """
        update the request data with job info
        requestData['AgentJobInfo'] = {'vocms234.cern.ch:9999': {"_id":"d1d11dfcb30e0ab47db42007cb6fb847",
        "_rev":"1-8abfaa2de822ed081cb8d174e3e2c003",
        "status":{"inWMBS":334,"success":381,"submitted":{"retry":2,"pending":2},"failure":{"exception":3}},
        "agent_team":"testbed-integration","workflow":"amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731",
        "timestamp":1394738860,"sites":{"T2_CH_CERN_AI":{"submitted":{"retry":1,"pending":1}},
        "T2_CH_CERN":{"success":6,"submitted":{"retry":1,"pending":1}},
        "T2_DE_DESY":{"failure":{"exception":3},"success":375}},
        "agent":"WMAgentCommissioning",
        "tasks":
           {"/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production":
            {"status":{"failure":{"exception":3},"success":331},
             "sites":{"T2_DE_DESY": {"success":325,"wrappedTotalJobTime":11305908,
                                     "dataset":{},"failure":{"exception":3},
                                     "cmsRunCPUPerformance":{"totalJobCPU":10869688.8,
                                                             "totalEventCPU":10832426.7,
                                                             "totalJobTime":11255865.9},
                                     "inputEvents":0},
                      "T2_CH_CERN":{"success":6,"wrappedTotalJobTime":176573,
                                    "dataset":{},
                                    "cmsRunCPUPerformance":{"totalJobCPU":167324.8,
                                                            "totalEventCPU":166652.1,
                                                            "totalJobTime":174975.7},
                                    "inputEvents":0}},
             "subscription_status":{"updated":1393108089, "finished":2, "total":2,"open":0},
             "jobtype":"Production"},
            "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput/ProductionRAWSIMoutputMergeLogCollect":
             {"jobtype":"LogCollect",
              "subscription_status":{"updated":1392885768,
              "finished":0,
              "total":1,"open":1}},
            "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput":
              {"status":{"success":41,"submitted":{"retry":1,"pending":1}},
                "sites":{"T2_DE_DESY":{"datasetStat":{"totalLumis":973,"events":97300,"size":105698406915},
                                       "success":41,"wrappedTotalJobTime":9190,
                                       "dataset":{"/GluGluToHTohhTo4B_mH-350_mh-125_8TeV-pythia6-tauola/Summer12-OracleUpgrade_TEST_ALAN_HG1401-v1/GEN-SIM":
                                                   {"totalLumis":973,"events":97300,"size":105698406915}},
                                       "cmsRunCPUPerformance":{"totalJobCPU":548.92532,"totalEventCPU":27.449808,"totalJobTime":2909.92125},
                                    "inputEvents":97300},
                         "T2_CH_CERN":{"submitted":{"retry":1,"pending":1}}},
                "subscription_status":{"updated":1392885768,"finished":0,"total":1,"open":1},
                "jobtype":"Merge"},
           "agent_url":"vocms231.cern.ch:9999",
           "type":"agent_request"}}
        """
        if jobData:
            for row in jobData["rows"]:
                # condition checks if documents are deleted between calls.
                # just ignore in that case
                if row["doc"]:
                    jobInfo = requestData[row["doc"]["workflow"]]
                    jobInfo.setdefault("AgentJobInfo", {})
                    jobInfo["AgentJobInfo"][row["doc"]
                                            ["agent_url"]] = row["doc"]

    def _getRequestAndAgent(self, filterRequest=None):
        """
        returns the [['request_name', 'agent_url'], ....]
        """
        options = {}
        options["reduce"] = True
        options["group"] = True
        result = self._getCouchView("requestAgentUrl", options)

        if filterRequest == None:
            keys = [row['key'] for row in result["rows"]]
        else:
            keys = [
                row['key'] for row in result["rows"]
                if row['key'][0] in filterRequest
            ]
        return keys

    def _getLatestJobInfo(self, keys):
        """
        keys is [['request_name', 'agent_url'], ....]
        returns ids
        """
        if len(keys) == 0:
            return []
        options = {}
        options["reduce"] = True
        options["group"] = True
        result = self._getCouchView("latestRequest", options, keys)
        ids = [row['value']['id'] for row in result["rows"]]
        return ids

    def _getAllDocsByIDs(self, ids, include_docs=True):
        """
        keys is [id, ....]
        returns document
        """
        if len(ids) == 0:
            return None
        options = {}
        options["include_docs"] = include_docs
        result = self.couchDB.allDocs(options, ids)

        return result

    def _getAgentInfo(self):
        """
        returns all the agents status on wmstats
        """
        options = {}
        result = self._getCouchView("agentInfo", options)

        return result

    def agentsByTeam(self, ignoreDrain=True):
        """
        return a dictionary like {team:#agents,...}
        """
        result = self._getAgentInfo()
        response = dict()
        for agentInfo in result["rows"]:

            teams = agentInfo['value']['agent_team'].split(',')
            for team in teams:
                if team not in response.keys():
                    response[team] = 0
            if ignoreDrain:
                if not agentInfo['value']['drain_mode']:
                    for team in teams:
                        response[team] += 1
            else:
                for team in teams:
                    response[team] += 1
        return response

    def getDBInstance(self):
        """
        Return the couch database object stored in self.couchDB.
        """
        return self.couchDB

    def getHeartbeat(self):
        try:
            return self.couchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getRequestByNames(self, requestNames, jobInfoFlag=False):
        """
        To use this function reqDBURL need to be set when wmstats initialized.
        This will be deplicated so please don use this. 
        """
        requestInfo = self.reqDB.getRequestByNames(requestNames, True)

        if jobInfoFlag:
            # get request and agent info
            self._updateRequestInfoWithJobInfo(requestInfo)
        return requestInfo

    def getActiveData(self, jobInfoFlag=False):

        return self.getRequestByStatus(WMStatsReader.ACTIVE_STATUS,
                                       jobInfoFlag)

    def getRequestByStatus(self,
                           statusList,
                           jobInfoFlag=False,
                           limit=None,
                           skip=None,
                           legacyFormat=False):
        """
        To use this function reqDBURL need to be set when wmstats initialized.
        This will be deplicated so please don use this.
        If legacyFormat is True convert data to old wmstats format from current reqmgr format.
        Shouldn't be set to True unless existing code breaks  
        """

        requestInfo = self.reqDB.getRequestByStatus(statusList, True, limit,
                                                    skip)

        if legacyFormat:
            # convert the format to wmstas old format
            for requestName, doc in requestInfo.items():
                requestInfo[requestName] = convertToLegacyFormat(doc)

        if jobInfoFlag:
            # get request and agent info
            self._updateRequestInfoWithJobInfo(requestInfo)
        return requestInfo

    def getRequestSummaryWithJobInfo(self, requestName):
        """
        get request info with job status
        """
        requestInfo = self.reqDB.getRequestByNames(requestName)
        self._updateRequestInfoWithJobInfo(requestInfo)
        return requestInfo