Example #1
class StepChainParentageFixTask(CherryPyPeriodicTask):
    """
    Updates StepChain parentage periodically
    """

    def __init__(self, rest, config):

        super(StepChainParentageFixTask, self).__init__(config)
        self.reqmgrDB = RequestDBWriter(config.reqmgrdb_url)
        self.dbsSvc = DBS3Reader(config.dbs_url, logger=self.logger)
        self.statusToCheck = ["announced", "normal-archived"]

    def setConcurrentTasks(self, config):
        """
        Sets the list of functions to run concurrently and how often they run
        """
        self.concurrentTasks = [{'func': self.fixStepChainParentage, 'duration': config.parentageFixDuration}]

    def fixStepChainParentage(self, config):
        """
        Look through the StepChain workflows whose ParentageResolved flag is False.
        Fix the StepChain parentage, then update the ParentageResolved flag to True.
        """
        self.logger.info("Updating parentage for StepChain workflows for %s", self.statusToCheck)
        childDatasets = set()
        requests = set()
        requestsByChildDataset = {}
        for status in self.statusToCheck:
            reqByChildDS = getChildDatasetsForStepChainMissingParent(self.reqmgrDB, status)
            childDatasets = childDatasets.union(set(reqByChildDS.keys()))
            # If multiple workflows contain the same dataset (i.e. ACDC), we only need one of the StepChain workflows
            requestsByChildDataset.update(reqByChildDS)

            for wfs in reqByChildDS.values():
                requests = requests.union(wfs)

        failedRequests = set()
        totalChildDS = len(childDatasets)
        fixCount = 0
        for childDS in childDatasets:
            start = int(time.time())
            failedBlocks = self.dbsSvc.fixMissingParentageDatasets(childDS, insertFlag=True)
            end = int(time.time())
            timeTaken = end - start
            if failedBlocks:
                self.logger.warning("Failed to fix the parentage for %s, will be retried; time taken: %s (sec)",
                                    failedBlocks, timeTaken)
                failedRequests = failedRequests.union(requestsByChildDataset[childDS])
            else:
                fixCount += 1
                self.logger.info("Fixed parentage for %s: %s out of %s datasets, time taken: %s (sec)",
                                 childDS, fixCount, totalChildDS, timeTaken)

        requestsToUpdate = requests - failedRequests

        for request in requestsToUpdate:
            self.reqmgrDB.updateRequestProperty(request, {"ParentageResolved": True})

        self.logger.info("Total %s requests' ParentageResolved flag is set to True", len(requestsToUpdate))
        self.logger.info("Total %s requests will be retried next cycle: %s", len(failedRequests), failedRequests)
Example #2
    def moveToArchived(self, config):
        """
        Move aborted/rejected requests with no remaining jobs to their archived states
        """

        testbedWMStats = WMStatsReader(config.wmstats_url, reqdbURL=config.reqmgrdb_url)
        reqdbWriter = RequestDBWriter(config.reqmgrdb_url)

        statusTransition = {"aborted": ["aborted-completed", "aborted-archived"], "rejected": ["rejected-archived"]}

        for status, nextStatusList in statusTransition.items():

            requests = testbedWMStats.getRequestByStatus([status], jobInfoFlag=True, legacyFormat=True)

            self.logger.info("checking %s workflows: %d" % (status, len(requests)))

            if len(requests) > 0:

                requestCollection = RequestInfoCollection(requests)

                requestsDict = requestCollection.getData()
                numOfArchived = 0

                for requestName, requestInfo in requestsDict.items():

                    if requestInfo.getJobSummary().getTotalJobs() == 0:
                        for nextStatus in nextStatusList:
                            reqdbWriter.updateRequestStatus(requestName, nextStatus)
                        numOfArchived += 1

                self.logger.info("Total %s-archieved: %d" % (status, numOfArchived))

        return
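The statusTransition map above walks a drained request through every intermediate state, in order. A self-contained illustration of that walk; StubRequestDBWriter below is a stand-in that only records calls, not the real RequestDBWriter.

class StubRequestDBWriter(object):
    """Records status updates instead of writing to CouchDB."""
    def __init__(self):
        self.updates = []

    def updateRequestStatus(self, requestName, status):
        self.updates.append((requestName, status))

statusTransition = {"aborted": ["aborted-completed", "aborted-archived"],
                    "rejected": ["rejected-archived"]}

writer = StubRequestDBWriter()
# An "aborted" request with zero remaining jobs passes through both states
for nextStatus in statusTransition["aborted"]:
    writer.updateRequestStatus("example_workflow", nextStatus)

assert writer.updates == [("example_workflow", "aborted-completed"),
                          ("example_workflow", "aborted-archived")]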
Example #3
 def __init__(self, app, api, config, mount):
     # main CouchDB database where requests/workloads are stored
     RESTEntity.__init__(self, app, api, config, mount)
     self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
     self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db, couchapp="ReqMgr")
     # this is needed for the POST validation
     self.gq_service = WorkQueue(config.couch_host, config.couch_workqueue_db)
Example #4
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(
                    **self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(
            self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            # sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(
                self.config.AnalyticsDataCollector.localT0RequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(
                self.config.AnalyticsDataCollector.centralRequestDBURL)

            self.reqmgr2Svc = ReqMgr(
                self.config.TaskArchiver.ReqMgr2ServiceURL)
            # TODO: remove this when reqmgr2 replaces reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager(
                {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

        # Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return
Example #5
    def setUp(self):
        """
        setup for test.
        """

        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)
        self.testInit.setSchema(
            customModules=["WMCore.WMBS", "WMComponent.DBS3Buffer"],
            useDefault=False)
        self.databaseName = "taskarchiver_t_0"
        self.testInit.setupCouch("%s/workloadsummary" % self.databaseName,
                                 "WorkloadSummary")
        self.testInit.setupCouch("%s/jobs" % self.databaseName, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.databaseName, "FWJRDump")
        self.testInit.setupCouch("wmagent_summary_t", "WMStats")
        self.testInit.setupCouch("wmagent_summary_central_t", "WMStats")
        self.testInit.setupCouch("stat_summary_t", "SummaryStats")
        reqmgrdb = "reqmgrdb_t"
        self.testInit.setupCouch(reqmgrdb, "ReqMgr")

        reqDBURL = "%s/%s" % (self.testInit.couchUrl, reqmgrdb)
        self.requestWriter = RequestDBWriter(reqDBURL)
        self.requestWriter.defaultStale = {}

        self.daofactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.getJobs = self.daofactory(classname="Jobs.GetAllJobs")
        self.inject = self.daofactory(
            classname="Workflow.MarkInjectedWorkflows")

        self.testDir = self.testInit.generateWorkDir()
        os.makedirs(os.path.join(self.testDir, 'specDir'))

        self.nJobs = 10
        self.campaignName = 'aCampaign'

        self.uploadPublishInfo = False
        self.uploadPublishDir = None

        return
Example #6
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(
            package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi
        )

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, "WorkQueueParams"):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig

                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, "useReqMgrForCompletionCheck", True)

        if not self.useReqMgrForCompletionCheck:
            # sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(
                self.config.AnalyticsDataCollector.localT0RequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp,
            )
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)

            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            # TODO: remove this when reqmgr2 replaces reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager({"endpoint": self.config.TaskArchiver.ReqMgrServiceURL})

        # Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return
Example #7
    def setup(self, parameters):
        """
        Called at startup
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(
            self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(self.config.TaskArchiver,
                                         'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(
            self.config.TaskArchiver.localWMStatsURL, "WMStatsAgent")

        # TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(
            self.config.AnalyticsDataCollector.centralRequestDBURL,
            couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        if self.useReqMgrForCompletionCheck:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(
                self.config.AnalyticsDataCollector.centralRequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            if self.config.TaskArchiver.reqmgr2Only:
                self.reqmgr2Svc = ReqMgr(
                    self.config.TaskArchiver.ReqMgr2ServiceURL)
            else:
                # TODO: remove this for reqmgr2
                self.reqmgrSvc = RequestManager(
                    {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(
                self.config.AnalyticsDataCollector.localT0RequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" %
                                                            jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" %
                                                            jobDBName)

        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(
            statSummaryDBName)
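One detail worth calling out in the setup above: the deletable state depends on where completion is checked. Below is a minimal restatement of that branch with a stubbed config; _StubTaskArchiver is a stand-in, not the real WMCore config object.

class _StubTaskArchiver(object):
    useReqMgrForCompletionCheck = False  # Tier0-style agent

# Same policy as setup(): central ReqMgr deletes at "announced",
# the Tier0 (local couch) case deletes at "completed".
if getattr(_StubTaskArchiver, 'useReqMgrForCompletionCheck', True):
    deletableState = "announced"
else:
    deletableState = "completed"

assert deletableState == "completed"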
Example #8
class RequestDBTest(unittest.TestCase):
    """
    """

    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["ReqMgr"]
        self.testInit = TestInitCouchApp("RequestDBServiceTest")
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = "requsetdb_t"
        self.testInit.setupCouch(dbName, *self.couchApps)
        self.requestWriter = RequestDBWriter(self.testInit.couchUrl, dbName)
        self.requestReader = RequestDBReader(self.testInit.couchUrl, dbName)
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # test insert, status/property updates and retrieval by name/status
        schema = generate_reqmgr_schema()
        result = self.requestWriter.insertGenericRequest(schema[0])

        self.assertEqual(len(result), 1, "insert fail")

        self.assertEqual(
            self.requestWriter.updateRequestStatus(schema[0]["RequestName"], "failed"), "OK", "update fail"
        )
        self.assertEqual(
            self.requestWriter.updateRequestStatus("not_exist_schema", "assigned"), "Error: document not found"
        )
        result = self.requestWriter.updateRequestProperty(schema[0]["RequestName"], {"Teams": ["teamA"]})
        self.assertEqual(
            self.requestWriter.updateRequestProperty(schema[0]["RequestName"], {"Teams": ["teamA"]}),
            "OK",
            "update fail",
        )
        self.assertEqual(
            self.requestWriter.updateRequestProperty("not_exist_schema", {"Teams": "teamA"}),
            "Error: document not found",
        )

        result = self.requestWriter.getRequestByNames([schema[0]["RequestName"]])
        self.assertEqual(len(result), 1, "should be 1")
        result = self.requestWriter.getRequestByStatus(["failed"], False, 1)
        self.assertEqual(len(result), 1, "should be 1")
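As the assertions above suggest, the writer's update methods appear to report outcomes as strings rather than raising ('OK' on success, 'Error: document not found' for a missing request). A hedged sketch of calling code built on that convention; safeUpdateProperty is illustrative, not part of the service.

def safeUpdateProperty(writer, requestName, props):
    """
    Update properties on a request document; return True on success.
    Assumes the 'OK' / 'Error: ...' string convention exercised above.
    """
    result = writer.updateRequestProperty(requestName, props)
    return result == "OK"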
Example #9
 def __init__(self, app, api, config, mount):
     # main CouchDB database where requests/workloads are stored
     RESTEntity.__init__(self, app, api, config, mount)
     self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
     self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db, couchapp = "ReqMgr")
     # this is needed for the POST validation
     self.reqmgr_aux_db = api.db_handler.get_db(config.couch_reqmgr_aux_db)
Example #10
    def setup(self, parameters):
        """
        set db connection(couchdb, wmbs) to prepare to gather information
        """
        # set the connection to local queue
        self.localQueue = WorkQueueService(self.config.AnalyticsDataCollector.localQueueURL)

        # set the connection for local couchDB call
        self.localCouchDB = LocalCouchDBData(self.config.AnalyticsDataCollector.localCouchURL, 
                                             self.config.JobStateMachine.summaryStatsDBName,
                                             self.summaryLevel)

        # interface to WMBS/BossAir db
        myThread = threading.currentThread()
        # set wmagent db data
        self.wmagentDB = WMAgentDBData(self.summaryLevel, myThread.dbi, myThread.logger)
        # set the connection for local couchDB call
        self.localSummaryCouchDB = WMStatsWriter(self.config.AnalyticsDataCollector.localWMStatsURL, 
                                                 "WMStatsAgent")
        
        if hasattr(self.config, "Tier0Feeder"):
            # use local db for Tier0
            centralRequestCouchDBURL = self.config.AnalyticsDataCollector.localT0RequestDBURL
        else:
            centralRequestCouchDBURL = self.config.AnalyticsDataCollector.centralRequestDBURL
        
        self.centralRequestCouchDB = RequestDBWriter(centralRequestCouchDBURL,
                                                     couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
        # TODO: change the config to hold the couch url
        self.localCouchServer = CouchMonitor(self.config.JobStateMachine.couchurl)
        
        if self.pluginName is not None:
            pluginFactory = WMFactory("plugins", "WMComponent.AnalyticsDataCollector.Plugins")
            self.plugin = pluginFactory.loadObject(classname=self.pluginName)
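The last lines above load an optional plugin by name through WMFactory. Below is a rough stdlib analogue of that lookup, just to make the mechanism concrete; the module path follows the factory arguments above, and the plugin name is hypothetical.

import importlib

pluginName = None  # e.g. "TaskMonitoring" when a plugin is configured
plugin = None
if pluginName is not None:
    # WMFactory resolves the class from the package named above;
    # importlib.import_module is the stdlib equivalent of that lookup.
    module = importlib.import_module(
        "WMComponent.AnalyticsDataCollector.Plugins.%s" % pluginName)
    plugin = getattr(module, pluginName)()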
Example #11
    def __init__(self, config):
        """
        _init_

        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi)

        self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile
        self.specDirectory = config.Tier0Feeder.specDirectory
        self.dropboxuser = getattr(config.Tier0Feeder, "dropboxuser", None)
        self.dropboxpass = getattr(config.Tier0Feeder, "dropboxpass", None)

        self.transferSystemBaseDir = getattr(config.Tier0Feeder, "transferSystemBaseDir", None)
        if self.transferSystemBaseDir is not None:
            if not os.path.exists(self.transferSystemBaseDir):
                self.transferSystemBaseDir = None

        self.dqmUploadProxy = getattr(config.Tier0Feeder, "dqmUploadProxy", None)
        self.serviceProxy = getattr(config.Tier0Feeder, "serviceProxy", None)

        self.localRequestCouchDB = RequestDBWriter(
            config.AnalyticsDataCollector.localT0RequestDBURL, couchapp=config.AnalyticsDataCollector.RequestCouchApp
        )

        hltConfConnectUrl = config.HLTConfDatabase.connectUrl
        dbFactoryHltConf = DBFactory(logging, dburl=hltConfConnectUrl, options={})
        dbInterfaceHltConf = dbFactoryHltConf.connect()
        daoFactoryHltConf = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=dbInterfaceHltConf)
        self.getHLTConfigDAO = daoFactoryHltConf(classname="RunConfig.GetHLTConfig")

        storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl
        dbFactoryStorageManager = DBFactory(logging, dburl=storageManagerConnectUrl, options={})
        self.dbInterfaceStorageManager = dbFactoryStorageManager.connect()

        self.getExpressReadyRunsDAO = None
        if hasattr(config, "PopConLogDatabase"):
            popConLogConnectUrl = getattr(config.PopConLogDatabase, "connectUrl", None)
            if popConLogConnectUrl is not None:
                dbFactoryPopConLog = DBFactory(logging, dburl=popConLogConnectUrl, options={})
                dbInterfacePopConLog = dbFactoryPopConLog.connect()
                daoFactoryPopConLog = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=dbInterfacePopConLog)
                self.getExpressReadyRunsDAO = daoFactoryPopConLog(classname="Tier0Feeder.GetExpressReadyRuns")

        self.haveT0DataSvc = False
        if hasattr(config, "T0DataSvcDatabase"):
            t0datasvcConnectUrl = getattr(config.T0DataSvcDatabase, "connectUrl", None)
            if t0datasvcConnectUrl is not None:
                self.haveT0DataSvc = True
                dbFactoryT0DataSvc = DBFactory(logging, dburl=t0datasvcConnectUrl, options={})
                dbInterfaceT0DataSvc = dbFactoryT0DataSvc.connect()
                self.daoFactoryT0DataSvc = DAOFactory(
                    package="T0.WMBS", logger=logging, dbinterface=dbInterfaceT0DataSvc
                )

        return
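The constructor above follows one guard pattern throughout: read an optional setting with getattr(..., None) and only wire up the corresponding DAO when the URL is present. The same pattern in isolation, with a stand-in config object (the _Section stub below is illustrative; only the PopConLogDatabase section name comes from the snippet above):

class _Section(object):
    pass

config = _Section()
config.PopConLogDatabase = _Section()
# connectUrl deliberately left unset to show the fall-through

getExpressReadyRunsDAO = None
popConLogConnectUrl = getattr(config.PopConLogDatabase, "connectUrl", None)
if popConLogConnectUrl is not None:
    getExpressReadyRunsDAO = "DAO would be built here"

assert getExpressReadyRunsDAO is None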
Example #12
 def setUp(self):
     """
     _setUp_
     """
     self.schema = []
     self.couchApps = ["ReqMgr"]
     self.testInit = TestInitCouchApp('RequestDBServiceTest')
     self.testInit.setLogging()
     self.testInit.setDatabaseConnection()
     self.testInit.setSchema(customModules=self.schema, useDefault=False)
     dbName = 'requsetdb_t'
     self.testInit.setupCouch(dbName, *self.couchApps)
     reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
     self.requestWriter = RequestDBWriter(reqDBURL)
     self.requestReader = RequestDBReader(reqDBURL)
     self.requestWriter.defaultStale = {}
     self.requestReader.defaultStale = {}
     return
Example #13
class T0RequestDBTest(unittest.TestCase):
    """
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["T0Request"]
        self.testInit = TestInitCouchApp('RequestDBServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = self.schema,
                                useDefault = False)
        dbName = 't0_requsetdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.requestWriter = RequestDBWriter(reqDBURL, self.couchApps[0])
        self.requestReader = RequestDBReader(reqDBURL, self.couchApps[0])
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # test insert, status updates and the allowed T0 state transitions
        schema = generate_reqmgr_schema()
        result = self.requestWriter.insertGenericRequest(schema[0])

        self.assertEqual(len(result), 1, 'insert fail')

        result = self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "assigned")

        self.assertEqual(result, 'not allowed state assigned', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestStatus("not_exist_schema", "new"),
                          'Error: document not found')

        allowedStates = ["Closed", "Merge", "AlcaSkim", "Harvesting",
                         "Processing Done", "completed"]
        for state in allowedStates:
            self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], state),
                          'OK')

        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "Processing Done"),
                          'not allowed transition completed to Processing Done')

        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "normal-archived"),
                          'OK')
        result = self.requestWriter.getRequestByStatus(["normal-archived"], False, 1)
        self.assertEqual(len(result), 1, "should be 1 but %s" % result)
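Read as documentation, the test above pins down part of the T0Request state machine: "assigned" is rejected outright, the listed states are accepted in order, and once "completed" is reached the workflow cannot move back to "Processing Done". The implied happy path, as a plain list, assuming a freshly inserted request starts in "new":

# Happy-path status order implied by the assertions above (T0Request couchapp)
T0_HAPPY_PATH = ["new", "Closed", "Merge", "AlcaSkim", "Harvesting",
                 "Processing Done", "completed", "normal-archived"]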
Example #14
    def moveToArchived(self, config):
        """
        Move aborted/rejected requests with no remaining jobs to their archived states
        """

        testbedWMStats = WMStatsReader(config.wmstats_url,
                                       reqdbURL=config.reqmgrdb_url)
        reqdbWriter = RequestDBWriter(config.reqmgrdb_url)

        statusTransition = {
            "aborted": ["aborted-completed", "aborted-archived"],
            "rejected": ["rejected-archived"]
        }

        for status, nextStatusList in statusTransition.items():

            requests = testbedWMStats.getRequestByStatus([status],
                                                         jobInfoFlag=True,
                                                         legacyFormat=True)

            self.logger.info("checking %s workflows: %d" %
                             (status, len(requests)))

            if len(requests) > 0:

                requestCollection = RequestInfoCollection(requests)

                requestsDict = requestCollection.getData()
                numOfArchived = 0

                for requestName, requestInfo in requestsDict.items():

                    if requestInfo.getJobSummary().getTotalJobs() == 0:
                        for nextStatus in nextStatusList:
                            reqdbWriter.updateRequestStatus(
                                requestName, nextStatus)
                        numOfArchived += 1

                self.logger.info("Total %s-archieved: %d" %
                                 (status, numOfArchived))

        return
Example #15
class T0RequestDBTest(unittest.TestCase):
    """
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["T0Request"]
        self.testInit = TestInitCouchApp('RequestDBServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = self.schema,
                                useDefault = False)
        dbName = 't0_requsetdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.requestWriter = RequestDBWriter(reqDBURL, self.couchApps[0])
        self.requestReader = RequestDBReader(reqDBURL, self.couchApps[0])
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # test insert, status updates and the allowed T0 state transitions
        schema = generate_reqmgr_schema()
        result = self.requestWriter.insertGenericRequest(schema[0])

        self.assertEqual(len(result), 1, 'insert fail')
        
        result = self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "assigned")

        self.assertEqual(result, 'not allowed state assigned', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestStatus("not_exist_schema", "new"),
                          'Error: document not found')
        
        allowedStates = ["Closed", "Merge", "AlcaSkim", "Harvesting",  
                         "Processing Done", "completed"]
        for state in allowedStates:
            self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], state),
                          'OK')
        
        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "Processing Done"),
                          'not allowed transition completed to Processing Done')  
        
        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "normal-archived"),
                          'OK')  
        result = self.requestWriter.getRequestByStatus(["normal-archived"], False, 1)
        self.assertEqual(len(result), 1, "should be 1 but %s" % result)
Example #16
class RequestDBTest(unittest.TestCase):
    """
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["ReqMgr"]
        self.testInit = TestInitCouchApp('RequestDBServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = self.schema,
                                useDefault = False)
        dbName = 'requsetdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        self.requestWriter = RequestDBWriter(self.testInit.couchUrl, dbName)
        self.requestReader = RequestDBReader(self.testInit.couchUrl, dbName)
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # test insert, status/property updates and retrieval by name/status
        schema = generate_reqmgr_schema()
        result = self.requestWriter.insertGenericRequest(schema[0])

        self.assertEqual(len(result), 1, 'insert fail')

        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "failed"), 'OK', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestStatus("not_exist_schema", "assigned"),
                          'Error: document not found')
        result = self.requestWriter.updateRequestProperty(schema[0]['RequestName'],
                                                                   {'Teams': ['teamA']})
        self.assertEqual(self.requestWriter.updateRequestProperty(schema[0]['RequestName'],
                                                                   {'Teams': ['teamA']}), 'OK', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestProperty("not_exist_schema", {'Teams': 'teamA'}),
                          'Error: document not found')

        result = self.requestWriter.getRequestByNames([schema[0]['RequestName']])
        self.assertEqual(len(result), 1, "should be 1")
        result = self.requestWriter.getRequestByStatus(["failed"], False, 1)
        self.assertEqual(len(result), 1, "should be 1")
Example #17
    def setUp(self):
        """
        _setUp_

        Setup the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.requestCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.requestCouchDB, 'T0Request')
        self.testDir = self.testInit.generateWorkDir()
        reqDBURL = "%s/%s" % (os.environ['COUCHURL'], self.requestCouchDB)
        self.requestDBWriter = RequestDBWriter(reqDBURL, couchapp="T0Request")
        self.requestDBWriter._setNoStale()

        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return
Example #18
 def setUp(self):
     """
     _setUp_
     """
     self.schema = []
     self.couchApps = ["WMStats"]
     self.testInit = TestInitCouchApp('WorkQueueServiceTest')
     self.testInit.setLogging()
     self.testInit.setDatabaseConnection()
     self.testInit.setSchema(customModules=self.schema, useDefault=False)
     dbName = 'wmstats_t'
     self.testInit.setupCouch(dbName, "WMStats")
     reqDBName = "reqmgrdb_t"
     self.testInit.setupCouch(reqDBName, "ReqMgr")
     wmstatsURL = "%s/%s" % (self.testInit.couchUrl, dbName)
     reqDBURL = "%s/%s" % (self.testInit.couchUrl, reqDBName)
     self.reqDBWriter = RequestDBWriter(reqDBURL)
     self.wmstatsReader = WMStatsReader(wmstatsURL, reqdbURL=reqDBURL)
     self.wmstatsReader.defaultStale = {}
     self.wmstatsReader.reqDB.defaultStale = {}
     return
Example #19
 def setup(self, parameters):
     """
     Called at startup
     """
     self.teamName = self.config.Agent.teamName
     # set the connection for local couchDB call
     self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
     self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
     self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL, 
                                         "WMStatsAgent")
     
      # TODO: we might need to use local db for Tier0
     self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL, 
                                                   couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
     
     if self.useReqMgrForCompletionCheck:
         self.deletableState = "announced"
         self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL, 
                                                       couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
         if self.config.TaskArchiver.reqmgr2Only:
             self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
         else:
             # TODO: remove this for reqmgr2
             self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
     else:
         # Tier0 case
         self.deletableState = "completed"
         # use local for update
         self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL, 
                                                       couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
     
     jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
     jobDBName = self.config.JobStateMachine.couchDBName
      self.jobCouchdb = CouchServer(jobDBurl)
     self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
     self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
     
     statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
     self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)
Example #20
    def setUp(self):
        """
        setup for test.
        """

        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase = True)
        self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMComponent.DBS3Buffer"],
                                useDefault = False)
        self.databaseName = "taskarchiver_t_0"
        self.testInit.setupCouch("%s/workloadsummary" % self.databaseName, "WorkloadSummary")
        self.testInit.setupCouch("%s/jobs" % self.databaseName, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.databaseName, "FWJRDump")
        self.testInit.setupCouch("wmagent_summary_t", "WMStats")
        self.testInit.setupCouch("wmagent_summary_central_t", "WMStats")
        self.testInit.setupCouch("stat_summary_t", "SummaryStats")
        reqmgrdb = "reqmgrdb_t"
        self.testInit.setupCouch(reqmgrdb, "ReqMgr")
        
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, reqmgrdb)
        self.requestWriter = RequestDBWriter(reqDBURL)
        self.requestWriter.defaultStale = {}
        
        self.daofactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.getJobs = self.daofactory(classname = "Jobs.GetAllJobs")
        self.inject  = self.daofactory(classname = "Workflow.MarkInjectedWorkflows")

        self.testDir = self.testInit.generateWorkDir()
        os.makedirs(os.path.join(self.testDir, 'specDir'))


        self.nJobs = 10
        self.campaignName = 'aCampaign'

        self.uploadPublishInfo = False
        self.uploadPublishDir  = None

        return
Example #21
 def setUp(self):
     """
     _setUp_
     """
     self.schema = []
     self.couchApps = ["ReqMgr"]
     self.testInit = TestInitCouchApp("RequestDBServiceTest")
     self.testInit.setLogging()
     self.testInit.setDatabaseConnection()
     self.testInit.setSchema(customModules=self.schema, useDefault=False)
     dbName = "requsetdb_t"
     self.testInit.setupCouch(dbName, *self.couchApps)
     self.requestWriter = RequestDBWriter(self.testInit.couchUrl, dbName)
     self.requestReader = RequestDBReader(self.testInit.couchUrl, dbName)
     self.requestWriter.defaultStale = {}
     self.requestReader.defaultStale = {}
     return
Example #22
 def setUp(self):
     """
     _setUp_
     """
     self.schema = []
     self.couchApps = ["T0Request"]
     self.testInit = TestInitCouchApp('RequestDBServiceTest')
     self.testInit.setLogging()
     self.testInit.setDatabaseConnection()
     self.testInit.setSchema(customModules = self.schema,
                             useDefault = False)
     dbName = 't0_requsetdb_t'
     self.testInit.setupCouch(dbName, *self.couchApps)
     reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
     self.requestWriter = RequestDBWriter(reqDBURL, self.couchApps[0])
     self.requestReader = RequestDBReader(reqDBURL, self.couchApps[0])
     self.requestWriter.defaultStale = {}
     self.requestReader.defaultStale = {}
     return
Example #23
    def setUp(self):
        """
        _setUp_

        Setup the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.requestCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.requestCouchDB, 'T0Request')
        self.testDir = self.testInit.generateWorkDir()
        reqDBURL = "%s/%s" % (os.environ['COUCHURL'], self.requestCouchDB)
        self.requestDBWriter = RequestDBWriter(reqDBURL, couchapp="T0Request")
        self.requestDBWriter._setNoStale()

        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return
Example #24
 def setUp(self):
     """
     _setUp_
     """
     self.schema = []
     self.couchApps = ["WMStats"]
     self.testInit = TestInitCouchApp("WorkQueueServiceTest")
     self.testInit.setLogging()
     self.testInit.setDatabaseConnection()
     self.testInit.setSchema(customModules=self.schema, useDefault=False)
     dbName = "wmstats_t"
     self.testInit.setupCouch(dbName, "WMStats")
     reqDBName = "reqmgrdb_t"
     self.testInit.setupCouch(reqDBName, "ReqMgr")
     wmstatsURL = "%s/%s" % (self.testInit.couchUrl, dbName)
     reqDBURL = "%s/%s" % (self.testInit.couchUrl, reqDBName)
     self.reqDBWriter = RequestDBWriter(reqDBURL)
      self.wmstatsReader = WMStatsReader(wmstatsURL, reqdbURL=reqDBURL)
     self.wmstatsReader.defaultStale = {}
     self.wmstatsReader.reqDB.defaultStale = {}
     return
Example #25
class WMStatsTest(unittest.TestCase):
    """
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["WMStats"]
        self.testInit = TestInitCouchApp('WorkQueueServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = self.schema,
                                useDefault = False)
        dbName = 'wmstats_t'
        self.testInit.setupCouch(dbName, "WMStats")
        reqDBName = "reqmgrdb_t"
        self.testInit.setupCouch(reqDBName, "ReqMgr")
        wmstatsURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, reqDBName)
        self.reqDBWriter = RequestDBWriter(reqDBURL)
        self.wmstatsReader = WMStatsReader(wmstatsURL, reqdbURL=reqDBURL)
        self.wmstatsReader.defaultStale = {}
        self.wmstatsReader.reqDB.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testWMStatsWriter(self):
        # test insert, updates and retrieval of requests via WMStatsReader
        schema = generate_reqmgr_schema()

        result = self.reqDBWriter.insertGenericRequest(schema[0])
        self.assertEqual(result[0]['ok'], True, 'insert fail')

        result = self.reqDBWriter.updateRequestStatus(schema[0]['RequestName'], "failed")
        self.assertEqual(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestStatus("not_exist_schema", "assigned")
        self.assertEqual(result, 'Error: document not found')

        result = self.reqDBWriter.updateRequestProperty(schema[0]['RequestName'], {"Teams": ['teamA']})
        self.assertEqual(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestProperty("not_exist_schema", {"Teams": ['teamA']})
        self.assertEqual(result, 'Error: document not found')

        totalStats = {'TotalEstimatedJobs': 100, 'TotalInputEvents': 1000, 'TotalInputLumis': 1234, 'TotalInputFiles': 5}
        result = self.reqDBWriter.updateRequestProperty(schema[0]['RequestName'], totalStats)
        self.assertEqual(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestProperty(schema[0]['RequestName'], totalStats)
        self.assertEqual(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestProperty("not_exist_schema", totalStats)
        self.assertEqual(result, 'Error: document not found')

        spec1 = newWorkload(schema[0]['RequestName'])
        production = spec1.newTask("Production")
        production.setTaskType("Merge")
        production.setSiteWhitelist(['TEST_SITE'])
        properties = {"RequestPriority": spec1.priority(),
                      'SiteWhitelist': spec1.getTopLevelTask()[0].siteWhitelist(),
                      'OutputDatasets': spec1.listOutputDatasets()}
        result = self.reqDBWriter.updateRequestProperty(spec1.name(), properties)
        self.assertEqual(result, 'OK', 'update fail')

        spec2 = newWorkload("not_exist_schema")
        production = spec2.newTask("Production")
        production.setTaskType("Merge")
        properties = {"RequestPriority": spec2.priority(),
                      'SiteWhitelist': spec2.getTopLevelTask()[0].siteWhitelist(),
                      'OutputDatasets': spec2.listOutputDatasets()}
        result = self.reqDBWriter.updateRequestProperty(spec2.name(), properties)
        self.assertEqual(result, 'Error: document not found')

        requests = self.wmstatsReader.getRequestByStatus(["failed"], jobInfoFlag = False, legacyFormat = True)
        self.assertEqual(requests.keys(), [schema[0]['RequestName']])

        requestCollection = RequestInfoCollection(requests)
        result = requestCollection.getJSONData()
        self.assertEqual(result.keys(), [schema[0]['RequestName']])

        requests = self.wmstatsReader.getActiveData()
        self.assertEqual(requests.keys(), [schema[0]['RequestName']])
        requests = self.wmstatsReader.getRequestByStatus(["failed"])
        self.assertEqual(requests.keys(), [schema[0]['RequestName']])

        requests = self.wmstatsReader.getRequestSummaryWithJobInfo(schema[0]['RequestName'])
        self.assertEqual(requests.keys(), [schema[0]['RequestName']])
Example #26
class TaskArchiverTest(unittest.TestCase):
    """
    TestCase for TestTaskArchiver module
    """

    _setup_done = False
    _teardown = False
    _maxMessage = 10
    OWNERDN = os.environ['OWNERDN'] if 'OWNERDN' in os.environ else "Generic/OWNERDN"

    def setUp(self):
        """
        setup for test.
        """

        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase = True)
        self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMComponent.DBS3Buffer"],
                                useDefault = False)
        self.databaseName = "taskarchiver_t_0"
        self.testInit.setupCouch("%s/workloadsummary" % self.databaseName, "WorkloadSummary")
        self.testInit.setupCouch("%s/jobs" % self.databaseName, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.databaseName, "FWJRDump")
        self.testInit.setupCouch("wmagent_summary_t", "WMStats")
        self.testInit.setupCouch("wmagent_summary_central_t", "WMStats")
        self.testInit.setupCouch("stat_summary_t", "SummaryStats")
        reqmgrdb = "reqmgrdb_t"
        self.testInit.setupCouch(reqmgrdb, "ReqMgr")
        
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, reqmgrdb)
        self.requestWriter = RequestDBWriter(reqDBURL)
        self.requestWriter.defaultStale = {}
        
        self.daofactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.getJobs = self.daofactory(classname = "Jobs.GetAllJobs")
        self.inject  = self.daofactory(classname = "Workflow.MarkInjectedWorkflows")

        self.testDir = self.testInit.generateWorkDir()
        os.makedirs(os.path.join(self.testDir, 'specDir'))


        self.nJobs = 10
        self.campaignName = 'aCampaign'

        self.uploadPublishInfo = False
        self.uploadPublishDir  = None

        return

    def tearDown(self):
        """
        Database deletion
        """
        myThread = threading.currentThread()

        self.testInit.clearDatabase(modules = ["WMCore.WMBS"])
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        return

    def getConfig(self):
        """
        _createConfig_

        General config file
        """
        config = self.testInit.getConfiguration()
        #self.testInit.generateWorkDir(config)

        config.section_("General")
        config.General.workDir = "."

        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl     = os.getenv("COUCHURL", "cmssrv52.fnal.gov:5984")
        config.JobStateMachine.couchDBName  = self.databaseName
        config.JobStateMachine.jobSummaryDBName = 'wmagent_summary_t'
        config.JobStateMachine.summaryStatsDBName = 'stat_summary_t'

        config.component_("JobCreator")
        config.JobCreator.jobCacheDir       = os.path.join(self.testDir, 'testDir')

        config.component_("TaskArchiver")
        config.TaskArchiver.componentDir    = self.testDir
        config.TaskArchiver.WorkQueueParams = {}
        config.TaskArchiver.pollInterval    = 60
        config.TaskArchiver.logLevel        = 'INFO'
        config.TaskArchiver.timeOut         = 0
        config.TaskArchiver.histogramKeys   = ['AvgEventTime', 'writeTotalMB', 'jobTime']
        config.TaskArchiver.histogramBins   = 5
        config.TaskArchiver.histogramLimit  = 5
        config.TaskArchiver.perfPrimaryDatasets        = ['SingleMu', 'MuHad', 'MinimumBias']
        config.TaskArchiver.perfDashBoardMinLumi = 50
        config.TaskArchiver.perfDashBoardMaxLumi = 9000
        config.TaskArchiver.dqmUrl = 'https://cmsweb.cern.ch/dqm/dev/'
        config.TaskArchiver.dashBoardUrl = 'http://dashboard43.cern.ch/dashboard/request.py/putluminositydata'
        config.TaskArchiver.workloadSummaryCouchDBName = "%s/workloadsummary" % self.databaseName
        config.TaskArchiver.workloadSummaryCouchURL    = config.JobStateMachine.couchurl
        config.TaskArchiver.requireCouch               = True
        config.TaskArchiver.uploadPublishInfo = self.uploadPublishInfo
        config.TaskArchiver.uploadPublishDir  = self.uploadPublishDir
        config.TaskArchiver.userFileCacheURL = os.getenv('UFCURL', 'http://cms-xen38.fnal.gov:7725/userfilecache/')
        config.TaskArchiver.ReqMgr2ServiceURL = "https://cmsweb-dev.cern.ch/reqmgr2"
        config.TaskArchiver.ReqMgrServiceURL = "https://cmsweb-dev.cern.ch/reqmgr/rest"
        config.TaskArchiver.localWMStatsURL = "%s/%s" % (config.JobStateMachine.couchurl, config.JobStateMachine.jobSummaryDBName)
         
        config.component_("AnalyticsDataCollector")
        config.AnalyticsDataCollector.centralRequestDBURL = '%s/reqmgrdb_t' % config.JobStateMachine.couchurl
        config.AnalyticsDataCollector.RequestCouchApp = "ReqMgr"

        config.section_("ACDC")
        config.ACDC.couchurl                = config.JobStateMachine.couchurl
        config.ACDC.database                = config.JobStateMachine.couchDBName

        # Make the jobCacheDir
        os.mkdir(config.JobCreator.jobCacheDir)

        # addition for Alerts messaging framework, work (alerts) and control
        # channel addresses to which the component will be sending alerts
        # these are destination addresses where AlertProcessor:Receiver listens
        config.section_("Alert")
        config.Alert.address = "tcp://127.0.0.1:5557"
        config.Alert.controlAddr = "tcp://127.0.0.1:5559"

        config.section_("BossAir")
        config.BossAir.UISetupScript = '/afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh'
        config.BossAir.gliteConf = '/afs/cern.ch/cms/LCG/LCG-2/UI/conf/glite_wms_CERN.conf'
        config.BossAir.credentialDir = '/home/crab/ALL_SETUP/credentials/'
        config.BossAir.gLiteProcesses = 2
        config.BossAir.gLitePrefixEnv = "/lib64/"
        config.BossAir.pluginNames = ["gLitePlugin"]
        config.BossAir.proxyDir = "/tmp/credentials"
        config.BossAir.manualProxyPath = os.environ['X509_USER_PROXY'] if 'X509_USER_PROXY' in os.environ else None

        config.section_("Agent")
        config.Agent.serverDN = "/we/bypass/myproxy/logon"

        return config


    def createWorkload(self, workloadName = 'Test', emulator = True):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        """

        workload = testWorkload("Tier1ReReco")

        taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()

        workload.setCampaign(self.campaignName)

        workload.save(workloadName)

        return workload



    def createTestJobGroup(self, config, name = "TestWorkthrough",
                           filesetName = "TestFileset",
                           specLocation = "spec.xml", error = False,
                           task = "/TestWorkload/ReReco",
                           type = "Processing"):
        """
        Creates a group of several jobs

        """

        myThread = threading.currentThread()

        testWorkflow = Workflow(spec = specLocation, owner = self.OWNERDN,
                                name = name, task = task, owner_vogroup="", owner_vorole="")
        testWorkflow.create()
        self.inject.execute(names = [name], injected = True)

        testWMBSFileset = Fileset(name = filesetName)
        testWMBSFileset.create()

        testFileA = File(lfn = "/this/is/a/lfnA" , size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run(10, *[12314]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        outputWMBSFileset = Fileset(name = '%sOutput' % filesetName)
        outputWMBSFileset.create()
        testFileC = File(lfn = "/this/is/a/lfnC" , size = 1024, events = 10)
        testFileC.addRun(Run(10, *[12312]))
        testFileC.setLocation('malpaquet')
        testFileC.create()
        outputWMBSFileset.addFile(testFileC)
        outputWMBSFileset.commit()
        outputWMBSFileset.markOpen(0)

        testWorkflow.addOutput('output', outputWMBSFileset)


        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow,
                                        type = type)
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        for i in range(0,self.nJobs):
            testJob = Job(name = makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run = 10, lumis = [12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        changer = ChangeState(config)

        report1 = Report()
        report2 = Report()
        if error:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 "WMComponent_t/JobAccountant_t/fwjrs", "badBackfillJobReport.pkl")
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        else:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'mergeReport1.pkl')
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        report1.load(filename = path1)
        report2.load(filename = path2)

        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        for i in range(self.nJobs):
            if i < self.nJobs/2:
                testJobGroup.jobs[i]['fwjr'] = report1
            else:
                testJobGroup.jobs[i]['fwjr'] = report2
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'exhausted', 'retrydone')
        changer.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup


    def createGiantJobSet(self, name, config, nSubs = 10, nJobs = 10,
                          nFiles = 1, spec = "spec.xml"):
        """
        Creates a massive set of jobs

        """


        jobList = []



        for i in range(0, nSubs):
            # Make a bunch of subscriptions
            localName = '%s-%i' % (name, i)
            testWorkflow = Workflow(spec = spec, owner = self.OWNERDN,
                                    name = localName, task="Test", owner_vogroup="", owner_vorole="")
            testWorkflow.create()

            testWMBSFileset = Fileset(name = localName)
            testWMBSFileset.create()


            testSubscription = Subscription(fileset = testWMBSFileset,
                                            workflow = testWorkflow)
            testSubscription.create()

            testJobGroup = JobGroup(subscription = testSubscription)
            testJobGroup.create()

            filesToComplete = []

            for j in range(0, nJobs):
                # Create jobs for each subscription
                testFileA = File(lfn = "%s-%i-lfnA" % (localName, j) , size = 1024, events = 10)
                testFileA.addRun(Run(10, *[11,12,13,14,15,16,17,18,19,20,
                                           21,22,23,24,25,26,27,28,29,30,
                                           31,32,33,34,35,36,37,38,39,40]))
                testFileA.setLocation('malpaquet')
                testFileA.create()

                testWMBSFileset.addFile(testFileA)
                testWMBSFileset.commit()

                filesToComplete.append(testFileA)

                testJob = Job(name = '%s-%i' % (localName, j))
                testJob.addFile(testFileA)
                testJob['retry_count'] = 1
                testJob['retry_max'] = 10
                testJobGroup.add(testJob)
                jobList.append(testJob)

                for k in range(0, nFiles):
                    # Create output files
                    testFile = File(lfn = "%s-%i-output" % (localName, k) , size = 1024, events = 10)
                    testFile.addRun(Run(10, *[12312]))
                    testFile.setLocation('malpaquet')
                    testFile.create()

                    testJobGroup.output.addFile(testFile)

                testJobGroup.output.commit()


            testJobGroup.commit()

            changer = ChangeState(config)

            changer.propagate(testJobGroup.jobs, 'created', 'new')
            changer.propagate(testJobGroup.jobs, 'executing', 'created')
            changer.propagate(testJobGroup.jobs, 'complete', 'executing')
            changer.propagate(testJobGroup.jobs, 'success', 'complete')
            changer.propagate(testJobGroup.jobs, 'cleanout', 'success')

            testWMBSFileset.markOpen(0)

            testSubscription.completeFiles(filesToComplete)


        return jobList
    
    def getPerformanceFromDQM(self, dqmUrl, dataset, run):
        # Mock of the function that fetches this from DQM. Returns None or False if it fails.
        getUrl = "%sjsonfairy/archive/%s%s/DQM/TimerService/event_byluminosity" % (dqmUrl, run, dataset)
        # Assert if the URL is assembled as expected
        if run == 207214:
            self.assertEqual('https://cmsweb.cern.ch/dqm/dev/jsonfairy/archive/207214/MinimumBias/Commissioning10-v4/DQM/DQM/TimerService/event_byluminosity',
                               getUrl)
        # let's suppose it works..
        with open(os.path.join(getTestBase(),
                               'WMComponent_t/TaskArchiver_t/DQMGUIResponse.json'), 'r') as testResponseFile:
            response = testResponseFile.read()
        responseJSON = json.loads(response)
        return responseJSON

    def filterInterestingPerfPoints(self, responseJSON, minLumi, maxLumi):
        worthPoints = {}
        points = responseJSON["hist"]["bins"]["content"]
        for i in range(responseJSON["hist"]["xaxis"]["first"]["id"], responseJSON["hist"]["xaxis"]["last"]["id"]):
            # is the point worth it? if yes, add it to the interesting points dictionary.
            # 1 - non 0
            # 2 - between minimum and maximum expected luminosity
            # FIXME : 3 - population in dashboard for the bin interval < 100
            # Those should come from the config:
            if points[i] == 0:
                continue
            binSize = responseJSON["hist"]["xaxis"]["last"]["value"] / responseJSON["hist"]["xaxis"]["last"]["id"]
            # Fetching the important values
            instLuminosity = i * binSize
            timePerEvent = points[i]

            if instLuminosity > minLumi and instLuminosity < maxLumi:
                worthPoints[instLuminosity] = timePerEvent
        return worthPoints
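    # Worked example for the method above (hypothetical numbers): with
    # xaxis.last.id = 100 and xaxis.last.value = 500, binSize is 5.0, so bin
    # i = 10 maps to instLuminosity 50.0; that point is kept only when its
    # content is non-zero and minLumi < 50.0 < maxLumi.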

    def publishPerformanceDashBoard(self, dashBoardUrl, PD, release, worthPoints):
        dashboardPayload = []
        for instLuminosity in worthPoints :
            timePerEvent = int(worthPoints[instLuminosity])
            dashboardPayload.append({"primaryDataset" : PD,
                                     "release" : release,
                                     "integratedLuminosity" : instLuminosity,
                                     "timePerEvent" : timePerEvent})

        data = "{\"data\":%s}" % str(dashboardPayload).replace("\'","\"")

        # let's suppose it works..
        with open(os.path.join(getTestBase(),
                               'WMComponent_t/TaskArchiver_t/DashBoardPayload.json'), 'r') as testDashBoardPayloadFile:
            testDashBoardPayload = testDashBoardPayloadFile.read()

        self.assertEqual(data, testDashBoardPayload)

        return True
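    # For the method above, the serialized payload has the (hypothetical) shape:
    # {"data": [{"primaryDataset": "MinimumBias", "release": "CMSSW_5_3_6",
    #            "integratedLuminosity": 50.0, "timePerEvent": 1}, ...]}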
    
    def populateWorkflowWithCompleteStatus(self, name="TestWorkload"):
        schema = generate_reqmgr_schema(1)
        schema[0]["RequestName"] = name

        self.requestWriter.insertGenericRequest(schema[0])
        result = self.requestWriter.updateRequestStatus(name, "completed")
        return result
    
    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of closeouts
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = False)

        # Create second workload
        testJobGroup2 = self.createTestJobGroup(config = config,
                                                name = workload.name(),
                                                filesetName = "TestFileset_2",
                                                specLocation = workloadPath,
                                                task = "/TestWorkload/ReReco/LogCollect", 
                                                type = "LogCollect")

        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        cachePath2 = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))

        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)

        workflowName = "TestWorkload"
        dbname       = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        jobdb        = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb       = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobs = jobdb.loadView("JobDump", "jobsByWorkflowName",
                              options = {"startkey": [workflowName],
                                         "endkey": [workflowName, {}]})['rows']
        fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                        options = {"startkey": [workflowName],
                                   "endkey": [workflowName, {}]})['rows']

        self.assertEqual(len(jobs), 2*self.nJobs)

        from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
        create = CreateWMBSBase()
        tables = []
        for x in create.requiredTables:
            tables.append(x[2:])
 
        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()
        
        cleanCouch = CleanCouchPoller(config = config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_fileset")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(os.path.exists(os.path.join(self.testDir, 'workloadTest/TestWorkload')))

        testWMBSFileset = Fileset(id = 1)
        self.assertEqual(testWMBSFileset.exists(), False)



        workloadSummary = workdatabase.document(id = "TestWorkload")
        # Check ACDC
        self.assertEqual(workloadSummary['ACDCServer'], sanitizeURL(config.ACDC.couchurl)['url'])

        # Check the output
        self.assertEqual(workloadSummary['output'].keys(), ['/Electron/MorePenguins-v0/RECO'])
        self.assertEqual(sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']['tasks']),
                        ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])
        # Check performance
        # Check histograms
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['average'],
                                0.89405199999999996, places = 2)
        self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['nEvents'],
                         10)

        # Check standard performance
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['average'], 17.786300000000001,
                                places = 2)
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['stdDev'], 0.0,
                                places = 2)

        # Check worstOffenders
        self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['worstOffenders'],
                         [{'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                          {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                          {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 2}])

        # Check retryData
        self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'], {'1': 10})
        logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
        self.assertEqual(workloadSummary['logArchives'], {'/TestWorkload/ReReco/LogCollect' : [logCollectPFN for _ in range(10)]})

        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ['average', 'stdDev']:
                self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                                        workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'][x][y],
                                        places = 2)

        return

    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = True)
        # Create second workload
        testJobGroup2 = self.createTestJobGroup(config = config,
                                                name = workload.name(),
                                                filesetName = "TestFileset_2",
                                                specLocation = workloadPath,
                                                task = "/TestWorkload/ReReco/LogCollect", 
                                                type = "LogCollect")

        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        jobdb        = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb       = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobdb.loadView("JobDump", "jobsByWorkflowName",
                        options = {"startkey": [workload.name()],
                                   "endkey": [workload.name(), {}]})['rows']
        fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                        options = {"startkey": [workload.name()],
                                   "endkey": [workload.name(), {}]})['rows']
    
        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config = config)
        cleanCouch.setup()
        cleanCouch.algorithm()
        
        dbname       = getattr(config.JobStateMachine, "couchDBName")
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)
    
        workloadSummary = workdatabase.document(id = workload.name())

        self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500)
        self.assertTrue('99999' in workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1'])

        failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']['99999']['runs']
        self.assertEqual(failedRunInfo, {'10' : [[12312, 12312]]},
                          "Wrong lumi information in the summary for failed jobs")

        # Check the failures by site histograms
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['average']['Failed Jobs'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['99999'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['8020'], 10)
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['stdDev']['Failed Jobs'], 0)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)
        return

    def testC_Profile(self):
        """
        _Profile_

        DON'T RUN THIS!
        """

        return

        import cProfile, pstats

        myThread = threading.currentThread()

        name    = makeUUID()

        config = self.getConfig()

        jobList = self.createGiantJobSet(name = name, config = config,
                                         nSubs = 10, nJobs = 1000, nFiles = 10)

        cleanCouch = CleanCouchPoller(config = config)
        cleanCouch.setup()

        cProfile.runctx("cleanCouch.algorithm()", globals(), locals(), filename = "testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()
        return

    def testD_Timing(self):
        """
        _Timing_

        This is to see how fast things go.
        """

        return

        myThread = threading.currentThread()

        name    = makeUUID()

        config  = self.getConfig()
        jobList = self.createGiantJobSet(name = name, config = config, nSubs = 10,
                                         nJobs = 1000, nFiles = 10)


        testTaskArchiver = TaskArchiverPoller(config = config)

        startTime = time.time()
        testTaskArchiver.algorithm()
        stopTime  = time.time()


        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        testWMBSFileset = Fileset(id = 1)
        self.assertEqual(testWMBSFileset.exists(), False)


        logging.info("TaskArchiver took %f seconds" % (stopTime - startTime))


    def testDQMRecoPerformanceToDashBoard(self):

        myThread = threading.currentThread()

        listRunsWorkflow = self.dbsDaoFactory(classname="ListRunsWorkflow")

        # I didn't like doing this, but the test doesn't provide all the info needed in the system, so fake it:
        myThread.dbi.processData("""insert into dbsbuffer_workflow(id, name) values (1, 'TestWorkload')"""
                                 , transaction = False)
        myThread.dbi.processData("""insert into dbsbuffer_file (id, lfn, dataset_algo, workflow) values (1, '/store/t/e/s/t.test', 1, 1)"""
                                 , transaction = False)
        myThread.dbi.processData("""insert into dbsbuffer_file (id, lfn, dataset_algo, workflow) values (2, '/store/t/e/s/t.test2', 1, 1)"""
                                 , transaction = False)
        myThread.dbi.processData("""insert into dbsbuffer_file_runlumi_map (run, lumi, filename) values (207214, 100, 1)"""
                                 , transaction = False)
        myThread.dbi.processData("""insert into dbsbuffer_file_runlumi_map (run, lumi, filename) values (207215, 200, 2)"""
                                 , transaction = False)

        config = self.getConfig()

        dqmUrl = getattr(config.TaskArchiver, "dqmUrl")
        perfDashBoardMinLumi = getattr(config.TaskArchiver, "perfDashBoardMinLumi")
        perfDashBoardMaxLumi = getattr(config.TaskArchiver, "perfDashBoardMaxLumi")
        dashBoardUrl = getattr(config.TaskArchiver, "dashBoardUrl")



        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = True)
        testJobGroup2 = self.createTestJobGroup(config = config,
                                                name = workload.name(),
                                                filesetName = "TestFileset_2",
                                                specLocation = workloadPath,
                                                task = "/TestWorkload/ReReco/LogCollect", 
                                                type = "LogCollect")

        # Adding request type as ReReco, real ReqMgr requests have it
        workload.data.request.section_("schema")
        workload.data.request.schema.RequestType = "ReReco"
        workload.data.request.schema.CMSSWVersion = 'test_compops_CMSSW_5_3_6_patch1'
        workload.getTask('ReReco').addInputDataset(primary='a',processed='b',tier='c')

        interestingPDs = getattr(config.TaskArchiver, "perfPrimaryDatasets")
        interestingDatasets = []
        # Are the datasets from this request interesting? Do they have DQM output? One might ask afterwards whether they have been harvested.
        for dataset in workload.listOutputDatasets():
            (nothing, PD, procDataSet, dataTier) = dataset.split('/')
            if PD in interestingPDs and dataTier == "DQM":
                interestingDatasets.append(dataset)
        # We should have found 1 interesting dataset
        self.assertEqual(len(interestingDatasets), 1)
        if len(interestingDatasets) == 0 :
            return
        # Request will be only interesting for performance if it's a ReReco or PromptReco
        (isReReco, isPromptReco) = (False, False)
        if getattr(workload.data.request.schema, "RequestType", None) == 'ReReco':
            isReReco=True
        # Yes, few people like magic strings, but have a look at :
        # https://github.com/dmwm/T0/blob/master/src/python/T0/RunConfig/RunConfigAPI.py#L718
        # Might be safe enough
        # FIXME: in TaskArchiver, add a test to make sure that the dataset makes sense (procDataset ~= /a/ERA-PromptReco-vVERSON/DQM)
        if re.search('PromptReco', workload.name()):
            isPromptReco = True
        if not (isReReco or isPromptReco):
            return

        self.assertTrue(isReReco)
        self.assertFalse(isPromptReco)

        # We are not interested if it's not a PromptReco or a ReReco
        if not (isReReco or isPromptReco):
            return
        if isReReco :
            release = getattr(workload.data.request.schema, "CMSSWVersion")
            if not release :
                logging.info("no release for %s, bailing out" % workload.name())
        else :
            release = getattr(workload.tasks.Reco.steps.cmsRun1.application.setup, "cmsswVersion")
            if not release :
                logging.info("no release for %s, bailing out" % workload.name())

        self.assertEqual(release, "test_compops_CMSSW_5_3_6_patch1")
        # If all is true, get the run numbers processed by this workflow
        runList = listRunsWorkflow.execute(workflow = workload.name())
        self.assertEqual([207214, 207215], runList)
        # GO to DQM GUI, get what you want
        # https://cmsweb.cern.ch/dqm/offline/jsonfairy/archive/211313/PAMuon/HIRun2013-PromptReco-v1/DQM/DQM/TimerService/event
        for dataset in interestingDatasets :
            (nothing, PD, procDataSet, dataTier) = dataset.split('/')
            worthPoints = {}
            for run in runList :
                responseJSON = self.getPerformanceFromDQM(dqmUrl, dataset, run)
                worthPoints.update(self.filterInterestingPerfPoints(responseJSON, perfDashBoardMinLumi, perfDashBoardMaxLumi))

            # Publish dataset performance to DashBoard.
            if self.publishPerformanceDashBoard(dashBoardUrl, PD, release, worthPoints) == False:
                logging.info("something went wrong when publishing dataset %s to DashBoard" % dataset)

        return

    # Requires a running UserFileCache to succeed. https://cmsweb.cern.ch worked for me
    # The environment variable OWNERDN needs to be set. Used to retrieve an already delegated proxy and contact the ufc
    @attr('integration')
    def testPublishJSONCreate(self):
        """
        Re-run testA_BasicFunctionTest with data in DBSBuffer
        Make sure files are generated
        """

        # Set up uploading and write them elsewhere since the test deletes them.
        self.uploadPublishInfo = True
        self.uploadPublishDir  = self.testDir

        # Insert some DBSFiles
        testFileChildA = DBSBufferFile(lfn = "/this/is/a/child/lfnA", size = 1024, events = 20)
        testFileChildA.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                                    appFam = "RECO", psetHash = "GIBBERISH",
                                    configContent = "MOREGIBBERISH")
        testFileChildB = DBSBufferFile(lfn = "/this/is/a/child/lfnB", size = 1024, events = 20)
        testFileChildB.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                                    appFam = "RECO", psetHash = "GIBBERISH",
                                    configContent = "MOREGIBBERISH")
        testFileChildC = DBSBufferFile(lfn = "/this/is/a/child/lfnC", size = 1024, events = 20)
        testFileChildC.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                                    appFam = "RECO", psetHash = "GIBBERISH",
                                    configContent = "MOREGIBBERISH")

        testFileChildA.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER")
        testFileChildB.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER")
        testFileChildC.setDatasetPath("/Cosmics/USER-DATASET2-v1/USER")

        testFileChildA.create()
        testFileChildB.create()
        testFileChildC.create()

        testFile = DBSBufferFile(lfn = "/this/is/a/lfn", size = 1024, events = 10)
        testFile.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                              appFam = "RECO", psetHash = "GIBBERISH",
                              configContent = "MOREGIBBERISH")
        testFile.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
        testFile.create()

        testFileChildA.addParents([testFile["lfn"]])
        testFileChildB.addParents([testFile["lfn"]])
        testFileChildC.addParents([testFile["lfn"]])

        myThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi)
        self.insertWorkflow = self.dbsDaoFactory(classname="InsertWorkflow")
        workflowID = self.insertWorkflow.execute(requestName='TestWorkload', taskPath='TestWorkload/Production',
                                                 blockMaxCloseTime=100, blockMaxFiles=100,
                                                 blockMaxEvents=100, blockMaxSize=100)
        myThread.dbi.processData("update dbsbuffer_file set workflow=1 where id < 4")

        # Run the test again
        self.testA_BasicFunctionTest()

        # Reset default values
        self.uploadPublishInfo = False
        self.uploadPublishDir  = None

        # Make sure the files are there
        self.assertTrue(os.path.exists( os.path.join(self.testDir, 'TestWorkload_publish.json')))
        self.assertTrue(os.path.getsize(os.path.join(self.testDir, 'TestWorkload_publish.json')) > 100)
        self.assertTrue(os.path.exists( os.path.join(self.testDir, 'TestWorkload_publish.tgz' )))

        return
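A minimal sketch of the poll cycle these TaskArchiver tests exercise (import paths assumed from this era of WMCore; config is assumed to be a fully populated WMAgent Configuration object):

from WMComponent.TaskArchiver.TaskArchiverPoller import TaskArchiverPoller
from WMComponent.TaskArchiver.CleanCouchPoller import CleanCouchPoller

def runArchiverCycle(config):
    """One poll cycle: archive finished work, then purge its couch documents."""
    archiver = TaskArchiverPoller(config=config)
    archiver.algorithm()    # uploads workload summaries and cleans up WMBS rows
    cleaner = CleanCouchPoller(config=config)
    cleaner.setup()
    cleaner.algorithm()     # deletes the archived job/fwjr documents from CouchDB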
Example #27
class RequestDBTest(unittest.TestCase):
    """
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["ReqMgr"]
        self.testInit = TestInitCouchApp('RequestDBServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = self.schema,
                                useDefault = False)
        dbName = 'requestdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.requestWriter = RequestDBWriter(reqDBURL)
        self.requestReader = RequestDBReader(reqDBURL)
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Tear down the Couch databases.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # test insert, update and read back of a request
        schema = generate_reqmgr_schema(3)
        result = self.requestWriter.insertGenericRequest(schema[0])

        self.assertEqual(len(result), 1, 'insert fail')
        
        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "failed"), 'OK', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestStatus("not_exist_schema", "assigned"),
                          'Error: document not found')
        result = self.requestWriter.updateRequestProperty(schema[0]['RequestName'], 
                                                                   {'Teams': ['teamA']})
        self.assertEqual(self.requestWriter.updateRequestProperty(schema[0]['RequestName'], 
                                                                   {'Teams': ['teamA']}), 'OK', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestProperty("not_exist_schema", {'Teams': 'teamA'}),
                          'Error: document not found')
        
        result = self.requestReader.getRequestByNames([schema[0]['RequestName']])
        self.assertEqual(len(result), 1, "should be 1")
        result = self.requestReader.getRequestByStatus(["failed"], False, 1)
        self.assertEqual(len(result), 1, "should be 1")
        
        result = self.requestReader.getStatusAndTypeByRequest([schema[0]['RequestName']])
        self.assertEqual(result[schema[0]['RequestName']][0], 'failed', "should be failed")
        
        result =  self.requestWriter.insertGenericRequest(schema[1])
        time.sleep(2)
        result =  self.requestWriter.insertGenericRequest(schema[2])
        endTime = int(time.time()) - 1
        result = self.requestReader.getRequestByStatusAndEndTime("new", False, endTime)
        self.assertEqual(len(result), 1, "should be 1")
        endTime = int(time.time()) + 1
        result = self.requestReader.getRequestByStatusAndEndTime("new", False, endTime)
        self.assertEqual(len(result), 2, "should be 2")
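A minimal usage sketch distilled from the test above (the CouchDB URL and request name are hypothetical; clearing defaultStale mirrors what the test does to make view reads deterministic):

from WMCore.Services.RequestDB.RequestDBWriter import RequestDBWriter
from WMCore.Services.RequestDB.RequestDBReader import RequestDBReader

reqDBURL = "http://localhost:5984/reqmgrdb_t"   # hypothetical CouchDB database
writer = RequestDBWriter(reqDBURL)
reader = RequestDBReader(reqDBURL)
writer.defaultStale = {}   # disable stale views, as the test does
reader.defaultStale = {}

writer.insertGenericRequest({"RequestName": "DemoRequest_v1"})   # hypothetical name
writer.updateRequestStatus("DemoRequest_v1", "failed")
print(reader.getRequestByStatus(["failed"], False, 1))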
Example #28
File: Request.py Project: menglu21/WMCore
class Request(RESTEntity):
    def __init__(self, app, api, config, mount):
        # main CouchDB database where requests/workloads are stored
        RESTEntity.__init__(self, app, api, config, mount)
        self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
        self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db,
                                                 couchapp="ReqMgr")
        # this is needed for the post validation
        self.reqmgr_aux_db = api.db_handler.get_db(config.couch_reqmgr_aux_db)
        self.gq_service = WorkQueue(config.couch_host,
                                    config.couch_workqueue_db)

    def _requestArgMapFromBrowser(self, request_args):
        """
        This is specific mapping function data from browser

        TODO: give a key word so it doesn't have to loop though in general
        """
        docs = []
        for doc in request_args:
            for key in doc.keys():
                if key.startswith('request'):
                    rid = key.split('request-')[-1]
                    if rid != 'all':
                        docs.append(rid)
                    del doc[key]
        return docs

    def _validateGET(self, param, safe):
        # TODO: need proper validation but for now pass everything
        args_length = len(param.args)
        if args_length == 1:
            safe.kwargs["name"] = param.args[0]
            param.args.pop()
            return

        no_multi_key = ["detail", "_nostale", "date_range", "common_dict"]
        for key, value in param.kwargs.items():
            # convert string to list
            if key not in no_multi_key and isinstance(value, basestring):
                param.kwargs[key] = [value]

        detail = param.kwargs.get('detail', True)
        if detail in (False, "false", "False", "FALSE"):
            detail = False

        if "status" in param.kwargs and detail:
            for status in param.kwargs["status"]:
                if status.endswith("-archived"):
                    raise InvalidSpecParameterValue(
                        """Can't retrieve bulk archived status requests with detail option True, 
                           set detail=false or use other search arguments""")

        for prop in param.kwargs:
            safe.kwargs[prop] = param.kwargs[prop]

        for prop in safe.kwargs:
            del param.kwargs[prop]

        return

    def _validateRequestBase(self, param, safe, valFunc, requestName=None):
        data = cherrypy.request.body.read()
        if data:
            request_args = json.loads(data)
            if requestName:
                request_args["RequestName"] = requestName

        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_args = {}
            for prop in param.kwargs:
                request_args[prop] = param.kwargs[prop]

            for prop in request_args:
                del param.kwargs[prop]
            if requestName:
                request_args["RequestName"] = requestName
            request_args = [request_args]

        safe.kwargs['workload_pair_list'] = []
        if isinstance(request_args, dict):
            request_args = [request_args]
        for args in request_args:
            workload, r_args = valFunc(args, self.config,
                                       self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

    def _get_request_names(self, ids):
        "Extract request names from given documents"
        # cherrypy.log("request names %s" % ids)
        doc = {}
        if isinstance(ids, list):
            for rid in ids:
                doc[rid] = 'on'
        elif isinstance(ids, basestring):
            doc[ids] = 'on'

        docs = []
        for key in doc.keys():
            if key.startswith('request'):
                rid = key.split('request-')[-1]
                if rid != 'all':
                    docs.append(rid)
                del doc[key]
        return docs

    def _getMultiRequestArgs(self, multiRequestForm):
        request_args = {}
        for prop in multiRequestForm:
            if prop == "ids":
                request_names = self._get_request_names(
                    multiRequestForm["ids"])
            elif prop == "new_status":
                request_args["RequestStatus"] = multiRequestForm[prop]
            # remove this
            # elif prop in ["CustodialSites", "AutoApproveSubscriptionSites"]:
            #    request_args[prop] = [multiRequestForm[prop]]
            else:
                request_args[prop] = multiRequestForm[prop]
        return request_names, request_args
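    # Illustrative (hypothetical) browser form for the method above:
    # {"ids": ["request-Req1", "request-Req2"], "new_status": "assigned"}
    # yields (["Req1", "Req2"], {"RequestStatus": "assigned"}).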

    def _validateMultiRequests(self, param, safe, valFunc):

        data = cherrypy.request.body.read()
        if data:
            request_names, request_args = self._getMultiRequestArgs(
                json.loads(data))
        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_names, request_args = self._getMultiRequestArgs(
                param.kwargs)

            for prop in request_args:
                if prop == "RequestStatus":
                    del param.kwargs["new_status"]
                else:
                    del param.kwargs[prop]

            del param.kwargs["ids"]

            # remove this
            # tmp = []
            # for prop in param.kwargs:
            #    tmp.append(prop)
            # for prop in tmp:
            #    del param.kwargs[prop]

        safe.kwargs['workload_pair_list'] = []

        for request_name in request_names:
            request_args["RequestName"] = request_name
            workload, r_args = valFunc(request_args, self.config,
                                       self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

        safe.kwargs["multi_update_flag"] = True

    def _getRequestNamesFromBody(self, param, safe, valFunc):

        request_names = json.loads(cherrypy.request.body.read())
        safe.kwargs['workload_pair_list'] = request_names
        safe.kwargs["multi_names_flag"] = True

    def validate(self, apiobj, method, api, param, safe):
        # to make validation succeed:
        # move the validated arguments to safe,
        # make param empty,
        # otherwise raise an error
        try:
            if method == 'GET':
                self._validateGET(param, safe)

            if method == 'PUT':
                args_length = len(param.args)
                if args_length == 1:
                    requestName = param.args[0]
                    param.args.pop()
                else:
                    requestName = None
                self._validateRequestBase(param, safe,
                                          validate_request_update_args,
                                          requestName)
                # TODO: handle multiple clones
            #                 if len(param.args) == 2:
            #                     #validate clone case
            #                     if param.args[0] == "clone":
            #                         param.args.pop()
            #                         return None, request_args

            if method == 'POST':
                args_length = len(param.args)
                if args_length == 1 and param.args[0] == "multi_update":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._validateMultiRequests(param, safe,
                                                validate_request_update_args)
                elif args_length == 1 and param.args[0] == "bynames":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._getRequestNamesFromBody(
                        param, safe, validate_request_update_args)
                else:
                    self._validateRequestBase(param, safe,
                                              validate_request_create_args)
        except InvalidSpecParameterValue as ex:
            raise ex
        except Exception as ex:
            # TODO: add a proper error message instead of the traceback
            msg = traceback.format_exc()
            cherrypy.log("Error: %s" % msg)
            if hasattr(ex, "message"):
                if hasattr(ex.message, '__call__'):
                    msg = ex.message()
                else:
                    msg = str(ex)
            else:
                msg = str(ex)
            raise InvalidSpecParameterValue(msg)

    def initialize_clone(self, request_name):
        requests = self.reqmgr_db_service.getRequestByNames(request_name)
        clone_args = requests.values()[0]
        # overwrite the name and time stamp.
        initialize_request_args(clone_args, self.config, clone=True)
        # timestamp status update

        spec = loadSpecByType(clone_args["RequestType"])
        workload = spec.factoryWorkloadConstruction(clone_args["RequestName"],
                                                    clone_args)
        return (workload, clone_args)

    def _maskTaskStepChain(self, masked_dict, req_dict, chain_name, mask_key):

        mask_exist = False
        num_loop = req_dict["%sChain" % chain_name]
        for i in range(num_loop):
            if mask_key in req_dict["%s%s" % (chain_name, i + 1)]:
                mask_exist = True
                break
        if mask_exist:
            defaultValue = masked_dict[mask_key]
            masked_dict[mask_key] = []
            # assume the default value applies when a chain doesn't define mask_key.
            chain_key = "%sName" % chain_name

            for i in range(num_loop):
                chain = req_dict["%s%s" % (chain_name, i + 1)]
                if mask_key in chain:
                    masked_dict[mask_key].append({
                        chain_key: chain[chain_key],
                        mask_key: chain[mask_key]
                    })
                else:
                    if isinstance(defaultValue, dict):
                        value = defaultValue.get(chain_key, None)
                    else:
                        value = defaultValue
                    masked_dict[mask_key].append({
                        chain_key: chain[chain_key],
                        mask_key: value
                    })
        return
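    # Illustrative (hypothetical) example for the method above: a StepChain=2
    # request where only Step1 sets "AcquisitionEra" turns
    # masked_dict["AcquisitionEra"] into
    # [{"StepName": "Step1", "AcquisitionEra": "EraA"},
    #  {"StepName": "Step2", "AcquisitionEra": <default value>}]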

    def _mask_result(self, mask, result):

        if len(mask) == 1 and mask[0] == "DAS":
            mask = ReqMgrConfigDataCache.getConfig(
                "DAS_RESULT_FILTER")["filter_list"]

        if len(mask) > 0:
            masked_result = {}
            for req_name, req_info in result.items():
                masked_result.setdefault(req_name, {})
                for mask_key in mask:
                    masked_result[req_name].update(
                        {mask_key: req_info.get(mask_key, None)})
                    if "TaskChain" in req_info:
                        self._maskTaskStepChain(masked_result[req_name],
                                                req_info, "Task", mask_key)
                    elif "StepChain" in req_info:
                        self._maskTaskStepChain(masked_result[req_name],
                                                req_info, "Step", mask_key)

            return masked_result
        else:
            return result

    @restcall(formats=[('text/plain', PrettyJSONFormat()),
                       ('application/json', JSONFormat())])
    def get(self, **kwargs):
        """
        Returns request info depending on the conditions set by kwargs.
        Currently defined kwargs are the following:
        statusList, requestNames, requestType, prepID, inputDataset, outputDataset, dateRange
        If jobInfo is True, returns job information about the request as well.

        TODO:
        stuff like this has to be masked out from the result of this call:
            _attachments: {u'spec': {u'stub': True, u'length': 51712, u'revpos': 2, u'content_type': u'application/json'}}
            _id: maxa_RequestString-OVERRIDE-ME_130621_174227_9225
            _rev: 4-c6ceb2737793aaeac3f1cdf591593da4

        """
        # list of status
        status = kwargs.get("status", [])
        # list of request names
        name = kwargs.get("name", [])
        request_type = kwargs.get("request_type", [])
        prep_id = kwargs.get("prep_id", [])
        inputdataset = kwargs.get("inputdataset", [])
        outputdataset = kwargs.get("outputdataset", [])
        date_range = kwargs.get("date_range", False)
        campaign = kwargs.get("campaign", [])
        workqueue = kwargs.get("workqueue", [])
        team = kwargs.get("team", [])
        mc_pileup = kwargs.get("mc_pileup", [])
        data_pileup = kwargs.get("data_pileup", [])
        requestor = kwargs.get("requestor", [])
        mask = kwargs.get("mask", [])
        detail = kwargs.get("detail", True)
        # set the return format. the default format has the request name as a key;
        # if it is set to one, it returns a list of dictionaries with a RequestName field.
        common_dict = int(kwargs.get("common_dict", 0))
        if detail in (False, "false", "False", "FALSE"):
            option = {"include_docs": False}
        else:
            option = {"include_docs": True}
        # everything should be a stale view; this is only needed for tests
        _nostale = kwargs.get("_nostale", False)
        if _nostale:
            self.reqmgr_db_service._setNoStale()

        request_info = []

        if len(status) == 1 and status[0] == "ACTIVE":
            status = ACTIVE_STATUS
        if status and not team and not request_type and not requestor:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bystatus", option, status))
        if status and team:
            query_keys = [[t, s] for t in team for s in status]
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byteamandstatus", option, query_keys))
        if status and request_type:
            query_keys = [[s, rt] for rt in request_type for s in status]
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "requestsbystatusandtype", option, query_keys))
        if status and requestor:
            query_keys = [[s, r] for r in requestor for s in status]
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bystatusandrequestor", option, query_keys))

        if name:
            request_info.append(self.reqmgr_db_service.getRequestByNames(name))
        if prep_id:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byprepid", option, prep_id))
        if inputdataset:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byinputdataset", option, inputdataset))
        if outputdataset:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byoutputdataset", option, outputdataset))
        if date_range:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bydate", option, date_range))
        if campaign:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bycampaign", option, campaign))
        if workqueue:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byworkqueue", option, workqueue))
        if mc_pileup:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bymcpileup", option, mc_pileup))
        if data_pileup:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bydatapileup", option, data_pileup))
        # get the intersection of the request info
        result = self._intersection_of_request_info(request_info)

        if len(result) == 0:
            return []

        result = self._mask_result(mask, result)
        # If detail is set to False return just list of request name
        if not option["include_docs"]:
            return result.keys()

        if common_dict == 1:
            response_list = result.values()
        else:
            response_list = [result]
        return rows(response_list)
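    # A hypothetical client-side call to the resource above (mount point assumed):
    #   GET .../reqmgr2/data/request?status=running-open&detail=false
    # returns only the matching request names; with the default detail=true the
    # full documents come back, wrapped by rows().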

    def _intersection_of_request_info(self, request_info):
        requests = {}
        if len(request_info) < 1:
            return requests

        request_key_set = set(request_info[0].keys())
        for info in request_info:
            request_key_set = set(request_key_set) & set(info.keys())
        # TODO: need to assume some data might not contain include_docs
        for request_name in request_key_set:
            requests[request_name] = request_info[0][request_name]
        return requests

    # TODO: move this out of this class

    def filterCouchInfo(self, couchInfo):
        for key in ['_rev', '_attachments']:
            if key in couchInfo:
                del couchInfo[key]

    def _combine_request(self, request_info, requestAgentUrl, cache):
        keys = {}
        requestAgentUrlList = []
        for row in requestAgentUrl["rows"]:
            request = row["key"][0]
            if request not in keys:
                keys[request] = []
            keys[request].append(row["key"][1])

        for request in request_info:
            for agentUrl in keys[request]:
                requestAgentUrlList.append([request, agentUrl])

        return requestAgentUrlList

    def _retrieveResubmissionChildren(self, request_name):

        result = self.reqmgr_db.loadView('ReqMgr',
                                         'childresubmissionrequests',
                                         keys=[request_name])['rows']
        childrenRequestNames = []
        for child in result:
            childrenRequestNames.append(child['id'])
            childrenRequestNames.extend(
                self._retrieveResubmissionChildren(child['id']))
        return childrenRequestNames

    def _handleNoStatusUpdate(self, workload, request_args):
        """
        only a few values can be updated without a state transition involved:
        currently 'RequestPriority' and 'total_jobs', 'input_lumis', 'input_events', 'input_num_files'
        """
        if 'RequestPriority' in request_args:
            # must update three places: GQ elements, workload_cache and workload spec
            self.gq_service.updatePriority(workload.name(),
                                           request_args['RequestPriority'])
            report = self.reqmgr_db_service.updateRequestProperty(
                workload.name(), request_args)
            workload.setPriority(request_args['RequestPriority'])
            workload.saveCouchUrl(workload.specUrl())
        elif "total_jobs" in request_args:
            # only GQ update this stats
            # request_args should contain only 4 keys 'total_jobs', 'input_lumis', 'input_events', 'input_num_files'}
            report = self.reqmgr_db_service.updateRequestStats(
                workload.name(), request_args)
        else:
            raise InvalidSpecParameterValue(
                "can't update value without state transition: %s" %
                request_args)

        return report

    def _handleAssignmentStateTransition(self, workload, request_args, dn):

        req_status = request_args["RequestStatus"]
        if req_status == "assigned" and not request_args.get('Team',
                                                             '').strip():
            raise InvalidSpecParameterValue(
                "Team must be set during workflow assignment: %s" %
                request_args)

        if ('SoftTimeout' in request_args) and ('GracePeriod' in request_args):
            request_args['SoftTimeout'] = int(request_args['SoftTimeout'])
            # TODO: not sure why GracePeriod needs converting when passed from the web interface, but convert it here
            request_args['GracePeriod'] = int(request_args['GracePeriod'])
            request_args['HardTimeout'] = request_args[
                'SoftTimeout'] + request_args['GracePeriod']

        # Only allow extra value updates for the assigned status
        try:
            workload.updateArguments(request_args)
        except Exception as ex:
            msg = traceback.format_exc()
            cherrypy.log("Error for request args %s: %s" % (request_args, msg))
            raise InvalidSpecParameterValue(str(ex))

        # legacy update schema to support ops script
        loadRequestSchema(workload, request_args)
        # update OutputDatasets after ProcessingString and AcquisitionEra are updated
        request_args['OutputDatasets'] = workload.listOutputDatasets()
        report = self.reqmgr_db_service.updateRequestProperty(
            workload.name(), request_args, dn)
        workload.saveCouch(self.config.couch_host, self.config.couch_reqmgr_db)
        return report

    def _handleCascadeUpdate(self, workload, request_args, dn):
        """
        only closed-out and announced have this option
        """
        req_status = request_args["RequestStatus"]
        # check whether the cascade option is set
        if request_args["cascade"]:
            cascade_list = self._retrieveResubmissionChildren(workload.name())
            for req_name in cascade_list:
                self.reqmgr_db_service.updateRequestStatus(
                    req_name, req_status, dn)
        # update original workflow status
        report = self.reqmgr_db_service.updateRequestStatus(
            workload.name(), req_status, dn)
        return report

    def _handleOnlyStateTransition(self, workload, req_status, dn):
        """
        It handles only the state transition. Special handling needed if a
        request is aborted or force completed.
        """
        if req_status in ["aborted", "force-complete"]:
            # cancel the workflow first
            self.gq_service.cancelWorkflow(workload.name())
        # update the request status in couchdb
        report = self.reqmgr_db_service.updateRequestStatus(
            workload.name(), req_status, dn)
        return report

    def _updateRequest(self, workload, request_args):
        dn = cherrypy.request.user.get("dn", "unknown")

        if workload is None:
            (workload, request_args) = self.initialize_clone(
                request_args["OriginalRequestName"])
            return self.post([workload, request_args])

        if "RequestStatus" not in request_args:
            report = self._handleNoStatusUpdate(workload, request_args)

        else:
            req_status = request_args["RequestStatus"]
            if len(request_args) > 1 and req_status == "assigned":
                report = self._handleAssignmentStateTransition(
                    workload, request_args, dn)
            elif len(request_args) == 2 and req_status in ["closed-out", "announced"] and \
                "cascade" in request_args:
                report = self._handleCascadeUpdate(workload, request_args, dn)

            elif len(request_args) == 1:
                report = self._handleOnlyStateTransition(
                    workload, req_status, dn)
            else:
                raise InvalidSpecParameterValue(
                    "can't update value except transition to assigned status: %s"
                    % request_args)

        if report == 'OK':
            return {workload.name(): "OK"}
        else:
            return {workload.name(): "ERROR"}
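
    # Dispatch summary for _updateRequest above (request bodies hypothetical):
    #   {"RequestPriority": 90000}                     -> _handleNoStatusUpdate
    #   {"RequestStatus": "aborted"}                   -> _handleOnlyStateTransition
    #   {"RequestStatus": "closed-out", "cascade": 1}  -> _handleCascadeUpdate
    #   {"RequestStatus": "assigned", "Team": "t1"}    -> _handleAssignmentStateTransition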

    @restcall(formats=[('application/json', JSONFormat())])
    def put(self, workload_pair_list):
        """workloadPairList is a list of tuple containing (workload, requeat_args)"""
        report = []
        for workload, request_args in workload_pair_list:
            result = self._updateRequest(workload, request_args)
            report.append(result)
        return report

    @restcall(formats=[('application/json', JSONFormat())])
    def delete(self, request_name):
        cherrypy.log("INFO: Deleting request document '%s' ..." % request_name)
        try:
            self.reqmgr_db.delete_doc(request_name)
        except CouchError as ex:
            msg = "ERROR: Delete failed."
            cherrypy.log(msg + " Reason: %s" % ex)
            raise cherrypy.HTTPError(404, msg)
            # TODO
        # delete should also happen on WMStats
        cherrypy.log("INFO: Delete '%s' done." % request_name)

    def _update_additional_request_args(self, workload, request_args):
        """
        add to request_args properties which are not initially set by the user.
        This data will be put into couchdb.
        Update request_args here if additional information needs to go into couchdb.
        """
        request_args['RequestWorkflow'] = sanitizeURL(
            "%s/%s/%s/spec" %
            (request_args["CouchURL"], request_args["CouchWorkloadDBName"],
             workload.name()))['url']

        # Add the output datasets if necessary
        # for some bizarre reason OutputDatasets is a list of lists
        request_args['OutputDatasets'] = workload.listOutputDatasets()

        # Add the initial priority only at the creation of the request
        request_args['InitialPriority'] = request_args["RequestPriority"]

        # TODO: remove this after reqmgr2 replaces reqmgr (reqmgr2Only)
        request_args['ReqMgr2Only'] = True
        return

    @restcall(formats=[('application/json', JSONFormat())])
    def post(self,
             workload_pair_list,
             multi_update_flag=False,
             multi_names_flag=False):
        """
        Create a new request and update CouchDB with it.
        The request argument is passed in from validation
        (validation converts cherrypy.request.body data into arguments)

        TODO:
        this method will have some parts factored out so that e.g. clone call
        can share functionality.

        NOTES:
        1) do not strip spaces, #4705 will fail upon injection with spaces;
            currently the chain relies on a number of things coming in #4705
        2) reqInputArgs = Utilities.unidecode(json.loads(body))
            (from ReqMgrRESTModel.putRequest)
        """

        # storing the request document into Couch

        if multi_update_flag:
            return self.put(workload_pair_list)
        if multi_names_flag:
            return self.get(name=workload_pair_list)

        out = []
        for workload, request_args in workload_pair_list:
            self._update_additional_request_args(workload, request_args)

            # legacy update schema to support ops script
            loadRequestSchema(workload, request_args)

            cherrypy.log("INFO: Create request, input args: %s ..." %
                         request_args)
            workload.saveCouch(request_args["CouchURL"],
                               request_args["CouchWorkloadDBName"],
                               metadata=request_args)
            out.append({'request': workload.name()})
        return out
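
# To make the REST flow above concrete: a minimal, hypothetical client-side
# sketch of a creation call. The endpoint path and the argument set are
# assumptions for illustration; a real request carries many more validated
# fields, and CMS certificate auth is assumed.
import json
import requests  # any HTTP client would do

url = "https://cmsweb.cern.ch/reqmgr2/data/request"  # assumed endpoint
payload = {"RequestPriority": 100000,
           "CouchURL": "https://cmsweb.cern.ch/couchdb",
           "CouchWorkloadDBName": "reqmgr_workload_cache"}
resp = requests.post(url, data=json.dumps(payload),
                     headers={"Content-Type": "application/json"},
                     cert=("usercert.pem", "userkey.pem"))  # assumed auth
print(resp.json())  # on success, shaped like [{'request': '<workflow name>'}]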
Example #29
class Tier0PluginTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Setup the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.requestCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.requestCouchDB, 'T0Request')
        self.testDir = self.testInit.generateWorkDir()
        reqDBURL = "%s/%s" % (os.environ['COUCHURL'], self.requestCouchDB)
        self.requestDBWriter = RequestDBWriter(reqDBURL, couchapp="T0Request")
        self.requestDBWriter._setNoStale()

        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return

    def tearDown(self):
        """
        _tearDown_

        Clear databases and delete files
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()

        return

    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow;
        every subscription must be unfinished at first
        """

        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = [
            'RepackMergewrite_QuadElectron_RAW',
            'RepackMergewrite_TriPhoton_RAW',
            'RepackMergewrite_SingleNeutrino_RAW'
        ]

        self.stateMap = {'Merge': [], 'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest(
            {'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {
            'spec': specPath,
            'owner': 'ItsAMeMario',
            'name': workflowName,
            'wfType': 'tier0'
        }
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' %
                                     (workflowName, task),
                                     **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        cleanupWorkflow = Workflow(
            task=
            '/Repack_Run481516_StreamZ/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW',
            **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return

    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Populate WMBS with an express-like workflow;
        every subscription must be unfinished at first
        """

        workflowName = 'Express_Run481516_StreamZFast'
        secondLevelTasks = [
            'ExpressMergewrite_StreamZFast_DQM',
            'ExpressMergewrite_ExpressPhysics_FEVT',
            'ExpressAlcaSkimwrite_StreamZFast_ALCARECO',
            'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT',
            'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO'
        ]
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMEndOfRunDQMHarvestMerged'

        self.stateMap = {'Merge': [], 'Harvesting': [], 'Processing Done': []}
        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest(
            {'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        expressTask = workload.newTask('Express')
        for task in secondLevelTasks:
            secondLevelTask = expressTask.addTask(task)
            if task == 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO':
                secondLevelTask.addTask(alcaHarvestTask)
            elif task == 'ExpressMergewrite_StreamZFast_DQM':
                secondLevelTask.addTask(dqmHarvestTask)

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS
        sharedFileset = Fileset(name='TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)

        options = {
            'spec': specPath,
            'owner': 'ItsAMeMario',
            'name': workflowName,
            'wfType': 'tier0'
        }
        topLevelWorkflow = Workflow(task='/%s/Express' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(sharedFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topLevelSub)
        for task in [
                x for x in secondLevelTasks if not x.count('CleanupUnmerged')
        ]:
            secondLevelWorkflow = Workflow(task='/%s/Express/%s' %
                                           (workflowName, task),
                                           **options)
            secondLevelWorkflow.create()
            mergeSub = Subscription(sharedFileset, secondLevelWorkflow)
            mergeSub.create()
            self.stateMap['Harvesting'].append(mergeSub)

        for (parent, child) in [
            ('ExpressAlcaSkimwrite_StreamZFast_ALCARECO', alcaHarvestTask),
            ('ExpressMergewrite_StreamZFast_DQM', dqmHarvestTask)
        ]:
            harvestingWorkflow = Workflow(task='/%s/Express/%s/%s' %
                                          (workflowName, parent, child),
                                          **options)
            harvestingWorkflow.create()
            harvestingSub = Subscription(sharedFileset, harvestingWorkflow)
            harvestingSub.create()
            self.stateMap['Processing Done'].append(harvestingSub)

        return

    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow;
        every subscription must be unfinished at first
        """

        # Populate disk and WMBS
        testArguments = PromptRecoWorkloadFactory.getTestArguments()

        workflowName = 'PromptReco_Run195360_Cosmics'
        factory = PromptRecoWorkloadFactory()
        testArguments["EnableHarvesting"] = True
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        workload = factory.factoryWorkloadConstruction(workflowName,
                                                       testArguments)

        wmbsHelper = WMBSHelper(workload,
                                'Reco',
                                'SomeBlock',
                                cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask,
                                              wmbsHelper.topLevelFileset)

        self.stateMap = {
            'AlcaSkim': [],
            'Merge': [],
            'Harvesting': [],
            'Processing Done': []
        }
        self.orderedStates = [
            'AlcaSkim', 'Merge', 'Harvesting', 'Processing Done'
        ]

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest(
            {'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = [
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
            '%s/RecoMergewrite_AOD', '%s/RecoMergewrite_DQM',
            '%s/RecoMergewrite_RECO'
        ]
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(
            name=
            '/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO'
        )
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' %
                                          (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' %
                                          (topLevelTask, dataTier, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(
            name=
            '/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM'
        )
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return

    def verifyStateTransitions(self,
                               transitionMethod='markFinished',
                               transitionTrigger=True):
        """
        _verifyStateTransitions_

        Utility method which goes through the list of states in self.orderedStates and
        finishes the tasks that demand a state transition at each step, according
        to the given transition method and trigger.
        It verifies that the request document in WMStats moves along with the transitions.
        """

        for idx in range(0, len(self.orderedStates) * 2):
            nextState = self.orderedStates[idx // 2]
            if (idx // 2) == 0:
                currentState = 'Closed'
            else:
                currentState = self.orderedStates[idx // 2 - 1]
            if idx % 2 == 0:
                for transitionObject in self.stateMap[nextState][:-1]:
                    method = getattr(transitionObject, transitionMethod)
                    method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus(
                    [currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus(
                    [nextState])
                self.assertEqual(
                    len(currentStateWorkflows), 1,
                    'Workflow moved incorrectly from %s' % currentState)
                self.assertEqual(
                    len(nextStateWorkflows), 0,
                    'Workflow moved incorrectly to %s' % nextState)
            else:
                transitionObject = self.stateMap[nextState][-1]
                method = getattr(transitionObject, transitionMethod)
                method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus(
                    [currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus(
                    [nextState])
                self.assertEqual(
                    len(currentStateWorkflows), 0,
                    'Workflow did not move correctly from %s' % currentState)
                self.assertEqual(
                    len(nextStateWorkflows), 1,
                    'Workflow did not move correctly to %s' % nextState)
        return
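
        # The index arithmetic above is compact; a standalone sketch (with a
        # hypothetical two-state map) shows the schedule it produces -- for
        # each state, one pass that finishes all but the last subscription
        # (no transition expected yet), then one that finishes the last and
        # triggers the transition:
        #
        #   orderedStates = ['Merge', 'Processing Done']
        #   for idx in range(0, len(orderedStates) * 2):
        #       nextState = orderedStates[idx // 2]
        #       currentState = 'Closed' if idx // 2 == 0 else orderedStates[idx // 2 - 1]
        #       phase = 'all but last' if idx % 2 == 0 else 'last one'
        #       print(idx, currentState, '->', nextState, ':', phase)
        #
        #   0 Closed -> Merge : all but last
        #   1 Closed -> Merge : last one
        #   2 Merge -> Processing Done : all but last
        #   3 Merge -> Processing Done : last one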

    def testA_RepackStates(self):
        """
        _testA_RepackStates_

        Set up an environment with a Repack workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupRepackWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions('markOpen', False)

        return

    def testB_ExpressStates(self):
        """
        _testB_ExpressStates_

        Set up an environment with an Express workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupExpressWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return

    def testC_PromptRecoStates(self):
        """
        _testC_PromptRecoStates_

        Set up an environment with a PromptReco workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupPromptRecoWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return
Example #30
def main():
    BASE_URL = "https://cmsweb.cern.ch"
    COUCH_URL = "%s/couchdb" % BASE_URL

    dbs_url = "https://cmsweb.cern.ch/dbs/prod/global/DBSWriter"
    reqmgrdb_url = "%s/reqmgr_workload_cache" % (COUCH_URL)
    statusToCheck = ['closed-out', 'announced']

    logger = setupLogger()
    dbsSvc = DBS3Reader(dbs_url, logger=logger)
    reqmgrDB = RequestDBWriter(reqmgrdb_url)

    logger.info("Running fixStepChainParentage thread for statuses")
    childDatasets = set()
    requests = set()
    requestsByChildDataset = {}
    for status in statusToCheck:
        reqByChildDS = getChildDatasetsForStepChainMissingParent(
            reqmgrDB, status, logger)
        logger.info("Retrieved %d datasets to fix parentage, in status: %s",
                    len(reqByChildDS), status)
        childDatasets = childDatasets.union(set(reqByChildDS.keys()))
        # We only need one of the StepChain workflows when multiple workflows contain the same datasets (e.g. ACDC)
        requestsByChildDataset.update(reqByChildDS)

        for wfs in reqByChildDS.values():
            requests = requests.union(wfs)
    logger.info("  datasets are: %s", reqByChildDS)

    failedRequests = set()
    totalChildDS = len(childDatasets)
    fixCount = 0
    for childDS in childDatasets:
        logger.info("Resolving parentage for dataset: %s", childDS)
        try:
            failedBlocks = dbsSvc.fixMissingParentageDatasets(childDS,
                                                              insertFlag=True)
        except Exception as exc:
            logger.exception(
                "Failed to resolve parentage data for dataset: %s. Error: %s",
                childDS, str(exc))
            failedRequests = failedRequests.union(
                requestsByChildDataset[childDS])
        else:
            if failedBlocks:
                logger.warning(
                    "These blocks failed to be resolved and will be retried later: %s",
                    failedBlocks)
                failedRequests = failedRequests.union(
                    requestsByChildDataset[childDS])
            else:
                fixCount += 1
                logger.info(
                    "Parentage for '%s' successfully updated. Processed %s out of %s datasets.",
                    childDS, fixCount, totalChildDS)
        logger.info("    dataset sorted: %s\n", childDS)

    requestsToUpdate = requests - failedRequests

    ### FIXME: disable the block below if you do NOT want to update the
    # workflow in ReqMgr2
    for request in requestsToUpdate:
        try:
            reqmgrDB.updateRequestProperty(request,
                                           {"ParentageResolved": True})
            logger.info("Marked ParentageResolved=True for request: %s",
                        request)
        except Exception as exc:
            logger.error(
                "Failed to update 'ParentageResolved' flag to True for request: %s. Error: %s",
                request, str(exc))

    msg = "A total of %d requests have been processed, where %d will have to be retried in the next cycle."
    logger.info(msg, len(requestsToUpdate), len(failedRequests))

    sys.exit(0)
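
# The helper setupLogger() is referenced above but not shown; a minimal
# stand-in consistent with how it is used (an assumption, not the original
# implementation), plus the usual entry-point guard:
import logging
import sys

def setupLogger():
    # Stream INFO-level messages to stdout in a simple timestamped format.
    logger = logging.getLogger("fixStepChainParentage")
    logger.setLevel(logging.INFO)
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
    logger.addHandler(handler)
    return logger

if __name__ == "__main__":
    main()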
Example #32
class Tier0FeederPoller(BaseWorkerThread):
    def __init__(self, config):
        """
        _init_

        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package="T0.WMBS",
                                     logger=logging,
                                     dbinterface=myThread.dbi)

        self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile
        self.specDirectory = config.Tier0Feeder.specDirectory
        self.dropboxuser = getattr(config.Tier0Feeder, "dropboxuser", None)
        self.dropboxpass = getattr(config.Tier0Feeder, "dropboxpass", None)

        self.dqmUploadProxy = getattr(config.Tier0Feeder, "dqmUploadProxy",
                                      None)
        self.serviceProxy = getattr(config.Tier0Feeder, "serviceProxy", None)

        self.localRequestCouchDB = RequestDBWriter(
            config.AnalyticsDataCollector.localT0RequestDBURL,
            couchapp=config.AnalyticsDataCollector.RequestCouchApp)

        self.injectedRuns = set()

        hltConfConnectUrl = config.HLTConfDatabase.connectUrl
        dbFactoryHltConf = DBFactory(logging,
                                     dburl=hltConfConnectUrl,
                                     options={})
        self.dbInterfaceHltConf = dbFactoryHltConf.connect()
        daoFactoryHltConf = DAOFactory(package="T0.WMBS",
                                       logger=logging,
                                       dbinterface=self.dbInterfaceHltConf)
        self.getHLTConfigDAO = daoFactoryHltConf(
            classname="RunConfig.GetHLTConfig")

        storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl
        dbFactoryStorageManager = DBFactory(logging,
                                            dburl=storageManagerConnectUrl,
                                            options={})
        self.dbInterfaceStorageManager = dbFactoryStorageManager.connect()

        self.dbInterfaceSMNotify = None
        if hasattr(config, "SMNotifyDatabase"):
            smNotifyConnectUrl = config.SMNotifyDatabase.connectUrl
            dbFactorySMNotify = DBFactory(logging,
                                          dburl=smNotifyConnectUrl,
                                          options={})
            self.dbInterfaceSMNotify = dbFactorySMNotify.connect()

        self.getExpressReadyRunsDAO = None
        if hasattr(config, "PopConLogDatabase"):
            popConLogConnectUrl = getattr(config.PopConLogDatabase,
                                          "connectUrl", None)
            if popConLogConnectUrl != None:
                dbFactoryPopConLog = DBFactory(logging,
                                               dburl=popConLogConnectUrl,
                                               options={})
                dbInterfacePopConLog = dbFactoryPopConLog.connect()
                daoFactoryPopConLog = DAOFactory(
                    package="T0.WMBS",
                    logger=logging,
                    dbinterface=dbInterfacePopConLog)
                self.getExpressReadyRunsDAO = daoFactoryPopConLog(
                    classname="Tier0Feeder.GetExpressReadyRuns")

        self.haveT0DataSvc = False
        if hasattr(config, "T0DataSvcDatabase"):
            t0datasvcConnectUrl = getattr(config.T0DataSvcDatabase,
                                          "connectUrl", None)
            if t0datasvcConnectUrl != None:
                self.haveT0DataSvc = True
                dbFactoryT0DataSvc = DBFactory(logging,
                                               dburl=t0datasvcConnectUrl,
                                               options={})
                dbInterfaceT0DataSvc = dbFactoryT0DataSvc.connect()
                self.daoFactoryT0DataSvc = DAOFactory(
                    package="T0.WMBS",
                    logger=logging,
                    dbinterface=dbInterfaceT0DataSvc)

        #
        # Set deployment ID
        #

        SetDeploymentIdDAO = self.daoFactory(
            classname="Tier0Feeder.SetDeploymentID")
        GetDeploymentIdDAO = self.daoFactory(
            classname="Tier0Feeder.GetDeploymentID")
        try:
            self.deployID = GetDeploymentIdDAO.execute()
            if self.deployID == 0:
                self.deployID = int(
                    datetime.datetime.now().strftime("%y%m%d%H%M%S"))
                SetDeploymentIdDAO.execute(self.deployID)

        except:
            logging.exception(
                "Something went wrong with setting deployment ID")
            raise

        return

    @timeFunction
    def algorithm(self, parameters=None):
        """
        _algorithm_

        """
        logging.debug("Running Tier0Feeder algorithm...")
        myThread = threading.currentThread()

        findNewRunsDAO = self.daoFactory(classname="Tier0Feeder.FindNewRuns")
        findNewRunStreamsDAO = self.daoFactory(
            classname="Tier0Feeder.FindNewRunStreams")
        findNewExpressRunsDAO = self.daoFactory(
            classname="Tier0Feeder.FindNewExpressRuns")
        releaseExpressDAO = self.daoFactory(
            classname="Tier0Feeder.ReleaseExpress")
        feedStreamersDAO = self.daoFactory(
            classname="Tier0Feeder.FeedStreamers")
        markWorkflowsInjectedDAO = self.daoFactory(
            classname="Tier0Feeder.MarkWorkflowsInjected")

        tier0Config = None
        try:
            tier0Config = loadConfigurationFile(self.tier0ConfigFile)
        except:
            # usually happens when there are syntax errors in the configuration
            logging.exception(
                "Cannot load Tier0 configuration file, not configuring new runs and run/streams"
            )

        # only configure new runs and run/streams if we have a valid Tier0 configuration
        if tier0Config != None:

            #
            # we don't inject data if the Tier0Config is unreadable
            #
            # discover new data from StorageManager and inject into Tier0
            # (if the config specifies a list of runs do it only once)
            #
            # replays call data discovery only once (and ignore data status)
            #
            try:
                if tier0Config.Global.InjectRuns == None:
                    StorageManagerAPI.injectNewData(
                        self.dbInterfaceStorageManager,
                        self.dbInterfaceHltConf,
                        self.dbInterfaceSMNotify,
                        streamerPNN=tier0Config.Global.StreamerPNN,
                        minRun=tier0Config.Global.InjectMinRun,
                        maxRun=tier0Config.Global.InjectMaxRun)
                else:
                    injectRuns = set()
                    for injectRun in tier0Config.Global.InjectRuns:
                        if injectRun not in self.injectedRuns:
                            injectRuns.add(injectRun)
                    for injectRun in injectRuns:
                        StorageManagerAPI.injectNewData(
                            self.dbInterfaceStorageManager,
                            self.dbInterfaceHltConf,
                            self.dbInterfaceSMNotify,
                            streamerPNN=tier0Config.Global.StreamerPNN,
                            injectRun=injectRun)
                        self.injectedRuns.add(injectRun)
            except:
                # shouldn't happen, just a catch all insurance
                logging.exception(
                    "Something went wrong with data retrieval from StorageManager"
                )

            #
            # Set deployment ID
            #
            setDeploymentId(tier0Config, self.deployID)
            logging.info("Deploy ID: %d" % tier0Config.Global.DeploymentID)

            #
            # find new runs, setup global run settings and stream/dataset/trigger mapping
            #
            runHltkeys = findNewRunsDAO.execute(transaction=False)
            for run, hltkey in sorted(runHltkeys.items()):

                hltConfig = None

                # local runs have no hltkey and are configured differently
                if hltkey != None:

                    # retrieve HLT configuration and make sure it's usable
                    try:
                        hltConfig = self.getHLTConfigDAO.execute(
                            hltkey, transaction=False)
                        if hltConfig['process'] == None or len(
                                hltConfig['mapping']) == 0:
                            raise RuntimeError(
                                "HLTConfDB query returned no process or mapping"
                            )
                    except:
                        logging.exception(
                            "Can't retrieve hltkey %s for run %d" %
                            (hltkey, run))
                        continue

                try:
                    RunConfigAPI.configureRun(tier0Config, run, hltConfig)
                except:
                    logging.exception("Can't configure for run %d" % (run))

            #
            # find unconfigured run/stream with data
            # populate RunConfig, setup workflows/filesets/subscriptions
            #
            runStreams = findNewRunStreamsDAO.execute(transaction=False)
            for run in sorted(runStreams.keys()):
                for stream in sorted(runStreams[run]):
                    try:
                        RunConfigAPI.configureRunStream(
                            tier0Config, run, stream, self.specDirectory,
                            self.dqmUploadProxy)
                    except:
                        logging.exception(
                            "Can't configure for run %d and stream %s" %
                            (run, stream))

        #
        # stop and close runs based on RunSummary and StorageManager records
        #
        RunLumiCloseoutAPI.stopRuns(self.dbInterfaceStorageManager)
        RunLumiCloseoutAPI.closeRuns(self.dbInterfaceStorageManager)

        #
        # release runs for Express
        #
        runs = findNewExpressRunsDAO.execute(transaction=False)

        if len(runs) > 0:

            binds = []
            for run in runs:
                binds.append({'RUN': run})

            if self.getExpressReadyRunsDAO != None:
                runs = self.getExpressReadyRunsDAO.execute(binds=binds,
                                                           transaction=False)

            if len(runs) > 0:

                binds = []
                for run in runs:
                    binds.append({'RUN': run})

                releaseExpressDAO.execute(binds=binds, transaction=False)

        #
        # release runs for PromptReco
        # check PromptRecoStatus first, i.e. big red button
        #
        if self.getPromptRecoStatusT0DataSvc():
            RunConfigAPI.releasePromptReco(tier0Config, self.specDirectory,
                                           self.dqmUploadProxy)

        #
        # insert express and reco configs into Tier0 Data Service
        #
        if self.haveT0DataSvc:
            self.updateRunConfigT0DataSvc()
            self.updateRunStreamDoneT0DataSvc()
            self.updateExpressConfigsT0DataSvc()
            self.updateRecoConfigsT0DataSvc()
            self.updateRecoReleaseConfigsT0DataSvc()
            self.lockDatasetsT0DataSvc()

        #
        # mark express and repack workflows as injected if certain conditions are met
        # (we don't do it immediately to prevent the TaskArchiver from cleaning up too early)
        #
        markWorkflowsInjectedDAO.execute(self.dbInterfaceSMNotify != None,
                                         transaction=False)

        #
        # close stream/lumis for run/streams that are active (fileset exists and open)
        #
        RunLumiCloseoutAPI.closeLumiSections(self.dbInterfaceStorageManager)

        #
        # feed new data into existing filesets
        #
        try:
            myThread.transaction.begin()
            feedStreamersDAO.execute(conn=myThread.transaction.conn,
                                     transaction=True)
        except:
            logging.exception("Can't feed data, bailing out...")
            raise
        else:
            myThread.transaction.commit()

        #
        # run ended and run/stream fileset open
        #    => check for complete lumi_closed record, all lumis finally closed and all data feed
        #          => if all conditions satisfied, close the run/stream fileset
        #
        RunLumiCloseoutAPI.closeRunStreamFilesets()

        #
        # check and delete active split lumis
        #
        RunLumiCloseoutAPI.checkActiveSplitLumis()

        #
        # insert workflows into CouchDB for monitoring
        #
        self.feedCouchMonitoring()

        #
        # Update Couch when Repack and Express have closed input filesets (analogous to the old T0 closeout)
        #
        self.closeOutRealTimeWorkflows()

        #
        # send repacked notifications to StorageManager
        #
        if self.dbInterfaceSMNotify:
            StorageManagerAPI.markRepacked(self.dbInterfaceSMNotify)

        #
        # upload PCL conditions to DropBox
        #
        ConditionUploadAPI.uploadConditions(self.dropboxuser, self.dropboxpass,
                                            self.serviceProxy)

        return

    def feedCouchMonitoring(self):
        """
        _feedCouchMonitoring_

        check for workflows that haven't been uploaded to Couch for monitoring yet

        """
        getStreamerWorkflowsForMonitoringDAO = self.daoFactory(
            classname="Tier0Feeder.GetStreamerWorkflowsForMonitoring")
        getPromptRecoWorkflowsForMonitoringDAO = self.daoFactory(
            classname="Tier0Feeder.GetPromptRecoWorkflowsForMonitoring")
        markTrackedWorkflowMonitoringDAO = self.daoFactory(
            classname="Tier0Feeder.MarkTrackedWorkflowMonitoring")
        workflows = getStreamerWorkflowsForMonitoringDAO.execute()
        workflows += getPromptRecoWorkflowsForMonitoringDAO.execute()

        if len(workflows) == 0:
            logging.debug(
                "No workflows to publish to couch monitoring, doing nothing")

        if workflows:
            logging.debug(" Going to publish %d workflows" % len(workflows))
            for (workflowId, run, workflowName) in workflows:
                logging.info(" Publishing workflow %s to monitoring" %
                             workflowName)
                #TODO: add more information about the workflow if it needs to be kept longer than
                # the workflow life cycle.
                doc = {}
                doc["RequestName"] = workflowName
                doc["Run"] = run
                response = self.localRequestCouchDB.insertGenericRequest(doc)
                if response == "OK" or "EXISTS":
                    logging.info(" Successfully uploaded request %s" %
                                 workflowName)
                    # Here we have to trust the insert, if it doesn't happen will be easy to spot on the logs
                    markTrackedWorkflowMonitoringDAO.execute(workflowId)

        return

    def closeOutRealTimeWorkflows(self):
        """
        _closeOutRealTimeWorkflows_

        Updates couch with the closeout status of Repack and Express
        PromptReco should be closed out automatically

        """
        getNotClosedOutWorkflowsDAO = self.daoFactory(
            classname="Tier0Feeder.GetNotClosedOutWorkflows")
        workflows = getNotClosedOutWorkflowsDAO.execute()

        if len(workflows) == 0:
            logging.debug(
                "No workflows to publish to couch monitoring, doing nothing")

        if workflows:
            for workflow in workflows:
                (workflowId, filesetId, filesetOpen, workflowName) = workflow
                # find returns -1 if the string is not found
                if workflowName.find('PromptReco') >= 0:
                    logging.debug(
                        "Instantly closing out PromptReco workflow %s" %
                        workflowName)
                    self.updateClosedState(workflowName, workflowId)
                else:
                    # Check if the fileset (which you already know) is closed or not
                    # FIXME: is there no better way to do it? What comes from the DAO is a
                    # string; casting to bool or int doesn't help much. It works like this:
                    if filesetOpen == '0':
                        self.updateClosedState(workflowName, workflowId)

        return

    def updateClosedState(self, workflowName, workflowId):
        """
        _updateClosedState_

        Mark a workflow as Closed

        """
        markCloseoutWorkflowMonitoringDAO = self.daoFactory(
            classname="Tier0Feeder.MarkCloseoutWorkflowMonitoring")

        response = self.localRequestCouchDB.updateRequestStatus(
            workflowName, 'Closed')

        if response == "OK" or "EXISTS":
            logging.debug("Successfully closed workflow %s" % workflowName)
            markCloseoutWorkflowMonitoringDAO.execute(workflowId)

        return

    def getPromptRecoStatusT0DataSvc(self):
        """
        _getPromptRecoStatusT0DataSvc_

        Check the PromptRecoStatus (enabled/disabled) set by the ORM

        """
        getPromptRecoStatusDAO = self.daoFactoryT0DataSvc(
            classname="T0DataSvc.GetPromptRecoStatus")
        status = getPromptRecoStatusDAO.execute(transaction=False)
        return status

    def updateRunConfigT0DataSvc(self):
        """
        _updateRunConfigT0DataSvc_

        Check for new runs and push their info into the Tier0 Data Service.

        """
        getNewRunDAO = self.daoFactory(classname="T0DataSvc.GetNewRun")
        newRun = getNewRunDAO.execute(transaction=False)

        if len(newRun) > 0:

            binds = []
            for runInfo in newRun:
                binds.append({
                    'RUN': runInfo['run'],
                    'ACQ_ERA': runInfo['acq_era']
                })

            insertNewRunDAO = self.daoFactoryT0DataSvc(
                classname="T0DataSvc.InsertNewRun")
            insertNewRunDAO.execute(binds=binds, transaction=False)

            for bind in binds:
                del bind['ACQ_ERA']

            updateNewRunDAO = self.daoFactory(
                classname="T0DataSvc.UpdateNewRun")
            updateNewRunDAO.execute(binds=binds, transaction=False)

        return

    def updateRunStreamDoneT0DataSvc(self):
        """
        _updateRunStreamDoneT0DataSvc_

        Check if a run/stream workflow (express or repack) is finished and
        cleaned up and push a completion record into the Tier0 Data Service.

        """
        getRunStreamDoneDAO = self.daoFactory(
            classname="T0DataSvc.GetRunStreamDone")
        runStreamDone = getRunStreamDoneDAO.execute(transaction=False)

        if len(runStreamDone) > 0:

            binds = []
            for runStream in runStreamDone:
                binds.append({
                    'RUN': runStream['run'],
                    'STREAM': runStream['stream']
                })

            insertRunStreamDoneDAO = self.daoFactoryT0DataSvc(
                classname="T0DataSvc.InsertRunStreamDone")
            insertRunStreamDoneDAO.execute(binds=binds, transaction=False)

            updateRunStreamDoneDAO = self.daoFactory(
                classname="T0DataSvc.UpdateRunStreamDone")
            updateRunStreamDoneDAO.execute(binds=binds, transaction=False)

        return
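
    # All of the update*T0DataSvc methods above and below follow the same
    # two-database handshake: read pending rows from t0ast, insert them into
    # the Tier0 Data Service, then mark them as synchronized back in t0ast.
    # A schematic sketch of the shared pattern (the DAO arguments here are
    # placeholders, not real T0 classes):
    #
    #   def syncToDataSvc(getPendingDAO, insertRemoteDAO, markSyncedDAO):
    #       pending = getPendingDAO.execute(transaction=False)
    #       if not pending:
    #           return
    #       binds = [dict(row) for row in pending]
    #       insertRemoteDAO.execute(binds=binds, transaction=False)  # push to Data Service
    #       markSyncedDAO.execute(binds=binds, transaction=False)    # record sync in t0ast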

    def updateExpressConfigsT0DataSvc(self):
        """
        _updateExpressConfigsT0DataSvc_

        Check which express_config rows are missing
        in the Tier0 Data Service and insert them,
        also record that fact in t0ast

        """
        getExpressConfigsDAO = self.daoFactory(
            classname="T0DataSvc.GetExpressConfigs")
        expressConfigs = getExpressConfigsDAO.execute(transaction=False)

        if len(expressConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in expressConfigs:
                bindsInsert.append({
                    'RUN': config['run'],
                    'STREAM': config['stream'],
                    'CMSSW': config['cmssw'],
                    'SCRAM_ARCH': config['scram_arch'],
                    'RECO_CMSSW': config['reco_cmssw'],
                    'RECO_SCRAM_ARCH': config['reco_scram_arch'],
                    'ALCA_SKIM': config['alca_skim'],
                    'DQM_SEQ': config['dqm_seq'],
                    'GLOBAL_TAG': config['global_tag'][:50],
                    'SCENARIO': config['scenario'],
                    'MULTICORE': config['multicore'],
                    'WRITE_TIERS': config['write_tiers'],
                    'WRITE_DQM': config['write_dqm']
                })
                bindsUpdate.append({
                    'RUN': config['run'],
                    'STREAM': config['stream']
                })

            insertExpressConfigsDAO = self.daoFactoryT0DataSvc(
                classname="T0DataSvc.InsertExpressConfigs")
            insertExpressConfigsDAO.execute(binds=bindsInsert,
                                            transaction=False)

            updateExpressConfigsDAO = self.daoFactory(
                classname="T0DataSvc.UpdateExpressConfigs")
            updateExpressConfigsDAO.execute(binds=bindsUpdate,
                                            transaction=False)

        return

    def updateRecoConfigsT0DataSvc(self):
        """
        _updateRecoConfigsT0DataSvc_

        Check which reco_config rows are missing
        in the Tier0 Data Service and insert them,
        also record that fact in t0ast

        """
        getRecoConfigsDAO = self.daoFactory(
            classname="T0DataSvc.GetRecoConfigs")
        recoConfigs = getRecoConfigsDAO.execute(transaction=False)

        if len(recoConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in recoConfigs:
                bindsInsert.append({
                    'RUN': config['run'],
                    'PRIMDS': config['primds'],
                    'CMSSW': config['cmssw'],
                    'SCRAM_ARCH': config['scram_arch'],
                    'ALCA_SKIM': config['alca_skim'],
                    'PHYSICS_SKIM': config['physics_skim'],
                    'DQM_SEQ': config['dqm_seq'],
                    'GLOBAL_TAG': config['global_tag'][:50],
                    'SCENARIO': config['scenario'],
                    'MULTICORE': config['multicore'],
                    'WRITE_RECO': config['write_reco'],
                    'WRITE_DQM': config['write_dqm'],
                    'WRITE_AOD': config['write_aod'],
                    'WRITE_MINIAOD': config['write_miniaod']
                })
                bindsUpdate.append({
                    'RUN': config['run'],
                    'PRIMDS': config['primds']
                })

            insertRecoConfigsDAO = self.daoFactoryT0DataSvc(
                classname="T0DataSvc.InsertRecoConfigs")
            insertRecoConfigsDAO.execute(binds=bindsInsert, transaction=False)

            updateRecoConfigsDAO = self.daoFactory(
                classname="T0DataSvc.UpdateRecoConfigs")
            updateRecoConfigsDAO.execute(binds=bindsUpdate, transaction=False)

        return

    def updateRecoReleaseConfigsT0DataSvc(self):
        """
        _updateRecoReleaseConfigsT0DataSvc_

        Insert information about PromptReco release into the Tier0 Data Service.

        That means updating the reco_locked records in run granularity (where one
        released dataset means the whole run is locked).

        Also insert and update the run_primds_done records to track PromptReco status.

        """
        getRunDatasetNewDAO = self.daoFactory(
            classname="Tier0Feeder.GetRunDatasetNew")
        getRunDatasetReleasedDAO = self.daoFactory(
            classname="Tier0Feeder.GetRunDatasetReleased")
        getRunDatasetDoneDAO = self.daoFactory(
            classname="Tier0Feeder.GetRunDatasetDone")

        updateRecoReleaseConfigsDAO = self.daoFactory(
            classname="Tier0Feeder.UpdateRecoReleaseConfigs")

        insertRecoLockedDAO = self.daoFactoryT0DataSvc(
            classname="T0DataSvc.InsertRecoLocked")
        updateRecoLockedDAO = self.daoFactoryT0DataSvc(
            classname="T0DataSvc.UpdateRecoLocked")

        insertRunDatasetDoneDAO = self.daoFactoryT0DataSvc(
            classname="T0DataSvc.InsertRunDatasetDone")
        updateRunDatasetDoneDAO = self.daoFactoryT0DataSvc(
            classname="T0DataSvc.UpdateRunDatasetDone")

        # first check for records that are completely new
        # insert the two Tier0 Data Service records for them
        # update reco release records accordingly
        runDatasetNew = getRunDatasetNewDAO.execute(transaction=False)
        foundRuns = set()
        bindsInsertLocked = []
        bindsInsertDone = []
        bindsUpdate = []
        for runDataset in runDatasetNew:
            run = runDataset['run']
            if run not in foundRuns:
                bindsInsertLocked.append({'RUN': run})
                foundRuns.add(run)
            bindsInsertDone.append({
                'RUN': run,
                'PRIMDS': runDataset['primds']
            })
            bindsUpdate.append({
                'RUN': run,
                'PRIMDS_ID': runDataset['primds_id'],
                'IN_DATASVC': 1
            })

        if len(bindsInsertLocked) > 0:
            insertRecoLockedDAO.execute(binds=bindsInsertLocked,
                                        transaction=False)
        if len(bindsInsertDone) > 0:
            insertRunDatasetDoneDAO.execute(binds=bindsInsertDone,
                                            transaction=False)
        if len(bindsUpdate) > 0:
            updateRecoReleaseConfigsDAO.execute(binds=bindsUpdate,
                                                transaction=False)

        # then check for reco release and lock runs in the Tier0 Data Service
        runDatasetReleased = getRunDatasetReleasedDAO.execute(
            transaction=False)
        foundRuns = set()
        bindsUpdateLocked = []
        bindsUpdate = []
        for runDataset in runDatasetReleased:
            run = runDataset['run']
            if run not in foundRuns:
                bindsUpdateLocked.append({'RUN': run})
                foundRuns.add(run)
            bindsUpdate.append({
                'RUN': run,
                'PRIMDS_ID': runDataset['primds_id'],
                'IN_DATASVC': 2
            })

        if len(bindsUpdateLocked) > 0:
            updateRecoLockedDAO.execute(binds=bindsUpdateLocked,
                                        transaction=False)
        if len(bindsUpdate) > 0:
            updateRecoReleaseConfigsDAO.execute(binds=bindsUpdate,
                                                transaction=False)

        # finally check for reco completions and mark this in the Tier0 Data Service
        runDatasetDone = getRunDatasetDoneDAO.execute(transaction=False)
        bindsUpdateDone = []
        bindsUpdate = []
        for runDataset in runDatasetDone:
            run = runDataset['run']
            bindsUpdateDone.append({
                'RUN': run,
                'PRIMDS': runDataset['primds']
            })
            bindsUpdate.append({
                'RUN': run,
                'PRIMDS_ID': runDataset['primds_id'],
                'IN_DATASVC': 3
            })
        if len(bindsUpdateDone) > 0:
            updateRunDatasetDoneDAO.execute(binds=bindsUpdateDone,
                                            transaction=False)
        if len(bindsUpdate) > 0:
            updateRecoReleaseConfigsDAO.execute(binds=bindsUpdate,
                                                transaction=False)

        return
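
    # Reading the three passes above, the IN_DATASVC column acts as a small
    # state machine for each run/primds pair. The numeric meanings below are
    # inferred from this method only, not documented constants:
    #
    #   IN_DATASVC_MEANING = {  # hypothetical name, for illustration
    #       1: "record newly inserted into the Tier0 Data Service",
    #       2: "PromptReco released; run locked in the Data Service",
    #       3: "PromptReco completed; run/primds marked done",
    #   }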

    def lockDatasetsT0DataSvc(self):
        """
        _lockDatasetsT0DataSvc_

        Publish dataset information into the Tier0 Data Service.

        """
        getDatasetLockedDAO = self.daoFactory(
            classname="T0DataSvc.GetDatasetLocked")
        datasetConfigs = getDatasetLockedDAO.execute(transaction=False)

        if len(datasetConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in datasetConfigs:
                bindsInsert.append({'PATH': config['path']})
                bindsUpdate.append({'ID': config['id']})

            insertDatasetLockedDAO = self.daoFactoryT0DataSvc(
                classname="T0DataSvc.InsertDatasetLocked")
            insertDatasetLockedDAO.execute(binds=bindsInsert,
                                           transaction=False)

            updateDatasetLockedDAO = self.daoFactory(
                classname="T0DataSvc.UpdateDatasetLocked")
            updateDatasetLockedDAO.execute(binds=bindsUpdate,
                                           transaction=False)

        return

    def terminate(self, params):
        """
        _terminate_

        Kill the code after one final pass when called by the master thread.

        """
        logging.debug("terminating immediately")
Example #33
class WMStatsTest(unittest.TestCase):
    """
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["WMStats"]
        self.testInit = TestInitCouchApp('WorkQueueServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'wmstats_t'
        self.testInit.setupCouch(dbName, "WMStats")
        reqDBName = "reqmgrdb_t"
        self.testInit.setupCouch(reqDBName, "ReqMgr")
        wmstatsURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, reqDBName)
        self.reqDBWriter = RequestDBWriter(reqDBURL)
        self.wmstatsReader = WMStatsReader(wmstatsURL, reqdbURL=reqDBURL)
        self.wmstatsReader.defaultStale = {}
        self.wmstatsReader.reqDB.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testWMStatsWriter(self):
        # generate a test request schema and exercise the writer/reader APIs
        schema = generate_reqmgr_schema()

        result = self.reqDBWriter.insertGenericRequest(schema[0])
        self.assertEquals(result[0]['ok'], True, 'insert fail')

        result = self.reqDBWriter.updateRequestStatus(schema[0]['RequestName'],
                                                      "failed")
        self.assertEquals(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestStatus("not_exist_schema",
                                                      "assigned")
        self.assertEquals(result, 'Error: document not found')

        result = self.reqDBWriter.updateRequestProperty(
            schema[0]['RequestName'], {"Teams": ['teamA']})
        self.assertEquals(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestProperty(
            "not_exist_schema", {"Teams": ['teamA']})
        self.assertEquals(result, 'Error: document not found')

        totalStats = {
            'TotalEstimatedJobs': 100,
            'TotalInputEvents': 1000,
            'TotalInputLumis': 1234,
            'TotalInputFiles': 5
        }
        result = self.reqDBWriter.updateRequestProperty(
            schema[0]['RequestName'], totalStats)
        self.assertEquals(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestProperty(
            schema[0]['RequestName'], totalStats)
        self.assertEquals(result, 'OK', 'update fail')

        result = self.reqDBWriter.updateRequestProperty(
            "not_exist_schema", totalStats)
        self.assertEquals(result, 'Error: document not found')

        spec1 = newWorkload(schema[0]['RequestName'])
        production = spec1.newTask("Production")
        production.setTaskType("Merge")
        production.setSiteWhitelist(['TEST_SITE'])
        properties = {
            "RequestPriority": spec1.priority(),
            'SiteWhitelist': spec1.getTopLevelTask()[0].siteWhitelist(),
            'OutputDatasets': spec1.listOutputDatasets()
        }
        result = self.reqDBWriter.updateRequestProperty(
            spec1.name(), properties)
        self.assertEquals(result, 'OK', 'update fail')

        spec2 = newWorkload("not_exist_schema")
        production = spec2.newTask("Production")
        production.setTaskType("Merge")
        properties = {
            "RequestPriority": spec2.priority(),
            'SiteWhitelist': spec2.getTopLevelTask()[0].siteWhitelist(),
            'OutputDatasets': spec2.listOutputDatasets()
        }
        result = self.reqDBWriter.updateRequestProperty(
            spec2.name(), properties)
        self.assertEquals(result, 'Error: document not found')

        requests = self.wmstatsReader.getRequestByStatus(["failed"],
                                                         jobInfoFlag=False,
                                                         legacyFormat=True)
        self.assertEquals(requests.keys(), [schema[0]['RequestName']])

        requestCollection = RequestInfoCollection(requests)
        result = requestCollection.getJSONData()
        self.assertEquals(result.keys(), [schema[0]['RequestName']])

        requests = self.wmstatsReader.getActiveData()
        self.assertEquals(requests.keys(), [schema[0]['RequestName']])
        requests = self.wmstatsReader.getRequestByStatus(["failed"])
        self.assertEquals(requests.keys(), [schema[0]['RequestName']])

        requests = self.wmstatsReader.getRequestSummaryWithJobInfo(
            schema[0]['RequestName'])
        self.assertEquals(requests.keys(), [schema[0]['RequestName']])
Example #34
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up the local couch db according to the given conditions:
    1. Cleans the local couch db when a request is completed and reported to the central db.
       This will clean up the local couchdb, local summary db, and local queue.

    2. Cleans old couch docs created earlier than the time threshold.
    
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # set the workqueue service for REST call
        self.config = config
        
    def setup(self, parameters):
        """
        Called at startup
        """
        self.teamName = self.config.Agent.teamName
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL, appName="WMStatsAgent")
        
        #TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL, 
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
        
        if self.useReqMgrForCompletionCheck:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL, 
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            #TODO: remove this when reqmgr2 replaces reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL, 
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
        
        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb  = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
        
        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        Get information from wmbs, workqueue and local couch.
          - It deletes old wmstats docs
          - Archive workflows
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted" % report)
            
            logging.info("Cleaning up the archived request docs")
            report = self.cleanAlreadyArchivedWorkflows()
            logging.info("%s archived workflows deleted" % report)
            
            # archiving only workflows that I own (same team)
            logging.info("Getting requests in '%s' state for team '%s'", self.deletableState,
                                                                           self.teamName)
            endTime = int(time.time()) - self.archiveDelayHours * 3600
            wfs = self.centralRequestDBReader.getRequestByTeamAndStatus(self.teamName,
                                                                        self.deletableState)
            commonWfs = self.centralRequestDBReader.getRequestByStatusAndStartTime(self.deletableState, 
                                                                                   False, endTime)
            deletableWorkflows = list(set(wfs) & set(commonWfs))
            logging.info("Ready to archive normal %s workflows", len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows", numUpdated)
            
            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows", len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows", numUpdated)
            
            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows", len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows", numUpdated)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
    
    def archiveWorkflows(self, workflows, archiveState):
        updated = 0
        for workflowName in workflows:
            if self.cleanAllLocalCouchDB(workflowName):
                if self.useReqMgrForCompletionCheck:
                    try:
                        #TODO: try the reqmgr1 call first; fall back to reqmgr2 if it fails (remove this when reqmgr is fully replaced)
                        self.reqmgrSvc.updateRequestStatus(workflowName, archiveState)
                        #And replace it with this - remove all the exception handling
                        #self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                    except HTTPException as ex:
                        # An HTTPException with status 404 means this is a reqmgr2 request
                        if ex.status == 404:
                            # try reqmgr2 call
                            msg = "%s : reqmgr2 request: %s" % (workflowName, str(ex))
                            logging.warning(msg)
                            self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                        else:
                            msg = "%s : fail to update status with HTTP error: %s" % (workflowName, str(ex))
                            logging.error(msg)
                            raise ex
                            
                    updated += 1 
                    logging.debug("status updated to %s %s",  archiveState, workflowName)
                else:
                    # tier0 update case
                    self.centralRequestDBWriter.updateRequestStatus(workflowName, archiveState)
        return updated
    
    def deleteWorkflowFromJobCouch(self, workflowName, db):
        """
        _deleteWorkflowFromCouch_

        If we are asked to delete the workflow from couch, delete it
        to clear up some space.

        Load the document IDs and revisions out of couch by workflowName,
        then order a delete on them.
        """
        options = {"startkey": [workflowName], "endkey": [workflowName, {}], "reduce": False}
        
        if db == "JobDump":
            couchDB = self.jobsdatabase
            view = "jobsByWorkflowName"
        elif db == "FWJRDump":
            couchDB = self.fwjrdatabase
            view = "fwjrsByWorkflowName"
        elif db == "SummaryStats":
            couchDB = self.statsumdatabase
            view = None
        elif db == "WMStatsAgent":
            couchDB = self.wmstatsCouchDB.getDBInstance()
            view = "allWorkflows"
            options = {"key": workflowName, "reduce": False}
            
        if view is None:
            try:
                committed = couchDB.delete_doc(workflowName)
            except CouchNotFoundError as ex:
                return {'status': 'warning', 'message': "%s: %s" % (workflowName, str(ex))}
        else:
            try:
                jobs = couchDB.loadView(db, view, options = options)['rows']
            except Exception as ex:
                errorMsg = "Error on loading jobs for %s" % workflowName
                logging.warning("%s/n%s" % (str(ex), errorMsg))
                return {'status': 'error', 'message': errorMsg}
            
            for j in jobs:
                doc = {}
                doc["_id"]  = j['value']['id']
                doc["_rev"] = j['value']['rev']
                couchDB.queueDelete(doc)
            committed = couchDB.commit()
        
        if committed:
            #create the error report
            errorReport = {}
            deleted = 0
            status = "ok"
            for data in committed:
                if 'error' in data:
                    errorReport.setdefault(data['error'], 0)
                    errorReport[data['error']] += 1
                    status = "error"
                else:
                    deleted += 1
            return {'status': status, 'delete': deleted, 'message': errorReport}
        else:
            return {'status': 'warning', 'message': "no %s exist" % workflowName}


    def cleanAllLocalCouchDB(self, workflowName):
        logging.info("Deleting %s from JobCouch" % workflowName)
        
        jobReport = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
        logging.debug("%s docs deleted from JobDump", jobReport)
        
        fwjrReport = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
        logging.debug("%s docs deleted from FWJRDump", fwjrReport)
        
        summaryReport = self.deleteWorkflowFromJobCouch(workflowName, "SummaryStats")
        logging.debug("%s docs deleted from SummaryStats", summaryReport)
        
        wmstatsReport = self.deleteWorkflowFromJobCouch(workflowName, "WMStatsAgent")
        logging.debug("%s docs deleted from wmagent_summary", wmstatsReport)
        
        # if any of the procedures failed, return False
        if (jobReport["status"] == "error" or fwjrReport["status"] == "error" or
            wmstatsReport["status"] == "error"):
            return False
        # otherwise return True
        return True
        
    def cleanAlreadyArchivedWorkflows(self):
        """
        Loop through the workflows in couchdb; if a workflow is archived, delete all of its data in couchdb.
        """
        
        numDeletedRequests = 0
        try:
            localWMStats = self.wmstatsCouchDB.getDBInstance()
            options = {"group_level": 1, "reduce": True}
            
            results = localWMStats.loadView("WMStatsAgent", "allWorkflows", options = options)['rows']
            requestNames = [x['key'] for x in results]
            logging.info("There are %s workfows to check for archived status" % len(requestNames))
            
            workflowDict = self.centralRequestDBReader.getStatusAndTypeByRequest(requestNames)
            
            for request, value in workflowDict.items():
                if value[0].endswith("-archived"):
                    self.cleanAllLocalCouchDB(request)
                    numDeletedRequests += 1
        
        except Exception as ex:
            errorMsg = "Error on loading workflow list from wmagent_summary db"
            logging.warning("%s/n%s" % (errorMsg, str(ex)))
            
        return numDeletedRequests
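CleanCouchPoller above pulls everything from nested config sections at setup() time rather than in __init__. Below is a minimal sketch of the attributes it reads, with types.SimpleNamespace (Python 3) standing in for the real WMAgent configuration object; every URL and database name is a placeholder, not a real endpoint:

from types import SimpleNamespace

# Placeholder values only; a real WMAgent config supplies these sections.
config = SimpleNamespace(
    Agent=SimpleNamespace(teamName="testbed"),
    TaskArchiver=SimpleNamespace(
        useReqMgrForCompletionCheck=True,   # False would select the Tier0 branch
        archiveDelayHours=24,               # wait a day before archiving
        localWMStatsURL="http://localhost:5984/wmstats",
        ReqMgr2ServiceURL="https://example.invalid/reqmgr2",
        ReqMgrServiceURL="https://example.invalid/reqmgr",
        DataKeepDays=30,                    # used by algorithm() via deleteOldDocs
    ),
    AnalyticsDataCollector=SimpleNamespace(
        centralRequestDBURL="http://localhost:5984/reqmgr_workload_cache",
        localT0RequestDBURL="http://localhost:5984/t0_request",
        RequestCouchApp="ReqMgr",
    ),
    JobStateMachine=SimpleNamespace(
        couchurl="http://localhost:5984",
        couchDBName="wmagent_jobdump",
        summaryStatsDBName="stat_summary",
    ),
)

poller = CleanCouchPoller(config)
poller.setup(parameters=None)  # opens the couch connections listed above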
Example #35
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up the local couch db according to the given conditions:
    1. Cleans the local couch db when a request is completed and reported to the central db.
       This will clean up the local couchdb, local summary db, and local queue.

    2. Cleans old couch docs created earlier than the time threshold.
    
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # set the workqueue service for REST call
        self.config = config
        
    def setup(self, parameters):
        """
        Called at startup
        """
        self.teamName = self.config.Agent.teamName
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL, 
                                            "WMStatsAgent")
        
        #TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL, 
                                                      couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
        
        if self.useReqMgrForCompletionCheck:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL, 
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            if self.config.TaskArchiver.reqmgr2Only:
                self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            else:
                #TODO: remove this for reqmgr2
                self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL, 
                                                          couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
        
        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb  = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
        
        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        Get information from wmbs, workqueue and local couch.
          - It deletes old wmstats docs
          - Archive workflows
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted" % report)

            # archiving only workflows that I own (same team)
            logging.info("Getting requests in '%s' state for team '%s'" % (self.deletableState,
                                                                           self.teamName))
            endTime = int(time.time()) - self.archiveDelayHours * 3600
            wfs = self.centralRequestDBReader.getRequestByTeamAndStatus(self.teamName,
                                                                        self.deletableState)
            commonWfs = self.centralRequestDBReader.getRequestByStatusAndStartTime(self.deletableState, 
                                                                                   False, endTime)
            deletableWorkflows = list(set(wfs) & set(commonWfs))
            logging.info("Ready to archive normal %s workflows" % len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows" % numUpdated)
            
            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows" % len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows" % numUpdated)
            
            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows" % len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows" % numUpdated)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
    
    def archiveWorkflows(self, workflows, archiveState):
        updated = 0
        for workflowName in workflows:
            if self.cleanAllLocalCouchDB(workflowName):
                if self.useReqMgrForCompletionCheck:
                    
                    if self.config.TaskArchiver.reqmgr2Only:
                        self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                    else:
                        self.reqmgrSvc.updateRequestStatus(workflowName, archiveState)
                    updated += 1 
                    logging.debug("status updated to %s %s" % (archiveState, workflowName))
                else:
                    self.centralRequestDBWriter.updateRequestStatus(workflowName, archiveState)
        return updated
    
    def deleteWorkflowFromJobCouch(self, workflowName, db):
        """
        _deleteWorkflowFromCouch_

        If we are asked to delete the workflow from couch, delete it
        to clear up some space.

        Load the document IDs and revisions out of couch by workflowName,
        then order a delete on them.
        """
        if db == "JobDump":
            couchDB = self.jobsdatabase
            view = "jobsByWorkflowName"
        elif db == "FWJRDump":
            couchDB = self.fwjrdatabase
            view = "fwjrsByWorkflowName"
        elif db == "SummaryStats":
            couchDB = self.statsumdatabase
            view = None
        elif db == "WMStatsAgent":
            couchDB = self.wmstatsCouchDB.getDBInstance()
            view = "jobsByStatusWorkflow"
        
        if view is None:
            try:
                committed = couchDB.delete_doc(workflowName)
            except CouchNotFoundError as ex:
                return {'status': 'warning', 'message': "%s: %s" % (workflowName, str(ex))}
        else:
            options = {"startkey": [workflowName], "endkey": [workflowName, {}], "reduce": False}
            try:
                jobs = couchDB.loadView(db, view, options = options)['rows']
            except Exception as ex:
                errorMsg = "Error on loading jobs for %s" % workflowName
                logging.warning("%s/n%s" % (str(ex), errorMsg))
                return {'status': 'error', 'message': errorMsg}
            
            for j in jobs:
                doc = {}
                doc["_id"]  = j['value']['id']
                doc["_rev"] = j['value']['rev']
                couchDB.queueDelete(doc)
            committed = couchDB.commit()
        
        if committed:
            #create the error report
            errorReport = {}
            deleted = 0
            status = "ok"
            for data in committed:
                if 'error' in data:
                    errorReport.setdefault(data['error'], 0)
                    errorReport[data['error']] += 1
                    status = "error"
                else:
                    deleted += 1
            return {'status': status, 'delete': deleted, 'message': errorReport}
        else:
            return {'status': 'warning', 'message': "no %s exist" % workflowName}


    def cleanAllLocalCouchDB(self, workflowName):
        logging.info("Deleting %s from JobCouch" % workflowName)
        
        jobReport = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
        logging.debug("%s docs deleted from JobDump" % jobReport)
        
        fwjrReport = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
        logging.debug("%s docs deleted from FWJRDump" % fwjrReport)
        
        summaryReport = self.deleteWorkflowFromJobCouch(workflowName, "SummaryStats")
        logging.debug("%s docs deleted from SummaryStats" % summaryReport)
        
        wmstatsReport = self.deleteWorkflowFromJobCouch(workflowName, "WMStatsAgent")
        logging.debug("%s docs deleted from wmagent_summary" % wmstatsReport)
        
        # if any of the procedures failed, return False
        if (jobReport["status"] == "error" or fwjrReport["status"] == "error" or
            wmstatsReport["status"] == "error"):
            return False
        # otherwise return True
        return True
Example #36
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up the local couch db according to the given conditions:
    1. Cleans the local couch db when a request is completed and reported to the central db.
       This will clean up the local couchdb, local summary db, and local queue.

    2. Cleans old couch docs created earlier than the time threshold.
    
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # set the workqueue service for REST call
        self.config = config
        
    def setup(self, parameters):
        """
        Called at startup
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck   = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.archiveDelayHours   = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL, 
                                            "WMStatsAgent")
        
        #TODO: we might need to use local db for Tier0
        self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL, 
                                                   couchapp = self.config.AnalyticsDataCollector.RequestCouchApp)
        
        if self.useReqMgrForCompletionCheck:
            self.deletableState = "announced"
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL, 
                                                   couchapp = self.config.AnalyticsDataCollector.RequestCouchApp)
            if self.config.TaskArchiver.reqmgr2Only:
                self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            else:
                #TODO: remove this for reqmgr2
                self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
        else:
            # Tier0 case
            self.deletableState = "completed"
            # use local for update
            self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL, 
                                                   couchapp = self.config.AnalyticsDataCollector.RequestCouchApp)
        
        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb  = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
        
        statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
        self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted" % report)
            logging.info("getting complete and announced requests")
            
            endTime = int(time.time()) - self.archiveDelayHours * 3600
            deletableWorkflows = self.centralRequestDBReader.getRequestByStatusAndStartTime(self.deletableState, 
                                                                                            False, endTime)
            logging.info("Ready to archive normal %s workflows" % len(deletableWorkflows))
            numUpdated = self.archiveWorkflows(deletableWorkflows, "normal-archived")
            logging.info("archive normal %s workflows" % numUpdated)
            
            abortedWorkflows = self.centralRequestDBReader.getRequestByStatus(["aborted-completed"])
            logging.info("Ready to archive aborted %s workflows" % len(abortedWorkflows))
            numUpdated = self.archiveWorkflows(abortedWorkflows, "aborted-archived")
            logging.info("archive aborted %s workflows" % numUpdated)
            
            rejectedWorkflows = self.centralRequestDBReader.getRequestByStatus(["rejected"])
            logging.info("Ready to archive rejected %s workflows" % len(rejectedWorkflows))
            numUpdated = self.archiveWorkflows(rejectedWorkflows, "rejected-archived")
            logging.info("archive rejected %s workflows" % numUpdated)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
    
    def archiveWorkflows(self, workflows, archiveState):
        updated = 0
        for workflowName in workflows:
            if self.cleanAllLocalCouchDB(workflowName):
                if self.useReqMgrForCompletionCheck:
                    
                    if self.config.TaskArchiver.reqmgr2Only:
                        self.reqmgr2Svc.updateRequestStatus(workflowName, archiveState)
                    else:
                        self.reqmgrSvc.updateRequestStatus(workflowName, archiveState)
                    updated += 1 
                    logging.debug("status updated to %s %s" % (archiveState, workflowName))
                else:
                    self.centralRequestDBWriter.updateRequestStatus(workflowName, archiveState)
        return updated
    
    def deleteWorkflowFromJobCouch(self, workflowName, db):
        """
        _deleteWorkflowFromCouch_

        If we are asked to delete the workflow from couch, delete it
        to clear up some space.

        Load the document IDs and revisions out of couch by workflowName,
        then order a delete on them.
        """
        if (db == "JobDump"):
            couchDB = self.jobsdatabase
            view = "jobsByWorkflowName"
        elif (db == "FWJRDump"):
            couchDB = self.fwjrdatabase
            view = "fwjrsByWorkflowName"
        elif (db == "SummaryStats"):
            couchDB = self.statsumdatabase
            view = None
        elif (db == "WMStats"):
            couchDB = self.wmstatsCouchDB.getDBInstance()
            view = "jobsByStatusWorkflow"
        
        if view is None:
            try:
                committed = couchDB.delete_doc(workflowName)
            except CouchNotFoundError as ex:
                return {'status': 'warning', 'message': "%s: %s" % (workflowName, str(ex))}
        else:
            options = {"startkey": [workflowName], "endkey": [workflowName, {}], "reduce": False}
            try:
                jobs = couchDB.loadView(db, view, options = options)['rows']
            except Exception as ex:
                errorMsg = "Error on loading jobs for %s" % workflowName
                logging.warning("%s/n%s" % (str(ex), errorMsg))
                return {'status': 'error', 'message': errorMsg}
            
            for j in jobs:
                doc = {}
                doc["_id"]  = j['value']['id']
                doc["_rev"] = j['value']['rev']
                couchDB.queueDelete(doc)
            committed = couchDB.commit()
        
        if committed:
            #create the error report
            errorReport = {}
            deleted = 0
            status = "ok"
            for data in committed:
                if 'error' in data:
                    errorReport.setdefault(data['error'], 0)
                    errorReport[data['error']] += 1
                    status = "error"
                else:
                    deleted += 1
            return {'status': status, 'delete': deleted, 'message': errorReport}
        else:
            return {'status': 'warning', 'message': "no %s exist" % workflowName}


    def cleanAllLocalCouchDB(self, workflowName):
        logging.info("Deleting %s from JobCouch" % workflowName)
        
        jobReport = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
        logging.debug("%s docs deleted from JobDump" % jobReport)
        
        fwjrReport = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
        logging.debug("%s docs deleted from FWJRDump" % fwjrReport)
        
        summaryReport = self.deleteWorkflowFromJobCouch(workflowName, "SummaryStats")
        logging.debug("%s docs deleted from SummaryStats" % summaryReport)
        
        wmstatsReport = self.deleteWorkflowFromJobCouch(workflowName, "WMStats")
        logging.debug("%s docs deleted from wmagent_summary" % wmstatsReport)
        
        # if any of the procedures failed, return False
        if (jobReport["status"] == "error" or fwjrReport["status"] == "error" or
            wmstatsReport["status"] == "error"):
            return False
        # otherwise return True
        return True
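All three CleanCouchPoller variants in this collection end deleteWorkflowFromJobCouch with the same folding of the couch bulk-commit result: each row either counts as a deletion or increments a per-error counter, and any error row flips the overall status. The same logic extracted as a standalone function (summarizeCommit is a sketch for illustration, not WMCore code):

def summarizeCommit(committed):
    """Fold couch bulk-delete results into the report shape used above."""
    errorReport = {}
    deleted = 0
    status = "ok"
    for data in committed:
        if 'error' in data:
            errorReport.setdefault(data['error'], 0)
            errorReport[data['error']] += 1
            status = "error"
        else:
            deleted += 1
    return {'status': status, 'delete': deleted, 'message': errorReport}

# Two clean deletions plus one revision conflict:
print(summarizeCommit([{'ok': True}, {'ok': True}, {'error': 'conflict'}]))
# -> {'status': 'error', 'delete': 2, 'message': {'conflict': 1}}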
        
Example #37
    def __init__(self, rest, config):

        super(StepChainParentageFixTask, self).__init__(config)
        self.reqmgrDB = RequestDBWriter(config.reqmgrdb_url)
        self.dbsSvc = DBS3Reader(config.dbs_url, logger=self.logger)
        self.statusToCheck = ["closed-out", "announced", "normal-archived"]
Example #38
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch:  raise an exception on couch failure instead of ignoring
    """
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(
                    **self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(
            self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            #sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(
                self.config.AnalyticsDataCollector.localT0RequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(
                self.config.AnalyticsDataCollector.centralRequestDBURL)

            self.reqmgr2Svc = ReqMgr(
                self.config.TaskArchiver.ReqMgr2ServiceURL)
            #TODO: remove this when reqmgr2 replaces reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager(
                {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

        #Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters=None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp
             ) = self.getFinishedWorkflows()
            # set the data cache which can be used by other threads (no other thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                   and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                   and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()

        myThread.transaction.begin()

        #Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(
            classname="Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut=self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()

        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. Combine those and return:
           finishedwfs - without LogCollect and CleanUp tasks
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp tasks
        """

        finishedWorkflowsDAO = self.daoFactory(
            classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(
            onlySecondary=True)
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:
        
        1. Notify the WorkQueue about finished subscriptions
        2. update dbsbuffer_workflow table with finished subscription
        """

        #Only delete those where the upload and notification succeeded
        logging.info("Found %d candidate workflows for completing: %s" %
                     (len(finishedwfs), finishedwfs.keys()))
        # update the completed flag in dbsbuffer_workflow table so blocks can be closed
        # create updateDBSBufferWorkflowComplete DAO
        if len(finishedwfs) == 0:
            return

        completedWorkflowsDAO = self.dbsDaoFactory(
            classname="UpdateWorkflowsToCompleted")

        centralCouchAlive = True
        try:
            #TODO: need to enable when reqmgr2-wmstats is ready
            #abortedWorkflows = self.reqmgrCouchDBWriter.getRequestByStatus(["aborted"], format = "dict");
            abortedWorkflows = self.centralCouchDBWriter.getRequestByStatus(
                ["aborted"])
            logging.info(
                "There are %d requests in 'aborted' status in central couch." %
                len(abortedWorkflows))
            forceCompleteWorkflows = self.centralCouchDBWriter.getRequestByStatus(
                ["force-complete"])
            logging.info(
                "List of 'force-complete' workflows in central couch: %s" %
                forceCompleteWorkflows)

        except Exception as ex:
            centralCouchAlive = False
            logging.error(
                "we will try again when remote couch server comes back\n%s" %
                str(ex))

        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    #Notify the WorkQueue, if there is one
                    if self.workQueue is not None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)

                    #Now we know the workflow as a whole is gone, we can delete the information from couch
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(
                            workflow, "completed")
                        logging.info("status updated to completed %s" %
                                     workflow)

                    if workflow in abortedWorkflows:
                        #TODO: remove when reqmgr2-wmstats deployed
                        newState = "aborted-completed"
                    elif workflow in forceCompleteWorkflows:
                        newState = "completed"
                    else:
                        newState = None

                    if newState is not None:
                        # update the reqmgr workload document only if request manager is installed
                        if not self.useReqMgrForCompletionCheck:
                            # commented out until all the agents are updated so every request has the new state
                            # TODO: the agent should be able to write to the reqmgr db directly; add the right
                            # group in reqmgr
                            self.requestLocalCouchDB.updateRequestStatus(
                                workflow, newState)
                        else:
                            try:
                                #TODO: try the reqmgr1 call first; fall back to reqmgr2 if it fails (remove this when reqmgr is fully replaced)
                                logging.info(
                                    "Updating status to '%s' in both oracle and couchdb ..."
                                    % newState)
                                self.reqmgrSvc.updateRequestStatus(
                                    workflow, newState)
                                #And replace it with this - remove all the exception handling
                                #self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                            except httplib.HTTPException as ex:
                                # An HTTPException with status 404 means this is a reqmgr2 request
                                if ex.status == 404:
                                    # try reqmgr2 call
                                    msg = "%s : reqmgr2 request: %s" % (
                                        workflow, str(ex))
                                    logging.warning(msg)
                                    self.reqmgr2Svc.updateRequestStatus(
                                        workflow, newState)
                                else:
                                    msg = "%s : fail to update status %s  with HTTP error: %s" % (
                                        workflow, newState, str(ex))
                                    logging.error(msg)
                                    raise ex

                        logging.info("status updated to '%s' : %s" %
                                     (newState, workflow))

                    completedWorkflowsDAO.execute([workflow])

                except TaskArchiverPollerException as ex:

                    #Something didn't go well when notifying the workqueue, abort!!!
                    logging.error(
                        "Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    #Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """

        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId=sub)
            except WorkQueueNoMatchingElements:
                #Subscription wasn't known to WorkQueue, feel free to clean up
                logging.debug(
                    "Local WorkQueue knows nothing about this subscription: %s"
                    % sub)
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)

        return
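The status routing inside completeTasks reduces to a small pure function: a workflow found in the central 'aborted' list moves to 'aborted-completed', one in 'force-complete' moves to 'completed', and everything else keeps its state. A sketch for illustration (pickNewState is not a TaskArchiverPoller method):

def pickNewState(workflow, abortedWorkflows, forceCompleteWorkflows):
    """Mirror of the newState branching in completeTasks above."""
    if workflow in abortedWorkflows:
        # the original carries a TODO to remove this once reqmgr2-wmstats is deployed
        return "aborted-completed"
    if workflow in forceCompleteWorkflows:
        return "completed"
    return None  # no reqmgr status update needed

assert pickNewState("wfA", {"wfA"}, set()) == "aborted-completed"
assert pickNewState("wfB", set(), {"wfB"}) == "completed"
assert pickNewState("wfC", set(), set()) is None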
Example #39
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch:  raise an exception on couch failure instead of ignoring
    """
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        
        self.dbsDaoFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                     logger = myThread.logger, 
                                     dbinterface = myThread.dbi)

        self.config      = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout           = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck   = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        
        if not self.useReqMgrForCompletionCheck:
            #sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL, 
                                                   couchapp = self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)
            
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            #TODO: remove this when reqmgr2 replaces reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

        #Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname = "Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters = None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp) = self.getFinishedWorkflows()
            # set the data cache which can be used by other threads (no other thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                   and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                   and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()

        myThread.transaction.begin()

        #Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(classname = "Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut = self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()

        return
    
    
    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. Combine those and return:
           finishedwfs - without LogCollect and CleanUp tasks
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp tasks
        """
        
        finishedWorkflowsDAO = self.daoFactory(classname = "Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(onlySecondary=True)
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)
        
    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:
        
        1. Notify the WorkQueue about finished subscriptions
        2. update dbsbuffer_workflow table with finished subscription
        """


        #Only delete those where the upload and notification succeeded
        logging.info("Found %d candidate workflows for completing: %s" % (len(finishedwfs),finishedwfs.keys()))
        # update the completed flag in dbsbuffer_workflow table so blocks can be closed
        # create updateDBSBufferWorkflowComplete DAO
        if len(finishedwfs) == 0:
            return
        
        completedWorkflowsDAO = self.dbsDaoFactory(classname = "UpdateWorkflowsToCompleted")
        
        centralCouchAlive = True
        try:
            #TODO: need to enable when reqmgr2-wmstats is ready
            #abortedWorkflows = self.reqmgrCouchDBWriter.getRequestByStatus(["aborted"], format = "dict");
            abortedWorkflows = self.centralCouchDBWriter.getRequestByStatus(["aborted"])
            logging.info("There are %d requests in 'aborted' status in central couch." % len(abortedWorkflows))
            forceCompleteWorkflows = self.centralCouchDBWriter.getRequestByStatus(["force-complete"])
            logging.info("List of 'force-complete' workflows in central couch: %s" % forceCompleteWorkflows)
            
        except Exception as ex:
            centralCouchAlive = False
            logging.error("we will try again when remote couch server comes back\n%s" % str(ex))
        
        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    #Notify the WorkQueue, if there is one
                    if self.workQueue is not None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)
                    
                    #Now we know the workflow as a whole is gone, we can delete the information from couch
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(workflow, "completed")
                        logging.info("status updated to completed %s" % workflow)
    
                    if workflow in abortedWorkflows:
                        #TODO: remove when reqmgr2-wmstats deployed
                        newState = "aborted-completed"
                    elif workflow in forceCompleteWorkflows:
                        newState = "completed"
                    else:
                        newState = None
                        
                    if newState is not None:
                        # update the reqmgr workload document only if request manager is installed
                        if not self.useReqMgrForCompletionCheck:
                            # commented out until all the agents are updated so every request has the new state
                            # TODO: the agent should be able to write to the reqmgr db directly; add the right
                            # group in reqmgr
                            self.requestLocalCouchDB.updateRequestStatus(workflow, newState)
                        else:
                            try:
                                #TODO: try the reqmgr1 call first; fall back to reqmgr2 if it fails (remove this when reqmgr is fully replaced)
                                logging.info("Updating status to '%s' in both oracle and couchdb ..." % newState)
                                self.reqmgrSvc.updateRequestStatus(workflow, newState)
                                #And replace it with this - remove all the exception handling
                                #self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                            except httplib.HTTPException as ex:
                                # An HTTPException with status 404 means this is a reqmgr2 request
                                if ex.status == 404:
                                    # try reqmgr2 call
                                    msg = "%s : reqmgr2 request: %s" % (workflow, str(ex))
                                    logging.warning(msg)
                                    self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                                else:
                                    msg = "%s : fail to update status %s  with HTTP error: %s" % (workflow, newState, str(ex))
                                    logging.error(msg)
                                    raise ex
                            
                        logging.info("status updated to '%s' : %s" % (newState, workflow))
                    
                    completedWorkflowsDAO.execute([workflow])
        
                except TaskArchiverPollerException as ex:

                    #Something didn't go well when notifying the workqueue, abort!!!
                    logging.error("Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    #Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return
    
    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """

        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId = sub)
            except WorkQueueNoMatchingElements:
                #Subscription wasn't known to WorkQueue, feel free to clean up
                logging.info("Local WorkQueue knows nothing about this subscription: %s" % sub)
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)

        return
Example #40
class Tier0PluginTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Setup the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.requestCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.requestCouchDB, 'T0Request')
        self.testDir = self.testInit.generateWorkDir()
        reqDBURL = "%s/%s" % (os.environ['COUCHURL'], self.requestCouchDB)
        self.requestDBWriter = RequestDBWriter(reqDBURL, couchapp="T0Request")
        self.requestDBWriter._setNoStale()

        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return

    def tearDown(self):
        """
        _tearDown_

        Clear databases and delete files
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()

        return

    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow;
        every subscription must be unfinished at first
        """

        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = ['RepackMergewrite_QuadElectron_RAW', 'RepackMergewrite_TriPhoton_RAW',
                      'RepackMergewrite_SingleNeutrino_RAW']

        self.stateMap = {'Merge': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec on disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {'spec': specPath, 'owner': 'ItsAMeMario',
                   'name': workflowName, 'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' % (workflowName, task), **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        cleanupWorkflow = Workflow(task='/%s/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW' % workflowName,
                                   **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return
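
    # Note (added): the wiring above follows the standard WMBS pattern -- a
    # Workflow (task spec) is attached to an input Fileset through a
    # Subscription:
    #
    #     Fileset (input files) --> Subscription <-- Workflow (task)
    #
    # The tests below drive the 'Merge' -> 'Processing Done' transitions by
    # finishing these subscriptions or closing their filesets (see
    # verifyStateTransitions).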

    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Populate WMBS with an express-like workflow;
        every subscription must be unfinished at first
        """

        workflowName = 'Express_Run481516_StreamZFast'
        secondLevelTasks = ['ExpressMergewrite_StreamZFast_DQM', 'ExpressMergewrite_ExpressPhysics_FEVT',
                            'ExpressAlcaSkimwrite_StreamZFast_ALCARECO', 'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
                            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT',
                            'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO']
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMEndOfRunDQMHarvestMerged'

        self.stateMap = {'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec on disk
        workload = newWorkload(workflowName)
        expressTask = workload.newTask('Express')
        for task in secondLevelTasks:
            secondLevelTask = expressTask.addTask(task)
            if task == 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO':
                secondLevelTask.addTask(alcaHarvestTask)
            elif task == 'ExpressMergewrite_StreamZFast_DQM':
                secondLevelTask.addTask(dqmHarvestTask)

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS
        sharedFileset = Fileset(name='TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)

        options = {'spec': specPath, 'owner': 'ItsAMeMario',
                   'name': workflowName, 'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Express' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(sharedFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topLevelSub)
        for task in [x for x in secondLevelTasks if 'CleanupUnmerged' not in x]:
            secondLevelWorkflow = Workflow(task='/%s/Express/%s' % (workflowName, task), **options)
            secondLevelWorkflow.create()
            mergeSub = Subscription(sharedFileset, secondLevelWorkflow)
            mergeSub.create()
            self.stateMap['Harvesting'].append(mergeSub)

        for (parent, child) in [('ExpressAlcaSkimwrite_StreamZFast_ALCARECO', alcaHarvestTask),
                                ('ExpressMergewrite_StreamZFast_DQM', dqmHarvestTask)]:
            harvestingWorkflow = Workflow(task='/%s/Express/%s/%s' % (workflowName, parent, child),
                                          **options)
            harvestingWorkflow.create()
            harvestingSub = Subscription(sharedFileset, harvestingWorkflow)
            harvestingSub.create()
            self.stateMap['Processing Done'].append(harvestingSub)

        return

    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow;
        every subscription must be unfinished at first
        """

        # Populate disk and WMBS
        testArguments = PromptRecoWorkloadFactory.getTestArguments()

        workflowName = 'PromptReco_Run195360_Cosmics'
        factory = PromptRecoWorkloadFactory()
        testArguments["EnableHarvesting"] = True
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        workload = factory.factoryWorkloadConstruction(workflowName, testArguments)

        wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock', cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset)

        self.stateMap = {'AlcaSkim': [],
                         'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                      '%s/RecoMergewrite_AOD',
                      '%s/RecoMergewrite_DQM',
                      '%s/RecoMergewrite_RECO']
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO')
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' % (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM')
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return

    def verifyStateTransitions(self, transitionMethod='markFinished', transitionTrigger=True):
        """
        _verifyStateTransitions_

        Utility method which goes through the list of states in self.orderedStates and
        finishes the tasks that demand a state transition at each step, according to
        the given transition method and trigger.
        It verifies that the request document in WMStats moves through the expected transitions.
        """

        for idx in range(0, len(self.orderedStates) * 2):
            nextState = self.orderedStates[idx // 2]
            if (idx // 2) == 0:
                currentState = 'Closed'
            else:
                currentState = self.orderedStates[idx // 2 - 1]
            if idx % 2 == 0:
                for transitionObject in self.stateMap[nextState][:-1]:
                    method = getattr(transitionObject, transitionMethod)
                    method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus([currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus([nextState])
                self.assertEqual(len(currentStateWorkflows), 1, 'Workflow moved incorrectly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 0, 'Workflow moved incorrectly to %s' % nextState)
            else:
                transitionObject = self.stateMap[nextState][-1]
                method = getattr(transitionObject, transitionMethod)
                method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus([currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus([nextState])
                self.assertEqual(len(currentStateWorkflows), 0,
                                 'Workflow did not move correctly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 1, 'Workflow did not move correctly to %s' % nextState)
        return
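
    # Worked example (added): with orderedStates = ['Merge', 'Processing Done'],
    # the loop above visits each state twice (nextState = orderedStates[idx // 2]):
    #
    #     idx  idx % 2  action
    #      0      0     finish all but the last 'Merge' object  -> must NOT advance
    #      1      1     finish the last 'Merge' object          -> must advance
    #      2      0     finish all but the last 'Processing Done' object
    #      3      1     finish the last 'Processing Done' object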

    def testA_RepackStates(self):
        """
        _testA_RepackStates_

        Set up an environment with a Repack workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupRepackWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions('markOpen', False)

        return

    def testB_ExpressStates(self):
        """
        _testB_ExpressStates_

        Set up an environment with an Express workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupExpressWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return

    def testC_PromptRecoStates(self):
        """
        _testC_PromptRecoStates_

        Set up an environment with a PromptReco workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupPromptRecoWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return
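
# --- Hedged, self-contained sketch (added; not part of the original test) ---
# Demonstrates the "all but the last object must not trigger a transition"
# pattern that verifyStateTransitions() checks. DummySub is a hypothetical
# stand-in for a WMBS Subscription or Fileset.
class DummySub(object):
    def __init__(self):
        self.finished = False

    def markFinished(self, trigger=True):
        self.finished = trigger

stateMap = {'Merge': [DummySub(), DummySub(), DummySub()]}
for sub in stateMap['Merge'][:-1]:
    sub.markFinished(True)
assert not all(s.finished for s in stateMap['Merge'])  # state must not advance yet
stateMap['Merge'][-1].markFinished(True)
assert all(s.finished for s in stateMap['Merge'])      # now the state may advance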
Example #41
class Tier0FeederPoller(BaseWorkerThread):

    def __init__(self, config):
        """
        _init_

        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package = "T0.WMBS",
                                     logger = logging,
                                     dbinterface = myThread.dbi)

        self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile
        self.specDirectory = config.Tier0Feeder.specDirectory
        self.dropboxuser = getattr(config.Tier0Feeder, "dropboxuser", None)
        self.dropboxpass = getattr(config.Tier0Feeder, "dropboxpass", None)

        self.dqmUploadProxy = getattr(config.Tier0Feeder, "dqmUploadProxy", None)
        self.serviceProxy = getattr(config.Tier0Feeder, "serviceProxy", None)

        self.localRequestCouchDB = RequestDBWriter(config.AnalyticsDataCollector.localT0RequestDBURL, 
                                                   couchapp = config.AnalyticsDataCollector.RequestCouchApp)

        self.injectedRuns = set()

        hltConfConnectUrl = config.HLTConfDatabase.connectUrl
        dbFactoryHltConf = DBFactory(logging, dburl = hltConfConnectUrl, options = {})
        self.dbInterfaceHltConf = dbFactoryHltConf.connect()
        daoFactoryHltConf = DAOFactory(package = "T0.WMBS",
                                       logger = logging,
                                       dbinterface = self.dbInterfaceHltConf)
        self.getHLTConfigDAO = daoFactoryHltConf(classname = "RunConfig.GetHLTConfig")

        storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl
        dbFactoryStorageManager = DBFactory(logging, dburl = storageManagerConnectUrl, options = {})
        self.dbInterfaceStorageManager = dbFactoryStorageManager.connect()

        self.dbInterfaceSMNotify = None
        if hasattr(config, "SMNotifyDatabase"):
            smNotifyConnectUrl = config.SMNotifyDatabase.connectUrl
            dbFactorySMNotify = DBFactory(logging, dburl = smNotifyConnectUrl, options = {})
            self.dbInterfaceSMNotify = dbFactorySMNotify.connect()

        self.getExpressReadyRunsDAO = None
        if hasattr(config, "PopConLogDatabase"):
            popConLogConnectUrl = getattr(config.PopConLogDatabase, "connectUrl", None)
            if popConLogConnectUrl is not None:
                dbFactoryPopConLog = DBFactory(logging, dburl = popConLogConnectUrl, options = {})
                dbInterfacePopConLog = dbFactoryPopConLog.connect()
                daoFactoryPopConLog = DAOFactory(package = "T0.WMBS",
                                                 logger = logging,
                                                 dbinterface = dbInterfacePopConLog)
                self.getExpressReadyRunsDAO = daoFactoryPopConLog(classname = "Tier0Feeder.GetExpressReadyRuns")

        self.haveT0DataSvc = False
        if hasattr(config, "T0DataSvcDatabase"):
            t0datasvcConnectUrl = getattr(config.T0DataSvcDatabase, "connectUrl", None)
            if t0datasvcConnectUrl is not None:
                self.haveT0DataSvc = True
                dbFactoryT0DataSvc = DBFactory(logging, dburl = t0datasvcConnectUrl, options = {})
                dbInterfaceT0DataSvc = dbFactoryT0DataSvc.connect()
                self.daoFactoryT0DataSvc = DAOFactory(package = "T0.WMBS",
                                                      logger = logging,
                                                      dbinterface = dbInterfaceT0DataSvc)

        return

    @timeFunction
    def algorithm(self, parameters = None):
        """
        _algorithm_

        """
        logging.debug("Running Tier0Feeder algorithm...")
        myThread = threading.currentThread()

        findNewRunsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewRuns")
        findNewRunStreamsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewRunStreams")
        findNewExpressRunsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewExpressRuns")
        releaseExpressDAO = self.daoFactory(classname = "Tier0Feeder.ReleaseExpress")
        feedStreamersDAO = self.daoFactory(classname = "Tier0Feeder.FeedStreamers")
        markWorkflowsInjectedDAO = self.daoFactory(classname = "Tier0Feeder.MarkWorkflowsInjected")

        tier0Config = None
        try:
            tier0Config = loadConfigurationFile(self.tier0ConfigFile)
        except:
            # usually happens when there are syntax errors in the configuration
            logging.exception("Cannot load Tier0 configuration file, not configuring new runs and run/streams")

        # only configure new runs and run/streams if we have a valid Tier0 configuration
        if tier0Config is not None:

            #
            # we don't inject data if the Tier0Config is unreadable
            #
            # discover new data from StorageManager and inject into Tier0
            # (if the config specifies a list of runs do it only once)
            #
            # replays call data discovery only once (and ignore data status)
            #
            try:
                if tier0Config.Global.InjectRuns is None:
                    StorageManagerAPI.injectNewData(self.dbInterfaceStorageManager,
                                                    self.dbInterfaceHltConf,
                                                    self.dbInterfaceSMNotify,
                                                    streamerPNN = tier0Config.Global.StreamerPNN,
                                                    minRun = tier0Config.Global.InjectMinRun,
                                                    maxRun = tier0Config.Global.InjectMaxRun)
                else:
                    injectRuns = set()
                    for injectRun in tier0Config.Global.InjectRuns:
                        if injectRun not in self.injectedRuns:
                            injectRuns.add(injectRun)
                    for injectRun in injectRuns:
                        StorageManagerAPI.injectNewData(self.dbInterfaceStorageManager,
                                                        self.dbInterfaceHltConf,
                                                        self.dbInterfaceSMNotify,
                                                        streamerPNN = tier0Config.Global.StreamerPNN,
                                                        injectRun = injectRun)
                        self.injectedRuns.add(injectRun)
            except:
                # shouldn't happen, just a catch all insurance
                logging.exception("Something went wrong with data retrieval from StorageManager")


            #
            # find new runs, setup global run settings and stream/dataset/trigger mapping
            #
            runHltkeys = findNewRunsDAO.execute(transaction = False)
            for run, hltkey in sorted(runHltkeys.items()):

                hltConfig = None

                # local runs have no hltkey and are configured differently
                if hltkey is not None:

                    # retrieve HLT configuration and make sure it's usable
                    try:
                        hltConfig = self.getHLTConfigDAO.execute(hltkey, transaction = False)
                        if hltConfig['process'] is None or len(hltConfig['mapping']) == 0:
                            raise RuntimeError("HLTConfDB query returned no process or mapping")
                    except:
                        logging.exception("Can't retrieve hltkey %s for run %d" % (hltkey, run))
                        continue

                try:
                    RunConfigAPI.configureRun(tier0Config, run, hltConfig)
                except:
                    logging.exception("Can't configure for run %d" % (run))

            #
            # find unconfigured run/stream with data
            # populate RunConfig, setup workflows/filesets/subscriptions
            # 
            runStreams = findNewRunStreamsDAO.execute(transaction = False)
            for run in sorted(runStreams.keys()):
                for stream in sorted(runStreams[run]):
                    try:
                        RunConfigAPI.configureRunStream(tier0Config,
                                                        run, stream,
                                                        self.specDirectory,
                                                        self.dqmUploadProxy)
                    except:
                        logging.exception("Can't configure for run %d and stream %s" % (run, stream))

        #
        # stop and close runs based on RunSummary and StorageManager records
        #
        RunLumiCloseoutAPI.stopRuns(self.dbInterfaceStorageManager)
        RunLumiCloseoutAPI.closeRuns(self.dbInterfaceStorageManager)

        #
        # release runs for Express
        #
        runs = findNewExpressRunsDAO.execute(transaction = False)

        if len(runs) > 0:

            binds = []
            for run in runs:
                binds.append( { 'RUN' : run } )

            if self.getExpressReadyRunsDAO is not None:
                runs = self.getExpressReadyRunsDAO.execute(binds = binds, transaction = False)

            if len(runs) > 0:

                binds = []
                for run in runs:
                    binds.append( { 'RUN' : run } )

                releaseExpressDAO.execute(binds = binds, transaction = False)

        #
        # release runs for PromptReco
        # check PromptRecoStatus first, i.e. big red button
        #
        if self.getPromptRecoStatusT0DataSvc():
            RunConfigAPI.releasePromptReco(tier0Config,
                                           self.specDirectory,
                                           self.dqmUploadProxy)

        #
        # insert express and reco configs into Tier0 Data Service
        #
        if self.haveT0DataSvc:
            self.updateRunConfigT0DataSvc()
            self.updateRunStreamDoneT0DataSvc()
            self.updateExpressConfigsT0DataSvc()
            self.updateRecoConfigsT0DataSvc()
            self.updateRecoReleaseConfigsT0DataSvc()
            self.lockDatasetsT0DataSvc()

        #
        # mark express and repack workflows as injected if certain conditions are met
        # (we don't do it immediately to prevent the TaskArchiver from cleaning up too early)
        #
        markWorkflowsInjectedDAO.execute(self.dbInterfaceSMNotify is not None,
                                         transaction = False)

        #
        # close stream/lumis for run/streams that are active (fileset exists and open)
        #
        RunLumiCloseoutAPI.closeLumiSections(self.dbInterfaceStorageManager)

        #
        # feed new data into existing filesets
        #
        try:
            myThread.transaction.begin()
            feedStreamersDAO.execute(conn = myThread.transaction.conn, transaction = True)
        except:
            logging.exception("Can't feed data, bailing out...")
            raise
        else:
            myThread.transaction.commit()

        #
        # run ended and run/stream fileset open
        #    => check for complete lumi_closed record, all lumis finally closed and all data fed
        #          => if all conditions satisfied, close the run/stream fileset
        #
        RunLumiCloseoutAPI.closeRunStreamFilesets()

        #
        # check and delete active split lumis
        #
        RunLumiCloseoutAPI.checkActiveSplitLumis()

        #
        # insert workflows into CouchDB for monitoring
        #
        self.feedCouchMonitoring()

        #
        # Update Couch when Repack and Express have closed input filesets (analog to old T0 closeout)
        #
        self.closeOutRealTimeWorkflows()

        #
        # send repacked notifications to StorageManager
        #
        if self.dbInterfaceSMNotify:
            StorageManagerAPI.markRepacked(self.dbInterfaceSMNotify)


        #
        # upload PCL conditions to DropBox
        #
        ConditionUploadAPI.uploadConditions(self.dropboxuser, self.dropboxpass, self.serviceProxy)

        return

    def feedCouchMonitoring(self):
        """
        _feedCouchMonitoring_

        check for workflows that haven't been uploaded to Couch for monitoring yet

        """
        getStreamerWorkflowsForMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.GetStreamerWorkflowsForMonitoring")
        getPromptRecoWorkflowsForMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.GetPromptRecoWorkflowsForMonitoring")
        markTrackedWorkflowMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.MarkTrackedWorkflowMonitoring")
        workflows = getStreamerWorkflowsForMonitoringDAO.execute()
        workflows += getPromptRecoWorkflowsForMonitoringDAO.execute()

        if len(workflows) == 0:
            logging.debug("No workflows to publish to couch monitoring, doing nothing")

        if workflows:
            logging.debug(" Going to publish %d workflows" % len(workflows))
            for (workflowId, run, workflowName) in workflows:
                logging.info(" Publishing workflow %s to monitoring" % workflowName)
                #TODO: add more information about the workflow if it needs to be kept
                # longer than the workflow life cycle.
                doc = {}
                doc["RequestName"] = workflowName
                doc["Run"] = run
                response = self.localRequestCouchDB.insertGenericRequest(doc)
                if response == "OK" or "EXISTS":
                    logging.info(" Successfully uploaded request %s" % workflowName)
                    # Here we have to trust the insert, if it doesn't happen will be easy to spot on the logs
                    markTrackedWorkflowMonitoringDAO.execute(workflowId)

        return

    def closeOutRealTimeWorkflows(self):
        """
        _closeOutRealTimeWorkflows_

        Updates couch with the closeout status of Repack and Express
        PromptReco should be closed out automatically

        """
        getNotClosedOutWorkflowsDAO = self.daoFactory(classname = "Tier0Feeder.GetNotClosedOutWorkflows")
        workflows = getNotClosedOutWorkflowsDAO.execute()

        if len(workflows) == 0:
            logging.debug("No workflows to publish to couch monitoring, doing nothing")

        if workflows:
            for workflow in workflows:
                (workflowId, filesetId, filesetOpen, workflowName) = workflow
                if 'PromptReco' in workflowName:
                    logging.debug("Immediately closing out PromptReco workflow %s" % workflowName)
                    self.updateClosedState(workflowName, workflowId)
                else:
                    # Check whether the fileset (already known here) is closed
                    # FIXME: no better way to do it? What comes from the DAO is a string,
                    # so casting to bool or int doesn't help much. It works like this:
                    if filesetOpen == '0':
                        self.updateClosedState(workflowName, workflowId)

        return

    def updateClosedState(self, workflowName, workflowId):
        """
        _updateClosedState_

        Mark a workflow as Closed

        """
        markCloseoutWorkflowMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.MarkCloseoutWorkflowMonitoring")

        response = self.localRequestCouchDB.updateRequestStatus(workflowName, 'Closed')

        if response == "OK" or "EXISTS":
            logging.debug("Successfully closed workflow %s" % workflowName)
            markCloseoutWorkflowMonitoringDAO.execute(workflowId)

        return

    def getPromptRecoStatusT0DataSvc(self):
        """
        _getPromptRecoStatusT0DataSvc_

        Check the PromptRecoStatus (enabled/disabled) set by the ORM

        """
        getPromptRecoStatusDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.GetPromptRecoStatus")
        status = getPromptRecoStatusDAO.execute(transaction = False)
        return status

    def updateRunConfigT0DataSvc(self):
        """
        _updateRunConfigT0DataSvc_

        Check for new runs and push their info into the Tier0 Data Service.

        """
        getNewRunDAO = self.daoFactory(classname = "T0DataSvc.GetNewRun")
        newRun = getNewRunDAO.execute(transaction = False)

        if len(newRun) > 0:

            binds = []
            for runInfo in newRun:
                binds.append( { 'RUN' : runInfo['run'],
                                'ACQ_ERA' : runInfo['acq_era'] } )

            insertNewRunDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertNewRun")
            insertNewRunDAO.execute(binds = binds, transaction = False)

            for bind in binds:
                del bind['ACQ_ERA']
            
            updateNewRunDAO = self.daoFactory(classname = "T0DataSvc.UpdateNewRun")
            updateNewRunDAO.execute(binds = binds, transaction = False)

        return

    def updateRunStreamDoneT0DataSvc(self):
        """
        _updateRunStreamDoneT0DataSvc_

        Check if a run/stream workflow (express or repack) is finished and
        cleaned up and push a completion record into the Tier0 Data Service.

        """
        getRunStreamDoneDAO = self.daoFactory(classname = "T0DataSvc.GetRunStreamDone")
        runStreamDone = getRunStreamDoneDAO.execute(transaction = False)

        if len(runStreamDone) > 0:

            binds = []
            for runStream in runStreamDone:
                binds.append( { 'RUN' : runStream['run'],
                                'STREAM' : runStream['stream'] } )

            insertRunStreamDoneDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertRunStreamDone")
            insertRunStreamDoneDAO.execute(binds = binds, transaction = False)

            updateRunStreamDoneDAO = self.daoFactory(classname = "T0DataSvc.UpdateRunStreamDone")
            updateRunStreamDoneDAO.execute(binds = binds, transaction = False)

        return

    def updateExpressConfigsT0DataSvc(self):
        """
        _updateExpressConfigsT0DataSvc_

        Check which express_config rows are missing
        in the Tier0 Data Service and insert them,
        also record that fact in t0ast

        """
        getExpressConfigsDAO = self.daoFactory(classname = "T0DataSvc.GetExpressConfigs")
        expressConfigs = getExpressConfigsDAO.execute(transaction = False)

        if len(expressConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in expressConfigs:
                bindsInsert.append( { 'RUN' : config['run'],
                                      'STREAM' : config['stream'],
                                      'CMSSW' : config['cmssw'],
                                      'SCRAM_ARCH' : config['scram_arch'],
                                      'RECO_CMSSW' : config['reco_cmssw'],
                                      'RECO_SCRAM_ARCH' : config['reco_scram_arch'],
                                      'ALCA_SKIM' : config['alca_skim'],
                                      'DQM_SEQ' : config['dqm_seq'],
                                      'GLOBAL_TAG' : config['global_tag'][:50],
                                      'SCENARIO' : config['scenario'],
                                      'MULTICORE' : config['multicore'],
                                      'WRITE_TIERS' : config['write_tiers'],
                                      'WRITE_DQM' : config['write_dqm'] } )
                bindsUpdate.append( { 'RUN' : config['run'],
                                      'STREAM' : config['stream'] } )

            insertExpressConfigsDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertExpressConfigs")
            insertExpressConfigsDAO.execute(binds = bindsInsert, transaction = False)

            updateExpressConfigsDAO = self.daoFactory(classname = "T0DataSvc.UpdateExpressConfigs")
            updateExpressConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        return

    def updateRecoConfigsT0DataSvc(self):
        """
        _updateRecoConfigsT0DataSvc_

        Check which reco_config rows are missing
        in the Tier0 Data Service and insert them,
        also record that fact in t0ast

        """
        getRecoConfigsDAO = self.daoFactory(classname = "T0DataSvc.GetRecoConfigs")
        recoConfigs = getRecoConfigsDAO.execute(transaction = False)

        if len(recoConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in recoConfigs:
                bindsInsert.append( { 'RUN' : config['run'],
                                      'PRIMDS' : config['primds'],
                                      'CMSSW' : config['cmssw'],
                                      'SCRAM_ARCH' : config['scram_arch'],
                                      'ALCA_SKIM' : config['alca_skim'],
                                      'PHYSICS_SKIM' : config['physics_skim'],
                                      'DQM_SEQ' : config['dqm_seq'],
                                      'GLOBAL_TAG' : config['global_tag'][:50],
                                      'SCENARIO' : config['scenario'],
                                      'MULTICORE' : config['multicore'],
                                      'WRITE_RECO' : config['write_reco'],
                                      'WRITE_DQM' : config['write_dqm'],
                                      'WRITE_AOD' : config['write_aod'],
                                      'WRITE_MINIAOD' : config['write_miniaod'] } )
                bindsUpdate.append( { 'RUN' : config['run'],
                                      'PRIMDS' : config['primds'] } )

            insertRecoConfigsDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertRecoConfigs")
            insertRecoConfigsDAO.execute(binds = bindsInsert, transaction = False)

            updateRecoConfigsDAO = self.daoFactory(classname = "T0DataSvc.UpdateRecoConfigs")
            updateRecoConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        return

    def updateRecoReleaseConfigsT0DataSvc(self):
        """
        _updateRecoReleaseConfigsT0DataSvc_

        Insert information about PromptReco release into the Tier0 Data Service.

        That means updating the reco_locked records in run granularity (where one
        released dataset means the whole run is locked).

        Also insert and update the run_primds_done records to track PromptReco status.

        """
        getRunDatasetNewDAO = self.daoFactory(classname = "Tier0Feeder.GetRunDatasetNew")
        getRunDatasetReleasedDAO = self.daoFactory(classname = "Tier0Feeder.GetRunDatasetReleased")
        getRunDatasetDoneDAO = self.daoFactory(classname = "Tier0Feeder.GetRunDatasetDone")

        updateRecoReleaseConfigsDAO = self.daoFactory(classname = "Tier0Feeder.UpdateRecoReleaseConfigs")

        insertRecoLockedDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertRecoLocked")
        updateRecoLockedDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.UpdateRecoLocked")

        insertRunDatasetDoneDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertRunDatasetDone")
        updateRunDatasetDoneDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.UpdateRunDatasetDone")

        # first check for records that are completely new
        # insert the two Tier0 Data Service records for them
        # update reco release records accordingly
        runDatasetNew = getRunDatasetNewDAO.execute(transaction = False)
        foundRuns = set()
        bindsInsertLocked = []
        bindsInsertDone = []
        bindsUpdate = []
        for runDataset in runDatasetNew:
            run = runDataset['run']
            if run not in foundRuns:
                bindsInsertLocked.append( { 'RUN': run } )
                foundRuns.add(run)
            bindsInsertDone.append( { 'RUN': run,
                                      'PRIMDS': runDataset['primds'] } )
            bindsUpdate.append( { 'RUN' : run,
                                  'PRIMDS_ID': runDataset['primds_id'],
                                  'IN_DATASVC' : 1 } )

        if len(bindsInsertLocked) > 0:
            insertRecoLockedDAO.execute(binds = bindsInsertLocked, transaction = False)
        if len(bindsInsertDone) > 0:
            insertRunDatasetDoneDAO.execute(binds = bindsInsertDone, transaction = False)
        if len(bindsUpdate) > 0:
            updateRecoReleaseConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        # then check for reco release and lock runs in the Tier0 Data Service
        runDatasetReleased = getRunDatasetReleasedDAO.execute(transaction = False)
        foundRuns = set()
        bindsUpdateLocked = []
        bindsUpdate = []
        for runDataset in runDatasetReleased:
            run = runDataset['run']
            if run not in foundRuns:
                bindsUpdateLocked.append( { 'RUN': run } )
                foundRuns.add(run)
            bindsUpdate.append( { 'RUN' : run,
                                  'PRIMDS_ID': runDataset['primds_id'],
                                  'IN_DATASVC' : 2 } )

        if len(bindsUpdateLocked) > 0:
            updateRecoLockedDAO.execute(binds = bindsUpdateLocked, transaction = False)
        if len(bindsUpdate) > 0:
            updateRecoReleaseConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        # finally check for reco completions and mark this in the Tier0 Data Service
        runDatasetDone = getRunDatasetDoneDAO.execute(transaction = False)
        bindsUpdateDone = []
        bindsUpdate = []
        for runDataset in runDatasetDone:
            run = runDataset['run']
            bindsUpdateDone.append( { 'RUN' : run,
                                      'PRIMDS' : runDataset['primds'] } )
            bindsUpdate.append( { 'RUN' : run,
                                  'PRIMDS_ID': runDataset['primds_id'],
                                  'IN_DATASVC' : 3 } )
        if len(bindsUpdateDone) > 0:
            updateRunDatasetDoneDAO.execute(binds = bindsUpdateDone, transaction = False)
        if len(bindsUpdate) > 0:
            updateRecoReleaseConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        return

    def lockDatasetsT0DataSvc(self):
        """
        _lockDatasetsT0DataSvc_

        Publish dataset information into the Tier0 Data Service.

        """
        getDatasetLockedDAO = self.daoFactory(classname = "T0DataSvc.GetDatasetLocked")
        datasetConfigs = getDatasetLockedDAO.execute(transaction = False)

        if len(datasetConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in datasetConfigs:
                bindsInsert.append( { 'PATH' : config['path'] } )
                bindsUpdate.append( { 'ID' : config['id'] } )

            insertDatasetLockedDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertDatasetLocked")
            insertDatasetLockedDAO.execute(binds = bindsInsert, transaction = False)

            updateDatasetLockedDAO = self.daoFactory(classname = "T0DataSvc.UpdateDatasetLocked")
            updateDatasetLockedDAO.execute(binds = bindsUpdate, transaction = False)

        return

    def terminate(self, params):
        """
        _terminate_

        Kill the code after one final pass when called by the master thread.

        """
        logging.debug("terminating immediately")
Example #42
File: Request.py Project: lucacopa/WMCore
class Request(RESTEntity):
    def __init__(self, app, api, config, mount):
        # main CouchDB database where requests/workloads are stored
        RESTEntity.__init__(self, app, api, config, mount)
        self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
        self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db,
                                                 couchapp="ReqMgr")
        # this is needed for the POST validation
        self.reqmgr_aux_db = api.db_handler.get_db(config.couch_reqmgr_aux_db)
        self.gq_service = WorkQueue(config.couch_host,
                                    config.couch_workqueue_db)

    def _requestArgMapFromBrowser(self, request_args):
        """
        This is a mapping function specific to request data coming from the browser

        TODO: use a keyword so it doesn't have to loop through everything in general
        """
        docs = []
        for doc in request_args:
            for key in doc.keys():
                if key.startswith('request'):
                    rid = key.split('request-')[-1]
                    if rid != 'all':
                        docs.append(rid)
                    del doc[key]
        return docs

    def _validateGET(self, param, safe):
        #TODO: need proper validation but for now pass everything
        args_length = len(param.args)
        if args_length == 1:
            safe.kwargs["name"] = param.args[0]
            param.args.pop()
            return

        for prop in param.kwargs:
            safe.kwargs[prop] = param.kwargs[prop]

        for prop in safe.kwargs:
            del param.kwargs[prop]

        return

    def _validateRequestBase(self, param, safe, valFunc, requestName=None):

        data = cherrypy.request.body.read()
        if data:
            request_args = JsonWrapper.loads(data)
            if requestName:
                request_args["RequestName"] = requestName
            if isinstance(request_args, dict):
                request_args = [request_args]

        else:
            # actually this is an error case
            #cherrypy.log(str(param.kwargs))
            request_args = {}
            for prop in param.kwargs:
                request_args[prop] = param.kwargs[prop]

            for prop in request_args:
                del param.kwargs[prop]
            if requestName:
                request_args["RequestName"] = requestName
            request_args = [request_args]

        safe.kwargs['workload_pair_list'] = []
        if isinstance(request_args, dict):
            request_args = [request_args]
        for args in request_args:
            workload, r_args = valFunc(args, self.config,
                                       self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

    def _get_request_names(self, ids):
        "Extract request names from given documents"
        #cherrypy.log("request names %s" % ids)
        doc = {}
        if isinstance(ids, list):
            for rid in ids:
                doc[rid] = 'on'
        elif isinstance(ids, basestring):
            doc[ids] = 'on'

        docs = []
        for key in doc.keys():
            if key.startswith('request'):
                rid = key.split('request-')[-1]
                if rid != 'all':
                    docs.append(rid)
                del doc[key]
        return docs

    def _getMultiRequestArgs(self, multiRequestForm):
        request_args = {}
        for prop in multiRequestForm:
            if prop == "ids":
                request_names = self._get_request_names(
                    multiRequestForm["ids"])
            elif prop == "new_status":
                request_args["RequestStatus"] = multiRequestForm[prop]
            # remove this
            #elif prop in ["CustodialSites", "AutoApproveSubscriptionSites"]:
            #    request_args[prop] = [multiRequestForm[prop]]
            else:
                request_args[prop] = multiRequestForm[prop]
        return request_names, request_args

    def _validateMultiRequests(self, param, safe, valFunc):

        data = cherrypy.request.body.read()
        if data:
            request_names, request_args = self._getMultiRequestArgs(
                JsonWrapper.loads(data))
        else:
            # actually this is an error case
            #cherrypy.log(str(param.kwargs))
            request_names, request_args = self._getMultiRequestArgs(
                param.kwargs)

            for prop in request_args:
                if prop == "RequestStatus":
                    del param.kwargs["new_status"]
                else:
                    del param.kwargs[prop]

            del param.kwargs["ids"]

            #remove this
            #tmp = []
            #for prop in param.kwargs:
            #    tmp.append(prop)
            #for prop in tmp:
            #    del param.kwargs[prop]

        safe.kwargs['workload_pair_list'] = []

        for request_name in request_names:
            request_args["RequestName"] = request_name
            workload, r_args = valFunc(request_args, self.config,
                                       self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

        safe.kwargs["multi_update_flag"] = True

    def validate(self, apiobj, method, api, param, safe):
        # to make validation succeed:
        # move the validated arguments to safe,
        # make param empty,
        # otherwise raise the error
        try:
            if method in ['GET']:
                self._validateGET(param, safe)

            if method == 'PUT':
                args_length = len(param.args)
                if args_length == 1:
                    requestName = param.args[0]
                    param.args.pop()
                else:
                    requestName = None
                self._validateRequestBase(param, safe,
                                          validate_request_update_args,
                                          requestName)
                #TODO: handle multiple clones


#                 if len(param.args) == 2:
#                     #validate clone case
#                     if param.args[0] == "clone":
#                         param.args.pop()
#                         return None, request_args

            if method == 'POST':
                args_length = len(param.args)
                if args_length == 1 and param.args[0] == "multi_update":
                    #special case for multi update from browser.
                    param.args.pop()
                    self._validateMultiRequests(param, safe,
                                                validate_request_update_args)
                else:
                    self._validateRequestBase(param, safe,
                                              validate_request_create_args)

        except Exception as ex:
            #TODO add a proper error message instead of the traceback
            msg = traceback.format_exc()
            cherrypy.log("Error: %s" % msg)
            if hasattr(ex, "message"):
                if hasattr(ex.message, '__call__'):
                    msg = ex.message()
                else:
                    msg = str(ex)
            else:
                msg = str(ex)
            raise InvalidSpecParameterValue(msg)

    def initialize_clone(self, request_name):
        requests = self.reqmgr_db_service.getRequestByNames(request_name)
        clone_args = requests.values()[0]
        # overwrite the name and time stamp.
        initialize_request_args(clone_args, self.config, clone=True)
        # timestamp status update

        spec = loadSpecByType(clone_args["RequestType"])
        workload = spec.factoryWorkloadConstruction(clone_args["RequestName"],
                                                    clone_args)
        return (workload, clone_args)

    @restcall
    def get(self, **kwargs):
        """
        Returns request info depending on the conditions set by kwargs
        Currently defined kwargs are the following:
        statusList, requestNames, requestType, prepID, inputDataset, outputDataset, dateRange
        If jobInfo is True, returns job information about the request as well.

        TODO:
        stuff like this has to be filtered out from the result of this call:
            _attachments: {u'spec': {u'stub': True, u'length': 51712, u'revpos': 2, u'content_type': u'application/json'}}
            _id: maxa_RequestString-OVERRIDE-ME_130621_174227_9225
            _rev: 4-c6ceb2737793aaeac3f1cdf591593da4        

        """
        if len(kwargs) == 0:
            kwargs['status'] = "running"
            options = {"descending": True, 'include_docs': True, 'limit': 200}
            request_docs = self.reqmgr_db.loadView("ReqMgr", "bystatus",
                                                   options)
            return rows([request_docs])

        # list of status
        status = kwargs.get("status", False)
        # list of request names
        name = kwargs.get("name", False)
        request_type = kwargs.get("request_type", False)
        prep_id = kwargs.get("prep_id", False)
        inputdataset = kwargs.get("inputdataset", False)
        outputdataset = kwargs.get("outputdataset", False)
        date_range = kwargs.get("date_range", False)
        campaign = kwargs.get("campaign", False)
        workqueue = kwargs.get("workqueue", False)
        team = kwargs.get("team", False)

        # everything should use a stale view; this is only needed for tests
        _nostale = kwargs.get("_nostale", False)
        option = {}
        if _nostale:
            self.reqmgr_db_service._setNoStale()

        request_info = []

        if status and not team and not request_type:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bystatus", option, status))
        if status and team:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byteamandstatus", option, [[team, status]]))
        if status and request_type:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byteamandstatus", option, [[team, status]]))
        if name:
            request_info.append(self.reqmgr_db_service.getRequestByNames(name))
        if prep_id:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byprepid", option, prep_id))
        if inputdataset:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byinputdataset", option, inputdataset))
        if outputdataset:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byoutputdataset", option, outputdataset))
        if date_range:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bydate", option, date_range))
        if campaign:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bycampaign", option, campaign))
        if workqueue:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byworkqueue", option, workqueue))

        #get the intersection of the request info
        result = self._intersection_of_request_info(request_info)
        if len(result) == 0:
            return []
        return rows([result])

    def _intersection_of_request_info(self, request_info):
        requests = {}
        if len(request_info) < 1:
            return requests

        request_key_set = set(request_info[0].keys())
        for info in request_info:
            request_key_set &= set(info.keys())
        #TODO: need to assume some data might not contain the included docs
        for request_name in request_key_set:
            requests[request_name] = request_info[0][request_name]
        return requests

    def _get_couch_view(self, couchdb, couchapp, view, options, keys):

        if not options:
            options = {}
        options.setdefault("include_docs", True)
        if isinstance(keys, basestring):
            keys = [keys]
        result = couchdb.loadView(couchapp, view, options, keys)

        request_info = {}
        for item in result["rows"]:
            request_info[item["id"]] = item.get('doc', None)
            if request_info[item["id"]] != None:
                self.filterCouchInfo(request_info[item["id"]])
        return request_info

    #TODO move this out of this class
    def filterCouchInfo(self, couchInfo):
        for key in ['_rev', '_attachments']:
            if key in couchInfo:
                del couchInfo[key]

    def get_wmstats_view(self, view, options, keys):
        return self._get_couch_view(self.wmstatsCouch, "WMStats", view,
                                    options, keys)

    def _combine_request(self, request_info, requestAgentUrl, cache):
        keys = {}
        requestAgentUrlList = []
        for row in requestAgentUrl["rows"]:
            request = row["key"][0]
            if request not in keys:
                keys[request] = []
            keys[request].append(row["key"][1])

        for request in request_info:
            for agentUrl in keys[request]:
                requestAgentUrlList.append([request, agentUrl])

        return requestAgentUrlList

    def _updateRequest(self, workload, request_args):

        if workload is None:
            (workload, request_args) = self.initialize_clone(
                request_args["OriginalRequestName"])
            return self.post(workload, request_args)

        dn = cherrypy.request.user.get("dn", "unknown")

        if "total_jobs" in request_args:
            # only the GQ (global queue) updates these stats
            # request_args should contain only these 4 keys: 'total_jobs', 'input_lumis', 'input_events', 'input_num_files'
            report = self.reqmgr_db_service.updateRequestStats(
                workload.name(), request_args)
        # if it is not just updating the status
        else:
            if len(request_args) > 1 or "RequestStatus" not in request_args:
                try:
                    workload.updateArguments(request_args)
                except Exception as ex:
                    msg = traceback.format_exc()
                    cherrypy.log("Error for request args %s: %s" %
                                 (request_args, msg))
                    raise InvalidSpecParameterValue(str(ex))

                # trailing / is needed for the savecouchUrl function
                workload.saveCouch(self.config.couch_host,
                                   self.config.couch_reqmgr_db)

            req_status = request_args.get("RequestStatus", None)
            # If it is aborted or force-complete transition call workqueue to cancel the request
            if req_status == "aborted" or req_status == "force-complete":
                self.gq_service.cancelWorkflow(workload.name())

            report = self.reqmgr_db_service.updateRequestProperty(
                workload.name(), request_args, dn)

        if report == 'OK':
            return {workload.name(): "OK"}
        else:
            return {workload.name(): "ERROR"}

    @restcall
    def put(self, workload_pair_list):
        "workloadPairList is a list of tuple containing (workload, requeat_args)"
        report = []
        for workload, request_args in workload_pair_list:
            result = self._updateRequest(workload, request_args)
            report.append(result)
        return report

    @restcall
    def delete(self, request_name):
        cherrypy.log("INFO: Deleting request document '%s' ..." % request_name)
        try:
            self.reqmgr_db.delete_doc(request_name)
        except CouchError as ex:
            msg = "ERROR: Delete failed."
            cherrypy.log(msg + " Reason: %s" % ex)
            raise cherrypy.HTTPError(404, msg)
        # TODO
        # delete should also happen on WMStats
        cherrypy.log("INFO: Delete '%s' done." % request_name)

    @restcall
    def post(self, workload_pair_list, multi_update_flag=False):
        """
        Create and update CouchDB with a new request.
        The request argument is passed from validation
        (validation converts the cherrypy.request.body data into arguments)
                        
        TODO:
        this method will have some parts factored out so that e.g. clone call
        can share functionality.
        
        NOTES:
        1) do not strip spaces, #4705 will fail upon injection with spaces;
            currently the chain relies on a number of things coming in #4705
        
        2) reqInputArgs = Utilities.unidecode(JsonWrapper.loads(body))
            (from ReqMgrRESTModel.putRequest)
                
        """

        # storing the request document into Couch

        if multi_update_flag:
            return self.put(workload_pair_list)

        out = []
        for workload, request_args in workload_pair_list:
            cherrypy.log("INFO: Create request, input args: %s ..." %
                         request_args)
            request_args['RequestWorkflow'] = sanitizeURL(
                "%s/%s/%s/spec" %
                (request_args["CouchURL"], request_args["CouchWorkloadDBName"],
                 workload.name()))['url']
            workload.saveCouch(request_args["CouchURL"],
                               request_args["CouchWorkloadDBName"],
                               metadata=request_args)
            out.append({'request': workload.name()})
        return out
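To make the branching in _updateRequest above concrete, here is a minimal
standalone sketch (not part of WMCore; the helper name classify_update is
hypothetical) of how a request_args payload is routed to the stats,
spec-arguments, or status-only path:

def classify_update(request_args):
    """Mirror the dispatch logic of _updateRequest above (a sketch)."""
    if "total_jobs" in request_args:
        # global workqueue statistics update
        return "stats"
    if len(request_args) > 1 or "RequestStatus" not in request_args:
        # workload arguments change; the spec gets re-saved to Couch
        return "spec-args"
    # plain RequestStatus transition (may also trigger a workqueue cancel)
    return "status-only"

assert classify_update({"total_jobs": 100, "input_lumis": 5,
                        "input_events": 1000, "input_num_files": 2}) == "stats"
assert classify_update({"RequestStatus": "aborted"}) == "status-only"
assert classify_update({"SiteWhitelist": ["T1_US_FNAL"],
                        "RequestStatus": "assigned"}) == "spec-args"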
Example #43
class TaskArchiverTest(EmulatedUnitTestCase):
    """
    TestCase for TestTaskArchiver module
    """

    _setup_done = False
    _teardown = False
    _maxMessage = 10
    OWNERDN = os.environ.get('OWNERDN', "Generic/OWNERDN")

    def setUp(self):
        """
        setup for test.
        """
        super(TaskArchiverTest, self).setUp()
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)
        self.testInit.setSchema(
            customModules=["WMCore.WMBS", "WMComponent.DBS3Buffer"],
            useDefault=False)
        self.databaseName = "taskarchiver_t_0"
        self.testInit.setupCouch("%s/workloadsummary" % self.databaseName,
                                 "WorkloadSummary")
        self.testInit.setupCouch("%s/jobs" % self.databaseName, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.databaseName, "FWJRDump")
        self.testInit.setupCouch("wmagent_summary_t", "WMStats")
        self.testInit.setupCouch("wmagent_summary_central_t", "WMStats")
        self.testInit.setupCouch("stat_summary_t", "SummaryStats")
        reqmgrdb = "reqmgrdb_t"
        self.testInit.setupCouch(reqmgrdb, "ReqMgr")

        reqDBURL = "%s/%s" % (self.testInit.couchUrl, reqmgrdb)
        self.requestWriter = RequestDBWriter(reqDBURL)
        self.requestWriter.defaultStale = {}

        self.daofactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.getJobs = self.daofactory(classname="Jobs.GetAllJobs")
        self.inject = self.daofactory(
            classname="Workflow.MarkInjectedWorkflows")

        self.testDir = self.testInit.generateWorkDir()
        os.makedirs(os.path.join(self.testDir, 'specDir'))

        self.nJobs = 10
        self.campaignName = 'aCampaign'

        return

    def tearDown(self):
        """
        Database deletion
        """

        self.testInit.clearDatabase(modules=["WMCore.WMBS"])
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        return

    def getConfig(self):
        """
        _createConfig_

        General config file
        """
        config = self.testInit.getConfiguration()
        # self.testInit.generateWorkDir(config)

        config.section_("General")
        config.General.workDir = "."
        config.General.ReqMgr2ServiceURL = "https://cmsweb-dev.cern.ch/reqmgr2"

        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = os.getenv("COUCHURL",
                                                    "cmssrv52.fnal.gov:5984")
        config.JobStateMachine.couchDBName = self.databaseName
        config.JobStateMachine.jobSummaryDBName = 'wmagent_summary_t'
        config.JobStateMachine.summaryStatsDBName = 'stat_summary_t'

        config.component_("JobCreator")
        config.JobCreator.jobCacheDir = os.path.join(self.testDir, 'testDir')

        config.component_("TaskArchiver")
        config.TaskArchiver.componentDir = self.testDir
        config.TaskArchiver.WorkQueueParams = {
            'CacheDir': config.JobCreator.jobCacheDir
        }
        config.TaskArchiver.pollInterval = 60
        config.TaskArchiver.logLevel = 'INFO'
        config.TaskArchiver.timeOut = 0
        config.TaskArchiver.histogramKeys = [
            'AvgEventTime', 'writeTotalMB', 'jobTime'
        ]
        config.TaskArchiver.histogramBins = 5
        config.TaskArchiver.histogramLimit = 5
        config.TaskArchiver.perfPrimaryDatasets = [
            'SingleMu', 'MuHad', 'MinimumBias'
        ]
        config.TaskArchiver.perfDashBoardMinLumi = 50
        config.TaskArchiver.perfDashBoardMaxLumi = 9000
        config.TaskArchiver.dqmUrl = 'https://cmsweb.cern.ch/dqm/dev/'
        config.TaskArchiver.dashBoardUrl = 'http://dashboard43.cern.ch/dashboard/request.py/putluminositydata'
        config.TaskArchiver.workloadSummaryCouchDBName = "%s/workloadsummary" % self.databaseName
        config.TaskArchiver.localWMStatsURL = "%s/%s" % (
            config.JobStateMachine.couchurl,
            config.JobStateMachine.jobSummaryDBName)
        config.TaskArchiver.workloadSummaryCouchURL = config.JobStateMachine.couchurl
        config.TaskArchiver.requireCouch = True

        config.component_("AnalyticsDataCollector")
        config.AnalyticsDataCollector.centralRequestDBURL = '%s/reqmgrdb_t' % config.JobStateMachine.couchurl
        config.AnalyticsDataCollector.RequestCouchApp = "ReqMgr"

        config.section_("ACDC")
        config.ACDC.couchurl = config.JobStateMachine.couchurl
        config.ACDC.database = config.JobStateMachine.couchDBName

        # Make the jobCacheDir
        os.mkdir(config.JobCreator.jobCacheDir)

        # addition for Alerts messaging framework, work (alerts) and control
        # channel addresses to which the component will be sending alerts
        # these are destination addresses where AlertProcessor:Receiver listens
        config.section_("Alert")
        config.Alert.address = "tcp://127.0.0.1:5557"
        config.Alert.controlAddr = "tcp://127.0.0.1:5559"

        config.section_("Agent")
        config.Agent.serverDN = "/we/bypass/myproxy/logon"

        return config

    def createWorkload(self, workloadName):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        """

        workload = testWorkload(workloadName)

        taskMaker = TaskMaker(workload,
                              os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()

        workload.setCampaign(self.campaignName)

        workload.save(workloadName)

        return workload

    def createTestJobGroup(self,
                           config,
                           name="TestWorkthrough",
                           filesetName="TestFileset",
                           specLocation="spec.xml",
                           error=False,
                           task="/TestWorkload/ReReco",
                           jobType="Processing"):
        """
        Creates a group of several jobs

        """

        testWorkflow = Workflow(spec=specLocation,
                                owner=self.OWNERDN,
                                name=name,
                                task=task,
                                owner_vogroup="",
                                owner_vorole="")
        testWorkflow.create()
        self.inject.execute(names=[name], injected=True)

        testWMBSFileset = Fileset(name=filesetName)
        testWMBSFileset.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(10, *[12314]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        outputWMBSFileset = Fileset(name='%sOutput' % filesetName)
        outputWMBSFileset.create()
        testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
        testFileC.addRun(Run(10, *[12312]))
        testFileC.setLocation('malpaquet')
        testFileC.create()
        outputWMBSFileset.addFile(testFileC)
        outputWMBSFileset.commit()
        outputWMBSFileset.markOpen(0)

        testWorkflow.addOutput('output', outputWMBSFileset)

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow,
                                        type=jobType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for i in range(0, self.nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        changer = ChangeState(config)

        report1 = Report()
        report2 = Report()
        if error:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 "WMComponent_t/JobAccountant_t/fwjrs",
                                 "badBackfillJobReport.pkl")
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        else:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'mergeReport1.pkl')
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        report1.load(filename=path1)
        report2.load(filename=path2)

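        # Walk the jobs through two full execute-and-fail cycles: the first failure
        # sends them to cooloff and back to created, the second exhausts the retries.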
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        for i in range(self.nJobs):
            if i < self.nJobs / 2:
                testJobGroup.jobs[i]['fwjr'] = report1
            else:
                testJobGroup.jobs[i]['fwjr'] = report2
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'exhausted', 'retrydone')
        changer.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup

    def createGiantJobSet(self,
                          name,
                          config,
                          nSubs=10,
                          nJobs=10,
                          nFiles=1,
                          spec="spec.xml"):
        """
        Creates a massive set of jobs

        """

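        # For each of nSubs subscriptions, create nJobs jobs; each job reads one
        # input file and produces nFiles output files, and all jobs are driven
        # through the state machine to 'cleanout'.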
        jobList = []

        for i in range(0, nSubs):
            # Make a bunch of subscriptions
            localName = '%s-%i' % (name, i)
            testWorkflow = Workflow(spec=spec,
                                    owner=self.OWNERDN,
                                    name=localName,
                                    task="Test",
                                    owner_vogroup="",
                                    owner_vorole="")
            testWorkflow.create()

            testWMBSFileset = Fileset(name=localName)
            testWMBSFileset.create()

            testSubscription = Subscription(fileset=testWMBSFileset,
                                            workflow=testWorkflow)
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            filesToComplete = []

            for j in range(0, nJobs):
                # Create jobs for each subscription
                testFileA = File(lfn="%s-%i-lfnA" % (localName, j),
                                 size=1024,
                                 events=10)
                testFileA.addRun(
                    Run(
                        10, *[
                            11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
                            37, 38, 39, 40
                        ]))
                testFileA.setLocation('malpaquet')
                testFileA.create()

                testWMBSFileset.addFile(testFileA)
                testWMBSFileset.commit()

                filesToComplete.append(testFileA)

                testJob = Job(name='%s-%i' % (localName, j))
                testJob.addFile(testFileA)
                testJob['retry_count'] = 1
                testJob['retry_max'] = 10
                testJobGroup.add(testJob)
                jobList.append(testJob)

                for k in range(0, nFiles):
                    # Create output files
                    testFile = File(lfn="%s-%i-output" % (localName, k),
                                    size=1024,
                                    events=10)
                    testFile.addRun(Run(10, *[12312]))
                    testFile.setLocation('malpaquet')
                    testFile.create()

                    testJobGroup.output.addFile(testFile)

                testJobGroup.output.commit()

            testJobGroup.commit()

            changer = ChangeState(config)

            changer.propagate(testJobGroup.jobs, 'created', 'new')
            changer.propagate(testJobGroup.jobs, 'executing', 'created')
            changer.propagate(testJobGroup.jobs, 'complete', 'executing')
            changer.propagate(testJobGroup.jobs, 'success', 'complete')
            changer.propagate(testJobGroup.jobs, 'cleanout', 'success')

            testWMBSFileset.markOpen(0)

            testSubscription.completeFiles(filesToComplete)

        return jobList

    def getPerformanceFromDQM(self, dqmUrl, dataset, run):
        # Stand-in for a function that fetches this from DQM. Returns None or False if it fails.
        getUrl = "%sjsonfairy/archive/%s%s/DQM/TimerService/event_byluminosity" % (
            dqmUrl, run, dataset)
        # Assert if the URL is assembled as expected
        if run == 207214:
            self.assertEqual(
                'https://cmsweb.cern.ch/dqm/dev/jsonfairy/archive/207214/MinimumBias/Commissioning10-v4/DQM/DQM/TimerService/event_byluminosity',
                getUrl)
        # let's suppose it works... read a canned DQM response instead of hitting the GUI
        with open(
                os.path.join(getTestBase(),
                             'WMComponent_t/TaskArchiver_t/DQMGUIResponse.json'),
                'r') as testResponseFile:
            response = testResponseFile.read()
        responseJSON = json.loads(response)
        return responseJSON

    def filterInterestingPerfPoints(self, responseJSON, minLumi, maxLumi):
        worthPoints = {}
        points = responseJSON["hist"]["bins"]["content"]
        for i in range(responseJSON["hist"]["xaxis"]["first"]["id"],
                       responseJSON["hist"]["xaxis"]["last"]["id"]):
            # Is the point worth keeping? If yes, add it to the interesting points dictionary.
            # 1 - non-zero
            # 2 - between the minimum and maximum expected luminosity
            # FIXME: 3 - population in dashboard for the bin interval < 100
            # These thresholds should come from the config:
            if points[i] == 0:
                continue
            binSize = responseJSON["hist"]["xaxis"]["last"][
                "value"] / responseJSON["hist"]["xaxis"]["last"]["id"]
            # Fetching the important values
            instLuminosity = i * binSize
            timePerEvent = points[i]

            if minLumi < instLuminosity < maxLumi:
                worthPoints[instLuminosity] = timePerEvent
        return worthPoints

    def publishPerformanceDashBoard(self, dashBoardUrl, PD, release,
                                    worthPoints):
        dashboardPayload = []
        for instLuminosity in worthPoints:
            timePerEvent = int(worthPoints[instLuminosity])
            dashboardPayload.append({
                "primaryDataset": PD,
                "release": release,
                "integratedLuminosity": instLuminosity,
                "timePerEvent": timePerEvent
            })

        data = "{\"data\":%s}" % str(dashboardPayload).replace("\'", "\"")

        # let's suppose it works... compare against a canned dashboard payload
        with open(
                os.path.join(getTestBase(),
                             'WMComponent_t/TaskArchiver_t/DashBoardPayload.json'),
                'r') as testDashBoardPayloadFile:
            testDashBoardPayload = testDashBoardPayloadFile.read()

        self.assertEqual(data, testDashBoardPayload)

        return True

    def populateWorkflowWithCompleteStatus(self, name="TestWorkload"):
        schema = generate_reqmgr_schema(1)
        schema[0]["RequestName"] = name

        self.requestWriter.insertGenericRequest(schema[0])
        result = self.requestWriter.updateRequestStatus(name, "completed")
        return result

    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components by seeing if they can process a simple set of closeouts.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=False)

        # Create second workload
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect",
            jobType="LogCollect")

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                                 "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        cachePath2 = os.path.join(config.JobCreator.jobCacheDir,
                                  "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))

        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)

        workflowName = "TestWorkload"
        dbname = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobs = jobdb.loadView("JobDump",
                              "jobsByWorkflowName",
                              options={
                                  "startkey": [workflowName],
                                  "endkey": [workflowName, {}]
                              })['rows']
        fwjrdb.loadView("FWJRDump",
                        "fwjrsByWorkflowName",
                        options={
                            "startkey": [workflowName],
                            "endkey": [workflowName, {}]
                        })['rows']

        self.assertEqual(len(jobs), 2 * self.nJobs)

        from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
        create = CreateWMBSBase()
        tables = []
        for x in create.requiredTables:
            tables.append(x[2:])

        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_fileset")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(
            os.path.exists(
                os.path.join(self.testDir, 'workloadTest/TestWorkload')))

        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        workloadSummary = workdatabase.document(id="TestWorkload")
        # Check ACDC
        self.assertEqual(workloadSummary['ACDCServer'],
                         sanitizeURL(config.ACDC.couchurl)['url'])

        # Check the output
        self.assertEqual(workloadSummary['output'].keys(),
                         ['/Electron/MorePenguins-v0/RECO'])
        self.assertEqual(
            sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']
                   ['tasks']),
            ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])
        # Check performance
        # Check histograms
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['histogram'][0]['average'],
            0.89405199999999996,
            places=2)
        self.assertEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['histogram'][0]['nEvents'], 10)

        # Check standard performance
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['TotalJobCPU']['average'],
            17.786300000000001,
            places=2)
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['TotalJobCPU']['stdDev'],
            0.0,
            places=2)

        # Check worstOffenders
        self.assertEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['worstOffenders'], [{
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 1
            }, {
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 1
            }, {
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 2
            }])

        # Check retryData
        self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'],
                         {'1': 10})
        logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
        self.assertEqual(workloadSummary['logArchives'], {
            '/TestWorkload/ReReco/LogCollect':
            [logCollectPFN for _ in range(10)]
        })

        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary['performance'][
                '/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ['average', 'stdDev']:
                self.assertAlmostEqual(
                    workloadSummary['performance']
                    ['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                    workloadSummary['performance']['/TestWorkload/ReReco']
                    ['cmsRun1'][x][y],
                    places=2)

        return

    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR
        """

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=True)
        # Create second workload
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect",
            jobType="LogCollect")

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                                 "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb = CouchServer(config.JobStateMachine.couchurl)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobdb.loadView("JobDump",
                       "jobsByWorkflowName",
                       options={
                           "startkey": [workload.name()],
                           "endkey": [workload.name(), {}]
                       })['rows']
        fwjrdb.loadView("FWJRDump",
                        "fwjrsByWorkflowName",
                        options={
                            "startkey": [workload.name()],
                            "endkey": [workload.name(), {}]
                        })['rows']

        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        dbname = getattr(config.JobStateMachine, "couchDBName")
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id=workload.name())

        self.assertEqual(
            workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'],
            500)
        self.assertTrue('99999' in workloadSummary['errors']
                        ['/TestWorkload/ReReco']['cmsRun1'])

        failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco'][
            'cmsRun1']['99999']['runs']
        self.assertEqual(
            failedRunInfo, {'10': [[12312, 12312]]},
            "Wrong lumi information in the summary for failed jobs")

        # Check the failures by site histograms
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['average']['Failed Jobs'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['average']['99999'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['average']['8020'], 10)
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['stdDev']['Failed Jobs'], 0)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)
        return

    @attr("integration")
    def testC_Profile(self):
        """
        _Profile_

        DON'T RUN THIS!
        """
        import cProfile
        import pstats

        name = makeUUID()

        config = self.getConfig()

        jobList = self.createGiantJobSet(name=name,
                                         config=config,
                                         nSubs=10,
                                         nJobs=1000,
                                         nFiles=10)

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()

        cProfile.runctx("cleanCouch.algorithm()",
                        globals(),
                        locals(),
                        filename="testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()
        return

    @attr("integration")
    def testD_Timing(self):
        """
        _Timing_

        This is to see how fast things go.
        """
        myThread = threading.currentThread()

        name = makeUUID()

        config = self.getConfig()
        jobList = self.createGiantJobSet(name=name,
                                         config=config,
                                         nSubs=10,
                                         nJobs=1000,
                                         nFiles=10)

        testTaskArchiver = TaskArchiverPoller(config=config)

        startTime = time.time()
        testTaskArchiver.algorithm()
        stopTime = time.time()

        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        logging.info("TaskArchiver took %f seconds", (stopTime - startTime))

    def testDQMRecoPerformanceToDashBoard(self):

        myThread = threading.currentThread()

        listRunsWorkflow = self.dbsDaoFactory(classname="ListRunsWorkflow")

        # Not ideal, but the test doesn't provide all the info the system needs, so fake it:
        myThread.dbi.processData(
            """insert into dbsbuffer_workflow(id, name) values (1, 'TestWorkload')""",
            transaction=False)
        myThread.dbi.processData(
            """insert into dbsbuffer_file (id, lfn, dataset_algo, workflow) values (1, '/store/t/e/s/t.test', 1, 1)""",
            transaction=False)
        myThread.dbi.processData(
            """insert into dbsbuffer_file (id, lfn, dataset_algo, workflow) values (2, '/store/t/e/s/t.test2', 1, 1)""",
            transaction=False)
        myThread.dbi.processData(
            """insert into dbsbuffer_file_runlumi_map (run, lumi, filename) values (207214, 100, 1)""",
            transaction=False)
        myThread.dbi.processData(
            """insert into dbsbuffer_file_runlumi_map (run, lumi, filename) values (207215, 200, 2)""",
            transaction=False)

        config = self.getConfig()

        dqmUrl = getattr(config.TaskArchiver, "dqmUrl")
        perfDashBoardMinLumi = getattr(config.TaskArchiver,
                                       "perfDashBoardMinLumi")
        perfDashBoardMaxLumi = getattr(config.TaskArchiver,
                                       "perfDashBoardMaxLumi")
        dashBoardUrl = getattr(config.TaskArchiver, "dashBoardUrl")

        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=True)
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect",
            jobType="LogCollect")

        # Adding request type as ReReco, real ReqMgr requests have it
        workload.data.request.section_("schema")
        workload.data.request.schema.RequestType = "ReReco"
        workload.data.request.schema.CMSSWVersion = 'test_compops_CMSSW_5_3_6_patch1'
        workload.getTask('ReReco').addInputDataset(name='/a/b/c',
                                                   primary='a',
                                                   processed='b',
                                                   tier='c')

        interestingPDs = getattr(config.TaskArchiver, "perfPrimaryDatasets")
        interestingDatasets = []
        # Are the datasets from this request interesting? Do they have DQM output? One might ask afterwards whether they were harvested.
        for dataset in workload.listOutputDatasets():
            (nothing, PD, procDataSet, dataTier) = dataset.split('/')
            if PD in interestingPDs and dataTier == "DQM":
                interestingDatasets.append(dataset)
        # We should have found exactly 1 interesting dataset
        self.assertEqual(len(interestingDatasets), 1)
        if len(interestingDatasets) == 0:
            return
        # Request will be only interesting for performance if it's a ReReco or PromptReco
        (isReReco, isPromptReco) = (False, False)
        if getattr(workload.data.request.schema, "RequestType",
                   None) == 'ReReco':
            isReReco = True
        # Yes, few people like magic strings, but have a look at :
        # https://github.com/dmwm/T0/blob/master/src/python/T0/RunConfig/RunConfigAPI.py#L718
        # Might be safe enough
        # FIXME: in TaskArchiver, add a test to make sure that the dataset makes sense (procDataset ~= /a/ERA-PromptReco-vVERSON/DQM)
        if re.search('PromptReco', workload.name()):
            isPromptReco = True
        if not (isReReco or isPromptReco):
            return

        self.assertTrue(isReReco)
        self.assertFalse(isPromptReco)

        # We are not interested if it's not a PromptReco or a ReReco
        if not (isReReco or isPromptReco):
            return
        if isReReco:
            release = getattr(workload.data.request.schema, "CMSSWVersion")
            if not release:
                logging.info("no release for %s, bailing out", workload.name())
        else:
            release = getattr(
                workload.tasks.Reco.steps.cmsRun1.application.setup,
                "cmsswVersion")
            if not release:
                logging.info("no release for %s, bailing out", workload.name())

        self.assertEqual(release, "test_compops_CMSSW_5_3_6_patch1")
        # If all is true, get the run numbers processed by this workflow
        runList = listRunsWorkflow.execute(workflow=workload.name())
        self.assertEqual([207214, 207215], runList)
        # GO to DQM GUI, get what you want
        # https://cmsweb.cern.ch/dqm/offline/jsonfairy/archive/211313/PAMuon/HIRun2013-PromptReco-v1/DQM/DQM/TimerService/event
        for dataset in interestingDatasets:
            (nothing, PD, procDataSet, dataTier) = dataset.split('/')
            worthPoints = {}
            for run in runList:
                responseJSON = self.getPerformanceFromDQM(dqmUrl, dataset, run)
                worthPoints.update(
                    self.filterInterestingPerfPoints(responseJSON,
                                                     perfDashBoardMinLumi,
                                                     perfDashBoardMaxLumi))

            # Publish dataset performance to DashBoard.
            if not self.publishPerformanceDashBoard(dashBoardUrl, PD, release,
                                                    worthPoints):
                logging.info(
                    "something went wrong when publishing dataset %s to DashBoard",
                    dataset)

        return
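As a side note, the luminosity filtering in filterInterestingPerfPoints above
boils down to a bin-index-to-luminosity conversion. A minimal standalone sketch
(assuming uniform binning that starts at zero; filter_perf_points is a
hypothetical name, not WMCore API):

def filter_perf_points(contents, last_bin_id, last_bin_value, min_lumi, max_lumi):
    """Keep non-zero bins whose instantaneous luminosity lies in (min_lumi, max_lumi)."""
    bin_size = last_bin_value / float(last_bin_id)  # luminosity covered by one bin
    worth_points = {}
    for i, time_per_event in enumerate(contents):
        if time_per_event == 0:
            continue
        inst_lumi = i * bin_size
        if min_lumi < inst_lumi < max_lumi:
            worth_points[inst_lumi] = time_per_event
    return worth_points

# 10 bins spanning 0-10000: only the non-zero bins between 50 and 9000 survive
print(filter_perf_points([0, 1.2, 0, 0.9, 0, 0, 0, 0, 0, 0], 10, 10000.0, 50, 9000))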
Example #44
class Tier0FeederPoller(BaseWorkerThread):

    def __init__(self, config):
        """
        _init_

        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package = "T0.WMBS",
                                     logger = logging,
                                     dbinterface = myThread.dbi)

        self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile
        self.specDirectory = config.Tier0Feeder.specDirectory
        self.dropboxuser = getattr(config.Tier0Feeder, "dropboxuser", None)
        self.dropboxpass = getattr(config.Tier0Feeder, "dropboxpass", None)

        self.transferSystemBaseDir = getattr(config.Tier0Feeder, "transferSystemBaseDir", None)
        if self.transferSystemBaseDir is not None:
            if not os.path.exists(self.transferSystemBaseDir):
                self.transferSystemBaseDir = None

        self.dqmUploadProxy = getattr(config.Tier0Feeder, "dqmUploadProxy", None)
        self.serviceProxy = getattr(config.Tier0Feeder, "serviceProxy", None)

        self.localRequestCouchDB = RequestDBWriter(config.AnalyticsDataCollector.localT0RequestDBURL, 
                                                   couchapp = config.AnalyticsDataCollector.RequestCouchApp)

        hltConfConnectUrl = config.HLTConfDatabase.connectUrl
        dbFactoryHltConf = DBFactory(logging, dburl = hltConfConnectUrl, options = {})
        dbInterfaceHltConf = dbFactoryHltConf.connect()
        daoFactoryHltConf = DAOFactory(package = "T0.WMBS",
                                       logger = logging,
                                       dbinterface = dbInterfaceHltConf)
        self.getHLTConfigDAO = daoFactoryHltConf(classname = "RunConfig.GetHLTConfig")

        storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl
        dbFactoryStorageManager = DBFactory(logging, dburl = storageManagerConnectUrl, options = {})
        self.dbInterfaceStorageManager = dbFactoryStorageManager.connect()

        self.getExpressReadyRunsDAO = None
        if hasattr(config, "PopConLogDatabase"):
            popConLogConnectUrl = getattr(config.PopConLogDatabase, "connectUrl", None)
            if popConLogConnectUrl is not None:
                dbFactoryPopConLog = DBFactory(logging, dburl = popConLogConnectUrl, options = {})
                dbInterfacePopConLog = dbFactoryPopConLog.connect()
                daoFactoryPopConLog = DAOFactory(package = "T0.WMBS",
                                                 logger = logging,
                                                 dbinterface = dbInterfacePopConLog)
                self.getExpressReadyRunsDAO = daoFactoryPopConLog(classname = "Tier0Feeder.GetExpressReadyRuns")

        self.haveT0DataSvc = False
        if hasattr(config, "T0DataSvcDatabase"):
            t0datasvcConnectUrl = getattr(config.T0DataSvcDatabase, "connectUrl", None)
            if t0datasvcConnectUrl is not None:
                self.haveT0DataSvc = True
                dbFactoryT0DataSvc = DBFactory(logging, dburl = t0datasvcConnectUrl, options = {})
                dbInterfaceT0DataSvc = dbFactoryT0DataSvc.connect()
                self.daoFactoryT0DataSvc = DAOFactory(package = "T0.WMBS",
                                                      logger = logging,
                                                      dbinterface = dbInterfaceT0DataSvc)

        return

    def algorithm(self, parameters = None):
        """
        _algorithm_

        """
        logging.debug("Running Tier0Feeder algorithm...")
        myThread = threading.currentThread()

        findNewRunsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewRuns")
        findNewRunStreamsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewRunStreams")
        findNewExpressRunsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewExpressRuns")
        releaseExpressDAO = self.daoFactory(classname = "Tier0Feeder.ReleaseExpress")
        feedStreamersDAO = self.daoFactory(classname = "Tier0Feeder.FeedStreamers")
        markWorkflowsInjectedDAO = self.daoFactory(classname = "Tier0Feeder.MarkWorkflowsInjected")

        tier0Config = None
        try:
            tier0Config = loadConfigurationFile(self.tier0ConfigFile)
        except Exception:
            # usually happens when there are syntax errors in the configuration
            logging.exception("Cannot load Tier0 configuration file, not configuring new runs and run/streams")

        # only configure new runs and run/streams if we have a valid Tier0 configuration
        if tier0Config is not None:

            #
            # find new runs, setup global run settings and stream/dataset/trigger mapping
            #
            runHltkeys = findNewRunsDAO.execute(transaction = False)
            for run, hltkey in sorted(runHltkeys.items()):

                hltConfig = None

                # local runs have no hltkey and are configured differently
                if hltkey is not None:

                    # retrieve HLT configuration and make sure it's usable
                    try:
                        hltConfig = self.getHLTConfigDAO.execute(hltkey, transaction = False)
                        if hltConfig['process'] is None or len(hltConfig['mapping']) == 0:
                            raise RuntimeError("HLTConfDB query returned no process or mapping")
                    except Exception:
                        logging.exception("Can't retrieve hltkey %s for run %d" % (hltkey, run))
                        continue

                try:
                    RunConfigAPI.configureRun(tier0Config, run, hltConfig)
                except Exception:
                    logging.exception("Can't configure for run %d" % run)

            #
            # find unconfigured run/stream with data
            # populate RunConfig, setup workflows/filesets/subscriptions
            # 
            runStreams = findNewRunStreamsDAO.execute(transaction = False)
            for run in sorted(runStreams.keys()):
                for stream in sorted(runStreams[run]):
                    try:
                        RunConfigAPI.configureRunStream(tier0Config,
                                                        run, stream,
                                                        self.specDirectory,
                                                        self.dqmUploadProxy)
                    except Exception:
                        logging.exception("Can't configure for run %d and stream %s" % (run, stream))

        #
        # stop and close runs based on RunSummary and StorageManager records
        #
        RunLumiCloseoutAPI.stopRuns(self.dbInterfaceStorageManager)
        RunLumiCloseoutAPI.closeRuns(self.dbInterfaceStorageManager)

        #
        # release runs for Express
        #
        runs = findNewExpressRunsDAO.execute(transaction = False)

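        # Two-stage release: candidate runs come from t0ast; if a PopConLog
        # connection is configured, only the runs flagged express-ready there are kept.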
        if len(runs) > 0:

            binds = []
            for run in runs:
                binds.append( { 'RUN' : run } )

            if self.getExpressReadyRunsDAO is not None:
                runs = self.getExpressReadyRunsDAO.execute(binds = binds, transaction = False)

            if len(runs) > 0:

                binds = []
                for run in runs:
                    binds.append( { 'RUN' : run } )

                releaseExpressDAO.execute(binds = binds, transaction = False)

        #
        # release runs for PromptReco
        #
        RunConfigAPI.releasePromptReco(tier0Config,
                                       self.specDirectory,
                                       self.dqmUploadProxy)

        #
        # insert express and reco configs into Tier0 Data Service
        #
        if self.haveT0DataSvc:
            self.updateRunStreamDoneT0DataSvc()
            self.updateExpressConfigsT0DataSvc()
            self.updateRecoConfigsT0DataSvc()
            self.updateRecoReleaseConfigsT0DataSvc()
            self.lockDatasetsT0DataSvc()

        #
        # mark express and repack workflows as injected if certain conditions are met
        # (we don't do it immediately to prevent the TaskArchiver from cleaning up too early)
        #
        markWorkflowsInjectedDAO.execute(self.transferSystemBaseDir is not None,
                                         transaction = False)

        #
        # close stream/lumis for run/streams that are active (fileset exists and open)
        #
        RunLumiCloseoutAPI.closeLumiSections(self.dbInterfaceStorageManager)

        #
        # feed new data into existing filesets
        #
        try:
            myThread.transaction.begin()
            feedStreamersDAO.execute(conn = myThread.transaction.conn, transaction = True)
        except Exception:
            logging.exception("Can't feed data, bailing out...")
            raise
        else:
            myThread.transaction.commit()

        #
        # run ended and run/stream fileset open
        #    => check for a complete lumi_closed record, all lumis finally closed and all data fed
        #          => if all conditions are satisfied, close the run/stream fileset
        #
        RunLumiCloseoutAPI.closeRunStreamFilesets()

        #
        # check and delete active split lumis
        #
        RunLumiCloseoutAPI.checkActiveSplitLumis()

        #
        # insert workflows into CouchDB for monitoring
        #
        self.feedCouchMonitoring()

        #
        # Update Couch when Repack and Express have closed input filesets (analogous to the old T0 closeout)
        #
        self.closeOutRealTimeWorkflows()

        #
        # send repacked notifications to StorageManager
        #
        if self.transferSystemBaseDir is not None:
            self.notifyStorageManager()

        #
        # upload PCL conditions to DropBox
        #
        ConditionUploadAPI.uploadConditions(self.dropboxuser, self.dropboxpass, self.serviceProxy)

        return

    def feedCouchMonitoring(self):
        """
        _feedCouchMonitoring_

        check for workflows that haven't been uploaded to Couch for monitoring yet

        """
        getStreamerWorkflowsForMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.GetStreamerWorkflowsForMonitoring")
        getPromptRecoWorkflowsForMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.GetPromptRecoWorkflowsForMonitoring")
        markTrackedWorkflowMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.MarkTrackedWorkflowMonitoring")
        workflows = getStreamerWorkflowsForMonitoringDAO.execute()
        workflows += getPromptRecoWorkflowsForMonitoringDAO.execute()

        if len(workflows) == 0:
            logging.debug("No workflows to publish to couch monitoring, doing nothing")

        if workflows:
            logging.debug(" Going to publish %d workflows" % len(workflows))
            for (workflowId, run, workflowName) in workflows:
                logging.info(" Publishing workflow %s to monitoring" % workflowName)
                #TODO: add more information about workflow if there need to be kept longer than 
                # workflow life cycle.
                doc = {}
                doc["RequestName"] =   workflowName
                doc["Run"]      =   run
                response = self.localRequestCouchDB.insertGenericRequest(doc)
                if response == "OK" or "EXISTS":
                    logging.info(" Successfully uploaded request %s" % workflowName)
                    # Here we have to trust the insert, if it doesn't happen will be easy to spot on the logs
                    markTrackedWorkflowMonitoringDAO.execute(workflowId)

        return

    def closeOutRealTimeWorkflows(self):
        """
        _closeOutRealTimeWorkflows_

        Updates couch with the closeout status of Repack and Express
        PromptReco should be closed out automatically

        """
        getNotClosedOutWorkflowsDAO = self.daoFactory(classname = "Tier0Feeder.GetNotClosedOutWorkflows")
        workflows = getNotClosedOutWorkflowsDAO.execute()

        if len(workflows) == 0:
            logging.debug("No workflows to publish to couch monitoring, doing nothing")

        if workflows:
            for workflow in workflows:
                (workflowId, filesetId, filesetOpen, workflowName) = workflow
                # find returns -1 if the string is not found
                if workflowName.find('PromptReco') >= 0:
                    logging.debug("Closing out PromptReco workflow %s immediately" % workflowName)
                    self.updateClosedState(workflowName, workflowId)
                else:
                    # Check whether the fileset (already fetched above) is closed.
                    # FIXME: no better way to do it? The DAO returns a string, and
                    # casting to bool or int doesn't help much.
                    if filesetOpen == '0':
                        self.updateClosedState(workflowName, workflowId)

        return

    def updateClosedState(self, workflowName, workflowId):
        """
        _updateClosedState_

        Mark a workflow as Closed

        """
        markCloseoutWorkflowMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.MarkCloseoutWorkflowMonitoring")

        response = self.localRequestCouchDB.updateRequestStatus(workflowName, 'Closed')

        if response == "OK" or "EXISTS":
            logging.debug("Successfully closed workflow %s" % workflowName)
            markCloseoutWorkflowMonitoringDAO.execute(workflowId)

        return

    def notifyStorageManager(self):
        """
        _notifyStorageManager_

        Find all finished streamers for all closed run/streams,
        send the notification message to the StorageManager and
        update the streamer status to finished (deleted = 1)

        """
        getFinishedStreamersDAO = self.daoFactory(classname = "SMNotification.GetFinishedStreamers")
        markStreamersFinishedDAO = self.daoFactory(classname = "SMNotification.MarkStreamersFinished")

        allFinishedStreamers = getFinishedStreamersDAO.execute(transaction = False)

        chunkSize = 50
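        # Notify in chunks so the generated shell command line stays reasonably short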
        for finishedStreamers in [ allFinishedStreamers[i:i+chunkSize] for i in range(0, len(allFinishedStreamers), chunkSize) ]:

            streamers = []
            filenameParams = ""

            for (streamerId, lfn) in finishedStreamers:
                streamers.append(streamerId)
                filenameParams += "-FILENAME %s " % os.path.basename(lfn)

            logging.debug("Notifying transfer system about processed streamers")
            p = subprocess.Popen("/bin/bash", stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            output, error = p.communicate("""
            export T0_BASE_DIR=%s
            export T0ROOT=${T0_BASE_DIR}/T0
            export CONFIG=${T0_BASE_DIR}/Config/TransferSystem_CERN.cfg
 
            export PERL5LIB=${T0ROOT}/perl_lib

            unset LANGUAGE
            unset LC_ALL
            unset LC_CTYPE
            export LANG=C

            ${T0ROOT}/operations/sendRepackedStatus.pl --config $CONFIG %s
            """ % (self.transferSystemBaseDir, filenameParams))

            if len(error) > 0:
                logging.error("ERROR: Could not notify transfer system about processed streamers")
                logging.error("ERROR: %s" % error)
            else:
                markStreamersFinishedDAO.execute(streamers, transaction = False)

        return

    def updateRunStreamDoneT0DataSvc(self):
        """
        _updateRunStreamDoneT0DataSvc_

        Check if a run/stream workflow (express or repack) is finished and
        cleaned up and push a completion record into the Tier0 Data Service.

        """
        getRunStreamDoneDAO = self.daoFactory(classname = "T0DataSvc.GetRunStreamDone")
        runStreamDone = getRunStreamDoneDAO.execute(transaction = False)

        if len(runStreamDone) > 0:

            binds = []
            for runStream in runStreamDone:
                binds.append( { 'RUN' : runStream['run'],
                                'STREAM' : runStream['stream'] } )

            insertRunStreamDoneDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertRunStreamDone")
            insertRunStreamDoneDAO.execute(binds = binds, transaction = False)

            updateRunStreamDoneDAO = self.daoFactory(classname = "T0DataSvc.UpdateRunStreamDone")
            updateRunStreamDoneDAO.execute(binds = binds, transaction = False)

        return

    def updateExpressConfigsT0DataSvc(self):
        """
        _updateExpressConfigsT0DataSvc_

        Check which express_config rows are missing
        in the Tier0 Data Service and insert them,
        also record that fact in t0ast

        """
        getExpressConfigsDAO = self.daoFactory(classname = "T0DataSvc.GetExpressConfigs")
        expressConfigs = getExpressConfigsDAO.execute(transaction = False)

        if len(expressConfigs) > 0:

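            # Note: global_tag is truncated to 50 characters below, presumably to fit
            # the corresponding Tier0 Data Service column.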
            bindsInsert = []
            bindsUpdate = []
            for config in expressConfigs:
                bindsInsert.append( { 'RUN' : config['run'],
                                      'STREAM' : config['stream'],
                                      'CMSSW' : config['cmssw'],
                                      'SCRAM_ARCH' : config['scram_arch'],
                                      'RECO_CMSSW' : config['reco_cmssw'],
                                      'RECO_SCRAM_ARCH' : config['reco_scram_arch'],
                                      'ALCA_SKIM' : config['alca_skim'],
                                      'DQM_SEQ' : config['dqm_seq'],
                                      'GLOBAL_TAG' : config['global_tag'][:50],
                                      'SCENARIO' : config['scenario'] } )
                bindsUpdate.append( { 'RUN' : config['run'],
                                      'STREAM' : config['stream'] } )

            insertExpressConfigsDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertExpressConfigs")
            insertExpressConfigsDAO.execute(binds = bindsInsert, transaction = False)

            updateExpressConfigsDAO = self.daoFactory(classname = "T0DataSvc.UpdateExpressConfigs")
            updateExpressConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        return

    def updateRecoConfigsT0DataSvc(self):
        """
        _updateRecoConfigsT0DataSvc_

        Check which reco_config rows are missing
        in the Tier0 Data Service and insert them,
        also record that fact in t0ast

        """
        getRecoConfigsDAO = self.daoFactory(classname = "T0DataSvc.GetRecoConfigs")
        recoConfigs = getRecoConfigsDAO.execute(transaction = False)

        if len(recoConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in recoConfigs:
                bindsInsert.append( { 'RUN' : config['run'],
                                      'PRIMDS' : config['primds'],
                                      'CMSSW' : config['cmssw'],
                                      'SCRAM_ARCH' : config['scram_arch'],
                                      'ALCA_SKIM' : config['alca_skim'],
                                      'PHYSICS_SKIM' : config['physics_skim'],
                                      'DQM_SEQ' : config['dqm_seq'],
                                      'GLOBAL_TAG' : config['global_tag'][:50],
                                      'SCENARIO' : config['scenario'] } )
                bindsUpdate.append( { 'RUN' : config['run'],
                                      'PRIMDS' : config['primds'] } )

            insertRecoConfigsDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertRecoConfigs")
            insertRecoConfigsDAO.execute(binds = bindsInsert, transaction = False)

            updateRecoConfigsDAO = self.daoFactory(classname = "T0DataSvc.UpdateRecoConfigs")
            updateRecoConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        return

    def updateRecoReleaseConfigsT0DataSvc(self):
        """
        _updateRecoReleaseConfigsT0DataSvc_

        Insert information about PromptReco release into the Tier0 Data Service.
        Already aggregated by run: if one primary dataset is released, that
        means the whole run is considered released.

        """
        getRecoReleaseConfigsDAO = self.daoFactory(classname = "T0DataSvc.GetRecoReleaseConfigs")
        recoReleaseConfigs = getRecoReleaseConfigsDAO.execute(transaction = False)

        if len(recoReleaseConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in recoReleaseConfigs:

                locked = int(config['released'] > 0)

                bindsInsert.append( { 'RUN' : config['run'],
                                      'LOCKED' : locked } )
                bindsUpdate.append( { 'RUN' : config['run'],
                                      'IN_DATASVC' : locked + 1 } )

            insertRecoReleaseConfigsDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertRecoReleaseConfigs")
            insertRecoReleaseConfigsDAO.execute(binds = bindsInsert, transaction = False)

            updateRecoReleaseConfigsDAO = self.daoFactory(classname = "T0DataSvc.UpdateRecoReleaseConfigs")
            updateRecoReleaseConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        return
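
    # Worked example of the release encoding above: with
    # config = {'run': 123456, 'released': 3}, the binds become
    #   LOCKED     = int(3 > 0)  -> 1    (at least one primary dataset released)
    #   IN_DATASVC = locked + 1  -> 2
    # (reading the 1/2 values as "inserted but unreleased" vs "released" is an
    # assumption here, not confirmed by this code alone).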

    def lockDatasetsT0DataSvc(self):
        """
        _lockDatasetsT0DataSvc_

        Publish dataset information into the Tier0 Data Service.

        """
        getDatasetLockedDAO = self.daoFactory(classname = "T0DataSvc.GetDatasetLocked")
        datasetConfigs = getDatasetLockedDAO.execute(transaction = False)

        if len(datasetConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in datasetConfigs:
                bindsInsert.append( { 'PATH' : config['path'] } )
                bindsUpdate.append( { 'ID' : config['id'] } )

            insertDatasetLockedDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertDatasetLocked")
            insertDatasetLockedDAO.execute(binds = bindsInsert, transaction = False)

            updateDatasetLockedDAO = self.daoFactory(classname = "T0DataSvc.UpdateDatasetLocked")
            updateDatasetLockedDAO.execute(binds = bindsUpdate, transaction = False)

        return

    def terminate(self, params):
        """
        _terminate_

        Terminate the component immediately when called by the master thread.

        """
        logging.debug("terminating immediately")
Example #45
0
    def __init__(self, config):
        """
        _init_

        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package = "T0.WMBS",
                                     logger = logging,
                                     dbinterface = myThread.dbi)

        self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile
        self.specDirectory = config.Tier0Feeder.specDirectory
        self.dropboxuser = getattr(config.Tier0Feeder, "dropboxuser", None)
        self.dropboxpass = getattr(config.Tier0Feeder, "dropboxpass", None)

        self.transferSystemBaseDir = getattr(config.Tier0Feeder, "transferSystemBaseDir", None)
        if self.transferSystemBaseDir is not None:
            if not os.path.exists(self.transferSystemBaseDir):
                self.transferSystemBaseDir = None

        self.dqmUploadProxy = getattr(config.Tier0Feeder, "dqmUploadProxy", None)
        self.serviceProxy = getattr(config.Tier0Feeder, "serviceProxy", None)

        self.localRequestCouchDB = RequestDBWriter(config.AnalyticsDataCollector.localT0RequestDBURL, 
                                                   couchapp = config.AnalyticsDataCollector.RequestCouchApp)

        hltConfConnectUrl = config.HLTConfDatabase.connectUrl
        dbFactoryHltConf = DBFactory(logging, dburl = hltConfConnectUrl, options = {})
        dbInterfaceHltConf = dbFactoryHltConf.connect()
        daoFactoryHltConf = DAOFactory(package = "T0.WMBS",
                                       logger = logging,
                                       dbinterface = dbInterfaceHltConf)
        self.getHLTConfigDAO = daoFactoryHltConf(classname = "RunConfig.GetHLTConfig")

        storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl
        dbFactoryStorageManager = DBFactory(logging, dburl = storageManagerConnectUrl, options = {})
        self.dbInterfaceStorageManager = dbFactoryStorageManager.connect()

        self.getExpressReadyRunsDAO = None
        if hasattr(config, "PopConLogDatabase"):
            popConLogConnectUrl = getattr(config.PopConLogDatabase, "connectUrl", None)
            if popConLogConnectUrl is not None:
                dbFactoryPopConLog = DBFactory(logging, dburl = popConLogConnectUrl, options = {})
                dbInterfacePopConLog = dbFactoryPopConLog.connect()
                daoFactoryPopConLog = DAOFactory(package = "T0.WMBS",
                                                 logger = logging,
                                                 dbinterface = dbInterfacePopConLog)
                self.getExpressReadyRunsDAO = daoFactoryPopConLog(classname = "Tier0Feeder.GetExpressReadyRuns")

        self.haveT0DataSvc = False
        if hasattr(config, "T0DataSvcDatabase"):
            t0datasvcConnectUrl = getattr(config.T0DataSvcDatabase, "connectUrl", None)
            if t0datasvcConnectUrl is not None:
                self.haveT0DataSvc = True
                dbFactoryT0DataSvc = DBFactory(logging, dburl = t0datasvcConnectUrl, options = {})
                dbInterfaceT0DataSvc = dbFactoryT0DataSvc.connect()
                self.daoFactoryT0DataSvc = DAOFactory(package = "T0.WMBS",
                                                      logger = logging,
                                                      dbinterface = dbInterfaceT0DataSvc)

        return
Example #46
0
class Request(RESTEntity):
    def __init__(self, app, api, config, mount):
        # main CouchDB database where requests/workloads are stored
        RESTEntity.__init__(self, app, api, config, mount)
        self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
        self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db, couchapp="ReqMgr")
        # this is needed for the POST validation
        self.gq_service = WorkQueue(config.couch_host, config.couch_workqueue_db)

    def _validateGET(self, param, safe):
        # TODO: need proper validation but for now pass everything
        args_length = len(param.args)
        if args_length == 1:
            safe.kwargs["name"] = param.args[0]
            param.args.pop()
            return

        no_multi_key = ["detail", "_nostale", "date_range", "common_dict"]
        for key, value in param.kwargs.items():
            # convert string to list
            if key not in no_multi_key and isinstance(value, basestring):
                param.kwargs[key] = [value]

        detail = param.kwargs.get('detail', True)
        if detail in (False, "false", "False", "FALSE"):
            detail = False

        if "status" in param.kwargs and detail:
            for status in param.kwargs["status"]:
                if status.endswith("-archived"):
                    raise InvalidSpecParameterValue(
                        """Can't retrieve bulk archived status requests with detail option True,
                           set detail=false or use other search arguments""")

        for prop in param.kwargs.keys():
            safe.kwargs[prop] = param.kwargs.pop(prop)
        return
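
    # How scalar query parameters are normalized by the loop above
    # (illustrative values; the code targets Python 2, hence basestring):
    #
    #   param.kwargs = {"status": "acquired", "detail": "false"}
    #   ->             {"status": ["acquired"], "detail": "false"}
    #
    # "status" is promoted to a list so it can feed multi-key couch views,
    # while "detail" is in no_multi_key and keeps its scalar value.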

    def _validateRequestBase(self, param, safe, valFunc, requestName=None):
        data = cherrypy.request.body.read()
        if data:
            request_args = json.loads(data)
        else:
            request_args = {}
        cherrypy.log('Updating request "%s" with these user-provided args: %s' % (requestName, request_args))

        # In case keyword args are passed alongside an existing request body:
        # if the request body is a dictionary, fold the keyword args into it as well
        if isinstance(request_args, dict):
            for prop in param.kwargs.keys():
                request_args[prop] = param.kwargs.pop(prop)

            if requestName:
                request_args["RequestName"] = requestName
            request_args = [request_args]

        safe.kwargs['workload_pair_list'] = []
        for args in request_args:
            workload, r_args = valFunc(args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

    def _get_request_names(self, ids):
        "Extract request names from given documents"
        # cherrypy.log("request names %s" % ids)
        doc = {}
        if isinstance(ids, list):
            for rid in ids:
                doc[rid] = 'on'
        elif isinstance(ids, basestring):
            doc[ids] = 'on'

        docs = []
        for key in doc.keys():
            if key.startswith('request'):
                rid = key.split('request-')[-1]
                if rid != 'all':
                    docs.append(rid)
                del doc[key]
        return docs
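
    # Worked example (illustrative workflow names): the browser form posts
    # checkbox keys of the form "request-<name>": "on", so
    #
    #   self._get_request_names(["request-wf_alpha", "request-all"])
    #
    # returns ['wf_alpha']: the 'request-' prefix is stripped, the
    # pseudo-entry 'all' is skipped, and non-'request' keys are ignored.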

    def _getMultiRequestArgs(self, multiRequestForm):
        request_args = {}
        for prop in multiRequestForm:
            if prop == "ids":
                request_names = self._get_request_names(multiRequestForm["ids"])
            elif prop == "new_status":
                request_args["RequestStatus"] = multiRequestForm[prop]
            # remove this
            # elif prop in ["CustodialSites", "AutoApproveSubscriptionSites"]:
            #    request_args[prop] = [multiRequestForm[prop]]
            else:
                request_args[prop] = multiRequestForm[prop]
        return request_names, request_args
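
    # Sketch of the multi-update form translation (hypothetical payload):
    #
    #   {"ids": ["request-wf_alpha"], "new_status": "assigned", "Team": "prod"}
    #
    # yields request_names == ['wf_alpha'] and
    # request_args == {"RequestStatus": "assigned", "Team": "prod"}:
    # "ids" selects the names, "new_status" is renamed to RequestStatus,
    # and every other property passes through untouched.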

    def _validateMultiRequests(self, param, safe, valFunc):

        data = cherrypy.request.body.read()
        if data:
            request_names, request_args = self._getMultiRequestArgs(json.loads(data))
        else:
            # this is actually an error case
            # cherrypy.log(str(param.kwargs))
            request_names, request_args = self._getMultiRequestArgs(param.kwargs)

            for prop in request_args:
                if prop == "RequestStatus":
                    del param.kwargs["new_status"]
                else:
                    del param.kwargs[prop]

            del param.kwargs["ids"]

        safe.kwargs['workload_pair_list'] = []

        for request_name in request_names:
            request_args["RequestName"] = request_name
            workload, r_args = valFunc(request_args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

        safe.kwargs["multi_update_flag"] = True

    def _getRequestNamesFromBody(self, safe):

        request_names = json.loads(cherrypy.request.body.read())
        safe.kwargs['workload_pair_list'] = request_names
        safe.kwargs["multi_names_flag"] = True

    def validate(self, apiobj, method, api, param, safe):
        # To make validation succeed, move the validated arguments into
        # safe and leave param empty; otherwise raise an error.
        try:
            if method == 'GET':
                self._validateGET(param, safe)

            elif method == 'PUT':
                args_length = len(param.args)

                if args_length == 1:
                    requestName = param.args[0]
                    param.args.pop()
                else:
                    requestName = None
                self._validateRequestBase(param, safe, validate_request_update_args, requestName)

            elif method == 'POST':
                args_length = len(param.args)
                if args_length == 2 and param.args[0] == "clone":
                    # handles the clone workflow - don't validate args here
                    param.kwargs['OriginalRequestName'] = param.args[1]
                    param.args.pop()
                    param.args.pop()
                    self._validateRequestBase(param, safe, validate_clone_create_args)
                elif args_length == 1 and param.args[0] == "multi_update":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._validateMultiRequests(param, safe, validate_request_update_args)
                elif args_length == 1 and param.args[0] == "bynames":
                    # special case for bulk retrieval by request names from the browser.
                    param.args.pop()
                    self._getRequestNamesFromBody(safe)
                else:
                    self._validateRequestBase(param, safe, validate_request_create_args)
        except InvalidSpecParameterValue as ex:
            raise ex
        except Exception as ex:
            # TODO add proper error message instead of trace back
            msg = traceback.format_exc()
            cherrypy.log("Error: %s" % msg)
            if hasattr(ex, "message"):
                if hasattr(ex.message, '__call__'):
                    msg = ex.message()
                else:
                    msg = str(ex)
            else:
                msg = str(ex)
            raise InvalidSpecParameterValue(msg)

    def _maskResult(self, mask, result):
        """
        If a mask of parameters was provided in the query string, then filter
        the request key/values accordingly.
        :param mask: a list of strings (keys of the request dictionary)
        :param result: a dict keyed by the request name, with the whole
            request dictionary as a value
        :return: updates the result object in place and returns it (dict)
        """

        if len(mask) == 1 and mask[0] == "DAS":
            mask = ReqMgrConfigDataCache.getConfig("DAS_RESULT_FILTER")["filter_list"]

        if len(mask) > 0:
            maskedResult = {}
            for reqName, reqDict in result.items():
                reqInfo = RequestInfo(reqDict)
                maskedResult.setdefault(reqName, {})
                for maskKey in mask:
                    foundValue = reqInfo.get(maskKey, None)
                    maskedResult[reqName].update({maskKey: foundValue})

            return maskedResult
        else:
            return result
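
    # Illustrative masking (the real lookup goes through RequestInfo.get,
    # which may also resolve keys nested inside the request document):
    #
    #   result = {"wf_alpha": {"RequestStatus": "completed", "Campaign": "X"}}
    #   mask   = ["RequestStatus", "RequestPriority"]
    #   ->       {"wf_alpha": {"RequestStatus": "completed",
    #                          "RequestPriority": None}}
    #
    # keys absent from a request come back as None rather than being dropped.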

    @restcall(formats=[('text/plain', PrettyJSONFormat()), ('application/json', JSONFormat())])
    def get(self, **kwargs):
        """
        Returns request info depending on the conditions set by kwargs.
        The currently defined kwargs are the following:
        statusList, requestNames, requestType, prepID, inputDataset, outputDataset, dateRange
        If jobInfo is True, returns job information about the request as well.

        TODO:
        stuff like this has to be masked out from the result of this call:
            _attachments: {u'spec': {u'stub': True, u'length': 51712, u'revpos': 2, u'content_type': u'application/json'}}
            _id: maxa_RequestString-OVERRIDE-ME_130621_174227_9225
            _rev: 4-c6ceb2737793aaeac3f1cdf591593da4

        """
        ### pop arguments unrelated to the user query
        mask = kwargs.pop("mask", [])
        detail = kwargs.pop("detail", True)
        common_dict = int(kwargs.pop("common_dict", 0))  # modifies the response format
        nostale = kwargs.pop("_nostale", False)

        ### these are the query strings supported by this API
        status = kwargs.get("status", [])
        name = kwargs.get("name", [])
        request_type = kwargs.get("request_type", [])
        prep_id = kwargs.get("prep_id", [])
        inputdataset = kwargs.get("inputdataset", [])
        outputdataset = kwargs.get("outputdataset", [])
        date_range = kwargs.get("date_range", False)
        campaign = kwargs.get("campaign", [])
        team = kwargs.get("team", [])
        mc_pileup = kwargs.get("mc_pileup", [])
        data_pileup = kwargs.get("data_pileup", [])
        requestor = kwargs.get("requestor", [])

        # further tweaks to the couch queries
        if len(status) == 1 and status[0] == "ACTIVE":
            status = ACTIVE_STATUS
        if detail in (False, "false", "False", "FALSE"):
            option = {"include_docs": False}
        else:
            option = {"include_docs": True}
        # everything should use the stale view; this option is only needed for tests
        if nostale:
            self.reqmgr_db_service._setNoStale()

        request_info = []
        queryMatched = False  # flag to avoid calling the same view twice
        if len(kwargs) == 2:
            if status and team:
                query_keys = [[t, s] for t in team for s in status]
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("byteamandstatus",
                                                                                 option, query_keys))
                queryMatched = True
            elif status and request_type:
                query_keys = [[s, rt] for rt in request_type for s in status]
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("requestsbystatusandtype",
                                                                                 option, query_keys))
                queryMatched = True
            elif status and requestor:
                query_keys = [[s, r] for r in requestor for s in status]
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bystatusandrequestor",
                                                                                 option, query_keys))
                queryMatched = True
        elif len(kwargs) == 3:
            if status and request_type and requestor:
                query_keys = [[s, rt, req] for s in status for rt in request_type for req in requestor]
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bystatusandtypeandrequestor",
                                                                                 option, query_keys))
                queryMatched = True

        # anything else that hasn't matched the query combination above
        if not queryMatched:
            if status:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bystatus",
                                                                                 option, status))
            if name:
                request_info.append(self.reqmgr_db_service.getRequestByNames(name))
            if request_type:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bytype",
                                                                                 option, request_type))
            if prep_id:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("byprepid",
                                                                                 option, prep_id))
            if inputdataset:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("byinputdataset",
                                                                                 option, inputdataset))
            if outputdataset:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("byoutputdataset",
                                                                                 option, outputdataset))
            if date_range:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydate",
                                                                                 option, date_range))
            if campaign:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bycampaign",
                                                                                 option, campaign))
            if mc_pileup:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bymcpileup",
                                                                                 option, mc_pileup))
            if data_pileup:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydatapileup",
                                                                                 option, data_pileup))

        # get the intersection of the request data
        result = self._intersection_of_request_info(request_info)

        if not result:
            return []

        result = self._maskResult(mask, result)

        if not option["include_docs"]:
            return result.keys()

        # set the return format. The default format has the request name as a key;
        # if common_dict is set to 1, it returns a list of dictionaries with a RequestName field.
        if common_dict == 1:
            response_list = result.values()
        else:
            response_list = [result]
        return rows(response_list)
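
    # Illustrative construction of the compound-view keys used above, e.g.
    # a status + team query against the "byteamandstatus" view:
    #
    #   team   = ["production"]
    #   status = ["running-open", "acquired"]
    #   query_keys = [[t, s] for t in team for s in status]
    #   ->  [["production", "running-open"], ["production", "acquired"]]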

    def _intersection_of_request_info(self, request_info):
        requests = {}
        if len(request_info) < 1:
            return requests

        request_key_set = set(request_info[0].keys())
        for info in request_info:
            request_key_set = set(request_key_set) & set(info.keys())
        # TODO: need to assume some data might not contain the included docs
        for request_name in request_key_set:
            requests[request_name] = request_info[0][request_name]
        return requests
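
    # Standalone illustration of the intersection: only request names present
    # in every per-filter result survive, and the document from the first
    # result is the one returned:
    #
    #   request_info = [{"wf_a": docA, "wf_b": docB}, {"wf_a": docA2}]
    #   ->  {"wf_a": docA}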

    def _retrieveResubmissionChildren(self, request_name):
        """
        Fetches all the direct children requests from CouchDB.
        Response from CouchDB view is in the following format:
            [{u'id': u'child_workflow_name',
              u'key': u'parent_workflow_name',
              u'value': 'current_request_status'}]
        :param request_name: string with the parent workflow name
        :return: a list of dictionaries with the parent and child workflow and the child status
        """
        result = self.reqmgr_db.loadView('ReqMgr', 'childresubmissionrequests', keys=[request_name])['rows']
        childrenRequestAndStatus = []
        for childInfo in result:
            childrenRequestAndStatus.append(childInfo)
            childrenRequestAndStatus.extend(self._retrieveResubmissionChildren(childInfo['id']))
        return childrenRequestAndStatus
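
    # Illustrative traversal (made-up names): for a resubmission chain
    # parent_wf -> child_wf -> grandchild_wf, the recursion flattens
    # depth-first into
    #
    #   [{'id': 'child_wf', 'key': 'parent_wf', 'value': 'running-open'},
    #    {'id': 'grandchild_wf', 'key': 'child_wf', 'value': 'acquired'}]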

    def _handleNoStatusUpdate(self, workload, request_args, dn):
        """
        For no-status update, we only support the following parameters:
         1. RequestPriority
         2. Global workqueue statistics, while acquiring a workflow
        """
        if 'RequestPriority' in request_args:
            # Yes, we completely ignore any other arguments posted by the user (web UI case)
            request_args = {'RequestPriority': request_args['RequestPriority']}
            validate_request_priority(request_args)
            # must update three places: GQ elements, workload_cache and workload spec
            self.gq_service.updatePriority(workload.name(), request_args['RequestPriority'])
            report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)
            workload.setPriority(request_args['RequestPriority'])
            workload.saveCouchUrl(workload.specUrl())
            cherrypy.log('Updated priority of "{}" to: {}'.format(workload.name(), request_args['RequestPriority']))
        elif workqueue_stat_validation(request_args):
            report = self.reqmgr_db_service.updateRequestStats(workload.name(), request_args)
            cherrypy.log('Updated workqueue statistics of "{}", with:  {}'.format(workload.name(), request_args))
        else:
            msg = "There are invalid arguments for no-status update: %s" % request_args
            raise InvalidSpecParameterValue(msg)

        return report

    def _handleAssignmentApprovedTransition(self, workload, request_args, dn):
        """
        Allows only two arguments: RequestStatus and RequestPriority
        """
        if "RequestPriority" not in request_args:
            msg = "There are invalid arguments for assignment-approved transition: %s" % request_args
            raise InvalidSpecParameterValue(msg)

        validate_request_priority(request_args)
        report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)
        return report

    def _handleAssignmentStateTransition(self, workload, request_args, dn):
        if ('SoftTimeout' in request_args) and ('GracePeriod' in request_args):
            request_args['HardTimeout'] = request_args['SoftTimeout'] + request_args['GracePeriod']

        # Only allow extra value update for assigned status
        cherrypy.log("Assign request %s, input args: %s ..." % (workload.name(), request_args))
        try:
            workload.updateArguments(request_args)
        except Exception as ex:
            msg = traceback.format_exc()
            cherrypy.log("Error for request args %s: %s" % (request_args, msg))
            raise InvalidSpecParameterValue(str(ex))

        # validate/update OutputDatasets after ProcessingString and AcquisitionEra are updated
        request_args['OutputDatasets'] = workload.listOutputDatasets()
        validateOutputDatasets(request_args['OutputDatasets'], workload.getDbsUrl())

        # by default, it contains all unmerged LFNs (used by sites to protect the unmerged area)
        request_args['OutputModulesLFNBases'] = workload.listAllOutputModulesLFNBases()

        # Add parentage relation for step chain, task chain:
        chainMap = workload.getChainParentageSimpleMapping()
        if chainMap:
            request_args["ChainParentageMap"] = chainMap

        # save the spec first, before updating the reqmgr request status, to prevent a race
        # condition when the workflow is pulled to GQ before the site whitelist is updated
        workload.saveCouch(self.config.couch_host, self.config.couch_reqmgr_db)
        report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)

        return report

    def _handleOnlyStateTransition(self, workload, request_args, dn):
        """
        It handles only the state transition.
        Special handling needed if a request is aborted or force completed.
        """
        # if we got here, then the main workflow has been already validated
        # and the status transition is allowed
        req_status = request_args["RequestStatus"]
        cascade = request_args.get("cascade", False)

        if req_status in ["aborted", "force-complete"]:
            # cancel the workflow first
            self.gq_service.cancelWorkflow(workload.name())

        # cascade option is only supported for these 3 statuses. If set, we need to
        # find all the children requests and perform the same status transition
        if req_status in ["rejected", "closed-out", "announced"] and cascade:
            childrenNamesAndStatus = self._retrieveResubmissionChildren(workload.name())
            msg = "Workflow {} has {} ".format(workload.name(), len(childrenNamesAndStatus))
            msg += "children workflows to have a status transition to: {}".format(req_status)
            cherrypy.log(msg)
            for childInfo in childrenNamesAndStatus:
                if check_allowed_transition(childInfo['value'], req_status):
                    cherrypy.log('Updating request status for {} to {}.'.format(childInfo['id'], req_status))
                    self.reqmgr_db_service.updateRequestStatus(childInfo['id'], req_status, dn)
                else:
                    msg = "Status transition from {} to {} ".format(childInfo['value'], req_status)
                    msg += "not allowed for workflow: {}, skipping it!".format(childInfo['id'])
                    cherrypy.log(msg)
        # then update the original/parent workflow status in couchdb
        cherrypy.log('Updating request status for {} to {}.'.format(workload.name(), req_status))
        report = self.reqmgr_db_service.updateRequestStatus(workload.name(), req_status, dn)
        return report

    def _updateRequest(self, workload, request_args):
        dn = get_user_info().get("dn", "unknown")

        if "RequestStatus" not in request_args:
            report = self._handleNoStatusUpdate(workload, request_args, dn)
        else:
            req_status = request_args["RequestStatus"]
            if len(request_args) == 2 and req_status == "assignment-approved":
                report = self._handleAssignmentApprovedTransition(workload, request_args, dn)
            elif len(request_args) > 1 and req_status == "assigned":
                report = self._handleAssignmentStateTransition(workload, request_args, dn)
            elif len(request_args) == 1 or (len(request_args) == 2 and "cascade" in request_args):
                report = self._handleOnlyStateTransition(workload, request_args, dn)
            else:
                msg = "There are invalid arguments with this status transition: %s" % request_args
                raise InvalidSpecParameterValue(msg)

        if report == 'OK':
            return {workload.name(): "OK"}
        return {workload.name(): "ERROR"}
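
    # Which handler each payload reaches in _updateRequest above
    # (hypothetical request bodies, one per branch):
    #
    #   {"RequestPriority": 110000}                    -> _handleNoStatusUpdate
    #   {"RequestStatus": "assignment-approved",
    #    "RequestPriority": 120000}                    -> _handleAssignmentApprovedTransition
    #   {"RequestStatus": "assigned", "Team": "prod"}  -> _handleAssignmentStateTransition
    #   {"RequestStatus": "rejected", "cascade": True} -> _handleOnlyStateTransition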

    @restcall(formats=[('application/json', JSONFormat())])
    def put(self, workload_pair_list):
        """workloadPairList is a list of tuple containing (workload, request_args)"""
        report = []
        for workload, request_args in workload_pair_list:
            result = self._updateRequest(workload, request_args)
            report.append(result)
        return report

    @restcall(formats=[('application/json', JSONFormat())])
    def delete(self, request_name):
        cherrypy.log("INFO: Deleting request document '%s' ..." % request_name)
        try:
            self.reqmgr_db.delete_doc(request_name)
        except CouchError as ex:
            msg = "ERROR: Delete failed."
            cherrypy.log(msg + " Reason: %s" % ex)
            raise cherrypy.HTTPError(404, msg)
            # TODO
        # delete should also happen on WMStats
        cherrypy.log("INFO: Delete '%s' done." % request_name)

    def _update_additional_request_args(self, workload, request_args):
        """
        Add to request_args properties which are not initially set by the user.
        This data will be put into couchdb.
        Update request_args here if additional information needs to be put into couchdb.
        """
        request_args['RequestWorkflow'] = sanitizeURL("%s/%s/%s/spec" % (request_args["CouchURL"],
                                                                         request_args["CouchWorkloadDBName"],
                                                                         workload.name()))['url']

        # Add the output datasets if necessary
        # for some bizarre reason OutputDatasets is a list of lists
        request_args['OutputDatasets'] = workload.listOutputDatasets()

        # Add initial priority only for the creation of the request
        request_args['InitialPriority'] = request_args["RequestPriority"]

        return
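
    # Illustrative effect of sanitizeURL on the stored RequestWorkflow value
    # (it strips any credentials embedded in the couch URL):
    #
    #   "http://user:pass@couch.example.com:5984/reqmgr_workload_cache/wf_alpha/spec"
    #   -> "http://couch.example.com:5984/reqmgr_workload_cache/wf_alpha/spec"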

    @restcall(formats=[('application/json', JSONFormat())])
    def post(self, workload_pair_list, multi_update_flag=False, multi_names_flag=False):
        """
        Create and update couchDB with a new request.
        The request argument is passed from validation
        (validation converts cherrypy.request.body data to arguments)

        TODO:
        this method will have some parts factored out so that e.g. clone call
        can share functionality.

        NOTES:
        1) do not strip spaces, #4705 will fail upon injection with spaces;
            currently the chain relies on a number of things coming in #4705
        2) reqInputArgs = Utilities.unidecode(json.loads(body))
            (from ReqMgrRESTModel.putRequest)
        """

        # storing the request document into Couch

        if multi_update_flag:
            return self.put(workload_pair_list)
        if multi_names_flag:
            return self.get(name=workload_pair_list)

        out = []
        for workload, request_args in workload_pair_list:
            self._update_additional_request_args(workload, request_args)

            cherrypy.log("Create request, input args: %s ..." % request_args)
            try:
                workload.saveCouch(request_args["CouchURL"], request_args["CouchWorkloadDBName"],
                                   metadata=request_args)
                out.append({'request': workload.name()})
            except Exception as ex:
                # then it failed to add the spec file as attachment
                # we better delete the original request to avoid confusion in wmstats
                cherrypy.log("Error saving request spec to couch: %s " % str(ex))
                self.delete(request_args['RequestName'])

        return out
Example #47
0
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch:  raise an exception on couch failure instead of ignoring
    """

    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False):
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            # sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                       couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)

            self.reqmgr2Svc = ReqMgr(self.config.General.ReqMgr2ServiceURL)

        # Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    @timeFunction
    def algorithm(self, parameters=None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp) = self.getFinishedWorkflows()
            # set the data cache which can be used by other threads (no other thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()

        myThread.transaction.begin()

        # Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(classname="Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut=self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()

        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. Combine those and return:
           finishedwfs - without LogCollect and CleanUp tasks
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp tasks
        """

        finishedWorkflowsDAO = self.daoFactory(classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(onlySecondary=True)
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def killCondorJobsByWFStatus(self, statusList):
        if isinstance(statusList, basestring):
            statusList = [statusList]
        reqNames = self.centralCouchDBWriter.getRequestByStatus(statusList)
        logging.info("There are %d requests in %s status in central couch.", len(reqNames), statusList)
        self.workQueue.killWMBSWorkflows(reqNames)
        return reqNames

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:

        1. Notify the WorkQueue about finished subscriptions
        2. mark workflow as completed in the dbsbuffer_workflow table
        """
        if not finishedwfs:
            return

        logging.info("Found %d candidate workflows for completing:", len(finishedwfs))
        completedWorkflowsDAO = self.dbsDaoFactory(classname="UpdateWorkflowsToCompleted")

        centralCouchAlive = True
        try:
            self.killCondorJobsByWFStatus(["force-complete", "aborted"])
        except Exception as ex:
            centralCouchAlive = False
            logging.error("we will try again when remote couch server comes back\n%s", str(ex))

        if centralCouchAlive:
            logging.info("Marking subscriptions as Done ...")
            for workflow in finishedwfs:
                try:
                    # Notify the WorkQueue, if there is one
                    if self.workQueue is not None:
                        subList = []
                        for subs in finishedwfs[workflow]["workflows"].values():
                            subList.extend(subs)
                        self.notifyWorkQueue(subList)

                    # Tier-0 case, the agent has to mark it completed
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(workflow, "completed")
                        logging.info("status updated to completed %s", workflow)

                    completedWorkflowsDAO.execute([workflow])

                except TaskArchiverPollerException as ex:
                    # Something didn't go well when notifying the workqueue, abort!!!
                    logging.error("Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    # Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """

        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId=sub)
            except WorkQueueNoMatchingElements:
                # Subscription wasn't known to WorkQueue, feel free to clean up
                logging.debug("Local WorkQueue knows nothing about this subscription: %s", sub)
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)

        return
Example #48
0
class Request(RESTEntity):
    def __init__(self, app, api, config, mount):
        # main CouchDB database where requests/workloads are stored
        RESTEntity.__init__(self, app, api, config, mount)
        self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
        self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db, couchapp="ReqMgr")
        # this is needed for the POST validation
        self.reqmgr_aux_db = api.db_handler.get_db(config.couch_reqmgr_aux_db)
        self.gq_service = WorkQueue(config.couch_host, config.couch_workqueue_db)

    def _requestArgMapFromBrowser(self, request_args):
        """
        Mapping function specific to request data coming from the browser

        TODO: give a keyword so it doesn't have to loop through in general
        """
        docs = []
        for doc in request_args:
            for key in doc.keys():
                if key.startswith('request'):
                    rid = key.split('request-')[-1]
                    if rid != 'all':
                        docs.append(rid)
                    del doc[key]
        return docs

    def _validateGET(self, param, safe):
        # TODO: need proper validation but for now pass everything
        args_length = len(param.args)
        if args_length == 1:
            safe.kwargs["name"] = param.args[0]
            param.args.pop()
            return
        
        no_multi_key = ["detail", "_nostale", "date_range", "common_dict"]
        for key, value in param.kwargs.items():
            # convert string to list
            if key not in no_multi_key and isinstance(value, basestring):
                param.kwargs[key] = [value]
        
        detail = param.kwargs.get('detail', True)
        if detail in (False, "false", "False", "FALSE"):
            detail = False
            
        if "status" in param.kwargs and detail:
            for status in param.kwargs["status"]:
                if status.endswith("-archived"):
                    raise InvalidSpecParameterValue(
                        """Can't retrieve bulk archived status requests with detail option True, 
                           set detail=false or use other search arguments""")
                    
        for prop in param.kwargs:
            safe.kwargs[prop] = param.kwargs[prop]

        for prop in safe.kwargs:
            del param.kwargs[prop]

        return

    def _validateRequestBase(self, param, safe, valFunc, requestName=None):
        data = cherrypy.request.body.read()
        if data:
            request_args = json.loads(data)
            if requestName:
                request_args["RequestName"] = requestName

        else:
            # this is actually an error case
            # cherrypy.log(str(param.kwargs))
            request_args = {}
            for prop in param.kwargs:
                request_args[prop] = param.kwargs[prop]

            for prop in request_args:
                del param.kwargs[prop]
            if requestName:
                request_args["RequestName"] = requestName
            request_args = [request_args]

        safe.kwargs['workload_pair_list'] = []
        if isinstance(request_args, dict):
            request_args = [request_args]
        for args in request_args:
            workload, r_args = valFunc(args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

    def _get_request_names(self, ids):
        "Extract request names from given documents"
        # cherrypy.log("request names %s" % ids)
        doc = {}
        if isinstance(ids, list):
            for rid in ids:
                doc[rid] = 'on'
        elif isinstance(ids, basestring):
            doc[ids] = 'on'

        docs = []
        for key in doc.keys():
            if key.startswith('request'):
                rid = key.split('request-')[-1]
                if rid != 'all':
                    docs.append(rid)
                del doc[key]
        return docs

    def _getMultiRequestArgs(self, multiRequestForm):
        request_args = {}
        for prop in multiRequestForm:
            if prop == "ids":
                request_names = self._get_request_names(multiRequestForm["ids"])
            elif prop == "new_status":
                request_args["RequestStatus"] = multiRequestForm[prop]
            # remove this
            # elif prop in ["CustodialSites", "AutoApproveSubscriptionSites"]:
            #    request_args[prop] = [multiRequestForm[prop]]
            else:
                request_args[prop] = multiRequestForm[prop]
        return request_names, request_args

    def _validateMultiRequests(self, param, safe, valFunc):

        data = cherrypy.request.body.read()
        if data:
            request_names, request_args = self._getMultiRequestArgs(json.loads(data))
        else:
            # this is actually an error case
            # cherrypy.log(str(param.kwargs))
            request_names, request_args = self._getMultiRequestArgs(param.kwargs)

            for prop in request_args:
                if prop == "RequestStatus":
                    del param.kwargs["new_status"]
                else:
                    del param.kwargs[prop]

            del param.kwargs["ids"]

        safe.kwargs['workload_pair_list'] = []

        for request_name in request_names:
            request_args["RequestName"] = request_name
            workload, r_args = valFunc(request_args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

        safe.kwargs["multi_update_flag"] = True

    def _getRequestNamesFromBody(self, param, safe, valFunc):

        request_names = json.loads(cherrypy.request.body.read())
        safe.kwargs['workload_pair_list'] = request_names
        safe.kwargs["multi_names_flag"] = True

    def validate(self, apiobj, method, api, param, safe):
        # To make validation succeed, move the validated arguments into
        # safe and leave param empty; otherwise raise an error.
        try:
            if method == 'GET':
                self._validateGET(param, safe)

            if method == 'PUT':
                args_length = len(param.args)
                if args_length == 1:
                    requestName = param.args[0]
                    param.args.pop()
                else:
                    requestName = None
                self._validateRequestBase(param, safe, validate_request_update_args, requestName)
                # TODO: handle multiple clone
            #                 if len(param.args) == 2:
            #                     #validate clone case
            #                     if param.args[0] == "clone":
            #                         param.args.pop()
            #                         return None, request_args

            if method == 'POST':
                args_length = len(param.args)
                if args_length == 1 and param.args[0] == "multi_update":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._validateMultiRequests(param, safe, validate_request_update_args)
                elif args_length == 1 and param.args[0] == "bynames":
                    # special case for bulk retrieval by request names from the browser.
                    param.args.pop()
                    self._getRequestNamesFromBody(param, safe, validate_request_update_args)
                else:
                    self._validateRequestBase(param, safe, validate_request_create_args)
        except InvalidSpecParameterValue as ex:
            raise ex
        except Exception as ex:
            # TODO add proper error message instead of trace back
            msg = traceback.format_exc()
            cherrypy.log("Error: %s" % msg)
            if hasattr(ex, "message"):
                if hasattr(ex.message, '__call__'):
                    msg = ex.message()
                else:
                    msg = str(ex)
            else:
                msg = str(ex)
            raise InvalidSpecParameterValue(msg)

    def initialize_clone(self, request_name):
        requests = self.reqmgr_db_service.getRequestByNames(request_name)
        clone_args = requests.values()[0]
        # overwrite the name and time stamp.
        initialize_request_args(clone_args, self.config, clone=True)
        # timestamp status update

        spec = loadSpecByType(clone_args["RequestType"])
        workload = spec.factoryWorkloadConstruction(clone_args["RequestName"],
                                                    clone_args)
        return (workload, clone_args)
    
    def _maskTaskStepChain(self, masked_dict, req_dict, chain_name, mask_key):

        mask_exist = False
        num_loop = req_dict["%sChain" % chain_name]
        for i in range(num_loop):
            if mask_key in req_dict["%s%s" % (chain_name, i + 1)]:
                mask_exist = True
                break
        if mask_exist:
            defaultValue = masked_dict[mask_key]
            masked_dict[mask_key] = []
            chain_key = "%sName" % chain_name
            # if a given Task/Step doesn't define mask_key itself, fall back
            # to the top-level default value taken from the request
            for i in range(num_loop):
                chain = req_dict["%s%s" % (chain_name, i + 1)]
                if mask_key in chain:
                    masked_dict[mask_key].append({chain_key: chain[chain_key], mask_key: chain[mask_key]})
                else:
                    if isinstance(defaultValue, dict):
                        value = defaultValue.get(chain_key, None)
                    else:
                        value = defaultValue
                    masked_dict[mask_key].append({chain_key: chain[chain_key], mask_key: value})
        return
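
    # Worked example consistent with the fallback logic above (made-up
    # request document): masking "Memory" on a two-task TaskChain,
    #
    #   req_dict    = {"TaskChain": 2,
    #                  "Task1": {"TaskName": "Proc", "Memory": 4000},
    #                  "Task2": {"TaskName": "Merge"}}
    #   masked_dict = {"Memory": 2300}   # top-level default
    #
    # ends with
    #
    #   masked_dict == {"Memory": [{"TaskName": "Proc",  "Memory": 4000},
    #                              {"TaskName": "Merge", "Memory": 2300}]}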
                 
    def _mask_result(self, mask, result):
        
        if len(mask) == 1 and mask[0] == "DAS":
            mask = ReqMgrConfigDataCache.getConfig("DAS_RESULT_FILTER")["filter_list"]
        
        if len(mask) > 0:
            masked_result = {}
            for req_name, req_info in result.items():
                masked_result.setdefault(req_name, {})
                for mask_key in mask:
                    masked_result[req_name].update({mask_key: req_info.get(mask_key, None)})
                    if "TaskChain" in req_info:
                        self._maskTaskStepChain(masked_result[req_name], req_info, "Task", mask_key)
                    elif "StepChain" in req_info:
                        self._maskTaskStepChain(masked_result[req_name], req_info, "Step", mask_key)
                        
            return masked_result
        else:
            return result
    
    @restcall(formats=[('text/plain', PrettyJSONFormat()), ('application/json', JSONFormat())])
    def get(self, **kwargs):
        """
        Returns request info depending on the conditions set by kwargs.
        The currently defined kwargs are the following:
        statusList, requestNames, requestType, prepID, inputDataset, outputDataset, dateRange
        If jobInfo is True, returns job information about the request as well.

        TODO:
        stuff like this has to be masked out from the result of this call:
            _attachments: {u'spec': {u'stub': True, u'length': 51712, u'revpos': 2, u'content_type': u'application/json'}}
            _id: maxa_RequestString-OVERRIDE-ME_130621_174227_9225
            _rev: 4-c6ceb2737793aaeac3f1cdf591593da4

        """
        # list of status
        status = kwargs.get("status", [])
        # list of request names
        name = kwargs.get("name", [])
        request_type = kwargs.get("request_type", [])
        prep_id = kwargs.get("prep_id", [])
        inputdataset = kwargs.get("inputdataset", [])
        outputdataset = kwargs.get("outputdataset",[])
        date_range = kwargs.get("date_range", False)
        campaign = kwargs.get("campaign", [])
        workqueue = kwargs.get("workqueue", [])
        team = kwargs.get("team", [])
        mc_pileup = kwargs.get("mc_pileup", [])
        data_pileup = kwargs.get("data_pileup", [])
        requestor = kwargs.get("requestor", [])
        mask = kwargs.get("mask", [])
        detail = kwargs.get("detail", True)
        # set the return format. The default format has the request name as a key;
        # if common_dict is set to 1, it returns a list of dictionaries with a RequestName field.
        common_dict = int(kwargs.get("common_dict", 0))
        if detail in (False, "false", "False", "FALSE"):
            option = {"include_docs": False}
        else:
            option = {"include_docs": True}
        # everything should use the stale view; this option is only needed for tests
        _nostale = kwargs.get("_nostale", False)
        if _nostale:
            self.reqmgr_db_service._setNoStale()

        request_info = []
        
        if len(status) == 1 and status[0] == "ACTIVE":
            status = ACTIVE_STATUS
        if status and not team and not request_type and not requestor:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bystatus", option, status))
        if status and team:
            query_keys = [[t, s] for t in team for s in status] 
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView("byteamandstatus", option, query_keys))
        if status and request_type:
            query_keys = [[s, rt] for rt in request_type for s in status]
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("requestsbystatusandtype", 
                                                                             option, query_keys))
        if status and requestor:
            query_keys = [[s, r] for r in requestor for s in status] 
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView("bystatusandrequestor", option, query_keys))        
            
        if name:
            request_info.append(self.reqmgr_db_service.getRequestByNames(name))
        if prep_id:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byprepid", option, prep_id))
        if inputdataset:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byinputdataset", option, inputdataset))
        if outputdataset:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byoutputdataset", option, outputdataset))
        if date_range:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydate", option, date_range))
        if campaign:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bycampaign", option, campaign))
        if workqueue:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byworkqueue", option, workqueue))
        if mc_pileup:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bymcpileup", option, mc_pileup))
        if data_pileup:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydatapileup", option, data_pileup))
        # get the intersection of the request info
        result = self._intersection_of_request_info(request_info)
        
        if len(result) == 0:
            return []
        
        result = self._mask_result(mask, result)
        # If detail is set to False, return just the list of request names
        if not option["include_docs"]:
            return result.keys()
        
        if common_dict == 1:
            response_list = result.values()
        else:
            response_list = [result] 
        return rows(response_list)
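
    # A sketch of how the filters above combine (argument values are
    # hypothetical): each supplied kwarg contributes one couch-view lookup and
    # only requests matching *all* of them survive the intersection, e.g.
    #
    #   GET .../request?status=running-closed&campaign=Run2017A&detail=false
    #
    # resolves to get(status=["running-closed"], campaign=["Run2017A"],
    # detail="false") and, because include_docs is disabled, returns just the
    # matching request names.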

    def _intersection_of_request_info(self, request_info):
        requests = {}
        if len(request_info) < 1:
            return requests

        request_key_set = set(request_info[0].keys())
        for info in request_info:
            request_key_set = set(request_key_set) & set(info.keys())
        # TODO: need to assume some data might not contain include_docs
        for request_name in request_key_set:
            requests[request_name] = request_info[0][request_name]
        return requests
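
    # For example, intersecting two hypothetical view results
    #
    #   request_info = [{"req_A": docA, "req_B": docB},
    #                   {"req_B": docB, "req_C": docC}]
    #
    # leaves only "req_B"; its document is taken from the first element, so
    # every view is assumed to return the same level of detail.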

        # TODO move this out of this class

    def filterCouchInfo(self, couchInfo):
        for key in ['_rev', '_attachments']:
            if key in couchInfo:
                del couchInfo[key]

    def _combine_request(self, request_info, requestAgentUrl, cache):
        keys = {}
        requestAgentUrlList = []
        for row in requestAgentUrl["rows"]:
            request = row["key"][0]
            if request not in keys:
                keys[request] = []
            keys[request].append(row["key"][1])

        for request in request_info:
            for agentUrl in keys.get(request, []):
                requestAgentUrlList.append([request, agentUrl])

        return requestAgentUrlList

    def _retrieveResubmissionChildren(self, request_name):

        result = self.reqmgr_db.loadView('ReqMgr', 'childresubmissionrequests', keys=[request_name])['rows']
        childrenRequestNames = []
        for child in result:
            childrenRequestNames.append(child['id'])
            childrenRequestNames.extend(self._retrieveResubmissionChildren(child['id']))
        return childrenRequestNames
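
    # The couch view is walked recursively, so for a hypothetical ACDC chain
    #   parent -> acdc_1 -> acdc_1_1
    # calling self._retrieveResubmissionChildren("parent") returns
    # ["acdc_1", "acdc_1_1"].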

    def _handleNoStatusUpdate(self, workload, request_args):
        """
        Only a few values can be updated without a state transition being involved:
        currently 'RequestPriority' and the stats keys 'total_jobs', 'input_lumis', 'input_events', 'input_num_files'.
        """
        if 'RequestPriority' in request_args:
            # must update three places: GQ elements, workload_cache and workload spec
            self.gq_service.updatePriority(workload.name(), request_args['RequestPriority'])
            report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args)
            workload.setPriority(request_args['RequestPriority'])
            workload.saveCouchUrl(workload.specUrl())
        elif "total_jobs" in request_args:
            # only the GQ updates these stats
            # request_args should contain only the 4 keys 'total_jobs', 'input_lumis', 'input_events', 'input_num_files'
            report = self.reqmgr_db_service.updateRequestStats(workload.name(), request_args)
        else:
            raise InvalidSpecParameterValue("can't update value without state transition: %s" % request_args)
        
        return report
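
    # Two request_args shapes reach the branches above (values hypothetical):
    #   {"RequestPriority": 120000}                       -> priority update path
    #   {"total_jobs": 100, "input_lumis": 5000,
    #    "input_events": 1000000, "input_num_files": 50}  -> GQ stats path
    # Anything else raises InvalidSpecParameterValue.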

    def _handleAssignmentApprovedTransition(self, workload, request_args, dn):
        report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)
        return report
        
    def _handleAssignmentStateTransition(self, workload, request_args, dn):
        
        req_status = request_args["RequestStatus"]
        if req_status == "assigned" and not request_args.get('Team', '').strip():
            raise InvalidSpecParameterValue("Team must be set during workflow assignment: %s" % request_args)
            
        if ('SoftTimeout' in request_args) and ('GracePeriod' in request_args):
            request_args['SoftTimeout'] = int(request_args['SoftTimeout'])
            # TODO: not sure why GracePeriod arrives as a string from the web interface, but convert it here
            request_args['GracePeriod'] = int(request_args['GracePeriod'])
            request_args['HardTimeout'] = request_args['SoftTimeout'] + request_args['GracePeriod']
        
        # Only allow extra value updates for the assigned status
        cherrypy.log("INFO: Assign request, input args: %s ..." % request_args)
        try:
            workload.updateArguments(request_args)
        except Exception as ex:
            msg = traceback.format_exc()
            cherrypy.log("Error for request args %s: %s" % (request_args, msg))
            raise InvalidSpecParameterValue(str(ex))
        
        # validate/update OutputDatasets after ProcessingString and AcquisitionEra are updated
        request_args['OutputDatasets'] = workload.listOutputDatasets()
        validateOutputDatasets(request_args['OutputDatasets'], workload.getDbsUrl())

        # legacy update schema to support ops script
        loadRequestSchema(workload, request_args)

        report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)
        workload.saveCouch(self.config.couch_host, self.config.couch_reqmgr_db)
        return report
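
    # Timeout arithmetic sketch, with hypothetical input values: given
    #   {"SoftTimeout": "169200", "GracePeriod": "300", ...}
    # both fields are cast to int and the derived limit becomes
    #   HardTimeout = 169200 + 300 = 169500 seconds.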

    def _handleCascadeUpdate(self, workload, request_args, dn):
        
        """
        Only the closed-out and announced statuses have this option.
        """
        req_status = request_args["RequestStatus"]
        # check whether the cascade option is set
        if request_args["cascade"]:
            cascade_list = self._retrieveResubmissionChildren(workload.name())
            for req_name in cascade_list:
                self.reqmgr_db_service.updateRequestStatus(req_name, req_status, dn)
        # update original workflow status
        report = self.reqmgr_db_service.updateRequestStatus(workload.name(), req_status, dn)
        return report
    
    def _handleOnlyStateTransition(self, workload, req_status, dn):
        """
        It handles only the state transition. Special handling needed if a
        request is aborted or force completed.
        """
        if req_status in ["aborted", "force-complete"]:
            # cancel the workflow first
            self.gq_service.cancelWorkflow(workload.name())
        # update the request status in couchdb
        report = self.reqmgr_db_service.updateRequestStatus(workload.name(), req_status, dn)
        return report
    
    def _updateRequest(self, workload, request_args):
        dn = cherrypy.request.user.get("dn", "unknown")

        if workload is None:
            (workload, request_args) = self.initialize_clone(request_args["OriginalRequestName"])
            return self.post([(workload, request_args)])

        if "RequestStatus" not in request_args:
            report = self._handleNoStatusUpdate(workload, request_args)
            
        else:
            req_status = request_args["RequestStatus"]
            # assignment-approved allows only a Priority update
            if len(request_args) == 2 and req_status == "assignment-approved":
                report = self._handleAssignmentApprovedTransition(workload, request_args, dn)
            elif len(request_args) > 1 and req_status == "assigned":
                report = self._handleAssignmentStateTransition(workload, request_args, dn)
            elif len(request_args) == 2 and req_status in ["closed-out", "announced"] and \
                "cascade" in request_args:
                report = self._handleCascadeUpdate(workload, request_args, dn)
            elif len(request_args) == 1:
                # If the status change is to aborted, force-complete or rejected, ignore other arguments
                report = self._handleOnlyStateTransition(workload, req_status, dn)
            else:
                raise InvalidSpecParameterValue(
                    "can't update value except transition to assigned status: %s" % request_args)

        if report == 'OK':
            return {workload.name(): "OK"}
        else:
            return {workload.name(): "ERROR"}
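
    # Dispatch summary for the branches above (request_args are hypothetical):
    #   {"RequestStatus": "assignment-approved", "RequestPriority": 1} -> priority-only update
    #   {"RequestStatus": "assigned", "Team": "production", ...}       -> full assignment
    #   {"RequestStatus": "announced", "cascade": True}                -> cascade to children
    #   {"RequestStatus": "aborted"}                                   -> state transition only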

    @restcall(formats=[('application/json', JSONFormat())])
    def put(self, workload_pair_list):
        """workloadPairList is a list of tuple containing (workload, requeat_args)"""
        report = []
        for workload, request_args in workload_pair_list:
            result = self._updateRequest(workload, request_args)
            report.append(result)
        return report

    @restcall(formats=[('application/json', JSONFormat())])
    def delete(self, request_name):
        cherrypy.log("INFO: Deleting request document '%s' ..." % request_name)
        try:
            self.reqmgr_db.delete_doc(request_name)
        except CouchError as ex:
            msg = "ERROR: Delete failed."
            cherrypy.log(msg + " Reason: %s" % ex)
            raise cherrypy.HTTPError(404, msg)
            # TODO
        # delete should also happen on WMStats
        cherrypy.log("INFO: Delete '%s' done." % request_name)

    def _update_additional_request_args(self, workload, request_args):
        """
        Add properties to request_args which are not initially set by the user.
        This data will be put into couchdb.
        Update request_args here if additional information needs to be put into couchdb.
        """
        request_args['RequestWorkflow'] = sanitizeURL("%s/%s/%s/spec" % (request_args["CouchURL"],
                                                                         request_args["CouchWorkloadDBName"],
                                                                         workload.name()))['url']

        # Add the output datasets if necessary
        # for some bizarre reason OutputDatasets is a list of lists
        request_args['OutputDatasets'] = workload.listOutputDatasets()
        
        # Add the initial priority only at request creation
        request_args['InitialPriority'] = request_args["RequestPriority"]
         
        # TODO: remove this after reqmgr2 replaces reqmgr (reqmgr2Only)
        request_args['ReqMgr2Only'] = True
        return
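
    # For instance, with hypothetical values
    #   CouchURL="https://couch.example.cern.ch/couchdb",
    #   CouchWorkloadDBName="reqmgr_workload_cache"
    # and a workload named "user_Task_v1", RequestWorkflow becomes (after
    # sanitizeURL strips any credentials):
    #   https://couch.example.cern.ch/couchdb/reqmgr_workload_cache/user_Task_v1/spec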

    @restcall(formats=[('application/json', JSONFormat())])
    def post(self, workload_pair_list, multi_update_flag=False, multi_names_flag=False):
        """
        Create and update couchDB with a new request.
        The request arguments are passed from validation
        (validation converts cherrypy.request.body data to arguments).

        TODO:
        this method will have some parts factored out so that e.g. clone call
        can share functionality.

        NOTES:
        1) do not strip spaces, #4705 will fail upon injection with spaces;
            currently the chain relies on a number of things coming in #4705
        2) reqInputArgs = Utilities.unidecode(json.loads(body))
            (from ReqMgrRESTModel.putRequest)
        """

        # storing the request document into Couch

        if multi_update_flag:
            return self.put(workload_pair_list)
        if multi_names_flag:
            return self.get(name=workload_pair_list)

        out = []
        for workload, request_args in workload_pair_list:
            self._update_additional_request_args(workload, request_args)
            
            # legacy update schema to support ops script
            loadRequestSchema(workload, request_args)
            
            cherrypy.log("INFO: Create request, input args: %s ..." % request_args)
            workload.saveCouch(request_args["CouchURL"], request_args["CouchWorkloadDBName"],
                               metadata=request_args)
            out.append({'request': workload.name()})
        return out
Example #49
class Tier0FeederPoller(BaseWorkerThread):

    def __init__(self, config):
        """
        _init_

        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package = "T0.WMBS",
                                     logger = logging,
                                     dbinterface = myThread.dbi)

        self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile
        self.specDirectory = config.Tier0Feeder.specDirectory
        self.dropboxuser = getattr(config.Tier0Feeder, "dropboxuser", None)
        self.dropboxpass = getattr(config.Tier0Feeder, "dropboxpass", None)

        self.transferSystemBaseDir = getattr(config.Tier0Feeder, "transferSystemBaseDir", None)
        if self.transferSystemBaseDir != None:
            if not os.path.exists(self.transferSystemBaseDir):
                self.transferSystemBaseDir = None

        self.dqmUploadProxy = getattr(config.Tier0Feeder, "dqmUploadProxy", None)
        self.serviceProxy = getattr(config.Tier0Feeder, "serviceProxy", None)

        self.localRequestCouchDB = RequestDBWriter(config.AnalyticsDataCollector.localT0RequestDBURL, 
                                                   couchapp = config.AnalyticsDataCollector.RequestCouchApp)

        hltConfConnectUrl = config.HLTConfDatabase.connectUrl
        dbFactoryHltConf = DBFactory(logging, dburl = hltConfConnectUrl, options = {})
        dbInterfaceHltConf = dbFactoryHltConf.connect()
        daoFactoryHltConf = DAOFactory(package = "T0.WMBS",
                                       logger = logging,
                                       dbinterface = dbInterfaceHltConf)
        self.getHLTConfigDAO = daoFactoryHltConf(classname = "RunConfig.GetHLTConfig")

        storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl
        dbFactoryStorageManager = DBFactory(logging, dburl = storageManagerConnectUrl, options = {})
        self.dbInterfaceStorageManager = dbFactoryStorageManager.connect()

        self.getExpressReadyRunsDAO = None
        if hasattr(config, "PopConLogDatabase"):
            popConLogConnectUrl = getattr(config.PopConLogDatabase, "connectUrl", None)
            if popConLogConnectUrl != None:
                dbFactoryPopConLog = DBFactory(logging, dburl = popConLogConnectUrl, options = {})
                dbInterfacePopConLog = dbFactoryPopConLog.connect()
                daoFactoryPopConLog = DAOFactory(package = "T0.WMBS",
                                                 logger = logging,
                                                 dbinterface = dbInterfacePopConLog)
                self.getExpressReadyRunsDAO = daoFactoryPopConLog(classname = "Tier0Feeder.GetExpressReadyRuns")

        self.haveT0DataSvc = False
        if hasattr(config, "T0DataSvcDatabase"):
            t0datasvcConnectUrl = getattr(config.T0DataSvcDatabase, "connectUrl", None)
            if t0datasvcConnectUrl != None:
                self.haveT0DataSvc = True
                dbFactoryT0DataSvc = DBFactory(logging, dburl = t0datasvcConnectUrl, options = {})
                dbInterfaceT0DataSvc = dbFactoryT0DataSvc.connect()
                self.daoFactoryT0DataSvc = DAOFactory(package = "T0.WMBS",
                                                      logger = logging,
                                                      dbinterface = dbInterfaceT0DataSvc)

        return

    def algorithm(self, parameters = None):
        """
        _algorithm_

        """
        logging.debug("Running Tier0Feeder algorithm...")
        myThread = threading.currentThread()

        findNewRunsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewRuns")
        findNewRunStreamsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewRunStreams")
        findNewExpressRunsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewExpressRuns")
        releaseExpressDAO = self.daoFactory(classname = "Tier0Feeder.ReleaseExpress")
        feedStreamersDAO = self.daoFactory(classname = "Tier0Feeder.FeedStreamers")
        markWorkflowsInjectedDAO = self.daoFactory(classname = "Tier0Feeder.MarkWorkflowsInjected")

        tier0Config = None
        try:
            tier0Config = loadConfigurationFile(self.tier0ConfigFile)
        except:
            # usually happens when there are syntax errors in the configuration
            logging.exception("Cannot load Tier0 configuration file, not configuring new runs and run/streams")

        # only configure new runs and run/streams if we have a valid Tier0 configuration
        if tier0Config != None:

            #
            # find new runs, setup global run settings and stream/dataset/trigger mapping
            #
            runHltkeys = findNewRunsDAO.execute(transaction = False)
            for run, hltkey in sorted(runHltkeys.items()):

                hltConfig = None

                # local runs have no hltkey and are configured differently
                if hltkey != None:

                    # retrieve HLT configuration and make sure it's usable
                    try:
                        hltConfig = self.getHLTConfigDAO.execute(hltkey, transaction = False)
                        if hltConfig['process'] == None or len(hltConfig['mapping']) == 0:
                            raise RuntimeError("HLTConfDB query returned no process or mapping")
                    except:
                        logging.exception("Can't retrieve hltkey %s for run %d" % (hltkey, run))
                        continue

                try:
                    RunConfigAPI.configureRun(tier0Config, run, hltConfig)
                except:
                    logging.exception("Can't configure for run %d" % (run))

            #
            # find unconfigured run/stream with data
            # populate RunConfig, setup workflows/filesets/subscriptions
            # 
            runStreams = findNewRunStreamsDAO.execute(transaction = False)
            for run in sorted(runStreams.keys()):
                for stream in sorted(runStreams[run]):
                    try:
                        RunConfigAPI.configureRunStream(tier0Config,
                                                        run, stream,
                                                        self.specDirectory,
                                                        self.dqmUploadProxy)
                    except:
                        logging.exception("Can't configure for run %d and stream %s" % (run, stream))

        #
        # end runs which are active and have ended according to the EoR StorageManager records
        #
        RunLumiCloseoutAPI.endRuns(self.dbInterfaceStorageManager)

        #
        # release runs for Express
        #
        runs = findNewExpressRunsDAO.execute(transaction = False)

        if len(runs) > 0:

            binds = []
            for run in runs:
                binds.append( { 'RUN' : run } )

            if self.getExpressReadyRunsDAO != None:
                runs = self.getExpressReadyRunsDAO.execute(binds = binds, transaction = False)

            if len(runs) > 0:

                binds = []
                for run in runs:
                    binds.append( { 'RUN' : run } )

                releaseExpressDAO.execute(binds = binds, transaction = False)

        #
        # release runs for PromptReco
        #
        RunConfigAPI.releasePromptReco(tier0Config,
                                       self.specDirectory,
                                       self.dqmUploadProxy)

        #
        # insert express and reco configs into Tier0 Data Service
        #
        if self.haveT0DataSvc:
            self.updateExpressConfigsT0DataSvc()
            self.updateRecoConfigsT0DataSvc()
            self.updateRecoReleaseConfigsT0DataSvc()

        #
        # mark express and repack workflows as injected if certain conditions are met
        # (we don't do it immediately to prevent the TaskArchiver from cleaning up too early)
        #
        markWorkflowsInjectedDAO.execute(self.transferSystemBaseDir != None,
                                         transaction = False)

        #
        # close stream/lumis for run/streams that are active (fileset exists and open)
        #
        RunLumiCloseoutAPI.closeLumiSections(self.dbInterfaceStorageManager)

        #
        # feed new data into existing filesets
        #
        try:
            myThread.transaction.begin()
            feedStreamersDAO.execute(conn = myThread.transaction.conn, transaction = True)
        except:
            logging.exception("Can't feed data, bailing out...")
            raise
        else:
            myThread.transaction.commit()

        #
        # run ended and run/stream fileset open
        #    => check for complete lumi_closed record, all lumis finally closed and all data feed
        #          => if all conditions satisfied, close the run/stream fileset
        #
        RunLumiCloseoutAPI.closeRunStreamFilesets()

        #
        # check and delete active split lumis
        #
        RunLumiCloseoutAPI.checkActiveSplitLumis()

        #
        # insert workflows into CouchDB for monitoring
        #
        self.feedCouchMonitoring()

        #
        # Update Couch when Repack and Express have closed input filesets (analog to old T0 closeout)
        #
        self.closeOutRealTimeWorkflows()

        #
        # send repacked notifications to StorageManager
        #
        if self.transferSystemBaseDir != None:
            self.notifyStorageManager()

        #
        # upload PCL conditions to DropBox
        #
        ConditionUploadAPI.uploadConditions(self.dropboxuser, self.dropboxpass, self.serviceProxy)

        return

    def feedCouchMonitoring(self):
        """
        _feedCouchMonitoring_

        check for workflows that haven't been uploaded to Couch for monitoring yet

        """
        getStreamerWorkflowsForMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.GetStreamerWorkflowsForMonitoring")
        getPromptRecoWorkflowsForMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.GetPromptRecoWorkflowsForMonitoring")
        markTrackedWorkflowMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.MarkTrackedWorkflowMonitoring")
        workflows = getStreamerWorkflowsForMonitoringDAO.execute()
        workflows += getPromptRecoWorkflowsForMonitoringDAO.execute()

        if len(workflows) == 0:
            logging.debug("No workflows to publish to couch monitoring, doing nothing")

        if workflows:
            logging.debug(" Going to publish %d workflows" % len(workflows))
            for (workflowId, run, workflowName) in workflows:
                logging.info(" Publishing workflow %s to monitoring" % workflowName)
                #TODO: add more information about the workflow if it needs to be kept longer
                # than the workflow life cycle.
                doc = {}
                doc["RequestName"] =   workflowName
                doc["Run"]      =   run
                response = self.localRequestCouchDB.insertGenericRequest(doc)
                if response in ("OK", "EXISTS"):
                    logging.info(" Successfully uploaded request %s" % workflowName)
                    # Here we have to trust the insert, if it doesn't happen will be easy to spot on the logs
                    markTrackedWorkflowMonitoringDAO.execute(workflowId)

        return
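
    # The monitoring document uploaded above is intentionally minimal, e.g.
    # (hypothetical values): {"RequestName": "Repack_Run123456_StreamA",
    # "Run": 123456}; anything else can be looked up from the workflow itself.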

    def closeOutRealTimeWorkflows(self):
        """
        _closeOutRealTimeWorkflows_

        Updates couch with the closeout status of Repack and Express
        PromptReco should be closed out automatically

        """
        getNotClosedOutWorkflowsDAO = self.daoFactory(classname = "Tier0Feeder.GetNotClosedOutWorkflows")
        workflows = getNotClosedOutWorkflowsDAO.execute()

        if len(workflows) == 0:
            logging.debug("No workflows to close out, doing nothing")

        if workflows:
            for workflow in workflows:
                (workflowId, filesetId, filesetOpen, workflowName) = workflow
                # find returns -1 if the string is not found
                if workflowName.find('PromptReco') >= 0:
                    logging.debug("Closing out instantaneously PromptReco Workflow %s" % workflowName)
                    self.updateClosedState(workflowName, workflowId)
                else:
                    # Check if the fileset (which you already know) is closed or not
                    # FIXME: no better way to do it? What comes from the DAO is a string,
                    # casting to bool or int doesn't help much. It works like this:
                    if filesetOpen == '0':
                        self.updateClosedState(workflowName, workflowId)

        return

    def updateClosedState(self, workflowName, workflowId):
        """
        _updateClosedState_

        Mark a workflow as Closed

        """
        markCloseoutWorkflowMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.MarkCloseoutWorkflowMonitoring")

        response = self.localRequestCouchDB.updateRequestStatus(workflowName, 'Closed')

        if response in ("OK", "EXISTS"):
            logging.debug("Successfully closed workflow %s" % workflowName)
            markCloseoutWorkflowMonitoringDAO.execute(workflowId)

        return

    def notifyStorageManager(self):
        """
        _notifyStorageManager_

        Find all finished streamers for closed all run/stream
        Send the notification message to StorageManager
        Update the streamer status to finished (deleted = 1)

        """
        getFinishedStreamersDAO = self.daoFactory(classname = "SMNotification.GetFinishedStreamers")
        markStreamersFinishedDAO = self.daoFactory(classname = "SMNotification.MarkStreamersFinished")

        allFinishedStreamers = getFinishedStreamersDAO.execute(transaction = False)

        # guard against num == 0 when there are fewer than 50 streamers
        num = max(1, len(allFinishedStreamers) // 50)
        for finishedStreamers in [allFinishedStreamers[i::num] for i in range(num)]:

            if not finishedStreamers:
                continue

            streamers = []
            filenameParams = ""

            for (streamerId, lfn) in finishedStreamers:
                streamers.append(streamerId)
                filenameParams += "-FILENAME %s " % os.path.basename(lfn)

            logging.debug("Notifying transfer system about processed streamers")
            p = subprocess.Popen("/bin/bash",stdin=subprocess.PIPE,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
            output, error = p.communicate("""
            export T0_BASE_DIR=%s
            export T0ROOT=${T0_BASE_DIR}/T0
            export CONFIG=${T0_BASE_DIR}/Config/TransferSystem_CERN.cfg
 
            export PERL5LIB=${T0ROOT}/perl_lib
 
            ${T0ROOT}/operations/sendRepackedStatus.pl --config $CONFIG %s
            """ % (self.transferSystemBaseDir, filenameParams))

            if len(error) > 0:
                logging.error("ERROR: Could not notify transfer system about processed streamers")
                logging.error("ERROR: %s" % error)

            markStreamersFinishedDAO.execute(streamers, transaction = False)

        return
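
    # Note on the chunking above: the stride slice splits the streamer list
    # into `num` interleaved batches, e.g. for six streamers and num=2:
    #   [s0, s1, s2, s3, s4, s5][0::2] -> [s0, s2, s4]
    #   [s0, s1, s2, s3, s4, s5][1::2] -> [s1, s3, s5]
    # so every streamer is sent to the transfer system exactly once.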

    def updateExpressConfigsT0DataSvc(self):
        """
        _updateExpressConfigsT0DataSvc_

        Check which express_config rows are missing
        in the Tier0 Data Service and insert them,
        also record that fact in t0ast

        """
        getExpressConfigsDAO = self.daoFactory(classname = "T0DataSvc.GetExpressConfigs")
        expressConfigs = getExpressConfigsDAO.execute(transaction = False)

        if len(expressConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in expressConfigs:
                bindsInsert.append( { 'RUN' : config['run'],
                                      'STREAM' : config['stream'],
                                      'CMSSW' : config['cmssw'],
                                      'SCRAM_ARCH' : config['scram_arch'],
                                      'RECO_CMSSW' : config['reco_cmssw'],
                                      'RECO_SCRAM_ARCH' : config['reco_scram_arch'],
                                      'GLOBAL_TAG' : config['global_tag'][:50],
                                      'SCENARIO' : config['scenario'] } )
                bindsUpdate.append( { 'RUN' : config['run'],
                                      'STREAM' : config['stream'] } )

            insertExpressConfigsDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertExpressConfigs")
            insertExpressConfigsDAO.execute(binds = bindsInsert, transaction = False)

            updateExpressConfigsDAO = self.daoFactory(classname = "T0DataSvc.UpdateExpressConfigs")
            updateExpressConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        return

    def updateRecoConfigsT0DataSvc(self):
        """
        _updateRecoConfigsT0DataSvc_

        Check which reco_config rows are missing
        in the Tier0 Data Service and insert them,
        also record that fact in t0ast

        """
        getRecoConfigsDAO = self.daoFactory(classname = "T0DataSvc.GetRecoConfigs")
        recoConfigs = getRecoConfigsDAO.execute(transaction = False)

        if len(recoConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in recoConfigs:
                bindsInsert.append( { 'RUN' : config['run'],
                                      'PRIMDS' : config['primds'],
                                      'CMSSW' : config['cmssw'],
                                      'SCRAM_ARCH' : config['scram_arch'],
                                      'GLOBAL_TAG' : config['global_tag'][:50],
                                      'SCENARIO' : config['scenario'] } )
                bindsUpdate.append( { 'RUN' : config['run'],
                                      'PRIMDS' : config['primds'] } )

            insertRecoConfigsDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertRecoConfigs")
            insertRecoConfigsDAO.execute(binds = bindsInsert, transaction = False)

            updateRecoConfigsDAO = self.daoFactory(classname = "T0DataSvc.UpdateRecoConfigs")
            updateRecoConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        return

    def updateRecoReleaseConfigsT0DataSvc(self):
        """
        _updateRecoReleaseConfigsT0DataSvc_

        Insert information about PromptReco release into the Tier0 Data Service.
        Already aggregate by run, if one primary dataset is released that means
        the whole run is considered released.

        """
        getRecoReleaseConfigsDAO = self.daoFactory(classname = "T0DataSvc.GetRecoReleaseConfigs")
        recoReleaseConfigs = getRecoReleaseConfigsDAO.execute(transaction = False)

        if len(recoReleaseConfigs) > 0:

            bindsInsert = []
            bindsUpdate = []
            for config in recoReleaseConfigs:

                locked = int(config['released'] > 0)

                bindsInsert.append( { 'RUN' : config['run'],
                                      'LOCKED' : locked } )
                bindsUpdate.append( { 'RUN' : config['run'],
                                      'IN_DATASVC' : locked + 1 } )

            insertRecoReleaseConfigsDAO = self.daoFactoryT0DataSvc(classname = "T0DataSvc.InsertRecoReleaseConfigs")
            insertRecoReleaseConfigsDAO.execute(binds = bindsInsert, transaction = False)

            updateRecoReleaseConfigsDAO = self.daoFactory(classname = "T0DataSvc.UpdateRecoReleaseConfigs")
            updateRecoReleaseConfigsDAO.execute(binds = bindsUpdate, transaction = False)

        return
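
    # A sketch of the release bookkeeping above: config['released'] counts the
    # released primary datasets for a run, so locked = int(released > 0) is 0
    # or 1, and the t0ast marker is bumped to locked + 1. Reading that as
    # 1 = "announced only" and 2 = "announced and locked" is an assumption;
    # the exact semantics live in the T0DataSvc DAOs.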

    def terminate(self, params):
        """
        _terminate_

        Kill the code after one final pass when called by the master thread.

        """
        logging.debug("terminating immediately")
Example #50
class Request(RESTEntity):
    def __init__(self, app, api, config, mount):
        # main CouchDB database where requests/workloads are stored
        RESTEntity.__init__(self, app, api, config, mount)
        self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
        self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db, couchapp="ReqMgr")
        # this is needed for the post validation
        self.reqmgr_aux_db = api.db_handler.get_db(config.couch_reqmgr_aux_db)
        self.gq_service = WorkQueue(config.couch_host, config.couch_workqueue_db)

    def _requestArgMapFromBrowser(self, request_args):
        """
        This is a mapping function specific to data coming from the browser

        TODO: give it a keyword so it doesn't have to loop through everything in general
        """
        docs = []
        for doc in request_args:
            for key in doc.keys():
                if key.startswith('request'):
                    rid = key.split('request-')[-1]
                    if rid != 'all':
                        docs.append(rid)
                    del doc[key]
        return docs
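
    # Browser form keys look like "request-<name>"; with hypothetical input
    #   [{"request-alice_Task_v1": "on", "request-all": "on"}]
    # this returns ["alice_Task_v1"], the sentinel "request-all" being dropped.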

    def _validateGET(self, param, safe):
        # TODO: need proper validation but for now pass everything
        args_length = len(param.args)
        if args_length == 1:
            safe.kwargs["name"] = param.args[0]
            param.args.pop()
            return

        if "status" in param.kwargs and isinstance(param.kwargs["status"], basestring):
            param.kwargs["status"] = [param.kwargs["status"]]
        if "status" in param.kwargs:
            for status in param.kwargs["status"]:
                if status.endswith("-archived"):
                    raise InvalidSpecParameterValue(
                        "Can't retrieve bulk archived status requests, use other search arguments")

        for prop in param.kwargs:
            safe.kwargs[prop] = param.kwargs[prop]

        for prop in safe.kwargs:
            del param.kwargs[prop]

        return
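
    # For example (hypothetical request name), GET .../request/alice_Task_v1
    # arrives with param.args == ["alice_Task_v1"] and is rewritten to
    # safe.kwargs["name"], while bulk queries for any "-archived" status are
    # rejected outright.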

    def _validateRequestBase(self, param, safe, valFunc, requestName=None):

        data = cherrypy.request.body.read()
        if data:
            request_args = JsonWrapper.loads(data)
            if requestName:
                request_args["RequestName"] = requestName
            if isinstance(request_args, dict):
                request_args = [request_args]

        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_args = {}
            for prop in param.kwargs:
                request_args[prop] = param.kwargs[prop]

            for prop in request_args:
                del param.kwargs[prop]
            if requestName:
                request_args["RequestName"] = requestName
            request_args = [request_args]

        safe.kwargs['workload_pair_list'] = []
        if isinstance(request_args, dict):
            request_args = [request_args]
        for args in request_args:
            workload, r_args = valFunc(args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

    def _get_request_names(self, ids):
        "Extract request names from given documents"
        # cherrypy.log("request names %s" % ids)
        doc = {}
        if isinstance(ids, list):
            for rid in ids:
                doc[rid] = 'on'
        elif isinstance(ids, basestring):
            doc[ids] = 'on'

        docs = []
        for key in doc.keys():
            if key.startswith('request'):
                rid = key.split('request-')[-1]
                if rid != 'all':
                    docs.append(rid)
                del doc[key]
        return docs

    def _getMultiRequestArgs(self, multiRequestForm):
        request_args = {}
        for prop in multiRequestForm:
            if prop == "ids":
                request_names = self._get_request_names(multiRequestForm["ids"])
            elif prop == "new_status":
                request_args["RequestStatus"] = multiRequestForm[prop]
            # remove this
            # elif prop in ["CustodialSites", "AutoApproveSubscriptionSites"]:
            #    request_args[prop] = [multiRequestForm[prop]]
            else:
                request_args[prop] = multiRequestForm[prop]
        return request_names, request_args
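
    # Form-to-args mapping sketch, with a hypothetical form:
    #   {"ids": ["request-alice_Task_v1", "request-bob_Task_v2"],
    #    "new_status": "assigned", "Team": "production"}
    # yields request_names == ["alice_Task_v1", "bob_Task_v2"] and
    # request_args == {"RequestStatus": "assigned", "Team": "production"}.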

    def _validateMultiRequests(self, param, safe, valFunc):

        data = cherrypy.request.body.read()
        if data:
            request_names, request_args = self._getMultiRequestArgs(JsonWrapper.loads(data))
        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_names, request_args = self._getMultiRequestArgs(param.kwargs)

            for prop in request_args:
                if prop == "RequestStatus":
                    del param.kwargs["new_status"]
                else:
                    del param.kwargs[prop]

            del param.kwargs["ids"]

            # remove this
            # tmp = []
            # for prop in param.kwargs:
            #    tmp.append(prop)
            # for prop in tmp:
            #    del param.kwargs[prop]

        safe.kwargs['workload_pair_list'] = []

        for request_name in request_names:
            request_args["RequestName"] = request_name
            workload, r_args = valFunc(request_args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

        safe.kwargs["multi_update_flag"] = True

    def _getRequestNamesFromBody(self, param, safe, valFunc):

        request_names = JsonWrapper.loads(cherrypy.request.body.read())
        safe.kwargs['workload_pair_list'] = request_names
        safe.kwargs["multi_names_flag"] = True

    def validate(self, apiobj, method, api, param, safe):
        # to make validation succeed:
        # move the validated arguments to safe
        # and make param empty,
        # otherwise raise an error
        try:
            if method in ['GET']:
                self._validateGET(param, safe)

            if method == 'PUT':
                args_length = len(param.args)
                if args_length == 1:
                    requestName = param.args[0]
                    param.args.pop()
                else:
                    requestName = None
                self._validateRequestBase(param, safe, validate_request_update_args, requestName)
                # TODO: handle multiple clones
            #                 if len(param.args) == 2:
            #                     #validate clone case
            #                     if param.args[0] == "clone":
            #                         param.args.pop()
            #                         return None, request_args

            if method == 'POST':
                args_length = len(param.args)
                if args_length == 1 and param.args[0] == "multi_update":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._validateMultiRequests(param, safe, validate_request_update_args)
                elif args_length == 1 and param.args[0] == "bynames":
                    # special case for getting requests by a list of names.
                    param.args.pop()
                    self._getRequestNamesFromBody(param, safe, validate_request_update_args)
                else:
                    self._validateRequestBase(param, safe, validate_request_create_args)
        except InvalidSpecParameterValue as ex:
            raise ex
        except Exception as ex:
            # TODO: add a proper error message instead of the traceback
            msg = traceback.format_exc()
            cherrypy.log("Error: %s" % msg)
            if hasattr(ex, "message"):
                if hasattr(ex.message, '__call__'):
                    msg = ex.message()
                else:
                    msg = str(ex)
            else:
                msg = str(ex)
            raise InvalidSpecParameterValue(msg)

    def initialize_clone(self, request_name):
        requests = self.reqmgr_db_service.getRequestByNames(request_name)
        clone_args = requests.values()[0]
        # overwrite the name and time stamp.
        initialize_request_args(clone_args, self.config, clone=True)
        # timestamp status update

        spec = loadSpecByType(clone_args["RequestType"])
        workload = spec.factoryWorkloadConstruction(clone_args["RequestName"],
                                                    clone_args)
        return (workload, clone_args)

    @restcall(formats=[('application/json', JSONFormat())])
    def get(self, **kwargs):
        """
        Returns request info depending on the conditions set by kwargs.
        Currently defined kwargs are the following:
        statusList, requestNames, requestType, prepID, inputDataset, outputDataset, dateRange
        If jobInfo is True, returns job information about the request as well.

        TODO:
        stuff like this has to be filtered out from the result of this call:
            _attachments: {u'spec': {u'stub': True, u'length': 51712, u'revpos': 2, u'content_type': u'application/json'}}
            _id: maxa_RequestString-OVERRIDE-ME_130621_174227_9225
            _rev: 4-c6ceb2737793aaeac3f1cdf591593da4

        """
        if len(kwargs) == 0:
            kwargs['status'] = "running"
            options = {"descending": True, 'include_docs': True, 'limit': 200}
            request_docs = self.reqmgr_db.loadView("ReqMgr", "bystatus", options)
            return rows([request_docs])

        # list of status
        status = kwargs.get("status", False)
        # list of request names
        name = kwargs.get("name", False)
        request_type = kwargs.get("request_type", False)
        prep_id = kwargs.get("prep_id", False)
        inputdataset = kwargs.get("inputdataset", False)
        outputdataset = kwargs.get("outputdataset", False)
        date_range = kwargs.get("date_range", False)
        campaign = kwargs.get("campaign", False)
        workqueue = kwargs.get("workqueue", False)
        team = kwargs.get("team", False)
        mc_pileup = kwargs.get("mc_pileup", False)
        data_pileup = kwargs.get("data_pileup", False)
        detail = kwargs.get("detail", True)
        if detail in (False, "false", "False"):
            option = {"include_docs": False}
        else:
            option = {"include_docs": True}
        # everything should use the stale view; this is only needed for tests
        _nostale = kwargs.get("_nostale", False)
        if _nostale:
            self.reqmgr_db_service._setNoStale()

        request_info = []

        if status and not team and not request_type:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bystatus", option, status))
        if status and team:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView("byteamandstatus", option, [[team, status]]))
        if status and request_type:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("requestsbystatusandtype", option,
                                                                             [[status, request_type]]))
        if name:
            request_info.append(self.reqmgr_db_service.getRequestByNames(name))
        if prep_id:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byprepid", option, prep_id))
        if inputdataset:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byinputdataset", option, inputdataset))
        if outputdataset:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byoutputdataset", option, outputdataset))
        if date_range:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydate", option, date_range))
        if campaign:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bycampaign", option, campaign))
        if workqueue:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byworkqueue", option, workqueue))
        if mc_pileup:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bymcpileup", option, mc_pileup))
        if data_pileup:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydatapileup", option, data_pileup))
        # get the intersection of the request info
        result = self._intersection_of_request_info(request_info)
        if len(result) == 0:
            return []
        return rows([result])

    def _intersection_of_request_info(self, request_info):
        requests = {}
        if len(request_info) < 1:
            return requests

        request_key_set = set(request_info[0].keys())
        for info in request_info:
            request_key_set = set(request_key_set) & set(info.keys())
        # TODO: need to assume some data might not contain include_docs
        for request_name in request_key_set:
            requests[request_name] = request_info[0][request_name]
        return requests

        # TODO move this out of this class

    def filterCouchInfo(self, couchInfo):
        for key in ['_rev', '_attachments']:
            if key in couchInfo:
                del couchInfo[key]

    def _combine_request(self, request_info, requestAgentUrl, cache):
        keys = {}
        requestAgentUrlList = []
        for row in requestAgentUrl["rows"]:
            request = row["key"][0]
            if request not in keys:
                keys[request] = []
            keys[request].append(row["key"][1])

        for request in request_info:
            for agentUrl in keys.get(request, []):
                requestAgentUrlList.append([request, agentUrl])

        return requestAgentUrlList

    def _retrieveResubmissionChildren(self, request_name):

        result = self.reqmgr_db.loadView('ReqMgr', 'childresubmissionrequests', keys=[request_name])['rows']
        childrenRequestNames = []
        for child in result:
            childrenRequestNames.append(child['id'])
            childrenRequestNames.extend(self._retrieveResubmissionChildren(child['id']))
        return childrenRequestNames

    def _updateRequest(self, workload, request_args):

        if workload is None:
            (workload, request_args) = self.initialize_clone(request_args["OriginalRequestName"])
            return self.post([(workload, request_args)])

        dn = cherrypy.request.user.get("dn", "unknown")

        if ('SoftTimeout' in request_args) and ('GracePeriod' in request_args):
            request_args['HardTimeout'] = request_args['SoftTimeout'] + request_args['GracePeriod']

        if 'RequestPriority' in request_args:
            self.gq_service.updatePriority(workload.name(), request_args['RequestPriority'])

        if "total_jobs" in request_args:
            # only the GQ updates these stats
            # request_args should contain only the 4 keys 'total_jobs', 'input_lumis', 'input_events', 'input_num_files'
            report = self.reqmgr_db_service.updateRequestStats(workload.name(), request_args)
        # otherwise this is not just a stats update
        else:
            req_status = request_args.get("RequestStatus", None)

            if len(request_args) >= 1 and req_status is None:
                try:
                    workload.updateArguments(request_args)
                except Exception as ex:
                    msg = traceback.format_exc()
                    cherrypy.log("Error for request args %s: %s" % (request_args, msg))
                    raise InvalidSpecParameterValue(str(ex))

                # trailing / is needed for the saveCouchUrl function
                workload.saveCouch(self.config.couch_host, self.config.couch_reqmgr_db)
                report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)

            elif (req_status in ["closed-out", "announced"]) and request_args.get("cascade", False):
                cascade_list = self._retrieveResubmissionChildren(workload.name())
                for req_name in cascade_list:
                    self.reqmgr_db_service.updateRequestStatus(req_name, req_status)
                # update the original workflow status as well
                report = self.reqmgr_db_service.updateRequestStatus(workload.name(), req_status)

            # If it is aborted or force-complete transition call workqueue to cancel the request
            else:
                if req_status == "aborted" or req_status == "force-complete":
                    self.gq_service.cancelWorkflow(workload.name())
                report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)

        if report == 'OK':
            return {workload.name(): "OK"}
        else:
            return {workload.name(): "ERROR"}

    @restcall(formats=[('application/json', JSONFormat())])
    def put(self, workload_pair_list):
        "workloadPairList is a list of tuple containing (workload, requeat_args)"
        report = []
        for workload, request_args in workload_pair_list:
            result = self._updateRequest(workload, request_args)
            report.append(result)
        return report

    @restcall(formats=[('application/json', JSONFormat())])
    def delete(self, request_name):
        cherrypy.log("INFO: Deleting request document '%s' ..." % request_name)
        try:
            self.reqmgr_db.delete_doc(request_name)
        except CouchError as ex:
            msg = "ERROR: Delete failed."
            cherrypy.log(msg + " Reason: %s" % ex)
            raise cherrypy.HTTPError(404, msg)
            # TODO
        # delete should also happen on WMStats
        cherrypy.log("INFO: Delete '%s' done." % request_name)

    def _update_additional_request_args(self, workload, request_args):
        """
        Add properties to request_args which are not initially set by the user.
        This data will be put into couchdb.
        Update request_args here if additional information needs to be put into couchdb.
        """
        request_args['RequestWorkflow'] = sanitizeURL("%s/%s/%s/spec" % (request_args["CouchURL"],
                                                                         request_args["CouchWorkloadDBName"],
                                                                         workload.name()))['url']

        # Add the output datasets if necessary
        # for some bizarre reason OutputDatasets is a list of lists
        request_args['OutputDatasets'] = workload.listOutputDatasets()

        # TODO: remove this after reqmgr2 replaces reqmgr (reqmgr2Only)
        request_args['ReqMgr2Only'] = True
        return

    @restcall(formats=[('application/json', JSONFormat())])
    def post(self, workload_pair_list, multi_update_flag=False, multi_names_flag=False):
        """
        Create and update couchDB with a new request.
        The request arguments are passed from validation
        (validation converts cherrypy.request.body data to arguments).

        TODO:
        this method will have some parts factored out so that e.g. clone call
        can share functionality.

        NOTES:
        1) do not strip spaces, #4705 will fail upon injection with spaces;
            currently the chain relies on a number of things coming in #4705
        2) reqInputArgs = Utilities.unidecode(JsonWrapper.loads(body))
            (from ReqMgrRESTModel.putRequest)
        """

        # storing the request document into Couch

        if multi_update_flag:
            return self.put(workload_pair_list)
        if multi_names_flag:
            return self.get(name=workload_pair_list)

        out = []
        for workload, request_args in workload_pair_list:
            self._update_additional_request_args(workload, request_args)

            cherrypy.log("INFO: Create request, input args: %s ..." % request_args)
            workload.saveCouch(request_args["CouchURL"], request_args["CouchWorkloadDBName"],
                               metadata=request_args)
            out.append({'request': workload.name()})
        return out
Example #51
    def __init__(self, rest, config):

        super(StepChainParentageFixTask, self).__init__(config)
        self.reqmgrDB = RequestDBWriter(config.reqmgrdb_url)
        self.dbsSvc = DBS3Reader(config.dbs_url, logger=self.logger)
        self.statusToCheck = ["announced", "normal-archived"]
Example #52
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch:  raise an exception on couch failure instead of ignoring
    """
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(
                    **self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(
            self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            # sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(
                self.config.AnalyticsDataCollector.localT0RequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(
                self.config.AnalyticsDataCollector.centralRequestDBURL)

            self.reqmgr2Svc = ReqMgr(
                self.config.TaskArchiver.ReqMgr2ServiceURL)

        # Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters=None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp
             ) = self.getFinishedWorkflows()
            # set the data cache which can be used by other threads (no other thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()

        myThread.transaction.begin()

        # Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(
            classname="Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut=self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()

        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. combined those and make return
           finishedwfs - without LogCollect and CleanUp task
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp task
        """

        finishedWorkflowsDAO = self.daoFactory(
            classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(
            onlySecondary=True)
        finishedwfsWithLogCollectAndCleanUp = {}
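        # a workflow makes it into the combined map only if it is finished
        # both with and without its LogCollect/Cleanup tasks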
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def killCondorJobsByWFStatus(self, statusList):
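        """
        _killCondorJobsByWFStatus_

        Fetch the requests in the given status(es) from central couch and
        kill their WMBS workflows through the local WorkQueue. Returns the
        list of request names that were found.
        """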
        if isinstance(statusList, str):  # was basestring; str keeps this Python 3 compatible
            statusList = [statusList]
        reqNames = self.centralCouchDBWriter.getRequestByStatus(statusList)
        logging.info(
            "There are %d requests in %s status in central couch.",
            len(reqNames), statusList)
        for wf in reqNames:
            self.workQueue.killWMBSWorkflow(wf)
        return reqNames

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:

        1. Notify the WorkQueue about finished subscriptions
        2. mark workflow as completed in the dbsbuffer_workflow table
        """
        if len(finishedwfs) == 0:
            return

        logging.info("Found %d candidate workflows for completing: %s",
                     len(finishedwfs), finishedwfs.keys())
        completedWorkflowsDAO = self.dbsDaoFactory(
            classname="UpdateWorkflowsToCompleted")

        centralCouchAlive = True
        try:
            abortedWorkflows = self.killCondorJobsByWFStatus(["aborted"])
            self.killCondorJobsByWFStatus(["force-complete"])
        except Exception as ex:
            centralCouchAlive = False
            logging.error(
                "we will try again when remote couch server comes back\n%s",
                str(ex))

        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    # Notify the WorkQueue, if there is one
                    if self.workQueue is not None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for subs in finishedwfs[workflow]["workflows"].values():
                            subList.extend(subs)
                        self.notifyWorkQueue(subList)

                    # Tier-0 case, the agent has to mark it completed
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(
                            workflow, "completed")
                        logging.info("status updated to completed %s",
                                     workflow)

                    completedWorkflowsDAO.execute([workflow])

                except TaskArchiverPollerException as ex:
                    # Something didn't go well when notifying the workqueue, abort!!!
                    logging.error(
                        "Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    # Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """

        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId=sub)
            except WorkQueueNoMatchingElements:
                # Subscription wasn't known to WorkQueue, feel free to clean up
                logging.debug(
                    "Local WorkQueue knows nothing about this subscription: %s",
                    sub)
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)

        return
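
The poller above is normally driven by the WMAgent component harness, but its error-handling contract can be distilled into a few lines. The sketch below is a hypothetical, self-contained illustration of the pattern used by findAndMarkFinishedSubscriptions and algorithm (do one unit of work per cycle, roll back the open transaction on failure, and re-raise wrapped in a component-specific exception); FakeTransaction, PollerException and run_cycle are illustrative names, not part of WMCore.

import logging

class PollerException(Exception):
    """Hypothetical stand-in for TaskArchiverPollerException."""

class FakeTransaction(object):
    """Minimal transaction stub so the sketch runs without a database."""
    def begin(self):
        logging.debug("transaction started")
    def commit(self):
        logging.debug("transaction committed")
    def rollback(self):
        logging.debug("transaction rolled back")

def run_cycle(transaction, work):
    """One poller cycle: run the work inside a transaction, roll back on failure."""
    transaction.begin()
    try:
        work()
        transaction.commit()
    except Exception as ex:
        transaction.rollback()
        raise PollerException("Caught exception in poller cycle: %s" % ex)

if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    run_cycle(FakeTransaction(), lambda: logging.info("marking subscriptions finished"))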
Example #53
class StepChainParentageFixTask(CherryPyPeriodicTask):
    """
    Updates StepChain parentage periodically
    """
    def __init__(self, rest, config):

        super(StepChainParentageFixTask, self).__init__(config)
        self.reqmgrDB = RequestDBWriter(config.reqmgrdb_url)
        self.dbsSvc = DBS3Reader(config.dbs_url, logger=self.logger)
        self.statusToCheck = ["closed-out", "announced", "normal-archived"]

    def setConcurrentTasks(self, config):
        """
        sets the list of functions which
        """
        self.concurrentTasks = [{
            'func': self.fixStepChainParentage,
            'duration': config.parentageFixDuration
        }]

    def fixStepChainParentage(self, config):
        """
        Look through the StepChain workflows whose ParentageResolved flag is False.
        Fix the StepChain parentage and update the ParentageResolved flag to True.
        """
        self.logger.info(
            "Running fixStepChainParentage thread for statuses: %s",
            self.statusToCheck)
        childDatasets = set()
        requests = set()
        requestsByChildDataset = {}
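        # requestsByChildDataset maps each child dataset to the requests that
        # produced it, so a failed fix can be traced back to its requests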
        for status in self.statusToCheck:
            reqByChildDS = getChildDatasetsForStepChainMissingParent(
                self.reqmgrDB, status)
            self.logger.info(
                "Retrieved %d datasets to fix parentage, in status: %s",
                len(reqByChildDS), status)
            childDatasets = childDatasets.union(set(reqByChildDS.keys()))
            # We only need one of the StepChain workflows when multiple workflows contain the same datasets (e.g. ACDC)
            requestsByChildDataset.update(reqByChildDS)

            for wfs in reqByChildDS.values():
                requests = requests.union(wfs)

        failedRequests = set()
        totalChildDS = len(childDatasets)
        fixCount = 0
        for childDS in childDatasets:
            self.logger.info("Resolving parentage for dataset: %s", childDS)
            start = time.time()
            try:
                failedBlocks = self.dbsSvc.fixMissingParentageDatasets(
                    childDS, insertFlag=True)
            except Exception as exc:
                self.logger.exception(
                    "Failed to resolve parentage data for dataset: %s. Error: %s",
                    childDS, str(exc))
                failedRequests = failedRequests.union(
                    requestsByChildDataset[childDS])
            else:
                if failedBlocks:
                    self.logger.warning(
                        "These blocks failed to be resolved and will be retried later: %s",
                        failedBlocks)
                    failedRequests = failedRequests.union(
                        requestsByChildDataset[childDS])
                else:
                    fixCount += 1
                    self.logger.info(
                        "Parentage for '%s' successfully updated. Processed %s out of %s datasets.",
                        childDS, fixCount, totalChildDS)
            timeTaken = time.time() - start
            self.logger.info("Spent %.2f secs on this dataset: %s",
                             timeTaken, childDS)

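        # only flip the flag for requests whose child datasets were all fixed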
        requestsToUpdate = requests - failedRequests

        for request in requestsToUpdate:
            try:
                self.reqmgrDB.updateRequestProperty(
                    request, {"ParentageResolved": True})
                self.logger.info(
                    "Marked ParentageResolved=True for request: %s", request)
            except Exception as exc:
                self.logger.error(
                    "Failed to update 'ParentageResolved' flag to True for request: %s. Error: %s",
                    request, str(exc))

        msg = "A total of %d requests have been processed, where %d will have to be retried in the next cycle."
        self.logger.info(msg, len(requestsToUpdate), len(failedRequests))
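
For reference, the only configuration attributes this task reads are reqmgrdb_url, dbs_url and parentageFixDuration. Below is a minimal sketch of wiring them up, assuming a plain namespace object can stand in for the real service configuration; the URLs are placeholders rather than real endpoints, and the actual instantiation (commented out) is done by the CherryPy REST harness.

from types import SimpleNamespace

# placeholder endpoints; substitute the real ReqMgr2 CouchDB and DBS reader URLs
config = SimpleNamespace(
    reqmgrdb_url="https://cmsweb.example/couchdb/reqmgr_workload_cache",
    dbs_url="https://cmsweb.example/dbs/prod/global/DBSReader",
    parentageFixDuration=3600,  # polling interval, assumed to be in seconds
)

# In the real service the REST harness creates and registers the task:
# task = StepChainParentageFixTask(rest, config)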