Code Example #1
class RequestDBTest(unittest.TestCase):
    """
    """

    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["ReqMgr"]
        self.testInit = TestInitCouchApp("RequestDBServiceTest")
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = "requsetdb_t"
        self.testInit.setupCouch(dbName, *self.couchApps)
        self.requestWriter = RequestDBWriter(self.testInit.couchUrl, dbName)
        self.requestReader = RequestDBReader(self.testInit.couchUrl, dbName)
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # exercise insert, update and query operations
        schema = generate_reqmgr_schema()
        result = self.requestWriter.insertGenericRequest(schema[0])

        self.assertEqual(len(result), 1, "insert fail")

        self.assertEqual(
            self.requestWriter.updateRequestStatus(schema[0]["RequestName"], "failed"), "OK", "update fail"
        )
        self.assertEqual(
            self.requestWriter.updateRequestStatus("not_exist_schema", "assigned"), "Error: document not found"
        )
        result = self.requestWriter.updateRequestProperty(schema[0]["RequestName"], {"Teams": ["teamA"]})
        # repeating the identical property update is asserted to succeed as well
        self.assertEqual(
            self.requestWriter.updateRequestProperty(schema[0]["RequestName"], {"Teams": ["teamA"]}),
            "OK",
            "update fail",
        )
        self.assertEqual(
            self.requestWriter.updateRequestProperty("not_exist_schema", {"Teams": "teamA"}),
            "Error: document not found",
        )

        result = self.requestWriter.getRequestByNames([schema[0]["RequestName"]])
        self.assertEqual(len(result), 1, "should be 1")
        result = self.requestWriter.getRequestByStatus(["failed"], False, 1)
        self.assertEqual(len(result), 1, "should be 1")
Code Example #2
File: T0RequestDB_t.py  Project: BrunoCoimbra/WMCore
class T0RequestDBTest(unittest.TestCase):
    """
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["T0Request"]
        self.testInit = TestInitCouchApp('RequestDBServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema,
                                useDefault=False)
        dbName = 't0_requsetdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        reqDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.requestWriter = RequestDBWriter(reqDBURL, self.couchApps[0])
        self.requestReader = RequestDBReader(reqDBURL, self.couchApps[0])
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # exercise insert, status-transition and query operations
        schema = generate_reqmgr_schema()
        result = self.requestWriter.insertGenericRequest(schema[0])

        self.assertEqual(len(result), 1, 'insert fail')
        
        result = self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "assigned")

        self.assertEqual(result, 'not allowed state assigned', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestStatus("not_exist_schema", "new"),
                          'Error: document not found')
        
        allowedStates = ["Closed", "Merge", "AlcaSkim", "Harvesting",  
                         "Processing Done", "completed"]
        for state in allowedStates:
            self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], state),
                          'OK')
        
        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "Processing Done"),
                          'not allowed transition completed to Processing Done')  
        
        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "normal-archived"),
                          'OK')  
        result = self.requestWriter.getRequestByStatus(["normal-archived"], False, 1)
        self.assertEqual(len(result), 1, "should be 1 but %s" % result)
Code Example #3
class RequestDBTest(unittest.TestCase):
    """
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["ReqMgr"]
        self.testInit = TestInitCouchApp('RequestDBServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema,
                                useDefault=False)
        dbName = 'requsetdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        self.requestWriter = RequestDBWriter(self.testInit.couchUrl, dbName)
        self.requestReader = RequestDBReader(self.testInit.couchUrl, dbName)
        self.requestWriter.defaultStale = {}
        self.requestReader.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testRequestDBWriter(self):
        # exercise insert, update and query operations
        schema = generate_reqmgr_schema()
        result = self.requestWriter.insertGenericRequest(schema[0])

        self.assertEqual(len(result), 1, 'insert fail')

        self.assertEqual(self.requestWriter.updateRequestStatus(schema[0]['RequestName'], "failed"), 'OK', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestStatus("not_exist_schema", "assigned"),
                          'Error: document not found')
        result = self.requestWriter.updateRequestProperty(schema[0]['RequestName'],
                                                          {'Teams': ['teamA']})
        self.assertEqual(self.requestWriter.updateRequestProperty(schema[0]['RequestName'],
                                                                  {'Teams': ['teamA']}), 'OK', 'update fail')
        self.assertEqual(self.requestWriter.updateRequestProperty("not_exist_schema", {'Teams': 'teamA'}),
                          'Error: document not found')

        result = self.requestWriter.getRequestByNames([schema[0]['RequestName']])
        self.assertEqual(len(result), 1, "should be 1")
        result = self.requestWriter.getRequestByStatus(["failed"], False, 1)
        self.assertEqual(len(result), 1, "should be 1")
Code Example #4
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch:  raise an exception on couch failure instead of ignoring
    """
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(
                    **self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(
            self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            # sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(
                self.config.AnalyticsDataCollector.localT0RequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(
                self.config.AnalyticsDataCollector.centralRequestDBURL)

            self.reqmgr2Svc = ReqMgr(
                self.config.TaskArchiver.ReqMgr2ServiceURL)

        # Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters=None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp
             ) = self.getFinishedWorkflows()
            # set the data cache which can be used by other threads (no other thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()

        myThread.transaction.begin()

        # Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(
            classname="Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut=self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()

        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. Combine those and return:
           finishedwfs - without LogCollect and CleanUp task
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp task
        """

        finishedWorkflowsDAO = self.daoFactory(
            classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(
            onlySecondary=True)
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def killCondorJobsByWFStatus(self, statusList):
        if isinstance(statusList, basestring):
            statusList = [statusList]
        reqNames = self.centralCouchDBWriter.getRequestByStatus(statusList)
        logging.info(
            "There are %d requests in %s status in central couch.",
            len(reqNames), statusList)
        for wf in reqNames:
            self.workQueue.killWMBSWorkflow(wf)
        return reqNames

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:

        1. Notify the WorkQueue about finished subscriptions
        2. mark workflow as completed in the dbsbuffer_workflow table
        """
        if len(finishedwfs) == 0:
            return

        logging.info("Found %d candidate workflows for completing: %s",
                     len(finishedwfs), finishedwfs.keys())
        completedWorkflowsDAO = self.dbsDaoFactory(
            classname="UpdateWorkflowsToCompleted")

        centralCouchAlive = True
        try:
            abortedWorkflows = self.killCondorJobsByWFStatus(["aborted"])
            self.killCondorJobsByWFStatus(["force-complete"])
        except Exception as ex:
            centralCouchAlive = False
            logging.error(
                "we will try again when remote couch server comes back\n%s",
                str(ex))

        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    # Notify the WorkQueue, if there is one
                    if self.workQueue is not None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)

                    # Tier-0 case, the agent has to mark it completed
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(
                            workflow, "completed")
                        logging.info("status updated to completed %s",
                                     workflow)

                    completedWorkflowsDAO.execute([workflow])

                except TaskArchiverPollerException as ex:
                    # Something didn't go well when notifying the workqueue, abort!!!
                    logging.error(
                        "Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    # Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """

        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId=sub)
            except WorkQueueNoMatchingElements:
                # Subscription wasn't known to WorkQueue, feel free to clean up
                logging.debug(
                    "Local WorkQueue knows nothing about this subscription: %s",
                    sub)
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)

        return
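
completeTasks() iterates finishedwfs[workflow]["workflows"].values() and flattens the lists into subscription ids for notifyWorkQueue(). A hedged sketch of the data shape this implies (the workflow name, task paths and ids below are made up for illustration; the real structure comes from the Workflow.GetFinishedWorkflows DAO):

# Illustrative only: the mapping shape completeTasks() expects.
finishedwfs = {
    "ExampleWorkflow_Run000000": {                            # hypothetical name
        "workflows": {
            "/ExampleWorkflow_Run000000/Task1": [101, 102],   # subscription ids
            "/ExampleWorkflow_Run000000/Task1/Merge": [103],
        },
    },
}

subList = []
for subs in finishedwfs["ExampleWorkflow_Run000000"]["workflows"].values():
    subList.extend(subs)
# subList -> [101, 102, 103], the ids handed to notifyWorkQueue()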
Code Example #5
class Tier0PluginTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Setup the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.requestCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.requestCouchDB, 'T0Request')
        self.testDir = self.testInit.generateWorkDir()
        reqDBURL = "%s/%s" % (os.environ['COUCHURL'], self.requestCouchDB)
        self.requestDBWriter = RequestDBWriter(reqDBURL, couchapp="T0Request")
        self.requestDBWriter._setNoStale()

        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return

    def tearDown(self):
        """
        _tearDown_

        Clear databases and delete files
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()

        return

    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a Repack-like workflow;
        every subscription must be unfinished at first
        """

        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = ['RepackMergewrite_QuadElectron_RAW', 'RepackMergewrite_TriPhoton_RAW',
                      'RepackMergewrite_SingleNeutrino_RAW']

        self.stateMap = {'Merge': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec on disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {'spec': specPath, 'owner': 'ItsAMeMario',
                   'name': workflowName, 'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' % (workflowName, task), **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        cleanupWorkflow = Workflow(task='/Repack_Run481516_StreamZ/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW',
                                   **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return

    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Populate WMBS with an Express-like workflow;
        every subscription must be unfinished at first
        """

        workflowName = 'Express_Run481516_StreamZFast'
        secondLevelTasks = ['ExpressMergewrite_StreamZFast_DQM', 'ExpressMergewrite_ExpressPhysics_FEVT',
                            'ExpressAlcaSkimwrite_StreamZFast_ALCARECO', 'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
                            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT',
                            'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO']
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMEndOfRunDQMHarvestMerged'

        self.stateMap = {'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec on disk
        workload = newWorkload(workflowName)
        expressTask = workload.newTask('Express')
        for task in secondLevelTasks:
            secondLevelTask = expressTask.addTask(task)
            if task == 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO':
                secondLevelTask.addTask(alcaHarvestTask)
            elif task == 'ExpressMergewrite_StreamZFast_DQM':
                secondLevelTask.addTask(dqmHarvestTask)

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS
        sharedFileset = Fileset(name='TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)

        options = {'spec': specPath, 'owner': 'ItsAMeMario',
                   'name': workflowName, 'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Express' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(sharedFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topLevelSub)
        for task in [x for x in secondLevelTasks if not x.count('CleanupUnmerged')]:
            secondLevelWorkflow = Workflow(task='/%s/Express/%s' % (workflowName, task), **options)
            secondLevelWorkflow.create()
            mergeSub = Subscription(sharedFileset, secondLevelWorkflow)
            mergeSub.create()
            self.stateMap['Harvesting'].append(mergeSub)

        for (parent, child) in [('ExpressAlcaSkimwrite_StreamZFast_ALCARECO', alcaHarvestTask),
                                ('ExpressMergewrite_StreamZFast_DQM', dqmHarvestTask)]:
            harvestingWorkflow = Workflow(task='/%s/Express/%s/%s' % (workflowName, parent, child),
                                          **options)
            harvestingWorkflow.create()
            harvestingSub = Subscription(sharedFileset, harvestingWorkflow)
            harvestingSub.create()
            self.stateMap['Processing Done'].append(harvestingSub)

        return

    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow;
        every subscription must be unfinished at first
        """

        # Populate disk and WMBS
        testArguments = PromptRecoWorkloadFactory.getTestArguments()

        workflowName = 'PromptReco_Run195360_Cosmics'
        factory = PromptRecoWorkloadFactory()
        testArguments["EnableHarvesting"] = True
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        workload = factory.factoryWorkloadConstruction(workflowName, testArguments)

        wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock', cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset)

        self.stateMap = {'AlcaSkim': [],
                         'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                      '%s/RecoMergewrite_AOD',
                      '%s/RecoMergewrite_DQM',
                      '%s/RecoMergewrite_RECO']
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO')
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' % (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM')
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return

    def verifyStateTransitions(self, transitionMethod='markFinished', transitionTrigger=True):
        """
        _verifyStateTransitions_

        Utility method which goes through the list of states in self.orderedStates and
        finishes the tasks that demand a state transition in each step, according to
        the given transition method and trigger.
        It verifies that the request document in WMStats moves through the expected transitions.
        """

        # two passes per state: the first finishes all but one transition object
        # (no transition expected), the second finishes the last one
        for idx in range(0, len(self.orderedStates) * 2):
            nextState = self.orderedStates[idx // 2]
            if (idx // 2) == 0:
                currentState = 'Closed'
            else:
                currentState = self.orderedStates[idx // 2 - 1]
            if idx % 2 == 0:
                for transitionObject in self.stateMap[nextState][:-1]:
                    method = getattr(transitionObject, transitionMethod)
                    method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus([currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus([nextState])
                self.assertEqual(len(currentStateWorkflows), 1, 'Workflow moved incorrectly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 0, 'Workflow moved incorrectly to %s' % nextState)
            else:
                transitionObject = self.stateMap[nextState][-1]
                method = getattr(transitionObject, transitionMethod)
                method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus([currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus([nextState])
                self.assertEqual(len(currentStateWorkflows), 0,
                                 'Workflow did not move correctly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 1, 'Workflow did not move correctly to %s' % nextState)
        return

    def testA_RepackStates(self):
        """
        _testA_RepackStates_

        Set up an environment with a Repack workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupRepackWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions('markOpen', False)

        return

    def testB_ExpressStates(self):
        """
        _testB_ExpressStates_

        Set up an environment with an Express workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupExpressWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return

    def testC_PromptRecoStates(self):
        """
        _testC_PromptRecoStates_

        Set up an environment with a PromptReco workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupPromptRecoWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return
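
The index arithmetic in verifyStateTransitions is easy to misread; here is a standalone sketch of the same even/odd walk (illustrative only, using plain lists instead of WMBS filesets and subscriptions):

# Illustrative only: each state takes two passes. The even pass finishes all
# but one transition object, so the request must stay in currentState; the
# odd pass finishes the last object, so the request must advance to nextState.
orderedStates = ['Merge', 'Processing Done']
for idx in range(len(orderedStates) * 2):
    nextState = orderedStates[idx // 2]
    currentState = 'Closed' if idx // 2 == 0 else orderedStates[idx // 2 - 1]
    expectTransition = (idx % 2 == 1)
    print(idx, currentState, '->', nextState, 'transition expected:', expectTransition)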
Code Example #6
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch:  raise an exception on couch failure instead of ignoring
    """
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(
                    **self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(
            self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            #sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(
                self.config.AnalyticsDataCollector.localT0RequestDBURL,
                couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(
                self.config.AnalyticsDataCollector.centralRequestDBURL)

            self.reqmgr2Svc = ReqMgr(
                self.config.TaskArchiver.ReqMgr2ServiceURL)
            # TODO: remove this when reqmgr2 replaces reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager(
                {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

        #Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters=None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp
             ) = self.getFinishedWorkflows()
            # set the data cache which can be used by other threads (no other thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                   and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                   and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()

        myThread.transaction.begin()

        #Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(
            classname="Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut=self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()

        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. Combine those and return:
           finishedwfs - without LogCollect and CleanUp task
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp task
        """

        finishedWorkflowsDAO = self.daoFactory(
            classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(
            onlySecondary=True)
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:
        
        1. Notify the WorkQueue about finished subscriptions
        2. mark the workflow as completed in the dbsbuffer_workflow table
        """

        #Only delete those where the upload and notification succeeded
        logging.info("Found %d candidate workflows for completing: %s" %
                     (len(finishedwfs), finishedwfs.keys()))
        # update the completed flag in dbsbuffer_workflow table so blocks can be closed
        # create updateDBSBufferWorkflowComplete DAO
        if len(finishedwfs) == 0:
            return

        completedWorkflowsDAO = self.dbsDaoFactory(
            classname="UpdateWorkflowsToCompleted")

        centralCouchAlive = True
        try:
            #TODO: need to enable when reqmgr2 -wmstats is ready
            #abortedWorkflows = self.reqmgrCouchDBWriter.getRequestByStatus(["aborted"], format = "dict");
            abortedWorkflows = self.centralCouchDBWriter.getRequestByStatus(
                ["aborted"])
            logging.info(
                "There are %d requests in 'aborted' status in central couch." %
                len(abortedWorkflows))
            forceCompleteWorkflows = self.centralCouchDBWriter.getRequestByStatus(
                ["force-complete"])
            logging.info(
                "List of 'force-complete' workflows in central couch: %s" %
                forceCompleteWorkflows)

        except Exception as ex:
            centralCouchAlive = False
            logging.error(
                "we will try again when remote couch server comes back\n%s" %
                str(ex))

        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    #Notify the WorkQueue, if there is one
                    if self.workQueue is not None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)

                    # Now that the workflow as a whole is gone, we can delete its information from couch
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(
                            workflow, "completed")
                        logging.info("status updated to completed %s" %
                                     workflow)

                    if workflow in abortedWorkflows:
                        #TODO: remove when reqmgr2-wmstats deployed
                        newState = "aborted-completed"
                    elif workflow in forceCompleteWorkflows:
                        newState = "completed"
                    else:
                        newState = None

                    if newState is not None:
                        # update the reqmgr workload document only if request mgr is installed
                        if not self.useReqMgrForCompletionCheck:
                            # commented out until all the agents are updated so every request has the new state
                            # TODO: the agent should be able to write to the reqmgr db directly; add the right group
                            # in reqmgr
                            self.requestLocalCouchDB.updateRequestStatus(
                                workflow, newState)
                        else:
                            try:
                                # TODO: try the reqmgr1 call first and fall back on failure (reqmgr2Only - remove this line when reqmgr is replaced)
                                logging.info(
                                    "Updating status to '%s' in both oracle and couchdb ..."
                                    % newState)
                                self.reqmgrSvc.updateRequestStatus(
                                    workflow, newState)
                                # and replace with this - remove all the exception handling
                                # self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                            except httplib.HTTPException as ex:
                                # an HTTPException with status 404 means this is a reqmgr2 request
                                if ex.status == 404:
                                    # try reqmgr2 call
                                    msg = "%s : reqmgr2 request: %s" % (
                                        workflow, str(ex))
                                    logging.warning(msg)
                                    self.reqmgr2Svc.updateRequestStatus(
                                        workflow, newState)
                                else:
                                    msg = "%s : fail to update status %s  with HTTP error: %s" % (
                                        workflow, newState, str(ex))
                                    logging.error(msg)
                                    raise ex

                        logging.info("status updated to '%s' : %s" %
                                     (newState, workflow))

                    completedWorkflowsDAO.execute([workflow])

                except TaskArchiverPollerException as ex:

                    #Something didn't go well when notifying the workqueue, abort!!!
                    logging.error(
                        "Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    #Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """

        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId=sub)
            except WorkQueueNoMatchingElements:
                # Subscription wasn't known to WorkQueue, feel free to clean up
                logging.debug(
                    "Local WorkQueue knows nothing about this subscription: %s"
                    % sub)
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)

        return
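
The except branch above encodes a service-migration pattern: call the legacy ReqMgr first and, on an HTTP 404, retry against ReqMgr2. A hedged, generic sketch of the same pattern (the function and parameter names are illustrative, not WMCore API):

import httplib  # Python 2; use http.client under Python 3

# Illustrative only: try the legacy service, fall back to the new one when
# the document is unknown there (HTTP 404), re-raise anything else.
def updateStatusWithFallback(legacySvc, newSvc, workflow, newState):
    try:
        return legacySvc.updateRequestStatus(workflow, newState)
    except httplib.HTTPException as ex:
        if getattr(ex, 'status', None) == 404:
            return newSvc.updateRequestStatus(workflow, newState)
        raise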
Code Example #7
File: TaskArchiverPoller.py  Project: vkuznet/WMCore
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch:  raise an exception on couch failure instead of ignoring
    """

    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.config = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False):
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

        if not self.useReqMgrForCompletionCheck:
            # sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                       couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)

            self.reqmgr2Svc = ReqMgr(self.config.General.ReqMgr2ServiceURL)

        # Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    @timeFunction
    def algorithm(self, parameters=None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp) = self.getFinishedWorkflows()
            # set the data cache which can be used by other threads (no other thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                    and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()

        myThread.transaction.begin()

        # Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(classname="Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut=self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()

        return

    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. Combine those and return:
           finishedwfs - without LogCollect and CleanUp task
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp task
        """

        finishedWorkflowsDAO = self.daoFactory(classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(onlySecondary=True)
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)

    def killCondorJobsByWFStatus(self, statusList):
        if isinstance(statusList, basestring):
            statusList = [statusList]
        reqNames = self.centralCouchDBWriter.getRequestByStatus(statusList)
        logging.info("There are %d requests in %s status in central couch.", len(reqNames), statusList)
        self.workQueue.killWMBSWorkflows(reqNames)
        return reqNames

    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:

        1. Notify the WorkQueue about finished subscriptions
        2. mark workflow as completed in the dbsbuffer_workflow table
        """
        if not finishedwfs:
            return

        logging.info("Found %d candidate workflows for completing:", len(finishedwfs))
        completedWorkflowsDAO = self.dbsDaoFactory(classname="UpdateWorkflowsToCompleted")

        centralCouchAlive = True
        try:
            self.killCondorJobsByWFStatus(["force-complete", "aborted"])
        except Exception as ex:
            centralCouchAlive = False
            logging.error("we will try again when remote couch server comes back\n%s", str(ex))

        if centralCouchAlive:
            logging.info("Marking subscriptions as Done ...")
            for workflow in finishedwfs:
                try:
                    # Notify the WorkQueue, if there is one
                    if self.workQueue is not None:
                        subList = []
                        for l in finishedwfs[workflow]["workflows"].values():
                            subList.extend(l)
                        self.notifyWorkQueue(subList)

                    # Tier-0 case, the agent has to mark it completed
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(workflow, "completed")
                        logging.info("status updated to completed %s", workflow)

                    completedWorkflowsDAO.execute([workflow])

                except TaskArchiverPollerException as ex:
                    # Something didn't go well when notifying the workqueue, abort!!!
                    logging.error("Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    # Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return

    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done.  Receives confirmation
        """

        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId=sub)
            except WorkQueueNoMatchingElements:
                # Subscription wasn't known to WorkQueue, feel free to clean up
                logging.debug("Local WorkQueue knows nothing about this subscription: %s", sub)
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)

        return
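
killCondorJobsByWFStatus() normalizes its argument with the Python 2-only basestring builtin. A hedged sketch of a 2/3-compatible version of that normalization (the helper name is illustrative, not part of WMCore):

# Illustrative only: accept a single status string or a list of them,
# without relying on the Python 2 `basestring` builtin.
try:
    STRING_TYPES = basestring  # Python 2
except NameError:
    STRING_TYPES = str         # Python 3

def asStatusList(status):
    return [status] if isinstance(status, STRING_TYPES) else list(status)

# asStatusList("aborted")          -> ["aborted"]
# asStatusList(["force-complete"]) -> ["force-complete"]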
Code Example #8
File: TaskArchiverPoller.py  Project: huohuo21/WMCore
class TaskArchiverPoller(BaseWorkerThread):
    """
    Polls for Ended jobs

    List of attributes

    requireCouch:  raise an exception on couch failure instead of ignoring
    """
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        
        self.dbsDaoFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                     logger = myThread.logger, 
                                     dbinterface = myThread.dbi)

        self.config      = config
        self.jobCacheDir = self.config.JobCreator.jobCacheDir

        if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
            # Get workqueue setup from config unless overridden
            if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
                self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
            else:
                from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
                self.workQueue = queueFromConfig(self.config)
        else:
            self.workQueue = None

        self.timeout           = getattr(self.config.TaskArchiver, "timeOut", None)
        self.useReqMgrForCompletionCheck   = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        
        if not self.useReqMgrForCompletionCheck:
            # sets the local monitor summary couch db
            self.requestLocalCouchDB = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL,
                                                       couchapp = self.config.AnalyticsDataCollector.RequestCouchApp)
            self.centralCouchDBWriter = self.requestLocalCouchDB
        else:
            self.centralCouchDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL)
            
            self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
            # TODO: remove this when reqmgr2 replaces reqmgr completely (reqmgr2Only)
            self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

        #Load the cleanout state ID and save it
        stateIDDAO = self.daoFactory(classname = "Jobs.GetStateID")
        self.stateID = stateIDDAO.execute("cleanout")

        return

    def terminate(self, params):
        """
        _terminate_

        This function terminates the job after a final pass
        """
        logging.debug("terminating. doing one more pass before we die")
        self.algorithm(params)
        return

    def algorithm(self, parameters = None):
        """
        _algorithm_

        Executes the two main methods of the poller:
        1. findAndMarkFinishedSubscriptions
        2. completeTasks
        Final result is that finished workflows get their summary built and uploaded to couch,
        and all traces of them are removed from the agent WMBS and couch (this last one on demand).
        """
        try:
            self.findAndMarkFinishedSubscriptions()
            (finishedwfs, finishedwfsWithLogCollectAndCleanUp) = self.getFinishedWorkflows()
            # set the data cache which can be used by other threads (no other thread should set the data cache)
            DataCache.setFinishedWorkflows(finishedwfsWithLogCollectAndCleanUp)
            self.completeTasks(finishedwfs)
        except WMException:
            myThread = threading.currentThread()
            if getattr(myThread, 'transaction', False) \
                   and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise
        except Exception as ex:
            myThread = threading.currentThread()
            msg = "Caught exception in TaskArchiver\n"
            msg += str(ex)
            if getattr(myThread, 'transaction', False) \
                   and getattr(myThread.transaction, 'transaction', False):
                myThread.transaction.rollback()
            raise TaskArchiverPollerException(msg)

        return

    def findAndMarkFinishedSubscriptions(self):
        """
        _findAndMarkFinishedSubscriptions_

        Find new finished subscriptions and mark as finished in WMBS.
        """
        myThread = threading.currentThread()

        myThread.transaction.begin()

        #Get the subscriptions that are now finished and mark them as such
        logging.info("Polling for finished subscriptions")
        finishedSubscriptions = self.daoFactory(classname = "Subscriptions.MarkNewFinishedSubscriptions")
        finishedSubscriptions.execute(self.stateID, timeOut = self.timeout)
        logging.info("Finished subscriptions updated")

        myThread.transaction.commit()

        return
    
    
    def getFinishedWorkflows(self):
        """
        1. Get finished workflows (a finished workflow is defined in Workflow.GetFinishedWorkflows)
        2. Get finished workflows with logCollect and Cleanup only.
        3. Combine those and return:
           finishedwfs - without LogCollect and CleanUp tasks
           finishedwfsWithLogCollectAndCleanUp - including LogCollect and CleanUp tasks
        """
        
        finishedWorkflowsDAO = self.daoFactory(classname = "Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()
        finishedLogCollectAndCleanUpwfs = finishedWorkflowsDAO.execute(onlySecondary=True)
        finishedwfsWithLogCollectAndCleanUp = {}
        for wf in finishedLogCollectAndCleanUpwfs:
            if wf in finishedwfs:
                finishedwfsWithLogCollectAndCleanUp[wf] = finishedwfs[wf]
        return (finishedwfs, finishedwfsWithLogCollectAndCleanUp)
        
    def completeTasks(self, finishedwfs):
        """
        _completeTasks_

        This method will call several auxiliary methods to do the following:
        
        1. Notify the WorkQueue about finished subscriptions
        2. Update the dbsbuffer_workflow table with the finished subscriptions
        """


        #Only delete those where the upload and notification succeeded
        logging.info("Found %d candidate workflows for completing: %s", len(finishedwfs), finishedwfs.keys())
        # update the completed flag in dbsbuffer_workflow table so blocks can be closed
        # create updateDBSBufferWorkflowComplete DAO
        if len(finishedwfs) == 0:
            return
        
        completedWorkflowsDAO = self.dbsDaoFactory(classname = "UpdateWorkflowsToCompleted")
        
        centralCouchAlive = True
        try:
            #TODO: need to enable when reqmgr2-wmstats is ready
            #abortedWorkflows = self.reqmgrCouchDBWriter.getRequestByStatus(["aborted"], format = "dict");
            abortedWorkflows = self.centralCouchDBWriter.getRequestByStatus(["aborted"])
            logging.info("There are %d requests in 'aborted' status in central couch." % len(abortedWorkflows))
            forceCompleteWorkflows = self.centralCouchDBWriter.getRequestByStatus(["force-complete"])
            logging.info("List of 'force-complete' workflows in central couch: %s" % forceCompleteWorkflows)
            
        except Exception as ex:
            centralCouchAlive = False
            logging.error("we will try again when remote couch server comes back\n%s" % str(ex))
        
        if centralCouchAlive:
            for workflow in finishedwfs:
                try:
                    #Notify the WorkQueue, if there is one
                    if self.workQueue is not None:
                        subList = []
                        logging.info("Marking subscriptions as Done ...")
                        for subs in finishedwfs[workflow]["workflows"].values():
                            subList.extend(subs)
                        self.notifyWorkQueue(subList)
                    
                    #Now we know the workflow as a whole is gone, we can delete the information from couch
                    if not self.useReqMgrForCompletionCheck:
                        self.requestLocalCouchDB.updateRequestStatus(workflow, "completed")
                        logging.info("status updated to completed %s" % workflow)
    
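                    # Map the central request status onto the agent-side final state:
                    # 'aborted' -> 'aborted-completed', 'force-complete' -> 'completed'.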
                    if workflow in abortedWorkflows:
                        #TODO: remove when reqmgr2-wmstats deployed
                        newState = "aborted-completed"
                    elif workflow in forceCompleteWorkflows:
                        newState = "completed"
                    else:
                        newState = None
                        
                    if newState is not None:
                        # update the reqmgr workload document only if the request manager is installed
                        if not self.useReqMgrForCompletionCheck:
                            # keep this until all the agents are updated so every request has the new state
                            # TODO: the agent should be able to write to the reqmgr db directly; add the
                            # right group in reqmgr
                            self.requestLocalCouchDB.updateRequestStatus(workflow, newState)
                        else:
                            try:
                                #TODO: try reqmgr1 call if it fails (reqmgr2Only - remove this line when reqmgr is replaced)
                                logging.info("Updating status to '%s' in both oracle and couchdb ..." % newState)
                                self.reqmgrSvc.updateRequestStatus(workflow, newState)
                                #And replace with this - remove all the exception handling
                                #self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                            except httplib.HTTPException as ex:
                                # An HTTPException with status 404 means this is a reqmgr2 request
                                if ex.status == 404:
                                    # try reqmgr2 call
                                    msg = "%s : reqmgr2 request: %s" % (workflow, str(ex))
                                    logging.warning(msg)
                                    self.reqmgr2Svc.updateRequestStatus(workflow, newState)
                                else:
                                    msg = "%s : fail to update status %s  with HTTP error: %s" % (workflow, newState, str(ex))
                                    logging.error(msg)
                                    raise ex
                            
                        logging.info("status updated to '%s' : %s" % (newState, workflow))
                    
                    completedWorkflowsDAO.execute([workflow])
        
                except TaskArchiverPollerException as ex:
                    #Something didn't go well when notifying the workqueue, abort!!!
                    logging.error("Something bad happened while archiving tasks.")
                    logging.error(str(ex))
                    continue
                except Exception as ex:
                    #Something didn't go well on couch, abort!!!
                    msg = "Problem while archiving tasks for workflow %s\n" % workflow
                    msg += "Exception message: %s" % str(ex)
                    msg += "\nTraceback: %s" % traceback.format_exc()
                    logging.error(msg)
                    continue
        return
    
    def notifyWorkQueue(self, subList):
        """
        _notifyWorkQueue_

        Tells the workQueue component that a particular subscription,
        or set of subscriptions, is done, and receives confirmation.
        """

        for sub in subList:
            try:
                self.workQueue.doneWork(SubscriptionId = sub)
            except WorkQueueNoMatchingElements:
                #Subscription wasn't known to WorkQueue, feel free to clean up
                logging.info("Local WorkQueue knows nothing about this subscription: %s" % sub)
            except Exception as ex:
                msg = "Error talking to workqueue: %s\n" % str(ex)
                msg += "Tried to complete the following: %s\n" % sub
                raise TaskArchiverPollerException(msg)

        return
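
As a reading aid, here is a minimal sketch of the configuration attributes that TaskArchiverPoller.__init__ above reads. The section and attribute names are taken from the code; the URLs, paths and values are placeholders, and the Configuration API used is the standard WMCore one.

from WMCore.Configuration import Configuration

config = Configuration()
config.component_("TaskArchiver")
config.TaskArchiver.useWorkQueue = False                  # skip local WorkQueue notification
config.TaskArchiver.timeOut = 3600                        # placeholder subscription timeout (seconds)
config.TaskArchiver.useReqMgrForCompletionCheck = True
config.TaskArchiver.ReqMgr2ServiceURL = "https://cmsweb.example/reqmgr2"  # placeholder URL
config.TaskArchiver.ReqMgrServiceURL = "https://cmsweb.example/reqmgr"    # placeholder URL
config.component_("JobCreator")
config.JobCreator.jobCacheDir = "/data/srv/jobs"          # placeholder path
config.component_("AnalyticsDataCollector")
config.AnalyticsDataCollector.centralRequestDBURL = "https://couch.example/reqmgr_workload_cache"  # placeholder URL

# Instantiating the poller also requires an initialized database thread
# (myThread.dbi), e.g. as set up by TestInit in the tests above.
poller = TaskArchiverPoller(config)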
Code example #10
File: Tier0Plugin_t.py Project: todor-ivanov/WMCore
class Tier0PluginTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Set up the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.requestCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.requestCouchDB, 'T0Request')
        self.testDir = self.testInit.generateWorkDir()
        reqDBURL = "%s/%s" % (os.environ['COUCHURL'], self.requestCouchDB)
        self.requestDBWriter = RequestDBWriter(reqDBURL, couchapp="T0Request")
        self.requestDBWriter._setNoStale()

        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return

    def tearDown(self):
        """
        _tearDown_

        Clear databases and delete files
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()

        return

    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow,
        every subscription must be unfinished at first
        """

        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = [
            'RepackMergewrite_QuadElectron_RAW',
            'RepackMergewrite_TriPhoton_RAW',
            'RepackMergewrite_SingleNeutrino_RAW'
        ]

        self.stateMap = {'Merge': [], 'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest(
            {'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {
            'spec': specPath,
            'owner': 'ItsAMeMario',
            'name': workflowName,
            'wfType': 'tier0'
        }
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' %
                                     (workflowName, task),
                                     **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        cleanupWorkflow = Workflow(
            task=
            '/Repack_Run481516_StreamZ/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW',
            **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return

    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Populate WMBS with an express-like workflow,
        every subscription must be unfinished at first
        """

        workflowName = 'Express_Run481516_StreamZFast'
        secondLevelTasks = [
            'ExpressMergewrite_StreamZFast_DQM',
            'ExpressMergewrite_ExpressPhysics_FEVT',
            'ExpressAlcaSkimwrite_StreamZFast_ALCARECO',
            'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT',
            'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO'
        ]
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMEndOfRunDQMHarvestMerged'

        self.stateMap = {'Merge': [], 'Harvesting': [], 'Processing Done': []}
        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest(
            {'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        expressTask = workload.newTask('Express')
        for task in secondLevelTasks:
            secondLevelTask = expressTask.addTask(task)
            if task == 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO':
                secondLevelTask.addTask(alcaHarvestTask)
            elif task == 'ExpressMergewrite_StreamZFast_DQM':
                secondLevelTask.addTask(dqmHarvestTask)

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS
        sharedFileset = Fileset(name='TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)

        options = {
            'spec': specPath,
            'owner': 'ItsAMeMario',
            'name': workflowName,
            'wfType': 'tier0'
        }
        topLevelWorkflow = Workflow(task='/%s/Express' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(sharedFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topLevelSub)
        for task in [
                x for x in secondLevelTasks if not x.count('CleanupUnmerged')
        ]:
            secondLevelWorkflow = Workflow(task='/%s/Express/%s' %
                                           (workflowName, task),
                                           **options)
            secondLevelWorkflow.create()
            mergeSub = Subscription(sharedFileset, secondLevelWorkflow)
            mergeSub.create()
            self.stateMap['Harvesting'].append(mergeSub)

        for (parent, child) in [
            ('ExpressAlcaSkimwrite_StreamZFast_ALCARECO', alcaHarvestTask),
            ('ExpressMergewrite_StreamZFast_DQM', dqmHarvestTask)
        ]:
            harvestingWorkflow = Workflow(task='/%s/Express/%s/%s' %
                                          (workflowName, parent, child),
                                          **options)
            harvestingWorkflow.create()
            harvestingSub = Subscription(sharedFileset, harvestingWorkflow)
            harvestingSub.create()
            self.stateMap['Processing Done'].append(harvestingSub)

        return

    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow,
        every subscription must be unfinished at first
        """

        # Populate disk and WMBS
        testArguments = PromptRecoWorkloadFactory.getTestArguments()

        workflowName = 'PromptReco_Run195360_Cosmics'
        factory = PromptRecoWorkloadFactory()
        testArguments["EnableHarvesting"] = True
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        workload = factory.factoryWorkloadConstruction(workflowName,
                                                       testArguments)

        wmbsHelper = WMBSHelper(workload,
                                'Reco',
                                'SomeBlock',
                                cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask,
                                              wmbsHelper.topLevelFileset)

        self.stateMap = {
            'AlcaSkim': [],
            'Merge': [],
            'Harvesting': [],
            'Processing Done': []
        }
        self.orderedStates = [
            'AlcaSkim', 'Merge', 'Harvesting', 'Processing Done'
        ]

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest(
            {'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = [
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
            '%s/RecoMergewrite_AOD', '%s/RecoMergewrite_DQM',
            '%s/RecoMergewrite_RECO'
        ]
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(
            name=
            '/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO'
        )
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' %
                                          (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' %
                                          (topLevelTask, dataTier, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(
            name=
            '/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM'
        )
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return

    def verifyStateTransitions(self,
                               transitionMethod='markFinished',
                               transitionTrigger=True):
        """
        _verifyStateTransitions_

        Utility method which goes through the list of states in self.orderedStates and
        finishes the tasks that demand a state transition in each step, according
        to the given transition method and trigger.
        It verifies that the request document in WMStats moves according to the transitions.
        """

        for idx in range(0, len(self.orderedStates) * 2):
            nextState = self.orderedStates[idx // 2]
            if (idx // 2) == 0:
                currentState = 'Closed'
            else:
                currentState = self.orderedStates[idx // 2 - 1]
            if idx % 2 == 0:
                for transitionObject in self.stateMap[nextState][:-1]:
                    method = getattr(transitionObject, transitionMethod)
                    method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus(
                    [currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus(
                    [nextState])
                self.assertEqual(
                    len(currentStateWorkflows), 1,
                    'Workflow moved incorrectly from %s' % currentState)
                self.assertEqual(
                    len(nextStateWorkflows), 0,
                    'Workflow moved incorrectly to %s' % nextState)
            else:
                transitionObject = self.stateMap[nextState][-1]
                method = getattr(transitionObject, transitionMethod)
                method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus(
                    [currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus(
                    [nextState])
                self.assertEqual(
                    len(currentStateWorkflows), 0,
                    'Workflow did not move correctly from %s' % currentState)
                self.assertEqual(
                    len(nextStateWorkflows), 1,
                    'Workflow did not move correctly to %s' % nextState)
        return

    def testA_RepackStates(self):
        """
        _testA_RepackStates_

        Set up an environment with a Repack workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupRepackWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions('markOpen', False)

        return

    def testB_ExpressStates(self):
        """
        _testB_ExpressStates_

        Set up an environment with an Express workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupExpressWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return

    def testC_PromptRecoStates(self):
        """
        _testC_PromptRecoStates_

        Set up an environment with a PromptReco workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupPromptRecoWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return
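
To make the two-pass check concrete, here is a condensed, hypothetical walk of the first Repack transition ('Closed' -> 'Merge'); stateMap, requestDBWriter and the plugin stand for the fixtures built in the test above, and the asserts are illustrative, not part of the original test.

plugin = Tier0Plugin()

for fileset in stateMap['Merge'][:-1]:          # even pass: close all but one trigger
    fileset.markOpen(False)                     # Repack uses markOpen(False) as the trigger
plugin([], requestDBWriter, requestDBWriter)
assert len(requestDBWriter.getRequestByStatus(['Closed'])) == 1   # no transition yet

stateMap['Merge'][-1].markOpen(False)           # odd pass: close the last trigger
plugin([], requestDBWriter, requestDBWriter)
assert len(requestDBWriter.getRequestByStatus(['Merge'])) == 1    # moved to 'Merge'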